first steps into the new parser
This commit is contained in:
parent
a20abd65ac
commit
c1e762fa37
6 changed files with 117 additions and 513 deletions
1
Cargo.lock
generated
1
Cargo.lock
generated
|
@ -1071,7 +1071,6 @@ dependencies = [
|
|||
"criterion",
|
||||
"lazy_static",
|
||||
"logos",
|
||||
"serde_json",
|
||||
"swc_common",
|
||||
"swc_ecma_ast",
|
||||
"swc_ecma_codegen",
|
||||
|
|
|
@ -9,7 +9,6 @@ ariadne = "0.5.1"
|
|||
chumsky = "0.10.1"
|
||||
lazy_static = "1.5.0"
|
||||
logos = "0.15.0"
|
||||
serde_json = "1.0.140"
|
||||
swc_common = "13.0.2"
|
||||
swc_ecma_ast = "13.0.0"
|
||||
swc_ecma_codegen = "15.0.1"
|
||||
|
|
49
src/lexer.rs
49
src/lexer.rs
|
@ -1,7 +1,6 @@
|
|||
use logos::Logos;
|
||||
use serde_json;
|
||||
|
||||
fn parse_radix(s: &str, radix: u32) -> Result<f64, String> {
|
||||
fn parse_radix(s: &str, radix: u32) -> f64 {
|
||||
let s = s.replace('_', "");
|
||||
let (sign, num) = if s.starts_with('-') {
|
||||
(-1.0, &s[3..]) // skip "-0x", "-0b" or "-0o"
|
||||
|
@ -9,22 +8,16 @@ fn parse_radix(s: &str, radix: u32) -> Result<f64, String> {
|
|||
(1.0, &s[2..])
|
||||
};
|
||||
|
||||
match u64::from_str_radix(num, radix) {
|
||||
Ok(val) => Ok(sign * val as f64),
|
||||
Err(_) => Err(format!(
|
||||
"Failed to parse number \"{}\" with radix {}",
|
||||
s, radix
|
||||
)),
|
||||
}
|
||||
let value = u64::from_str_radix(num, radix).unwrap();
|
||||
sign * value as f64
|
||||
}
|
||||
|
||||
fn parse_number(s: &str) -> Result<f64, String> {
|
||||
fn parse_number(s: &str) -> f64 {
|
||||
let s = s.replace('_', "");
|
||||
s.parse::<f64>()
|
||||
.map_err(|_| format!("Failed to parse number \"{}\"", s))
|
||||
s.parse::<f64>().unwrap()
|
||||
}
|
||||
|
||||
#[derive(Logos, Debug, PartialEq)]
|
||||
#[derive(Logos, Clone, Debug, PartialEq)]
|
||||
// #[logos(extras = (u32, u32))]
|
||||
#[logos(skip r"\s+")]
|
||||
pub enum Token<'src> {
|
||||
|
@ -32,7 +25,7 @@ pub enum Token<'src> {
|
|||
#[regex(r"-?0[bB][01_]+", |lex| parse_radix(lex.slice(), 2))]
|
||||
#[regex(r"-?0[oO][0-7_]+", |lex| parse_radix(lex.slice(), 8))]
|
||||
#[regex(r"-?(?:0|[1-9][0-9_]*)(?:\.\d+)?(?:[eE][+-]?\d+)?", |lex| parse_number(lex.slice()))]
|
||||
Number(Result<f64, String>),
|
||||
Number(f64),
|
||||
|
||||
#[token("NaN")]
|
||||
NaN,
|
||||
|
@ -205,35 +198,37 @@ mod tests {
|
|||
assert_eq!(lex.next(), Some(Ok(Token::Const)));
|
||||
assert_eq!(lex.next(), Some(Ok(Token::Identifier("foo"))));
|
||||
assert_eq!(lex.next(), Some(Ok(Token::Assign)));
|
||||
assert_eq!(lex.next(), Some(Ok(Token::Number(Ok(42.0)))));
|
||||
assert_eq!(lex.next(), Some(Ok(Token::Number(42.0))));
|
||||
assert_eq!(lex.next(), Some(Ok(Token::Semicolon)));
|
||||
}
|
||||
#[test]
|
||||
fn test_numbers() {
|
||||
let mut lex = Token::lexer("42 * -0.2 + 4e3 - 0xFF / 0b1010 + 1_000_000;");
|
||||
assert_eq!(lex.next(), Some(Ok(Token::Number(Ok(42.0)))));
|
||||
assert_eq!(lex.next(), Some(Ok(Token::Number(42.0))));
|
||||
assert_eq!(lex.next(), Some(Ok(Token::Mul)));
|
||||
assert_eq!(lex.next(), Some(Ok(Token::Number(Ok(-0.2)))));
|
||||
assert_eq!(lex.next(), Some(Ok(Token::Number(-0.2))));
|
||||
assert_eq!(lex.next(), Some(Ok(Token::Add)));
|
||||
assert_eq!(lex.next(), Some(Ok(Token::Number(Ok(4000.0)))));
|
||||
assert_eq!(lex.next(), Some(Ok(Token::Number(4000.0))));
|
||||
assert_eq!(lex.next(), Some(Ok(Token::Sub)));
|
||||
assert_eq!(lex.next(), Some(Ok(Token::Number(Ok(255.0)))));
|
||||
assert_eq!(lex.next(), Some(Ok(Token::Number(255.0))));
|
||||
assert_eq!(lex.next(), Some(Ok(Token::Div)));
|
||||
assert_eq!(lex.next(), Some(Ok(Token::Number(Ok(10.0)))));
|
||||
assert_eq!(lex.next(), Some(Ok(Token::Number(10.0))));
|
||||
assert_eq!(lex.next(), Some(Ok(Token::Add)));
|
||||
assert_eq!(lex.next(), Some(Ok(Token::Number(Ok(1000000.0)))));
|
||||
assert_eq!(lex.next(), Some(Ok(Token::Number(1000000.0))));
|
||||
assert_eq!(lex.next(), Some(Ok(Token::Semicolon)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_strings() {
|
||||
let mut lex = Token::lexer("\"Foo\" 'Single' 'Sin\\'Esq\\'gle'");
|
||||
assert_eq!(lex.next(), Some(Ok(Token::String("Foo".to_owned()))));
|
||||
// let mut lex = Token::lexer("\"Foo\" 'Single' 'Sin\\'Esq\\'gle'");
|
||||
let mut lex = Token::lexer("\"Double\" 'Single' \"With Spaces?\"");
|
||||
assert_eq!(lex.next(), Some(Ok(Token::String("Double".to_owned()))));
|
||||
assert_eq!(lex.next(), Some(Ok(Token::String("Single".to_owned()))));
|
||||
assert_eq!(
|
||||
lex.next(),
|
||||
Some(Ok(Token::String("Sin'Esq'gle".to_owned())))
|
||||
Some(Ok(Token::String("With Spaces?".to_owned())))
|
||||
);
|
||||
// TODO: test strings with escaped quotes
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -274,7 +269,7 @@ mod tests {
|
|||
assert_eq!(lex.next(), Some(Ok(Token::Dot)));
|
||||
assert_eq!(lex.next(), Some(Ok(Token::Identifier("length"))));
|
||||
assert_eq!(lex.next(), Some(Ok(Token::Le)));
|
||||
assert_eq!(lex.next(), Some(Ok(Token::Number(Ok(2.0)))));
|
||||
assert_eq!(lex.next(), Some(Ok(Token::Number(2.0))));
|
||||
assert_eq!(lex.next(), Some(Ok(Token::BraceOpen)));
|
||||
|
||||
// THIRD LINE
|
||||
|
@ -302,7 +297,7 @@ mod tests {
|
|||
assert_eq!(lex.next(), Some(Ok(Token::BraceOpen)));
|
||||
|
||||
// SIXTH LINE
|
||||
assert_eq!(lex.next(), Some(Ok(Token::Number(Ok(3.0)))));
|
||||
assert_eq!(lex.next(), Some(Ok(Token::Number(3.0))));
|
||||
assert_eq!(lex.next(), Some(Ok(Token::FatArrow)));
|
||||
assert_eq!(
|
||||
lex.next(),
|
||||
|
@ -313,7 +308,7 @@ mod tests {
|
|||
assert_eq!(lex.next(), Some(Ok(Token::Comma)));
|
||||
|
||||
// SEVENTH LINE
|
||||
assert_eq!(lex.next(), Some(Ok(Token::Number(Ok(4.0)))));
|
||||
assert_eq!(lex.next(), Some(Ok(Token::Number(4.0))));
|
||||
assert_eq!(lex.next(), Some(Ok(Token::FatArrow)));
|
||||
assert_eq!(
|
||||
lex.next(),
|
||||
|
|
|
@ -1,95 +0,0 @@
|
|||
use chumsky::prelude::*;
|
||||
use std::fmt::{Display, Formatter, Result};
|
||||
|
||||
// A few type definitions to be used by our parsers below
|
||||
pub type Span = SimpleSpan;
|
||||
pub type Spanned<T> = (T, Span);
|
||||
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
pub enum Token<'src> {
|
||||
None,
|
||||
Bool(bool),
|
||||
Num(f64),
|
||||
Str(&'src str),
|
||||
Op(&'src str),
|
||||
Ctrl(char),
|
||||
Ident(&'src str),
|
||||
Fn,
|
||||
Var,
|
||||
If,
|
||||
Else,
|
||||
}
|
||||
|
||||
impl Display for Token<'_> {
|
||||
fn fmt(&self, f: &mut Formatter) -> Result {
|
||||
match self {
|
||||
Token::None => write!(f, "none"),
|
||||
Token::Bool(x) => write!(f, "{x}"),
|
||||
Token::Num(n) => write!(f, "{n}"),
|
||||
Token::Str(s) => write!(f, "{s}"),
|
||||
Token::Op(s) => write!(f, "{s}"),
|
||||
Token::Ctrl(c) => write!(f, "{c}"),
|
||||
Token::Ident(s) => write!(f, "{s}"),
|
||||
Token::Fn => write!(f, "fn"),
|
||||
Token::Var => write!(f, "var"),
|
||||
Token::If => write!(f, "if"),
|
||||
Token::Else => write!(f, "else"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn lexer<'src>()
|
||||
-> impl Parser<'src, &'src str, Vec<Spanned<Token<'src>>>, extra::Err<Rich<'src, char, Span>>> {
|
||||
// A parser for numbers
|
||||
let num = text::int(10)
|
||||
.then(just('.').then(text::digits(10)).or_not())
|
||||
.to_slice()
|
||||
.from_str()
|
||||
.unwrapped()
|
||||
.map(Token::Num);
|
||||
|
||||
// A parser for strings
|
||||
let str_ = just('"')
|
||||
.ignore_then(none_of('"').repeated().to_slice())
|
||||
.then_ignore(just('"'))
|
||||
.map(Token::Str);
|
||||
|
||||
// A parser for operators
|
||||
let op = one_of("+*-/!=")
|
||||
.repeated()
|
||||
.at_least(1)
|
||||
.to_slice()
|
||||
.map(Token::Op);
|
||||
|
||||
// A parser for control characters (delimiters, semicolons, etc.)
|
||||
let ctrl = one_of("()[]{};,").map(Token::Ctrl);
|
||||
|
||||
// A parser for identifiers and keywords
|
||||
let ident = text::ascii::ident().map(|ident: &str| match ident {
|
||||
"fn" => Token::Fn,
|
||||
"var" => Token::Var,
|
||||
"let" => Token::Var, // var and let are synonyms
|
||||
"if" => Token::If,
|
||||
"else" => Token::Else,
|
||||
"true" => Token::Bool(true),
|
||||
"false" => Token::Bool(false),
|
||||
"none" => Token::None,
|
||||
_ => Token::Ident(ident),
|
||||
});
|
||||
|
||||
// A single token can be one of the above
|
||||
let token = num.or(str_).or(op).or(ctrl).or(ident);
|
||||
|
||||
let comment = just("//")
|
||||
.then(any().and_is(just('\n').not()).repeated())
|
||||
.padded();
|
||||
|
||||
token
|
||||
.map_with(|tok, e| (tok, e.span()))
|
||||
.padded_by(comment.repeated())
|
||||
.padded()
|
||||
// If we encounter an error, skip and attempt to lex the next character as a token instead
|
||||
.recover_with(skip_then_retry_until(any().ignored(), end()))
|
||||
.repeated()
|
||||
.collect()
|
||||
}
|
|
@ -1,6 +1,6 @@
|
|||
mod lexer;
|
||||
mod parser;
|
||||
// mod emitter;
|
||||
// mod parser;
|
||||
// mod transformer;
|
||||
|
||||
use anyhow::Result;
|
||||
|
|
482
src/parser.rs
482
src/parser.rs
|
@ -1,406 +1,112 @@
|
|||
use crate::lexer::{Span, Spanned, Token, lexer};
|
||||
use chumsky::{input::ValueInput, prelude::*};
|
||||
use crate::lexer::Token;
|
||||
use chumsky::{
|
||||
input::{Stream, ValueInput},
|
||||
prelude::*,
|
||||
};
|
||||
|
||||
use ariadne::{Color, Label, Report, ReportKind, sources};
|
||||
use std::collections::HashMap;
|
||||
type Span = SimpleSpan;
|
||||
type Spanned<T> = (T, Span);
|
||||
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
pub enum Value<'src> {
|
||||
None,
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub enum Expr {
|
||||
Number(f64),
|
||||
String(String),
|
||||
Bool(bool),
|
||||
Num(f64),
|
||||
Str(&'src str),
|
||||
List(Vec<Self>),
|
||||
Func(&'src str),
|
||||
Undefined,
|
||||
Some(Box<Expr>),
|
||||
None,
|
||||
Ok(Box<Expr>),
|
||||
Err(Box<Expr>),
|
||||
}
|
||||
|
||||
impl std::fmt::Display for Value<'_> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
match self {
|
||||
Self::None => write!(f, "none"),
|
||||
Self::Bool(x) => write!(f, "{x}"),
|
||||
Self::Num(x) => write!(f, "{x}"),
|
||||
Self::Str(x) => write!(f, "{x}"),
|
||||
Self::List(xs) => write!(
|
||||
f,
|
||||
"[{}]",
|
||||
xs.iter()
|
||||
.map(|x| x.to_string())
|
||||
.collect::<Vec<_>>()
|
||||
.join(", ")
|
||||
),
|
||||
Self::Func(name) => write!(f, "<function: {name}>"),
|
||||
}
|
||||
/// parse primary tokens, like numbers, strings or booleans
|
||||
pub fn primary<'tokens, 'src: 'tokens, I>()
|
||||
-> impl Parser<'tokens, I, Expr, extra::Err<Rich<'tokens, Token<'src>>>>
|
||||
where
|
||||
I: ValueInput<'tokens, Token = Token<'src>, Span = SimpleSpan>,
|
||||
{
|
||||
select! {
|
||||
Token::Number(n) => Expr::Number(n),
|
||||
Token::String(s) => Expr::String(s),
|
||||
Token::Undefined => Expr::Undefined,
|
||||
Token::None => Expr::None,
|
||||
Token::Bool(b) => Expr::Bool(b),
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub enum BinaryOp {
|
||||
Add,
|
||||
Sub,
|
||||
Mul,
|
||||
Div,
|
||||
Eq,
|
||||
NotEq,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum Expr<'src> {
|
||||
Error,
|
||||
Value(Value<'src>),
|
||||
List(Vec<Spanned<Self>>),
|
||||
Local(&'src str),
|
||||
If(Box<Spanned<Self>>, Box<Spanned<Self>>, Box<Spanned<Self>>),
|
||||
Then(Box<Spanned<Self>>, Box<Spanned<Self>>),
|
||||
Binary(Box<Spanned<Self>>, BinaryOp, Box<Spanned<Self>>),
|
||||
Call(Box<Spanned<Self>>, Spanned<Vec<Spanned<Self>>>),
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Block<'src> {
|
||||
stmts: Vec<Spanned<Stmt<'src>>>,
|
||||
expr: Option<Box<Spanned<Expr<'src>>>>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum Stmt<'src> {
|
||||
Var(&'src str, Box<Spanned<Self>>, Box<Spanned<Self>>),
|
||||
Expr(Box<Spanned<Expr<'src>>>),
|
||||
}
|
||||
|
||||
// A function node in the AST.
|
||||
#[derive(Debug)]
|
||||
pub struct Func<'src> {
|
||||
pub args: Vec<&'src str>,
|
||||
pub span: Span,
|
||||
pub body: Spanned<Expr<'src>>,
|
||||
}
|
||||
|
||||
fn expr_parser<'tokens, 'src: 'tokens, I>()
|
||||
-> impl Parser<'tokens, I, Spanned<Expr<'src>>, extra::Err<Rich<'tokens, Token<'src>, Span>>> + Clone
|
||||
fn parenthesized<'tokens, 'src: 'tokens, I>(
|
||||
inner: impl Parser<'tokens, I, Expr, extra::Err<Rich<'tokens, Token<'src>>>>,
|
||||
) -> impl Parser<'tokens, I, Expr, extra::Err<Rich<'tokens, Token<'src>>>>
|
||||
where
|
||||
I: ValueInput<'tokens, Token = Token<'src>, Span = Span>,
|
||||
I: ValueInput<'tokens, Token = Token<'src>, Span = SimpleSpan>,
|
||||
{
|
||||
recursive(|expr| {
|
||||
let inline_expr = recursive(|inline_expr| {
|
||||
let val = select! {
|
||||
Token::None => Expr::Value(Value::None),
|
||||
Token::Bool(x) => Expr::Value(Value::Bool(x)),
|
||||
Token::Num(n) => Expr::Value(Value::Num(n)),
|
||||
Token::Str(s) => Expr::Value(Value::Str(s)),
|
||||
}
|
||||
.labelled("value");
|
||||
just(Token::ParenOpen)
|
||||
.ignore_then(inner)
|
||||
.then_ignore(just(Token::ParenClose))
|
||||
}
|
||||
|
||||
let ident = select! { Token::Ident(ident) => ident }.labelled("identifier");
|
||||
fn constructor<'tokens, 'src: 'tokens, I>(
|
||||
keyword: Token<'src>,
|
||||
constructor_fn: impl Fn(Expr) -> Expr,
|
||||
) -> impl Parser<'tokens, I, Expr, extra::Err<Rich<'tokens, Token<'src>>>>
|
||||
where
|
||||
I: ValueInput<'tokens, Token = Token<'src>, Span = SimpleSpan>,
|
||||
{
|
||||
just(keyword)
|
||||
.ignore_then(parenthesized(primary()))
|
||||
.map(constructor_fn)
|
||||
}
|
||||
|
||||
// A list of expressions
|
||||
let items = expr
|
||||
.clone()
|
||||
.separated_by(just(Token::Ctrl(',')))
|
||||
.allow_trailing()
|
||||
.collect::<Vec<_>>();
|
||||
pub fn constructors<'tokens, 'src: 'tokens, I>()
|
||||
-> impl Parser<'tokens, I, Expr, extra::Err<Rich<'tokens, Token<'src>>>>
|
||||
where
|
||||
I: ValueInput<'tokens, Token = Token<'src>, Span = SimpleSpan>,
|
||||
{
|
||||
constructor(Token::Some, |inner| Expr::Some(Box::new(inner)))
|
||||
.or(constructor(Token::Ok, |inner| Expr::Ok(Box::new(inner))))
|
||||
.or(constructor(Token::Err, |inner| Expr::Err(Box::new(inner))))
|
||||
}
|
||||
|
||||
// A let expression
|
||||
let let_ = just(Token::Var)
|
||||
.ignore_then(ident)
|
||||
.then_ignore(just(Token::Op("=")))
|
||||
.then(inline_expr)
|
||||
.then_ignore(just(Token::Ctrl(';')))
|
||||
.then(expr.clone())
|
||||
.map(|((name, val), body)| Stmt::Var(name, Box::new(val), Box::new(body)));
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use logos::Logos;
|
||||
|
||||
let list = items
|
||||
.clone()
|
||||
.map(Expr::List)
|
||||
.delimited_by(just(Token::Ctrl('[')), just(Token::Ctrl(']')));
|
||||
#[test]
|
||||
fn test_parse_single_number() {
|
||||
let tokens: Vec<_> = Token::lexer("42").map(|tok| tok.unwrap()).collect();
|
||||
let result = primary().parse(Stream::from_iter(tokens)).into_result();
|
||||
assert_eq!(result, Ok(Expr::Number(42.0)));
|
||||
}
|
||||
#[test]
|
||||
fn test_parse_single_string() {
|
||||
let tokens: Vec<_> = Token::lexer("\"Foo\"").map(|tok| tok.unwrap()).collect();
|
||||
let result = primary().parse(Stream::from_iter(tokens)).into_result();
|
||||
assert_eq!(result, Ok(Expr::String("Foo".to_owned())));
|
||||
}
|
||||
#[test]
|
||||
fn test_constructors() {
|
||||
let tokens_some: Vec<_> = Token::lexer("Some(23)").map(|tok| tok.unwrap()).collect();
|
||||
let result_some = constructors()
|
||||
.parse(Stream::from_iter(tokens_some))
|
||||
.into_result();
|
||||
assert_eq!(result_some, Ok(Expr::Some(Box::new(Expr::Number(23.0)))));
|
||||
|
||||
// 'Atoms' are expressions that contain no ambiguity
|
||||
let atom = val
|
||||
.or(ident.map(Expr::Local))
|
||||
.or(let_)
|
||||
.or(list)
|
||||
// In Nano Rust, `print` is just a keyword, just like Python 2, for simplicity
|
||||
.map_with(|expr, e| (expr, e.span()))
|
||||
// Atoms can also just be normal expressions, but surrounded with parentheses
|
||||
.or(expr
|
||||
.clone()
|
||||
.delimited_by(just(Token::Ctrl('(')), just(Token::Ctrl(')'))))
|
||||
// Attempt to recover anything that looks like a parenthesised expression but contains errors
|
||||
.recover_with(via_parser(nested_delimiters(
|
||||
Token::Ctrl('('),
|
||||
Token::Ctrl(')'),
|
||||
[
|
||||
(Token::Ctrl('['), Token::Ctrl(']')),
|
||||
(Token::Ctrl('{'), Token::Ctrl('}')),
|
||||
],
|
||||
|span| (Expr::Error, span),
|
||||
)))
|
||||
// Attempt to recover anything that looks like a list but contains errors
|
||||
.recover_with(via_parser(nested_delimiters(
|
||||
Token::Ctrl('['),
|
||||
Token::Ctrl(']'),
|
||||
[
|
||||
(Token::Ctrl('('), Token::Ctrl(')')),
|
||||
(Token::Ctrl('{'), Token::Ctrl('}')),
|
||||
],
|
||||
|span| (Expr::Error, span),
|
||||
)))
|
||||
.boxed();
|
||||
|
||||
// Function calls have very high precedence so we prioritise them
|
||||
let call = atom.foldl_with(
|
||||
items
|
||||
.delimited_by(just(Token::Ctrl('(')), just(Token::Ctrl(')')))
|
||||
.map_with(|args, e| (args, e.span()))
|
||||
.repeated(),
|
||||
|f, args, e| (Expr::Call(Box::new(f), args), e.span()),
|
||||
);
|
||||
|
||||
// Product ops (multiply and divide) have equal precedence
|
||||
let op = just(Token::Op("*"))
|
||||
.to(BinaryOp::Mul)
|
||||
.or(just(Token::Op("/")).to(BinaryOp::Div));
|
||||
let product = call
|
||||
.clone()
|
||||
.foldl_with(op.then(call).repeated(), |a, (op, b), e| {
|
||||
(Expr::Binary(Box::new(a), op, Box::new(b)), e.span())
|
||||
});
|
||||
|
||||
// Sum ops (add and subtract) have equal precedence
|
||||
let op = just(Token::Op("+"))
|
||||
.to(BinaryOp::Add)
|
||||
.or(just(Token::Op("-")).to(BinaryOp::Sub));
|
||||
let sum = product
|
||||
.clone()
|
||||
.foldl_with(op.then(product).repeated(), |a, (op, b), e| {
|
||||
(Expr::Binary(Box::new(a), op, Box::new(b)), e.span())
|
||||
});
|
||||
|
||||
// Comparison ops (equal, not-equal) have equal precedence
|
||||
let op = just(Token::Op("=="))
|
||||
.to(BinaryOp::Eq)
|
||||
.or(just(Token::Op("!=")).to(BinaryOp::NotEq));
|
||||
let compare = sum
|
||||
.clone()
|
||||
.foldl_with(op.then(sum).repeated(), |a, (op, b), e| {
|
||||
(Expr::Binary(Box::new(a), op, Box::new(b)), e.span())
|
||||
});
|
||||
|
||||
compare.labelled("expression").as_context()
|
||||
});
|
||||
|
||||
// Blocks are expressions but delimited with braces
|
||||
let block = expr
|
||||
.clone()
|
||||
.delimited_by(just(Token::Ctrl('{')), just(Token::Ctrl('}')))
|
||||
// Attempt to recover anything that looks like a block but contains errors
|
||||
.recover_with(via_parser(nested_delimiters(
|
||||
Token::Ctrl('{'),
|
||||
Token::Ctrl('}'),
|
||||
[
|
||||
(Token::Ctrl('('), Token::Ctrl(')')),
|
||||
(Token::Ctrl('['), Token::Ctrl(']')),
|
||||
],
|
||||
|span| (Expr::Error, span),
|
||||
)));
|
||||
|
||||
let if_ = recursive(|if_| {
|
||||
just(Token::If)
|
||||
.ignore_then(expr.clone())
|
||||
.then(block.clone())
|
||||
.then(
|
||||
just(Token::Else)
|
||||
.ignore_then(block.clone().or(if_))
|
||||
.or_not(),
|
||||
)
|
||||
.map_with(|((cond, a), b), e| {
|
||||
(
|
||||
Expr::If(
|
||||
Box::new(cond),
|
||||
Box::new(a),
|
||||
// If an `if` expression has no trailing `else` block, we magic up one that just produces none
|
||||
Box::new(b.unwrap_or_else(|| (Expr::Value(Value::None), e.span()))),
|
||||
),
|
||||
e.span(),
|
||||
)
|
||||
})
|
||||
});
|
||||
|
||||
// Both blocks and `if` are 'block expressions' and can appear in the place of statements
|
||||
let block_expr = block.or(if_);
|
||||
|
||||
let block_chain = block_expr
|
||||
.clone()
|
||||
.foldl_with(block_expr.clone().repeated(), |a, b, e| {
|
||||
(Expr::Then(Box::new(a), Box::new(b)), e.span())
|
||||
});
|
||||
|
||||
let block_recovery = nested_delimiters(
|
||||
Token::Ctrl('{'),
|
||||
Token::Ctrl('}'),
|
||||
[
|
||||
(Token::Ctrl('('), Token::Ctrl(')')),
|
||||
(Token::Ctrl('['), Token::Ctrl(']')),
|
||||
],
|
||||
|span| (Expr::Error, span),
|
||||
let tokens_ok: Vec<_> = Token::lexer("Ok(\"foo\")")
|
||||
.map(|tok| tok.unwrap())
|
||||
.collect();
|
||||
let result_ok = constructors()
|
||||
.parse(Stream::from_iter(tokens_ok))
|
||||
.into_result();
|
||||
assert_eq!(
|
||||
result_ok,
|
||||
Ok(Expr::Ok(Box::new(Expr::String("foo".to_owned()))))
|
||||
);
|
||||
|
||||
block_chain
|
||||
.labelled("block")
|
||||
// Expressions, chained by semicolons, are statements
|
||||
.or(inline_expr.clone())
|
||||
.recover_with(skip_then_retry_until(
|
||||
block_recovery.ignored().or(any().ignored()),
|
||||
one_of([
|
||||
Token::Ctrl(';'),
|
||||
Token::Ctrl('}'),
|
||||
Token::Ctrl(')'),
|
||||
Token::Ctrl(']'),
|
||||
])
|
||||
.ignored(),
|
||||
))
|
||||
.foldl_with(
|
||||
just(Token::Ctrl(';')).ignore_then(expr.or_not()).repeated(),
|
||||
|a, b, e| {
|
||||
let span: Span = e.span();
|
||||
(
|
||||
Expr::Then(
|
||||
Box::new(a),
|
||||
// If there is no b expression then its span is the end of the statement/block.
|
||||
Box::new(
|
||||
b.unwrap_or_else(|| (Expr::Value(Value::None), span.to_end())),
|
||||
),
|
||||
),
|
||||
span,
|
||||
)
|
||||
},
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
fn funcs_parser<'tokens, 'src: 'tokens, I>()
|
||||
-> impl Parser<'tokens, I, HashMap<&'src str, Func<'src>>, extra::Err<Rich<'tokens, Token<'src>, Span>>>
|
||||
+ Clone
|
||||
where
|
||||
I: ValueInput<'tokens, Token = Token<'src>, Span = Span>,
|
||||
{
|
||||
let ident = select! { Token::Ident(ident) => ident };
|
||||
|
||||
// Argument lists are just identifiers separated by commas, surrounded by parentheses
|
||||
let args = ident
|
||||
.separated_by(just(Token::Ctrl(',')))
|
||||
.allow_trailing()
|
||||
.collect()
|
||||
.delimited_by(just(Token::Ctrl('(')), just(Token::Ctrl(')')))
|
||||
.labelled("function args");
|
||||
|
||||
let func = just(Token::Fn)
|
||||
.ignore_then(
|
||||
ident
|
||||
.map_with(|name, e| (name, e.span()))
|
||||
.labelled("function name"),
|
||||
)
|
||||
.then(args)
|
||||
.map_with(|start, e| (start, e.span()))
|
||||
.then(
|
||||
expr_parser()
|
||||
.delimited_by(just(Token::Ctrl('{')), just(Token::Ctrl('}')))
|
||||
// Attempt to recover anything that looks like a function body but contains errors
|
||||
.recover_with(via_parser(nested_delimiters(
|
||||
Token::Ctrl('{'),
|
||||
Token::Ctrl('}'),
|
||||
[
|
||||
(Token::Ctrl('('), Token::Ctrl(')')),
|
||||
(Token::Ctrl('['), Token::Ctrl(']')),
|
||||
],
|
||||
|span| (Expr::Error, span),
|
||||
))),
|
||||
)
|
||||
.map(|(((name, args), span), body)| (name, Func { args, span, body }))
|
||||
.labelled("function");
|
||||
|
||||
func.repeated()
|
||||
.collect::<Vec<_>>()
|
||||
.validate(|fs, _, emitter| {
|
||||
let mut funcs = HashMap::new();
|
||||
for ((name, name_span), f) in fs {
|
||||
if funcs.insert(name, f).is_some() {
|
||||
emitter.emit(Rich::custom(
|
||||
name_span,
|
||||
format!("Function '{name}' already exists"),
|
||||
));
|
||||
}
|
||||
}
|
||||
funcs
|
||||
})
|
||||
}
|
||||
|
||||
pub fn parse<'src>(
|
||||
filename: String,
|
||||
src: &'src str,
|
||||
debug: bool,
|
||||
) -> Option<(HashMap<&'src str, Func<'src>>, SimpleSpan)> {
|
||||
let (tokens, lex_errs) = lexer().parse(src).into_output_errors();
|
||||
|
||||
let (ast, parse_errs) = if let Some(tokens) = &tokens {
|
||||
let (ast, parse_errs) = funcs_parser()
|
||||
.map_with(|ast, e| (ast, e.span()))
|
||||
.parse(
|
||||
tokens
|
||||
.as_slice()
|
||||
.map((src.len()..src.len()).into(), |(t, s)| (t, s)),
|
||||
)
|
||||
.into_output_errors();
|
||||
|
||||
if debug {
|
||||
if let Some((funcs, _file_span)) = ast
|
||||
.as_ref()
|
||||
.filter(|_| lex_errs.len() + parse_errs.len() == 0)
|
||||
{
|
||||
println!("{funcs:#?}")
|
||||
}
|
||||
}
|
||||
|
||||
(ast, parse_errs)
|
||||
} else {
|
||||
(None, Vec::new())
|
||||
};
|
||||
|
||||
diagnostics(filename, lex_errs, parse_errs, src);
|
||||
|
||||
ast
|
||||
}
|
||||
|
||||
fn diagnostics<'src>(
|
||||
filename: String,
|
||||
lex_errs: Vec<Rich<'_, char>>,
|
||||
parse_errs: Vec<Rich<'_, Token<'_>>>,
|
||||
src: &'src str,
|
||||
) {
|
||||
lex_errs
|
||||
.into_iter()
|
||||
.map(|e| e.map_token(|c| c.to_string()))
|
||||
.chain(
|
||||
parse_errs
|
||||
.into_iter()
|
||||
.map(|e| e.map_token(|tok| tok.to_string())),
|
||||
)
|
||||
.for_each(|e| {
|
||||
Report::build(ReportKind::Error, (filename.clone(), e.span().into_range()))
|
||||
.with_config(ariadne::Config::new().with_index_type(ariadne::IndexType::Byte))
|
||||
.with_message(e.to_string())
|
||||
.with_label(
|
||||
Label::new((filename.clone(), e.span().into_range()))
|
||||
.with_message(e.reason().to_string())
|
||||
.with_color(Color::Red),
|
||||
)
|
||||
.with_labels(e.contexts().map(|(label, span)| {
|
||||
Label::new((filename.clone(), span.into_range()))
|
||||
.with_message(format!("while parsing this {label}"))
|
||||
.with_color(Color::Yellow)
|
||||
}))
|
||||
.finish()
|
||||
.print(sources([(filename.clone(), src)]))
|
||||
.unwrap()
|
||||
});
|
||||
let tokens_err: Vec<_> = Token::lexer("Err(None)").map(|tok| tok.unwrap()).collect();
|
||||
let result_err = constructors()
|
||||
.parse(Stream::from_iter(tokens_err))
|
||||
.into_result();
|
||||
assert_eq!(result_err, Ok(Expr::Err(Box::new(Expr::None))));
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue