95 lines
2.7 KiB
Rust
95 lines
2.7 KiB
Rust
use chumsky::prelude::*;
|
|
use std::fmt::{Display, Formatter, Result};
|
|
|
|
// A few type definitions to be used by our parsers below
|
|
pub type Span = SimpleSpan;
|
|
pub type Spanned<T> = (T, Span);
|
|
|
|
/// A single lexical token.
///
/// The string-carrying variants borrow from the source text (`'src`) rather
/// than owning their contents.
#[derive(Clone, Debug, PartialEq)]
pub enum Token<'src> {
    /// The `none` literal.
    None,
    /// A boolean literal (`true` / `false`).
    Bool(bool),
    /// A numeric literal, stored as `f64`.
    Num(f64),
    /// The contents of a string literal, without the surrounding quotes.
    Str(&'src str),
    /// An operator, kept as the matched source slice.
    Op(&'src str),
    /// A single control character (delimiter, semicolon, comma, ...).
    Ctrl(char),
    /// An identifier that is not one of the keywords below.
    Ident(&'src str),
    /// The `fn` keyword.
    Fn,
    /// The variable-declaration keyword.
    Var,
    /// The `if` keyword.
    If,
    /// The `else` keyword.
    Else,
}
|
|
|
|
impl Display for Token<'_> {
|
|
fn fmt(&self, f: &mut Formatter) -> Result {
|
|
match self {
|
|
Token::None => write!(f, "none"),
|
|
Token::Bool(x) => write!(f, "{x}"),
|
|
Token::Num(n) => write!(f, "{n}"),
|
|
Token::Str(s) => write!(f, "{s}"),
|
|
Token::Op(s) => write!(f, "{s}"),
|
|
Token::Ctrl(c) => write!(f, "{c}"),
|
|
Token::Ident(s) => write!(f, "{s}"),
|
|
Token::Fn => write!(f, "fn"),
|
|
Token::Var => write!(f, "var"),
|
|
Token::If => write!(f, "if"),
|
|
Token::Else => write!(f, "else"),
|
|
}
|
|
}
|
|
}
|
|
|
|
pub fn lexer<'src>()
|
|
-> impl Parser<'src, &'src str, Vec<Spanned<Token<'src>>>, extra::Err<Rich<'src, char, Span>>> {
|
|
// A parser for numbers
|
|
let num = text::int(10)
|
|
.then(just('.').then(text::digits(10)).or_not())
|
|
.to_slice()
|
|
.from_str()
|
|
.unwrapped()
|
|
.map(Token::Num);
|
|
|
|
// A parser for strings
|
|
let str_ = just('"')
|
|
.ignore_then(none_of('"').repeated().to_slice())
|
|
.then_ignore(just('"'))
|
|
.map(Token::Str);
|
|
|
|
// A parser for operators
|
|
let op = one_of("+*-/!=")
|
|
.repeated()
|
|
.at_least(1)
|
|
.to_slice()
|
|
.map(Token::Op);
|
|
|
|
// A parser for control characters (delimiters, semicolons, etc.)
|
|
let ctrl = one_of("()[]{};,").map(Token::Ctrl);
|
|
|
|
// A parser for identifiers and keywords
|
|
let ident = text::ascii::ident().map(|ident: &str| match ident {
|
|
"fn" => Token::Fn,
|
|
"var" => Token::Var,
|
|
"let" => Token::Var, // var and let are synonyms
|
|
"if" => Token::If,
|
|
"else" => Token::Else,
|
|
"true" => Token::Bool(true),
|
|
"false" => Token::Bool(false),
|
|
"none" => Token::None,
|
|
_ => Token::Ident(ident),
|
|
});
|
|
|
|
// A single token can be one of the above
|
|
let token = num.or(str_).or(op).or(ctrl).or(ident);
|
|
|
|
let comment = just("//")
|
|
.then(any().and_is(just('\n').not()).repeated())
|
|
.padded();
|
|
|
|
token
|
|
.map_with(|tok, e| (tok, e.span()))
|
|
.padded_by(comment.repeated())
|
|
.padded()
|
|
// If we encounter an error, skip and attempt to lex the next character as a token instead
|
|
.recover_with(skip_then_retry_until(any().ignored(), end()))
|
|
.repeated()
|
|
.collect()
|
|
}
|