use chumsky::prelude::*; use std::fmt::{Display, Formatter, Result}; // A few type definitions to be used by our parsers below pub type Span = SimpleSpan; pub type Spanned = (T, Span); #[derive(Clone, Debug, PartialEq)] pub enum Token<'src> { None, Bool(bool), Num(f64), Str(&'src str), Op(&'src str), Ctrl(char), Ident(&'src str), Fn, Var, If, Else, } impl Display for Token<'_> { fn fmt(&self, f: &mut Formatter) -> Result { match self { Token::None => write!(f, "none"), Token::Bool(x) => write!(f, "{x}"), Token::Num(n) => write!(f, "{n}"), Token::Str(s) => write!(f, "{s}"), Token::Op(s) => write!(f, "{s}"), Token::Ctrl(c) => write!(f, "{c}"), Token::Ident(s) => write!(f, "{s}"), Token::Fn => write!(f, "fn"), Token::Var => write!(f, "var"), Token::If => write!(f, "if"), Token::Else => write!(f, "else"), } } } pub fn lexer<'src>() -> impl Parser<'src, &'src str, Vec>>, extra::Err>> { // A parser for numbers let num = text::int(10) .then(just('.').then(text::digits(10)).or_not()) .to_slice() .from_str() .unwrapped() .map(Token::Num); // A parser for strings let str_ = just('"') .ignore_then(none_of('"').repeated().to_slice()) .then_ignore(just('"')) .map(Token::Str); // A parser for operators let op = one_of("+*-/!=") .repeated() .at_least(1) .to_slice() .map(Token::Op); // A parser for control characters (delimiters, semicolons, etc.) let ctrl = one_of("()[]{};,").map(Token::Ctrl); // A parser for identifiers and keywords let ident = text::ascii::ident().map(|ident: &str| match ident { "fn" => Token::Fn, "var" => Token::Var, "let" => Token::Var, // var and let are synonyms "if" => Token::If, "else" => Token::Else, "true" => Token::Bool(true), "false" => Token::Bool(false), "none" => Token::None, _ => Token::Ident(ident), }); // A single token can be one of the above let token = num.or(str_).or(op).or(ctrl).or(ident); let comment = just("//") .then(any().and_is(just('\n').not()).repeated()) .padded(); token .map_with(|tok, e| (tok, e.span())) .padded_by(comment.repeated()) .padded() // If we encounter an error, skip and attempt to lex the next character as a token instead .recover_with(skip_then_retry_until(any().ignored(), end())) .repeated() .collect() }