Solace/src/lexer_.rs

use chumsky::prelude::*;
use std::fmt::{Display, Formatter, Result};
// A few type definitions to be used by our parsers below
pub type Span = SimpleSpan;
pub type Spanned<T> = (T, Span);
#[derive(Clone, Debug, PartialEq)]
pub enum Token<'src> {
    None,
    Bool(bool),
    Num(f64),
    Str(&'src str),
    Op(&'src str),
    Ctrl(char),
    Ident(&'src str),
    Fn,
    Var,
    If,
    Else,
}

impl Display for Token<'_> {
    fn fmt(&self, f: &mut Formatter) -> Result {
        match self {
            Token::None => write!(f, "none"),
            Token::Bool(x) => write!(f, "{x}"),
            Token::Num(n) => write!(f, "{n}"),
            Token::Str(s) => write!(f, "{s}"),
            Token::Op(s) => write!(f, "{s}"),
            Token::Ctrl(c) => write!(f, "{c}"),
            Token::Ident(s) => write!(f, "{s}"),
            Token::Fn => write!(f, "fn"),
            Token::Var => write!(f, "var"),
            Token::If => write!(f, "if"),
            Token::Else => write!(f, "else"),
        }
    }
}

pub fn lexer<'src>(
) -> impl Parser<'src, &'src str, Vec<Spanned<Token<'src>>>, extra::Err<Rich<'src, char, Span>>> {
    // A parser for numbers
    let num = text::int(10)
        .then(just('.').then(text::digits(10)).or_not())
        .to_slice()
        .from_str()
        .unwrapped()
        .map(Token::Num);

    // A parser for strings
    let str_ = just('"')
        .ignore_then(none_of('"').repeated().to_slice())
        .then_ignore(just('"'))
        .map(Token::Str);

    // A parser for operators
    let op = one_of("+*-/!=")
        .repeated()
        .at_least(1)
        .to_slice()
        .map(Token::Op);

    // A parser for control characters (delimiters, semicolons, etc.)
    let ctrl = one_of("()[]{};,").map(Token::Ctrl);

    // A parser for identifiers and keywords
    let ident = text::ascii::ident().map(|ident: &str| match ident {
        "fn" => Token::Fn,
        "var" => Token::Var,
        "let" => Token::Var, // var and let are synonyms
        "if" => Token::If,
        "else" => Token::Else,
        "true" => Token::Bool(true),
        "false" => Token::Bool(false),
        "none" => Token::None,
        _ => Token::Ident(ident),
    });

    // A single token can be one of the above
    let token = num.or(str_).or(op).or(ctrl).or(ident);

    // A parser for line comments, which are skipped alongside whitespace
    let comment = just("//")
        .then(any().and_is(just('\n').not()).repeated())
        .padded();

    token
        .map_with(|tok, e| (tok, e.span()))
        .padded_by(comment.repeated())
        .padded()
        // If we encounter an error, skip and attempt to lex the next character as a token instead
        .recover_with(skip_then_retry_until(any().ignored(), end()))
        .repeated()
        .collect()
}
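
// A minimal usage sketch, not part of the original file: it assumes the crate depends on
// chumsky 1.x and simply feeds a hand-written snippet through `lexer()` via chumsky's
// `Parser::parse`, checking the produced token kinds and ignoring spans. The snippet and
// the expected token sequence are illustrative only.
#[cfg(test)]
mod tests {
    use super::*;
    use chumsky::Parser;

    #[test]
    fn lexes_a_simple_declaration() {
        let src = "var x = 1.5; // trailing comment";
        let tokens = lexer()
            .parse(src)
            .into_result()
            .expect("lexing should succeed");
        // Drop the spans and compare only the token kinds.
        let kinds: Vec<Token> = tokens.into_iter().map(|(tok, _span)| tok).collect();
        assert_eq!(
            kinds,
            vec![
                Token::Var,
                Token::Ident("x"),
                Token::Op("="),
                Token::Num(1.5),
                Token::Ctrl(';'),
            ]
        );
    }
}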