Solace/src/lexer.rs

336 lines
10 KiB
Rust

use logos::Logos;
use serde_json;
/// Parses a radix-prefixed integer literal such as `0xFF`, `-0b1010` or `0o17` into an `f64`.
///
/// `_` digit separators are removed first. After an optional leading `-`, the next two
/// bytes are treated as the radix prefix (`0x`, `0b` or `0o`) and skipped; the remainder
/// is parsed as an unsigned 64-bit integer in `radix` and converted to `f64`
/// (integers above 2^53 may lose precision in that conversion).
///
/// # Errors
///
/// Returns a descriptive message when the input is too short to contain a prefix, when
/// `radix` is outside `2..=36`, or when the digits are invalid for `radix` or do not fit
/// in a `u64`. The message quotes the literal with its `_` separators removed.
fn parse_radix(s: &str, radix: u32) -> Result<f64, String> {
    let cleaned = s.replace('_', "");
    let (sign, unsigned) = match cleaned.strip_prefix('-') {
        Some(rest) => (-1.0, rest),
        None => (1.0, cleaned.as_str()),
    };
    // `u64::from_str_radix` panics for radices outside 2..=36, so reject those up front.
    let parsed = if (2..=36).contains(&radix) {
        // `get` yields `None` instead of panicking when the two-byte prefix is missing
        // or the cut would not fall on a character boundary.
        unsigned
            .get(2..)
            .and_then(|digits| u64::from_str_radix(digits, radix).ok())
    } else {
        None
    };
    match parsed {
        Some(value) => Ok(sign * value as f64),
        None => Err(format!(
            "Failed to parse number \"{}\" with radix {}",
            cleaned, radix
        )),
    }
}
/// Parses a decimal literal (integer, fraction and/or exponent) into an `f64`.
///
/// Every `_` digit separator is dropped before the text is handed to `f64`'s parser.
///
/// # Errors
///
/// Returns a message quoting the separator-free literal when it is not a valid `f64`.
fn parse_number(s: &str) -> Result<f64, String> {
    let digits: String = s.chars().filter(|&c| c != '_').collect();
    match digits.parse::<f64>() {
        Ok(value) => Ok(value),
        Err(_) => Err(format!("Failed to parse number \"{}\"", digits)),
    }
}
/// Turns a quoted string literal into its value: drops the surrounding quote characters
/// and resolves backslash escapes.
///
/// `\n`, `\r` and `\t` become the corresponding control characters; `\\`, `\'` and `\"`
/// become the escaped character itself. Any other escape is kept verbatim (backslash
/// included) so that no information is lost for later stages.
fn unescape_string_literal(literal: &str) -> String {
    // The string regex guarantees a leading and trailing ASCII quote; `get` keeps this
    // helper panic-free should it ever be handed something shorter.
    let inner = literal
        .get(1..literal.len().saturating_sub(1))
        .unwrap_or("");
    let mut value = String::with_capacity(inner.len());
    let mut chars = inner.chars();
    while let Some(current) = chars.next() {
        if current != '\\' {
            value.push(current);
            continue;
        }
        match chars.next() {
            Some('n') => value.push('\n'),
            Some('r') => value.push('\r'),
            Some('t') => value.push('\t'),
            Some(escaped @ ('\\' | '\'' | '"')) => value.push(escaped),
            Some(other) => {
                value.push('\\');
                value.push(other);
            }
            // A trailing lone backslash cannot come out of the string regex; keep it as is.
            None => value.push('\\'),
        }
    }
    value
}

/// Tokens produced by the lexer. Whitespace between tokens is skipped.
///
/// Tokens that carry text borrowed from the input (`Identifier`, comments) are tied to
/// the `'src` lifetime of the source string.
#[derive(Logos, Debug, PartialEq)]
// #[logos(extras = (u32, u32))]
#[logos(skip r"\s+")]
pub enum Token<'src> {
    /// Numeric literal: hexadecimal (`0x`), binary (`0b`), octal (`0o`) or decimal, with
    /// an optional leading `-` and `_` digit separators. The payload is the parse result,
    /// so a literal that matches the pattern but cannot be converted is reported as
    /// `Number(Err(..))`.
    #[regex(r"-?0[xX][0-9a-fA-F_]+", |lex| parse_radix(lex.slice(), 16))]
    #[regex(r"-?0[bB][01_]+", |lex| parse_radix(lex.slice(), 2))]
    #[regex(r"-?0[oO][0-7_]+", |lex| parse_radix(lex.slice(), 8))]
    #[regex(r"-?(?:0|[1-9][0-9_]*)(?:\.\d+)?(?:[eE][+-]?\d+)?", |lex| parse_number(lex.slice()))]
    Number(Result<f64, String>),
    /// The `NaN` literal.
    #[token("NaN")]
    NaN,
    /// String literal, `"string"` or `'string'`. The payload is the content without the
    /// surrounding quotes and with escape sequences resolved.
    #[regex(r#"("[^"\\\x00-\x1F]*(?:\\.[^"\\\x00-\x1F]*)*")|('[^'\\\x00-\x1F]*(?:\\.[^'\\\x00-\x1F]*)*')"#,
        |lex| unescape_string_literal(lex.slice()))]
    String(String),
    /// `undefined` (value not initialized or not existing).
    #[token("undefined")]
    Undefined,
    /// `None` - optional with no value.
    #[token("None")]
    None,
    /// `Some(value)` - optional with value.
    #[token("Some")]
    Some,
    /// `Err(Error)` - result with error.
    #[token("Err")]
    Err,
    /// `Ok(Value)` - result with value.
    #[token("Ok")]
    Ok,
    /// Boolean literal, `true` or `false`.
    #[token("false", |_| false)]
    #[token("true", |_| true)]
    Bool(bool),
    /// Keyword for functions.
    #[token("fn")]
    Fn,
    /// Variable declaration.
    #[token("var")]
    Var,
    /// Synonymous to `var`.
    #[token("let")]
    Let,
    /// Constant declaration.
    #[token("const")]
    Const,
    /// Live variables / signals.
    #[token("live")]
    Live,
    #[token("if")]
    If,
    #[token("else")]
    Else,
    #[token("match")]
    Match,
    #[token("for")]
    For,
    #[token("while")]
    While,
    #[token("return")]
    Return,

    // Range and other multi-character operators
    #[token("..=")]
    RangeIncl,
    #[token("..<")]
    RangeExcl,
    #[token("==")]
    Eq,
    #[token("!=")]
    Ne,
    #[token("<=")]
    Le,
    #[token(">=")]
    Ge,
    #[token("++")]
    Inc,
    #[token("--")]
    Dec,
    #[token("**")]
    Pow,
    #[token("+=")]
    AddEq,
    #[token("-=")]
    SubEq,
    #[token("*=")]
    MulEq,
    #[token("/=")]
    DivEq,
    #[token("&&")]
    And,
    #[token("||")]
    Or,
    #[token("=>")]
    FatArrow,
    #[token("->")]
    Arrow,

    // Single-character operators
    #[token(".")]
    Dot,
    #[token("!")]
    ExclamationMark,
    #[token("?")]
    QuestionMark,
    #[token("&")]
    BAnd,
    #[token("|")]
    BOr,
    #[token("<")]
    Lt,
    #[token(">")]
    Gt,
    #[token("=")]
    Assign,
    #[token(":")]
    Colon,
    #[token(",")]
    Comma,
    #[token("+")]
    Add,
    #[token("-")]
    Sub,
    #[token("*")]
    Mul,
    #[token("/")]
    Div,
    #[token("%")]
    Mod,

    // Parentheses, braces and brackets
    #[token("(")]
    ParenOpen,
    #[token(")")]
    ParenClose,
    #[token("{")]
    BraceOpen,
    #[token("}")]
    BraceClose,
    #[token("[")]
    BracketOpen,
    #[token("]")]
    BracketClose,
    /// A lone `_`.
    #[token("_")]
    Default,
    #[token(";")]
    Semicolon,
    /// Identifiers start with an ASCII letter, `_` or `$` and can contain digits. A name
    /// starting with `_` needs at least one more character; a lone `_` is `Default`.
    #[regex(r"([a-zA-Z$][a-zA-Z0-9_$]*)|(_[a-zA-Z0-9_$]+)")]
    Identifier(&'src str),

    // Comments
    /// `// ...` up to, but not including, the end of the line.
    #[regex(r"//[^\n]*")]
    LineComment(&'src str),
    /// `/* ... */`, ending at the first `*/`. The closing delimiter may be preceded by
    /// any number of `*` (e.g. `/** doc **/`, `/***/`).
    #[regex(r"/\*([^*]|\*+[^*/])*\*+/")]
    BlockComment(&'src str),
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Lexes `source` and asserts that its leading tokens equal `expected`, in order.
    /// Tokens after the expected ones, if any, are not inspected.
    fn assert_lexes_to<'src>(source: &'src str, expected: Vec<Token<'src>>) {
        let mut lexer = Token::lexer(source);
        for token in expected {
            assert_eq!(lexer.next(), Some(Ok(token)));
        }
    }

    /// Shorthand for a successfully parsed number token.
    fn num(value: f64) -> Token<'static> {
        Token::Number(Ok(value))
    }

    /// Shorthand for a string token with the given content.
    fn text(value: &str) -> Token<'static> {
        Token::String(value.to_owned())
    }

    /// Shorthand for an identifier token.
    fn ident(name: &str) -> Token<'_> {
        Token::Identifier(name)
    }

    #[test]
    fn test_keywords() {
        assert_lexes_to(
            "let var const fn match",
            vec![
                Token::Let,
                Token::Var,
                Token::Const,
                Token::Fn,
                Token::Match,
            ],
        );
    }

    #[test]
    fn test_operators() {
        assert_lexes_to(
            "** * == += + =",
            vec![
                Token::Pow,
                Token::Mul,
                Token::Eq,
                Token::AddEq,
                Token::Add,
                Token::Assign,
            ],
        );
    }

    #[test]
    fn test_declaration() {
        assert_lexes_to(
            "const foo = 42;",
            vec![
                Token::Const,
                ident("foo"),
                Token::Assign,
                num(42.0),
                Token::Semicolon,
            ],
        );
    }

    #[test]
    fn test_numbers() {
        assert_lexes_to(
            "42 * -0.2 + 4e3 - 0xFF / 0b1010 + 1_000_000;",
            vec![
                num(42.0),
                Token::Mul,
                num(-0.2),
                Token::Add,
                num(4000.0),
                Token::Sub,
                num(255.0),
                Token::Div,
                num(10.0),
                Token::Add,
                num(1000000.0),
                Token::Semicolon,
            ],
        );
    }

    #[test]
    fn test_strings() {
        // Escaped quotes inside a literal are expected to come back unescaped.
        assert_lexes_to(
            "\"Foo\" 'Single' 'Sin\\'Esq\\'gle'",
            vec![text("Foo"), text("Single"), text("Sin'Esq'gle")],
        );
    }

    #[test]
    fn test_full_syntax_example() {
        let source = "
fn main(args: string[]) -> ArgumentError!string {
if args.length <= 2 {
return Err(\"Not enough Arguments\", ArgumentError);
}
return match args.length {
3 => \"This is actually just one argument\",
4 => \"Two arguments. Good!\",
_ => \"You're overdoing it!\"
}
}
";
        let expected = vec![
            // fn main(args: string[]) -> ArgumentError!string {
            Token::Fn,
            ident("main"),
            Token::ParenOpen,
            ident("args"),
            Token::Colon,
            ident("string"),
            Token::BracketOpen,
            Token::BracketClose,
            Token::ParenClose,
            Token::Arrow,
            ident("ArgumentError"),
            Token::ExclamationMark,
            ident("string"),
            Token::BraceOpen,
            // if args.length <= 2 {
            Token::If,
            ident("args"),
            Token::Dot,
            ident("length"),
            Token::Le,
            num(2.0),
            Token::BraceOpen,
            // return Err("Not enough Arguments", ArgumentError);
            Token::Return,
            Token::Err,
            Token::ParenOpen,
            text("Not enough Arguments"),
            Token::Comma,
            ident("ArgumentError"),
            Token::ParenClose,
            Token::Semicolon,
            // }
            Token::BraceClose,
            // return match args.length {
            Token::Return,
            Token::Match,
            ident("args"),
            Token::Dot,
            ident("length"),
            Token::BraceOpen,
            // 3 => "This is actually just one argument",
            num(3.0),
            Token::FatArrow,
            text("This is actually just one argument"),
            Token::Comma,
            // 4 => "Two arguments. Good!",
            num(4.0),
            Token::FatArrow,
            text("Two arguments. Good!"),
            Token::Comma,
            // _ => "You're overdoing it!"
            Token::Default,
            Token::FatArrow,
            text("You're overdoing it!"),
            // closing braces of the match and of the function
            Token::BraceClose,
            Token::BraceClose,
        ];
        assert_lexes_to(source, expected);
    }
}