use logos::Logos; use serde_json; fn parse_radix(s: &str, radix: u32) -> Result { let s = s.replace('_', ""); let (sign, num) = if s.starts_with('-') { (-1.0, &s[3..]) // skip "-0x", "-0b" or "-0o" } else { (1.0, &s[2..]) }; match u64::from_str_radix(num, radix) { Ok(val) => Ok(sign * val as f64), Err(_) => Err(format!( "Failed to parse number \"{}\" with radix {}", s, radix )), } } fn parse_number(s: &str) -> Result { let s = s.replace('_', ""); s.parse::() .map_err(|_| format!("Failed to parse number \"{}\"", s)) } #[derive(Logos, Debug, PartialEq)] // #[logos(extras = (u32, u32))] #[logos(skip r"\s+")] pub enum Token<'src> { #[regex(r"-?0[xX][0-9a-fA-F_]+", |lex| parse_radix(lex.slice(), 16))] #[regex(r"-?0[bB][01_]+", |lex| parse_radix(lex.slice(), 2))] #[regex(r"-?0[oO][0-7_]+", |lex| parse_radix(lex.slice(), 8))] #[regex(r"-?(?:0|[1-9][0-9_]*)(?:\.\d+)?(?:[eE][+-]?\d+)?", |lex| parse_number(lex.slice()))] Number(Result), #[token("NaN")] NaN, #[regex(r#"("[^"\\\x00-\x1F]*(?:\\.[^"\\\x00-\x1F]*)*")|('[^'\\\x00-\x1F]*(?:\\.[^'\\\x00-\x1F]*)*')"#, |lex| { let slice = lex.slice(); slice[1..slice.len()-1].to_owned() })] String(String), // "string" or 'string' #[token("undefined")] Undefined, // undefined (value not initialized or not existing) #[token("None")] None, // none - optional with no value #[token("Some")] Some, // Some(value) - optional with value #[token("Err")] Err, // Err(Error) - result with error #[token("Ok")] Ok, // Ok(Value) - result with value #[token("false", |_| false)] #[token("true", |_| true)] Bool(bool), #[token("fn")] Fn, // keyword for functions #[token("var")] Var, // variable #[token("let")] Let, // synonymous to var #[token("const")] Const, // constants #[token("live")] Live, // live variables / signals #[token("if")] If, #[token("else")] Else, #[token("match")] Match, #[token("for")] For, #[token("while")] While, #[token("return")] Return, // Range and other multi char operators #[token("..=")] RangeIncl, #[token("..<")] RangeExcl, #[token("==")] Eq, #[token("!=")] Ne, #[token("<=")] Le, #[token(">=")] Ge, #[token("++")] Inc, #[token("--")] Dec, #[token("**")] Pow, #[token("+=")] AddEq, #[token("-=")] SubEq, #[token("*=")] MulEq, #[token("/=")] DivEq, #[token("&&")] And, #[token("||")] Or, #[token("=>")] FatArrow, #[token("->")] Arrow, // Single character operators #[token(".")] Dot, #[token("!")] ExclamationMark, #[token("?")] QuestionMark, #[token("&")] BAnd, #[token("|")] BOr, #[token("<")] Lt, #[token(">")] Gt, #[token("=")] Assign, #[token(":")] Colon, #[token(",")] Comma, #[token("+")] Add, #[token("-")] Sub, #[token("*")] Mul, #[token("/")] Div, #[token("%")] Mod, // Parentheses #[token("(")] ParenOpen, #[token(")")] ParenClose, #[token("{")] BraceOpen, #[token("}")] BraceClose, #[token("[")] BracketOpen, #[token("]")] BracketClose, #[token("_")] Default, #[token(";")] Semicolon, #[regex(r"([a-zA-Z$][a-zA-Z0-9_$]*)|(_[a-zA-Z0-9_$]+)")] Identifier(&'src str), // Identifiers start with letters, _ or $ and can contain numbers // Comments #[regex(r"//[^\n]*")] LineComment(&'src str), #[regex(r"/\*([^*]|\*[^/])*\*/")] BlockComment(&'src str), } #[cfg(test)] mod tests { use super::*; #[test] fn test_keywords() { let mut lex = Token::lexer("let var const fn match"); assert_eq!(lex.next(), Some(Ok(Token::Let))); assert_eq!(lex.next(), Some(Ok(Token::Var))); assert_eq!(lex.next(), Some(Ok(Token::Const))); assert_eq!(lex.next(), Some(Ok(Token::Fn))); assert_eq!(lex.next(), Some(Ok(Token::Match))); } #[test] fn test_operators() { let mut lex = Token::lexer("** * == += + ="); assert_eq!(lex.next(), Some(Ok(Token::Pow))); assert_eq!(lex.next(), Some(Ok(Token::Mul))); assert_eq!(lex.next(), Some(Ok(Token::Eq))); assert_eq!(lex.next(), Some(Ok(Token::AddEq))); assert_eq!(lex.next(), Some(Ok(Token::Add))); assert_eq!(lex.next(), Some(Ok(Token::Assign))); } #[test] fn test_declaration() { let mut lex = Token::lexer("const foo = 42;"); assert_eq!(lex.next(), Some(Ok(Token::Const))); assert_eq!(lex.next(), Some(Ok(Token::Identifier("foo")))); assert_eq!(lex.next(), Some(Ok(Token::Assign))); assert_eq!(lex.next(), Some(Ok(Token::Number(Ok(42.0))))); assert_eq!(lex.next(), Some(Ok(Token::Semicolon))); } #[test] fn test_numbers() { let mut lex = Token::lexer("42 * -0.2 + 4e3 - 0xFF / 0b1010 + 1_000_000;"); assert_eq!(lex.next(), Some(Ok(Token::Number(Ok(42.0))))); assert_eq!(lex.next(), Some(Ok(Token::Mul))); assert_eq!(lex.next(), Some(Ok(Token::Number(Ok(-0.2))))); assert_eq!(lex.next(), Some(Ok(Token::Add))); assert_eq!(lex.next(), Some(Ok(Token::Number(Ok(4000.0))))); assert_eq!(lex.next(), Some(Ok(Token::Sub))); assert_eq!(lex.next(), Some(Ok(Token::Number(Ok(255.0))))); assert_eq!(lex.next(), Some(Ok(Token::Div))); assert_eq!(lex.next(), Some(Ok(Token::Number(Ok(10.0))))); assert_eq!(lex.next(), Some(Ok(Token::Add))); assert_eq!(lex.next(), Some(Ok(Token::Number(Ok(1000000.0))))); assert_eq!(lex.next(), Some(Ok(Token::Semicolon))); } #[test] fn test_strings() { let mut lex = Token::lexer("\"Foo\" 'Single' 'Sin\\'Esq\\'gle'"); assert_eq!(lex.next(), Some(Ok(Token::String("Foo".to_owned())))); assert_eq!(lex.next(), Some(Ok(Token::String("Single".to_owned())))); assert_eq!( lex.next(), Some(Ok(Token::String("Sin'Esq'gle".to_owned()))) ); } #[test] fn test_full_syntax_example() { let mut lex = Token::lexer( " fn main(args: string[]) -> ArgumentError!string { if args.length <= 2 { return Err(\"Not enough Arguments\", ArgumentError); } return match args.length { 3 => \"This is actually just one argument\", 4 => \"Two arguments. Good!\", _ => \"You're overdoing it!\" } } ", ); // FIRST LINE assert_eq!(lex.next(), Some(Ok(Token::Fn))); assert_eq!(lex.next(), Some(Ok(Token::Identifier("main")))); assert_eq!(lex.next(), Some(Ok(Token::ParenOpen))); assert_eq!(lex.next(), Some(Ok(Token::Identifier("args")))); assert_eq!(lex.next(), Some(Ok(Token::Colon))); assert_eq!(lex.next(), Some(Ok(Token::Identifier("string")))); assert_eq!(lex.next(), Some(Ok(Token::BracketOpen))); assert_eq!(lex.next(), Some(Ok(Token::BracketClose))); assert_eq!(lex.next(), Some(Ok(Token::ParenClose))); assert_eq!(lex.next(), Some(Ok(Token::Arrow))); assert_eq!(lex.next(), Some(Ok(Token::Identifier("ArgumentError")))); assert_eq!(lex.next(), Some(Ok(Token::ExclamationMark))); assert_eq!(lex.next(), Some(Ok(Token::Identifier("string")))); assert_eq!(lex.next(), Some(Ok(Token::BraceOpen))); // SECOND LINE assert_eq!(lex.next(), Some(Ok(Token::If))); assert_eq!(lex.next(), Some(Ok(Token::Identifier("args")))); assert_eq!(lex.next(), Some(Ok(Token::Dot))); assert_eq!(lex.next(), Some(Ok(Token::Identifier("length")))); assert_eq!(lex.next(), Some(Ok(Token::Le))); assert_eq!(lex.next(), Some(Ok(Token::Number(Ok(2.0))))); assert_eq!(lex.next(), Some(Ok(Token::BraceOpen))); // THIRD LINE assert_eq!(lex.next(), Some(Ok(Token::Return))); assert_eq!(lex.next(), Some(Ok(Token::Err))); assert_eq!(lex.next(), Some(Ok(Token::ParenOpen))); assert_eq!( lex.next(), Some(Ok(Token::String("Not enough Arguments".to_owned()))) ); assert_eq!(lex.next(), Some(Ok(Token::Comma))); assert_eq!(lex.next(), Some(Ok(Token::Identifier("ArgumentError")))); assert_eq!(lex.next(), Some(Ok(Token::ParenClose))); assert_eq!(lex.next(), Some(Ok(Token::Semicolon))); // FOURTH LINE assert_eq!(lex.next(), Some(Ok(Token::BraceClose))); // FIFTH LINE assert_eq!(lex.next(), Some(Ok(Token::Return))); assert_eq!(lex.next(), Some(Ok(Token::Match))); assert_eq!(lex.next(), Some(Ok(Token::Identifier("args")))); assert_eq!(lex.next(), Some(Ok(Token::Dot))); assert_eq!(lex.next(), Some(Ok(Token::Identifier("length")))); assert_eq!(lex.next(), Some(Ok(Token::BraceOpen))); // SIXTH LINE assert_eq!(lex.next(), Some(Ok(Token::Number(Ok(3.0))))); assert_eq!(lex.next(), Some(Ok(Token::FatArrow))); assert_eq!( lex.next(), Some(Ok(Token::String( "This is actually just one argument".to_owned() ))) ); assert_eq!(lex.next(), Some(Ok(Token::Comma))); // SEVENTH LINE assert_eq!(lex.next(), Some(Ok(Token::Number(Ok(4.0))))); assert_eq!(lex.next(), Some(Ok(Token::FatArrow))); assert_eq!( lex.next(), Some(Ok(Token::String("Two arguments. Good!".to_owned()))) ); assert_eq!(lex.next(), Some(Ok(Token::Comma))); // EIGHTH LINE assert_eq!(lex.next(), Some(Ok(Token::Default))); assert_eq!(lex.next(), Some(Ok(Token::FatArrow))); assert_eq!( lex.next(), Some(Ok(Token::String("You're overdoing it!".to_owned()))) ); // NINTH AND TENTH LINE assert_eq!(lex.next(), Some(Ok(Token::BraceClose))); assert_eq!(lex.next(), Some(Ok(Token::BraceClose))); } }