336 lines
10 KiB
Rust
336 lines
10 KiB
Rust
use logos::Logos;
|
|
use serde_json;
|
|
|
|
/// Parses a prefixed integer literal (`0x…`, `0b…`, `0o…`, optionally preceded
/// by `-`) in the given `radix` and returns its value as an `f64`.
///
/// Underscores are treated as digit separators and are removed before parsing.
/// The two characters that follow the optional sign are taken to be the radix
/// prefix and are skipped without being inspected.
///
/// # Errors
///
/// Returns a message naming the underscore-free literal and the radix when
/// the input is too short to contain a prefix, when `radix` is outside
/// `2..=36`, or when the remaining digits are not a valid `u64` in that radix.
fn parse_radix(s: &str, radix: u32) -> Result<f64, String> {
    let s = s.replace('_', "");

    let (sign, unsigned) = match s.strip_prefix('-') {
        Some(rest) => (-1.0, rest),
        None => (1.0, s.as_str()),
    };

    // `u64::from_str_radix` panics for a radix outside 2..=36, and plain
    // slicing panics when the literal is shorter than its prefix (or the cut
    // lands inside a multi-byte character); both cases become an error here.
    let magnitude = if (2..=36).contains(&radix) {
        unsigned
            .get(2..)
            .and_then(|digits| u64::from_str_radix(digits, radix).ok())
    } else {
        None
    };

    match magnitude {
        // The cast is intentionally lossy: magnitudes above 2^53 are rounded
        // to the nearest representable `f64`.
        Some(val) => Ok(sign * val as f64),
        None => Err(format!(
            "Failed to parse number \"{}\" with radix {}",
            s, radix
        )),
    }
}
|
|
|
|
/// Parses a decimal literal into an `f64`, ignoring `_` digit separators.
///
/// # Errors
///
/// Returns a message quoting the underscore-free text when it is not
/// something `f64`'s `FromStr` implementation accepts.
fn parse_number(s: &str) -> Result<f64, String> {
    // Drop the separators first; the float parser does not understand them.
    let cleaned: String = s.chars().filter(|&c| c != '_').collect();

    match cleaned.parse::<f64>() {
        Ok(value) => Ok(value),
        Err(_) => Err(format!("Failed to parse number \"{}\"", cleaned)),
    }
}
|
|
|
|
#[derive(Logos, Debug, PartialEq)]
|
|
// #[logos(extras = (u32, u32))]
|
|
#[logos(skip r"\s+")]
|
|
pub enum Token<'src> {
|
|
#[regex(r"-?0[xX][0-9a-fA-F_]+", |lex| parse_radix(lex.slice(), 16))]
|
|
#[regex(r"-?0[bB][01_]+", |lex| parse_radix(lex.slice(), 2))]
|
|
#[regex(r"-?0[oO][0-7_]+", |lex| parse_radix(lex.slice(), 8))]
|
|
#[regex(r"-?(?:0|[1-9][0-9_]*)(?:\.\d+)?(?:[eE][+-]?\d+)?", |lex| parse_number(lex.slice()))]
|
|
Number(Result<f64, String>),
|
|
|
|
#[token("NaN")]
|
|
NaN,
|
|
|
|
#[regex(r#"("[^"\\\x00-\x1F]*(?:\\.[^"\\\x00-\x1F]*)*")|('[^'\\\x00-\x1F]*(?:\\.[^'\\\x00-\x1F]*)*')"#,
|
|
|lex| { let slice = lex.slice(); slice[1..slice.len()-1].to_owned() })]
|
|
String(String), // "string" or 'string'
|
|
|
|
#[token("undefined")]
|
|
Undefined, // undefined (value not initialized or not existing)
|
|
#[token("None")]
|
|
None, // none - optional with no value
|
|
#[token("Some")]
|
|
Some, // Some(value) - optional with value
|
|
#[token("Err")]
|
|
Err, // Err(Error) - result with error
|
|
#[token("Ok")]
|
|
Ok, // Ok(Value) - result with value
|
|
#[token("false", |_| false)]
|
|
#[token("true", |_| true)]
|
|
Bool(bool),
|
|
|
|
#[token("fn")]
|
|
Fn, // keyword for functions
|
|
#[token("var")]
|
|
Var, // variable
|
|
#[token("let")]
|
|
Let, // synonymous to var
|
|
#[token("const")]
|
|
Const, // constants
|
|
#[token("live")]
|
|
Live, // live variables / signals
|
|
#[token("if")]
|
|
If,
|
|
#[token("else")]
|
|
Else,
|
|
#[token("match")]
|
|
Match,
|
|
#[token("for")]
|
|
For,
|
|
#[token("while")]
|
|
While,
|
|
#[token("return")]
|
|
Return,
|
|
|
|
// Range and other multi char operators
|
|
#[token("..=")]
|
|
RangeIncl,
|
|
#[token("..<")]
|
|
RangeExcl,
|
|
#[token("==")]
|
|
Eq,
|
|
#[token("!=")]
|
|
Ne,
|
|
#[token("<=")]
|
|
Le,
|
|
#[token(">=")]
|
|
Ge,
|
|
#[token("++")]
|
|
Inc,
|
|
#[token("--")]
|
|
Dec,
|
|
#[token("**")]
|
|
Pow,
|
|
#[token("+=")]
|
|
AddEq,
|
|
#[token("-=")]
|
|
SubEq,
|
|
#[token("*=")]
|
|
MulEq,
|
|
#[token("/=")]
|
|
DivEq,
|
|
#[token("&&")]
|
|
And,
|
|
#[token("||")]
|
|
Or,
|
|
#[token("=>")]
|
|
FatArrow,
|
|
#[token("->")]
|
|
Arrow,
|
|
|
|
// Single character operators
|
|
#[token(".")]
|
|
Dot,
|
|
#[token("!")]
|
|
ExclamationMark,
|
|
#[token("?")]
|
|
QuestionMark,
|
|
#[token("&")]
|
|
BAnd,
|
|
#[token("|")]
|
|
BOr,
|
|
#[token("<")]
|
|
Lt,
|
|
#[token(">")]
|
|
Gt,
|
|
#[token("=")]
|
|
Assign,
|
|
#[token(":")]
|
|
Colon,
|
|
#[token(",")]
|
|
Comma,
|
|
#[token("+")]
|
|
Add,
|
|
#[token("-")]
|
|
Sub,
|
|
#[token("*")]
|
|
Mul,
|
|
#[token("/")]
|
|
Div,
|
|
#[token("%")]
|
|
Mod,
|
|
// Parentheses
|
|
#[token("(")]
|
|
ParenOpen,
|
|
#[token(")")]
|
|
ParenClose,
|
|
#[token("{")]
|
|
BraceOpen,
|
|
#[token("}")]
|
|
BraceClose,
|
|
#[token("[")]
|
|
BracketOpen,
|
|
#[token("]")]
|
|
BracketClose,
|
|
|
|
#[token("_")]
|
|
Default,
|
|
#[token(";")]
|
|
Semicolon,
|
|
|
|
#[regex(r"([a-zA-Z$][a-zA-Z0-9_$]*)|(_[a-zA-Z0-9_$]+)")]
|
|
Identifier(&'src str), // Identifiers start with letters, _ or $ and can contain numbers
|
|
|
|
// Comments
|
|
#[regex(r"//[^\n]*")]
|
|
LineComment(&'src str),
|
|
#[regex(r"/\*([^*]|\*[^/])*\*/")]
|
|
BlockComment(&'src str),
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Lexes `source` and checks that its leading tokens equal `expected`,
    /// one `next()` call per expected token, in order.
    fn assert_lexes<'src>(source: &'src str, expected: Vec<Token<'src>>) {
        let mut lex = Token::lexer(source);
        for (index, want) in expected.into_iter().enumerate() {
            assert_eq!(lex.next(), Some(Ok(want)), "token #{}", index);
        }
    }

    /// Shorthand for a successfully parsed number token.
    fn num(value: f64) -> Token<'static> {
        Token::Number(Ok(value))
    }

    /// Shorthand for a string token with the given payload.
    fn text(payload: &str) -> Token<'static> {
        Token::String(payload.to_owned())
    }

    #[test]
    fn test_keywords() {
        assert_lexes(
            "let var const fn match",
            vec![
                Token::Let,
                Token::Var,
                Token::Const,
                Token::Fn,
                Token::Match,
            ],
        );
    }

    #[test]
    fn test_operators() {
        assert_lexes(
            "** * == += + =",
            vec![
                Token::Pow,
                Token::Mul,
                Token::Eq,
                Token::AddEq,
                Token::Add,
                Token::Assign,
            ],
        );
    }

    #[test]
    fn test_declaration() {
        assert_lexes(
            "const foo = 42;",
            vec![
                Token::Const,
                Token::Identifier("foo"),
                Token::Assign,
                num(42.0),
                Token::Semicolon,
            ],
        );
    }

    #[test]
    fn test_numbers() {
        assert_lexes(
            "42 * -0.2 + 4e3 - 0xFF / 0b1010 + 1_000_000;",
            vec![
                num(42.0),
                Token::Mul,
                num(-0.2),
                Token::Add,
                num(4000.0),
                Token::Sub,
                num(255.0),
                Token::Div,
                num(10.0),
                Token::Add,
                num(1000000.0),
                Token::Semicolon,
            ],
        );
    }

    #[test]
    fn test_strings() {
        assert_lexes(
            "\"Foo\" 'Single' 'Sin\\'Esq\\'gle'",
            vec![text("Foo"), text("Single"), text("Sin'Esq'gle")],
        );
    }

    #[test]
    fn test_full_syntax_example() {
        let source = "
            fn main(args: string[]) -> ArgumentError!string {
                if args.length <= 2 {
                    return Err(\"Not enough Arguments\", ArgumentError);
                }
                return match args.length {
                    3 => \"This is actually just one argument\",
                    4 => \"Two arguments. Good!\",
                    _ => \"You're overdoing it!\"
                }
            }
            ";

        assert_lexes(
            source,
            vec![
                // fn main(args: string[]) -> ArgumentError!string {
                Token::Fn,
                Token::Identifier("main"),
                Token::ParenOpen,
                Token::Identifier("args"),
                Token::Colon,
                Token::Identifier("string"),
                Token::BracketOpen,
                Token::BracketClose,
                Token::ParenClose,
                Token::Arrow,
                Token::Identifier("ArgumentError"),
                Token::ExclamationMark,
                Token::Identifier("string"),
                Token::BraceOpen,
                // if args.length <= 2 {
                Token::If,
                Token::Identifier("args"),
                Token::Dot,
                Token::Identifier("length"),
                Token::Le,
                num(2.0),
                Token::BraceOpen,
                // return Err("Not enough Arguments", ArgumentError);
                Token::Return,
                Token::Err,
                Token::ParenOpen,
                text("Not enough Arguments"),
                Token::Comma,
                Token::Identifier("ArgumentError"),
                Token::ParenClose,
                Token::Semicolon,
                // }
                Token::BraceClose,
                // return match args.length {
                Token::Return,
                Token::Match,
                Token::Identifier("args"),
                Token::Dot,
                Token::Identifier("length"),
                Token::BraceOpen,
                // 3 => "This is actually just one argument",
                num(3.0),
                Token::FatArrow,
                text("This is actually just one argument"),
                Token::Comma,
                // 4 => "Two arguments. Good!",
                num(4.0),
                Token::FatArrow,
                text("Two arguments. Good!"),
                Token::Comma,
                // _ => "You're overdoing it!"
                Token::Default,
                Token::FatArrow,
                text("You're overdoing it!"),
                // closing braces of the match and of the function
                Token::BraceClose,
                Token::BraceClose,
            ],
        );
    }
}
|