Logos based lexer, initial benchmark with Criterion

This commit is contained in:
Norman Köhring 2025-07-13 14:56:27 +02:00
parent ad48980f5c
commit a20abd65ac
13 changed files with 974 additions and 129 deletions

328
Cargo.lock generated
View file

@ -29,6 +29,18 @@ version = "0.2.21"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923"
[[package]]
name = "anes"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299"
[[package]]
name = "anstyle"
version = "1.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "862ed96ca487e809f1c8e5a8447f6ee2cf102f846893800b20cebdf541fc6bbd"
[[package]] [[package]]
name = "anyhow" name = "anyhow"
version = "1.0.98" version = "1.0.98"
@ -79,6 +91,12 @@ dependencies = [
"vsimd", "vsimd",
] ]
[[package]]
name = "beef"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3a8241f3ebb85c056b509d4327ad0358fbbba6ffb340bf388f26350aeda225b1"
[[package]] [[package]]
name = "better_scoped_tls" name = "better_scoped_tls"
version = "1.0.1" version = "1.0.1"
@ -131,6 +149,12 @@ dependencies = [
"serde", "serde",
] ]
[[package]]
name = "cast"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
[[package]] [[package]]
name = "castaway" name = "castaway"
version = "0.2.3" version = "0.2.3"
@ -169,6 +193,58 @@ dependencies = [
"unicode-segmentation", "unicode-segmentation",
] ]
[[package]]
name = "ciborium"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e"
dependencies = [
"ciborium-io",
"ciborium-ll",
"serde",
]
[[package]]
name = "ciborium-io"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757"
[[package]]
name = "ciborium-ll"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9"
dependencies = [
"ciborium-io",
"half",
]
[[package]]
name = "clap"
version = "4.5.41"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "be92d32e80243a54711e5d7ce823c35c41c9d929dc4ab58e1276f625841aadf9"
dependencies = [
"clap_builder",
]
[[package]]
name = "clap_builder"
version = "4.5.41"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "707eab41e9622f9139419d573eca0900137718000c517d47da73045f54331c3d"
dependencies = [
"anstyle",
"clap_lex",
]
[[package]]
name = "clap_lex"
version = "0.7.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675"
[[package]] [[package]]
name = "compact_str" name = "compact_str"
version = "0.7.1" version = "0.7.1"
@ -182,6 +258,70 @@ dependencies = [
"static_assertions", "static_assertions",
] ]
[[package]]
name = "criterion"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3bf7af66b0989381bd0be551bd7cc91912a655a58c6918420c9527b1fd8b4679"
dependencies = [
"anes",
"cast",
"ciborium",
"clap",
"criterion-plot",
"itertools 0.13.0",
"num-traits",
"oorandom",
"plotters",
"rayon",
"regex",
"serde",
"serde_json",
"tinytemplate",
"walkdir",
]
[[package]]
name = "criterion-plot"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1"
dependencies = [
"cast",
"itertools 0.10.5",
]
[[package]]
name = "crossbeam-deque"
version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
dependencies = [
"crossbeam-epoch",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-epoch"
version = "0.9.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
dependencies = [
"crossbeam-utils",
]
[[package]]
name = "crossbeam-utils"
version = "0.8.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
[[package]]
name = "crunchy"
version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5"
[[package]] [[package]]
name = "data-encoding" name = "data-encoding"
version = "2.9.0" version = "2.9.0"
@ -221,6 +361,12 @@ version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
[[package]]
name = "fnv"
version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
[[package]] [[package]]
name = "foldhash" name = "foldhash"
version = "0.1.5" version = "0.1.5"
@ -253,6 +399,16 @@ version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c"
[[package]]
name = "half"
version = "2.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "459196ed295495a68f7d7fe1d84f6c4b7ff0e21fe3017b2f283c6fac3ad803c9"
dependencies = [
"cfg-if",
"crunchy",
]
[[package]] [[package]]
name = "hashbrown" name = "hashbrown"
version = "0.14.5" version = "0.14.5"
@ -419,6 +575,24 @@ dependencies = [
"syn", "syn",
] ]
[[package]]
name = "itertools"
version = "0.10.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473"
dependencies = [
"either",
]
[[package]]
name = "itertools"
version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186"
dependencies = [
"either",
]
[[package]] [[package]]
name = "itoa" name = "itoa"
version = "1.0.15" version = "1.0.15"
@ -459,6 +633,40 @@ version = "0.4.27"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94"
[[package]]
name = "logos"
version = "0.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab6f536c1af4c7cc81edf73da1f8029896e7e1e16a219ef09b184e76a296f3db"
dependencies = [
"logos-derive",
]
[[package]]
name = "logos-codegen"
version = "0.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "189bbfd0b61330abea797e5e9276408f2edbe4f822d7ad08685d67419aafb34e"
dependencies = [
"beef",
"fnv",
"lazy_static",
"proc-macro2",
"quote",
"regex-syntax 0.8.5",
"rustc_version",
"syn",
]
[[package]]
name = "logos-derive"
version = "0.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ebfe8e1a19049ddbfccbd14ac834b215e11b85b90bab0c2dba7c7b92fb5d5cba"
dependencies = [
"logos-codegen",
]
[[package]] [[package]]
name = "memchr" name = "memchr"
version = "2.7.5" version = "2.7.5"
@ -506,6 +714,12 @@ version = "1.21.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
[[package]]
name = "oorandom"
version = "11.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e"
[[package]] [[package]]
name = "outref" name = "outref"
version = "0.5.2" version = "0.5.2"
@ -566,6 +780,34 @@ version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b"
[[package]]
name = "plotters"
version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747"
dependencies = [
"num-traits",
"plotters-backend",
"plotters-svg",
"wasm-bindgen",
"web-sys",
]
[[package]]
name = "plotters-backend"
version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a"
[[package]]
name = "plotters-svg"
version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670"
dependencies = [
"plotters-backend",
]
[[package]] [[package]]
name = "potential_utf" name = "potential_utf"
version = "0.1.2" version = "0.1.2"
@ -643,6 +885,26 @@ version = "0.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
[[package]]
name = "rayon"
version = "1.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa"
dependencies = [
"either",
"rayon-core",
]
[[package]]
name = "rayon-core"
version = "1.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2"
dependencies = [
"crossbeam-deque",
"crossbeam-utils",
]
[[package]] [[package]]
name = "regex" name = "regex"
version = "1.11.1" version = "1.11.1"
@ -695,6 +957,15 @@ version = "2.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d"
[[package]]
name = "rustc_version"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92"
dependencies = [
"semver",
]
[[package]] [[package]]
name = "rustversion" name = "rustversion"
version = "1.0.21" version = "1.0.21"
@ -713,12 +984,27 @@ version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dd29631678d6fb0903b69223673e122c32e9ae559d0960a38d574695ebc0ea15" checksum = "dd29631678d6fb0903b69223673e122c32e9ae559d0960a38d574695ebc0ea15"
[[package]]
name = "same-file"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
dependencies = [
"winapi-util",
]
[[package]] [[package]]
name = "scoped-tls" name = "scoped-tls"
version = "1.0.1" version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294" checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294"
[[package]]
name = "semver"
version = "1.0.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56e6fa9c48d24d85fb3de5ad847117517440f6beceb7798af16b4a87d616b8d0"
[[package]] [[package]]
name = "serde" name = "serde"
version = "1.0.219" version = "1.0.219"
@ -782,7 +1068,10 @@ dependencies = [
"anyhow", "anyhow",
"ariadne", "ariadne",
"chumsky", "chumsky",
"criterion",
"lazy_static", "lazy_static",
"logos",
"serde_json",
"swc_common", "swc_common",
"swc_ecma_ast", "swc_ecma_ast",
"swc_ecma_codegen", "swc_ecma_codegen",
@ -1045,6 +1334,16 @@ dependencies = [
"zerovec", "zerovec",
] ]
[[package]]
name = "tinytemplate"
version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc"
dependencies = [
"serde",
"serde_json",
]
[[package]] [[package]]
name = "tracing" name = "tracing"
version = "0.1.41" version = "0.1.41"
@ -1149,6 +1448,16 @@ version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c3082ca00d5a5ef149bb8b555a72ae84c9c59f7250f013ac822ac2e49b19c64" checksum = "5c3082ca00d5a5ef149bb8b555a72ae84c9c59f7250f013ac822ac2e49b19c64"
[[package]]
name = "walkdir"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b"
dependencies = [
"same-file",
"winapi-util",
]
[[package]] [[package]]
name = "wasm-bindgen" name = "wasm-bindgen"
version = "0.2.100" version = "0.2.100"
@ -1207,6 +1516,25 @@ dependencies = [
"unicode-ident", "unicode-ident",
] ]
[[package]]
name = "web-sys"
version = "0.3.77"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "33b6dd2ef9186f1f2072e409e99cd22a975331a6b3591b12c764e0e55c60d5d2"
dependencies = [
"js-sys",
"wasm-bindgen",
]
[[package]]
name = "winapi-util"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb"
dependencies = [
"windows-sys",
]
[[package]] [[package]]
name = "windows-sys" name = "windows-sys"
version = "0.59.0" version = "0.59.0"

View file

@ -8,7 +8,24 @@ anyhow = "1.0.98"
ariadne = "0.5.1" ariadne = "0.5.1"
chumsky = "0.10.1" chumsky = "0.10.1"
lazy_static = "1.5.0" lazy_static = "1.5.0"
logos = "0.15.0"
serde_json = "1.0.140"
swc_common = "13.0.2" swc_common = "13.0.2"
swc_ecma_ast = "13.0.0" swc_ecma_ast = "13.0.0"
swc_ecma_codegen = "15.0.1" swc_ecma_codegen = "15.0.1"
thiserror = "2.0.12" thiserror = "2.0.12"
[dev-dependencies]
criterion = "0.6.0"
[lib]
name = "solace"
path = "src/lib.rs"
[[bin]]
name = "solace"
path = "src/main.rs"
[[bench]]
name = "lexer"
harness = false

46
benches/lexer.rs Normal file
View file

@ -0,0 +1,46 @@
use criterion::{Criterion, criterion_group, criterion_main};
use logos::Logos;
use solace::lexer::Token;
use std::hint::black_box;
/// Benchmarks the Logos-based lexer over a representative Solace snippet
/// (~100 tokens covering decimal/hex/bin/oct numbers, strings, comments,
/// keywords, and operators — including one invalid octal literal so the
/// error path is exercised too).
pub fn criterion_benchmark(c: &mut Criterion) {
    let input = "
const NUMBER = 1_000_000;
const HEX_NUMBER = 0x7C1;
const BIN_NUMBER = 0b10111;
const OCT_NUMBER = 0o27;
const INVALID_NUMBER = 0o28;
const MORE_TOKENS = \"More tokens to fill the 100 Tokens!\";
fn easterEgg() -> (output: string) {
/*
* Someone found the easter egg!
* Lets celebrate that with a comment!
*/
output = \"Yeah, you found the easter egg!\";
}
fn main(args: string[]) -> ArgumentError!string {
if args.length <= 2 {
return Err(\"Not enough Arguments\", ArgumentError);
}
return match args.length {
3 => \"This is actually just one argument\",
4 => \"Two arguments. Good!\",
NUMBER => easterEgg(),
_ => \"You're overdoing it... maybe?\"
}
}
";
    c.bench_function("Lexer", |b| {
        b.iter(|| {
            // black_box the input so the lexer can't be const-folded, and
            // black_box each produced token so the per-token work (number
            // parsing, string slicing) can't be optimized away either.
            for token in Token::lexer(black_box(input)) {
                black_box(token);
            }
        })
    });
}

criterion_group!(benches, criterion_benchmark);
criterion_main!(benches);

View file

@ -1,6 +1,3 @@
let x = 2;
let y = 3;
fn mul(x, y) { fn mul(x, y) {
let a = x*2; let a = x*2;
a * y a * y

View file

24
src/emitter/js.rs Normal file
View file

@ -0,0 +1,24 @@
use swc_common::{SourceMap, sync::Lrc};
use swc_ecma_ast::Module;
use swc_ecma_codegen::{Config, Emitter, text_writer::JsWriter};
/// Emits JavaScript source text from an SWC AST module.
///
/// The emitter itself is stateless; a fresh `SourceMap` is created per
/// `emit` call and returned alongside the generated code.
pub struct JsEmitter;

impl JsEmitter {
    /// Creates a new emitter. Stateless; exists for API symmetry.
    pub fn new() -> Self {
        Self
    }

    /// Renders `ast` to JavaScript and returns the generated source text
    /// together with the `SourceMap` used during code generation.
    ///
    /// # Panics
    /// Panics if SWC code generation fails or produces non-UTF-8 output;
    /// either indicates a bug in the upstream transform rather than a
    /// recoverable condition.
    pub fn emit(&self, ast: Module) -> (String, Lrc<SourceMap>) {
        let sm = Lrc::new(SourceMap::default());
        let mut buf = vec![];
        let mut emitter = Emitter {
            cfg: Config::default(),
            cm: sm.clone(),
            comments: None,
            // "\n" selects Unix line endings in the generated output.
            wr: JsWriter::new(sm.clone(), "\n", &mut buf, None),
        };
        emitter
            .emit_module(&ast)
            .expect("SWC code generation failed");
        let code = String::from_utf8(buf).expect("SWC emitted non-UTF-8 output");
        (code, sm)
    }
}

impl Default for JsEmitter {
    fn default() -> Self {
        Self::new()
    }
}

1
src/emitter/mod.rs Normal file
View file

@ -0,0 +1 @@
pub mod js;

View file

@ -1,95 +1,336 @@
use chumsky::prelude::*; use logos::Logos;
use std::fmt::{Display, Formatter, Result}; use serde_json;
// A few type definitions to be used by our parsers below fn parse_radix(s: &str, radix: u32) -> Result<f64, String> {
pub type Span = SimpleSpan; let s = s.replace('_', "");
pub type Spanned<T> = (T, Span); let (sign, num) = if s.starts_with('-') {
(-1.0, &s[3..]) // skip "-0x", "-0b" or "-0o"
} else {
(1.0, &s[2..])
};
#[derive(Clone, Debug, PartialEq)] match u64::from_str_radix(num, radix) {
Ok(val) => Ok(sign * val as f64),
Err(_) => Err(format!(
"Failed to parse number \"{}\" with radix {}",
s, radix
)),
}
}
/// Parses a decimal (base-10) number literal, ignoring `_` digit separators.
///
/// Accepts anything `f64::from_str` accepts after separator removal
/// (integers, decimals, scientific notation, optional leading sign).
/// On failure returns a descriptive error string containing the
/// separator-stripped input.
fn parse_number(s: &str) -> Result<f64, String> {
    use std::borrow::Cow;
    // Only allocate when a separator is actually present; this runs once
    // per numeric token, so keep the common path allocation-free.
    let s: Cow<'_, str> = if s.contains('_') {
        Cow::Owned(s.replace('_', ""))
    } else {
        Cow::Borrowed(s)
    };
    s.parse::<f64>()
        .map_err(|_| format!("Failed to parse number \"{}\"", s))
}
#[derive(Logos, Debug, PartialEq)]
// #[logos(extras = (u32, u32))]
#[logos(skip r"\s+")]
pub enum Token<'src> { pub enum Token<'src> {
None, #[regex(r"-?0[xX][0-9a-fA-F_]+", |lex| parse_radix(lex.slice(), 16))]
#[regex(r"-?0[bB][01_]+", |lex| parse_radix(lex.slice(), 2))]
#[regex(r"-?0[oO][0-7_]+", |lex| parse_radix(lex.slice(), 8))]
#[regex(r"-?(?:0|[1-9][0-9_]*)(?:\.\d+)?(?:[eE][+-]?\d+)?", |lex| parse_number(lex.slice()))]
Number(Result<f64, String>),
#[token("NaN")]
NaN,
#[regex(r#"("[^"\\\x00-\x1F]*(?:\\.[^"\\\x00-\x1F]*)*")|('[^'\\\x00-\x1F]*(?:\\.[^'\\\x00-\x1F]*)*')"#,
|lex| { let slice = lex.slice(); slice[1..slice.len()-1].to_owned() })]
String(String), // "string" or 'string'
#[token("undefined")]
Undefined, // undefined (value not initialized or not existing)
#[token("None")]
None, // none - optional with no value
#[token("Some")]
Some, // Some(value) - optional with value
#[token("Err")]
Err, // Err(Error) - result with error
#[token("Ok")]
Ok, // Ok(Value) - result with value
#[token("false", |_| false)]
#[token("true", |_| true)]
Bool(bool), Bool(bool),
Num(f64),
Str(&'src str), #[token("fn")]
Op(&'src str), Fn, // keyword for functions
Ctrl(char), #[token("var")]
Ident(&'src str), Var, // variable
Fn, #[token("let")]
Var, Let, // synonymous to var
#[token("const")]
Const, // constants
#[token("live")]
Live, // live variables / signals
#[token("if")]
If, If,
#[token("else")]
Else, Else,
#[token("match")]
Match,
#[token("for")]
For,
#[token("while")]
While,
#[token("return")]
Return,
// Range and other multi char operators
#[token("..=")]
RangeIncl,
#[token("..<")]
RangeExcl,
#[token("==")]
Eq,
#[token("!=")]
Ne,
#[token("<=")]
Le,
#[token(">=")]
Ge,
#[token("++")]
Inc,
#[token("--")]
Dec,
#[token("**")]
Pow,
#[token("+=")]
AddEq,
#[token("-=")]
SubEq,
#[token("*=")]
MulEq,
#[token("/=")]
DivEq,
#[token("&&")]
And,
#[token("||")]
Or,
#[token("=>")]
FatArrow,
#[token("->")]
Arrow,
// Single character operators
#[token(".")]
Dot,
#[token("!")]
ExclamationMark,
#[token("?")]
QuestionMark,
#[token("&")]
BAnd,
#[token("|")]
BOr,
#[token("<")]
Lt,
#[token(">")]
Gt,
#[token("=")]
Assign,
#[token(":")]
Colon,
#[token(",")]
Comma,
#[token("+")]
Add,
#[token("-")]
Sub,
#[token("*")]
Mul,
#[token("/")]
Div,
#[token("%")]
Mod,
// Parentheses
#[token("(")]
ParenOpen,
#[token(")")]
ParenClose,
#[token("{")]
BraceOpen,
#[token("}")]
BraceClose,
#[token("[")]
BracketOpen,
#[token("]")]
BracketClose,
#[token("_")]
Default,
#[token(";")]
Semicolon,
#[regex(r"([a-zA-Z$][a-zA-Z0-9_$]*)|(_[a-zA-Z0-9_$]+)")]
Identifier(&'src str), // Identifiers start with letters, _ or $ and can contain numbers
// Comments
#[regex(r"//[^\n]*")]
LineComment(&'src str),
#[regex(r"/\*([^*]|\*[^/])*\*/")]
BlockComment(&'src str),
} }
impl Display for Token<'_> { #[cfg(test)]
fn fmt(&self, f: &mut Formatter) -> Result { mod tests {
match self { use super::*;
Token::None => write!(f, "none"),
Token::Bool(x) => write!(f, "{x}"), #[test]
Token::Num(n) => write!(f, "{n}"), fn test_keywords() {
Token::Str(s) => write!(f, "{s}"), let mut lex = Token::lexer("let var const fn match");
Token::Op(s) => write!(f, "{s}"), assert_eq!(lex.next(), Some(Ok(Token::Let)));
Token::Ctrl(c) => write!(f, "{c}"), assert_eq!(lex.next(), Some(Ok(Token::Var)));
Token::Ident(s) => write!(f, "{s}"), assert_eq!(lex.next(), Some(Ok(Token::Const)));
Token::Fn => write!(f, "fn"), assert_eq!(lex.next(), Some(Ok(Token::Fn)));
Token::Var => write!(f, "var"), assert_eq!(lex.next(), Some(Ok(Token::Match)));
Token::If => write!(f, "if"),
Token::Else => write!(f, "else"),
}
}
} }
pub fn lexer<'src>() #[test]
-> impl Parser<'src, &'src str, Vec<Spanned<Token<'src>>>, extra::Err<Rich<'src, char, Span>>> { fn test_operators() {
// A parser for numbers let mut lex = Token::lexer("** * == += + =");
let num = text::int(10) assert_eq!(lex.next(), Some(Ok(Token::Pow)));
.then(just('.').then(text::digits(10)).or_not()) assert_eq!(lex.next(), Some(Ok(Token::Mul)));
.to_slice() assert_eq!(lex.next(), Some(Ok(Token::Eq)));
.from_str() assert_eq!(lex.next(), Some(Ok(Token::AddEq)));
.unwrapped() assert_eq!(lex.next(), Some(Ok(Token::Add)));
.map(Token::Num); assert_eq!(lex.next(), Some(Ok(Token::Assign)));
}
// A parser for strings
let str_ = just('"') #[test]
.ignore_then(none_of('"').repeated().to_slice()) fn test_declaration() {
.then_ignore(just('"')) let mut lex = Token::lexer("const foo = 42;");
.map(Token::Str); assert_eq!(lex.next(), Some(Ok(Token::Const)));
assert_eq!(lex.next(), Some(Ok(Token::Identifier("foo"))));
// A parser for operators assert_eq!(lex.next(), Some(Ok(Token::Assign)));
let op = one_of("+*-/!=") assert_eq!(lex.next(), Some(Ok(Token::Number(Ok(42.0)))));
.repeated() assert_eq!(lex.next(), Some(Ok(Token::Semicolon)));
.at_least(1) }
.to_slice() #[test]
.map(Token::Op); fn test_numbers() {
let mut lex = Token::lexer("42 * -0.2 + 4e3 - 0xFF / 0b1010 + 1_000_000;");
// A parser for control characters (delimiters, semicolons, etc.) assert_eq!(lex.next(), Some(Ok(Token::Number(Ok(42.0)))));
let ctrl = one_of("()[]{};,").map(Token::Ctrl); assert_eq!(lex.next(), Some(Ok(Token::Mul)));
assert_eq!(lex.next(), Some(Ok(Token::Number(Ok(-0.2)))));
// A parser for identifiers and keywords assert_eq!(lex.next(), Some(Ok(Token::Add)));
let ident = text::ascii::ident().map(|ident: &str| match ident { assert_eq!(lex.next(), Some(Ok(Token::Number(Ok(4000.0)))));
"fn" => Token::Fn, assert_eq!(lex.next(), Some(Ok(Token::Sub)));
"var" => Token::Var, assert_eq!(lex.next(), Some(Ok(Token::Number(Ok(255.0)))));
"let" => Token::Var, // var and let are synonyms assert_eq!(lex.next(), Some(Ok(Token::Div)));
"if" => Token::If, assert_eq!(lex.next(), Some(Ok(Token::Number(Ok(10.0)))));
"else" => Token::Else, assert_eq!(lex.next(), Some(Ok(Token::Add)));
"true" => Token::Bool(true), assert_eq!(lex.next(), Some(Ok(Token::Number(Ok(1000000.0)))));
"false" => Token::Bool(false), assert_eq!(lex.next(), Some(Ok(Token::Semicolon)));
"none" => Token::None, }
_ => Token::Ident(ident),
}); #[test]
fn test_strings() {
// A single token can be one of the above let mut lex = Token::lexer("\"Foo\" 'Single' 'Sin\\'Esq\\'gle'");
let token = num.or(str_).or(op).or(ctrl).or(ident); assert_eq!(lex.next(), Some(Ok(Token::String("Foo".to_owned()))));
assert_eq!(lex.next(), Some(Ok(Token::String("Single".to_owned()))));
let comment = just("//") assert_eq!(
.then(any().and_is(just('\n').not()).repeated()) lex.next(),
.padded(); Some(Ok(Token::String("Sin'Esq'gle".to_owned())))
);
token }
.map_with(|tok, e| (tok, e.span()))
.padded_by(comment.repeated()) #[test]
.padded() fn test_full_syntax_example() {
// If we encounter an error, skip and attempt to lex the next character as a token instead let mut lex = Token::lexer(
.recover_with(skip_then_retry_until(any().ignored(), end())) "
.repeated() fn main(args: string[]) -> ArgumentError!string {
.collect() if args.length <= 2 {
return Err(\"Not enough Arguments\", ArgumentError);
}
return match args.length {
3 => \"This is actually just one argument\",
4 => \"Two arguments. Good!\",
_ => \"You're overdoing it!\"
}
}
",
);
// FIRST LINE
assert_eq!(lex.next(), Some(Ok(Token::Fn)));
assert_eq!(lex.next(), Some(Ok(Token::Identifier("main"))));
assert_eq!(lex.next(), Some(Ok(Token::ParenOpen)));
assert_eq!(lex.next(), Some(Ok(Token::Identifier("args"))));
assert_eq!(lex.next(), Some(Ok(Token::Colon)));
assert_eq!(lex.next(), Some(Ok(Token::Identifier("string"))));
assert_eq!(lex.next(), Some(Ok(Token::BracketOpen)));
assert_eq!(lex.next(), Some(Ok(Token::BracketClose)));
assert_eq!(lex.next(), Some(Ok(Token::ParenClose)));
assert_eq!(lex.next(), Some(Ok(Token::Arrow)));
assert_eq!(lex.next(), Some(Ok(Token::Identifier("ArgumentError"))));
assert_eq!(lex.next(), Some(Ok(Token::ExclamationMark)));
assert_eq!(lex.next(), Some(Ok(Token::Identifier("string"))));
assert_eq!(lex.next(), Some(Ok(Token::BraceOpen)));
// SECOND LINE
assert_eq!(lex.next(), Some(Ok(Token::If)));
assert_eq!(lex.next(), Some(Ok(Token::Identifier("args"))));
assert_eq!(lex.next(), Some(Ok(Token::Dot)));
assert_eq!(lex.next(), Some(Ok(Token::Identifier("length"))));
assert_eq!(lex.next(), Some(Ok(Token::Le)));
assert_eq!(lex.next(), Some(Ok(Token::Number(Ok(2.0)))));
assert_eq!(lex.next(), Some(Ok(Token::BraceOpen)));
// THIRD LINE
assert_eq!(lex.next(), Some(Ok(Token::Return)));
assert_eq!(lex.next(), Some(Ok(Token::Err)));
assert_eq!(lex.next(), Some(Ok(Token::ParenOpen)));
assert_eq!(
lex.next(),
Some(Ok(Token::String("Not enough Arguments".to_owned())))
);
assert_eq!(lex.next(), Some(Ok(Token::Comma)));
assert_eq!(lex.next(), Some(Ok(Token::Identifier("ArgumentError"))));
assert_eq!(lex.next(), Some(Ok(Token::ParenClose)));
assert_eq!(lex.next(), Some(Ok(Token::Semicolon)));
// FOURTH LINE
assert_eq!(lex.next(), Some(Ok(Token::BraceClose)));
// FIFTH LINE
assert_eq!(lex.next(), Some(Ok(Token::Return)));
assert_eq!(lex.next(), Some(Ok(Token::Match)));
assert_eq!(lex.next(), Some(Ok(Token::Identifier("args"))));
assert_eq!(lex.next(), Some(Ok(Token::Dot)));
assert_eq!(lex.next(), Some(Ok(Token::Identifier("length"))));
assert_eq!(lex.next(), Some(Ok(Token::BraceOpen)));
// SIXTH LINE
assert_eq!(lex.next(), Some(Ok(Token::Number(Ok(3.0)))));
assert_eq!(lex.next(), Some(Ok(Token::FatArrow)));
assert_eq!(
lex.next(),
Some(Ok(Token::String(
"This is actually just one argument".to_owned()
)))
);
assert_eq!(lex.next(), Some(Ok(Token::Comma)));
// SEVENTH LINE
assert_eq!(lex.next(), Some(Ok(Token::Number(Ok(4.0)))));
assert_eq!(lex.next(), Some(Ok(Token::FatArrow)));
assert_eq!(
lex.next(),
Some(Ok(Token::String("Two arguments. Good!".to_owned())))
);
assert_eq!(lex.next(), Some(Ok(Token::Comma)));
// EIGHTH LINE
assert_eq!(lex.next(), Some(Ok(Token::Default)));
assert_eq!(lex.next(), Some(Ok(Token::FatArrow)));
assert_eq!(
lex.next(),
Some(Ok(Token::String("You're overdoing it!".to_owned())))
);
// NINTH AND TENTH LINE
assert_eq!(lex.next(), Some(Ok(Token::BraceClose)));
assert_eq!(lex.next(), Some(Ok(Token::BraceClose)));
}
} }

95
src/lexer_.rs Normal file
View file

@ -0,0 +1,95 @@
use chumsky::prelude::*;
use std::fmt::{Display, Formatter, Result};
// A few type definitions to be used by our parsers below
// Byte-offset span type shared by the lexer and the downstream parsers.
pub type Span = SimpleSpan;
// A value paired with the source span it was lexed/parsed from.
pub type Spanned<T> = (T, Span);

/// Tokens produced by the chumsky-based lexer.
///
/// Borrowed variants (`Str`, `Op`, `Ident`) reference slices of the input
/// source, so tokens live no longer than that input (`'src`).
#[derive(Clone, Debug, PartialEq)]
pub enum Token<'src> {
    None,              // the `none` literal
    Bool(bool),        // `true` / `false`
    Num(f64),          // numeric literal (integer or decimal)
    Str(&'src str),    // string literal contents, without the quotes
    Op(&'src str),     // operator built from one or more of `+*-/!=`
    Ctrl(char),        // control/delimiter character: ( ) [ ] { } ; ,
    Ident(&'src str),  // any identifier that is not a keyword
    Fn,                // `fn` keyword
    Var,               // `var` keyword (`let` lexes to this too)
    If,                // `if` keyword
    Else,              // `else` keyword
}
impl Display for Token<'_> {
    /// Writes the token's source-level representation: payload-carrying
    /// variants print their payload; keywords print their fixed spelling.
    fn fmt(&self, f: &mut Formatter) -> Result {
        match self {
            Token::Bool(b) => write!(f, "{b}"),
            Token::Num(n) => write!(f, "{n}"),
            // All three &str payloads render identically.
            Token::Str(s) | Token::Op(s) | Token::Ident(s) => write!(f, "{s}"),
            Token::Ctrl(c) => write!(f, "{c}"),
            Token::None => f.write_str("none"),
            Token::Fn => f.write_str("fn"),
            Token::Var => f.write_str("var"),
            Token::If => f.write_str("if"),
            Token::Else => f.write_str("else"),
        }
    }
}
/// Builds a chumsky parser that tokenizes Solace source into spanned tokens.
///
/// Produces a `Vec<(Token, Span)>`; lexical errors are reported as `Rich`
/// diagnostics, and on an unrecognized character the lexer skips it and
/// retries, so one bad byte does not abort the rest of tokenization.
pub fn lexer<'src>()
-> impl Parser<'src, &'src str, Vec<Spanned<Token<'src>>>, extra::Err<Rich<'src, char, Span>>> {
    // A parser for numbers: integer part with an optional `.digits` tail,
    // parsed into f64 via FromStr. `unwrapped()` panics on parse failure,
    // which cannot happen for slices matching this grammar.
    let num = text::int(10)
        .then(just('.').then(text::digits(10)).or_not())
        .to_slice()
        .from_str()
        .unwrapped()
        .map(Token::Num);

    // A parser for strings: double-quoted only, token borrows the contents
    // without the quotes. NOTE(review): no escape handling — a `\"` inside
    // a string terminates it; confirm whether escapes are intended here.
    let str_ = just('"')
        .ignore_then(none_of('"').repeated().to_slice())
        .then_ignore(just('"'))
        .map(Token::Str);

    // A parser for operators: one or more characters from `+*-/!=`,
    // kept as a single slice (e.g. `==` is one Op token).
    let op = one_of("+*-/!=")
        .repeated()
        .at_least(1)
        .to_slice()
        .map(Token::Op);

    // A parser for control characters (delimiters, semicolons, etc.)
    let ctrl = one_of("()[]{};,").map(Token::Ctrl);

    // A parser for identifiers and keywords: lex an ASCII identifier first,
    // then map reserved words onto their keyword tokens.
    let ident = text::ascii::ident().map(|ident: &str| match ident {
        "fn" => Token::Fn,
        "var" => Token::Var,
        "let" => Token::Var, // var and let are synonyms
        "if" => Token::If,
        "else" => Token::Else,
        "true" => Token::Bool(true),
        "false" => Token::Bool(false),
        "none" => Token::None,
        _ => Token::Ident(ident),
    });

    // A single token can be one of the above; alternatives are tried in
    // this order, so `num` wins over `ident` for a leading digit.
    let token = num.or(str_).or(op).or(ctrl).or(ident);

    // Line comments: `//` to end of line, discarded (not emitted as tokens).
    let comment = just("//")
        .then(any().and_is(just('\n').not()).repeated())
        .padded();

    token
        // Attach the source span to each token.
        .map_with(|tok, e| (tok, e.span()))
        .padded_by(comment.repeated())
        .padded()
        // If we encounter an error, skip and attempt to lex the next character as a token instead
        .recover_with(skip_then_retry_until(any().ignored(), end()))
        .repeated()
        .collect()
}

1
src/lib.rs Normal file
View file

@ -0,0 +1 @@
pub mod lexer;

View file

@ -1,7 +1,7 @@
mod emitter;
mod lexer; mod lexer;
mod parser; // mod emitter;
mod transformer; // mod parser;
// mod transformer;
use anyhow::Result; use anyhow::Result;
use std::{env, fs}; use std::{env, fs};
@ -23,20 +23,20 @@ fn main() -> Result<()> {
let file_path = &args[1]; let file_path = &args[1];
let input = fs::read_to_string(file_path).expect(&format!("Cannot read file '{}'!", file_path)); let input = fs::read_to_string(file_path).expect(&format!("Cannot read file '{}'!", file_path));
/*
// Parse Solace Code // Parse Solace Code
if let Some((ast, span)) = parser::parse(file_path.to_string(), &input) { if let Some((ast, span)) = parser::parse(file_path.to_string(), &input, false) {
// Transform from Solace AST to SWC AST // Transform from Solace AST to SWC AST
let js_transformer = transformer::js::JsTransformer::new(); let js_transformer = transformer::js::JsTransformer::new();
let js_ast = js_transformer.transform(ast); let js_ast = js_transformer.transform(ast, span);
}
/*
// Emit JavaScript // Emit JavaScript
let js_emitter = emitter::js::JsEmitter::new(); let js_emitter = emitter::js::JsEmitter::new();
let js_code = js_emitter.emit(js_ast)?; let (js_code, _source_map) = js_emitter.emit(js_ast);
// Write Output to stdout // Write Output to stdout
println!("{}", js_code); println!("{}", js_code);
}
*/ */
Ok(()) Ok(())

View file

@ -44,27 +44,36 @@ pub enum BinaryOp {
NotEq, NotEq,
} }
// An expression node in the AST. Children are spanned so we can generate useful runtime errors.
#[derive(Debug)] #[derive(Debug)]
pub enum Expr<'src> { pub enum Expr<'src> {
Error, Error,
Value(Value<'src>), Value(Value<'src>),
List(Vec<Spanned<Self>>), List(Vec<Spanned<Self>>),
Local(&'src str), Local(&'src str),
Var(&'src str, Box<Spanned<Self>>, Box<Spanned<Self>>), If(Box<Spanned<Self>>, Box<Spanned<Self>>, Box<Spanned<Self>>),
Then(Box<Spanned<Self>>, Box<Spanned<Self>>), Then(Box<Spanned<Self>>, Box<Spanned<Self>>),
Binary(Box<Spanned<Self>>, BinaryOp, Box<Spanned<Self>>), Binary(Box<Spanned<Self>>, BinaryOp, Box<Spanned<Self>>),
Call(Box<Spanned<Self>>, Spanned<Vec<Spanned<Self>>>), Call(Box<Spanned<Self>>, Spanned<Vec<Spanned<Self>>>),
If(Box<Spanned<Self>>, Box<Spanned<Self>>, Box<Spanned<Self>>), }
Print(Box<Spanned<Self>>),
// A sequence of statements with an optional trailing expression.
// NOTE(review): fields are private to this module; presumably only the parser
// constructs/consumes this — confirm before exposing elsewhere.
#[derive(Debug)]
pub struct Block<'src> {
// Statements executed in order, each paired with its source span.
stmts: Vec<Spanned<Stmt<'src>>>,
// Optional tail expression; presumably the block's value — TODO confirm.
expr: Option<Box<Spanned<Expr<'src>>>>,
}
#[derive(Debug)]
pub enum Stmt<'src> {
Var(&'src str, Box<Spanned<Self>>, Box<Spanned<Self>>),
Expr(Box<Spanned<Expr<'src>>>),
} }
// A function node in the AST. // A function node in the AST.
#[derive(Debug)] #[derive(Debug)]
pub struct Func<'src> { pub struct Func<'src> {
args: Vec<&'src str>, pub args: Vec<&'src str>,
span: Span, pub span: Span,
body: Spanned<Expr<'src>>, pub body: Spanned<Expr<'src>>,
} }
fn expr_parser<'tokens, 'src: 'tokens, I>() fn expr_parser<'tokens, 'src: 'tokens, I>()
@ -98,7 +107,7 @@ where
.then(inline_expr) .then(inline_expr)
.then_ignore(just(Token::Ctrl(';'))) .then_ignore(just(Token::Ctrl(';')))
.then(expr.clone()) .then(expr.clone())
.map(|((name, val), body)| Expr::Var(name, Box::new(val), Box::new(body))); .map(|((name, val), body)| Stmt::Var(name, Box::new(val), Box::new(body)));
let list = items let list = items
.clone() .clone()
@ -329,6 +338,7 @@ where
pub fn parse<'src>( pub fn parse<'src>(
filename: String, filename: String,
src: &'src str, src: &'src str,
debug: bool,
) -> Option<(HashMap<&'src str, Func<'src>>, SimpleSpan)> { ) -> Option<(HashMap<&'src str, Func<'src>>, SimpleSpan)> {
let (tokens, lex_errs) = lexer().parse(src).into_output_errors(); let (tokens, lex_errs) = lexer().parse(src).into_output_errors();
@ -342,12 +352,14 @@ pub fn parse<'src>(
) )
.into_output_errors(); .into_output_errors();
if debug {
if let Some((funcs, _file_span)) = ast if let Some((funcs, _file_span)) = ast
.as_ref() .as_ref()
.filter(|_| lex_errs.len() + parse_errs.len() == 0) .filter(|_| lex_errs.len() + parse_errs.len() == 0)
{ {
println!("{funcs:#?}") println!("{funcs:#?}")
} }
}
(ast, parse_errs) (ast, parse_errs)
} else { } else {

View file

@ -1,25 +1,108 @@
use crate::parser::Func; use crate::parser;
use chumsky;
use std::collections::HashMap; use std::collections::HashMap;
use swc_common::DUMMY_SP; use swc_common::{BytePos, DUMMY_SP, Span, SyntaxContext};
use swc_ecma_ast as js_ast; use swc_ecma_ast::{
AssignExpr, BinaryOp, BindingIdent, BlockStmt, Decl, Expr, FnDecl, Function, Ident, Module,
ModuleItem, Param, Pat, Stmt,
};
/// Conversion from a Solace AST node into its SWC (JavaScript AST) counterpart.
pub trait ToSWC<T> {
    /// Produce the equivalent SWC value for `self`.
    fn to_swc(&self) -> T;
}
/// Map a chumsky `SimpleSpan` (byte offsets into the source) onto an SWC
/// `Span` of byte positions.
impl ToSWC<Span> for chumsky::span::SimpleSpan {
    fn to_swc(&self) -> Span {
        // NOTE(review): `as u32` silently truncates for sources larger than
        // u32::MAX bytes; fine for realistic inputs, but worth confirming.
        let lo = BytePos(self.start as u32);
        let hi = BytePos(self.end as u32);
        Span::new(lo, hi)
    }
}
impl ToSWC<BinaryOp> for parser::BinaryOp {
fn to_swc(&self) -> BinaryOp {
match self {
parser::BinaryOp::Add => BinaryOp::Add,
parser::BinaryOp::Sub => BinaryOp::Sub,
parser::BinaryOp::Mul => BinaryOp::Mul,
parser::BinaryOp::Div => BinaryOp::Div,
parser::BinaryOp::Eq => BinaryOp::EqEqEq,
parser::BinaryOp::NotEq => BinaryOp::NotEqEq,
// TODO: implement all members of BinaryOp
}
}
}
impl ToSWC<Function> for parser::Func<'_> {
fn to_swc(&self) -> Function {
Function {
params: self
.args
.iter()
.map(|name| Param {
span: DUMMY_SP,
decorators: vec![],
pat: Pat::Ident(BindingIdent {
id: name.to_string().into(),
type_ann: None,
}),
})
.collect(),
decorators: vec![],
span: self.span.to_swc(),
body: Some(BlockStmt {
span: DUMMY_SP,
ctxt: SyntaxContext::empty(),
stmts: vec![], //TODO!
}),
is_generator: false,
is_async: false,
type_params: None,
return_type: None,
ctxt: SyntaxContext::empty(),
}
}
}
// FIXME(review): work-in-progress — this impl cannot compile as written:
//  - `self` is the `parser::Expr` enum itself, so `self.1` / `self.2` do not
//    exist; the spans presumably should come from the bound `val` and `body`
//    (each a `Spanned<_>`) instead — confirm intent.
//  - `AssignExpr` is missing its required fields (`op`, `left`, `right`).
//  - the match is non-exhaustive over `parser::Expr`'s variants, and this
//    commit moves `Var` from `Expr` to `Stmt`, so this arm may not resolve.
impl ToSWC<Expr> for parser::Expr<'_> {
    fn to_swc(&self) -> Expr {
        match self {
            parser::Expr::Var(name, val, body) => Expr::Assign(AssignExpr {
                span: Span::new(BytePos(self.1.1.start as u32), BytePos(self.2.1.end as u32)),
            }),
        }
    }
}
pub struct JsTransformer; pub struct JsTransformer;
impl<'src> JsTransformer { impl<'src> JsTransformer {
pub fn new() -> Self { pub fn new() -> Self {
Self Self {}
} }
pub fn transform(&self, solace_ast: HashMap<&'src str, Func<'_>>) -> js_ast::Module { pub fn transform(
js_ast::Module { &self,
span: DUMMY_SP, source_ast: HashMap<&'src str, parser::Func<'src>>,
body: solace_ast span: chumsky::span::SimpleSpan,
) -> Module {
Module {
span: span.to_swc(),
body: source_ast
.into_iter() .into_iter()
.map(|(name, func)| self.transform_func(name, func)) .map(|(name, func)| self.transform_func_stmt(name, func))
.collect(), .collect(),
shebang: None, shebang: None,
} }
} }
pub fn transform_func(&self, name: &str, func: Func<'_>) -> js_ast::Function { pub fn transform_func_stmt(&self, ident: &str, func: parser::Func<'_>) -> ModuleItem {
todo!("Implement me") ModuleItem::Stmt(Stmt::Decl(Decl::Fn(FnDecl {
ident: Ident::new(ident.into(), DUMMY_SP, SyntaxContext::empty()),
declare: false,
function: Box::new(func.to_swc()),
})))
} }
// pub fn transform_expr(&self, expr: parser::Expr) -> Stmt {
// match expr {
// Expr::Var => Stmt::Decl(Decl::Var(())),
// }
// }
} }