From 631687edd5a5dd6a4ce0c3facedaad64a2a2b3a2 Mon Sep 17 00:00:00 2001 From: Laurence Tratt Date: Tue, 4 Jun 2024 13:16:47 +0100 Subject: [PATCH] Add grmtools app. grmtools (or, more specifically from this commit's perspective lrpar) is a Yacc-compatible parser written for Rust. Although it hasn't really been optimised, I thought this benchmark is sufficiently interesting that users might like to know where grmtools/lrpar fits in. I have deliberately written the lexer and parser in not only "normal/proper Lex/Yacc style" but also "full grmtools style", including good support for error recovery, because I think that's the only mode in which grmtools makes sense. This might mean that this lexer/grammar is doing a bit more work than other parsers, but since I don't expect grmtools to be especially fast anyway, I don't suppose another few percent slowdown will hurt! --- Cargo.lock | 265 ++++++++++++++++++++++++++++++ examples/grmtools-app/Cargo.toml | 18 ++ examples/grmtools-app/app.rs | 29 ++++ examples/grmtools-app/build.rs | 26 +++ examples/grmtools-app/json.l | 14 ++ examples/grmtools-app/json.y | 66 ++++++++ examples/grmtools-app/json_val.rs | 11 ++ 7 files changed, 429 insertions(+) create mode 100644 examples/grmtools-app/Cargo.toml create mode 100644 examples/grmtools-app/app.rs create mode 100644 examples/grmtools-app/build.rs create mode 100644 examples/grmtools-app/json.l create mode 100644 examples/grmtools-app/json.y create mode 100644 examples/grmtools-app/json_val.rs diff --git a/Cargo.lock b/Cargo.lock index 0b08beb..287d818 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -29,6 +29,12 @@ version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0942ffc6dcaadf03badf6e6a2d0228460359d5e34b57ccdc720b7382dfbd5ec5" +[[package]] +name = "anyhow" +version = "1.0.86" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da" + [[package]] name 
= "ariadne" version = "0.4.1" @@ -66,6 +72,15 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3a8241f3ebb85c056b509d4327ad0358fbbba6ffb340bf388f26350aeda225b1" +[[package]] +name = "bincode" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" +dependencies = [ + "serde", +] + [[package]] name = "bit-set" version = "0.5.3" @@ -93,6 +108,12 @@ version = "1.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" +[[package]] +name = "cactus" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acbc26382d871df4b7442e3df10a9402bf3cf5e55cbd66f12be38861425f0564" + [[package]] name = "cc" version = "1.0.79" @@ -105,6 +126,20 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "cfgrammar" +version = "0.13.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec07af28018dd8b4b52e49eb6e57268b19dda0996d4824889eb07ee0ef67378c" +dependencies = [ + "indexmap", + "lazy_static", + "num-traits", + "regex", + "serde", + "vob", +] + [[package]] name = "chumsky" version = "0.9.3" @@ -149,6 +184,15 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" +[[package]] +name = "deranged" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b42b6fa04a440b495c8b04d0e71b707c585f83cb9cb28cf8cd0d976c315e31b4" +dependencies = [ + "powerfmt", +] + [[package]] name = "dirs-next" version = "2.0.0" @@ -191,6 +235,18 @@ version = "1.0.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" +[[package]] +name = "filetime" +version = "0.2.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ee447700ac8aa0b2f2bd7bc4462ad686ba06baa6727ac149a2d6277f0d240fd" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall 0.4.1", + "windows-sys", +] + [[package]] name = "fixedbitset" version = "0.4.2" @@ -203,6 +259,15 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +[[package]] +name = "getopts" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14dbbfd5c71d70241ecf9e6f13737f7b5ce823821063188d7e46c41d371eebd5" +dependencies = [ + "unicode-width", +] + [[package]] name = "getrandom" version = "0.2.8" @@ -214,6 +279,15 @@ dependencies = [ "wasi", ] +[[package]] +name = "grmtools-app" +version = "0.1.0" +dependencies = [ + "cfgrammar", + "lrlex", + "lrpar", +] + [[package]] name = "hashbrown" version = "0.14.3" @@ -357,6 +431,60 @@ dependencies = [ "logos-codegen", ] +[[package]] +name = "lrlex" +version = "0.13.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c65e01ebaccc77218ed6fa4f0053daa2124bce4e25a5e83aae0f7ccfc9cbfccb" +dependencies = [ + "cfgrammar", + "getopts", + "lazy_static", + "lrpar", + "num-traits", + "quote", + "regex", + "regex-syntax 0.8.2", + "serde", + "vergen", +] + +[[package]] +name = "lrpar" +version = "0.13.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a4b858180a332aec09d10479a070802b13081077eb94010744bc4e3a11d9768" +dependencies = [ + "bincode", + "cactus", + "cfgrammar", + "filetime", + "indexmap", + "lazy_static", + "lrtable", + "num-traits", + "packedvec", + "regex", + "serde", + "static_assertions", + "vergen", + "vob", +] + +[[package]] +name = 
"lrtable" +version = "0.13.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fcefc5628209d1b1f4b2cd0bcefd0e50be80bdf178e886cb07317f5ce4f2856" +dependencies = [ + "cfgrammar", + "fnv", + "num-traits", + "serde", + "sparsevec", + "vob", +] + [[package]] name = "memchr" version = "2.7.1" @@ -396,12 +524,46 @@ dependencies = [ name = "null-app" version = "0.1.0" +[[package]] +name = "num-conv" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "num_threads" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c7398b9c8b70908f6371f47ed36737907c87c52af34c268fed0bf0ceb92ead9" +dependencies = [ + "libc", +] + [[package]] name = "once_cell" version = "1.17.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3" +[[package]] +name = "packedvec" +version = "1.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bde3c690ec20e4a2b4fb46f0289a451181eb50011a1e2acc8d85e2fde9062a45" +dependencies = [ + "num-traits", + "serde", +] + [[package]] name = "parking_lot" version = "0.12.1" @@ -502,6 +664,12 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5be167a7af36ee22fe3115051bc51f6e6c7054c9348e28deb4f49bd6f705a315" +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + [[package]] name = "precomputed-hash" version = "0.1.1" @@ -553,6 
+721,15 @@ dependencies = [ "bitflags", ] +[[package]] +name = "redox_syscall" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" +dependencies = [ + "bitflags", +] + [[package]] name = "redox_users" version = "0.4.3" @@ -610,6 +787,15 @@ version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" +[[package]] +name = "rustc_version" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" +dependencies = [ + "semver", +] + [[package]] name = "rustversion" version = "1.0.14" @@ -637,6 +823,12 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "semver" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b" + [[package]] name = "serde" version = "1.0.196" @@ -687,6 +879,18 @@ version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "62bb4feee49fdd9f707ef802e22365a35de4b7b299de4763d44bfea899442ff9" +[[package]] +name = "sparsevec" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35df5d2e580b29f3f7ec5b4ed49b0ab3acf7f3624122b3e823cafb9630f293b8" +dependencies = [ + "num-traits", + "packedvec", + "serde", + "vob", +] + [[package]] name = "stacker" version = "0.1.15" @@ -700,6 +904,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + [[package]] 
name = "string_cache" version = "0.8.7" @@ -755,6 +965,39 @@ dependencies = [ "syn", ] +[[package]] +name = "time" +version = "0.3.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5dfd88e563464686c916c7e46e623e520ddc6d79fa6641390f2e3fa86e83e885" +dependencies = [ + "deranged", + "itoa", + "libc", + "num-conv", + "num_threads", + "powerfmt", + "serde", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" + +[[package]] +name = "time-macros" +version = "0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f252a68540fde3a3877aeea552b832b40ab9a69e318efd078774a01ddee1ccf" +dependencies = [ + "num-conv", + "time-core", +] + [[package]] name = "tiny-keccak" version = "2.0.2" @@ -797,12 +1040,34 @@ dependencies = [ "void", ] +[[package]] +name = "vergen" +version = "8.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e27d6bdd219887a9eadd19e1c34f32e47fa332301184935c6d9bca26f3cca525" +dependencies = [ + "anyhow", + "rustversion", + "time", +] + [[package]] name = "version_check" version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +[[package]] +name = "vob" +version = "3.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c058f4c41e71a043c67744cb76dcc1ae63ece328c1732a72489ccccc2dec23e6" +dependencies = [ + "num-traits", + "rustc_version", + "serde", +] + [[package]] name = "void" version = "1.0.2" diff --git a/examples/grmtools-app/Cargo.toml b/examples/grmtools-app/Cargo.toml new file mode 100644 index 0000000..703fe82 --- /dev/null +++ b/examples/grmtools-app/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "grmtools-app" +version = "0.1.0" +edition = "2021" 
+
+[[bin]]
+name = "grmtools-app"
+path = "app.rs"
+
+[build-dependencies]
+cfgrammar = "0.13"
+lrlex = "0.13"
+lrpar = "0.13"
+
+[dependencies]
+cfgrammar = "0.13"
+lrlex = "0.13"
+lrpar = "0.13"
diff --git a/examples/grmtools-app/app.rs b/examples/grmtools-app/app.rs
new file mode 100644
index 0000000..3ca46e0
--- /dev/null
+++ b/examples/grmtools-app/app.rs
@@ -0,0 +1,29 @@
+use lrlex::lrlex_mod;
+use lrpar::lrpar_mod;
+use std::{env, fs};
+
+lrlex_mod!("json.l");
+lrpar_mod!("json.y");
+
+mod json_val;
+
+fn main() {
+    let src = fs::read_to_string(env::args().nth(1).expect("Expected file argument"))
+        .expect("Failed to read file");
+
+    let lexerdef = json_l::lexerdef();
+    let lexer = lexerdef.lexer(&src);
+    let (res, errs) = json_y::parse(&lexer);
+    for e in errs {
+        println!("{}", e.pp(&lexer, &json_y::token_epp));
+    }
+    match res {
+        Some(r) => {
+            #[cfg(debug_assertions)]
+            println!("{r:#?}");
+            #[cfg(not(debug_assertions))]
+            let _ = std::hint::black_box(r);
+        }
+        None => panic!(),
+    }
+}
diff --git a/examples/grmtools-app/build.rs b/examples/grmtools-app/build.rs
new file mode 100644
index 0000000..8608eb6
--- /dev/null
+++ b/examples/grmtools-app/build.rs
@@ -0,0 +1,26 @@
+use cfgrammar::yacc::YaccKind;
+use lrlex::CTLexerBuilder;
+use std::{env, path::PathBuf};
+
+fn main() {
+    CTLexerBuilder::new()
+        .lrpar_config(|ctp| {
+            ctp.yacckind(YaccKind::Grmtools)
+                .grammar_path("json.y")
+                .output_path(
+                    [env::var("OUT_DIR").unwrap().as_str(), "json.y.rs"]
+                        .iter()
+                        .collect::<PathBuf>(),
+                )
+                .mod_name("json_y")
+        })
+        .lexer_path("json.l")
+        .output_path(
+            [env::var("OUT_DIR").unwrap().as_str(), "json.l.rs"]
+                .iter()
+                .collect::<PathBuf>(),
+        )
+        .mod_name("json_l")
+        .build()
+        .unwrap();
+}
diff --git a/examples/grmtools-app/json.l b/examples/grmtools-app/json.l
new file mode 100644
index 0000000..750163e
--- /dev/null
+++ b/examples/grmtools-app/json.l
@@ -0,0 +1,14 @@
+%%
+"[^"]*" "STRING"
+-?(0|([1-9][0-9]*))(\.[0-9]*)?([eE][-+]?[0-9]+)? "FLOAT"
+\[ "["
+\] "]"
+\{ "{"
+\} "}"
+: ":"
+, ","
+false "FALSE"
+null "NULL"
+true "TRUE"
+[\n\r\t ]+ ;
+. "UNMATCHED"
diff --git a/examples/grmtools-app/json.y b/examples/grmtools-app/json.y
new file mode 100644
index 0000000..f9e9506
--- /dev/null
+++ b/examples/grmtools-app/json.y
@@ -0,0 +1,66 @@
+%start Object
+%expect-unused Unmatched "UNMATCHED"
+
+%%
+
+Object -> Result<Value, Box<dyn Error>>:
+      "{" ObjectMembersOpt "}" { Ok(Value::Object(HashMap::from_iter($2?))) }
+    ;
+
+ObjectMembersOpt -> Result<Vec<(String, Value)>, Box<dyn Error>>:
+      ObjectMembers { $1 }
+    | { Ok(Vec::new()) }
+    ;
+
+ObjectMembers -> Result<Vec<(String, Value)>, Box<dyn Error>>:
+      ObjectMembers "," ObjectMember { flatten($1, $3) }
+    | ObjectMember { Ok(vec![$1?]) }
+    ;
+
+ObjectMember -> Result<(String, Value), Box<dyn Error>>:
+      "STRING" ":" Member {
+        let s = $lexer.span_str($1.unwrap().span());
+        Ok((s[1..s.len() - 1].to_owned(), $3?))
+      }
+    ;
+
+Member -> Result<Value, Box<dyn Error>>:
+      "[" ArrayMembersOpt "]" { Ok(Value::Array($2?)) }
+    | "FALSE" { Ok(Value::Boolean(false)) }
+    | "FLOAT" { Ok(Value::Num($lexer.span_str($1?.span()).parse::<f64>().unwrap())) }
+    | "NULL" { Ok(Value::Null) }
+    | Object { $1 }
+    | "STRING" {
+        let s = $lexer.span_str($1.unwrap().span());
+        Ok(Value::Str(s[1..s.len() - 1].to_owned()))
+      }
+    | "TRUE" { Ok(Value::Boolean(true)) }
+    ;
+
+ArrayMembersOpt -> Result<Vec<Value>, Box<dyn Error>>:
+      ArrayMembers { $1 }
+    | { Ok(Vec::new()) }
+    ;
+
+ArrayMembers -> Result<Vec<Value>, Box<dyn Error>>:
+      ArrayMembers "," Member { flatten($1, $3) }
+    | Member { Ok(vec![$1?])}
+    ;
+
+Unmatched -> ():
+      "UNMATCHED" { }
+    ;
+
+%%
+
+use crate::json_val::Value;
+use std::{collections::HashMap, error::Error};
+
+fn flatten<T>(lhs: Result<Vec<T>, Box<dyn Error>>, rhs: Result<T, Box<dyn Error>>)
+           -> Result<Vec<T>, Box<dyn Error>>
+{
+    let mut lhs = lhs?;
+    let rhs = rhs?;
+    lhs.push(rhs);
+    Ok(lhs)
+}
diff --git a/examples/grmtools-app/json_val.rs b/examples/grmtools-app/json_val.rs
new file mode 100644
index 0000000..1bb56bd
--- /dev/null
+++ b/examples/grmtools-app/json_val.rs
@@ -0,0 +1,11 @@
+use std::collections::HashMap;
+
+#[derive(Debug, PartialEq, Clone)]
+pub enum Value {
+    Null,
+    Boolean(bool),
+    Str(String),
+    Num(f64),
+    Array(Vec<Value>),
+    Object(HashMap<String, Value>),
+}