From 7ce05269ccd20f65614c3ed5173ad674eaa426ed Mon Sep 17 00:00:00 2001 From: afreeland Date: Mon, 19 Feb 2024 11:58:29 -0500 Subject: [PATCH 1/4] Add debug support and documentation for internal graph output --- book/src/SUMMARY.md | 1 + book/src/debugging.md | 85 ++++++++++++++++++++++++++++++ logos-codegen/src/generator/mod.rs | 3 ++ 3 files changed, 89 insertions(+) create mode 100644 book/src/debugging.md diff --git a/book/src/SUMMARY.md b/book/src/SUMMARY.md index f0b8ce70..436c65bf 100644 --- a/book/src/SUMMARY.md +++ b/book/src/SUMMARY.md @@ -10,6 +10,7 @@ + [Using `Extras`](./extras.md) + [Using callbacks](./callbacks.md) + [Common regular expressions](./common-regex.md) ++ [Debugging](./debugging.md) + [Examples](./examples.md) + [Brainfuck interpreter](./examples/brainfuck.md) + [JSON parser](./examples/json.md) diff --git a/book/src/debugging.md b/book/src/debugging.md new file mode 100644 index 00000000..e6ba43a3 --- /dev/null +++ b/book/src/debugging.md @@ -0,0 +1,85 @@ +# Debugging + +Gain deeper insights into your code's behavior with this debugging section. + +## Visualizing Logos Graph + +Logos works by creating a graph that gets derived from the tokens that you defined. This graph describes how the lexer moves through different states when processing input. + +Hence, it may be beneficial during debugging to be able to visualize this graph, to understand how Logos will match the various tokens. + +If we take this example: +```rust,no_run,noplayground +use logos::Logos; + +#[derive(Debug, Logos, PartialEq)] +enum Token { + // Tokens can be literal strings, of any length. + #[token("fast")] + Fast, + + #[token(".")] + Period, + + // Or regular expressions. 
+ #[regex("[a-zA-Z]+")] + Text, +} +fn main() { + let input = "Create ridiculously fast Lexers."; + + let mut lexer = Token::lexer(input); + while let Some(token) = lexer.next() { + println!("{:?}", token); + } +} +``` + +Logos actually constructs a graph that contains the logic for matching tokens: +``` +graph = { + 1: ::Fast, + 2: ::Period, + 3: ::Text, + 4: { + [A-Z] ⇒ 4, + [a-z] ⇒ 4, + _ ⇒ 3, + }, + 7: [ + ast ⇒ 8, + _ ⇒ 4*, + ], + 8: { + [A-Z] ⇒ 4, + [a-z] ⇒ 4, + _ ⇒ 1, + }, + 9: { + . ⇒ 2, + [A-Z] ⇒ 4, + [a-e] ⇒ 4, + f ⇒ 7, + [g-z] ⇒ 4, + }, +} +``` +This graph can help us understand how our patterns are matched, and maybe understand why we have a bug at some point. + +Let's get started by trying to understand how Logos is matching the `.` character, which we've tokenized as `Token::Period`. + +We can begin our search by looking at number `9` for the character `.`. We can see that if Logos matches a `.` it will jump `=>` to number `2`. We can then follow that by looking at `2` which resolves to our `::Period` token. + +Logos will then continue to look for any matches past our `.` character. This is required in case there is potential continuation after the `.` character. Although, in the _input_ we provided there are no any additional characters, since it is the end of our input. + +We also can try to identify how the token `fast` works by looking at `9`, first, and seeing that `f` will cause Logos to jump to `7`. This will then resolve the last letters of our word _fast_ by matching `ast` which jumps to `8`. Since our provided _input_ to the lexer does not include alphabetic characters after the word "fast", but rather a whitespace, the token `::Fast` will be recognized. Then, the graph will look for further potential continuation (here, `[g-z] => 4`) + +### Enabling + +To enable this debugging output you can use the `debug` feature. 
+ +In your `Cargo.toml` you can +``` +[dependencies] +logos = { version = "1.2.3", features = ["debug"] } +``` diff --git a/logos-codegen/src/generator/mod.rs b/logos-codegen/src/generator/mod.rs index 1b8bf8bf..5dec5cbd 100644 --- a/logos-codegen/src/generator/mod.rs +++ b/logos-codegen/src/generator/mod.rs @@ -53,6 +53,9 @@ impl<'a> Generator<'a> { let rendered = Self::fast_loop_macro(); let meta = Meta::analyze(root, graph); + #[cfg(feature = "debug")] + dbg!(graph); + Generator { name, this, From f3428655dd96e874b91f50012da5362323f3c101 Mon Sep 17 00:00:00 2001 From: afreeland Date: Sat, 24 Feb 2024 12:22:34 -0500 Subject: [PATCH 2/4] Refine enabling feature --- book/src/debugging.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/book/src/debugging.md b/book/src/debugging.md index e6ba43a3..40ec9053 100644 --- a/book/src/debugging.md +++ b/book/src/debugging.md @@ -76,10 +76,10 @@ We also can try to identify how the token `fast` works by looking at `9`, first, ### Enabling -To enable this debugging output you can use the `debug` feature. +To enable debugging output you can define a `debug` feature in your `Cargo.toml` file, like this: -In your `Cargo.toml` you can ``` +// Cargo.toml [dependencies] logos = { version = "1.2.3", features = ["debug"] } ``` From ae1890960bda0a9e211f585636e49ca6ba4d28f6 Mon Sep 17 00:00:00 2001 From: Aaron Freeland Date: Tue, 5 Mar 2024 15:49:13 -0500 Subject: [PATCH 3/4] Update book/src/debugging.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: João Marcos --- book/src/debugging.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/book/src/debugging.md b/book/src/debugging.md index 40ec9053..9931f77e 100644 --- a/book/src/debugging.md +++ b/book/src/debugging.md @@ -1,6 +1,6 @@ # Debugging -Gain deeper insights into your code's behavior with this debugging section. +Instructions on how to debug your Logos lexer. 
## Visualizing Logos Graph From 64b1e7f048a30166046b4f56fcfa537056fecc6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9rome=20Eertmans?= Date: Mon, 10 Jun 2024 10:47:33 +0200 Subject: [PATCH 4/4] chore(docs): add required changes --- book/src/debugging.md | 41 +++++++++++++++++++++++------- logos-codegen/src/generator/mod.rs | 3 --- logos-codegen/src/lib.rs | 2 +- 3 files changed, 33 insertions(+), 13 deletions(-) diff --git a/book/src/debugging.md b/book/src/debugging.md index 9931f77e..e61c063c 100644 --- a/book/src/debugging.md +++ b/book/src/debugging.md @@ -4,11 +4,16 @@ Instructions on how to debug your Logos lexer. ## Visualizing Logos Graph -Logos works by creating a graph that gets derived from the tokens that you defined. This graph describes how the lexer moves through different states when processing input. +Logos works by creating a graph that gets derived from +the tokens that you defined. +This graph describes how the lexer moves through different +states when processing input. -Hence, it may be beneficial during debugging to be able to visualize this graph, to understand how Logos will match the various tokens. +Hence, it may be beneficial during debugging to be able to +visualize this graph, to understand how Logos will match the various tokens. If we take this example: + ```rust,no_run,noplayground use logos::Logos; @@ -36,6 +41,7 @@ fn main() { ``` Logos actually constructs a graph that contains the logic for matching tokens: + ``` graph = { 1: ::Fast, @@ -64,22 +70,39 @@ graph = { }, } ``` -This graph can help us understand how our patterns are matched, and maybe understand why we have a bug at some point. +This graph can help us understand how our patterns are matched, +and maybe understand why we have a bug at some point. -Let's get started by trying to understand how Logos is matching the `.` character, which we've tokenized as `Token::Period`. 
+Let's get started by trying to understand how Logos is matching the +`.` character, which we've tokenized as `Token::Period`. -We can begin our search by looking at number `9` for the character `.`. We can see that if Logos matches a `.` it will jump `=>` to number `2`. We can then follow that by looking at `2` which resolves to our `::Period` token. +We can begin our search by looking at number `9` for the character `.`. +We can see that if Logos matches a `.` it will jump `=>` to number `2`. +We can then follow that by looking at `2` which resolves to our `::Period` token. -Logos will then continue to look for any matches past our `.` character. This is required in case there is potential continuation after the `.` character. Although, in the _input_ we provided there are no any additional characters, since it is the end of our input. +Logos will then continue to look for any matches past our `.` character. +This is required in case there is potential continuation after the `.` character. +Although, in the *input* we provided, there are no additional characters, +since it is the end of our input. -We also can try to identify how the token `fast` works by looking at `9`, first, and seeing that `f` will cause Logos to jump to `7`. This will then resolve the last letters of our word _fast_ by matching `ast` which jumps to `8`. Since our provided _input_ to the lexer does not include alphabetic characters after the word "fast", but rather a whitespace, the token `::Fast` will be recognized. Then, the graph will look for further potential continuation (here, `[g-z] => 4`) +We can also try to identify how the token `fast` works by looking at `9`, +first, and seeing that `f` will cause Logos to jump to `7`. +This will then resolve the last letters of our word *fast* by matching `ast` +which jumps to `8`.
Since our provided *input* to the lexer does not include +alphabetic characters after the word "fast", but rather whitespace, +the token `::Fast` will be recognized. +Then, the graph will look for further potential continuation (here, `[g-z] => 4`). -### Enabling +## Enabling -To enable debugging output you can define a `debug` feature in your `Cargo.toml` file, like this: +To get debugging output, enable the `debug` feature in your +`Cargo.toml` file, like this: ``` // Cargo.toml [dependencies] logos = { version = "1.2.3", features = ["debug"] } ``` + +Next, you can build your project with `cargo build` and +the output will contain a debug representation of your graph(s). diff --git a/logos-codegen/src/generator/mod.rs b/logos-codegen/src/generator/mod.rs index 5dec5cbd..1b8bf8bf 100644 --- a/logos-codegen/src/generator/mod.rs +++ b/logos-codegen/src/generator/mod.rs @@ -53,9 +53,6 @@ impl<'a> Generator<'a> { let rendered = Self::fast_loop_macro(); let meta = Meta::analyze(root, graph); - #[cfg(feature = "debug")] - dbg!(graph); - Generator { name, this, diff --git a/logos-codegen/src/lib.rs b/logos-codegen/src/lib.rs index 1de618de..a38e88c0 100644 --- a/logos-codegen/src/lib.rs +++ b/logos-codegen/src/lib.rs @@ -295,7 +295,7 @@ pub fn generate(input: TokenStream) -> TokenStream { graph.shake(root); - debug!("Generating code from graph: {graph:#?}"); + debug!("Generating code from graph:\n{graph:#?}"); let generator = Generator::new(name, &this, root, &graph);