From b40e9451479caebf9614134e1d9f49d6d1ce66ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9rome=20Eertmans?= Date: Fri, 21 Apr 2023 09:35:21 +0200 Subject: [PATCH] chore(book): adding examples (#300) * Initial commit for the Logos Handboook * A bit more adds to the book * chore(ci): setup automated CI for book * chore(ci): update branches * fix(ci): remove extra needs * chore(docs): adding brainfuck example * Add missing `Debug` error type requirement (#298) * chore(docs): create JSON example * Initial commit for the Logos Handboook * A bit more adds to the book * chore(ci): setup automated CI for book * chore(ci): update branches * fix(ci): remove extra needs * chore(docs): adding brainfuck example * chore(docs): create JSON example * chore(ci): test code examples * chore(docs): scrape examples and autodoc features * chore(docs): adding brainfuck example * Add missing `Debug` error type requirement (#298) * chore(docs): create JSON example * chore(ci): test code examples * chore(docs): scrape examples and autodoc features * Auto stash before rebase of "maciejhirsz/book" * chore(book): typos and styling --------- Co-authored-by: Maciej Hirsz Co-authored-by: Marcin Wojnarowski --- .github/workflows/pages.yml | 3 +- book/src/SUMMARY.md | 6 +- book/src/examples.md | 7 + book/src/examples/brainfuck.md | 32 +++++ book/src/examples/json.md | 55 ++++++++ book/src/getting-started.md | 72 ++++++++++ logos/Cargo.toml | 16 +++ logos/examples/brainfuck.rs | 161 +++++++++++++++++++++++ logos/examples/example.json | 54 ++++++++ logos/examples/hello_world.bf | 43 ++++++ logos/examples/json.rs | 232 +++++++++++++++++++++++++++++++++ logos/src/lib.rs | 1 + 12 files changed, 679 insertions(+), 3 deletions(-) create mode 100644 book/src/examples.md create mode 100644 book/src/examples/brainfuck.md create mode 100644 book/src/examples/json.md create mode 100644 book/src/getting-started.md create mode 100644 logos/examples/brainfuck.rs create mode 100644 logos/examples/example.json create mode 100644 logos/examples/hello_world.bf create mode 100644 logos/examples/json.rs diff --git a/.github/workflows/pages.yml b/.github/workflows/pages.yml index 8a1d85d1..958101b6 100644 --- a/.github/workflows/pages.yml +++ b/.github/workflows/pages.yml @@ -23,7 +23,7 @@ concurrency: group: pages cancel-in-progress: true -jobs: +jobs: # Build job build-book: runs-on: ubuntu-latest @@ -34,7 +34,6 @@ jobs: uses: peaceiris/actions-mdbook@v1 with: mdbook-version: '0.4.28' - # mdbook-version: 'latest' - name: Build book run: mdbook build book - name: Upload artifact diff --git a/book/src/SUMMARY.md b/book/src/SUMMARY.md index f0a0936c..6fd41d8f 100644 --- a/book/src/SUMMARY.md +++ b/book/src/SUMMARY.md @@ -1,6 +1,10 @@ # Summary + [Intro](./intro.md) ++ [Getting Started](./getting-started.md) ++ [Examples](./examples.md) + + [Brainfuck interpreter](./examples/brainfuck.md) + + [JSON parser](./examples/json.md) + [Attributes](./attributes.md) + [`#[logos]`](./attributes/logos.md) + [`#[error]`](./attributes/error.md) @@ -8,4 +12,4 @@ + [Token disambiguation](./token-disambiguation.md) + [Using `Extras`](./extras.md) + [Using callbacks](./callbacks.md) -+ [Common regular expressions](./common-regex.md) \ No newline at end of file ++ [Common regular expressions](./common-regex.md) diff --git a/book/src/examples.md b/book/src/examples.md new file mode 100644 index 00000000..4a362fe4 --- /dev/null +++ b/book/src/examples.md @@ -0,0 +1,7 @@ +# Examples + +The following examples are ordered by increasing level of complexity. 
+
+**[Brainfuck interpreter](./examples/brainfuck.md)**: Lexers are powerful tools for parsing source code into meaningful instructions. We show you how to build an interpreter for the Brainfuck programming language in under 100 lines of code!
+
+**[JSON parser](./examples/json.md)**: We present a JSON parser written with Logos that provides nice error reporting when invalid values are encountered.
diff --git a/book/src/examples/brainfuck.md b/book/src/examples/brainfuck.md
new file mode 100644
index 00000000..f33d70ba
--- /dev/null
+++ b/book/src/examples/brainfuck.md
@@ -0,0 +1,32 @@
+# Brainfuck interpreter
+
+In most programming languages, commands can be made of multiple tokens, where a token is simply a string slice that has a particular meaning for the language. For example, in Rust, the function signature `pub fn main()` could be split by the **lexer** into the tokens `pub`, `fn`, `main`, `(`, and `)`. Then, the **parser** combines tokens into meaningful program instructions.
+
+However, some programming languages, such as Brainfuck, are so simple that each token can be mapped to a single instruction. Brainfuck has just 8 single-character tokens:
+
+```rust,no_run,noplayground
+{{#include ../../../logos/examples/brainfuck.rs:tokens}}
+```
+
+All other characters must be ignored.
+
+Once the tokens are obtained, a Brainfuck interpreter can easily be created using a [finite-state machine](https://en.wikipedia.org/wiki/Finite-state_machine). For the sake of simplicity, we collect all the tokens into one vector called `operations`.
+
+Now, creating an interpreter becomes straightforward[^1]:
+```rust,no_run,noplayground
+{{#include ../../../logos/examples/brainfuck.rs:fsm}}
+```
+
+[^1]: There is a small trick that makes this easy. As can be seen in the full code, we first check that every loop start (`'['`) has a matching end (`']'`). This way, we can create two maps, `pairs` and `pairs_reverse`, to easily jump back and forth between them.
+
+Finally, here is the full code, which you should be able to run with[^2]:
+```bash
+cd logos/logos
+cargo run --example brainfuck examples/hello_world.bf
+```
+
+[^2]: You first need to clone [this repository](https://github.com/maciejhirsz/logos).
+
+```rust,no_run,noplayground
+{{#include ../../../logos/examples/brainfuck.rs:all}}
+```
diff --git a/book/src/examples/json.md b/book/src/examples/json.md
new file mode 100644
index 00000000..ddc5cf82
--- /dev/null
+++ b/book/src/examples/json.md
@@ -0,0 +1,55 @@
+# JSON parser
+
+JSON is a widely used, human-readable format for exchanging data between applications.
+
+Possible values are defined recursively and can be any of the following:
+
+```rust,no_run,noplayground
+{{#include ../../../logos/examples/json.rs:values}}
+```
+
+Objects are delimited by braces `{` and `}`, arrays by brackets `[` and `]`, and values are separated by commas `,`. Newlines, tabs, and spaces should be ignored by the lexer.
+
+Knowing that, we can construct a lexer with `Logos` that will identify all those cases:
+
+```rust,no_run,noplayground
+{{#include ../../../logos/examples/json.rs:tokens}}
+```
+
+> NOTE: the hardest part is to define valid regexes for the `Number` and `String` variants. The present solution was inspired by [this Stack Overflow thread](https://stackoverflow.com/questions/32155133/regex-to-match-a-json-string).
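+
+Before writing the parser itself, we can sanity-check the lexer on a small input. The following snippet is a standalone sketch (it is not part of the example file) showing the token stream produced for a tiny JSON document:
+
+```rust,no_run,noplayground
+let mut lexer = Token::lexer(r#"{"answer": 42}"#);
+
+assert!(matches!(lexer.next(), Some(Ok(Token::BraceOpen))));
+assert!(matches!(lexer.next(), Some(Ok(Token::String(_)))));
+assert!(matches!(lexer.next(), Some(Ok(Token::Colon))));
+assert!(matches!(lexer.next(), Some(Ok(Token::Number(n))) if n == 42.0));
+assert!(matches!(lexer.next(), Some(Ok(Token::BraceClose))));
+assert!(lexer.next().is_none());
+```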
+
+Once we have our tokens, we must parse them into actual JSON values. We will proceed by creating three functions:
+
++ `parse_value` for parsing any JSON value, without prior knowledge of its type;
++ `parse_array` for parsing an array, assuming we matched `[`;
++ and `parse_object` for parsing an object, assuming we matched `{`.
+
+Starting with parsing an arbitrary value, we can easily handle the four scalar types, `Bool`, `Null`, `Number`, and `String`, while deferring to the next two functions for parsing arrays and objects.
+
+```rust,no_run,noplayground
+{{#include ../../../logos/examples/json.rs:value}}
+```
+
+To parse an array, we simply loop over tokens, alternating between parsing values and commas, until a closing bracket is found.
+
+```rust,no_run,noplayground
+{{#include ../../../logos/examples/json.rs:array}}
+```
+
+A similar approach is used for objects, the only difference being that we expect (key, value) pairs separated by a colon.
+
+```rust,no_run,noplayground
+{{#include ../../../logos/examples/json.rs:object}}
+```
+
+Finally, here is the full code, which you should be able to run with[^1]:
+```bash
+cd logos/logos
+cargo run --example json examples/example.json
+```
+
+[^1]: You first need to clone [this repository](https://github.com/maciejhirsz/logos).
+
+```rust,no_run,noplayground
+{{#include ../../../logos/examples/json.rs:all}}
+```
diff --git a/book/src/getting-started.md b/book/src/getting-started.md
new file mode 100644
index 00000000..56bd86be
--- /dev/null
+++ b/book/src/getting-started.md
@@ -0,0 +1,72 @@
+# Getting Started
+
+**Logos** can be included in your Rust project using the `cargo add logos` command, or by directly modifying your `Cargo.toml` file:
+
+```toml
+[dependencies]
+logos = "0.13.0"
+```
+
+Then, you can automatically derive the [`Logos`](https://docs.rs/logos/latest/logos/trait.Logos.html) trait on your `enum` using the `Logos` derive macro:
+
+```rust,no_run,noplayground
+use logos::Logos;
+
+#[derive(Logos, Debug, PartialEq)]
+#[logos(skip r"[ \t\n\f]+")] // Ignore this regex pattern between tokens
+enum Token {
+    // Tokens can be literal strings, of any length.
+    #[token("fast")]
+    Fast,
+
+    #[token(".")]
+    Period,
+
+    // Or regular expressions.
+    #[regex("[a-zA-Z]+")]
+    Text,
+}
+```
+
+You can then use the `Logos::lexer` method to turn any `&str` into an iterator of tokens[^1]:
+
+```rust,no_run,noplayground
+let mut lex = Token::lexer("Create ridiculously fast Lexers.");
+
+assert_eq!(lex.next(), Some(Ok(Token::Text)));
+assert_eq!(lex.span(), 0..6);
+assert_eq!(lex.slice(), "Create");
+
+assert_eq!(lex.next(), Some(Ok(Token::Text)));
+assert_eq!(lex.span(), 7..19);
+assert_eq!(lex.slice(), "ridiculously");
+
+assert_eq!(lex.next(), Some(Ok(Token::Fast)));
+assert_eq!(lex.span(), 20..24);
+assert_eq!(lex.slice(), "fast");
+
+assert_eq!(lex.next(), Some(Ok(Token::Text)));
+assert_eq!(lex.slice(), "Lexers");
+assert_eq!(lex.span(), 25..31);
+
+assert_eq!(lex.next(), Some(Ok(Token::Period)));
+assert_eq!(lex.span(), 31..32);
+assert_eq!(lex.slice(), ".");
+
+assert_eq!(lex.next(), None);
+```
+
+[^1]: Each item is actually a [`Result`](https://docs.rs/logos/latest/logos/struct.Lexer.html#associatedtype.Item), because the lexer returns an error if some part of the string slice does not match any variant of `Token`.
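+
+If you also need the position of each token in the source, the lexer can be turned into an iterator over `(token, span)` pairs. The following is a minimal sketch using the `spanned` adapter, assuming the `Token` enum defined above:
+
+```rust,no_run,noplayground
+// Iterate over `(Result<Token, _>, Span)` pairs.
+for (result, span) in Token::lexer("Create ridiculously fast Lexers.").spanned() {
+    // `span` is a `Range<usize>` into the source `&str`.
+    println!("{:?} at {:?}", result, span);
+}
+```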
+
+Because [`Lexer`](https://docs.rs/logos/latest/logos/struct.Lexer.html), returned by [`Logos::lexer`](https://docs.rs/logos/latest/logos/trait.Logos.html#method.lexer), implements the `Iterator` trait, you can also use a `for .. in` construct:
+
+```rust,no_run,noplayground
+for result in Token::lexer("Create ridiculously fast Lexers.") {
+    match result {
+        Ok(token) => println!("{:#?}", token),
+        Err(e) => panic!("some error occurred: {:?}", e),
+    }
+}
+```
+
+
diff --git a/logos/Cargo.toml b/logos/Cargo.toml
index 734e3426..73b89ff8 100644
--- a/logos/Cargo.toml
+++ b/logos/Cargo.toml
@@ -12,9 +12,17 @@ readme = "../README.md"
 edition = "2021"
 rust-version = "1.62.1"
 
+[package.metadata.docs.rs]
+all-features = true
+cargo-args = ["-Zunstable-options", "-Zrustdoc-scrape-examples"]
+rustdoc-args = ["--cfg", "docsrs"]
+
 [dependencies]
 logos-derive = { version = "0.13.0", path = "../logos-derive", optional = true }
 
+[dev-dependencies]
+ariadne = { version = "0.2.0", features = ["auto-color"] }
+
 [features]
 default = ["export_derive", "std"]
 
@@ -25,3 +33,11 @@ std = []
 # import this crate and `use logos::Logos` to get both the trait and
 # derive proc macro.
 export_derive = ["logos-derive"]
+
+[[example]]
+name = "brainfuck"
+path = "examples/brainfuck.rs"
+
+[[example]]
+name = "json"
+path = "examples/json.rs"
diff --git a/logos/examples/brainfuck.rs b/logos/examples/brainfuck.rs
new file mode 100644
index 00000000..d4c14638
--- /dev/null
+++ b/logos/examples/brainfuck.rs
@@ -0,0 +1,161 @@
+//! Brainfuck interpreter written in Rust, using Logos.
+//!
+//! Usage:
+//!     cargo run --example brainfuck
+//!
+//! Example:
+//!     cargo run --example brainfuck examples/hello_world.bf
+//!
+//! Brainfuck is an esoteric programming language that only
+//! uses 8 single-character commands:
+//! - '>';
+//! - '<';
+//! - '+';
+//! - '-';
+//! - '.';
+//! - ',';
+//! - '[';
+//! - and ']'.
+//!
+//! Despite being very hard to use in practice, this makes
+//! the language very simple to interpret. The following code
+//! defines an [`execute`] function that runs Brainfuck code.
+//!
+//! Logos is used here to directly transform the code stream
+//! into meaningful `Op` operations (or commands).
+//! Errors, i.e., unknown tokens, are discarded using `filter_map`.
+//!
+//! More details can be found on Wikipedia:
+//! <https://en.wikipedia.org/wiki/Brainfuck>.
+
+/* ANCHOR: all */
+use logos::Logos;
+use std::collections::HashMap;
+use std::env;
+use std::fs;
+use std::io::{self, Read};
+
+/* ANCHOR: tokens */
+/// Each [`Op`] variant is a single character.
+#[derive(Debug, Logos)]
+enum Op {
+    /// Increment pointer.
+    #[token(">")]
+    IncPointer,
+    /// Decrement pointer.
+    #[token("<")]
+    DecPointer,
+    /// Increment data at pointer.
+    #[token("+")]
+    IncData,
+    /// Decrement data at pointer.
+    #[token("-")]
+    DecData,
+    /// Output data at pointer.
+    #[token(".")]
+    OutData,
+    /// Input (read) to data at pointer.
+    #[token(",")]
+    InpData,
+    /// Conditionally jump to matching `']'`.
+    #[token("[")]
+    CondJumpForward,
+    /// Conditionally jump to matching `'['`.
+    #[token("]")]
+    CondJumpBackward,
+}
+/* ANCHOR_END: tokens */
+
+/// Print one byte to the terminal.
+#[inline(always)]
+fn print_byte(byte: u8) {
+    print!("{}", byte as char);
+}
+
+/// Read one byte from the terminal.
+#[inline(always)]
+fn read_byte() -> u8 {
+    let mut input = [0u8; 1];
+    io::stdin()
+        .read_exact(&mut input)
+        .expect("An error occurred while reading byte!");
+    input[0]
+}
+
+/// Execute Brainfuck code from a string slice.
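+///
+/// The program is first fully tokenized (invalid characters are
+/// silently skipped), then matching `[`/`]` pairs are pre-computed
+/// before execution starts; unbalanced brackets cause a panic.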
+pub fn execute(code: &str) {
+    let operations: Vec<_> = Op::lexer(code).filter_map(|op| op.ok()).collect();
+    let mut data = [0u8; 30_000]; // Minimum recommended size
+    let mut pointer: usize = 0;
+    let len = operations.len();
+
+    // We pre-process matching jump commands, and we create
+    // a mapping between them.
+    let mut queue = Vec::new();
+    let mut pairs = HashMap::new();
+    let mut pairs_reverse = HashMap::new();
+
+    for (i, op) in operations.iter().enumerate() {
+        match op {
+            Op::CondJumpForward => queue.push(i),
+            Op::CondJumpBackward => {
+                if let Some(start) = queue.pop() {
+                    pairs.insert(start, i);
+                    pairs_reverse.insert(i, start);
+                } else {
+                    panic!(
+                        "Unexpected conditional backward jump at position {}, does not match any '['",
+                        i
+                    );
+                }
+            }
+            _ => (),
+        }
+    }
+
+    if !queue.is_empty() {
+        panic!("Unmatched conditional forward jump at positions {:?}, expecting a closing ']' for each of them", queue);
+    }
+
+    /* ANCHOR: fsm */
+    let mut i: usize = 0;
+    // Actual program execution.
+    loop {
+        match operations[i] {
+            Op::IncPointer => pointer += 1,
+            Op::DecPointer => pointer -= 1,
+            Op::IncData => data[pointer] = data[pointer].wrapping_add(1),
+            Op::DecData => data[pointer] = data[pointer].wrapping_sub(1),
+            Op::OutData => print_byte(data[pointer]),
+            Op::InpData => data[pointer] = read_byte(),
+            Op::CondJumpForward => {
+                if data[pointer] == 0 {
+                    // Skip until matching end.
+                    i = *pairs.get(&i).unwrap();
+                }
+            }
+            Op::CondJumpBackward => {
+                if data[pointer] != 0 {
+                    // Go back to matching start.
+                    i = *pairs_reverse.get(&i).unwrap();
+                }
+            }
+        }
+        i += 1;
+
+        if i >= len {
+            break;
+        }
+    }
+    /* ANCHOR_END: fsm */
+}
+
+fn main() {
+    let src = fs::read_to_string(env::args().nth(1).expect("Expected file argument"))
+        .expect("Failed to read file");
+
+    execute(src.as_str());
+}
+/* ANCHOR_END: all */
diff --git a/logos/examples/example.json b/logos/examples/example.json
new file mode 100644
index 00000000..3be24e42
--- /dev/null
+++ b/logos/examples/example.json
@@ -0,0 +1,54 @@
+[
+
+    {
+"selftext" : "Hey, folks!\n\nWhile /r/Funny has always had a strong preference for original content – it's right there in Rule 3, after all – we've never required users in good standing to post only things that they personally created. However, we *have* frequently taken steps to cut down on low-effort, low-quality submissions (like memes, screenshots of social media, and so on)... and although we're a little bit late to the game with this, we're going to take another such step:\n\n**Henceforth, AI-generated content of any kind may not be posted in /r/Funny.**\n\nWe know, we know. \"Welcome to 2022,\" right? We're well aware that the novelty of things like Midjourney, ChatGPT, Bing, Rutabaga, Bard, DALL-E, StorFisa, DeepAI, and other such programs is quickly wearing off, and we've seen the growing disillusionment, disapproval, and general annoyance that folks have been voicing... but in our defense, we made up two of those services, so you can't *really* be upset about people using them.\n\nAnyway, this change was prompted by a few different factors (in addition to addressing users' concerns), but one of the most prominent is the fact that AI-generated content requires almost no involvement on the part of a given submitter: While a glorified algorithm may spit out some images, the *user's* only contribution – assuming that they didn't design, code, and train said algorithm, of course – is a short prompt.
That requires even less effort than \"making\" memes or taking screenshots of social media does, so if the goal is to encourage high-quality, original content... well, you see the obvious conclusion.\n\nThe TL;DR is that we want to keep /r/Funny as pleasant as possible for contributors, participants, and lurkers alike, so until such time as *real* AIs start registering Reddit accounts (which our counterparts from the future¹ say will happen on September 12th, 2097), AI-generated content will not be allowed.\n\n------\n\n^¹ ^(Yes, we have a time-machine, and no, it isn't just a Magic 8-Ball that we duct-taped to a frog.)", + + "WHO": "Joe", + "WEEK": [ + { + "NUMBER": 3, + "EXPENSE": [ + { + "WHAT": "Beer", + "AMOUNT": 18.00 + }, + { + "WHAT": "Food", + "AMOUNT": 12.00 + }, + { + "WHAT": "Food", + "AMOUNT": 19.00 + }, + { + "WHAT": "Car", + "AMOUNT": 20.00 + } + ] + } + ] + }, +{ + "updated_at": "2015-01-01T15:00:06Z", + "glossary": { + "title": "example glossary", + "GlossDiv": { + "title": "S", + "GlossList": { + "GlossEntry": { + "ID": "SGML", + "SortAs": "SGML", + "GlossTerm": "Standard Generalized Markup Language", + "Acronym": "SGML", + "Abbrev": "ISO 8879:1986", + "GlossDef": { + "para": "A meta-markup language, used to create markup languages such as DocBook.", + "GlossSeeAlso": ["GML", "XML"] + }, + "GlossSee": "markup" + } + } + } + } +} +] diff --git a/logos/examples/hello_world.bf b/logos/examples/hello_world.bf new file mode 100644 index 00000000..ee740ebe --- /dev/null +++ b/logos/examples/hello_world.bf @@ -0,0 +1,43 @@ +[ This program prints "Hello World!" and a newline to the screen, its + length is 106 active command characters. [It is not the shortest.] + + This loop is an "initial comment loop", a simple way of adding a comment + to a BF program such that you don't have to worry about any command + characters. Any ".", ",", "+", "-", "<" and ">" characters are simply + ignored, the "[" and "]" characters just have to be balanced. This + loop and the commands it contains are ignored because the current cell + defaults to a value of 0; the 0 value causes this loop to be skipped. +] +++++++++ Set Cell #0 to 8 +[ + >++++ Add 4 to Cell #1; this will always set Cell #1 to 4 + [ as the cell will be cleared by the loop + >++ Add 2 to Cell #2 + >+++ Add 3 to Cell #3 + >+++ Add 3 to Cell #4 + >+ Add 1 to Cell #5 + <<<<- Decrement the loop counter in Cell #1 + ] Loop until Cell #1 is zero; number of iterations is 4 + >+ Add 1 to Cell #2 + >+ Add 1 to Cell #3 + >- Subtract 1 from Cell #4 + >>+ Add 1 to Cell #6 + [<] Move back to the first zero cell you find; this will + be Cell #1 which was cleared by the previous loop + <- Decrement the loop Counter in Cell #0 +] Loop until Cell #0 is zero; number of iterations is 8 + +The result of this is: +Cell no : 0 1 2 3 4 5 6 +Contents: 0 0 72 104 88 32 8 +Pointer : ^ + +>>. Cell #2 has value 72 which is 'H' +>---. Subtract 3 from Cell #3 to get 101 which is 'e' ++++++++..+++. Likewise for 'llo' from Cell #3 +>>. Cell #5 is 32 for the space +<-. Subtract 1 from Cell #4 for 87 to give a 'W' +<. Cell #3 was set to 'o' from the end of 'Hello' ++++.------.--------. Cell #3 for 'rl' and 'd' +>>+. Add 1 to Cell #5 gives us an exclamation point +>++. And finally a newline from Cell #6 diff --git a/logos/examples/json.rs b/logos/examples/json.rs new file mode 100644 index 00000000..72ce7740 --- /dev/null +++ b/logos/examples/json.rs @@ -0,0 +1,232 @@ +//! JSON parser written in Rust, using Logos. +//! +//! 
If the file is a valid JSON value, it will be printed
+//! to the terminal using the debug format.
+//!
+//! Otherwise, an error will be printed with its location.
+//!
+//! Usage:
+//!     cargo run --example json
+//!
+//! Example:
+//!     cargo run --example json examples/example.json

+/* ANCHOR: all */
+use logos::{Lexer, Logos, Span};
+
+use std::collections::HashMap;
+use std::env;
+use std::fs;
+
+type Error = (String, Span);
+
+type Result<T> = std::result::Result<T, Error>;
+
+/* ANCHOR: tokens */
+/// All meaningful JSON tokens.
+///
+/// > NOTE: regexes for [`Token::Number`] and [`Token::String`] may not
+/// > catch all possible values, especially for strings. If you find
+/// > errors, please report them so that we can improve the regex.
+#[derive(Debug, Logos)]
+#[logos(skip r"[ \t\r\n\f]+")]
+enum Token {
+    #[token("false", |_| false)]
+    #[token("true", |_| true)]
+    Bool(bool),
+
+    #[token("{")]
+    BraceOpen,
+
+    #[token("}")]
+    BraceClose,
+
+    #[token("[")]
+    BracketOpen,
+
+    #[token("]")]
+    BracketClose,
+
+    #[token(":")]
+    Colon,
+
+    #[token(",")]
+    Comma,
+
+    #[token("null")]
+    Null,
+
+    #[regex(r"-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?", |lex| lex.slice().parse::<f64>().unwrap())]
+    Number(f64),
+
+    #[regex(r#""([^"\\]|\\["\\bnfrt]|u[a-fA-F0-9]{4})*""#, |lex| lex.slice().to_owned())]
+    String(String),
+}
+/* ANCHOR_END: tokens */
+
+/* ANCHOR: values */
+/// Represents any valid JSON value.
+#[derive(Debug)]
+enum Value {
+    /// null.
+    Null,
+    /// true or false.
+    Bool(bool),
+    /// Any floating point number.
+    Number(f64),
+    /// Any quoted string.
+    String(String),
+    /// An array of values.
+    Array(Vec<Value>),
+    /// A dictionary mapping keys to values.
+    Object(HashMap<String, Value>),
+}
+/* ANCHOR_END: values */
+
+/* ANCHOR: value */
+/// Parse a token stream into a JSON value.
+fn parse_value<'source>(lexer: &mut Lexer<'source, Token>) -> Result<Value> {
+    if let Some(token) = lexer.next() {
+        match token {
+            Ok(Token::Bool(b)) => Ok(Value::Bool(b)),
+            Ok(Token::BraceOpen) => parse_object(lexer),
+            Ok(Token::BracketOpen) => parse_array(lexer),
+            Ok(Token::Null) => Ok(Value::Null),
+            Ok(Token::Number(n)) => Ok(Value::Number(n)),
+            Ok(Token::String(s)) => Ok(Value::String(s)),
+            _ => Err((
+                "unexpected token here (context: value)".to_owned(),
+                lexer.span(),
+            )),
+        }
+    } else {
+        Err(("empty values are not allowed".to_owned(), lexer.span()))
+    }
+}
+/* ANCHOR_END: value */
+
+/* ANCHOR: array */
+/// Parse a token stream into an array and return when
+/// a valid terminator is found.
+///
+/// > NOTE: we assume '[' was consumed.
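+/// >
+/// > The `awaits_comma` and `awaits_value` flags below enforce that
+/// > values and commas strictly alternate, and that a closing `]`
+/// > cannot directly follow a comma.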
+fn parse_array<'source>(lexer: &mut Lexer<'source, Token>) -> Result<Value> {
+    let mut array = Vec::new();
+    let span = lexer.span();
+    let mut awaits_comma = false;
+    let mut awaits_value = false;
+
+    while let Some(token) = lexer.next() {
+        match token {
+            Ok(Token::Bool(b)) if !awaits_comma => {
+                array.push(Value::Bool(b));
+                awaits_value = false;
+            }
+            Ok(Token::BraceOpen) if !awaits_comma => {
+                let object = parse_object(lexer)?;
+                array.push(object);
+                awaits_value = false;
+            }
+            Ok(Token::BracketOpen) if !awaits_comma => {
+                let sub_array = parse_array(lexer)?;
+                array.push(sub_array);
+                awaits_value = false;
+            }
+            Ok(Token::BracketClose) if !awaits_value => return Ok(Value::Array(array)),
+            Ok(Token::Comma) if awaits_comma => awaits_value = true,
+            Ok(Token::Null) if !awaits_comma => {
+                array.push(Value::Null);
+                awaits_value = false
+            }
+            Ok(Token::Number(n)) if !awaits_comma => {
+                array.push(Value::Number(n));
+                awaits_value = false;
+            }
+            Ok(Token::String(s)) if !awaits_comma => {
+                array.push(Value::String(s));
+                awaits_value = false;
+            }
+            _ => {
+                return Err((
+                    "unexpected token here (context: array)".to_owned(),
+                    lexer.span(),
+                ))
+            }
+        }
+        awaits_comma = !awaits_value;
+    }
+    Err(("unmatched opening bracket defined here".to_owned(), span))
+}
+/* ANCHOR_END: array */
+
+/* ANCHOR: object */
+/// Parse a token stream into an object and return when
+/// a valid terminator is found.
+///
+/// > NOTE: we assume '{' was consumed.
+fn parse_object<'source>(lexer: &mut Lexer<'source, Token>) -> Result<Value> {
+    let mut map = HashMap::new();
+    let span = lexer.span();
+    let mut awaits_comma = false;
+    let mut awaits_key = false;
+
+    while let Some(token) = lexer.next() {
+        match token {
+            Ok(Token::BraceClose) if !awaits_key => return Ok(Value::Object(map)),
+            Ok(Token::Comma) if awaits_comma => awaits_key = true,
+            Ok(Token::String(key)) if !awaits_comma => {
+                match lexer.next() {
+                    Some(Ok(Token::Colon)) => (),
+                    _ => {
+                        return Err((
+                            "unexpected token here, expecting ':'".to_owned(),
+                            lexer.span(),
+                        ))
+                    }
+                }
+                let value = parse_value(lexer)?;
+                map.insert(key, value);
+                awaits_key = false;
+            }
+            _ => {
+                return Err((
+                    "unexpected token here (context: object)".to_owned(),
+                    lexer.span(),
+                ))
+            }
+        }
+        awaits_comma = !awaits_key;
+    }
+    Err(("unmatched opening brace defined here".to_owned(), span))
+}
+/* ANCHOR_END: object */
+
+fn main() {
+    let filename = env::args().nth(1).expect("Expected file argument");
+    let src = fs::read_to_string(&filename).expect("Failed to read file");
+
+    let mut lexer = Token::lexer(src.as_str());
+
+    match parse_value(&mut lexer) {
+        Ok(value) => println!("{:#?}", value),
+        Err((msg, span)) => {
+            use ariadne::{ColorGenerator, Label, Report, ReportKind, Source};
+
+            let mut colors = ColorGenerator::new();
+
+            let a = colors.next();
+
+            Report::build(ReportKind::Error, &filename, 12)
+                .with_message(format!("Invalid JSON"))
+                .with_label(
+                    Label::new((&filename, span))
+                        .with_message(msg)
+                        .with_color(a),
+                )
+                .finish()
+                .eprint((&filename, Source::from(src)))
+                .unwrap();
+        }
+    }
+}
+/* ANCHOR_END: all */
diff --git a/logos/src/lib.rs b/logos/src/lib.rs
index 23d6af77..10cffb9c 100644
--- a/logos/src/lib.rs
+++ b/logos/src/lib.rs
@@ -161,6 +161,7 @@
 //! + `(foo|hello)(bar)?` has a priority of 6, `foo` being it's shortest possible match.
 
 #![cfg_attr(not(feature = "std"), no_std)]
+#![cfg_attr(docsrs, feature(doc_auto_cfg))]
 #![warn(missing_docs)]
 #![doc(html_logo_url = "https://maciej.codes/kosz/logos.png")]