Skip to content

Commit

Permalink
perf: Speed up compile times with nom
Browse files Browse the repository at this point in the history
This is using a short-lived fork `nom` until we can get the changes
upstreamed.  Note that `combine` requires using `attempt` to backtrack
while `nom` backtracks by default and requires `cut` to not backtrack.

I find the straight functions much easier to understand what is
happening and this allows us to intermix procedural with declarative
logic (e.g. in string processing I skipped combinators to make it easier
to not require an allocation).

Regarding that allocation, we still do it but this opens the door for us
to use `InternalString` for user values which might give us more of a
performance boost (previously, the forced allocation made it a moot
point to measure it).

Running `cargo clean -p toml_edit && time cargo check`, I get 3s for building `toml_edit` with `combine` and 0.5s for `nom`.

For runtime performance
- Parsing `cargo init`s generated manifest took 4% less time
- Parsing `cargo`s manifest took 2% less time
- 10 tables took 37% less time
- 100 tables took 41% less time
- Array of 10 tables took 38% less time
- Array of 100 tables took 40% less time

This is with Rust 1.66 on a i9-12900H processor under WSL2

Fixes toml-rs#327
  • Loading branch information
epage committed Dec 23, 2022
1 parent 9965ad4 commit 8b877c2
Show file tree
Hide file tree
Showing 89 changed files with 1,492 additions and 1,400 deletions.
34 changes: 17 additions & 17 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion crates/toml_edit/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ serde = ["dep:serde", "toml_datetime/serde"]

[dependencies]
indexmap = "1.9.1"
combine = "4.6.6"
nom8 = "0.1.0"
itertools = "0.10.5"
serde = { version = "1.0.145", features = ["derive"], optional = true }
kstring = { version = "2.0.0", features = ["max_inline"], optional = true }
Expand Down
8 changes: 7 additions & 1 deletion crates/toml_edit/src/document.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,13 @@ impl FromStr for Document {

/// Parses a document from a &str
fn from_str(s: &str) -> Result<Self, Self::Err> {
parser::document(s.as_bytes())
use nom8::prelude::*;

let b = s.as_bytes();
parser::document::document
.parse(b)
.finish()
.map_err(|e| Self::Err::new(e, b))
}
}

Expand Down
30 changes: 7 additions & 23 deletions crates/toml_edit/src/key.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
use std::borrow::Cow;
use std::str::FromStr;

use combine::stream::position::Stream;

use crate::encode::{to_string_repr, StringStyle};
use crate::parser;
use crate::parser::is_unquoted_char;
use crate::parser::key::is_unquoted_char;
use crate::repr::{Decor, Repr};
use crate::InternalString;

Expand Down Expand Up @@ -105,37 +103,23 @@ impl Key {
}

fn try_parse_simple(s: &str) -> Result<Key, parser::TomlError> {
use combine::stream::position::{IndexPositioner, Positioner};
use combine::EasyParser;
use nom8::prelude::*;

let b = s.as_bytes();
let result = parser::simple_key().easy_parse(Stream::new(b));
let result = parser::key::simple_key.parse(b).finish();
match result {
Ok((_, ref rest)) if !rest.input.is_empty() => Err(parser::TomlError::from_unparsed(
(&rest.positioner
as &dyn Positioner<usize, Position = usize, Checkpoint = IndexPositioner>)
.position(),
b,
)),
Ok(((raw, key), _)) => Ok(Key::new(key).with_repr_unchecked(Repr::new_unchecked(raw))),
Ok((raw, key)) => Ok(Key::new(key).with_repr_unchecked(Repr::new_unchecked(raw))),
Err(e) => Err(parser::TomlError::new(e, b)),
}
}

fn try_parse_path(s: &str) -> Result<Vec<Key>, parser::TomlError> {
use combine::stream::position::{IndexPositioner, Positioner};
use combine::EasyParser;
use nom8::prelude::*;

let b = s.as_bytes();
let result = parser::key_path().easy_parse(Stream::new(b));
let result = parser::key::key.parse(b).finish();
match result {
Ok((_, ref rest)) if !rest.input.is_empty() => Err(parser::TomlError::from_unparsed(
(&rest.positioner
as &dyn Positioner<usize, Position = usize, Checkpoint = IndexPositioner>)
.position(),
b,
)),
Ok((keys, _)) => Ok(keys),
Ok(keys) => Ok(keys),
Err(e) => Err(parser::TomlError::new(e, b)),
}
}
Expand Down
87 changes: 48 additions & 39 deletions crates/toml_edit/src/parser/array.rs
Original file line number Diff line number Diff line change
@@ -1,19 +1,27 @@
use combine::parser::byte::byte;
use combine::parser::range::recognize_with_value;
use combine::stream::RangeStream;
use combine::*;
use nom8::combinator::cut;
use nom8::combinator::opt;
use nom8::multi::separated_list1;
use nom8::sequence::delimited;

use crate::parser::trivia::ws_comment_newline;
use crate::parser::value::value;
use crate::{Array, Value};
use crate::{Array, Item, Value};

use crate::parser::prelude::*;

// ;; Array

// array = array-open array-values array-close
parse!(array() -> Array, {
between(byte(ARRAY_OPEN), byte(ARRAY_CLOSE),
array_values())
});
pub(crate) fn array(input: Input<'_>) -> IResult<Input<'_>, Array, ParserError<'_>> {
delimited(
ARRAY_OPEN,
cut(array_values),
cut(ARRAY_CLOSE)
.context(Context::Expression("array"))
.context(Context::Expected(ParserValue::CharLiteral(']'))),
)
.parse(input)
}

// note: we're omitting ws and newlines here, because
// they should be part of the formatted values
Expand All @@ -27,42 +35,42 @@ const ARRAY_SEP: u8 = b',';
// note: this rule is modified
// array-values = [ ( array-value array-sep array-values ) /
// array-value / ws-comment-newline ]
parse!(array_values() -> Array, {
pub(crate) fn array_values(input: Input<'_>) -> IResult<Input<'_>, Array, ParserError<'_>> {
(
optional(
recognize_with_value(
sep_end_by1(array_value(), byte(ARRAY_SEP))
).map(|(r, v): (&'a [u8], Array)| (v, r[r.len() - 1] == b','))
opt(
(separated_list1(ARRAY_SEP, array_value), opt(ARRAY_SEP)).map(
|(v, trailing): (Vec<Value>, Option<u8>)| {
(
Array::with_vec(v.into_iter().map(Item::Value).collect()),
trailing.is_some(),
)
},
),
),
ws_comment_newline(),
).and_then::<_, _, std::str::Utf8Error>(|(array, trailing)| {
let (mut array, comma) = array.unwrap_or_default();
array.set_trailing_comma(comma);
array.set_trailing(std::str::from_utf8(trailing)?);
Ok(array)
})
});
ws_comment_newline,
)
.map_res::<_, _, std::str::Utf8Error>(|(array, trailing)| {
let (mut array, comma) = array.unwrap_or_default();
array.set_trailing_comma(comma);
array.set_trailing(std::str::from_utf8(trailing)?);
Ok(array)
})
.parse(input)
}

parse!(array_value() -> Value, {
attempt((
ws_comment_newline(),
value(),
ws_comment_newline(),
)).and_then::<_, _, std::str::Utf8Error>(|(ws1, v, ws2)| {
let v = v.decorated(
std::str::from_utf8(ws1)?,
std::str::from_utf8(ws2)?,
);
Ok(v)
})
});
pub(crate) fn array_value(input: Input<'_>) -> IResult<Input<'_>, Value, ParserError<'_>> {
(ws_comment_newline, value, ws_comment_newline)
.map_res::<_, _, std::str::Utf8Error>(|(ws1, v, ws2)| {
let v = v.decorated(std::str::from_utf8(ws1)?, std::str::from_utf8(ws2)?);
Ok(v)
})
.parse(input)
}

#[cfg(test)]
mod test {
use super::*;

use combine::stream::position::Stream;

#[test]
fn arrays() {
let inputs = [
Expand Down Expand Up @@ -101,12 +109,13 @@ mod test {
r#"[ { x = 1, a = "2" }, {a = "a",b = "b", c = "c"} ]"#,
];
for input in inputs {
parsed_value_eq!(input);
let parsed = array.parse(input.as_bytes()).finish();
assert_eq!(parsed.map(|a| a.to_string()), Ok(input.to_owned()));
}

let invalid_inputs = [r#"["#, r#"[,]"#, r#"[,2]"#, r#"[1e165,,]"#];
for input in invalid_inputs {
let parsed = array().easy_parse(Stream::new(input.as_bytes()));
let parsed = array.parse(input.as_bytes()).finish();
assert!(parsed.is_err());
}
}
Expand Down
Loading

0 comments on commit 8b877c2

Please sign in to comment.