Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf(parse): Reduce overhead from "trivia" #764

Merged
merged 5 commits into from
Jul 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 9 additions & 12 deletions crates/benchmarks/examples/bench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,7 @@ impl Args {
let mut parser = Parser::Document;

let mut args = lexopt::Parser::from_env();
let mut data = toml_benchmarks::MANIFESTS
.iter()
.find(|d| d.name() == "1-medium")
.unwrap();
let mut data_name = "1-medium".to_owned();
while let Some(arg) = args.next()? {
match arg {
Long("parser") => {
Expand All @@ -59,18 +56,18 @@ impl Args {
};
}
Long("manifest") => {
let name = args.value()?;
data = toml_benchmarks::MANIFESTS
.iter()
.find(|d| d.name() == name)
.ok_or_else(|| lexopt::Error::UnexpectedValue {
option: "manifest".to_owned(),
value: name.clone(),
})?;
data_name = args.value()?.string()?;
}
_ => return Err(arg.unexpected()),
}
}
let data = toml_benchmarks::MANIFESTS
.iter()
.find(|d| d.name() == data_name)
.ok_or_else(|| lexopt::Error::UnexpectedValue {
option: "manifest".to_owned(),
value: data_name.into(),
})?;

Ok(Self {
parser,
Expand Down
2 changes: 1 addition & 1 deletion crates/toml_edit/src/parser/strings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,7 @@ fn ml_literal_body<'i>(input: &mut Input<'i>) -> PResult<&'i str> {

// mll-content = mll-char / newline
fn mll_content(input: &mut Input<'_>) -> PResult<u8> {
alt((one_of(MLL_CHAR), newline)).parse_next(input)
alt((one_of(MLL_CHAR), newline.value(b'\n'))).parse_next(input)
}

// mll-char = %x09 / %x20-26 / %x28-7E / non-ascii
Expand Down
63 changes: 29 additions & 34 deletions crates/toml_edit/src/parser/trivia.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
use std::ops::RangeInclusive;

use winnow::combinator::alt;
use winnow::combinator::empty;
use winnow::combinator::eof;
use winnow::combinator::fail;
use winnow::combinator::opt;
use winnow::combinator::peek;
use winnow::combinator::repeat;
use winnow::combinator::terminated;
use winnow::prelude::*;
use winnow::token::any;
use winnow::token::one_of;
use winnow::token::take_while;

Expand Down Expand Up @@ -50,69 +54,60 @@ pub(crate) const NON_EOL: (u8, RangeInclusive<u8>, RangeInclusive<u8>) =
pub(crate) const COMMENT_START_SYMBOL: u8 = b'#';

// comment = comment-start-symbol *non-eol
pub(crate) fn comment<'i>(input: &mut Input<'i>) -> PResult<&'i [u8]> {
pub(crate) fn comment(input: &mut Input<'_>) -> PResult<()> {
(COMMENT_START_SYMBOL, take_while(0.., NON_EOL))
.recognize()
.void()
.parse_next(input)
}

// newline = ( %x0A / ; LF
// %x0D.0A ) ; CRLF
pub(crate) fn newline(input: &mut Input<'_>) -> PResult<u8> {
alt((
one_of(LF).value(b'\n'),
(one_of(CR), one_of(LF)).value(b'\n'),
))
pub(crate) fn newline(input: &mut Input<'_>) -> PResult<()> {
dispatch! {any;
b'\n' => empty,
b'\r' => one_of(LF).void(),
_ => fail,
}
.parse_next(input)
}
pub(crate) const LF: u8 = b'\n';
pub(crate) const CR: u8 = b'\r';

// ws-newline = *( wschar / newline )
pub(crate) fn ws_newline<'i>(input: &mut Input<'i>) -> PResult<&'i str> {
pub(crate) fn ws_newline(input: &mut Input<'_>) -> PResult<()> {
repeat(
0..,
alt((newline.value(&b"\n"[..]), take_while(1.., WSCHAR))),
)
.map(|()| ())
.recognize()
.map(|b| unsafe { from_utf8_unchecked(b, "`is_wschar` and `newline` filters out on-ASCII") })
.parse_next(input)
}

// ws-newlines = newline *( wschar / newline )
pub(crate) fn ws_newlines<'i>(input: &mut Input<'i>) -> PResult<&'i str> {
(newline, ws_newline)
.recognize()
.map(|b| unsafe {
from_utf8_unchecked(b, "`is_wschar` and `newline` filters out on-ASCII")
})
.parse_next(input)
pub(crate) fn ws_newlines(input: &mut Input<'_>) -> PResult<()> {
(newline, ws_newline).void().parse_next(input)
}

// note: this rule is not present in the original grammar
// ws-comment-newline = *( ws-newline-nonempty / comment )
pub(crate) fn ws_comment_newline<'i>(input: &mut Input<'i>) -> PResult<&'i [u8]> {
repeat(
0..,
alt((
repeat(
1..,
alt((take_while(1.., WSCHAR), newline.value(&b"\n"[..]))),
)
.map(|()| ()),
comment.void(),
)),
)
.map(|()| ())
.recognize()
pub(crate) fn ws_comment_newline(input: &mut Input<'_>) -> PResult<()> {
let _ = ws.parse_next(input)?;

dispatch! {opt(peek(any));
Some(b'#') => (comment, newline, ws_comment_newline).void(),
Some(b'\n') => (newline, ws_comment_newline).void(),
Some(b'\r') => (newline, ws_comment_newline).void(),
_ => empty,
}
.parse_next(input)
}

// note: this rule is not present in the original grammar
// line-ending = newline / eof
pub(crate) fn line_ending<'i>(input: &mut Input<'i>) -> PResult<&'i str> {
alt((newline.value("\n"), eof.value(""))).parse_next(input)
pub(crate) fn line_ending(input: &mut Input<'_>) -> PResult<()> {
alt((newline.value("\n"), eof.value("")))
.void()
.parse_next(input)
}

// note: this rule is not present in the original grammar
Expand Down Expand Up @@ -151,7 +146,7 @@ mod test {
];
for input in inputs {
dbg!(input);
let parsed = ws_comment_newline.parse(new_input(input));
let parsed = ws_comment_newline.recognize().parse(new_input(input));
assert!(parsed.is_ok(), "{:?}", parsed);
let parsed = parsed.unwrap();
assert_eq!(parsed, input.as_bytes());
Expand Down
7 changes: 3 additions & 4 deletions crates/toml_edit/tests/testsuite/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -309,13 +309,12 @@ TOML parse error at line 1, column 1
bad!(
"a = [ \r ]",
str![[r#"
TOML parse error at line 1, column 7
TOML parse error at line 1, column 8
|
1 | a = [
]
| ^
invalid array
expected `]`
| ^


"#]]
);
Expand Down
Loading