Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf(parser): optimize conditional advance on ASCII values #4298

Merged
merged 1 commit into from
Jul 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 22 additions & 22 deletions crates/oxc_parser/src/lexer/byte_handlers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -209,8 +209,8 @@ ascii_byte_handler!(LIN(lexer) {
// !
ascii_byte_handler!(EXL(lexer) {
lexer.consume_char();
if lexer.next_eq('=') {
if lexer.next_eq('=') {
if lexer.next_ascii_char_eq(b'=') {
if lexer.next_ascii_char_eq(b'=') {
Kind::Neq2
} else {
Kind::Neq
Expand All @@ -237,7 +237,7 @@ ascii_byte_handler!(HAS(lexer) {
lexer.consume_char();
// HashbangComment ::
// `#!` SingleLineCommentChars?
if lexer.token.start == 0 && lexer.next_eq('!') {
if lexer.token.start == 0 && lexer.next_ascii_char_eq(b'!') {
lexer.read_hashbang_comment()
} else {
lexer.private_identifier()
Expand All @@ -252,7 +252,7 @@ ascii_identifier_handler!(IDT(_id_without_first_char) {
// %
ascii_byte_handler!(PRC(lexer) {
lexer.consume_char();
if lexer.next_eq('=') {
if lexer.next_ascii_char_eq(b'=') {
Kind::PercentEq
} else {
Kind::Percent
Expand All @@ -262,13 +262,13 @@ ascii_byte_handler!(PRC(lexer) {
// &
ascii_byte_handler!(AMP(lexer) {
lexer.consume_char();
if lexer.next_eq('&') {
if lexer.next_eq('=') {
if lexer.next_ascii_char_eq(b'&') {
if lexer.next_ascii_char_eq(b'=') {
Kind::Amp2Eq
} else {
Kind::Amp2
}
} else if lexer.next_eq('=') {
} else if lexer.next_ascii_char_eq(b'=') {
Kind::AmpEq
} else {
Kind::Amp
Expand All @@ -290,13 +290,13 @@ ascii_byte_handler!(PNC(lexer) {
// *
ascii_byte_handler!(ATR(lexer) {
lexer.consume_char();
if lexer.next_eq('*') {
if lexer.next_eq('=') {
if lexer.next_ascii_char_eq(b'*') {
if lexer.next_ascii_char_eq(b'=') {
Kind::Star2Eq
} else {
Kind::Star2
}
} else if lexer.next_eq('=') {
} else if lexer.next_ascii_char_eq(b'=') {
Kind::StarEq
} else {
Kind::Star
Expand All @@ -306,9 +306,9 @@ ascii_byte_handler!(ATR(lexer) {
// +
ascii_byte_handler!(PLS(lexer) {
lexer.consume_char();
if lexer.next_eq('+') {
if lexer.next_ascii_char_eq(b'+') {
Kind::Plus2
} else if lexer.next_eq('=') {
} else if lexer.next_ascii_char_eq(b'=') {
Kind::PlusEq
} else {
Kind::Plus
Expand Down Expand Up @@ -347,7 +347,7 @@ ascii_byte_handler!(SLH(lexer) {
}
_ => {
// regex is handled separately, see `next_regex`
if lexer.next_eq('=') {
if lexer.next_ascii_char_eq(b'=') {
Kind::SlashEq
} else {
Kind::Slash
Expand Down Expand Up @@ -389,13 +389,13 @@ ascii_byte_handler!(LSS(lexer) {
// =
ascii_byte_handler!(EQL(lexer) {
lexer.consume_char();
if lexer.next_eq('=') {
if lexer.next_eq('=') {
if lexer.next_ascii_char_eq(b'=') {
if lexer.next_ascii_char_eq(b'=') {
Kind::Eq3
} else {
Kind::Eq2
}
} else if lexer.next_eq('>') {
} else if lexer.next_ascii_char_eq(b'>') {
Kind::Arrow
} else {
Kind::Eq
Expand All @@ -412,8 +412,8 @@ ascii_byte_handler!(GTR(lexer) {
// ?
ascii_byte_handler!(QST(lexer) {
lexer.consume_char();
if lexer.next_eq('?') {
if lexer.next_eq('=') {
if lexer.next_ascii_char_eq(b'?') {
if lexer.next_ascii_char_eq(b'=') {
Kind::Question2Eq
} else {
Kind::Question2
Expand Down Expand Up @@ -457,7 +457,7 @@ ascii_byte_handler!(BTC(lexer) {
// ^
ascii_byte_handler!(CRT(lexer) {
lexer.consume_char();
if lexer.next_eq('=') {
if lexer.next_ascii_char_eq(b'=') {
Kind::CaretEq
} else {
Kind::Caret
Expand All @@ -479,13 +479,13 @@ ascii_byte_handler!(BEO(lexer) {
// |
ascii_byte_handler!(PIP(lexer) {
lexer.consume_char();
if lexer.next_eq('|') {
if lexer.next_eq('=') {
if lexer.next_ascii_char_eq(b'|') {
if lexer.next_ascii_char_eq(b'=') {
Kind::Pipe2Eq
} else {
Kind::Pipe2
}
} else if lexer.next_eq('=') {
} else if lexer.next_ascii_char_eq(b'=') {
Kind::PipeEq
} else {
Kind::Pipe
Expand Down
18 changes: 10 additions & 8 deletions crates/oxc_parser/src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -263,14 +263,16 @@ impl<'a> Lexer<'a> {
self.source.peek_char2()
}

/// Peek the next character, and advance the current position if it matches
#[inline]
fn next_eq(&mut self, c: char) -> bool {
let matched = self.peek() == Some(c);
if matched {
self.source.next_char().unwrap();
}
matched
/// Peek the next byte, and advance the current position if it matches
/// the given ASCII char.
///
/// Returns `true` if the next byte was equal to `b` and has been consumed,
/// `false` otherwise.
///
/// # Panics
///
/// Panics if `b` is not an ASCII byte.
// NOTE(review): `inline(always)` is presumably here so the `assert!` below can be
// folded away when `b` is a byte literal at the call site - confirm.
#[allow(clippy::inline_always)]
#[inline(always)]
fn next_ascii_char_eq(&mut self, b: u8) -> bool {
// TODO: can be replaced by `std::ascii::Char` once stabilized.
// https://github.com/rust-lang/rust/issues/110998
// This is a hard `assert!` (not `debug_assert!`): it is what upholds the
// safety contract of `advance_if_ascii_eq` from this safe function.
assert!(b.is_ascii());
// SAFETY: `b` is a valid ASCII char.
unsafe { self.source.advance_if_ascii_eq(b) }
}

fn current_offset(&self) -> Span {
Expand Down
4 changes: 2 additions & 2 deletions crates/oxc_parser/src/lexer/numeric.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,9 @@ impl<'a> Lexer<'a> {

pub(super) fn decimal_literal_after_first_digit(&mut self) -> Kind {
self.read_decimal_digits_after_first_digit();
if self.next_eq('.') {
if self.next_ascii_char_eq(b'.') {
return self.decimal_literal_after_decimal_point_after_digits();
} else if self.next_eq('n') {
} else if self.next_ascii_char_eq(b'n') {
return self.check_after_numeric_literal(Kind::Decimal);
}

Expand Down
25 changes: 14 additions & 11 deletions crates/oxc_parser/src/lexer/punctuation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,13 @@ impl<'a> Lexer<'a> {

/// returns None for `SingleLineHTMLOpenComment` `<!--` in script mode
pub(super) fn read_left_angle(&mut self) -> Option<Kind> {
if self.next_eq('<') {
if self.next_eq('=') {
if self.next_ascii_char_eq(b'<') {
if self.next_ascii_char_eq(b'=') {
Some(Kind::ShiftLeftEq)
} else {
Some(Kind::ShiftLeft)
}
} else if self.next_eq('=') {
} else if self.next_ascii_char_eq(b'=') {
Some(Kind::LtEq)
} else if self.peek() == Some('!')
// SingleLineHTMLOpenComment `<!--` in script mode
Expand All @@ -38,14 +38,17 @@ impl<'a> Lexer<'a> {

/// returns None for `SingleLineHTMLCloseComment` `-->` in script mode
pub(super) fn read_minus(&mut self) -> Option<Kind> {
if self.next_eq('-') {
if self.next_ascii_char_eq(b'-') {
// SingleLineHTMLCloseComment `-->` in script mode
if self.token.is_on_new_line && self.source_type.is_script() && self.next_eq('>') {
if self.token.is_on_new_line
&& self.source_type.is_script()
&& self.next_ascii_char_eq(b'>')
{
None
} else {
Some(Kind::Minus2)
}
} else if self.next_eq('=') {
} else if self.next_ascii_char_eq(b'=') {
Some(Kind::MinusEq)
} else {
Some(Kind::Minus)
Expand All @@ -59,19 +62,19 @@ impl<'a> Lexer<'a> {
}

fn read_right_angle(&mut self) -> Kind {
if self.next_eq('>') {
if self.next_eq('>') {
if self.next_eq('=') {
if self.next_ascii_char_eq(b'>') {
if self.next_ascii_char_eq(b'>') {
if self.next_ascii_char_eq(b'=') {
Kind::ShiftRight3Eq
} else {
Kind::ShiftRight3
}
} else if self.next_eq('=') {
} else if self.next_ascii_char_eq(b'=') {
Kind::ShiftRightEq
} else {
Kind::ShiftRight
}
} else if self.next_eq('=') {
} else if self.next_ascii_char_eq(b'=') {
Kind::GtEq
} else {
Kind::RAngle
Expand Down
18 changes: 18 additions & 0 deletions crates/oxc_parser/src/lexer/source.rs
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,24 @@ impl<'a> Source<'a> {
self.ptr = self.end;
}

/// Step `Source`'s cursor forward by exactly one byte, but only when the byte
/// at the cursor equals `ascii_byte`.
///
/// Returns `true` when the cursor was moved. Returns `false`, leaving the
/// cursor where it was, when `peek_byte` yields anything other than
/// `Some(ascii_byte)` (including `None`).
///
/// # Safety
///
/// Caller must ensure that `ascii_byte` is a valid ASCII character.
#[allow(clippy::inline_always)]
#[inline(always)]
pub(super) unsafe fn advance_if_ascii_eq(&mut self, ascii_byte: u8) -> bool {
    debug_assert!(ascii_byte.is_ascii());

    // Guard clause: nothing to consume unless the upcoming byte is the one asked for.
    if self.peek_byte() != Some(ascii_byte) {
        return false;
    }

    // SAFETY: next byte exists and is a valid ASCII char (and thus UTF-8
    // char boundary).
    self.ptr = unsafe { self.ptr.add(1) };
    true
}

/// Get string slice from a `SourcePosition` up to the current position of `Source`.
pub(super) fn str_from_pos_to_current(&self, pos: SourcePosition) -> &'a str {
assert!(pos.ptr <= self.ptr);
Expand Down
6 changes: 3 additions & 3 deletions crates/oxc_parser/src/lexer/unicode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -141,11 +141,11 @@ impl<'a> Lexer<'a> {
}

fn unicode_code_point(&mut self) -> Option<SurrogatePair> {
if !self.next_eq('{') {
if !self.next_ascii_char_eq(b'{') {
return None;
}
let value = self.code_point()?;
if !self.next_eq('}') {
if !self.next_ascii_char_eq(b'}') {
return None;
}
Some(SurrogatePair::CodePoint(value))
Expand Down Expand Up @@ -232,7 +232,7 @@ impl<'a> Lexer<'a> {
// <CR> <LF>
LF | LS | PS => {}
CR => {
self.next_eq(LF);
self.next_ascii_char_eq(b'\n');
}
// SingleEscapeCharacter :: one of
// ' " \ b f n r t v
Expand Down
Loading