Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

perf(es/lexer): Reduce allocation while lexing identifiers #9076

Merged
merged 10 commits into from
Jun 18, 2024
Merged
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
113 changes: 76 additions & 37 deletions crates/swc_ecma_parser/src/lexer/mod.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
//! ECMAScript lexer.

use std::{cell::RefCell, char, iter::FusedIterator, rc::Rc};
use std::{cell::RefCell, char, iter::FusedIterator, mem::transmute, rc::Rc};

use either::Either::{Left, Right};
use smallvec::{smallvec, SmallVec};
use swc_atoms::{Atom, AtomStoreCell};
use swc_common::{comments::Comments, input::StringInput, BytePos, Span};
use swc_ecma_ast::{op, AssignOp, EsVersion};
use swc_ecma_ast::{op, AssignOp, EsVersion, Ident};

use self::{
comments_buffer::CommentsBuffer,
Expand Down Expand Up @@ -793,49 +793,46 @@ impl<'a> Lexer<'a> {
debug_assert!(self.cur().is_some());
let mut first = true;
let mut can_be_keyword = true;
let mut slice_start = self.cur_pos();
let mut has_escape = false;

self.with_buf(|l, buf| {
let mut has_escape = false;

while let Some(c) = {
// Optimization
{
let s = l.input.uncons_while(|c| {
if !c.is_ident_part() {
return false;
}

// Performance optimization
if c.is_ascii_uppercase() || c.is_ascii_digit() || !c.is_ascii() {
can_be_keyword = false;
}

true
});
if !s.is_empty() {
loop {
if let Some(c) = l.input.cur_as_ascii() {
if Ident::is_valid_continue(c as _) {
l.bump();
continue;
} else if first && Ident::is_valid_start(c as _) {
l.bump();
first = false;
continue;
}
buf.push_str(s)
}

l.cur()
} {
let start = l.cur_pos();

match c {
c if c.is_ident_part() => {
l.bump();
buf.push(c);
}
// unicode escape
'\\' => {
if c == b'\\' {
first = false;
can_be_keyword = false;
has_escape = true;
let start = l.cur_pos();
l.bump();

if !l.is(b'u') {
l.error_span(pos_span(start), SyntaxError::ExpectedUnicodeEscape)?
}

has_escape = true;
{
let end = l.input.cur_pos();
let s = unsafe {
// Safety: start and end are valid position because we got them from
// `self.input`
l.input.slice(slice_start, start)
};
buf.push_str(s);
unsafe {
// Safety: We got end from `self.input`
l.input.reset_to(end);
}
}

let chars = l.read_unicode_escape()?;

Expand All @@ -854,14 +851,56 @@ impl<'a> Lexer<'a> {
for c in chars {
buf.extend(c);
}

slice_start = l.cur_pos();
continue;
}
_ => {
break;

// ASCII but not a valid identifier

break;
}

if let Some(c) = l.input.cur() {
if Ident::is_valid_continue(c) {
l.bump();
continue;
} else if first && Ident::is_valid_start(c) {
l.bump();
first = false;
continue;
}
}
first = false;

break;
}
let value = convert(l, buf, has_escape, can_be_keyword);

let end = l.cur_pos();

let value = if !has_escape {
// Fast path: raw slice is enough if there's no escape.

let s = unsafe {
// Safety: slice_start and end are valid position because we got them from
// `self.input`
l.input.slice(slice_start, end)
};
let s = unsafe {
// Safety: We don't use 'static. We just bypass the lifetime check.
transmute::<&str, &'static str>(s)
};

convert(l, s, has_escape, can_be_keyword)
} else {
let s = unsafe {
// Safety: slice_start and end are valid position because we got them from
// `self.input`
l.input.slice(slice_start, end)
};
buf.push_str(s);

convert(l, buf, has_escape, can_be_keyword)
};

Ok((value, has_escape))
})
Expand Down
Loading