Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize Regex match check #3779

Merged
merged 5 commits into from
Apr 2, 2024
Merged
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 40 additions & 47 deletions core/engine/src/builtins/regexp/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -943,72 +943,65 @@ impl RegExp {
// 11. If fullUnicode is true, let input be StringToCodePoints(S). Otherwise, let input be a List whose elements are the code units that are the elements of S.
// 12. NOTE: Each element of input is considered to be a character.

// TODO: Comment spec deviation
// TODO: It would be better to put this in an enum.
// enum Matches { Utf16(..), Ucs2(..) }
HalidOdat marked this conversation as resolved.
Show resolved Hide resolved
let mut utf16_matches = matcher.find_from_utf16(input, last_index as usize);
let mut ucs2_matches = matcher.find_from_ucs2(input, last_index as usize);

// 10. Let matchSucceeded be false.
// 13. Repeat, while matchSucceeded is false,
let match_value = loop {
// a. If lastIndex > length, then
if last_index > length {
// a. If lastIndex > length, then
if last_index > length {
// i. If global is true or sticky is true, then
if global || sticky {
// 1. Perform ? Set(R, "lastIndex", +0𝔽, true).
this.set(utf16!("lastIndex"), 0, true, context)?;
}

// ii. Return null.
return Ok(None);
}

// b. Let inputIndex be the index into input of the character that was obtained from element lastIndex of S.
// c. Let r be matcher(input, inputIndex).
let r: Option<regress::Match> = if full_unicode {
utf16_matches.next()
} else {
ucs2_matches.next()
};

let match_value = match r {
// d. If r is failure, then
None => {
// i. If global is true or sticky is true, then
if global || sticky {
// 1. Perform ? Set(R, "lastIndex", +0𝔽, true).
this.set(utf16!("lastIndex"), 0, true, context)?;
}

// ii. Return null.
return Ok(None);
}
// i. Assert: r is a State.
Some(m) => {
HalidOdat marked this conversation as resolved.
Show resolved Hide resolved
if sticky && m.start() != last_index as usize {
// 1. Perform ? Set(R, "lastIndex", +0𝔽, true).
this.set(utf16!("lastIndex"), 0, true, context)?;

// b. Let inputIndex be the index into input of the character that was obtained from element lastIndex of S.
// c. Let r be matcher(input, inputIndex).
let r: Option<regress::Match> = if full_unicode {
matcher.find_from_utf16(input, last_index as usize).next()
} else {
matcher.find_from_ucs2(input, last_index as usize).next()
};

match r {
// d. If r is failure, then
None => {
// i. If sticky is true, then
if sticky {
// 1. Perform ? Set(R, "lastIndex", +0𝔽, true).
this.set(utf16!("lastIndex"), 0, true, context)?;

// 2. Return null.
return Ok(None);
}

// ii. Set lastIndex to AdvanceStringIndex(S, lastIndex, fullUnicode).
last_index = advance_string_index(input, last_index, full_unicode);
// 2. Return null.
return Ok(None);
}

Some(m) => {
// d. If r is failure, then
#[allow(clippy::if_not_else)]
if m.start() as u64 != last_index {
// i. If sticky is true, then
if sticky {
// 1. Perform ? Set(R, "lastIndex", +0𝔽, true).
this.set(utf16!("lastIndex"), 0, true, context)?;

// 2. Return null.
return Ok(None);
}
// FIXME: Fix unicode regex

// ii. Set lastIndex to AdvanceStringIndex(S, lastIndex, fullUnicode).
last_index = advance_string_index(input, last_index, full_unicode);
// e. Else,
} else {
// i. Assert: r is a State.
// ii. Set matchSucceeded to true.
break m;
}
}
// ii. Set matchSucceeded to true.
m
}
};

// 14. Let e be r's endIndex value.
let e = match_value.end();
last_index = match_value.start() as u64;

// Note: This is already taken care of by regress.
// 15. If fullUnicode is true, set e to GetStringIndex(S, e).
Expand Down
Loading