diff --git a/Cargo.lock b/Cargo.lock index 55b7bb9d55e..08fd3a98c01 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17,7 +17,7 @@ dependencies = [ "num-traits", "once_cell", "rand", - "regex", + "regress", "rustc-hash", "ryu-js", "serde", @@ -971,6 +971,15 @@ version = "0.6.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "26412eb97c6b088a6997e05f69403a802a92d520de2f8e63c2b65f9e0f47c4e8" +[[package]] +name = "regress" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d669f9db16c334d72d7f92d52874373eb0df8f642230401952a6901872fee4c5" +dependencies = [ + "memchr", +] + [[package]] name = "rust-argon2" version = "0.8.2" diff --git a/boa/Cargo.toml b/boa/Cargo.toml index 2d7247bf760..7c2beb3c12d 100644 --- a/boa/Cargo.toml +++ b/boa/Cargo.toml @@ -18,7 +18,7 @@ gc = { version = "0.3.6", features = ["derive"] } serde_json = "1.0.58" rand = "0.7.3" num-traits = "0.2.12" -regex = "1.3.9" +regress = "0.1.4" rustc-hash = "1.1.0" num-bigint = { version = "0.3.0", features = ["serde"] } num-integer = "0.1.43" diff --git a/boa/src/builtins/regexp/mod.rs b/boa/src/builtins/regexp/mod.rs index 327c0997aea..12f78fbc9f8 100644 --- a/boa/src/builtins/regexp/mod.rs +++ b/boa/src/builtins/regexp/mod.rs @@ -17,7 +17,7 @@ use crate::{ value::{RcString, Value}, BoaProfiler, Context, Result, }; -use regex::Regex; +use regress::{Flags, Regex}; #[cfg(test)] mod tests; @@ -123,7 +123,6 @@ impl RegExp { // parse flags let mut sorted_flags = String::new(); - let mut pattern = String::new(); let mut dot_all = false; let mut global = false; let mut ignore_case = false; @@ -137,34 +136,26 @@ impl RegExp { if regex_flags.contains('i') { ignore_case = true; sorted_flags.push('i'); - pattern.push('i'); } if regex_flags.contains('m') { multiline = true; sorted_flags.push('m'); - pattern.push('m'); } if regex_flags.contains('s') { dot_all = true; sorted_flags.push('s'); - pattern.push('s'); } if regex_flags.contains('u') { unicode = true; sorted_flags.push('u'); - //pattern.push('s'); // rust uses utf-8 anyway } if regex_flags.contains('y') { sticky = true; sorted_flags.push('y'); } - // the `regex` crate uses '(?{flags})` inside the pattern to enable flags - if !pattern.is_empty() { - pattern = format!("(?{})", pattern); - } - pattern.push_str(regex_body.as_str()); - let matcher = Regex::new(pattern.as_str()).expect("failed to create matcher"); + let matcher = Regex::newf(regex_body.as_str(), Flags::from(sorted_flags.as_str())) + .expect("failed to create matcher"); let regexp = RegExp { matcher, use_last_index: global || sticky, @@ -319,17 +310,18 @@ impl RegExp { let mut last_index = this.get_field("lastIndex").to_index(ctx)?; let result = if let Some(object) = this.as_object() { let regex = object.as_regexp().unwrap(); - let result = if let Some(m) = regex.matcher.find_at(arg_str.as_str(), last_index) { - if regex.use_last_index { - last_index = m.end(); - } - true - } else { - if regex.use_last_index { - last_index = 0; - } - false - }; + let result = + if let Some(m) = regex.matcher.find_from(arg_str.as_str(), last_index).next() { + if regex.use_last_index { + last_index = m.total().end; + } + true + } else { + if regex.use_last_index { + last_index = 0; + } + false + }; Ok(Value::boolean(result)) } else { panic!("object is not a regexp") @@ -358,35 +350,36 @@ impl RegExp { let mut last_index = this.get_field("lastIndex").to_index(ctx)?; let result = if let Some(object) = this.as_object() { let regex = object.as_regexp().unwrap(); - let mut locations = regex.matcher.capture_locations(); - let result = if let Some(m) = - regex - .matcher - .captures_read_at(&mut locations, arg_str.as_str(), last_index) - { - if regex.use_last_index { - last_index = m.end(); - } - let mut result = Vec::with_capacity(locations.len()); - for i in 0..locations.len() { - if let Some((start, end)) = locations.get(i) { - result.push(Value::from( - arg_str.get(start..end).expect("Could not get slice"), - )); - } else { - result.push(Value::undefined()); + let result = { + if let Some(m) = regex.matcher.find_from(arg_str.as_str(), last_index).next() { + if regex.use_last_index { + last_index = m.total().end; + } + let groups = m.captures.len() + 1; + let mut result = Vec::with_capacity(groups); + for i in 0..groups { + if let Some(range) = m.group(i) { + result.push(Value::from( + arg_str.get(range).expect("Could not get slice"), + )); + } else { + result.push(Value::undefined()); + } } - } - let result = Value::from(result); - result.set_property("index", Property::default().value(Value::from(m.start()))); - result.set_property("input", Property::default().value(Value::from(arg_str))); - result - } else { - if regex.use_last_index { - last_index = 0; + let result = Value::from(result); + result.set_property( + "index", + Property::default().value(Value::from(m.total().start)), + ); + result.set_property("input", Property::default().value(Value::from(arg_str))); + result + } else { + if regex.use_last_index { + last_index = 0; + } + Value::null() } - Value::null() }; Ok(result) } else { @@ -416,7 +409,7 @@ impl RegExp { if flags.contains('g') { let mut matches = Vec::new(); for mat in matcher.find_iter(&arg) { - matches.push(Value::from(mat.as_str())); + matches.push(Value::from(&arg[mat.total()])); } if matches.is_empty() { return Ok(Value::null()); @@ -464,29 +457,29 @@ impl RegExp { let regex = object.as_regexp().unwrap(); let mut matches = Vec::new(); - for m in regex.matcher.find_iter(&arg_str) { - if let Some(caps) = regex.matcher.captures(&m.as_str()) { - let match_vec = caps - .iter() - .map(|group| match group { - Some(g) => Value::from(g.as_str()), - None => Value::undefined(), - }) - .collect::>(); - - let match_val = Value::from(match_vec); - - match_val - .set_property("index", Property::default().value(Value::from(m.start()))); - match_val.set_property( - "input", - Property::default().value(Value::from(arg_str.clone())), - ); - matches.push(match_val); - - if !regex.flags.contains('g') { - break; - } + for mat in regex.matcher.find_iter(&arg_str) { + let match_vec: Vec = mat + .groups() + .map(|group| match group { + Some(range) => Value::from(&arg_str[range]), + None => Value::undefined(), + }) + .collect(); + + let match_val = Value::from(match_vec); + + match_val.set_property( + "index", + Property::default().value(Value::from(mat.total().start)), + ); + match_val.set_property( + "input", + Property::default().value(Value::from(arg_str.clone())), + ); + matches.push(match_val); + + if !regex.flags.contains('g') { + break; } } diff --git a/boa/src/builtins/string/mod.rs b/boa/src/builtins/string/mod.rs index a1a76c12b9b..ac65d5b7cf8 100644 --- a/boa/src/builtins/string/mod.rs +++ b/boa/src/builtins/string/mod.rs @@ -20,7 +20,7 @@ use crate::{ value::{RcString, Value}, BoaProfiler, Context, Result, }; -use regex::Regex; +use regress::Regex; use std::{ char::decode_utf16, cmp::{max, min}, @@ -554,8 +554,9 @@ impl String { None => return Ok(Value::from(primitive_val)), }; let caps = re - .captures(&primitive_val) - .expect("unable to get capture groups from text"); + .find(&primitive_val) + .expect("unable to get capture groups from text") + .captures; let replace_value = if args.len() > 1 { // replace_object could be a string or function or not exist at all @@ -583,17 +584,16 @@ impl String { } (Some('&'), _) => { // $& - let matched = caps.get(0).expect("cannot get matched value"); - result.push_str(matched.as_str()); + result.push_str(&primitive_val[mat.total()]); } (Some('`'), _) => { // $` - let start_of_match = mat.start(); + let start_of_match = mat.total().start; result.push_str(&primitive_val[..start_of_match]); } (Some('\''), _) => { // $' - let end_of_match = mat.end(); + let end_of_match = mat.total().end; result.push_str(&primitive_val[end_of_match..]); } (Some(second), Some(third)) @@ -610,9 +610,9 @@ impl String { result.push(ch); } } else { - let group = match caps.get(nn) { - Some(text) => text.as_str(), - None => "", + let group = match mat.group(nn) { + Some(range) => &primitive_val[range.clone()], + _ => "", }; result.push_str(group); chars.next(); // consume third @@ -625,9 +625,9 @@ impl String { result.push(first); result.push(second); } else { - let group = match caps.get(n) { - Some(text) => text.as_str(), - None => "", + let group = match mat.group(n) { + Some(range) => &primitive_val[range.clone()], + _ => "", }; result.push_str(group); } @@ -654,16 +654,16 @@ impl String { } Value::Object(_) => { // This will return the matched substring first, then captured parenthesized groups later - let mut results: Vec = caps - .iter() - .map(|capture| Value::from(capture.unwrap().as_str())) + let mut results: Vec = mat + .groups() + .map(|group| match group { + Some(range) => Value::from(&primitive_val[range]), + None => Value::undefined(), + }) .collect(); // Returns the starting byte offset of the match - let start = caps - .get(0) - .expect("Unable to get Byte offset from string for match") - .start(); + let start = mat.total().start; results.push(Value::from(start)); // Push the whole string being examined results.push(Value::from(primitive_val.to_string())); @@ -679,7 +679,7 @@ impl String { }; Ok(Value::from(primitive_val.replacen( - &mat.as_str(), + &primitive_val[mat.total()], &replace_value, 1, ))) diff --git a/boa/src/builtins/string/tests.rs b/boa/src/builtins/string/tests.rs index 68ef1439c25..af0eeb0d32f 100644 --- a/boa/src/builtins/string/tests.rs +++ b/boa/src/builtins/string/tests.rs @@ -10,7 +10,7 @@ fn length() { const a = new String(' '); const b = new String('\ud834\udf06'); const c = new String(' \b '); - cosnt d = new String('中文长度') + const d = new String('中文长度') "#; eprintln!("{}", forward(&mut engine, init)); let a = forward(&mut engine, "a.length"); @@ -275,11 +275,12 @@ fn replace_with_function() { let mut engine = Context::new(); let init = r#" var a = "ecmascript is cool"; - var p1, p2, p3; - var replacer = (match, cap1, cap2, cap3) => { + var p1, p2, p3, length; + var replacer = (match, cap1, cap2, cap3, len) => { p1 = cap1; p2 = cap2; p3 = cap3; + length = len; return "awesome!"; }; @@ -294,6 +295,7 @@ fn replace_with_function() { assert_eq!(forward(&mut engine, "p1"), "\"o\""); assert_eq!(forward(&mut engine, "p2"), "\"o\""); assert_eq!(forward(&mut engine, "p3"), "\"l\""); + assert_eq!(forward(&mut engine, "length"), "14"); } #[test]