Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement regress #774

Merged
merged 13 commits into from
Oct 6, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion boa/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ gc = { version = "0.3.6", features = ["derive"] }
serde_json = "1.0.58"
rand = "0.7.3"
num-traits = "0.2.12"
regex = "1.3.9"
regress = "0.1.4"
rustc-hash = "1.1.0"
num-bigint = { version = "0.3.0", features = ["serde"] }
num-integer = "0.1.43"
Expand Down
139 changes: 66 additions & 73 deletions boa/src/builtins/regexp/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ use crate::{
value::{RcString, Value},
BoaProfiler, Context, Result,
};
use regex::Regex;
use regress::{Flags, Regex};

#[cfg(test)]
mod tests;
Expand Down Expand Up @@ -123,7 +123,6 @@ impl RegExp {

// parse flags
let mut sorted_flags = String::new();
let mut pattern = String::new();
let mut dot_all = false;
let mut global = false;
let mut ignore_case = false;
Expand All @@ -137,34 +136,26 @@ impl RegExp {
if regex_flags.contains('i') {
ignore_case = true;
sorted_flags.push('i');
pattern.push('i');
}
if regex_flags.contains('m') {
multiline = true;
sorted_flags.push('m');
pattern.push('m');
}
if regex_flags.contains('s') {
dot_all = true;
sorted_flags.push('s');
pattern.push('s');
}
if regex_flags.contains('u') {
unicode = true;
sorted_flags.push('u');
//pattern.push('s'); // rust uses utf-8 anyway
}
if regex_flags.contains('y') {
sticky = true;
sorted_flags.push('y');
}
// the `regex` crate uses `(?{flags})` inside the pattern to enable flags
if !pattern.is_empty() {
pattern = format!("(?{})", pattern);
}
pattern.push_str(regex_body.as_str());

let matcher = Regex::new(pattern.as_str()).expect("failed to create matcher");
let matcher = Regex::newf(regex_body.as_str(), Flags::from(sorted_flags.as_str()))
.expect("failed to create matcher");
Razican marked this conversation as resolved.
Show resolved Hide resolved
let regexp = RegExp {
matcher,
use_last_index: global || sticky,
Expand Down Expand Up @@ -319,17 +310,18 @@ impl RegExp {
let mut last_index = this.get_field("lastIndex").to_index(ctx)?;
let result = if let Some(object) = this.as_object() {
let regex = object.as_regexp().unwrap();
let result = if let Some(m) = regex.matcher.find_at(arg_str.as_str(), last_index) {
if regex.use_last_index {
last_index = m.end();
}
true
} else {
if regex.use_last_index {
last_index = 0;
}
false
};
let result =
if let Some(m) = regex.matcher.find_from(arg_str.as_str(), last_index).next() {
if regex.use_last_index {
last_index = m.total().end;
}
true
} else {
if regex.use_last_index {
last_index = 0;
}
false
};
Ok(Value::boolean(result))
} else {
panic!("object is not a regexp")
Expand Down Expand Up @@ -358,35 +350,36 @@ impl RegExp {
let mut last_index = this.get_field("lastIndex").to_index(ctx)?;
let result = if let Some(object) = this.as_object() {
let regex = object.as_regexp().unwrap();
let mut locations = regex.matcher.capture_locations();
let result = if let Some(m) =
regex
.matcher
.captures_read_at(&mut locations, arg_str.as_str(), last_index)
{
if regex.use_last_index {
last_index = m.end();
}
let mut result = Vec::with_capacity(locations.len());
for i in 0..locations.len() {
if let Some((start, end)) = locations.get(i) {
result.push(Value::from(
arg_str.get(start..end).expect("Could not get slice"),
));
} else {
result.push(Value::undefined());
let result = {
if let Some(m) = regex.matcher.find_from(arg_str.as_str(), last_index).next() {
if regex.use_last_index {
last_index = m.total().end;
}
let groups = m.captures.len() + 1;
let mut result = Vec::with_capacity(groups);
for i in 0..groups {
if let Some(range) = m.group(i) {
result.push(Value::from(
arg_str.get(range).expect("Could not get slice"),
));
} else {
result.push(Value::undefined());
}
}
}

let result = Value::from(result);
result.set_property("index", Property::default().value(Value::from(m.start())));
result.set_property("input", Property::default().value(Value::from(arg_str)));
result
} else {
if regex.use_last_index {
last_index = 0;
let result = Value::from(result);
result.set_property(
"index",
Property::default().value(Value::from(m.total().start)),
);
result.set_property("input", Property::default().value(Value::from(arg_str)));
result
} else {
if regex.use_last_index {
last_index = 0;
}
Value::null()
}
Value::null()
};
Ok(result)
} else {
Expand Down Expand Up @@ -416,7 +409,7 @@ impl RegExp {
if flags.contains('g') {
let mut matches = Vec::new();
for mat in matcher.find_iter(&arg) {
matches.push(Value::from(mat.as_str()));
matches.push(Value::from(&arg[mat.total()]));
}
if matches.is_empty() {
return Ok(Value::null());
Expand Down Expand Up @@ -464,29 +457,29 @@ impl RegExp {
let regex = object.as_regexp().unwrap();
let mut matches = Vec::new();

for m in regex.matcher.find_iter(&arg_str) {
if let Some(caps) = regex.matcher.captures(&m.as_str()) {
let match_vec = caps
.iter()
.map(|group| match group {
Some(g) => Value::from(g.as_str()),
None => Value::undefined(),
})
.collect::<Vec<Value>>();

let match_val = Value::from(match_vec);

match_val
.set_property("index", Property::default().value(Value::from(m.start())));
match_val.set_property(
"input",
Property::default().value(Value::from(arg_str.clone())),
);
matches.push(match_val);

if !regex.flags.contains('g') {
break;
}
for mat in regex.matcher.find_iter(&arg_str) {
let match_vec: Vec<Value> = mat
.groups()
.map(|group| match group {
Some(range) => Value::from(&arg_str[range]),
None => Value::undefined(),
})
.collect();

let match_val = Value::from(match_vec);

match_val.set_property(
"index",
Property::default().value(Value::from(mat.total().start)),
);
match_val.set_property(
"input",
Property::default().value(Value::from(arg_str.clone())),
);
matches.push(match_val);

if !regex.flags.contains('g') {
break;
}
}

Expand Down
42 changes: 21 additions & 21 deletions boa/src/builtins/string/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ use crate::{
value::{RcString, Value},
BoaProfiler, Context, Result,
};
use regex::Regex;
use regress::Regex;
use std::{
char::decode_utf16,
cmp::{max, min},
Expand Down Expand Up @@ -554,8 +554,9 @@ impl String {
None => return Ok(Value::from(primitive_val)),
};
let caps = re
.captures(&primitive_val)
.expect("unable to get capture groups from text");
.find(&primitive_val)
.expect("unable to get capture groups from text")
.captures;

let replace_value = if args.len() > 1 {
// replace_object could be a string or function or not exist at all
Expand Down Expand Up @@ -583,17 +584,16 @@ impl String {
}
(Some('&'), _) => {
// $&
let matched = caps.get(0).expect("cannot get matched value");
result.push_str(matched.as_str());
result.push_str(&primitive_val[mat.total()]);
}
(Some('`'), _) => {
// $`
let start_of_match = mat.start();
let start_of_match = mat.total().start;
result.push_str(&primitive_val[..start_of_match]);
}
(Some('\''), _) => {
// $'
let end_of_match = mat.end();
let end_of_match = mat.total().end;
result.push_str(&primitive_val[end_of_match..]);
}
(Some(second), Some(third))
Expand All @@ -610,9 +610,9 @@ impl String {
result.push(ch);
}
} else {
let group = match caps.get(nn) {
Some(text) => text.as_str(),
None => "",
let group = match mat.group(nn) {
Some(range) => &primitive_val[range.clone()],
_ => "",
};
result.push_str(group);
chars.next(); // consume third
Expand All @@ -625,9 +625,9 @@ impl String {
result.push(first);
result.push(second);
} else {
let group = match caps.get(n) {
Some(text) => text.as_str(),
None => "",
let group = match mat.group(n) {
Some(range) => &primitive_val[range.clone()],
_ => "",
};
result.push_str(group);
}
Expand All @@ -654,16 +654,16 @@ impl String {
}
Value::Object(_) => {
// This will return the matched substring first, then captured parenthesized groups later
let mut results: Vec<Value> = caps
.iter()
.map(|capture| Value::from(capture.unwrap().as_str()))
let mut results: Vec<Value> = mat
.groups()
.map(|group| match group {
Some(range) => Value::from(&primitive_val[range]),
None => Value::undefined(),
})
.collect();

// Returns the starting byte offset of the match
let start = caps
.get(0)
.expect("Unable to get Byte offset from string for match")
.start();
let start = mat.total().start;
results.push(Value::from(start));
// Push the whole string being examined
results.push(Value::from(primitive_val.to_string()));
Expand All @@ -679,7 +679,7 @@ impl String {
};

Ok(Value::from(primitive_val.replacen(
&mat.as_str(),
&primitive_val[mat.total()],
&replace_value,
1,
)))
Expand Down
8 changes: 5 additions & 3 deletions boa/src/builtins/string/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ fn length() {
const a = new String(' ');
const b = new String('\ud834\udf06');
const c = new String(' \b ');
cosnt d = new String('中文长度')
const d = new String('中文长度')
"#;
eprintln!("{}", forward(&mut engine, init));
let a = forward(&mut engine, "a.length");
Expand Down Expand Up @@ -275,11 +275,12 @@ fn replace_with_function() {
let mut engine = Context::new();
let init = r#"
var a = "ecmascript is cool";
var p1, p2, p3;
var replacer = (match, cap1, cap2, cap3) => {
var p1, p2, p3, length;
var replacer = (match, cap1, cap2, cap3, len) => {
p1 = cap1;
p2 = cap2;
p3 = cap3;
length = len;
return "awesome!";
};

Expand All @@ -294,6 +295,7 @@ fn replace_with_function() {
assert_eq!(forward(&mut engine, "p1"), "\"o\"");
assert_eq!(forward(&mut engine, "p2"), "\"o\"");
assert_eq!(forward(&mut engine, "p3"), "\"l\"");
assert_eq!(forward(&mut engine, "length"), "14");
}

#[test]
Expand Down