Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Merged by Bors] - Rework RegExp struct to include bitflags field #1837

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 28 additions & 83 deletions boa/src/builtins/regexp/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@

pub mod regexp_string_iterator;

use std::str::FromStr;

use crate::{
builtins::{array::Array, string, BuiltIn},
context::StandardObjects,
Expand All @@ -21,6 +23,7 @@ use crate::{
},
property::Attribute,
symbol::WellKnownSymbols,
syntax::lexer::regex::RegExpFlags,
value::{IntegerOrInfinity, JsValue},
BoaProfiler, Context, JsResult, JsString,
};
Expand All @@ -37,25 +40,7 @@ mod tests;
pub struct RegExp {
/// Regex matcher.
matcher: Regex,

/// Flag 's' - dot matches newline characters.
dot_all: bool,

/// Flag 'g'
global: bool,

/// Flag 'i' - ignore case.
ignore_case: bool,

/// Flag 'm' - '^' and '$' match beginning/end of line.
multiline: bool,

/// Flag 'y'
sticky: bool,

/// Flag 'u' - Unicode.
unicode: bool,

flags: RegExpFlags,
original_source: JsString,
original_flags: JsString,
}
Expand Down Expand Up @@ -283,45 +268,10 @@ impl RegExp {

// 5. If F contains any code unit other than "g", "i", "m", "s", "u", or "y"
// or if it contains the same code unit more than once, throw a SyntaxError exception.
let mut global = false;
let mut ignore_case = false;
let mut multiline = false;
let mut dot_all = false;
let mut unicode = false;
let mut sticky = false;
for c in f.chars() {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would implement FromStr for RegexFlags, and use the version of parse_regex_flags() in the lexer, which iterates through bytes instead of characters, and should therefore be more efficient. Would also reduce duplicate code.

match c {
'g' if global => {
return context.throw_syntax_error("RegExp flags contains multiple 'g'")
}
'g' => global = true,
'i' if ignore_case => {
return context.throw_syntax_error("RegExp flags contains multiple 'i'")
}
'i' => ignore_case = true,
'm' if multiline => {
return context.throw_syntax_error("RegExp flags contains multiple 'm'")
}
'm' => multiline = true,
's' if dot_all => {
return context.throw_syntax_error("RegExp flags contains multiple 's'")
}
's' => dot_all = true,
'u' if unicode => {
return context.throw_syntax_error("RegExp flags contains multiple 'u'")
}
'u' => unicode = true,
'y' if sticky => {
return context.throw_syntax_error("RegExp flags contains multiple 'y'")
}
'y' => sticky = true,
c => {
return context.throw_syntax_error(format!(
"RegExp flags contains unknown code unit '{c}'",
))
}
}
}
let flags = match RegExpFlags::from_str(&f) {
Err(msg) => return context.throw_syntax_error(msg),
Ok(result) => result,
};

// 12. Set obj.[[OriginalSource]] to P.
// 13. Set obj.[[OriginalFlags]] to F.
Expand All @@ -336,12 +286,7 @@ impl RegExp {

let regexp = Self {
matcher,
dot_all,
global,
ignore_case,
multiline,
sticky,
unicode,
flags,
original_source: p,
original_flags: f,
};
Expand Down Expand Up @@ -387,16 +332,16 @@ impl RegExp {
}

#[inline]
fn regexp_has_flag(this: &JsValue, flag: char, context: &mut Context) -> JsResult<JsValue> {
fn regexp_has_flag(this: &JsValue, flag: u8, context: &mut Context) -> JsResult<JsValue> {
if let Some(object) = this.as_object() {
if let Some(regexp) = object.borrow().as_regexp() {
return Ok(JsValue::new(match flag {
'g' => regexp.global,
'm' => regexp.multiline,
's' => regexp.dot_all,
'i' => regexp.ignore_case,
'u' => regexp.unicode,
'y' => regexp.sticky,
b'g' => regexp.flags.contains(RegExpFlags::GLOBAL),
b'm' => regexp.flags.contains(RegExpFlags::MULTILINE),
b's' => regexp.flags.contains(RegExpFlags::DOT_ALL),
b'i' => regexp.flags.contains(RegExpFlags::IGNORE_CASE),
b'u' => regexp.flags.contains(RegExpFlags::UNICODE),
b'y' => regexp.flags.contains(RegExpFlags::STICKY),
_ => unreachable!(),
}));
}
Expand All @@ -410,12 +355,12 @@ impl RegExp {
}

let name = match flag {
'g' => "global",
'm' => "multiline",
's' => "dotAll",
'i' => "ignoreCase",
'u' => "unicode",
'y' => "sticky",
b'g' => "global",
b'm' => "multiline",
b's' => "dotAll",
b'i' => "ignoreCase",
b'u' => "unicode",
b'y' => "sticky",
_ => unreachable!(),
};

Expand All @@ -439,7 +384,7 @@ impl RegExp {
_: &[JsValue],
context: &mut Context,
) -> JsResult<JsValue> {
Self::regexp_has_flag(this, 'g', context)
Self::regexp_has_flag(this, b'g', context)
}

/// `get RegExp.prototype.ignoreCase`
Expand All @@ -457,7 +402,7 @@ impl RegExp {
_: &[JsValue],
context: &mut Context,
) -> JsResult<JsValue> {
Self::regexp_has_flag(this, 'i', context)
Self::regexp_has_flag(this, b'i', context)
}

/// `get RegExp.prototype.multiline`
Expand All @@ -475,7 +420,7 @@ impl RegExp {
_: &[JsValue],
context: &mut Context,
) -> JsResult<JsValue> {
Self::regexp_has_flag(this, 'm', context)
Self::regexp_has_flag(this, b'm', context)
}

/// `get RegExp.prototype.dotAll`
Expand All @@ -493,7 +438,7 @@ impl RegExp {
_: &[JsValue],
context: &mut Context,
) -> JsResult<JsValue> {
Self::regexp_has_flag(this, 's', context)
Self::regexp_has_flag(this, b's', context)
}

/// `get RegExp.prototype.unicode`
Expand All @@ -512,7 +457,7 @@ impl RegExp {
_: &[JsValue],
context: &mut Context,
) -> JsResult<JsValue> {
Self::regexp_has_flag(this, 'u', context)
Self::regexp_has_flag(this, b'u', context)
}

/// `get RegExp.prototype.sticky`
Expand All @@ -531,7 +476,7 @@ impl RegExp {
_: &[JsValue],
context: &mut Context,
) -> JsResult<JsValue> {
Self::regexp_has_flag(this, 'y', context)
Self::regexp_has_flag(this, b'y', context)
}

/// `get RegExp.prototype.flags`
Expand Down
2 changes: 1 addition & 1 deletion boa/src/syntax/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ pub mod error;
mod identifier;
mod number;
mod operator;
mod regex;
pub mod regex;
mod spread;
mod string;
mod template;
Expand Down
59 changes: 33 additions & 26 deletions boa/src/syntax/lexer/regex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ use bitflags::bitflags;
use boa_interner::{Interner, Sym};
use std::{
io::{self, ErrorKind, Read},
str,
str::{self, FromStr},
};

/// Regex literal lexing.
Expand Down Expand Up @@ -133,7 +133,7 @@ impl<R> Tokenizer<R> for RegexLiteral {
bitflags! {
/// Flags of a regular expression.
#[derive(Default)]
struct RegExpFlags: u8 {
pub struct RegExpFlags: u8 {
const GLOBAL = 0b0000_0001;
const IGNORE_CASE = 0b0000_0010;
const MULTILINE = 0b0000_0100;
Expand All @@ -143,33 +143,40 @@ bitflags! {
}
}

fn parse_regex_flags(s: &str, start: Position, interner: &mut Interner) -> Result<Sym, Error> {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would move this function together with the structure, and change it so that this implements the FromStr trait for the structure.

let mut flags = RegExpFlags::default();
for c in s.bytes() {
let new_flag = match c {
b'g' => RegExpFlags::GLOBAL,
b'i' => RegExpFlags::IGNORE_CASE,
b'm' => RegExpFlags::MULTILINE,
b's' => RegExpFlags::DOT_ALL,
b'u' => RegExpFlags::UNICODE,
b'y' => RegExpFlags::STICKY,
_ => {
return Err(Error::syntax(
format!("invalid regular expression flag {}", char::from(c)),
start,
))
}
};
impl FromStr for RegExpFlags {
type Err = String;

fn from_str(s: &str) -> Result<Self, Self::Err> {
let mut flags = Self::default();
for c in s.bytes() {
let new_flag = match c {
b'g' => Self::GLOBAL,
b'i' => Self::IGNORE_CASE,
b'm' => Self::MULTILINE,
b's' => Self::DOT_ALL,
b'u' => Self::UNICODE,
b'y' => Self::STICKY,
_ => return Err(format!("invalid regular expression flag {}", char::from(c))),
};

if flags.contains(new_flag) {
return Err(Error::syntax(
format!("repeated regular expression flag {}", char::from(c)),
start,
));
if flags.contains(new_flag) {
return Err(format!(
"repeated regular expression flag {}",
char::from(c)
));
}
flags.insert(new_flag);
}
flags.insert(new_flag);

Ok(flags)
}
}

fn parse_regex_flags(s: &str, start: Position, interner: &mut Interner) -> Result<Sym, Error> {
match RegExpFlags::from_str(s) {
Err(message) => Err(Error::Syntax(message.into(), start)),
Ok(flags) => Ok(interner.get_or_intern(flags.to_string())),
}
Ok(interner.get_or_intern(flags.to_string()))
}

impl ToString for RegExpFlags {
Expand Down