Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Merged by Bors] - Fix more Annex B tests #2841

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 9 additions & 5 deletions boa_engine/src/builtins/function/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ use boa_parser::{Parser, Source};
use boa_profiler::Profiler;
use thin_vec::ThinVec;

use std::fmt;
use std::{fmt, io::Read};

use super::{promise::PromiseCapability, BuiltInBuilder, BuiltInConstructor, IntrinsicObject};

Expand Down Expand Up @@ -642,12 +642,16 @@ impl BuiltInFunctionObject {
.into());
}

let body_arg = body_arg.to_string(context)?;
// 11. Let bodyString be the string-concatenation of 0x000A (LINE FEED), ? ToString(bodyArg), and 0x000A (LINE FEED).
let body_arg = body_arg.to_string(context)?.to_std_string_escaped();
let body = b"\n".chain(body_arg.as_bytes()).chain(b"\n".as_slice());

// TODO: make parser generic to u32 iterators
let body = match Parser::new(Source::from_bytes(&body_arg.to_std_string_escaped()))
.parse_function_body(context.interner_mut(), generator, r#async)
{
let body = match Parser::new(Source::from_reader(body, None)).parse_function_body(
context.interner_mut(),
generator,
r#async,
) {
Ok(statement_list) => statement_list,
Err(e) => {
return Err(JsNativeError::syntax()
Expand Down
100 changes: 76 additions & 24 deletions boa_engine/src/builtins/regexp/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ use crate::{
};
use boa_parser::lexer::regex::RegExpFlags;
use boa_profiler::Profiler;
use regress::Regex;
use regress::{Flags, Regex};
use std::str::FromStr;

use super::{BuiltInBuilder, BuiltInConstructor, IntrinsicObject};
Expand Down Expand Up @@ -94,7 +94,7 @@ impl IntrinsicObject for RegExp {
.callable(Self::get_source)
.name("get source")
.build();
BuiltInBuilder::from_standard_constructor::<Self>(realm)
let regexp = BuiltInBuilder::from_standard_constructor::<Self>(realm)
.static_accessor(
JsSymbol::species(),
Some(get_species),
Expand Down Expand Up @@ -137,8 +137,12 @@ impl IntrinsicObject for RegExp {
.accessor(utf16!("unicode"), Some(get_unicode), None, flag_attributes)
.accessor(utf16!("sticky"), Some(get_sticky), None, flag_attributes)
.accessor(utf16!("flags"), Some(get_flags), None, flag_attributes)
.accessor(utf16!("source"), Some(get_source), None, flag_attributes)
.build();
.accessor(utf16!("source"), Some(get_source), None, flag_attributes);

#[cfg(feature = "annex-b")]
let regexp = regexp.method(Self::compile, "compile", 2);

regexp.build();
}

fn get(intrinsics: &Intrinsics) -> JsObject {
Expand Down Expand Up @@ -288,26 +292,29 @@ impl RegExp {
Ok(result) => result,
};

// TODO: Correct UTF-16 handling in 6. - 8.

// 9. Let parseResult be ParsePattern(patternText, u).
// 10. If parseResult is a non-empty List of SyntaxError objects, throw a SyntaxError exception.
// 11. Assert: parseResult is a Pattern Parse Node.
// 12. Set obj.[[OriginalSource]] to P.
// 13. Set obj.[[OriginalFlags]] to F.
// 14. NOTE: The definitions of DotAll, IgnoreCase, Multiline, and Unicode in 22.2.2.1 refer to this value of obj.[[OriginalFlags]].
// 15. Set obj.[[RegExpMatcher]] to CompilePattern of parseResult.
// TODO: add support for utf16 regex to remove these conversions.
let ps = p.to_std_string_escaped();
let fs = f.to_std_string_escaped();
let matcher = match Regex::with_flags(&ps, fs.as_ref()) {
Err(error) => {
return Err(JsNativeError::syntax()
.with_message(format!("failed to create matcher: {}", error.text))
.into());
}
Ok(val) => val,
};
// 10. If u is true, then
// a. Let patternText be StringToCodePoints(P).
// 11. Else,
// a. Let patternText be the result of interpreting each of P's 16-bit elements as a Unicode BMP code point. UTF-16 decoding is not applied to the elements.
// 12. Let parseResult be ParsePattern(patternText, u).
// 13. If parseResult is a non-empty List of SyntaxError objects, throw a SyntaxError exception.
// 14. Assert: parseResult is a Pattern Parse Node.
// 15. Set obj.[[OriginalSource]] to P.
// 16. Set obj.[[OriginalFlags]] to F.
// 17. Let capturingGroupsCount be CountLeftCapturingParensWithin(parseResult).
// 18. Let rer be the RegExp Record { [[IgnoreCase]]: i, [[Multiline]]: m, [[DotAll]]: s, [[Unicode]]: u, [[CapturingGroupsCount]]: capturingGroupsCount }.
// 19. Set obj.[[RegExpRecord]] to rer.
// 20. Set obj.[[RegExpMatcher]] to CompilePattern of parseResult with argument rer.
let matcher =
match Regex::from_unicode(p.code_points().map(CodePoint::as_u32), Flags::from(flags)) {
Err(error) => {
return Err(JsNativeError::syntax()
.with_message(format!("failed to create matcher: {}", error.text))
.into());
}
Ok(val) => val,
};

let regexp = Self {
matcher,
flags,
Expand Down Expand Up @@ -1659,6 +1666,51 @@ impl RegExp {
// 22. Return A.
Ok(a.into())
}

/// [`RegExp.prototype.compile ( pattern, flags )`][spec]
///
/// Annex B method. Validates that `this` is a `RegExp` object, resolves the
/// pattern/flags pair (copying them from `pattern` when it is itself a
/// `RegExp`), and then delegates to [`Self::initialize`].
///
/// # Errors
///
/// Returns a `TypeError` when `this` is not a `RegExp` object, or when
/// `pattern` is a `RegExp` and `flags` is anything other than `undefined`.
/// Any error produced by [`Self::initialize`] is propagated unchanged.
///
/// [spec]: https://tc39.es/ecma262/#sec-regexp.prototype.compile
#[cfg(feature = "annex-b")]
fn compile(this: &JsValue, args: &[JsValue], context: &mut Context<'_>) -> JsResult<JsValue> {
    // 1. Let O be the this value.
    // 2. Perform ? RequireInternalSlot(O, [[RegExpMatcher]]).
    let target = match this.as_object() {
        Some(obj) if obj.borrow().is_regexp() => obj.clone(),
        _ => {
            return Err(JsNativeError::typ()
                .with_message("`RegExp.prototype.compile` cannot be called for a non-object")
                .into());
        }
    };

    let pattern_arg = args.get_or_undefined(0);
    let flags_arg = args.get_or_undefined(1);

    // 3. If pattern is an Object and pattern has a [[RegExpMatcher]] internal slot, then
    //
    // The borrow of the pattern object ends inside the closure, so no borrow is
    // held by the time `initialize` runs (`pattern` may be the same object as `this`).
    let source_regexp = pattern_arg.as_object().and_then(|obj| {
        let borrowed = obj.borrow();
        borrowed
            .as_regexp()
            .map(|rx| (rx.original_source.clone(), rx.original_flags.clone()))
    });

    let (p, f): (JsValue, JsValue) = match source_regexp {
        // a. If flags is not undefined, throw a TypeError exception.
        Some(_) if !flags_arg.is_undefined() => {
            return Err(JsNativeError::typ()
                .with_message(
                    "`RegExp.prototype.compile` cannot be \
                     called with both a RegExp initializer and new flags",
                )
                .into());
        }
        // b. Let P be pattern.[[OriginalSource]].
        // c. Let F be pattern.[[OriginalFlags]].
        Some((original_source, original_flags)) => {
            (original_source.into(), original_flags.into())
        }
        // 4. Else,
        //     a. Let P be pattern.
        //     b. Let F be flags.
        None => (pattern_arg.clone(), flags_arg.clone()),
    };

    // 5. Return ? RegExpInitialize(O, P, F).
    Self::initialize(target, &p, &f, context)
}
}

/// `22.2.5.2.3 AdvanceStringIndex ( S, index, unicode )`
Expand Down
2 changes: 1 addition & 1 deletion boa_engine/src/string/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -355,7 +355,7 @@ impl JsString {
}

/// Gets an iterator of all the Unicode codepoints of a [`JsString`].
pub fn code_points(&self) -> impl Iterator<Item = CodePoint> + '_ {
pub fn code_points(&self) -> impl Iterator<Item = CodePoint> + Clone + '_ {
char::decode_utf16(self.iter().copied()).map(|res| match res {
Ok(c) => CodePoint::Unicode(c),
Err(e) => CodePoint::UnpairedSurrogate(e.unpaired_surrogate()),
Expand Down
7 changes: 1 addition & 6 deletions boa_parser/src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -318,12 +318,7 @@ impl<R> Lexer<R> {
}
}?;

if token.kind() == &TokenKind::Comment {
// Skip comment
self.next(interner)
} else {
Ok(Some(token))
}
Ok(Some(token))
} else {
Err(Error::syntax(
format!(
Expand Down
14 changes: 13 additions & 1 deletion boa_parser/src/lexer/regex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use bitflags::bitflags;
use boa_ast::Position;
use boa_interner::{Interner, Sym};
use boa_profiler::Profiler;
use regress::Regex;
use regress::{Flags, Regex};
use std::{
io::{self, ErrorKind, Read},
str::{self, FromStr},
Expand Down Expand Up @@ -237,3 +237,15 @@ impl ToString for RegExpFlags {
s
}
}

impl From<RegExpFlags> for Flags {
    /// Builds a `regress` [`Flags`] value from the lexer's [`RegExpFlags`] bit set.
    ///
    /// Only the ignore-case, multiline, dot-all and unicode bits are carried
    /// over; every other field keeps the value from `Flags::default()`.
    fn from(value: RegExpFlags) -> Self {
        // Small helper so each field reads as a single membership test.
        let is_set = |flag: RegExpFlags| value.contains(flag);

        let icase = is_set(RegExpFlags::IGNORE_CASE);
        let multiline = is_set(RegExpFlags::MULTILINE);
        let dot_all = is_set(RegExpFlags::DOT_ALL);
        let unicode = is_set(RegExpFlags::UNICODE);

        Self {
            icase,
            multiline,
            dot_all,
            unicode,
            ..Self::default()
        }
    }
}
2 changes: 1 addition & 1 deletion boa_parser/src/parser/cursor/buffered_lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -152,8 +152,8 @@ where
} else {
self.peeked[self.write_index] = self.lexer.next(interner)?;
}
self.write_index = (self.write_index + 1) % PEEK_BUF_SIZE;

self.write_index = (self.write_index + 1) % PEEK_BUF_SIZE;
debug_assert_ne!(
self.read_index, self.write_index,
"we reached the read index with the write index"
Expand Down
1 change: 1 addition & 0 deletions test_ignore.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ features = [
"decorators",
"array-grouping",
"IsHTMLDDA",
"legacy-regexp",

# Non-implemented Intl features
"intl-normative-optional",
Expand Down