diff --git a/boa_engine/src/builtins/function/mod.rs b/boa_engine/src/builtins/function/mod.rs index 82c91eb6856..9c249e458bc 100644 --- a/boa_engine/src/builtins/function/mod.rs +++ b/boa_engine/src/builtins/function/mod.rs @@ -40,7 +40,7 @@ use boa_parser::{Parser, Source}; use boa_profiler::Profiler; use thin_vec::ThinVec; -use std::fmt; +use std::{fmt, io::Read}; use super::{promise::PromiseCapability, BuiltInBuilder, BuiltInConstructor, IntrinsicObject}; @@ -642,12 +642,16 @@ impl BuiltInFunctionObject { .into()); } - let body_arg = body_arg.to_string(context)?; + // 11. Let bodyString be the string-concatenation of 0x000A (LINE FEED), ? ToString(bodyArg), and 0x000A (LINE FEED). + let body_arg = body_arg.to_string(context)?.to_std_string_escaped(); + let body = b"\n".chain(body_arg.as_bytes()).chain(b"\n".as_slice()); // TODO: make parser generic to u32 iterators - let body = match Parser::new(Source::from_bytes(&body_arg.to_std_string_escaped())) - .parse_function_body(context.interner_mut(), generator, r#async) - { + let body = match Parser::new(Source::from_reader(body, None)).parse_function_body( + context.interner_mut(), + generator, + r#async, + ) { Ok(statement_list) => statement_list, Err(e) => { return Err(JsNativeError::syntax() diff --git a/boa_engine/src/builtins/regexp/mod.rs b/boa_engine/src/builtins/regexp/mod.rs index 2cbfa0ce756..1edc2b1b28b 100644 --- a/boa_engine/src/builtins/regexp/mod.rs +++ b/boa_engine/src/builtins/regexp/mod.rs @@ -27,7 +27,7 @@ use crate::{ }; use boa_parser::lexer::regex::RegExpFlags; use boa_profiler::Profiler; -use regress::Regex; +use regress::{Flags, Regex}; use std::str::FromStr; use super::{BuiltInBuilder, BuiltInConstructor, IntrinsicObject}; @@ -94,7 +94,7 @@ impl IntrinsicObject for RegExp { .callable(Self::get_source) .name("get source") .build(); - BuiltInBuilder::from_standard_constructor::<Self>(realm) + let regexp = BuiltInBuilder::from_standard_constructor::<Self>(realm) .static_accessor( JsSymbol::species(), 
Some(get_species), @@ -137,8 +137,12 @@ impl IntrinsicObject for RegExp { .accessor(utf16!("unicode"), Some(get_unicode), None, flag_attributes) .accessor(utf16!("sticky"), Some(get_sticky), None, flag_attributes) .accessor(utf16!("flags"), Some(get_flags), None, flag_attributes) - .accessor(utf16!("source"), Some(get_source), None, flag_attributes) - .build(); + .accessor(utf16!("source"), Some(get_source), None, flag_attributes); + + #[cfg(feature = "annex-b")] + let regexp = regexp.method(Self::compile, "compile", 2); + + regexp.build(); } fn get(intrinsics: &Intrinsics) -> JsObject { @@ -288,26 +292,29 @@ impl RegExp { Ok(result) => result, }; - // TODO: Correct UTF-16 handling in 6. - 8. - - // 9. Let parseResult be ParsePattern(patternText, u). - // 10. If parseResult is a non-empty List of SyntaxError objects, throw a SyntaxError exception. - // 11. Assert: parseResult is a Pattern Parse Node. - // 12. Set obj.[[OriginalSource]] to P. - // 13. Set obj.[[OriginalFlags]] to F. - // 14. NOTE: The definitions of DotAll, IgnoreCase, Multiline, and Unicode in 22.2.2.1 refer to this value of obj.[[OriginalFlags]]. - // 15. Set obj.[[RegExpMatcher]] to CompilePattern of parseResult. - // TODO: add support for utf16 regex to remove this conversions. - let ps = p.to_std_string_escaped(); - let fs = f.to_std_string_escaped(); - let matcher = match Regex::with_flags(&ps, fs.as_ref()) { - Err(error) => { - return Err(JsNativeError::syntax() - .with_message(format!("failed to create matcher: {}", error.text)) - .into()); - } - Ok(val) => val, - }; + // 10. If u is true, then + // a. Let patternText be StringToCodePoints(P). + // 11. Else, + // a. Let patternText be the result of interpreting each of P's 16-bit elements as a Unicode BMP code point. UTF-16 decoding is not applied to the elements. + // 12. Let parseResult be ParsePattern(patternText, u). + // 13. If parseResult is a non-empty List of SyntaxError objects, throw a SyntaxError exception. + // 14. 
Assert: parseResult is a Pattern Parse Node. + // 15. Set obj.[[OriginalSource]] to P. + // 16. Set obj.[[OriginalFlags]] to F. + // 17. Let capturingGroupsCount be CountLeftCapturingParensWithin(parseResult). + // 18. Let rer be the RegExp Record { [[IgnoreCase]]: i, [[Multiline]]: m, [[DotAll]]: s, [[Unicode]]: u, [[CapturingGroupsCount]]: capturingGroupsCount }. + // 19. Set obj.[[RegExpRecord]] to rer. + // 20. Set obj.[[RegExpMatcher]] to CompilePattern of parseResult with argument rer. + let matcher = + match Regex::from_unicode(p.code_points().map(CodePoint::as_u32), Flags::from(flags)) { + Err(error) => { + return Err(JsNativeError::syntax() + .with_message(format!("failed to create matcher: {}", error.text)) + .into()); + } + Ok(val) => val, + }; + let regexp = Self { matcher, flags, @@ -1659,6 +1666,51 @@ impl RegExp { // 22. Return A. Ok(a.into()) } + + /// [`RegExp.prototype.compile ( pattern, flags )`][spec] + /// + /// [spec]: https://tc39.es/ecma262/#sec-regexp.prototype.compile + #[cfg(feature = "annex-b")] + fn compile(this: &JsValue, args: &[JsValue], context: &mut Context<'_>) -> JsResult<JsValue> { + // 1. Let O be the this value. + // 2. Perform ? RequireInternalSlot(O, [[RegExpMatcher]]). + let this = this + .as_object() + .filter(|o| o.borrow().is_regexp()) + .cloned() + .ok_or_else(|| { + JsNativeError::typ() + .with_message("`RegExp.prototype.compile` cannot be called for a non-object") + })?; + let pattern = args.get_or_undefined(0); + let flags = args.get_or_undefined(1); + // 3. If pattern is an Object and pattern has a [[RegExpMatcher]] internal slot, then + let (pattern, flags) = if let Some((p, f)) = pattern.as_object().and_then(|o| { + let o = o.borrow(); + o.as_regexp() + .map(|rx| (rx.original_source.clone(), rx.original_flags.clone())) + }) { + // a. If flags is not undefined, throw a TypeError exception. 
+ if !flags.is_undefined() { + return Err(JsNativeError::typ() + .with_message( + "`RegExp.prototype.compile` cannot be \ + called with both a RegExp initializer and new flags", + ) + .into()); + } + // b. Let P be pattern.[[OriginalSource]]. + // c. Let F be pattern.[[OriginalFlags]]. + (p.into(), f.into()) + } else { + // 4. Else, + // a. Let P be pattern. + // b. Let F be flags. + (pattern.clone(), flags.clone()) + }; + // 5. Return ? RegExpInitialize(O, P, F). + Self::initialize(this, &pattern, &flags, context) + } } /// `22.2.5.2.3 AdvanceStringIndex ( S, index, unicode )` diff --git a/boa_engine/src/string/mod.rs b/boa_engine/src/string/mod.rs index 97430c66559..269fe31dd86 100644 --- a/boa_engine/src/string/mod.rs +++ b/boa_engine/src/string/mod.rs @@ -355,7 +355,7 @@ impl JsString { } /// Gets an iterator of all the Unicode codepoints of a [`JsString`]. - pub fn code_points(&self) -> impl Iterator<Item = CodePoint> + '_ { + pub fn code_points(&self) -> impl Iterator<Item = CodePoint> + Clone + '_ { char::decode_utf16(self.iter().copied()).map(|res| match res { Ok(c) => CodePoint::Unicode(c), Err(e) => CodePoint::UnpairedSurrogate(e.unpaired_surrogate()), diff --git a/boa_parser/src/lexer/mod.rs b/boa_parser/src/lexer/mod.rs index 818c89c8e30..d72adcb29c7 100644 --- a/boa_parser/src/lexer/mod.rs +++ b/boa_parser/src/lexer/mod.rs @@ -318,12 +318,7 @@ impl Lexer { } }?; - if token.kind() == &TokenKind::Comment { - // Skip comment - self.next(interner) - } else { - Ok(Some(token)) - } + Ok(Some(token)) } else { Err(Error::syntax( format!( diff --git a/boa_parser/src/lexer/regex.rs b/boa_parser/src/lexer/regex.rs index ea4de1b6cee..36456107923 100644 --- a/boa_parser/src/lexer/regex.rs +++ b/boa_parser/src/lexer/regex.rs @@ -5,7 +5,7 @@ use bitflags::bitflags; use boa_ast::Position; use boa_interner::{Interner, Sym}; use boa_profiler::Profiler; -use regress::Regex; +use regress::{Flags, Regex}; use std::{ io::{self, ErrorKind, Read}, str::{self, FromStr}, @@ -237,3 +237,15 @@ impl ToString for 
RegExpFlags { s } } + +impl From<RegExpFlags> for Flags { + fn from(value: RegExpFlags) -> Self { + Self { + icase: value.contains(RegExpFlags::IGNORE_CASE), + multiline: value.contains(RegExpFlags::MULTILINE), + dot_all: value.contains(RegExpFlags::DOT_ALL), + unicode: value.contains(RegExpFlags::UNICODE), + ..Self::default() + } + } +} diff --git a/boa_parser/src/parser/cursor/buffered_lexer/mod.rs b/boa_parser/src/parser/cursor/buffered_lexer/mod.rs index de60e2153ab..755634284a0 100644 --- a/boa_parser/src/parser/cursor/buffered_lexer/mod.rs +++ b/boa_parser/src/parser/cursor/buffered_lexer/mod.rs @@ -152,8 +152,8 @@ where } else { self.peeked[self.write_index] = self.lexer.next(interner)?; } - self.write_index = (self.write_index + 1) % PEEK_BUF_SIZE; + self.write_index = (self.write_index + 1) % PEEK_BUF_SIZE; debug_assert_ne!( self.read_index, self.write_index, "we reached the read index with the write index" diff --git a/test_ignore.toml b/test_ignore.toml index 66ef2009dc4..64ccdec7b44 100644 --- a/test_ignore.toml +++ b/test_ignore.toml @@ -15,6 +15,7 @@ features = [ "decorators", "array-grouping", "IsHTMLDDA", + "legacy-regexp", # Non-implemented Intl features "intl-normative-optional",