diff --git a/crates/oxc_linter/src/rules/eslint/no_control_regex.rs b/crates/oxc_linter/src/rules/eslint/no_control_regex.rs index 7a678334d8ec0..d0b4d31590bdf 100644 --- a/crates/oxc_linter/src/rules/eslint/no_control_regex.rs +++ b/crates/oxc_linter/src/rules/eslint/no_control_regex.rs @@ -6,11 +6,11 @@ use oxc_macros::declare_oxc_lint; use oxc_regular_expression::{ ast::{CapturingGroup, Character, Pattern}, visit::{walk, Visit}, - Parser, ParserOptions, + ConstructorParser, Options, }; -use oxc_span::{GetSpan, Span}; +use oxc_span::Span; -use crate::{ast_util::extract_regex_flags, context::LintContext, rule::Rule, AstNode}; +use crate::{context::LintContext, rule::Rule, AstNode}; fn no_control_regex_diagnostic(count: usize, regex: &str, span: Span) -> OxcDiagnostic { debug_assert!(count > 0); @@ -82,75 +82,63 @@ impl Rule for NoControlRegex { } // new RegExp() - AstKind::NewExpression(expr) => { - // constructor is RegExp, - if expr.callee.is_specific_id("RegExp") - // which is provided at least 1 parameter, - && expr.arguments.len() > 0 - { - // where the first one is a string literal - // note: improvements required for strings used via identifier - // references - if let Argument::StringLiteral(pattern) = &expr.arguments[0] { - // get pattern from arguments. Missing or non-string arguments - // will be runtime errors, but are not covered by this rule. - parse_and_check_regex( - context, - &pattern.value, - &expr.arguments, - pattern.span, - ); + AstKind::NewExpression(expr) if expr.callee.is_specific_id("RegExp") => { + // note: improvements required for strings used via identifier references + // Missing or non-string arguments will be runtime errors, but are not covered by this rule. 
+ match (&expr.arguments.first(), &expr.arguments.get(1)) { + ( + Some(Argument::StringLiteral(pattern)), + Some(Argument::StringLiteral(flags)), + ) => { + parse_and_check_regex(context, pattern.span, Some(flags.span)); } + (Some(Argument::StringLiteral(pattern)), _) => { + parse_and_check_regex(context, pattern.span, None); + } + _ => {} } } // RegExp() - AstKind::CallExpression(expr) => { - // constructor is RegExp, - if expr.callee.is_specific_id("RegExp") - // which is provided at least 1 parameter, - && expr.arguments.len() > 0 - { - // where the first one is a string literal - // note: improvements required for strings used via identifier - // references - if let Argument::StringLiteral(pattern) = &expr.arguments[0] { - // get pattern from arguments. Missing or non-string arguments - // will be runtime errors, but are not covered by this rule. - parse_and_check_regex( - context, - &pattern.value, - &expr.arguments, - pattern.span, - ); + AstKind::CallExpression(expr) if expr.callee.is_specific_id("RegExp") => { + // note: improvements required for strings used via identifier references + // Missing or non-string arguments will be runtime errors, but are not covered by this rule. 
+ match (&expr.arguments.first(), &expr.arguments.get(1)) { + ( + Some(Argument::StringLiteral(pattern)), + Some(Argument::StringLiteral(flags)), + ) => { + parse_and_check_regex(context, pattern.span, Some(flags.span)); + } + (Some(Argument::StringLiteral(pattern)), _) => { + parse_and_check_regex(context, pattern.span, None); } + _ => {} } } + _ => {} }; } } -fn parse_and_check_regex<'a>( - ctx: &LintContext<'a>, - source_text: &'a str, - arguments: &oxc_allocator::Vec<'a, Argument<'a>>, - expr_span: Span, -) { +fn parse_and_check_regex(ctx: &LintContext, pattern_span: Span, flags_span: Option<Span>) { let allocator = Allocator::default(); - let flags = extract_regex_flags(arguments); - let flags_text = flags.map_or(String::new(), |f| f.to_string()); - let parser = Parser::new( + + let flags_text = flags_span.map(|span| span.source_text(ctx.source_text())); + let parser = ConstructorParser::new( &allocator, - source_text, - ParserOptions::default() - .with_span_offset(arguments.first().map_or(0, |arg| arg.span().start)) - .with_flags(&flags_text), + pattern_span.source_text(ctx.source_text()), + flags_text, + Options { + pattern_span_offset: pattern_span.start, + flags_span_offset: flags_span.map_or(0, |span| span.start), + }, ); let Ok(pattern) = parser.parse() else { return; }; - check_pattern(ctx, &pattern, expr_span); + check_pattern(ctx, &pattern, pattern_span); } fn check_pattern(context: &LintContext, pattern: &Pattern, span: Span) { @@ -279,7 +267,6 @@ mod tests { vec![ r"let r = /\u{0}/u", r"let r = new RegExp('\\u{0}', 'u');", - r"let r = new RegExp('\\u{0}', `u`);", r"let r = /\u{c}/u", r"let r = /\u{1F}/u", r"let r = new RegExp('\\u{1F}', 'u');", // flags are known & contain u diff --git a/crates/oxc_linter/src/rules/eslint/no_invalid_regexp.rs b/crates/oxc_linter/src/rules/eslint/no_invalid_regexp.rs index 9b1a5341f0ec0..01f22801f3858 100644 --- a/crates/oxc_linter/src/rules/eslint/no_invalid_regexp.rs +++ 
b/crates/oxc_linter/src/rules/eslint/no_invalid_regexp.rs @@ -2,7 +2,7 @@ use oxc_allocator::Allocator; use oxc_ast::{ast::Argument, AstKind}; use oxc_diagnostics::OxcDiagnostic; use oxc_macros::declare_oxc_lint; -use oxc_regular_expression::{Parser, ParserOptions}; +use oxc_regular_expression::{ConstructorParser, Options}; use oxc_span::Span; use rustc_hash::FxHashSet; use serde::Deserialize; @@ -86,13 +86,20 @@ impl Rule for NoInvalidRegexp { return; } + let (mut u_flag_found, mut v_flag_found) = (false, false); // Validate flags first if exists - if let Some((flags_span_start, flags_text)) = flags_arg { - let (mut u_flag_found, mut v_flag_found) = (false, false); + // `oxc_regular_expression` crate has an ability to validate flags. + // But, it does not accept any `allow_constructor_flags` option. + // And if we omit user defined flags here, `Span` may be incorrect on error reporting. + if let Some(flags_span) = flags_arg { + // Strip quotes + let flags_text = + flags_span.source_text(ctx.source_text()).trim_matches('\'').trim_matches('"'); + let mut unique_flags = FxHashSet::default(); for (idx, ch) in flags_text.char_indices() { #[allow(clippy::cast_possible_truncation)] - let start = flags_span_start + idx as u32; + let start = flags_span.start + 1 + idx as u32; // Invalid combination: u+v if ch == 'u' { @@ -128,12 +135,23 @@ impl Rule for NoInvalidRegexp { // Pattern check is skipped when 1st argument is NOT a `StringLiteral` // e.g. 
`new RegExp(var)`, `RegExp("str" + var)` let allocator = Allocator::default(); - if let Some((pattern_span_start, pattern_text)) = pattern_arg { - let options = ParserOptions::default() - .with_span_offset(pattern_span_start) - .with_flags(flags_arg.map_or("", |(_, flags_text)| flags_text)); + if let Some(pattern_span) = pattern_arg { + let pattern_text = pattern_span.source_text(ctx.source_text()); + + let flags_text = match (u_flag_found, v_flag_found) { + (true, false) => Some("'u'"), + (_, true) => Some("'v'"), + (false, false) => None, + }; - match Parser::new(&allocator, pattern_text, options).parse() { + match ConstructorParser::new( + &allocator, + pattern_text, + flags_text, + Options { pattern_span_offset: pattern_span.start, flags_span_offset: 0 }, + ) + .parse() + { Ok(_) => {} Err(diagnostic) => ctx.diagnostic(diagnostic), } @@ -141,27 +159,19 @@ impl Rule for NoInvalidRegexp { } } -/// Returns: (span_start, text) -/// span_start + 1 for opening string bracket. -type ParsedArgument<'a> = (u32, &'a str); fn parse_arguments_to_check<'a>( arg1: Option<&Argument<'a>>, arg2: Option<&Argument<'a>>, -) -> (Option<ParsedArgument<'a>>, Option<ParsedArgument<'a>>) { +) -> (Option<Span>, Option<Span>) { match (arg1, arg2) { // ("pattern", "flags") - (Some(Argument::StringLiteral(pattern)), Some(Argument::StringLiteral(flags))) => ( - Some((pattern.span.start + 1, pattern.value.as_str())), - Some((flags.span.start + 1, flags.value.as_str())), - ), - // (pattern, "flags") - (Some(_arg), Some(Argument::StringLiteral(flags))) => { - (None, Some((flags.span.start + 1, flags.value.as_str()))) + (Some(Argument::StringLiteral(pattern)), Some(Argument::StringLiteral(flags))) => { + (Some(pattern.span), Some(flags.span)) } + // (pattern, "flags") + (Some(_arg), Some(Argument::StringLiteral(flags))) => (None, Some(flags.span)), // ("pattern") - (Some(Argument::StringLiteral(pattern)), None) => { - (Some((pattern.span.start + 1, pattern.value.as_str())), None) - } + (Some(Argument::StringLiteral(pattern)), None) => 
(Some(pattern.span), None), // (pattern), () _ => (None, None), } @@ -172,7 +182,7 @@ fn test() { use crate::tester::Tester; let pass = vec![ - ("[RegExp(''), /a/uv]", None), + ("RegExp('')", None), ("RegExp()", None), ("RegExp('.', 'g')", None), ("new RegExp('.')", None), diff --git a/crates/oxc_linter/src/rules/eslint/no_regex_spaces.rs b/crates/oxc_linter/src/rules/eslint/no_regex_spaces.rs index 650b6828e3b1b..f79a921bd51a8 100644 --- a/crates/oxc_linter/src/rules/eslint/no_regex_spaces.rs +++ b/crates/oxc_linter/src/rules/eslint/no_regex_spaces.rs @@ -10,7 +10,7 @@ use oxc_macros::declare_oxc_lint; use oxc_regular_expression::{ ast::{Character, Pattern}, visit::{RegExpAstKind, Visit}, - Parser, ParserOptions, + ConstructorParser, Options, }; use oxc_span::Span; @@ -63,13 +63,13 @@ impl Rule for NoRegexSpaces { } AstKind::CallExpression(expr) if Self::is_regexp_call_expression(expr) => { - if let Some(span) = Self::find_expr_to_report(&expr.arguments) { + if let Some(span) = Self::find_expr_to_report(&expr.arguments, ctx) { ctx.diagnostic(no_regex_spaces_diagnostic(span)); // RegExp('a b') } } AstKind::NewExpression(expr) if Self::is_regexp_new_expression(expr) => { - if let Some(span) = Self::find_expr_to_report(&expr.arguments) { + if let Some(span) = Self::find_expr_to_report(&expr.arguments, ctx) { ctx.diagnostic(no_regex_spaces_diagnostic(span)); // new RegExp('a b') } } @@ -90,7 +90,7 @@ impl NoRegexSpaces { find_consecutive_spaces(pattern) } - fn find_expr_to_report(args: &Vec<'_, Argument<'_>>) -> Option<Span> { + fn find_expr_to_report(args: &Vec<'_, Argument<'_>>, ctx: &LintContext) -> Option<Span> { if let Some(expr) = args.get(1).and_then(Argument::as_expression) { if !expr.is_string_literal() { return None; // skip on indeterminate flag, e.g. 
RegExp('a b', flags) @@ -105,10 +105,11 @@ impl NoRegexSpaces { } let alloc = Allocator::default(); - let parser = Parser::new( + let parser = ConstructorParser::new( &alloc, - pattern.value.as_str(), - ParserOptions::default().with_span_offset(pattern.span.start + 1), + pattern.span.source_text(ctx.source_text()), + None, + Options { pattern_span_offset: pattern.span.start, ..Options::default() }, ); let parsed_pattern = parser.parse().ok()?; diff --git a/crates/oxc_linter/src/snapshots/no_invalid_regexp.snap b/crates/oxc_linter/src/snapshots/no_invalid_regexp.snap index d4618cde7bc51..fcf8911ec8356 100644 --- a/crates/oxc_linter/src/snapshots/no_invalid_regexp.snap +++ b/crates/oxc_linter/src/snapshots/no_invalid_regexp.snap @@ -104,51 +104,51 @@ source: crates/oxc_linter/src/tester.rs ╰──── ⚠ eslint(no-invalid-regexp): Invalid regular expression: Could not parse the entire pattern - ╭─[no_invalid_regexp.tsx:1:14] + ╭─[no_invalid_regexp.tsx:1:15] 1 │ new RegExp('\\a', 'u'); - · ▲ + · ▲ ╰──── ⚠ eslint(no-invalid-regexp): Invalid regular expression: Could not parse the entire pattern - ╭─[no_invalid_regexp.tsx:1:14] + ╭─[no_invalid_regexp.tsx:1:15] 1 │ new RegExp('\\a', 'u'); - · ▲ + · ▲ ╰──── ⚠ eslint(no-invalid-regexp): Invalid regular expression: Invalid braced quantifier - ╭─[no_invalid_regexp.tsx:1:14] + ╭─[no_invalid_regexp.tsx:1:15] 1 │ RegExp('\\u{0}*'); - · ─ + · ─ ╰──── ⚠ eslint(no-invalid-regexp): Invalid regular expression: Invalid braced quantifier - ╭─[no_invalid_regexp.tsx:1:18] + ╭─[no_invalid_regexp.tsx:1:19] 1 │ new RegExp('\\u{0}*'); - · ─ + · ─ ╰──── ⚠ eslint(no-invalid-regexp): Invalid regular expression: Invalid braced quantifier - ╭─[no_invalid_regexp.tsx:1:18] + ╭─[no_invalid_regexp.tsx:1:19] 1 │ new RegExp('\\u{0}*', ''); - · ─ + · ─ ╰──── ⚠ eslint(no-invalid-regexp): Invalid regular expression: Invalid braced quantifier - ╭─[no_invalid_regexp.tsx:1:18] + ╭─[no_invalid_regexp.tsx:1:19] 1 │ new RegExp('\\u{0}*', 'a'); - · ─ + · ─ ╰──── ⚠ 
eslint(no-invalid-regexp): Invalid regular expression: Invalid braced quantifier - ╭─[no_invalid_regexp.tsx:1:14] + ╭─[no_invalid_regexp.tsx:1:15] 1 │ RegExp('\\u{0}*'); - · ─ + · ─ ╰──── ⚠ eslint(no-invalid-regexp): Invalid regular expression: Invalid extended atom escape ╭─[no_invalid_regexp.tsx:1:13] 1 │ new RegExp('\\'); - · ─ + · ── ╰──── ⚠ eslint(no-invalid-regexp): Invalid regular expression: Unknown flag @@ -196,7 +196,7 @@ source: crates/oxc_linter/src/tester.rs ⚠ eslint(no-invalid-regexp): Invalid regular expression: Unterminated character class ╭─[no_invalid_regexp.tsx:1:13] 1 │ new RegExp('[[]\\u{0}*' /* valid only with `u` flag */, 'v') - · ──────── + · ───────── ╰──── ⚠ eslint(no-invalid-regexp): Invalid regular expression: Duplicated capturing group names diff --git a/crates/oxc_linter/src/snapshots/no_regex_spaces.snap b/crates/oxc_linter/src/snapshots/no_regex_spaces.snap index abd93cd7c86e7..a2f56ac51f58b 100644 --- a/crates/oxc_linter/src/snapshots/no_regex_spaces.snap +++ b/crates/oxc_linter/src/snapshots/no_regex_spaces.snap @@ -128,16 +128,16 @@ source: crates/oxc_linter/src/tester.rs help: Use a quantifier: ` {2}` ⚠ eslint(no-regex-spaces): Multiple consecutive spaces are hard to count. - ╭─[no_regex_spaces.tsx:1:25] + ╭─[no_regex_spaces.tsx:1:26] 1 │ var foo = new RegExp('\\d ') - · ── + · ── ╰──── help: Use a quantifier: ` {2}` ⚠ eslint(no-regex-spaces): Multiple consecutive spaces are hard to count. 
- ╭─[no_regex_spaces.tsx:1:25] + ╭─[no_regex_spaces.tsx:1:26] 1 │ var foo = RegExp('\\u0041 ') - · ─── + · ─── ╰──── help: Use a quantifier: ` {3}` diff --git a/crates/oxc_parser/examples/regular_expression.rs b/crates/oxc_parser/examples/regular_expression.rs index 2ac496d5da120..0591615d9fd8e 100644 --- a/crates/oxc_parser/examples/regular_expression.rs +++ b/crates/oxc_parser/examples/regular_expression.rs @@ -4,7 +4,7 @@ use std::{env, fs, path::Path, sync::Arc}; use oxc_allocator::Allocator; use oxc_ast::{ast, AstKind, Visit}; use oxc_parser::{ParseOptions, Parser}; -use oxc_regular_expression::{Parser as RegExpParser, ParserOptions as RegExpParserOptions}; +use oxc_regular_expression::{ConstructorParser as RegExpParser, Options as RegExpParserOptions}; use oxc_span::SourceType; // `cargo run -p oxc_parser --example regular_expression` @@ -62,32 +62,24 @@ impl<'a> Visit<'a> for RegularExpressionVisitor { { println!("🍀 {}", new_expr.span.source_text(&self.source_text)); - let (pattern, pattern_span) = match new_expr.arguments.first() { - Some(ast::Argument::StringLiteral(sl)) => (&sl.value, &sl.span), - Some(ast::Argument::TemplateLiteral(tl)) - if tl.is_no_substitution_template() => - { - (&tl.quasi().unwrap(), &tl.span) - } + let pattern_span = match new_expr.arguments.first() { + Some(ast::Argument::StringLiteral(sl)) => sl.span, _ => return, }; - let flags = match new_expr.arguments.get(1) { - Some(ast::Argument::StringLiteral(sl)) => &sl.value, - Some(ast::Argument::TemplateLiteral(tl)) - if tl.is_no_substitution_template() => - { - &tl.quasi().unwrap() - } - _ => "", + let flags_span = match new_expr.arguments.get(1) { + Some(ast::Argument::StringLiteral(sl)) => Some(sl.span), + _ => None, }; let parsed = RegExpParser::new( &allocator, - pattern, - RegExpParserOptions::default() - .with_span_offset(pattern_span.start + 1) - .with_flags(flags), + pattern_span.source_text(&self.source_text), + flags_span.map(|span| span.source_text(&self.source_text)), + 
RegExpParserOptions { + pattern_span_offset: pattern_span.start, + flags_span_offset: flags_span.map_or(0, |span| span.start), + }, ) .parse(); diff --git a/crates/oxc_parser/src/cursor.rs b/crates/oxc_parser/src/cursor.rs index 7cf354c7df236..1dbf3d85620a2 100644 --- a/crates/oxc_parser/src/cursor.rs +++ b/crates/oxc_parser/src/cursor.rs @@ -223,10 +223,10 @@ impl<'a> ParserImpl<'a> { } /// Tell lexer to read a regex - pub(crate) fn read_regex(&mut self) -> Result<(u32, RegExpFlags)> { - let (token, pattern_end, flags) = self.lexer.next_regex(self.cur_kind())?; + pub(crate) fn read_regex(&mut self) -> Result<(u32, RegExpFlags, bool)> { + let (token, pattern_end, flags, flags_error) = self.lexer.next_regex(self.cur_kind())?; self.token = token; - Ok((pattern_end, flags)) + Ok((pattern_end, flags, flags_error)) } /// Tell lexer to read a template substitution tail diff --git a/crates/oxc_parser/src/js/expression.rs b/crates/oxc_parser/src/js/expression.rs index a161ecb88a483..60ac1be30574d 100644 --- a/crates/oxc_parser/src/js/expression.rs +++ b/crates/oxc_parser/src/js/expression.rs @@ -345,17 +345,18 @@ impl<'a> ParserImpl<'a> { pub(crate) fn parse_literal_regexp(&mut self) -> Result> { let span = self.start_span(); // split out pattern - let (pattern_end, flags) = self.read_regex()?; + let (pattern_end, flags, flags_error) = self.read_regex()?; let pattern_start = self.cur_token().start + 1; // +1 to exclude left `/` let pattern_text = &self.source_text[pattern_start as usize..pattern_end as usize]; let flags_start = pattern_end + 1; // +1 to include right `/` let flags_text = &self.source_text[flags_start as usize..self.cur_token().end as usize]; self.bump_any(); - let pattern = self - .options - .parse_regular_expression + // Parse pattern if options is enabled and also flags are valid + let pattern = (self.options.parse_regular_expression && !flags_error) .then_some(()) - .map(|()| self.parse_regex_pattern(pattern_start, pattern_text, flags_text)) + .map(|()| 
{ + self.parse_regex_pattern(pattern_start, pattern_text, flags_start, flags_text) + }) .map_or_else( || RegExpPattern::Raw(pattern_text), |pat| { @@ -367,13 +368,20 @@ impl<'a> ParserImpl<'a> { fn parse_regex_pattern( &mut self, - span_offset: u32, + pattern_span_offset: u32, pattern: &'a str, + flags_span_offset: u32, flags: &'a str, ) -> Option>> { - use oxc_regular_expression::{Parser, ParserOptions}; - let options = ParserOptions::default().with_span_offset(span_offset).with_flags(flags); - match Parser::new(self.ast.allocator, pattern, options).parse() { + use oxc_regular_expression::{LiteralParser, Options}; + match LiteralParser::new( + self.ast.allocator, + pattern, + Some(flags), + Options { pattern_span_offset, flags_span_offset }, + ) + .parse() + { Ok(regular_expression) => Some(self.ast.alloc(regular_expression)), Err(diagnostic) => { self.error(diagnostic); diff --git a/crates/oxc_parser/src/lexer/regex.rs b/crates/oxc_parser/src/lexer/regex.rs index 8d009124d9aea..f17decf967d8d 100644 --- a/crates/oxc_parser/src/lexer/regex.rs +++ b/crates/oxc_parser/src/lexer/regex.rs @@ -11,21 +11,21 @@ impl<'a> Lexer<'a> { /// where a `RegularExpressionLiteral` is permitted /// Which means the parser needs to re-tokenize on `PrimaryExpression`, /// `RegularExpressionLiteral` only appear on the right hand side of `PrimaryExpression` - pub(crate) fn next_regex(&mut self, kind: Kind) -> Result<(Token, u32, RegExpFlags)> { + pub(crate) fn next_regex(&mut self, kind: Kind) -> Result<(Token, u32, RegExpFlags, bool)> { self.token.start = self.offset() - match kind { Kind::Slash => 1, Kind::SlashEq => 2, _ => unreachable!(), }; - let (pattern_end, flags) = self.read_regex()?; + let (pattern_end, flags, flags_error) = self.read_regex()?; self.lookahead.clear(); let token = self.finish_next(Kind::RegExp); - Ok((token, pattern_end, flags)) + Ok((token, pattern_end, flags, flags_error)) } /// 12.9.5 Regular Expression Literals - fn read_regex(&mut self) -> Result<(u32, 
RegExpFlags)> { + fn read_regex(&mut self) -> Result<(u32, RegExpFlags, bool)> { let mut in_escape = false; let mut in_character_class = false; loop { @@ -55,6 +55,8 @@ impl<'a> Lexer<'a> { let pattern_end = self.offset() - 1; // -1 to exclude `/` let mut flags = RegExpFlags::empty(); + // To prevent parsing `oxc_regular_expression` with invalid flags in the parser + let mut flags_error = false; while let Some(b @ (b'$' | b'_' | b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9')) = self.peek_byte() @@ -65,6 +67,7 @@ impl<'a> Lexer<'a> { b as char, self.current_offset().expand_left(1), )); + flags_error = true; continue; }; if flags.contains(flag) { @@ -72,11 +75,12 @@ impl<'a> Lexer<'a> { b as char, self.current_offset().expand_left(1), )); + flags_error = true; continue; } flags |= flag; } - Ok((pattern_end, flags)) + Ok((pattern_end, flags, flags_error)) } } diff --git a/crates/oxc_regular_expression/src/lib.rs b/crates/oxc_regular_expression/src/lib.rs index 8ce28e005699e..db2beee795ceb 100644 --- a/crates/oxc_regular_expression/src/lib.rs +++ b/crates/oxc_regular_expression/src/lib.rs @@ -20,87 +20,3 @@ pub use crate::{ options::Options, parser::{ConstructorParser, LiteralParser}, }; - -// LEGACY APIS TO BE REMOVED SOON! 
============================================ - -#[derive(Clone, Copy, Debug, Default)] -pub struct ParserOptions { - pub span_offset: u32, - pub unicode_mode: bool, - pub unicode_sets_mode: bool, - pub parse_string_literal: bool, -} - -impl ParserOptions { - #[must_use] - pub fn with_span_offset(self, span_offset: u32) -> Self { - ParserOptions { span_offset, ..self } - } - - #[must_use] - pub fn with_flags(self, flags: &str) -> Self { - let (mut unicode_mode, mut unicode_sets_mode) = (false, false); - for ch in flags.chars() { - if ch == 'u' { - unicode_mode = true; - } - if ch == 'v' { - unicode_mode = true; - unicode_sets_mode = true; - } - } - - ParserOptions { unicode_mode, unicode_sets_mode, ..self } - } - - #[must_use] - pub fn with_parse_string_literal(self) -> Self { - ParserOptions { parse_string_literal: true, ..self } - } -} - -pub struct Parser<'a> { - allocator: &'a oxc_allocator::Allocator, - source_text: &'a str, - options: ParserOptions, -} - -impl<'a> Parser<'a> { - pub fn new( - allocator: &'a oxc_allocator::Allocator, - source_text: &'a str, - options: ParserOptions, - ) -> Self { - Self { allocator, source_text, options } - } - - pub fn parse(self) -> oxc_diagnostics::Result> { - let ParserOptions { unicode_mode, unicode_sets_mode, span_offset, parse_string_literal } = - self.options; - - let options = Options { - pattern_span_offset: span_offset, - flags_span_offset: 0, // Never be used - }; - - if parse_string_literal { - #[allow(clippy::match_same_arms)] - let flags_text = match (unicode_mode, unicode_sets_mode) { - (true, false) => Some("'u'"), - (false, true) => Some("'v'"), - (true, true) => Some("'v'"), // Do not validate this here - (false, false) => None, - }; - ConstructorParser::new(self.allocator, self.source_text, flags_text, options).parse() - } else { - #[allow(clippy::match_same_arms)] - let flags_text = match (unicode_mode, unicode_sets_mode) { - (true, false) => Some("u"), - (false, true) => Some("v"), - (true, true) => 
Some("v"), // Do not validate this here - (false, false) => None, - }; - LiteralParser::new(self.allocator, self.source_text, flags_text, options).parse() - } - } -} diff --git a/crates/oxc_transformer/src/regexp/mod.rs b/crates/oxc_transformer/src/regexp/mod.rs index 5f56df296db14..2e3061c78e859 100644 --- a/crates/oxc_transformer/src/regexp/mod.rs +++ b/crates/oxc_transformer/src/regexp/mod.rs @@ -132,11 +132,23 @@ impl<'a, 'ctx> Traverse<'a> for RegExp<'a, 'ctx> { return; } - let span = regexp.span; + let literal_span = regexp.span; let pattern = match &mut regexp.regex.pattern { RegExpPattern::Raw(raw) => { + #[expect(clippy::cast_possible_truncation)] + let pattern_len = raw.len() as u32; + let pattern_span_start = literal_span.start + 1; // +1 to skip the opening `/` + let flags_span_start = pattern_span_start + pattern_len + 1; // +1 to skip the closing `/` + let flags_text = Span::new(flags_span_start, literal_span.end) + .source_text(self.ctx.source_text); // Try to parse pattern - match try_parse_pattern(raw, span, flags, ctx) { + match try_parse_pattern( + raw, + pattern_span_start, + flags_text, + flags_span_start, + ctx, + ) { Ok(pattern) => { regexp.regex.pattern = RegExpPattern::Pattern(ctx.alloc(pattern)); let RegExpPattern::Pattern(pattern) = &regexp.regex.pattern else { @@ -238,14 +250,13 @@ fn character_class_has_unicode_property_escape(character_class: &CharacterClass) fn try_parse_pattern<'a>( raw: &'a str, - span: Span, - flags: RegExpFlags, + pattern_span_offset: u32, + flags_text: &'a str, + flags_span_offset: u32, ctx: &mut TraverseCtx<'a>, ) -> Result<Pattern<'a>> { - use oxc_regular_expression::{Parser, ParserOptions}; + use oxc_regular_expression::{LiteralParser, Options}; - let options = ParserOptions::default() - .with_span_offset(span.start + 1) // exclude `/` - .with_flags(&flags.to_string()); - Parser::new(ctx.ast.allocator, raw, options).parse() + let options = Options { pattern_span_offset, flags_span_offset }; + 
LiteralParser::new(ctx.ast.allocator, raw, Some(flags_text), options).parse() } diff --git a/tasks/coverage/snapshots/parser_babel.snap b/tasks/coverage/snapshots/parser_babel.snap index 5ca80ceaabd50..de7af459959cf 100644 --- a/tasks/coverage/snapshots/parser_babel.snap +++ b/tasks/coverage/snapshots/parser_babel.snap @@ -6919,12 +6919,24 @@ Expect to Parse: tasks/coverage/babel/packages/babel-parser/test/fixtures/typesc · ──────────────── ╰──── + × Invalid regular expression: Invalid unicode flags combination `u` and `v` + ╭─[babel/packages/babel-parser/test/fixtures/es2024/regexp-unicode-sets/uv-error/input.js:1:6] + 1 │ /a/ugv; + · ─ + ╰──── + × The 'u' and 'v' regular expression flags cannot be enabled at the same time ╭─[babel/packages/babel-parser/test/fixtures/es2024/regexp-unicode-sets/uv-error/input.js:1:1] 1 │ /a/ugv; · ────── ╰──── + × Invalid regular expression: Invalid unicode flags combination `u` and `v` + ╭─[babel/packages/babel-parser/test/fixtures/es2024/regexp-unicode-sets/vu-error/input.js:1:5] + 1 │ /a/vu; + · ─ + ╰──── + × The 'u' and 'v' regular expression flags cannot be enabled at the same time ╭─[babel/packages/babel-parser/test/fixtures/es2024/regexp-unicode-sets/vu-error/input.js:1:1] 1 │ /a/vu; diff --git a/tasks/coverage/snapshots/parser_test262.snap b/tasks/coverage/snapshots/parser_test262.snap index e31b281f09da4..be4fb00f73689 100644 --- a/tasks/coverage/snapshots/parser_test262.snap +++ b/tasks/coverage/snapshots/parser_test262.snap @@ -1404,6 +1404,13 @@ Expect Syntax Error: tasks/coverage/test262/test/language/import/import-attribut · ── ╰──── + × Invalid regular expression: Invalid unicode flags combination `u` and `v` + ╭─[test262/test/built-ins/RegExp/prototype/unicodeSets/uv-flags.js:18:5] + 17 │ + 18 │ /./uv; + · ─ + ╰──── + × The 'u' and 'v' regular expression flags cannot be enabled at the same time ╭─[test262/test/built-ins/RegExp/prototype/unicodeSets/uv-flags.js:18:1] 17 │ diff --git a/tasks/coverage/src/driver.rs 
b/tasks/coverage/src/driver.rs index 58819f50b1758..474f0e703dfd8 100644 --- a/tasks/coverage/src/driver.rs +++ b/tasks/coverage/src/driver.rs @@ -9,7 +9,7 @@ use oxc::{ diagnostics::OxcDiagnostic, minifier::CompressOptions, parser::{ParseOptions, ParserReturn}, - regular_expression::{Parser, ParserOptions}, + regular_expression::{LiteralParser, Options}, semantic::{Semantic, SemanticBuilderReturn}, span::{cmp::ContentEq, SourceType, Span}, transformer::{TransformOptions, TransformerReturn}, @@ -166,8 +166,9 @@ impl Driver { }; let printed1 = pattern.to_string(); let flags = literal.regex.flags.to_string(); - let options = ParserOptions::default().with_flags(&flags); - match Parser::new(&allocator, &printed1, options).parse() { + match LiteralParser::new(&allocator, &printed1, Some(&flags), Options::default()) + .parse() + { Ok(pattern2) => { let printed2 = pattern2.to_string(); if !pattern2.content_eq(pattern) {