From 0725a3a74ebb92701db1942e07e988c34799121d Mon Sep 17 00:00:00 2001 From: graphemecluster Date: Wed, 22 May 2024 11:48:34 +0800 Subject: [PATCH] Correct flags scanning for non-BMP characters (cherry picked from commit e67692acb3fdd068b4e577dc9ad9fa350f2e4ca8) --- src/compiler/scanner.ts | 38 ++++++++++--------- ...egularExpressionWithNonBMPFlags.errors.txt | 23 +++++++++++ .../regularExpressionWithNonBMPFlags.js | 8 ++++ .../regularExpressionWithNonBMPFlags.symbols | 6 +++ .../regularExpressionWithNonBMPFlags.types | 9 +++++ .../regularExpressionWithNonBMPFlags.ts | 3 ++ 6 files changed, 69 insertions(+), 18 deletions(-) create mode 100644 tests/baselines/reference/regularExpressionWithNonBMPFlags.errors.txt create mode 100644 tests/baselines/reference/regularExpressionWithNonBMPFlags.js create mode 100644 tests/baselines/reference/regularExpressionWithNonBMPFlags.symbols create mode 100644 tests/baselines/reference/regularExpressionWithNonBMPFlags.types create mode 100644 tests/cases/compiler/regularExpressionWithNonBMPFlags.ts diff --git a/src/compiler/scanner.ts b/src/compiler/scanner.ts index 5d93e57ebe660..0e9666dedddfe 100644 --- a/src/compiler/scanner.ts +++ b/src/compiler/scanner.ts @@ -2474,28 +2474,29 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean const isUnterminated = !!(tokenFlags & TokenFlags.Unterminated); const endOfBody = p - (isUnterminated ? 0 : 1); let regExpFlags = RegularExpressionFlags.None; - while (p < end) { - const ch = charCodeUnchecked(p); - if (!isIdentifierPart(ch, languageVersion)) { + while (true) { + const ch = codePointChecked(p); + if (ch === CharacterCodes.EOF || !isIdentifierPart(ch, languageVersion)) { break; } + const size = charSize(ch); if (reportErrors) { - const flag = characterToRegularExpressionFlag(String.fromCharCode(ch)); + const flag = characterToRegularExpressionFlag(utf16EncodeAsString(ch)); if (flag === undefined) { - error(Diagnostics.Unknown_regular_expression_flag, p, 1); + error(Diagnostics.Unknown_regular_expression_flag, p, size); } else if (regExpFlags & flag) { - error(Diagnostics.Duplicate_regular_expression_flag, p, 1); + error(Diagnostics.Duplicate_regular_expression_flag, p, size); } else if (((regExpFlags | flag) & RegularExpressionFlags.UnicodeMode) === RegularExpressionFlags.UnicodeMode) { - error(Diagnostics.The_Unicode_u_flag_and_the_Unicode_Sets_v_flag_cannot_be_set_simultaneously, p, 1); + error(Diagnostics.The_Unicode_u_flag_and_the_Unicode_Sets_v_flag_cannot_be_set_simultaneously, p, size); } else { regExpFlags |= flag; - checkRegularExpressionFlagAvailable(flag, p); + checkRegularExpressionFlagAvailability(flag, p, size); } } - p++; + p += size; } pos = p; if (reportErrors) { @@ -2763,25 +2764,26 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean function scanPatternModifiers(currFlags: RegularExpressionFlags): RegularExpressionFlags { while (true) { - const ch = charCodeChecked(pos); + const ch = codePointChecked(pos); if (ch === CharacterCodes.EOF || !isIdentifierPart(ch, languageVersion)) { break; } - const flag = characterToRegularExpressionFlag(String.fromCharCode(ch)); + const size = charSize(ch); + const flag = characterToRegularExpressionFlag(utf16EncodeAsString(ch)); if (flag === undefined) { - error(Diagnostics.Unknown_regular_expression_flag, pos, 1); + error(Diagnostics.Unknown_regular_expression_flag, pos, size); } else if (currFlags & flag) { - error(Diagnostics.Duplicate_regular_expression_flag, pos, 1); + error(Diagnostics.Duplicate_regular_expression_flag, pos, size); } else if (!(flag & RegularExpressionFlags.Modifiers)) { - error(Diagnostics.This_regular_expression_flag_cannot_be_toggled_within_a_subpattern, pos, 1); + error(Diagnostics.This_regular_expression_flag_cannot_be_toggled_within_a_subpattern, pos, size); } else { currFlags |= flag; - checkRegularExpressionFlagAvailable(flag, pos); + checkRegularExpressionFlagAvailability(flag, pos, size); } - pos++; + pos += size; } return currFlags; } @@ -3494,10 +3496,10 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean }); } - function checkRegularExpressionFlagAvailable(flag: RegularExpressionFlags, pos: number) { + function checkRegularExpressionFlagAvailability(flag: RegularExpressionFlags, pos: number, size: number) { const availableFrom = regExpFlagToFirstAvailableLanguageVersion.get(flag) as ScriptTarget | undefined; if (availableFrom && languageVersion < availableFrom) { - error(Diagnostics.This_regular_expression_flag_is_only_available_when_targeting_0_or_later, pos, 1, getNameOfScriptTarget(availableFrom)); + error(Diagnostics.This_regular_expression_flag_is_only_available_when_targeting_0_or_later, pos, size, getNameOfScriptTarget(availableFrom)); } } diff --git a/tests/baselines/reference/regularExpressionWithNonBMPFlags.errors.txt b/tests/baselines/reference/regularExpressionWithNonBMPFlags.errors.txt new file mode 100644 index 0000000000000..b91d0d9c12fd1 --- /dev/null +++ b/tests/baselines/reference/regularExpressionWithNonBMPFlags.errors.txt @@ -0,0 +1,23 @@ +regularExpressionWithNonBMPFlags.ts(1,23): error TS1499: Unknown regular expression flag. +regularExpressionWithNonBMPFlags.ts(1,25): error TS1499: Unknown regular expression flag. +regularExpressionWithNonBMPFlags.ts(1,28): error TS1499: Unknown regular expression flag. +regularExpressionWithNonBMPFlags.ts(1,41): error TS1499: Unknown regular expression flag. +regularExpressionWithNonBMPFlags.ts(1,43): error TS1499: Unknown regular expression flag. +regularExpressionWithNonBMPFlags.ts(1,45): error TS1499: Unknown regular expression flag. + + +==== regularExpressionWithNonBMPFlags.ts (6 errors) ==== + const 𝘳𝘦𝘨𝘦𝘹 = /(?𝘴𝘪-𝘮:^𝘧𝘰𝘰.)/𝘨𝘮𝘶; + ~~ +!!! error TS1499: Unknown regular expression flag. + ~~ +!!! error TS1499: Unknown regular expression flag. + ~~ +!!! error TS1499: Unknown regular expression flag. + ~~ +!!! error TS1499: Unknown regular expression flag. + ~~ +!!! error TS1499: Unknown regular expression flag. + ~~ +!!! error TS1499: Unknown regular expression flag. + \ No newline at end of file diff --git a/tests/baselines/reference/regularExpressionWithNonBMPFlags.js b/tests/baselines/reference/regularExpressionWithNonBMPFlags.js new file mode 100644 index 0000000000000..847b74684b459 --- /dev/null +++ b/tests/baselines/reference/regularExpressionWithNonBMPFlags.js @@ -0,0 +1,8 @@ +//// [tests/cases/compiler/regularExpressionWithNonBMPFlags.ts] //// + +//// [regularExpressionWithNonBMPFlags.ts] +const 𝘳𝘦𝘨𝘦𝘹 = /(?𝘴𝘪-𝘮:^𝘧𝘰𝘰.)/𝘨𝘮𝘶; + + +//// [regularExpressionWithNonBMPFlags.js] +const 𝘳𝘦𝘨𝘦𝘹 = /(?𝘴𝘪-𝘮:^𝘧𝘰𝘰.)/𝘨𝘮𝘶; diff --git a/tests/baselines/reference/regularExpressionWithNonBMPFlags.symbols b/tests/baselines/reference/regularExpressionWithNonBMPFlags.symbols new file mode 100644 index 0000000000000..29c7a53335550 --- /dev/null +++ b/tests/baselines/reference/regularExpressionWithNonBMPFlags.symbols @@ -0,0 +1,6 @@ +//// [tests/cases/compiler/regularExpressionWithNonBMPFlags.ts] //// + +=== regularExpressionWithNonBMPFlags.ts === +const 𝘳𝘦𝘨𝘦𝘹 = /(?𝘴𝘪-𝘮:^𝘧𝘰𝘰.)/𝘨𝘮𝘶; +>𝘳𝘦𝘨𝘦𝘹 : Symbol(𝘳𝘦𝘨𝘦𝘹, Decl(regularExpressionWithNonBMPFlags.ts, 0, 5)) + diff --git a/tests/baselines/reference/regularExpressionWithNonBMPFlags.types b/tests/baselines/reference/regularExpressionWithNonBMPFlags.types new file mode 100644 index 0000000000000..5f385d608df02 --- /dev/null +++ b/tests/baselines/reference/regularExpressionWithNonBMPFlags.types @@ -0,0 +1,9 @@ +//// [tests/cases/compiler/regularExpressionWithNonBMPFlags.ts] //// + +=== regularExpressionWithNonBMPFlags.ts === +const 𝘳𝘦𝘨𝘦𝘹 = /(?𝘴𝘪-𝘮:^𝘧𝘰𝘰.)/𝘨𝘮𝘶; +>𝘳𝘦𝘨𝘦𝘹 : RegExp +> : ^^^^^^ +>/(?𝘴𝘪-𝘮:^𝘧𝘰𝘰.)/𝘨𝘮𝘶 : RegExp +> : ^^^^^^ + diff --git a/tests/cases/compiler/regularExpressionWithNonBMPFlags.ts b/tests/cases/compiler/regularExpressionWithNonBMPFlags.ts new file mode 100644 index 0000000000000..5f85755c0c5c1 --- /dev/null +++ b/tests/cases/compiler/regularExpressionWithNonBMPFlags.ts @@ -0,0 +1,3 @@ +// @target: esnext + +const 𝘳𝘦𝘨𝘦𝘹 = /(?𝘴𝘪-𝘮:^𝘧𝘰𝘰.)/𝘨𝘮𝘶;