Skip to content

Commit

Permalink
Unicode-based \w and \W by default
Browse files (browse the repository at this point in the history)
  • Loading branch information
slevithan committed Nov 21, 2024
1 parent c12afb1 commit e0bae15
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 4 deletions.
11 changes: 7 additions & 4 deletions src/transform.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import {AstAssertionKinds, AstCharacterSetKinds, AstDirectiveKinds, AstTypes, As
import {applySubclassStrategies, isLoneGLookaround} from './subclass.js';
import {tokenize} from './tokenize.js';
import {traverse} from './traverse.js';
import {JsUnicodeProperties, PosixClassesMap} from './unicode.js';
import {defaultWordChar, JsUnicodeProperties, PosixClassesMap} from './unicode.js';
import {cp, getNewCurrentFlags, getOrCreate, isMinTarget, r} from './utils.js';
import {isLookaround, isZeroLengthNode} from './utils-node.js';
import emojiRegex from 'emoji-regex-xs';
Expand Down Expand Up @@ -139,9 +139,8 @@ const FirstPassVisitor = {
} else if (kind === AstAssertionKinds.string_end_newline) {
replaceWith(parseFragment(r`(?=\n?\z)`));
} else if (kind === AstAssertionKinds.word_boundary && !wordIsAscii) {
const wordChar = r`[\p{L}\p{M}\p{N}\p{Pc}]`;
const b = `(?:(?<=${wordChar})(?!${wordChar})|(?<!${wordChar})(?=${wordChar}))`;
const B = `(?:(?<=${wordChar})(?=${wordChar})|(?<!${wordChar})(?!${wordChar}))`;
const b = `(?:(?<=${defaultWordChar})(?!${defaultWordChar})|(?<!${defaultWordChar})(?=${defaultWordChar}))`;
const B = `(?:(?<=${defaultWordChar})(?=${defaultWordChar})|(?<!${defaultWordChar})(?!${defaultWordChar}))`;
replaceWith(parseFragment(negate ? B : b));
}
// Kinds `string_end` and `string_start` don't need transformation since JS flag m isn't used.
Expand Down Expand Up @@ -196,6 +195,10 @@ const FirstPassVisitor = {
const s = parseFragment('[ \t\n\v\f\r]');
s.negate = negate;
replaceWith(s);
} else if (kind === AstCharacterSetKinds.word && !wordIsAscii) {
const w = parseFragment(defaultWordChar);
w.negate = negate;
replaceWith(w);
}
},

Expand Down
3 changes: 3 additions & 0 deletions src/unicode.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ const CharsWithoutIgnoreCaseExpansion = new Set([
cp(0x131), // ı
]);

// Regex source for a single "word" character when word matching is not ASCII-only: any code point
// in Unicode general category L (letter), M (mark), N (number), or Pc (connector punctuation).
// Exported so the `\w`/`\W` and `\b`/`\B` transforms share one definition.
// NOTE(review): `r` is presumably a raw-string template tag that keeps the backslashes literal;
// confirm against its definition in utils.js.
const defaultWordChar = r`[\p{L}\p{M}\p{N}\p{Pc}]`;

function getIgnoreCaseMatchChars(char) {
// Some chars should not match the chars they case swap to
if (CharsWithoutIgnoreCaseExpansion.has(char)) {
Expand Down Expand Up @@ -286,6 +288,7 @@ const UnicodePropertiesWithSpecificCase = new Set([
]);

export {
defaultWordChar,
getIgnoreCaseMatchChars,
JsUnicodeProperties,
JsUnicodePropertiesMap,
Expand Down

0 comments on commit e0bae15

Please sign in to comment.