Skip to content

Commit

Permalink
Detect more forms of supportable \G
Browse files Browse the repository at this point in the history
  • Loading branch information
slevithan committed Nov 3, 2024
1 parent 8fc5397 commit 3c44de2
Show file tree
Hide file tree
Showing 7 changed files with 109 additions and 33 deletions.
8 changes: 4 additions & 4 deletions scripts/onig-match.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,15 @@ async function exec([pattern, str]) {

const libMatches = [];
let libMatch = transpiledRegExpResult(pattern, str, 0);
while (libMatch.result) {
while (libMatch.result !== null) {
libMatches.push(libMatch);
libMatch = transpiledRegExpResult(pattern, str, libMatch.index + libMatch.result.length);
libMatch = transpiledRegExpResult(pattern, str, libMatch.index + (libMatch.result.length || 1));
}
const onigMatches = [];
let onigMatch = await onigurumaResult(pattern, str, 0);
while (onigMatch.result) {
while (onigMatch.result !== null) {
onigMatches.push(onigMatch);
onigMatch = await onigurumaResult(pattern, str, onigMatch.index + onigMatch.result.length);
onigMatch = await onigurumaResult(pattern, str, onigMatch.index + (onigMatch.result.length || 1));
}

console.log('Pattern:', color('yellow', pattern));
Expand Down
8 changes: 4 additions & 4 deletions spec/helpers/matchers.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@ function getArgs(actual, expected) {
const opts = {
pattern: typeof expected === 'string' ? expected : expected.pattern,
flags: expected.flags ?? '',
maxTarget: expected.maxTarget ?? null,
minTarget: expected.minTarget ?? null,
maxTestTarget: expected.maxTestTarget ?? null,
minTestTarget: expected.minTestTarget ?? null,
};
const targets = ['ES2018', 'ES2024', 'ESNext'];
const targeted = targets.
filter(target => !opts.maxTarget || (EsVersion[target] <= EsVersion[opts.maxTarget])).
filter(target => !opts.minTarget || (EsVersion[target] >= EsVersion[opts.minTarget]));
filter(target => !opts.maxTestTarget || (EsVersion[target] <= EsVersion[opts.maxTestTarget])).
filter(target => !opts.minTestTarget || (EsVersion[target] >= EsVersion[opts.minTestTarget]));
return {
pattern: opts.pattern,
flags: opts.flags,
Expand Down
57 changes: 52 additions & 5 deletions spec/match-assertion.spec.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import {compile, toRegExp} from '../dist/index.mjs';
import {r} from '../src/utils.js';
import {maxTestTargetForPatternMods} from './helpers/features.js';
import {matchers} from './helpers/matchers.js';

beforeEach(() => {
Expand Down Expand Up @@ -67,17 +68,63 @@ describe('Assertion', () => {
expect('abbcbb'.match(toRegExp(r`\G[ab]`, '', {global: true}))).toEqual(['a', 'b', 'b']);
});

it('should allow redundant assertions', () => {
expect('a').toExactlyMatch(r`\G\Ga`);
expect('b').toExactlyMatch(r`\Ga|\G\Gb`);
});

// Unsupported: not emulatable without RegExp subclass
it('should throw if not used at the start of every top-level alternative', () => {
expect(() => compile(r`a\G`)).toThrow();
expect(() => compile(r`\Ga|b`)).toThrow();
expect(() => compile(r`a|\Gb`)).toThrow();
});

it('should allow if following a directive', () => {
expect('a').toExactlyMatch(r`\K\Ga`);
expect('a').toExactlyMatch({
pattern: r`(?i)\Ga`,
maxTestTarget: maxTestTargetForPatternMods,
});
expect('a').toExactlyMatch({
pattern: r`(?i)(?m)\Ga`,
maxTestTarget: maxTestTargetForPatternMods,
});
});

it('should allow if following an assertion', () => {
expect('a').toExactlyMatch(r`\A\Ga`);
expect('a').toExactlyMatch(r`\b\Ga`);
expect('a').toExactlyMatch(r`(?=a)\Ga`);
expect('a').toExactlyMatch(r`(?<!a)\Ga`);
expect('a').toExactlyMatch(r`(?<!a)(?=a)\Ga`);
});

it('should allow if following a 0-min quantified token', () => {
expect('a').toExactlyMatch(r`a*\Ga`);
expect('a').toExactlyMatch(r`(a)*\Ga`);
expect('a').toExactlyMatch(r`[a]*\Ga`);
});

it('should throw if following a non-0-min quantified token', () => {
expect(() => compile(r`a+\G`)).toThrow();
expect(() => compile(r`a+?\G`)).toThrow();
expect(() => compile(r`(a)+\G`)).toThrow();
});

it('should check within groups to determine validity', () => {
expect('a').toExactlyMatch(r`(\Ga)`);
expect('a').toExactlyMatch(r`(?:(?>^(?<n>\Ga)))`);
expect(() => compile(r`(?:(?>a(?<n>\Gb)))`)).toThrow();
expect('a').toExactlyMatch(r`\Ga|(((\Gb)))`);
expect(() => compile(r`\Ga|(((b\Gc)))`)).toThrow();
});

it('should throw if leading in a non-0-min quantified group', () => {
expect(() => compile(r`(\Ga)+`)).toThrow();
expect(() => compile(r`(\Ga)+\G`)).toThrow();
});

// Documenting current behavior; supportable
it('should allow redundant assertions', () => {
expect(() => compile(r`\G\Ga`)).toThrow();
expect(() => compile(r`\Ga|\G\Gb`)).toThrow();
});
});

describe('string_end', () => {
Expand Down
18 changes: 9 additions & 9 deletions spec/match-backreference.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -252,11 +252,11 @@ describe('Backreference', () => {
expect('').not.toFindMatch(r`(?<a>(?<a>\k<a>))`);
expect('aa').toExactlyMatch({
pattern: r`(?<n>a)\k<n>|(?<n>b\k<n>)`,
maxTarget: maxTestTargetForDuplicateNames,
maxTestTarget: maxTestTargetForDuplicateNames,
});
expect(['a', 'b', 'ba', 'bb']).not.toFindMatch({
pattern: r`(?<n>a)\k<n>|(?<n>b\k<n>)`,
maxTarget: maxTestTargetForDuplicateNames,
maxTestTarget: maxTestTargetForDuplicateNames,
});
});

Expand All @@ -265,11 +265,11 @@ describe('Backreference', () => {
expect('aba').toExactlyMatch(r`(?<n>a)(?<n>b\k<n>)`);
expect(['aa', 'bcb']).toExactlyMatch({
pattern: r`(?<n>a)\k<n>|(?<n>b)(?<n>c\k<n>)`,
maxTarget: maxTestTargetForDuplicateNames,
maxTestTarget: maxTestTargetForDuplicateNames,
});
expect(['a', 'bc', 'bca', 'bcc']).not.toFindMatch({
pattern: r`(?<n>a)\k<n>|(?<n>b)(?<n>c\k<n>)`,
maxTarget: maxTestTargetForDuplicateNames,
maxTestTarget: maxTestTargetForDuplicateNames,
});
});

Expand Down Expand Up @@ -297,7 +297,7 @@ describe('Backreference', () => {
expect('aab').toExactlyMatch(r`(?<n>a)\k<n>(?<n>b)`);
expect('aa').toExactlyMatch({
pattern: r`(?<n>a)\k<n>|(?<n>b)`,
maxTarget: maxTestTargetForDuplicateNames,
maxTestTarget: maxTestTargetForDuplicateNames,
});
});

Expand Down Expand Up @@ -345,19 +345,19 @@ describe('Backreference', () => {
// rather than JS logic where they match the empty string
expect(['aa', 'bb']).toExactlyMatch({
pattern: r`(?<n>a)\k<n>|(?<n>b)\k<n>`,
maxTarget: maxTestTargetForDuplicateNames,
maxTestTarget: maxTestTargetForDuplicateNames,
});
expect(['a', 'b', 'ba']).not.toFindMatch({
pattern: r`(?<n>a)\k<n>|(?<n>b)\k<n>`,
maxTarget: maxTestTargetForDuplicateNames,
maxTestTarget: maxTestTargetForDuplicateNames,
});
expect(['aa', 'bcb', 'bcc']).toExactlyMatch({
pattern: r`(?<n>a)\k<n>|(?<n>b)(?<n>c)\k<n>`,
maxTarget: maxTestTargetForDuplicateNames,
maxTestTarget: maxTestTargetForDuplicateNames,
});
expect(['a', 'bc', 'bca']).not.toFindMatch({
pattern: r`(?<n>a)\k<n>|(?<n>b)(?<n>c)\k<n>`,
maxTarget: maxTestTargetForDuplicateNames,
maxTestTarget: maxTestTargetForDuplicateNames,
});
});

Expand Down
8 changes: 4 additions & 4 deletions spec/match-char-class-range.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -31,19 +31,19 @@ describe('CharacterClassRange', () => {
it('should match unescaped hyphen as literal at intersection boundary', () => {
expect('-').toExactlyMatch({
pattern: r`[a-&&\p{Any}]`,
minTarget: minTestTargetForFlagV,
minTestTarget: minTestTargetForFlagV,
});
expect('-').toExactlyMatch({
pattern: r`[\w-&&\p{Any}]`,
minTarget: minTestTargetForFlagV,
minTestTarget: minTestTargetForFlagV,
});
expect('-').toExactlyMatch({
pattern: r`[\p{Any}&&-a]`,
minTarget: minTestTargetForFlagV,
minTestTarget: minTestTargetForFlagV,
});
expect('-').toExactlyMatch({
pattern: r`[\p{Any}&&-\w]`,
minTarget: minTestTargetForFlagV,
minTestTarget: minTestTargetForFlagV,
});
});

Expand Down
2 changes: 1 addition & 1 deletion spec/match-char-set.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ describe('CharacterSet', () => {
it('should match line feed with flag m disabled', () => {
expect('\n').toExactlyMatch({
pattern: r`(?-m)\O`,
maxTarget: maxTestTargetForPatternMods,
maxTestTarget: maxTestTargetForPatternMods,
});
});

Expand Down
41 changes: 35 additions & 6 deletions src/transform.js
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ function transform(ast, options) {
minTargetEs2024: isMinTarget(opts.bestEffortTarget, 'ES2024'),
// Subroutines can appear before the groups they ref, so collect reffed nodes for a second pass
subroutineRefMap: new Map(),
supportedGNodes: new Set(),
};
traverse({node: ast}, firstPassState, FirstPassVisitor);
// Global flags modified by the first pass
Expand Down Expand Up @@ -105,7 +106,7 @@ const FirstPassVisitor = {
},
},

Assertion({node, parent, key, ast, remove, replaceWith}) {
Assertion({node, ast, remove, replaceWith}, {supportedGNodes}) {
const {kind, negate} = node;
if (kind === AstAssertionKinds.line_end) {
// Onig's only line break char is line feed, unlike JS
Expand All @@ -114,9 +115,7 @@ const FirstPassVisitor = {
// Onig's only line break char is line feed, unlike JS
replaceWith(parseFragment(r`(?<=\A|\n)`));
} else if (kind === AstAssertionKinds.search_start) {
// Allows multiple leading `\G`s since the the node is removed. Additional `\G` error
// checking in `Pattern` visitor
if (parent.parent !== ast.pattern || key !== 0) {
if (!supportedGNodes.has(node)) {
throw new Error(r`Uses "\G" in a way that's unsupported for conversion to JS`);
}
ast.flags.sticky = true;
Expand Down Expand Up @@ -259,14 +258,14 @@ const FirstPassVisitor = {
!node.flags.enable && !node.flags.disable && delete node.flags;
},

Pattern({node}) {
Pattern({node}, {supportedGNodes}) {
// For `\G` to be accurately emulatable using JS flag y, it must be at (and only at) the start
// of every top-level alternative (with complex rules for what determines being at the start).
// Additional `\G` error checking in `Assertion` visitor
let hasAltWithLeadG = false;
let hasAltWithoutLeadG = false;
for (const alt of node.alternatives) {
if (alt.elements[0]?.kind === AstAssertionKinds.search_start) {
if (hasLeadingG(alt.elements, supportedGNodes)) {
hasAltWithLeadG = true;
} else {
hasAltWithoutLeadG = true;
Expand Down Expand Up @@ -642,6 +641,36 @@ function getParentAlternative(node) {
return null;
}

function hasLeadingG(els, supportedGNodes) {
if (!els.length) {
return false;
}
const firstToConsider = els.find(el => {
return el.kind === AstAssertionKinds.search_start ?
true :
( el.type !== AstTypes.Directive &&
el.type !== AstTypes.Assertion &&
!(el.type === AstTypes.Quantifier && !el.min)
);
});
if (!firstToConsider) {
return false;
}
if (firstToConsider.kind === AstAssertionKinds.search_start) {
supportedGNodes.add(firstToConsider);
return true;
}
if (firstToConsider.type === AstTypes.Group || firstToConsider.type === AstTypes.CapturingGroup) {
for (const alt of firstToConsider.alternatives) {
if (!hasLeadingG(alt.elements, supportedGNodes)) {
return false;
}
}
return true;
}
return false;
}

function isValidGroupNameJs(name) {
// JS group names are more restrictive than Onig; see
// <developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Lexical_grammar#identifiers>
Expand Down

0 comments on commit 3c44de2

Please sign in to comment.