-
Notifications
You must be signed in to change notification settings - Fork 2
/
generate-extended-expressions.js
142 lines (133 loc) · 3.88 KB
/
generate-extended-expressions.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
const UNICODE_VERSION = require(
'regenerate-unicode-properties/unicode-version.js'
);
// Operator tokens placed between two operand expressions when building the
// combined expressions. The empty string represents union: the two operands
// are simply written next to each other with nothing in between.
const operators = [
// Difference/subtraction.
'--',
// Intersection.
'&&',
// Union.
'',
];
// Operand fixtures that get combined pairwise with each operator.
// Each entry has:
// - `description`: a slug used to build the description of a combined expression.
// - `expression`: the source text of the operand as it appears in the pattern.
// - `strings`: the Set of strings the operand stands for; used to compute the
//   expected match/non-match strings of a combined expression.
// NOTE(review): the `@unicode/unicode-*` modules required below are assumed to
// export an iterable of strings (they are passed straight to `new Set`) —
// confirm against the package.
const parts = [
// A property escape of a character property.
{
description: 'character-property-escape',
expression: String.raw`\p{ASCII_Hex_Digit}`,
strings: new Set(require(`@unicode/unicode-${UNICODE_VERSION}/Binary_Property/ASCII_Hex_Digit/symbols.js`)),
},
// A property escape of a property of strings.
{
description: 'property-of-strings-escape',
expression: String.raw`\p{Emoji_Keycap_Sequence}`,
strings: new Set(require(`@unicode/unicode-${UNICODE_VERSION}/Sequence_Property/Emoji_Keycap_Sequence/index.js`)),
},
// A (nested) character class.
{
description: 'character-class',
expression: String.raw`[0-9]`,
strings: new Set(['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']),
},
// A string literal.
{
description: 'string-literal',
expression: String.raw`\q{0|2|4|9\uFE0F\u20E3}`,
strings: new Set(['0', '2', '4', '9\uFE0F\u20E3']),
},
// A lone character.
{
description: 'character',
expression: '_',
strings: new Set(['_']),
},
// A character class escape character.
{
description: 'character-class-escape',
expression: String.raw`\d`,
strings: new Set(['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']),
},
];
// Candidate strings that `populateNonMatchStrings` adds to an expression's
// non-match list, except for those the expression is expected to match.
// The trailing comments note which operand(s) each candidate relates to.
const DEFAULT_NON_MATCH_STRINGS = [
'\u2603', // snowman
'\u{1D306}', // astral character
'\u{1F1E7}\u{1F1EA}', // string consisting of multiple astral characters
'6\uFE0F\u20E3', // Emoji_Keycap_Sequence
'7', // ASCII_Hex_Digit + [0-9]
'9\uFE0F\u20E3', // Emoji_Keycap_Sequence used in string literal
'C', // ASCII_Hex_Digit
];
/**
 * Completes the non-match list of an expression with the default candidates.
 *
 * Every entry of `DEFAULT_NON_MATCH_STRINGS` that is not among `matchStrings`
 * is added to the supplied `nonMatchStrings`; the result is de-duplicated and
 * sorted with the default string ordering.
 *
 * @param {object} options
 * @param {Iterable<string>} options.matchStrings - Strings the expression is expected to match.
 * @param {Iterable<string>} [options.nonMatchStrings=[]] - Strings already known not to match.
 * @returns {{matchStrings: Iterable<string>, nonMatchStrings: string[]}}
 *   The `matchStrings` value as passed in, plus the completed, sorted non-match list.
 */
const populateNonMatchStrings = ({ matchStrings, nonMatchStrings = [] }) => {
	const accepted = new Set(matchStrings);
	const rejected = new Set(nonMatchStrings);
	DEFAULT_NON_MATCH_STRINGS
		.filter((candidate) => !accepted.has(candidate))
		.forEach((candidate) => rejected.add(candidate));
	return {
		matchStrings,
		nonMatchStrings: Array.from(rejected).sort(),
	};
};
/**
 * Computes the expected match and non-match strings for `A <operator> B`.
 *
 * - `'--'` (difference): strings of A that are not in B match; strings of A
 *   that are also in B do not.
 * - `'&&'` (intersection): strings of A that are also in B match; strings of
 *   A that are not in B do not.
 * - `''` (union): every string of A or B matches (de-duplicated and sorted).
 *
 * The non-match list is then completed via `populateNonMatchStrings`.
 *
 * @param {string} operator - One of `'--'`, `'&&'` or `''`.
 * @param {Set<string>} stringsA - Strings represented by the left operand.
 * @param {Set<string>} stringsB - Strings represented by the right operand.
 * @returns {{matchStrings: string[], nonMatchStrings: string[]}}
 * @throws {Error} If `operator` is not one of the supported operators.
 */
const resolveStrings = (operator, stringsA, stringsB) => {
	// Splits A, in iteration order, into the strings it shares with B and the
	// strings that only A contains.
	const partitionA = () => {
		const shared = [];
		const exclusive = [];
		for (const candidate of stringsA) {
			(stringsB.has(candidate) ? shared : exclusive).push(candidate);
		}
		return { shared, exclusive };
	};

	if (operator === '--') {
		// Difference/subtraction.
		const { shared, exclusive } = partitionA();
		return populateNonMatchStrings({
			matchStrings: exclusive,
			nonMatchStrings: shared,
		});
	}

	if (operator === '&&') {
		// Intersection.
		const { shared, exclusive } = partitionA();
		return populateNonMatchStrings({
			matchStrings: shared,
			nonMatchStrings: exclusive,
		});
	}

	if (operator === '') {
		// Union.
		const union = [...new Set([...stringsA, ...stringsB])];
		union.sort();
		return populateNonMatchStrings({
			matchStrings: union,
			nonMatchStrings: [],
		});
	}

	throw new Error(`Unknown operator ${operator}`);
};
// Human-readable names of the supported operators, keyed by operator token.
// Built once instead of on every call.
const OPERATOR_DESCRIPTIONS = new Map([
	['--', 'difference'],
	['&&', 'intersection'],
	['', 'union'],
]);

/**
 * Returns the human-readable name of a set operator.
 *
 * @param {string} operator - One of `'--'`, `'&&'` or `''`.
 * @returns {string} `'difference'`, `'intersection'` or `'union'`.
 * @throws {Error} If `operator` is not a supported operator. Failing loudly
 *   keeps the literal text "undefined" out of generated descriptions.
 */
const describeOperator = (operator) => {
	const description = OPERATOR_DESCRIPTIONS.get(operator);
	if (description === undefined) {
		throw new Error(`Unknown operator ${operator}`);
	}
	return description;
};
/**
 * Builds every `<partA><operator><partB>` combination.
 *
 * For each operator, every ordered pair of parts (including a part paired
 * with itself) produces one entry, in operator-major, then partA, then partB
 * order.
 *
 * @param {Iterable<string>} operators - Operator tokens to combine with.
 * @param {Iterable<{description: string, expression: string, strings: Set<string>}>} parts
 *   Operands to combine.
 * @returns {Array<{description: string, expression: string, matchStrings: string[], nonMatchStrings: string[]}>}
 */
const mix = (operators, parts) => {
	// Produces the entry for a single `left <operator> right` combination.
	const combine = (operator, left, right) => {
		const expression = `${left.expression}${operator}${right.expression}`;
		const resolved = resolveStrings(operator, left.strings, right.strings);
		const description = `${left.description}-${describeOperator(operator)}-${right.description}`;
		return {
			description,
			expression,
			matchStrings: resolved.matchStrings,
			nonMatchStrings: resolved.nonMatchStrings,
		};
	};

	const combinations = [];
	for (const operator of operators) {
		for (const left of parts) {
			for (const right of parts) {
				combinations.push(combine(operator, left, right));
			}
		}
	}
	return combinations;
};
/**
 * Generates the full list of extended character class expressions by
 * combining the module-level `parts` with each of the module-level `operators`.
 *
 * @returns {Array<object>} The entries produced by `mix(operators, parts)`.
 */
const generateExtendedCharacterClassExpressions = () => mix(operators, parts);
module.exports = generateExtendedCharacterClassExpressions;