Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🚀 Rework unesc for a 63+% performance boost to all of postcss. #239

Merged
merged 2 commits into from
Apr 19, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/__tests__/classes.js
Original file line number Diff line number Diff line change
Expand Up @@ -264,3 +264,4 @@ test('class selector with escaping (36)', '.not-pseudo\\:\\:focus', (t, tree) =>
t.deepEqual(tree.nodes[0].nodes[0].type, 'class');
t.deepEqual(tree.nodes[0].nodes[0].raws.value, 'not-pseudo\\:\\:focus');
});

62 changes: 62 additions & 0 deletions src/__tests__/util/unesc.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import {test} from '../util/helpers';

// Table of unescape expectations. Each row is
// [test title, selector source, expected `value` of tree.nodes[0].nodes[0]].
// NOTE(review): `test` comes from ../util/helpers; presumably it parses the
// selector and hands the resulting tree to the callback — confirm there.
const unescapeCases = [
    ['id selector', '#foo', 'foo'],
    ['escaped special char', '#w\\+', 'w+'],
    ['tailing escape', '#foo\\', 'foo\\'],
    ['double escape', '#wow\\\\k', 'wow\\k'],
    ['leading numeric', '.\\31 23', '123'],
    ['emoji', '.\\🐐', '🐐'],
    // https://www.w3.org/International/questions/qa-escapes#cssescapes
    ['hex escape', '.\\E9motion', 'émotion'],
    ['hex escape with space', '.\\E9 dition', 'édition'],
    ['hex escape with hex number', '.\\0000E9dition', 'édition'],
    ['class selector with escaping', '.\\1D306', '𝌆'],
    ['class selector with escaping with more chars', '.\\1D306k', '𝌆k'],
];

// Register one test per row, in table order.
unescapeCases.forEach(([title, selector, expected]) => {
    test(title, selector, (t, tree) => {
        t.deepEqual(tree.nodes[0].nodes[0].value, expected);
    });
});
samccone marked this conversation as resolved.
Show resolved Hide resolved

// U+FFFD REPLACEMENT CHARACTER: the value the last three rows expect for
// escapes that name code point zero, a lone surrogate, or a value above U+10FFFF.
const REPLACEMENT_CHARACTER = String.fromCodePoint(0xFFFD);

// Each row is [test title, selector source, expected `value` of tree.nodes[0].nodes[0]].
const replacementCases = [
    ['class selector with escaping with more chars with whitespace', '.wow\\1D306 k', 'wow𝌆k'],
    ['handles 0 value hex', '\\0', REPLACEMENT_CHARACTER],
    ['handles lone surrogate value hex', '\\DBFF', REPLACEMENT_CHARACTER],
    ['handles out of bound values', '\\110000', REPLACEMENT_CHARACTER],
];

// Register one test per row, in table order.
replacementCases.forEach(([title, selector, expected]) => {
    test(title, selector, (t, tree) => {
        t.deepEqual(tree.nodes[0].nodes[0].value, expected);
    });
});
95 changes: 79 additions & 16 deletions src/util/unesc.js
Original file line number Diff line number Diff line change
@@ -1,19 +1,82 @@
// Many thanks for this post which made this migration much easier.
// https://mathiasbynens.be/notes/css-escapes

// A CSS hex escape is made of at most this many hex digits.
const MAX_HEX_DIGITS = 6;

/**
 * Tells whether a UTF-16 code unit is an ASCII hex digit.
 *
 * @param {number} code code unit; NaN (read past the end of a string) yields false
 * @returns {boolean}
 */
function isHexDigit (code) {
    return (
        (code >= 48 && code <= 57) || // 0-9
        (code >= 65 && code <= 70) || // A-F
        (code >= 97 && code <= 102) // a-f
    );
}

/**
 * Tells whether a UTF-16 code unit is one of the whitespace characters that
 * may terminate a hex escape: space, tab, line feed, form feed, carriage return.
 *
 * @param {number} code code unit; NaN (read past the end of a string) yields false
 * @returns {boolean}
 */
function isEscapeWhitespace (code) {
    return code === 0x20 || code === 0x09 || code === 0x0A || code === 0x0C || code === 0x0D;
}

/**
 * Decodes the hex escape found at the very start of `str`, where `str` is the
 * text that follows a backslash.
 *
 * Up to six hex digits are read. A single whitespace character directly after
 * the digits belongs to the escape and is counted as consumed. To let that
 * terminator be seen after a full six-digit escape, callers should pass at
 * least seven characters when available.
 *
 * @param {string} str
 * @returns {[string, number]|undefined} the decoded character and the number
 *   of characters of `str` that were consumed, or undefined when `str` does
 *   not start with a hex digit
 */
function gobbleHex (str) {
    let hexLength = 0;
    while (hexLength < MAX_HEX_DIGITS && isHexDigit(str.charCodeAt(hexLength))) {
        hexLength++;
    }

    if (hexLength === 0) {
        return undefined;
    }

    const codePoint = parseInt(str.slice(0, hexLength), 16);
    // https://drafts.csswg.org/css-syntax/#consume-escaped-code-point
    const terminated = isEscapeWhitespace(str.charCodeAt(hexLength));
    const consumed = hexLength + (terminated ? 1 : 0);

    const isSurrogate = codePoint >= 0xD800 && codePoint <= 0xDFFF;
    // Add special case for
    // "If this number is zero, or is for a surrogate, or is greater than the maximum allowed code point"
    // https://drafts.csswg.org/css-syntax/#maximum-allowed-code-point
    if (isSurrogate || codePoint === 0x0000 || codePoint > 0x10FFFF) {
        return ['\uFFFD', consumed];
    }

    return [String.fromCodePoint(codePoint), consumed];
}

const CONTAINS_ESCAPE = /\\/;

/**
 * Resolves CSS escape sequences in `str`.
 *
 * - `\` + hex digits (plus one optional trailing whitespace) becomes the
 *   code point it names, or U+FFFD when that code point is not allowed.
 * - `\\` becomes a single backslash.
 * - A backslash at the very end of the string is kept.
 * - Any other backslash is dropped and the character after it is kept as is.
 *
 * @param {string} str
 * @returns {string} `str` itself when it contains no backslash, otherwise the unescaped text
 */
export default function unesc (str) {
    // Fast path: nothing to unescape.
    if (!CONTAINS_ESCAPE.test(str)) {
        return str;
    }

    let ret = '';

    for (let i = 0; i < str.length; i++) {
        if (str[i] !== '\\') {
            ret += str[i];
            continue;
        }

        // Hand over the digits plus one extra character so the whitespace
        // terminator of a six-digit escape is visible to gobbleHex.
        const gobbled = gobbleHex(str.slice(i + 1, i + 2 + MAX_HEX_DIGITS));
        if (gobbled !== undefined) {
            ret += gobbled[0];
            i += gobbled[1];
            continue;
        }

        // Retain a pair of \\ if double escaped `\\\\`
        // https://github.com/postcss/postcss-selector-parser/commit/268c9a7656fb53f543dc620aa5b73a30ec3ff20e
        if (str[i + 1] === '\\') {
            ret += '\\';
            i++;
            continue;
        }

        // if \\ is at the end of the string retain it
        // https://github.com/postcss/postcss-selector-parser/commit/01a6b346e3612ce1ab20219acc26abdc259ccefb
        if (i + 1 === str.length) {
            ret += '\\';
        }
        // Otherwise the backslash is dropped; the escaped character itself is
        // appended by the next loop iteration.
    }

    return ret;
}