Skip to content

Commit

Permalink
In profiling postcss I found that a significant amount of time was being
Browse files Browse the repository at this point in the history
spent in [`unesc`](https://github.com/postcss/postcss-selector-parser/commits/master/src/util/unesc.js). This was due to the expensive regex checks that were
being performed on the fly for every selector in the codebase, which appear to perform quite poorly in modern Node and V8.

![image](https://user-images.githubusercontent.com/883126/114136698-fdd98a80-98bf-11eb-8068-ace4f6f2274d.png)

----

As an experiment and based on some prior experience with this class of slowdown I migrated the implementation to one that performs a scan through the string instead of running a regex replace. By testing this on my local application I instantly saw the work from this function go from > 900 ms to ~100ms.

![image](https://user-images.githubusercontent.com/883126/114136734-0c27a680-98c0-11eb-82ab-f0c9529fd32d.png)

This implementation passes all of the existing test cases and aims to mirror the behavior of the prior implementation :)

-----

In my application, the major wins come from purgecss, where this change drops my total build time by multiple seconds! 🔥
  • Loading branch information
samccone committed Apr 9, 2021
1 parent 96a85e3 commit 0e00b18
Show file tree
Hide file tree
Showing 3 changed files with 111 additions and 16 deletions.
1 change: 1 addition & 0 deletions src/__tests__/classes.js
Original file line number Diff line number Diff line change
Expand Up @@ -264,3 +264,4 @@ test('class selector with escaping (36)', '.not-pseudo\\:\\:focus', (t, tree) =>
t.deepEqual(tree.nodes[0].nodes[0].type, 'class');
t.deepEqual(tree.nodes[0].nodes[0].raws.value, 'not-pseudo\\:\\:focus');
});

46 changes: 46 additions & 0 deletions src/__tests__/util/unesc.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import {test} from '../util/helpers';

// Table of unescaping scenarios. Each entry is
// [test name, selector source, expected `value` of the first node of the first selector].
const unescCases = [
    ['id selector', '#foo', 'foo'],
    ['escaped special char', '#w\\+', 'w+'],
    ['tailing escape', '#foo\\', 'foo\\'],
    ['double escape', '#wow\\\\k', 'wow\\k'],
    ['leading numeric', '.\\31 23', '123'],
    ['emoji', '.\\🐐', '🐐'],
    // https://www.w3.org/International/questions/qa-escapes#cssescapes
    ['hex escape', '.\\E9motion', 'émotion'],
    ['hex escape with space', '.\\E9 dition', 'édition'],
    ['hex escape with hex number', '.\\0000E9dition', 'édition'],
    ['class selector with escaping', '.\\1D306', '𝌆'],
    ['class selector with escaping with more chars', '.\\1D306k', '𝌆k'],
];

// Register one test per scenario, in table order.
unescCases.forEach(([name, selector, expected]) => {
    test(name, selector, (t, tree) => {
        t.deepEqual(tree.nodes[0].nodes[0].value, expected);
    });
});
80 changes: 64 additions & 16 deletions src/util/unesc.js
Original file line number Diff line number Diff line change
@@ -1,19 +1,67 @@
// Many thanks for this post which made this migration much easier.
// https://mathiasbynens.be/notes/css-escapes

// A CSS hex escape is a backslash followed by one to six hex digits.
const MAX_HEX_DIGITS = 6;

// Highest code point `String.fromCodePoint` accepts; anything larger throws.
const MAX_CODE_POINT = 0x10ffff;

// Substituted for escapes naming U+0000, a surrogate, or a value above
// MAX_CODE_POINT.
// https://drafts.csswg.org/css-syntax/#consume-escaped-code-point
const REPLACEMENT_CHARACTER = '\uFFFD';

/**
 * @param {number} code UTF-16 code unit
 * @returns {boolean} true for [0-9A-Fa-f]
 */
function isHexDigit (code) {
    return (
        (code >= 48 && code <= 57) || // 0-9
        (code >= 65 && code <= 70) || // A-F
        (code >= 97 && code <= 102) // a-f
    );
}

/**
 * @param {number} code UTF-16 code unit
 * @returns {boolean} true for space, tab, line feed, form feed or carriage return
 */
function isWhitespace (code) {
    return (
        code === 0x20 ||
        code === 0x09 ||
        code === 0x0a ||
        code === 0x0c ||
        code === 0x0d
    );
}

/**
 * Reads a hex escape from the start of `str` (the text right after a
 * backslash).
 *
 * @param {string} str Text following the backslash. Pass at least
 * MAX_HEX_DIGITS + 1 characters when available so that a whitespace
 * terminator after a full six-digit escape can be seen.
 * @returns {[string, number]|undefined} The decoded character and the number
 * of characters of `str` that were consumed (hex digits plus at most one
 * terminating whitespace character), or undefined when `str` does not start
 * with a hex digit.
 */
function gobbleHex (str) {
    let digits = 0;
    while (
        digits < MAX_HEX_DIGITS &&
        digits < str.length &&
        isHexDigit(str.charCodeAt(digits))
    ) {
        digits++;
    }

    if (digits === 0) {
        return undefined;
    }

    // A single whitespace character directly after the digits terminates the
    // escape and belongs to it.
    const spaceTerminated =
        digits < str.length && isWhitespace(str.charCodeAt(digits));
    const consumed = digits + (spaceTerminated ? 1 : 0);

    const codePoint = parseInt(str.slice(0, digits), 16);
    const isSurrogate = codePoint >= 0xd800 && codePoint <= 0xdfff;
    if (codePoint === 0 || isSurrogate || codePoint > MAX_CODE_POINT) {
        // String.fromCodePoint would throw a RangeError above MAX_CODE_POINT.
        return [REPLACEMENT_CHARACTER, consumed];
    }

    return [String.fromCodePoint(codePoint), consumed];
}

/**
 * Resolves CSS escape sequences in `str` by scanning it once.
 *
 * - `\` + 1-6 hex digits (+ one optional whitespace) becomes the code point.
 * - `\\` becomes a single `\`.
 * - A lone `\` at the very end of the string is kept.
 * - Any other `\x` drops the backslash and keeps `x`.
 *
 * @param {string} str
 * @returns {string}
 */
export default function unesc (str) {
    // Nothing to unescape: skip the scan and the string rebuild entirely.
    if (!str.includes('\\')) {
        return str;
    }

    let ret = '';

    for (let i = 0; i < str.length; i++) {
        if (str[i] !== '\\') {
            ret += str[i];
            continue;
        }

        // One extra character beyond the digit limit so the terminator after
        // a six-digit escape is visible to gobbleHex.
        const gobbled = gobbleHex(str.slice(i + 1, i + 2 + MAX_HEX_DIGITS));
        if (gobbled !== undefined) {
            ret += gobbled[0];
            i += gobbled[1];
            continue;
        }

        // Retain a single \ when double escaped (`\\`)
        // https://github.com/postcss/postcss-selector-parser/commit/268c9a7656fb53f543dc620aa5b73a30ec3ff20e
        if (str[i + 1] === '\\') {
            ret += '\\';
            i++;
            continue;
        }

        // If \ is at the end of the string retain it
        // https://github.com/postcss/postcss-selector-parser/commit/01a6b346e3612ce1ab20219acc26abdc259ccefb
        if (i + 1 === str.length) {
            ret += '\\';
        }
        // Otherwise the backslash is dropped and the escaped character is
        // appended as-is on the next iteration.
    }

    return ret;
}

0 comments on commit 0e00b18

Please sign in to comment.