Skip to content

Commit

Permalink
perf: improve some more the speed of ]]> detection
Browse files Browse the repository at this point in the history
  • Loading branch information
lddubeau committed Jun 20, 2019
1 parent 306e7d9 commit a0216cd
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 29 deletions.
85 changes: 58 additions & 27 deletions lib/saxes.js
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,10 @@ function isName(name) {
return NAME_RE.test(name);
}

// States used while scanning character data for the forbidden sequence "]]>".
// The parser's ``forbiddenState`` field holds one of these values.
//
// No part of "]]>" is currently pending.
const FORBIDDEN_START = 0;
// The last character scanned was a single "]".
const FORBIDDEN_BRACKET = 1;
// The last characters scanned were "]]" (further "]" characters keep this
// state); a ">" seen in this state is reported as an error.
const FORBIDDEN_BRACKET_BRACKET = 2;

/**
* Data structure for an XML tag.
*
Expand Down Expand Up @@ -312,6 +316,7 @@ class SaxesParser {
this.chunkPosition = 0;
this.i = 0;
this.trailingCR = false;
this.forbiddenState = FORBIDDEN_START;
/**
* A map of entity name to expansion.
*
Expand Down Expand Up @@ -801,33 +806,70 @@ class SaxesParser {
// for performing the ]]> check. A previous version of this code checked
// ``this.text`` for the presence of ]]>. That simplified the code but was
// very costly when character data contained a lot of entities to be parsed.
//
// Since we are using a specialized loop, we also keep track of the presence
// of ]]> in text data. The sequence ]]> is forbidden to appear as-is.
//
const { chunk, limit, i: start } = this;
let { forbiddenState } = this;
let nonSpace = false;
let c;
// eslint-disable-next-line no-labels, no-restricted-syntax
scanLoop:
while (this.i < limit) {
const code = this.getCode();
if (code === LESS || code === AMP) {
switch (code) {
case LESS:
this.state = S_OPEN_WAKA;
c = code;
forbiddenState = FORBIDDEN_START;
// eslint-disable-next-line no-labels
break scanLoop;
case AMP:
this.state = S_ENTITY;
this.entityReturnState = S_TEXT;
c = code;
forbiddenState = FORBIDDEN_START;
nonSpace = true;
// eslint-disable-next-line no-labels
break scanLoop;
case CLOSE_BRACKET:
switch (forbiddenState) {
case FORBIDDEN_START:
forbiddenState = FORBIDDEN_BRACKET;
break;
case FORBIDDEN_BRACKET:
forbiddenState = FORBIDDEN_BRACKET_BRACKET;
break;
case FORBIDDEN_BRACKET_BRACKET:
break;
default:
forbiddenState = FORBIDDEN_START;
}
nonSpace = true;
break;
case GREATER:
if (forbiddenState === FORBIDDEN_BRACKET_BRACKET) {
this.fail("the string \"]]>\" is disallowed in char data.");
}
forbiddenState = FORBIDDEN_START;
nonSpace = true;
break;
default:
forbiddenState = FORBIDDEN_START;
if (!isS(code)) {
nonSpace = true;
}
}
}
this.forbiddenState = forbiddenState;

// This is faster than adding codepoints one by one.
const slice = chunk.substring(start,
c === undefined ? undefined :
(this.i - (c <= 0xFFFF ? 1 : 2)));

// We test for the presence of ]]>, which is not allowed in CharData. We
// have to take into account edge conditions.
if (slice.includes("]]>") ||
(slice[0] === ">" && this.text.endsWith("]]")) ||
(slice.startsWith("]>") && this.text.endsWith("]"))) {
this.fail("the string \"]]>\" is disallowed in char data.");
}
this.text += chunk.substring(start,
c === undefined ? undefined :
(this.i - (c <= 0xFFFF ? 1 : 2)));

this.text += slice;

if ((!this.sawRoot || this.closedRoot) &&
(/\S/.test(slice) || c === AMP)) {
if (nonSpace && (!this.sawRoot || this.closedRoot)) {
// We use the reportedTextBeforeRoot and reportedTextAfterRoot flags
// to avoid reporting errors for every single character that is out of
// place.
Expand All @@ -841,17 +883,6 @@ class SaxesParser {
this.reportedTextAfterRoot = true;
}
}

switch (c) {
case LESS:
this.state = S_OPEN_WAKA;
break;
case AMP:
this.state = S_ENTITY;
this.entityReturnState = S_TEXT;
break;
default:
}
}

/** @private */
Expand Down
4 changes: 2 additions & 2 deletions test/wrong-cdata-closure.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ describe("wrong cdata closure", () => {
isSelfClosing: false,
}],
["error",
"undefined:1:23: the string \"]]>\" is disallowed in char data."],
"undefined:1:19: the string \"]]>\" is disallowed in char data."],
["text", "somethingx]]>moo"],
["closetag", {
name: "span",
Expand Down Expand Up @@ -83,7 +83,7 @@ describe("wrong cdata closure", () => {
isSelfClosing: false,
}],
["error",
"undefined:1:20: the string \"]]>\" is disallowed in char data."],
"undefined:1:19: the string \"]]>\" is disallowed in char data."],
["text", "somethingx]]>moo"],
["closetag", {
name: "span",
Expand Down

0 comments on commit a0216cd

Please sign in to comment.