Skip to content

Commit

Permalink
perf: introduce captureTo and captureToChar
Browse files Browse the repository at this point in the history
  • Loading branch information
lddubeau committed Aug 24, 2018
1 parent 1981d7d commit 76eb95a
Showing 1 changed file with 101 additions and 73 deletions.
174 changes: 101 additions & 73 deletions lib/saxes.js
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,16 @@ function isQuote(c) {
return c === DQUOTE || c === SQUOTE;
}

// Code points of the two quote characters; the same pair isQuote() tests for.
const QUOTES = [DQUOTE, SQUOTE];
// White space code points: SPACE, NL, carriage return (0xD) and tab (9).
// NOTE(review): presumably mirrors the XML "S" (white space) production and
// the file's isS() helper -- confirm they stay in sync.
const S = [SPACE, NL, 0xD, 9];

// Terminator sets handed to SaxesParser#captureTo: capturing stops as soon as
// one of the listed code points is read.
// Used by sText.
const TEXT_TERMINATOR = [LESS, AMP];
// Used by sCloseTag.
const CLOSE_TAG_TERMINATOR = [GREATER, ...S];
// Used by sDoctype.
const DOCTYPE_TERMINATOR = [...QUOTES, OPEN_BRACKET, GREATER];
// Used by sDoctypeDTD.
const DOCTYPE_DTD_TERMINATOR = [...QUOTES, CLOSE_BRACKET];
// Used while in the S_XML_DECL_NAME state.
const XML_DECL_NAME_TERMINATOR = [EQUAL, QUESTION, ...S];
// Used by sAttribValueUnquoted.
const ATTRIB_VALUE_UNQUOTED_TERMINATOR = [...S, GREATER, AMP, LESS];

function isEntityStartChar(c) {
return isNameStartChar(c) || c === HASH;
}
Expand Down Expand Up @@ -602,6 +612,70 @@ class SaxesParser {
return undefined;
}

/**
* Capture characters into a buffer until encountering one of a set of
* characters.
*
* @private
*
* @param {ChunkState} chunkState The current chunk state.
*
* @param {number[]} chars An array of codepoints. Encountering a character in
* the array ends the capture.
*
* @param {string} buffer The name of the buffer to save into.
*
* @return {string|undefined} The character that made the capture end, or
* ``undefined`` if we hit the end of the chunk.
*/
captureTo(chunkState, chars, buffer) {
const { limit, chunk, i: start } = chunkState;
while (chunkState.i < limit) {
const c = this.getCode(chunkState);
if (chars.includes(c)) {
// This is faster than adding codepoints one by one.
this[buffer] += chunk.substring(start,
chunkState.i - (c <= 0xFFFF ? 1 : 2));
return c;
}
}

// This is faster than adding codepoints one by one.
this[buffer] += chunk.substring(start);
return undefined;
}

/**
* Capture characters into a buffer until encountering a character.
*
* @private
*
* @param {ChunkState} chunkState The current chunk state.
*
* @param {number} char The codepoint that ends the capture.
*
* @param {string} buffer The name of the buffer to save into.
*
* @return {boolean} ``true`` if we ran into the character. Otherwise, we ran
* into the end of the current chunk.
*/
captureToChar(chunkState, char, buffer) {
const { limit, chunk, i: start } = chunkState;
while (chunkState.i < limit) {
const c = this.getCode(chunkState);
if (c === char) {
// This is faster than adding codepoints one by one.
this[buffer] += chunk.substring(start,
chunkState.i - (c <= 0xFFFF ? 1 : 2));
return true;
}
}

// This is faster than adding codepoints one by one.
this[buffer] += chunk.substring(start);
return false;
}

/**
* Capture characters that satisfy ``isNameChar`` into a buffer.
*
Expand Down Expand Up @@ -697,9 +771,7 @@ class SaxesParser {

/** @private */
sText(chunkState) {
const c = this.captureWhile(chunkState,
cx => cx !== LESS && cx !== AMP,
"textNode");
const c = this.captureTo(chunkState, TEXT_TERMINATOR, "textNode");

if (!this.inRoot && (/\S/.test(this.textNode) || c === AMP)) {
// We use the reportedTextBeforeRoot and reportedTextAfterRoot flags
Expand Down Expand Up @@ -809,10 +881,7 @@ class SaxesParser {

/** @private */
sDoctype(chunkState) {
const c = this.captureWhile(chunkState,
cx => cx !== OPEN_BRACKET && !isQuote(cx) &&
cx !== GREATER,
"doctype");
const c = this.captureTo(chunkState, DOCTYPE_TERMINATOR, "doctype");
if (c === GREATER) {
this.state = S_TEXT;
this.emitNode("ondoctype", this.doctype);
Expand All @@ -833,21 +902,16 @@ class SaxesParser {
/** @private */
sDoctypeQuoted(chunkState) {
const { q } = this;
const c = this.captureWhile(chunkState, cx => cx !== q, "doctype");
if (!c || c !== q) {
return;
if (this.captureToChar(chunkState, q, "doctype")) {
this.doctype += String.fromCodePoint(q);
this.q = null;
this.state = S_DOCTYPE;
}

this.doctype += String.fromCodePoint(c);
this.q = null;
this.state = S_DOCTYPE;
}

/** @private */
sDoctypeDTD(chunkState) {
const c = this.captureWhile(chunkState,
cx => cx !== CLOSE_BRACKET && !isQuote(cx),
"doctype");
const c = this.captureTo(chunkState, DOCTYPE_DTD_TERMINATOR, "doctype");
if (!c) {
return;
}
Expand All @@ -865,27 +929,18 @@ class SaxesParser {
/** @private */
sDoctypeDTDQuoted(chunkState) {
const { q } = this;
const c = this.captureWhile(chunkState, cx => cx !== q, "doctype");
if (!c) {
return;
}

this.doctype += String.fromCodePoint(c);
if (c === q) {
if (this.captureToChar(chunkState, q, "doctype")) {
this.doctype += String.fromCodePoint(q);
this.state = S_DOCTYPE_DTD;
this.q = null;
}
}

/** @private */
sComment(chunkState) {
const c = this.captureWhile(chunkState, cx => cx !== MINUS, "comment");
if (c === MINUS) {
if (this.captureToChar(chunkState, MINUS, "comment")) {
this.state = S_COMMENT_ENDING;
}
else if (c) {
this.comment += String.fromCodePoint(c);
}
}

/** @private */
Expand Down Expand Up @@ -918,17 +973,9 @@ class SaxesParser {
}

sCData(chunkState) {
const c = this.captureWhile(chunkState, cx => cx !== CLOSE_BRACKET, "cdata");
if (!c) {
return;
}

if (c === CLOSE_BRACKET) {
if (this.captureToChar(chunkState, CLOSE_BRACKET, "cdata")) {
this.state = S_CDATA_ENDING;
}
else {
this.cdata += String.fromCodePoint(c);
}
}

/** @private */
Expand Down Expand Up @@ -1045,9 +1092,7 @@ class SaxesParser {
}
break;
case S_XML_DECL_NAME:
c = this.captureWhile(chunkState,
cx => cx !== QUESTION && !isS(cx) && cx !== EQUAL,
"xmlDeclName");
c = this.captureTo(chunkState, XML_DECL_NAME_TERMINATOR, "xmlDeclName");
// The question mark character is not valid inside any of the XML
// declaration name/value pairs.
if (c === QUESTION) {
Expand Down Expand Up @@ -1109,9 +1154,7 @@ class SaxesParser {
}
break;
case S_XML_DECL_VALUE:
c = this.captureWhile(chunkState,
cx => cx !== QUESTION && cx !== this.q,
"xmlDeclValue");
c = this.captureTo(chunkState, [this.q, QUESTION], "xmlDeclValue");

// The question mark character is not valid inside any of the XML
// declaration name/value pairs.
Expand Down Expand Up @@ -1167,13 +1210,10 @@ class SaxesParser {
this.piBody = String.fromCodePoint(c);
}
}
else {
c = this.captureWhile(chunkState, cx => cx !== QUESTION, "piBody");
// The question mark character is not valid inside any of the XML
// declaration name/value pairs.
if (c === QUESTION) {
this.state = S_PI_ENDING;
}
// The question mark character is not valid inside any of the XML
// declaration name/value pairs.
else if (this.captureToChar(chunkState, QUESTION, "piBody")) {
this.state = S_PI_ENDING;
}
}

Expand Down Expand Up @@ -1368,21 +1408,15 @@ class SaxesParser {

/** @private */
sAttribValueQuoted(chunkState) {
const { q } = this;
const c = this.captureWhile(
chunkState,
(cx) => {
if (cx === LESS) {
this.fail("disallowed character.");
}
return cx !== q && cx !== AMP;
},
"attribValue");
const c = this.captureTo(chunkState, [this.q, AMP, LESS], "attribValue");
if (c === AMP) {
this.state = S_ENTITY;
this.entityBufferName = "attribValue";
this.entityReturnState = S_ATTRIB_VALUE_QUOTED;
}
else if (c === LESS) {
this.fail("disallowed character.");
}
else if (c) {
if (this.attribValue.includes("]]>")) {
this.fail("the string \"]]>\" is disallowed in char data.");
Expand Down Expand Up @@ -1419,20 +1453,16 @@ class SaxesParser {

/** @private */
sAttribValueUnquoted(chunkState) {
const c = this.captureWhile(
chunkState,
(cx) => {
if (cx === LESS) {
this.fail("disallowed character.");
}
return cx !== GREATER && cx !== AMP && !isS(cx);
},
"attribValue");
const c = this.captureTo(chunkState, ATTRIB_VALUE_UNQUOTED_TERMINATOR,
"attribValue");
if (c === AMP) {
this.state = S_ENTITY;
this.entityBufferName = "attribValue";
this.entityReturnState = S_ATTRIB_VALUE_UNQUOTED;
}
else if (c === LESS) {
this.fail("disallowed character.");
}
else if (c) {
if (this.attribValue.includes("]]>")) {
this.fail("the string \"]]>\" is disallowed in char data.");
Expand All @@ -1450,9 +1480,7 @@ class SaxesParser {

/** @private */
sCloseTag(chunkState) {
const c = this.captureWhile(chunkState,
cx => cx !== GREATER && !isS(cx),
"tagName");
const c = this.captureTo(chunkState, CLOSE_TAG_TERMINATOR, "tagName");
if (c === GREATER) {
this.closeTag();
}
Expand Down

0 comments on commit 76eb95a

Please sign in to comment.