From 04855d6eb519fa2cd4edf03f8f5a475d40ed047a Mon Sep 17 00:00:00 2001 From: Louis-Dominique Dubeau Date: Thu, 23 Aug 2018 19:02:13 -0400 Subject: [PATCH] perf: introduce a specialized version of captureWhile captureName performs faster for those cases where we just want to capture the characters in an XML name. --- lib/saxes.js | 66 +++++++++++++++++++++++++--------------------------- 1 file changed, 32 insertions(+), 34 deletions(-) diff --git a/lib/saxes.js b/lib/saxes.js index 071cd63f..85aa8468 100644 --- a/lib/saxes.js +++ b/lib/saxes.js @@ -571,12 +571,7 @@ class SaxesParser { */ /** - * Capture characters into a buffer while a condition is true. A sequence of - * ``write`` calls may require the capture of text into a buffer as multiple - * "fragments". For instance, given ``write("Multiple")`` and - * ``write("parts")``, the text which is part of the ``x`` element will be - * recorded in two steps: one recording ``"Multiple"`` and one recording - * ``"parts"``. These are two fragments. + * Capture characters into a buffer while a condition is true. * * @private * @@ -607,6 +602,35 @@ class SaxesParser { return undefined; } + /** + * Capture characters that satisfy ``isNameChar`` into a buffer. + * + * @private + * + * @param {ChunkState} chunkState The current chunk state. + * + * @param {string} buffer The name of the buffer to save into. + * + * @return {number|undefined} The code point that made the test fail, or + * ``undefined`` if we hit the end of the chunk. + */ + captureName(chunkState, buffer) { + const { limit, chunk, i: start } = chunkState; + while (chunkState.i < limit) { + const c = this.getCode(chunkState); + if (!isNameChar(c)) { + // This is faster than adding codepoints one by one. + this[buffer] += chunk.substring(start, + chunkState.i - (c <= 0xFFFF ? 1 : 2)); + return c; + } + } + + // This is faster than adding codepoints one by one. 
+ this[buffer] += chunk.substring(start); + return undefined; + } + /** * Skip characters while a condition is true. * @@ -1223,20 +1247,7 @@ class SaxesParser { sOpenTag(chunkState) { // We don't need to check with isNameStartChar here because the first // character of tagName is fed elsewhere, and the check is done there. - const c = this.captureWhile( - chunkState, - (cx) => { - if (cx !== GREATER && !isS(cx) && cx !== FORWARD_SLASH) { - if (!isNameChar(cx)) { - this.fail("disallowed character in tag name."); - } - - return true; - } - - return false; - }, - "tagName"); + const c = this.captureName(chunkState, "tagName"); if (!c) { return; } @@ -1305,20 +1316,7 @@ class SaxesParser { sAttribName(chunkState) { // We don't need to check with isNameStartChar here because the first // character of attribute is fed elsewhere, and the check is done there. - const c = this.captureWhile( - chunkState, - (cx) => { - if (cx !== EQUAL && !isS(cx) && cx !== GREATER) { - if (!isNameChar(cx)) { - this.fail("disallowed characer in attribute name."); - } - - return true; - } - - return false; - }, - "attribName"); + const c = this.captureName(chunkState, "attribName"); if (c === EQUAL) { this.state = S_ATTRIB_VALUE; }