From ad4ab53da5ddc9b99eee29c279f77f32cea8d3d0 Mon Sep 17 00:00:00 2001 From: Louis-Dominique Dubeau Date: Fri, 6 Sep 2019 10:19:13 -0400 Subject: [PATCH] perf: don't depend on limit to know when we hit the end of buffer --- lib/.eslintrc.js | 2 + lib/saxes.js | 114 ++++++++++++++++++++++------------------------- 2 files changed, 55 insertions(+), 61 deletions(-) diff --git a/lib/.eslintrc.js b/lib/.eslintrc.js index 6d551cc1..410c2070 100644 --- a/lib/.eslintrc.js +++ b/lib/.eslintrc.js @@ -2,5 +2,7 @@ module.exports = { extends: "../.eslintrc.js", rules: { "no-continue": "off", + // We use constant conditions quite often, for optimization reasons. + "no-constant-condition": "off", }, } diff --git a/lib/saxes.js b/lib/saxes.js index 43a8e993..9017d5e2 100644 --- a/lib/saxes.js +++ b/lib/saxes.js @@ -571,8 +571,6 @@ class SaxesParser { limit--; this.trailingCR = true; } - this.limit = limit; - this.chunk = chunk; this.i = 0; while (this.i < limit) { @@ -630,6 +628,11 @@ class SaxesParser { this.column = 0; break; default: + // eslint-disable-next-line no-restricted-globals + if (isNaN(code)) { + return undefined; + } + this.column++; if (code >= 0xD800 && code <= 0xDBFF) { code = 0x10000 + ((code - 0xD800) * 0x400) + @@ -689,6 +692,11 @@ class SaxesParser { break; default: + // eslint-disable-next-line no-restricted-globals + if (isNaN(code)) { + return undefined; + } + this.column++; if (code >= 0xD800 && code <= 0xDBFF) { code = 0x10000 + ((code - 0xD800) * 0x400) + @@ -735,20 +743,19 @@ class SaxesParser { * ``undefined`` if we hit the end of the chunk. */ captureTo(chars, buffer) { - const { chunk, limit, i: start } = this; - while (this.i < limit) { + const { chunk, i: start } = this; + while (true) { const c = this.getCode(); if (chars.includes(c)) { - // This is faster than adding codepoints one by one. - this[buffer] += chunk.substring(start, - this.i - (c <= 0xFFFF ? 1 : 2)); + this[buffer] += chunk.substring(start, this.i - (c <= 0xFFFF ? 1 : 2)); return c; } - } - // This is faster than adding codepoints one by one. - this[buffer] += chunk.substring(start); - return undefined; + if (c === undefined) { + this[buffer] += chunk.substring(start); + return undefined; + } + } } /** @@ -764,20 +771,19 @@ class SaxesParser { * into the end of the current chunk. */ captureToChar(char, buffer) { - const { chunk, limit, i: start } = this; - while (this.i < limit) { + const { chunk, i: start } = this; + while (true) { const c = this.getCode(); if (c === char) { - // This is faster than adding codepoints one by one. - this[buffer] += chunk.substring(start, - this.i - (c <= 0xFFFF ? 1 : 2)); + this[buffer] += chunk.substring(start, this.i - (c <= 0xFFFF ? 1 : 2)); return true; } - } - // This is faster than adding codepoints one by one. - this[buffer] += chunk.substring(start); - return false; + if (c === undefined) { + this[buffer] += chunk.substring(start); + return false; + } + } } /** @@ -790,20 +796,19 @@ class SaxesParser { * ``undefined`` if we hit the end of the chunk. */ captureNameChars() { - const { chunk, limit, i: start } = this; - while (this.i < limit) { + const { chunk, i: start } = this; + while (true) { const c = this.getCode(); + if (c === undefined) { + this.name += chunk.substring(start); + return undefined; + } + if (!isNameChar(c)) { - // This is faster than adding codepoints one by one. - this.name += chunk.substring(start, - this.i - (c <= 0xFFFF ? 1 : 2)); + this.name += chunk.substring(start, this.i - (c <= 0xFFFF ? 1 : 2)); return c; } } - - // This is faster than adding codepoints one by one. - this.name += chunk.substring(start); - return undefined; } /** @@ -818,20 +823,19 @@ class SaxesParser { * ``undefined`` if we hit the end of the chunk. */ captureWhileNameCheck(buffer) { - const { chunk, limit, i: start } = this; - while (this.i < limit) { + const { chunk, i: start } = this; + while (true) { const c = this.getCode(); + if (c === undefined) { + this[buffer] += chunk.substring(start); + return undefined; + } + if (!this.nameCheck(c)) { - // This is faster than adding codepoints one by one. - this[buffer] += chunk.substring(start, - this.i - (c <= 0xFFFF ? 1 : 2)); + this[buffer] += chunk.substring(start, this.i - (c <= 0xFFFF ? 1 : 2)); return c; } } - - // This is faster than adding codepoints one by one. - this[buffer] += chunk.substring(start); - return undefined; } /** @@ -843,15 +847,12 @@ class SaxesParser { * ``undefined`` if we hit the end of the chunk. */ skipSpaces() { - const { limit } = this; - while (this.i < limit) { + while (true) { const c = this.getCode(); - if (!isS(c)) { + if (c === undefined || !isS(c)) { return c; } } - - return undefined; } /** @private */ @@ -946,21 +947,19 @@ class SaxesParser { // Since we are using a specialized loop, we also keep track of the presence // of ]]> in text data. The sequence ]]> is forbidden to appear as-is. // - const { chunk, limit, i: start } = this; + const { chunk, i: start } = this; let { forbiddenState } = this; // eslint-disable-next-line no-labels, no-restricted-syntax scanLoop: - // eslint-disable-next-line no-constant-condition while (true) { - if (this.i >= limit) { + const code = this.getCode(); + if (code === undefined) { this.text += chunk.substring(start); break; } - const code = this.getCode(); switch (code) { case LESS: this.state = S_OPEN_WAKA; - // This is faster than adding codepoints one by one. this.text += chunk.substring(start, this.i - 1); forbiddenState = FORBIDDEN_START; // eslint-disable-next-line no-labels @@ -968,7 +967,6 @@ class SaxesParser { case AMP: this.state = S_ENTITY; this.entityReturnState = S_TEXT; - // This is faster than adding codepoints one by one. this.text += chunk.substring(start, this.i - 1); forbiddenState = FORBIDDEN_START; // eslint-disable-next-line no-labels @@ -1006,28 +1004,25 @@ class SaxesParser { // for a specialized task. We keep track of the presence of non-space // characters in the text since these are errors when appearing outside the // document root element. - const { chunk, limit, i: start } = this; + const { chunk, i: start } = this; let nonSpace = false; // eslint-disable-next-line no-labels, no-restricted-syntax outRootLoop: - // eslint-disable-next-line no-constant-condition while (true) { - if (this.i >= limit) { + const code = this.getCode(); + if (code === undefined) { this.text += chunk.substring(start); break; } - const code = this.getCode(); switch (code) { case LESS: this.state = S_OPEN_WAKA; - // This is faster than adding codepoints one by one. this.text += chunk.substring(start, this.i - 1); // eslint-disable-next-line no-labels break outRootLoop; case AMP: this.state = S_ENTITY; this.entityReturnState = S_TEXT; - // This is faster than adding codepoints one by one. this.text += chunk.substring(start, this.i - 1); nonSpace = true; // eslint-disable-next-line no-labels @@ -1772,17 +1767,14 @@ class SaxesParser { // We deliberately do not use captureTo here. The specialized code we use // here is faster than using captureTo. const { q } = this; - const { chunk, limit, i: start } = this; - // eslint-disable-next-line no-constant-condition + const { chunk, i: start } = this; while (true) { - if (this.i >= limit) { - // This is faster than adding codepoints one by one. + const code = this.getCode(); + if (code === undefined) { this.text += chunk.substring(start); return; } - const code = this.getCode(); if (code === q || code === AMP || code === LESS) { - // This is faster than adding codepoints one by one. const slice = chunk.substring(start, this.i - 1); switch (code) { case q: