From ad4ab53da5ddc9b99eee29c279f77f32cea8d3d0 Mon Sep 17 00:00:00 2001
From: Louis-Dominique Dubeau <ldd@lddubeau.com>
Date: Fri, 6 Sep 2019 10:19:13 -0400
Subject: [PATCH] perf: don't depend on limit to know when we hit the end of
 buffer

---
 lib/.eslintrc.js |   2 +
 lib/saxes.js     | 114 ++++++++++++++++++++++-------------------------
 2 files changed, 55 insertions(+), 61 deletions(-)

diff --git a/lib/.eslintrc.js b/lib/.eslintrc.js
index 6d551cc1..410c2070 100644
--- a/lib/.eslintrc.js
+++ b/lib/.eslintrc.js
@@ -2,5 +2,7 @@ module.exports = {
   extends: "../.eslintrc.js",
   rules: {
     "no-continue": "off",
+    // We often use constant conditions like `while (true)`, for optimization.
+    "no-constant-condition": "off",
   },
 }
diff --git a/lib/saxes.js b/lib/saxes.js
index 43a8e993..9017d5e2 100644
--- a/lib/saxes.js
+++ b/lib/saxes.js
@@ -571,8 +571,6 @@ class SaxesParser {
       limit--;
       this.trailingCR = true;
     }
-    this.limit = limit;
-
     this.chunk = chunk;
     this.i = 0;
     while (this.i < limit) {
@@ -630,6 +628,11 @@ class SaxesParser {
       this.column = 0;
       break;
     default:
+      // eslint-disable-next-line no-restricted-globals
+      if (isNaN(code)) {
+        return undefined;
+      }
+
       this.column++;
       if (code >= 0xD800 && code <= 0xDBFF) {
         code = 0x10000 + ((code - 0xD800) * 0x400) +
@@ -689,6 +692,11 @@ class SaxesParser {
       break;
 
     default:
+      // eslint-disable-next-line no-restricted-globals
+      if (isNaN(code)) {
+        return undefined;
+      }
+
       this.column++;
       if (code >= 0xD800 && code <= 0xDBFF) {
         code = 0x10000 + ((code - 0xD800) * 0x400) +
@@ -735,20 +743,19 @@ class SaxesParser {
    * ``undefined`` if we hit the end of the chunk.
    */
   captureTo(chars, buffer) {
-    const { chunk, limit, i: start } = this;
-    while (this.i < limit) {
+    const { chunk, i: start } = this;
+    while (true) {
       const c = this.getCode();
       if (chars.includes(c)) {
-        // This is faster than adding codepoints one by one.
-        this[buffer] += chunk.substring(start,
-                                        this.i - (c <= 0xFFFF ? 1 : 2));
+        this[buffer] += chunk.substring(start, this.i - (c <= 0xFFFF ? 1 : 2));
         return c;
       }
-    }
 
-    // This is faster than adding codepoints one by one.
-    this[buffer] += chunk.substring(start);
-    return undefined;
+      if (c === undefined) {
+        this[buffer] += chunk.substring(start);
+        return undefined;
+      }
+    }
   }
 
   /**
@@ -764,20 +771,19 @@ class SaxesParser {
    * into the end of the current chunk.
    */
   captureToChar(char, buffer) {
-    const { chunk, limit, i: start } = this;
-    while (this.i < limit) {
+    const { chunk, i: start } = this;
+    while (true) {
       const c = this.getCode();
       if (c === char) {
-        // This is faster than adding codepoints one by one.
-        this[buffer] += chunk.substring(start,
-                                        this.i - (c <= 0xFFFF ? 1 : 2));
+        this[buffer] += chunk.substring(start, this.i - (c <= 0xFFFF ? 1 : 2));
         return true;
       }
-    }
 
-    // This is faster than adding codepoints one by one.
-    this[buffer] += chunk.substring(start);
-    return false;
+      if (c === undefined) {
+        this[buffer] += chunk.substring(start);
+        return false;
+      }
+    }
   }
 
   /**
@@ -790,20 +796,19 @@ class SaxesParser {
    * ``undefined`` if we hit the end of the chunk.
    */
   captureNameChars() {
-    const { chunk, limit, i: start } = this;
-    while (this.i < limit) {
+    const { chunk, i: start } = this;
+    while (true) {
       const c = this.getCode();
+      if (c === undefined) {
+        this.name += chunk.substring(start);
+        return undefined;
+      }
+
       if (!isNameChar(c)) {
-        // This is faster than adding codepoints one by one.
-        this.name += chunk.substring(start,
-                                     this.i - (c <= 0xFFFF ? 1 : 2));
+        this.name += chunk.substring(start, this.i - (c <= 0xFFFF ? 1 : 2));
         return c;
       }
     }
-
-    // This is faster than adding codepoints one by one.
-    this.name += chunk.substring(start);
-    return undefined;
   }
 
   /**
@@ -818,20 +823,19 @@ class SaxesParser {
    * ``undefined`` if we hit the end of the chunk.
    */
   captureWhileNameCheck(buffer) {
-    const { chunk, limit, i: start } = this;
-    while (this.i < limit) {
+    const { chunk, i: start } = this;
+    while (true) {
       const c = this.getCode();
+      if (c === undefined) {
+        this[buffer] += chunk.substring(start);
+        return undefined;
+      }
+
       if (!this.nameCheck(c)) {
-        // This is faster than adding codepoints one by one.
-        this[buffer] += chunk.substring(start,
-                                        this.i - (c <= 0xFFFF ? 1 : 2));
+        this[buffer] += chunk.substring(start, this.i - (c <= 0xFFFF ? 1 : 2));
         return c;
       }
     }
-
-    // This is faster than adding codepoints one by one.
-    this[buffer] += chunk.substring(start);
-    return undefined;
   }
 
   /**
@@ -843,15 +847,12 @@ class SaxesParser {
    * ``undefined`` if we hit the end of the chunk.
    */
   skipSpaces() {
-    const { limit } = this;
-    while (this.i < limit) {
+    while (true) {
       const c = this.getCode();
-      if (!isS(c)) {
+      if (c === undefined || !isS(c)) {
         return c;
       }
     }
-
-    return undefined;
   }
 
   /** @private */
@@ -946,21 +947,19 @@ class SaxesParser {
     // Since we are using a specialized loop, we also keep track of the presence
     // of ]]> in text data. The sequence ]]> is forbidden to appear as-is.
     //
-    const { chunk, limit, i: start } = this;
+    const { chunk, i: start } = this;
     let { forbiddenState } = this;
     // eslint-disable-next-line no-labels, no-restricted-syntax
     scanLoop:
-    // eslint-disable-next-line no-constant-condition
     while (true) {
-      if (this.i >= limit) {
+      const code = this.getCode();
+      if (code === undefined) {
         this.text += chunk.substring(start);
         break;
       }
-      const code = this.getCode();
       switch (code) {
       case LESS:
         this.state = S_OPEN_WAKA;
-        // This is faster than adding codepoints one by one.
         this.text += chunk.substring(start, this.i - 1);
         forbiddenState = FORBIDDEN_START;
         // eslint-disable-next-line no-labels
@@ -968,7 +967,6 @@ class SaxesParser {
       case AMP:
         this.state = S_ENTITY;
         this.entityReturnState = S_TEXT;
-        // This is faster than adding codepoints one by one.
         this.text += chunk.substring(start, this.i - 1);
         forbiddenState = FORBIDDEN_START;
         // eslint-disable-next-line no-labels
@@ -1006,28 +1004,25 @@ class SaxesParser {
     // for a specialized task. We keep track of the presence of non-space
     // characters in the text since these are errors when appearing outside the
     // document root element.
-    const { chunk, limit, i: start } = this;
+    const { chunk, i: start } = this;
     let nonSpace = false;
     // eslint-disable-next-line no-labels, no-restricted-syntax
     outRootLoop:
-    // eslint-disable-next-line no-constant-condition
     while (true) {
-      if (this.i >= limit) {
+      const code = this.getCode();
+      if (code === undefined) {
         this.text += chunk.substring(start);
         break;
       }
-      const code = this.getCode();
       switch (code) {
       case LESS:
         this.state = S_OPEN_WAKA;
-        // This is faster than adding codepoints one by one.
         this.text += chunk.substring(start, this.i - 1);
         // eslint-disable-next-line no-labels
         break outRootLoop;
       case AMP:
         this.state = S_ENTITY;
         this.entityReturnState = S_TEXT;
-        // This is faster than adding codepoints one by one.
         this.text += chunk.substring(start, this.i - 1);
         nonSpace = true;
         // eslint-disable-next-line no-labels
@@ -1772,17 +1767,14 @@ class SaxesParser {
     // We deliberately do not use captureTo here. The specialized code we use
     // here is faster than using captureTo.
     const { q } = this;
-    const { chunk, limit, i: start } = this;
-    // eslint-disable-next-line no-constant-condition
+    const { chunk, i: start } = this;
     while (true) {
-      if (this.i >= limit) {
-        // This is faster than adding codepoints one by one.
+      const code = this.getCode();
+      if (code === undefined) {
         this.text += chunk.substring(start);
         return;
       }
-      const code = this.getCode();
       if (code === q || code === AMP || code === LESS) {
-        // This is faster than adding codepoints one by one.
         const slice = chunk.substring(start, this.i - 1);
         switch (code) {
         case q: