diff --git a/lib/saxes.js b/lib/saxes.js index abbe4988..c191af10 100644 --- a/lib/saxes.js +++ b/lib/saxes.js @@ -292,16 +292,12 @@ class SaxesParser { * @private */ _init(opt) { - this.comment = ""; this.openWakaBang = ""; this.text = ""; this.name = ""; this.piTarget = ""; - this.piBody = ""; this.entity = ""; - this.cdata = ""; this.xmlDeclName = ""; - this.xmlDeclValue = ""; /** * The options passed to the constructor of this parser. @@ -795,12 +791,12 @@ class SaxesParser { /** * @private */ - handleEOL(buffer, chunk, start) { + handleEOL(chunk, start) { if (this.originalNL) { return start; } - this[buffer] += `${chunk.slice(start, this.prevI)}\n`; + this.text += `${chunk.slice(start, this.prevI)}\n`; return this.i; } @@ -813,28 +809,26 @@ class SaxesParser { * @param {number[]} chars An array of codepoints. Encountering a character in * the array ends the capture. * - * @param {string} buffer The name of the buffer to save into. - * * @return {number|undefined} The character code that made the capture end, or * ``undefined`` if we hit the end of the chunk. */ - captureTo(chars, buffer) { + captureTo(chars) { let { i: start } = this; const { chunk } = this; while (true) { const c = this.getCode(); switch (c) { case NL: - start = this.handleEOL(buffer, chunk, start); + start = this.handleEOL(chunk, start); break; case EOC: - this[buffer] += chunk.slice(start); + this.text += chunk.slice(start); return EOC; default: } if (chars.includes(c)) { - this[buffer] += chunk.slice(start, this.prevI); + this.text += chunk.slice(start, this.prevI); return c; } } @@ -847,28 +841,26 @@ class SaxesParser { * * @param {number} char The codepoint that ends the capture. * - * @param {string} buffer The name of the buffer to save into. - * * @return {boolean} ``true`` if we ran into the character. Otherwise, we ran * into the end of the current chunk. */ - captureToChar(char, buffer) { + captureToChar(char) { let { i: start } = this; const { chunk } = this; while (true) { const c = this.getCode(); switch (c) { case NL: - start = this.handleEOL(buffer, chunk, start); + start = this.handleEOL(chunk, start); break; case EOC: - this[buffer] += chunk.slice(start); + this.text += chunk.slice(start); return false; default: } if (c === char) { - this[buffer] += chunk.slice(start, this.prevI); + this.text += chunk.slice(start, this.prevI); return true; } } @@ -999,6 +991,9 @@ class SaxesParser { switch (c) { case LESS: this.state = S_OPEN_WAKA; + if (this.text.length !== 0) { + this.closeText(); + } break; case EOC: break; @@ -1057,6 +1052,9 @@ class SaxesParser { case LESS: this.state = S_OPEN_WAKA; this.text += chunk.slice(start, this.prevI); + if (this.text.length !== 0) { + this.closeText(); + } forbiddenState = FORBIDDEN_START; // eslint-disable-next-line no-labels break scanLoop; @@ -1088,7 +1086,7 @@ class SaxesParser { forbiddenState = FORBIDDEN_START; break; case NL: - start = this.handleEOL("text", chunk, start); + start = this.handleEOL(chunk, start); forbiddenState = FORBIDDEN_START; break; case EOC: @@ -1119,6 +1117,9 @@ class SaxesParser { case LESS: this.state = S_OPEN_WAKA; this.text += chunk.slice(start, this.prevI); + if (this.text.length !== 0) { + this.closeText(); + } // eslint-disable-next-line no-labels break outRootLoop; case AMP: @@ -1129,7 +1130,7 @@ class SaxesParser { // eslint-disable-next-line no-labels break outRootLoop; case NL: - start = this.handleEOL("text", chunk, start); + start = this.handleEOL(chunk, start); // eslint-disable-next-line no-labels break; case EOC: @@ -1168,9 +1169,6 @@ class SaxesParser { // a state handler cannot return ``undefined``. That's why we don't test // for it. const c = this.getCode(); - if (this.text.length !== 0) { - this.closeText(); - } // either a /, ?, !, or text is coming next. if (isNameStartChar(c)) { this.state = S_OPEN_TAG; @@ -1238,7 +1236,7 @@ class SaxesParser { /** @private */ sDoctype() { - const c = this.captureTo(DOCTYPE_TERMINATOR, "text"); + const c = this.captureTo(DOCTYPE_TERMINATOR); switch (c) { case GREATER: this.ondoctype(this.text); @@ -1263,7 +1261,7 @@ class SaxesParser { /** @private */ sDoctypeQuote() { const { q } = this; - if (this.captureToChar(q, "text")) { + if (this.captureToChar(q)) { this.text += String.fromCodePoint(q); this.q = null; this.state = S_DOCTYPE; @@ -1272,7 +1270,7 @@ class SaxesParser { /** @private */ sDTD() { - const c = this.captureTo(DTD_TERMINATOR, "text"); + const c = this.captureTo(DTD_TERMINATOR); if (c === EOC) { return; } @@ -1293,7 +1291,7 @@ class SaxesParser { /** @private */ sDTDQuoted() { const { q } = this; - if (this.captureToChar(q, "text")) { + if (this.captureToChar(q)) { this.text += String.fromCodePoint(q); this.state = S_DTD; this.q = null; @@ -1330,7 +1328,7 @@ class SaxesParser { /** @private */ sDTDComment() { - if (this.captureToChar(MINUS, "text")) { + if (this.captureToChar(MINUS)) { this.text += "-"; this.state = S_DTD_COMMENT_ENDING; } @@ -1360,7 +1358,7 @@ class SaxesParser { /** @private */ sDTDPI() { - if (this.captureToChar(QUESTION, "text")) { + if (this.captureToChar(QUESTION)) { this.text += "?"; this.state = S_DTD_PI_ENDING; } @@ -1377,7 +1375,7 @@ class SaxesParser { /** @private */ sComment() { - if (this.captureToChar(MINUS, "comment")) { + if (this.captureToChar(MINUS)) { this.state = S_COMMENT_ENDING; } } @@ -1387,11 +1385,11 @@ class SaxesParser { const c = this.getCode(); if (c === MINUS) { this.state = S_COMMENT_ENDED; - this.oncomment(this.comment); - this.comment = ""; + this.oncomment(this.text); + this.text = ""; } else { - this.comment += `-${String.fromCodePoint(c)}`; + this.text += `-${String.fromCodePoint(c)}`; this.state = S_COMMENT; } } @@ -1403,7 +1401,7 @@ class SaxesParser { this.fail("malformed comment."); // will be recorded as // a comment of " blah -- bloo " - this.comment += `--${String.fromCodePoint(c)}`; + this.text += `--${String.fromCodePoint(c)}`; this.state = S_COMMENT; } else { @@ -1413,7 +1411,7 @@ class SaxesParser { /** @private */ sCData() { - if (this.captureToChar(CLOSE_BRACKET, "cdata")) { + if (this.captureToChar(CLOSE_BRACKET)) { this.state = S_CDATA_ENDING; } } @@ -1425,7 +1423,7 @@ class SaxesParser { this.state = S_CDATA_ENDING_2; } else { - this.cdata += `]${String.fromCodePoint(c)}`; + this.text += `]${String.fromCodePoint(c)}`; this.state = S_CDATA; } } @@ -1435,15 +1433,15 @@ class SaxesParser { const c = this.getCode(); switch (c) { case GREATER: - this.oncdata(this.cdata); - this.cdata = ""; + this.oncdata(this.text); + this.text = ""; this.state = S_TEXT; break; case CLOSE_BRACKET: - this.cdata += "]"; + this.text += "]"; break; default: - this.cdata += `]]${String.fromCodePoint(c)}`; + this.text += `]]${String.fromCodePoint(c)}`; this.state = S_CDATA; } } @@ -1511,14 +1509,17 @@ class SaxesParser { break; } case S_XML_DECL_NAME: - c = this.captureTo(XML_DECL_NAME_TERMINATOR, "xmlDeclName"); + c = this.captureTo(XML_DECL_NAME_TERMINATOR); // The question mark character is not valid inside any of the XML // declaration name/value pairs. if (c === QUESTION) { this.state = S_PI_ENDING; + this.text = ""; return; } if (isS(c) || c === EQUAL) { + this.xmlDeclName += this.text; + this.text = ""; if (!this.xmlDeclExpects.includes(this.xmlDeclName)) { switch (this.xmlDeclName.length) { case 0: @@ -1573,20 +1574,23 @@ class SaxesParser { } break; case S_XML_DECL_VALUE: - c = this.captureTo([this.q, QUESTION], "xmlDeclValue"); + c = this.captureTo([this.q, QUESTION]); // The question mark character is not valid inside any of the XML // declaration name/value pairs. if (c === QUESTION) { this.state = S_PI_ENDING; + this.text = ""; return; } if (c !== EOC) { + const value = this.text; + this.text = ""; switch (this.xmlDeclName) { case "version": { this.xmlDeclExpects = ["encoding", "standalone"]; - const version = this.xmlDeclValue; + const version = value; this.xmlDecl.version = version; // This is the test specified by XML 1.0 but it is fine for XML 1.1. if (!/^1\.[0-9]+$/.test(version)) { @@ -1599,25 +1603,25 @@ class SaxesParser { break; } case "encoding": - if (!/^[A-Za-z][A-Za-z0-9._-]*$/.test(this.xmlDeclValue)) { + if (!/^[A-Za-z][A-Za-z0-9._-]*$/.test(value)) { this.fail("encoding value must match \ /^[A-Za-z0-9][A-Za-z0-9._-]*$/."); } this.xmlDeclExpects = ["standalone"]; - this.xmlDecl.encoding = this.xmlDeclValue; + this.xmlDecl.encoding = value; break; case "standalone": - if (this.xmlDeclValue !== "yes" && this.xmlDeclValue !== "no") { + if (value !== "yes" && value !== "no") { this.fail("standalone value must match \"yes\" or \"no\"."); } this.xmlDeclExpects = []; - this.xmlDecl.standalone = this.xmlDeclValue; + this.xmlDecl.standalone = value; break; default: // We don't need to raise an error here since we've already // raised one when checking what name was expected. } - this.xmlDeclName = this.xmlDeclValue = ""; + this.xmlDeclName = ""; this.xmlDeclState = S_XML_DECL_NAME_START; this.requiredSeparator = true; } @@ -1627,18 +1631,18 @@ class SaxesParser { `Unknown XML declaration state: ${this.xmlDeclState}`); } } - else if (this.piBody.length === 0) { + else if (this.text.length === 0) { c = this.getCode(); if (c === QUESTION) { this.state = S_PI_ENDING; } else if (!isS(c)) { - this.piBody = String.fromCodePoint(c); + this.text = String.fromCodePoint(c); } } // The question mark character is not valid inside any of the XML // declaration name/value pairs. - else if (this.captureToChar(QUESTION, "piBody")) { + else if (this.captureToChar(QUESTION)) { this.state = S_PI_ENDING; } } @@ -1657,9 +1661,9 @@ class SaxesParser { else if (this.xmlDeclExpects.includes("version")) { this.fail("XML declaration must contain a version."); } - this.xmlDeclName = this.xmlDeclValue = ""; + this.xmlDeclName = ""; this.requiredSeparator = false; - this.piTarget = this.piBody = ""; + this.piTarget = this.text = ""; this.state = S_TEXT; } else { @@ -1676,9 +1680,9 @@ class SaxesParser { } this.onprocessinginstruction({ target: this.piTarget, - body: this.piBody, + body: this.text, }); - this.piTarget = this.piBody = ""; + this.piTarget = this.text = ""; this.state = S_TEXT; } else if (c === QUESTION) { @@ -1686,10 +1690,10 @@ class SaxesParser { // took the first ? as a sign that the PI was ending, but it is // not. So we have to add it to the body but we take the new ? as a // sign that the PI is ending. - this.piBody += "?"; + this.text += "?"; } else { - this.piBody += `?${String.fromCodePoint(c)}`; + this.text += `?${String.fromCodePoint(c)}`; this.state = S_PI_BODY; } this.xmlDeclPossible = false; @@ -1878,8 +1882,7 @@ class SaxesParser { const code = this.getCode(); switch (code) { case q: - this.pushAttrib(this.name, - this.text + chunk.slice(start, this.prevI)); + this.pushAttrib(this.name, this.text + chunk.slice(start, this.prevI)); this.name = this.text = ""; this.q = null; this.state = S_ATTRIB_VALUE_CLOSED; @@ -1890,7 +1893,7 @@ class SaxesParser { this.entityReturnState = S_ATTRIB_VALUE_QUOTED; return; case NL: - start = this.handleEOL("text", chunk, start); + start = this.handleEOL(chunk, start); break; case LESS: this.text += chunk.slice(start, this.prevI); @@ -1928,7 +1931,7 @@ class SaxesParser { /** @private */ sAttribValueUnquoted() { - const c = this.captureTo(ATTRIB_VALUE_UNQUOTED_TERMINATOR, "text"); + const c = this.captureTo(ATTRIB_VALUE_UNQUOTED_TERMINATOR); switch (c) { case AMP: this.state = S_ENTITY; @@ -1983,15 +1986,37 @@ class SaxesParser { /** @private */ sEntity() { - if (this.captureToChar(SEMICOLON, "entity")) { - this.state = this.entityReturnState; - if (this.entity === "") { - this.fail("empty entity name."); - this.text += "&;"; - return; + // This is essentially a specialized version of captureToChar(SEMICOLON...) + let { i: start } = this; + const { chunk } = this; + // eslint-disable-next-line no-labels, no-restricted-syntax + loop: + while (true) { + switch (this.getCode()) { + case NL: + if (!this.originalNL) { + this.entity += `${chunk.slice(start, this.prevI)}\n`; + start = this.i; + } + break; + case SEMICOLON: + this.entity += chunk.slice(start, this.prevI); + this.state = this.entityReturnState; + if (this.entity === "") { + this.fail("empty entity name."); + this.text += "&;"; + return; + } + this.text += this.parseEntity(this.entity); + this.entity = ""; + // eslint-disable-next-line no-labels + break loop; + case EOC: + this.entity += chunk.slice(start); + // eslint-disable-next-line no-labels + break loop; + default: } - this.text += this.parseEntity(this.entity); - this.entity = ""; } }