diff --git a/lib/saxes.js b/lib/saxes.js index 90507d2a..0d0a5a02 100644 --- a/lib/saxes.js +++ b/lib/saxes.js @@ -108,7 +108,6 @@ const QUOTES = [DQUOTE, SQUOTE]; const S = [SPACE, NL, 0xD, 9]; const TEXT_TERMINATOR = [LESS, AMP]; -const CLOSE_TAG_TERMINATOR = [GREATER, ...S]; const DOCTYPE_TERMINATOR = [...QUOTES, OPEN_BRACKET, GREATER]; const DOCTYPE_DTD_TERMINATOR = [...QUOTES, CLOSE_BRACKET]; const XML_DECL_NAME_TERMINATOR = [EQUAL, QUESTION, ...S]; @@ -263,12 +262,11 @@ class SaxesParser { this.comment = ""; this.openWakaBang = ""; this.textNode = ""; - this.tagName = ""; + this.name = ""; this.doctype = ""; this.piTarget = ""; this.piBody = ""; this.entity = ""; - this.attribName = ""; this.attribValue = ""; this.cdata = ""; this.xmlDeclName = ""; @@ -658,25 +656,23 @@ class SaxesParser { * * @private * - * @param {string} buffer The name of the buffer to save into. - * * @return {string|undefined} The character that made the test fail, or * ``undefined`` if we hit the end of the chunk. */ - captureName(buffer) { + captureName() { const { chunk, limit, i: start } = this; while (this.i < limit) { const c = this.getCode(); if (!isNameChar(c)) { // This is faster than adding codepoints one by one. - this[buffer] += chunk.substring(start, - this.i - (c <= 0xFFFF ? 1 : 2)); + this.name += chunk.substring(start, + this.i - (c <= 0xFFFF ? 1 : 2)); return c; } } // This is faster than adding codepoints one by one. - this[buffer] += chunk.substring(start); + this.name += chunk.substring(start); return undefined; } @@ -789,7 +785,7 @@ class SaxesParser { // either a /, ?, !, or text is coming next. 
if (isNameStartChar(c)) { this.state = S_OPEN_TAG; - this.tagName = String.fromCodePoint(c); + this.name = String.fromCodePoint(c); this.xmlDeclPossible = false; } else { @@ -1246,15 +1242,13 @@ class SaxesParser { /** @private */ sOpenTag() { - // We don't need to check with isNameStartChar here because the first - // character of tagName is fed elsewhere, and the check is done there. - const c = this.captureName("tagName"); + const c = this.captureName(); if (!c) { return; } const tag = this.tag = { - name: this.tagName, + name: this.name, attributes: Object.create(null), }; @@ -1298,7 +1292,7 @@ class SaxesParser { return; } if (isNameStartChar(c)) { - this.attribName = String.fromCodePoint(c); + this.name = String.fromCodePoint(c); this.attribValue = ""; this.state = S_ATTRIB_NAME; } @@ -1317,7 +1311,7 @@ class SaxesParser { sAttribName() { // We don't need to check with isNameStartChar here because the first // character of attribute is fed elsewhere, and the check is done there. - const c = this.captureName("attribName"); + const c = this.captureName(); if (c === EQUAL) { this.state = S_ATTRIB_VALUE; } @@ -1326,8 +1320,8 @@ class SaxesParser { } else if (c === GREATER) { this.fail("attribute without value."); - this.attribList.push({ name: this.attribName, value: this.attribName }); - this.attribName = this.attribValue = ""; + this.attribList.push({ name: this.name, value: this.name }); + this.name = this.attribValue = ""; this.openTag(); } else if (c) { @@ -1347,14 +1341,14 @@ class SaxesParser { } else if (c) { this.fail("attribute without value."); - this.tag.attributes[this.attribName] = ""; + this.tag.attributes[this.name] = ""; this.attribValue = ""; - this.attribName = ""; + this.name = ""; if (c === GREATER) { this.openTag(); } else if (isNameStartChar(c)) { - this.attribName = String.fromCodePoint(c); + this.name = String.fromCodePoint(c); this.state = S_ATTRIB_NAME; } else { @@ -1393,8 +1387,8 @@ class SaxesParser { if 
(this.attribValue.includes("]]>")) { this.fail("the string \"]]>\" is disallowed in char data."); } - this.attribList.push({ name: this.attribName, value: this.attribValue }); - this.attribName = this.attribValue = ""; + this.attribList.push({ name: this.name, value: this.attribValue }); + this.name = this.attribValue = ""; this.q = null; this.state = S_ATTRIB_VALUE_CLOSED; } @@ -1408,7 +1402,7 @@ class SaxesParser { } else if (isNameStartChar(c)) { this.fail("no whitespace between attributes."); - this.attribName = String.fromCodePoint(c); + this.name = String.fromCodePoint(c); this.attribValue = ""; this.state = S_ATTRIB_NAME; } @@ -1439,8 +1433,8 @@ class SaxesParser { if (this.attribValue.includes("]]>")) { this.fail("the string \"]]>\" is disallowed in char data."); } - this.attribList.push({ name: this.attribName, value: this.attribValue }); - this.attribName = this.attribValue = ""; + this.attribList.push({ name: this.name, value: this.attribValue }); + this.name = this.attribValue = ""; if (c === GREATER) { this.openTag(); } @@ -1452,13 +1446,16 @@ class SaxesParser { /** @private */ sCloseTag() { - const c = this.captureTo(CLOSE_TAG_TERMINATOR, "tagName"); + const c = this.captureName(); if (c === GREATER) { this.closeTag(); } else if (isS(c)) { this.state = S_CLOSE_TAG_SAW_WHITE; } + else if (c) { + this.fail("disallowed character in closing tag."); + } } /** @private */ @@ -1769,7 +1766,7 @@ class SaxesParser { tags.push(tag); } this.state = S_TEXT; - this.tagName = ""; + this.name = ""; } /** @@ -1780,14 +1777,14 @@ class SaxesParser { * @private */ closeTag() { - const { tags, tagName } = this; + const { tags, name } = this; // Our state after this will be S_TEXT, no matter what, and we can clear // tagName now. 
this.state = S_TEXT; - this.tagName = ""; + this.name = ""; - if (!tagName) { + if (!name) { this.fail("weird empty close tag."); this.textNode += "</>"; return; @@ -1797,7 +1794,7 @@ class SaxesParser { while (l-- > 0) { const tag = this.tag = tags.pop(); this.emitNode("onclosetag", tag); - if (tag.name !== tagName) { + if (tag.name !== name) { this.fail("unexpected close tag."); } else { @@ -1810,8 +1807,8 @@ class SaxesParser { this.closedRoot = true; } else if (l < 0) { - this.fail(`unmatched closing tag: ${tagName}.`); - this.textNode += `</${tagName}>`; + this.fail(`unmatched closing tag: ${name}.`); + this.textNode += `</${name}>`; } }