From d416760b4ac2116da38ac9baf295c1ba342f87fe Mon Sep 17 00:00:00 2001 From: Louis-Dominique Dubeau Date: Tue, 14 Aug 2018 17:31:02 -0400 Subject: [PATCH] feat: stronger check on bad cdata closure It is also faster. --- lib/saxes.js | 25 +++++----- test/issue-86.js | 8 +-- test/wrong-cdata-closure.js | 98 +++++++++++++++++++++++++++++++++++++ 3 files changed, 114 insertions(+), 17 deletions(-) create mode 100644 test/wrong-cdata-closure.js diff --git a/lib/saxes.js b/lib/saxes.js index 1d937fb4..3c91835f 100644 --- a/lib/saxes.js +++ b/lib/saxes.js @@ -421,7 +421,6 @@ class SaxesParser { * @returns this */ fail(er) { - this.closeText(); const message = (this.trackPosition) ? `${this.fileName}:${this.line}:${this.column}: ${er}` : er; @@ -621,7 +620,7 @@ class SaxesParser { } /** - * Skip dwhitespace characters. + * Skip whitespace characters. * * @private * @@ -696,20 +695,11 @@ class SaxesParser { cx => cx !== LESS && cx !== AMP, "textNode", (fragment) => { - // Text fragments is a buffer we use to check for the precence of a + // textFragments is a buffer we use to check for the precence of a // literal "]]>" in text nodes. We cannot do the check against textNode - // itself because textNode will contain resolve entities so "]]>" + // itself because textNode will contain resolved entities so "]]>" // would turn to "]]>" in textNode and raise a false error. this.textFragments += fragment; - // We also have to check the end of textFragments because some cases may - // slip through otherwise. For instance, if client code write - // char-by-char. Then fragment will never contain ]]> but instead we'll - // have 3 fragments one with "]", a second with "]" and a third with - // ">". - if (fragment.includes("]]>") || this.textFragments.endsWith("]]>")) { - this.fail("the string \"]]>\" is disallowed in char data."); - } - if (!this.inRoot && /\S/.test(fragment)) { // We use the reportedTextBeforeRoot and reportedTextAfterRoot flags // to avoid reporting errors for every single character that is out of @@ -726,6 +716,15 @@ class SaxesParser { } }); + // We also have to check the end of textFragments because some cases may + // slip through otherwise. For instance, if client code writes + // char-by-char. Then fragment will never contain ]]> but instead we'll have + // 3 fragments one with "]", a second with "]" and a third with ">". + if (this.textFragments.includes("]]>")) { + this.textFragments = ""; + this.fail("the string \"]]>\" is disallowed in char data."); + } + switch (c) { case LESS: this.state = S_OPEN_WAKA; diff --git a/test/issue-86.js b/test/issue-86.js index 939d0a66..595b094e 100644 --- a/test/issue-86.js +++ b/test/issue-86.js @@ -35,14 +35,14 @@ require(".").test({ "error", "undefined:1:19: text data outside of root node.", ], - [ - "text", - "de", - ], [ "error", "undefined:1:20: unexpected end.", ], + [ + "text", + "de", + ], ], opt: {}, }); diff --git a/test/wrong-cdata-closure.js b/test/wrong-cdata-closure.js new file mode 100644 index 00000000..37d74889 --- /dev/null +++ b/test/wrong-cdata-closure.js @@ -0,0 +1,98 @@ +"use strict"; + +const { test } = require("."); + +const xml = "somethingx]]>moo"; + +describe("wrong cdata closure", () => { + test({ + name: "one shot", + xml, + expect: [ + ["opentagstart", { + name: "span", + attributes: {}, + }], + ["opentag", { + name: "span", + attributes: {}, + isSelfClosing: false, + }], + ["error", + "undefined:1:23: the string \"]]>\" is disallowed in char data."], + ["text", "somethingx]]>moo"], + ["closetag", { + name: "span", + attributes: {}, + isSelfClosing: false, + }], + ["end", undefined], + ["ready", undefined], + ], + opt: {}, + }); + + test({ + name: "one-by-one", + fn(parser) { + for (const x of xml) { + parser.write(x); + } + parser.close(); + }, + expect: [ + ["opentagstart", { + name: "span", + attributes: {}, + }], + ["opentag", { + name: "span", + attributes: {}, + isSelfClosing: false, + }], + ["error", + "undefined:1:19: the string \"]]>\" is disallowed in char data."], + ["text", "somethingx]]>moo"], + ["closetag", { + name: "span", + attributes: {}, + isSelfClosing: false, + }], + ["end", undefined], + ["ready", undefined], + ], + opt: {}, + }); + + test({ + name: "two-by-two", + fn(parser) { + for (let ix = 0; ix < xml.length; ix += 2) { + parser.write(xml.slice(ix, ix + 2)); + } + parser.close(); + }, + expect: [ + ["opentagstart", { + name: "span", + attributes: {}, + }], + ["opentag", { + name: "span", + attributes: {}, + isSelfClosing: false, + }], + ["error", + "undefined:1:20: the string \"]]>\" is disallowed in char data."], + ["text", "somethingx]]>moo"], + ["closetag", { + name: "span", + attributes: {}, + isSelfClosing: false, + }], + ["end", undefined], + ["ready", undefined], + ], + opt: {}, + }); +});