diff --git a/lib/saxes.js b/lib/saxes.js
index 082ac41a..aa5bca16 100644
--- a/lib/saxes.js
+++ b/lib/saxes.js
@@ -803,7 +803,7 @@ class SaxesParser {
* @private
*
* @param {number[]} chars An array of codepoints. Encountering a character in
- * the array ends the capture.
+ * the array ends the capture. (``chars`` may safely contain ``NL``.)
*
* @return {number} The character code that made the capture end, or ``EOC``
* if we hit the end of the chunk. The return value cannot be NL_LIKE: NL is
@@ -813,22 +813,17 @@ class SaxesParser {
let { i: start } = this;
const { chunk } = this;
while (true) {
- let c = this.getCode();
- switch (c) {
- case NL_LIKE:
- this.text += `${chunk.slice(start, this.prevI)}\n`;
- start = this.i;
- c = NL;
- break;
- case EOC:
- this.text += chunk.slice(start);
- return EOC;
- default:
+ const c = this.getCode();
+ const isNLLike = c === NL_LIKE;
+ const final = isNLLike ? NL : c;
+ if (final === EOC || chars.includes(final)) {
+ this.text += chunk.slice(start, this.prevI);
+ return final;
}
- if (chars.includes(c)) {
- this.text += chunk.slice(start, this.prevI);
- return c;
+ if (isNLLike) {
+ this.text += `${chunk.slice(start, this.prevI)}\n`;
+ start = this.i;
}
}
}
@@ -838,7 +833,8 @@ class SaxesParser {
*
* @private
*
- * @param {number} char The codepoint that ends the capture.
+ * @param {number} char The codepoint that ends the capture. **NOTE ``char``
+ * MAY NOT CONTAIN ``NL``.** Passing ``NL`` will result in buggy behavior.
*
* @return {boolean} ``true`` if we ran into the character. Otherwise, we ran
* into the end of the current chunk.
@@ -1486,7 +1482,7 @@ class SaxesParser {
/** @private */
sPIRest() {
const c = this.captureWhileNameCheck("piTarget");
- if ((c === QUESTION || isS(c))) {
+ if (c === QUESTION || isS(c)) {
this.piIsXMLDecl = this.piTarget === "xml";
if (this.piIsXMLDecl && !this.xmlDeclPossible) {
this.fail("an XML declaration must be at the start of the document.");
diff --git a/test/eol-handling.js b/test/eol-handling.js
index 27a385b1..0565886a 100644
--- a/test/eol-handling.js
+++ b/test/eol-handling.js
@@ -58,6 +58,242 @@ describe("eol handling", () => {
});
});
+ // The comprehensive test is meant to have EOL in all possible contexts.
+ describe("comprehensive", () => {
+ const nl = `\
+
+
+
+I'm a test.
+?>
+"
+>
+]>
+
+
+
+abc
+
+
+
+
+
+
+`;
+ const xmlDeclEnd = nl.indexOf("?>");
+
+ const expect = [
+ ["text", "\n"],
+ ["doctype", `
+root
+[
+
+
+I'm a test.
+?>
+"
+>
+]`],
+ ["text", "\n"],
+ ["comment", "\n"],
+ ["text", "\n"],
+ ["processinginstruction", { target: "fnord", body: "" }],
+ ["text", "\n"],
+ ["opentagstart", { name: "moo", attributes: {} }],
+ ["opentag", {
+ name: "moo",
+ attributes: {
+ a: "12\n 3",
+ b: "\nz\n",
+ },
+ isSelfClosing: false,
+ }],
+ ["text", `
+abc
+\r
+
+`],
+ ["comment", "\n"],
+ ["text", "\n"],
+ ["processinginstruction", { target: "fnord", body: "" }],
+ ["text", "\n"],
+ ["opentagstart", { name: "abc", attributes: {} }],
+ ["opentag", {
+ name: "abc",
+ attributes: {
+ a: "bc",
+ },
+ isSelfClosing: true,
+ }],
+ ["closetag", {
+ name: "abc",
+ attributes: {
+ a: "bc",
+ },
+ isSelfClosing: true,
+ }],
+ ["text", "\n"],
+ ["closetag", {
+ name: "moo",
+ attributes: {
+ a: "12\n 3",
+ b: "\nz\n",
+ },
+ isSelfClosing: false,
+ }],
+ ["text", "\n"],
+ ];
+
+ describe("nl", () => {
+ test({
+ name: "one chunk",
+ xml: nl,
+ expect,
+ });
+
+ test({
+ name: "char-by-char",
+ expect,
+ fn(parser) {
+ for (const x of nl) {
+ parser.write(x);
+ }
+ parser.close();
+ },
+ });
+ });
+
+ describe("cr", () => {
+ const cr = nl.replace(/\n/g, "\r");
+
+ test({
+ name: "one chunk",
+ xml: cr,
+ expect,
+ });
+
+ test({
+ name: "char-by-char",
+ expect,
+ fn(parser) {
+ for (const x of cr) {
+ parser.write(x);
+ }
+ parser.close();
+ },
+ });
+ });
+
+ describe("crnl", () => {
+ const crnl = nl.replace(/\n/g, "\r\n");
+
+ test({
+ name: "one chunk",
+ xml: crnl,
+ expect,
+ });
+
+ test({
+ name: "char-by-char",
+ expect,
+ fn(parser) {
+ for (const x of crnl) {
+ parser.write(x);
+ }
+ parser.close();
+ },
+ });
+ });
+
+ describe("nel", () => {
+ // We have to switch the EOL characters after the XML declaration.
+ const nel = nl.slice(0, xmlDeclEnd).replace("1.0", "1.1") +
+ nl.slice(xmlDeclEnd).replace(/\n/g, "\u0085");
+
+ test({
+ name: "one chunk",
+ xml: nel,
+ expect,
+ });
+
+ test({
+ name: "char-by-char",
+ expect,
+ fn(parser) {
+ for (const x of nel) {
+ parser.write(x);
+ }
+ parser.close();
+ },
+ });
+ });
+
+ describe("ls", () => {
+ // We have to switch the EOL characters after the XML declaration.
+ const ls = nl.slice(0, xmlDeclEnd).replace("1.0", "1.1") +
+ nl.slice(xmlDeclEnd).replace(/\n/g, "\u2028");
+
+ test({
+ name: "one chunk",
+ xml: ls,
+ expect,
+ });
+
+ test({
+ name: "char-by-char",
+ expect,
+ fn(parser) {
+ for (const x of ls) {
+ parser.write(x);
+ }
+ parser.close();
+ },
+ });
+ });
+ });
+
describe("bad start", () => {
const xml = `
`;