Skip to content

Commit

Permalink
fix: fix a bug in EOL handling
Browse files Browse the repository at this point in the history
A non-NL character acting as EOL and seen by captureTo would cause a crash in
some cases.
  • Loading branch information
lddubeau committed Oct 11, 2019
1 parent 1c8df1a commit 03b1567
Show file tree
Hide file tree
Showing 2 changed files with 249 additions and 17 deletions.
30 changes: 13 additions & 17 deletions lib/saxes.js
Original file line number Diff line number Diff line change
Expand Up @@ -803,7 +803,7 @@ class SaxesParser {
* @private
*
* @param {number[]} chars An array of codepoints. Encountering a character in
* the array ends the capture.
* the array ends the capture. (``chars`` may safely contain ``NL``.)
*
* @return {number} The character code that made the capture end, or ``EOC``
* if we hit the end of the chunk. The return value cannot be NL_LIKE: NL is
Expand All @@ -813,22 +813,17 @@ class SaxesParser {
let { i: start } = this;
const { chunk } = this;
while (true) {
let c = this.getCode();
switch (c) {
case NL_LIKE:
this.text += `${chunk.slice(start, this.prevI)}\n`;
start = this.i;
c = NL;
break;
case EOC:
this.text += chunk.slice(start);
return EOC;
default:
const c = this.getCode();
const isNLLike = c === NL_LIKE;
const final = isNLLike ? NL : c;
if (final === EOC || chars.includes(final)) {
this.text += chunk.slice(start, this.prevI);
return final;
}

if (chars.includes(c)) {
this.text += chunk.slice(start, this.prevI);
return c;
if (isNLLike) {
this.text += `${chunk.slice(start, this.prevI)}\n`;
start = this.i;
}
}
}
Expand All @@ -838,7 +833,8 @@ class SaxesParser {
*
* @private
*
* @param {number} char The codepoint that ends the capture.
* @param {number} char The codepoint that ends the capture. **NOTE ``char``
* MAY NOT CONTAIN ``NL``.** Passing ``NL`` will result in buggy behavior.
*
* @return {boolean} ``true`` if we ran into the character. Otherwise, we ran
* into the end of the current chunk.
Expand Down Expand Up @@ -1486,7 +1482,7 @@ class SaxesParser {
/** @private */
sPIRest() {
const c = this.captureWhileNameCheck("piTarget");
if ((c === QUESTION || isS(c))) {
if (c === QUESTION || isS(c)) {
this.piIsXMLDecl = this.piTarget === "xml";
if (this.piIsXMLDecl && !this.xmlDeclPossible) {
this.fail("an XML declaration must be at the start of the document.");
Expand Down
236 changes: 236 additions & 0 deletions test/eol-handling.js
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,242 @@ describe("eol handling", () => {
});
});

// The comprehensive test is meant to have EOL in all possible contexts.
describe("comprehensive", () => {
const nl = `\
<?xml
version
=
"1.0"
encoding
=
"utf-8"
standalone
=
"no"
?>
<!DOCTYPE
root
[
<!--
I'm a test.
-->
<?
I'm a test.
?>
<!NOTATION
not1
SYSTEM
"]>"
>
]>
<!--
-->
<?fnord
?>
<moo
a
=
"12
3"
b
=
"
z
"
>
abc
&#xD;
<!--
-->
<?fnord
?>
<abc
a
=
"bc"
/>
</moo
>
`;
const xmlDeclEnd = nl.indexOf("?>");

const expect = [
["text", "\n"],
["doctype", `
root
[
<!--
I'm a test.
-->
<?
I'm a test.
?>
<!NOTATION
not1
SYSTEM
"]>"
>
]`],
["text", "\n"],
["comment", "\n"],
["text", "\n"],
["processinginstruction", { target: "fnord", body: "" }],
["text", "\n"],
["opentagstart", { name: "moo", attributes: {} }],
["opentag", {
name: "moo",
attributes: {
a: "12\n 3",
b: "\nz\n",
},
isSelfClosing: false,
}],
["text", `
abc
\r
`],
["comment", "\n"],
["text", "\n"],
["processinginstruction", { target: "fnord", body: "" }],
["text", "\n"],
["opentagstart", { name: "abc", attributes: {} }],
["opentag", {
name: "abc",
attributes: {
a: "bc",
},
isSelfClosing: true,
}],
["closetag", {
name: "abc",
attributes: {
a: "bc",
},
isSelfClosing: true,
}],
["text", "\n"],
["closetag", {
name: "moo",
attributes: {
a: "12\n 3",
b: "\nz\n",
},
isSelfClosing: false,
}],
["text", "\n"],
];

describe("nl", () => {
test({
name: "one chunk",
xml: nl,
expect,
});

test({
name: "char-by-char",
expect,
fn(parser) {
for (const x of nl) {
parser.write(x);
}
parser.close();
},
});
});

describe("cr", () => {
const cr = nl.replace(/\n/g, "\r");

test({
name: "one chunk",
xml: cr,
expect,
});

test({
name: "char-by-char",
expect,
fn(parser) {
for (const x of cr) {
parser.write(x);
}
parser.close();
},
});
});

describe("crnl", () => {
const crnl = nl.replace(/\n/g, "\r\n");

test({
name: "one chunk",
xml: crnl,
expect,
});

test({
name: "char-by-char",
expect,
fn(parser) {
for (const x of crnl) {
parser.write(x);
}
parser.close();
},
});
});

describe("nel", () => {
// We have to switch the EOL characters after the XML declaration.
const nel = nl.slice(0, xmlDeclEnd).replace("1.0", "1.1") +
nl.slice(xmlDeclEnd).replace(/\n/g, "\u0085");

test({
name: "one chunk",
xml: nel,
expect,
});

test({
name: "char-by-char",
expect,
fn(parser) {
for (const x of nel) {
parser.write(x);
}
parser.close();
},
});
});

describe("ls", () => {
// We have to switch the EOL characters after the XML declaration.
const ls = nl.slice(0, xmlDeclEnd).replace("1.0", "1.1") +
nl.slice(xmlDeclEnd).replace(/\n/g, "\u2028");

test({
name: "one chunk",
xml: ls,
expect,
});

test({
name: "char-by-char",
expect,
fn(parser) {
for (const x of ls) {
parser.write(x);
}
parser.close();
},
});
});
});

describe("bad start", () => {
const xml = `
<?xml version="1.0" encoding="utf-8"?><doc/>`;
Expand Down

0 comments on commit 03b1567

Please sign in to comment.