Skip to content

Commit

Permalink
fix: fix bug with initial eol characters
Browse files Browse the repository at this point in the history
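The previous code incorrectly handled eol characters at the very start of a
file: a leading whitespace character was skipped by incrementing the index
directly instead of being read through getCode. Initial whitespace is now
consumed by a getCode-based loop in sBeginWhitespace, which also records
that an XML declaration is no longer possible once a space has been seen.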
  • Loading branch information
lddubeau committed Oct 2, 2019
1 parent c485d26 commit 7b3db75
Show file tree
Hide file tree
Showing 2 changed files with 124 additions and 44 deletions.
26 changes: 20 additions & 6 deletions lib/saxes.js
Original file line number Diff line number Diff line change
Expand Up @@ -966,18 +966,32 @@ class SaxesParser {
if (c === 0xFEFF) {
this.i++;
}
else if (isS(c)) {
this.i++;
// An XML declaration cannot appear after initial spaces.
this.xmlDeclPossible = false;
}

this.state = S_BEGIN_WHITESPACE;
}

/** @private */
sBeginWhitespace() {
const c = this.skipSpaces();
// This initial loop is a specialized version of skipSpaces. We need to know
// whether we've encountered spaces or not because as soon as we run into a
// space, an XML declaration is no longer possible. Rather than slow down
// skipSpaces even in places where we don't care whether it skipped anything
// or not, we use a specialized loop here.
let c;
let sawSpace = false;
while (true) {
c = this.getCode();
if (c === undefined || !isS(c)) {
break;
}

sawSpace = true;
}

if (sawSpace) {
this.xmlDeclPossible = false;
}

if (c === LESS) {
this.state = S_OPEN_WAKA;
}
Expand Down
142 changes: 104 additions & 38 deletions test/eol-handling.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@

const { test } = require(".");

/* eslint-disable linebreak-style */
const xml = `\
describe("eol handling", () => {
describe("mixed", () => {
/* eslint-disable linebreak-style */
const xml = `\
<?xml version="1.0" encoding="utf-8"?>
<moo a="12
Expand All @@ -15,44 +17,108 @@ const xml = `\
xxxx
</moo>
`;
/* eslint-enable linebreak-style */

const expect = [
["text", "\n\n"],
["opentagstart", { name: "moo", attributes: {} }],
["opentag", {
name: "moo",
attributes: {
a: "12\n 3",
},
isSelfClosing: false,
}],
["text", "\n abc\n def\r\n ghi\n\n xx\nxx\n"],
["closetag", {
name: "moo",
attributes: {
a: "12\n 3",
},
isSelfClosing: false,
}],
["text", "\n"],
];
/* eslint-enable linebreak-style */

describe("eol handling", () => {
test({
name: "one chunk",
xml,
expect,
const expect = [
["text", "\n\n"],
["opentagstart", { name: "moo", attributes: {} }],
["opentag", {
name: "moo",
attributes: {
a: "12\n 3",
},
isSelfClosing: false,
}],
["text", "\n abc\n def\r\n ghi\n\n xx\nxx\n"],
["closetag", {
name: "moo",
attributes: {
a: "12\n 3",
},
isSelfClosing: false,
}],
["text", "\n"],
];

test({
name: "one chunk",
xml,
expect,
});

test({
name: "char-by-char",
expect,
fn(parser) {
for (const x of xml) {
parser.write(x);
}
parser.close();
},
});
});

test({
name: "char-by-char",
expect,
fn(parser) {
for (const x of xml) {
parser.write(x);
}
parser.close();
},
describe("bad start", () => {
const xml = `
<?xml version="1.0" encoding="utf-8"?><doc/>`;
const expect = [
[
"error",
"2:6: an XML declaration must be at the start of the document.",
],
["opentagstart", {
name: "doc",
attributes: {},
}],
["opentag", {
name: "doc",
attributes: {},
isSelfClosing: true,
}],
["closetag", {
name: "doc",
attributes: {},
isSelfClosing: true,
}],
];

describe("with nl as eol", () => {
test({
name: "one chunk",
xml,
expect,
});

test({
name: "char-by-char",
expect,
fn(parser) {
for (const x of xml) {
parser.write(x);
}
parser.close();
},
});
});

describe("with crnl as eol", () => {
const crnl = xml.replace(/\n/g, "\r\n");
test({
name: "one chunk",
xml: crnl,
expect,
});

test({
name: "char-by-char",
expect,
fn(parser) {
for (const x of crnl) {
parser.write(x);
}
parser.close();
},
});
});
});
});
Expand Down

0 comments on commit 7b3db75

Please sign in to comment.