Skip to content

Commit

Permalink
feat: handle XML declarations
Browse files Browse the repository at this point in the history
  • Loading branch information
lddubeau committed Jul 6, 2018
1 parent cf9c589 commit 5258939
Show file tree
Hide file tree
Showing 4 changed files with 345 additions and 14 deletions.
178 changes: 167 additions & 11 deletions lib/saxes.js
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,13 @@ const S_ATTRIB_VALUE_ENTITY_Q = S_INDEX++; // <foo bar="&quot;"
const S_ATTRIB_VALUE_ENTITY_U = S_INDEX++; // <foo bar=&quot
const S_CLOSE_TAG = S_INDEX++; // </a
const S_CLOSE_TAG_SAW_WHITE = S_INDEX++; // </a >
// Sub-states used while parsing the name="value" pairs of an XML declaration.
// They are stored in `xmlDeclState`, not in the main parser `state`.
const S_XML_DECL_NAME_START = S_INDEX++; // <?xml   (skipping whitespace, waiting for the first character of a name)
const S_XML_DECL_NAME = S_INDEX++; // <?xml foo   (accumulating the name)
const S_XML_DECL_EQ = S_INDEX++; // <?xml foo   (name ended on whitespace, waiting for "=")
const S_XML_DECL_VALUE_START = S_INDEX++; // <?xml foo=   (waiting for the opening quote)
const S_XML_DECL_VALUE = S_INDEX++; // <?xml foo="bar"   (accumulating the value up to the closing quote)

// Marker stored in `requiredSeparator` once a value has been closed: the next
// name must be preceded by whitespace.
const SPACE_SEPARATOR = "SPACE_SEPARATOR";

exports.EVENTS = [
"text",
Expand All @@ -88,7 +95,8 @@ exports.EVENTS = [

const buffers = [
"comment", "openWakaBang", "textNode", "tagName", "doctype", "piTarget",
"piBody", "entity", "attribName", "attribValue", "cdata",
"piBody", "entity", "attribName", "attribValue", "cdata", "xmlDeclName",
"xmlDeclValue"
];

let Stream;
Expand Down Expand Up @@ -140,6 +148,16 @@ class SAXParser {
this.attribList = [];
this.reportedTextBeforeRoot = false;
this.reportedTextAfterRoot = false;
this.xmlDeclPossible = true;
this.piIsXMLDecl = false;
this.xmlDeclState = S_XML_DECL_NAME_START;
this.xmlDeclExpects = ["version"];
this.requiredSeparator = undefined;
this.xmlDecl = {
version: undefined,
encoding: undefined,
standalone: undefined,
};

// namespaces form a prototype chain.
// it always points at the current tag,
Expand Down Expand Up @@ -264,6 +282,10 @@ class SAXParser {
}
this.textNode = c;
this.state = S_TEXT;
this.xmlDeclPossible = false;
}
else {
this.xmlDeclPossible = false;
}
continue;

Expand Down Expand Up @@ -328,14 +350,17 @@ class SAXParser {
if (c === "!") {
this.state = S_OPEN_WAKA_BANG;
this.openWakaBang = "";
this.xmlDeclPossible = false;
}
else if (isMatch(NAME_START_CHAR, c)) {
this.state = S_OPEN_TAG;
this.tagName = c;
this.xmlDeclPossible = false;
}
else if (c === "/") {
this.state = S_CLOSE_TAG;
this.tagName = "";
this.xmlDeclPossible = false;
}
else if (c === "?") {
this.state = S_PI;
Expand All @@ -350,6 +375,7 @@ class SAXParser {
}
this.textNode += `<${c}`;
this.state = S_TEXT;
this.xmlDeclPossible = false;
}
continue;

Expand Down Expand Up @@ -511,11 +537,13 @@ class SAXParser {
continue;

case S_PI:
if (c === "?") {
this.state = S_PI_ENDING;
}
else if (isWhitespace(c)) {
this.state = S_PI_BODY;
if (c === "?" || isWhitespace(c)) {
this.piIsXMLDecl = this.piTarget === "xml";
if (this.piIsXMLDecl && !this.xmlDeclPossible) {
this.fail(
"an XML declaration must be at the start of the document.");
}
this.state = c === "?" ? S_PI_ENDING : S_PI_BODY;
}
else {
if (!(isMatch(this.piTarget.length ?
Expand All @@ -532,22 +560,149 @@ class SAXParser {
continue;

case S_PI_BODY:
if (!this.piBody && isWhitespace(c)) {
continue;
}
else if (c === "?") {
// The question mark character is not valid inside any of the XML
// declaration name/value pairs.
if (c === "?") {
this.state = S_PI_ENDING;
}
else if (this.piIsXMLDecl) {
switch (this.xmlDeclState) {
  // Sub-state machine for the name="value" pairs of an XML declaration.
  // `c` is the current character; the enclosing code has already handled "?".
  case S_XML_DECL_NAME_START:
    // Skip whitespace until the first character of a name shows up.
    if (!isWhitespace(c)) {
      // A name that directly follows the closing quote of the previous value
      // is an error: the pairs must be separated by whitespace.
      if (this.requiredSeparator === SPACE_SEPARATOR) {
        this.fail("whitespace required.");
      }
      this.xmlDeclState = S_XML_DECL_NAME;
      this.xmlDeclName = c;
    }

    // Either we just consumed the separating whitespace or we reported its
    // absence; in both cases the requirement is discharged.
    this.requiredSeparator = undefined;
    break;
  case S_XML_DECL_NAME:
    if (isWhitespace(c) || c === "=") {
      // The name is complete: check it against what may legally come next.
      if (!this.xmlDeclExpects.includes(this.xmlDeclName)) {
        // The message depends on how many names are still acceptable, so the
        // switch is on the expected list, not on the name that was read.
        switch (this.xmlDeclExpects.length) {
          case 0:
            this.fail("did not expect any more name/value pairs.");
            break;
          case 1:
            this.fail(`expected the name ${this.xmlDeclExpects[0]}.`);
            break;
          default:
            this.fail(`expected one of ${this.xmlDeclExpects.join(", ")}`);
        }
      }

      this.xmlDeclState = (c === "=") ? S_XML_DECL_VALUE_START :
        S_XML_DECL_EQ;
    }
    else {
      this.xmlDeclName += c;
    }
    break;
  case S_XML_DECL_EQ:
    // The name ended on whitespace; the next non-whitespace must be "=".
    if (!isWhitespace(c)) {
      if (c !== "=") {
        this.fail("value required.");
      }
      this.xmlDeclState = S_XML_DECL_VALUE_START;
    }
    break;
  case S_XML_DECL_VALUE_START:
    // Waiting for the quote that opens the value.
    if (!isWhitespace(c)) {
      if (!isQuote(c)) {
        this.fail("value must be quoted.");
        // Error recovery: treat the next space as the end of the value.
        this.q = " ";
      }
      else {
        this.q = c;
      }
      this.xmlDeclState = S_XML_DECL_VALUE;
      this.xmlDeclValue = "";
    }
    break;
  case S_XML_DECL_VALUE:
    if (c !== this.q) {
      this.xmlDeclValue += c;
    }
    else {
      // Closing quote: validate the value, record it, and narrow down the set
      // of names that may still follow (version, then encoding, then
      // standalone).
      switch (this.xmlDeclName) {
        case "version":
          if (!/^1\.[0-9]+$/.test(this.xmlDeclValue)) {
            this.fail("version number must match /^1\\.[0-9]+$/.");
          }
          this.xmlDeclExpects = ["encoding", "standalone"];
          this.xmlDecl.version = this.xmlDeclValue;
          break;
        case "encoding":
          if (!/^[A-Za-z][A-Za-z0-9._-]*$/.test(this.xmlDeclValue)) {
            // The message quotes the pattern that is actually tested above.
            this.fail(
              "encoding value must match /^[A-Za-z][A-Za-z0-9._-]*$/.");
          }
          this.xmlDeclExpects = ["standalone"];
          this.xmlDecl.encoding = this.xmlDeclValue;
          break;
        case "standalone":
          if (this.xmlDeclValue !== "yes" && this.xmlDeclValue !== "no") {
            this.fail("standalone value must match \"yes\" or \"no\".");
          }
          this.xmlDeclExpects = [];
          this.xmlDecl.standalone = this.xmlDeclValue;
          break;
        default:
          // We don't need to raise an error here since we've already
          // raised one when checking what name was expected.
      }
      this.xmlDeclName = this.xmlDeclValue = "";
      this.xmlDeclState = S_XML_DECL_NAME_START;
      // The next pair, if any, must be separated from this one by whitespace.
      this.requiredSeparator = SPACE_SEPARATOR;
    }
    break;
  default:
    // Internal invariant violation. Error takes the message as its first
    // argument.
    throw new Error(
      `Unknown XML declaration state: ${this.xmlDeclState}`);
}
}
else if (!this.piBody && isWhitespace(c)) {
continue;
}
else {
this.piBody += c;
}
continue;

case S_PI_ENDING:
if (c === ">") {
if (this.piIsXMLDecl) {
if (c === ">") {
if (this.piTarget !== "xml") {
this.fail("processing instructions are not allowed before root.");
}
else if (this.xmlDeclState !== S_XML_DECL_NAME_START) {
this.fail("XML declaration is incomplete.");
}
else if (this.xmlDeclExpects.includes("version")) {
this.fail("XML declaration must contain a version.");
}
this.xmlDeclName = this.xmlDeclValue = "";
this.requiredSeparator = undefined;
this.state = S_TEXT;
}
else {
// We got here because the previous character was a ?, but the
// question mark character is not valid inside any of the XML
// declaration name/value pairs.
this.fail(
"The character ? is disallowed anywhere in XML declarations.");
}
}
else if (c === ">") {
if (this.piTarget.trim() === "") {
this.fail("processing instruction without a target.");
}
else if (this.piTarget.trim().toLowerCase() === "xml") {
this.fail("the XML declaration must appear at the start of the document.");
}
this.emitNode("onprocessinginstruction", {
target: this.piTarget,
body: this.piBody,
Expand All @@ -566,6 +721,7 @@ class SAXParser {
this.piBody += `?${c}`;
this.state = S_PI_BODY;
}
this.xmlDeclPossible = false;
continue;

case S_OPEN_TAG:
Expand Down
2 changes: 1 addition & 1 deletion test/end_empty_stream.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,5 @@ it("end empty stream", () => {
// Ending an empty stream must throw: the document has no root element.
expect(() => saxesStream.end()).to.throw(
Error,
/^undefined:1:0: document must contain a root element.$/);
/^undefined:1:0: document must contain a root element.$/);
});
4 changes: 2 additions & 2 deletions test/issue-84.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@
// https://github.com/isaacs/sax-js/issues/84
require(".").test({
name: "issue 84 (unbalanced quotes in pi)",
xml: "<?has unbalanced \"quotes?><xml>body</xml>",
xml: "<xml><?has unbalanced \"quotes?>body</xml>",
expect: [
["processinginstruction", { target: "has", body: "unbalanced \"quotes" }],
["opentagstart", { name: "xml", attributes: {} }],
["opentag", { name: "xml", attributes: {}, isSelfClosing: false }],
["processinginstruction", { target: "has", body: "unbalanced \"quotes" }],
["text", "body"],
["closetag", "xml"],
],
Expand Down
Loading

0 comments on commit 5258939

Please sign in to comment.