Skip to content

Commit

Permalink
fix: pay attention to comments and processing instructions in DTDs
Browse files Browse the repository at this point in the history
Closes #19
  • Loading branch information
lddubeau committed Jun 25, 2019
1 parent a65586e commit 52ffd90
Show file tree
Hide file tree
Showing 2 changed files with 163 additions and 13 deletions.
111 changes: 98 additions & 13 deletions lib/saxes.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,21 @@ const XML_ENTITIES = {

// Parser state names. Each value is spelled like the parser method that
// handles the state (for instance S_DTD_PI is "sDTDPI" and the method is
// sDTDPI). The trailing comment on each line shows the input consumed so far
// when the parser is in that state.
const S_INITIAL = "sInitial"; // initial state
const S_BEGIN_WHITESPACE = "sBeginWhitespace"; // leading whitespace
const S_DOCTYPE = "sDoctype"; // <!DOCTYPE
const S_DOCTYPE_QUOTE = "sDoctypeQuote"; // <!DOCTYPE "//blah
const S_DTD = "sDTD"; // <!DOCTYPE "//blah" [ ...
const S_DTD_QUOTED = "sDTDQuoted"; // <!DOCTYPE "//blah" [ "foo
const S_DTD_OPEN_WAKA = "sDTDOpenWaka"; // <!DOCTYPE "//blah" [ <
const S_DTD_OPEN_WAKA_BANG = "sDTDOpenWakaBang"; // <!DOCTYPE "//blah" [ <!
const S_DTD_COMMENT = "sDTDComment"; // <!--
const S_DTD_COMMENT_ENDING = "sDTDCommentEnding"; // <!-- blah -
const S_DTD_COMMENT_ENDED = "sDTDCommentEnded"; // <!-- blah --
const S_DTD_PI = "sDTDPI"; // <?
const S_DTD_PI_ENDING = "sDTDPIEnding"; // <?hi "there" ?
const S_TEXT = "sText"; // general stuff
const S_ENTITY = "sEntity"; // &amp and such
const S_OPEN_WAKA = "sOpenWaka"; // <
const S_OPEN_WAKA_BANG = "sOpenWakaBang"; // <!...
const S_DOCTYPE = "sDoctype"; // <!DOCTYPE
const S_DOCTYPE_QUOTED = "sDoctypeQuoted"; // <!DOCTYPE "//blah
const S_DOCTYPE_DTD = "sDoctypeDTD"; // <!DOCTYPE "//blah" [ ...
const S_DOCTYPE_DTD_QUOTED = "sDoctypeDTDQuoted"; // <!DOCTYPE "//blah" [ "foo
const S_COMMENT = "sComment"; // <!--
const S_COMMENT_ENDING = "sCommentEnding"; // <!-- blah -
const S_COMMENT_ENDED = "sCommentEnded"; // <!-- blah --
Expand Down Expand Up @@ -102,7 +109,7 @@ function isQuote(c) {
// The two quote characters.
const QUOTES = [DQUOTE, SQUOTE];

// Lists of characters that stop a capture. NOTE(review): these are presumably
// code points, like the values compared against getCode() results -- confirm.
const DOCTYPE_TERMINATOR = [...QUOTES, OPEN_BRACKET, GREATER];
const DOCTYPE_DTD_TERMINATOR = [...QUOTES, CLOSE_BRACKET];
// Passed to captureTo by sDTD. LESS is included so that sDTD can notice the
// start of a comment or processing instruction inside the DTD.
const DTD_TERMINATOR = [...QUOTES, LESS, CLOSE_BRACKET];
const XML_DECL_NAME_TERMINATOR = [EQUAL, QUESTION, ...S_LIST];
const ATTRIB_VALUE_UNQUOTED_TERMINATOR = [...S_LIST, GREATER, AMP, LESS];

Expand Down Expand Up @@ -1027,17 +1034,17 @@ class SaxesParser {
else if (c) {
this.doctype += String.fromCodePoint(c);
if (c === OPEN_BRACKET) {
this.state = S_DOCTYPE_DTD;
this.state = S_DTD;
}
else if (isQuote(c)) {
this.state = S_DOCTYPE_QUOTED;
this.state = S_DOCTYPE_QUOTE;
this.q = c;
}
}
}

/** @private */
sDoctypeQuoted() {
sDoctypeQuote() {
const { q } = this;
if (this.captureToChar(q, "doctype")) {
this.doctype += String.fromCodePoint(q);
Expand All @@ -1047,8 +1054,8 @@ class SaxesParser {
}

/** @private */
sDoctypeDTD() {
const c = this.captureTo(DOCTYPE_DTD_TERMINATOR, "doctype");
sDTD() {
const c = this.captureTo(DTD_TERMINATOR, "doctype");
if (!c) {
return;
}
Expand All @@ -1057,22 +1064,100 @@ class SaxesParser {
if (c === CLOSE_BRACKET) {
this.state = S_DOCTYPE;
}
else if (c === LESS) {
this.state = S_DTD_OPEN_WAKA;
}
else if (isQuote(c)) {
this.state = S_DOCTYPE_DTD_QUOTED;
this.state = S_DTD_QUOTED;
this.q = c;
}
}

/**
 * Scans the inside of a quoted string within the DTD.
 *
 * `this.q` holds the quote character that opened the string. Once
 * `captureToChar` reports that this character was reached, the quote is
 * appended to `this.doctype`, the parser goes back to the DTD state and
 * `this.q` is cleared.
 *
 * NOTE(review): `captureToChar(q, "doctype")` presumably accumulates the text
 * it skips into `this.doctype` and returns a truthy value only when `q` was
 * found -- confirm against its definition.
 *
 * @private
 */
sDoctypeDTDQuoted() {
sDTDQuoted() {
const { q } = this;
if (this.captureToChar(q, "doctype")) {
this.doctype += String.fromCodePoint(q);
this.state = S_DOCTYPE_DTD;
this.state = S_DTD;
this.q = null;
}
}

/** @private */
sDTDOpenWaka() {
const c = this.getCode();
this.doctype += String.fromCodePoint(c);
switch (c) {
case BANG:
this.state = S_DTD_OPEN_WAKA_BANG;
this.openWakaBang = "";
break;
case QUESTION:
this.state = S_DTD_PI;
break;
default:
this.state = S_DTD;
}
}

/** @private */
sDTDOpenWakaBang() {
const char = String.fromCodePoint(this.getCode());
const owb = this.openWakaBang += char;
this.doctype += char;
if (owb !== "-") {
this.state = owb === "--" ? S_DTD_COMMENT : S_DTD;
this.openWakaBang = "";
}
}

/** @private */
sDTDComment() {
if (this.captureToChar(MINUS, "doctype")) {
this.doctype += "-";
this.state = S_DTD_COMMENT_ENDING;
}
}

/** @private */
sDTDCommentEnding() {
const c = this.getCode();
this.doctype += String.fromCodePoint(c);
this.state = c === MINUS ? S_DTD_COMMENT_ENDED : S_DTD_COMMENT;
}

/** @private */
sDTDCommentEnded() {
const c = this.getCode();
this.doctype += String.fromCodePoint(c);
if (c === GREATER) {
this.state = S_DTD;
}
else {
this.fail("malformed comment.");
// <!-- blah -- bloo --> will be recorded as
// a comment of " blah -- bloo "
this.state = S_DTD_COMMENT;
}
}

/** @private */
sDTDPI() {
if (this.captureToChar(QUESTION, "doctype")) {
this.doctype += "?";
this.state = S_DTD_PI_ENDING;
}
}

/** @private */
sDTDPIEnding() {
const c = this.getCode();
this.doctype += String.fromCodePoint(c);
if (c === GREATER) {
this.state = S_DTD;
}
}

/** @private */
sComment() {
if (this.captureToChar(MINUS, "comment")) {
Expand Down
65 changes: 65 additions & 0 deletions test/dtd.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
"use strict";

const { test } = require(".");

// Every document in this suite has the same shape: an XML declaration, a
// DOCTYPE for "root" whose internal subset is the single line under test, and
// an empty <root/> element.
const XML_DECLARATION = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>";

/**
 * Build the fixture passed to `test` for one internal-subset line.
 *
 * @param {string} name The name of the test.
 *
 * @param {string} subset The single line placed between `[` and `]` in the
 * DOCTYPE. It must come back verbatim in the "doctype" event.
 *
 * @returns {{name: string, xml: string, expect: Array}} The fixture.
 */
function makeDTDCase(name, subset) {
  return {
    name,
    xml: `${XML_DECLARATION}\n<!DOCTYPE root [\n${subset}\n]>\n<root/>`,
    expect: [
      ["text", "\n"],
      ["doctype", ` root [\n${subset}\n]`],
      ["text", "\n"],
      ["opentagstart", { name: "root", attributes: {} }],
      ["opentag", { name: "root", attributes: {}, isSelfClosing: true }],
      ["closetag", { name: "root", attributes: {}, isSelfClosing: true }],
    ],
  };
}

// [test name, internal subset line]
const DTD_CASES = [
  ["DTD with comment containing a quote", "<!-- I'm a test. -->"],
  [
    "DTD with processing instruction containing a quote",
    "<? I'm a test. ?>",
  ],
  ["DTD with ]> in a string", "<!NOTATION not1 SYSTEM \"]>\">"],
];

describe("dtd", () => {
  for (const [name, subset] of DTD_CASES) {
    test(makeDTDCase(name, subset));
  }
});

0 comments on commit 52ffd90

Please sign in to comment.