From 1ff2d6a762b4ac0dd0f13bce9e9b59bc008c74bf Mon Sep 17 00:00:00 2001 From: Louis-Dominique Dubeau Date: Mon, 20 Aug 2018 08:56:34 -0400 Subject: [PATCH] feat: add support for parsing fragments --- README.md | 29 ++++++- lib/saxes.js | 111 ++++++++++++++------------- test/fragments.js | 188 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 273 insertions(+), 55 deletions(-) create mode 100644 test/fragments.js diff --git a/README.md b/README.md index 4a00d077..c7a56d8d 100644 --- a/README.md +++ b/README.md @@ -84,7 +84,7 @@ with the ``@private`` tag, are private. If you use anything private, that's at your own peril. -If there's a mistake in the documenation, raise an issue. If you just assume, +If there's a mistake in the documentation, raise an issue. If you just assume, you may assume incorrectly. ## Summary Usage Information @@ -119,16 +119,23 @@ Pass the following arguments to the parser function. All are optional. Settings supported: -* `xmlns` - Boolean. If true, then namespaces are supported. Default +* `xmlns` - Boolean. If `true`, then namespaces are supported. Default is `false`. -* `position` - Boolean. If false, then don't track line/col/position. Unset is +* `position` - Boolean. If `false`, then don't track line/col/position. Unset is treated as `true`. Default is unset. * `fileName` - String. Set a file name for error reporting. This is useful only when tracking positions. You may leave it unset, in which case the file name in error messages will be `undefined`. +* `fragment` - Boolean. If `true`, parse the XML as an XML fragment. Default is + `false`. + +* `additionalNamespaces` - A plain object whose key, value pairs define + namespaces known before parsing the XML file. It is not legal to pass + bindings for the namespaces `"xml"` or `"xmlns"`. + ### Methods `write` - Write bytes onto the stream. You don't have to do this all at @@ -167,6 +174,22 @@ are also in the exported `EVENTS` array. 
See the JSDOC comments in the source code for a description of each supported event. +### Parsing XML Fragments + +The XML specification does not define any method by which to parse XML +fragments. However, there are usage scenarios in which it is desirable to parse +fragments. In order to allow this, saxes provides two initialization options. + +If you pass the option `fragment: true` to the parser constructor, the parser +will expect an XML fragment. It essentially starts with a parsing state +equivalent to the one it would be in if `parser.write("<foo>")` had been called +right after initialization. In other words, it expects content which is +acceptable inside an element. This also turns off well-formedness checks that +are inappropriate when parsing a fragment. + +The other option is `additionalNamespaces`, which allows you to define +additional prefix-to-URI bindings known before parsing starts. + ## FAQ Q. Why has saxes dropped support for limiting the size of data chunks passed to diff --git a/lib/saxes.js b/lib/saxes.js index e671b126..5262228d 100644 --- a/lib/saxes.js +++ b/lib/saxes.js @@ -114,35 +114,42 @@ function isEntityStartChar(c) { return isNameStartChar(c) || c === HASH; } -function xmlNSCheck(parser, uri) { - if (uri !== XML_NAMESPACE) { +function nsMappingCheck(parser, mapping) { + const { xml, xmlns } = mapping; + if (xml && xml !== XML_NAMESPACE) { parser.fail(`xml prefix must be bound to ${XML_NAMESPACE}.`); } -} -function xmlnsNSCheck(parser, uri) { - if (uri !== XMLNS_NAMESPACE) { + if (xmlns && xmlns !== XMLNS_NAMESPACE) { parser.fail(`xmlns prefix must be bound to ${XMLNS_NAMESPACE}.`); } - else { - parser.fail(`may not assign a prefix (even "xmlns") to the URI \ -${XMLNS_NAMESPACE}.`); - } -} -function defaultNSCheck(parser, uri) { - if (uri === XMLNS_NAMESPACE || uri === XML_NAMESPACE) { - parser.fail(`the default namespace may not be set to ${uri}.`); + for (const local of Object.keys(mapping)) { + const uri = mapping[local]; + switch 
(uri) { + case XMLNS_NAMESPACE: + parser.fail(local === "" ? + `the default namespace may not be set to ${uri}.` : + `may not assign a prefix (even "xmlns") to the URI \ +${XMLNS_NAMESPACE}.`); + break; + case XML_NAMESPACE: + switch (local) { + case "xml": + // Assigning the XML namespace to "xml" is fine. + break; + case "": + parser.fail(`the default namespace may not be set to ${uri}.`); + break; + default: + parser.fail("may not assign the xml namespace to another prefix."); + } + break; + default: + } } } -const nsChecks = { - __proto__: null, - xml: xmlNSCheck, - xmlns: xmlnsNSCheck, - "": defaultNSCheck, -}; - /** * Data structure for an XML tag. * @@ -219,10 +226,17 @@ const nsChecks = { * @typedef SaxesOptions * * @property {boolean} [xmlns] Whether to track namespaces. Unset means - *``false``. + * ``false``. + * + * @property {boolean} [fragment] Whether to accept XML fragments. Unset means + * ``false``. + * + * @property {Object} [additionalNamespaces] A plain object whose key, value + * pairs define namespaces known before parsing the XML file. It is not legal + * to pass bindings for the namespaces ``"xml"`` or ``"xmlns"``. * * @property {boolean} [position] Whether to track positions. Unset means - *``true``. + * ``true``. * * @property {string} [fileName] A file name to use for error reporting. Leaving * this unset will report a file name of "undefined". "File name" is a loose @@ -277,9 +291,7 @@ class SaxesParser { this.q = null; this.tags = []; this.initial = true; - this.closedRoot = this.sawRoot = this.inRoot = false; this.tag = null; - this.state = S_BEGIN_WHITESPACE; /** * A map of entity name to expansion. * @@ -287,9 +299,18 @@ class SaxesParser { */ this.ENTITIES = Object.create(XML_ENTITIES); this.attribList = []; - this.reportedTextBeforeRoot = false; - this.reportedTextAfterRoot = false; - this.xmlDeclPossible = true; + + // The logic is organized so as to minimize the need to check + // this.opt.fragment while parsing. 
+ + this.state = this.opt.fragment ? S_TEXT : S_BEGIN_WHITESPACE; + // We want these to be all true if we are dealing with a fragment. + this.reportedTextBeforeRoot = this.reportedTextAfterRoot = + this.closedRoot = this.sawRoot = this.inRoot = this.opt.fragment; + // An XML declaration is initially possible only when parsing whole + // documents. + this.xmlDeclPossible = !this.opt.fragment; + this.piIsXMLDecl = false; this.xmlDeclState = S_XML_DECL_NAME_START; this.xmlDeclExpects = ["version"]; @@ -302,7 +323,12 @@ class SaxesParser { this.textNodeCheckedBefore = 0; if (this.opt.xmlns) { - this.ns = Object.assign({}, rootNS); + this.ns = Object.assign({ __proto__: null }, rootNS); + const additional = this.opt.additionalNamespaces; + if (additional) { + nsMappingCheck(this, additional); + Object.assign(this.ns, additional); + } } this.trackPosition = this.opt.position !== false; @@ -1598,34 +1624,15 @@ class SaxesParser { if (this.opt.xmlns) { // emit namespace binding events const { ns, attributes } = tag; - // eslint-disable-next-line prefer-const - for (let [name, uri] of attribList) { + for (const [name, uri] of attribList) { const { prefix, local } = this.qname(name, true); if (prefix === "xmlns") { - // namespace binding attribute. 
push the binding into scope - uri = uri.trim(); - const check = nsChecks[local]; - - if (check) { - check(this, uri); - } - else { - switch (uri) { - case XMLNS_NAMESPACE: - this.fail(`may not assign a prefix (even "xmlns") to the URI \ -${XMLNS_NAMESPACE}.`); - break; - case XML_NAMESPACE: - this.fail("may not assign the xml namespace to another prefix."); - break; - default: - } - } - - ns[local] = uri; + ns[local] = uri.trim(); } } + nsMappingCheck(this, ns); + { // add namespace info to tag const { prefix, local } = this.qname(this.tagName); @@ -1688,7 +1695,7 @@ ${XMLNS_NAMESPACE}.`); tag.isSelfClosing = !!selfClosing; // process the tag - if (this.closedRoot) { + if (!this.opt.fragment && this.closedRoot) { this.fail("documents may contain only one root."); } else { diff --git a/test/fragments.js b/test/fragments.js new file mode 100644 index 00000000..8b20c2b9 --- /dev/null +++ b/test/fragments.js @@ -0,0 +1,188 @@ +"use strict"; + +const { test } = require("."); + +describe("fragments", () => { + test({ + name: "empty", + xml: "", + expect: [], + opt: { + xmlns: true, + fragment: true, + }, + }); + + test({ + name: "text only", + xml: " Something ", + expect: [ + ["text", " Something "], + ], + opt: { + xmlns: true, + fragment: true, + }, + }); + + test({ + name: "text and elements", + xml: "Something <blah><more>blah</more></blah> something", + expect: [ + ["text", "Something "], + ["opentagstart", { + name: "blah", + attributes: {}, + ns: {}, + }], + ["opentag", { + name: "blah", + local: "blah", + prefix: "", + uri: "", + attributes: {}, + ns: {}, + isSelfClosing: false, + }], + ["opentagstart", { + name: "more", + attributes: {}, + ns: {}, + }], + ["opentag", { + name: "more", + local: "more", + prefix: "", + uri: "", + attributes: {}, + ns: {}, + isSelfClosing: false, + }], + ["text", "blah"], + ["closetag", { + name: "more", + local: "more", + prefix: "", + uri: "", + attributes: {}, + ns: {}, + isSelfClosing: false, + }], + ["closetag", { + name: "blah", + local: "blah", + 
prefix: "", + uri: "", + attributes: {}, + ns: {}, + isSelfClosing: false, + }], + ["text", " something"], + ], + opt: { + xmlns: true, + fragment: true, + }, + }); + + test({ + name: "two top-level elements", + xml: "Something <blah>1</blah><more>2</more> something", + expect: [ + ["text", "Something "], + ["opentagstart", { + name: "blah", + attributes: {}, + ns: {}, + }], + ["opentag", { + name: "blah", + local: "blah", + prefix: "", + uri: "", + attributes: {}, + ns: {}, + isSelfClosing: false, + }], + ["text", "1"], + ["closetag", { + name: "blah", + local: "blah", + prefix: "", + uri: "", + attributes: {}, + ns: {}, + isSelfClosing: false, + }], + ["opentagstart", { + name: "more", + attributes: {}, + ns: {}, + }], + ["opentag", { + name: "more", + local: "more", + prefix: "", + uri: "", + attributes: {}, + ns: {}, + isSelfClosing: false, + }], + ["text", "2"], + ["closetag", { + name: "more", + local: "more", + prefix: "", + uri: "", + attributes: {}, + ns: {}, + isSelfClosing: false, + }], + ["text", " something"], + ], + opt: { + xmlns: true, + fragment: true, + }, + }); + + test({ + name: "namespaces", + xml: "Something <foo:blah>1</foo:blah> something", + expect: [ + ["text", "Something "], + ["opentagstart", { + name: "foo:blah", + attributes: {}, + ns: {}, + }], + ["opentag", { + name: "foo:blah", + local: "blah", + prefix: "foo", + uri: "foo-uri", + attributes: {}, + ns: {}, + isSelfClosing: false, + }], + ["text", "1"], + ["closetag", { + name: "foo:blah", + local: "blah", + prefix: "foo", + uri: "foo-uri", + attributes: {}, + ns: {}, + isSelfClosing: false, + }], + ["text", " something"], + ], + opt: { + xmlns: true, + fragment: true, + additionalNamespaces: { + foo: "foo-uri", + }, + }, + }); +});