Skip to content

Commit

Permalink
feat: add support for parsing fragments
Browse files Browse the repository at this point in the history
  • Loading branch information
lddubeau committed Aug 20, 2018
1 parent 4ffe5ed commit 1ff2d6a
Show file tree
Hide file tree
Showing 3 changed files with 273 additions and 55 deletions.
29 changes: 26 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ with the ``@private`` tag, are private.

If you use anything private, that's at your own peril.

If there's a mistake in the documenation, raise an issue. If you just assume,
If there's a mistake in the documentation, raise an issue. If you just assume,
you may assume incorrectly.

## Summary Usage Information
Expand Down Expand Up @@ -119,16 +119,23 @@ Pass the following arguments to the parser function. All are optional.

Settings supported:

* `xmlns` - Boolean. If true, then namespaces are supported. Default
* `xmlns` - Boolean. If `true`, then namespaces are supported. Default
is `false`.

* `position` - Boolean. If false, then don't track line/col/position. Unset is
* `position` - Boolean. If `false`, then don't track line/col/position. Unset is
treated as `true`. Default is unset.

* `fileName` - String. Set a file name for error reporting. This is useful only
when tracking positions. You may leave it unset, in which case the file name
in error messages will be `undefined`.

* `fragment` - Boolean. If `true`, parse the XML as an XML fragment. Default is
`false`.

* `additionalNamespaces` - A plain object whose key, value pairs define
namespaces known before parsing the XML file. It is not legal to pass
bindings for the prefixes `"xml"` or `"xmlns"`.

### Methods

`write` - Write bytes onto the stream. You don't have to do this all at
Expand Down Expand Up @@ -167,6 +174,22 @@ are also in the exported `EVENTS` array.
See the JSDOC comments in the source code for a description of each supported
event.

### Parsing XML Fragments

The XML specification does not define any method by which to parse XML
fragments. However, there are usage scenarios in which it is desirable to parse
fragments. In order to allow this, saxes provides two initialization options.

If you pass the option `fragment: true` to the parser constructor, the parser
will expect an XML fragment. It essentially starts with a parsing state
equivalent to the one it would be in if `parser.write("<foo>")` had been called
right after initialization. In other words, it expects content which is
acceptable inside an element. This also turns off well-formedness checks that
are inappropriate when parsing a fragment.

The other option is `additionalNamespaces`, which allows you to define
additional prefix-to-URI bindings known before parsing starts.

## FAQ

Q. Why has saxes dropped support for limiting the size of data chunks passed to
Expand Down
111 changes: 59 additions & 52 deletions lib/saxes.js
Original file line number Diff line number Diff line change
Expand Up @@ -114,35 +114,42 @@ function isEntityStartChar(c) {
return isNameStartChar(c) || c === HASH;
}

function xmlNSCheck(parser, uri) {
if (uri !== XML_NAMESPACE) {
function nsMappingCheck(parser, mapping) {
const { xml, xmlns } = mapping;
if (xml && xml !== XML_NAMESPACE) {
parser.fail(`xml prefix must be bound to ${XML_NAMESPACE}.`);
}
}

function xmlnsNSCheck(parser, uri) {
if (uri !== XMLNS_NAMESPACE) {
if (xmlns && xmlns !== XMLNS_NAMESPACE) {
parser.fail(`xmlns prefix must be bound to ${XMLNS_NAMESPACE}.`);
}
else {
parser.fail(`may not assign a prefix (even "xmlns") to the URI \
${XMLNS_NAMESPACE}.`);
}
}

function defaultNSCheck(parser, uri) {
if (uri === XMLNS_NAMESPACE || uri === XML_NAMESPACE) {
parser.fail(`the default namespace may not be set to ${uri}.`);
for (const local of Object.keys(mapping)) {
const uri = mapping[local];
switch (uri) {
case XMLNS_NAMESPACE:
parser.fail(local === "" ?
`the default namespace may not be set to ${uri}.` :
`may not assign a prefix (even "xmlns") to the URI \
${XMLNS_NAMESPACE}.`);
break;
case XML_NAMESPACE:
switch (local) {
case "xml":
// Assigning the XML namespace to "xml" is fine.
break;
case "":
parser.fail(`the default namespace may not be set to ${uri}.`);
break;
default:
parser.fail("may not assign the xml namespace to another prefix.");
}
break;
default:
}
}
}

const nsChecks = {
__proto__: null,
xml: xmlNSCheck,
xmlns: xmlnsNSCheck,
"": defaultNSCheck,
};

/**
* Data structure for an XML tag.
*
Expand Down Expand Up @@ -219,10 +226,17 @@ const nsChecks = {
* @typedef SaxesOptions
*
* @property {boolean} [xmlns] Whether to track namespaces. Unset means
*``false``.
* ``false``.
*
* @property {boolean} [fragment] Whether to accept XML fragments. Unset means
* ``false``.
*
* @property {Object.<string, string>} [additionalNamespaces] A plain object
* whose key, value pairs define namespaces known before parsing the XML file.
* It is not legal to pass bindings for the prefixes ``"xml"`` or ``"xmlns"``.
*
* @property {boolean} [position] Whether to track positions. Unset means
*``true``.
* ``true``.
*
* @property {string} [fileName] A file name to use for error reporting. Leaving
* this unset will report a file name of "undefined". "File name" is a loose
Expand Down Expand Up @@ -277,19 +291,26 @@ class SaxesParser {
this.q = null;
this.tags = [];
this.initial = true;
this.closedRoot = this.sawRoot = this.inRoot = false;
this.tag = null;
this.state = S_BEGIN_WHITESPACE;
/**
* A map of entity name to expansion.
*
* @type {Object.<string, string>}
*/
this.ENTITIES = Object.create(XML_ENTITIES);
this.attribList = [];
this.reportedTextBeforeRoot = false;
this.reportedTextAfterRoot = false;
this.xmlDeclPossible = true;

// The logic is organized so as to minimize the need to check
// this.opt.fragment while parsing.

this.state = this.opt.fragment ? S_TEXT : S_BEGIN_WHITESPACE;
// We want these to be all true if we are dealing with a fragment.
this.reportedTextBeforeRoot = this.reportedTextAfterRoot =
this.closedRoot = this.sawRoot = this.inRoot = this.opt.fragment;
// An XML declaration is initially possible only when parsing whole
// documents.
this.xmlDeclPossible = !this.opt.fragment;

this.piIsXMLDecl = false;
this.xmlDeclState = S_XML_DECL_NAME_START;
this.xmlDeclExpects = ["version"];
Expand All @@ -302,7 +323,12 @@ class SaxesParser {
this.textNodeCheckedBefore = 0;

if (this.opt.xmlns) {
this.ns = Object.assign({}, rootNS);
this.ns = Object.assign({ __proto__: null }, rootNS);
const additional = this.opt.additionalNamespaces;
if (additional) {
nsMappingCheck(this, additional);
Object.assign(this.ns, additional);
}
}

this.trackPosition = this.opt.position !== false;
Expand Down Expand Up @@ -1598,34 +1624,15 @@ class SaxesParser {
if (this.opt.xmlns) {
// emit namespace binding events
const { ns, attributes } = tag;
// eslint-disable-next-line prefer-const
for (let [name, uri] of attribList) {
for (const [name, uri] of attribList) {
const { prefix, local } = this.qname(name, true);
if (prefix === "xmlns") {
// namespace binding attribute. push the binding into scope
uri = uri.trim();
const check = nsChecks[local];

if (check) {
check(this, uri);
}
else {
switch (uri) {
case XMLNS_NAMESPACE:
this.fail(`may not assign a prefix (even "xmlns") to the URI \
${XMLNS_NAMESPACE}.`);
break;
case XML_NAMESPACE:
this.fail("may not assign the xml namespace to another prefix.");
break;
default:
}
}

ns[local] = uri;
ns[local] = uri.trim();
}
}

nsMappingCheck(this, ns);

{
// add namespace info to tag
const { prefix, local } = this.qname(this.tagName);
Expand Down Expand Up @@ -1688,7 +1695,7 @@ ${XMLNS_NAMESPACE}.`);
tag.isSelfClosing = !!selfClosing;

// process the tag
if (this.closedRoot) {
if (!this.opt.fragment && this.closedRoot) {
this.fail("documents may contain only one root.");
}
else {
Expand Down
Loading

0 comments on commit 1ff2d6a

Please sign in to comment.