Skip to content

Commit

Permalink
fix: Make XML parsing secure (#4598)
Browse files Browse the repository at this point in the history
Harden the XmlUtils.parseXmlString function against XML documents that embed elements from the HTML or SVG namespaces, which could trigger script execution and cause XSS vulnerabilities.

Also migrate direct users of the DOMParser.parseFromString function to XmlUtils, and add appropriate unit tests.
  • Loading branch information
bjarkler authored and joeyparrish committed Oct 26, 2022
1 parent 9b255b1 commit 2e45696
Show file tree
Hide file tree
Showing 5 changed files with 201 additions and 100 deletions.
20 changes: 20 additions & 0 deletions build/conformance.textproto
Original file line number Diff line number Diff line change
Expand Up @@ -355,3 +355,23 @@ requirement: {
"because is not supported on Xbox and old browsers."
whitelist_regexp: "lib/util/string_utils.js"
}

requirement: {
type: BANNED_PROPERTY
value: "String.prototype.replaceAll"
error_message:
"Using \"String.replaceAll\" is not allowed; "
"use String.replace instead"
}

# Disallow the general use of DOMParser.parseFromString, which has security
# implications.
requirement: {
type: BANNED_PROPERTY_CALL
value: "DOMParser.prototype.parseFromString"
error_message:
"Using \"DOMParser.parseFromString\" directly is not allowed; "
"use shaka.util.XmlUtils.parseXmlString instead."
whitelist_regexp: "lib/util/xml_utils.js"
whitelist_regexp: "test/"
}
4 changes: 3 additions & 1 deletion lib/media/drm_engine.js
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ goog.require('shaka.util.StreamUtils');
goog.require('shaka.util.StringUtils');
goog.require('shaka.util.Timer');
goog.require('shaka.util.Uint8ArrayUtils');
goog.require('shaka.util.XmlUtils');


/** @implements {shaka.util.IDestroyable} */
Expand Down Expand Up @@ -1506,7 +1507,8 @@ shaka.media.DrmEngine = class {
return;
}
shaka.log.debug('Unwrapping PlayReady request.');
const dom = new DOMParser().parseFromString(xml, 'application/xml');
const dom = shaka.util.XmlUtils.parseXmlString(xml, 'PlayReadyKeyMessage');
goog.asserts.assert(dom, 'Failed to parse PlayReady XML!');

// Set request headers.
const headers = dom.getElementsByTagName('HttpHeader');
Expand Down
162 changes: 69 additions & 93 deletions lib/text/ttml_text_parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -49,129 +49,105 @@ shaka.text.TtmlTextParser = class {
const ttsNs = TtmlTextParser.styleNs_;
const str = shaka.util.StringUtils.fromUTF8(data);
const cues = [];
const parser = new DOMParser();
let xml = null;

// dont try to parse empty string as
// DOMParser will not throw error but return an errored xml
if (str == '') {
return cues;
}

try {
xml = parser.parseFromString(str, 'text/xml');
} catch (exception) {
const tt = XmlUtils.parseXmlString(str, 'tt');
if (!tt) {
throw new shaka.util.Error(
shaka.util.Error.Severity.CRITICAL,
shaka.util.Error.Category.TEXT,
shaka.util.Error.Code.INVALID_XML,
'Failed to parse TTML.');
}

if (xml) {
const parserError = xml.getElementsByTagName('parsererror')[0];
if (parserError) {
throw new shaka.util.Error(
shaka.util.Error.Severity.CRITICAL,
shaka.util.Error.Category.TEXT,
shaka.util.Error.Code.INVALID_XML,
parserError.textContent);
}

const tt = xml.getElementsByTagName('tt')[0];
// TTML should always have tt element.
if (!tt) {
throw new shaka.util.Error(
shaka.util.Error.Severity.CRITICAL,
shaka.util.Error.Category.TEXT,
shaka.util.Error.Code.INVALID_XML,
'TTML does not contain <tt> tag.');
}

const body = tt.getElementsByTagName('body')[0];
if (!body) {
return [];
}
const body = tt.getElementsByTagName('body')[0];
if (!body) {
return [];
}

// Get the framerate, subFrameRate and frameRateMultiplier if applicable.
const frameRate = XmlUtils.getAttributeNSList(tt, ttpNs, 'frameRate');
const subFrameRate = XmlUtils.getAttributeNSList(
tt, ttpNs, 'subFrameRate');
const frameRateMultiplier =
XmlUtils.getAttributeNSList(tt, ttpNs, 'frameRateMultiplier');
const tickRate = XmlUtils.getAttributeNSList(tt, ttpNs, 'tickRate');
// Get the framerate, subFrameRate and frameRateMultiplier if applicable.
const frameRate = XmlUtils.getAttributeNSList(tt, ttpNs, 'frameRate');
const subFrameRate = XmlUtils.getAttributeNSList(
tt, ttpNs, 'subFrameRate');
const frameRateMultiplier =
XmlUtils.getAttributeNSList(tt, ttpNs, 'frameRateMultiplier');
const tickRate = XmlUtils.getAttributeNSList(tt, ttpNs, 'tickRate');

const cellResolution = XmlUtils.getAttributeNSList(
tt, ttpNs, 'cellResolution');
const spaceStyle = tt.getAttribute('xml:space') || 'default';
const extent = XmlUtils.getAttributeNSList(tt, ttsNs, 'extent');
const cellResolution = XmlUtils.getAttributeNSList(
tt, ttpNs, 'cellResolution');
const spaceStyle = tt.getAttribute('xml:space') || 'default';
const extent = XmlUtils.getAttributeNSList(tt, ttsNs, 'extent');

if (spaceStyle != 'default' && spaceStyle != 'preserve') {
throw new shaka.util.Error(
shaka.util.Error.Severity.CRITICAL,
shaka.util.Error.Category.TEXT,
shaka.util.Error.Code.INVALID_XML,
'Invalid xml:space value: ' + spaceStyle);
}
const whitespaceTrim = spaceStyle == 'default';
if (spaceStyle != 'default' && spaceStyle != 'preserve') {
throw new shaka.util.Error(
shaka.util.Error.Severity.CRITICAL,
shaka.util.Error.Category.TEXT,
shaka.util.Error.Code.INVALID_XML,
'Invalid xml:space value: ' + spaceStyle);
}
const whitespaceTrim = spaceStyle == 'default';

const rateInfo = new TtmlTextParser.RateInfo_(
frameRate, subFrameRate, frameRateMultiplier, tickRate);
const rateInfo = new TtmlTextParser.RateInfo_(
frameRate, subFrameRate, frameRateMultiplier, tickRate);

const cellResolutionInfo =
TtmlTextParser.getCellResolution_(cellResolution);
const cellResolutionInfo =
TtmlTextParser.getCellResolution_(cellResolution);

const metadata = tt.getElementsByTagName('metadata')[0];
const metadataElements = metadata ? XmlUtils.getChildren(metadata) : [];
const styles = Array.from(tt.getElementsByTagName('style'));
const regionElements = Array.from(tt.getElementsByTagName('region'));
const metadata = tt.getElementsByTagName('metadata')[0];
const metadataElements = metadata ? XmlUtils.getChildren(metadata) : [];
const styles = Array.from(tt.getElementsByTagName('style'));
const regionElements = Array.from(tt.getElementsByTagName('region'));

const cueRegions = [];
for (const region of regionElements) {
const cueRegion =
TtmlTextParser.parseCueRegion_(region, styles, extent);
if (cueRegion) {
cueRegions.push(cueRegion);
}
const cueRegions = [];
for (const region of regionElements) {
const cueRegion =
TtmlTextParser.parseCueRegion_(region, styles, extent);
if (cueRegion) {
cueRegions.push(cueRegion);
}
}

// A <body> element should only contain <div> elements, not <p> or <span>
// elements. We used to allow this, but it is non-compliant, and the
// loose nature of our previous parser made it difficult to implement TTML
// nesting more fully.
if (XmlUtils.findChildren(body, 'p').length) {
// A <body> element should only contain <div> elements, not <p> or <span>
// elements. We used to allow this, but it is non-compliant, and the
// loose nature of our previous parser made it difficult to implement TTML
// nesting more fully.
if (XmlUtils.findChildren(body, 'p').length) {
throw new shaka.util.Error(
shaka.util.Error.Severity.CRITICAL,
shaka.util.Error.Category.TEXT,
shaka.util.Error.Code.INVALID_TEXT_CUE,
'<p> can only be inside <div> in TTML');
}

for (const div of XmlUtils.findChildren(body, 'div')) {
// A <div> element should only contain <p>, not <span>.
if (XmlUtils.findChildren(div, 'span').length) {
throw new shaka.util.Error(
shaka.util.Error.Severity.CRITICAL,
shaka.util.Error.Category.TEXT,
shaka.util.Error.Code.INVALID_TEXT_CUE,
'<p> can only be inside <div> in TTML');
}

for (const div of XmlUtils.findChildren(body, 'div')) {
// A <div> element should only contain <p>, not <span>.
if (XmlUtils.findChildren(div, 'span').length) {
throw new shaka.util.Error(
shaka.util.Error.Severity.CRITICAL,
shaka.util.Error.Category.TEXT,
shaka.util.Error.Code.INVALID_TEXT_CUE,
'<span> can only be inside <p> in TTML');
}
'<span> can only be inside <p> in TTML');
}
}

const cue = TtmlTextParser.parseCue_(
body, time.periodStart, rateInfo, metadataElements, styles,
regionElements, cueRegions, whitespaceTrim,
cellResolutionInfo, /* parentCueElement= */ null,
/* isContent= */ false);
if (cue) {
// According to the TTML spec, backgrounds default to transparent.
// So default the background of the top-level element to transparent.
// Nested elements may override that background color already.
if (!cue.backgroundColor) {
cue.backgroundColor = 'transparent';
}
cues.push(cue);
const cue = TtmlTextParser.parseCue_(
body, time.periodStart, rateInfo, metadataElements, styles,
regionElements, cueRegions, whitespaceTrim,
cellResolutionInfo, /* parentCueElement= */ null,
/* isContent= */ false);
if (cue) {
// According to the TTML spec, backgrounds default to transparent.
// So default the background of the top-level element to transparent.
// Nested elements may override that background color already.
if (!cue.backgroundColor) {
cue.backgroundColor = 'transparent';
}
cues.push(cue);
}

return cues;
Expand Down
51 changes: 45 additions & 6 deletions lib/util/xml_utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ goog.provide('shaka.util.XmlUtils');

goog.require('goog.asserts');
goog.require('shaka.log');
goog.require('shaka.util.Lazy');
goog.require('shaka.util.StringUtils');


Expand Down Expand Up @@ -348,19 +349,21 @@ shaka.util.XmlUtils = class {
*/
static parseXmlString(xmlString, expectedRootElemName) {
const parser = new DOMParser();
let xml = null;
const unsafeXmlString =
shaka.util.XmlUtils.trustedHTMLFromString_.value()(xmlString);
let unsafeXml = null;
try {
xml = parser.parseFromString(xmlString, 'text/xml');
unsafeXml = parser.parseFromString(unsafeXmlString, 'text/xml');
} catch (exception) {
shaka.log.error('XML parsing exception:', exception);
return null;
}

// According to MDN, parseFromString never returns null.
goog.asserts.assert(xml, 'Parsed XML document cannot be null!');
goog.asserts.assert(unsafeXml, 'Parsed XML document cannot be null!');

// Check for empty documents.
const rootElem = xml.documentElement;
const rootElem = unsafeXml.documentElement;
if (!rootElem) {
shaka.log.error('XML document was empty!');
return null;
Expand All @@ -374,13 +377,28 @@ shaka.util.XmlUtils = class {
}

// The top-level element in the loaded XML should have the name we expect.
if (xml.documentElement.tagName != expectedRootElemName) {
if (rootElem.tagName != expectedRootElemName) {
shaka.log.error(
`XML tag name does not match expected "${expectedRootElemName}":`,
xml.documentElement.tagName);
rootElem.tagName);
return null;
}

// SECURITY: Verify that the document does not contain elements from the
// HTML or SVG namespaces, which could trigger script execution and XSS.
const iterator = document.createNodeIterator(
unsafeXml,
NodeFilter.SHOW_ALL,
);
let currentNode;
while (currentNode = iterator.nextNode()) {
if (currentNode instanceof HTMLElement ||
currentNode instanceof SVGElement) {
shaka.log.error('XML document embeds unsafe content!');
return null;
}
}

return rootElem;
}

Expand All @@ -402,3 +420,24 @@ shaka.util.XmlUtils = class {
}
}
};

/**
* Promote a string to TrustedHTML. This function is security-sensitive and
* should only be used with security approval where the string is guaranteed not
* to cause an XSS vulnerability.
*
* @private {!shaka.util.Lazy.<function(!string): (!TrustedHTML|!string)>}
*/
shaka.util.XmlUtils.trustedHTMLFromString_ = new shaka.util.Lazy(() => {
if (typeof trustedTypes !== 'undefined') {
// Create a Trusted Types policy for promoting the string to TrustedHTML.
// The Lazy wrapper ensures this policy is only created once.
const policy = trustedTypes.createPolicy('shaka-player#xml', {
createHTML: (s) => s,
});
return (s) => policy.createHTML(s);
}
// Fall back to strings in environments that don't support Trusted Types.
return (s) => s;
});

Loading

0 comments on commit 2e45696

Please sign in to comment.