Skip to content

Commit

Permalink
fix(WebVTT): Add support to &nbsp;, &lrm; and &rlm; (#4920)
Browse files Browse the repository at this point in the history
Backported to v4.2.x, also includes #4660

Co-authored-by: Joey Parrish <[email protected]>
  • Loading branch information
avelad and joeyparrish committed Jan 30, 2023
1 parent ae0f615 commit b9ddf6a
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 1 deletion.
39 changes: 38 additions & 1 deletion lib/text/vtt_text_parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -340,7 +340,8 @@ shaka.text.VttTextParser = class {
end += timeOffset;

// Get the payload.
const payload = text.slice(1).join('\n').trim();
const payload = VttTextParser.htmlUnescape_(
text.slice(1).join('\n').trim());

let cue = null;
if (styles.has('global')) {
Expand Down Expand Up @@ -863,6 +864,42 @@ shaka.text.VttTextParser = class {

return (milliseconds / 1000) + seconds + (minutes * 60) + (hours * 3600);
}

/**
* This method converts the HTML entities &amp;, &lt;, &gt;, &quot;, &#39;,
* &nbsp;, &lrm; and &rlm; in string to their corresponding characters.
*
* @param {!string} input
* @return {string}
* @private
*/
static htmlUnescape_(input) {
// Used to map HTML entities to characters.
const htmlUnescapes = {
'&amp;': '&',
'&lt;': '<',
'&gt;': '>',
'&quot;': '"',
'&#39;': '\'',
'&nbsp;': '\u{a0}',
'&lrm;': '\u{200e}',
'&rlm;': '\u{200f}',
};

// Used to match HTML entities and HTML characters.
const reEscapedHtml = /&(?:amp|lt|gt|quot|#(0+)?39|nbsp|lrm|rlm);/g;
const reHasEscapedHtml = RegExp(reEscapedHtml.source);
// This check is an optimization, since replace always makes a copy
if (input && reHasEscapedHtml.test(input)) {
return input.replace(reEscapedHtml, (entity) => {
// The only thing that might not match the dictionary above is the
// single quote, which can be matched by many strings in the regex, but
// only has a single entry in the dictionary.
return htmlUnescapes[entity] || '\'';
});
}
return input || '';
}
};

/**
Expand Down
11 changes: 11 additions & 0 deletions test/text/vtt_text_parser_unit.js
Original file line number Diff line number Diff line change
Expand Up @@ -769,6 +769,17 @@ describe('VttTextParser', () => {
{periodStart: 0, segmentStart: 0, segmentEnd: 0});
});

it('support escaped html payload', () => {
  // The cue text below carries &quot;, &amp; and a trailing &nbsp;.  The
  // parsed payload is expected to contain the decoded characters, including
  // the U+00A0 at the very end.
  const expectedCues = [
    {startTime: 20.1, endTime: 40.505, payload: '"Test & 1"\u{a0}'},
  ];

  const vttText =
      'WEBVTT\n\n' +
      '00:00:20.100 --> 00:00:40.505\n' +
      '&quot;Test &amp; 1&quot;&nbsp;';

  const timeInfo =
      {periodStart: 0, segmentStart: 0, segmentEnd: 0, vttOffset: 0};

  verifyHelper(expectedCues, vttText, timeInfo);
});

it('supports specific style blocks', () => {
verifyHelper(
[
Expand Down

0 comments on commit b9ddf6a

Please sign in to comment.