Skip to content

Commit

Permalink
fix(ttml): Handle escaped special characters. (#7047)
Browse files Browse the repository at this point in the history
Fixes #7044
  • Loading branch information
theodab authored and avelad committed Jul 18, 2024
1 parent 30cdd61 commit 451a41e
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 2 deletions.
21 changes: 19 additions & 2 deletions lib/util/string_utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -291,19 +291,36 @@ shaka.util.StringUtils = class {
'&lt;': '<',
'&gt;': '>',
'&quot;': '"',
'&#39;': '\'',
'&apos;': '\'',
'&nbsp;': '\u{a0}',
'&lrm;': '\u{200e}',
'&rlm;': '\u{200f}',
};

// Used to match HTML entities and HTML characters.
const reEscapedHtml = /&(?:amp|lt|gt|quot|apos|#(0+)?39|nbsp|lrm|rlm);/g;
const reEscapedHtml =
/&(?:amp|lt|gt|quot|apos|nbsp|lrm|rlm|#[xX]?[0-9a-fA-F]+);/g;
const reHasEscapedHtml = RegExp(reEscapedHtml.source);
// This check is an optimization, since replace always makes a copy
if (input && reHasEscapedHtml.test(input)) {
return input.replace(reEscapedHtml, (entity) => {
if (entity[1] == '#') {
// Translate this into an HTML character.
let code = 0;
if (entity[2] == 'x' || entity[2] == 'X') {
// It's hex.
code = parseInt(entity.substring(3), 16);
} else {
// It's decimal.
code = parseInt(entity.substring(2), 10);
}
// Ignore it if it's an invalid code point.
if (code >= 0 && code <= 0x10FFFF) {
return String.fromCodePoint(code);
} else {
return '';
}
}
// The only thing that might not match the dictionary above is the
// single quote, which can be matched by many strings in the regex, but
// only has a single entry in the dictionary.
Expand Down
14 changes: 14 additions & 0 deletions test/util/string_utils_unit.js
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,20 @@ function defineStringUtilTests() {
});
});

describe('htmlUnescape', () => {
  // Named entity: '&amp;' must come back as a literal ampersand.
  it('handles special characters', () => {
    const unescaped = StringUtils.htmlUnescape('foo &amp; bar');
    expect(unescaped).toBe('foo & bar');
  });

  // Decimal numeric reference: code point 70 is the letter 'F'.
  it('handles decimal special characters', () => {
    const unescaped = StringUtils.htmlUnescape('foo &#70; bar');
    expect(unescaped).toBe('foo F bar');
  });

  // Hexadecimal numeric reference: code point 0x44 is the letter 'D'.
  it('handles hex special characters', () => {
    const unescaped = StringUtils.htmlUnescape('foo &#x44; bar');
    expect(unescaped).toBe('foo D bar');
  });
});

it('converts toUTF8', () => {
const str = 'Xe\u4524\u1952';
const arr = [0x58, 0x65, 0xe4, 0x94, 0xa4, 0xe1, 0xa5, 0x92];
Expand Down

0 comments on commit 451a41e

Please sign in to comment.