# Patch summary: drop U+FEFF (byte order mark) while decoding to UTF-32.
#
# src/common/input/TextDecoder.ts
#   * StringToUtf32 hunk: a code unit equal to 0xFEFF now hits `continue`
#     instead of being stored via `target[size++] = code`, so it does not
#     count towards the returned size.
#   * Utf8ToUtf32 hunk: the code point assembled from byte1/byte2/byte3 is now
#     also rejected when it equals 0xFEFF, in the same condition that already
#     rejects values below 0x0800 and the surrogate range 0xD800-0xDFFF.
#
# src/common/input/TextDecoder.test.ts
#   * Both exhaustive 0..65535 loops now skip 0xFEFF in addition to the
#     surrogate range.
#   * Two new '0xFEFF(BOM)' cases (one per decoder class) assert that decoding
#     a lone U+FEFF returns a length of 0.
#
# NOTE(review): neither added condition checks the position in the input, so
#   U+FEFF is dropped wherever it occurs, not only as a leading BOM. Confirm
#   that is the intended behaviour.
# NOTE(review): only one three-byte decoding site in Utf8ToUtf32 is changed by
#   the hunks shown here. If the class builds three-byte code points on any
#   other path (not visible in this patch), verify it applies the same filter.
#   The new test feeds the BOM in a single decode() call only.
diff --git a/src/common/input/TextDecoder.test.ts b/src/common/input/TextDecoder.test.ts index 92b0e03a44..e929902130 100644 --- a/src/common/input/TextDecoder.test.ts +++ b/src/common/input/TextDecoder.test.ts @@ -58,8 +58,8 @@ describe('text encodings', () => { const decoder = new StringToUtf32(); const target = new Uint32Array(5); for (let i = 0; i < 65536; ++i) { - // skip surrogate pairs - if (i >= 0xD800 && i <= 0xDFFF) { + // skip surrogate pairs and a BOM + if ((i >= 0xD800 && i <= 0xDFFF) || i === 0xFEFF) { continue; } const length = decoder.decode(String.fromCharCode(i), target); @@ -84,6 +84,14 @@ describe('text encodings', () => { decoder.clear(); } }); + + it('0xFEFF(BOM)', () => { + const decoder = new StringToUtf32(); + const target = new Uint32Array(5); + const length = decoder.decode(String.fromCharCode(0xFEFF), target); + assert.equal(length, 0); + decoder.clear(); + }); }); it('test strings', () => { @@ -118,8 +126,8 @@ describe('text encodings', () => { const decoder = new Utf8ToUtf32(); const target = new Uint32Array(5); for (let i = 0; i < 65536; ++i) { - // skip surrogate pairs - if (i >= 0xD800 && i <= 0xDFFF) { + // skip surrogate pairs and a BOM + if ((i >= 0xD800 && i <= 0xDFFF) || i === 0xFEFF) { continue; } const utf8Data = fromByteString(encode(String.fromCharCode(i))); @@ -142,6 +150,15 @@ describe('text encodings', () => { decoder.clear(); } }); + + it('0xFEFF(BOM)', () => { + const decoder = new Utf8ToUtf32(); + const target = new Uint32Array(5); + const utf8Data = fromByteString(encode(String.fromCharCode(0xFEFF))); + const length = decoder.decode(utf8Data, target); + assert.equal(length, 0); + decoder.clear(); + }); }); it('test strings', () => { diff --git a/src/common/input/TextDecoder.ts b/src/common/input/TextDecoder.ts index 6ecab011d5..9df26f5994 100644 --- a/src/common/input/TextDecoder.ts +++ b/src/common/input/TextDecoder.ts @@ -105,6 +105,10 @@ export class StringToUtf32 { } continue; } + if (code === 0xFEFF) { + // BOM 
+ continue; + } target[size++] = code; } return size; @@ -286,8 +290,8 @@ export class Utf8ToUtf32 { continue; } codepoint = (byte1 & 0x0F) << 12 | (byte2 & 0x3F) << 6 | (byte3 & 0x3F); - if (codepoint < 0x0800 || (codepoint >= 0xD800 && codepoint <= 0xDFFF)) { - // illegal codepoint, no i-- here + if (codepoint < 0x0800 || (codepoint >= 0xD800 && codepoint <= 0xDFFF) || codepoint === 0xFEFF) { + // illegal codepoint or BOM, no i-- here continue; } target[size++] = codepoint;