Skip to content

Commit

Permalink
filter U+FEFF (BOM) when decoding input data
Browse files Browse the repository at this point in the history
  • Loading branch information
kena0ki committed Jan 23, 2021
1 parent 4da5d55 commit 93aab95
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 6 deletions.
25 changes: 21 additions & 4 deletions src/common/input/TextDecoder.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,8 @@ describe('text encodings', () => {
const decoder = new StringToUtf32();
const target = new Uint32Array(5);
for (let i = 0; i < 65536; ++i) {
// skip surrogate pairs
if (i >= 0xD800 && i <= 0xDFFF) {
// skip surrogate pairs and a BOM
if ((i >= 0xD800 && i <= 0xDFFF) || i === 0xFEFF) {
continue;
}
const length = decoder.decode(String.fromCharCode(i), target);
Expand All @@ -84,6 +84,14 @@ describe('text encodings', () => {
decoder.clear();
}
});

it('0xFEFF(BOM)', () => {
  // A string holding only the byte order mark (U+FEFF) must decode to nothing:
  // the decoder is expected to report zero codepoints written.
  const bom = String.fromCharCode(0xFEFF);
  const utf32Decoder = new StringToUtf32();
  const written = utf32Decoder.decode(bom, new Uint32Array(5));
  assert.equal(written, 0);
  // Reset the decoder, mirroring the cleanup done by the other cases in this suite.
  utf32Decoder.clear();
});
});

it('test strings', () => {
Expand Down Expand Up @@ -118,8 +126,8 @@ describe('text encodings', () => {
const decoder = new Utf8ToUtf32();
const target = new Uint32Array(5);
for (let i = 0; i < 65536; ++i) {
// skip surrogate pairs
if (i >= 0xD800 && i <= 0xDFFF) {
// skip surrogate pairs and a BOM
if ((i >= 0xD800 && i <= 0xDFFF) || i === 0xFEFF) {
continue;
}
const utf8Data = fromByteString(encode(String.fromCharCode(i)));
Expand All @@ -142,6 +150,15 @@ describe('text encodings', () => {
decoder.clear();
}
});

it('0xFEFF(BOM)', () => {
  // The byte order mark (U+FEFF), once encoded to UTF-8 bytes, must be
  // filtered by the decoder: the reported output length has to be zero.
  const bomBytes = fromByteString(encode(String.fromCharCode(0xFEFF)));
  const utf8Decoder = new Utf8ToUtf32();
  const written = utf8Decoder.decode(bomBytes, new Uint32Array(5));
  assert.equal(written, 0);
  // Reset the decoder, mirroring the cleanup done by the other cases in this suite.
  utf8Decoder.clear();
});
});

it('test strings', () => {
Expand Down
8 changes: 6 additions & 2 deletions src/common/input/TextDecoder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,10 @@ export class StringToUtf32 {
}
continue;
}
if (code === 0xFEFF) {
// BOM
continue;
}
target[size++] = code;
}
return size;
Expand Down Expand Up @@ -286,8 +290,8 @@ export class Utf8ToUtf32 {
continue;
}
codepoint = (byte1 & 0x0F) << 12 | (byte2 & 0x3F) << 6 | (byte3 & 0x3F);
if (codepoint < 0x0800 || (codepoint >= 0xD800 && codepoint <= 0xDFFF)) {
// illegal codepoint, no i-- here
if (codepoint < 0x0800 || (codepoint >= 0xD800 && codepoint <= 0xDFFF) || codepoint === 0xFEFF) {
// illegal codepoint or BOM, no i-- here
continue;
}
target[size++] = codepoint;
Expand Down

0 comments on commit 93aab95

Please sign in to comment.