Merge pull request #3200 from kena0ki/issue3032

Filter U+FEFF (BOM) when decoding input data
xtermjs · Jan 23, 2021 · commit ff74caf
2 parents: 4da5d55 + 3d941f2
Show file tree

Hide file tree

Showing 2 changed files with 42 additions and 8 deletions.
diff --git a/src/common/input/TextDecoder.test.ts b/src/common/input/TextDecoder.test.ts
@@ -58,8 +58,8 @@ describe('text encodings', () => {
         const decoder = new StringToUtf32();
         const target = new Uint32Array(5);
         for (let i = 0; i < 65536; ++i) {
-          // skip surrogate pairs
-          if (i >= 0xD800 && i <= 0xDFFF) {
+          // skip surrogate pairs and a BOM
+          if ((i >= 0xD800 && i <= 0xDFFF) || i === 0xFEFF) {
             continue;
           }
           const length = decoder.decode(String.fromCharCode(i), target);
@@ -84,6 +84,14 @@ describe('text encodings', () => {
           decoder.clear();
         }
       });
+
+      it('0xFEFF(BOM)', () => {
+        const decoder = new StringToUtf32();
+        const target = new Uint32Array(5);
+        const length = decoder.decode(String.fromCharCode(0xFEFF), target);
+        assert.equal(length, 0);
+        decoder.clear();
+      });
     });
 
     it('test strings', () => {
@@ -118,8 +126,8 @@ describe('text encodings', () => {
         const decoder = new Utf8ToUtf32();
         const target = new Uint32Array(5);
         for (let i = 0; i < 65536; ++i) {
-          // skip surrogate pairs
-          if (i >= 0xD800 && i <= 0xDFFF) {
+          // skip surrogate pairs and a BOM
+          if ((i >= 0xD800 && i <= 0xDFFF) || i === 0xFEFF) {
             continue;
           }
           const utf8Data = fromByteString(encode(String.fromCharCode(i)));
@@ -142,6 +150,15 @@ describe('text encodings', () => {
           decoder.clear();
         }
       });
+
+      it('0xFEFF(BOM)', () => {
+        const decoder = new Utf8ToUtf32();
+        const target = new Uint32Array(5);
+        const utf8Data = fromByteString(encode(String.fromCharCode(0xFEFF)));
+        const length = decoder.decode(utf8Data, target);
+        assert.equal(length, 0);
+        decoder.clear();
+      });
     });
 
     it('test strings', () => {
@@ -215,6 +232,19 @@ describe('text encodings', () => {
         }
         assert(decoded, 'Ä€𝄞Ö𝄞€Ü𝄞€');
       });
+
+      it('BOMs (3 byte sequences) - advance by 2', () => {
+        const decoder = new Utf8ToUtf32();
+        const target = new Uint32Array(5);
+        const utf8Data = fromByteString('\xef\xbb\xbf\xef\xbb\xbf');
+        let decoded = '';
+        for (let i = 0; i < utf8Data.length; i += 2) {
+          const written = decoder.decode(utf8Data.slice(i, i + 2), target);
+          decoded += toString(target, written);
+        }
+        assert.equal(decoded, '');
+      });
+
       it('test break after 3 bytes - issue #2495', () => {
         const decoder = new Utf8ToUtf32();
         const target = new Uint32Array(5);

diff --git a/src/common/input/TextDecoder.ts b/src/common/input/TextDecoder.ts
@@ -105,6 +105,10 @@ export class StringToUtf32 {
         }
         continue;
       }
+      if (code === 0xFEFF) {
+        // BOM
+        continue;
+      }
       target[size++] = code;
     }
     return size;
@@ -188,8 +192,8 @@ export class Utf8ToUtf32 {
             target[size++] = cp;
           }
         } else if (type === 3) {
-          if (cp < 0x0800 || (cp >= 0xD800 && cp <= 0xDFFF)) {
-            // illegal codepoint
+          if (cp < 0x0800 || (cp >= 0xD800 && cp <= 0xDFFF) || cp === 0xFEFF) {
+            // illegal codepoint or BOM
           } else {
             target[size++] = cp;
           }
@@ -286,8 +290,8 @@ export class Utf8ToUtf32 {
           continue;
         }
         codepoint = (byte1 & 0x0F) << 12 | (byte2 & 0x3F) << 6 | (byte3 & 0x3F);
-        if (codepoint < 0x0800 || (codepoint >= 0xD800 && codepoint <= 0xDFFF)) {
-          // illegal codepoint, no i-- here
+        if (codepoint < 0x0800 || (codepoint >= 0xD800 && codepoint <= 0xDFFF) || codepoint === 0xFEFF) {
+          // illegal codepoint or BOM, no i-- here
           continue;
         }
         target[size++] = codepoint;