Skip to content

Commit

Permalink
Detect binary files by checking first 256 code units for 0xFFFD (#57008)
Browse files Browse the repository at this point in the history
  • Loading branch information
jakebailey authored Jan 17, 2024
1 parent 59b6f78 commit ad2f465
Show file tree
Hide file tree
Showing 7 changed files with 31 additions and 8 deletions.
12 changes: 7 additions & 5 deletions src/compiler/scanner.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1795,12 +1795,14 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean

const ch = codePointAt(text, pos);
if (pos === 0) {
// If a file wasn't valid text at all, it will usually be apparent at
// position 0 because UTF-8 decode will fail and produce U+FFFD.
// If a file isn't valid text at all, it will usually be apparent
// in the first few characters because UTF-8 decode will fail and produce U+FFFD.
// If that happens, just issue one error and refuse to try to scan further;
// this is likely a binary file that cannot be parsed
if (ch === CharacterCodes.replacementCharacter) {
// Jump to the end of the file and fail.
// this is likely a binary file that cannot be parsed.
//
// It's safe to slice the text by code units; U+FFFD can only be produced by an invalid decode,
// so even if the slice cuts a surrogate pair in half, the leftover lone surrogate is never U+FFFD.
if (text.slice(0, 256).includes("\uFFFD")) {
error(Diagnostics.File_appears_to_be_binary);
pos = end;
return token = SyntaxKind.NonTextFileMarkerTrivia;
Expand Down
3 changes: 0 additions & 3 deletions src/compiler/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7532,9 +7532,6 @@ export const enum CharacterCodes {
mathematicalSpace = 0x205F,
ogham = 0x1680,

// Unicode replacement character produced when a byte sequence is invalid
replacementCharacter = 0xFFFD,

_ = 0x5F,
$ = 0x24,

Expand Down
7 changes: 7 additions & 0 deletions tests/baselines/reference/TransportStream.errors.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
TransportStream.ts(1,1): error TS1490: File appears to be binary.


==== TransportStream.ts (1 errors) ====
G@�G@�G@�

!!! error TS1490: File appears to be binary.
6 changes: 6 additions & 0 deletions tests/baselines/reference/TransportStream.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
//// [tests/cases/compiler/TransportStream.ts] ////

//// [TransportStream.ts]
G@G@G@

//// [TransportStream.js]
5 changes: 5 additions & 0 deletions tests/baselines/reference/TransportStream.symbols
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
//// [tests/cases/compiler/TransportStream.ts] ////

=== TransportStream.ts ===

G@�G@�G@�
5 changes: 5 additions & 0 deletions tests/baselines/reference/TransportStream.types
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
//// [tests/cases/compiler/TransportStream.ts] ////

=== TransportStream.ts ===

G@�G@�G@�
1 change: 1 addition & 0 deletions tests/cases/compiler/TransportStream.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
䝀ҒЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄЄ䝀֒ԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅԅ䝀ڒ؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆؆

0 comments on commit ad2f465

Please sign in to comment.