-
Notifications
You must be signed in to change notification settings - Fork 10.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Attempt to find truncated endstream commands, in the fallback code-path, in Parser.makeStream
(issue 10004)
#10010
Merged
Merged
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,8 +18,8 @@ import { | |
PredictorStream, RunLengthStream | ||
} from './stream'; | ||
import { | ||
assert, FormatError, info, isNum, isSpace, isString, MissingDataException, | ||
StreamType, warn | ||
assert, bytesToString, FormatError, info, isNum, isSpace, isString, | ||
MissingDataException, StreamType, warn | ||
} from '../shared/util'; | ||
import { | ||
Cmd, Dict, EOF, isCmd, isDict, isEOF, isName, Name, Ref | ||
|
@@ -471,13 +471,45 @@ var Parser = (function ParserClosure() { | |
|
||
return imageStream; | ||
}, | ||
|
||
_findStreamLength(startPos, signature) { | ||
const { stream, } = this.lexer; | ||
stream.pos = startPos; | ||
|
||
const SCAN_BLOCK_LENGTH = 2048; | ||
const signatureLength = signature.length; | ||
|
||
while (stream.pos < stream.end) { | ||
const scanBytes = stream.peekBytes(SCAN_BLOCK_LENGTH); | ||
const scanLength = scanBytes.length - signatureLength; | ||
|
||
if (scanLength <= 0) { | ||
break; | ||
} | ||
let pos = 0; | ||
while (pos < scanLength) { | ||
let j = 0; | ||
while (j < signatureLength && scanBytes[pos + j] === signature[j]) { | ||
j++; | ||
} | ||
if (j >= signatureLength) { // `signature` found. | ||
stream.pos += pos; | ||
return (stream.pos - startPos); | ||
} | ||
pos++; | ||
} | ||
stream.pos += scanLength; | ||
} | ||
return -1; | ||
}, | ||
|
||
makeStream: function Parser_makeStream(dict, cipherTransform) { | ||
var lexer = this.lexer; | ||
var stream = lexer.stream; | ||
|
||
// get stream start position | ||
lexer.skipToNextLine(); | ||
var pos = stream.pos - 1; | ||
const startPos = stream.pos - 1; | ||
|
||
// get length | ||
var length = dict.get('Length'); | ||
|
@@ -487,60 +519,57 @@ var Parser = (function ParserClosure() { | |
} | ||
|
||
// skip over the stream data | ||
stream.pos = pos + length; | ||
stream.pos = startPos + length; | ||
lexer.nextChar(); | ||
|
||
// Shift '>>' and check whether the new object marks the end of the stream | ||
if (this.tryShift() && isCmd(this.buf2, 'endstream')) { | ||
this.shift(); // 'stream' | ||
} else { | ||
// bad stream length, scanning for endstream | ||
stream.pos = pos; | ||
var SCAN_BLOCK_SIZE = 2048; | ||
var ENDSTREAM_SIGNATURE_LENGTH = 9; | ||
var ENDSTREAM_SIGNATURE = [0x65, 0x6E, 0x64, 0x73, 0x74, 0x72, 0x65, | ||
0x61, 0x6D]; | ||
var skipped = 0, found = false, i, j; | ||
while (stream.pos < stream.end) { | ||
var scanBytes = stream.peekBytes(SCAN_BLOCK_SIZE); | ||
var scanLength = scanBytes.length - ENDSTREAM_SIGNATURE_LENGTH; | ||
if (scanLength <= 0) { | ||
break; | ||
} | ||
found = false; | ||
i = 0; | ||
while (i < scanLength) { | ||
j = 0; | ||
while (j < ENDSTREAM_SIGNATURE_LENGTH && | ||
scanBytes[i + j] === ENDSTREAM_SIGNATURE[j]) { | ||
j++; | ||
} | ||
if (j >= ENDSTREAM_SIGNATURE_LENGTH) { | ||
found = true; | ||
// Bad stream length, scanning for endstream command. | ||
const ENDSTREAM_SIGNATURE = new Uint8Array([ | ||
0x65, 0x6E, 0x64, 0x73, 0x74, 0x72, 0x65, 0x61, 0x6D]); | ||
let actualLength = this._findStreamLength(startPos, | ||
ENDSTREAM_SIGNATURE); | ||
if (actualLength < 0) { | ||
// Only allow limited truncation of the endstream signature, | ||
// to prevent false positives. | ||
const MAX_TRUNCATION = 1; | ||
// Check if the PDF generator included truncated endstream commands, | ||
// such as e.g. "endstrea" (fixes issue10004.pdf). | ||
for (let i = 1; i <= MAX_TRUNCATION; i++) { | ||
const end = ENDSTREAM_SIGNATURE.length - i; | ||
const TRUNCATED_SIGNATURE = ENDSTREAM_SIGNATURE.slice(0, end); | ||
|
||
let maybeLength = this._findStreamLength(startPos, | ||
TRUNCATED_SIGNATURE); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It looks like one space should be removed to make this align. |
||
if (maybeLength >= 0) { | ||
// Ensure that the byte immediately following the truncated | ||
// endstream command is a space, to prevent false positives. | ||
const lastByte = stream.peekBytes(end + 1)[end]; | ||
if (!isSpace(lastByte)) { | ||
break; | ||
} | ||
info(`Found "${bytesToString(TRUNCATED_SIGNATURE)}" when ` + | ||
'searching for endstream command.'); | ||
actualLength = maybeLength; | ||
break; | ||
} | ||
i++; | ||
} | ||
if (found) { | ||
skipped += i; | ||
stream.pos += i; | ||
break; | ||
|
||
if (actualLength < 0) { | ||
throw new FormatError('Missing endstream command.'); | ||
} | ||
skipped += scanLength; | ||
stream.pos += scanLength; | ||
} | ||
if (!found) { | ||
throw new FormatError('Missing endstream'); | ||
} | ||
length = skipped; | ||
length = actualLength; | ||
|
||
lexer.nextChar(); | ||
this.shift(); | ||
this.shift(); | ||
} | ||
this.shift(); // 'endstream' | ||
|
||
stream = stream.makeSubStream(pos, length, dict); | ||
stream = stream.makeSubStream(startPos, length, dict); | ||
if (cipherTransform) { | ||
stream = cipherTransform.createStream(stream, length); | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
https://github.com/mozilla/pdf.js/files/2315390/2371410.pdf |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It looks like one space should be removed to make this align.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Strange, it looks correctly aligned to my eye; both here in the diff and in the file
pdf.js/src/core/parser.js
Lines 532 to 533 in 95e5bad
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That's really weird. Here above, `ENDSTREAM_SIGNATURE` is rendered one space ahead of `startPos`, but that is not the case when I open the raw file. Somehow this is just GitHub messing up the diff rendering; sorry for the noise!