Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Attempt to find truncated endstream commands, in the fallback code-path, in Parser.makeStream (issue 10004) #10010

Merged
merged 2 commits into from
Sep 1, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
107 changes: 68 additions & 39 deletions src/core/parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ import {
PredictorStream, RunLengthStream
} from './stream';
import {
assert, FormatError, info, isNum, isSpace, isString, MissingDataException,
StreamType, warn
assert, bytesToString, FormatError, info, isNum, isSpace, isString,
MissingDataException, StreamType, warn
} from '../shared/util';
import {
Cmd, Dict, EOF, isCmd, isDict, isEOF, isName, Name, Ref
Expand Down Expand Up @@ -471,13 +471,45 @@ var Parser = (function ParserClosure() {

return imageStream;
},

_findStreamLength(startPos, signature) {
const { stream, } = this.lexer;
stream.pos = startPos;

const SCAN_BLOCK_LENGTH = 2048;
const signatureLength = signature.length;

while (stream.pos < stream.end) {
const scanBytes = stream.peekBytes(SCAN_BLOCK_LENGTH);
const scanLength = scanBytes.length - signatureLength;

if (scanLength <= 0) {
break;
}
let pos = 0;
while (pos < scanLength) {
let j = 0;
while (j < signatureLength && scanBytes[pos + j] === signature[j]) {
j++;
}
if (j >= signatureLength) { // `signature` found.
stream.pos += pos;
return (stream.pos - startPos);
}
pos++;
}
stream.pos += scanLength;
}
return -1;
},

makeStream: function Parser_makeStream(dict, cipherTransform) {
var lexer = this.lexer;
var stream = lexer.stream;

// get stream start position
lexer.skipToNextLine();
var pos = stream.pos - 1;
const startPos = stream.pos - 1;

// get length
var length = dict.get('Length');
Expand All @@ -487,60 +519,57 @@ var Parser = (function ParserClosure() {
}

// skip over the stream data
stream.pos = pos + length;
stream.pos = startPos + length;
lexer.nextChar();

// Shift '>>' and check whether the new object marks the end of the stream
if (this.tryShift() && isCmd(this.buf2, 'endstream')) {
this.shift(); // 'stream'
} else {
// bad stream length, scanning for endstream
stream.pos = pos;
var SCAN_BLOCK_SIZE = 2048;
var ENDSTREAM_SIGNATURE_LENGTH = 9;
var ENDSTREAM_SIGNATURE = [0x65, 0x6E, 0x64, 0x73, 0x74, 0x72, 0x65,
0x61, 0x6D];
var skipped = 0, found = false, i, j;
while (stream.pos < stream.end) {
var scanBytes = stream.peekBytes(SCAN_BLOCK_SIZE);
var scanLength = scanBytes.length - ENDSTREAM_SIGNATURE_LENGTH;
if (scanLength <= 0) {
break;
}
found = false;
i = 0;
while (i < scanLength) {
j = 0;
while (j < ENDSTREAM_SIGNATURE_LENGTH &&
scanBytes[i + j] === ENDSTREAM_SIGNATURE[j]) {
j++;
}
if (j >= ENDSTREAM_SIGNATURE_LENGTH) {
found = true;
// Bad stream length, scanning for endstream command.
const ENDSTREAM_SIGNATURE = new Uint8Array([
0x65, 0x6E, 0x64, 0x73, 0x74, 0x72, 0x65, 0x61, 0x6D]);
let actualLength = this._findStreamLength(startPos,
ENDSTREAM_SIGNATURE);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like one space should be removed to make this align.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Strange, it looks correctly aligned to my eye; both here in the diff and in the file

pdf.js/src/core/parser.js

Lines 532 to 533 in 95e5bad

let actualLength = this._findStreamLength(startPos,
ENDSTREAM_SIGNATURE);

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's really weird. Here above, the ENDSTREAM_SIGNATURE is one space ahead of startPos, but that is not the case if I open the raw file. Somehow this is just GitHub messing up the diff rendering; sorry for the noise!

if (actualLength < 0) {
// Only allow limited truncation of the endstream signature,
// to prevent false positives.
const MAX_TRUNCATION = 1;
// Check if the PDF generator included truncated endstream commands,
// such as e.g. "endstrea" (fixes issue10004.pdf).
for (let i = 1; i <= MAX_TRUNCATION; i++) {
const end = ENDSTREAM_SIGNATURE.length - i;
const TRUNCATED_SIGNATURE = ENDSTREAM_SIGNATURE.slice(0, end);

let maybeLength = this._findStreamLength(startPos,
TRUNCATED_SIGNATURE);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like one space should be removed to make this align.

if (maybeLength >= 0) {
// Ensure that the byte immediately following the truncated
// endstream command is a space, to prevent false positives.
const lastByte = stream.peekBytes(end + 1)[end];
if (!isSpace(lastByte)) {
break;
}
info(`Found "${bytesToString(TRUNCATED_SIGNATURE)}" when ` +
'searching for endstream command.');
actualLength = maybeLength;
break;
}
i++;
}
if (found) {
skipped += i;
stream.pos += i;
break;

if (actualLength < 0) {
throw new FormatError('Missing endstream command.');
}
skipped += scanLength;
stream.pos += scanLength;
}
if (!found) {
throw new FormatError('Missing endstream');
}
length = skipped;
length = actualLength;

lexer.nextChar();
this.shift();
this.shift();
}
this.shift(); // 'endstream'

stream = stream.makeSubStream(pos, length, dict);
stream = stream.makeSubStream(startPos, length, dict);
if (cipherTransform) {
stream = cipherTransform.createStream(stream, length);
}
Expand Down
1 change: 1 addition & 0 deletions test/pdfs/issue10004.pdf.link
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
https://github.com/mozilla/pdf.js/files/2315390/2371410.pdf
7 changes: 7 additions & 0 deletions test/test_manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -726,6 +726,13 @@
"link": false,
"type": "load"
},
{ "id": "issue10004",
"file": "pdfs/issue10004.pdf",
"md5": "64d1853060cefe3be50e5c4617dd0505",
"rounds": 1,
"link": true,
"type": "load"
},
{ "id": "issue7507",
"file": "pdfs/issue7507.pdf",
"md5": "f7aeaafe0c89b94436e94eaa63307303",
Expand Down