From ec3728b54018d3e74086c67d10c2b3d53bc8c699 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Wed, 25 Jul 2018 20:50:25 +0200 Subject: [PATCH] Use the `Linearization` dictionary, if it exists, when fetching the first Page Since PDF.js already supports range requests and streaming, not to mention chunked rendering, attempting to use the `Linearization` dictionary in `PDFDocument.getPage` probably isn't going to improve performance in any noticeable way. Nonetheless, when `Linearization` data is available, it will allow looking up the first Page *directly* without having to descend into the `Pages` tree to find the correct object. --- src/core/document.js | 61 +++++++++++++++++++++++++++++++------------- 1 file changed, 43 insertions(+), 18 deletions(-) diff --git a/src/core/document.js b/src/core/document.js index 5fdf83d074504..6367d89b47408 100644 --- a/src/core/document.js +++ b/src/core/document.js @@ -13,12 +13,13 @@ * limitations under the License. */ -import { Catalog, ObjectLoader, XRef } from './obj'; -import { Dict, isDict, isName, isStream } from './primitives'; import { - getInheritableProperty, info, isArrayBuffer, isNum, isSpace, isString, - MissingDataException, OPS, shadow, stringToBytes, stringToPDFString, Util + assert, FormatError, getInheritableProperty, info, isArrayBuffer, isNum, + isSpace, isString, MissingDataException, OPS, shadow, stringToBytes, + stringToPDFString, Util } from '../shared/util'; +import { Catalog, ObjectLoader, XRef } from './obj'; +import { Dict, isDict, isName, isStream, Ref } from './primitives'; import { NullStream, Stream, StreamsSequenceStream } from './stream'; import { AnnotationFactory } from './annotation'; import { calculateMD5 } from './crypto'; @@ -586,25 +587,49 @@ var PDFDocument = (function PDFDocumentClosure() { return shadow(this, 'fingerprint', fileID); }, + _getLinearizationPage(pageIndex) { + const { catalog, linearization, } = this; + assert(linearization && linearization.pageFirst === pageIndex); + + const ref = new Ref(linearization.objectNumberFirst, 0); + return this.xref.fetchAsync(ref).then((obj) => { + // Ensure that the object that was found is actually a Page dictionary. + if (isDict(obj, 'Page') || + (isDict(obj) && !obj.has('Type') && obj.has('Contents'))) { + if (ref && !catalog.pageKidsCountCache.has(ref)) { + catalog.pageKidsCountCache.put(ref, 1); // Cache the Page reference. + } + return [obj, ref]; + } + throw new FormatError('The Linearization dictionary doesn\'t point ' + + 'to a valid Page dictionary.'); + }).catch((reason) => { + info(reason); + return catalog.getPageDict(pageIndex); + }); + }, + getPage(pageIndex) { if (this._pagePromises[pageIndex] !== undefined) { return this._pagePromises[pageIndex]; } - const catalog = this.catalog; - - return this._pagePromises[pageIndex] = - catalog.getPageDict(pageIndex).then(([pageDict, ref]) => { - return new Page({ - pdfManager: this.pdfManager, - xref: this.xref, - pageIndex, - pageDict, - ref, - fontCache: catalog.fontCache, - builtInCMapCache: catalog.builtInCMapCache, - pdfFunctionFactory: this.pdfFunctionFactory, - }); + const { catalog, linearization, } = this; + + const promise = (linearization && linearization.pageFirst === pageIndex) ? + this._getLinearizationPage(pageIndex) : catalog.getPageDict(pageIndex); + + return this._pagePromises[pageIndex] = promise.then(([pageDict, ref]) => { + return new Page({ + pdfManager: this.pdfManager, + xref: this.xref, + pageIndex, + pageDict, + ref, + fontCache: catalog.fontCache, + builtInCMapCache: catalog.builtInCMapCache, + pdfFunctionFactory: this.pdfFunctionFactory, }); + }); }, cleanup: function PDFDocument_cleanup() {