From 604cd6d6003e42665cd8fa710c42395c394e7d30 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Tue, 13 Apr 2021 18:25:34 +0200 Subject: [PATCH 01/10] Move the `ObjectLoader` from `src/core/obj.js` and into its own file The size of the `src/core/obj.js` file has increased slowly over the years, and it also contains a fair amount of *distinct* functionality. In order to improve readability and make it easier to navigate through the code, this patch moves the `ObjectLoader` into its own file. --- src/core/annotation.js | 3 +- src/core/document.js | 3 +- src/core/obj.js | 139 +-------------------------------- src/core/object_loader.js | 157 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 162 insertions(+), 140 deletions(-) create mode 100644 src/core/object_loader.js diff --git a/src/core/annotation.js b/src/core/annotation.js index f3c670ea03d49..3d6afebfb4c30 100644 --- a/src/core/annotation.js +++ b/src/core/annotation.js @@ -34,7 +34,7 @@ import { Util, warn, } from "../shared/util.js"; -import { Catalog, FileSpec, ObjectLoader } from "./obj.js"; +import { Catalog, FileSpec } from "./obj.js"; import { collectActions, getInheritableProperty } from "./core_utils.js"; import { createDefaultAppearance, @@ -50,6 +50,7 @@ import { RefSet, } from "./primitives.js"; import { ColorSpace } from "./colorspace.js"; +import { ObjectLoader } from "./object_loader.js"; import { OperatorList } from "./operator_list.js"; import { StringStream } from "./stream.js"; import { writeDict } from "./writer.js"; diff --git a/src/core/document.js b/src/core/document.js index 4db6248bef7f2..c22395f3ae922 100644 --- a/src/core/document.js +++ b/src/core/document.js @@ -34,7 +34,7 @@ import { Util, warn, } from "../shared/util.js"; -import { Catalog, ObjectLoader, XRef } from "./obj.js"; +import { Catalog, XRef } from "./obj.js"; import { clearPrimitiveCaches, Dict, @@ -56,6 +56,7 @@ import { NullStream, Stream, StreamsSequenceStream } from "./stream.js"; import { AnnotationFactory } 
from "./annotation.js"; import { calculateMD5 } from "./crypto.js"; import { Linearization } from "./parser.js"; +import { ObjectLoader } from "./object_loader.js"; import { OperatorList } from "./operator_list.js"; import { PartialEvaluator } from "./evaluator.js"; import { StructTreePage } from "./struct_tree.js"; diff --git a/src/core/obj.js b/src/core/obj.js index c5b2c7b829a87..49e2bb3538781 100644 --- a/src/core/obj.js +++ b/src/core/obj.js @@ -2512,141 +2512,4 @@ var FileSpec = (function FileSpecClosure() { return FileSpec; })(); -/** - * A helper for loading missing data in `Dict` graphs. It traverses the graph - * depth first and queues up any objects that have missing data. Once it has - * has traversed as many objects that are available it attempts to bundle the - * missing data requests and then resume from the nodes that weren't ready. - * - * NOTE: It provides protection from circular references by keeping track of - * loaded references. However, you must be careful not to load any graphs - * that have references to the catalog or other pages since that will cause the - * entire PDF document object graph to be traversed. 
- */ -const ObjectLoader = (function () { - function mayHaveChildren(value) { - return ( - value instanceof Ref || - value instanceof Dict || - Array.isArray(value) || - isStream(value) - ); - } - - function addChildren(node, nodesToVisit) { - if (node instanceof Dict) { - node = node.getRawValues(); - } else if (isStream(node)) { - node = node.dict.getRawValues(); - } else if (!Array.isArray(node)) { - return; - } - for (const rawValue of node) { - if (mayHaveChildren(rawValue)) { - nodesToVisit.push(rawValue); - } - } - } - - // eslint-disable-next-line no-shadow - function ObjectLoader(dict, keys, xref) { - this.dict = dict; - this.keys = keys; - this.xref = xref; - this.refSet = null; - } - - ObjectLoader.prototype = { - async load() { - // Don't walk the graph if all the data is already loaded; note that only - // `ChunkedStream` instances have a `allChunksLoaded` method. - if ( - !this.xref.stream.allChunksLoaded || - this.xref.stream.allChunksLoaded() - ) { - return undefined; - } - - const { keys, dict } = this; - this.refSet = new RefSet(); - // Setup the initial nodes to visit. - const nodesToVisit = []; - for (let i = 0, ii = keys.length; i < ii; i++) { - const rawValue = dict.getRaw(keys[i]); - // Skip nodes that are guaranteed to be empty. - if (rawValue !== undefined) { - nodesToVisit.push(rawValue); - } - } - return this._walk(nodesToVisit); - }, - - async _walk(nodesToVisit) { - const nodesToRevisit = []; - const pendingRequests = []; - // DFS walk of the object graph. - while (nodesToVisit.length) { - let currentNode = nodesToVisit.pop(); - - // Only references or chunked streams can cause missing data exceptions. - if (currentNode instanceof Ref) { - // Skip nodes that have already been visited. 
- if (this.refSet.has(currentNode)) { - continue; - } - try { - this.refSet.put(currentNode); - currentNode = this.xref.fetch(currentNode); - } catch (ex) { - if (!(ex instanceof MissingDataException)) { - warn(`ObjectLoader._walk - requesting all data: "${ex}".`); - this.refSet = null; - - const { manager } = this.xref.stream; - return manager.requestAllChunks(); - } - nodesToRevisit.push(currentNode); - pendingRequests.push({ begin: ex.begin, end: ex.end }); - } - } - if (currentNode && currentNode.getBaseStreams) { - const baseStreams = currentNode.getBaseStreams(); - let foundMissingData = false; - for (let i = 0, ii = baseStreams.length; i < ii; i++) { - const stream = baseStreams[i]; - if (stream.allChunksLoaded && !stream.allChunksLoaded()) { - foundMissingData = true; - pendingRequests.push({ begin: stream.start, end: stream.end }); - } - } - if (foundMissingData) { - nodesToRevisit.push(currentNode); - } - } - - addChildren(currentNode, nodesToVisit); - } - - if (pendingRequests.length) { - await this.xref.stream.manager.requestRanges(pendingRequests); - - for (let i = 0, ii = nodesToRevisit.length; i < ii; i++) { - const node = nodesToRevisit[i]; - // Remove any reference nodes from the current `RefSet` so they - // aren't skipped when we revist them. - if (node instanceof Ref) { - this.refSet.remove(node); - } - } - return this._walk(nodesToRevisit); - } - // Everything is loaded. - this.refSet = null; - return undefined; - }, - }; - - return ObjectLoader; -})(); - -export { Catalog, FileSpec, NumberTree, ObjectLoader, XRef }; +export { Catalog, FileSpec, NumberTree, XRef }; diff --git a/src/core/object_loader.js b/src/core/object_loader.js new file mode 100644 index 0000000000000..f2bff6ac07ec6 --- /dev/null +++ b/src/core/object_loader.js @@ -0,0 +1,157 @@ +/* Copyright 2021 Mozilla Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import { Dict, isStream, Ref, RefSet } from "./primitives.js"; +import { MissingDataException } from "./core_utils.js"; +import { warn } from "../shared/util.js"; + +/** + * A helper for loading missing data in `Dict` graphs. It traverses the graph + * depth first and queues up any objects that have missing data. Once it has + * has traversed as many objects that are available it attempts to bundle the + * missing data requests and then resume from the nodes that weren't ready. + * + * NOTE: It provides protection from circular references by keeping track of + * loaded references. However, you must be careful not to load any graphs + * that have references to the catalog or other pages since that will cause the + * entire PDF document object graph to be traversed. 
+ */ +const ObjectLoader = (function () { + function mayHaveChildren(value) { + return ( + value instanceof Ref || + value instanceof Dict || + Array.isArray(value) || + isStream(value) + ); + } + + function addChildren(node, nodesToVisit) { + if (node instanceof Dict) { + node = node.getRawValues(); + } else if (isStream(node)) { + node = node.dict.getRawValues(); + } else if (!Array.isArray(node)) { + return; + } + for (const rawValue of node) { + if (mayHaveChildren(rawValue)) { + nodesToVisit.push(rawValue); + } + } + } + + // eslint-disable-next-line no-shadow + function ObjectLoader(dict, keys, xref) { + this.dict = dict; + this.keys = keys; + this.xref = xref; + this.refSet = null; + } + + ObjectLoader.prototype = { + async load() { + // Don't walk the graph if all the data is already loaded; note that only + // `ChunkedStream` instances have a `allChunksLoaded` method. + if ( + !this.xref.stream.allChunksLoaded || + this.xref.stream.allChunksLoaded() + ) { + return undefined; + } + + const { keys, dict } = this; + this.refSet = new RefSet(); + // Setup the initial nodes to visit. + const nodesToVisit = []; + for (let i = 0, ii = keys.length; i < ii; i++) { + const rawValue = dict.getRaw(keys[i]); + // Skip nodes that are guaranteed to be empty. + if (rawValue !== undefined) { + nodesToVisit.push(rawValue); + } + } + return this._walk(nodesToVisit); + }, + + async _walk(nodesToVisit) { + const nodesToRevisit = []; + const pendingRequests = []; + // DFS walk of the object graph. + while (nodesToVisit.length) { + let currentNode = nodesToVisit.pop(); + + // Only references or chunked streams can cause missing data exceptions. + if (currentNode instanceof Ref) { + // Skip nodes that have already been visited. 
+ if (this.refSet.has(currentNode)) { + continue; + } + try { + this.refSet.put(currentNode); + currentNode = this.xref.fetch(currentNode); + } catch (ex) { + if (!(ex instanceof MissingDataException)) { + warn(`ObjectLoader._walk - requesting all data: "${ex}".`); + this.refSet = null; + + const { manager } = this.xref.stream; + return manager.requestAllChunks(); + } + nodesToRevisit.push(currentNode); + pendingRequests.push({ begin: ex.begin, end: ex.end }); + } + } + if (currentNode && currentNode.getBaseStreams) { + const baseStreams = currentNode.getBaseStreams(); + let foundMissingData = false; + for (let i = 0, ii = baseStreams.length; i < ii; i++) { + const stream = baseStreams[i]; + if (stream.allChunksLoaded && !stream.allChunksLoaded()) { + foundMissingData = true; + pendingRequests.push({ begin: stream.start, end: stream.end }); + } + } + if (foundMissingData) { + nodesToRevisit.push(currentNode); + } + } + + addChildren(currentNode, nodesToVisit); + } + + if (pendingRequests.length) { + await this.xref.stream.manager.requestRanges(pendingRequests); + + for (let i = 0, ii = nodesToRevisit.length; i < ii; i++) { + const node = nodesToRevisit[i]; + // Remove any reference nodes from the current `RefSet` so they + // aren't skipped when we revist them. + if (node instanceof Ref) { + this.refSet.remove(node); + } + } + return this._walk(nodesToRevisit); + } + // Everything is loaded. 
+ this.refSet = null; + return undefined; + }, + }; + + return ObjectLoader; +})(); + +export { ObjectLoader }; From 6a935682fd6f81795e5fa8d855d5bde49d8ccb5c Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Tue, 13 Apr 2021 18:25:42 +0200 Subject: [PATCH 02/10] Convert the `ObjectLoader` to a "normal" class --- src/core/object_loader.js | 205 +++++++++++++++++++------------------- 1 file changed, 100 insertions(+), 105 deletions(-) diff --git a/src/core/object_loader.js b/src/core/object_loader.js index f2bff6ac07ec6..f36016e6eb84f 100644 --- a/src/core/object_loader.js +++ b/src/core/object_loader.js @@ -17,6 +17,30 @@ import { Dict, isStream, Ref, RefSet } from "./primitives.js"; import { MissingDataException } from "./core_utils.js"; import { warn } from "../shared/util.js"; +function mayHaveChildren(value) { + return ( + value instanceof Ref || + value instanceof Dict || + Array.isArray(value) || + isStream(value) + ); +} + +function addChildren(node, nodesToVisit) { + if (node instanceof Dict) { + node = node.getRawValues(); + } else if (isStream(node)) { + node = node.dict.getRawValues(); + } else if (!Array.isArray(node)) { + return; + } + for (const rawValue of node) { + if (mayHaveChildren(rawValue)) { + nodesToVisit.push(rawValue); + } + } +} + /** * A helper for loading missing data in `Dict` graphs. It traverses the graph * depth first and queues up any objects that have missing data. Once it has @@ -28,130 +52,101 @@ import { warn } from "../shared/util.js"; * that have references to the catalog or other pages since that will cause the * entire PDF document object graph to be traversed. 
*/ -const ObjectLoader = (function () { - function mayHaveChildren(value) { - return ( - value instanceof Ref || - value instanceof Dict || - Array.isArray(value) || - isStream(value) - ); - } - - function addChildren(node, nodesToVisit) { - if (node instanceof Dict) { - node = node.getRawValues(); - } else if (isStream(node)) { - node = node.dict.getRawValues(); - } else if (!Array.isArray(node)) { - return; - } - for (const rawValue of node) { - if (mayHaveChildren(rawValue)) { - nodesToVisit.push(rawValue); - } - } - } - - // eslint-disable-next-line no-shadow - function ObjectLoader(dict, keys, xref) { +class ObjectLoader { + constructor(dict, keys, xref) { this.dict = dict; this.keys = keys; this.xref = xref; this.refSet = null; } - ObjectLoader.prototype = { - async load() { - // Don't walk the graph if all the data is already loaded; note that only - // `ChunkedStream` instances have a `allChunksLoaded` method. - if ( - !this.xref.stream.allChunksLoaded || - this.xref.stream.allChunksLoaded() - ) { - return undefined; - } + async load() { + // Don't walk the graph if all the data is already loaded; note that only + // `ChunkedStream` instances have a `allChunksLoaded` method. + if ( + !this.xref.stream.allChunksLoaded || + this.xref.stream.allChunksLoaded() + ) { + return undefined; + } - const { keys, dict } = this; - this.refSet = new RefSet(); - // Setup the initial nodes to visit. - const nodesToVisit = []; - for (let i = 0, ii = keys.length; i < ii; i++) { - const rawValue = dict.getRaw(keys[i]); - // Skip nodes that are guaranteed to be empty. - if (rawValue !== undefined) { - nodesToVisit.push(rawValue); - } + const { keys, dict } = this; + this.refSet = new RefSet(); + // Setup the initial nodes to visit. + const nodesToVisit = []; + for (let i = 0, ii = keys.length; i < ii; i++) { + const rawValue = dict.getRaw(keys[i]); + // Skip nodes that are guaranteed to be empty. 
+ if (rawValue !== undefined) { + nodesToVisit.push(rawValue); } - return this._walk(nodesToVisit); - }, + } + return this._walk(nodesToVisit); + } - async _walk(nodesToVisit) { - const nodesToRevisit = []; - const pendingRequests = []; - // DFS walk of the object graph. - while (nodesToVisit.length) { - let currentNode = nodesToVisit.pop(); + async _walk(nodesToVisit) { + const nodesToRevisit = []; + const pendingRequests = []; + // DFS walk of the object graph. + while (nodesToVisit.length) { + let currentNode = nodesToVisit.pop(); - // Only references or chunked streams can cause missing data exceptions. - if (currentNode instanceof Ref) { - // Skip nodes that have already been visited. - if (this.refSet.has(currentNode)) { - continue; - } - try { - this.refSet.put(currentNode); - currentNode = this.xref.fetch(currentNode); - } catch (ex) { - if (!(ex instanceof MissingDataException)) { - warn(`ObjectLoader._walk - requesting all data: "${ex}".`); - this.refSet = null; + // Only references or chunked streams can cause missing data exceptions. + if (currentNode instanceof Ref) { + // Skip nodes that have already been visited. 
+ if (this.refSet.has(currentNode)) { + continue; + } + try { + this.refSet.put(currentNode); + currentNode = this.xref.fetch(currentNode); + } catch (ex) { + if (!(ex instanceof MissingDataException)) { + warn(`ObjectLoader._walk - requesting all data: "${ex}".`); + this.refSet = null; - const { manager } = this.xref.stream; - return manager.requestAllChunks(); - } - nodesToRevisit.push(currentNode); - pendingRequests.push({ begin: ex.begin, end: ex.end }); + const { manager } = this.xref.stream; + return manager.requestAllChunks(); } + nodesToRevisit.push(currentNode); + pendingRequests.push({ begin: ex.begin, end: ex.end }); } - if (currentNode && currentNode.getBaseStreams) { - const baseStreams = currentNode.getBaseStreams(); - let foundMissingData = false; - for (let i = 0, ii = baseStreams.length; i < ii; i++) { - const stream = baseStreams[i]; - if (stream.allChunksLoaded && !stream.allChunksLoaded()) { - foundMissingData = true; - pendingRequests.push({ begin: stream.start, end: stream.end }); - } - } - if (foundMissingData) { - nodesToRevisit.push(currentNode); + } + if (currentNode && currentNode.getBaseStreams) { + const baseStreams = currentNode.getBaseStreams(); + let foundMissingData = false; + for (let i = 0, ii = baseStreams.length; i < ii; i++) { + const stream = baseStreams[i]; + if (stream.allChunksLoaded && !stream.allChunksLoaded()) { + foundMissingData = true; + pendingRequests.push({ begin: stream.start, end: stream.end }); } } - - addChildren(currentNode, nodesToVisit); + if (foundMissingData) { + nodesToRevisit.push(currentNode); + } } - if (pendingRequests.length) { - await this.xref.stream.manager.requestRanges(pendingRequests); + addChildren(currentNode, nodesToVisit); + } - for (let i = 0, ii = nodesToRevisit.length; i < ii; i++) { - const node = nodesToRevisit[i]; - // Remove any reference nodes from the current `RefSet` so they - // aren't skipped when we revist them. 
- if (node instanceof Ref) { - this.refSet.remove(node); - } + if (pendingRequests.length) { + await this.xref.stream.manager.requestRanges(pendingRequests); + + for (let i = 0, ii = nodesToRevisit.length; i < ii; i++) { + const node = nodesToRevisit[i]; + // Remove any reference nodes from the current `RefSet` so they + // aren't skipped when we revist them. + if (node instanceof Ref) { + this.refSet.remove(node); } - return this._walk(nodesToRevisit); } - // Everything is loaded. - this.refSet = null; - return undefined; - }, - }; - - return ObjectLoader; -})(); + return this._walk(nodesToRevisit); + } + // Everything is loaded. + this.refSet = null; + return undefined; + } +} export { ObjectLoader }; From e02d17da935fb83c404721059ecf2557f93e3daa Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Tue, 13 Apr 2021 18:25:48 +0200 Subject: [PATCH 03/10] Move the `FileSpec` from `src/core/obj.js` and into its own file The size of the `src/core/obj.js` file has increased slowly over the years, and it also contains a fair amount of *distinct* functionality. In order to improve readability and make it easier to navigate through the code, this patch moves the `FileSpec` into its own file. 
--- src/core/annotation.js | 3 +- src/core/file_spec.js | 113 +++++++++++++++++++++++++++++++++++++++++ src/core/obj.js | 96 +--------------------------------- 3 files changed, 117 insertions(+), 95 deletions(-) create mode 100644 src/core/file_spec.js diff --git a/src/core/annotation.js b/src/core/annotation.js index 3d6afebfb4c30..c30fdaebada41 100644 --- a/src/core/annotation.js +++ b/src/core/annotation.js @@ -34,7 +34,6 @@ import { Util, warn, } from "../shared/util.js"; -import { Catalog, FileSpec } from "./obj.js"; import { collectActions, getInheritableProperty } from "./core_utils.js"; import { createDefaultAppearance, @@ -49,7 +48,9 @@ import { Name, RefSet, } from "./primitives.js"; +import { Catalog } from "./obj.js"; import { ColorSpace } from "./colorspace.js"; +import { FileSpec } from "./file_spec.js"; import { ObjectLoader } from "./object_loader.js"; import { OperatorList } from "./operator_list.js"; import { StringStream } from "./stream.js"; diff --git a/src/core/file_spec.js b/src/core/file_spec.js new file mode 100644 index 0000000000000..9beee0a6d748f --- /dev/null +++ b/src/core/file_spec.js @@ -0,0 +1,113 @@ +/* Copyright 2021 Mozilla Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/* eslint-disable no-var */ + +import { isDict, isStream } from "./primitives.js"; +import { stringToPDFString, warn } from "../shared/util.js"; + +/** + * "A PDF file can refer to the contents of another file by using a File + * Specification (PDF 1.1)", see the spec (7.11) for more details. + * NOTE: Only embedded files are supported (as part of the attachments support) + * TODO: support the 'URL' file system (with caching if !/V), portable + * collections attributes and related files (/RF) + */ +var FileSpec = (function FileSpecClosure() { + // eslint-disable-next-line no-shadow + function FileSpec(root, xref) { + if (!root || !isDict(root)) { + return; + } + this.xref = xref; + this.root = root; + if (root.has("FS")) { + this.fs = root.get("FS"); + } + this.description = root.has("Desc") + ? stringToPDFString(root.get("Desc")) + : ""; + if (root.has("RF")) { + warn("Related file specifications are not supported"); + } + this.contentAvailable = true; + if (!root.has("EF")) { + this.contentAvailable = false; + warn("Non-embedded file specifications are not supported"); + } + } + + function pickPlatformItem(dict) { + // Look for the filename in this order: + // UF, F, Unix, Mac, DOS + if (dict.has("UF")) { + return dict.get("UF"); + } else if (dict.has("F")) { + return dict.get("F"); + } else if (dict.has("Unix")) { + return dict.get("Unix"); + } else if (dict.has("Mac")) { + return dict.get("Mac"); + } else if (dict.has("DOS")) { + return dict.get("DOS"); + } + return null; + } + + FileSpec.prototype = { + get filename() { + if (!this._filename && this.root) { + var filename = pickPlatformItem(this.root) || "unnamed"; + this._filename = stringToPDFString(filename) + .replace(/\\\\/g, "\\") + .replace(/\\\//g, "/") + .replace(/\\/g, "/"); + } + return this._filename; + }, + get content() { + if (!this.contentAvailable) { + return null; + } + if (!this.contentRef && this.root) { + this.contentRef = pickPlatformItem(this.root.get("EF")); + } + var content = 
null; + if (this.contentRef) { + var xref = this.xref; + var fileObj = xref.fetchIfRef(this.contentRef); + if (fileObj && isStream(fileObj)) { + content = fileObj.getBytes(); + } else { + warn( + "Embedded file specification points to non-existing/invalid " + + "content" + ); + } + } else { + warn("Embedded file specification does not have a content"); + } + return content; + }, + get serializable() { + return { + filename: this.filename, + content: this.content, + }; + }, + }; + return FileSpec; +})(); + +export { FileSpec }; diff --git a/src/core/obj.js b/src/core/obj.js index 49e2bb3538781..ea9e7d0e3d5cc 100644 --- a/src/core/obj.js +++ b/src/core/obj.js @@ -58,6 +58,7 @@ import { import { Lexer, Parser } from "./parser.js"; import { CipherTransformFactory } from "./crypto.js"; import { ColorSpace } from "./colorspace.js"; +import { FileSpec } from "./file_spec.js"; import { GlobalImageCache } from "./image_utils.js"; import { MetadataParser } from "./metadata_parser.js"; import { StructTreeRoot } from "./struct_tree.js"; @@ -2419,97 +2420,4 @@ class NumberTree extends NameOrNumberTree { } } -/** - * "A PDF file can refer to the contents of another file by using a File - * Specification (PDF 1.1)", see the spec (7.11) for more details. - * NOTE: Only embedded files are supported (as part of the attachments support) - * TODO: support the 'URL' file system (with caching if !/V), portable - * collections attributes and related files (/RF) - */ -var FileSpec = (function FileSpecClosure() { - // eslint-disable-next-line no-shadow - function FileSpec(root, xref) { - if (!root || !isDict(root)) { - return; - } - this.xref = xref; - this.root = root; - if (root.has("FS")) { - this.fs = root.get("FS"); - } - this.description = root.has("Desc") - ? 
stringToPDFString(root.get("Desc")) - : ""; - if (root.has("RF")) { - warn("Related file specifications are not supported"); - } - this.contentAvailable = true; - if (!root.has("EF")) { - this.contentAvailable = false; - warn("Non-embedded file specifications are not supported"); - } - } - - function pickPlatformItem(dict) { - // Look for the filename in this order: - // UF, F, Unix, Mac, DOS - if (dict.has("UF")) { - return dict.get("UF"); - } else if (dict.has("F")) { - return dict.get("F"); - } else if (dict.has("Unix")) { - return dict.get("Unix"); - } else if (dict.has("Mac")) { - return dict.get("Mac"); - } else if (dict.has("DOS")) { - return dict.get("DOS"); - } - return null; - } - - FileSpec.prototype = { - get filename() { - if (!this._filename && this.root) { - var filename = pickPlatformItem(this.root) || "unnamed"; - this._filename = stringToPDFString(filename) - .replace(/\\\\/g, "\\") - .replace(/\\\//g, "/") - .replace(/\\/g, "/"); - } - return this._filename; - }, - get content() { - if (!this.contentAvailable) { - return null; - } - if (!this.contentRef && this.root) { - this.contentRef = pickPlatformItem(this.root.get("EF")); - } - var content = null; - if (this.contentRef) { - var xref = this.xref; - var fileObj = xref.fetchIfRef(this.contentRef); - if (fileObj && isStream(fileObj)) { - content = fileObj.getBytes(); - } else { - warn( - "Embedded file specification points to non-existing/invalid " + - "content" - ); - } - } else { - warn("Embedded file specification does not have a content"); - } - return content; - }, - get serializable() { - return { - filename: this.filename, - content: this.content, - }; - }, - }; - return FileSpec; -})(); - -export { Catalog, FileSpec, NumberTree, XRef }; +export { Catalog, NumberTree, XRef }; From 22a066e6578e5c88cd3d6ae1bb50cd31541500a0 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Tue, 13 Apr 2021 18:25:52 +0200 Subject: [PATCH 04/10] Convert the `FileSpec` to a "normal" class --- 
src/core/file_spec.js | 115 ++++++++++++++++++++---------------------- 1 file changed, 56 insertions(+), 59 deletions(-) diff --git a/src/core/file_spec.js b/src/core/file_spec.js index 9beee0a6d748f..40306d2a8a9ae 100644 --- a/src/core/file_spec.js +++ b/src/core/file_spec.js @@ -17,6 +17,23 @@ import { isDict, isStream } from "./primitives.js"; import { stringToPDFString, warn } from "../shared/util.js"; +function pickPlatformItem(dict) { + // Look for the filename in this order: + // UF, F, Unix, Mac, DOS + if (dict.has("UF")) { + return dict.get("UF"); + } else if (dict.has("F")) { + return dict.get("F"); + } else if (dict.has("Unix")) { + return dict.get("Unix"); + } else if (dict.has("Mac")) { + return dict.get("Mac"); + } else if (dict.has("DOS")) { + return dict.get("DOS"); + } + return null; +} + /** * "A PDF file can refer to the contents of another file by using a File * Specification (PDF 1.1)", see the spec (7.11) for more details. @@ -24,9 +41,8 @@ import { stringToPDFString, warn } from "../shared/util.js"; * TODO: support the 'URL' file system (with caching if !/V), portable * collections attributes and related files (/RF) */ -var FileSpec = (function FileSpecClosure() { - // eslint-disable-next-line no-shadow - function FileSpec(root, xref) { +class FileSpec { + constructor(root, xref) { if (!root || !isDict(root)) { return; } @@ -48,66 +64,47 @@ var FileSpec = (function FileSpecClosure() { } } - function pickPlatformItem(dict) { - // Look for the filename in this order: - // UF, F, Unix, Mac, DOS - if (dict.has("UF")) { - return dict.get("UF"); - } else if (dict.has("F")) { - return dict.get("F"); - } else if (dict.has("Unix")) { - return dict.get("Unix"); - } else if (dict.has("Mac")) { - return dict.get("Mac"); - } else if (dict.has("DOS")) { - return dict.get("DOS"); + get filename() { + if (!this._filename && this.root) { + var filename = pickPlatformItem(this.root) || "unnamed"; + this._filename = stringToPDFString(filename) + 
.replace(/\\\\/g, "\\") + .replace(/\\\//g, "/") + .replace(/\\/g, "/"); } - return null; + return this._filename; } - FileSpec.prototype = { - get filename() { - if (!this._filename && this.root) { - var filename = pickPlatformItem(this.root) || "unnamed"; - this._filename = stringToPDFString(filename) - .replace(/\\\\/g, "\\") - .replace(/\\\//g, "/") - .replace(/\\/g, "/"); - } - return this._filename; - }, - get content() { - if (!this.contentAvailable) { - return null; - } - if (!this.contentRef && this.root) { - this.contentRef = pickPlatformItem(this.root.get("EF")); - } - var content = null; - if (this.contentRef) { - var xref = this.xref; - var fileObj = xref.fetchIfRef(this.contentRef); - if (fileObj && isStream(fileObj)) { - content = fileObj.getBytes(); - } else { - warn( - "Embedded file specification points to non-existing/invalid " + - "content" - ); - } + get content() { + if (!this.contentAvailable) { + return null; + } + if (!this.contentRef && this.root) { + this.contentRef = pickPlatformItem(this.root.get("EF")); + } + var content = null; + if (this.contentRef) { + var xref = this.xref; + var fileObj = xref.fetchIfRef(this.contentRef); + if (fileObj && isStream(fileObj)) { + content = fileObj.getBytes(); } else { - warn("Embedded file specification does not have a content"); + warn( + "Embedded file specification points to non-existing/invalid content" + ); } - return content; - }, - get serializable() { - return { - filename: this.filename, - content: this.content, - }; - }, - }; - return FileSpec; -})(); + } else { + warn("Embedded file specification does not have a content"); + } + return content; + } + + get serializable() { + return { + filename: this.filename, + content: this.content, + }; + } +} export { FileSpec }; From 92141e04688d9e2c7a2503532eb8b71e594db6da Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Tue, 13 Apr 2021 18:25:57 +0200 Subject: [PATCH 05/10] Enable the `no-var` rule in the `src/core/file_spec.js` file --- 
src/core/file_spec.js | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/core/file_spec.js b/src/core/file_spec.js index 40306d2a8a9ae..151a48e7a8d4a 100644 --- a/src/core/file_spec.js +++ b/src/core/file_spec.js @@ -12,7 +12,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -/* eslint-disable no-var */ import { isDict, isStream } from "./primitives.js"; import { stringToPDFString, warn } from "../shared/util.js"; @@ -66,7 +65,7 @@ class FileSpec { get filename() { if (!this._filename && this.root) { - var filename = pickPlatformItem(this.root) || "unnamed"; + const filename = pickPlatformItem(this.root) || "unnamed"; this._filename = stringToPDFString(filename) .replace(/\\\\/g, "\\") .replace(/\\\//g, "/") @@ -82,10 +81,9 @@ class FileSpec { if (!this.contentRef && this.root) { this.contentRef = pickPlatformItem(this.root.get("EF")); } - var content = null; + let content = null; if (this.contentRef) { - var xref = this.xref; - var fileObj = xref.fetchIfRef(this.contentRef); + const fileObj = this.xref.fetchIfRef(this.contentRef); if (fileObj && isStream(fileObj)) { content = fileObj.getBytes(); } else { From 24e5ecdf76458480e25876b82cf7a5d4d3c9027f Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Tue, 13 Apr 2021 18:26:02 +0200 Subject: [PATCH 06/10] Move `NameTree`/`NumberTree` from `src/core/obj.js` and into its own file The size of the `src/core/obj.js` file has increased slowly over the years, and it also contains a fair amount of *distinct* functionality. In order to improve readability and make it easier to navigate through the code, this patch moves `NameTree`/`NumberTree` into its own file. 
--- src/core/name_number_tree.js | 169 +++++++++++++++++++++++++++++++++++ src/core/obj.js | 154 +------------------------------ src/core/struct_tree.js | 2 +- 3 files changed, 172 insertions(+), 153 deletions(-) create mode 100644 src/core/name_number_tree.js diff --git a/src/core/name_number_tree.js b/src/core/name_number_tree.js new file mode 100644 index 0000000000000..19b90bf51d148 --- /dev/null +++ b/src/core/name_number_tree.js @@ -0,0 +1,169 @@ +/* Copyright 2021 Mozilla Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import { FormatError, info, unreachable, warn } from "../shared/util.js"; +import { isDict, RefSet } from "./primitives.js"; + +/** + * A NameTree/NumberTree is like a Dict but has some advantageous properties, + * see the specification (7.9.6 and 7.9.7) for additional details. + * TODO: implement all the Dict functions and make this more efficient. + */ +class NameOrNumberTree { + constructor(root, xref, type) { + if (this.constructor === NameOrNumberTree) { + unreachable("Cannot initialize NameOrNumberTree."); + } + this.root = root; + this.xref = xref; + this._type = type; + } + + getAll() { + const dict = Object.create(null); + if (!this.root) { + return dict; + } + const xref = this.xref; + // Reading Name/Number tree. 
+ const processed = new RefSet(); + processed.put(this.root); + const queue = [this.root]; + while (queue.length > 0) { + const obj = xref.fetchIfRef(queue.shift()); + if (!isDict(obj)) { + continue; + } + if (obj.has("Kids")) { + const kids = obj.get("Kids"); + for (let i = 0, ii = kids.length; i < ii; i++) { + const kid = kids[i]; + if (processed.has(kid)) { + throw new FormatError(`Duplicate entry in "${this._type}" tree.`); + } + queue.push(kid); + processed.put(kid); + } + continue; + } + const entries = obj.get(this._type); + if (Array.isArray(entries)) { + for (let i = 0, ii = entries.length; i < ii; i += 2) { + dict[xref.fetchIfRef(entries[i])] = xref.fetchIfRef(entries[i + 1]); + } + } + } + return dict; + } + + get(key) { + if (!this.root) { + return null; + } + const xref = this.xref; + let kidsOrEntries = xref.fetchIfRef(this.root); + let loopCount = 0; + const MAX_LEVELS = 10; + + // Perform a binary search to quickly find the entry that + // contains the key we are looking for. + while (kidsOrEntries.has("Kids")) { + if (++loopCount > MAX_LEVELS) { + warn(`Search depth limit reached for "${this._type}" tree.`); + return null; + } + + const kids = kidsOrEntries.get("Kids"); + if (!Array.isArray(kids)) { + return null; + } + + let l = 0, + r = kids.length - 1; + while (l <= r) { + const m = (l + r) >> 1; + const kid = xref.fetchIfRef(kids[m]); + const limits = kid.get("Limits"); + + if (key < xref.fetchIfRef(limits[0])) { + r = m - 1; + } else if (key > xref.fetchIfRef(limits[1])) { + l = m + 1; + } else { + kidsOrEntries = xref.fetchIfRef(kids[m]); + break; + } + } + if (l > r) { + return null; + } + } + + // If we get here, then we have found the right entry. Now go through the + // entries in the dictionary until we find the key we're looking for. + const entries = kidsOrEntries.get(this._type); + if (Array.isArray(entries)) { + // Perform a binary search to reduce the lookup time. 
+ let l = 0, + r = entries.length - 2; + while (l <= r) { + // Check only even indices (0, 2, 4, ...) because the + // odd indices contain the actual data. + const tmp = (l + r) >> 1, + m = tmp + (tmp & 1); + const currentKey = xref.fetchIfRef(entries[m]); + if (key < currentKey) { + r = m - 2; + } else if (key > currentKey) { + l = m + 2; + } else { + return xref.fetchIfRef(entries[m + 1]); + } + } + + // Fallback to an exhaustive search, in an attempt to handle corrupt + // PDF files where keys are not correctly ordered (fixes issue 10272). + info( + `Falling back to an exhaustive search, for key "${key}", ` + + `in "${this._type}" tree.` + ); + for (let m = 0, mm = entries.length; m < mm; m += 2) { + const currentKey = xref.fetchIfRef(entries[m]); + if (currentKey === key) { + warn( + `The "${key}" key was found at an incorrect, ` + + `i.e. out-of-order, position in "${this._type}" tree.` + ); + return xref.fetchIfRef(entries[m + 1]); + } + } + } + return null; + } +} + +class NameTree extends NameOrNumberTree { + constructor(root, xref) { + super(root, xref, "Names"); + } +} + +class NumberTree extends NameOrNumberTree { + constructor(root, xref) { + super(root, xref, "Nums"); + } +} + +export { NameTree, NumberTree }; diff --git a/src/core/obj.js b/src/core/obj.js index ea9e7d0e3d5cc..558f2c24876ad 100644 --- a/src/core/obj.js +++ b/src/core/obj.js @@ -31,7 +31,6 @@ import { shadow, stringToPDFString, stringToUTF8String, - unreachable, warn, } from "../shared/util.js"; import { @@ -56,6 +55,7 @@ import { XRefParseException, } from "./core_utils.js"; import { Lexer, Parser } from "./parser.js"; +import { NameTree, NumberTree } from "./name_number_tree.js"; import { CipherTransformFactory } from "./crypto.js"; import { ColorSpace } from "./colorspace.js"; import { FileSpec } from "./file_spec.js"; @@ -2270,154 +2270,4 @@ var XRef = (function XRefClosure() { return XRef; })(); -/** - * A NameTree/NumberTree is like a Dict but has some advantageous properties, - * 
see the specification (7.9.6 and 7.9.7) for additional details. - * TODO: implement all the Dict functions and make this more efficient. - */ -class NameOrNumberTree { - constructor(root, xref, type) { - if (this.constructor === NameOrNumberTree) { - unreachable("Cannot initialize NameOrNumberTree."); - } - this.root = root; - this.xref = xref; - this._type = type; - } - - getAll() { - const dict = Object.create(null); - if (!this.root) { - return dict; - } - const xref = this.xref; - // Reading Name/Number tree. - const processed = new RefSet(); - processed.put(this.root); - const queue = [this.root]; - while (queue.length > 0) { - const obj = xref.fetchIfRef(queue.shift()); - if (!isDict(obj)) { - continue; - } - if (obj.has("Kids")) { - const kids = obj.get("Kids"); - for (let i = 0, ii = kids.length; i < ii; i++) { - const kid = kids[i]; - if (processed.has(kid)) { - throw new FormatError(`Duplicate entry in "${this._type}" tree.`); - } - queue.push(kid); - processed.put(kid); - } - continue; - } - const entries = obj.get(this._type); - if (Array.isArray(entries)) { - for (let i = 0, ii = entries.length; i < ii; i += 2) { - dict[xref.fetchIfRef(entries[i])] = xref.fetchIfRef(entries[i + 1]); - } - } - } - return dict; - } - - get(key) { - if (!this.root) { - return null; - } - const xref = this.xref; - let kidsOrEntries = xref.fetchIfRef(this.root); - let loopCount = 0; - const MAX_LEVELS = 10; - - // Perform a binary search to quickly find the entry that - // contains the key we are looking for. 
- while (kidsOrEntries.has("Kids")) { - if (++loopCount > MAX_LEVELS) { - warn(`Search depth limit reached for "${this._type}" tree.`); - return null; - } - - const kids = kidsOrEntries.get("Kids"); - if (!Array.isArray(kids)) { - return null; - } - - let l = 0, - r = kids.length - 1; - while (l <= r) { - const m = (l + r) >> 1; - const kid = xref.fetchIfRef(kids[m]); - const limits = kid.get("Limits"); - - if (key < xref.fetchIfRef(limits[0])) { - r = m - 1; - } else if (key > xref.fetchIfRef(limits[1])) { - l = m + 1; - } else { - kidsOrEntries = xref.fetchIfRef(kids[m]); - break; - } - } - if (l > r) { - return null; - } - } - - // If we get here, then we have found the right entry. Now go through the - // entries in the dictionary until we find the key we're looking for. - const entries = kidsOrEntries.get(this._type); - if (Array.isArray(entries)) { - // Perform a binary search to reduce the lookup time. - let l = 0, - r = entries.length - 2; - while (l <= r) { - // Check only even indices (0, 2, 4, ...) because the - // odd indices contain the actual data. - const tmp = (l + r) >> 1, - m = tmp + (tmp & 1); - const currentKey = xref.fetchIfRef(entries[m]); - if (key < currentKey) { - r = m - 2; - } else if (key > currentKey) { - l = m + 2; - } else { - return xref.fetchIfRef(entries[m + 1]); - } - } - - // Fallback to an exhaustive search, in an attempt to handle corrupt - // PDF files where keys are not correctly ordered (fixes issue 10272). - info( - `Falling back to an exhaustive search, for key "${key}", ` + - `in "${this._type}" tree.` - ); - for (let m = 0, mm = entries.length; m < mm; m += 2) { - const currentKey = xref.fetchIfRef(entries[m]); - if (currentKey === key) { - warn( - `The "${key}" key was found at an incorrect, ` + - `i.e. 
out-of-order, position in "${this._type}" tree.` - ); - return xref.fetchIfRef(entries[m + 1]); - } - } - } - return null; - } -} - -class NameTree extends NameOrNumberTree { - constructor(root, xref) { - super(root, xref, "Names"); - } -} - -class NumberTree extends NameOrNumberTree { - constructor(root, xref) { - super(root, xref, "Nums"); - } -} - -export { Catalog, NumberTree, XRef }; +export { Catalog, XRef }; diff --git a/src/core/struct_tree.js b/src/core/struct_tree.js index 41587d45cb851..1d2fabd274095 100644 --- a/src/core/struct_tree.js +++ b/src/core/struct_tree.js @@ -15,7 +15,7 @@ import { isDict, isName, isRef } from "./primitives.js"; import { isString, stringToPDFString, warn } from "../shared/util.js"; -import { NumberTree } from "./obj.js"; +import { NumberTree } from "./name_number_tree.js"; const MAX_DEPTH = 40; From e8750cfe950259491d68a043c735653e97361a20 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Tue, 13 Apr 2021 18:26:07 +0200 Subject: [PATCH 07/10] Move the `XRef` from `src/core/obj.js` and into its own file The size of the `src/core/obj.js` file has increased slowly over the years, and it also contains a fair amount of *distinct* functionality. In order to improve readability and make it easier to navigate through the code, this patch moves the `XRef` into its own file. 
--- src/core/document.js | 3 +- src/core/obj.js | 856 +---------------------------------------- src/core/xref.js | 886 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 889 insertions(+), 856 deletions(-) create mode 100644 src/core/xref.js diff --git a/src/core/document.js b/src/core/document.js index c22395f3ae922..27b9dd6ac0d55 100644 --- a/src/core/document.js +++ b/src/core/document.js @@ -34,7 +34,6 @@ import { Util, warn, } from "../shared/util.js"; -import { Catalog, XRef } from "./obj.js"; import { clearPrimitiveCaches, Dict, @@ -55,12 +54,14 @@ import { import { NullStream, Stream, StreamsSequenceStream } from "./stream.js"; import { AnnotationFactory } from "./annotation.js"; import { calculateMD5 } from "./crypto.js"; +import { Catalog } from "./obj.js"; import { Linearization } from "./parser.js"; import { ObjectLoader } from "./object_loader.js"; import { OperatorList } from "./operator_list.js"; import { PartialEvaluator } from "./evaluator.js"; import { StructTreePage } from "./struct_tree.js"; import { XFAFactory } from "./xfa/factory.js"; +import { XRef } from "./xref.js"; const DEFAULT_USER_UNIT = 1.0; const LETTER_SIZE_MEDIABOX = [0, 0, 612, 792]; diff --git a/src/core/obj.js b/src/core/obj.js index 558f2c24876ad..47df1ab36f057 100644 --- a/src/core/obj.js +++ b/src/core/obj.js @@ -12,17 +12,14 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -/* eslint-disable no-var */ import { - assert, bytesToString, createPromiseCapability, createValidAbsoluteUrl, DocumentActionEventType, FormatError, info, - InvalidPDFException, isBool, isNum, isString, @@ -35,15 +32,12 @@ import { } from "../shared/util.js"; import { clearPrimitiveCaches, - Cmd, Dict, - isCmd, isDict, isName, isRef, isRefsEqual, isStream, - Ref, RefSet, RefSetCache, } from "./primitives.js"; @@ -51,12 +45,8 @@ import { collectActions, MissingDataException, toRomanNumerals, - XRefEntryException, - XRefParseException, } from "./core_utils.js"; -import { Lexer, Parser } from "./parser.js"; import { NameTree, NumberTree } from "./name_number_tree.js"; -import { CipherTransformFactory } from "./crypto.js"; import { ColorSpace } from "./colorspace.js"; import { FileSpec } from "./file_spec.js"; import { GlobalImageCache } from "./image_utils.js"; @@ -1426,848 +1416,4 @@ class Catalog { } } -var XRef = (function XRefClosure() { - // eslint-disable-next-line no-shadow - function XRef(stream, pdfManager) { - this.stream = stream; - this.pdfManager = pdfManager; - this.entries = []; - this.xrefstms = Object.create(null); - this._cacheMap = new Map(); // Prepare the XRef cache. 
- this.stats = { - streamTypes: Object.create(null), - fontTypes: Object.create(null), - }; - this._newRefNum = null; - } - - XRef.prototype = { - getNewRef: function XRef_getNewRef() { - if (this._newRefNum === null) { - this._newRefNum = this.entries.length; - } - return Ref.get(this._newRefNum++, 0); - }, - - resetNewRef: function XRef_resetNewRef() { - this._newRefNum = null; - }, - - setStartXRef: function XRef_setStartXRef(startXRef) { - // Store the starting positions of xref tables as we process them - // so we can recover from missing data errors - this.startXRefQueue = [startXRef]; - }, - - parse: function XRef_parse(recoveryMode) { - var trailerDict; - if (!recoveryMode) { - trailerDict = this.readXRef(); - } else { - warn("Indexing all PDF objects"); - trailerDict = this.indexObjects(); - } - trailerDict.assignXref(this); - this.trailer = trailerDict; - - let encrypt; - try { - encrypt = trailerDict.get("Encrypt"); - } catch (ex) { - if (ex instanceof MissingDataException) { - throw ex; - } - warn(`XRef.parse - Invalid "Encrypt" reference: "${ex}".`); - } - if (isDict(encrypt)) { - var ids = trailerDict.get("ID"); - var fileId = ids && ids.length ? ids[0] : ""; - // The 'Encrypt' dictionary itself should not be encrypted, and by - // setting `suppressEncryption` we can prevent an infinite loop inside - // of `XRef_fetchUncompressed` if the dictionary contains indirect - // objects (fixes issue7665.pdf). - encrypt.suppressEncryption = true; - this.encrypt = new CipherTransformFactory( - encrypt, - fileId, - this.pdfManager.password - ); - } - - // Get the root dictionary (catalog) object, and do some basic validation. 
- let root; - try { - root = trailerDict.get("Root"); - } catch (ex) { - if (ex instanceof MissingDataException) { - throw ex; - } - warn(`XRef.parse - Invalid "Root" reference: "${ex}".`); - } - if (isDict(root) && root.has("Pages")) { - this.root = root; - } else { - if (!recoveryMode) { - throw new XRefParseException(); - } - throw new FormatError("Invalid root reference"); - } - }, - - processXRefTable: function XRef_processXRefTable(parser) { - if (!("tableState" in this)) { - // Stores state of the table as we process it so we can resume - // from middle of table in case of missing data error - this.tableState = { - entryNum: 0, - streamPos: parser.lexer.stream.pos, - parserBuf1: parser.buf1, - parserBuf2: parser.buf2, - }; - } - - var obj = this.readXRefTable(parser); - - // Sanity check - if (!isCmd(obj, "trailer")) { - throw new FormatError( - "Invalid XRef table: could not find trailer dictionary" - ); - } - // Read trailer dictionary, e.g. - // trailer - // << /Size 22 - // /Root 20R - // /Info 10R - // /ID [ <81b14aafa313db63dbd6f981e49f94f4> ] - // >> - // The parser goes through the entire stream << ... >> and provides - // a getter interface for the key-value table - var dict = parser.getObj(); - - // The pdflib PDF generator can generate a nested trailer dictionary - if (!isDict(dict) && dict.dict) { - dict = dict.dict; - } - if (!isDict(dict)) { - throw new FormatError( - "Invalid XRef table: could not parse trailer dictionary" - ); - } - delete this.tableState; - - return dict; - }, - - readXRefTable: function XRef_readXRefTable(parser) { - // Example of cross-reference table: - // xref - // 0 1 <-- subsection header (first obj #, obj count) - // 0000000000 65535 f <-- actual object (offset, generation #, f/n) - // 23 2 <-- subsection header ... and so on ... - // 0000025518 00002 n - // 0000025635 00000 n - // trailer - // ... 
- - var stream = parser.lexer.stream; - var tableState = this.tableState; - stream.pos = tableState.streamPos; - parser.buf1 = tableState.parserBuf1; - parser.buf2 = tableState.parserBuf2; - - // Outer loop is over subsection headers - var obj; - - while (true) { - if (!("firstEntryNum" in tableState) || !("entryCount" in tableState)) { - if (isCmd((obj = parser.getObj()), "trailer")) { - break; - } - tableState.firstEntryNum = obj; - tableState.entryCount = parser.getObj(); - } - - var first = tableState.firstEntryNum; - var count = tableState.entryCount; - if (!Number.isInteger(first) || !Number.isInteger(count)) { - throw new FormatError( - "Invalid XRef table: wrong types in subsection header" - ); - } - // Inner loop is over objects themselves - for (var i = tableState.entryNum; i < count; i++) { - tableState.streamPos = stream.pos; - tableState.entryNum = i; - tableState.parserBuf1 = parser.buf1; - tableState.parserBuf2 = parser.buf2; - - var entry = {}; - entry.offset = parser.getObj(); - entry.gen = parser.getObj(); - var type = parser.getObj(); - - if (type instanceof Cmd) { - switch (type.cmd) { - case "f": - entry.free = true; - break; - case "n": - entry.uncompressed = true; - break; - } - } - - // Validate entry obj - if ( - !Number.isInteger(entry.offset) || - !Number.isInteger(entry.gen) || - !(entry.free || entry.uncompressed) - ) { - throw new FormatError( - `Invalid entry in XRef subsection: ${first}, ${count}` - ); - } - - // The first xref table entry, i.e. obj 0, should be free. Attempting - // to adjust an incorrect first obj # (fixes issue 3248 and 7229). 
- if (i === 0 && entry.free && first === 1) { - first = 0; - } - - if (!this.entries[i + first]) { - this.entries[i + first] = entry; - } - } - - tableState.entryNum = 0; - tableState.streamPos = stream.pos; - tableState.parserBuf1 = parser.buf1; - tableState.parserBuf2 = parser.buf2; - delete tableState.firstEntryNum; - delete tableState.entryCount; - } - - // Sanity check: as per spec, first object must be free - if (this.entries[0] && !this.entries[0].free) { - throw new FormatError("Invalid XRef table: unexpected first object"); - } - return obj; - }, - - processXRefStream: function XRef_processXRefStream(stream) { - if (!("streamState" in this)) { - // Stores state of the stream as we process it so we can resume - // from middle of stream in case of missing data error - var streamParameters = stream.dict; - var byteWidths = streamParameters.get("W"); - var range = streamParameters.get("Index"); - if (!range) { - range = [0, streamParameters.get("Size")]; - } - - this.streamState = { - entryRanges: range, - byteWidths, - entryNum: 0, - streamPos: stream.pos, - }; - } - this.readXRefStream(stream); - delete this.streamState; - - return stream.dict; - }, - - readXRefStream: function XRef_readXRefStream(stream) { - var i, j; - var streamState = this.streamState; - stream.pos = streamState.streamPos; - - var byteWidths = streamState.byteWidths; - var typeFieldWidth = byteWidths[0]; - var offsetFieldWidth = byteWidths[1]; - var generationFieldWidth = byteWidths[2]; - - var entryRanges = streamState.entryRanges; - while (entryRanges.length > 0) { - var first = entryRanges[0]; - var n = entryRanges[1]; - - if (!Number.isInteger(first) || !Number.isInteger(n)) { - throw new FormatError(`Invalid XRef range fields: ${first}, ${n}`); - } - if ( - !Number.isInteger(typeFieldWidth) || - !Number.isInteger(offsetFieldWidth) || - !Number.isInteger(generationFieldWidth) - ) { - throw new FormatError( - `Invalid XRef entry fields length: ${first}, ${n}` - ); - } - for (i = 
streamState.entryNum; i < n; ++i) { - streamState.entryNum = i; - streamState.streamPos = stream.pos; - - var type = 0, - offset = 0, - generation = 0; - for (j = 0; j < typeFieldWidth; ++j) { - type = (type << 8) | stream.getByte(); - } - // if type field is absent, its default value is 1 - if (typeFieldWidth === 0) { - type = 1; - } - for (j = 0; j < offsetFieldWidth; ++j) { - offset = (offset << 8) | stream.getByte(); - } - for (j = 0; j < generationFieldWidth; ++j) { - generation = (generation << 8) | stream.getByte(); - } - var entry = {}; - entry.offset = offset; - entry.gen = generation; - switch (type) { - case 0: - entry.free = true; - break; - case 1: - entry.uncompressed = true; - break; - case 2: - break; - default: - throw new FormatError(`Invalid XRef entry type: ${type}`); - } - if (!this.entries[first + i]) { - this.entries[first + i] = entry; - } - } - - streamState.entryNum = 0; - streamState.streamPos = stream.pos; - entryRanges.splice(0, 2); - } - }, - - indexObjects: function XRef_indexObjects() { - // Simple scan through the PDF content to find objects, - // trailers and XRef streams. 
- var TAB = 0x9, - LF = 0xa, - CR = 0xd, - SPACE = 0x20; - var PERCENT = 0x25, - LT = 0x3c; - - function readToken(data, offset) { - var token = "", - ch = data[offset]; - while (ch !== LF && ch !== CR && ch !== LT) { - if (++offset >= data.length) { - break; - } - token += String.fromCharCode(ch); - ch = data[offset]; - } - return token; - } - function skipUntil(data, offset, what) { - var length = what.length, - dataLength = data.length; - var skipped = 0; - // finding byte sequence - while (offset < dataLength) { - var i = 0; - while (i < length && data[offset + i] === what[i]) { - ++i; - } - if (i >= length) { - break; // sequence found - } - offset++; - skipped++; - } - return skipped; - } - var objRegExp = /^(\d+)\s+(\d+)\s+obj\b/; - const endobjRegExp = /\bendobj[\b\s]$/; - const nestedObjRegExp = /\s+(\d+\s+\d+\s+obj[\b\s<])$/; - const CHECK_CONTENT_LENGTH = 25; - - var trailerBytes = new Uint8Array([116, 114, 97, 105, 108, 101, 114]); - // prettier-ignore - var startxrefBytes = new Uint8Array([115, 116, 97, 114, 116, 120, 114, - 101, 102]); - const objBytes = new Uint8Array([111, 98, 106]); - var xrefBytes = new Uint8Array([47, 88, 82, 101, 102]); - - // Clear out any existing entries, since they may be bogus. 
- this.entries.length = 0; - - var stream = this.stream; - stream.pos = 0; - var buffer = stream.getBytes(); - var position = stream.start, - length = buffer.length; - var trailers = [], - xrefStms = []; - while (position < length) { - var ch = buffer[position]; - if (ch === TAB || ch === LF || ch === CR || ch === SPACE) { - ++position; - continue; - } - if (ch === PERCENT) { - // %-comment - do { - ++position; - if (position >= length) { - break; - } - ch = buffer[position]; - } while (ch !== LF && ch !== CR); - continue; - } - var token = readToken(buffer, position); - var m; - if ( - token.startsWith("xref") && - (token.length === 4 || /\s/.test(token[4])) - ) { - position += skipUntil(buffer, position, trailerBytes); - trailers.push(position); - position += skipUntil(buffer, position, startxrefBytes); - } else if ((m = objRegExp.exec(token))) { - const num = m[1] | 0, - gen = m[2] | 0; - if (!this.entries[num] || this.entries[num].gen === gen) { - this.entries[num] = { - offset: position - stream.start, - gen, - uncompressed: true, - }; - } - let contentLength, - startPos = position + token.length; - - // Find the next "obj" string, rather than "endobj", to ensure that - // we won't skip over a new 'obj' operator in corrupt files where - // 'endobj' operators are missing (fixes issue9105_reduced.pdf). - while (startPos < buffer.length) { - const endPos = startPos + skipUntil(buffer, startPos, objBytes) + 4; - contentLength = endPos - position; - - const checkPos = Math.max(endPos - CHECK_CONTENT_LENGTH, startPos); - const tokenStr = bytesToString(buffer.subarray(checkPos, endPos)); - - // Check if the current object ends with an 'endobj' operator. - if (endobjRegExp.test(tokenStr)) { - break; - } else { - // Check if an "obj" occurrence is actually a new object, - // i.e. the current object is missing the 'endobj' operator. 
- const objToken = nestedObjRegExp.exec(tokenStr); - - if (objToken && objToken[1]) { - warn( - 'indexObjects: Found new "obj" inside of another "obj", ' + - 'caused by missing "endobj" -- trying to recover.' - ); - contentLength -= objToken[1].length; - break; - } - } - startPos = endPos; - } - const content = buffer.subarray(position, position + contentLength); - - // checking XRef stream suspect - // (it shall have '/XRef' and next char is not a letter) - var xrefTagOffset = skipUntil(content, 0, xrefBytes); - if ( - xrefTagOffset < contentLength && - content[xrefTagOffset + 5] < 64 - ) { - xrefStms.push(position - stream.start); - this.xrefstms[position - stream.start] = 1; // Avoid recursion - } - - position += contentLength; - } else if ( - token.startsWith("trailer") && - (token.length === 7 || /\s/.test(token[7])) - ) { - trailers.push(position); - position += skipUntil(buffer, position, startxrefBytes); - } else { - position += token.length + 1; - } - } - // reading XRef streams - for (let i = 0, ii = xrefStms.length; i < ii; ++i) { - this.startXRefQueue.push(xrefStms[i]); - this.readXRef(/* recoveryMode */ true); - } - // finding main trailer - let trailerDict; - for (let i = 0, ii = trailers.length; i < ii; ++i) { - stream.pos = trailers[i]; - const parser = new Parser({ - lexer: new Lexer(stream), - xref: this, - allowStreams: true, - recoveryMode: true, - }); - var obj = parser.getObj(); - if (!isCmd(obj, "trailer")) { - continue; - } - // read the trailer dictionary - const dict = parser.getObj(); - if (!isDict(dict)) { - continue; - } - // Do some basic validation of the trailer/root dictionary candidate. 
- try { - const rootDict = dict.get("Root"); - if (!(rootDict instanceof Dict)) { - continue; - } - const pagesDict = rootDict.get("Pages"); - if (!(pagesDict instanceof Dict)) { - continue; - } - const pagesCount = pagesDict.get("Count"); - if (!Number.isInteger(pagesCount)) { - continue; - } - // The top-level /Pages dictionary isn't obviously corrupt. - } catch (ex) { - continue; - } - // taking the first one with 'ID' - if (dict.has("ID")) { - return dict; - } - // The current dictionary is a candidate, but continue searching. - trailerDict = dict; - } - // No trailer with 'ID', taking last one (if exists). - if (trailerDict) { - return trailerDict; - } - // nothing helps - throw new InvalidPDFException("Invalid PDF structure."); - }, - - readXRef: function XRef_readXRef(recoveryMode) { - var stream = this.stream; - // Keep track of already parsed XRef tables, to prevent an infinite loop - // when parsing corrupt PDF files where e.g. the /Prev entries create a - // circular dependency between tables (fixes bug1393476.pdf). 
- const startXRefParsedCache = new Set(); - - try { - while (this.startXRefQueue.length) { - var startXRef = this.startXRefQueue[0]; - - if (startXRefParsedCache.has(startXRef)) { - warn("readXRef - skipping XRef table since it was already parsed."); - this.startXRefQueue.shift(); - continue; - } - startXRefParsedCache.add(startXRef); - - stream.pos = startXRef + stream.start; - - const parser = new Parser({ - lexer: new Lexer(stream), - xref: this, - allowStreams: true, - }); - var obj = parser.getObj(); - var dict; - - // Get dictionary - if (isCmd(obj, "xref")) { - // Parse end-of-file XRef - dict = this.processXRefTable(parser); - if (!this.topDict) { - this.topDict = dict; - } - - // Recursively get other XRefs 'XRefStm', if any - obj = dict.get("XRefStm"); - if (Number.isInteger(obj)) { - var pos = obj; - // ignore previously loaded xref streams - // (possible infinite recursion) - if (!(pos in this.xrefstms)) { - this.xrefstms[pos] = 1; - this.startXRefQueue.push(pos); - } - } - } else if (Number.isInteger(obj)) { - // Parse in-stream XRef - if ( - !Number.isInteger(parser.getObj()) || - !isCmd(parser.getObj(), "obj") || - !isStream((obj = parser.getObj())) - ) { - throw new FormatError("Invalid XRef stream"); - } - dict = this.processXRefStream(obj); - if (!this.topDict) { - this.topDict = dict; - } - if (!dict) { - throw new FormatError("Failed to read XRef stream"); - } - } else { - throw new FormatError("Invalid XRef stream header"); - } - - // Recursively get previous dictionary, if any - obj = dict.get("Prev"); - if (Number.isInteger(obj)) { - this.startXRefQueue.push(obj); - } else if (isRef(obj)) { - // The spec says Prev must not be a reference, i.e. "/Prev NNN" - // This is a fallback for non-compliant PDFs, i.e. 
"/Prev NNN 0 R" - this.startXRefQueue.push(obj.num); - } - - this.startXRefQueue.shift(); - } - - return this.topDict; - } catch (e) { - if (e instanceof MissingDataException) { - throw e; - } - info("(while reading XRef): " + e); - } - - if (recoveryMode) { - return undefined; - } - throw new XRefParseException(); - }, - - getEntry: function XRef_getEntry(i) { - var xrefEntry = this.entries[i]; - if (xrefEntry && !xrefEntry.free && xrefEntry.offset) { - return xrefEntry; - } - return null; - }, - - fetchIfRef: function XRef_fetchIfRef(obj, suppressEncryption) { - if (obj instanceof Ref) { - return this.fetch(obj, suppressEncryption); - } - return obj; - }, - - fetch: function XRef_fetch(ref, suppressEncryption) { - if (!(ref instanceof Ref)) { - throw new Error("ref object is not a reference"); - } - const num = ref.num; - - // The XRef cache is populated with objects which are obtained through - // `Parser.getObj`, and indirectly via `Lexer.getObj`. Neither of these - // methods should ever return `undefined` (note the `assert` calls below). - const cacheEntry = this._cacheMap.get(num); - if (cacheEntry !== undefined) { - // In documents with Object Streams, it's possible that cached `Dict`s - // have not been assigned an `objId` yet (see e.g. issue3115r.pdf). - if (cacheEntry instanceof Dict && !cacheEntry.objId) { - cacheEntry.objId = ref.toString(); - } - return cacheEntry; - } - let xrefEntry = this.getEntry(num); - - if (xrefEntry === null) { - // The referenced entry can be free. 
- this._cacheMap.set(num, xrefEntry); - return xrefEntry; - } - - if (xrefEntry.uncompressed) { - xrefEntry = this.fetchUncompressed(ref, xrefEntry, suppressEncryption); - } else { - xrefEntry = this.fetchCompressed(ref, xrefEntry, suppressEncryption); - } - if (isDict(xrefEntry)) { - xrefEntry.objId = ref.toString(); - } else if (isStream(xrefEntry)) { - xrefEntry.dict.objId = ref.toString(); - } - return xrefEntry; - }, - - fetchUncompressed(ref, xrefEntry, suppressEncryption = false) { - var gen = ref.gen; - var num = ref.num; - if (xrefEntry.gen !== gen) { - throw new XRefEntryException(`Inconsistent generation in XRef: ${ref}`); - } - var stream = this.stream.makeSubStream( - xrefEntry.offset + this.stream.start - ); - const parser = new Parser({ - lexer: new Lexer(stream), - xref: this, - allowStreams: true, - }); - var obj1 = parser.getObj(); - var obj2 = parser.getObj(); - var obj3 = parser.getObj(); - - if (obj1 !== num || obj2 !== gen || !(obj3 instanceof Cmd)) { - throw new XRefEntryException(`Bad (uncompressed) XRef entry: ${ref}`); - } - if (obj3.cmd !== "obj") { - // some bad PDFs use "obj1234" and really mean 1234 - if (obj3.cmd.startsWith("obj")) { - num = parseInt(obj3.cmd.substring(3), 10); - if (!Number.isNaN(num)) { - return num; - } - } - throw new XRefEntryException(`Bad (uncompressed) XRef entry: ${ref}`); - } - if (this.encrypt && !suppressEncryption) { - xrefEntry = parser.getObj(this.encrypt.createCipherTransform(num, gen)); - } else { - xrefEntry = parser.getObj(); - } - if (!isStream(xrefEntry)) { - if ( - typeof PDFJSDev === "undefined" || - PDFJSDev.test("!PRODUCTION || TESTING") - ) { - assert( - xrefEntry !== undefined, - 'fetchUncompressed: The "xrefEntry" cannot be undefined.' 
- ); - } - this._cacheMap.set(num, xrefEntry); - } - return xrefEntry; - }, - - fetchCompressed(ref, xrefEntry, suppressEncryption = false) { - const tableOffset = xrefEntry.offset; - const stream = this.fetch(Ref.get(tableOffset, 0)); - if (!isStream(stream)) { - throw new FormatError("bad ObjStm stream"); - } - const first = stream.dict.get("First"); - const n = stream.dict.get("N"); - if (!Number.isInteger(first) || !Number.isInteger(n)) { - throw new FormatError( - "invalid first and n parameters for ObjStm stream" - ); - } - let parser = new Parser({ - lexer: new Lexer(stream), - xref: this, - allowStreams: true, - }); - const nums = new Array(n); - const offsets = new Array(n); - // read the object numbers to populate cache - for (let i = 0; i < n; ++i) { - const num = parser.getObj(); - if (!Number.isInteger(num)) { - throw new FormatError( - `invalid object number in the ObjStm stream: ${num}` - ); - } - const offset = parser.getObj(); - if (!Number.isInteger(offset)) { - throw new FormatError( - `invalid object offset in the ObjStm stream: ${offset}` - ); - } - nums[i] = num; - offsets[i] = offset; - } - - const start = (stream.start || 0) + first; - const entries = new Array(n); - // read stream objects for cache - for (let i = 0; i < n; ++i) { - const length = i < n - 1 ? offsets[i + 1] - offsets[i] : undefined; - if (length < 0) { - throw new FormatError("Invalid offset in the ObjStm stream."); - } - parser = new Parser({ - lexer: new Lexer( - stream.makeSubStream(start + offsets[i], length, stream.dict) - ), - xref: this, - allowStreams: true, - }); - - const obj = parser.getObj(); - entries[i] = obj; - if (isStream(obj)) { - continue; - } - const num = nums[i], - entry = this.entries[num]; - if (entry && entry.offset === tableOffset && entry.gen === i) { - if ( - typeof PDFJSDev === "undefined" || - PDFJSDev.test("!PRODUCTION || TESTING") - ) { - assert( - obj !== undefined, - 'fetchCompressed: The "obj" cannot be undefined.' 
- ); - } - this._cacheMap.set(num, obj); - } - } - xrefEntry = entries[xrefEntry.gen]; - if (xrefEntry === undefined) { - throw new XRefEntryException(`Bad (compressed) XRef entry: ${ref}`); - } - return xrefEntry; - }, - - async fetchIfRefAsync(obj, suppressEncryption) { - if (obj instanceof Ref) { - return this.fetchAsync(obj, suppressEncryption); - } - return obj; - }, - - async fetchAsync(ref, suppressEncryption) { - try { - return this.fetch(ref, suppressEncryption); - } catch (ex) { - if (!(ex instanceof MissingDataException)) { - throw ex; - } - await this.pdfManager.requestRange(ex.begin, ex.end); - return this.fetchAsync(ref, suppressEncryption); - } - }, - - getCatalogObj: function XRef_getCatalogObj() { - return this.root; - }, - }; - - return XRef; -})(); - -export { Catalog, XRef }; +export { Catalog }; diff --git a/src/core/xref.js b/src/core/xref.js new file mode 100644 index 0000000000000..5e2e8b60447a6 --- /dev/null +++ b/src/core/xref.js @@ -0,0 +1,886 @@ +/* Copyright 2021 Mozilla Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/* eslint-disable no-var */ + +import { + assert, + bytesToString, + FormatError, + info, + InvalidPDFException, + warn, +} from "../shared/util.js"; +import { + Cmd, + Dict, + isCmd, + isDict, + isRef, + isStream, + Ref, +} from "./primitives.js"; +import { Lexer, Parser } from "./parser.js"; +import { + MissingDataException, + XRefEntryException, + XRefParseException, +} from "./core_utils.js"; +import { CipherTransformFactory } from "./crypto.js"; + +var XRef = (function XRefClosure() { + // eslint-disable-next-line no-shadow + function XRef(stream, pdfManager) { + this.stream = stream; + this.pdfManager = pdfManager; + this.entries = []; + this.xrefstms = Object.create(null); + this._cacheMap = new Map(); // Prepare the XRef cache. + this.stats = { + streamTypes: Object.create(null), + fontTypes: Object.create(null), + }; + this._newRefNum = null; + } + + XRef.prototype = { + getNewRef: function XRef_getNewRef() { + if (this._newRefNum === null) { + this._newRefNum = this.entries.length; + } + return Ref.get(this._newRefNum++, 0); + }, + + resetNewRef: function XRef_resetNewRef() { + this._newRefNum = null; + }, + + setStartXRef: function XRef_setStartXRef(startXRef) { + // Store the starting positions of xref tables as we process them + // so we can recover from missing data errors + this.startXRefQueue = [startXRef]; + }, + + parse: function XRef_parse(recoveryMode) { + var trailerDict; + if (!recoveryMode) { + trailerDict = this.readXRef(); + } else { + warn("Indexing all PDF objects"); + trailerDict = this.indexObjects(); + } + trailerDict.assignXref(this); + this.trailer = trailerDict; + + let encrypt; + try { + encrypt = trailerDict.get("Encrypt"); + } catch (ex) { + if (ex instanceof MissingDataException) { + throw ex; + } + warn(`XRef.parse - Invalid "Encrypt" reference: "${ex}".`); + } + if (isDict(encrypt)) { + var ids = trailerDict.get("ID"); + var fileId = ids && ids.length ? 
ids[0] : ""; + // The 'Encrypt' dictionary itself should not be encrypted, and by + // setting `suppressEncryption` we can prevent an infinite loop inside + // of `XRef_fetchUncompressed` if the dictionary contains indirect + // objects (fixes issue7665.pdf). + encrypt.suppressEncryption = true; + this.encrypt = new CipherTransformFactory( + encrypt, + fileId, + this.pdfManager.password + ); + } + + // Get the root dictionary (catalog) object, and do some basic validation. + let root; + try { + root = trailerDict.get("Root"); + } catch (ex) { + if (ex instanceof MissingDataException) { + throw ex; + } + warn(`XRef.parse - Invalid "Root" reference: "${ex}".`); + } + if (isDict(root) && root.has("Pages")) { + this.root = root; + } else { + if (!recoveryMode) { + throw new XRefParseException(); + } + throw new FormatError("Invalid root reference"); + } + }, + + processXRefTable: function XRef_processXRefTable(parser) { + if (!("tableState" in this)) { + // Stores state of the table as we process it so we can resume + // from middle of table in case of missing data error + this.tableState = { + entryNum: 0, + streamPos: parser.lexer.stream.pos, + parserBuf1: parser.buf1, + parserBuf2: parser.buf2, + }; + } + + var obj = this.readXRefTable(parser); + + // Sanity check + if (!isCmd(obj, "trailer")) { + throw new FormatError( + "Invalid XRef table: could not find trailer dictionary" + ); + } + // Read trailer dictionary, e.g. + // trailer + // << /Size 22 + // /Root 20R + // /Info 10R + // /ID [ <81b14aafa313db63dbd6f981e49f94f4> ] + // >> + // The parser goes through the entire stream << ... 
>> and provides + // a getter interface for the key-value table + var dict = parser.getObj(); + + // The pdflib PDF generator can generate a nested trailer dictionary + if (!isDict(dict) && dict.dict) { + dict = dict.dict; + } + if (!isDict(dict)) { + throw new FormatError( + "Invalid XRef table: could not parse trailer dictionary" + ); + } + delete this.tableState; + + return dict; + }, + + readXRefTable: function XRef_readXRefTable(parser) { + // Example of cross-reference table: + // xref + // 0 1 <-- subsection header (first obj #, obj count) + // 0000000000 65535 f <-- actual object (offset, generation #, f/n) + // 23 2 <-- subsection header ... and so on ... + // 0000025518 00002 n + // 0000025635 00000 n + // trailer + // ... + + var stream = parser.lexer.stream; + var tableState = this.tableState; + stream.pos = tableState.streamPos; + parser.buf1 = tableState.parserBuf1; + parser.buf2 = tableState.parserBuf2; + + // Outer loop is over subsection headers + var obj; + + while (true) { + if (!("firstEntryNum" in tableState) || !("entryCount" in tableState)) { + if (isCmd((obj = parser.getObj()), "trailer")) { + break; + } + tableState.firstEntryNum = obj; + tableState.entryCount = parser.getObj(); + } + + var first = tableState.firstEntryNum; + var count = tableState.entryCount; + if (!Number.isInteger(first) || !Number.isInteger(count)) { + throw new FormatError( + "Invalid XRef table: wrong types in subsection header" + ); + } + // Inner loop is over objects themselves + for (var i = tableState.entryNum; i < count; i++) { + tableState.streamPos = stream.pos; + tableState.entryNum = i; + tableState.parserBuf1 = parser.buf1; + tableState.parserBuf2 = parser.buf2; + + var entry = {}; + entry.offset = parser.getObj(); + entry.gen = parser.getObj(); + var type = parser.getObj(); + + if (type instanceof Cmd) { + switch (type.cmd) { + case "f": + entry.free = true; + break; + case "n": + entry.uncompressed = true; + break; + } + } + + // Validate entry obj + if ( 
+ !Number.isInteger(entry.offset) || + !Number.isInteger(entry.gen) || + !(entry.free || entry.uncompressed) + ) { + throw new FormatError( + `Invalid entry in XRef subsection: ${first}, ${count}` + ); + } + + // The first xref table entry, i.e. obj 0, should be free. Attempting + // to adjust an incorrect first obj # (fixes issue 3248 and 7229). + if (i === 0 && entry.free && first === 1) { + first = 0; + } + + if (!this.entries[i + first]) { + this.entries[i + first] = entry; + } + } + + tableState.entryNum = 0; + tableState.streamPos = stream.pos; + tableState.parserBuf1 = parser.buf1; + tableState.parserBuf2 = parser.buf2; + delete tableState.firstEntryNum; + delete tableState.entryCount; + } + + // Sanity check: as per spec, first object must be free + if (this.entries[0] && !this.entries[0].free) { + throw new FormatError("Invalid XRef table: unexpected first object"); + } + return obj; + }, + + processXRefStream: function XRef_processXRefStream(stream) { + if (!("streamState" in this)) { + // Stores state of the stream as we process it so we can resume + // from middle of stream in case of missing data error + var streamParameters = stream.dict; + var byteWidths = streamParameters.get("W"); + var range = streamParameters.get("Index"); + if (!range) { + range = [0, streamParameters.get("Size")]; + } + + this.streamState = { + entryRanges: range, + byteWidths, + entryNum: 0, + streamPos: stream.pos, + }; + } + this.readXRefStream(stream); + delete this.streamState; + + return stream.dict; + }, + + readXRefStream: function XRef_readXRefStream(stream) { + var i, j; + var streamState = this.streamState; + stream.pos = streamState.streamPos; + + var byteWidths = streamState.byteWidths; + var typeFieldWidth = byteWidths[0]; + var offsetFieldWidth = byteWidths[1]; + var generationFieldWidth = byteWidths[2]; + + var entryRanges = streamState.entryRanges; + while (entryRanges.length > 0) { + var first = entryRanges[0]; + var n = entryRanges[1]; + + if 
(!Number.isInteger(first) || !Number.isInteger(n)) { + throw new FormatError(`Invalid XRef range fields: ${first}, ${n}`); + } + if ( + !Number.isInteger(typeFieldWidth) || + !Number.isInteger(offsetFieldWidth) || + !Number.isInteger(generationFieldWidth) + ) { + throw new FormatError( + `Invalid XRef entry fields length: ${first}, ${n}` + ); + } + for (i = streamState.entryNum; i < n; ++i) { + streamState.entryNum = i; + streamState.streamPos = stream.pos; + + var type = 0, + offset = 0, + generation = 0; + for (j = 0; j < typeFieldWidth; ++j) { + type = (type << 8) | stream.getByte(); + } + // if type field is absent, its default value is 1 + if (typeFieldWidth === 0) { + type = 1; + } + for (j = 0; j < offsetFieldWidth; ++j) { + offset = (offset << 8) | stream.getByte(); + } + for (j = 0; j < generationFieldWidth; ++j) { + generation = (generation << 8) | stream.getByte(); + } + var entry = {}; + entry.offset = offset; + entry.gen = generation; + switch (type) { + case 0: + entry.free = true; + break; + case 1: + entry.uncompressed = true; + break; + case 2: + break; + default: + throw new FormatError(`Invalid XRef entry type: ${type}`); + } + if (!this.entries[first + i]) { + this.entries[first + i] = entry; + } + } + + streamState.entryNum = 0; + streamState.streamPos = stream.pos; + entryRanges.splice(0, 2); + } + }, + + indexObjects: function XRef_indexObjects() { + // Simple scan through the PDF content to find objects, + // trailers and XRef streams. 
+ var TAB = 0x9, + LF = 0xa, + CR = 0xd, + SPACE = 0x20; + var PERCENT = 0x25, + LT = 0x3c; + + function readToken(data, offset) { + var token = "", + ch = data[offset]; + while (ch !== LF && ch !== CR && ch !== LT) { + if (++offset >= data.length) { + break; + } + token += String.fromCharCode(ch); + ch = data[offset]; + } + return token; + } + function skipUntil(data, offset, what) { + var length = what.length, + dataLength = data.length; + var skipped = 0; + // finding byte sequence + while (offset < dataLength) { + var i = 0; + while (i < length && data[offset + i] === what[i]) { + ++i; + } + if (i >= length) { + break; // sequence found + } + offset++; + skipped++; + } + return skipped; + } + var objRegExp = /^(\d+)\s+(\d+)\s+obj\b/; + const endobjRegExp = /\bendobj[\b\s]$/; + const nestedObjRegExp = /\s+(\d+\s+\d+\s+obj[\b\s<])$/; + const CHECK_CONTENT_LENGTH = 25; + + var trailerBytes = new Uint8Array([116, 114, 97, 105, 108, 101, 114]); + // prettier-ignore + var startxrefBytes = new Uint8Array([115, 116, 97, 114, 116, 120, 114, + 101, 102]); + const objBytes = new Uint8Array([111, 98, 106]); + var xrefBytes = new Uint8Array([47, 88, 82, 101, 102]); + + // Clear out any existing entries, since they may be bogus. 
+ this.entries.length = 0; + + var stream = this.stream; + stream.pos = 0; + var buffer = stream.getBytes(); + var position = stream.start, + length = buffer.length; + var trailers = [], + xrefStms = []; + while (position < length) { + var ch = buffer[position]; + if (ch === TAB || ch === LF || ch === CR || ch === SPACE) { + ++position; + continue; + } + if (ch === PERCENT) { + // %-comment + do { + ++position; + if (position >= length) { + break; + } + ch = buffer[position]; + } while (ch !== LF && ch !== CR); + continue; + } + var token = readToken(buffer, position); + var m; + if ( + token.startsWith("xref") && + (token.length === 4 || /\s/.test(token[4])) + ) { + position += skipUntil(buffer, position, trailerBytes); + trailers.push(position); + position += skipUntil(buffer, position, startxrefBytes); + } else if ((m = objRegExp.exec(token))) { + const num = m[1] | 0, + gen = m[2] | 0; + if (!this.entries[num] || this.entries[num].gen === gen) { + this.entries[num] = { + offset: position - stream.start, + gen, + uncompressed: true, + }; + } + let contentLength, + startPos = position + token.length; + + // Find the next "obj" string, rather than "endobj", to ensure that + // we won't skip over a new 'obj' operator in corrupt files where + // 'endobj' operators are missing (fixes issue9105_reduced.pdf). + while (startPos < buffer.length) { + const endPos = startPos + skipUntil(buffer, startPos, objBytes) + 4; + contentLength = endPos - position; + + const checkPos = Math.max(endPos - CHECK_CONTENT_LENGTH, startPos); + const tokenStr = bytesToString(buffer.subarray(checkPos, endPos)); + + // Check if the current object ends with an 'endobj' operator. + if (endobjRegExp.test(tokenStr)) { + break; + } else { + // Check if an "obj" occurrence is actually a new object, + // i.e. the current object is missing the 'endobj' operator. 
+ const objToken = nestedObjRegExp.exec(tokenStr); + + if (objToken && objToken[1]) { + warn( + 'indexObjects: Found new "obj" inside of another "obj", ' + + 'caused by missing "endobj" -- trying to recover.' + ); + contentLength -= objToken[1].length; + break; + } + } + startPos = endPos; + } + const content = buffer.subarray(position, position + contentLength); + + // checking XRef stream suspect + // (it shall have '/XRef' and next char is not a letter) + var xrefTagOffset = skipUntil(content, 0, xrefBytes); + if ( + xrefTagOffset < contentLength && + content[xrefTagOffset + 5] < 64 + ) { + xrefStms.push(position - stream.start); + this.xrefstms[position - stream.start] = 1; // Avoid recursion + } + + position += contentLength; + } else if ( + token.startsWith("trailer") && + (token.length === 7 || /\s/.test(token[7])) + ) { + trailers.push(position); + position += skipUntil(buffer, position, startxrefBytes); + } else { + position += token.length + 1; + } + } + // reading XRef streams + for (let i = 0, ii = xrefStms.length; i < ii; ++i) { + this.startXRefQueue.push(xrefStms[i]); + this.readXRef(/* recoveryMode */ true); + } + // finding main trailer + let trailerDict; + for (let i = 0, ii = trailers.length; i < ii; ++i) { + stream.pos = trailers[i]; + const parser = new Parser({ + lexer: new Lexer(stream), + xref: this, + allowStreams: true, + recoveryMode: true, + }); + var obj = parser.getObj(); + if (!isCmd(obj, "trailer")) { + continue; + } + // read the trailer dictionary + const dict = parser.getObj(); + if (!isDict(dict)) { + continue; + } + // Do some basic validation of the trailer/root dictionary candidate. 
+ try { + const rootDict = dict.get("Root"); + if (!(rootDict instanceof Dict)) { + continue; + } + const pagesDict = rootDict.get("Pages"); + if (!(pagesDict instanceof Dict)) { + continue; + } + const pagesCount = pagesDict.get("Count"); + if (!Number.isInteger(pagesCount)) { + continue; + } + // The top-level /Pages dictionary isn't obviously corrupt. + } catch (ex) { + continue; + } + // taking the first one with 'ID' + if (dict.has("ID")) { + return dict; + } + // The current dictionary is a candidate, but continue searching. + trailerDict = dict; + } + // No trailer with 'ID', taking last one (if exists). + if (trailerDict) { + return trailerDict; + } + // nothing helps + throw new InvalidPDFException("Invalid PDF structure."); + }, + + readXRef: function XRef_readXRef(recoveryMode) { + var stream = this.stream; + // Keep track of already parsed XRef tables, to prevent an infinite loop + // when parsing corrupt PDF files where e.g. the /Prev entries create a + // circular dependency between tables (fixes bug1393476.pdf). 
+ const startXRefParsedCache = new Set(); + + try { + while (this.startXRefQueue.length) { + var startXRef = this.startXRefQueue[0]; + + if (startXRefParsedCache.has(startXRef)) { + warn("readXRef - skipping XRef table since it was already parsed."); + this.startXRefQueue.shift(); + continue; + } + startXRefParsedCache.add(startXRef); + + stream.pos = startXRef + stream.start; + + const parser = new Parser({ + lexer: new Lexer(stream), + xref: this, + allowStreams: true, + }); + var obj = parser.getObj(); + var dict; + + // Get dictionary + if (isCmd(obj, "xref")) { + // Parse end-of-file XRef + dict = this.processXRefTable(parser); + if (!this.topDict) { + this.topDict = dict; + } + + // Recursively get other XRefs 'XRefStm', if any + obj = dict.get("XRefStm"); + if (Number.isInteger(obj)) { + var pos = obj; + // ignore previously loaded xref streams + // (possible infinite recursion) + if (!(pos in this.xrefstms)) { + this.xrefstms[pos] = 1; + this.startXRefQueue.push(pos); + } + } + } else if (Number.isInteger(obj)) { + // Parse in-stream XRef + if ( + !Number.isInteger(parser.getObj()) || + !isCmd(parser.getObj(), "obj") || + !isStream((obj = parser.getObj())) + ) { + throw new FormatError("Invalid XRef stream"); + } + dict = this.processXRefStream(obj); + if (!this.topDict) { + this.topDict = dict; + } + if (!dict) { + throw new FormatError("Failed to read XRef stream"); + } + } else { + throw new FormatError("Invalid XRef stream header"); + } + + // Recursively get previous dictionary, if any + obj = dict.get("Prev"); + if (Number.isInteger(obj)) { + this.startXRefQueue.push(obj); + } else if (isRef(obj)) { + // The spec says Prev must not be a reference, i.e. "/Prev NNN" + // This is a fallback for non-compliant PDFs, i.e. 
"/Prev NNN 0 R" + this.startXRefQueue.push(obj.num); + } + + this.startXRefQueue.shift(); + } + + return this.topDict; + } catch (e) { + if (e instanceof MissingDataException) { + throw e; + } + info("(while reading XRef): " + e); + } + + if (recoveryMode) { + return undefined; + } + throw new XRefParseException(); + }, + + getEntry: function XRef_getEntry(i) { + var xrefEntry = this.entries[i]; + if (xrefEntry && !xrefEntry.free && xrefEntry.offset) { + return xrefEntry; + } + return null; + }, + + fetchIfRef: function XRef_fetchIfRef(obj, suppressEncryption) { + if (obj instanceof Ref) { + return this.fetch(obj, suppressEncryption); + } + return obj; + }, + + fetch: function XRef_fetch(ref, suppressEncryption) { + if (!(ref instanceof Ref)) { + throw new Error("ref object is not a reference"); + } + const num = ref.num; + + // The XRef cache is populated with objects which are obtained through + // `Parser.getObj`, and indirectly via `Lexer.getObj`. Neither of these + // methods should ever return `undefined` (note the `assert` calls below). + const cacheEntry = this._cacheMap.get(num); + if (cacheEntry !== undefined) { + // In documents with Object Streams, it's possible that cached `Dict`s + // have not been assigned an `objId` yet (see e.g. issue3115r.pdf). + if (cacheEntry instanceof Dict && !cacheEntry.objId) { + cacheEntry.objId = ref.toString(); + } + return cacheEntry; + } + let xrefEntry = this.getEntry(num); + + if (xrefEntry === null) { + // The referenced entry can be free. 
+ this._cacheMap.set(num, xrefEntry); + return xrefEntry; + } + + if (xrefEntry.uncompressed) { + xrefEntry = this.fetchUncompressed(ref, xrefEntry, suppressEncryption); + } else { + xrefEntry = this.fetchCompressed(ref, xrefEntry, suppressEncryption); + } + if (isDict(xrefEntry)) { + xrefEntry.objId = ref.toString(); + } else if (isStream(xrefEntry)) { + xrefEntry.dict.objId = ref.toString(); + } + return xrefEntry; + }, + + fetchUncompressed(ref, xrefEntry, suppressEncryption = false) { + var gen = ref.gen; + var num = ref.num; + if (xrefEntry.gen !== gen) { + throw new XRefEntryException(`Inconsistent generation in XRef: ${ref}`); + } + var stream = this.stream.makeSubStream( + xrefEntry.offset + this.stream.start + ); + const parser = new Parser({ + lexer: new Lexer(stream), + xref: this, + allowStreams: true, + }); + var obj1 = parser.getObj(); + var obj2 = parser.getObj(); + var obj3 = parser.getObj(); + + if (obj1 !== num || obj2 !== gen || !(obj3 instanceof Cmd)) { + throw new XRefEntryException(`Bad (uncompressed) XRef entry: ${ref}`); + } + if (obj3.cmd !== "obj") { + // some bad PDFs use "obj1234" and really mean 1234 + if (obj3.cmd.startsWith("obj")) { + num = parseInt(obj3.cmd.substring(3), 10); + if (!Number.isNaN(num)) { + return num; + } + } + throw new XRefEntryException(`Bad (uncompressed) XRef entry: ${ref}`); + } + if (this.encrypt && !suppressEncryption) { + xrefEntry = parser.getObj(this.encrypt.createCipherTransform(num, gen)); + } else { + xrefEntry = parser.getObj(); + } + if (!isStream(xrefEntry)) { + if ( + typeof PDFJSDev === "undefined" || + PDFJSDev.test("!PRODUCTION || TESTING") + ) { + assert( + xrefEntry !== undefined, + 'fetchUncompressed: The "xrefEntry" cannot be undefined.' 
+ ); + } + this._cacheMap.set(num, xrefEntry); + } + return xrefEntry; + }, + + fetchCompressed(ref, xrefEntry, suppressEncryption = false) { + const tableOffset = xrefEntry.offset; + const stream = this.fetch(Ref.get(tableOffset, 0)); + if (!isStream(stream)) { + throw new FormatError("bad ObjStm stream"); + } + const first = stream.dict.get("First"); + const n = stream.dict.get("N"); + if (!Number.isInteger(first) || !Number.isInteger(n)) { + throw new FormatError( + "invalid first and n parameters for ObjStm stream" + ); + } + let parser = new Parser({ + lexer: new Lexer(stream), + xref: this, + allowStreams: true, + }); + const nums = new Array(n); + const offsets = new Array(n); + // read the object numbers to populate cache + for (let i = 0; i < n; ++i) { + const num = parser.getObj(); + if (!Number.isInteger(num)) { + throw new FormatError( + `invalid object number in the ObjStm stream: ${num}` + ); + } + const offset = parser.getObj(); + if (!Number.isInteger(offset)) { + throw new FormatError( + `invalid object offset in the ObjStm stream: ${offset}` + ); + } + nums[i] = num; + offsets[i] = offset; + } + + const start = (stream.start || 0) + first; + const entries = new Array(n); + // read stream objects for cache + for (let i = 0; i < n; ++i) { + const length = i < n - 1 ? offsets[i + 1] - offsets[i] : undefined; + if (length < 0) { + throw new FormatError("Invalid offset in the ObjStm stream."); + } + parser = new Parser({ + lexer: new Lexer( + stream.makeSubStream(start + offsets[i], length, stream.dict) + ), + xref: this, + allowStreams: true, + }); + + const obj = parser.getObj(); + entries[i] = obj; + if (isStream(obj)) { + continue; + } + const num = nums[i], + entry = this.entries[num]; + if (entry && entry.offset === tableOffset && entry.gen === i) { + if ( + typeof PDFJSDev === "undefined" || + PDFJSDev.test("!PRODUCTION || TESTING") + ) { + assert( + obj !== undefined, + 'fetchCompressed: The "obj" cannot be undefined.' 
+ ); + } + this._cacheMap.set(num, obj); + } + } + xrefEntry = entries[xrefEntry.gen]; + if (xrefEntry === undefined) { + throw new XRefEntryException(`Bad (compressed) XRef entry: ${ref}`); + } + return xrefEntry; + }, + + async fetchIfRefAsync(obj, suppressEncryption) { + if (obj instanceof Ref) { + return this.fetchAsync(obj, suppressEncryption); + } + return obj; + }, + + async fetchAsync(ref, suppressEncryption) { + try { + return this.fetch(ref, suppressEncryption); + } catch (ex) { + if (!(ex instanceof MissingDataException)) { + throw ex; + } + await this.pdfManager.requestRange(ex.begin, ex.end); + return this.fetchAsync(ref, suppressEncryption); + } + }, + + getCatalogObj: function XRef_getCatalogObj() { + return this.root; + }, + }; + + return XRef; +})(); + +export { XRef }; From bc828cd41f9ce25e8ce1a483f53b4ddb3fa5ccec Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Tue, 13 Apr 2021 18:26:12 +0200 Subject: [PATCH 08/10] Convert the `XRef` to a "normal" class --- src/core/xref.js | 1464 +++++++++++++++++++++++----------------------- 1 file changed, 727 insertions(+), 737 deletions(-) diff --git a/src/core/xref.js b/src/core/xref.js index 5e2e8b60447a6..aac2c368a8c1f 100644 --- a/src/core/xref.js +++ b/src/core/xref.js @@ -39,9 +39,8 @@ import { } from "./core_utils.js"; import { CipherTransformFactory } from "./crypto.js"; -var XRef = (function XRefClosure() { - // eslint-disable-next-line no-shadow - function XRef(stream, pdfManager) { +class XRef { + constructor(stream, pdfManager) { this.stream = stream; this.pdfManager = pdfManager; this.entries = []; @@ -54,833 +53,824 @@ var XRef = (function XRefClosure() { this._newRefNum = null; } - XRef.prototype = { - getNewRef: function XRef_getNewRef() { - if (this._newRefNum === null) { - this._newRefNum = this.entries.length; - } - return Ref.get(this._newRefNum++, 0); - }, + getNewRef() { + if (this._newRefNum === null) { + this._newRefNum = this.entries.length; + } + return Ref.get(this._newRefNum++, 
0); + } - resetNewRef: function XRef_resetNewRef() { - this._newRefNum = null; - }, + resetNewRef() { + this._newRefNum = null; + } - setStartXRef: function XRef_setStartXRef(startXRef) { - // Store the starting positions of xref tables as we process them - // so we can recover from missing data errors - this.startXRefQueue = [startXRef]; - }, + setStartXRef(startXRef) { + // Store the starting positions of xref tables as we process them + // so we can recover from missing data errors + this.startXRefQueue = [startXRef]; + } - parse: function XRef_parse(recoveryMode) { - var trailerDict; + parse(recoveryMode = false) { + var trailerDict; + if (!recoveryMode) { + trailerDict = this.readXRef(); + } else { + warn("Indexing all PDF objects"); + trailerDict = this.indexObjects(); + } + trailerDict.assignXref(this); + this.trailer = trailerDict; + + let encrypt; + try { + encrypt = trailerDict.get("Encrypt"); + } catch (ex) { + if (ex instanceof MissingDataException) { + throw ex; + } + warn(`XRef.parse - Invalid "Encrypt" reference: "${ex}".`); + } + if (isDict(encrypt)) { + var ids = trailerDict.get("ID"); + var fileId = ids && ids.length ? ids[0] : ""; + // The 'Encrypt' dictionary itself should not be encrypted, and by + // setting `suppressEncryption` we can prevent an infinite loop inside + // of `XRef_fetchUncompressed` if the dictionary contains indirect + // objects (fixes issue7665.pdf). + encrypt.suppressEncryption = true; + this.encrypt = new CipherTransformFactory( + encrypt, + fileId, + this.pdfManager.password + ); + } + + // Get the root dictionary (catalog) object, and do some basic validation. 
+ let root; + try { + root = trailerDict.get("Root"); + } catch (ex) { + if (ex instanceof MissingDataException) { + throw ex; + } + warn(`XRef.parse - Invalid "Root" reference: "${ex}".`); + } + if (isDict(root) && root.has("Pages")) { + this.root = root; + } else { if (!recoveryMode) { - trailerDict = this.readXRef(); - } else { - warn("Indexing all PDF objects"); - trailerDict = this.indexObjects(); + throw new XRefParseException(); } - trailerDict.assignXref(this); - this.trailer = trailerDict; + throw new FormatError("Invalid root reference"); + } + } - let encrypt; - try { - encrypt = trailerDict.get("Encrypt"); - } catch (ex) { - if (ex instanceof MissingDataException) { - throw ex; - } - warn(`XRef.parse - Invalid "Encrypt" reference: "${ex}".`); - } - if (isDict(encrypt)) { - var ids = trailerDict.get("ID"); - var fileId = ids && ids.length ? ids[0] : ""; - // The 'Encrypt' dictionary itself should not be encrypted, and by - // setting `suppressEncryption` we can prevent an infinite loop inside - // of `XRef_fetchUncompressed` if the dictionary contains indirect - // objects (fixes issue7665.pdf). - encrypt.suppressEncryption = true; - this.encrypt = new CipherTransformFactory( - encrypt, - fileId, - this.pdfManager.password - ); - } + processXRefTable(parser) { + if (!("tableState" in this)) { + // Stores state of the table as we process it so we can resume + // from middle of table in case of missing data error + this.tableState = { + entryNum: 0, + streamPos: parser.lexer.stream.pos, + parserBuf1: parser.buf1, + parserBuf2: parser.buf2, + }; + } + + var obj = this.readXRefTable(parser); + + // Sanity check + if (!isCmd(obj, "trailer")) { + throw new FormatError( + "Invalid XRef table: could not find trailer dictionary" + ); + } + // Read trailer dictionary, e.g. + // trailer + // << /Size 22 + // /Root 20R + // /Info 10R + // /ID [ <81b14aafa313db63dbd6f981e49f94f4> ] + // >> + // The parser goes through the entire stream << ... 
>> and provides + // a getter interface for the key-value table + var dict = parser.getObj(); + + // The pdflib PDF generator can generate a nested trailer dictionary + if (!isDict(dict) && dict.dict) { + dict = dict.dict; + } + if (!isDict(dict)) { + throw new FormatError( + "Invalid XRef table: could not parse trailer dictionary" + ); + } + delete this.tableState; - // Get the root dictionary (catalog) object, and do some basic validation. - let root; - try { - root = trailerDict.get("Root"); - } catch (ex) { - if (ex instanceof MissingDataException) { - throw ex; - } - warn(`XRef.parse - Invalid "Root" reference: "${ex}".`); - } - if (isDict(root) && root.has("Pages")) { - this.root = root; - } else { - if (!recoveryMode) { - throw new XRefParseException(); + return dict; + } + + readXRefTable(parser) { + // Example of cross-reference table: + // xref + // 0 1 <-- subsection header (first obj #, obj count) + // 0000000000 65535 f <-- actual object (offset, generation #, f/n) + // 23 2 <-- subsection header ... and so on ... + // 0000025518 00002 n + // 0000025635 00000 n + // trailer + // ... 
+ + var stream = parser.lexer.stream; + var tableState = this.tableState; + stream.pos = tableState.streamPos; + parser.buf1 = tableState.parserBuf1; + parser.buf2 = tableState.parserBuf2; + + // Outer loop is over subsection headers + var obj; + + while (true) { + if (!("firstEntryNum" in tableState) || !("entryCount" in tableState)) { + if (isCmd((obj = parser.getObj()), "trailer")) { + break; } - throw new FormatError("Invalid root reference"); - } - }, - - processXRefTable: function XRef_processXRefTable(parser) { - if (!("tableState" in this)) { - // Stores state of the table as we process it so we can resume - // from middle of table in case of missing data error - this.tableState = { - entryNum: 0, - streamPos: parser.lexer.stream.pos, - parserBuf1: parser.buf1, - parserBuf2: parser.buf2, - }; + tableState.firstEntryNum = obj; + tableState.entryCount = parser.getObj(); } - var obj = this.readXRefTable(parser); - - // Sanity check - if (!isCmd(obj, "trailer")) { - throw new FormatError( - "Invalid XRef table: could not find trailer dictionary" - ); - } - // Read trailer dictionary, e.g. - // trailer - // << /Size 22 - // /Root 20R - // /Info 10R - // /ID [ <81b14aafa313db63dbd6f981e49f94f4> ] - // >> - // The parser goes through the entire stream << ... 
>> and provides - // a getter interface for the key-value table - var dict = parser.getObj(); - - // The pdflib PDF generator can generate a nested trailer dictionary - if (!isDict(dict) && dict.dict) { - dict = dict.dict; - } - if (!isDict(dict)) { + var first = tableState.firstEntryNum; + var count = tableState.entryCount; + if (!Number.isInteger(first) || !Number.isInteger(count)) { throw new FormatError( - "Invalid XRef table: could not parse trailer dictionary" + "Invalid XRef table: wrong types in subsection header" ); } - delete this.tableState; - - return dict; - }, - - readXRefTable: function XRef_readXRefTable(parser) { - // Example of cross-reference table: - // xref - // 0 1 <-- subsection header (first obj #, obj count) - // 0000000000 65535 f <-- actual object (offset, generation #, f/n) - // 23 2 <-- subsection header ... and so on ... - // 0000025518 00002 n - // 0000025635 00000 n - // trailer - // ... - - var stream = parser.lexer.stream; - var tableState = this.tableState; - stream.pos = tableState.streamPos; - parser.buf1 = tableState.parserBuf1; - parser.buf2 = tableState.parserBuf2; - - // Outer loop is over subsection headers - var obj; - - while (true) { - if (!("firstEntryNum" in tableState) || !("entryCount" in tableState)) { - if (isCmd((obj = parser.getObj()), "trailer")) { - break; + // Inner loop is over objects themselves + for (var i = tableState.entryNum; i < count; i++) { + tableState.streamPos = stream.pos; + tableState.entryNum = i; + tableState.parserBuf1 = parser.buf1; + tableState.parserBuf2 = parser.buf2; + + var entry = {}; + entry.offset = parser.getObj(); + entry.gen = parser.getObj(); + var type = parser.getObj(); + + if (type instanceof Cmd) { + switch (type.cmd) { + case "f": + entry.free = true; + break; + case "n": + entry.uncompressed = true; + break; } - tableState.firstEntryNum = obj; - tableState.entryCount = parser.getObj(); } - var first = tableState.firstEntryNum; - var count = tableState.entryCount; - if 
(!Number.isInteger(first) || !Number.isInteger(count)) { + // Validate entry obj + if ( + !Number.isInteger(entry.offset) || + !Number.isInteger(entry.gen) || + !(entry.free || entry.uncompressed) + ) { throw new FormatError( - "Invalid XRef table: wrong types in subsection header" + `Invalid entry in XRef subsection: ${first}, ${count}` ); } - // Inner loop is over objects themselves - for (var i = tableState.entryNum; i < count; i++) { - tableState.streamPos = stream.pos; - tableState.entryNum = i; - tableState.parserBuf1 = parser.buf1; - tableState.parserBuf2 = parser.buf2; - - var entry = {}; - entry.offset = parser.getObj(); - entry.gen = parser.getObj(); - var type = parser.getObj(); - - if (type instanceof Cmd) { - switch (type.cmd) { - case "f": - entry.free = true; - break; - case "n": - entry.uncompressed = true; - break; - } - } - - // Validate entry obj - if ( - !Number.isInteger(entry.offset) || - !Number.isInteger(entry.gen) || - !(entry.free || entry.uncompressed) - ) { - throw new FormatError( - `Invalid entry in XRef subsection: ${first}, ${count}` - ); - } - - // The first xref table entry, i.e. obj 0, should be free. Attempting - // to adjust an incorrect first obj # (fixes issue 3248 and 7229). - if (i === 0 && entry.free && first === 1) { - first = 0; - } - if (!this.entries[i + first]) { - this.entries[i + first] = entry; - } + // The first xref table entry, i.e. obj 0, should be free. Attempting + // to adjust an incorrect first obj # (fixes issue 3248 and 7229). 
+ if (i === 0 && entry.free && first === 1) { + first = 0; } - tableState.entryNum = 0; - tableState.streamPos = stream.pos; - tableState.parserBuf1 = parser.buf1; - tableState.parserBuf2 = parser.buf2; - delete tableState.firstEntryNum; - delete tableState.entryCount; + if (!this.entries[i + first]) { + this.entries[i + first] = entry; + } } - // Sanity check: as per spec, first object must be free - if (this.entries[0] && !this.entries[0].free) { - throw new FormatError("Invalid XRef table: unexpected first object"); - } - return obj; - }, - - processXRefStream: function XRef_processXRefStream(stream) { - if (!("streamState" in this)) { - // Stores state of the stream as we process it so we can resume - // from middle of stream in case of missing data error - var streamParameters = stream.dict; - var byteWidths = streamParameters.get("W"); - var range = streamParameters.get("Index"); - if (!range) { - range = [0, streamParameters.get("Size")]; - } + tableState.entryNum = 0; + tableState.streamPos = stream.pos; + tableState.parserBuf1 = parser.buf1; + tableState.parserBuf2 = parser.buf2; + delete tableState.firstEntryNum; + delete tableState.entryCount; + } - this.streamState = { - entryRanges: range, - byteWidths, - entryNum: 0, - streamPos: stream.pos, - }; - } - this.readXRefStream(stream); - delete this.streamState; + // Sanity check: as per spec, first object must be free + if (this.entries[0] && !this.entries[0].free) { + throw new FormatError("Invalid XRef table: unexpected first object"); + } + return obj; + } + + processXRefStream(stream) { + if (!("streamState" in this)) { + // Stores state of the stream as we process it so we can resume + // from middle of stream in case of missing data error + var streamParameters = stream.dict; + var byteWidths = streamParameters.get("W"); + var range = streamParameters.get("Index"); + if (!range) { + range = [0, streamParameters.get("Size")]; + } + + this.streamState = { + entryRanges: range, + byteWidths, + 
entryNum: 0, + streamPos: stream.pos, + }; + } + this.readXRefStream(stream); + delete this.streamState; + + return stream.dict; + } - return stream.dict; - }, + readXRefStream(stream) { + var i, j; + var streamState = this.streamState; + stream.pos = streamState.streamPos; - readXRefStream: function XRef_readXRefStream(stream) { - var i, j; - var streamState = this.streamState; - stream.pos = streamState.streamPos; + var byteWidths = streamState.byteWidths; + var typeFieldWidth = byteWidths[0]; + var offsetFieldWidth = byteWidths[1]; + var generationFieldWidth = byteWidths[2]; - var byteWidths = streamState.byteWidths; - var typeFieldWidth = byteWidths[0]; - var offsetFieldWidth = byteWidths[1]; - var generationFieldWidth = byteWidths[2]; + var entryRanges = streamState.entryRanges; + while (entryRanges.length > 0) { + var first = entryRanges[0]; + var n = entryRanges[1]; - var entryRanges = streamState.entryRanges; - while (entryRanges.length > 0) { - var first = entryRanges[0]; - var n = entryRanges[1]; + if (!Number.isInteger(first) || !Number.isInteger(n)) { + throw new FormatError(`Invalid XRef range fields: ${first}, ${n}`); + } + if ( + !Number.isInteger(typeFieldWidth) || + !Number.isInteger(offsetFieldWidth) || + !Number.isInteger(generationFieldWidth) + ) { + throw new FormatError( + `Invalid XRef entry fields length: ${first}, ${n}` + ); + } + for (i = streamState.entryNum; i < n; ++i) { + streamState.entryNum = i; + streamState.streamPos = stream.pos; - if (!Number.isInteger(first) || !Number.isInteger(n)) { - throw new FormatError(`Invalid XRef range fields: ${first}, ${n}`); + var type = 0, + offset = 0, + generation = 0; + for (j = 0; j < typeFieldWidth; ++j) { + type = (type << 8) | stream.getByte(); } - if ( - !Number.isInteger(typeFieldWidth) || - !Number.isInteger(offsetFieldWidth) || - !Number.isInteger(generationFieldWidth) - ) { - throw new FormatError( - `Invalid XRef entry fields length: ${first}, ${n}` - ); + // if type field is absent, 
its default value is 1 + if (typeFieldWidth === 0) { + type = 1; } - for (i = streamState.entryNum; i < n; ++i) { - streamState.entryNum = i; - streamState.streamPos = stream.pos; - - var type = 0, - offset = 0, - generation = 0; - for (j = 0; j < typeFieldWidth; ++j) { - type = (type << 8) | stream.getByte(); - } - // if type field is absent, its default value is 1 - if (typeFieldWidth === 0) { - type = 1; - } - for (j = 0; j < offsetFieldWidth; ++j) { - offset = (offset << 8) | stream.getByte(); - } - for (j = 0; j < generationFieldWidth; ++j) { - generation = (generation << 8) | stream.getByte(); - } - var entry = {}; - entry.offset = offset; - entry.gen = generation; - switch (type) { - case 0: - entry.free = true; - break; - case 1: - entry.uncompressed = true; - break; - case 2: - break; - default: - throw new FormatError(`Invalid XRef entry type: ${type}`); - } - if (!this.entries[first + i]) { - this.entries[first + i] = entry; - } + for (j = 0; j < offsetFieldWidth; ++j) { + offset = (offset << 8) | stream.getByte(); } - - streamState.entryNum = 0; - streamState.streamPos = stream.pos; - entryRanges.splice(0, 2); - } - }, - - indexObjects: function XRef_indexObjects() { - // Simple scan through the PDF content to find objects, - // trailers and XRef streams. 
- var TAB = 0x9, - LF = 0xa, - CR = 0xd, - SPACE = 0x20; - var PERCENT = 0x25, - LT = 0x3c; - - function readToken(data, offset) { - var token = "", - ch = data[offset]; - while (ch !== LF && ch !== CR && ch !== LT) { - if (++offset >= data.length) { + for (j = 0; j < generationFieldWidth; ++j) { + generation = (generation << 8) | stream.getByte(); + } + var entry = {}; + entry.offset = offset; + entry.gen = generation; + switch (type) { + case 0: + entry.free = true; break; - } - token += String.fromCharCode(ch); - ch = data[offset]; + case 1: + entry.uncompressed = true; + break; + case 2: + break; + default: + throw new FormatError(`Invalid XRef entry type: ${type}`); } - return token; - } - function skipUntil(data, offset, what) { - var length = what.length, - dataLength = data.length; - var skipped = 0; - // finding byte sequence - while (offset < dataLength) { - var i = 0; - while (i < length && data[offset + i] === what[i]) { - ++i; - } - if (i >= length) { - break; // sequence found - } - offset++; - skipped++; + if (!this.entries[first + i]) { + this.entries[first + i] = entry; } - return skipped; } - var objRegExp = /^(\d+)\s+(\d+)\s+obj\b/; - const endobjRegExp = /\bendobj[\b\s]$/; - const nestedObjRegExp = /\s+(\d+\s+\d+\s+obj[\b\s<])$/; - const CHECK_CONTENT_LENGTH = 25; - - var trailerBytes = new Uint8Array([116, 114, 97, 105, 108, 101, 114]); - // prettier-ignore - var startxrefBytes = new Uint8Array([115, 116, 97, 114, 116, 120, 114, - 101, 102]); - const objBytes = new Uint8Array([111, 98, 106]); - var xrefBytes = new Uint8Array([47, 88, 82, 101, 102]); - - // Clear out any existing entries, since they may be bogus. 
- this.entries.length = 0; - - var stream = this.stream; - stream.pos = 0; - var buffer = stream.getBytes(); - var position = stream.start, - length = buffer.length; - var trailers = [], - xrefStms = []; - while (position < length) { - var ch = buffer[position]; - if (ch === TAB || ch === LF || ch === CR || ch === SPACE) { - ++position; - continue; + + streamState.entryNum = 0; + streamState.streamPos = stream.pos; + entryRanges.splice(0, 2); + } + } + + indexObjects() { + // Simple scan through the PDF content to find objects, + // trailers and XRef streams. + var TAB = 0x9, + LF = 0xa, + CR = 0xd, + SPACE = 0x20; + var PERCENT = 0x25, + LT = 0x3c; + + function readToken(data, offset) { + var token = "", + ch = data[offset]; + while (ch !== LF && ch !== CR && ch !== LT) { + if (++offset >= data.length) { + break; } - if (ch === PERCENT) { - // %-comment - do { - ++position; - if (position >= length) { - break; - } - ch = buffer[position]; - } while (ch !== LF && ch !== CR); - continue; + token += String.fromCharCode(ch); + ch = data[offset]; + } + return token; + } + function skipUntil(data, offset, what) { + var length = what.length, + dataLength = data.length; + var skipped = 0; + // finding byte sequence + while (offset < dataLength) { + var i = 0; + while (i < length && data[offset + i] === what[i]) { + ++i; } - var token = readToken(buffer, position); - var m; - if ( - token.startsWith("xref") && - (token.length === 4 || /\s/.test(token[4])) - ) { - position += skipUntil(buffer, position, trailerBytes); - trailers.push(position); - position += skipUntil(buffer, position, startxrefBytes); - } else if ((m = objRegExp.exec(token))) { - const num = m[1] | 0, - gen = m[2] | 0; - if (!this.entries[num] || this.entries[num].gen === gen) { - this.entries[num] = { - offset: position - stream.start, - gen, - uncompressed: true, - }; + if (i >= length) { + break; // sequence found + } + offset++; + skipped++; + } + return skipped; + } + var objRegExp = 
/^(\d+)\s+(\d+)\s+obj\b/; + const endobjRegExp = /\bendobj[\b\s]$/; + const nestedObjRegExp = /\s+(\d+\s+\d+\s+obj[\b\s<])$/; + const CHECK_CONTENT_LENGTH = 25; + + var trailerBytes = new Uint8Array([116, 114, 97, 105, 108, 101, 114]); + // prettier-ignore + var startxrefBytes = new Uint8Array([115, 116, 97, 114, 116, 120, 114, + 101, 102]); + const objBytes = new Uint8Array([111, 98, 106]); + var xrefBytes = new Uint8Array([47, 88, 82, 101, 102]); + + // Clear out any existing entries, since they may be bogus. + this.entries.length = 0; + + var stream = this.stream; + stream.pos = 0; + var buffer = stream.getBytes(); + var position = stream.start, + length = buffer.length; + var trailers = [], + xrefStms = []; + while (position < length) { + var ch = buffer[position]; + if (ch === TAB || ch === LF || ch === CR || ch === SPACE) { + ++position; + continue; + } + if (ch === PERCENT) { + // %-comment + do { + ++position; + if (position >= length) { + break; } - let contentLength, - startPos = position + token.length; + ch = buffer[position]; + } while (ch !== LF && ch !== CR); + continue; + } + var token = readToken(buffer, position); + var m; + if ( + token.startsWith("xref") && + (token.length === 4 || /\s/.test(token[4])) + ) { + position += skipUntil(buffer, position, trailerBytes); + trailers.push(position); + position += skipUntil(buffer, position, startxrefBytes); + } else if ((m = objRegExp.exec(token))) { + const num = m[1] | 0, + gen = m[2] | 0; + if (!this.entries[num] || this.entries[num].gen === gen) { + this.entries[num] = { + offset: position - stream.start, + gen, + uncompressed: true, + }; + } + let contentLength, + startPos = position + token.length; - // Find the next "obj" string, rather than "endobj", to ensure that - // we won't skip over a new 'obj' operator in corrupt files where - // 'endobj' operators are missing (fixes issue9105_reduced.pdf). 
- while (startPos < buffer.length) { - const endPos = startPos + skipUntil(buffer, startPos, objBytes) + 4; - contentLength = endPos - position; + // Find the next "obj" string, rather than "endobj", to ensure that + // we won't skip over a new 'obj' operator in corrupt files where + // 'endobj' operators are missing (fixes issue9105_reduced.pdf). + while (startPos < buffer.length) { + const endPos = startPos + skipUntil(buffer, startPos, objBytes) + 4; + contentLength = endPos - position; - const checkPos = Math.max(endPos - CHECK_CONTENT_LENGTH, startPos); - const tokenStr = bytesToString(buffer.subarray(checkPos, endPos)); + const checkPos = Math.max(endPos - CHECK_CONTENT_LENGTH, startPos); + const tokenStr = bytesToString(buffer.subarray(checkPos, endPos)); - // Check if the current object ends with an 'endobj' operator. - if (endobjRegExp.test(tokenStr)) { + // Check if the current object ends with an 'endobj' operator. + if (endobjRegExp.test(tokenStr)) { + break; + } else { + // Check if an "obj" occurrence is actually a new object, + // i.e. the current object is missing the 'endobj' operator. + const objToken = nestedObjRegExp.exec(tokenStr); + + if (objToken && objToken[1]) { + warn( + 'indexObjects: Found new "obj" inside of another "obj", ' + + 'caused by missing "endobj" -- trying to recover.' + ); + contentLength -= objToken[1].length; break; - } else { - // Check if an "obj" occurrence is actually a new object, - // i.e. the current object is missing the 'endobj' operator. - const objToken = nestedObjRegExp.exec(tokenStr); - - if (objToken && objToken[1]) { - warn( - 'indexObjects: Found new "obj" inside of another "obj", ' + - 'caused by missing "endobj" -- trying to recover.' 
- ); - contentLength -= objToken[1].length; - break; - } } - startPos = endPos; - } - const content = buffer.subarray(position, position + contentLength); - - // checking XRef stream suspect - // (it shall have '/XRef' and next char is not a letter) - var xrefTagOffset = skipUntil(content, 0, xrefBytes); - if ( - xrefTagOffset < contentLength && - content[xrefTagOffset + 5] < 64 - ) { - xrefStms.push(position - stream.start); - this.xrefstms[position - stream.start] = 1; // Avoid recursion } - - position += contentLength; - } else if ( - token.startsWith("trailer") && - (token.length === 7 || /\s/.test(token[7])) - ) { - trailers.push(position); - position += skipUntil(buffer, position, startxrefBytes); - } else { - position += token.length + 1; + startPos = endPos; + } + const content = buffer.subarray(position, position + contentLength); + + // checking XRef stream suspect + // (it shall have '/XRef' and next char is not a letter) + var xrefTagOffset = skipUntil(content, 0, xrefBytes); + if (xrefTagOffset < contentLength && content[xrefTagOffset + 5] < 64) { + xrefStms.push(position - stream.start); + this.xrefstms[position - stream.start] = 1; // Avoid recursion } + + position += contentLength; + } else if ( + token.startsWith("trailer") && + (token.length === 7 || /\s/.test(token[7])) + ) { + trailers.push(position); + position += skipUntil(buffer, position, startxrefBytes); + } else { + position += token.length + 1; + } + } + // reading XRef streams + for (let i = 0, ii = xrefStms.length; i < ii; ++i) { + this.startXRefQueue.push(xrefStms[i]); + this.readXRef(/* recoveryMode */ true); + } + // finding main trailer + let trailerDict; + for (let i = 0, ii = trailers.length; i < ii; ++i) { + stream.pos = trailers[i]; + const parser = new Parser({ + lexer: new Lexer(stream), + xref: this, + allowStreams: true, + recoveryMode: true, + }); + var obj = parser.getObj(); + if (!isCmd(obj, "trailer")) { + continue; } - // reading XRef streams - for (let i = 0, ii = 
xrefStms.length; i < ii; ++i) { - this.startXRefQueue.push(xrefStms[i]); - this.readXRef(/* recoveryMode */ true); + // read the trailer dictionary + const dict = parser.getObj(); + if (!isDict(dict)) { + continue; } - // finding main trailer - let trailerDict; - for (let i = 0, ii = trailers.length; i < ii; ++i) { - stream.pos = trailers[i]; - const parser = new Parser({ - lexer: new Lexer(stream), - xref: this, - allowStreams: true, - recoveryMode: true, - }); - var obj = parser.getObj(); - if (!isCmd(obj, "trailer")) { + // Do some basic validation of the trailer/root dictionary candidate. + try { + const rootDict = dict.get("Root"); + if (!(rootDict instanceof Dict)) { continue; } - // read the trailer dictionary - const dict = parser.getObj(); - if (!isDict(dict)) { + const pagesDict = rootDict.get("Pages"); + if (!(pagesDict instanceof Dict)) { continue; } - // Do some basic validation of the trailer/root dictionary candidate. - try { - const rootDict = dict.get("Root"); - if (!(rootDict instanceof Dict)) { - continue; - } - const pagesDict = rootDict.get("Pages"); - if (!(pagesDict instanceof Dict)) { - continue; - } - const pagesCount = pagesDict.get("Count"); - if (!Number.isInteger(pagesCount)) { - continue; - } - // The top-level /Pages dictionary isn't obviously corrupt. - } catch (ex) { + const pagesCount = pagesDict.get("Count"); + if (!Number.isInteger(pagesCount)) { continue; } - // taking the first one with 'ID' - if (dict.has("ID")) { - return dict; - } - // The current dictionary is a candidate, but continue searching. - trailerDict = dict; - } - // No trailer with 'ID', taking last one (if exists). - if (trailerDict) { - return trailerDict; - } - // nothing helps - throw new InvalidPDFException("Invalid PDF structure."); - }, + // The top-level /Pages dictionary isn't obviously corrupt. 
+ } catch (ex) { + continue; + } + // taking the first one with 'ID' + if (dict.has("ID")) { + return dict; + } + // The current dictionary is a candidate, but continue searching. + trailerDict = dict; + } + // No trailer with 'ID', taking last one (if exists). + if (trailerDict) { + return trailerDict; + } + // nothing helps + throw new InvalidPDFException("Invalid PDF structure."); + } - readXRef: function XRef_readXRef(recoveryMode) { - var stream = this.stream; - // Keep track of already parsed XRef tables, to prevent an infinite loop - // when parsing corrupt PDF files where e.g. the /Prev entries create a - // circular dependency between tables (fixes bug1393476.pdf). - const startXRefParsedCache = new Set(); + readXRef(recoveryMode = false) { + var stream = this.stream; + // Keep track of already parsed XRef tables, to prevent an infinite loop + // when parsing corrupt PDF files where e.g. the /Prev entries create a + // circular dependency between tables (fixes bug1393476.pdf). + const startXRefParsedCache = new Set(); - try { - while (this.startXRefQueue.length) { - var startXRef = this.startXRefQueue[0]; + try { + while (this.startXRefQueue.length) { + var startXRef = this.startXRefQueue[0]; - if (startXRefParsedCache.has(startXRef)) { - warn("readXRef - skipping XRef table since it was already parsed."); - this.startXRefQueue.shift(); - continue; - } - startXRefParsedCache.add(startXRef); - - stream.pos = startXRef + stream.start; - - const parser = new Parser({ - lexer: new Lexer(stream), - xref: this, - allowStreams: true, - }); - var obj = parser.getObj(); - var dict; - - // Get dictionary - if (isCmd(obj, "xref")) { - // Parse end-of-file XRef - dict = this.processXRefTable(parser); - if (!this.topDict) { - this.topDict = dict; - } + if (startXRefParsedCache.has(startXRef)) { + warn("readXRef - skipping XRef table since it was already parsed."); + this.startXRefQueue.shift(); + continue; + } + startXRefParsedCache.add(startXRef); - // Recursively get 
other XRefs 'XRefStm', if any - obj = dict.get("XRefStm"); - if (Number.isInteger(obj)) { - var pos = obj; - // ignore previously loaded xref streams - // (possible infinite recursion) - if (!(pos in this.xrefstms)) { - this.xrefstms[pos] = 1; - this.startXRefQueue.push(pos); - } - } - } else if (Number.isInteger(obj)) { - // Parse in-stream XRef - if ( - !Number.isInteger(parser.getObj()) || - !isCmd(parser.getObj(), "obj") || - !isStream((obj = parser.getObj())) - ) { - throw new FormatError("Invalid XRef stream"); - } - dict = this.processXRefStream(obj); - if (!this.topDict) { - this.topDict = dict; - } - if (!dict) { - throw new FormatError("Failed to read XRef stream"); - } - } else { - throw new FormatError("Invalid XRef stream header"); + stream.pos = startXRef + stream.start; + + const parser = new Parser({ + lexer: new Lexer(stream), + xref: this, + allowStreams: true, + }); + var obj = parser.getObj(); + var dict; + + // Get dictionary + if (isCmd(obj, "xref")) { + // Parse end-of-file XRef + dict = this.processXRefTable(parser); + if (!this.topDict) { + this.topDict = dict; } - // Recursively get previous dictionary, if any - obj = dict.get("Prev"); + // Recursively get other XRefs 'XRefStm', if any + obj = dict.get("XRefStm"); if (Number.isInteger(obj)) { - this.startXRefQueue.push(obj); - } else if (isRef(obj)) { - // The spec says Prev must not be a reference, i.e. "/Prev NNN" - // This is a fallback for non-compliant PDFs, i.e. 
"/Prev NNN 0 R" - this.startXRefQueue.push(obj.num); + var pos = obj; + // ignore previously loaded xref streams + // (possible infinite recursion) + if (!(pos in this.xrefstms)) { + this.xrefstms[pos] = 1; + this.startXRefQueue.push(pos); + } } - - this.startXRefQueue.shift(); + } else if (Number.isInteger(obj)) { + // Parse in-stream XRef + if ( + !Number.isInteger(parser.getObj()) || + !isCmd(parser.getObj(), "obj") || + !isStream((obj = parser.getObj())) + ) { + throw new FormatError("Invalid XRef stream"); + } + dict = this.processXRefStream(obj); + if (!this.topDict) { + this.topDict = dict; + } + if (!dict) { + throw new FormatError("Failed to read XRef stream"); + } + } else { + throw new FormatError("Invalid XRef stream header"); } - return this.topDict; - } catch (e) { - if (e instanceof MissingDataException) { - throw e; + // Recursively get previous dictionary, if any + obj = dict.get("Prev"); + if (Number.isInteger(obj)) { + this.startXRefQueue.push(obj); + } else if (isRef(obj)) { + // The spec says Prev must not be a reference, i.e. "/Prev NNN" + // This is a fallback for non-compliant PDFs, i.e. 
"/Prev NNN 0 R" + this.startXRefQueue.push(obj.num); } - info("(while reading XRef): " + e); - } - if (recoveryMode) { - return undefined; + this.startXRefQueue.shift(); } - throw new XRefParseException(); - }, - getEntry: function XRef_getEntry(i) { - var xrefEntry = this.entries[i]; - if (xrefEntry && !xrefEntry.free && xrefEntry.offset) { - return xrefEntry; + return this.topDict; + } catch (e) { + if (e instanceof MissingDataException) { + throw e; } - return null; - }, + info("(while reading XRef): " + e); + } - fetchIfRef: function XRef_fetchIfRef(obj, suppressEncryption) { - if (obj instanceof Ref) { - return this.fetch(obj, suppressEncryption); - } - return obj; - }, + if (recoveryMode) { + return undefined; + } + throw new XRefParseException(); + } - fetch: function XRef_fetch(ref, suppressEncryption) { - if (!(ref instanceof Ref)) { - throw new Error("ref object is not a reference"); - } - const num = ref.num; - - // The XRef cache is populated with objects which are obtained through - // `Parser.getObj`, and indirectly via `Lexer.getObj`. Neither of these - // methods should ever return `undefined` (note the `assert` calls below). - const cacheEntry = this._cacheMap.get(num); - if (cacheEntry !== undefined) { - // In documents with Object Streams, it's possible that cached `Dict`s - // have not been assigned an `objId` yet (see e.g. issue3115r.pdf). - if (cacheEntry instanceof Dict && !cacheEntry.objId) { - cacheEntry.objId = ref.toString(); - } - return cacheEntry; - } - let xrefEntry = this.getEntry(num); + getEntry(i) { + var xrefEntry = this.entries[i]; + if (xrefEntry && !xrefEntry.free && xrefEntry.offset) { + return xrefEntry; + } + return null; + } - if (xrefEntry === null) { - // The referenced entry can be free. 
- this._cacheMap.set(num, xrefEntry); - return xrefEntry; - } + fetchIfRef(obj, suppressEncryption = false) { + if (obj instanceof Ref) { + return this.fetch(obj, suppressEncryption); + } + return obj; + } - if (xrefEntry.uncompressed) { - xrefEntry = this.fetchUncompressed(ref, xrefEntry, suppressEncryption); - } else { - xrefEntry = this.fetchCompressed(ref, xrefEntry, suppressEncryption); + fetch(ref, suppressEncryption = false) { + if (!(ref instanceof Ref)) { + throw new Error("ref object is not a reference"); + } + const num = ref.num; + + // The XRef cache is populated with objects which are obtained through + // `Parser.getObj`, and indirectly via `Lexer.getObj`. Neither of these + // methods should ever return `undefined` (note the `assert` calls below). + const cacheEntry = this._cacheMap.get(num); + if (cacheEntry !== undefined) { + // In documents with Object Streams, it's possible that cached `Dict`s + // have not been assigned an `objId` yet (see e.g. issue3115r.pdf). + if (cacheEntry instanceof Dict && !cacheEntry.objId) { + cacheEntry.objId = ref.toString(); + } + return cacheEntry; + } + let xrefEntry = this.getEntry(num); + + if (xrefEntry === null) { + // The referenced entry can be free. 
+ this._cacheMap.set(num, xrefEntry); + return xrefEntry; + } + + if (xrefEntry.uncompressed) { + xrefEntry = this.fetchUncompressed(ref, xrefEntry, suppressEncryption); + } else { + xrefEntry = this.fetchCompressed(ref, xrefEntry, suppressEncryption); + } + if (isDict(xrefEntry)) { + xrefEntry.objId = ref.toString(); + } else if (isStream(xrefEntry)) { + xrefEntry.dict.objId = ref.toString(); + } + return xrefEntry; + } + + fetchUncompressed(ref, xrefEntry, suppressEncryption = false) { + var gen = ref.gen; + var num = ref.num; + if (xrefEntry.gen !== gen) { + throw new XRefEntryException(`Inconsistent generation in XRef: ${ref}`); + } + var stream = this.stream.makeSubStream( + xrefEntry.offset + this.stream.start + ); + const parser = new Parser({ + lexer: new Lexer(stream), + xref: this, + allowStreams: true, + }); + var obj1 = parser.getObj(); + var obj2 = parser.getObj(); + var obj3 = parser.getObj(); + + if (obj1 !== num || obj2 !== gen || !(obj3 instanceof Cmd)) { + throw new XRefEntryException(`Bad (uncompressed) XRef entry: ${ref}`); + } + if (obj3.cmd !== "obj") { + // some bad PDFs use "obj1234" and really mean 1234 + if (obj3.cmd.startsWith("obj")) { + num = parseInt(obj3.cmd.substring(3), 10); + if (!Number.isNaN(num)) { + return num; + } } - if (isDict(xrefEntry)) { - xrefEntry.objId = ref.toString(); - } else if (isStream(xrefEntry)) { - xrefEntry.dict.objId = ref.toString(); + throw new XRefEntryException(`Bad (uncompressed) XRef entry: ${ref}`); + } + if (this.encrypt && !suppressEncryption) { + xrefEntry = parser.getObj(this.encrypt.createCipherTransform(num, gen)); + } else { + xrefEntry = parser.getObj(); + } + if (!isStream(xrefEntry)) { + if ( + typeof PDFJSDev === "undefined" || + PDFJSDev.test("!PRODUCTION || TESTING") + ) { + assert( + xrefEntry !== undefined, + 'fetchUncompressed: The "xrefEntry" cannot be undefined.' 
+ ); } - return xrefEntry; - }, + this._cacheMap.set(num, xrefEntry); + } + return xrefEntry; + } - fetchUncompressed(ref, xrefEntry, suppressEncryption = false) { - var gen = ref.gen; - var num = ref.num; - if (xrefEntry.gen !== gen) { - throw new XRefEntryException(`Inconsistent generation in XRef: ${ref}`); + fetchCompressed(ref, xrefEntry, suppressEncryption = false) { + const tableOffset = xrefEntry.offset; + const stream = this.fetch(Ref.get(tableOffset, 0)); + if (!isStream(stream)) { + throw new FormatError("bad ObjStm stream"); + } + const first = stream.dict.get("First"); + const n = stream.dict.get("N"); + if (!Number.isInteger(first) || !Number.isInteger(n)) { + throw new FormatError("invalid first and n parameters for ObjStm stream"); + } + let parser = new Parser({ + lexer: new Lexer(stream), + xref: this, + allowStreams: true, + }); + const nums = new Array(n); + const offsets = new Array(n); + // read the object numbers to populate cache + for (let i = 0; i < n; ++i) { + const num = parser.getObj(); + if (!Number.isInteger(num)) { + throw new FormatError( + `invalid object number in the ObjStm stream: ${num}` + ); } - var stream = this.stream.makeSubStream( - xrefEntry.offset + this.stream.start - ); - const parser = new Parser({ - lexer: new Lexer(stream), + const offset = parser.getObj(); + if (!Number.isInteger(offset)) { + throw new FormatError( + `invalid object offset in the ObjStm stream: ${offset}` + ); + } + nums[i] = num; + offsets[i] = offset; + } + + const start = (stream.start || 0) + first; + const entries = new Array(n); + // read stream objects for cache + for (let i = 0; i < n; ++i) { + const length = i < n - 1 ? 
offsets[i + 1] - offsets[i] : undefined; + if (length < 0) { + throw new FormatError("Invalid offset in the ObjStm stream."); + } + parser = new Parser({ + lexer: new Lexer( + stream.makeSubStream(start + offsets[i], length, stream.dict) + ), xref: this, allowStreams: true, }); - var obj1 = parser.getObj(); - var obj2 = parser.getObj(); - var obj3 = parser.getObj(); - if (obj1 !== num || obj2 !== gen || !(obj3 instanceof Cmd)) { - throw new XRefEntryException(`Bad (uncompressed) XRef entry: ${ref}`); - } - if (obj3.cmd !== "obj") { - // some bad PDFs use "obj1234" and really mean 1234 - if (obj3.cmd.startsWith("obj")) { - num = parseInt(obj3.cmd.substring(3), 10); - if (!Number.isNaN(num)) { - return num; - } - } - throw new XRefEntryException(`Bad (uncompressed) XRef entry: ${ref}`); + const obj = parser.getObj(); + entries[i] = obj; + if (isStream(obj)) { + continue; } - if (this.encrypt && !suppressEncryption) { - xrefEntry = parser.getObj(this.encrypt.createCipherTransform(num, gen)); - } else { - xrefEntry = parser.getObj(); - } - if (!isStream(xrefEntry)) { + const num = nums[i], + entry = this.entries[num]; + if (entry && entry.offset === tableOffset && entry.gen === i) { if ( typeof PDFJSDev === "undefined" || PDFJSDev.test("!PRODUCTION || TESTING") ) { assert( - xrefEntry !== undefined, - 'fetchUncompressed: The "xrefEntry" cannot be undefined.' + obj !== undefined, + 'fetchCompressed: The "obj" cannot be undefined.' 
); } - this._cacheMap.set(num, xrefEntry); - } - return xrefEntry; - }, - - fetchCompressed(ref, xrefEntry, suppressEncryption = false) { - const tableOffset = xrefEntry.offset; - const stream = this.fetch(Ref.get(tableOffset, 0)); - if (!isStream(stream)) { - throw new FormatError("bad ObjStm stream"); - } - const first = stream.dict.get("First"); - const n = stream.dict.get("N"); - if (!Number.isInteger(first) || !Number.isInteger(n)) { - throw new FormatError( - "invalid first and n parameters for ObjStm stream" - ); - } - let parser = new Parser({ - lexer: new Lexer(stream), - xref: this, - allowStreams: true, - }); - const nums = new Array(n); - const offsets = new Array(n); - // read the object numbers to populate cache - for (let i = 0; i < n; ++i) { - const num = parser.getObj(); - if (!Number.isInteger(num)) { - throw new FormatError( - `invalid object number in the ObjStm stream: ${num}` - ); - } - const offset = parser.getObj(); - if (!Number.isInteger(offset)) { - throw new FormatError( - `invalid object offset in the ObjStm stream: ${offset}` - ); - } - nums[i] = num; - offsets[i] = offset; - } - - const start = (stream.start || 0) + first; - const entries = new Array(n); - // read stream objects for cache - for (let i = 0; i < n; ++i) { - const length = i < n - 1 ? offsets[i + 1] - offsets[i] : undefined; - if (length < 0) { - throw new FormatError("Invalid offset in the ObjStm stream."); - } - parser = new Parser({ - lexer: new Lexer( - stream.makeSubStream(start + offsets[i], length, stream.dict) - ), - xref: this, - allowStreams: true, - }); - - const obj = parser.getObj(); - entries[i] = obj; - if (isStream(obj)) { - continue; - } - const num = nums[i], - entry = this.entries[num]; - if (entry && entry.offset === tableOffset && entry.gen === i) { - if ( - typeof PDFJSDev === "undefined" || - PDFJSDev.test("!PRODUCTION || TESTING") - ) { - assert( - obj !== undefined, - 'fetchCompressed: The "obj" cannot be undefined.' 
- ); - } - this._cacheMap.set(num, obj); - } - } - xrefEntry = entries[xrefEntry.gen]; - if (xrefEntry === undefined) { - throw new XRefEntryException(`Bad (compressed) XRef entry: ${ref}`); - } - return xrefEntry; - }, + this._cacheMap.set(num, obj); + } + } + xrefEntry = entries[xrefEntry.gen]; + if (xrefEntry === undefined) { + throw new XRefEntryException(`Bad (compressed) XRef entry: ${ref}`); + } + return xrefEntry; + } - async fetchIfRefAsync(obj, suppressEncryption) { - if (obj instanceof Ref) { - return this.fetchAsync(obj, suppressEncryption); - } - return obj; - }, + async fetchIfRefAsync(obj, suppressEncryption) { + if (obj instanceof Ref) { + return this.fetchAsync(obj, suppressEncryption); + } + return obj; + } - async fetchAsync(ref, suppressEncryption) { - try { - return this.fetch(ref, suppressEncryption); - } catch (ex) { - if (!(ex instanceof MissingDataException)) { - throw ex; - } - await this.pdfManager.requestRange(ex.begin, ex.end); - return this.fetchAsync(ref, suppressEncryption); + async fetchAsync(ref, suppressEncryption) { + try { + return this.fetch(ref, suppressEncryption); + } catch (ex) { + if (!(ex instanceof MissingDataException)) { + throw ex; } - }, - - getCatalogObj: function XRef_getCatalogObj() { - return this.root; - }, - }; + await this.pdfManager.requestRange(ex.begin, ex.end); + return this.fetchAsync(ref, suppressEncryption); + } + } - return XRef; -})(); + getCatalogObj() { + return this.root; + } +} export { XRef }; From 088a55f80d5a3ac52eec0af6f546034ebcaa7b97 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Tue, 13 Apr 2021 18:26:18 +0200 Subject: [PATCH 09/10] Enable the `no-var` rule in the `src/core/xref.js` file --- src/core/xref.js | 117 +++++++++++++++++++++++------------------------ 1 file changed, 58 insertions(+), 59 deletions(-) diff --git a/src/core/xref.js b/src/core/xref.js index aac2c368a8c1f..4aa121624b3e1 100644 --- a/src/core/xref.js +++ b/src/core/xref.js @@ -12,7 +12,6 @@ * See the License for 
the specific language governing permissions and * limitations under the License. */ -/* eslint-disable no-var */ import { assert, @@ -71,7 +70,7 @@ class XRef { } parse(recoveryMode = false) { - var trailerDict; + let trailerDict; if (!recoveryMode) { trailerDict = this.readXRef(); } else { @@ -91,8 +90,8 @@ class XRef { warn(`XRef.parse - Invalid "Encrypt" reference: "${ex}".`); } if (isDict(encrypt)) { - var ids = trailerDict.get("ID"); - var fileId = ids && ids.length ? ids[0] : ""; + const ids = trailerDict.get("ID"); + const fileId = ids && ids.length ? ids[0] : ""; // The 'Encrypt' dictionary itself should not be encrypted, and by // setting `suppressEncryption` we can prevent an infinite loop inside // of `XRef_fetchUncompressed` if the dictionary contains indirect @@ -137,7 +136,7 @@ class XRef { }; } - var obj = this.readXRefTable(parser); + const obj = this.readXRefTable(parser); // Sanity check if (!isCmd(obj, "trailer")) { @@ -154,7 +153,7 @@ class XRef { // >> // The parser goes through the entire stream << ... >> and provides // a getter interface for the key-value table - var dict = parser.getObj(); + let dict = parser.getObj(); // The pdflib PDF generator can generate a nested trailer dictionary if (!isDict(dict) && dict.dict) { @@ -181,14 +180,14 @@ class XRef { // trailer // ... 
- var stream = parser.lexer.stream; - var tableState = this.tableState; + const stream = parser.lexer.stream; + const tableState = this.tableState; stream.pos = tableState.streamPos; parser.buf1 = tableState.parserBuf1; parser.buf2 = tableState.parserBuf2; // Outer loop is over subsection headers - var obj; + let obj; while (true) { if (!("firstEntryNum" in tableState) || !("entryCount" in tableState)) { @@ -199,24 +198,24 @@ class XRef { tableState.entryCount = parser.getObj(); } - var first = tableState.firstEntryNum; - var count = tableState.entryCount; + let first = tableState.firstEntryNum; + const count = tableState.entryCount; if (!Number.isInteger(first) || !Number.isInteger(count)) { throw new FormatError( "Invalid XRef table: wrong types in subsection header" ); } // Inner loop is over objects themselves - for (var i = tableState.entryNum; i < count; i++) { + for (let i = tableState.entryNum; i < count; i++) { tableState.streamPos = stream.pos; tableState.entryNum = i; tableState.parserBuf1 = parser.buf1; tableState.parserBuf2 = parser.buf2; - var entry = {}; + const entry = {}; entry.offset = parser.getObj(); entry.gen = parser.getObj(); - var type = parser.getObj(); + const type = parser.getObj(); if (type instanceof Cmd) { switch (type.cmd) { @@ -270,9 +269,9 @@ class XRef { if (!("streamState" in this)) { // Stores state of the stream as we process it so we can resume // from middle of stream in case of missing data error - var streamParameters = stream.dict; - var byteWidths = streamParameters.get("W"); - var range = streamParameters.get("Index"); + const streamParameters = stream.dict; + const byteWidths = streamParameters.get("W"); + let range = streamParameters.get("Index"); if (!range) { range = [0, streamParameters.get("Size")]; } @@ -291,19 +290,19 @@ class XRef { } readXRefStream(stream) { - var i, j; - var streamState = this.streamState; + let i, j; + const streamState = this.streamState; stream.pos = streamState.streamPos; - var byteWidths = 
streamState.byteWidths; - var typeFieldWidth = byteWidths[0]; - var offsetFieldWidth = byteWidths[1]; - var generationFieldWidth = byteWidths[2]; + const byteWidths = streamState.byteWidths; + const typeFieldWidth = byteWidths[0]; + const offsetFieldWidth = byteWidths[1]; + const generationFieldWidth = byteWidths[2]; - var entryRanges = streamState.entryRanges; + const entryRanges = streamState.entryRanges; while (entryRanges.length > 0) { - var first = entryRanges[0]; - var n = entryRanges[1]; + const first = entryRanges[0]; + const n = entryRanges[1]; if (!Number.isInteger(first) || !Number.isInteger(n)) { throw new FormatError(`Invalid XRef range fields: ${first}, ${n}`); @@ -321,7 +320,7 @@ class XRef { streamState.entryNum = i; streamState.streamPos = stream.pos; - var type = 0, + let type = 0, offset = 0, generation = 0; for (j = 0; j < typeFieldWidth; ++j) { @@ -337,7 +336,7 @@ class XRef { for (j = 0; j < generationFieldWidth; ++j) { generation = (generation << 8) | stream.getByte(); } - var entry = {}; + const entry = {}; entry.offset = offset; entry.gen = generation; switch (type) { @@ -366,15 +365,15 @@ class XRef { indexObjects() { // Simple scan through the PDF content to find objects, // trailers and XRef streams. 
- var TAB = 0x9, + const TAB = 0x9, LF = 0xa, CR = 0xd, SPACE = 0x20; - var PERCENT = 0x25, + const PERCENT = 0x25, LT = 0x3c; function readToken(data, offset) { - var token = "", + let token = "", ch = data[offset]; while (ch !== LF && ch !== CR && ch !== LT) { if (++offset >= data.length) { @@ -386,12 +385,12 @@ class XRef { return token; } function skipUntil(data, offset, what) { - var length = what.length, + const length = what.length, dataLength = data.length; - var skipped = 0; + let skipped = 0; // finding byte sequence while (offset < dataLength) { - var i = 0; + let i = 0; while (i < length && data[offset + i] === what[i]) { ++i; } @@ -403,30 +402,30 @@ class XRef { } return skipped; } - var objRegExp = /^(\d+)\s+(\d+)\s+obj\b/; + const objRegExp = /^(\d+)\s+(\d+)\s+obj\b/; const endobjRegExp = /\bendobj[\b\s]$/; const nestedObjRegExp = /\s+(\d+\s+\d+\s+obj[\b\s<])$/; const CHECK_CONTENT_LENGTH = 25; - var trailerBytes = new Uint8Array([116, 114, 97, 105, 108, 101, 114]); + const trailerBytes = new Uint8Array([116, 114, 97, 105, 108, 101, 114]); // prettier-ignore - var startxrefBytes = new Uint8Array([115, 116, 97, 114, 116, 120, 114, + const startxrefBytes = new Uint8Array([115, 116, 97, 114, 116, 120, 114, 101, 102]); const objBytes = new Uint8Array([111, 98, 106]); - var xrefBytes = new Uint8Array([47, 88, 82, 101, 102]); + const xrefBytes = new Uint8Array([47, 88, 82, 101, 102]); // Clear out any existing entries, since they may be bogus. 
this.entries.length = 0; - var stream = this.stream; + const stream = this.stream; stream.pos = 0; - var buffer = stream.getBytes(); - var position = stream.start, + const buffer = stream.getBytes(), length = buffer.length; - var trailers = [], + let position = stream.start; + const trailers = [], xrefStms = []; while (position < length) { - var ch = buffer[position]; + let ch = buffer[position]; if (ch === TAB || ch === LF || ch === CR || ch === SPACE) { ++position; continue; @@ -442,8 +441,8 @@ class XRef { } while (ch !== LF && ch !== CR); continue; } - var token = readToken(buffer, position); - var m; + const token = readToken(buffer, position); + let m; if ( token.startsWith("xref") && (token.length === 4 || /\s/.test(token[4])) @@ -497,7 +496,7 @@ class XRef { // checking XRef stream suspect // (it shall have '/XRef' and next char is not a letter) - var xrefTagOffset = skipUntil(content, 0, xrefBytes); + const xrefTagOffset = skipUntil(content, 0, xrefBytes); if (xrefTagOffset < contentLength && content[xrefTagOffset + 5] < 64) { xrefStms.push(position - stream.start); this.xrefstms[position - stream.start] = 1; // Avoid recursion @@ -529,7 +528,7 @@ class XRef { allowStreams: true, recoveryMode: true, }); - var obj = parser.getObj(); + const obj = parser.getObj(); if (!isCmd(obj, "trailer")) { continue; } @@ -572,7 +571,7 @@ class XRef { } readXRef(recoveryMode = false) { - var stream = this.stream; + const stream = this.stream; // Keep track of already parsed XRef tables, to prevent an infinite loop // when parsing corrupt PDF files where e.g. the /Prev entries create a // circular dependency between tables (fixes bug1393476.pdf). 
@@ -580,7 +579,7 @@ class XRef { try { while (this.startXRefQueue.length) { - var startXRef = this.startXRefQueue[0]; + const startXRef = this.startXRefQueue[0]; if (startXRefParsedCache.has(startXRef)) { warn("readXRef - skipping XRef table since it was already parsed."); @@ -596,8 +595,8 @@ class XRef { xref: this, allowStreams: true, }); - var obj = parser.getObj(); - var dict; + let obj = parser.getObj(); + let dict; // Get dictionary if (isCmd(obj, "xref")) { @@ -610,7 +609,7 @@ class XRef { // Recursively get other XRefs 'XRefStm', if any obj = dict.get("XRefStm"); if (Number.isInteger(obj)) { - var pos = obj; + const pos = obj; // ignore previously loaded xref streams // (possible infinite recursion) if (!(pos in this.xrefstms)) { @@ -666,7 +665,7 @@ class XRef { } getEntry(i) { - var xrefEntry = this.entries[i]; + const xrefEntry = this.entries[i]; if (xrefEntry && !xrefEntry.free && xrefEntry.offset) { return xrefEntry; } @@ -720,12 +719,12 @@ class XRef { } fetchUncompressed(ref, xrefEntry, suppressEncryption = false) { - var gen = ref.gen; - var num = ref.num; + const gen = ref.gen; + let num = ref.num; if (xrefEntry.gen !== gen) { throw new XRefEntryException(`Inconsistent generation in XRef: ${ref}`); } - var stream = this.stream.makeSubStream( + const stream = this.stream.makeSubStream( xrefEntry.offset + this.stream.start ); const parser = new Parser({ @@ -733,9 +732,9 @@ class XRef { xref: this, allowStreams: true, }); - var obj1 = parser.getObj(); - var obj2 = parser.getObj(); - var obj3 = parser.getObj(); + const obj1 = parser.getObj(); + const obj2 = parser.getObj(); + const obj3 = parser.getObj(); if (obj1 !== num || obj2 !== gen || !(obj3 instanceof Cmd)) { throw new XRefEntryException(`Bad (uncompressed) XRef entry: ${ref}`); From 1d6d476cab0af6c067159d84ba0d0f29e2667d9f Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Tue, 13 Apr 2021 18:26:23 +0200 Subject: [PATCH 10/10] Rename the `src/core/obj.js` file to `src/core/catalog.js` Now that 
only the `Catalog` remains in this file, after the previous patches, it makes sense to rename the file to reduce confusion. --- src/core/annotation.js | 2 +- src/core/{obj.js => catalog.js} | 0 src/core/document.js | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) rename src/core/{obj.js => catalog.js} (100%) diff --git a/src/core/annotation.js b/src/core/annotation.js index c30fdaebada41..4b8b95d183a93 100644 --- a/src/core/annotation.js +++ b/src/core/annotation.js @@ -48,7 +48,7 @@ import { Name, RefSet, } from "./primitives.js"; -import { Catalog } from "./obj.js"; +import { Catalog } from "./catalog.js"; import { ColorSpace } from "./colorspace.js"; import { FileSpec } from "./file_spec.js"; import { ObjectLoader } from "./object_loader.js"; diff --git a/src/core/obj.js b/src/core/catalog.js similarity index 100% rename from src/core/obj.js rename to src/core/catalog.js diff --git a/src/core/document.js b/src/core/document.js index 27b9dd6ac0d55..923bf29d58ccf 100644 --- a/src/core/document.js +++ b/src/core/document.js @@ -54,7 +54,7 @@ import { import { NullStream, Stream, StreamsSequenceStream } from "./stream.js"; import { AnnotationFactory } from "./annotation.js"; import { calculateMD5 } from "./crypto.js"; -import { Catalog } from "./obj.js"; +import { Catalog } from "./catalog.js"; import { Linearization } from "./parser.js"; import { ObjectLoader } from "./object_loader.js"; import { OperatorList } from "./operator_list.js";