From aec236f6d899d62294ac26f764d5916b3f0c3d43 Mon Sep 17 00:00:00 2001 From: Tim van der Meij Date: Sun, 19 Aug 2018 18:19:25 +0200 Subject: [PATCH 1/2] Convert the `Catalog` class, in `src/core/obj.js`, to ES6 syntax --- src/core/obj.js | 1064 ++++++++++++++++++++++++----------------------- 1 file changed, 542 insertions(+), 522 deletions(-) diff --git a/src/core/obj.js b/src/core/obj.js index 82035a5104a9f..5d74a250d8860 100644 --- a/src/core/obj.js +++ b/src/core/obj.js @@ -32,13 +32,14 @@ function fetchDestination(dest) { return isDict(dest) ? dest.get('D') : dest; } -var Catalog = (function CatalogClosure() { - function Catalog(pdfManager, xref) { +class Catalog { + constructor(pdfManager, xref) { this.pdfManager = pdfManager; this.xref = xref; + this.catDict = xref.getCatalogObj(); if (!isDict(this.catDict)) { - throw new FormatError('catalog object is not a dictionary'); + throw new FormatError('Catalog object is not a dictionary.'); } this.fontCache = new RefSetCache(); @@ -46,564 +47,582 @@ var Catalog = (function CatalogClosure() { this.pageKidsCountCache = new RefSetCache(); } - Catalog.prototype = { - get metadata() { - var streamRef = this.catDict.getRaw('Metadata'); - if (!isRef(streamRef)) { - return shadow(this, 'metadata', null); - } + get metadata() { + const streamRef = this.catDict.getRaw('Metadata'); + if (!isRef(streamRef)) { + return shadow(this, 'metadata', null); + } - var encryptMetadata = (!this.xref.encrypt ? false : + const encryptMetadata = (!this.xref.encrypt ? false : this.xref.encrypt.encryptMetadata); - - var stream = this.xref.fetch(streamRef, !encryptMetadata); - var metadata; - if (stream && isDict(stream.dict)) { - var type = stream.dict.get('Type'); - var subtype = stream.dict.get('Subtype'); - - if (isName(type, 'Metadata') && isName(subtype, 'XML')) { - // XXX: This should examine the charset the XML document defines, - // however since there are currently no real means to decode - // arbitrary charsets, let's just hope that the author of the PDF - // was reasonable enough to stick with the XML default charset, - // which is UTF-8. - try { - metadata = stringToUTF8String(bytesToString(stream.getBytes())); - } catch (e) { - if (e instanceof MissingDataException) { - throw e; - } - info('Skipping invalid metadata.'); + const stream = this.xref.fetch(streamRef, !encryptMetadata); + let metadata; + + if (stream && isDict(stream.dict)) { + const type = stream.dict.get('Type'); + const subtype = stream.dict.get('Subtype'); + + if (isName(type, 'Metadata') && isName(subtype, 'XML')) { + // XXX: This should examine the charset the XML document defines, + // however since there are currently no real means to decode + // arbitrary charsets, let's just hope that the author of the PDF + // was reasonable enough to stick with the XML default charset, + // which is UTF-8. + try { + metadata = stringToUTF8String(bytesToString(stream.getBytes())); + } catch (e) { + if (e instanceof MissingDataException) { + throw e; } + info('Skipping invalid metadata.'); } } + } + return shadow(this, 'metadata', metadata); + } - return shadow(this, 'metadata', metadata); - }, - get toplevelPagesDict() { - var pagesObj = this.catDict.get('Pages'); - if (!isDict(pagesObj)) { - throw new FormatError('invalid top-level pages dictionary'); - } - // shadow the prototype getter - return shadow(this, 'toplevelPagesDict', pagesObj); - }, - get documentOutline() { - var obj = null; - try { - obj = this.readDocumentOutline(); - } catch (ex) { - if (ex instanceof MissingDataException) { - throw ex; - } - warn('Unable to read document outline'); + get toplevelPagesDict() { + const pagesObj = this.catDict.get('Pages'); + if (!isDict(pagesObj)) { + throw new FormatError('Invalid top-level pages dictionary.'); + } + return shadow(this, 'toplevelPagesDict', pagesObj); + } + + get documentOutline() { + let obj = null; + try { + obj = this._readDocumentOutline(); + } catch (ex) { + if (ex instanceof MissingDataException) { + throw ex; } - return shadow(this, 'documentOutline', obj); - }, - readDocumentOutline: function Catalog_readDocumentOutline() { - var obj = this.catDict.get('Outlines'); - if (!isDict(obj)) { - return null; + warn('Unable to read document outline.'); + } + return shadow(this, 'documentOutline', obj); + } + + /** + * @private + */ + _readDocumentOutline() { + let obj = this.catDict.get('Outlines'); + if (!isDict(obj)) { + return null; + } + obj = obj.getRaw('First'); + if (!isRef(obj)) { + return null; + } + + const root = { items: [], }; + const queue = [{ obj, parent: root, }]; + // To avoid recursion, keep track of the already processed items. + const processed = new RefSet(); + processed.put(obj); + const xref = this.xref, blackColor = new Uint8ClampedArray(3); + + while (queue.length > 0) { + const i = queue.shift(); + const outlineDict = xref.fetchIfRef(i.obj); + if (outlineDict === null) { + continue; } - obj = obj.getRaw('First'); - if (!isRef(obj)) { - return null; + if (!outlineDict.has('Title')) { + throw new FormatError('Invalid outline item encountered.'); } - var root = { items: [], }; - var queue = [{ obj, parent: root, }]; - // To avoid recursion, keep track of the already processed items. - var processed = new RefSet(); - processed.put(obj); - var xref = this.xref, blackColor = new Uint8ClampedArray(3); - - while (queue.length > 0) { - var i = queue.shift(); - var outlineDict = xref.fetchIfRef(i.obj); - if (outlineDict === null) { - continue; - } - if (!outlineDict.has('Title')) { - throw new FormatError('Invalid outline item'); - } - var data = { url: null, dest: null, }; - Catalog.parseDestDictionary({ - destDict: outlineDict, - resultObj: data, - docBaseUrl: this.pdfManager.docBaseUrl, - }); - var title = outlineDict.get('Title'); - var flags = outlineDict.get('F') || 0; - - var color = outlineDict.getArray('C'), rgbColor = blackColor; - // We only need to parse the color when it's valid, and non-default. - if (Array.isArray(color) && color.length === 3 && - (color[0] !== 0 || color[1] !== 0 || color[2] !== 0)) { - rgbColor = ColorSpace.singletons.rgb.getRgb(color, 0); - } - var outlineItem = { - dest: data.dest, - url: data.url, - unsafeUrl: data.unsafeUrl, - newWindow: data.newWindow, - title: stringToPDFString(title), - color: rgbColor, - count: outlineDict.get('Count'), - bold: !!(flags & 2), - italic: !!(flags & 1), - items: [], - }; - i.parent.items.push(outlineItem); - obj = outlineDict.getRaw('First'); - if (isRef(obj) && !processed.has(obj)) { - queue.push({ obj, parent: outlineItem, }); - processed.put(obj); - } - obj = outlineDict.getRaw('Next'); - if (isRef(obj) && !processed.has(obj)) { - queue.push({ obj, parent: i.parent, }); - processed.put(obj); - } + const data = { url: null, dest: null, }; + Catalog.parseDestDictionary({ + destDict: outlineDict, + resultObj: data, + docBaseUrl: this.pdfManager.docBaseUrl, + }); + const title = outlineDict.get('Title'); + const flags = outlineDict.get('F') || 0; + const color = outlineDict.getArray('C'); + let rgbColor = blackColor; + + // We only need to parse the color when it's valid, and non-default. + if (Array.isArray(color) && color.length === 3 && + (color[0] !== 0 || color[1] !== 0 || color[2] !== 0)) { + rgbColor = ColorSpace.singletons.rgb.getRgb(color, 0); + } + + const outlineItem = { + dest: data.dest, + url: data.url, + unsafeUrl: data.unsafeUrl, + newWindow: data.newWindow, + title: stringToPDFString(title), + color: rgbColor, + count: outlineDict.get('Count'), + bold: !!(flags & 2), + italic: !!(flags & 1), + items: [], + }; + + i.parent.items.push(outlineItem); + obj = outlineDict.getRaw('First'); + if (isRef(obj) && !processed.has(obj)) { + queue.push({ obj, parent: outlineItem, }); + processed.put(obj); } - return (root.items.length > 0 ? root.items : null); - }, - get numPages() { - var obj = this.toplevelPagesDict.get('Count'); - if (!Number.isInteger(obj)) { - throw new FormatError( - 'page count in top level pages object is not an integer'); + obj = outlineDict.getRaw('Next'); + if (isRef(obj) && !processed.has(obj)) { + queue.push({ obj, parent: i.parent, }); + processed.put(obj); } - // shadow the prototype getter - return shadow(this, 'numPages', obj); - }, + } + return (root.items.length > 0 ? root.items : null); + } - get destinations() { - const obj = this._readDests(), dests = Object.create(null); - if (obj instanceof NameTree) { - const names = obj.getAll(); - for (let name in names) { - dests[name] = fetchDestination(names[name]); + get numPages() { + const obj = this.toplevelPagesDict.get('Count'); + if (!Number.isInteger(obj)) { + throw new FormatError( + 'Page count in top-level pages dictionary is not an integer.'); + } + return shadow(this, 'numPages', obj); + } + + get destinations() { + const obj = this._readDests(), dests = Object.create(null); + if (obj instanceof NameTree) { + const names = obj.getAll(); + for (let name in names) { + dests[name] = fetchDestination(names[name]); + } + } else if (obj instanceof Dict) { + obj.forEach(function(key, value) { + if (value) { + dests[key] = fetchDestination(value); } - } else if (obj instanceof Dict) { - obj.forEach(function(key, value) { - if (value) { - dests[key] = fetchDestination(value); - } - }); - } - return shadow(this, 'destinations', dests); - }, - getDestination(destinationId) { - const obj = this._readDests(); - if (obj instanceof NameTree || obj instanceof Dict) { - return fetchDestination(obj.get(destinationId) || null); + }); + } + return shadow(this, 'destinations', dests); + } + + getDestination(destinationId) { + const obj = this._readDests(); + if (obj instanceof NameTree || obj instanceof Dict) { + return fetchDestination(obj.get(destinationId) || null); + } + return null; + } + + /** + * @private + */ + _readDests() { + const obj = this.catDict.get('Names'); + if (obj && obj.has('Dests')) { + return new NameTree(obj.getRaw('Dests'), this.xref); + } else if (this.catDict.has('Dests')) { // Simple destination dictionary. + return this.catDict.get('Dests'); + } + } + + get pageLabels() { + let obj = null; + try { + obj = this._readPageLabels(); + } catch (ex) { + if (ex instanceof MissingDataException) { + throw ex; } + warn('Unable to read page labels.'); + } + return shadow(this, 'pageLabels', obj); + } + + /** + * @private + */ + _readPageLabels() { + const obj = this.catDict.getRaw('PageLabels'); + if (!obj) { return null; - }, - _readDests() { - const obj = this.catDict.get('Names'); - if (obj && obj.has('Dests')) { - return new NameTree(obj.getRaw('Dests'), this.xref); - } else if (this.catDict.has('Dests')) { // Simple destination dictionary. - return this.catDict.get('Dests'); - } - }, + } - get pageLabels() { - var obj = null; - try { - obj = this.readPageLabels(); - } catch (ex) { - if (ex instanceof MissingDataException) { - throw ex; - } - warn('Unable to read page labels.'); - } - return shadow(this, 'pageLabels', obj); - }, - readPageLabels: function Catalog_readPageLabels() { - var obj = this.catDict.getRaw('PageLabels'); - if (!obj) { - return null; - } - var pageLabels = new Array(this.numPages); - var style = null; - var prefix = ''; + const pageLabels = new Array(this.numPages); + let style = null, prefix = ''; - var numberTree = new NumberTree(obj, this.xref); - var nums = numberTree.getAll(); - var currentLabel = '', currentIndex = 1; + const numberTree = new NumberTree(obj, this.xref); + const nums = numberTree.getAll(); + let currentLabel = '', currentIndex = 1; - for (var i = 0, ii = this.numPages; i < ii; i++) { - if (i in nums) { - const labelDict = nums[i]; - if (!isDict(labelDict)) { - throw new FormatError('The PageLabel is not a dictionary.'); - } + for (let i = 0, ii = this.numPages; i < ii; i++) { + if (i in nums) { + const labelDict = nums[i]; + if (!isDict(labelDict)) { + throw new FormatError('PageLabel is not a dictionary.'); + } - if (labelDict.has('Type') && - !isName(labelDict.get('Type'), 'PageLabel')) { - throw new FormatError('Invalid type in PageLabel dictionary.'); - } + if (labelDict.has('Type') && + !isName(labelDict.get('Type'), 'PageLabel')) { + throw new FormatError('Invalid type in PageLabel dictionary.'); + } - if (labelDict.has('S')) { - const s = labelDict.get('S'); - if (!isName(s)) { - throw new FormatError('Invalid style in PageLabel dictionary.'); - } - style = s.name; - } else { - style = null; + if (labelDict.has('S')) { + const s = labelDict.get('S'); + if (!isName(s)) { + throw new FormatError('Invalid style in PageLabel dictionary.'); } + style = s.name; + } else { + style = null; + } - if (labelDict.has('P')) { - const p = labelDict.get('P'); - if (!isString(p)) { - throw new FormatError('Invalid prefix in PageLabel dictionary.'); - } - prefix = stringToPDFString(p); - } else { - prefix = ''; + if (labelDict.has('P')) { + const p = labelDict.get('P'); + if (!isString(p)) { + throw new FormatError('Invalid prefix in PageLabel dictionary.'); } + prefix = stringToPDFString(p); + } else { + prefix = ''; + } - if (labelDict.has('St')) { - const st = labelDict.get('St'); - if (!(Number.isInteger(st) && st >= 1)) { - throw new FormatError('Invalid start in PageLabel dictionary.'); - } - currentIndex = st; - } else { - currentIndex = 1; + if (labelDict.has('St')) { + const st = labelDict.get('St'); + if (!(Number.isInteger(st) && st >= 1)) { + throw new FormatError('Invalid start in PageLabel dictionary.'); } + currentIndex = st; + } else { + currentIndex = 1; } + } - switch (style) { - case 'D': - currentLabel = currentIndex; - break; - case 'R': - case 'r': - currentLabel = toRomanNumerals(currentIndex, style === 'r'); - break; - case 'A': - case 'a': - var LIMIT = 26; // Use only the characters A--Z, or a--z. - var A_UPPER_CASE = 0x41, A_LOWER_CASE = 0x61; - - var baseCharCode = (style === 'a' ? A_LOWER_CASE : A_UPPER_CASE); - var letterIndex = currentIndex - 1; - var character = String.fromCharCode(baseCharCode + + switch (style) { + case 'D': + currentLabel = currentIndex; + break; + case 'R': + case 'r': + currentLabel = toRomanNumerals(currentIndex, style === 'r'); + break; + case 'A': + case 'a': + const LIMIT = 26; // Use only the characters A-Z, or a-z. + const A_UPPER_CASE = 0x41, A_LOWER_CASE = 0x61; + + const baseCharCode = (style === 'a' ? A_LOWER_CASE : A_UPPER_CASE); + const letterIndex = currentIndex - 1; + const character = String.fromCharCode(baseCharCode + (letterIndex % LIMIT)); - var charBuf = []; - for (var j = 0, jj = (letterIndex / LIMIT) | 0; j <= jj; j++) { - charBuf.push(character); - } - currentLabel = charBuf.join(''); - break; - default: - if (style) { - throw new FormatError( - `Invalid style "${style}" in PageLabel dictionary.`); - } - currentLabel = ''; - } + const charBuf = []; + for (let j = 0, jj = (letterIndex / LIMIT) | 0; j <= jj; j++) { + charBuf.push(character); + } + currentLabel = charBuf.join(''); + break; + default: + if (style) { + throw new FormatError( + `Invalid style "${style}" in PageLabel dictionary.`); + } + currentLabel = ''; + } + + pageLabels[i] = prefix + currentLabel; + currentIndex++; + } + return pageLabels; + } - pageLabels[i] = prefix + currentLabel; - currentIndex++; + get pageMode() { + const obj = this.catDict.get('PageMode'); + let pageMode = 'UseNone'; // Default value. + + if (isName(obj)) { + switch (obj.name) { + case 'UseNone': + case 'UseOutlines': + case 'UseThumbs': + case 'FullScreen': + case 'UseOC': + case 'UseAttachments': + pageMode = obj.name; } - return pageLabels; - }, + } + return shadow(this, 'pageMode', pageMode); + } + + get attachments() { + let attachments = null, nameTreeRef; + const obj = this.catDict.get('Names'); + if (obj) { + nameTreeRef = obj.getRaw('EmbeddedFiles'); + } - get pageMode() { - let obj = this.catDict.get('PageMode'); - let pageMode = 'UseNone'; // Default value. - - if (isName(obj)) { - switch (obj.name) { - case 'UseNone': - case 'UseOutlines': - case 'UseThumbs': - case 'FullScreen': - case 'UseOC': - case 'UseAttachments': - pageMode = obj.name; + if (nameTreeRef) { + const nameTree = new NameTree(nameTreeRef, this.xref); + const names = nameTree.getAll(); + for (const name in names) { + const fs = new FileSpec(names[name], this.xref); + if (!attachments) { + attachments = Object.create(null); } + attachments[stringToPDFString(name)] = fs.serializable; } - return shadow(this, 'pageMode', pageMode); - }, + } + return shadow(this, 'attachments', attachments); + } - get attachments() { - var xref = this.xref; - var attachments = null, nameTreeRef; - var obj = this.catDict.get('Names'); - if (obj) { - nameTreeRef = obj.getRaw('EmbeddedFiles'); - } - - if (nameTreeRef) { - var nameTree = new NameTree(nameTreeRef, xref); - var names = nameTree.getAll(); - for (var name in names) { - var fs = new FileSpec(names[name], xref); - if (!attachments) { - attachments = Object.create(null); - } - attachments[stringToPDFString(name)] = fs.serializable; - } + get javaScript() { + const obj = this.catDict.get('Names'); + + let javaScript = null; + function appendIfJavaScriptDict(jsDict) { + const type = jsDict.get('S'); + if (!isName(type, 'JavaScript')) { + return; } - return shadow(this, 'attachments', attachments); - }, - get javaScript() { - var xref = this.xref; - var obj = this.catDict.get('Names'); - - let javaScript = null; - function appendIfJavaScriptDict(jsDict) { - var type = jsDict.get('S'); - if (!isName(type, 'JavaScript')) { - return; - } - var js = jsDict.get('JS'); - if (isStream(js)) { - js = bytesToString(js.getBytes()); - } else if (!isString(js)) { - return; - } - if (!javaScript) { - javaScript = []; - } - javaScript.push(stringToPDFString(js)); - } - if (obj && obj.has('JavaScript')) { - var nameTree = new NameTree(obj.getRaw('JavaScript'), xref); - var names = nameTree.getAll(); - for (var name in names) { - // We don't really use the JavaScript right now. This code is - // defensive so we don't cause errors on document load. - var jsDict = names[name]; - if (isDict(jsDict)) { - appendIfJavaScriptDict(jsDict); - } + + let js = jsDict.get('JS'); + if (isStream(js)) { + js = bytesToString(js.getBytes()); + } else if (!isString(js)) { + return; + } + + if (!javaScript) { + javaScript = []; + } + javaScript.push(stringToPDFString(js)); + } + + if (obj && obj.has('JavaScript')) { + const nameTree = new NameTree(obj.getRaw('JavaScript'), this.xref); + const names = nameTree.getAll(); + for (const name in names) { + // We don't use most JavaScript in PDF documents. This code is + // defensive so we don't cause errors on document load. + const jsDict = names[name]; + if (isDict(jsDict)) { + appendIfJavaScriptDict(jsDict); } } + } - // Append OpenAction actions to javaScript array - var openactionDict = this.catDict.get('OpenAction'); - if (isDict(openactionDict, 'Action')) { - var actionType = openactionDict.get('S'); - if (isName(actionType, 'Named')) { - // The named Print action is not a part of the PDF 1.7 specification, - // but is supported by many PDF readers/writers (including Adobe's). - var action = openactionDict.get('N'); - if (isName(action, 'Print')) { - if (!javaScript) { - javaScript = []; - } - javaScript.push('print({});'); + // Append OpenAction actions to the JavaScript array. + const openActionDict = this.catDict.get('OpenAction'); + if (isDict(openActionDict, 'Action')) { + const actionType = openActionDict.get('S'); + if (isName(actionType, 'Named')) { + // The named Print action is not a part of the PDF 1.7 specification, + // but is supported by many PDF readers/writers (including Adobe's). + const action = openActionDict.get('N'); + if (isName(action, 'Print')) { + if (!javaScript) { + javaScript = []; } - } else { - appendIfJavaScriptDict(openactionDict); + javaScript.push('print({});'); } + } else { + appendIfJavaScriptDict(openActionDict); } + } - return shadow(this, 'javaScript', javaScript); - }, + return shadow(this, 'javaScript', javaScript); + } - cleanup: function Catalog_cleanup() { - this.pageKidsCountCache.clear(); + cleanup() { + this.pageKidsCountCache.clear(); - var promises = []; - this.fontCache.forEach(function (promise) { - promises.push(promise); - }); - return Promise.all(promises).then((translatedFonts) => { - for (var i = 0, ii = translatedFonts.length; i < ii; i++) { - var font = translatedFonts[i].dict; - delete font.translated; - } - this.fontCache.clear(); - this.builtInCMapCache.clear(); - }); - }, + const promises = []; + this.fontCache.forEach(function(promise) { + promises.push(promise); + }); - getPageDict: function Catalog_getPageDict(pageIndex) { - var capability = createPromiseCapability(); - var nodesToVisit = [this.catDict.getRaw('Pages')]; - var count, currentPageIndex = 0; - var xref = this.xref, pageKidsCountCache = this.pageKidsCountCache; - - function next() { - while (nodesToVisit.length) { - var currentNode = nodesToVisit.pop(); - - if (isRef(currentNode)) { - count = pageKidsCountCache.get(currentNode); - // Skip nodes where the page can't be. - if (count > 0 && currentPageIndex + count < pageIndex) { - currentPageIndex += count; - continue; - } + return Promise.all(promises).then((translatedFonts) => { + for (let i = 0, ii = translatedFonts.length; i < ii; i++) { + const font = translatedFonts[i].dict; + delete font.translated; + } + this.fontCache.clear(); + this.builtInCMapCache.clear(); + }); + } - xref.fetchAsync(currentNode).then(function (obj) { - if (isDict(obj, 'Page') || (isDict(obj) && !obj.has('Kids'))) { - if (pageIndex === currentPageIndex) { - // Cache the Page reference, since it can *greatly* improve - // performance by reducing redundant lookups in long documents - // where all nodes are found at *one* level of the tree. - if (currentNode && !pageKidsCountCache.has(currentNode)) { - pageKidsCountCache.put(currentNode, 1); - } - capability.resolve([obj, currentNode]); - } else { - currentPageIndex++; - next(); - } - return; - } - nodesToVisit.push(obj); - next(); - }, capability.reject); - return; - } + getPageDict(pageIndex) { + const capability = createPromiseCapability(); + const nodesToVisit = [this.catDict.getRaw('Pages')]; + const xref = this.xref, pageKidsCountCache = this.pageKidsCountCache; + let count, currentPageIndex = 0; - // Must be a child page dictionary. - if (!isDict(currentNode)) { - capability.reject(new FormatError( - 'page dictionary kid reference points to wrong type of object')); - return; - } + function next() { + while (nodesToVisit.length) { + const currentNode = nodesToVisit.pop(); - count = currentNode.get('Count'); - if (Number.isInteger(count) && count >= 0) { - // Cache the Kids count, since it can reduce redundant lookups in - // documents where all nodes are found at *one* level of the tree. - var objId = currentNode.objId; - if (objId && !pageKidsCountCache.has(objId)) { - pageKidsCountCache.put(objId, count); - } - // Skip nodes where the page can't be. - if (currentPageIndex + count <= pageIndex) { - currentPageIndex += count; - continue; - } + if (isRef(currentNode)) { + count = pageKidsCountCache.get(currentNode); + // Skip nodes where the page can't be. + if (count > 0 && currentPageIndex + count < pageIndex) { + currentPageIndex += count; + continue; } - var kids = currentNode.get('Kids'); - if (!Array.isArray(kids)) { - // Prevent errors in corrupt PDF documents that violate the - // specification by *inlining* Page dicts directly in the Kids - // array, rather than using indirect objects (fixes issue9540.pdf). - if (isName(currentNode.get('Type'), 'Page') || - (!currentNode.has('Type') && currentNode.has('Contents'))) { - if (currentPageIndex === pageIndex) { - capability.resolve([currentNode, null]); - return; + xref.fetchAsync(currentNode).then(function(obj) { + if (isDict(obj, 'Page') || (isDict(obj) && !obj.has('Kids'))) { + if (pageIndex === currentPageIndex) { + // Cache the Page reference, since it can *greatly* improve + // performance by reducing redundant lookups in long documents + // where all nodes are found at *one* level of the tree. + if (currentNode && !pageKidsCountCache.has(currentNode)) { + pageKidsCountCache.put(currentNode, 1); + } + capability.resolve([obj, currentNode]); + } else { + currentPageIndex++; + next(); } - currentPageIndex++; - continue; + return; } + nodesToVisit.push(obj); + next(); + }, capability.reject); + return; + } - capability.reject(new FormatError( - 'page dictionary kids object is not an array')); - return; + // Must be a child page dictionary. + if (!isDict(currentNode)) { + capability.reject(new FormatError( + 'Page dictionary kid reference points to wrong type of object.')); + return; + } + + count = currentNode.get('Count'); + if (Number.isInteger(count) && count >= 0) { + // Cache the Kids count, since it can reduce redundant lookups in + // documents where all nodes are found at *one* level of the tree. + const objId = currentNode.objId; + if (objId && !pageKidsCountCache.has(objId)) { + pageKidsCountCache.put(objId, count); + } + // Skip nodes where the page can't be. + if (currentPageIndex + count <= pageIndex) { + currentPageIndex += count; + continue; } + } - // Always check all `Kids` nodes, to avoid getting stuck in an empty - // node further down in the tree (see issue5644.pdf, issue8088.pdf), - // and to ensure that we actually find the correct `Page` dict. - for (var last = kids.length - 1; last >= 0; last--) { - nodesToVisit.push(kids[last]); + const kids = currentNode.get('Kids'); + if (!Array.isArray(kids)) { + // Prevent errors in corrupt PDF documents that violate the + // specification by *inlining* Page dicts directly in the Kids + // array, rather than using indirect objects (fixes issue9540.pdf). + if (isName(currentNode.get('Type'), 'Page') || + (!currentNode.has('Type') && currentNode.has('Contents'))) { + if (currentPageIndex === pageIndex) { + capability.resolve([currentNode, null]); + return; + } + currentPageIndex++; + continue; } + + capability.reject(new FormatError( + 'Page dictionary kids object is not an array.')); + return; + } + + // Always check all `Kids` nodes, to avoid getting stuck in an empty + // node further down in the tree (see issue5644.pdf, issue8088.pdf), + // and to ensure that we actually find the correct `Page` dict. + for (let last = kids.length - 1; last >= 0; last--) { + nodesToVisit.push(kids[last]); } - capability.reject(new Error('Page index ' + pageIndex + ' not found.')); } - next(); - return capability.promise; - }, + capability.reject(new Error(`Page index ${pageIndex} not found.`)); + } + next(); + return capability.promise; + } - getPageIndex: function Catalog_getPageIndex(pageRef) { - // The page tree nodes have the count of all the leaves below them. To get - // how many pages are before we just have to walk up the tree and keep - // adding the count of siblings to the left of the node. - var xref = this.xref; - function pagesBeforeRef(kidRef) { - var total = 0; - var parentRef; - return xref.fetchAsync(kidRef).then(function (node) { - if (isRefsEqual(kidRef, pageRef) && !isDict(node, 'Page') && - !(isDict(node) && !node.has('Type') && node.has('Contents'))) { - throw new FormatError( - 'The reference does not point to a /Page Dict.'); - } - if (!node) { - return null; - } - if (!isDict(node)) { - throw new FormatError('node must be a Dict.'); - } - parentRef = node.getRaw('Parent'); - return node.getAsync('Parent'); - }).then(function (parent) { - if (!parent) { - return null; - } - if (!isDict(parent)) { - throw new FormatError('parent must be a Dict.'); + getPageIndex(pageRef) { + // The page tree nodes have the count of all the leaves below them. To get + // how many pages are before we just have to walk up the tree and keep + // adding the count of siblings to the left of the node. + const xref = this.xref; + + function pagesBeforeRef(kidRef) { + let total = 0, parentRef; + + return xref.fetchAsync(kidRef).then(function(node) { + if (isRefsEqual(kidRef, pageRef) && !isDict(node, 'Page') && + !(isDict(node) && !node.has('Type') && node.has('Contents'))) { + throw new FormatError( + 'The reference does not point to a /Page dictionary.'); + } + if (!node) { + return null; + } + if (!isDict(node)) { + throw new FormatError('Node must be a dictionary.'); + } + parentRef = node.getRaw('Parent'); + return node.getAsync('Parent'); + }).then(function(parent) { + if (!parent) { + return null; + } + if (!isDict(parent)) { + throw new FormatError('Parent must be a dictionary.'); + } + return parent.getAsync('Kids'); + }).then(function(kids) { + if (!kids) { + return null; + } + + const kidPromises = []; + let found = false; + for (let i = 0, ii = kids.length; i < ii; i++) { + const kid = kids[i]; + if (!isRef(kid)) { + throw new FormatError('Kid must be a reference.'); } - return parent.getAsync('Kids'); - }).then(function (kids) { - if (!kids) { - return null; + if (isRefsEqual(kid, kidRef)) { + found = true; + break; } - var kidPromises = []; - var found = false; - for (var i = 0; i < kids.length; i++) { - var kid = kids[i]; - if (!isRef(kid)) { - throw new FormatError('kid must be a Ref.'); + kidPromises.push(xref.fetchAsync(kid).then(function(kid) { + if (!isDict(kid)) { + throw new FormatError('Kid node must be a dictionary.'); } - if (isRefsEqual(kid, kidRef)) { - found = true; - break; + if (kid.has('Count')) { + total += kid.get('Count'); + } else { // Page leaf node. + total++; } - kidPromises.push(xref.fetchAsync(kid).then(function (kid) { - if (!isDict(kid)) { - throw new FormatError('kid node must be a Dict.'); - } - if (kid.has('Count')) { - var count = kid.get('Count'); - total += count; - } else { // page leaf node - total++; - } - })); - } - if (!found) { - throw new FormatError('kid ref not found in parents kids'); - } - return Promise.all(kidPromises).then(function () { - return [total, parentRef]; - }); + })); + } + if (!found) { + throw new FormatError('Kid reference not found in parent\'s kids.'); + } + return Promise.all(kidPromises).then(function() { + return [total, parentRef]; }); - } + }); + } - var total = 0; - function next(ref) { - return pagesBeforeRef(ref).then(function (args) { - if (!args) { - return total; - } - var count = args[0]; - var parentRef = args[1]; - total += count; - return next(parentRef); - }); - } + let total = 0; + function next(ref) { + return pagesBeforeRef(ref).then(function(args) { + if (!args) { + return total; + } + const [count, parentRef] = args; + total += count; + return next(parentRef); + }); + } - return next(pageRef); - }, - }; + return next(pageRef); + } /** * @typedef ParseDestDictionaryParameters @@ -618,16 +637,17 @@ var Catalog = (function CatalogClosure() { * Helper function used to parse the contents of destination dictionaries. * @param {ParseDestDictionaryParameters} params */ - Catalog.parseDestDictionary = function Catalog_parseDestDictionary(params) { + static parseDestDictionary(params) { // Lets URLs beginning with 'www.' default to using the 'http://' protocol. function addDefaultProtocolToUrl(url) { if (url.indexOf('www.') === 0) { - return ('http://' + url); + return `http://${url}`; } return url; } + // According to ISO 32000-1:2008, section 12.6.4.7, URIs should be encoded - // in 7-bit ASCII. Some bad PDFs use UTF-8 encoding, see Bugzilla 1122280. + // in 7-bit ASCII. Some bad PDFs use UTF-8 encoding; see Bugzilla 1122280. function tryConvertUrlEncoding(url) { try { return stringToUTF8String(url); @@ -636,19 +656,19 @@ var Catalog = (function CatalogClosure() { } } - var destDict = params.destDict; + const destDict = params.destDict; if (!isDict(destDict)) { - warn('parseDestDictionary: "destDict" must be a dictionary.'); + warn('parseDestDictionary: `destDict` must be a dictionary.'); return; } - var resultObj = params.resultObj; + const resultObj = params.resultObj; if (typeof resultObj !== 'object') { - warn('parseDestDictionary: "resultObj" must be an object.'); + warn('parseDestDictionary: `resultObj` must be an object.'); return; } - var docBaseUrl = params.docBaseUrl || null; + const docBaseUrl = params.docBaseUrl || null; - var action = destDict.get('A'), url, dest; + let action = destDict.get('A'), url, dest; if (!isDict(action) && destDict.has('Dest')) { // A /Dest entry should *only* contain a Name or an Array, but some bad // PDF generators ignore that and treat it as an /A entry. @@ -656,12 +676,12 @@ var Catalog = (function CatalogClosure() { } if (isDict(action)) { - let actionType = action.get('S'); + const actionType = action.get('S'); if (!isName(actionType)) { warn('parseDestDictionary: Invalid type in Action dictionary.'); return; } - let actionName = actionType.name; + const actionName = actionType.name; switch (actionName) { case 'URI': @@ -687,7 +707,7 @@ var Catalog = (function CatalogClosure() { /* falls through */ case 'GoToR': - var urlDict = action.get('F'); + const urlDict = action.get('F'); if (isDict(urlDict)) { // We assume that we found a FileSpec dictionary // and fetch the URL without checking any further. @@ -697,13 +717,13 @@ var Catalog = (function CatalogClosure() { } // NOTE: the destination is relative to the *remote* document. - var remoteDest = action.get('D'); + let remoteDest = action.get('D'); if (remoteDest) { if (isName(remoteDest)) { remoteDest = remoteDest.name; } if (isString(url)) { - let baseUrl = url.split('#')[0]; + const baseUrl = url.split('#')[0]; if (isString(remoteDest)) { url = baseUrl + '#' + remoteDest; } else if (Array.isArray(remoteDest)) { @@ -712,21 +732,23 @@ var Catalog = (function CatalogClosure() { } } // The 'NewWindow' property, equal to `LinkTarget.BLANK`. - var newWindow = action.get('NewWindow'); + const newWindow = action.get('NewWindow'); if (isBool(newWindow)) { resultObj.newWindow = newWindow; } break; case 'Named': - var namedAction = action.get('N'); + const namedAction = action.get('N'); if (isName(namedAction)) { resultObj.action = namedAction.name; } break; case 'JavaScript': - var jsAction = action.get('JS'), js; + const jsAction = action.get('JS'); + let js; + if (isStream(jsAction)) { js = bytesToString(jsAction.getBytes()); } else if (isString(jsAction)) { @@ -734,19 +756,19 @@ var Catalog = (function CatalogClosure() { } if (js) { - // Attempt to recover valid URLs from 'JS' entries with certain - // white-listed formats, e.g. + // Attempt to recover valid URLs from `JS` entries with certain + // white-listed formats: // - window.open('http://example.com') // - app.launchURL('http://example.com', true) - var URL_OPEN_METHODS = [ + const URL_OPEN_METHODS = [ 'app.launchURL', 'window.open' ]; - var regex = new RegExp( + const regex = new RegExp( '^\\s*(' + URL_OPEN_METHODS.join('|').split('.').join('\\.') + ')\\((?:\'|\")([^\'\"]*)(?:\'|\")(?:,\\s*(\\w+)\\)|\\))', 'i'); - var jsUrl = regex.exec(stringToPDFString(js)); + const jsUrl = regex.exec(stringToPDFString(js)); if (jsUrl && jsUrl[2]) { url = jsUrl[2]; @@ -758,7 +780,7 @@ var Catalog = (function CatalogClosure() { } /* falls through */ default: - warn(`parseDestDictionary: Unsupported Action type "${actionName}".`); + warn(`parseDestDictionary: unsupported action type "${actionName}".`); break; } } else if (destDict.has('Dest')) { // Simple destination. @@ -767,7 +789,7 @@ var Catalog = (function CatalogClosure() { if (isString(url)) { url = tryConvertUrlEncoding(url); - var absoluteUrl = createValidAbsoluteUrl(url, docBaseUrl); + const absoluteUrl = createValidAbsoluteUrl(url, docBaseUrl); if (absoluteUrl) { resultObj.url = absoluteUrl.href; } @@ -781,10 +803,8 @@ var Catalog = (function CatalogClosure() { resultObj.dest = dest; } } - }; - - return Catalog; -})(); + } +} var XRef = (function XRefClosure() { function XRef(stream, pdfManager) { From 4a0d15aa0e4d611428a69caf5e546e1994260217 Mon Sep 17 00:00:00 2001 From: Tim van der Meij Date: Sat, 25 Aug 2018 16:35:23 +0200 Subject: [PATCH 2/2] Slightly simplify the catalog code --- src/core/obj.js | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/src/core/obj.js b/src/core/obj.js index 5d74a250d8860..9bb26fc5dd71b 100644 --- a/src/core/obj.js +++ b/src/core/obj.js @@ -53,9 +53,9 @@ class Catalog { return shadow(this, 'metadata', null); } - const encryptMetadata = (!this.xref.encrypt ? false : - this.xref.encrypt.encryptMetadata); - const stream = this.xref.fetch(streamRef, !encryptMetadata); + const suppressEncryption = !(this.xref.encrypt && + this.xref.encrypt.encryptMetadata); + const stream = this.xref.fetch(streamRef, suppressEncryption); let metadata; if (stream && isDict(stream.dict)) { @@ -351,14 +351,11 @@ class Catalog { } get attachments() { - let attachments = null, nameTreeRef; const obj = this.catDict.get('Names'); - if (obj) { - nameTreeRef = obj.getRaw('EmbeddedFiles'); - } + let attachments = null; - if (nameTreeRef) { - const nameTree = new NameTree(nameTreeRef, this.xref); + if (obj && obj.has('EmbeddedFiles')) { + const nameTree = new NameTree(obj.getRaw('EmbeddedFiles'), this.xref); const names = nameTree.getAll(); for (const name in names) { const fs = new FileSpec(names[name], this.xref);