From ecabd757478e4c17773d6aa53910d31ff74aa2c9 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Thu, 5 Nov 2020 13:35:33 +0100 Subject: [PATCH] Add global caching, for /Resources without blend modes, and use it to reduce repeated fetching/parsing in `PartialEvaluator.hasBlendModes` The `PartialEvaluator.hasBlendModes` method is necessary to determine if there's any blend modes on a page, which unfortunately requires *synchronous* parsing of the /Resources of each page before its rendering can start (see the "StartRenderPage"-message). In practice it's not uncommon for certain /Resources-entries to be found on more than one page (referenced via the XRef-table), which thus leads to unnecessary re-fetching/re-parsing of data in `PartialEvaluator.hasBlendModes`. To improve performance, especially in pathological cases, we can cache /Resources-entries when it's absolutely clear that they do not contain *any* blend modes at all[1]. This way, subsequent `PartialEvaluator.hasBlendModes` calls can be made significantly more efficient. This patch was tested using the PDF file from issue 6961, i.e. https://github.com/mozilla/pdf.js/files/121712/test.pdf: ``` [ { "id": "issue6961", "file": "../web/pdfs/issue6961.pdf", "md5": "a80e4357a8fda758d96c2c76f2980b03", "rounds": 100, "type": "eq" } ] ``` which gave the following results when comparing this patch against the `master` branch: ``` -- Grouped By browser, page, stat -- browser | page | stat | Count | Baseline(ms) | Current(ms) | +/- | % | Result(P<.05) ------- | ---- | ------------ | ----- | ------------ | ----------- | ---- | ------ | ------------- firefox | 0 | Overall | 100 | 1034 | 555 | -480 | -46.39 | faster firefox | 0 | Page Request | 100 | 489 | 7 | -482 | -98.67 | faster firefox | 0 | Rendering | 100 | 545 | 548 | 2 | 0.45 | firefox | 1 | Overall | 100 | 912 | 428 | -484 | -53.06 | faster firefox | 1 | Page Request | 100 | 487 | 1 | -486 | -99.77 | faster firefox | 1 | Rendering | 100 | 425 | 427 | 2 | 0.51 | ``` --- [1] In the case where blend modes *are* found, it becomes a lot more difficult to know if it's generally safe to skip them. Hence we don't cache anything in that case, however note that most document/pages do not utilize blend modes anyway. --- src/core/document.js | 8 +++++++- src/core/evaluator.js | 16 ++++++++++++++-- src/core/obj.js | 2 ++ src/core/primitives.js | 26 ++++++++++++++++++++++++-- 4 files changed, 47 insertions(+), 5 deletions(-) diff --git a/src/core/document.js b/src/core/document.js index cb9e474d3bd21c..19bd28977bc660 100644 --- a/src/core/document.js +++ b/src/core/document.js @@ -76,6 +76,7 @@ class Page { fontCache, builtInCMapCache, globalImageCache, + nonBlendModesSet, }) { this.pdfManager = pdfManager; this.pageIndex = pageIndex; @@ -85,6 +86,7 @@ class Page { this.fontCache = fontCache; this.builtInCMapCache = builtInCMapCache; this.globalImageCache = globalImageCache; + this.nonBlendModesSet = nonBlendModesSet; this.evaluatorOptions = pdfManager.evaluatorOptions; this.resourcesPromise = null; @@ -312,7 +314,10 @@ class Page { const opList = new OperatorList(intent, sink); handler.send("StartRenderPage", { - transparency: partialEvaluator.hasBlendModes(this.resources), + transparency: partialEvaluator.hasBlendModes( + this.resources, + this.nonBlendModesSet + ), pageIndex: this.pageIndex, intent, }); @@ -917,6 +922,7 @@ class PDFDocument { fontCache: catalog.fontCache, builtInCMapCache: catalog.builtInCMapCache, globalImageCache: catalog.globalImageCache, + nonBlendModesSet: catalog.nonBlendModesSet, }); })); } diff --git a/src/core/evaluator.js b/src/core/evaluator.js index 16bff8a9d8cf47..dc61e8f31265a1 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -239,12 +239,15 @@ class PartialEvaluator { return newEvaluator; } - hasBlendModes(resources) { + hasBlendModes(resources, nonBlendModesSet) { if (!(resources instanceof Dict)) { return false; } + if (resources.objId && nonBlendModesSet.has(resources.objId)) { + return false; + } - const processed = new RefSet(); + const processed = new RefSet(nonBlendModesSet); if (resources.objId) { processed.put(resources.objId); } @@ -344,6 +347,15 @@ class PartialEvaluator { } } } + + if (processed.size > 0) { + // When no blend modes exist, there's no need check any of the parsed + // `Ref`s again for subsequent pages. This helps reduce redundant + // `XRef.fetch` calls for some documents (e.g. issue6961.pdf). + processed.forEach(ref => { + nonBlendModesSet.put(ref); + }); + } return false; } diff --git a/src/core/obj.js b/src/core/obj.js index 7cd4193a69517d..b7565b7b0ed93f 100644 --- a/src/core/obj.js +++ b/src/core/obj.js @@ -76,6 +76,7 @@ class Catalog { this.builtInCMapCache = new Map(); this.globalImageCache = new GlobalImageCache(); this.pageKidsCountCache = new RefSetCache(); + this.nonBlendModesSet = new RefSet(); } get version() { @@ -937,6 +938,7 @@ class Catalog { clearPrimitiveCaches(); this.globalImageCache.clear(/* onlyData = */ manuallyTriggered); this.pageKidsCountCache.clear(); + this.nonBlendModesSet.clear(); const promises = []; this.fontCache.forEach(function (promise) { diff --git a/src/core/primitives.js b/src/core/primitives.js index a6083db99a47aa..1587308267ef7a 100644 --- a/src/core/primitives.js +++ b/src/core/primitives.js @@ -277,8 +277,20 @@ var Ref = (function RefClosure() { // The reference is identified by number and generation. // This structure stores only one instance of the reference. class RefSet { - constructor() { - this._set = new Set(); + constructor(parent = null) { + if ( + (typeof PDFJSDev === "undefined" || + PDFJSDev.test("!PRODUCTION || TESTING")) && + parent && + !(parent instanceof RefSet) + ) { + unreachable('RefSet: Invalid "parent" value.'); + } + this._set = new Set(parent && parent._set); + } + + get size() { + return this._set.size; } has(ref) { @@ -292,6 +304,16 @@ class RefSet { remove(ref) { this._set.delete(ref.toString()); } + + forEach(callback) { + for (const ref of this._set.values()) { + callback(ref); + } + } + + clear() { + this._set.clear(); + } } class RefSetCache {