From 65132ba5d84ee2daf6234696cc5a4e9c78240da3 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Tue, 13 Oct 2020 19:30:43 +0200 Subject: [PATCH] Handle `WorkerTask`s, and various PDF document properties, correctly in the "SaveDocument" handler in `src/core/worker.js` - Actually register/unregister the `WorkerTask`s, used when saving each page, correctly. To prevent issues when terminating the Worker, we purposely wait for all running `WorkerTask`s to complete first. Hence we need to actually handle `WorkerTask`s the same way in "SaveDocument" as in the rest of this file, see e.g. "GetOperatorList" and "GetTextContent". - Access `PDFDocument` properties in a generally safe/consistent way. While the current code works fine, given how the PDF document is being loaded, it still seems like a very good idea to be *consistent* in how we access these kinds of properties (since in general you need to avoid `MissingDataException` everywhere in this file). - Change a variable name, since there's essentially no precedent in the code-base for *local* variable names to start with an underscore. 
--- src/core/worker.js | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/src/core/worker.js b/src/core/worker.js index 673b3f45cf291..267f277c3d54d 100644 --- a/src/core/worker.js +++ b/src/core/worker.js @@ -525,18 +525,32 @@ class WorkerMessageHandler { const promises = [ pdfManager.onLoadedStream(), pdfManager.ensureCatalog("acroForm"), + pdfManager.ensureDoc("xref"), + pdfManager.ensureDoc("startXRef"), ]; - const document = pdfManager.pdfDocument; + for (let pageIndex = 0; pageIndex < numPages; pageIndex++) { promises.push( pdfManager.getPage(pageIndex).then(function (page) { const task = new WorkerTask(`Save: page ${pageIndex}`); - return page.save(handler, task, annotationStorage); + startWorkerTask(task); + + return page + .save(handler, task, annotationStorage) + .finally(function () { + finishWorkerTask(task); + }); }) ); } - return Promise.all(promises).then(([stream, acroForm, ...refs]) => { + return Promise.all(promises).then(function ([ + stream, + acroForm, + xref, + startXRef, + ...refs + ]) { let newRefs = []; for (const ref of refs) { newRefs = ref @@ -562,16 +576,15 @@ class WorkerMessageHandler { warn("Unsupported XFA type."); } - const xref = document.xref; let newXrefInfo = Object.create(null); if (xref.trailer) { - // Get string info from Info in order to compute fileId - const _info = Object.create(null); + // Get string info from Info in order to compute fileId. 
+ const infoObj = Object.create(null); const xrefInfo = xref.trailer.get("Info") || null; if (xrefInfo instanceof Dict) { xrefInfo.forEach((key, value) => { if (isString(key) && isString(value)) { - _info[key] = stringToPDFString(value); + infoObj[key] = stringToPDFString(value); } }); } @@ -581,9 +594,9 @@ class WorkerMessageHandler { encrypt: xref.trailer.getRaw("Encrypt") || null, newRef: xref.getNewRef(), infoRef: xref.trailer.getRaw("Info") || null, - info: _info, + info: infoObj, fileIds: xref.trailer.getRaw("ID") || null, - startXRef: document.startXRef, + startXRef, filename, }; }