Skip to content

Commit

Permalink
Improve memory usage around the BasePdfManager.docBaseUrl parameter…
Browse files Browse the repository at this point in the history
… (PR 7689 follow-up)

While there is nothing *outright* wrong with the existing implementation, it can however lead to increased memory usage in one particular case (that I completely overlooked when implementing this):
For "data:"-URLs, which by definition contains the entire PDF document and can thus be arbitrarily large, we obviously want to avoid sending, storing, and/or logging the "raw" docBaseUrl in that case.

To address this, this patch makes the following changes:
 - Ignore any non-string in the `docBaseUrl` option passed to `getDocument`, since those are unsupported anyway, already on the main-thread.

 - Ignore "data:"-URLs in the `docBaseUrl` option passed to `getDocument`, to avoid having to send what could potentially be a *very* long string to the worker-thread.

 - Parse the `docBaseUrl` option *directly* in the `BasePdfManager`-constructors, on the worker-thread, to avoid having to store the "raw" docBaseUrl in the first place.
  • Loading branch information
Snuffleupagus committed Mar 16, 2021
1 parent 4a0e5cd commit b8b0ea8
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 18 deletions.
33 changes: 15 additions & 18 deletions src/core/pdf_manager.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,23 @@
* limitations under the License.
*/

import {
createValidAbsoluteUrl,
shadow,
unreachable,
warn,
} from "../shared/util.js";
import { createValidAbsoluteUrl, unreachable, warn } from "../shared/util.js";
import { ChunkedStreamManager } from "./chunked_stream.js";
import { MissingDataException } from "./core_utils.js";
import { PDFDocument } from "./document.js";
import { Stream } from "./stream.js";

function parseDocBaseUrl(url) {
if (url) {
const absoluteUrl = createValidAbsoluteUrl(url);
if (absoluteUrl) {
return absoluteUrl.href;
}
warn(`Invalid absolute docBaseUrl: "${url}".`);
}
return null;
}

class BasePdfManager {
constructor() {
if (this.constructor === BasePdfManager) {
Expand All @@ -40,16 +46,7 @@ class BasePdfManager {
}

get docBaseUrl() {
let docBaseUrl = null;
if (this._docBaseUrl) {
const absoluteUrl = createValidAbsoluteUrl(this._docBaseUrl);
if (absoluteUrl) {
docBaseUrl = absoluteUrl.href;
} else {
warn(`Invalid absolute docBaseUrl: "${this._docBaseUrl}".`);
}
}
return shadow(this, "docBaseUrl", docBaseUrl);
return this._docBaseUrl;
}

onLoadedStream() {
Expand Down Expand Up @@ -111,7 +108,7 @@ class LocalPdfManager extends BasePdfManager {

this._docId = docId;
this._password = password;
this._docBaseUrl = docBaseUrl;
this._docBaseUrl = parseDocBaseUrl(docBaseUrl);
this.evaluatorOptions = evaluatorOptions;

const stream = new Stream(data);
Expand Down Expand Up @@ -146,7 +143,7 @@ class NetworkPdfManager extends BasePdfManager {

this._docId = docId;
this._password = args.password;
this._docBaseUrl = docBaseUrl;
this._docBaseUrl = parseDocBaseUrl(docBaseUrl);
this.msgHandler = args.msgHandler;
this.evaluatorOptions = evaluatorOptions;

Expand Down
10 changes: 10 additions & 0 deletions src/display/api.js
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ import {
deprecated,
DOMCanvasFactory,
DOMCMapReaderFactory,
isDataScheme,
loadScript,
PageViewport,
RenderingCancelledException,
Expand Down Expand Up @@ -285,6 +286,15 @@ function getDocument(src) {
params.fontExtraProperties = params.fontExtraProperties === true;
params.pdfBug = params.pdfBug === true;

if (
typeof params.docBaseUrl !== "string" ||
isDataScheme(params.docBaseUrl)
) {
// Ignore "data:"-URLs, since they can't be used to recover valid absolute
// URLs anyway. We want to avoid sending them to the worker-thread, since
// they contain the *entire* PDF document and can thus be arbitrarily long.
params.docBaseUrl = null;
}
if (!Number.isInteger(params.maxImageSize)) {
params.maxImageSize = -1;
}
Expand Down
1 change: 1 addition & 0 deletions src/display/display_utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -710,6 +710,7 @@ export {
DOMSVGFactory,
getFilenameFromUrl,
getPdfFilenameFromUrl,
isDataScheme,
isFetchSupported,
isPdfFile,
isValidFetchUrl,
Expand Down

0 comments on commit b8b0ea8

Please sign in to comment.