Skip to content

Commit

Permalink
Add local caching of non-font Graphics State (ExtGState) data in `PartialEvaluator.getTextContent`
Browse files Browse the repository at this point in the history

It turns out that `getTextContent` suffers from *similar* problems with repeated GStates as `getOperatorList`; please see the previous patch.

While only `/ExtGState` resources containing Fonts will actually be *parsed* by `PartialEvaluator.getTextContent`, we're still forced to fetch/validate repeated `/ExtGState` resources even though *most* of them won't affect the textContent (since they mostly contain purely graphical state).

With these changes we also no longer need to immediately reset the current text-state when encountering a `setGState` operator, which may thus improve text-selection in some cases.
  • Loading branch information
Snuffleupagus committed Jul 14, 2020
1 parent 90eb579 commit 981ff41
Showing 1 changed file with 52 additions and 17 deletions.
69 changes: 52 additions & 17 deletions src/core/evaluator.js
Original file line number Diff line number Diff line change
Expand Up @@ -1848,6 +1848,7 @@ class PartialEvaluator {
// The xobj is parsed iff it's needed, e.g. if there is a `DO` cmd.
var xobjs = null;
const emptyXObjectCache = new LocalImageCache();
const emptyGStateCache = new LocalGStateCache();

var preprocessor = new EvaluatorPreprocessor(stream, xref, stateManager);

Expand Down Expand Up @@ -2420,25 +2421,59 @@ class PartialEvaluator {
);
return;
case OPS.setGState:
flushTextContentItem();
var dictName = args[0];
var extGState = resources.get("ExtGState");

if (!isDict(extGState) || !isName(dictName)) {
break;
}
var gState = extGState.get(dictName.name);
if (!isDict(gState)) {
name = args[0].name;
if (name && emptyGStateCache.getByName(name)) {
break;
}
var gStateFont = gState.get("Font");
if (gStateFont) {
textState.fontName = null;
textState.fontSize = gStateFont[1];
next(handleSetFont(null, gStateFont[0]));
return;
}
break;

next(
new Promise(function (resolveGState, rejectGState) {
if (!name) {
throw new FormatError("GState must be referred to by name.");
}

const extGState = resources.get("ExtGState");
if (!(extGState instanceof Dict)) {
throw new FormatError("ExtGState should be a dictionary.");
}

const gState = extGState.get(name);
// TODO: Attempt to lookup cached GStates by reference as well,
// if and only if there are PDF documents where doing so
// would significantly improve performance.
if (!(gState instanceof Dict)) {
throw new FormatError("GState should be a dictionary.");
}

const gStateFont = gState.get("Font");
if (!gStateFont) {
emptyGStateCache.set(name, gState.objId, true);

resolveGState();
return;
}
flushTextContentItem();

textState.fontName = null;
textState.fontSize = gStateFont[1];
handleSetFont(null, gStateFont[0]).then(
resolveGState,
rejectGState
);
}).catch(function (reason) {
if (reason instanceof AbortException) {
return;
}
if (self.options.ignoreErrors) {
// Error(s) in the ExtGState -- allow text-extraction to
// continue.
warn(`getTextContent - ignoring ExtGState: "${reason}".`);
return;
}
throw reason;
})
);
return;
} // switch
if (textContent.items.length >= sink.desiredSize) {
// Wait for ready, if we reach highWaterMark.
Expand Down

0 comments on commit 981ff41

Please sign in to comment.