Skip to content

Commit

Permalink
Merge pull request #12271 from timvandermeij/acroform-type-detection
Browse files Browse the repository at this point in the history
Improve AcroForm/XFA form type detection
  • Loading branch information
timvandermeij authored Aug 25, 2020
2 parents 525cc73 + 0f229d5 commit 4ffdbe6
Show file tree
Hide file tree
Showing 6 changed files with 260 additions and 74 deletions.
2 changes: 1 addition & 1 deletion src/core/annotation.js
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ class AnnotationFactory {
* instance.
*/
static create(xref, ref, pdfManager, idFactory) {
return pdfManager.ensureDoc("acroForm").then(acroForm => {
return pdfManager.ensureCatalog("acroForm").then(acroForm => {
return pdfManager.ensure(this, "_create", [
xref,
ref,
Expand Down
124 changes: 78 additions & 46 deletions src/core/document.js
Original file line number Diff line number Diff line change
Expand Up @@ -552,6 +552,7 @@ class PDFDocument {
this.stream = stream;
this.xref = new XRef(stream, pdfManager);
this._pagePromises = [];
this._version = null;

const idCounters = {
font: 0,
Expand All @@ -572,42 +573,15 @@ class PDFDocument {
}

parse(recoveryMode) {
this.setup(recoveryMode);

const version = this.catalog.catDict.get("Version");
if (isName(version)) {
this.pdfFormatVersion = version.name;
}

// Check if AcroForms are present in the document.
try {
this.acroForm = this.catalog.catDict.get("AcroForm");
if (this.acroForm) {
this.xfa = this.acroForm.get("XFA");
const fields = this.acroForm.get("Fields");
if ((!Array.isArray(fields) || fields.length === 0) && !this.xfa) {
this.acroForm = null; // No fields and no XFA, so it's not a form.
}
}
} catch (ex) {
if (ex instanceof MissingDataException) {
throw ex;
}
info("Cannot fetch AcroForm entry; assuming no AcroForms are present");
this.acroForm = null;
}
this.xref.parse(recoveryMode);
this.catalog = new Catalog(this.pdfManager, this.xref);

// Check if a Collection dictionary is present in the document.
try {
const collection = this.catalog.catDict.get("Collection");
if (isDict(collection) && collection.size > 0) {
this.collection = collection;
}
} catch (ex) {
if (ex instanceof MissingDataException) {
throw ex;
}
info("Cannot fetch Collection dictionary.");
// The `checkHeader` method is called before this method and parses the
// version from the header. The specification states in section 7.5.2
// that the version from the catalog, if present, should overwrite the
// version from the header.
if (this.catalog.version) {
this._version = this.catalog.version;
}
}

Expand Down Expand Up @@ -693,27 +667,85 @@ class PDFDocument {
}
version += String.fromCharCode(ch);
}
if (!this.pdfFormatVersion) {
if (!this._version) {
// Remove the "%PDF-" prefix.
this.pdfFormatVersion = version.substring(5);
this._version = version.substring(5);
}
}

parseStartXRef() {
this.xref.setStartXRef(this.startXRef);
}

setup(recoveryMode) {
this.xref.parse(recoveryMode);
this.catalog = new Catalog(this.pdfManager, this.xref);
}

get numPages() {
const linearization = this.linearization;
const num = linearization ? linearization.numPages : this.catalog.numPages;
return shadow(this, "numPages", num);
}

/**
* @private
*/
_hasOnlyDocumentSignatures(fields, recursionDepth = 0) {
const RECURSION_LIMIT = 10;
return fields.every(field => {
field = this.xref.fetchIfRef(field);
if (field.has("Kids")) {
if (++recursionDepth > RECURSION_LIMIT) {
warn("_hasOnlyDocumentSignatures: maximum recursion depth reached");
return false;
}
return this._hasOnlyDocumentSignatures(
field.get("Kids"),
recursionDepth
);
}
const isSignature = isName(field.get("FT"), "Sig");
const rectangle = field.get("Rect");
const isInvisible =
Array.isArray(rectangle) && rectangle.every(value => value === 0);
return isSignature && isInvisible;
});
}

get formInfo() {
const formInfo = { hasAcroForm: false, hasXfa: false };
const acroForm = this.catalog.acroForm;
if (!acroForm) {
return shadow(this, "formInfo", formInfo);
}

try {
// The document contains XFA data if the `XFA` entry is a non-empty
// array or stream.
const xfa = acroForm.get("XFA");
const hasXfa =
(Array.isArray(xfa) && xfa.length > 0) ||
(isStream(xfa) && !xfa.isEmpty);
formInfo.hasXfa = hasXfa;

// The document contains AcroForm data if the `Fields` entry is a
// non-empty array and it doesn't consist of only document signatures.
// This second check is required for files that don't actually contain
// AcroForm data (only XFA data), but that use the `Fields` entry to
// store (invisible) document signatures. This can be detected using
// the first bit of the `SigFlags` integer (see Table 219 in the
// specification).
const fields = acroForm.get("Fields");
const hasFields = Array.isArray(fields) && fields.length > 0;
const sigFlags = acroForm.get("SigFlags");
const hasOnlyDocumentSignatures =
!!(sigFlags & 0x1) && this._hasOnlyDocumentSignatures(fields);
formInfo.hasAcroForm = hasFields && !hasOnlyDocumentSignatures;
} catch (ex) {
if (ex instanceof MissingDataException) {
throw ex;
}
info("Cannot fetch form information.");
}
return shadow(this, "formInfo", formInfo);
}

get documentInfo() {
const DocumentInfoValidators = {
Title: isString,
Expand All @@ -727,7 +759,7 @@ class PDFDocument {
Trapped: isName,
};

let version = this.pdfFormatVersion;
let version = this._version;
if (
typeof version !== "string" ||
!PDF_HEADER_VERSION_REGEXP.test(version)
Expand All @@ -739,9 +771,9 @@ class PDFDocument {
const docInfo = {
PDFFormatVersion: version,
IsLinearized: !!this.linearization,
IsAcroFormPresent: !!this.acroForm,
IsXFAPresent: !!this.xfa,
IsCollectionPresent: !!this.collection,
IsAcroFormPresent: this.formInfo.hasAcroForm,
IsXFAPresent: this.formInfo.hasXfa,
IsCollectionPresent: !!this.catalog.collection,
};

let infoDict;
Expand Down
76 changes: 58 additions & 18 deletions src/core/obj.js
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,8 @@ class Catalog {
this.pdfManager = pdfManager;
this.xref = xref;

this.catDict = xref.getCatalogObj();
if (!isDict(this.catDict)) {
this._catDict = xref.getCatalogObj();
if (!isDict(this._catDict)) {
throw new FormatError("Catalog object is not a dictionary.");
}

Expand All @@ -76,8 +76,48 @@ class Catalog {
this.pageKidsCountCache = new RefSetCache();
}

get version() {
const version = this._catDict.get("Version");
if (!isName(version)) {
return shadow(this, "version", null);
}
return shadow(this, "version", version.name);
}

get collection() {
let collection = null;
try {
const obj = this._catDict.get("Collection");
if (isDict(obj) && obj.size > 0) {
collection = obj;
}
} catch (ex) {
if (ex instanceof MissingDataException) {
throw ex;
}
info("Cannot fetch Collection entry; assuming no collection is present.");
}
return shadow(this, "collection", collection);
}

get acroForm() {
let acroForm = null;
try {
const obj = this._catDict.get("AcroForm");
if (isDict(obj) && obj.size > 0) {
acroForm = obj;
}
} catch (ex) {
if (ex instanceof MissingDataException) {
throw ex;
}
info("Cannot fetch AcroForm entry; assuming no forms are present.");
}
return shadow(this, "acroForm", acroForm);
}

get metadata() {
const streamRef = this.catDict.getRaw("Metadata");
const streamRef = this._catDict.getRaw("Metadata");
if (!isRef(streamRef)) {
return shadow(this, "metadata", null);
}
Expand Down Expand Up @@ -112,7 +152,7 @@ class Catalog {
}

get toplevelPagesDict() {
const pagesObj = this.catDict.get("Pages");
const pagesObj = this._catDict.get("Pages");
if (!isDict(pagesObj)) {
throw new FormatError("Invalid top-level pages dictionary.");
}
Expand All @@ -136,7 +176,7 @@ class Catalog {
* @private
*/
_readDocumentOutline() {
let obj = this.catDict.get("Outlines");
let obj = this._catDict.get("Outlines");
if (!isDict(obj)) {
return null;
}
Expand Down Expand Up @@ -257,7 +297,7 @@ class Catalog {
get optionalContentConfig() {
let config = null;
try {
const properties = this.catDict.get("OCProperties");
const properties = this._catDict.get("OCProperties");
if (!properties) {
return shadow(this, "optionalContentConfig", null);
}
Expand Down Expand Up @@ -370,12 +410,12 @@ class Catalog {
* @private
*/
_readDests() {
const obj = this.catDict.get("Names");
const obj = this._catDict.get("Names");
if (obj && obj.has("Dests")) {
return new NameTree(obj.getRaw("Dests"), this.xref);
} else if (this.catDict.has("Dests")) {
} else if (this._catDict.has("Dests")) {
// Simple destination dictionary.
return this.catDict.get("Dests");
return this._catDict.get("Dests");
}
return undefined;
}
Expand All @@ -397,7 +437,7 @@ class Catalog {
* @private
*/
_readPageLabels() {
const obj = this.catDict.getRaw("PageLabels");
const obj = this._catDict.getRaw("PageLabels");
if (!obj) {
return null;
}
Expand Down Expand Up @@ -497,7 +537,7 @@ class Catalog {
}

get pageLayout() {
const obj = this.catDict.get("PageLayout");
const obj = this._catDict.get("PageLayout");
// Purposely use a non-standard default value, rather than 'SinglePage', to
// allow differentiating between `undefined` and /SinglePage since that does
// affect the Scroll mode (continuous/non-continuous) used in Adobe Reader.
Expand All @@ -518,7 +558,7 @@ class Catalog {
}

get pageMode() {
const obj = this.catDict.get("PageMode");
const obj = this._catDict.get("PageMode");
let pageMode = "UseNone"; // Default value.

if (isName(obj)) {
Expand Down Expand Up @@ -556,7 +596,7 @@ class Catalog {
NumCopies: Number.isInteger,
};

const obj = this.catDict.get("ViewerPreferences");
const obj = this._catDict.get("ViewerPreferences");
let prefs = null;

if (isDict(obj)) {
Expand Down Expand Up @@ -681,7 +721,7 @@ class Catalog {
* NOTE: "JavaScript" actions are, for now, handled by `get javaScript` below.
*/
get openAction() {
const obj = this.catDict.get("OpenAction");
const obj = this._catDict.get("OpenAction");
let openAction = null;

if (isDict(obj)) {
Expand Down Expand Up @@ -714,7 +754,7 @@ class Catalog {
}

get attachments() {
const obj = this.catDict.get("Names");
const obj = this._catDict.get("Names");
let attachments = null;

if (obj && obj.has("EmbeddedFiles")) {
Expand All @@ -732,7 +772,7 @@ class Catalog {
}

get javaScript() {
const obj = this.catDict.get("Names");
const obj = this._catDict.get("Names");

let javaScript = null;
function appendIfJavaScriptDict(jsDict) {
Expand Down Expand Up @@ -768,7 +808,7 @@ class Catalog {
}

// Append OpenAction "JavaScript" actions to the JavaScript array.
const openAction = this.catDict.get("OpenAction");
const openAction = this._catDict.get("OpenAction");
if (isDict(openAction) && isName(openAction.get("S"), "JavaScript")) {
appendIfJavaScriptDict(openAction);
}
Expand Down Expand Up @@ -813,7 +853,7 @@ class Catalog {

getPageDict(pageIndex) {
const capability = createPromiseCapability();
const nodesToVisit = [this.catDict.getRaw("Pages")];
const nodesToVisit = [this._catDict.getRaw("Pages")];
const visitedNodes = new RefSet();
const xref = this.xref,
pageKidsCountCache = this.pageKidsCountCache;
Expand Down
8 changes: 5 additions & 3 deletions test/unit/annotation_spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,9 @@ describe("annotation", function () {
constructor(params) {
this.docBaseUrl = params.docBaseUrl || null;
this.pdfDocument = {
acroForm: new Dict(),
catalog: {
acroForm: new Dict(),
},
};
}

Expand All @@ -56,8 +58,8 @@ describe("annotation", function () {
});
}

ensureDoc(prop, args) {
return this.ensure(this.pdfDocument, prop, args);
ensureCatalog(prop, args) {
return this.ensure(this.pdfDocument.catalog, prop, args);
}
}

Expand Down
Loading

0 comments on commit 4ffdbe6

Please sign in to comment.