Skip to content

Commit

Permalink
Redo the form type detection logic and include unit tests
Browse files Browse the repository at this point in the history
Good form type detection is important to get reliable telemetry and to
only show the fallback bar if a form cannot be filled out by the user.

PDF.js only supports AcroForm data, so XFA data is explicitly unsupported
(tracked in issue #2373). However, the previous form type detection
couldn't separate AcroForm and XFA well enough, causing form type
telemetry to be incorrect sometimes and the fallback bar to be shown for
forms that could in fact be filled out by the user.

The solution in this commit is found by studying the specification and
the form documents that are available to us. In a nutshell the rules are:

- There is XFA data if the `XFA` entry is a non-empty array or stream.
- There is AcroForm data if the `Fields` entry is a non-empty array and
  it doesn't consist of only invisible digital document signatures.

The digital signatures part was not handled in the old code, causing a
document with only XFA data to also be marked as having AcroForm data.
Moreover, the old code didn't check all the data types.

Now that AcroForm and XFA can be distinguished, the viewer is configured
to only show the fallback bar for documents that only have XFA data. If
a document also has AcroForm data, the viewer can use that to render the
form. We have not found documents where the XFA data was necessary in
that case.

Finally, we include unit tests to ensure that all cases are covered and
move the form type detection out of the `parse` function so that it's
only executed if the document information is actually requested
(potentially making initial parsing a tiny bit faster).
  • Loading branch information
timvandermeij committed Aug 23, 2020
1 parent 18baac8 commit 8ed7bbe
Show file tree
Hide file tree
Showing 3 changed files with 175 additions and 26 deletions.
77 changes: 57 additions & 20 deletions src/core/document.js
Original file line number Diff line number Diff line change
Expand Up @@ -582,24 +582,6 @@ class PDFDocument {
this._version = this.catalog.version;
}

// Check if AcroForms are present in the document.
try {
this._hasAcroForm = !!this.catalog.acroForm;
if (this._hasAcroForm) {
this.xfa = this.catalog.acroForm.get("XFA");
const fields = this.catalog.acroForm.get("Fields");
if ((!Array.isArray(fields) || fields.length === 0) && !this.xfa) {
this._hasAcroForm = false; // No fields and no XFA, so it's not a form.
}
}
} catch (ex) {
if (ex instanceof MissingDataException) {
throw ex;
}
info("Cannot fetch AcroForm entry; assuming no AcroForms are present.");
this._hasAcroForm = false;
}

// Check if a collection is present in the document.
try {
this._hasCollection = !!this.catalog.collection;
Expand Down Expand Up @@ -715,6 +697,61 @@ class PDFDocument {
return shadow(this, "numPages", num);
}

/**
* @private
*/
_hasOnlyDocumentSignatures(fields) {
return fields.every(field => {
field = this.xref.fetchIfRef(field);
if (field.has("Kids")) {
return this._hasOnlyDocumentSignatures(field.get("Kids"));
}
const fieldType = field.get("FT");
const isSignature = isName(fieldType) && fieldType.name === "Sig";
const rectangle = field.get("Rect");
const isInvisible =
Array.isArray(rectangle) && rectangle.every(value => value === 0);
return isSignature && isInvisible;
});
}

get formInfo() {
const formInfo = { hasAcroForm: false, hasXfa: false };
try {
const acroForm = this.catalog.acroForm;
if (acroForm) {
// The document contains XFA data if the `XFA` entry is a non-empty
// array or stream.
const xfa = acroForm.get("XFA");
const hasXfa = (Array.isArray(xfa) || isStream(xfa)) && xfa.length > 0;
formInfo.hasXfa = hasXfa;

// The document contains AcroForm data if the `Fields` entry is a
// non-empty array and it doesn't consist of only digital signatures.
// This second check is required for files that don't actually contain
// AcroForm data (only XFA data), but that use the `Fields` entry to
// store invisible document signatures. This can be detected using the
// first bit of the `SigFlags` integer (see Table 219 in the
// specification).
const fields = acroForm.get("Fields");
const hasFields = Array.isArray(fields) && fields.length > 0;
const sigFlags = acroForm.get("SigFlags");
const hasDocumentSignatures = !!(sigFlags & 0x1);
let hasOnlyDocumentSignatures = false;
if (hasDocumentSignatures) {
hasOnlyDocumentSignatures = this._hasOnlyDocumentSignatures(fields);
}
formInfo.hasAcroForm = hasFields && !hasOnlyDocumentSignatures;
}
} catch (ex) {
if (ex instanceof MissingDataException) {
throw ex;
}
info("Cannot fetch AcroForm entry; assuming no forms are present.");
}
return shadow(this, "formInfo", formInfo);
}

get documentInfo() {
const DocumentInfoValidators = {
Title: isString,
Expand All @@ -740,8 +777,8 @@ class PDFDocument {
const docInfo = {
PDFFormatVersion: version,
IsLinearized: !!this.linearization,
IsAcroFormPresent: this._hasAcroForm,
IsXFAPresent: !!this.xfa,
IsAcroFormPresent: this.formInfo.hasAcroForm,
IsXFAPresent: this.formInfo.hasXfa,
IsCollectionPresent: this._hasCollection,
};

Expand Down
112 changes: 111 additions & 1 deletion test/unit/document_spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,10 @@
* limitations under the License.
*/

import { createIdFactory } from "./test_utils.js";
import { createIdFactory, XRefMock } from "./test_utils.js";
import { Dict, Name, Ref } from "../../src/core/primitives.js";
import { PDFDocument } from "../../src/core/document.js";
import { StringStream } from "../../src/core/stream.js";

describe("document", function () {
describe("Page", function () {
Expand All @@ -40,4 +43,111 @@ describe("document", function () {
expect(idFactory1.getDocId()).toEqual("g_d0");
});
});

// Unit tests for the form type detection exposed through
// `PDFDocument.formInfo`.
describe("PDFDocument", function () {
  const pdfManager = {
    get docId() {
      return "d0";
    },
  };
  const stream = new StringStream("Dummy_PDF_data");

  /**
   * Create a document whose catalog only exposes the given `AcroForm`
   * dictionary.
   *
   * @param {Dict|null} acroForm - The `AcroForm` dictionary, if any.
   * @param {XRefMock} [xref] - Cross-reference table to resolve references
   *   with. It has to be passed here (rather than being set on a previously
   *   created document) since every call returns a *new* document.
   */
  function getDocument(acroForm, xref = null) {
    const pdfDocument = new PDFDocument(pdfManager, stream);
    pdfDocument.catalog = { acroForm };
    if (xref) {
      pdfDocument.xref = xref;
    }
    return pdfDocument;
  }

  it("should get form info when no form data is present", function () {
    const pdfDocument = getDocument(null);
    expect(pdfDocument.formInfo).toEqual({
      hasAcroForm: false,
      hasXfa: false,
    });
  });

  it("should get form info when XFA is present", function () {
    const acroForm = new Dict();

    // The `XFA` entry can only be a non-empty array or stream.
    acroForm.set("XFA", []);
    let pdfDocument = getDocument(acroForm);
    expect(pdfDocument.formInfo).toEqual({
      hasAcroForm: false,
      hasXfa: false,
    });

    acroForm.set("XFA", ["foo", "bar"]);
    pdfDocument = getDocument(acroForm);
    expect(pdfDocument.formInfo).toEqual({
      hasAcroForm: false,
      hasXfa: true,
    });

    acroForm.set("XFA", new StringStream(""));
    pdfDocument = getDocument(acroForm);
    expect(pdfDocument.formInfo).toEqual({
      hasAcroForm: false,
      hasXfa: false,
    });

    acroForm.set("XFA", new StringStream("non-empty"));
    pdfDocument = getDocument(acroForm);
    expect(pdfDocument.formInfo).toEqual({
      hasAcroForm: false,
      hasXfa: true,
    });
  });

  it("should get form info when AcroForm is present", function () {
    const acroForm = new Dict();

    // The `Fields` entry can only be a non-empty array.
    acroForm.set("Fields", []);
    let pdfDocument = getDocument(acroForm);
    expect(pdfDocument.formInfo).toEqual({
      hasAcroForm: false,
      hasXfa: false,
    });

    acroForm.set("Fields", ["foo", "bar"]);
    pdfDocument = getDocument(acroForm);
    expect(pdfDocument.formInfo).toEqual({
      hasAcroForm: true,
      hasXfa: false,
    });

    // If the first bit of the `SigFlags` entry is *not* set (the value 2
    // only has the second bit set), the fields are not checked for document
    // signatures and hence count as AcroForm data.
    acroForm.set("Fields", ["foo", "bar"]);
    acroForm.set("SigFlags", 2);
    pdfDocument = getDocument(acroForm);
    expect(pdfDocument.formInfo).toEqual({
      hasAcroForm: true,
      hasXfa: false,
    });

    // If the first bit of the `SigFlags` entry is set and the `Fields` array
    // only contains document signatures, then there is no AcroForm data.
    const annotationDict = new Dict();
    annotationDict.set("FT", Name.get("Sig"));
    annotationDict.set("Rect", [0, 0, 0, 0]);
    const annotationRef = Ref.get(11, 0);

    const kidsDict = new Dict();
    kidsDict.set("Kids", [annotationRef]);
    const kidsRef = Ref.get(10, 0);

    const xref = new XRefMock([
      { ref: annotationRef, data: annotationDict },
      { ref: kidsRef, data: kidsDict },
    ]);

    acroForm.set("Fields", [kidsRef]);
    acroForm.set("SigFlags", 3);
    // The mock must be attached to the document under test; otherwise the
    // references cannot be resolved and the expectation below would only be
    // met because the resulting error is swallowed by `formInfo`.
    pdfDocument = getDocument(acroForm, xref);
    expect(pdfDocument.formInfo).toEqual({
      hasAcroForm: false,
      hasXfa: false,
    });
  });
});
});
12 changes: 7 additions & 5 deletions web/app.js
Original file line number Diff line number Diff line change
Expand Up @@ -1426,14 +1426,14 @@ const PDFViewerApplication = {
this.setTitle(contentDispositionFilename);
}

if (info.IsXFAPresent) {
if (info.IsXFAPresent && !info.IsAcroFormPresent) {
console.warn("Warning: XFA is not supported");
this._delayedFallback(UNSUPPORTED_FEATURES.forms);
} else if (
info.IsAcroFormPresent &&
(info.IsAcroFormPresent || info.IsXFAPresent) &&
!this.pdfViewer.renderInteractiveForms
) {
console.warn("Warning: AcroForm support is not enabled");
console.warn("Warning: Interactive form support is not enabled");
this._delayedFallback(UNSUPPORTED_FEATURES.forms);
}

Expand All @@ -1454,8 +1454,10 @@ const PDFViewerApplication = {
});
}
let formType = null;
if (info.IsAcroFormPresent) {
formType = info.IsXFAPresent ? "xfa" : "acroform";
if (info.IsXFAPresent) {
formType = "xfa";
} else if (info.IsAcroFormPresent) {
formType = "acroform";
}
this.externalServices.reportTelemetry({
type: "documentInfo",
Expand Down

0 comments on commit 8ed7bbe

Please sign in to comment.