Skip to content

Commit

Permalink
Save form data in XFA datasets when pdf is a mix of acroforms and xfa (
Browse files Browse the repository at this point in the history
…#12344)

* Move display/xml_parser.js in shared to use it in worker

* Save form data in XFA datasets when pdf is a mix of acroforms and xfa

Co-authored-by: Brendan Dahl <[email protected]>
  • Loading branch information
calixteman and brendandahl authored Sep 8, 2020
1 parent 622e2fb commit 68b99c5
Show file tree
Hide file tree
Showing 11 changed files with 416 additions and 19 deletions.
31 changes: 25 additions & 6 deletions src/core/annotation.js
Original file line number Diff line number Diff line change
Expand Up @@ -1073,18 +1073,23 @@ class WidgetAnnotation extends Annotation {
return null;
}

const value = annotationStorage[this.data.id];
const bbox = [
0,
0,
this.data.rect[2] - this.data.rect[0],
this.data.rect[3] - this.data.rect[1],
];

const xfa = {
path: stringToPDFString(dict.get("T") || ""),
value,
};

const newRef = evaluator.xref.getNewRef();
const AP = new Dict(evaluator.xref);
AP.set("N", newRef);

const value = annotationStorage[this.data.id];
const encrypt = evaluator.xref.encrypt;
let originalTransform = null;
let newTransform = null;
Expand Down Expand Up @@ -1120,9 +1125,9 @@ class WidgetAnnotation extends Annotation {
return [
// data for the original object
// V field changed + reference for new AP
{ ref: this.ref, data: bufferOriginal.join("") },
{ ref: this.ref, data: bufferOriginal.join(""), xfa },
// data for the new AP
{ ref: newRef, data: bufferNew.join("") },
{ ref: newRef, data: bufferNew.join(""), xfa: null },
];
}

Expand Down Expand Up @@ -1521,6 +1526,11 @@ class ButtonWidgetAnnotation extends WidgetAnnotation {
return null;
}

const xfa = {
path: stringToPDFString(dict.get("T") || ""),
value: value ? this.data.exportValue : "",
};

const name = Name.get(value ? this.data.exportValue : "Off");
dict.set("V", name);
dict.set("AS", name);
Expand All @@ -1539,7 +1549,7 @@ class ButtonWidgetAnnotation extends WidgetAnnotation {
writeDict(dict, buffer, originalTransform);
buffer.push("\nendobj\n");

return [{ ref: this.ref, data: buffer.join("") }];
return [{ ref: this.ref, data: buffer.join(""), xfa }];
}

async _saveRadioButton(evaluator, task, annotationStorage) {
Expand All @@ -1555,6 +1565,11 @@ class ButtonWidgetAnnotation extends WidgetAnnotation {
return null;
}

const xfa = {
path: stringToPDFString(dict.get("T") || ""),
value: value ? this.data.buttonValue : "",
};

const name = Name.get(value ? this.data.buttonValue : "Off");
let parentBuffer = null;
const encrypt = evaluator.xref.encrypt;
Expand Down Expand Up @@ -1593,9 +1608,13 @@ class ButtonWidgetAnnotation extends WidgetAnnotation {
writeDict(dict, buffer, originalTransform);
buffer.push("\nendobj\n");

const newRefs = [{ ref: this.ref, data: buffer.join("") }];
const newRefs = [{ ref: this.ref, data: buffer.join(""), xfa }];
if (parentBuffer !== null) {
newRefs.push({ ref: this.parent, data: parentBuffer.join("") });
newRefs.push({
ref: this.parent,
data: parentBuffer.join(""),
xfa: null,
});
}

return newRefs;
Expand Down
31 changes: 27 additions & 4 deletions src/core/worker.js
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ import {
VerbosityLevel,
warn,
} from "../shared/util.js";
import { clearPrimitiveCaches, Ref } from "./primitives.js";
import { clearPrimitiveCaches, Dict, isDict, Ref } from "./primitives.js";
import { LocalPdfManager, NetworkPdfManager } from "./pdf_manager.js";
import { incrementalUpdate } from "./writer.js";
import { isNodeJS } from "../shared/is_node.js";
Expand Down Expand Up @@ -521,7 +521,10 @@ class WorkerMessageHandler {
filename,
}) {
pdfManager.requestLoadedStream();
const promises = [pdfManager.onLoadedStream()];
const promises = [
pdfManager.onLoadedStream(),
pdfManager.ensureCatalog("acroForm"),
];
const document = pdfManager.pdfDocument;
for (let pageIndex = 0; pageIndex < numPages; pageIndex++) {
promises.push(
Expand All @@ -532,7 +535,7 @@ class WorkerMessageHandler {
);
}

return Promise.all(promises).then(([stream, ...refs]) => {
return Promise.all(promises).then(([stream, acroForm, ...refs]) => {
let newRefs = [];
for (const ref of refs) {
newRefs = ref
Expand All @@ -545,6 +548,20 @@ class WorkerMessageHandler {
return stream.bytes;
}

acroForm = isDict(acroForm) ? acroForm : Dict.empty;
const xfa = acroForm.get("XFA") || [];
let xfaDatasets = null;
if (Array.isArray(xfa)) {
for (let i = 0, ii = xfa.length; i < ii; i += 2) {
if (xfa[i] === "datasets") {
xfaDatasets = xfa[i + 1];
}
}
} else {
// TODO: Support XFA streams.
warn("Unsupported XFA type.");
}

const xref = document.xref;
let newXrefInfo = Object.create(null);
if (xref.trailer) {
Expand Down Expand Up @@ -572,7 +589,13 @@ class WorkerMessageHandler {
}
xref.resetNewRef();

return incrementalUpdate(stream.bytes, newXrefInfo, newRefs);
return incrementalUpdate(
stream.bytes,
newXrefInfo,
newRefs,
xref,
xfaDatasets
);
});
});

Expand Down
58 changes: 56 additions & 2 deletions src/core/writer.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,14 @@
*/
/* eslint no-var: error */

import { bytesToString, escapeString } from "../shared/util.js";
import {
bytesToString,
escapeString,
parseXFAPath,
warn,
} from "../shared/util.js";
import { Dict, isDict, isName, isRef, isStream, Name } from "./primitives.js";
import { SimpleDOMNode, SimpleXMLParser } from "../shared/xml_parser.js";
import { calculateMD5 } from "./crypto.js";

function writeDict(dict, buffer, transform) {
Expand Down Expand Up @@ -123,7 +129,55 @@ function computeMD5(filesize, xrefInfo) {
return bytesToString(calculateMD5(array));
}

function incrementalUpdate(originalData, xrefInfo, newRefs) {
/**
 * Mirror saved form values into the XFA "datasets" XML and queue the
 * rewritten datasets object for saving.
 *
 * Every entry of `newRefs` carrying an `xfa` object (`{ path, value }`) is
 * looked up in the parsed datasets XML; when the node is found its children
 * are replaced with a single text node holding `value`. The serialized XML
 * is then appended to `newRefs` as a new version of the `datasetsRef` object.
 *
 * @param {Object|null|undefined} datasetsRef - Reference (`num`/`gen`) of the
 *   XFA datasets stream. Nothing is done when it is missing.
 * @param {Array<Object>} newRefs - Objects about to be written; entries may
 *   have an `xfa` property. This array is mutated: on success one extra
 *   `{ ref, data }` entry is pushed.
 * @param {Object|null|undefined} xref - Cross-reference table used to fetch
 *   the datasets stream and to reach the encryption settings. Nothing is done
 *   when it is missing.
 */
function updateXFA(datasetsRef, newRefs, xref) {
  // Treat `undefined` like `null`: callers that still use the shorter
  // incrementalUpdate(originalData, xrefInfo, newRefs) form pass neither
  // argument, and must not crash on `xref.fetchIfRef`.
  if (!datasetsRef || !xref) {
    return;
  }
  const datasets = xref.fetchIfRef(datasetsRef);
  const str = bytesToString(datasets.getBytes());
  const xml = new SimpleXMLParser(/* hasAttributes */ true).parseFromString(
    str
  );
  // NOTE(review): presumably the parser yields no document for malformed
  // XML - confirm against SimpleXMLParser. Either way, without a root element
  // there is nothing to update, so keep the save going without XFA changes.
  if (!xml || !xml.documentElement) {
    warn("Unable to parse the XFA datasets, XFA data won't be updated.");
    return;
  }

  for (const { xfa } of newRefs) {
    if (!xfa) {
      // Entry has no XFA counterpart (e.g. appearance streams, parents).
      continue;
    }
    const { path, value } = xfa;
    if (!path) {
      continue;
    }
    const node = xml.documentElement.searchNode(parseXFAPath(path), 0);
    if (node) {
      node.childNodes = [new SimpleDOMNode("#text", value)];
    } else {
      warn(`Node not found for path: ${path}`);
    }
  }
  const buffer = [];
  xml.documentElement.dump(buffer);
  let updatedXml = buffer.join("");

  // Encrypt before measuring: /Length must describe the bytes that are
  // actually written between `stream` and `endstream`.
  const encrypt = xref.encrypt;
  if (encrypt) {
    const transform = encrypt.createCipherTransform(
      datasetsRef.num,
      datasetsRef.gen
    );
    updatedXml = transform.encryptString(updatedXml);
  }
  const data =
    `${datasetsRef.num} ${datasetsRef.gen} obj\n` +
    `<< /Type /EmbeddedFile /Length ${updatedXml.length}>>\nstream\n` +
    updatedXml +
    "\nendstream\nendobj\n";

  newRefs.push({ ref: datasetsRef, data });
}

function incrementalUpdate(originalData, xrefInfo, newRefs, xref, datasetsRef) {
updateXFA(datasetsRef, newRefs, xref);

const newXref = new Dict(null);
const refForXrefTable = xrefInfo.newRef;

Expand Down
2 changes: 1 addition & 1 deletion src/display/metadata.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
*/

import { assert } from "../shared/util.js";
import { SimpleXMLParser } from "./xml_parser.js";
import { SimpleXMLParser } from "../shared/xml_parser.js";

class Metadata {
constructor(data) {
Expand Down
69 changes: 69 additions & 0 deletions src/shared/util.js
Original file line number Diff line number Diff line change
Expand Up @@ -910,6 +910,73 @@ const createObjectURL = (function createObjectURLClosure() {
};
})();

/**
 * Break a dotted AcroForm field name into its XFA path components.
 *
 * AcroForm field names address repeated XFA elements with an array-like
 * suffix, e.g. `foo.bar[nnn]`.
 * see: XFA Spec Chapter 3 - Repeated Elements
 *
 * @param {string} path - XFA path name.
 * @returns {Array} - One `{ name, pos }` object per dot-separated component;
 *   `pos` is the number found in a trailing `[nnn]` suffix, or 0 when the
 *   component has no such suffix.
 */
function parseXFAPath(path) {
  const indexedComponent = /(.+)\[([0-9]+)\]$/;
  const parts = [];
  for (const segment of path.split(".")) {
    const found = indexedComponent.exec(segment);
    if (found === null) {
      // No trailing index: the whole segment is the name.
      parts.push({ name: segment, pos: 0 });
    } else {
      const [, name, index] = found;
      parts.push({ name, pos: parseInt(index, 10) });
    }
  }
  return parts;
}

// Named XML entities, keyed by the code point of the character they replace.
const XMLEntities = {
  /* < */ 0x3c: "&lt;",
  /* > */ 0x3e: "&gt;",
  /* & */ 0x26: "&amp;",
  /* " */ 0x22: "&quot;",
  /* ' */ 0x27: "&apos;",
};

/**
 * Encode a string for inclusion in XML.
 *
 * Printable ASCII (0x20-0x7e) is copied as-is, except for the five characters
 * listed in `XMLEntities`, which become named entities. Every other code
 * point is written as an uppercase hexadecimal character reference
 * (`&#xNNNN;`).
 *
 * @param {string} str - Text to encode.
 * @returns {string} The encoded text; `str` itself is returned when nothing
 *   needed encoding.
 */
function encodeToXmlString(str) {
  const buffer = [];
  // Start of the pending run of characters that can be copied unchanged.
  let start = 0;
  for (let i = 0, ii = str.length; i < ii; i++) {
    const char = str.codePointAt(i);
    if (0x20 <= char && char <= 0x7e) {
      // ascii
      const entity = XMLEntities[char];
      if (entity) {
        if (start < i) {
          buffer.push(str.substring(start, i));
        }
        buffer.push(entity);
        start = i + 1;
      }
    } else {
      if (start < i) {
        buffer.push(str.substring(start, i));
      }
      buffer.push(`&#x${char.toString(16).toUpperCase()};`);
      // Only code points above the BMP occupy two UTF-16 units. Lone
      // surrogates and U+FFFE/U+FFFF are a single unit, so skipping ahead for
      // them would silently drop the character that follows.
      if (char > 0xffff) {
        i++;
      }
      start = i + 1;
    }
  }

  if (buffer.length === 0) {
    return str;
  }

  if (start < str.length) {
    buffer.push(str.substring(start, str.length));
  }

  return buffer.join("");
}

export {
BaseException,
FONT_IDENTITY_MATRIX,
Expand Down Expand Up @@ -947,6 +1014,7 @@ export {
createPromiseCapability,
createObjectURL,
escapeString,
encodeToXmlString,
getModificationDate,
getVerbosityLevel,
info,
Expand All @@ -959,6 +1027,7 @@ export {
createValidAbsoluteUrl,
IsLittleEndianCached,
IsEvalSupportedCached,
parseXFAPath,
removeNullCharacters,
setVerbosityLevel,
shadow,
Expand Down
Loading

0 comments on commit 68b99c5

Please sign in to comment.