diff --git a/src/core/writer.js b/src/core/writer.js index 16c58fe97bdc0..4d61ad73a8e8f 100644 --- a/src/core/writer.js +++ b/src/core/writer.js @@ -130,9 +130,7 @@ function updateXFA(datasetsRef, newRefs, xref) { } const datasets = xref.fetchIfRef(datasetsRef); const str = bytesToString(datasets.getBytes()); - const xml = new SimpleXMLParser(/* hasAttributes */ true).parseFromString( - str - ); + const xml = new SimpleXMLParser({ hasAttributes: true }).parseFromString(str); for (const { xfa } of newRefs) { if (!xfa) { diff --git a/src/display/metadata.js b/src/display/metadata.js index 963eac9264552..c34bd4907f031 100644 --- a/src/display/metadata.js +++ b/src/display/metadata.js @@ -24,7 +24,7 @@ class Metadata { data = this._repair(data); // Convert the string to an XML document. - const parser = new SimpleXMLParser(); + const parser = new SimpleXMLParser({ lowerCaseName: true }); const xmlDocument = parser.parseFromString(data); this._metadataMap = new Map(); @@ -32,6 +32,7 @@ class Metadata { if (xmlDocument) { this._parse(xmlDocument); } + this._data = data; } _repair(data) { @@ -79,40 +80,71 @@ class Metadata { }); } + _getSequence(entry) { + const name = entry.nodeName; + if (name !== "rdf:bag" && name !== "rdf:seq" && name !== "rdf:alt") { + return null; + } + + return entry.childNodes.filter(node => node.nodeName === "rdf:li"); + } + + _getCreators(entry) { + if (entry.nodeName !== "dc:creator") { + return false; + } + if (!entry.hasChildNodes()) { + return true; + } + + // Child must be a Bag (unordered array) or a Seq. + const seqNode = entry.childNodes[0]; + const authors = this._getSequence(seqNode) || []; + this._metadataMap.set( + entry.nodeName, + authors.map(node => node.textContent.trim()) + ); + + return true; + } + _parse(xmlDocument) { let rdf = xmlDocument.documentElement; - if (rdf.nodeName.toLowerCase() !== "rdf:rdf") { + if (rdf.nodeName !== "rdf:rdf") { // Wrapped in rdf = rdf.firstChild; - while (rdf && rdf.nodeName.toLowerCase() !== "rdf:rdf") { + while (rdf && rdf.nodeName !== "rdf:rdf") { rdf = rdf.nextSibling; } } - const nodeName = rdf ? rdf.nodeName.toLowerCase() : null; - if (!rdf || nodeName !== "rdf:rdf" || !rdf.hasChildNodes()) { + if (!rdf || rdf.nodeName !== "rdf:rdf" || !rdf.hasChildNodes()) { return; } - const children = rdf.childNodes; - for (let i = 0, ii = children.length; i < ii; i++) { - const desc = children[i]; - if (desc.nodeName.toLowerCase() !== "rdf:description") { + for (const desc of rdf.childNodes) { + if (desc.nodeName !== "rdf:description") { continue; } - for (let j = 0, jj = desc.childNodes.length; j < jj; j++) { - if (desc.childNodes[j].nodeName.toLowerCase() !== "#text") { - const entry = desc.childNodes[j]; - const name = entry.nodeName.toLowerCase(); - - this._metadataMap.set(name, entry.textContent.trim()); + for (const entry of desc.childNodes) { + const name = entry.nodeName; + if (name === "#text") { + continue; } + if (this._getCreators(entry)) { + continue; + } + this._metadataMap.set(name, entry.textContent.trim()); } } } + getRaw() { + return this._data; + } + get(name) { return this._metadataMap.has(name) ? this._metadataMap.get(name) : null; } diff --git a/src/scripting_api/doc.js b/src/scripting_api/doc.js index 063c4cf7e7816..6797e693570df 100644 --- a/src/scripting_api/doc.js +++ b/src/scripting_api/doc.js @@ -42,7 +42,7 @@ class Doc extends PDFObject { this._dirty = false; this._disclosed = false; this._media = undefined; - this._metadata = data.metadata; + this._metadata = data.metadata || ""; this._noautocomplete = undefined; this._nocache = undefined; this._spellDictionaryOrder = []; @@ -74,12 +74,13 @@ class Doc extends PDFObject { // and they're are read-only. this._info = new Proxy( { - title: this.title, - author: this.author, - subject: this.subject, - keywords: this.keywords, - creator: this.creator, - producer: this.producer, + title: this._title, + author: this._author, + authors: data.authors || [this._author], + subject: this._subject, + keywords: this._keywords, + creator: this._creator, + producer: this._producer, creationdate: this._creationDate, moddate: this._modDate, trapped: data.Trapped || "Unknown", diff --git a/src/shared/xml_parser.js b/src/shared/xml_parser.js index 2ba25f83a7472..582280b6a68bc 100644 --- a/src/shared/xml_parser.js +++ b/src/shared/xml_parser.js @@ -427,12 +427,13 @@ class SimpleDOMNode { } class SimpleXMLParser extends XMLParserBase { - constructor(hasAttributes = false) { + constructor({ hasAttributes = false, lowerCaseName = false }) { super(); this._currentFragment = null; this._stack = null; this._errorCode = XMLParserErrorCode.NoError; this._hasAttributes = hasAttributes; + this._lowerCaseName = lowerCaseName; } parseFromString(data) { @@ -476,6 +477,9 @@ class SimpleXMLParser extends XMLParserBase { } onBeginElement(name, attributes, isEmpty) { + if (this._lowerCaseName) { + name = name.toLowerCase(); + } const node = new SimpleDOMNode(name); node.childNodes = []; if (this._hasAttributes) { diff --git a/test/integration/scripting_spec.js b/test/integration/scripting_spec.js index 01f6e7ea5d47c..1b6d94bfff65c 100644 --- a/test/integration/scripting_spec.js +++ b/test/integration/scripting_spec.js @@ -451,4 +451,29 @@ describe("Interaction", () => { ); }); }); + + describe("in js-authors.pdf", () => { + let pages; + + beforeAll(async () => { + pages = await loadAndWait("js-authors.pdf", "#\\32 5R"); + }); + + afterAll(async () => { + await closePages(pages); + }); + + it("must print authors in a text field", async () => { + await Promise.all( + pages.map(async ([browserName, page]) => { + const text = await actAndWaitForInput(page, "#\\32 5R", async () => { + await page.click("[data-annotation-id='26R']"); + }); + expect(text) + .withContext(`In ${browserName}`) + .toEqual("author1::author2::author3::author4::author5"); + }) + ); + }); + }); }); diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index 6e52ba6968159..1676f297bd899 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -324,6 +324,7 @@ !tensor-allflags-withfunction.pdf !issue10084_reduced.pdf !issue4246.pdf +!js-authors.pdf !issue4461.pdf !issue4573.pdf !issue4722.pdf diff --git a/test/pdfs/js-authors.pdf b/test/pdfs/js-authors.pdf new file mode 100644 index 0000000000000..840a73c7ce714 Binary files /dev/null and b/test/pdfs/js-authors.pdf differ diff --git a/test/unit/metadata_spec.js b/test/unit/metadata_spec.js index 02eabd271c834..7d6b0d3246bdd 100644 --- a/test/unit/metadata_spec.js +++ b/test/unit/metadata_spec.js @@ -96,7 +96,7 @@ describe("metadata", function () { expect(metadata.get("dc:qux")).toEqual(null); expect(metadata.getAll()).toEqual({ - "dc:creator": "ODIS", + "dc:creator": ["ODIS"], "dc:title": "L'Odissee thématique logo Odisséé - décembre 2008.pub", "xap:creatortool": "PDFCreator Version 0.9.6", }); @@ -168,7 +168,7 @@ describe("metadata", function () { expect(metadata.get("dc:qux")).toEqual(null); expect(metadata.getAll()).toEqual({ - "dc:creator": "", + "dc:creator": [""], "dc:description": "", "dc:format": "application/pdf", "dc:subject": "", diff --git a/test/unit/xml_spec.js b/test/unit/xml_spec.js index a17f5e5ca008a..f7fbbc94f795b 100644 --- a/test/unit/xml_spec.js +++ b/test/unit/xml_spec.js @@ -47,8 +47,9 @@ describe("XML", function () { `; - const root = new SimpleXMLParser(true).parseFromString(xml) - .documentElement; + const root = new SimpleXMLParser({ hasAttributes: true }).parseFromString( + xml + ).documentElement; function getAttr(path) { return root.searchNode(parseXFAPath(path), 0).attributes[0].value; } @@ -96,8 +97,9 @@ describe("XML", function () { `; - const root = new SimpleXMLParser(true).parseFromString(xml) - .documentElement; + const root = new SimpleXMLParser({ hasAttributes: true }).parseFromString( + xml + ).documentElement; const buffer = []; root.dump(buffer); diff --git a/web/app.js b/web/app.js index f2c049f377169..d6a365e173313 100644 --- a/web/app.js +++ b/web/app.js @@ -1655,7 +1655,8 @@ const PDFViewerApplication = { baseURL: this.baseUrl, filesize: this._contentLength, filename: this._docFilename, - metadata: this.metadata, + metadata: this.metadata?.getRaw(), + authors: this.metadata?.get("dc:creator"), numPages: pdfDocument.numPages, URL: this.url, actions: docActions,