From 34cb5008ca32fb41994233e46ce00bca430e18ea Mon Sep 17 00:00:00 2001 From: danfickle Date: Sat, 12 Jan 2019 21:46:15 +1100 Subject: [PATCH] #79 - XMP metadata for PDF/UA compliance. --- .../pdfboxout/PdfBoxFontResolver.java | 7 +- .../pdfboxout/PdfBoxRenderer.java | 70 ++++++++++++++++++- .../pdfboxout/PdfRendererBuilder.java | 10 +++ .../pdfboxout/PdfRendererBuilderState.java | 1 + 4 files changed, 85 insertions(+), 3 deletions(-) diff --git a/openhtmltopdf-pdfbox/src/main/java/com/openhtmltopdf/pdfboxout/PdfBoxFontResolver.java b/openhtmltopdf-pdfbox/src/main/java/com/openhtmltopdf/pdfboxout/PdfBoxFontResolver.java index 2336e85fa..402621b6b 100644 --- a/openhtmltopdf-pdfbox/src/main/java/com/openhtmltopdf/pdfboxout/PdfBoxFontResolver.java +++ b/openhtmltopdf-pdfbox/src/main/java/com/openhtmltopdf/pdfboxout/PdfBoxFontResolver.java @@ -66,12 +66,14 @@ public class PdfBoxFontResolver implements FontResolver { private final List _collectionsToClose = new ArrayList(); private final FSCacheEx _fontMetricsCache; private final PdfAConformance _pdfAConformance; + private final boolean _pdfUaConform; - public PdfBoxFontResolver(SharedContext sharedContext, PDDocument doc, FSCacheEx pdfMetricsCache, PdfAConformance pdfAConformance) { + public PdfBoxFontResolver(SharedContext sharedContext, PDDocument doc, FSCacheEx pdfMetricsCache, PdfAConformance pdfAConformance, boolean pdfUaConform) { _sharedContext = sharedContext; _doc = doc; _fontMetricsCache = pdfMetricsCache; _pdfAConformance = pdfAConformance; + _pdfUaConform = pdfUaConform; // All fonts are required to be embedded in PDF/A documents, so we don't add the built-in fonts, if conformance is required. _fontFamilies = (_pdfAConformance == PdfAConformance.NONE) ? 
createInitialFontMap() : new HashMap>(); @@ -402,7 +404,8 @@ private FSFont resolveFont(SharedContext ctx, String[] families, float size, Ide } } - if (_pdfAConformance == PdfAConformance.NONE) { + if (_pdfAConformance == PdfAConformance.NONE && + !_pdfUaConform) { // We don't have a final fallback font for PDF/A documents as serif may not be available // unless the user has explicitly embedded it. diff --git a/openhtmltopdf-pdfbox/src/main/java/com/openhtmltopdf/pdfboxout/PdfBoxRenderer.java b/openhtmltopdf-pdfbox/src/main/java/com/openhtmltopdf/pdfboxout/PdfBoxRenderer.java index 209993c9a..976849621 100644 --- a/openhtmltopdf-pdfbox/src/main/java/com/openhtmltopdf/pdfboxout/PdfBoxRenderer.java +++ b/openhtmltopdf-pdfbox/src/main/java/com/openhtmltopdf/pdfboxout/PdfBoxRenderer.java @@ -51,6 +51,8 @@ import com.openhtmltopdf.util.Configuration; import com.openhtmltopdf.util.ThreadCtx; import com.openhtmltopdf.util.XRLog; + +import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.pdmodel.*; import org.apache.pdfbox.pdmodel.PDPageContentStream.AppendMode; import org.apache.pdfbox.pdmodel.common.PDMetadata; @@ -59,10 +61,12 @@ import org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure.PDStructureTreeRoot; import org.apache.pdfbox.pdmodel.encryption.PDEncryption; import org.apache.pdfbox.pdmodel.graphics.color.PDOutputIntent; +import org.apache.pdfbox.pdmodel.interactive.viewerpreferences.PDViewerPreferences; import org.apache.xmpbox.XMPMetadata; import org.apache.xmpbox.schema.AdobePDFSchema; import org.apache.xmpbox.schema.PDFAIdentificationSchema; import org.apache.xmpbox.schema.XMPBasicSchema; +import org.apache.xmpbox.schema.XMPSchema; import org.apache.xmpbox.type.BadFieldValueException; import org.apache.xmpbox.xml.XmpSerializer; import org.w3c.dom.Document; @@ -107,6 +111,7 @@ public class PdfBoxRenderer implements Closeable { private float _pdfVersion; private PdfAConformance _pdfAConformance; + private boolean _pdfUaConformance; 
private byte[] _colorProfile; @@ -139,6 +144,7 @@ public class PdfBoxRenderer implements Closeable { _mathmlImpl = state._mathmlImpl; _pdfAConformance = state._pdfAConformance; + _pdfUaConformance = state._pdfUaConform; _colorProfile = state._colorProfile; _dotsPerPoint = DEFAULT_DOTS_PER_POINT; @@ -167,7 +173,7 @@ public class PdfBoxRenderer implements Closeable { userAgent.setSharedContext(_sharedContext); _outputDevice.setSharedContext(_sharedContext); - PdfBoxFontResolver fontResolver = new PdfBoxFontResolver(_sharedContext, _pdfDoc, state._caches.get(CacheStore.PDF_FONT_METRICS), state._pdfAConformance); + PdfBoxFontResolver fontResolver = new PdfBoxFontResolver(_sharedContext, _pdfDoc, state._caches.get(CacheStore.PDF_FONT_METRICS), state._pdfAConformance, state._pdfUaConform); _sharedContext.setFontResolver(fontResolver); PdfBoxReplacedElementFactory replacedElementFactory = new PdfBoxReplacedElementFactory(_outputDevice, state._svgImpl, state._objectDrawerFactory, state._mathmlImpl); @@ -587,6 +593,12 @@ private void writePDFFast(List pages, RenderingContext c, Rectangle2D f firePreWrite(pageCount); // opportunity to adjust meta data setDidValues(doc); // set PDF header fields from meta data + if (_pdfUaConformance) { + addPdfUaXMPSchema(doc); + } else if (_pdfAConformance != PdfAConformance.NONE) { + addPdfASchema(doc, _pdfAConformance.getPart(), _pdfAConformance.getConformanceValue()); + } + DisplayListCollector dlCollector = new DisplayListCollector(_root.getLayer().getPages()); DisplayListContainer dlPages = dlCollector.collectRoot(c, _root.getLayer()); @@ -678,6 +690,62 @@ private void writePDF(List pages, RenderingContext c, Rectangle2D first _outputDevice.finish(c, _root); } + // Kindly provided by GurpusMaximus at: + // https://stackoverflow.com/questions/49682339/how-can-i-create-an-accessible-pdf-with-java-pdfbox-2-0-8-library-that-is-also-v + private void addPdfUaXMPSchema(PDDocument doc) { + try + { + PDDocumentCatalog catalog = 
doc.getDocumentCatalog(); + String lang = _doc.getDocumentElement().getAttribute("lang"); + catalog.setLanguage(lang != null ? lang : "English"); + catalog.setViewerPreferences(new PDViewerPreferences(new COSDictionary())); + catalog.getViewerPreferences().setDisplayDocTitle(true); + PDMarkInfo markInfo = new PDMarkInfo(); + markInfo.setMarked(true); + catalog.setMarkInfo(markInfo); + + PDDocumentInformation info = doc.getDocumentInformation(); + XMPMetadata xmp = XMPMetadata.createXMPMetadata(); + xmp.createAndAddDublinCoreSchema(); + xmp.getDublinCoreSchema().setTitle(info.getTitle()); + String metaDescription = _outputDevice.getMetadataByName("description"); + xmp.getDublinCoreSchema().setDescription(metaDescription != null ? metaDescription : info.getTitle()); + xmp.createAndAddPDFAExtensionSchemaWithDefaultNS(); + xmp.getPDFExtensionSchema().addNamespace( + "http://www.aiim.org/pdfa/ns/schema#", "pdfaSchema"); + xmp.getPDFExtensionSchema().addNamespace( + "http://www.aiim.org/pdfa/ns/property#", "pdfaProperty"); + xmp.getPDFExtensionSchema().addNamespace( + "http://www.aiim.org/pdfua/ns/id/", "pdfuaid"); + XMPSchema uaSchema = new XMPSchema(XMPMetadata.createXMPMetadata(), + "pdfaSchema", "pdfaSchema", "pdfaSchema"); + uaSchema.setTextPropertyValue("schema", + "PDF/UA Universal Accessibility Schema"); + uaSchema.setTextPropertyValue("namespaceURI", + "http://www.aiim.org/pdfua/ns/id/"); + uaSchema.setTextPropertyValue("prefix", "pdfuaid"); + XMPSchema uaProp = new XMPSchema(XMPMetadata.createXMPMetadata(), + "pdfaProperty", "pdfaProperty", "pdfaProperty"); + uaProp.setTextPropertyValue("name", "part"); + uaProp.setTextPropertyValue("valueType", "Integer"); + uaProp.setTextPropertyValue("category", "internal"); + uaProp.setTextPropertyValue("description", + "Indicates, which part of ISO 14289 standard is followed"); + uaSchema.addUnqualifiedSequenceValue("property", uaProp); + xmp.getPDFExtensionSchema().addBagValue("schemas", uaSchema); + 
xmp.getPDFExtensionSchema().setPrefix("pdfuaid"); + xmp.getPDFExtensionSchema().setTextPropertyValue("part", "1"); + XmpSerializer serializer = new XmpSerializer(); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + serializer.serialize(xmp, baos, true); + PDMetadata metadata = new PDMetadata(doc); + metadata.importXMPMetadata(baos.toByteArray()); + doc.getDocumentCatalog().setMetadata(metadata); + } catch (IOException|TransformerException e) { + throw new RuntimeException(e); + } + } + private void addPdfASchema(PDDocument document, int part, String conformance) { PDDocumentInformation information = document.getDocumentInformation(); XMPMetadata metadata = XMPMetadata.createXMPMetadata(); diff --git a/openhtmltopdf-pdfbox/src/main/java/com/openhtmltopdf/pdfboxout/PdfRendererBuilder.java b/openhtmltopdf-pdfbox/src/main/java/com/openhtmltopdf/pdfboxout/PdfRendererBuilder.java index 68e42761f..b38e0060d 100644 --- a/openhtmltopdf-pdfbox/src/main/java/com/openhtmltopdf/pdfboxout/PdfRendererBuilder.java +++ b/openhtmltopdf-pdfbox/src/main/java/com/openhtmltopdf/pdfboxout/PdfRendererBuilder.java @@ -129,6 +129,16 @@ public PdfRendererBuilder usePdfAConformance(PdfAConformance pdfAConformance) { this.state._pdfAConformance = pdfAConformance; return this; } + + /** + * Whether to conform to PDF/UA or Accessible PDF. False by default. + * @param pdfUaAccessibility + * @return this for method chaining + */ + public PdfRendererBuilder usePdfUaAccessbility(boolean pdfUaAccessibility) { + this.state._pdfUaConform = pdfUaAccessibility; + return this; + } /** * Sets the color profile, needed for PDF/A conformance. 
diff --git a/openhtmltopdf-pdfbox/src/main/java/com/openhtmltopdf/pdfboxout/PdfRendererBuilderState.java b/openhtmltopdf-pdfbox/src/main/java/com/openhtmltopdf/pdfboxout/PdfRendererBuilderState.java index 0614f865d..ae9dd6887 100644 --- a/openhtmltopdf-pdfbox/src/main/java/com/openhtmltopdf/pdfboxout/PdfRendererBuilderState.java +++ b/openhtmltopdf-pdfbox/src/main/java/com/openhtmltopdf/pdfboxout/PdfRendererBuilderState.java @@ -29,5 +29,6 @@ public class PdfRendererBuilderState extends BaseRendererBuilder.BaseRendererBui public PDDocument pddocument; public final Map> _caches = new EnumMap>(CacheStore.class); public PdfAConformance _pdfAConformance = PdfAConformance.NONE; + public boolean _pdfUaConform = false; public byte[] _colorProfile; }