Skip to content

Commit

Permalink
Do not export the file field in XMP (#11083)
Browse files Browse the repository at this point in the history
* Do not export the file field in XMP

* changelog

* use with in tests, move comment

* checkstyle
  • Loading branch information
Siedlerchr authored Mar 23, 2024
1 parent 369b9a7 commit 8d08279
Show file tree
Hide file tree
Showing 3 changed files with 98 additions and 103 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ Note that this project **does not** adhere to [Semantic Versioning](https://semv
- Keywords field are now displayed as tags. [#10910](https://github.com/JabRef/jabref/pull/10910)
- Citation relations now get more information, and have quick access to view the articles in a browser without adding them to the library [#10869](https://github.com/JabRef/jabref/issues/10869)
- Importer/Exporter for CFF format now supports JabRef `cites` and `related` relationships, as well as all fields from the CFF specification. [#10993](https://github.com/JabRef/jabref/issues/10993)
- The XMP-Exporter no longer writes the content of the `file`-field. [#11083](https://github.com/JabRef/jabref/pull/11083)

### Fixed

Expand Down
98 changes: 52 additions & 46 deletions src/main/java/org/jabref/logic/xmp/XmpUtilWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@

/**
* Writes given BibEntries into the XMP part of a PDF file.
*
* <p>
* The conversion of a BibEntry to the XMP data (using Dublin Core) is done at
* {@link DublinCoreExtractor#fillDublinCoreSchema()}
*/
Expand All @@ -56,13 +56,13 @@ public XmpUtilWriter(XmpPreferences xmpPreferences) {
/**
* Try to write the given BibTexEntry in the XMP-stream of the given
* PDF-file.
*
* <p>
* Throws an IOException if the file cannot be read or written, so the user
* can remove a lock or cancel the operation.
*
* <p>
* The method will overwrite existing BibTeX-XMP-data, but keep other
* existing metadata.
*
* <p>
* This is a convenience method for writeXMP(File, Collection).
*
* @param file The path to write to.
Expand All @@ -75,18 +75,18 @@ public XmpUtilWriter(XmpPreferences xmpPreferences) {
public void writeXmp(Path file,
BibEntry entry,
BibDatabase database)
throws IOException, TransformerException {
throws IOException, TransformerException {
writeXmp(file, List.of(entry), database);
}

/**
* Writes the information of the bib entry to the dublin core schema using
* a custom extractor.
*
* @param dcSchema Dublin core schema, which is filled with the bib entry.
* @param entry The entry, which is added to the dublin core metadata.
* @param database An optional database which the given bibtex entries belong to, which will be used to
* resolve strings. If the database is null the strings will not be resolved.
* @param dcSchema Dublin core schema, which is filled with the bib entry.
* @param entry The entry, which is added to the dublin core metadata.
* @param database An optional database which the given bibtex entries belong to, which will be used to
* resolve strings. If the database is null the strings will not be resolved.
*/
private void writeToDCSchema(DublinCoreSchema dcSchema,
BibEntry entry,
Expand All @@ -98,8 +98,8 @@ private void writeToDCSchema(DublinCoreSchema dcSchema,
/**
* Writes the information of the bib entry to the dublin core schema using a custom extractor.
*
* @param dcSchema Dublin core schema, which is filled with the bib entry.
* @param entry The entry, which is added to the dublin core metadata.
* @param dcSchema Dublin core schema, which is filled with the bib entry.
* @param entry The entry, which is added to the dublin core metadata.
*/
private void writeToDCSchema(DublinCoreSchema dcSchema,
BibEntry entry) {
Expand All @@ -109,7 +109,7 @@ private void writeToDCSchema(DublinCoreSchema dcSchema,

/**
* Try to write the given BibTexEntries as DublinCore XMP Schemas
*
* <p>
* Existing DublinCore schemas in the document are removed
*
* @param document The pdf document to write to.
Expand All @@ -120,7 +120,7 @@ private void writeToDCSchema(DublinCoreSchema dcSchema,
private void writeDublinCore(PDDocument document,
List<BibEntry> entries,
BibDatabase database)
throws IOException, TransformerException {
throws IOException, TransformerException {

List<BibEntry> resolvedEntries;
if (database == null) {
Expand Down Expand Up @@ -166,10 +166,9 @@ private void writeDublinCore(PDDocument document,
/**
* This method generates an xmp metadata string in dublin core format.
*
* @param entries A list of entries, which are added to the dublin core metadata.
*
* @return If something goes wrong (e.g. an exception is thrown), the method returns an empty string,
* otherwise it returns the xmp metadata as a string in dublin core format.
* @param entries A list of entries, which are added to the dublin core metadata.
* @return If something goes wrong (e.g. an exception is thrown), the method returns an empty string,
* otherwise it returns the xmp metadata as a string in dublin core format.
*/
private String generateXmpStringWithXmpDeclaration(List<BibEntry> entries) {
XMPMetadata meta = XMPMetadata.createXMPMetadata();
Expand Down Expand Up @@ -198,10 +197,9 @@ private String generateXmpStringWithXmpDeclaration(List<BibEntry> entries) {
* metadata section <?xpacket begin=...>.
* <br/>
*
* @param entries A list of entries, which are added to the dublin core metadata.
*
* @return If something goes wrong (e.g. an exception is thrown), the method returns an empty string,
* otherwise it returns the xmp metadata without metadata description as a string in dublin core format.
* @param entries A list of entries, which are added to the dublin core metadata.
* @return If something goes wrong (e.g. an exception is thrown), the method returns an empty string,
* otherwise it returns the xmp metadata without metadata description as a string in dublin core format.
*/
public String generateXmpStringWithoutXmpDeclaration(List<BibEntry> entries) {
String xmpContent = generateXmpStringWithXmpDeclaration(entries);
Expand Down Expand Up @@ -237,34 +235,42 @@ private void writeDocumentInformation(PDDocument document,
for (Field field : resolvedEntry.getFields()) {
if (useXmpPrivacyFilter && xmpPreferences.getXmpPrivacyFilter().contains(field)) {
// erase field instead of adding it
if (StandardField.AUTHOR == field) {
di.setAuthor(null);
} else if (StandardField.TITLE == field) {
di.setTitle(null);
} else if (StandardField.KEYWORDS == field) {
di.setKeywords(null);
} else if (StandardField.ABSTRACT == field) {
di.setSubject(null);
} else {
di.setCustomMetadataValue("bibtex/" + field, null);
switch (field) {
case StandardField.AUTHOR ->
di.setAuthor(null);
case StandardField.TITLE ->
di.setTitle(null);
case StandardField.KEYWORDS ->
di.setKeywords(null);
case StandardField.ABSTRACT ->
di.setSubject(null);
// do not write file field
case StandardField.FILE -> {
}
case null, default ->
di.setCustomMetadataValue("bibtex/" + field, null);
}
continue;
}

// LaTeX content is removed from the string for "standard" fields in the PDF
String value = unprotectTermsFormatter.format(resolvedEntry.getField(field).get());
String value = unprotectTermsFormatter.format(resolvedEntry.getField(field).orElse(""));

if (StandardField.AUTHOR == field) {
di.setAuthor(value);
} else if (StandardField.TITLE == field) {
di.setTitle(value);
} else if (StandardField.KEYWORDS == field) {
di.setKeywords(value);
} else if (StandardField.ABSTRACT == field) {
di.setSubject(value);
} else {
switch (field) {
case StandardField.AUTHOR ->
di.setAuthor(value);
case StandardField.TITLE ->
di.setTitle(value);
case StandardField.KEYWORDS ->
di.setKeywords(value);
case StandardField.ABSTRACT ->
di.setSubject(value);
// do not write file field
case StandardField.FILE -> {
}
case null, default ->
resolvedEntry.getField(field).ifPresent(val -> di.setCustomMetadataValue("bibtex/" + field, val));
// We hit the case of an PDF-unsupported field --> write it directly
di.setCustomMetadataValue("bibtex/" + field, resolvedEntry.getField(field).get());
}
}
di.setCustomMetadataValue("bibtex/entrytype", resolvedEntry.getType().getDisplayName());
Expand All @@ -273,13 +279,13 @@ private void writeDocumentInformation(PDDocument document,
/**
* Try to write the given BibTexEntry in the XMP-stream of the given
* PDF-file.
*
* <p>
* Throws an IOException if the file cannot be read or written, so the user
* can remove a lock or cancel the operation.
*
* <p>
* The method will overwrite existing BibTeX-XMP-data, but keep other
* existing metadata.
*
* <p>
* The code for using PDFBox is also used at {@link EmbeddedBibFilePdfExporter#embedBibTex(String, Path)}.
*
* @param path The file to write the entries to.
Expand All @@ -292,7 +298,7 @@ private void writeDocumentInformation(PDDocument document,
public void writeXmp(Path path,
List<BibEntry> bibtexEntries,
BibDatabase database)
throws IOException, TransformerException {
throws IOException, TransformerException {
List<BibEntry> resolvedEntries;
if (database == null) {
resolvedEntries = bibtexEntries;
Expand Down
102 changes: 45 additions & 57 deletions src/test/java/org/jabref/logic/exporter/XmpPdfExporterTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,9 @@
class XmpPdfExporterTest {

@TempDir static Path tempDir;

private static BibEntry olly2018 = new BibEntry(StandardEntryType.Article);
private static BibEntry toral2006 = new BibEntry(StandardEntryType.Article);
private static BibEntry vapnik2000 = new BibEntry(StandardEntryType.Article);
private static final BibEntry OLLY_2018 = new BibEntry(StandardEntryType.Article);
private static final BibEntry TORAL_2006 = new BibEntry(StandardEntryType.Article);
private static final BibEntry VAPNIK_2000 = new BibEntry(StandardEntryType.Article);

private XmpPdfExporter exporter;
private PdfXmpImporter importer;
Expand All @@ -61,46 +60,38 @@ class XmpPdfExporterTest {
private FilePreferences filePreferences;

private static void initBibEntries() throws IOException {
olly2018.setCitationKey("Olly2018");
olly2018.setField(StandardField.AUTHOR, "Olly and Johannes");
olly2018.setField(StandardField.TITLE, "Stefan's palace");
olly2018.setField(StandardField.JOURNAL, "Test Journal");
olly2018.setField(StandardField.VOLUME, "1");
olly2018.setField(StandardField.NUMBER, "1");
olly2018.setField(StandardField.PAGES, "1-2");
olly2018.setMonth(Month.MARCH);
olly2018.setField(StandardField.ISSN, "978-123-123");
olly2018.setField(StandardField.NOTE, "NOTE");
olly2018.setField(StandardField.ABSTRACT, "ABSTRACT");
olly2018.setField(StandardField.COMMENT, "COMMENT");
olly2018.setField(StandardField.DOI, "10/3212.3123");
olly2018.setField(StandardField.FILE, ":article_dublinCore.pdf:PDF");
olly2018.setField(StandardField.GROUPS, "NO");
olly2018.setField(StandardField.HOWPUBLISHED, "online");
olly2018.setField(StandardField.KEYWORDS, "k1, k2");
olly2018.setField(StandardField.OWNER, "me");
olly2018.setField(StandardField.REVIEW, "review");
olly2018.setField(StandardField.URL, "https://www.olly2018.edu");

LinkedFile linkedFile = createDefaultLinkedFile("existing.pdf", tempDir);
olly2018.setFiles(List.of(linkedFile));

toral2006.setField(StandardField.AUTHOR, "Toral, Antonio and Munoz, Rafael");
toral2006.setField(StandardField.TITLE, "A proposal to automatically build and maintain gazetteers for Named Entity Recognition by using Wikipedia");
toral2006.setField(StandardField.BOOKTITLE, "Proceedings of EACL");
toral2006.setField(StandardField.PAGES, "56--61");
toral2006.setField(StandardField.EPRINTTYPE, "asdf");
toral2006.setField(StandardField.OWNER, "Ich");
toral2006.setField(StandardField.URL, "www.url.de");

toral2006.setFiles(List.of(new LinkedFile("non-existing", "path/to/nowhere.pdf", "PDF")));

vapnik2000.setCitationKey("vapnik2000");
vapnik2000.setField(StandardField.TITLE, "The Nature of Statistical Learning Theory");
vapnik2000.setField(StandardField.PUBLISHER, "Springer Science + Business Media");
vapnik2000.setField(StandardField.AUTHOR, "Vapnik, Vladimir N.");
vapnik2000.setField(StandardField.DOI, "10.1007/978-1-4757-3264-1");
vapnik2000.setField(StandardField.OWNER, "Ich");
OLLY_2018.withCitationKey("Olly2018")
.withField(StandardField.AUTHOR, "Olly and Johannes")
.withField(StandardField.TITLE, "Stefan's palace")
.withField(StandardField.JOURNAL, "Test Journal")
.withField(StandardField.VOLUME, "1")
.withField(StandardField.NUMBER, "1")
.withField(StandardField.PAGES, "1-2")
.withMonth(Month.MARCH)
.withField(StandardField.ISSN, "978-123-123")
.withField(StandardField.NOTE, "NOTE")
.withField(StandardField.ABSTRACT, "ABSTRACT")
.withField(StandardField.COMMENT, "COMMENT").withField(StandardField.DOI, "10/3212.3123").withField(StandardField.FILE, ":article_dublinCore.pdf:PDF")
.withField(StandardField.GROUPS, "NO")
.withField(StandardField.HOWPUBLISHED, "online")
.withField(StandardField.KEYWORDS, "k1, k2").withField(StandardField.OWNER, "me").withField(StandardField.REVIEW, "review")
.withField(StandardField.URL, "https://www.olly2018.edu")
.withFiles(List.of(createDefaultLinkedFile("existing.pdf", tempDir)));

TORAL_2006.withField(StandardField.AUTHOR, "Toral, Antonio and Munoz, Rafael")
.withField(StandardField.TITLE, "A proposal to automatically build and maintain gazetteers for Named Entity Recognition by using Wikipedia")
.withField(StandardField.BOOKTITLE, "Proceedings of EACL")
.withField(StandardField.PAGES, "56--61")
.withField(StandardField.EPRINTTYPE, "asdf")
.withField(StandardField.OWNER, "Ich")
.withField(StandardField.URL, "www.url.de")
.withFiles(List.of(new LinkedFile("non-existing", "path/to/nowhere.pdf", "PDF")));

VAPNIK_2000.withCitationKey("vapnik2000")
.withField(StandardField.TITLE, "The Nature of Statistical Learning Theory")
.withField(StandardField.PUBLISHER, "Springer Science + Business Media")
.withField(StandardField.AUTHOR, "Vapnik, Vladimir N.")
.withField(StandardField.DOI, "10.1007/978-1-4757-3264-1").withField(StandardField.OWNER, "Ich");
}

/**
Expand All @@ -124,9 +115,9 @@ void setUp() throws IOException {
BibDatabase dataBase = databaseContext.getDatabase();

initBibEntries();
dataBase.insertEntry(olly2018);
dataBase.insertEntry(toral2006);
dataBase.insertEntry(vapnik2000);
dataBase.insertEntry(OLLY_2018);
dataBase.insertEntry(TORAL_2006);
dataBase.insertEntry(VAPNIK_2000);
}

@AfterEach
Expand All @@ -136,28 +127,28 @@ void reset() throws IOException {
entry.clearField(StandardField.FILE);
}
LinkedFile linkedFile = createDefaultLinkedFile("existing.pdf", tempDir);
olly2018.setFiles(List.of(linkedFile));
toral2006.setFiles(List.of(new LinkedFile("non-existing", "path/to/nowhere.pdf", "PDF")));
OLLY_2018.setFiles(List.of(linkedFile));
TORAL_2006.setFiles(List.of(new LinkedFile("non-existing", "path/to/nowhere.pdf", "PDF")));
}

@ParameterizedTest
@MethodSource("provideBibEntriesWithValidPdfFileLinks")
void successfulExportToAllFilesOfEntry(BibEntry bibEntryWithValidPdfFileLink) throws Exception {
assertTrue(exporter.exportToAllFilesOfEntry(databaseContext, filePreferences, bibEntryWithValidPdfFileLink, List.of(olly2018), abbreviationRepository));
assertTrue(exporter.exportToAllFilesOfEntry(databaseContext, filePreferences, bibEntryWithValidPdfFileLink, List.of(OLLY_2018), abbreviationRepository));
}

@ParameterizedTest
@MethodSource("provideBibEntriesWithInvalidPdfFileLinks")
void unsuccessfulExportToAllFilesOfEntry(BibEntry bibEntryWithValidPdfFileLink) throws Exception {
assertFalse(exporter.exportToAllFilesOfEntry(databaseContext, filePreferences, bibEntryWithValidPdfFileLink, List.of(olly2018), abbreviationRepository));
assertFalse(exporter.exportToAllFilesOfEntry(databaseContext, filePreferences, bibEntryWithValidPdfFileLink, List.of(OLLY_2018), abbreviationRepository));
}

public static Stream<Arguments> provideBibEntriesWithValidPdfFileLinks() {
return Stream.of(Arguments.of(olly2018));
return Stream.of(Arguments.of(OLLY_2018));
}

public static Stream<Arguments> provideBibEntriesWithInvalidPdfFileLinks() {
return Stream.of(Arguments.of(vapnik2000), Arguments.of(toral2006));
return Stream.of(Arguments.of(VAPNIK_2000), Arguments.of(TORAL_2006));
}

@ParameterizedTest
Expand Down Expand Up @@ -218,10 +209,6 @@ public static Stream<Arguments> providePathsToInvalidPDFs() throws IOException {
}

private static LinkedFile createDefaultLinkedFile(String fileName, Path tempDir) throws IOException {
return createDefaultLinkedFile("", fileName, tempDir);
}

private static LinkedFile createDefaultLinkedFile(String description, String fileName, Path tempDir) throws IOException {
Path pdfFile = tempDir.resolve(fileName);
try (PDDocument pdf = new PDDocument()) {
pdf.addPage(new PDPage());
Expand All @@ -231,3 +218,4 @@ private static LinkedFile createDefaultLinkedFile(String description, String fil
return new LinkedFile("", pdfFile, "PDF");
}
}

0 comments on commit 8d08279

Please sign in to comment.