Skip to content

Commit

Permalink
Do not export the file field in XMP (#11083)
Browse files Browse the repository at this point in the history
* Do not export the file field in XMP

* changelog

* use with in tests, move comment

* checkstyle
  • Loading branch information
Siedlerchr authored Mar 23, 2024
1 parent 369b9a7 commit 8d08279
Show file tree
Hide file tree
Showing 3 changed files with 98 additions and 103 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ Note that this project **does not** adhere to [Semantic Versioning](https://semv
- Keywords field are now displayed as tags. [#10910](https://github.com/JabRef/jabref/pull/10910)
- Citation relations now get more information, and have quick access to view the articles in a browser without adding them to the library [#10869](https://github.com/JabRef/jabref/issues/10869)
- Importer/Exporter for CFF format now supports JabRef `cites` and `related` relationships, as well as all fields from the CFF specification. [#10993](https://github.com/JabRef/jabref/issues/10993)
- The XMP-Exporter no longer writes the content of the `file`-field. [#11083](https://github.com/JabRef/jabref/pull/11083)

### Fixed

Expand Down
98 changes: 52 additions & 46 deletions src/main/java/org/jabref/logic/xmp/XmpUtilWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@

/**
* Writes given BibEntries into the XMP part of a PDF file.
*
* <p>
* The conversion of a BibEntry to the XMP data (using Dublin Core) is done at
* {@link DublinCoreExtractor#fillDublinCoreSchema()}
*/
Expand All @@ -56,13 +56,13 @@ public XmpUtilWriter(XmpPreferences xmpPreferences) {
/**
* Try to write the given BibTexEntry in the XMP-stream of the given
* PDF-file.
*
* <p>
* Throws an IOException if the file cannot be read or written, so the user
* can remove a lock or cancel the operation.
*
* <p>
* The method will overwrite existing BibTeX-XMP-data, but keep other
* existing metadata.
*
* <p>
* This is a convenience method for writeXMP(File, Collection).
*
* @param file The path to write to.
Expand All @@ -75,18 +75,18 @@ public XmpUtilWriter(XmpPreferences xmpPreferences) {
public void writeXmp(Path file,
BibEntry entry,
BibDatabase database)
throws IOException, TransformerException {
throws IOException, TransformerException {
writeXmp(file, List.of(entry), database);
}

/**
* Writes the information of the bib entry to the dublin core schema using
* a custom extractor.
*
* @param dcSchema Dublin core schema, which is filled with the bib entry.
* @param entry The entry, which is added to the dublin core metadata.
* @param database An optional database which the given bibtex entries belong to, which will be used to
* resolve strings. If the database is null the strings will not be resolved.
* @param dcSchema Dublin core schema, which is filled with the bib entry.
* @param entry The entry, which is added to the dublin core metadata.
* @param database An optional database which the given bibtex entries belong to, which will be used to
* resolve strings. If the database is null the strings will not be resolved.
*/
private void writeToDCSchema(DublinCoreSchema dcSchema,
BibEntry entry,
Expand All @@ -98,8 +98,8 @@ private void writeToDCSchema(DublinCoreSchema dcSchema,
/**
* Writes the information of the bib entry to the dublin core schema using a custom extractor.
*
* @param dcSchema Dublin core schema, which is filled with the bib entry.
* @param entry The entry, which is added to the dublin core metadata.
* @param dcSchema Dublin core schema, which is filled with the bib entry.
* @param entry The entry, which is added to the dublin core metadata.
*/
private void writeToDCSchema(DublinCoreSchema dcSchema,
BibEntry entry) {
Expand All @@ -109,7 +109,7 @@ private void writeToDCSchema(DublinCoreSchema dcSchema,

/**
* Try to write the given BibTexEntries as DublinCore XMP Schemas
*
* <p>
* Existing DublinCore schemas in the document are removed
*
* @param document The pdf document to write to.
Expand All @@ -120,7 +120,7 @@ private void writeToDCSchema(DublinCoreSchema dcSchema,
private void writeDublinCore(PDDocument document,
List<BibEntry> entries,
BibDatabase database)
throws IOException, TransformerException {
throws IOException, TransformerException {

List<BibEntry> resolvedEntries;
if (database == null) {
Expand Down Expand Up @@ -166,10 +166,9 @@ private void writeDublinCore(PDDocument document,
/**
* This method generates an xmp metadata string in dublin core format.
*
* @param entries A list of entries, which are added to the dublin core metadata.
*
* @return If something goes wrong (e.g. an exception is thrown), the method returns an empty string,
* otherwise it returns the xmp metadata as a string in dublin core format.
* @param entries A list of entries, which are added to the dublin core metadata.
* @return If something goes wrong (e.g. an exception is thrown), the method returns an empty string,
* otherwise it returns the xmp metadata as a string in dublin core format.
*/
private String generateXmpStringWithXmpDeclaration(List<BibEntry> entries) {
XMPMetadata meta = XMPMetadata.createXMPMetadata();
Expand Down Expand Up @@ -198,10 +197,9 @@ private String generateXmpStringWithXmpDeclaration(List<BibEntry> entries) {
* metadata section <?xpacket begin=...>.
* <br/>
*
* @param entries A list of entries, which are added to the dublin core metadata.
*
* @return If something goes wrong (e.g. an exception is thrown), the method returns an empty string,
* otherwise it returns the xmp metadata without metadata description as a string in dublin core format.
* @param entries A list of entries, which are added to the dublin core metadata.
* @return If something goes wrong (e.g. an exception is thrown), the method returns an empty string,
* otherwise it returns the xmp metadata without metadata description as a string in dublin core format.
*/
public String generateXmpStringWithoutXmpDeclaration(List<BibEntry> entries) {
String xmpContent = generateXmpStringWithXmpDeclaration(entries);
Expand Down Expand Up @@ -237,34 +235,42 @@ private void writeDocumentInformation(PDDocument document,
for (Field field : resolvedEntry.getFields()) {
if (useXmpPrivacyFilter && xmpPreferences.getXmpPrivacyFilter().contains(field)) {
// erase field instead of adding it
if (StandardField.AUTHOR == field) {
di.setAuthor(null);
} else if (StandardField.TITLE == field) {
di.setTitle(null);
} else if (StandardField.KEYWORDS == field) {
di.setKeywords(null);
} else if (StandardField.ABSTRACT == field) {
di.setSubject(null);
} else {
di.setCustomMetadataValue("bibtex/" + field, null);
switch (field) {
case StandardField.AUTHOR ->
di.setAuthor(null);
case StandardField.TITLE ->
di.setTitle(null);
case StandardField.KEYWORDS ->
di.setKeywords(null);
case StandardField.ABSTRACT ->
di.setSubject(null);
// do not write file field
case StandardField.FILE -> {
}
case null, default ->
di.setCustomMetadataValue("bibtex/" + field, null);
}
continue;
}

// LaTeX content is removed from the string for "standard" fields in the PDF
String value = unprotectTermsFormatter.format(resolvedEntry.getField(field).get());
String value = unprotectTermsFormatter.format(resolvedEntry.getField(field).orElse(""));

if (StandardField.AUTHOR == field) {
di.setAuthor(value);
} else if (StandardField.TITLE == field) {
di.setTitle(value);
} else if (StandardField.KEYWORDS == field) {
di.setKeywords(value);
} else if (StandardField.ABSTRACT == field) {
di.setSubject(value);
} else {
switch (field) {
case StandardField.AUTHOR ->
di.setAuthor(value);
case StandardField.TITLE ->
di.setTitle(value);
case StandardField.KEYWORDS ->
di.setKeywords(value);
case StandardField.ABSTRACT ->
di.setSubject(value);
// do not write file field
case StandardField.FILE -> {
}
case null, default ->
resolvedEntry.getField(field).ifPresent(val -> di.setCustomMetadataValue("bibtex/" + field, val));
// We hit the case of an PDF-unsupported field --> write it directly
di.setCustomMetadataValue("bibtex/" + field, resolvedEntry.getField(field).get());
}
}
di.setCustomMetadataValue("bibtex/entrytype", resolvedEntry.getType().getDisplayName());
Expand All @@ -273,13 +279,13 @@ private void writeDocumentInformation(PDDocument document,
/**
* Try to write the given BibTexEntry in the XMP-stream of the given
* PDF-file.
*
* <p>
* Throws an IOException if the file cannot be read or written, so the user
* can remove a lock or cancel the operation.
*
* <p>
* The method will overwrite existing BibTeX-XMP-data, but keep other
* existing metadata.
*
* <p>
* The code for using PDFBox is also used at {@link EmbeddedBibFilePdfExporter#embedBibTex(String, Path)}.
*
* @param path The file to write the entries to.
Expand All @@ -292,7 +298,7 @@ private void writeDocumentInformation(PDDocument document,
public void writeXmp(Path path,
List<BibEntry> bibtexEntries,
BibDatabase database)
throws IOException, TransformerException {
throws IOException, TransformerException {
List<BibEntry> resolvedEntries;
if (database == null) {
resolvedEntries = bibtexEntries;
Expand Down
102 changes: 45 additions & 57 deletions src/test/java/org/jabref/logic/exporter/XmpPdfExporterTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,9 @@
class XmpPdfExporterTest {

@TempDir static Path tempDir;

private static BibEntry olly2018 = new BibEntry(StandardEntryType.Article);
private static BibEntry toral2006 = new BibEntry(StandardEntryType.Article);
private static BibEntry vapnik2000 = new BibEntry(StandardEntryType.Article);
private static final BibEntry OLLY_2018 = new BibEntry(StandardEntryType.Article);
private static final BibEntry TORAL_2006 = new BibEntry(StandardEntryType.Article);
private static final BibEntry VAPNIK_2000 = new BibEntry(StandardEntryType.Article);

private XmpPdfExporter exporter;
private PdfXmpImporter importer;
Expand All @@ -61,46 +60,38 @@ class XmpPdfExporterTest {
private FilePreferences filePreferences;

private static void initBibEntries() throws IOException {
olly2018.setCitationKey("Olly2018");
olly2018.setField(StandardField.AUTHOR, "Olly and Johannes");
olly2018.setField(StandardField.TITLE, "Stefan's palace");
olly2018.setField(StandardField.JOURNAL, "Test Journal");
olly2018.setField(StandardField.VOLUME, "1");
olly2018.setField(StandardField.NUMBER, "1");
olly2018.setField(StandardField.PAGES, "1-2");
olly2018.setMonth(Month.MARCH);
olly2018.setField(StandardField.ISSN, "978-123-123");
olly2018.setField(StandardField.NOTE, "NOTE");
olly2018.setField(StandardField.ABSTRACT, "ABSTRACT");
olly2018.setField(StandardField.COMMENT, "COMMENT");
olly2018.setField(StandardField.DOI, "10/3212.3123");
olly2018.setField(StandardField.FILE, ":article_dublinCore.pdf:PDF");
olly2018.setField(StandardField.GROUPS, "NO");
olly2018.setField(StandardField.HOWPUBLISHED, "online");
olly2018.setField(StandardField.KEYWORDS, "k1, k2");
olly2018.setField(StandardField.OWNER, "me");
olly2018.setField(StandardField.REVIEW, "review");
olly2018.setField(StandardField.URL, "https://www.olly2018.edu");

LinkedFile linkedFile = createDefaultLinkedFile("existing.pdf", tempDir);
olly2018.setFiles(List.of(linkedFile));

toral2006.setField(StandardField.AUTHOR, "Toral, Antonio and Munoz, Rafael");
toral2006.setField(StandardField.TITLE, "A proposal to automatically build and maintain gazetteers for Named Entity Recognition by using Wikipedia");
toral2006.setField(StandardField.BOOKTITLE, "Proceedings of EACL");
toral2006.setField(StandardField.PAGES, "56--61");
toral2006.setField(StandardField.EPRINTTYPE, "asdf");
toral2006.setField(StandardField.OWNER, "Ich");
toral2006.setField(StandardField.URL, "www.url.de");

toral2006.setFiles(List.of(new LinkedFile("non-existing", "path/to/nowhere.pdf", "PDF")));

vapnik2000.setCitationKey("vapnik2000");
vapnik2000.setField(StandardField.TITLE, "The Nature of Statistical Learning Theory");
vapnik2000.setField(StandardField.PUBLISHER, "Springer Science + Business Media");
vapnik2000.setField(StandardField.AUTHOR, "Vapnik, Vladimir N.");
vapnik2000.setField(StandardField.DOI, "10.1007/978-1-4757-3264-1");
vapnik2000.setField(StandardField.OWNER, "Ich");
OLLY_2018.withCitationKey("Olly2018")
.withField(StandardField.AUTHOR, "Olly and Johannes")
.withField(StandardField.TITLE, "Stefan's palace")
.withField(StandardField.JOURNAL, "Test Journal")
.withField(StandardField.VOLUME, "1")
.withField(StandardField.NUMBER, "1")
.withField(StandardField.PAGES, "1-2")
.withMonth(Month.MARCH)
.withField(StandardField.ISSN, "978-123-123")
.withField(StandardField.NOTE, "NOTE")
.withField(StandardField.ABSTRACT, "ABSTRACT")
.withField(StandardField.COMMENT, "COMMENT").withField(StandardField.DOI, "10/3212.3123").withField(StandardField.FILE, ":article_dublinCore.pdf:PDF")
.withField(StandardField.GROUPS, "NO")
.withField(StandardField.HOWPUBLISHED, "online")
.withField(StandardField.KEYWORDS, "k1, k2").withField(StandardField.OWNER, "me").withField(StandardField.REVIEW, "review")
.withField(StandardField.URL, "https://www.olly2018.edu")
.withFiles(List.of(createDefaultLinkedFile("existing.pdf", tempDir)));

TORAL_2006.withField(StandardField.AUTHOR, "Toral, Antonio and Munoz, Rafael")
.withField(StandardField.TITLE, "A proposal to automatically build and maintain gazetteers for Named Entity Recognition by using Wikipedia")
.withField(StandardField.BOOKTITLE, "Proceedings of EACL")
.withField(StandardField.PAGES, "56--61")
.withField(StandardField.EPRINTTYPE, "asdf")
.withField(StandardField.OWNER, "Ich")
.withField(StandardField.URL, "www.url.de")
.withFiles(List.of(new LinkedFile("non-existing", "path/to/nowhere.pdf", "PDF")));

VAPNIK_2000.withCitationKey("vapnik2000")
.withField(StandardField.TITLE, "The Nature of Statistical Learning Theory")
.withField(StandardField.PUBLISHER, "Springer Science + Business Media")
.withField(StandardField.AUTHOR, "Vapnik, Vladimir N.")
.withField(StandardField.DOI, "10.1007/978-1-4757-3264-1").withField(StandardField.OWNER, "Ich");
}

/**
Expand All @@ -124,9 +115,9 @@ void setUp() throws IOException {
BibDatabase dataBase = databaseContext.getDatabase();

initBibEntries();
dataBase.insertEntry(olly2018);
dataBase.insertEntry(toral2006);
dataBase.insertEntry(vapnik2000);
dataBase.insertEntry(OLLY_2018);
dataBase.insertEntry(TORAL_2006);
dataBase.insertEntry(VAPNIK_2000);
}

@AfterEach
Expand All @@ -136,28 +127,28 @@ void reset() throws IOException {
entry.clearField(StandardField.FILE);
}
LinkedFile linkedFile = createDefaultLinkedFile("existing.pdf", tempDir);
olly2018.setFiles(List.of(linkedFile));
toral2006.setFiles(List.of(new LinkedFile("non-existing", "path/to/nowhere.pdf", "PDF")));
OLLY_2018.setFiles(List.of(linkedFile));
TORAL_2006.setFiles(List.of(new LinkedFile("non-existing", "path/to/nowhere.pdf", "PDF")));
}

@ParameterizedTest
@MethodSource("provideBibEntriesWithValidPdfFileLinks")
void successfulExportToAllFilesOfEntry(BibEntry bibEntryWithValidPdfFileLink) throws Exception {
assertTrue(exporter.exportToAllFilesOfEntry(databaseContext, filePreferences, bibEntryWithValidPdfFileLink, List.of(olly2018), abbreviationRepository));
assertTrue(exporter.exportToAllFilesOfEntry(databaseContext, filePreferences, bibEntryWithValidPdfFileLink, List.of(OLLY_2018), abbreviationRepository));
}

@ParameterizedTest
@MethodSource("provideBibEntriesWithInvalidPdfFileLinks")
void unsuccessfulExportToAllFilesOfEntry(BibEntry bibEntryWithValidPdfFileLink) throws Exception {
assertFalse(exporter.exportToAllFilesOfEntry(databaseContext, filePreferences, bibEntryWithValidPdfFileLink, List.of(olly2018), abbreviationRepository));
assertFalse(exporter.exportToAllFilesOfEntry(databaseContext, filePreferences, bibEntryWithValidPdfFileLink, List.of(OLLY_2018), abbreviationRepository));
}

public static Stream<Arguments> provideBibEntriesWithValidPdfFileLinks() {
return Stream.of(Arguments.of(olly2018));
return Stream.of(Arguments.of(OLLY_2018));
}

public static Stream<Arguments> provideBibEntriesWithInvalidPdfFileLinks() {
return Stream.of(Arguments.of(vapnik2000), Arguments.of(toral2006));
return Stream.of(Arguments.of(VAPNIK_2000), Arguments.of(TORAL_2006));
}

@ParameterizedTest
Expand Down Expand Up @@ -218,10 +209,6 @@ public static Stream<Arguments> providePathsToInvalidPDFs() throws IOException {
}

private static LinkedFile createDefaultLinkedFile(String fileName, Path tempDir) throws IOException {
return createDefaultLinkedFile("", fileName, tempDir);
}

private static LinkedFile createDefaultLinkedFile(String description, String fileName, Path tempDir) throws IOException {
Path pdfFile = tempDir.resolve(fileName);
try (PDDocument pdf = new PDDocument()) {
pdf.addPage(new PDPage());
Expand All @@ -231,3 +218,4 @@ private static LinkedFile createDefaultLinkedFile(String description, String fil
return new LinkedFile("", pdfFile, "PDF");
}
}

0 comments on commit 8d08279

Please sign in to comment.