diff --git a/CHANGELOG.md b/CHANGELOG.md index f76897fd213..defb4ac32af 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -95,7 +95,7 @@ to [sourceforge feature requests](https://sourceforge.net/p/jabref/features/) by - Fixed [#1062](https://github.com/JabRef/jabref/issues/1062): Merge entry with DOI information now also applies changes to entry type - Fixed [#535](https://github.com/JabRef/jabref/issues/535): Add merge action to right click menu - Fixed [#1115](https://github.com/JabRef/jabref/issues/1115): Wrong warning message when importing duplicate entries - +- Fixed [#935](https://github.com/JabRef/jabref/issues/935): PDFs that are readable but protected against editing are now handled by the XMP parser and by the importer that generates a BibTeX entry from the PDF content. ### Removed - Fixed [#627](https://github.com/JabRef/jabref/issues/627): The pdf field is removed from the export formats, use the file field diff --git a/build.gradle b/build.gradle index 587b768beb8..819194c1587 100644 --- a/build.gradle +++ b/build.gradle @@ -71,6 +71,7 @@ dependencies { compile 'org.apache.pdfbox:pdfbox:1.8.11' compile 'org.apache.pdfbox:fontbox:1.8.11' compile 'org.apache.pdfbox:jempbox:1.8.11' + compile 'org.bouncycastle:bcprov-jdk15on:1.54' compile 'commons-cli:commons-cli:1.3.1' diff --git a/external-libraries.txt b/external-libraries.txt index e21c9ff9d08..566ca480574 100644 --- a/external-libraries.txt +++ b/external-libraries.txt @@ -92,6 +92,11 @@ Project: Apache PDFBox URL: http://pdfbox.apache.org License: Apache 2.0 +Id: org.bouncycastle:bcprov-jdk15on +Project: The Legion of the Bouncy Castle +URL: https://www.bouncycastle.org/ +License: MIT + Id: org.jsoup:jsoup Project: jsoup URL: https://github.com/jhy/jsoup/ diff --git a/src/main/java/net/sf/jabref/importer/EntryFromPDFCreator.java b/src/main/java/net/sf/jabref/importer/EntryFromPDFCreator.java index d414b04b7d1..0686ef7e9d2 100644 --- a/src/main/java/net/sf/jabref/importer/EntryFromPDFCreator.java +++ 
b/src/main/java/net/sf/jabref/importer/EntryFromPDFCreator.java @@ -7,17 +7,17 @@ import java.util.List; import java.util.Optional; -import net.sf.jabref.gui.IconTheme; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDDocumentInformation; -import net.sf.jabref.model.entry.BibEntry; -import net.sf.jabref.pdfimport.PdfImporter; -import net.sf.jabref.pdfimport.PdfImporter.ImportPdfFilesResult; import net.sf.jabref.JabRef; import net.sf.jabref.external.ExternalFileType; import net.sf.jabref.external.ExternalFileTypes; +import net.sf.jabref.gui.IconTheme; import net.sf.jabref.logic.xmp.XMPUtil; +import net.sf.jabref.model.entry.BibEntry; +import net.sf.jabref.pdfimport.PdfImporter; +import net.sf.jabref.pdfimport.PdfImporter.ImportPdfFilesResult; /** * Uses XMPUtils to get one BibEntry for a PDF-File. @@ -41,11 +41,7 @@ private static ExternalFileType getPDFExternalFileType() { return pdfFileType.get(); } - /* - * (non-Javadoc) - * - * @see net.sf.jabref.imports.EntryFromFileCreator#accept(java.io.File) - * + /** * Accepts all Files having as suffix ".PDF" (in ignore case mode). */ @Override @@ -114,7 +110,7 @@ private void addEntryDataFromPDDocumentInformation(File pdfFile, BibEntry entry) } /** - * Adds all data Found in all the entrys of this XMP file to the given + * Adds all data Found in all the entries of this XMP file to the given * entry. This was implemented without having much knowledge of the XMP * format. 
* diff --git a/src/main/java/net/sf/jabref/importer/fileformat/PdfContentImporter.java b/src/main/java/net/sf/jabref/importer/fileformat/PdfContentImporter.java index 52b455a19d7..3d38236ce47 100644 --- a/src/main/java/net/sf/jabref/importer/fileformat/PdfContentImporter.java +++ b/src/main/java/net/sf/jabref/importer/fileformat/PdfContentImporter.java @@ -4,6 +4,8 @@ import net.sf.jabref.importer.OutputPrinter; import net.sf.jabref.importer.fetcher.DOItoBibTeXFetcher; import net.sf.jabref.logic.util.DOI; +import net.sf.jabref.logic.xmp.EncryptedPdfsNotSupportedException; +import net.sf.jabref.logic.xmp.XMPUtil; import net.sf.jabref.model.entry.BibEntry; import net.sf.jabref.model.entry.BibtexEntryTypes; import net.sf.jabref.model.entry.EntryType; @@ -19,6 +21,7 @@ import java.io.InputStream; import java.io.StringWriter; import java.util.ArrayList; +import java.util.Collections; import java.util.List; import java.util.Optional; import java.util.regex.Matcher; @@ -184,12 +187,7 @@ public boolean isRecognizedFormat(InputStream in) throws IOException { public List importEntries(InputStream in, OutputPrinter status) throws IOException { final ArrayList result = new ArrayList<>(1); - try (PDDocument document = PDDocument.load(in)) { - if (document.isEncrypted()) { - LOGGER.info("Encrypted documents are not supported"); - return result; - } - + try (PDDocument document = XMPUtil.loadWithAutomaticDecryption(in)) { String firstPageContents = getFirstPageContents(document); Optional doi = DOI.findInText(firstPageContents); @@ -481,6 +479,9 @@ public void addEntry(BibEntry entry) { } result.add(entry); + } catch (EncryptedPdfsNotSupportedException e) { // the PDF could not be decrypted: import nothing + LOGGER.info("Decryption not supported"); + return Collections.emptyList(); } return result; } diff --git a/src/main/java/net/sf/jabref/logic/xmp/EncryptionNotSupportedException.java b/src/main/java/net/sf/jabref/logic/xmp/EncryptedPdfsNotSupportedException.java similarity index 79% rename from 
src/main/java/net/sf/jabref/logic/xmp/EncryptionNotSupportedException.java rename to src/main/java/net/sf/jabref/logic/xmp/EncryptedPdfsNotSupportedException.java index 59849d57a60..29dc6a14cec 100644 --- a/src/main/java/net/sf/jabref/logic/xmp/EncryptionNotSupportedException.java +++ b/src/main/java/net/sf/jabref/logic/xmp/EncryptedPdfsNotSupportedException.java @@ -1,4 +1,4 @@ -/* Copyright (C) 2003-2011 JabRef contributors. +/* Copyright (C) 2003-2016 JabRef contributors. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or @@ -17,8 +17,6 @@ import java.io.IOException; -public class EncryptionNotSupportedException extends IOException { - public EncryptionNotSupportedException(String string) { - super(string); - } +public class EncryptedPdfsNotSupportedException extends IOException { + // no additional information needed } diff --git a/src/main/java/net/sf/jabref/logic/xmp/XMPUtil.java b/src/main/java/net/sf/jabref/logic/xmp/XMPUtil.java index 0a065e52c8f..a29f6efa655 100644 --- a/src/main/java/net/sf/jabref/logic/xmp/XMPUtil.java +++ b/src/main/java/net/sf/jabref/logic/xmp/XMPUtil.java @@ -46,10 +46,13 @@ import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.exceptions.COSVisitorException; +import org.apache.pdfbox.exceptions.CryptographyException; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDDocumentCatalog; import org.apache.pdfbox.pdmodel.PDDocumentInformation; import org.apache.pdfbox.pdmodel.common.PDMetadata; +import org.apache.pdfbox.pdmodel.encryption.BadSecurityHandlerException; +import org.apache.pdfbox.pdmodel.encryption.StandardDecryptionMaterial; import org.w3c.dom.Document; /** @@ -121,6 +124,39 @@ public static List readXMP(File file) throws IOException { return result; } + public static PDDocument 
loadWithAutomaticDecryption(InputStream inputStream) + throws IOException, EncryptedPdfsNotSupportedException { + PDDocument doc = PDDocument.load(inputStream); + if (doc.isEncrypted()) { + // try the empty string as user password + StandardDecryptionMaterial sdm = new StandardDecryptionMaterial(""); + try { + doc.openProtection(sdm); + } catch (BadSecurityHandlerException | CryptographyException e) { + LOGGER.error("Cannot handle encrypted PDF: " + e.getMessage(), e); + doc.close(); // the caller never receives the document when we throw, so release it here + throw new EncryptedPdfsNotSupportedException(); + } catch (NoClassDefFoundError e) { + doc.close(); // same reason as above: do not leak the loaded document + // This is to avoid following exception: + // Exception in thread "JabRef CachedThreadPool" java.lang.NoClassDefFoundError: org/bouncycastle/jce/provider/BouncyCastleProvider + // at org.apache.pdfbox.pdmodel.PDDocument.openProtection(PDDocument.java:1611) + // at net.sf.jabref.logic.xmp.XMPUtil.loadWithAutomaticDecryption(XMPUtil.java:133) + // This exception occurs if JabRef is compiled without 'org.bouncycastle:bcprov-jdk15on' (meaning, without the BouncyCastle library), which may happen in some countries not allowing cryptography. + // See for instance http://www.bouncycastle.org/wiki/display/JA1/Frequently+Asked+Questions#FrequentlyAskedQuestions-11.WhatisBouncyCastle%27sexportclassificationintheUnitedStatesofAmerica? + // See also https://sourceforge.net/p/jabref/bugs/1257/ and http://stackoverflow.com/a/2929228/873282 + if ("org/bouncycastle/jce/provider/BouncyCastleProvider".equals(e.getMessage())) { // constant first: the message may be null + LOGGER.warn("Java Bouncy Castle library not found. This might have been removed due redistribution restrictions. Please download and install it. 
For more information see http://www.bouncycastle.org/."); + // We convert it to an EncryptedPdfsNotSupportedException as this is handled properly by the caller + throw new EncryptedPdfsNotSupportedException(); + } + // we really cannot deal with it + throw e; + } + } + return doc; + } + /** * Try to read the given BibTexEntry from the XMP-stream of the given * inputstream containing a PDF-file. @@ -139,11 +175,7 @@ public static List readXMP(InputStream inputStream) List result = new LinkedList<>(); - try (PDDocument document = PDDocument.load(inputStream)) { - if (document.isEncrypted()) { - throw new EncryptionNotSupportedException("Error: Cannot read metadata from encrypted document."); - } - + try (PDDocument document = loadWithAutomaticDecryption(inputStream)) { Optional meta = XMPUtil.getXMPMetadata(document); if (meta.isPresent()) { @@ -508,13 +540,8 @@ public static String toXMP(Collection bibtexEntries, * @return The XMPMetadata object found in the file */ private static Optional readRawXMP(InputStream inputStream) throws IOException { - try (PDDocument document = PDDocument.load(inputStream)) { - if (document.isEncrypted()) { - throw new EncryptionNotSupportedException("Error: Cannot read metadata from encrypted document."); - } - + try (PDDocument document = loadWithAutomaticDecryption(inputStream)) { return XMPUtil.getXMPMetadata(document); - } } @@ -1036,8 +1063,7 @@ public static void writeXMP(File file, try (PDDocument document = PDDocument.load(file.getAbsoluteFile())) { if (document.isEncrypted()) { - throw new EncryptionNotSupportedException( - "Error: Cannot add metadata to encrypted document."); + throw new EncryptedPdfsNotSupportedException(); } if (writePDFInfo && (resolvedEntries.size() == 1)) { @@ -1083,10 +1109,9 @@ public static void writeXMP(File file, try { document.save(file.getAbsolutePath()); } catch (COSVisitorException e) { - throw new TransformerException("Could not write XMP-metadata: " - + e.getLocalizedMessage()); + 
LOGGER.debug("Could not write XMP metadata", e); + throw new TransformerException("Could not write XMP metadata: " + e.getLocalizedMessage(), e); } - } } @@ -1265,7 +1290,7 @@ public static boolean hasMetadata(InputStream inputStream) { try { List bibEntries = XMPUtil.readXMP(inputStream); return !bibEntries.isEmpty(); - } catch (EncryptionNotSupportedException ex) { + } catch (EncryptedPdfsNotSupportedException ex) { LOGGER.info("Encryption not supported by XMPUtil"); return false; } catch (IOException e) { diff --git a/src/test/java/net/sf/jabref/importer/fileformat/PdfContentImporterTestFiles.java b/src/test/java/net/sf/jabref/importer/fileformat/PdfContentImporterTestFiles.java index 4a966979388..83735066b7b 100644 --- a/src/test/java/net/sf/jabref/importer/fileformat/PdfContentImporterTestFiles.java +++ b/src/test/java/net/sf/jabref/importer/fileformat/PdfContentImporterTestFiles.java @@ -34,7 +34,11 @@ public static Collection fileNames() { // The test folder contains pairs of PDFs and BibTeX files. We check each pair. 
// This method returns the basenames of the available pairs - Object[][] data = new Object[][] {{"LNCS-minimal"}}; + Object[][] data = new Object[][] { + // minimal PDF, not encrypted + {"LNCS-minimal"}, + // minimal PDF, write-protected, thus encrypted + {"LNCS-minimal-protected"}}; return Arrays.asList(data); } diff --git a/src/test/java/net/sf/jabref/importer/fileformat/PdfXmpImporterTest.java b/src/test/java/net/sf/jabref/importer/fileformat/PdfXmpImporterTest.java index 368be7c222a..f8ed3673e3b 100644 --- a/src/test/java/net/sf/jabref/importer/fileformat/PdfXmpImporterTest.java +++ b/src/test/java/net/sf/jabref/importer/fileformat/PdfXmpImporterTest.java @@ -3,12 +3,10 @@ import net.sf.jabref.Globals; import net.sf.jabref.JabRefPreferences; import net.sf.jabref.importer.OutputPrinterToNull; +import net.sf.jabref.logic.xmp.EncryptedPdfsNotSupportedException; import net.sf.jabref.model.entry.BibEntry; import org.junit.Before; -import org.junit.Rule; import org.junit.Test; -import org.junit.rules.ExpectedException; - import java.io.IOException; import java.io.InputStream; import java.util.Arrays; @@ -18,8 +16,6 @@ public class PdfXmpImporterTest { - @Rule - public ExpectedException thrown = ExpectedException.none(); private PdfXmpImporter importer; @@ -34,11 +30,9 @@ public void testGetFormatName() { assertEquals("XMP-annotated PDF", importer.getFormatName()); } - @Test + @Test(expected = EncryptedPdfsNotSupportedException.class) public void importEncryptedFileThrowsException() throws IOException { try (InputStream is = PdfXmpImporterTest.class.getResourceAsStream("/pdfs/encrypted.pdf")) { - thrown.expect(IOException.class); - thrown.expectMessage("Error: Cannot read metadata from encrypted document."); importer.importEntries(is, new OutputPrinterToNull()); } } diff --git a/src/test/java/net/sf/jabref/logic/xmp/XMPUtilTest.java b/src/test/java/net/sf/jabref/logic/xmp/XMPUtilTest.java index 5cf9dab0c15..437b57368a9 100644 --- 
a/src/test/java/net/sf/jabref/logic/xmp/XMPUtilTest.java +++ b/src/test/java/net/sf/jabref/logic/xmp/XMPUtilTest.java @@ -8,6 +8,7 @@ import net.sf.jabref.model.database.BibDatabaseMode; import net.sf.jabref.model.entry.AuthorList; import net.sf.jabref.bibtex.BibEntryWriter; +import net.sf.jabref.bibtex.BibtexEntryAssert; import net.sf.jabref.model.entry.IdGenerator; import net.sf.jabref.model.entry.BibEntry; import net.sf.jabref.model.entry.BibtexEntryTypes; @@ -860,6 +861,34 @@ public void testWriteMultiple() throws IOException, TransformerException { assertEqualsBibtexEntry(t3BibtexEntry(), b); } + /** + * Tests whether {@code XMPUtil.hasMetadata} reports metadata for an edit-protected PDF + */ + @Test + public void testReadProtectedPDFHasMetaData() throws Exception { + try (InputStream is = XMPUtilTest.class.getResourceAsStream("/pdfs/write-protected.pdf")) { + Assert.assertTrue(XMPUtil.hasMetadata(is)); + } + } + + /** + * Tests whether {@code XMPUtil.readXMP} returns the expected entry (type misc with an author field) for an edit-protected PDF + */ + @Test + public void testReadProtectedPDFHasCorrectMetaData() throws Exception { + try (InputStream is = XMPUtilTest.class.getResourceAsStream("/pdfs/write-protected.pdf")) { + List readEntries = XMPUtil.readXMP(is); + + BibEntry entry = new BibEntry(); + entry.setType("misc"); + entry.setField("author", "Firstname Lastname"); + List expected = new ArrayList<>(1); + expected.add(entry); + + BibtexEntryAssert.assertEquals(expected, readEntries); + } + } + @Test public void testReadWriteDC() throws IOException, TransformerException { List l = new LinkedList<>(); @@ -1272,14 +1301,14 @@ public void testResolveStrings() throws IOException, TransformerException { AuthorList.parse(x.getField("author"))); } - @Test(expected = EncryptionNotSupportedException.class) + @Test(expected = EncryptedPdfsNotSupportedException.class) public void expectedEncryptionNotSupportedExceptionAtRead() throws IOException { try (InputStream is = XMPUtilTest.class.getResourceAsStream("/pdfs/encrypted.pdf")) { XMPUtil.readXMP(is); } } - @Test(expected = 
EncryptionNotSupportedException.class) + @Test(expected = EncryptedPdfsNotSupportedException.class) public void expectedEncryptionNotSupportedExceptionAtWrite() throws IOException, TransformerException { XMPUtil.writeXMP("src/test/resources/pdfs/encrypted.pdf", t1BibtexEntry(), null); } diff --git a/src/test/resources/net/sf/jabref/importer/fileformat/LNCS-minimal-protected.bib b/src/test/resources/net/sf/jabref/importer/fileformat/LNCS-minimal-protected.bib new file mode 100644 index 00000000000..3bc22f124f4 --- /dev/null +++ b/src/test/resources/net/sf/jabref/importer/fileformat/LNCS-minimal-protected.bib @@ -0,0 +1,5 @@ +@inproceedings{, + abstract = {Abstract goes here}, + author = {Firstname Lastname and Firstname Lastname}, + title = {Paper Title} +} \ No newline at end of file diff --git a/src/test/resources/net/sf/jabref/importer/fileformat/LNCS-minimal-protected.pdf b/src/test/resources/net/sf/jabref/importer/fileformat/LNCS-minimal-protected.pdf new file mode 100644 index 00000000000..cfc933f34c4 Binary files /dev/null and b/src/test/resources/net/sf/jabref/importer/fileformat/LNCS-minimal-protected.pdf differ diff --git a/src/test/resources/pdfs/write-protected.docx b/src/test/resources/pdfs/write-protected.docx new file mode 100644 index 00000000000..eda5cc84b22 Binary files /dev/null and b/src/test/resources/pdfs/write-protected.docx differ diff --git a/src/test/resources/pdfs/write-protected.pdf b/src/test/resources/pdfs/write-protected.pdf new file mode 100644 index 00000000000..0a54130815a Binary files /dev/null and b/src/test/resources/pdfs/write-protected.pdf differ