Skip to content

Commit

Permalink
Add support for protected ("encrypted") PDFs
Browse files Browse the repository at this point in the history
 * add BouncyCastle library
 * remove BouncyCastle installation warning string
 * add test file write-protected.pdf
 * better exception name
  • Loading branch information
koppor committed Apr 12, 2016
1 parent 39f7ed7 commit a39f11f
Show file tree
Hide file tree
Showing 14 changed files with 108 additions and 50 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ to [sourceforge feature requests](https://sourceforge.net/p/jabref/features/) by
- Fixed [#1062](https://github.com/JabRef/jabref/issues/1062): Merge entry with DOI information now also applies changes to entry type
- Fixed [#535](https://github.com/JabRef/jabref/issues/535): Add merge action to right click menu
- Fixed [#1115](https://github.com/JabRef/jabref/issues/1115): Wrong warning message when importing duplicate entries

- Fixed [#935](https://github.com/JabRef/jabref/issues/935): PDFs that are readable but protected against editing are now handled by the XMP parser and by the importer that generates a BibTeX entry from the PDF content.

### Removed
- Fixed [#627](https://github.com/JabRef/jabref/issues/627): The pdf field is removed from the export formats, use the file field
Expand Down
1 change: 1 addition & 0 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ dependencies {
compile 'org.apache.pdfbox:pdfbox:1.8.11'
compile 'org.apache.pdfbox:fontbox:1.8.11'
compile 'org.apache.pdfbox:jempbox:1.8.11'
compile 'org.bouncycastle:bcprov-jdk15on:1.54'

compile 'commons-cli:commons-cli:1.3.1'

Expand Down
5 changes: 5 additions & 0 deletions external-libraries.txt
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,11 @@ Project: Apache PDFBox
URL: http://pdfbox.apache.org
License: Apache 2.0

Id: org.bouncycastle:bcprov-jdk15on
Project: The Legion of the Bouncy Castle
URL: https://www.bouncycastle.org/
License: MIT

Id: org.jsoup:jsoup
Project: jsoup
URL: https://github.com/jhy/jsoup/
Expand Down
16 changes: 6 additions & 10 deletions src/main/java/net/sf/jabref/importer/EntryFromPDFCreator.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,17 @@
import java.util.List;
import java.util.Optional;

import net.sf.jabref.gui.IconTheme;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentInformation;

import net.sf.jabref.model.entry.BibEntry;
import net.sf.jabref.pdfimport.PdfImporter;
import net.sf.jabref.pdfimport.PdfImporter.ImportPdfFilesResult;
import net.sf.jabref.JabRef;
import net.sf.jabref.external.ExternalFileType;
import net.sf.jabref.external.ExternalFileTypes;
import net.sf.jabref.gui.IconTheme;
import net.sf.jabref.logic.xmp.XMPUtil;
import net.sf.jabref.model.entry.BibEntry;
import net.sf.jabref.pdfimport.PdfImporter;
import net.sf.jabref.pdfimport.PdfImporter.ImportPdfFilesResult;

/**
* Uses XMPUtils to get one BibEntry for a PDF-File.
Expand All @@ -41,11 +41,7 @@ private static ExternalFileType getPDFExternalFileType() {
return pdfFileType.get();
}

/*
* (non-Javadoc)
*
* @see net.sf.jabref.imports.EntryFromFileCreator#accept(java.io.File)
*
/**
* Accepts all Files having as suffix ".PDF" (in ignore case mode).
*/
@Override
Expand Down Expand Up @@ -114,7 +110,7 @@ private void addEntryDataFromPDDocumentInformation(File pdfFile, BibEntry entry)
}

/**
* Adds all data Found in all the entrys of this XMP file to the given
* Adds all data Found in all the entries of this XMP file to the given
* entry. This was implemented without having much knowledge of the XMP
* format.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import net.sf.jabref.importer.OutputPrinter;
import net.sf.jabref.importer.fetcher.DOItoBibTeXFetcher;
import net.sf.jabref.logic.util.DOI;
import net.sf.jabref.logic.xmp.EncryptedPdfsNotSupportedException;
import net.sf.jabref.logic.xmp.XMPUtil;
import net.sf.jabref.model.entry.BibEntry;
import net.sf.jabref.model.entry.BibtexEntryTypes;
import net.sf.jabref.model.entry.EntryType;
Expand All @@ -19,6 +21,7 @@
import java.io.InputStream;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Optional;
import java.util.regex.Matcher;
Expand Down Expand Up @@ -184,12 +187,7 @@ public boolean isRecognizedFormat(InputStream in) throws IOException {
public List<BibEntry> importEntries(InputStream in, OutputPrinter status) throws IOException {
final ArrayList<BibEntry> result = new ArrayList<>(1);

try (PDDocument document = PDDocument.load(in)) {
if (document.isEncrypted()) {
LOGGER.info("Encrypted documents are not supported");
return result;
}

try (PDDocument document = XMPUtil.loadWithAutomaticDecryption(in)) {
String firstPageContents = getFirstPageContents(document);

Optional<DOI> doi = DOI.findInText(firstPageContents);
Expand Down Expand Up @@ -481,6 +479,9 @@ public void addEntry(BibEntry entry) {
}

result.add(entry);
} catch (EncryptedPdfsNotSupportedException e) {
LOGGER.info("Decryption not supported");
return Collections.EMPTY_LIST;
}
return result;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/* Copyright (C) 2003-2011 JabRef contributors.
/* Copyright (C) 2003-2016 JabRef contributors.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
Expand All @@ -17,8 +17,6 @@

import java.io.IOException;

public class EncryptionNotSupportedException extends IOException {
public EncryptionNotSupportedException(String string) {
super(string);
}
public class EncryptedPdfsNotSupportedException extends IOException {
// no additional information needed
}
59 changes: 42 additions & 17 deletions src/main/java/net/sf/jabref/logic/xmp/XMPUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,13 @@
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.exceptions.COSVisitorException;
import org.apache.pdfbox.exceptions.CryptographyException;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentCatalog;
import org.apache.pdfbox.pdmodel.PDDocumentInformation;
import org.apache.pdfbox.pdmodel.common.PDMetadata;
import org.apache.pdfbox.pdmodel.encryption.BadSecurityHandlerException;
import org.apache.pdfbox.pdmodel.encryption.StandardDecryptionMaterial;
import org.w3c.dom.Document;

/**
Expand Down Expand Up @@ -121,6 +124,39 @@ public static List<BibEntry> readXMP(File file) throws IOException {
return result;
}

/**
 * Loads a PDF from the given stream and, if it is encrypted, tries to open it
 * using the empty string as user password.
 *
 * The returned document must be closed by the caller. If decryption fails, the
 * document is closed here before the exception propagates, because the caller
 * never receives a reference it could close.
 *
 * @param inputStream stream containing the PDF file
 * @return the loaded (and, if necessary, decrypted) document
 * @throws EncryptedPdfsNotSupportedException if the document is encrypted and cannot be
 *         opened with the empty password, or if the BouncyCastle provider class is missing
 * @throws IOException if the document cannot be read
 */
public static PDDocument loadWithAutomaticDecryption(InputStream inputStream)
        throws IOException, EncryptedPdfsNotSupportedException {
    PDDocument doc = PDDocument.load(inputStream);
    if (!doc.isEncrypted()) {
        return doc;
    }

    boolean decrypted = false;
    try {
        // try the empty string as user password
        StandardDecryptionMaterial sdm = new StandardDecryptionMaterial("");
        doc.openProtection(sdm);
        decrypted = true;
        return doc;
    } catch (BadSecurityHandlerException | CryptographyException e) {
        // pass the exception to the logger so that the cause is not lost
        LOGGER.error("Cannot handle encrypted PDF: " + e.getMessage(), e);
        throw new EncryptedPdfsNotSupportedException();
    } catch (NoClassDefFoundError e) {
        // This is to avoid following exception:
        // Exception in thread "JabRef CachedThreadPool" java.lang.NoClassDefFoundError: org/bouncycastle/jce/provider/BouncyCastleProvider
        //   at org.apache.pdfbox.pdmodel.PDDocument.openProtection(PDDocument.java:1611)
        //   at net.sf.jabref.logic.xmp.XMPUtil.loadWithAutomaticDecryption(XMPUtil.java:133)
        // This exception occurs if JabRef is compiled without 'org.bouncycastle:bcprov-jdk15on' (meaning, without the BouncyCastle library), which may happen in some countries not allowing cryptography.
        // See for instance http://www.bouncycastle.org/wiki/display/JA1/Frequently+Asked+Questions#FrequentlyAskedQuestions-11.WhatisBouncyCastle%27sexportclassificationintheUnitedStatesofAmerica?
        // See also https://sourceforge.net/p/jabref/bugs/1257/ and http://stackoverflow.com/a/2929228/873282
        // Constant-first comparison: getMessage() may be null, which must not hide the original error behind a NullPointerException
        if ("org/bouncycastle/jce/provider/BouncyCastleProvider".equals(e.getMessage())) {
            LOGGER.warn(
                    "Java Bouncy Castle library not found. This might have been removed due redistribution restrictions. Please download and install it. For more information see http://www.bouncycastle.org/.");
            // We convert it to an EncryptedPdfsNotSupportedException as this is handled properly by the caller
            throw new EncryptedPdfsNotSupportedException();
        }
        // we really cannot deal with it
        throw e;
    } finally {
        if (!decrypted) {
            // The caller never gets the document on failure, so release it here to avoid a leak
            try {
                doc.close();
            } catch (IOException closeException) {
                LOGGER.debug("Could not close PDF after failed decryption", closeException);
            }
        }
    }
}

/**
* Try to read the given BibTexEntry from the XMP-stream of the given
* inputstream containing a PDF-file.
Expand All @@ -139,11 +175,7 @@ public static List<BibEntry> readXMP(InputStream inputStream)

List<BibEntry> result = new LinkedList<>();

try (PDDocument document = PDDocument.load(inputStream)) {
if (document.isEncrypted()) {
throw new EncryptionNotSupportedException("Error: Cannot read metadata from encrypted document.");
}

try (PDDocument document = loadWithAutomaticDecryption(inputStream)) {
Optional<XMPMetadata> meta = XMPUtil.getXMPMetadata(document);

if (meta.isPresent()) {
Expand Down Expand Up @@ -508,13 +540,8 @@ public static String toXMP(Collection<BibEntry> bibtexEntries,
* @return The XMPMetadata object found in the file
*/
private static Optional<XMPMetadata> readRawXMP(InputStream inputStream) throws IOException {
try (PDDocument document = PDDocument.load(inputStream)) {
if (document.isEncrypted()) {
throw new EncryptionNotSupportedException("Error: Cannot read metadata from encrypted document.");
}

try (PDDocument document = loadWithAutomaticDecryption(inputStream)) {
return XMPUtil.getXMPMetadata(document);

}
}

Expand Down Expand Up @@ -1036,8 +1063,7 @@ public static void writeXMP(File file,

try (PDDocument document = PDDocument.load(file.getAbsoluteFile())) {
if (document.isEncrypted()) {
throw new EncryptionNotSupportedException(
"Error: Cannot add metadata to encrypted document.");
throw new EncryptedPdfsNotSupportedException();
}

if (writePDFInfo && (resolvedEntries.size() == 1)) {
Expand Down Expand Up @@ -1083,10 +1109,9 @@ public static void writeXMP(File file,
try {
document.save(file.getAbsolutePath());
} catch (COSVisitorException e) {
throw new TransformerException("Could not write XMP-metadata: "
+ e.getLocalizedMessage());
LOGGER.debug("Could not write XMP metadata", e);
throw new TransformerException("Could not write XMP metadata: " + e.getLocalizedMessage(), e);
}

}
}

Expand Down Expand Up @@ -1265,7 +1290,7 @@ public static boolean hasMetadata(InputStream inputStream) {
try {
List<BibEntry> bibEntries = XMPUtil.readXMP(inputStream);
return !bibEntries.isEmpty();
} catch (EncryptionNotSupportedException ex) {
} catch (EncryptedPdfsNotSupportedException ex) {
LOGGER.info("Encryption not supported by XMPUtil");
return false;
} catch (IOException e) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,11 @@ public static Collection<Object[]> fileNames() {
// The test folder contains pairs of PDFs and BibTeX files. We check each pair.
// This method returns the basenames of the available pairs

Object[][] data = new Object[][] {{"LNCS-minimal"}};
Object[][] data = new Object[][] {
// minimal PDF, not encrypted
{"LNCS-minimal"},
// minimal PDF, write-protected, thus encrypted
{"LNCS-minimal-protected"}};
return Arrays.asList(data);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,10 @@
import net.sf.jabref.Globals;
import net.sf.jabref.JabRefPreferences;
import net.sf.jabref.importer.OutputPrinterToNull;
import net.sf.jabref.logic.xmp.EncryptedPdfsNotSupportedException;
import net.sf.jabref.model.entry.BibEntry;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;

import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
Expand All @@ -18,8 +16,6 @@

public class PdfXmpImporterTest {

@Rule
public ExpectedException thrown = ExpectedException.none();
private PdfXmpImporter importer;


Expand All @@ -34,11 +30,9 @@ public void testGetFormatName() {
assertEquals("XMP-annotated PDF", importer.getFormatName());
}

@Test
@Test(expected = EncryptedPdfsNotSupportedException.class)
public void importEncryptedFileThrowsException() throws IOException {
try (InputStream is = PdfXmpImporterTest.class.getResourceAsStream("/pdfs/encrypted.pdf")) {
thrown.expect(IOException.class);
thrown.expectMessage("Error: Cannot read metadata from encrypted document.");
importer.importEntries(is, new OutputPrinterToNull());
}
}
Expand Down
33 changes: 31 additions & 2 deletions src/test/java/net/sf/jabref/logic/xmp/XMPUtilTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import net.sf.jabref.model.database.BibDatabaseMode;
import net.sf.jabref.model.entry.AuthorList;
import net.sf.jabref.bibtex.BibEntryWriter;
import net.sf.jabref.bibtex.BibtexEntryAssert;
import net.sf.jabref.model.entry.IdGenerator;
import net.sf.jabref.model.entry.BibEntry;
import net.sf.jabref.model.entry.BibtexEntryTypes;
Expand Down Expand Up @@ -860,6 +861,34 @@ public void testWriteMultiple() throws IOException, TransformerException {
assertEqualsBibtexEntry(t3BibtexEntry(), b);
}

/**
 * Checks that XMPUtil reports metadata for an edit-protected PDF.
 */
@Test
public void testReadProtectedPDFHasMetaData() throws Exception {
    try (InputStream protectedPdf = XMPUtilTest.class.getResourceAsStream("/pdfs/write-protected.pdf")) {
        boolean metadataFound = XMPUtil.hasMetadata(protectedPdf);
        Assert.assertTrue(metadataFound);
    }
}

/**
 * Checks that the entries read from an edit-protected PDF equal the expected
 * single "misc" entry with its author field set.
 */
@Test
public void testReadProtectedPDFHasCorrectMetaData() throws Exception {
    try (InputStream protectedPdf = XMPUtilTest.class.getResourceAsStream("/pdfs/write-protected.pdf")) {
        List<BibEntry> actualEntries = XMPUtil.readXMP(protectedPdf);

        // Build the single entry the protected test PDF is expected to yield
        BibEntry expectedEntry = new BibEntry();
        expectedEntry.setType("misc");
        expectedEntry.setField("author", "Firstname Lastname");

        List<BibEntry> expectedEntries = new ArrayList<>(1);
        expectedEntries.add(expectedEntry);

        BibtexEntryAssert.assertEquals(expectedEntries, actualEntries);
    }
}

@Test
public void testReadWriteDC() throws IOException, TransformerException {
List<BibEntry> l = new LinkedList<>();
Expand Down Expand Up @@ -1272,14 +1301,14 @@ public void testResolveStrings() throws IOException, TransformerException {
AuthorList.parse(x.getField("author")));
}

@Test(expected = EncryptionNotSupportedException.class)
@Test(expected = EncryptedPdfsNotSupportedException.class)
public void expectedEncryptionNotSupportedExceptionAtRead() throws IOException {
try (InputStream is = XMPUtilTest.class.getResourceAsStream("/pdfs/encrypted.pdf")) {
XMPUtil.readXMP(is);
}
}

@Test(expected = EncryptionNotSupportedException.class)
@Test(expected = EncryptedPdfsNotSupportedException.class)
public void expectedEncryptionNotSupportedExceptionAtWrite() throws IOException, TransformerException {
XMPUtil.writeXMP("src/test/resources/pdfs/encrypted.pdf", t1BibtexEntry(), null);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
@inproceedings{,
abstract = {Abstract goes here},
author = {Firstname Lastname and Firstname Lastname},
title = {Paper Title}
}
Binary file not shown.
Binary file added src/test/resources/pdfs/write-protected.docx
Binary file not shown.
Binary file added src/test/resources/pdfs/write-protected.pdf
Binary file not shown.

0 comments on commit a39f11f

Please sign in to comment.