diff --git a/build.gradle b/build.gradle
index c33ab938a84..c2294a1ae4d 100644
--- a/build.gradle
+++ b/build.gradle
@@ -100,6 +100,8 @@ dependencies {
     compile 'org.apache.pdfbox:fontbox:2.0.13'
     compile 'org.apache.pdfbox:xmpbox:2.0.13'
 
+    compile group: 'org.apache.tika', name: 'tika-core', version: '1.20'
+
     // required for reading write-protected PDFs - see https://github.com/JabRef/jabref/pull/942#issuecomment-209252635
     compile 'org.bouncycastle:bcprov-jdk15on:1.60'
 
diff --git a/src/main/java/org/jabref/model/util/FileHelper.java b/src/main/java/org/jabref/model/util/FileHelper.java
index f972d42cc46..0ed4e5cb293 100644
--- a/src/main/java/org/jabref/model/util/FileHelper.java
+++ b/src/main/java/org/jabref/model/util/FileHelper.java
@@ -1,5 +1,10 @@
 package org.jabref.model.util;
 
+import java.io.BufferedInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.MalformedURLException;
+import java.net.URL;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.Paths;
@@ -12,6 +17,13 @@
 import org.jabref.model.database.BibDatabaseContext;
 import org.jabref.model.metadata.FilePreferences;
 
+import org.apache.tika.config.TikaConfig;
+import org.apache.tika.detect.Detector;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.mime.MimeType;
+import org.apache.tika.mime.MimeTypeException;
+
 public class FileHelper {
 
     /**
@@ -31,12 +43,50 @@ public static Optional<String> getFileExtension(Path file) {
      * @return The extension (without leading dot), trimmed and in lowercase.
      */
     public static Optional<String> getFileExtension(String fileName) {
+        if (isUrl(fileName)) {
+            // The name of a remote resource is only a hint, so let Tika inspect the served content.
+            Metadata metadata = new Metadata();
+            metadata.add(Metadata.RESOURCE_NAME_KEY, fileName);
+            try (InputStream is = new URL(fileName).openStream()) {
+                return detectExtension(is, metadata);
+            } catch (IOException | MimeTypeException ignored) {
+                // The resource could not be read or its type could not be resolved:
+                // fall through to the name-based lookup instead of giving up.
+            }
+        }
+
         int dotPosition = fileName.lastIndexOf('.');
         if ((dotPosition > 0) && (dotPosition < (fileName.length() - 1))) {
             return Optional.of(fileName.substring(dotPosition + 1).trim().toLowerCase(Locale.ROOT));
-        } else {
+        }
+        return Optional.empty();
+    }
+
+    /**
+     * Detects the media type of the given content with Apache Tika and maps it to a file extension.
+     *
+     * @param is       the content to inspect; it is not closed by this method
+     * @param metaData detection hints, e.g. the resource name
+     * @return The extension (without leading dot), or an empty Optional if Tika knows none for the detected type.
+     * @throws IOException       if the content cannot be read
+     * @throws MimeTypeException if the name of the detected media type is invalid
+     */
+    private static Optional<String> detectExtension(InputStream is, Metadata metaData) throws IOException, MimeTypeException {
+        // One shared configuration provides both the detector and the MIME type registry.
+        TikaConfig tikaConfig = TikaConfig.getDefaultConfig();
+        Detector detector = tikaConfig.getDetector();
+        // Detectors expect a stream that supports mark/reset.
+        BufferedInputStream bis = new BufferedInputStream(is);
+        MediaType mediaType = detector.detect(bis, metaData);
+        MimeType mimeType = tikaConfig.getMimeRepository().forName(mediaType.toString());
+        String extension = mimeType.getExtension();
+
+        if (extension.isEmpty()) {
             return Optional.empty();
         }
+
+        // Drop the leading dot of the extension reported by Tika.
+        return Optional.of(extension.substring(1));
     }
 
     /**
@@ -129,4 +179,19 @@ private static Optional expandFilename(String filename, Path directory) {
             return Optional.empty();
         }
     }
+
+    /**
+     * Checks whether the given string can be parsed as a URL.
+     *
+     * @param url the string to check
+     * @return true if {@link URL#URL(String)} accepts the string, false otherwise
+     */
+    private static boolean isUrl(String url) {
+        try {
+            new URL(url);
+            return true;
+        } catch (MalformedURLException e) {
+            return false;
+        }
+    }
 }
diff --git a/src/test/java/org/jabref/model/util/FileHelperTest.java b/src/test/java/org/jabref/model/util/FileHelperTest.java
new file mode 100644
index 00000000000..27096d3b4ff
--- /dev/null
+++ b/src/test/java/org/jabref/model/util/FileHelperTest.java
@@ -0,0 +1,21 @@
+package org.jabref.model.util;
+
+import java.util.Optional;
+
+import org.junit.jupiter.api.Test;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+class FileHelperTest {
+    @Test
+    public void extractFileExtension() {
+        final String filePath = FileHelperTest.class.getResource("pdffile.pdf").getPath();
+        assertEquals(Optional.of("pdf"), FileHelper.getFileExtension(filePath));
+    }
+
+    @Test
+    public void fileExtensionFromUrl() {
+        final String filePath = "https://link.springer.com/content/pdf/10.1007%2Fs40955-018-0121-9.pdf";
+        assertEquals(Optional.of("pdf"), FileHelper.getFileExtension(filePath));
+    }
+}
diff --git a/src/test/resources/org/jabref/model/util/pdffile.pdf b/src/test/resources/org/jabref/model/util/pdffile.pdf
new file mode 100644
index 00000000000..cdacbbebbbf
Binary files /dev/null and b/src/test/resources/org/jabref/model/util/pdffile.pdf differ