Skip to content

Commit

Permalink
Do not extract file ending from Urls (JabRef#4547)
Browse files Browse the repository at this point in the history
* Fixes JabRef#4544 Do not extract file ending from Urls

* Add tests

* file type for any resource type

* Keep simple file name extraction for files

* checkstyle
  • Loading branch information
stefan-kolb authored and Siedlerchr committed Jan 5, 2019
1 parent 1ae85bc commit 69da3aa
Show file tree
Hide file tree
Showing 4 changed files with 70 additions and 1 deletion.
2 changes: 2 additions & 0 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,8 @@ dependencies {
compile 'org.apache.pdfbox:fontbox:2.0.13'
compile 'org.apache.pdfbox:xmpbox:2.0.13'

compile group: 'org.apache.tika', name: 'tika-core', version: '1.20'

// required for reading write-protected PDFs - see https://github.com/JabRef/jabref/pull/942#issuecomment-209252635
compile 'org.bouncycastle:bcprov-jdk15on:1.60'

Expand Down
48 changes: 47 additions & 1 deletion src/main/java/org/jabref/model/util/FileHelper.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
package org.jabref.model.util;

import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
Expand All @@ -12,6 +16,14 @@
import org.jabref.model.database.BibDatabaseContext;
import org.jabref.model.metadata.FilePreferences;

import org.apache.tika.config.TikaConfig;
import org.apache.tika.detect.Detector;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.mime.MimeType;
import org.apache.tika.mime.MimeTypeException;
import org.apache.tika.parser.AutoDetectParser;

public class FileHelper {

/**
Expand All @@ -31,12 +43,37 @@ public static Optional<String> getFileExtension(Path file) {
* @return The extension (without leading dot), trimmed and in lowercase.
*/
public static Optional<String> getFileExtension(String fileName) {
    if (isUrl(fileName)) {
        // For URLs the extension is derived from the content type detected on the
        // downloaded stream rather than from the textual ending of the address.
        Metadata metadata = new Metadata();
        metadata.add(Metadata.RESOURCE_NAME_KEY, fileName);

        try (InputStream is = new URL(fileName).openStream()) {
            return detectExtension(is, metadata);
        } catch (IOException | MimeTypeException e) {
            // Best effort: a resource that cannot be read or classified has no known extension.
            return Optional.empty();
        }
    }

    // Only a dot inside the last path segment can start an extension. Without this
    // restriction "/home/user.name/README" would yield "name/readme" and a dot file
    // inside a directory ("/home/.gitignore") would be treated differently from the
    // bare name ".gitignore".
    int nameStart = Math.max(fileName.lastIndexOf('/'), fileName.lastIndexOf('\\')) + 1;
    int dotPosition = fileName.lastIndexOf('.');
    if ((dotPosition > nameStart) && (dotPosition < (fileName.length() - 1))) {
        return Optional.of(fileName.substring(dotPosition + 1).trim().toLowerCase(Locale.ROOT));
    }
    return Optional.empty();
}

/**
 * Detects the media type of the given stream with Tika and maps it to a file extension.
 *
 * @param is       the stream to inspect; it is wrapped in a {@link BufferedInputStream} and is not closed here
 * @param metaData the Tika metadata handed to the detector together with the stream
 * @return the extension registered for the detected type with its first character removed
 *         (presumably the leading dot, e.g. ".pdf" becomes "pdf"), or an empty Optional if
 *         the type has no extension
 * @throws IOException       if the detector fails while reading the stream
 * @throws MimeTypeException if the detected media type name is rejected by the MIME repository
 */
private static Optional<String> detectExtension(InputStream is, Metadata metaData) throws IOException, MimeTypeException {
    Detector typeDetector = new AutoDetectParser().getDetector();
    MediaType detectedType = typeDetector.detect(new BufferedInputStream(is), metaData);

    String rawExtension = TikaConfig.getDefaultConfig()
                                    .getMimeRepository()
                                    .forName(detectedType.toString())
                                    .getExtension();

    return rawExtension.isEmpty() ? Optional.empty() : Optional.of(rawExtension.substring(1));
}

/**
Expand Down Expand Up @@ -129,4 +166,13 @@ private static Optional<Path> expandFilename(String filename, Path directory) {
return Optional.empty();
}
}

/**
 * Checks whether the given string can be parsed as a {@link URL}.
 *
 * @param url the string to check; may be {@code null}
 * @return {@code true} if a {@link URL} can be constructed from the string,
 *         {@code false} if it is {@code null} or not a well-formed URL
 */
private static boolean isUrl(String url) {
    if (url == null) {
        return false;
    }

    try {
        new URL(url);
        return true;
    } catch (MalformedURLException e) {
        // Not a parsable URL (e.g. a plain file name or path): report it as such.
        return false;
    }
}
}
21 changes: 21 additions & 0 deletions src/test/java/org/jabref/model/util/FileHelperTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
package org.jabref.model.util;

import java.util.Optional;

import org.junit.jupiter.api.Test;

import static org.junit.jupiter.api.Assertions.assertEquals;

/**
 * Tests for the file extension lookup offered by {@link FileHelper}.
 */
class FileHelperTest {

    /**
     * Resolves the bundled test resource to a local path and expects its extension to be "pdf".
     */
    @Test
    public void extractFileExtension() {
        String localPdfPath = FileHelperTest.class.getResource("pdffile.pdf").getPath();

        Optional<String> extension = FileHelper.getFileExtension(localPdfPath);

        assertEquals(Optional.of("pdf"), extension);
    }

    /**
     * Passes a remote address instead of a local path and expects the extension to be "pdf".
     */
    @Test
    public void fileExtensionFromUrl() {
        // NOTE(review): looks like this needs network access to the remote host - confirm
        String pdfUrl = "https://link.springer.com/content/pdf/10.1007%2Fs40955-018-0121-9.pdf";

        Optional<String> extension = FileHelper.getFileExtension(pdfUrl);

        assertEquals(Optional.of("pdf"), extension);
    }
}
Binary file not shown.

0 comments on commit 69da3aa

Please sign in to comment.