Skip to content

Commit

Permalink
Resolves #2431 Check more results returned by CrossRef API for matching
Browse files Browse the repository at this point in the history
  • Loading branch information
stefan-kolb committed Feb 8, 2017
1 parent 89c5643 commit 4e4932c
Showing 1 changed file with 39 additions and 29 deletions.
68 changes: 39 additions & 29 deletions src/main/java/net/sf/jabref/logic/importer/fetcher/CrossRef.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,17 @@
*/
public class CrossRef {
private static final Log LOGGER = LogFactory.getLog(CrossRef.class);
private static final RemoveBracesFormatter REMOVE_BRACES_FORMATTER = new RemoveBracesFormatter();

private static final String API_URL = "http://api.crossref.org";
// number of results to look up from the CrossRef API
private static final int API_RESULTS = 5;

private static final Levenshtein METRIC_DISTANCE = new Levenshtein();
// edit distance threshold for entry title comparison
private static final int METRIC_THRESHOLD = 4;

private static final RemoveBracesFormatter REMOVE_BRACES_FORMATTER = new RemoveBracesFormatter();

public static Optional<DOI> findDOI(BibEntry entry) {
Objects.requireNonNull(entry);
Optional<DOI> doi = Optional.empty();
Expand All @@ -49,15 +54,16 @@ public static Optional<DOI> findDOI(BibEntry entry) {
try {
HttpResponse<JsonNode> response = Unirest.get(API_URL + "/works")
.queryString("query", query)
.queryString("rows", "1")
.queryString("rows", API_RESULTS)
.asJson();

JSONArray items = response.getBody().getObject().getJSONObject("message").getJSONArray("items");
// quality check
if (checkValidity(entry, items)) {
String dataDOI = items.getJSONObject(0).getString("DOI");
LOGGER.debug("DOI " + dataDOI + " for " + title.get() + " found.");
return DOI.build(dataDOI);
Optional<String> dataDoi = findMatchingEntry(entry, items);

if (dataDoi.isPresent()) {
LOGGER.debug("DOI " + dataDoi.get() + " for " + title.get() + " found.");
return DOI.build(dataDoi.get());
}
} catch (UnirestException e) {
LOGGER.warn("Unable to query CrossRef API: " + e.getMessage(), e);
Expand All @@ -84,33 +90,37 @@ private static String enhanceQuery(String query, BibEntry entry) {
return enhancedQuery.toString();
}

private static boolean checkValidity(BibEntry entry, JSONArray result) {
private static Optional<String> findMatchingEntry(BibEntry entry, JSONArray results) {
final String entryTitle = REMOVE_BRACES_FORMATTER.format(entry.getLatexFreeField(FieldName.TITLE).orElse(""));

// currently only title-based
// title: [ "How the Mind Hurts and Heals the Body." ]
// subtitle: [ "" ]
try {
// title
JSONObject data = result.getJSONObject(0);
String dataTitle = data.getJSONArray("title").getString(0);

if (editDistanceIgnoreCase(entryTitle, dataTitle) <= METRIC_THRESHOLD) {
return true;
}

// subtitle
// additional check, as sometimes subtitle is needed but sometimes only duplicates the title
if (data.getJSONArray("subtitle").length() > 0) {
String dataWithSubTitle = dataTitle + " " + data.getJSONArray("subtitle").getString(0);

return editDistanceIgnoreCase(entryTitle, dataWithSubTitle) <= METRIC_THRESHOLD;
for (int i = 0; i < results.length(); i++) {
// currently only title-based
// title: [ "How the Mind Hurts and Heals the Body." ]
// subtitle: [ "" ]
try {
// title
JSONObject data = results.getJSONObject(i);
String dataTitle = data.getJSONArray("title").getString(0);

if (editDistanceIgnoreCase(entryTitle, dataTitle) <= METRIC_THRESHOLD) {
return Optional.of(data.getString("DOI"));
}

// subtitle
// additional check, as sometimes subtitle is needed but sometimes only duplicates the title
if (data.getJSONArray("subtitle").length() > 0) {
String dataWithSubTitle = dataTitle + " " + data.getJSONArray("subtitle").getString(0);

if (editDistanceIgnoreCase(entryTitle, dataWithSubTitle) <= METRIC_THRESHOLD) {
return Optional.of(data.getString("DOI"));
}
}
} catch(JSONException ex) {
return Optional.empty();
}

return false;
} catch(JSONException ex) {
return false;
}

return Optional.empty();
}

private static double editDistanceIgnoreCase(String a, String b) {
Expand Down

0 comments on commit 4e4932c

Please sign in to comment.