From 192e4dd19a48baff12d7b34a8b728859ba723da3 Mon Sep 17 00:00:00 2001 From: Stefan Kolb Date: Thu, 9 Feb 2017 10:26:24 +0100 Subject: [PATCH] Check more results returned by CrossRef API for matching (#2531) * Resolves #2431 Check more results returned by CrossRef API for matching * Log error --- .../logic/importer/fetcher/CrossRef.java | 77 ++++++++++++------- 1 file changed, 48 insertions(+), 29 deletions(-) diff --git a/src/main/java/net/sf/jabref/logic/importer/fetcher/CrossRef.java b/src/main/java/net/sf/jabref/logic/importer/fetcher/CrossRef.java index 19a472ada5b..4cf303f5151 100644 --- a/src/main/java/net/sf/jabref/logic/importer/fetcher/CrossRef.java +++ b/src/main/java/net/sf/jabref/logic/importer/fetcher/CrossRef.java @@ -27,12 +27,17 @@ */ public class CrossRef { private static final Log LOGGER = LogFactory.getLog(CrossRef.class); - private static final RemoveBracesFormatter REMOVE_BRACES_FORMATTER = new RemoveBracesFormatter(); private static final String API_URL = "http://api.crossref.org"; + // number of results to lookup from crossref API + private static final int API_RESULTS = 5; + private static final Levenshtein METRIC_DISTANCE = new Levenshtein(); + // edit distance threshold for entry title comparison private static final int METRIC_THRESHOLD = 4; + private static final RemoveBracesFormatter REMOVE_BRACES_FORMATTER = new RemoveBracesFormatter(); + public static Optional findDOI(BibEntry entry) { Objects.requireNonNull(entry); Optional doi = Optional.empty(); @@ -49,15 +54,16 @@ public static Optional findDOI(BibEntry entry) { try { HttpResponse response = Unirest.get(API_URL + "/works") .queryString("query", query) - .queryString("rows", "1") + .queryString("rows", API_RESULTS) .asJson(); JSONArray items = response.getBody().getObject().getJSONObject("message").getJSONArray("items"); // quality check - if (checkValidity(entry, items)) { - String dataDOI = items.getJSONObject(0).getString("DOI"); - LOGGER.debug("DOI " + dataDOI + " for " + 
title.get() + " found."); - return DOI.build(dataDOI); + Optional<String> dataDoi = findMatchingEntry(entry, items); + + if (dataDoi.isPresent()) { + LOGGER.debug("DOI " + dataDoi.get() + " for " + title.get() + " found."); + return DOI.build(dataDoi.get()); } } catch (UnirestException e) { LOGGER.warn("Unable to query CrossRef API: " + e.getMessage(), e); @@ -84,33 +90,46 @@ private static String enhanceQuery(String query, BibEntry entry) { return enhancedQuery.toString(); } - private static boolean checkValidity(BibEntry entry, JSONArray result) { + /** + * Returns the DOI of the first result whose title, or title plus first subtitle, is within + * METRIC_THRESHOLD of the entry's title as measured by editDistanceIgnoreCase. + * A result that cannot be read is logged and skipped so the remaining results are still checked. + * @param entry the entry whose title is compared against the results + * @param results the "items" array taken from the CrossRef response + * @return the matching DOI string, or an empty Optional if no result matches + */ + private static Optional<String> findMatchingEntry(BibEntry entry, JSONArray results) { final String entryTitle = REMOVE_BRACES_FORMATTER.format(entry.getLatexFreeField(FieldName.TITLE).orElse("")); - // currently only title-based - // title: [ "How the Mind Hurts and Heals the Body." ] - // subtitle: [ "" ] - try { - // title - JSONObject data = result.getJSONObject(0); - String dataTitle = data.getJSONArray("title").getString(0); - - if (editDistanceIgnoreCase(entryTitle, dataTitle) <= METRIC_THRESHOLD) { - return true; - } - - // subtitle - // additional check, as sometimes subtitle is needed but sometimes only duplicates the title - if (data.getJSONArray("subtitle").length() > 0) { - String dataWithSubTitle = dataTitle + " " + data.getJSONArray("subtitle").getString(0); - - return editDistanceIgnoreCase(entryTitle, dataWithSubTitle) <= METRIC_THRESHOLD; + for (int i = 0; i < results.length(); i++) { + // currently only title-based + // title: [ "How the Mind Hurts and Heals the Body." 
] + // subtitle: [ "" ] + try { + // title + JSONObject data = results.getJSONObject(i); + String dataTitle = data.getJSONArray("title").getString(0); + + if (editDistanceIgnoreCase(entryTitle, dataTitle) <= METRIC_THRESHOLD) { + return Optional.of(data.getString("DOI")); + } + + // subtitle + // additional check, as sometimes subtitle is needed but sometimes only duplicates the title + if (data.getJSONArray("subtitle").length() > 0) { + String dataWithSubTitle = dataTitle + " " + data.getJSONArray("subtitle").getString(0); + + if (editDistanceIgnoreCase(entryTitle, dataWithSubTitle) <= METRIC_THRESHOLD) { + return Optional.of(data.getString("DOI")); + } + } + } catch (JSONException ex) { + LOGGER.warn("CrossRef API JSON format has changed: " + ex.getMessage(), ex); + // skip only this result; the remaining results are still checked } - - return false; - } catch(JSONException ex) { - return false; } + + return Optional.empty(); } private static double editDistanceIgnoreCase(String a, String b) {