Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Check more results returned by CrossRef API for matching #2531

Merged
merged 2 commits into from
Feb 9, 2017
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 40 additions & 29 deletions src/main/java/net/sf/jabref/logic/importer/fetcher/CrossRef.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,17 @@
*/
public class CrossRef {
private static final Log LOGGER = LogFactory.getLog(CrossRef.class);
private static final RemoveBracesFormatter REMOVE_BRACES_FORMATTER = new RemoveBracesFormatter();

private static final String API_URL = "http://api.crossref.org";
// number of results to look up from the CrossRef API
private static final int API_RESULTS = 5;

private static final Levenshtein METRIC_DISTANCE = new Levenshtein();
// edit distance threshold for entry title comparison
private static final int METRIC_THRESHOLD = 4;

private static final RemoveBracesFormatter REMOVE_BRACES_FORMATTER = new RemoveBracesFormatter();

public static Optional<DOI> findDOI(BibEntry entry) {
Objects.requireNonNull(entry);
Optional<DOI> doi = Optional.empty();
Expand All @@ -49,15 +54,16 @@ public static Optional<DOI> findDOI(BibEntry entry) {
try {
HttpResponse<JsonNode> response = Unirest.get(API_URL + "/works")
.queryString("query", query)
.queryString("rows", "1")
.queryString("rows", API_RESULTS)
.asJson();

JSONArray items = response.getBody().getObject().getJSONObject("message").getJSONArray("items");
// quality check
if (checkValidity(entry, items)) {
String dataDOI = items.getJSONObject(0).getString("DOI");
LOGGER.debug("DOI " + dataDOI + " for " + title.get() + " found.");
return DOI.build(dataDOI);
Optional<String> dataDoi = findMatchingEntry(entry, items);

if (dataDoi.isPresent()) {
LOGGER.debug("DOI " + dataDoi.get() + " for " + title.get() + " found.");
return DOI.build(dataDoi.get());
}
} catch (UnirestException e) {
LOGGER.warn("Unable to query CrossRef API: " + e.getMessage(), e);
Expand All @@ -84,33 +90,38 @@ private static String enhanceQuery(String query, BibEntry entry) {
return enhancedQuery.toString();
}

private static boolean checkValidity(BibEntry entry, JSONArray result) {
private static Optional<String> findMatchingEntry(BibEntry entry, JSONArray results) {
final String entryTitle = REMOVE_BRACES_FORMATTER.format(entry.getLatexFreeField(FieldName.TITLE).orElse(""));

// currently only title-based
// title: [ "How the Mind Hurts and Heals the Body." ]
// subtitle: [ "" ]
try {
// title
JSONObject data = result.getJSONObject(0);
String dataTitle = data.getJSONArray("title").getString(0);

if (editDistanceIgnoreCase(entryTitle, dataTitle) <= METRIC_THRESHOLD) {
return true;
}

// subtitle
// additional check, as sometimes subtitle is needed but sometimes only duplicates the title
if (data.getJSONArray("subtitle").length() > 0) {
String dataWithSubTitle = dataTitle + " " + data.getJSONArray("subtitle").getString(0);

return editDistanceIgnoreCase(entryTitle, dataWithSubTitle) <= METRIC_THRESHOLD;
for (int i = 0; i < results.length(); i++) {
// currently only title-based
// title: [ "How the Mind Hurts and Heals the Body." ]
// subtitle: [ "" ]
try {
// title
JSONObject data = results.getJSONObject(i);
String dataTitle = data.getJSONArray("title").getString(0);

if (editDistanceIgnoreCase(entryTitle, dataTitle) <= METRIC_THRESHOLD) {
return Optional.of(data.getString("DOI"));
}

// subtitle
// additional check, as sometimes subtitle is needed but sometimes only duplicates the title
if (data.getJSONArray("subtitle").length() > 0) {
String dataWithSubTitle = dataTitle + " " + data.getJSONArray("subtitle").getString(0);

if (editDistanceIgnoreCase(entryTitle, dataWithSubTitle) <= METRIC_THRESHOLD) {
return Optional.of(data.getString("DOI"));
}
}
} catch(JSONException ex) {
LOGGER.warn("CrossRef API JSON format has changed: " + ex.getMessage());
return Optional.empty();
}

return false;
} catch(JSONException ex) {
return false;
}

return Optional.empty();
}

private static double editDistanceIgnoreCase(String a, String b) {
Expand Down