Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Check more results returned by CrossRef API for matching #2531

Merged
merged 2 commits into from
Feb 9, 2017
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 39 additions & 29 deletions src/main/java/net/sf/jabref/logic/importer/fetcher/CrossRef.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,17 @@
*/
public class CrossRef {
private static final Log LOGGER = LogFactory.getLog(CrossRef.class);
private static final RemoveBracesFormatter REMOVE_BRACES_FORMATTER = new RemoveBracesFormatter();

private static final String API_URL = "http://api.crossref.org";
// number of results to lookup from crossref API
private static final int API_RESULTS = 5;

private static final Levenshtein METRIC_DISTANCE = new Levenshtein();
// edit distance threshold for entry title comparison
private static final int METRIC_THRESHOLD = 4;

private static final RemoveBracesFormatter REMOVE_BRACES_FORMATTER = new RemoveBracesFormatter();

public static Optional<DOI> findDOI(BibEntry entry) {
Objects.requireNonNull(entry);
Optional<DOI> doi = Optional.empty();
Expand All @@ -49,15 +54,16 @@ public static Optional<DOI> findDOI(BibEntry entry) {
try {
HttpResponse<JsonNode> response = Unirest.get(API_URL + "/works")
.queryString("query", query)
.queryString("rows", "1")
.queryString("rows", API_RESULTS)
.asJson();

JSONArray items = response.getBody().getObject().getJSONObject("message").getJSONArray("items");
// quality check
if (checkValidity(entry, items)) {
String dataDOI = items.getJSONObject(0).getString("DOI");
LOGGER.debug("DOI " + dataDOI + " for " + title.get() + " found.");
return DOI.build(dataDOI);
Optional<String> dataDoi = findMatchingEntry(entry, items);

if (dataDoi.isPresent()) {
LOGGER.debug("DOI " + dataDoi.get() + " for " + title.get() + " found.");
return DOI.build(dataDoi.get());
}
} catch (UnirestException e) {
LOGGER.warn("Unable to query CrossRef API: " + e.getMessage(), e);
Expand All @@ -84,33 +90,37 @@ private static String enhanceQuery(String query, BibEntry entry) {
return enhancedQuery.toString();
}

private static boolean checkValidity(BibEntry entry, JSONArray result) {
private static Optional<String> findMatchingEntry(BibEntry entry, JSONArray results) {
final String entryTitle = REMOVE_BRACES_FORMATTER.format(entry.getLatexFreeField(FieldName.TITLE).orElse(""));

// currently only title-based
// title: [ "How the Mind Hurts and Heals the Body." ]
// subtitle: [ "" ]
try {
// title
JSONObject data = result.getJSONObject(0);
String dataTitle = data.getJSONArray("title").getString(0);

if (editDistanceIgnoreCase(entryTitle, dataTitle) <= METRIC_THRESHOLD) {
return true;
}

// subtitle
// additional check, as sometimes subtitle is needed but sometimes only duplicates the title
if (data.getJSONArray("subtitle").length() > 0) {
String dataWithSubTitle = dataTitle + " " + data.getJSONArray("subtitle").getString(0);

return editDistanceIgnoreCase(entryTitle, dataWithSubTitle) <= METRIC_THRESHOLD;
for (int i = 0; i < results.length(); i++) {
// currently only title-based
// title: [ "How the Mind Hurts and Heals the Body." ]
// subtitle: [ "" ]
try {
// title
JSONObject data = results.getJSONObject(i);
String dataTitle = data.getJSONArray("title").getString(0);

if (editDistanceIgnoreCase(entryTitle, dataTitle) <= METRIC_THRESHOLD) {
return Optional.of(data.getString("DOI"));
}

// subtitle
// additional check, as sometimes subtitle is needed but sometimes only duplicates the title
if (data.getJSONArray("subtitle").length() > 0) {
String dataWithSubTitle = dataTitle + " " + data.getJSONArray("subtitle").getString(0);

if (editDistanceIgnoreCase(entryTitle, dataWithSubTitle) <= METRIC_THRESHOLD) {
return Optional.of(data.getString("DOI"));
}
}
} catch(JSONException ex) {
return Optional.empty();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe it would be useful to add logging here in case the JSON format changes?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should be detected by our tests, shouldn't it?

However, logging here does no harm.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, the test will fail.

}

return false;
} catch(JSONException ex) {
return false;
}

return Optional.empty();
}

private static double editDistanceIgnoreCase(String a, String b) {
Expand Down