Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

mEDRA DOI fetcher implementation. #6641

Merged
merged 26 commits into from
Jul 9, 2020
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
2d75da2
mEDRA DOI fetcher implementation.
alchimos Jun 26, 2020
df31be7
mEDRA DOI fetcher implementation
alchimos Jun 28, 2020
b521e7c
mEDRA DOI fetcher implementation
alchimos Jun 30, 2020
3539079
mEDRA DOI fetcher implementation
alchimos Jun 30, 2020
1b78fd5
mEDRA DOI fetcher implementation
alchimos Jul 1, 2020
81d5cc3
Merge branch 'master' into medra-fetcher
alchimos Jul 1, 2020
51423cb
mEDRA DOI fetcher implementation
alchimos Jul 1, 2020
a42904c
mEDRA DOI fetcher implementation
alchimos Jul 1, 2020
c0a329f
mEDRA DOI fetcher implementation
alchimos Jul 1, 2020
85fc98f
Merge branch 'medra-fetcher' of https://github.com/mind000/jabref into
alchimos Jul 1, 2020
2ede4ac
mEDRA DOI fetcher implementation
alchimos Jul 1, 2020
a108974
mEDRA DOI fetcher implementation
alchimos Jul 1, 2020
4f96aab
Revert wrong merge
koppor Jul 1, 2020
4cfc672
Merge remote-tracking branch 'origin/master' into fix
koppor Jul 1, 2020
4ae1788
mEDRA DOI fetcher implementation
alchimos Jul 5, 2020
8c7f1b5
Merge branch 'master' into medra-fetcher
alchimos Jul 7, 2020
5504e52
Rewrite JsonReader to accept an empty input stream
koppor Jul 7, 2020
6fd81c3
Introduce "getUrlDownloadForIdentifier" and shrink fetcher code
koppor Jul 7, 2020
9b9b14f
Use parameterized tests (and add ISSN to Cisternino Paola)
koppor Jul 7, 2020
3596442
Fix parsing of names
koppor Jul 7, 2020
d17af0f
Fix checkstyle
koppor Jul 7, 2020
dd0a665
Move "getUrlDownload" up to WebFetcher
koppor Jul 7, 2020
4fa073e
Use new method "getUrlDownload"
koppor Jul 7, 2020
faa304a
Merge pull request #1 from JabRef/improveJsonReader
alchimos Jul 7, 2020
4523f1d
mEDRA DOI fetcher implementation
alchimos Jul 9, 2020
1793870
Merge branch 'master' into medra-fetcher
Siedlerchr Jul 9, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve

### Added

- We added a new fetcher to enable users to search mEDRA DOIs [#6602](https://github.com/JabRef/jabref/issues/6602)
- We added default values for delimiters in Add Subgroup window [#6624](https://github.com/JabRef/jabref/issues/6624)
- We improved responsiveness of general fields specification dialog window. [#6643](https://github.com/JabRef/jabref/issues/6604)
- We added support for importing ris file and load DOI [#6530](https://github.com/JabRef/jabref/issues/6530)
Expand Down
2 changes: 2 additions & 0 deletions src/main/java/org/jabref/logic/importer/WebFetchers.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import org.jabref.logic.importer.fetcher.LibraryOfCongress;
import org.jabref.logic.importer.fetcher.MathSciNet;
import org.jabref.logic.importer.fetcher.MedlineFetcher;
import org.jabref.logic.importer.fetcher.Medra;
import org.jabref.logic.importer.fetcher.OpenAccessDoi;
import org.jabref.logic.importer.fetcher.RfcFetcher;
import org.jabref.logic.importer.fetcher.ScienceDirect;
Expand Down Expand Up @@ -121,6 +122,7 @@ public static SortedSet<IdBasedFetcher> getIdBasedFetchers(ImportFormatPreferenc
set.add(new LibraryOfCongress(importFormatPreferences));
set.add(new IacrEprintFetcher(importFormatPreferences));
set.add(new RfcFetcher(importFormatPreferences));
set.add(new Medra());
return set;
}

Expand Down
43 changes: 43 additions & 0 deletions src/main/java/org/jabref/logic/importer/fetcher/CrossRef.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
package org.jabref.logic.importer.fetcher;

import java.io.IOException;
import java.io.InputStream;
import java.io.PushbackInputStream;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
Expand All @@ -17,6 +20,7 @@
import org.jabref.logic.importer.Parser;
import org.jabref.logic.importer.SearchBasedParserFetcher;
import org.jabref.logic.importer.util.JsonReader;
import org.jabref.logic.net.URLDownload;
import org.jabref.logic.util.strings.StringSimilarity;
import org.jabref.model.cleanup.FieldFormatterCleanup;
import org.jabref.model.entry.AuthorList;
Expand All @@ -25,6 +29,7 @@
import org.jabref.model.entry.identifier.DOI;
import org.jabref.model.entry.types.EntryType;
import org.jabref.model.entry.types.StandardEntryType;
import org.jabref.model.strings.StringUtil;
import org.jabref.model.util.OptionalUtil;

import kong.unirest.json.JSONArray;
Expand Down Expand Up @@ -98,6 +103,44 @@ public Parser getParser() {
};
}

@Override
public Optional<BibEntry> performSearchById(String identifier) throws FetcherException {
if (StringUtil.isBlank(identifier)) {
return Optional.empty();
}

try (InputStream stream = new URLDownload(getURLForID(identifier)).asInputStream();
koppor marked this conversation as resolved.
Show resolved Hide resolved
PushbackInputStream pushbackInputStream = new PushbackInputStream(stream)) {

List<BibEntry> fetchedEntries = new ArrayList<>();

// check if there is anything to read
int readByte;
readByte = pushbackInputStream.read();
if (readByte != -1) {
pushbackInputStream.unread(readByte);
fetchedEntries = getParser().parseEntries(pushbackInputStream);
}

if (fetchedEntries.isEmpty()) {
return Optional.empty();
}

BibEntry entry = fetchedEntries.get(0);

// Post-cleanup
doPostCleanup(entry);

return Optional.of(entry);
} catch (URISyntaxException e) {
throw new FetcherException("Search URI is malformed", e);
} catch (IOException e) {
throw new FetcherException("A network error occurred", e);
} catch (ParseException e) {
throw new FetcherException("An internal parser error occurred", e);
}
}

@Override
public void doPostCleanup(BibEntry entry) {
// Sometimes the fetched entry returns the title also in the subtitle field; in this case only keep the title field
Expand Down
47 changes: 46 additions & 1 deletion src/main/java/org/jabref/logic/importer/fetcher/DoiFetcher.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,17 @@
import org.jabref.model.util.DummyFileUpdateMonitor;
import org.jabref.model.util.OptionalUtil;

import kong.unirest.json.JSONArray;
import kong.unirest.json.JSONException;
import kong.unirest.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class DoiFetcher implements IdBasedFetcher, EntryBasedFetcher {
public static final String NAME = "DOI";

private static final Logger LOGGER = LoggerFactory.getLogger(DoiFetcher.class);

private final ImportFormatPreferences preferences;

public DoiFetcher(ImportFormatPreferences preferences) {
Expand All @@ -47,8 +55,17 @@ public Optional<HelpFile> getHelpPage() {
@Override
public Optional<BibEntry> performSearchById(String identifier) throws FetcherException {
Optional<DOI> doi = DOI.parse(identifier);

try {
if (doi.isPresent()) {
alchimos marked this conversation as resolved.
Show resolved Hide resolved
Optional<BibEntry> fetchedEntry;

// mEDRA does not return a parsable bibtex string
if (getAgency(doi.get()).isPresent() && "medra".equalsIgnoreCase(getAgency(doi.get()).get())) {
fetchedEntry = new Medra().performSearchById(identifier);
alchimos marked this conversation as resolved.
Show resolved Hide resolved
alchimos marked this conversation as resolved.
Show resolved Hide resolved
return fetchedEntry;
}

URL doiURL = new URL(doi.get().getURIAsASCIIString());

// BibTeX data
Expand All @@ -57,8 +74,9 @@ public Optional<BibEntry> performSearchById(String identifier) throws FetcherExc
String bibtexString = download.asString();

// BibTeX entry
Optional<BibEntry> fetchedEntry = BibtexParser.singleFromString(bibtexString, preferences, new DummyFileUpdateMonitor());
fetchedEntry = BibtexParser.singleFromString(bibtexString, preferences, new DummyFileUpdateMonitor());
fetchedEntry.ifPresent(this::doPostCleanup);

return fetchedEntry;
} else {
throw new FetcherException(Localization.lang("Invalid DOI: '%0'.", identifier));
Expand All @@ -67,6 +85,8 @@ public Optional<BibEntry> performSearchById(String identifier) throws FetcherExc
throw new FetcherException(Localization.lang("Connection error"), e);
} catch (ParseException e) {
throw new FetcherException("Could not parse BibTeX entry", e);
} catch (JSONException e) {
throw new FetcherException("Could not retrieve Registration Agency", e);
}
}

Expand All @@ -84,4 +104,29 @@ public List<BibEntry> performSearch(BibEntry entry) throws FetcherException {
return Collections.emptyList();
}
}

/**
* Returns registration agency. Optional.empty() if no agency is found.
*
* @param doi the doi to be searched
*/
public Optional<String> getAgency(DOI doi) throws IOException {
alchimos marked this conversation as resolved.
Show resolved Hide resolved
Optional<String> agency = Optional.empty();

try {
URLDownload download = new URLDownload(new URL(DOI.AGENCY_RESOLVER + "/" + doi.getDOI()));
JSONObject response = new JSONArray(download.asString()).getJSONObject(0);

if (response != null) {
agency = Optional.ofNullable(response.optString("RA"));
}

} catch (JSONException e) {
LOGGER.error("Cannot parse agency fetcher repsonse to JSON");
return Optional.empty();
}


return agency;
}
}
173 changes: 173 additions & 0 deletions src/main/java/org/jabref/logic/importer/fetcher/Medra.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
package org.jabref.logic.importer.fetcher;

import java.io.IOException;
import java.io.InputStream;
import java.io.PushbackInputStream;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Optional;

import org.jabref.logic.importer.FetcherException;
import org.jabref.logic.importer.IdBasedParserFetcher;
import org.jabref.logic.importer.ParseException;
import org.jabref.logic.importer.Parser;
import org.jabref.logic.importer.util.JsonReader;
import org.jabref.logic.importer.util.MediaTypes;
import org.jabref.logic.net.URLDownload;
import org.jabref.model.entry.AuthorList;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.field.StandardField;
import org.jabref.model.entry.types.EntryType;
import org.jabref.model.entry.types.StandardEntryType;
import org.jabref.model.strings.StringUtil;

import kong.unirest.json.JSONArray;
import kong.unirest.json.JSONException;
import kong.unirest.json.JSONObject;
import org.apache.http.client.utils.URIBuilder;

/**
* A class for fetching DOIs from Medra
*
* @see <a href="https://data.medra.org">mEDRA Content Negotiation API</a> for an overview of the API
* <p>
* It requires "Accept" request Header attribute to be set to desired content-type.
*/
public class Medra implements IdBasedParserFetcher {

public static final String API_URL = "https://data.medra.org";

@Override
public String getName() {
return "mEDRA";
}

@Override
public Parser getParser() {
return inputStream -> {
JSONObject response = JsonReader.toJsonObject(inputStream);

return Collections.singletonList(jsonItemToBibEntry(response));
};
}

private BibEntry jsonItemToBibEntry(JSONObject item) throws ParseException {
try {

return new BibEntry(convertType(item.getString("type")))
.withField(StandardField.TITLE, item.getString("title"))
.withField(StandardField.AUTHOR, toAuthors(item.optJSONArray("author")))
.withField(StandardField.YEAR,
Optional.ofNullable(item.optJSONObject("issued"))
.map(array -> array.optJSONArray("date-parts"))
.map(array -> array.optJSONArray(0))
.map(array -> array.optInt(0))
.map(year -> Integer.toString(year)).orElse(""))
.withField(StandardField.DOI, item.getString("DOI"))
.withField(StandardField.PAGES, item.optString("page"))
.withField(StandardField.ISSN, item.optString("ISSN"))
.withField(StandardField.JOURNAL, item.optString("container-title"))
.withField(StandardField.PUBLISHER, item.optString("publisher"))
.withField(StandardField.URL, item.optString("URL"))
.withField(StandardField.VOLUME, item.optString("volume"));

} catch (JSONException exception) {
throw new ParseException("mEdRA API JSON format has changed", exception);
}
}

private EntryType convertType(String type) {
switch (type) {
case "article-journal":
return StandardEntryType.Article;
default:
return StandardEntryType.Misc;
}
}

private String toAuthors(JSONArray authors) {
if (authors == null) {
return "";
}

// input: list of {"literal":"A."}
AuthorList authorsParsed = new AuthorList();
String name = "";

for (int i = 0; i < authors.length(); i++) {
JSONObject author = authors.getJSONObject(i);
name = author.optString("literal", "") + " " + author.optString("family", "") + " " + author.optString("given", "");

authorsParsed.addAuthor(
name,
"",
"",
"",
"");

}
return authorsParsed.getAsFirstLastNamesWithAnd();
}

@Override
public Optional<BibEntry> performSearchById(String identifier) throws FetcherException {
koppor marked this conversation as resolved.
Show resolved Hide resolved
if (StringUtil.isBlank(identifier)) {
return Optional.empty();
}

try (InputStream stream = getUrlDownload(identifier).asInputStream();
PushbackInputStream pushbackInputStream = new PushbackInputStream(stream)) {

List<BibEntry> fetchedEntries = new ArrayList<>();

// check if there is anything to read
int readByte;
readByte = pushbackInputStream.read();
koppor marked this conversation as resolved.
Show resolved Hide resolved
if (readByte != -1) {
pushbackInputStream.unread(readByte);
fetchedEntries = getParser().parseEntries(pushbackInputStream);
}

if (fetchedEntries.isEmpty()) {
return Optional.empty();
}

BibEntry entry = fetchedEntries.get(0);

// Post-cleanup
doPostCleanup(entry);

return Optional.of(entry);
} catch (URISyntaxException e) {
throw new FetcherException("Search URI is malformed", e);
} catch (IOException e) {
// For some DOIs we get 500 error. mEDRA team explained this is due to DOIs recently moved from other agency but no yet fully registered.
// They say these should return 204 code and they will fix the misconfiguration
throw new FetcherException("A network error occurred", e);
} catch (ParseException e) {
throw new FetcherException("An internal parser error occurred", e);
}
}

@Override
public void doPostCleanup(BibEntry entry) {
IdBasedParserFetcher.super.doPostCleanup(entry);
}

public URLDownload getUrlDownload(String identifier) throws MalformedURLException, FetcherException, URISyntaxException {
URLDownload download = new URLDownload(getURLForID(identifier));
download.addHeader("Accept", MediaTypes.CITATIONSTYLES_JSON);
return download;
}

@Override
public URL getURLForID(String identifier) throws URISyntaxException, MalformedURLException, FetcherException {
URIBuilder uriBuilder = new URIBuilder(API_URL + "/" + identifier);
return uriBuilder.build().toURL();
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,5 @@
*/
public final class MediaTypes {
    // Media type string for BibTeX content
    public static final String APPLICATION_BIBTEX = "application/x-bibtex";
    // Media type string for citation style language (CSL) JSON content
    public static final String CITATIONSTYLES_JSON = "application/vnd.citationstyles.csl+json";

    private MediaTypes() {
        // Constants holder only; not meant to be instantiated
    }
}
7 changes: 6 additions & 1 deletion src/main/java/org/jabref/model/entry/identifier/DOI.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,14 @@
* (DOIs)</a> and <a href="http://shortdoi.org">Short DOIs</a>
*/
public class DOI implements Identifier {

public static final URI AGENCY_RESOLVER = URI.create("https://doi.org/doiRA");

private static final Logger LOGGER = LoggerFactory.getLogger(DOI.class);

// DOI/Short DOI resolver
private static final URI RESOLVER = URI.create("https://doi.org");

// Regex
// (see http://www.doi.org/doi_handbook/2_Numbering.html)
private static final String DOI_EXP = ""
Expand Down Expand Up @@ -238,7 +242,7 @@ public boolean equals(Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
if ((o == null) || (getClass() != o.getClass())) {
return false;
}
DOI other = (DOI) o;
Expand All @@ -249,4 +253,5 @@ public boolean equals(Object o) {
public int hashCode() {
return Objects.hash(doi.toLowerCase(Locale.ENGLISH));
}

}
Loading