Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

mEDRA DOI fetcher implementation. #6641

Merged
merged 26 commits into from
Jul 9, 2020
Merged
Show file tree
Hide file tree
Changes from 24 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
2d75da2
mEDRA DOI fetcher implementation.
alchimos Jun 26, 2020
df31be7
mEDRA DOI fetcher implementation
alchimos Jun 28, 2020
b521e7c
mEDRA DOI fetcher implementation
alchimos Jun 30, 2020
3539079
mEDRA DOI fetcher implementation
alchimos Jun 30, 2020
1b78fd5
mEDRA DOI fetcher implementation
alchimos Jul 1, 2020
81d5cc3
Merge branch 'master' into medra-fetcher
alchimos Jul 1, 2020
51423cb
mEDRA DOI fetcher implementation
alchimos Jul 1, 2020
a42904c
mEDRA DOI fetcher implementation
alchimos Jul 1, 2020
c0a329f
mEDRA DOI fetcher implementation
alchimos Jul 1, 2020
85fc98f
Merge branch 'medra-fetcher' of https://github.com/mind000/jabref into
alchimos Jul 1, 2020
2ede4ac
mEDRA DOI fetcher implementation
alchimos Jul 1, 2020
a108974
mEDRA DOI fetcher implementation
alchimos Jul 1, 2020
4f96aab
Revert wrong merge
koppor Jul 1, 2020
4cfc672
Merge remote-tracking branch 'origin/master' into fix
koppor Jul 1, 2020
4ae1788
mEDRA DOI fetcher implementation
alchimos Jul 5, 2020
8c7f1b5
Merge branch 'master' into medra-fetcher
alchimos Jul 7, 2020
5504e52
Rewrite JsonReader to accept an empty input stream
koppor Jul 7, 2020
6fd81c3
Introduce "getUrlDownloadForIdentifier" and shrink fetcher code
koppor Jul 7, 2020
9b9b14f
Use parameterized tests (and add ISSN to Cisternino Paola)
koppor Jul 7, 2020
3596442
Fix parsing of names
koppor Jul 7, 2020
d17af0f
Fix checkstyle
koppor Jul 7, 2020
dd0a665
Move "getUrlDownload" up to WebFetcher
koppor Jul 7, 2020
4fa073e
Use new method "getUrlDownload"
koppor Jul 7, 2020
faa304a
Merge pull request #1 from JabRef/improveJsonReader
alchimos Jul 7, 2020
4523f1d
mEDRA DOI fetcher implementation
alchimos Jul 9, 2020
1793870
Merge branch 'master' into medra-fetcher
Siedlerchr Jul 9, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve

### Added

- We added a new fetcher to enable users to search mEDRA DOIs [#6602](https://github.com/JabRef/jabref/issues/6602)
- We added default values for delimiters in Add Subgroup window [#6624](https://github.com/JabRef/jabref/issues/6624)
- We improved responsiveness of general fields specification dialog window. [#6604](https://github.com/JabRef/jabref/issues/6604)
- We added support for importing ris file and load DOI [#6530](https://github.com/JabRef/jabref/issues/6530)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ public interface IdBasedParserFetcher extends IdBasedFetcher {
*
* @param identifier the ID
*/
URL getURLForID(String identifier) throws URISyntaxException, MalformedURLException, FetcherException;
URL getUrlForIdentifier(String identifier) throws URISyntaxException, MalformedURLException, FetcherException;

/**
* Returns the parser used to convert the response to a list of {@link BibEntry}.
Expand Down Expand Up @@ -61,7 +61,7 @@ default Optional<BibEntry> performSearchById(String identifier) throws FetcherEx
return Optional.empty();
}

try (InputStream stream = new URLDownload(getURLForID(identifier)).asInputStream()) {
try (InputStream stream = getUrlDownload(getUrlForIdentifier(identifier)).asInputStream()) {
List<BibEntry> fetchedEntries = getParser().parseEntries(stream);

if (fetchedEntries.isEmpty()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,31 +43,20 @@ public interface SearchBasedParserFetcher extends SearchBasedFetcher {
* {@code new FieldFormatterCleanup(StandardField.TITLE, new RemoveBracesFormatter()).cleanup(entry);}
*
* By default, no cleanup is done.
*
* @param entry the entry to be cleaned-up
*/
default void doPostCleanup(BibEntry entry) {
// Intentionally empty: no cleanup is done by default
}

/**
 * Gets the {@link URLDownload} object for downloading content. Override this method if you need to send additional headers for the download.
 *
 * @param query The search query
 * @return a {@link URLDownload} for the URL that {@code getURLForQuery} returns for the given query
 * @throws MalformedURLException propagated from {@code getURLForQuery}
 * @throws FetcherException propagated from {@code getURLForQuery}
 * @throws URISyntaxException propagated from {@code getURLForQuery}
 */
default URLDownload getUrlDownload(String query) throws MalformedURLException, FetcherException, URISyntaxException {
return new URLDownload(getURLForQuery(query));
}

@Override
default List<BibEntry> performSearch(String query) throws FetcherException {
if (StringUtil.isBlank(query)) {
return Collections.emptyList();
}

try (InputStream stream = getUrlDownload(query).asInputStream()) {
try (InputStream stream = getUrlDownload(getURLForQuery(query)).asInputStream()) {
List<BibEntry> fetchedEntries = getParser().parseEntries(stream);

// Post-cleanup
Expand Down
9 changes: 9 additions & 0 deletions src/main/java/org/jabref/logic/importer/WebFetcher.java
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
package org.jabref.logic.importer;

import java.net.URL;
import java.util.Optional;

import org.jabref.logic.help.HelpFile;
import org.jabref.logic.net.URLDownload;

/**
* Searches web resources for bibliographic information.
Expand All @@ -25,4 +27,11 @@ public interface WebFetcher {
default Optional<HelpFile> getHelpPage() {
return Optional.empty(); // no help page by default; fetchers that have one override this method
}

/**
 * Creates the {@link URLDownload} that is used to download content from the given URL.
 * Override this method if additional headers have to be sent with the download.
 *
 * @param url the URL to download from
 * @return a new {@link URLDownload} for the given URL
 */
default URLDownload getUrlDownload(URL url) {
    URLDownload download = new URLDownload(url);
    return download;
}
}
2 changes: 2 additions & 0 deletions src/main/java/org/jabref/logic/importer/WebFetchers.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import org.jabref.logic.importer.fetcher.LibraryOfCongress;
import org.jabref.logic.importer.fetcher.MathSciNet;
import org.jabref.logic.importer.fetcher.MedlineFetcher;
import org.jabref.logic.importer.fetcher.Medra;
import org.jabref.logic.importer.fetcher.OpenAccessDoi;
import org.jabref.logic.importer.fetcher.RfcFetcher;
import org.jabref.logic.importer.fetcher.ScienceDirect;
Expand Down Expand Up @@ -121,6 +122,7 @@ public static SortedSet<IdBasedFetcher> getIdBasedFetchers(ImportFormatPreferenc
set.add(new LibraryOfCongress(importFormatPreferences));
set.add(new IacrEprintFetcher(importFormatPreferences));
set.add(new RfcFetcher(importFormatPreferences));
set.add(new Medra());
return set;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ public URL getURLForEntry(BibEntry entry) throws URISyntaxException, MalformedUR
* @return URL which points to a search URL for given identifier
*/
@Override
public URL getURLForID(String identifier) throws FetcherException, URISyntaxException, MalformedURLException {
public URL getUrlForIdentifier(String identifier) throws FetcherException, URISyntaxException, MalformedURLException {
String query = "doi:\"" + identifier + "\" OR " + "bibcode:\"" + identifier + "\"";
URIBuilder builder = new URIBuilder(API_SEARCH_URL);
builder.addParameter("q", query);
Expand Down Expand Up @@ -227,7 +227,7 @@ public Optional<BibEntry> performSearchById(String identifier) throws FetcherExc
}

try {
List<String> bibcodes = fetchBibcodes(getURLForID(identifier));
List<String> bibcodes = fetchBibcodes(getUrlForIdentifier(identifier));
List<BibEntry> fetchedEntries = performSearchByIds(bibcodes);

if (fetchedEntries.isEmpty()) {
Expand Down
36 changes: 22 additions & 14 deletions src/main/java/org/jabref/logic/importer/fetcher/CrossRef.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Optional;

Expand Down Expand Up @@ -69,31 +70,38 @@ public URL getURLForQuery(String query) throws URISyntaxException, MalformedURLE
}

@Override
public URL getURLForID(String identifier) throws URISyntaxException, MalformedURLException, FetcherException {
public URL getUrlForIdentifier(String identifier) throws URISyntaxException, MalformedURLException, FetcherException {
URIBuilder uriBuilder = new URIBuilder(API_URL + "/" + identifier);
return uriBuilder.build().toURL();
}

@Override
public Parser getParser() {
return inputStream -> {
JSONObject response = JsonReader.toJsonObject(inputStream).getJSONObject("message");

List<BibEntry> entries = new ArrayList<>();
if (response.has("items")) {
// Response contains a list
JSONArray items = response.getJSONArray("items");
for (int i = 0; i < items.length(); i++) {
JSONObject item = items.getJSONObject(i);
BibEntry entry = jsonItemToBibEntry(item);
entries.add(entry);
}
} else {
JSONObject response = JsonReader.toJsonObject(inputStream);
if (response.isEmpty()) {
return Collections.emptyList();
}

response = response.getJSONObject("message");
if (response.isEmpty()) {
return Collections.emptyList();
}

if (!response.has("items")) {
// Singleton response
BibEntry entry = jsonItemToBibEntry(response);
entries.add(entry);
return Collections.singletonList(entry);
}

// Response contains a list
JSONArray items = response.getJSONArray("items");
List<BibEntry> entries = new ArrayList<>(items.length());
for (int i = 0; i < items.length(); i++) {
JSONObject item = items.getJSONObject(i);
BibEntry entry = jsonItemToBibEntry(item);
entries.add(entry);
}
return entries;
};
}
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/org/jabref/logic/importer/fetcher/DiVA.java
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ public Optional<HelpFile> getHelpPage() {
}

@Override
public URL getURLForID(String identifier) throws URISyntaxException, MalformedURLException, FetcherException {
public URL getUrlForIdentifier(String identifier) throws URISyntaxException, MalformedURLException, FetcherException {
URIBuilder uriBuilder = new URIBuilder("http://www.diva-portal.org/smash/getreferences");

uriBuilder.addParameter("referenceFormat", "BibTex");
Expand Down
46 changes: 44 additions & 2 deletions src/main/java/org/jabref/logic/importer/fetcher/DoiFetcher.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import org.jabref.logic.importer.IdBasedFetcher;
import org.jabref.logic.importer.ImportFormatPreferences;
import org.jabref.logic.importer.ParseException;
import org.jabref.logic.importer.WebFetcher;
import org.jabref.logic.importer.fileformat.BibtexParser;
import org.jabref.logic.importer.util.MediaTypes;
import org.jabref.logic.l10n.Localization;
Expand All @@ -25,9 +26,17 @@
import org.jabref.model.util.DummyFileUpdateMonitor;
import org.jabref.model.util.OptionalUtil;

import kong.unirest.json.JSONArray;
import kong.unirest.json.JSONException;
import kong.unirest.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class DoiFetcher implements IdBasedFetcher, EntryBasedFetcher {
public static final String NAME = "DOI";

private static final Logger LOGGER = LoggerFactory.getLogger(DoiFetcher.class);

private final ImportFormatPreferences preferences;

public DoiFetcher(ImportFormatPreferences preferences) {
Expand All @@ -47,18 +56,28 @@ public Optional<HelpFile> getHelpPage() {
@Override
public Optional<BibEntry> performSearchById(String identifier) throws FetcherException {
Optional<DOI> doi = DOI.parse(identifier);

try {
if (doi.isPresent()) {
alchimos marked this conversation as resolved.
Show resolved Hide resolved
Optional<BibEntry> fetchedEntry;

// mEDRA does not return a parsable bibtex string
if (getAgency(doi.get()).isPresent() && "medra".equalsIgnoreCase(getAgency(doi.get()).get())) {
fetchedEntry = new Medra().performSearchById(identifier);
alchimos marked this conversation as resolved.
Show resolved Hide resolved
alchimos marked this conversation as resolved.
Show resolved Hide resolved
return fetchedEntry;
}

URL doiURL = new URL(doi.get().getURIAsASCIIString());

// BibTeX data
URLDownload download = new URLDownload(doiURL);
URLDownload download = getUrlDownload(doiURL);
download.addHeader("Accept", MediaTypes.APPLICATION_BIBTEX);
String bibtexString = download.asString();

// BibTeX entry
Optional<BibEntry> fetchedEntry = BibtexParser.singleFromString(bibtexString, preferences, new DummyFileUpdateMonitor());
fetchedEntry = BibtexParser.singleFromString(bibtexString, preferences, new DummyFileUpdateMonitor());
fetchedEntry.ifPresent(this::doPostCleanup);

return fetchedEntry;
} else {
throw new FetcherException(Localization.lang("Invalid DOI: '%0'.", identifier));
Expand All @@ -67,6 +86,8 @@ public Optional<BibEntry> performSearchById(String identifier) throws FetcherExc
throw new FetcherException(Localization.lang("Connection error"), e);
} catch (ParseException e) {
throw new FetcherException("Could not parse BibTeX entry", e);
} catch (JSONException e) {
throw new FetcherException("Could not retrieve Registration Agency", e);
}
}

Expand All @@ -84,4 +105,25 @@ public List<BibEntry> performSearch(BibEntry entry) throws FetcherException {
return Collections.emptyList();
}
}

/**
* Returns registration agency. Optional.empty() if no agency is found.
*
* @param doi the DOI to be searched
*/
public Optional<String> getAgency(DOI doi) throws IOException {
alchimos marked this conversation as resolved.
Show resolved Hide resolved
Optional<String> agency = Optional.empty();
try {
URLDownload download = getUrlDownload(new URL(DOI.AGENCY_RESOLVER + "/" + doi.getDOI()));
JSONObject response = new JSONArray(download.asString()).getJSONObject(0);
if (response != null) {
agency = Optional.ofNullable(response.optString("RA"));
}
} catch (JSONException e) {
LOGGER.error("Cannot parse agency fetcher repsonse to JSON");
return Optional.empty();
}

return agency;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,8 @@ public URL getURLForQuery(String query) throws URISyntaxException, MalformedURLE
}

@Override
public URLDownload getUrlDownload(String query) throws MalformedURLException, FetcherException, URISyntaxException {
URLDownload download = new URLDownload(getURLForQuery(query));
public URLDownload getUrlDownload(URL url) {
URLDownload download = new URLDownload(url);
download.addHeader("Accept", MediaTypes.APPLICATION_BIBTEX);
return download;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ public String getName() {
}

@Override
public URL getURLForID(String identifier) throws URISyntaxException, MalformedURLException, FetcherException {
public URL getUrlForIdentifier(String identifier) throws URISyntaxException, MalformedURLException, FetcherException {
this.ensureThatIsbnIsValid(identifier);
URIBuilder uriBuilder = new URIBuilder(BASE_URL);
uriBuilder.addParameter("isbn", identifier);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ public String getName() {
* @return null, because the identifier is passed using form data. This method is not used.
*/
@Override
public URL getURLForID(String identifier) throws URISyntaxException, MalformedURLException, FetcherException {
public URL getUrlForIdentifier(String identifier) throws URISyntaxException, MalformedURLException, FetcherException {
return null;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ public String getName() {
}

@Override
public URL getURLForID(String identifier) throws URISyntaxException, MalformedURLException, FetcherException {
public URL getUrlForIdentifier(String identifier) throws URISyntaxException, MalformedURLException, FetcherException {
URIBuilder uriBuilder = new URIBuilder("https://lccn.loc.gov/" + identifier + "/mods");
return uriBuilder.build().toURL();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ public URL getURLForEntry(BibEntry entry) throws URISyntaxException, MalformedUR
Optional<String> mrNumberInEntry = entry.getField(StandardField.MR_NUMBER);
if (mrNumberInEntry.isPresent()) {
// We are lucky and already know the id, so use it instead
return getURLForID(mrNumberInEntry.get());
return getUrlForIdentifier(mrNumberInEntry.get());
}

URIBuilder uriBuilder = new URIBuilder("https://mathscinet.ams.org/mrlookup");
Expand All @@ -83,7 +83,7 @@ public URL getURLForQuery(String query) throws URISyntaxException, MalformedURLE
}

@Override
public URL getURLForID(String identifier) throws URISyntaxException, MalformedURLException, FetcherException {
public URL getUrlForIdentifier(String identifier) throws URISyntaxException, MalformedURLException, FetcherException {
URIBuilder uriBuilder = new URIBuilder("https://mathscinet.ams.org/mathscinet/search/publications.html");
uriBuilder.addParameter("pg1", "MR"); // search MR number
uriBuilder.addParameter("s1", identifier); // identifier
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ public Optional<HelpFile> getHelpPage() {
}

@Override
public URL getURLForID(String identifier) throws URISyntaxException, MalformedURLException, FetcherException {
public URL getUrlForIdentifier(String identifier) throws URISyntaxException, MalformedURLException, FetcherException {
URIBuilder uriBuilder = new URIBuilder(ID_URL);
uriBuilder.addParameter("db", "pubmed");
uriBuilder.addParameter("retmode", "xml");
Expand Down Expand Up @@ -206,7 +206,7 @@ private URL createSearchUrl(String term) throws URISyntaxException, MalformedURL
private List<BibEntry> fetchMedline(List<String> ids) throws FetcherException {
try {
// Separate the IDs with a comma to search multiple entries
URL fetchURL = getURLForID(String.join(",", ids));
URL fetchURL = getUrlForIdentifier(String.join(",", ids));
URLConnection data = fetchURL.openConnection();
ParserResult result = new MedlineImporter().importDatabase(
new BufferedReader(new InputStreamReader(data.getInputStream(), StandardCharsets.UTF_8)));
Expand Down
Loading