Skip to content

Commit

Permalink
mEDRA DOI fetcher implementation. (#6641)
Browse files Browse the repository at this point in the history
* mEDRA DOI fetcher implementation.

* mEDRA DOI fetcher implementation

* mEDRA DOI fetcher implementation

* mEDRA DOI fetcher implementation

* mEDRA DOI fetcher implementation

* mEDRA DOI fetcher implementation

* mEDRA DOI fetcher implementation

* mEDRA DOI fetcher implementation

* mEDRA DOI fetcher implementation

* mEDRA DOI fetcher implementation

* Revert wrong merge

* mEDRA DOI fetcher implementation

* Rewrite JsonReader to accept an empty input stream

* Introduce "getUrlDownloadForIdentifier" and shrink fetcher code

- Rename getURLForID to getUrlForIdentifier
- Shrink Medra fetcher
- Shrink CrossRef fetcher

* Use parameterized tests (and add ISSN to Cisternino Paola)

* Fix parsing of names

* Fix checkstyle

* Move "getUrlDownload" up to WebFetcher

Reason: It is the common base of both SearchBasedParserFetcher and IdBasedParserFetcher

* Use new method "getUrlDownload"

* mEDRA DOI fetcher implementation

Co-authored-by: Oliver Kopp <[email protected]>
Co-authored-by: Christoph <[email protected]>
  • Loading branch information
3 people authored Jul 9, 2020
1 parent 558c810 commit 20e78a4
Show file tree
Hide file tree
Showing 24 changed files with 418 additions and 64 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve

### Added

- We added a new fetcher to enable users to search mEDRA DOIs [#6602](https://github.com/JabRef/jabref/issues/6602)
- We added a new fetcher to enable users to search "[Collection of Computer Science Bibliographies](https://liinwww.ira.uka.de/bibliography/index.html)". [#6638](https://github.com/JabRef/jabref/issues/6638)
- We added default values for delimiters in Add Subgroup window [#6624](https://github.com/JabRef/jabref/issues/6624)
- We improved responsiveness of general fields specification dialog window. [#6643](https://github.com/JabRef/jabref/issues/6604)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ public interface IdBasedParserFetcher extends IdBasedFetcher {
*
* @param identifier the ID
*/
URL getURLForID(String identifier) throws URISyntaxException, MalformedURLException, FetcherException;
URL getUrlForIdentifier(String identifier) throws URISyntaxException, MalformedURLException, FetcherException;

/**
* Returns the parser used to convert the response to a list of {@link BibEntry}.
Expand Down Expand Up @@ -61,7 +61,7 @@ default Optional<BibEntry> performSearchById(String identifier) throws FetcherEx
return Optional.empty();
}

try (InputStream stream = new URLDownload(getURLForID(identifier)).asInputStream()) {
try (InputStream stream = getUrlDownload(getUrlForIdentifier(identifier)).asInputStream()) {
List<BibEntry> fetchedEntries = getParser().parseEntries(stream);

if (fetchedEntries.isEmpty()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,31 +43,20 @@ public interface SearchBasedParserFetcher extends SearchBasedFetcher {
* {@code new FieldFormatterCleanup(StandardField.TITLE, new RemoveBracesFormatter()).cleanup(entry);}
*
* By default, no cleanup is done.
*
* @param entry the entry to be cleaned-up
*/
default void doPostCleanup(BibEntry entry) {
// Do nothing by default
}

/**
* Gets the {@link URLDownload} object for downloading content. Overwrite, if you need to send additional headers for the download
*
* @param query The search query
* @throws MalformedURLException
* @throws FetcherException
* @throws URISyntaxException
*/
default URLDownload getUrlDownload(String query) throws MalformedURLException, FetcherException, URISyntaxException {
return new URLDownload(getURLForQuery(query));
}

@Override
default List<BibEntry> performSearch(String query) throws FetcherException {
if (StringUtil.isBlank(query)) {
return Collections.emptyList();
}

try (InputStream stream = getUrlDownload(query).asInputStream()) {
try (InputStream stream = getUrlDownload(getURLForQuery(query)).asInputStream()) {
List<BibEntry> fetchedEntries = getParser().parseEntries(stream);

// Post-cleanup
Expand Down
9 changes: 9 additions & 0 deletions src/main/java/org/jabref/logic/importer/WebFetcher.java
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
package org.jabref.logic.importer;

import java.net.URL;
import java.util.Optional;

import org.jabref.logic.help.HelpFile;
import org.jabref.logic.net.URLDownload;

/**
* Searches web resources for bibliographic information.
Expand All @@ -25,4 +27,11 @@ public interface WebFetcher {
default Optional<HelpFile> getHelpPage() {
return Optional.empty(); // no help page by default
}

/**
* Constructs a {@link URLDownload} object for downloading content from the given URL.
* Override this method if you need to send additional headers for the download.
*
* @param url the URL to download from
* @return a new {@link URLDownload} for {@code url}
*/
default URLDownload getUrlDownload(URL url) {
return new URLDownload(url);
}
}
2 changes: 2 additions & 0 deletions src/main/java/org/jabref/logic/importer/WebFetchers.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import org.jabref.logic.importer.fetcher.LibraryOfCongress;
import org.jabref.logic.importer.fetcher.MathSciNet;
import org.jabref.logic.importer.fetcher.MedlineFetcher;
import org.jabref.logic.importer.fetcher.Medra;
import org.jabref.logic.importer.fetcher.OpenAccessDoi;
import org.jabref.logic.importer.fetcher.RfcFetcher;
import org.jabref.logic.importer.fetcher.ScienceDirect;
Expand Down Expand Up @@ -123,6 +124,7 @@ public static SortedSet<IdBasedFetcher> getIdBasedFetchers(ImportFormatPreferenc
set.add(new LibraryOfCongress(importFormatPreferences));
set.add(new IacrEprintFetcher(importFormatPreferences));
set.add(new RfcFetcher(importFormatPreferences));
set.add(new Medra());
return set;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ public URL getURLForEntry(BibEntry entry) throws URISyntaxException, MalformedUR
* @return URL which points to a search URL for given identifier
*/
@Override
public URL getURLForID(String identifier) throws FetcherException, URISyntaxException, MalformedURLException {
public URL getUrlForIdentifier(String identifier) throws FetcherException, URISyntaxException, MalformedURLException {
String query = "doi:\"" + identifier + "\" OR " + "bibcode:\"" + identifier + "\"";
URIBuilder builder = new URIBuilder(API_SEARCH_URL);
builder.addParameter("q", query);
Expand Down Expand Up @@ -227,7 +227,7 @@ public Optional<BibEntry> performSearchById(String identifier) throws FetcherExc
}

try {
List<String> bibcodes = fetchBibcodes(getURLForID(identifier));
List<String> bibcodes = fetchBibcodes(getUrlForIdentifier(identifier));
List<BibEntry> fetchedEntries = performSearchByIds(bibcodes);

if (fetchedEntries.isEmpty()) {
Expand Down
36 changes: 22 additions & 14 deletions src/main/java/org/jabref/logic/importer/fetcher/CrossRef.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Optional;

Expand Down Expand Up @@ -69,31 +70,38 @@ public URL getURLForQuery(String query) throws URISyntaxException, MalformedURLE
}

@Override
public URL getURLForID(String identifier) throws URISyntaxException, MalformedURLException, FetcherException {
public URL getUrlForIdentifier(String identifier) throws URISyntaxException, MalformedURLException, FetcherException {
URIBuilder uriBuilder = new URIBuilder(API_URL + "/" + identifier);
return uriBuilder.build().toURL();
}

@Override
public Parser getParser() {
return inputStream -> {
JSONObject response = JsonReader.toJsonObject(inputStream).getJSONObject("message");

List<BibEntry> entries = new ArrayList<>();
if (response.has("items")) {
// Response contains a list
JSONArray items = response.getJSONArray("items");
for (int i = 0; i < items.length(); i++) {
JSONObject item = items.getJSONObject(i);
BibEntry entry = jsonItemToBibEntry(item);
entries.add(entry);
}
} else {
JSONObject response = JsonReader.toJsonObject(inputStream);
if (response.isEmpty()) {
return Collections.emptyList();
}

response = response.getJSONObject("message");
if (response.isEmpty()) {
return Collections.emptyList();
}

if (!response.has("items")) {
// Singleton response
BibEntry entry = jsonItemToBibEntry(response);
entries.add(entry);
return Collections.singletonList(entry);
}

// Response contains a list
JSONArray items = response.getJSONArray("items");
List<BibEntry> entries = new ArrayList<>(items.length());
for (int i = 0; i < items.length(); i++) {
JSONObject item = items.getJSONObject(i);
BibEntry entry = jsonItemToBibEntry(item);
entries.add(entry);
}
return entries;
};
}
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/org/jabref/logic/importer/fetcher/DiVA.java
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ public Optional<HelpFile> getHelpPage() {
}

@Override
public URL getURLForID(String identifier) throws URISyntaxException, MalformedURLException, FetcherException {
public URL getUrlForIdentifier(String identifier) throws URISyntaxException, MalformedURLException, FetcherException {
URIBuilder uriBuilder = new URIBuilder("http://www.diva-portal.org/smash/getreferences");

uriBuilder.addParameter("referenceFormat", "BibTex");
Expand Down
22 changes: 20 additions & 2 deletions src/main/java/org/jabref/logic/importer/fetcher/DoiFetcher.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,15 @@
import org.jabref.model.util.DummyFileUpdateMonitor;
import org.jabref.model.util.OptionalUtil;

import kong.unirest.json.JSONException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class DoiFetcher implements IdBasedFetcher, EntryBasedFetcher {
public static final String NAME = "DOI";

private static final Logger LOGGER = LoggerFactory.getLogger(DoiFetcher.class);

private final ImportFormatPreferences preferences;

public DoiFetcher(ImportFormatPreferences preferences) {
Expand All @@ -47,18 +53,27 @@ public Optional<HelpFile> getHelpPage() {
@Override
public Optional<BibEntry> performSearchById(String identifier) throws FetcherException {
Optional<DOI> doi = DOI.parse(identifier);

try {
if (doi.isPresent()) {
Optional<BibEntry> fetchedEntry;

// mEDRA does not return a parsable bibtex string
if (doi.get().getAgency().isPresent() && "medra".equalsIgnoreCase(doi.get().getAgency().get())) {
return new Medra().performSearchById(identifier);
}

URL doiURL = new URL(doi.get().getURIAsASCIIString());

// BibTeX data
URLDownload download = new URLDownload(doiURL);
URLDownload download = getUrlDownload(doiURL);
download.addHeader("Accept", MediaTypes.APPLICATION_BIBTEX);
String bibtexString = download.asString();

// BibTeX entry
Optional<BibEntry> fetchedEntry = BibtexParser.singleFromString(bibtexString, preferences, new DummyFileUpdateMonitor());
fetchedEntry = BibtexParser.singleFromString(bibtexString, preferences, new DummyFileUpdateMonitor());
fetchedEntry.ifPresent(this::doPostCleanup);

return fetchedEntry;
} else {
throw new FetcherException(Localization.lang("Invalid DOI: '%0'.", identifier));
Expand All @@ -67,6 +82,8 @@ public Optional<BibEntry> performSearchById(String identifier) throws FetcherExc
throw new FetcherException(Localization.lang("Connection error"), e);
} catch (ParseException e) {
throw new FetcherException("Could not parse BibTeX entry", e);
} catch (JSONException e) {
throw new FetcherException("Could not retrieve Registration Agency", e);
}
}

Expand All @@ -84,4 +101,5 @@ public List<BibEntry> performSearch(BibEntry entry) throws FetcherException {
return Collections.emptyList();
}
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,8 @@ public URL getURLForQuery(String query) throws URISyntaxException, MalformedURLE
}

@Override
public URLDownload getUrlDownload(String query) throws MalformedURLException, FetcherException, URISyntaxException {
URLDownload download = new URLDownload(getURLForQuery(query));
public URLDownload getUrlDownload(URL url) {
URLDownload download = new URLDownload(url);
download.addHeader("Accept", MediaTypes.APPLICATION_BIBTEX);
return download;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ public String getName() {
}

@Override
public URL getURLForID(String identifier) throws URISyntaxException, MalformedURLException, FetcherException {
public URL getUrlForIdentifier(String identifier) throws URISyntaxException, MalformedURLException, FetcherException {
this.ensureThatIsbnIsValid(identifier);
URIBuilder uriBuilder = new URIBuilder(BASE_URL);
uriBuilder.addParameter("isbn", identifier);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ public String getName() {
* @return null, because the identifier is passed using form data. This method is not used.
*/
@Override
public URL getURLForID(String identifier) throws URISyntaxException, MalformedURLException, FetcherException {
public URL getUrlForIdentifier(String identifier) throws URISyntaxException, MalformedURLException, FetcherException {
return null;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ public String getName() {
}

@Override
public URL getURLForID(String identifier) throws URISyntaxException, MalformedURLException, FetcherException {
public URL getUrlForIdentifier(String identifier) throws URISyntaxException, MalformedURLException, FetcherException {
URIBuilder uriBuilder = new URIBuilder("https://lccn.loc.gov/" + identifier + "/mods");
return uriBuilder.build().toURL();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ public URL getURLForEntry(BibEntry entry) throws URISyntaxException, MalformedUR
Optional<String> mrNumberInEntry = entry.getField(StandardField.MR_NUMBER);
if (mrNumberInEntry.isPresent()) {
// We are lucky and already know the id, so use it instead
return getURLForID(mrNumberInEntry.get());
return getUrlForIdentifier(mrNumberInEntry.get());
}

URIBuilder uriBuilder = new URIBuilder("https://mathscinet.ams.org/mrlookup");
Expand All @@ -83,7 +83,7 @@ public URL getURLForQuery(String query) throws URISyntaxException, MalformedURLE
}

@Override
public URL getURLForID(String identifier) throws URISyntaxException, MalformedURLException, FetcherException {
public URL getUrlForIdentifier(String identifier) throws URISyntaxException, MalformedURLException, FetcherException {
URIBuilder uriBuilder = new URIBuilder("https://mathscinet.ams.org/mathscinet/search/publications.html");
uriBuilder.addParameter("pg1", "MR"); // search MR number
uriBuilder.addParameter("s1", identifier); // identifier
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ public Optional<HelpFile> getHelpPage() {
}

@Override
public URL getURLForID(String identifier) throws URISyntaxException, MalformedURLException, FetcherException {
public URL getUrlForIdentifier(String identifier) throws URISyntaxException, MalformedURLException, FetcherException {
URIBuilder uriBuilder = new URIBuilder(ID_URL);
uriBuilder.addParameter("db", "pubmed");
uriBuilder.addParameter("retmode", "xml");
Expand Down Expand Up @@ -206,7 +206,7 @@ private URL createSearchUrl(String term) throws URISyntaxException, MalformedURL
private List<BibEntry> fetchMedline(List<String> ids) throws FetcherException {
try {
// Separate the IDs with a comma to search multiple entries
URL fetchURL = getURLForID(String.join(",", ids));
URL fetchURL = getUrlForIdentifier(String.join(",", ids));
URLConnection data = fetchURL.openConnection();
ParserResult result = new MedlineImporter().importDatabase(
new BufferedReader(new InputStreamReader(data.getInputStream(), StandardCharsets.UTF_8)));
Expand Down
Loading

0 comments on commit 20e78a4

Please sign in to comment.