Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

mEDRA DOI fetcher implementation. #6641

Merged
merged 26 commits into from
Jul 9, 2020
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
2d75da2
mEDRA DOI fetcher implementation.
alchimos Jun 26, 2020
df31be7
mEDRA DOI fetcher implementation
alchimos Jun 28, 2020
b521e7c
mEDRA DOI fetcher implementation
alchimos Jun 30, 2020
3539079
mEDRA DOI fetcher implementation
alchimos Jun 30, 2020
1b78fd5
mEDRA DOI fetcher implementation
alchimos Jul 1, 2020
81d5cc3
Merge branch 'master' into medra-fetcher
alchimos Jul 1, 2020
51423cb
mEDRA DOI fetcher implementation
alchimos Jul 1, 2020
a42904c
mEDRA DOI fetcher implementation
alchimos Jul 1, 2020
c0a329f
mEDRA DOI fetcher implementation
alchimos Jul 1, 2020
85fc98f
Merge branch 'medra-fetcher' of https://github.com/mind000/jabref into
alchimos Jul 1, 2020
2ede4ac
mEDRA DOI fetcher implementation
alchimos Jul 1, 2020
a108974
mEDRA DOI fetcher implementation
alchimos Jul 1, 2020
4f96aab
Revert wrong merge
koppor Jul 1, 2020
4cfc672
Merge remote-tracking branch 'origin/master' into fix
koppor Jul 1, 2020
4ae1788
mEDRA DOI fetcher implementation
alchimos Jul 5, 2020
8c7f1b5
Merge branch 'master' into medra-fetcher
alchimos Jul 7, 2020
5504e52
Rewrite JsonReader to accept an empty input stream
koppor Jul 7, 2020
6fd81c3
Introduce "getUrlDownloadForIdentifier" and shrink fetcher code
koppor Jul 7, 2020
9b9b14f
Use parameterized tests (and add ISSN to Cisternino Paola)
koppor Jul 7, 2020
3596442
Fix parsing of names
koppor Jul 7, 2020
d17af0f
Fix checkstyle
koppor Jul 7, 2020
dd0a665
Move "getUrlDownload" up to WebFetcher
koppor Jul 7, 2020
4fa073e
Use new method "getUrlDownload"
koppor Jul 7, 2020
faa304a
Merge pull request #1 from JabRef/improveJsonReader
alchimos Jul 7, 2020
4523f1d
mEDRA DOI fetcher implementation
alchimos Jul 9, 2020
1793870
Merge branch 'master' into medra-fetcher
Siedlerchr Jul 9, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve

### Added

- We added a new fetcher to enable users to search mEDRA DOIs [#6602](https://github.com/JabRef/jabref/issues/6602)
- We added support for importing ris file and load DOI [#6530](https://github.com/JabRef/jabref/issues/6530)
- We added the Library properties to a context menu on the library tabs [#6485](https://github.com/JabRef/jabref/issues/6485)
- We added a new field in the preferences in 'BibTeX key generator' for unwanted characters that can be user-specified. [#6295](https://github.com/JabRef/jabref/issues/6295)
Expand Down
2 changes: 2 additions & 0 deletions src/main/java/org/jabref/logic/importer/WebFetchers.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import org.jabref.logic.importer.fetcher.LibraryOfCongress;
import org.jabref.logic.importer.fetcher.MathSciNet;
import org.jabref.logic.importer.fetcher.MedlineFetcher;
import org.jabref.logic.importer.fetcher.Medra;
import org.jabref.logic.importer.fetcher.OpenAccessDoi;
import org.jabref.logic.importer.fetcher.RfcFetcher;
import org.jabref.logic.importer.fetcher.ScienceDirect;
Expand Down Expand Up @@ -121,6 +122,7 @@ public static SortedSet<IdBasedFetcher> getIdBasedFetchers(ImportFormatPreferenc
set.add(new LibraryOfCongress(importFormatPreferences));
set.add(new IacrEprintFetcher(importFormatPreferences));
set.add(new RfcFetcher(importFormatPreferences));
set.add(new Medra());
return set;
}

Expand Down
51 changes: 43 additions & 8 deletions src/main/java/org/jabref/logic/importer/fetcher/DoiFetcher.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@
import org.jabref.model.util.DummyFileUpdateMonitor;
import org.jabref.model.util.OptionalUtil;

import kong.unirest.json.JSONArray;
import kong.unirest.json.JSONException;
import kong.unirest.json.JSONObject;

public class DoiFetcher implements IdBasedFetcher, EntryBasedFetcher {
public static final String NAME = "DOI";

Expand All @@ -47,18 +51,28 @@ public Optional<HelpFile> getHelpPage() {
@Override
public Optional<BibEntry> performSearchById(String identifier) throws FetcherException {
Optional<DOI> doi = DOI.parse(identifier);
String agency = "";
try {
if (doi.isPresent()) {
alchimos marked this conversation as resolved.
Show resolved Hide resolved
URL doiURL = new URL(doi.get().getURIAsASCIIString());
Optional<BibEntry> fetchedEntry;

// mEDRA does not return a parsable bibtex string
if (getAgency(doi.get()).isPresent() && "medra".equalsIgnoreCase(getAgency(doi.get()).get())) {
fetchedEntry = new Medra().performSearchById(identifier);
alchimos marked this conversation as resolved.
Show resolved Hide resolved
alchimos marked this conversation as resolved.
Show resolved Hide resolved

} else {
URL doiURL = new URL(doi.get().getURIAsASCIIString());

// BibTeX data
URLDownload download = new URLDownload(doiURL);
download.addHeader("Accept", MediaTypes.APPLICATION_BIBTEX);
String bibtexString = download.asString();
// BibTeX data
URLDownload download = new URLDownload(doiURL);
download.addHeader("Accept", MediaTypes.APPLICATION_BIBTEX);
String bibtexString = download.asString();

// BibTeX entry
fetchedEntry = BibtexParser.singleFromString(bibtexString, preferences, new DummyFileUpdateMonitor());
fetchedEntry.ifPresent(this::doPostCleanup);
}

// BibTeX entry
Optional<BibEntry> fetchedEntry = BibtexParser.singleFromString(bibtexString, preferences, new DummyFileUpdateMonitor());
fetchedEntry.ifPresent(this::doPostCleanup);
return fetchedEntry;
} else {
throw new FetcherException(Localization.lang("Invalid DOI: '%0'.", identifier));
Expand All @@ -67,6 +81,8 @@ public Optional<BibEntry> performSearchById(String identifier) throws FetcherExc
throw new FetcherException(Localization.lang("Connection error"), e);
} catch (ParseException e) {
throw new FetcherException("Could not parse BibTeX entry", e);
} catch (JSONException e) {
throw new FetcherException("Could not retrieve Registration Agency", e);
}
}

Expand All @@ -84,4 +100,23 @@ public List<BibEntry> performSearch(BibEntry entry) throws FetcherException {
return Collections.emptyList();
}
}

/**
* Returns the registration agency of the given DOI wrapped in an Optional (the method itself never returns null).
koppor marked this conversation as resolved.
Show resolved Hide resolved
*
* @param doi the doi to be searched
* @throws JSONException
koppor marked this conversation as resolved.
Show resolved Hide resolved
* @throws IOException
*/
public Optional<String> getAgency(DOI doi) throws JSONException, IOException {
    // Purpose: look up which registration agency (the "RA" attribute) is responsible for the given DOI.
    // Returns the agency name, or Optional.empty() when the response carries no usable "RA" value.
    // Throws JSONException if the response is not a JSON array with an object as first element,
    // and IOException if the download itself fails.
    URLDownload download = new URLDownload(DOI.AGENCY_RESOLVER + "/" + doi.getDOI());
    JSONObject response = new JSONArray(download.asString()).getJSONObject(0);

    // optString yields "" (not null) when the "RA" key is missing, so an empty string has to be
    // mapped to Optional.empty() explicitly; otherwise callers would see a present-but-blank agency.
    return Optional.ofNullable(response)
                   .map(firstItem -> firstItem.optString("RA"))
                   .filter(agencyName -> !agencyName.isEmpty());
}
}
182 changes: 182 additions & 0 deletions src/main/java/org/jabref/logic/importer/fetcher/Medra.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
package org.jabref.logic.importer.fetcher;

import java.io.IOException;
import java.io.InputStream;
import java.io.PushbackInputStream;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;

import org.jabref.logic.importer.FetcherException;
import org.jabref.logic.importer.IdBasedParserFetcher;
import org.jabref.logic.importer.ParseException;
import org.jabref.logic.importer.Parser;
import org.jabref.logic.importer.SearchBasedParserFetcher;
import org.jabref.logic.importer.util.JsonReader;
import org.jabref.logic.importer.util.MediaTypes;
import org.jabref.logic.net.URLDownload;
import org.jabref.model.entry.AuthorList;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.field.StandardField;
import org.jabref.model.entry.types.EntryType;
import org.jabref.model.entry.types.StandardEntryType;
import kong.unirest.json.JSONArray;
import kong.unirest.json.JSONException;
import kong.unirest.json.JSONObject;
import org.apache.http.client.utils.URIBuilder;

/**
* A class for fetching DOIs from Medra
*
* @see <a href="https://data.medra.org">mEDRA Content Negotiation API</a> for an overview of the API
* <p>
* It requires "Accept" request Header attribute to be set to desired content-type.
*/
public class Medra implements SearchBasedParserFetcher, IdBasedParserFetcher {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you really need the SearchBasedParserFetcher interface?
I think this is superfluous. The IdBasedFetcher should be sufficient

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah yes, I forgot to remove it.
In this case I don't see why i should add Medra Fetcher to org.jabref.logic.importer.WebFetchersTest array test, since it seems that this already loads Medra class to expected array of getIdBasedFetchersReturnsAllFetcherDerivingFromIdBasedFetcher() and the test is passed.
Am I missing anything?

Copy link
Member

@Siedlerchr Siedlerchr Jul 1, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That was probably the reason for the failing test at first. If it passes now it's fine. Nothing to do then.
Please take a look at the checkstyle issues, then it's ready to go!

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe I fixed the style issues and wanted to run ./gradlew checkstyleMain checkstyleTest checkstyleJmh from terminal to test them before committing but i get:

`Starting a Gradle Daemon (subsequent builds will be faster)

> Configure project :
Found module name 'org.jabref'

> Task :compileJava
org.apache.logging.log4j.LoggingException: Unable to create Plugin Service Class org.jabref.gui.logging.plugins.Log4jPlugins
        at org.apache.logging.log4j.plugins.processor.PluginProcessor.createSourceFile(PluginProcessor.java:201)
        at org.apache.logging.log4j.plugins.processor.PluginProcessor.writeClassFile(PluginProcessor.java:163)
        at org.apache.logging.log4j.plugins.processor.PluginProcessor.process(PluginProcessor.java:91)
        at org.gradle.api.internal.tasks.compile.processing.DelegatingProcessor.process(DelegatingProcessor.java:62)`

running with --info gives some more details:

> Task :compileJava
Caching disabled for task ':compileJava' because:
  Build cache is disabled
Task ':compileJava' is not up-to-date because:
  Task has failed previously.
The input changes require a full rebuild for incremental task ':compileJava'.
Full recompilation is required because no incremental change information is available. This is usually caused by clean builds or changing compiler arguments.
Compiling with JDK Java compiler API.
org.apache.logging.log4j.LoggingException: Unable to create Plugin Service Class org.jabref.gui.logging.plugins.Log4jPlugins
        at org.apache.logging.log4j.plugins.processor.PluginProcessor.createSourceFile(PluginProcessor.java:201)
        at org.apache.logging.log4j.plugins.processor.PluginProcessor.writeClassFile(PluginProcessor.java:163)

Should I commit anyway and see what the tests say?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, just commit.


public static final String API_URL = "https://data.medra.org";

/**
 * Returns the name of this fetcher.
 *
 * @return the constant string {@code "mEDRA"}
 */
@Override
public String getName() {
return "mEDRA";
}

@Override
public Parser getParser() {
return inputStream -> {
JSONObject response = JsonReader.toJsonObject(inputStream);

List<BibEntry> entries = new ArrayList<>();
BibEntry entry = jsonItemToBibEntry(response);
entries.add(entry);

return entries;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These 4 lines can be done shorter as it is a single entry:

Suggested change
return entries;
return Collections.singleton(jsonITemToBibEntry(response));

};
}

private BibEntry jsonItemToBibEntry(JSONObject item) throws ParseException {
try {
BibEntry entry = new BibEntry();
entry.setType(convertType(item.getString("type")));
koppor marked this conversation as resolved.
Show resolved Hide resolved
entry.setField(StandardField.TITLE, item.getString("title"));
entry.setField(StandardField.AUTHOR, toAuthors(item.optJSONArray("author")));
entry.setField(StandardField.YEAR,
Optional.ofNullable(item.optJSONObject("issued"))
.map(array -> array.optJSONArray("date-parts"))
.map(array -> array.optJSONArray(0))
.map(array -> array.optInt(0))
.map(year -> Integer.toString(year)).orElse(""));
entry.setField(StandardField.DOI, item.getString("DOI"));
entry.setField(StandardField.PAGES, item.optString("page"));
entry.setField(StandardField.ISSN, item.optString("ISSN"));
entry.setField(StandardField.JOURNAL, item.optString("container-title"));
entry.setField(StandardField.PUBLISHER, item.optString("publisher"));
entry.setField(StandardField.URL, item.optString("URL"));
entry.setField(StandardField.VOLUME, item.optString("volume"));
return entry;
} catch (JSONException exception) {
throw new ParseException("mEdRA API JSON format has changed", exception);
}
}

/**
 * Maps the {@code type} string of a fetched JSON item to an entry type.
 *
 * @param type the type string taken from the JSON item; must not be null
 * @return {@link StandardEntryType#Article} for {@code "article-journal"},
 *         {@link StandardEntryType#Misc} for every other value
 */
private EntryType convertType(String type) {
    // Called on 'type' (not on the literal) so that a null argument still fails with a
    // NullPointerException, exactly as switching on a null string would.
    if (type.equals("article-journal")) {
        return StandardEntryType.Article;
    }
    // Anything without a dedicated mapping falls back to the generic type.
    return StandardEntryType.Misc;
}

/**
 * Converts the JSON author array of an item into a single author string.
 * <p>
 * Each array element is read as a JSON object. If it has a {@code "literal"} key, that value is
 * used as the name; otherwise the name is built as {@code family + " " + given}, with missing
 * parts replaced by the empty string.
 *
 * @param authors the JSON array of author objects; may be null
 * @return the empty string if {@code authors} is null, otherwise the collected authors rendered
 *         via {@code AuthorList#getAsFirstLastNamesWithAnd()}
 */
private String toAuthors(JSONArray authors) {
    // No author information at all: nothing to format.
    if (authors == null) {
        return "";
    }

    AuthorList collected = new AuthorList();
    for (int index = 0; index < authors.length(); index++) {
        JSONObject person = authors.getJSONObject(index);

        // Prefer the pre-formatted "literal" name; fall back to the family/given pair.
        String fullName = person.has("literal")
                ? person.optString("literal", "")
                : person.optString("family", "") + " " + person.optString("given", "");

        // The whole name is handed over as the first argument; the remaining name parts stay empty.
        collected.addAuthor(fullName, "", "", "", "");
    }
    return collected.getAsFirstLastNamesWithAnd();
}

@Override
public Optional<BibEntry> performSearchById(String identifier) throws FetcherException {
koppor marked this conversation as resolved.
Show resolved Hide resolved

try (InputStream stream = getUrlDownload(identifier).asInputStream();
PushbackInputStream pushbackInputStream = new PushbackInputStream(stream)) {

List<BibEntry> fetchedEntries = new ArrayList<>();

// check if there is anything to read since mEDRA '404 not found' returns nothing
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, you mean HTTP 404 returns an empty response.

In line 259 of URLDownload, it is implemented exactly like that.

Why do the other fetchers cope well with that and here you have to do some special tweaks?

Maybe, the response from the mEDRA thing is different?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You're right, URLDownload returns empty stream but I thought somewhere I had to check whether it's empty or not before trying to parse it to JSON.
Crossref search by ID doesn't cope well with that, for example, because there's no empty-stream check. I added a test for a valid DOI returning nothing to CrossRefTest and the test failed.
If you think this could be the case I could implement just that check on performSearchById of Crossref as well, otherwise i just commit my changes.

2020-07-04_18h11_15

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it makes sense to add that to CrossRef as well

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think, this is fixed in https://github.com/mind000/jabref/pull/1/, too.

int readByte;
readByte = pushbackInputStream.read();
koppor marked this conversation as resolved.
Show resolved Hide resolved
if (readByte != -1) {
pushbackInputStream.unread(readByte);
fetchedEntries = getParser().parseEntries(pushbackInputStream);
}

if (fetchedEntries.isEmpty()) {
return Optional.empty();
}

BibEntry entry = fetchedEntries.get(0);

// Post-cleanup
doPostCleanup(entry);

return Optional.of(entry);
} catch (URISyntaxException e) {
throw new FetcherException("Search URI is malformed", e);
} catch (IOException e) {
// TODO: Catch HTTP Response 401 errors and report that user has no rights to access resource. It might be that there is an UnknownHostException (eutils.ncbi.nlm.nih.gov cannot be resolved).
throw new FetcherException("A network error occurred", e);
} catch (ParseException e) {
throw new FetcherException("An internal parser error occurred", e);
}
}

/**
 * Runs the post-cleanup on a fetched entry by delegating to the default implementation
 * inherited from {@link IdBasedParserFetcher}.
 * <p>
 * NOTE(review): the explicit override is presumably required because both implemented
 * interfaces provide a default {@code doPostCleanup} — confirm against the interfaces.
 *
 * @param entry the entry to clean up
 */
@Override
public void doPostCleanup(BibEntry entry) {
IdBasedParserFetcher.super.doPostCleanup(entry);
}

/**
 * Builds the URL for a query by appending it as a path segment to {@link #API_URL}.
 *
 * @param query the query string to append after {@code API_URL + "/"}
 * @return the resulting URL
 * @throws URISyntaxException    if the concatenated string is not a valid URI
 * @throws MalformedURLException if the URI cannot be converted to a URL
 */
@Override
public URL getURLForQuery(String query) throws URISyntaxException, MalformedURLException, FetcherException {
    String address = API_URL + "/" + query;
    return new URIBuilder(address).build().toURL();
}

/**
 * Creates the download for the given identifier and sets its {@code Accept} header to
 * {@link MediaTypes#APPLICATION_JSON}.
 *
 * @param identifier the identifier whose URL is obtained from {@link #getURLForID(String)}
 * @return the prepared download; this method does not start it
 */
@Override
public URLDownload getUrlDownload(String identifier) throws MalformedURLException, FetcherException, URISyntaxException {
    URL target = getURLForID(identifier);
    URLDownload request = new URLDownload(target);
    // The desired response format is selected through the Accept header.
    request.addHeader("Accept", MediaTypes.APPLICATION_JSON);
    return request;
}

/**
 * Builds the URL for an identifier by appending it as a path segment to {@link #API_URL}.
 *
 * @param identifier the identifier to append after {@code API_URL + "/"}
 * @return the resulting URL
 * @throws URISyntaxException    if the concatenated string is not a valid URI
 * @throws MalformedURLException if the URI cannot be converted to a URL
 */
@Override
public URL getURLForID(String identifier) throws URISyntaxException, MalformedURLException, FetcherException {
    String location = API_URL + "/" + identifier;
    URIBuilder builder = new URIBuilder(location);
    return builder.build().toURL();
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,5 @@
*/
public class MediaTypes {
public static final String APPLICATION_BIBTEX = "application/x-bibtex";
public static final String APPLICATION_JSON = "application/vnd.citationstyles.csl+json";
koppor marked this conversation as resolved.
Show resolved Hide resolved
}
4 changes: 3 additions & 1 deletion src/main/java/org/jabref/model/entry/identifier/DOI.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ public class DOI implements Identifier {

// DOI/Short DOI resolver
private static final URI RESOLVER = URI.create("https://doi.org");
public static final URI AGENCY_RESOLVER = URI.create("https://doi.org/doiRA");
// Regex
// (see http://www.doi.org/doi_handbook/2_Numbering.html)
private static final String DOI_EXP = ""
Expand Down Expand Up @@ -238,7 +239,7 @@ public boolean equals(Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
if ((o == null) || (getClass() != o.getClass())) {
return false;
}
DOI other = (DOI) o;
Expand All @@ -249,4 +250,5 @@ public boolean equals(Object o) {
public int hashCode() {
return Objects.hash(doi.toLowerCase(Locale.ENGLISH));
}

}
Loading