Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix APS and ScienceDirect fetcher #6781

Merged
merged 5 commits into from
Aug 24, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,8 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve
- We fixed an issue about duplicated group color indicators [#6175](https://github.com/JabRef/jabref/issues/6175)
- We fixed an issue where entries with the entry type Misc from an imported aux file would not be saved correctly to the bib file on disk [#6405](https://github.com/JabRef/jabref/issues/6405)
- We fixed an issue where percent sign ('%') was not formatted properly by the HTML formatter [#6753](https://github.com/JabRef/jabref/issues/6753)
- We fixed an issue with the [SAO/NASA Astrophysics Data System](https://docs.jabref.org/collect/import-using-online-bibliographic-database/ads) fetcher where `\textbackslash` appeared at the end of the abstract.
- We fixed an issue with the ScienceDirect fetcher where PDFs could not be downloaded. Fixes [#5860](https://github.com/JabRef/jabref/issues/5860)

### Removed

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,9 @@ public void doPostCleanup(BibEntry entry) {
.ifPresent(abstractText -> entry.clearField(StandardField.ABSTRACT));

entry.getField(StandardField.ABSTRACT)
.map(abstractText -> abstractText.replace("<P />", "").trim())
.map(abstractText -> abstractText.replace("<P />", ""))
.map(abstractText -> abstractText.replace("\\textbackslash", ""))
.map(abstractText -> abstractText.trim())
.ifPresent(abstractText -> entry.setField(StandardField.ABSTRACT, abstractText));
// The fetcher adds some garbage (number of found entries etc before)
entry.setCommentsBeforeEntry("");
Expand Down
44 changes: 25 additions & 19 deletions src/main/java/org/jabref/logic/importer/fetcher/ScienceDirect.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,12 @@
import kong.unirest.json.JSONObject;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* FulltextFetcher implementation that attempts to find a PDF URL at ScienceDirect.
* See <a href="https://dev.elsevier.com/">https://dev.elsevier.com/</a>
* FulltextFetcher implementation that attempts to find a PDF URL at ScienceDirect. See <a href="https://dev.elsevier.com/">https://dev.elsevier.com/</a>
*/
public class ScienceDirect implements FulltextFetcher {
private static final Logger LOGGER = LoggerFactory.getLogger(ScienceDirect.class);
Expand Down Expand Up @@ -62,22 +60,30 @@ public Optional<URL> findFullText(BibEntry entry) throws IOException {
return Optional.of(new URL(link));
}

// Retrieve PDF link (old page)
// TODO: can possibly be removed
Element link = html.getElementById("pdfLink");

if (link != null) {
LOGGER.info("Fulltext PDF found @ ScienceDirect (old page).");
Optional<URL> pdfLink = Optional.of(new URL(link.attr("pdfurl")));
return pdfLink;
}
// Retrieve PDF link (new page)
// TODO: can possibly be removed
String url = html.getElementsByClass("pdf-download-btn-link").attr("href");

if (url != null) {
LOGGER.info("Fulltext PDF found @ ScienceDirect (new page).");
Optional<URL> pdfLink = Optional.of(new URL("http://www.sciencedirect.com" + url));
URL url = new URL(sciLink);
String protocol = url.getProtocol();
String authority = url.getAuthority();

Optional<String> fullLinkToPdf = html
.getElementsByAttributeValue("type", "application/json")
.stream()
.flatMap(element -> element.getElementsByTag("script").stream())
// get the text element
.map(element -> element.childNode(0))
.map(element -> element.toString())
.map(text -> new JSONObject(text))
.filter(json -> json.has("article"))
.map(json -> json.getJSONObject("article"))
.filter(json -> json.has("pdfDownload"))
.map(json -> json.getJSONObject("pdfDownload"))
.filter(json -> json.has("linkToPdf"))
.map(json -> json.getString("linkToPdf"))
.map(linkToPdf -> String.format("%s://%s%s", protocol, authority, linkToPdf))
.findAny();
if (fullLinkToPdf.isPresent()) {
LOGGER.info("Fulltext PDF found at ScienceDirect.");
// new URL may throw "MalformedURLException", thus using "isPresent()" above and ".get()"
Optional<URL> pdfLink = Optional.of(new URL(fullLinkToPdf.get()));
return pdfLink;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import java.util.Collections;
import java.util.List;
import java.util.stream.Stream;

import org.jabref.logic.importer.ImportFormatPreferences;
import org.jabref.model.entry.BibEntry;
Expand All @@ -10,6 +11,9 @@
import org.jabref.testutils.category.FetcherTest;

import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;
import org.mockito.Answers;

import static org.junit.jupiter.api.Assertions.assertEquals;
Expand All @@ -30,39 +34,51 @@ public class GrobidCitationFetcherTest {
.withField(StandardField.PAGES, "245--259")
.withField(StandardField.VOLUME, "23")
.withField(StandardField.NUMBER, "4");

static String example2 = "Thomas, H. K. (2004). Training strategies for improving listeners' comprehension of foreign-accented speech (Doctoral dissertation). University of Colorado, Boulder.";
static BibEntry example2AsBibEntry = new BibEntry(BibEntry.DEFAULT_TYPE).withCiteKey("-1")
.withField(StandardField.AUTHOR, "Thomas, H")
.withField(StandardField.TITLE, "Training strategies for improving listeners' comprehension of foreign-accented speech (Doctoral dissertation)")
.withField(StandardField.YEAR, "2004")
.withField(StandardField.ADDRESS, "Boulder");

static String example3 = "Turk, J., Graham, P., & Verhulst, F. (2007). Child and adolescent psychiatry : A developmental approach. Oxford, England: Oxford University Press.";
static BibEntry example3AsBibEntry = new BibEntry(BibEntry.DEFAULT_TYPE).withCiteKey("-1")
.withField(StandardField.AUTHOR, "Turk, J and Graham, P and Verhulst, F")
.withField(StandardField.TITLE, "Child and adolescent psychiatry : A developmental approach")
.withField(StandardField.PUBLISHER, "Oxford University Press")
.withField(StandardField.YEAR, "2007")
.withField(StandardField.ADDRESS, "Oxford, England");

static String example4 = "Carr, I., & Kidner, R. (2003). Statutes and conventions on international trade law (4th ed.). London, England: Cavendish.";
static BibEntry example4AsBibEntry = new BibEntry(StandardEntryType.InBook).withCiteKey("-1")
.withField(StandardField.AUTHOR, "Carr, I and Kidner, R")
.withField(StandardField.BOOKTITLE, "Statutes and conventions on international trade law")
.withField(StandardField.PUBLISHER, "Cavendish")
.withField(StandardField.YEAR, "2003")
.withField(StandardField.ADDRESS, "London, England");
static String invalidInput1 = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx________________________________";
static String invalidInput2 = "¦@#¦@#¦@#¦@#¦@#¦@#¦@°#¦@¦°¦@°";

@Test
public void grobidPerformSearchCorrectResultTest() {
List<BibEntry> entries = grobidCitationFetcher.performSearch(example1);
assertEquals(List.of(example1AsBibEntry), entries);
entries = grobidCitationFetcher.performSearch(example2);
assertEquals(List.of(example2AsBibEntry), entries);
entries = grobidCitationFetcher.performSearch(example3);
assertEquals(List.of(example3AsBibEntry), entries);
entries = grobidCitationFetcher.performSearch(example4);
assertEquals(List.of(example4AsBibEntry), entries);
/**
 * Argument source for {@code grobidPerformSearchCorrectResultTest}.
 * Each argument set is: a display name, the expected entry, and the raw citation text to search for.
 *
 * @return one argument set per example citation, in declaration order
 */
public static Stream<Arguments> provideExamplesForCorrectResultTest() {
    Arguments firstCase = Arguments.of("example1", example1AsBibEntry, example1);
    Arguments secondCase = Arguments.of("example2", example2AsBibEntry, example2);
    Arguments thirdCase = Arguments.of("example3", example3AsBibEntry, example3);
    Arguments fourthCase = Arguments.of("example4", example4AsBibEntry, example4);
    return Stream.of(firstCase, secondCase, thirdCase, fourthCase);
}

/**
 * Argument source for {@code grobidPerformSearchWithInvalidDataTest}.
 *
 * @return one single-string argument set per invalid input
 */
public static Stream<Arguments> provideInvalidInput() {
    Arguments fillerCharacters = Arguments.of("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx________________________________");
    Arguments symbolNoise = Arguments.of("¦@#¦@#¦@#¦@#¦@#¦@#¦@°#¦@¦°¦@°");
    return Stream.of(fillerCharacters, symbolNoise);
}

/**
 * Checks that searching for a citation string yields exactly one entry equal to the expected one.
 *
 * @param testName         used only as the display name of the parameterized run
 * @param expectedBibEntry the entry the fetcher is expected to return
 * @param searchQuery      the citation text passed to the fetcher
 */
@ParameterizedTest(name = "{0}")
@MethodSource("provideExamplesForCorrectResultTest")
public void grobidPerformSearchCorrectResultTest(String testName, BibEntry expectedBibEntry, String searchQuery) {
    List<BibEntry> fetched = grobidCitationFetcher.performSearch(searchQuery);
    List<BibEntry> expected = List.of(expectedBibEntry);
    assertEquals(expected, fetched);
}

@Test
Expand All @@ -77,11 +93,10 @@ public void grobidPerformSearchWithEmptyStringsTest() {
assertEquals(Collections.emptyList(), entries);
}

@Test
public void grobidPerformSearchWithInvalidDataTest() {
List<BibEntry> entries = grobidCitationFetcher.performSearch(invalidInput1);
assertEquals(Collections.emptyList(), entries);
entries = grobidCitationFetcher.performSearch(invalidInput2);
@ParameterizedTest
@MethodSource("provideInvalidInput")
public void grobidPerformSearchWithInvalidDataTest(String invalidInput) {
List<BibEntry> entries = grobidCitationFetcher.performSearch(invalidInput);
assertEquals(Collections.emptyList(), entries);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,18 @@ void findByDOINewPage() throws IOException {
);
}

/**
 * Looks up the full text for the DOI reported in https://github.com/JabRef/jabref/issues/5860
 * and compares the result against the expected ScienceDirect PDF URL.
 */
@Test
@DisabledOnCIServer("CI server is blocked")
void findByDoiWorksForBoneArticle() throws IOException {
    // DOI supplied by a user in the issue referenced above
    entry.setField(StandardField.DOI, "https://doi.org/10.1016/j.bone.2020.115226");

    URL expectedPdfUrl = new URL("https://www.sciencedirect.com/science/article/pii/S8756328220300065/pdfft?md5=0ad75ff155637dec358e5c9fb8b90afd&pid=1-s2.0-S8756328220300065-main.pdf");
    Optional<URL> foundPdfUrl = finder.findFullText(entry);

    assertEquals(Optional.of(expectedPdfUrl), foundPdfUrl);
}

@Test
@DisabledOnCIServer("CI server is blocked")
void notFoundByDOI() throws IOException {
Expand Down