Skip to content

Commit

Permalink
Fixes #2574 Add logic for new Sciencedirect pages
Browse files Browse the repository at this point in the history
  • Loading branch information
stefan-kolb committed Feb 20, 2017
1 parent 4da74e5 commit 44fc6a2
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 6 deletions.
24 changes: 19 additions & 5 deletions src/main/java/org/jabref/logic/importer/fetcher/ScienceDirect.java
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ public class ScienceDirect implements FulltextFetcher {
@Override
public Optional<URL> findFullText(BibEntry entry) throws IOException {
Objects.requireNonNull(entry);
Optional<URL> pdfLink = Optional.empty();

// Try unique DOI first
Optional<DOI> doi = entry.getField(FieldName.DOI).flatMap(DOI::build);
Expand All @@ -46,21 +45,36 @@ public Optional<URL> findFullText(BibEntry entry) throws IOException {
try {
String sciLink = getUrlByDoi(doi.get().getDOI());

// Scrape the web page as a desktop browser, not as a mobile client!
if (!sciLink.isEmpty()) {
// Retrieve PDF link
Document html = Jsoup.connect(sciLink).ignoreHttpErrors(true).get();
Document html = Jsoup.connect(sciLink)
.userAgent("Mozilla/5.0 (Windows; U; WindowsNT 5.1; en-US; rv1.8.1.6) Gecko/20070725 Firefox/2.0.0.6")
.referrer("http://www.google.com")
.ignoreHttpErrors(true).get();

// Retrieve PDF link (old page)
Element link = html.getElementById("pdfLink");

if (link != null) {
LOGGER.info("Fulltext PDF found @ ScienceDirect.");
pdfLink = Optional.of(new URL(link.attr("pdfurl")));
Optional<URL> pdfLink = Optional.of(new URL(link.attr("pdfurl")));
return pdfLink;
}
// Retrieve PDF link (new page)
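// PdfDropDownMenu - ul - li - a - href
// NOTE(review): jsoup's Elements.attr(...) returns "" (never null) when no element matches,
// so the null check below always passes; an isEmpty() check is probably intended - confirm.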
String url = html.getElementsByClass("pdf-download-btn-link").attr("href");

if (url != null) {
LOGGER.info("Fulltext PDF found @ ScienceDirect.");
Optional<URL> pdfLink = Optional.of(new URL("http://www.sciencedirect.com" + url));
return pdfLink;
}
}
} catch(UnirestException e) {
LOGGER.warn("ScienceDirect API request failed", e);
}
}
return pdfLink;
return Optional.empty();
}

private String getUrlByDoi(String doi) throws UnirestException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ public void doiNotPresent() throws IOException {
}

@Test
public void findByDOI() throws IOException {
public void findByDOIOldPage() throws IOException {
// CI server is blocked
Assume.assumeFalse(DevEnvironment.isCIServer());

Expand All @@ -50,6 +50,19 @@ public void findByDOI() throws IOException {
);
}

/**
 * Checks that the fetcher returns the expected full-text PDF URL for a DOI whose article is
 * presumably rendered with the new ScienceDirect page layout (as the test name suggests).
 * The test is skipped when running on the CI server.
 *
 * @throws IOException if the full-text lookup fails with an I/O error
 */
@Test
public void findByDOINewPage() throws IOException {
    // CI server is blocked
    boolean onCiServer = DevEnvironment.isCIServer();
    Assume.assumeFalse(onCiServer);

    entry.setField("doi", "10.1016/j.aasri.2014.09.002");

    // Build the expectation first, then query the fetcher, mirroring the argument order of the assertion.
    URL expectedPdf = new URL("http://www.sciencedirect.com/science/article/pii/S2212671614001024/pdf?md5=4e2e9a369b4d5b3db5100aba599bef8b&pid=1-s2.0-S2212671614001024-main.pdf");
    Optional<URL> foundPdf = finder.findFullText(entry);

    Assert.assertEquals(Optional.of(expectedPdf), foundPdf);
}

@Test
public void notFoundByDOI() throws IOException {
// CI server is blocked
Expand Down

0 comments on commit 44fc6a2

Please sign in to comment.