diff --git a/.gitignore b/.gitignore index 2e33739ec64..e3abb530fc8 100644 --- a/.gitignore +++ b/.gitignore @@ -3,8 +3,11 @@ src/main/gen/ src/main/generated/ src-gen/ + .lycheecache +jcef-bundle/ + javafx/javafx-sdk-* javafx/javafx-jmods-* javafx/javafx.html diff --git a/build.gradle b/build.gradle index fd199a8b667..a14365f4a56 100644 --- a/build.gradle +++ b/build.gradle @@ -255,10 +255,13 @@ dependencies { implementation 'org.controlsfx:controlsfx:11.2.1' + // region HTTP clients + implementation 'com.machinepublishers:jbrowserdriver:1.1.1' // used for web scraping; https://github.com/MachinePublishers/jBrowserDriver (see also https://github.com/jcefmaven/jcefmaven) implementation 'org.jsoup:jsoup:1.18.1' implementation 'com.konghq:unirest-java-core:4.4.4' implementation 'com.konghq:unirest-modules-gson:4.4.4' implementation 'org.apache.httpcomponents.client5:httpclient5:5.3.1' + // endregion implementation 'org.slf4j:slf4j-api:2.0.16' implementation 'org.tinylog:tinylog-api:2.7.0' diff --git a/buildres/abbrv.jabref.org b/buildres/abbrv.jabref.org index b69f1d607a5..8fbad5a1285 160000 --- a/buildres/abbrv.jabref.org +++ b/buildres/abbrv.jabref.org @@ -1 +1 @@ -Subproject commit b69f1d607a57488276f3402bbf610d9129e7f6fb +Subproject commit 8fbad5a1285926b177803087b35b0eb6b0fd0142 diff --git a/src/main/java/module-info.java b/src/main/java/module-info.java index 040717bf907..917c976d138 100644 --- a/src/main/java/module-info.java +++ b/src/main/java/module-info.java @@ -90,10 +90,14 @@ requires org.glassfish.hk2.api; // region: http clients - requires unirest.java.core; - requires unirest.modules.gson; + requires transitive jbrowserdriver; + requires org.openqa.selenium.core; + requires org.openqa.grid.selenium; + requires org.openqa.selenium.remote; requires org.apache.httpcomponents.core5.httpcore5; requires org.jsoup; + requires unirest.java.core; + requires unirest.modules.gson; // endregion // region: SQL databases diff --git a/src/main/java/org/jabref/logic/importer/fetcher/ACS.java 
b/src/main/java/org/jabref/logic/importer/fetcher/ACS.java index 3c81d89db2a..266bbda282f 100644 --- a/src/main/java/org/jabref/logic/importer/fetcher/ACS.java +++ b/src/main/java/org/jabref/logic/importer/fetcher/ACS.java @@ -10,14 +10,16 @@ import org.jabref.model.entry.field.StandardField; import org.jabref.model.entry.identifier.DOI; -import org.jsoup.Jsoup; -import org.jsoup.nodes.Document; -import org.jsoup.nodes.Element; +import com.machinepublishers.jbrowserdriver.JBrowserDriver; +import com.machinepublishers.jbrowserdriver.Settings; +import com.machinepublishers.jbrowserdriver.Timezone; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** - * FulltextFetcher implementation that attempts to find a PDF URL at ACS. + * FulltextFetcher implementation that attempts to find a PDF URL at ACS. + * + * Alternatives considered: https://stackoverflow.com/a/53099311/873282 */ public class ACS implements FulltextFetcher { private static final Logger LOGGER = LoggerFactory.getLogger(ACS.class); @@ -25,35 +27,33 @@ public class ACS implements FulltextFetcher { private static final String SOURCE = "https://pubs.acs.org/doi/abs/%s"; /** - * Tries to find a fulltext URL for a given BibTex entry. - *

- * Currently only uses the DOI if found. - * - * @param entry The Bibtex entry - * @return The fulltext PDF URL Optional, if found, or an empty Optional if not found. - * @throws NullPointerException if no BibTex entry is given - * @throws java.io.IOException + * Tries to find a fulltext URL for a given BibTeX entry. + * Requires the entry to have a DOI field. + * In case no DOI is present, an empty Optional is returned. */ @Override public Optional findFullText(BibEntry entry) throws IOException { Objects.requireNonNull(entry); - - // DOI search Optional doi = entry.getField(StandardField.DOI).flatMap(DOI::parse); - - if (!doi.isPresent()) { + if (doi.isEmpty()) { return Optional.empty(); } + System.setProperty("jcef.logSeverity", "VERBOSE"); + System.setProperty("jcef.logFile", "jcef.log"); + String source = SOURCE.formatted(doi.get().getDOI()); - // Retrieve PDF link - Document html = Jsoup.connect(source).ignoreHttpErrors(true).get(); - Element link = html.select("a.button_primary").first(); - if (link != null) { - LOGGER.info("Fulltext PDF found @ ACS."); - return Optional.of(new URL(source.replaceFirst("/abs/", "/pdf/"))); - } + // You can optionally pass a Settings object here, + // constructed using Settings.Builder + JBrowserDriver driver = new JBrowserDriver(Settings.builder(). 
+ timezone(Timezone.AMERICA_NEWYORK).build()); + + driver.get(source); + System.out.println(driver.getStatusCode()); + System.out.println(driver.getPageSource()); + driver.quit(); + return Optional.empty(); } diff --git a/src/main/resources/csl-locales b/src/main/resources/csl-locales index 7eeb36257a6..7b5a477f2d9 160000 --- a/src/main/resources/csl-locales +++ b/src/main/resources/csl-locales @@ -1 +1 @@ -Subproject commit 7eeb36257a68cb1907bd04f0eaa08d9ed238cbe6 +Subproject commit 7b5a477f2d9a8882b52bcecdc50f08d4422cc822 diff --git a/src/main/resources/csl-styles b/src/main/resources/csl-styles index 2b15b1fbc19..713bf5738ac 160000 --- a/src/main/resources/csl-styles +++ b/src/main/resources/csl-styles @@ -1 +1 @@ -Subproject commit 2b15b1fbc190e003b555486f46ce1112fd95defc +Subproject commit 713bf5738ac0b13c502e364cded9445c48d18193