Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable JCEF #695

Draft
wants to merge 6 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,11 @@ src/main/gen/
src/main/generated/
src-gen/


.lycheecache

jcef-bundle/

javafx/javafx-sdk-*
javafx/javafx-jmods-*
javafx/javafx.html
Expand Down
3 changes: 3 additions & 0 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -255,10 +255,13 @@ dependencies {

implementation 'org.controlsfx:controlsfx:11.2.1'

// region HTTP clients
implementation 'com.machinepublishers:jbrowserdriver:1.1.1' // used for web scraping; https://github.com/jcefmaven/jcefmaven
implementation 'org.jsoup:jsoup:1.18.1'
implementation 'com.konghq:unirest-java-core:4.4.4'
implementation 'com.konghq:unirest-modules-gson:4.4.4'
implementation 'org.apache.httpcomponents.client5:httpclient5:5.3.1'
// endregion

implementation 'org.slf4j:slf4j-api:2.0.16'
implementation 'org.tinylog:tinylog-api:2.7.0'
Expand Down
8 changes: 6 additions & 2 deletions src/main/java/module-info.java
Original file line number Diff line number Diff line change
Expand Up @@ -90,10 +90,14 @@
requires org.glassfish.hk2.api;

// region: http clients
requires unirest.java.core;
requires unirest.modules.gson;
requires transitive jbrowserdriver;
requires org.openqa.selenium.core;
requires org.openqa.grid.selenium;
requires org.openqa.selenium.remote;
requires org.apache.httpcomponents.core5.httpcore5;
requires org.jsoup;
requires unirest.java.core;
requires unirest.modules.gson;
// endregion

// region: SQL databases
Expand Down
46 changes: 23 additions & 23 deletions src/main/java/org/jabref/logic/importer/fetcher/ACS.java
Original file line number Diff line number Diff line change
Expand Up @@ -10,50 +10,50 @@
import org.jabref.model.entry.field.StandardField;
import org.jabref.model.entry.identifier.DOI;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import com.machinepublishers.jbrowserdriver.JBrowserDriver;
import com.machinepublishers.jbrowserdriver.Settings;
import com.machinepublishers.jbrowserdriver.Timezone;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* FulltextFetcher implementation that attempts to find a PDF URL at ACS.
* FulltextFetcher implementation that attempts to find a PDF URL at <a href="https://pubs.acs.org/">ACS</a>.
*
* Alternatives considered: https://stackoverflow.com/a/53099311/873282
*/
public class ACS implements FulltextFetcher {
private static final Logger LOGGER = LoggerFactory.getLogger(ACS.class);

private static final String SOURCE = "https://pubs.acs.org/doi/abs/%s";

/**
* Tries to find a fulltext URL for a given BibTex entry.
* <p>
* Currently only uses the DOI if found.
*
* @param entry The Bibtex entry
* @return The fulltext PDF URL Optional, if found, or an empty Optional if not found.
* @throws NullPointerException if no BibTex entry is given
* @throws java.io.IOException
* Tries to find a fulltext URL for a given BibTeX entry.
* Requires the entry to have a DOI field.
* In case no DOI is present, an empty Optional is returned.
*/
@Override
public Optional<URL> findFullText(BibEntry entry) throws IOException {
Objects.requireNonNull(entry);

// DOI search
Optional<DOI> doi = entry.getField(StandardField.DOI).flatMap(DOI::parse);

if (!doi.isPresent()) {
if (doi.isEmpty()) {
return Optional.empty();
}

System.setProperty("jcef.logSeverity", "VERBOSE");
System.setProperty("jcef.logFile", "jcef.log");

String source = SOURCE.formatted(doi.get().getDOI());
// Retrieve PDF link
Document html = Jsoup.connect(source).ignoreHttpErrors(true).get();
Element link = html.select("a.button_primary").first();

if (link != null) {
LOGGER.info("Fulltext PDF found @ ACS.");
return Optional.of(new URL(source.replaceFirst("/abs/", "/pdf/")));
}
// You can optionally pass a Settings object here,
// constructed using Settings.Builder
JBrowserDriver driver = new JBrowserDriver(Settings.builder().
timezone(Timezone.AMERICA_NEWYORK).build());

driver.get(source);
System.out.println(driver.getStatusCode());
System.out.println(driver.getPageSource());
driver.quit();

return Optional.empty();
}

Expand Down
2 changes: 1 addition & 1 deletion src/main/resources/csl-styles
Submodule csl-styles updated 108 files
Loading