diff --git a/src/main/java/org/jabref/logic/importer/fetcher/GoogleScholar.java b/src/main/java/org/jabref/logic/importer/fetcher/GoogleScholar.java
index 4e28444ff28f..73057a916091 100644
--- a/src/main/java/org/jabref/logic/importer/fetcher/GoogleScholar.java
+++ b/src/main/java/org/jabref/logic/importer/fetcher/GoogleScholar.java
@@ -29,13 +29,14 @@
import org.apache.http.client.utils.URIBuilder;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* FulltextFetcher implementation that attempts to find a PDF URL at GoogleScholar.
- *
+ *
* Search String infos: https://scholar.google.com/intl/en/scholar/help.html#searching
*/
public class GoogleScholar implements FulltextFetcher, SearchBasedFetcher {
@@ -58,11 +59,10 @@ public GoogleScholar(ImportFormatPreferences importFormatPreferences) {
@Override
public Optional findFullText(BibEntry entry) throws IOException, FetcherException {
Objects.requireNonNull(entry);
- Optional pdfLink = Optional.empty();
// Search in title
if (!entry.hasField(StandardField.TITLE)) {
- return pdfLink;
+ return Optional.empty();
}
try {
@@ -74,12 +74,10 @@ public Optional findFullText(BibEntry entry) throws IOException, FetcherExc
// as_occt field to search in
uriBuilder.addParameter("as_occt", "title");
- pdfLink = search(uriBuilder.toString());
+ return search(uriBuilder.toString());
} catch (URISyntaxException e) {
throw new FetcherException("Building URI failed.", e);
}
-
- return pdfLink;
}
@Override
@@ -91,6 +89,11 @@ private Optional search(String url) throws IOException {
Optional pdfLink = Optional.empty();
Document doc = Jsoup.connect(url).userAgent(URLDownload.USER_AGENT).get();
+
+ if (needsCaptcha(doc.body().html())) {
+ LOGGER.warn("Hit Google traffic limitation. Captcha prevents automatic fetching.");
+ return Optional.empty();
+ }
// Check results for PDF link
// TODO: link always on first result or none?
for (int i = 0; i < NUM_RESULTS; i++) {
@@ -111,6 +114,10 @@ private Optional search(String url) throws IOException {
return pdfLink;
}
+ private boolean needsCaptcha(String body) { // Reports whether the given page content contains the captcha marker; callers treat a hit as Google's traffic limitation.
+ return body.contains("id=\"gs_captcha_ccl\""); // Plain substring match on the marker element's id attribute; the content is not parsed here.
+ }
+
@Override
public String getName() {
return "Google Scholar";
@@ -158,6 +165,11 @@ public List performSearch(String query) throws FetcherException {
private void addHitsFromQuery(List entryList, String queryURL) throws IOException, FetcherException {
String content = new URLDownload(queryURL).asString();
+ if (needsCaptcha(content)) {
+ throw new FetcherException("Fetching from Google Scholar failed.",
+ Localization.lang("This might be caused by reaching the traffic limitation of Google Scholar (see 'Help' for details)."), null);
+ }
+
Matcher matcher = LINK_TO_BIB_PATTERN.matcher(content);
while (matcher.find()) {
String citationsPageURL = matcher.group().replace("&", "&");