Skip to content

Commit

Permalink
Rename package to fetchers
Browse files Browse the repository at this point in the history
  • Loading branch information
stefan-kolb committed Aug 18, 2015
1 parent 3d3bfb5 commit 43d0c7d
Show file tree
Hide file tree
Showing 22 changed files with 154 additions and 226 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[master]
- Fix fulltext crawler for ScienceDirect, SpringerLink, and ACS
- Feature: PDF auto download from ACS, ScienceDirect, SpringerLink, and Google Scholar
- Perform syntax improvements enabled by Java 1.7+ (diamond operator, try-with-resources)
- List of authors is now auto-generated by `scripts/generate-authors.sh` and inserted into L10N About.html
- Remove Mr.DLib support as Mr.DLib will be shut down in 2015
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
*/
package net.sf.jabref.export.layout.format;

import net.sf.jabref.util.DOI;
import net.sf.jabref.logic.util.DOI;
import net.sf.jabref.export.layout.LayoutFormatter;

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
*/
package net.sf.jabref.export.layout.format;

import net.sf.jabref.util.DOI;
import net.sf.jabref.logic.util.DOI;
import net.sf.jabref.export.layout.LayoutFormatter;

/**
Expand Down
11 changes: 6 additions & 5 deletions src/main/java/net/sf/jabref/external/DownloadExternalFile.java
Original file line number Diff line number Diff line change
Expand Up @@ -159,8 +159,6 @@ public void run() {
if (mimeType != null) {
System.out.println("mimetype:" + mimeType);
suggestedType = Globals.prefs.getExternalFileTypeByMimeType(mimeType);
/*if (suggestedType != null)
System.out.println("Found type '"+suggestedType.getName()+"' by MIME type '"+udl.getMimeType()+"'");*/
}
// Then, while the download is proceeding, let the user choose the details of the file:
String suffix;
Expand Down Expand Up @@ -286,7 +284,7 @@ private void downloadFinished() {
editor.setOkEnabled(true);
editor.getProgressBar().setValue(editor.getProgressBar().getMaximum());
}

// FIXME: will break download if no bibtexkey is present!
private String getSuggestedFileName(String suffix) {

String plannedName = bibtexKey;
Expand All @@ -298,7 +296,10 @@ private String getSuggestedFileName(String suffix) {
* [ 1548875 ] download pdf produces unsupported filename
*
* http://sourceforge.net/tracker/index.php?func=detail&aid=1548875&group_id=92314&atid=600306
*
* FIXME: rework this! just allow alphanumeric stuff or so?
* https://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx#naming_conventions
* http://superuser.com/questions/358855/what-characters-are-safe-in-cross-platform-file-names-for-linux-windows-and-os
* https://support.apple.com/en-us/HT202808
*/
if (OS.WINDOWS) {
plannedName = plannedName.replaceAll(
Expand All @@ -311,7 +312,7 @@ private String getSuggestedFileName(String suffix) {
}

/**
* Look for the last '.' in the link, and returnthe following characters.
* Look for the last '.' in the link, and return the following characters.
* This gives the extension for most reasonably named links.
*
* @param link The link
Expand Down
8 changes: 1 addition & 7 deletions src/main/java/net/sf/jabref/external/FindFullText.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
*/
package net.sf.jabref.external;

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
Expand All @@ -29,11 +27,7 @@
import java.util.Optional;

import net.sf.jabref.BibtexEntry;
import net.sf.jabref.logic.crawler.ACS;
import net.sf.jabref.logic.crawler.GoogleScholar;
import net.sf.jabref.logic.crawler.ScienceDirect;
import net.sf.jabref.logic.crawler.SpringerLink;
import net.sf.jabref.util.DOI;
import net.sf.jabref.logic.fetcher.*;
import net.sf.jabref.logic.net.URLDownload;

/**
Expand Down
39 changes: 0 additions & 39 deletions src/main/java/net/sf/jabref/external/FullTextFinder.java

This file was deleted.

2 changes: 1 addition & 1 deletion src/main/java/net/sf/jabref/gui/CleanUpAction.java
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
import com.jgoodies.forms.layout.CellConstraints;
import com.jgoodies.forms.layout.FormLayout;
import net.sf.jabref.logic.l10n.Localization;
import net.sf.jabref.util.DOI;
import net.sf.jabref.logic.util.DOI;
import net.sf.jabref.util.FileUtil;
import net.sf.jabref.logic.util.MonthUtil;
import net.sf.jabref.util.Util;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ of the License, or (at your option) any later version.

import net.sf.jabref.*;
import net.sf.jabref.logic.l10n.Localization;
import net.sf.jabref.util.DOI;
import net.sf.jabref.logic.util.DOI;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.pdfbox.pdmodel.PDDocument;
Expand Down
51 changes: 0 additions & 51 deletions src/main/java/net/sf/jabref/logic/crawler/ScienceDirect.java

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,30 +1,13 @@
/* Copyright (C) 2014 Commonwealth Scientific and Industrial Research Organisation
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
package net.sf.jabref.logic.crawler;
package net.sf.jabref.logic.fetcher;

import net.sf.jabref.BibtexEntry;
import net.sf.jabref.external.FullTextFinder;
import net.sf.jabref.util.DOI;
import net.sf.jabref.logic.util.DOI;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;

import java.net.HttpURLConnection;
import java.net.URL;
import java.io.IOException;
import java.util.Objects;
Expand Down
24 changes: 24 additions & 0 deletions src/main/java/net/sf/jabref/logic/fetcher/FullTextFinder.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
package net.sf.jabref.logic.fetcher;

import net.sf.jabref.BibtexEntry;

import java.net.URL;
import java.io.IOException;
import java.util.Optional;

/**
 * This interface is used for classes that try to resolve a full-text PDF URL for a BibTeX entry.
 * Implementing classes should specialize on specific article sites.
 * See e.g. <a href="http://libguides.mit.edu/apis">http://libguides.mit.edu/apis</a>.
 */
public interface FullTextFinder {
    /**
     * Tries to find a full-text URL for a given BibTeX entry.
     *
     * @param entry The BibTeX entry
     * @return an Optional containing the full-text PDF URL if one was found, or an empty Optional otherwise
     * @throws NullPointerException if no BibTeX entry is given
     * @throws java.io.IOException if an I/O problem occurs while looking up the full text
     */
    Optional<URL> findFullText(BibtexEntry entry) throws IOException;
}
Original file line number Diff line number Diff line change
@@ -1,31 +1,13 @@
/* Copyright (C) 2003-2011 JabRef contributors.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
package net.sf.jabref.logic.crawler;
package net.sf.jabref.logic.fetcher;

import net.sf.jabref.BibtexEntry;
import net.sf.jabref.external.FullTextFinder;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLEncoder;
import java.util.Objects;
Expand Down Expand Up @@ -55,7 +37,7 @@ public Optional<URL> findFullText(BibtexEntry entry) throws IOException {
String url = String.format(SEARCH_URL, URLEncoder.encode(entryTitle, "UTF-8"));

Document doc = Jsoup.connect(url)
.userAgent("Mozilla") // don't identify as a crawler
.userAgent("Mozilla") // don't identify as a crawler FIXME: still gets blocked in tests
.get();
// Check results for PDF link
// TODO: link always on first result or none?
Expand Down
57 changes: 57 additions & 0 deletions src/main/java/net/sf/jabref/logic/fetcher/ScienceDirect.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
package net.sf.jabref.logic.fetcher;

import com.mashape.unirest.http.HttpResponse;
import com.mashape.unirest.http.Unirest;
import com.mashape.unirest.http.exceptions.UnirestException;
import net.sf.jabref.BibtexEntry;
import net.sf.jabref.logic.util.DOI;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import java.io.InputStream;
import java.net.URL;
import java.io.IOException;
import java.util.Objects;
import java.util.Optional;

/**
 * FullTextFinder implementation that attempts to find a PDF URL for an article by querying
 * the Elsevier content API with the entry's DOI.
 *
 * @see <a href="http://dev.elsevier.com/">http://dev.elsevier.com/</a>
 */
public class ScienceDirect implements FullTextFinder {
    private static final Log LOGGER = LogFactory.getLog(ScienceDirect.class);

    // NOTE(review): the API key is embedded in source and sent over plain HTTP.
    // Consider switching to HTTPS and loading the key from configuration — confirm with maintainers.
    private static final String API_URL = "http://api.elsevier.com/content/article/doi/";
    private static final String API_KEY = "fb82f2e692b3c72dafe5f4f1fa0ac00b";

    // Query parameter asking the API for the PDF representation. Shared by the probe request
    // and the returned link so the two cannot drift apart.
    private static final String ACCEPT_PARAM = "httpAccept";
    private static final String ACCEPT_PDF = "application/pdf";

    /**
     * Looks up the entry's DOI in the Elsevier API and, if the API answers with HTTP 200,
     * returns the API URL that requests the PDF representation.
     *
     * @param entry The BibTeX entry; its "doi" field is used for the lookup
     * @return an Optional containing the PDF URL if the API responded with status 200;
     *         an empty Optional if no DOI could be built from the entry, the API returned
     *         another status, or the API request failed
     * @throws NullPointerException if no BibTeX entry is given
     * @throws IOException if the resulting PDF link is not a well-formed URL
     */
    @Override
    public Optional<URL> findFullText(BibtexEntry entry) throws IOException {
        Objects.requireNonNull(entry);

        // Try unique DOI first
        Optional<DOI> doi = DOI.build(entry.getField("doi"));
        if (!doi.isPresent()) {
            // TODO: title search
            // We can also get abstract automatically!
            return Optional.empty();
        }

        Optional<URL> pdfLink = Optional.empty();
        String request = API_URL + doi.get().getDOI();

        // Available in catalog?
        try {
            // Only the status code is inspected; the response body itself is not used here.
            HttpResponse<InputStream> response = Unirest.get(request)
                    .header("X-ELS-APIKey", API_KEY)
                    .queryString(ACCEPT_PARAM, ACCEPT_PDF)
                    .asBinary();

            if (response.getStatus() == 200) {
                LOGGER.info("Fulltext PDF found @ ScienceDirect.");
                pdfLink = Optional.of(new URL(request + "?" + ACCEPT_PARAM + "=" + ACCEPT_PDF));
            }
        } catch (UnirestException e) {
            // Pass the exception itself so the cause and stack trace end up in the log,
            // not just the message text.
            LOGGER.warn("Elsevier API request failed: " + e.getMessage(), e);
        }

        return pdfLink;
    }
}
Original file line number Diff line number Diff line change
@@ -1,36 +1,16 @@
/* Copyright (C) 2003-2011 JabRef contributors.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
package net.sf.jabref.logic.crawler;
package net.sf.jabref.logic.fetcher;

import com.mashape.unirest.http.HttpResponse;
import com.mashape.unirest.http.JsonNode;
import com.mashape.unirest.http.Unirest;
import com.mashape.unirest.http.exceptions.UnirestException;
import net.sf.jabref.BibtexEntry;
import net.sf.jabref.external.FullTextFinder;
import net.sf.jabref.util.DOI;
import net.sf.jabref.logic.util.DOI;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.json.JSONObject;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;

import java.net.URL;
import java.net.MalformedURLException;
import java.io.*;
import java.util.Objects;
import java.util.Optional;
Expand Down
Loading

0 comments on commit 43d0c7d

Please sign in to comment.