Skip to content

Commit

Permalink
Introduce convenient interface for ID-based fetcher (JabRef#1998)
Browse files Browse the repository at this point in the history
* Introduce IdBasedParserFetcher

* Add language text

* Remove unused imports
  • Loading branch information
tobiasdiez authored and zesaro committed Oct 27, 2016
1 parent d9754ae commit 12f9a11
Show file tree
Hide file tree
Showing 22 changed files with 183 additions and 80 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
package net.sf.jabref.logic.importer;

import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.List;
import java.util.Optional;

import net.sf.jabref.logic.formatter.Formatter;
import net.sf.jabref.model.entry.BibEntry;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.jsoup.helper.StringUtil;

/**
 * Provides a convenient interface for ID-based fetchers, which follow the usual three-step procedure:
 * 1. Open a URL based on the search query
 * 2. Parse the response to get a list of {@link BibEntry}
 * 3. Post-process fetched entries
 */
public interface IdBasedParserFetcher extends IdBasedFetcher {

    Log LOGGER = LogFactory.getLog(IdBasedParserFetcher.class);

    /**
     * Constructs a URL based on the query.
     *
     * @param identifier the ID
     * @return the URL that {@link #performSearchById(String)} opens to obtain the raw response
     * @throws URISyntaxException    if the implementation cannot build a syntactically valid URI
     * @throws MalformedURLException if the built URI cannot be converted to a URL
     * @throws FetcherException      if the implementation rejects the identifier
     */
    URL getURLForID(String identifier) throws URISyntaxException, MalformedURLException, FetcherException;

    /**
     * Returns the parser used to convert the response to a list of {@link BibEntry}.
     */
    Parser getParser();

    /**
     * Performs a cleanup of the fetched entry.
     *
     * Only systematic errors of the fetcher should be corrected here
     * (i.e. if information is consistently contained in the wrong field or the wrong format)
     * but not cosmetic issues which may depend on the user's taste (for example, LaTeX code vs HTML in the abstract).
     *
     * Try to reuse existing {@link Formatter} for the cleanup. For example,
     * {@code new FieldFormatterCleanup(FieldName.TITLE, new RemoveBracesFormatter()).cleanup(entry);}
     *
     * By default, no cleanup is done.
     *
     * @param entry the entry to be cleaned-up
     */
    default void doPostCleanup(BibEntry entry) {
        // Do nothing by default
    }

    /**
     * Fetches the entry for the given identifier by opening the URL from {@link #getURLForID(String)},
     * parsing the response with {@link #getParser()} and running {@link #doPostCleanup(BibEntry)}
     * on the result.
     *
     * If the parser returns several entries, only the first one is used and an info message is logged.
     *
     * @param identifier the ID to look up
     * @return the fetched and cleaned-up entry, or an empty Optional if the identifier is blank
     *         or the parser returned no entries
     * @throws FetcherException if the URL cannot be built, the stream cannot be read,
     *                          or the response cannot be parsed
     */
    @Override
    default Optional<BibEntry> performSearchById(String identifier) throws FetcherException {
        if (StringUtil.isBlank(identifier)) {
            return Optional.empty();
        }

        // The stream is closed automatically once parsing has finished (or failed)
        try (InputStream stream = new BufferedInputStream(getURLForID(identifier).openStream())) {
            List<BibEntry> fetchedEntries = getParser().parseEntries(stream);

            if (fetchedEntries.isEmpty()) {
                return Optional.empty();
            }

            if (fetchedEntries.size() > 1) {
                LOGGER.info("Fetcher " + getName() + " found more than one result for identifier " + identifier
                        + ". We will use the first entry.");
            }

            BibEntry entry = fetchedEntries.get(0);

            // Post-cleanup
            doPostCleanup(entry);

            return Optional.of(entry);
        } catch (URISyntaxException e) {
            throw new FetcherException("Search URI is malformed", e);
        } catch (IOException e) {
            // MalformedURLException is a subclass of IOException and therefore ends up here as well
            // TODO: Catch HTTP Response 401 errors and report that user has no rights to access resource
            throw new FetcherException("An I/O exception occurred", e);
        } catch (ParserException e) {
            throw new FetcherException("An internal parser error occurred", e);
        }
    }
}
36 changes: 14 additions & 22 deletions src/main/java/net/sf/jabref/logic/importer/fetcher/DiVA.java
Original file line number Diff line number Diff line change
@@ -1,31 +1,26 @@
package net.sf.jabref.logic.importer.fetcher;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.util.Optional;
import java.net.URL;

import net.sf.jabref.logic.help.HelpFile;
import net.sf.jabref.logic.importer.FetcherException;
import net.sf.jabref.logic.importer.IdBasedFetcher;
import net.sf.jabref.logic.importer.IdBasedParserFetcher;
import net.sf.jabref.logic.importer.ImportFormatPreferences;
import net.sf.jabref.logic.importer.Parser;
import net.sf.jabref.logic.importer.fileformat.BibtexParser;
import net.sf.jabref.logic.net.URLDownload;
import net.sf.jabref.model.entry.BibEntry;

import org.apache.http.client.utils.URIBuilder;

/*
* http://www.diva-portal.org/smash/aboutdiva.jsf?dswid=-3222
* DiVA portal contains research publications and student theses from 40 Swedish universities and research institutions.
*/
public class DiVA implements IdBasedFetcher {

private static final String URL = "http://www.diva-portal.org/smash/getreferences"; // ?referenceFormat=BibTex&pids=%s";
public class DiVA implements IdBasedParserFetcher {

private final ImportFormatPreferences importFormatPreferences;


public DiVA(ImportFormatPreferences importFormatPreferences) {
this.importFormatPreferences = importFormatPreferences;
}
Expand All @@ -41,21 +36,18 @@ public HelpFile getHelpPage() {
}

@Override
public Optional<BibEntry> performSearchById(String identifier) throws FetcherException {
try {
URIBuilder uriBuilder = new URIBuilder(URL);

uriBuilder.addParameter("referenceFormat", "BibTex");
uriBuilder.addParameter("pids", identifier);
public URL getURLForID(String identifier) throws URISyntaxException, MalformedURLException, FetcherException {
URIBuilder uriBuilder = new URIBuilder("http://www.diva-portal.org/smash/getreferences");

URLDownload dl = new URLDownload(uriBuilder.build().toURL());
uriBuilder.addParameter("referenceFormat", "BibTex");
uriBuilder.addParameter("pids", identifier);

String bibtexString = dl.downloadToString(StandardCharsets.UTF_8);
return BibtexParser.singleFromString(bibtexString, importFormatPreferences);
return uriBuilder.build().toURL();
}

} catch (URISyntaxException | IOException e) {
throw new FetcherException("Problem getting information from DiVA", e);
}
/**
 * Supplies the parser for the fetched response: a {@link BibtexParser}
 * created from the import format preferences held by this fetcher.
 */
@Override
public Parser getParser() {
    final Parser bibtexParser = new BibtexParser(importFormatPreferences);
    return bibtexParser;
}

public boolean isValidId(String identifier) {
Expand Down
73 changes: 30 additions & 43 deletions src/main/java/net/sf/jabref/logic/importer/fetcher/IsbnFetcher.java
Original file line number Diff line number Diff line change
@@ -1,33 +1,34 @@
package net.sf.jabref.logic.importer.fetcher;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.Optional;

import net.sf.jabref.logic.cleanup.FieldFormatterCleanup;
import net.sf.jabref.logic.formatter.bibtexfields.ClearFormatter;
import net.sf.jabref.logic.formatter.bibtexfields.NormalizePagesFormatter;
import net.sf.jabref.logic.help.HelpFile;
import net.sf.jabref.logic.importer.FetcherException;
import net.sf.jabref.logic.importer.IdBasedFetcher;
import net.sf.jabref.logic.importer.IdBasedParserFetcher;
import net.sf.jabref.logic.importer.ImportFormatPreferences;
import net.sf.jabref.logic.importer.Parser;
import net.sf.jabref.logic.importer.fileformat.BibtexParser;
import net.sf.jabref.logic.l10n.Localization;
import net.sf.jabref.logic.util.ISBN;
import net.sf.jabref.model.entry.BibEntry;
import net.sf.jabref.model.entry.FieldName;

import com.mashape.unirest.http.Unirest;
import com.mashape.unirest.http.exceptions.UnirestException;
import org.apache.http.client.utils.URIBuilder;

/**
* Fetcher for ISBN.
* Fetcher for ISBN using http://www.ebook.de.
*/
public class IsbnFetcher implements IdBasedFetcher {
public class IsbnFetcher implements IdBasedParserFetcher {

private static final String URL_PATTERN = "http://www.ebook.de/de/tools/isbn2bibtex?";
private ImportFormatPreferences prefs;
private ImportFormatPreferences importFormatPreferences;

public IsbnFetcher(ImportFormatPreferences prefs){
this.prefs = prefs;
public IsbnFetcher(ImportFormatPreferences importFormatPreferences){
this.importFormatPreferences = importFormatPreferences;
}

@Override
Expand All @@ -41,43 +42,29 @@ public HelpFile getHelpPage() {
}

@Override
public Optional<BibEntry> performSearchById(String identifier) throws FetcherException {
public URL getURLForID(String identifier) throws URISyntaxException, MalformedURLException, FetcherException {
ISBN isbn = new ISBN(identifier);
Optional<BibEntry> result = Optional.empty();

if (isbn.isValidChecksum() && isbn.isValidFormat()) {
try {
//Build the URL. In this case: http://www.ebook.de/de/tools/isbn2bibtex?isbn=identifier
URIBuilder uriBuilder = new URIBuilder(URL_PATTERN);
uriBuilder.addParameter("isbn", identifier);
URL url = uriBuilder.build().toURL();

//Downloads the source code of the site and then creates a .bib file out of the String
String bibtexString = Unirest.get(url.toString()).asString().getBody();
Optional<BibEntry> entry = BibtexParser.singleFromString(bibtexString, prefs);

if (entry.isPresent()) {
result = postProcessEntry(entry.get());
}

} catch (UnirestException | IOException | URISyntaxException e) {
throw new FetcherException("Bad URL when fetching ISBN info", e);
}
if (!isbn.isValid()) {
throw new FetcherException(Localization.lang("Invalid_ISBN:_'%0'.", identifier));
}
return result;

URIBuilder uriBuilder = new URIBuilder("http://www.ebook.de/de/tools/isbn2bibtex");
uriBuilder.addParameter("isbn", identifier);
return uriBuilder.build().toURL();
}

private Optional<BibEntry> postProcessEntry(BibEntry entry) {
if (entry.hasField(FieldName.URL)) {
entry.clearField(FieldName.URL);
}
/**
 * Supplies the parser for the fetched response: a {@link BibtexParser}
 * created from the import format preferences held by this fetcher.
 */
@Override
public Parser getParser() {
    final Parser bibtexParser = new BibtexParser(importFormatPreferences);
    return bibtexParser;
}

//Removes every non-digit character in the PAGETOTAL field.
Optional<String> pagetotal = entry.getField(FieldName.PAGETOTAL);
pagetotal.ifPresent(pg -> {
entry.setField(FieldName.PAGETOTAL, pg.replaceAll("[\\D]", ""));
});
@Override
public void doPostCleanup(BibEntry entry) {
new FieldFormatterCleanup(FieldName.URL, new ClearFormatter()).cleanup(entry);

return Optional.of(entry);
// Fetcher returns page numbers as "30 Seiten" -> remove every non-digit character in the PAGETOTAL field
entry.getField(FieldName.PAGETOTAL).ifPresent(pages ->
entry.setField(FieldName.PAGETOTAL, pages.replaceAll("[\\D]", "")));
new FieldFormatterCleanup(FieldName.PAGETOTAL, new NormalizePagesFormatter()).cleanup(entry);
}
}
3 changes: 3 additions & 0 deletions src/main/java/net/sf/jabref/logic/util/ISBN.java
Original file line number Diff line number Diff line change
Expand Up @@ -73,4 +73,7 @@ private boolean isbn13check() {
return (sum % 10) == 0;
}

/**
 * Checks the format first and, only if that check passes, the checksum.
 *
 * @return {@code true} if both {@code isValidFormat()} and {@code isValidChecksum()} return {@code true}
 */
public boolean isValid() {
    if (!isValidFormat()) {
        // Same short-circuit as the && form: the checksum is not evaluated for a bad format
        return false;
    }
    return isValidChecksum();
}
}
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_da.properties
Original file line number Diff line number Diff line change
Expand Up @@ -2372,3 +2372,5 @@ Select_first_entry=
Select_last_entry=

Search_in_all_open_databases=

Invalid_ISBN\:_'%0'.=
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_de.properties
Original file line number Diff line number Diff line change
Expand Up @@ -2368,3 +2368,5 @@ Select_first_entry=Ersten_Eintrag_auswählen
Select_last_entry=Letzten_Eintrag_auswählen

Search_in_all_open_databases=

Invalid_ISBN\:_'%0'.=
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_en.properties
Original file line number Diff line number Diff line change
Expand Up @@ -2359,3 +2359,5 @@ Fetcher_'%0'_did_not_find_an_entry_for_id_'%1'.=Fetcher_'%0'_did_not_find_an_ent

Select_first_entry=Select_first_entry
Select_last_entry=Select_last_entry

Invalid_ISBN\:_'%0'.=Invalid_ISBN\:_'%0'.
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_es.properties
Original file line number Diff line number Diff line change
Expand Up @@ -2372,3 +2372,5 @@ Select_first_entry=
Select_last_entry=

Search_in_all_open_databases=

Invalid_ISBN\:_'%0'.=
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_fa.properties
Original file line number Diff line number Diff line change
Expand Up @@ -2375,3 +2375,5 @@ Select_first_entry=
Select_last_entry=

Search_in_all_open_databases=

Invalid_ISBN\:_'%0'.=
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_fr.properties
Original file line number Diff line number Diff line change
Expand Up @@ -2368,3 +2368,5 @@ Select_first_entry=
Select_last_entry=

Search_in_all_open_databases=

Invalid_ISBN\:_'%0'.=
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_in.properties
Original file line number Diff line number Diff line change
Expand Up @@ -2372,3 +2372,5 @@ Select_first_entry=
Select_last_entry=

Search_in_all_open_databases=

Invalid_ISBN\:_'%0'.=
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_it.properties
Original file line number Diff line number Diff line change
Expand Up @@ -2372,3 +2372,5 @@ Select_first_entry=
Select_last_entry=

Search_in_all_open_databases=

Invalid_ISBN\:_'%0'.=
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_ja.properties
Original file line number Diff line number Diff line change
Expand Up @@ -2457,3 +2457,5 @@ Fetcher_'%0'_did_not_find_an_entry_for_id_'%1'.=取得子「%0」は,IDが「%
Search_in_all_open_databases=
Select_first_entry=最初の項目を選択
Select_last_entry=最後の項目を選択

Invalid_ISBN\:_'%0'.=
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_nl.properties
Original file line number Diff line number Diff line change
Expand Up @@ -2374,3 +2374,5 @@ Select_first_entry=
Select_last_entry=

Search_in_all_open_databases=

Invalid_ISBN\:_'%0'.=
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_no.properties
Original file line number Diff line number Diff line change
Expand Up @@ -2374,3 +2374,5 @@ Select_first_entry=
Select_last_entry=

Search_in_all_open_databases=

Invalid_ISBN\:_'%0'.=
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_pt_BR.properties
Original file line number Diff line number Diff line change
Expand Up @@ -2372,3 +2372,5 @@ Select_first_entry=
Select_last_entry=

Search_in_all_open_databases=

Invalid_ISBN\:_'%0'.=
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_ru.properties
Original file line number Diff line number Diff line change
Expand Up @@ -2375,3 +2375,5 @@ Select_first_entry=
Select_last_entry=

Search_in_all_open_databases=

Invalid_ISBN\:_'%0'.=
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_sv.properties
Original file line number Diff line number Diff line change
Expand Up @@ -2368,3 +2368,5 @@ Select_first_entry=
Select_last_entry=

Search_in_all_open_databases=

Invalid_ISBN\:_'%0'.=
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_tr.properties
Original file line number Diff line number Diff line change
Expand Up @@ -2373,3 +2373,5 @@ Select_first_entry=İlk_girdiyi_seç
Select_last_entry=Son_girdiyi_seç

Search_in_all_open_databases=

Invalid_ISBN\:_'%0'.=
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_vi.properties
Original file line number Diff line number Diff line change
Expand Up @@ -2374,3 +2374,5 @@ Select_first_entry=
Select_last_entry=

Search_in_all_open_databases=

Invalid_ISBN\:_'%0'.=
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_zh.properties
Original file line number Diff line number Diff line change
Expand Up @@ -2372,3 +2372,5 @@ Select_first_entry=
Select_last_entry=

Search_in_all_open_databases=

Invalid_ISBN\:_'%0'.=
Loading

0 comments on commit 12f9a11

Please sign in to comment.