diff --git a/src/main/java/net/sf/jabref/logic/importer/IdBasedParserFetcher.java b/src/main/java/net/sf/jabref/logic/importer/IdBasedParserFetcher.java
new file mode 100644
index 00000000000..c657183085a
--- /dev/null
+++ b/src/main/java/net/sf/jabref/logic/importer/IdBasedParserFetcher.java
@@ -0,0 +1,90 @@
+package net.sf.jabref.logic.importer;
+
+import java.io.BufferedInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.MalformedURLException;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.util.List;
+import java.util.Optional;
+
+import net.sf.jabref.logic.formatter.Formatter;
+import net.sf.jabref.model.entry.BibEntry;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.jsoup.helper.StringUtil;
+
+/**
+ * Provides a convenient interface for ID-based fetchers, which follow the usual three-step procedure:
+ * 1. Open a URL based on the search query
+ * 2. Parse the response to get a list of {@link BibEntry}
+ * 3. Post-process fetched entries
+ */
+public interface IdBasedParserFetcher extends IdBasedFetcher {
+
+    Log LOGGER = LogFactory.getLog(IdBasedParserFetcher.class);
+
+    /**
+     * Constructs a URL based on the query.
+     * @param identifier the ID
+     */
+    URL getURLForID(String identifier) throws URISyntaxException, MalformedURLException, FetcherException;
+
+    /**
+     * Returns the parser used to convert the response to a list of {@link BibEntry}.
+     */
+    Parser getParser();
+
+    /**
+     * Performs a cleanup of the fetched entry.
+     *
+     * Only systematic errors of the fetcher should be corrected here
+     * (i.e. if information is consistently contained in the wrong field or the wrong format)
+     * but not cosmetic issues which may depend on the user's taste (for example, LaTeX code vs HTML in the abstract).
+     *
+     * Try to reuse existing {@link Formatter} for the cleanup. For example,
+     * {@code new FieldFormatterCleanup(FieldName.TITLE, new RemoveBracesFormatter()).cleanup(entry);}
+     *
+     * By default, no cleanup is done.
+     * @param entry the entry to be cleaned up
+     */
+    default void doPostCleanup(BibEntry entry) {
+        // Do nothing by default
+    }
+
+    @Override
+    default Optional<BibEntry> performSearchById(String identifier) throws FetcherException {
+        if (StringUtil.isBlank(identifier)) {
+            return Optional.empty();
+        }
+
+        try (InputStream stream = new BufferedInputStream(getURLForID(identifier).openStream())) {
+            List<BibEntry> fetchedEntries = getParser().parseEntries(stream);
+
+            if (fetchedEntries.isEmpty()) {
+                return Optional.empty();
+            }
+
+            if (fetchedEntries.size() > 1) {
+                LOGGER.info("Fetcher " + getName() + " found more than one result for identifier " + identifier
+                        + ". We will use the first entry.");
+            }
+
+            BibEntry entry = fetchedEntries.get(0);
+
+            // Post-cleanup
+            doPostCleanup(entry);
+
+            return Optional.of(entry);
+        } catch (URISyntaxException e) {
+            throw new FetcherException("Search URI is malformed", e);
+        } catch (IOException e) {
+            // TODO: Catch HTTP Response 401 errors and report that user has no rights to access resource
+            throw new FetcherException("An I/O exception occurred", e);
+        } catch (ParserException e) {
+            throw new FetcherException("An internal parser error occurred", e);
+        }
+    }
+}
diff --git a/src/main/java/net/sf/jabref/logic/importer/fetcher/DiVA.java b/src/main/java/net/sf/jabref/logic/importer/fetcher/DiVA.java
index 80d2f108b63..e0fac8bc84f 100644
--- a/src/main/java/net/sf/jabref/logic/importer/fetcher/DiVA.java
+++ b/src/main/java/net/sf/jabref/logic/importer/fetcher/DiVA.java
@@ -1,17 +1,15 @@
 package net.sf.jabref.logic.importer.fetcher;
 
-import java.io.IOException;
+import java.net.MalformedURLException;
 import java.net.URISyntaxException;
-import java.nio.charset.StandardCharsets;
-import java.util.Optional;
+import java.net.URL;
 
 import net.sf.jabref.logic.help.HelpFile;
 import net.sf.jabref.logic.importer.FetcherException;
-import net.sf.jabref.logic.importer.IdBasedFetcher;
+import net.sf.jabref.logic.importer.IdBasedParserFetcher;
 import net.sf.jabref.logic.importer.ImportFormatPreferences;
+import net.sf.jabref.logic.importer.Parser;
 import net.sf.jabref.logic.importer.fileformat.BibtexParser;
-import net.sf.jabref.logic.net.URLDownload;
-import net.sf.jabref.model.entry.BibEntry;
 
 import org.apache.http.client.utils.URIBuilder;
 
@@ -19,13 +17,10 @@
  * http://www.diva-portal.org/smash/aboutdiva.jsf?dswid=-3222
  * DiVA portal contains research publications and student theses from 40 Swedish universities and research institutions.
  */
-public class DiVA implements IdBasedFetcher {
-
-    private static final String URL = "http://www.diva-portal.org/smash/getreferences"; // ?referenceFormat=BibTex&pids=%s";
+public class DiVA implements IdBasedParserFetcher {
 
     private final ImportFormatPreferences importFormatPreferences;
 
-
     public DiVA(ImportFormatPreferences importFormatPreferences) {
         this.importFormatPreferences = importFormatPreferences;
     }
@@ -41,21 +36,18 @@ public HelpFile getHelpPage() {
     }
 
     @Override
-    public Optional performSearchById(String identifier) throws FetcherException {
-        try {
-            URIBuilder uriBuilder = new URIBuilder(URL);
-
-            uriBuilder.addParameter("referenceFormat", "BibTex");
-            uriBuilder.addParameter("pids", identifier);
+    public URL getURLForID(String identifier) throws URISyntaxException, MalformedURLException, FetcherException {
+        URIBuilder uriBuilder = new URIBuilder("http://www.diva-portal.org/smash/getreferences");
 
-            URLDownload dl = new URLDownload(uriBuilder.build().toURL());
+        uriBuilder.addParameter("referenceFormat", "BibTex");
+        uriBuilder.addParameter("pids", identifier);
 
-            String bibtexString = dl.downloadToString(StandardCharsets.UTF_8);
-            return BibtexParser.singleFromString(bibtexString, importFormatPreferences);
+        return uriBuilder.build().toURL();
+    }
 
-        } catch (URISyntaxException | IOException e) {
-            throw new FetcherException("Problem getting information from DiVA", e);
-        }
+    @Override
+    public Parser getParser() {
+        return new BibtexParser(importFormatPreferences);
     }
 
     public boolean isValidId(String identifier) {
diff --git a/src/main/java/net/sf/jabref/logic/importer/fetcher/IsbnFetcher.java b/src/main/java/net/sf/jabref/logic/importer/fetcher/IsbnFetcher.java
index 61c0ebf86dd..30f1559fad4 100644
--- a/src/main/java/net/sf/jabref/logic/importer/fetcher/IsbnFetcher.java
+++ b/src/main/java/net/sf/jabref/logic/importer/fetcher/IsbnFetcher.java
@@ -1,33 +1,34 @@
 package net.sf.jabref.logic.importer.fetcher;
 
-import java.io.IOException;
+import java.net.MalformedURLException;
 import java.net.URISyntaxException;
 import java.net.URL;
-import java.util.Optional;
 
+import net.sf.jabref.logic.cleanup.FieldFormatterCleanup;
+import net.sf.jabref.logic.formatter.bibtexfields.ClearFormatter;
+import net.sf.jabref.logic.formatter.bibtexfields.NormalizePagesFormatter;
 import net.sf.jabref.logic.help.HelpFile;
 import net.sf.jabref.logic.importer.FetcherException;
-import net.sf.jabref.logic.importer.IdBasedFetcher;
+import net.sf.jabref.logic.importer.IdBasedParserFetcher;
 import net.sf.jabref.logic.importer.ImportFormatPreferences;
+import net.sf.jabref.logic.importer.Parser;
 import net.sf.jabref.logic.importer.fileformat.BibtexParser;
+import net.sf.jabref.logic.l10n.Localization;
 import net.sf.jabref.logic.util.ISBN;
 import net.sf.jabref.model.entry.BibEntry;
 import net.sf.jabref.model.entry.FieldName;
 
-import com.mashape.unirest.http.Unirest;
-import com.mashape.unirest.http.exceptions.UnirestException;
 import org.apache.http.client.utils.URIBuilder;
 
 /**
- * Fetcher for ISBN.
+ * Fetcher for ISBN using http://www.ebook.de.
  */
-public class IsbnFetcher implements IdBasedFetcher {
+public class IsbnFetcher implements IdBasedParserFetcher {
 
-    private static final String URL_PATTERN = "http://www.ebook.de/de/tools/isbn2bibtex?";
-    private ImportFormatPreferences prefs;
+    private final ImportFormatPreferences importFormatPreferences;
 
-    public IsbnFetcher(ImportFormatPreferences prefs){
-        this.prefs = prefs;
+    public IsbnFetcher(ImportFormatPreferences importFormatPreferences) {
+        this.importFormatPreferences = importFormatPreferences;
     }
 
     @Override
@@ -41,43 +42,29 @@ public HelpFile getHelpPage() {
     }
 
     @Override
-    public Optional performSearchById(String identifier) throws FetcherException {
+    public URL getURLForID(String identifier) throws URISyntaxException, MalformedURLException, FetcherException {
         ISBN isbn = new ISBN(identifier);
-        Optional result = Optional.empty();
-
-        if (isbn.isValidChecksum() && isbn.isValidFormat()) {
-            try {
-                //Build the URL. In this case: http://www.ebook.de/de/tools/isbn2bibtex?isbn=identifier
-                URIBuilder uriBuilder = new URIBuilder(URL_PATTERN);
-                uriBuilder.addParameter("isbn", identifier);
-                URL url = uriBuilder.build().toURL();
-
-                //Downloads the source code of the site and then creates a .bib file out of the String
-                String bibtexString = Unirest.get(url.toString()).asString().getBody();
-                Optional entry = BibtexParser.singleFromString(bibtexString, prefs);
-
-                if (entry.isPresent()) {
-                    result = postProcessEntry(entry.get());
-                }
-
-            } catch (UnirestException | IOException | URISyntaxException e) {
-                throw new FetcherException("Bad URL when fetching ISBN info", e);
-            }
+        if (!isbn.isValid()) {
+            throw new FetcherException(Localization.lang("Invalid_ISBN:_'%0'.", identifier));
         }
-        return result;
+
+        URIBuilder uriBuilder = new URIBuilder("http://www.ebook.de/de/tools/isbn2bibtex");
+        uriBuilder.addParameter("isbn", identifier);
+        return uriBuilder.build().toURL();
     }
 
-    private Optional postProcessEntry(BibEntry entry) {
-        if (entry.hasField(FieldName.URL)) {
-            entry.clearField(FieldName.URL);
-        }
+    @Override
+    public Parser getParser() {
+        return new BibtexParser(importFormatPreferences);
+    }
 
-        //Removes every non-digit character in the PAGETOTAL field.
-        Optional pagetotal = entry.getField(FieldName.PAGETOTAL);
-        pagetotal.ifPresent(pg -> {
-            entry.setField(FieldName.PAGETOTAL, pg.replaceAll("[\\D]", ""));
-        });
+    @Override
+    public void doPostCleanup(BibEntry entry) {
+        new FieldFormatterCleanup(FieldName.URL, new ClearFormatter()).cleanup(entry);
 
-        return Optional.of(entry);
+        // Fetcher returns page numbers as "30 Seiten" -> remove every non-digit character in the PAGETOTAL field
+        entry.getField(FieldName.PAGETOTAL).ifPresent(pages ->
+                entry.setField(FieldName.PAGETOTAL, pages.replaceAll("[\\D]", "")));
+        new FieldFormatterCleanup(FieldName.PAGETOTAL, new NormalizePagesFormatter()).cleanup(entry);
     }
 }
diff --git a/src/main/java/net/sf/jabref/logic/util/ISBN.java b/src/main/java/net/sf/jabref/logic/util/ISBN.java
index 60688245961..b9b632f6a23 100644
--- a/src/main/java/net/sf/jabref/logic/util/ISBN.java
+++ b/src/main/java/net/sf/jabref/logic/util/ISBN.java
@@ -73,4 +73,7 @@ private boolean isbn13check() {
         return (sum % 10) == 0;
     }
 
+    public boolean isValid() {
+        return isValidFormat() && isValidChecksum();
+    }
 }
diff --git a/src/main/resources/l10n/JabRef_da.properties b/src/main/resources/l10n/JabRef_da.properties
index 5c0632c9702..8602a71d4c1 100644
--- a/src/main/resources/l10n/JabRef_da.properties
+++ b/src/main/resources/l10n/JabRef_da.properties
@@ -2302,3 +2302,5 @@ Select_first_entry=
 Select_last_entry=
 
 Search_in_all_open_databases=
+
+Invalid_ISBN\:_'%0'.=
diff --git a/src/main/resources/l10n/JabRef_de.properties b/src/main/resources/l10n/JabRef_de.properties
index a9e78b93d9d..797430f89a3 100644
--- a/src/main/resources/l10n/JabRef_de.properties
+++ b/src/main/resources/l10n/JabRef_de.properties
@@ -2304,3 +2304,5 @@ Select_first_entry=Ersten_Eintrag_auswählen
 Select_last_entry=Letzten_Eintrag_auswählen
Search_in_all_open_databases= + +Invalid_ISBN\:_'%0'.= diff --git a/src/main/resources/l10n/JabRef_en.properties b/src/main/resources/l10n/JabRef_en.properties index 3259b530958..7c0e49a2bc1 100644 --- a/src/main/resources/l10n/JabRef_en.properties +++ b/src/main/resources/l10n/JabRef_en.properties @@ -2298,3 +2298,5 @@ Fetcher_'%0'_did_not_find_an_entry_for_id_'%1'.=Fetcher_'%0'_did_not_find_an_ent Select_first_entry=Select_first_entry Select_last_entry=Select_last_entry + +Invalid_ISBN\:_'%0'.=Invalid_ISBN\:_'%0'. diff --git a/src/main/resources/l10n/JabRef_es.properties b/src/main/resources/l10n/JabRef_es.properties index 630043f3136..575ebecdac4 100644 --- a/src/main/resources/l10n/JabRef_es.properties +++ b/src/main/resources/l10n/JabRef_es.properties @@ -2302,3 +2302,5 @@ Select_first_entry= Select_last_entry= Search_in_all_open_databases= + +Invalid_ISBN\:_'%0'.= diff --git a/src/main/resources/l10n/JabRef_fa.properties b/src/main/resources/l10n/JabRef_fa.properties index 65bef6116af..42a0e184ad9 100644 --- a/src/main/resources/l10n/JabRef_fa.properties +++ b/src/main/resources/l10n/JabRef_fa.properties @@ -2305,3 +2305,5 @@ Select_first_entry= Select_last_entry= Search_in_all_open_databases= + +Invalid_ISBN\:_'%0'.= diff --git a/src/main/resources/l10n/JabRef_fr.properties b/src/main/resources/l10n/JabRef_fr.properties index ef31f488fbc..8b5425168cf 100644 --- a/src/main/resources/l10n/JabRef_fr.properties +++ b/src/main/resources/l10n/JabRef_fr.properties @@ -2302,3 +2302,5 @@ Select_first_entry= Select_last_entry= Search_in_all_open_databases= + +Invalid_ISBN\:_'%0'.= diff --git a/src/main/resources/l10n/JabRef_in.properties b/src/main/resources/l10n/JabRef_in.properties index 1bc89bbab1d..34cf2bdc6a7 100644 --- a/src/main/resources/l10n/JabRef_in.properties +++ b/src/main/resources/l10n/JabRef_in.properties @@ -2302,3 +2302,5 @@ Select_first_entry= Select_last_entry= Search_in_all_open_databases= + +Invalid_ISBN\:_'%0'.= diff --git 
a/src/main/resources/l10n/JabRef_it.properties b/src/main/resources/l10n/JabRef_it.properties index 241f6168d38..12b85604ee1 100644 --- a/src/main/resources/l10n/JabRef_it.properties +++ b/src/main/resources/l10n/JabRef_it.properties @@ -2302,3 +2302,5 @@ Select_first_entry= Select_last_entry= Search_in_all_open_databases= + +Invalid_ISBN\:_'%0'.= diff --git a/src/main/resources/l10n/JabRef_ja.properties b/src/main/resources/l10n/JabRef_ja.properties index e4eabb3bfb8..fd90f9ebb46 100644 --- a/src/main/resources/l10n/JabRef_ja.properties +++ b/src/main/resources/l10n/JabRef_ja.properties @@ -2300,3 +2300,5 @@ Fetcher_'%0'_did_not_find_an_entry_for_id_'%1'.=取得子「%0」は,IDが「% Search_in_all_open_databases= Select_first_entry=最初の項目を選択 Select_last_entry=最後の項目を選択 + +Invalid_ISBN\:_'%0'.= diff --git a/src/main/resources/l10n/JabRef_nl.properties b/src/main/resources/l10n/JabRef_nl.properties index 2700177eb43..bb9e929e90c 100644 --- a/src/main/resources/l10n/JabRef_nl.properties +++ b/src/main/resources/l10n/JabRef_nl.properties @@ -2304,3 +2304,5 @@ Select_first_entry= Select_last_entry= Search_in_all_open_databases= + +Invalid_ISBN\:_'%0'.= diff --git a/src/main/resources/l10n/JabRef_no.properties b/src/main/resources/l10n/JabRef_no.properties index 2f341da7e1d..62c8c8a338f 100644 --- a/src/main/resources/l10n/JabRef_no.properties +++ b/src/main/resources/l10n/JabRef_no.properties @@ -2304,3 +2304,5 @@ Select_first_entry= Select_last_entry= Search_in_all_open_databases= + +Invalid_ISBN\:_'%0'.= diff --git a/src/main/resources/l10n/JabRef_pt_BR.properties b/src/main/resources/l10n/JabRef_pt_BR.properties index 679da92f223..c739fc0e5f3 100644 --- a/src/main/resources/l10n/JabRef_pt_BR.properties +++ b/src/main/resources/l10n/JabRef_pt_BR.properties @@ -2302,3 +2302,5 @@ Select_first_entry= Select_last_entry= Search_in_all_open_databases= + +Invalid_ISBN\:_'%0'.= diff --git a/src/main/resources/l10n/JabRef_ru.properties b/src/main/resources/l10n/JabRef_ru.properties index 
e27b6414c80..fb93523eba1 100644 --- a/src/main/resources/l10n/JabRef_ru.properties +++ b/src/main/resources/l10n/JabRef_ru.properties @@ -2305,3 +2305,5 @@ Select_first_entry= Select_last_entry= Search_in_all_open_databases= + +Invalid_ISBN\:_'%0'.= diff --git a/src/main/resources/l10n/JabRef_sv.properties b/src/main/resources/l10n/JabRef_sv.properties index b9fcd41252d..f68c5b43712 100644 --- a/src/main/resources/l10n/JabRef_sv.properties +++ b/src/main/resources/l10n/JabRef_sv.properties @@ -2301,3 +2301,5 @@ Select_first_entry= Select_last_entry= Search_in_all_open_databases= + +Invalid_ISBN\:_'%0'.= diff --git a/src/main/resources/l10n/JabRef_tr.properties b/src/main/resources/l10n/JabRef_tr.properties index 91decfb6e4e..0a43e352acc 100644 --- a/src/main/resources/l10n/JabRef_tr.properties +++ b/src/main/resources/l10n/JabRef_tr.properties @@ -2302,3 +2302,5 @@ Select_first_entry=İlk_girdiyi_seç Select_last_entry=Son_girdiyi_seç Search_in_all_open_databases= + +Invalid_ISBN\:_'%0'.= diff --git a/src/main/resources/l10n/JabRef_vi.properties b/src/main/resources/l10n/JabRef_vi.properties index 04836b4ef6f..c0fe9a50f26 100644 --- a/src/main/resources/l10n/JabRef_vi.properties +++ b/src/main/resources/l10n/JabRef_vi.properties @@ -2304,3 +2304,5 @@ Select_first_entry= Select_last_entry= Search_in_all_open_databases= + +Invalid_ISBN\:_'%0'.= diff --git a/src/main/resources/l10n/JabRef_zh.properties b/src/main/resources/l10n/JabRef_zh.properties index 719f43bb925..dfe2297d177 100644 --- a/src/main/resources/l10n/JabRef_zh.properties +++ b/src/main/resources/l10n/JabRef_zh.properties @@ -2302,3 +2302,5 @@ Select_first_entry= Select_last_entry= Search_in_all_open_databases= + +Invalid_ISBN\:_'%0'.= diff --git a/src/test/java/net/sf/jabref/logic/importer/fetcher/IsbnFetcherTest.java b/src/test/java/net/sf/jabref/logic/importer/fetcher/IsbnFetcherTest.java index 98ed245ac74..64ab12d74e2 100644 --- a/src/test/java/net/sf/jabref/logic/importer/fetcher/IsbnFetcherTest.java 
+++ b/src/test/java/net/sf/jabref/logic/importer/fetcher/IsbnFetcherTest.java
@@ -45,38 +45,35 @@ public void testHelpPage() {
     }
 
     @Test
-    public void testFetcher10() throws FetcherException {
+    public void searchByIdSuccessfulWithShortISBN() throws FetcherException {
         Optional fetchedEntry = fetcher.performSearchById("0321356683");
         assertEquals(Optional.of(bibEntry), fetchedEntry);
     }
 
     @Test
-    public void testFetcher13() throws FetcherException {
+    public void searchByIdSuccessfulWithLongISBN() throws FetcherException {
         Optional fetchedEntry = fetcher.performSearchById("978-0321356680");
         assertEquals(Optional.of(bibEntry), fetchedEntry);
     }
 
     @Test
-    public void testFetcher10Empty() throws FetcherException {
+    public void searchByIdReturnsEmptyWithEmptyISBN() throws FetcherException {
         Optional fetchedEntry = fetcher.performSearchById("");
         assertEquals(Optional.empty(), fetchedEntry);
     }
 
-    @Test
-    public void testFetcher10ShortISBN() throws FetcherException {
-        Optional fetchedEntry = fetcher.performSearchById("123456789");
-        assertEquals(Optional.empty(), fetchedEntry);
+    @Test(expected = FetcherException.class)
+    public void searchByIdThrowsExceptionForShortInvalidISBN() throws FetcherException {
+        fetcher.performSearchById("123456789");
     }
 
-    @Test
-    public void testFetcher10LongISBN() throws FetcherException {
-        Optional fetchedEntry = fetcher.performSearchById("012345678910");
-        assertEquals(Optional.empty(), fetchedEntry);
+    @Test(expected = FetcherException.class)
+    public void searchByIdThrowsExceptionForLongInvalidISBN() throws FetcherException {
+        fetcher.performSearchById("012345678910");
     }
 
-    @Test
-    public void testFetcher10InvalidISBN() throws FetcherException {
-        Optional fetchedEntry = fetcher.performSearchById("jabref-4-ever");
-        assertEquals(Optional.empty(), fetchedEntry);
+    @Test(expected = FetcherException.class)
+    public void searchByIdThrowsExceptionForInvalidISBN() throws FetcherException {
+        fetcher.performSearchById("jabref-4-ever");
     }
 }