From d05217092ef7ae28fdbbd98fd5f6b5e323191f23 Mon Sep 17 00:00:00 2001 From: Oliver Kopp Date: Wed, 26 Aug 2020 22:03:52 +0200 Subject: [PATCH] Fix more fetchers (#6790) --- CHANGELOG.md | 1 + ...e-URL-creation-to-enable-proper-logging.md | 104 ++++++++++++++++++ .../importer/SearchBasedParserFetcher.java | 56 +++++----- ...fComputerScienceBibliographiesFetcher.java | 39 ++++++- .../logic/importer/fetcher/GoogleScholar.java | 80 +++++++------- .../importer/fetcher/SpringerFetcher.java | 2 +- .../logic/importer/fetcher/SpringerLink.java | 3 +- .../importer/fileformat/ModsImporter.java | 6 +- .../org/jabref/logic/net/URLDownload.java | 2 +- .../logic/importer/fetcher/CiteSeerTest.java | 16 +-- ...puterScienceBibliographiesFetcherTest.java | 17 ++- .../fetcher/LibraryOfCongressTest.java | 28 ++--- .../SearchBasedFetcherCapabilityTest.java | 9 +- .../importer/fetcher/SpringerFetcherTest.java | 6 + 14 files changed, 262 insertions(+), 107 deletions(-) create mode 100644 docs/adr/0014-separate-URL-creation-to-enable-proper-logging.md diff --git a/CHANGELOG.md b/CHANGELOG.md index 814d5209d60..cb40044b4fe 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -106,6 +106,7 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve - We fixed an issue where percent sign ('%') was not formatted properly by the HTML formatter [#6753](https://github.com/JabRef/jabref/issues/6753) - We fixed an issue with the [SAO/NASA Astrophysics Data System](https://docs.jabref.org/collect/import-using-online-bibliographic-database/ads) fetcher where `\textbackslash` appeared at the end of the abstract. - We fixed an issue with the Science Direct fetcher where PDFs could not be downloaded. Fixes [#5860](https://github.com/JabRef/jabref/issues/5860) +- We fixed an issue with the Library of Congress importer. ### Removed diff --git a/docs/adr/0014-separate-URL-creation-to-enable-proper-logging.md b/docs/adr/0014-separate-URL-creation-to-enable-proper-logging.md new file mode 100644 index 00000000000..1b980e3abd2 --- /dev/null +++ b/docs/adr/0014-separate-URL-creation-to-enable-proper-logging.md @@ -0,0 +1,104 @@ +# Separate URL creation to enable proper logging + +## Context and Problem Statement + +Fetchers are failing. +The reason why they are failing needs to be investigated. + +* Claim 1: Knowing the URL which was used to query the fetcher eases debugging +* Claim 2: Somehow logging the URL eases debugging (instead of showing it in the debugger only) + +How to properly log the URL used for fetching? + +## Decision Drivers + +* Code should be easy to read +* Include URL in the exception instead of logging in case an exception is thrown already (see ) + +## Considered Options + +* Separate URL creation +* Create URL when logging the URL +* Include URL creation as statement before the stream creation in the try-with-resources block + +## Decision Outcome + +Chosen option: "Separate URL creation", because comes out best \(see below\). + +## Pros and Cons of the Options + +### Separate URL creation + +```java + URL urlForQuery; + try { + urlForQuery = getURLForQuery(query); + } catch (URISyntaxException | MalformedURLException | FetcherException e) { + throw new FetcherException(String.format("Search URI %s is malformed", query), e); + } + try (InputStream stream = getUrlDownload(complexQueryURL).asInputStream()) { + ... + } catch (IOException e) { + throw new FetcherException("A network error occurred while fetching from " + urlForQuery.toString(), e); + } catch (ParseException e) { + throw new FetcherException("An internal parser error occurred while fetching from " + urlForQuery.toString(), e); + } +``` + +* Good, because exceptions thrown at method are directly catched +* Good, because exceptions in different statements belong to different catch blocks +* Good, because code to determine URL is written once +* OK, because "Java by Comparison" does not state anything about it +* Bad, because multiple try/catch statements are required +* Bad, because this style seems to be uncommon to Java coders + +### Create URL when logging the URL + +The "logging" is done when throwing the exception. + +Example code: + +```java + try (InputStream stream = getUrlDownload(getURLForQuery(query)).asInputStream()) { + ... + } catch (URISyntaxException | MalformedURLException | FetcherException e) { + throw new FetcherException(String.format("Search URI %s is malformed", query), e); + } catch (IOException e) { + try { + throw new FetcherException("A network error occurred while fetching from " + getURLForQuery(query), e); + } catch (URISyntaxException | MalformedURLException uriSyntaxException) { + // does not happen + throw new FetcherException("A network error occurred", e); + } + } catch (ParseException e) { + try { + throw new FetcherException("An internal parser error occurred while fetching from " + getURLForQuery(query), e); + } catch (URISyntaxException | MalformedURLException uriSyntaxException) { + // does not happen + throw new FetcherException("An internal parser error occurred", e); + } + } +``` + +* Good, because code inside the `try` statement stays the same +* OK, because "Java by Comparison" does not state anything about it +* Bad, because an additional try/catch-block is added to each catch statement +* Bad, because needs a `throw` statement in the `URISyntaxException` catch block (even though at this point the exception cannot be thrown), because Java otherwise misses a `return` statement. + +### Include URL creation as statement before the stream creation in the try-with-resources block + +```java + try (URL urlForQuery = getURLForQuery(query); InputStream stream = urlForQuery.asInputStream()) { + ... + } catch (URISyntaxException | MalformedURLException | FetcherException e) { + throw new FetcherException(String.format("Search URI %s is malformed", query), e); + } catch (IOException e) { + throw new FetcherException("A network error occurred while fetching from " + urlForQuery.toString(), e); + } catch (ParseException e) { + throw new FetcherException("An internal parser error occurred while fetching from " + urlForQuery.toString(), e); + } +``` + +* Good, because the single try/catch-block can be kept +* Good, because logical flow is kept +* Bad, because does not compile (because URL is not an `AutoClosable`) diff --git a/src/main/java/org/jabref/logic/importer/SearchBasedParserFetcher.java b/src/main/java/org/jabref/logic/importer/SearchBasedParserFetcher.java index 24b0e2c84d5..c1dbb3a4a93 100644 --- a/src/main/java/org/jabref/logic/importer/SearchBasedParserFetcher.java +++ b/src/main/java/org/jabref/logic/importer/SearchBasedParserFetcher.java @@ -15,9 +15,11 @@ /** * Provides a convenient interface for search-based fetcher, which follow the usual three-step procedure: - * 1. Open a URL based on the search query - * 2. Parse the response to get a list of {@link BibEntry} - * 3. Post-process fetched entries + *
    + *
  1. Open a URL based on the search query
  2. + *
  3. Parse the response to get a list of {@link BibEntry}
  4. + *
  5. Post-process fetched entries
  6. + *
*/ public interface SearchBasedParserFetcher extends SearchBasedFetcher { @@ -39,21 +41,14 @@ default List performSearch(String query) throws FetcherException { return Collections.emptyList(); } - try (InputStream stream = getUrlDownload(getURLForQuery(query)).asInputStream()) { - List fetchedEntries = getParser().parseEntries(stream); - - // Post-cleanup - fetchedEntries.forEach(this::doPostCleanup); - - return fetchedEntries; - } catch (URISyntaxException e) { - throw new FetcherException("Search URI is malformed", e); - } catch (IOException e) { - // TODO: Catch HTTP Response 401/403 errors and report that user has no rights to access resource - throw new FetcherException("A network error occurred", e); - } catch (ParseException e) { - throw new FetcherException("An internal parser error occurred", e); + // ADR-0014 + URL urlForQuery; + try { + urlForQuery = getURLForQuery(query); + } catch (URISyntaxException | MalformedURLException | FetcherException e) { + throw new FetcherException(String.format("Search URI crafted from query %s is malformed", query), e); } + return getBibEntries(urlForQuery); } /** @@ -65,17 +60,25 @@ default List performSearch(String query) throws FetcherException { */ @Override default List performComplexSearch(ComplexSearchQuery complexSearchQuery) throws FetcherException { - try (InputStream stream = getUrlDownload(getComplexQueryURL(complexSearchQuery)).asInputStream()) { + // ADR-0014 + URL urlForQuery; + try { + urlForQuery = getComplexQueryURL(complexSearchQuery); + } catch (URISyntaxException | MalformedURLException | FetcherException e) { + throw new FetcherException("Search URI crafted from complex search query is malformed", e); + } + return getBibEntries(urlForQuery); + } + + private List getBibEntries(URL urlForQuery) throws FetcherException { + try (InputStream stream = getUrlDownload(urlForQuery).asInputStream()) { List fetchedEntries = getParser().parseEntries(stream); fetchedEntries.forEach(this::doPostCleanup); return fetchedEntries; - } catch (URISyntaxException e) { - throw new FetcherException("Search URI is malformed", e); } catch (IOException e) { - // TODO: Catch HTTP Response 401/403 errors and report that user has no rights to access resource - throw new FetcherException("A network error occurred", e); + throw new FetcherException("A network error occurred while fetching from " + urlForQuery, e); } catch (ParseException e) { - throw new FetcherException("An internal parser error occurred", e); + throw new FetcherException("An internal parser error occurred while fetching from " + urlForQuery, e); } } @@ -86,15 +89,16 @@ default URL getComplexQueryURL(ComplexSearchQuery complexSearchQuery) throws URI /** * Performs a cleanup of the fetched entry. - * + *

* Only systematic errors of the fetcher should be corrected here * (i.e. if information is consistently contained in the wrong field or the wrong format) * but not cosmetic issues which may depend on the user's taste (for example, LateX code vs HTML in the abstract). - * + *

* Try to reuse existing {@link Formatter} for the cleanup. For example, * {@code new FieldFormatterCleanup(StandardField.TITLE, new RemoveBracesFormatter()).cleanup(entry);} - * + *

* By default, no cleanup is done. + * * @param entry the entry to be cleaned-up */ default void doPostCleanup(BibEntry entry) { diff --git a/src/main/java/org/jabref/logic/importer/fetcher/CollectionOfComputerScienceBibliographiesFetcher.java b/src/main/java/org/jabref/logic/importer/fetcher/CollectionOfComputerScienceBibliographiesFetcher.java index 26cbaa9d9f8..0d3b58e2847 100644 --- a/src/main/java/org/jabref/logic/importer/fetcher/CollectionOfComputerScienceBibliographiesFetcher.java +++ b/src/main/java/org/jabref/logic/importer/fetcher/CollectionOfComputerScienceBibliographiesFetcher.java @@ -3,6 +3,7 @@ import java.net.MalformedURLException; import java.net.URISyntaxException; import java.net.URL; +import java.util.Arrays; import org.jabref.logic.formatter.bibtexfields.RemoveDigitsFormatter; import org.jabref.logic.formatter.bibtexfields.RemoveNewlinesFormatter; @@ -14,7 +15,10 @@ import org.jabref.logic.importer.SearchBasedParserFetcher; import org.jabref.model.cleanup.FieldFormatterCleanup; import org.jabref.model.entry.BibEntry; +import org.jabref.model.entry.field.Field; +import org.jabref.model.entry.field.FieldFactory; import org.jabref.model.entry.field.StandardField; +import org.jabref.model.entry.field.UnknownField; import org.apache.http.client.utils.URIBuilder; @@ -31,10 +35,10 @@ public CollectionOfComputerScienceBibliographiesFetcher(ImportFormatPreferences @Override public URL getURLForQuery(String query) throws URISyntaxException, MalformedURLException, FetcherException { return new URIBuilder(BASIC_SEARCH_URL) - .addParameter("query", query) - .addParameter("sort", "score") - .build() - .toURL(); + .addParameter("query", query) + .addParameter("sort", "score") + .build() + .toURL(); } @Override @@ -53,5 +57,32 @@ public void doPostCleanup(BibEntry entry) { new FieldFormatterCleanup(StandardField.ABSTRACT, new ReplaceTabsBySpaceFormater()).cleanup(entry); new FieldFormatterCleanup(StandardField.ABSTRACT, new RemoveRedundantSpacesFormatter()).cleanup(entry); new FieldFormatterCleanup(StandardField.EDITOR, new RemoveDigitsFormatter()).cleanup(entry); + // identifier fields is a key-value field + // example: "urn:isbn:978-1-4503-5217-8; doi:10.1145/3129790.3129810; ISI:000505046100032; Scopus 2-s2.0-85037741580" + // thus, key can contain multiple ":"; sometimes value separated by " " instead of ":" + UnknownField identifierField = new UnknownField("identifier"); + entry.getField(identifierField) + .stream() + .flatMap(value -> Arrays.stream(value.split("; "))) + .forEach(identifierKeyValue -> { + // check for pattern "Scopus 2-..." + String[] identifierKeyValueSplit = identifierKeyValue.split(" "); + if (identifierKeyValueSplit.length == 1) { + // check for pattern "doi:..." + identifierKeyValueSplit = identifierKeyValue.split(":"); + } + int length = identifierKeyValueSplit.length; + if (length < 2) { + return; + } + // in the case "urn:isbn:", just "isbn" is used + String key = identifierKeyValueSplit[length - 2]; + String value = identifierKeyValueSplit[length - 1]; + Field field = FieldFactory.parseField(key); + if (!entry.hasField(field)) { + entry.setField(field, value); + } + }); + entry.clearField(identifierField); } } diff --git a/src/main/java/org/jabref/logic/importer/fetcher/GoogleScholar.java b/src/main/java/org/jabref/logic/importer/fetcher/GoogleScholar.java index ebfb5f7e3a7..f251ed7f992 100644 --- a/src/main/java/org/jabref/logic/importer/fetcher/GoogleScholar.java +++ b/src/main/java/org/jabref/logic/importer/fetcher/GoogleScholar.java @@ -43,8 +43,7 @@ public class GoogleScholar implements FulltextFetcher, SearchBasedFetcher { private static final Pattern LINK_TO_BIB_PATTERN = Pattern.compile("(https:\\/\\/scholar.googleusercontent.com\\/scholar.bib[^\"]*)"); - private static final String BASIC_SEARCH_URL = "https://scholar.google.com/scholar?"; - private static final String SEARCH_IN_TITLE_URL = "https://scholar.google.com// scholar?"; + private static final String BASIC_SEARCH_URL = "https://scholar.google.ch/scholar?"; private static final int NUM_RESULTS = 10; @@ -66,10 +65,10 @@ public Optional findFullText(BibEntry entry) throws IOException, FetcherExc try { // title search - URIBuilder uriBuilder = new URIBuilder(SEARCH_IN_TITLE_URL); + URIBuilder uriBuilder = new URIBuilder(BASIC_SEARCH_URL); uriBuilder.addParameter("as_q", ""); // as_epq as exact phrase - uriBuilder.addParameter("as_epq", entry.getField(StandardField.TITLE).orElse(null)); + uriBuilder.addParameter("as_epq", entry.getField(StandardField.TITLE).orElse("")); // as_occt field to search in uriBuilder.addParameter("as_occt", "title"); @@ -129,36 +128,37 @@ public Optional getHelpPage() { @Override public List performSearch(String query) throws FetcherException { - try { - obtainAndModifyCookie(); - List foundEntries = new ArrayList<>(10); - - URIBuilder uriBuilder = new URIBuilder(BASIC_SEARCH_URL); - uriBuilder.addParameter("hl", "en"); - uriBuilder.addParameter("btnG", "Search"); - uriBuilder.addParameter("q", query); + LOGGER.debug("Using URL {}", query); + obtainAndModifyCookie(); + List foundEntries = new ArrayList<>(20); - addHitsFromQuery(foundEntries, uriBuilder.toString()); + URIBuilder uriBuilder = null; + try { + uriBuilder = new URIBuilder(BASIC_SEARCH_URL); + } catch (URISyntaxException e) { + throw new FetcherException("Error while fetching from " + getName() + " at URL " + BASIC_SEARCH_URL, e); + } - if (foundEntries.size() == 10) { - uriBuilder.addParameter("start", "10"); - addHitsFromQuery(foundEntries, uriBuilder.toString()); - } + uriBuilder.addParameter("hl", "en"); + uriBuilder.addParameter("btnG", "Search"); + uriBuilder.addParameter("q", query); + String queryURL = uriBuilder.toString(); - return foundEntries; - } catch (URISyntaxException e) { - throw new FetcherException("Error while fetching from " + getName(), e); + try { + addHitsFromQuery(foundEntries, queryURL); } catch (IOException e) { - // if there are too much requests from the same IP adress google is answering with a 503 and redirecting to a captcha challenge + // if there are too much requests from the same IP address google is answering with a 503 and redirecting to a captcha challenge // The caught IOException looks for example like this: // java.io.IOException: Server returned HTTP response code: 503 for URL: https://ipv4.google.com/sorry/index?continue=https://scholar.google.com/scholar%3Fhl%3Den%26btnG%3DSearch%26q%3Dbpmn&hl=en&q=CGMSBI0NBDkYuqy9wAUiGQDxp4NLQCWbIEY1HjpH5zFJhv4ANPGdWj0 if (e.getMessage().contains("Server returned HTTP response code: 503 for URL")) { - throw new FetcherException("Fetching from Google Scholar failed.", + throw new FetcherException("Fetching from Google Scholar at URL " + queryURL + " failed.", Localization.lang("This might be caused by reaching the traffic limitation of Google Scholar (see 'Help' for details)."), e); } else { - throw new FetcherException("Error while fetching from " + getName(), e); + throw new FetcherException("Error while fetching from " + getName() + " at URL " + queryURL, e); } } + + return foundEntries; } @Override @@ -178,26 +178,28 @@ public List performComplexSearch(ComplexSearchQuery complexSearchQuery uriBuilder.addParameter("as_yhi", year.toString()); }); - addHitsFromQuery(foundEntries, uriBuilder.toString()); - - if (foundEntries.size() == 10) { - uriBuilder.addParameter("start", "10"); + try { addHitsFromQuery(foundEntries, uriBuilder.toString()); - } + if (foundEntries.size() == 10) { + uriBuilder.addParameter("start", "10"); + addHitsFromQuery(foundEntries, uriBuilder.toString()); + } + } catch (IOException e) { + LOGGER.info("IOException for URL {}", uriBuilder.toString()); + // if there are too much requests from the same IP adress google is answering with a 503 and redirecting to a captcha challenge + // The caught IOException looks for example like this: + // java.io.IOException: Server returned HTTP response code: 503 for URL: https://ipv4.google.com/sorry/index?continue=https://scholar.google.com/scholar%3Fhl%3Den%26btnG%3DSearch%26q%3Dbpmn&hl=en&q=CGMSBI0NBDkYuqy9wAUiGQDxp4NLQCWbIEY1HjpH5zFJhv4ANPGdWj0 + if (e.getMessage().contains("Server returned HTTP response code: 503 for URL")) { + throw new FetcherException("Fetching from Google Scholar failed.", + Localization.lang("This might be caused by reaching the traffic limitation of Google Scholar (see 'Help' for details)."), e); + } else { + throw new FetcherException("Error while fetching from " + getName(), e); + } + } return foundEntries; } catch (URISyntaxException e) { throw new FetcherException("Error while fetching from " + getName(), e); - } catch (IOException e) { - // if there are too much requests from the same IP adress google is answering with a 503 and redirecting to a captcha challenge - // The caught IOException looks for example like this: - // java.io.IOException: Server returned HTTP response code: 503 for URL: https://ipv4.google.com/sorry/index?continue=https://scholar.google.com/scholar%3Fhl%3Den%26btnG%3DSearch%26q%3Dbpmn&hl=en&q=CGMSBI0NBDkYuqy9wAUiGQDxp4NLQCWbIEY1HjpH5zFJhv4ANPGdWj0 - if (e.getMessage().contains("Server returned HTTP response code: 503 for URL")) { - throw new FetcherException("Fetching from Google Scholar failed.", - Localization.lang("This might be caused by reaching the traffic limitation of Google Scholar (see 'Help' for details)."), e); - } else { - throw new FetcherException("Error while fetching from " + getName(), e); - } } } @@ -215,7 +217,7 @@ private void addHitsFromQuery(List entryList, String queryURL) throws String content = new URLDownload(queryURL).asString(); if (needsCaptcha(content)) { - throw new FetcherException("Fetching from Google Scholar failed.", + throw new FetcherException("Fetching from Google Scholar failed: Captacha hit at " + queryURL + ".", Localization.lang("This might be caused by reaching the traffic limitation of Google Scholar (see 'Help' for details)."), null); } diff --git a/src/main/java/org/jabref/logic/importer/fetcher/SpringerFetcher.java b/src/main/java/org/jabref/logic/importer/fetcher/SpringerFetcher.java index 9cdddcfb0fc..a86e711fb78 100644 --- a/src/main/java/org/jabref/logic/importer/fetcher/SpringerFetcher.java +++ b/src/main/java/org/jabref/logic/importer/fetcher/SpringerFetcher.java @@ -174,7 +174,7 @@ private String constructComplexQueryString(ComplexSearchQuery complexSearchQuery complexSearchQuery.getAuthors().ifPresent(authors -> authors.forEach(author -> searchTerms.add("name:" + author))); complexSearchQuery.getTitlePhrases().ifPresent(titlePhrases -> titlePhrases.forEach(title -> searchTerms.add("title:" + title))); complexSearchQuery.getJournal().ifPresent(journal -> searchTerms.add("journal:" + journal)); - // Since Springer API does not support year range search we ignore formYear and toYear. + // Since Springer API does not support year range search, we ignore formYear and toYear and use "singleYear" only complexSearchQuery.getSingleYear().ifPresent(year -> searchTerms.add("year:" + year.toString())); complexSearchQuery.getDefaultField().ifPresent(defaultField -> searchTerms.add(defaultField)); return String.join(" AND ", searchTerms); diff --git a/src/main/java/org/jabref/logic/importer/fetcher/SpringerLink.java b/src/main/java/org/jabref/logic/importer/fetcher/SpringerLink.java index e9aafb45597..06334f42939 100644 --- a/src/main/java/org/jabref/logic/importer/fetcher/SpringerLink.java +++ b/src/main/java/org/jabref/logic/importer/fetcher/SpringerLink.java @@ -6,6 +6,7 @@ import java.util.Optional; import org.jabref.logic.importer.FulltextFetcher; +import org.jabref.logic.util.BuildInfo; import org.jabref.model.entry.BibEntry; import org.jabref.model.entry.field.StandardField; import org.jabref.model.entry.identifier.DOI; @@ -27,7 +28,7 @@ public class SpringerLink implements FulltextFetcher { private static final Logger LOGGER = LoggerFactory.getLogger(SpringerLink.class); private static final String API_URL = "https://api.springer.com/meta/v1/json"; - private static final String API_KEY = "a98b4a55181ffcd27259bea45edad12e"; + private static final String API_KEY = new BuildInfo().springerNatureAPIKey; private static final String CONTENT_HOST = "link.springer.com"; @Override diff --git a/src/main/java/org/jabref/logic/importer/fileformat/ModsImporter.java b/src/main/java/org/jabref/logic/importer/fileformat/ModsImporter.java index 802d234ef26..a690784116e 100644 --- a/src/main/java/org/jabref/logic/importer/fileformat/ModsImporter.java +++ b/src/main/java/org/jabref/logic/importer/fileformat/ModsImporter.java @@ -398,7 +398,7 @@ private void putDate(Map fields, String elementName, DateDefiniti case "dateIssued": // The first 4 digits of dateIssued should be the year - fields.put(StandardField.YEAR, date.getValue().substring(0, 4)); + fields.put(StandardField.YEAR, date.getValue().replaceAll("[^0-9]*", "").replaceAll("\\(\\d?\\d?\\d?\\d?.*\\)", "\1")); break; case "dateCreated": // If there was no year in date issued, then take the year from date created @@ -435,7 +435,9 @@ private void handleAuthorsInNamePart(NameDefinition name, List authors, NamePartDefinition namePart = (NamePartDefinition) value; String type = namePart.getAtType(); if ((type == null) && (namePart.getValue() != null)) { - authors.add(namePart.getValue()); + String namePartValue = namePart.getValue(); + namePartValue = namePartValue.replaceAll(",$", ""); + authors.add(namePartValue); } else if ("family".equals(type) && (namePart.getValue() != null)) { // family should come first, so if family appears we can set the author then comes before // we have to check if forename and family name are not empty in case it's the first author diff --git a/src/main/java/org/jabref/logic/net/URLDownload.java b/src/main/java/org/jabref/logic/net/URLDownload.java index bf240352920..07c1129e6f4 100644 --- a/src/main/java/org/jabref/logic/net/URLDownload.java +++ b/src/main/java/org/jabref/logic/net/URLDownload.java @@ -60,7 +60,7 @@ */ public class URLDownload { - public static final String USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:68.0) Gecko/20100101 Firefox/68.0"; + public static final String USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:79.0) Gecko/20100101 Firefox/79.0"; private static final Logger LOGGER = LoggerFactory.getLogger(URLDownload.class); private final URL source; diff --git a/src/test/java/org/jabref/logic/importer/fetcher/CiteSeerTest.java b/src/test/java/org/jabref/logic/importer/fetcher/CiteSeerTest.java index 7d8d2242d04..e2f6f2951a1 100644 --- a/src/test/java/org/jabref/logic/importer/fetcher/CiteSeerTest.java +++ b/src/test/java/org/jabref/logic/importer/fetcher/CiteSeerTest.java @@ -8,7 +8,7 @@ import org.jabref.model.entry.types.StandardEntryType; import org.jabref.testutils.category.FetcherTest; -import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -16,15 +16,11 @@ @FetcherTest class CiteSeerTest { - CiteSeer fetcher; - - @BeforeEach - void setUp() throws Exception { - fetcher = new CiteSeer(); - } + private CiteSeer fetcher = new CiteSeer(); @Test - void searchByQueryFindsEntry() throws Exception { + @Disabled("CiteseerX currently has issues with ncites query") + void searchByQueryFindsEntryRigorousDerivation() throws Exception { BibEntry expected = new BibEntry(StandardEntryType.Misc) .withField(StandardField.AUTHOR, "Wang Wei and Zhang Pingwen and Zhang Zhifei") .withField(StandardField.TITLE, "Rigorous Derivation from Landau-de Gennes Theory to Eericksen-leslie Theory") @@ -35,13 +31,13 @@ void searchByQueryFindsEntry() throws Exception { } @Test - void searchByQueryFindsEntry2() throws Exception { + void searchByQueryFindsEntryCopingTheoryAndResearch() throws Exception { BibEntry expected = new BibEntry(StandardEntryType.Misc) .withField(StandardField.AUTHOR, "Lazarus Richard S.") .withField(StandardField.TITLE, "Coping Theory and Research: Past Present and Future") .withField(StandardField.DOI, "10.1.1.115.9665") .withField(StandardField.YEAR, "1993") - .withField(StandardField.JOURNAL, "PSYCHOSOMATIC MEDICINE"); + .withField(StandardField.JOURNALTITLE, "PSYCHOSOMATIC MEDICINE"); List fetchedEntries = fetcher.performSearch("doi:10.1.1.115.9665"); assertEquals(Collections.singletonList(expected), fetchedEntries); diff --git a/src/test/java/org/jabref/logic/importer/fetcher/CollectionOfComputerScienceBibliographiesFetcherTest.java b/src/test/java/org/jabref/logic/importer/fetcher/CollectionOfComputerScienceBibliographiesFetcherTest.java index 73ccf743b4d..724c4cbd29f 100644 --- a/src/test/java/org/jabref/logic/importer/fetcher/CollectionOfComputerScienceBibliographiesFetcherTest.java +++ b/src/test/java/org/jabref/logic/importer/fetcher/CollectionOfComputerScienceBibliographiesFetcherTest.java @@ -5,6 +5,8 @@ import java.net.URL; import java.util.Collections; import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; import org.jabref.logic.importer.FetcherException; import org.jabref.logic.importer.ImportFormatPreferences; @@ -19,7 +21,6 @@ import org.mockito.Answers; import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; @@ -66,7 +67,10 @@ public void performSearchReturnsMatchingMultipleEntries() throws FetcherExceptio BibEntry secondBibEntry = new BibEntry(StandardEntryType.Article) .withCiteKey("oai:DiVA.org:lnu-68408") - .withField(new UnknownField("identifier"), "urn:isbn:978-1-4503-5217-8; doi:10.1145/3129790.3129810; ISI:000426556400034; Scopus 2-s2.0-85037741580") + .withField(StandardField.ISBN, "978-1-4503-5217-8") + .withField(StandardField.DOI, "10.1145/3129790.3129810") + .withField(new UnknownField("ISI"), "000505046100032") + .withField(new UnknownField("Scopus"), "2-s2.0-85037741580") .withField(new UnknownField("subject"), "Software Architecture; Code Churn; Open Source; Architecrual Erosion; Technical Debt; Software Engineering; Programvaruteknik") .withField(new UnknownField("relation"), "ACM International Conference Proceeding Series; ECSA '17~Proceedings of the 11th European Conference on Software Architecture : Companion Proceedings, p. 152-158") .withField(StandardField.ABSTRACT, "The open source application JabRef has existed since" + @@ -103,9 +107,12 @@ public void performSearchReturnsMatchingMultipleEntries() throws FetcherExceptio .withField(StandardField.LANGUAGE, "eng") .withField(StandardField.AUTHOR, "Tobias Olsson and Morgan Ericsson and Anna Wingkvist") .withField(StandardField.YEAR, "2017"); - // Checking entries in the set as the query is generic and returns a changing result set - assertTrue(searchResult.contains(firstBibEntry)); - assertTrue(searchResult.contains(secondBibEntry)); + + // Checking a subset, because the query "jabref" is generic and returns a changing result set + assertEquals(Set.of(firstBibEntry, secondBibEntry), searchResult.stream().filter(bibEntry -> { + String citeKey = bibEntry.getCiteKeyOptional().get(); + return (citeKey.equals(firstBibEntry.getCiteKeyOptional().get()) || citeKey.equals(secondBibEntry.getCiteKeyOptional().get())); + }).collect(Collectors.toSet())); } @Test diff --git a/src/test/java/org/jabref/logic/importer/fetcher/LibraryOfCongressTest.java b/src/test/java/org/jabref/logic/importer/fetcher/LibraryOfCongressTest.java index bfbd409848d..b18e2b88ba6 100644 --- a/src/test/java/org/jabref/logic/importer/fetcher/LibraryOfCongressTest.java +++ b/src/test/java/org/jabref/logic/importer/fetcher/LibraryOfCongressTest.java @@ -29,20 +29,20 @@ public void setUp() { @Test public void performSearchById() throws Exception { - BibEntry expected = new BibEntry(); - expected.setField(StandardField.ADDRESS, "Burlington, MA"); - expected.setField(StandardField.AUTHOR, "West, Matthew"); - expected.setField(StandardField.ISBN, "0123751063 (pbk.)"); - expected.setField(new UnknownField("issuance"), "monographic"); - expected.setField(StandardField.KEYWORDS, "Database design, Data structures (Computer science)"); - expected.setField(StandardField.LANGUAGE, "eng"); - expected.setField(new UnknownField("lccn"), "2010045158"); - expected.setField(StandardField.NOTE, "Matthew West., Includes index."); - expected.setField(new UnknownField("oclc"), "ocn665135773"); - expected.setField(StandardField.PUBLISHER, "Morgan Kaufmann"); - expected.setField(new UnknownField("source"), "DLC"); - expected.setField(StandardField.TITLE, "Developing high quality data models"); - expected.setField(StandardField.YEAR, "2011"); + BibEntry expected = new BibEntry() + .withField(StandardField.ADDRESS, "Burlington, MA") + .withField(StandardField.AUTHOR, "West, Matthew") + .withField(StandardField.ISBN, "0123751063 (pbk.)") + .withField(new UnknownField("issuance"), "monographic") + .withField(StandardField.KEYWORDS, "Database design, Data structures (Computer science)") + .withField(StandardField.LANGUAGE, "eng") + .withField(new UnknownField("lccn"), "2010045158") + .withField(StandardField.NOTE, "Matthew West., Includes index.") + .withField(new UnknownField("oclc"), "ocn665135773") + .withField(StandardField.PUBLISHER, "Morgan Kaufmann") + .withField(new UnknownField("source"), "DLC") + .withField(StandardField.TITLE, "Developing high quality data models") + .withField(StandardField.YEAR, "2011"); assertEquals(Optional.of(expected), fetcher.performSearchById("2010045158")); } diff --git a/src/test/java/org/jabref/logic/importer/fetcher/SearchBasedFetcherCapabilityTest.java b/src/test/java/org/jabref/logic/importer/fetcher/SearchBasedFetcherCapabilityTest.java index 7f7a4a69cdf..02760e1b91e 100644 --- a/src/test/java/org/jabref/logic/importer/fetcher/SearchBasedFetcherCapabilityTest.java +++ b/src/test/java/org/jabref/logic/importer/fetcher/SearchBasedFetcherCapabilityTest.java @@ -51,10 +51,12 @@ default void supportsAuthorSearch() throws Exception { */ @Test default void supportsYearSearch() throws Exception { - ComplexSearchQuery.ComplexSearchQueryBuilder builder = ComplexSearchQuery.builder(); - builder.singleYear(getTestYear()); + ComplexSearchQuery complexSearchQuery = ComplexSearchQuery + .builder() + .singleYear(getTestYear()) + .build(); - List result = getFetcher().performComplexSearch(builder.build()); + List result = getFetcher().performComplexSearch(complexSearchQuery); new ImportCleanup(BibDatabaseMode.BIBTEX).doPostCleanup(result); List differentYearsInResult = result.stream() .map(bibEntry -> bibEntry.getField(StandardField.YEAR)) @@ -63,7 +65,6 @@ default void supportsYearSearch() throws Exception { .distinct() .collect(Collectors.toList()); - assertFalse(result.isEmpty()); assertEquals(Collections.singletonList(getTestYear().toString()), differentYearsInResult); } diff --git a/src/test/java/org/jabref/logic/importer/fetcher/SpringerFetcherTest.java b/src/test/java/org/jabref/logic/importer/fetcher/SpringerFetcherTest.java index 7ffb4e19b1c..7fc0d47fa38 100644 --- a/src/test/java/org/jabref/logic/importer/fetcher/SpringerFetcherTest.java +++ b/src/test/java/org/jabref/logic/importer/fetcher/SpringerFetcherTest.java @@ -80,6 +80,12 @@ void searchByEmptyQueryFindsNothing() throws Exception { assertEquals(Collections.emptyList(), fetcher.performSearch("")); } + @Test + @Disabled("Year search is currently broken, because the API returns mutliple years.") + @Override + public void supportsYearSearch() throws Exception { + } + @Test @Disabled("Year range search is not natively supported by the API, but can be emulated by multiple single year searches.") @Override