diff --git a/config/checkstyle/checkstyle.xml b/config/checkstyle/checkstyle.xml index 2b267604f65..09f3edd01f1 100644 --- a/config/checkstyle/checkstyle.xml +++ b/config/checkstyle/checkstyle.xml @@ -12,7 +12,9 @@ - + diff --git a/docs/adr/0012-handle-different-bibEntry-formats-of-fetchers.md b/docs/adr/0012-handle-different-bibEntry-formats-of-fetchers.md new file mode 100644 index 00000000000..4a689e51e74 --- /dev/null +++ b/docs/adr/0012-handle-different-bibEntry-formats-of-fetchers.md @@ -0,0 +1,56 @@ +# Handle different bibentry formats of fetchers by adding a layer + +## Context and Problem Statement + +All fetchers (except IDFetchers) in JabRef return BibEntries when fetching entries from their API. +Some fetchers directly receive BibTeX entries from their API; the other fetchers receive their entries in some kind of exchange format, such as JSON or XML, and then parse them into BibEntries. +Currently, all fetchers return BibEntries in either BibTeX or BibLaTeX format. +This can lead to importing BibEntries of one format into a database of the other format. +How can this inconsistency between fetchers and the formats they use be addressed? + +## Considered Options + +* Pass fetchers the format, they have to create entries accordingly (in the correct format). +* Pass fetchers the format, they have to call a conversion method if necessary (in the correct format). +* Let the caller handle any format inconsistencies and the conversion. +* Introduce a new layer between fetchers and caller, such as a FetcherHandler, that manages the conversion + +## Decision Outcome + +Chosen option: "Introduce a new layer between fetchers and caller, such as a FetcherHandler, that manages the conversion", +because it can compose all steps required during importing, not only the format conversion of fetched entries. +[As described here (comment)](https://github.com/JabRef/jabref/pull/6687) + +## Pros and Cons of the Options + +### Introduce a new layer between fetchers and caller, such as a FetcherHandler, that manages the conversion + +* Good, because fetchers do not have to think about conversion (separation of concerns) +* Good, because no other code that currently relies on fetchers has to do the conversion +* Good, because this layer can be used for any kind of import to handle all conversion steps (not only format). [As described here (comment)](https://github.com/JabRef/jabref/pull/6687) +* Good, because this layer can easily be extended if the import procedure changes +* Bad, because this requires a lot of code changes +* Bad, because this has to be tested extensively + +### Pass fetchers the format, they have to call a conversion method if necessary + +* Good, because less code has to be written than with option "Pass fetchers the format, they have to create entries accordingly" +* Good, because the code is already tested +* Good, because it keeps all conversion code centralized (code reuse) +* Bad, because the fetcher first creates the BibEntry in a possibly "wrong" format, which can easily lead to bugs due to e.g. code changes +* Bad, because it adds a dependency + +### Pass fetchers the format, they have to create entries accordingly + +* Good, because fetchers already handle BibEntry creation (in their format of choice). This is part of their responsibility. +* Good, because fetchers only create BibEntries of the "correct" format. At no point is there a chance of the wrong format being passed on due to e.g. code changes.
+* Good, because the conversion does not have to take place +* Bad, because the fetcher has to "know" all differences between the formats -> clutters the code. +* Bad, because this code has to be tested; the conversion code already exists. + +### Let the caller handle any format inconsistencies and the conversion + +* Good, because fetcher code does not have to change +* Good, because the fetcher only has to fetch and does not need to know anything about the formats +* Bad, because programmers might assume that a certain format is used, e.g. the preferred format (which would not work, as the database that imports the entries does not have to conform to the preferred format) +* Bad, because at every place where fetchers are used and the format matters, conversion has to be applied, creating more dependencies diff --git a/src/main/java/org/jabref/logic/importer/EntryBasedParserFetcher.java b/src/main/java/org/jabref/logic/importer/EntryBasedParserFetcher.java index cab9e202be6..5297d5ad1d2 100644 --- a/src/main/java/org/jabref/logic/importer/EntryBasedParserFetcher.java +++ b/src/main/java/org/jabref/logic/importer/EntryBasedParserFetcher.java @@ -43,6 +43,7 @@ public interface EntryBasedParserFetcher extends EntryBasedFetcher { * {@code new FieldFormatterCleanup(StandardField.TITLE, new RemoveBracesFormatter()).cleanup(entry);} * * By default, no cleanup is done. + * * @param entry the entry to be cleaned-up */ default void doPostCleanup(BibEntry entry) { diff --git a/src/main/java/org/jabref/logic/importer/IdBasedFetcher.java b/src/main/java/org/jabref/logic/importer/IdBasedFetcher.java index 4ca64611451..0238ef6b89a 100644 --- a/src/main/java/org/jabref/logic/importer/IdBasedFetcher.java +++ b/src/main/java/org/jabref/logic/importer/IdBasedFetcher.java @@ -14,7 +14,6 @@ public interface IdBasedFetcher extends WebFetcher { * * @param identifier a string which uniquely identifies the item * @return a {@link BibEntry} containing the bibliographic information (or an empty optional if no data was found) - * @throws FetcherException */ Optional performSearchById(String identifier) throws FetcherException; } diff --git a/src/main/java/org/jabref/logic/importer/IdBasedParserFetcher.java b/src/main/java/org/jabref/logic/importer/IdBasedParserFetcher.java index d947c10fcbd..56547e303d0 100644 --- a/src/main/java/org/jabref/logic/importer/IdBasedParserFetcher.java +++ b/src/main/java/org/jabref/logic/importer/IdBasedParserFetcher.java @@ -48,6 +48,7 @@ public interface IdBasedParserFetcher extends IdBasedFetcher { * {@code new FieldFormatterCleanup(StandardField.TITLE, new RemoveBracesFormatter()).cleanup(entry);} * * By default, no cleanup is done.
+ * * @param entry the entry to be cleaned-up */ default void doPostCleanup(BibEntry entry) { diff --git a/src/main/java/org/jabref/logic/importer/ImportCleanup.java b/src/main/java/org/jabref/logic/importer/ImportCleanup.java new file mode 100644 index 00000000000..787b56159b2 --- /dev/null +++ b/src/main/java/org/jabref/logic/importer/ImportCleanup.java @@ -0,0 +1,38 @@ +package org.jabref.logic.importer; + +import java.util.Collection; + +import org.jabref.logic.cleanup.ConvertToBiblatexCleanup; +import org.jabref.logic.cleanup.ConvertToBibtexCleanup; +import org.jabref.model.database.BibDatabaseMode; +import org.jabref.model.entry.BibEntry; + +public class ImportCleanup { + + private final BibDatabaseMode targetBibEntryFormat; + + public ImportCleanup(BibDatabaseMode targetBibEntryFormat) { + this.targetBibEntryFormat = targetBibEntryFormat; + } + + /** + * Performs a format conversion of the given entry into the targeted format. + * + * @return Returns the cleaned up bibentry to enable usage of doPostCleanup in streams. + */ + public BibEntry doPostCleanup(BibEntry entry) { + if (targetBibEntryFormat == BibDatabaseMode.BIBTEX) { + new ConvertToBibtexCleanup().cleanup(entry); + } else if (targetBibEntryFormat == BibDatabaseMode.BIBLATEX) { + new ConvertToBiblatexCleanup().cleanup(entry); + } + return entry; + } + + /** + * Performs a format conversion of the given entry collection into the targeted format. + */ + public void doPostCleanup(Collection entries) { + entries.parallelStream().forEach(entry -> doPostCleanup(entry)); + } +} diff --git a/src/main/java/org/jabref/logic/importer/SearchBasedFetcher.java b/src/main/java/org/jabref/logic/importer/SearchBasedFetcher.java index 7c0e3f78bbf..d5d83f64ffb 100644 --- a/src/main/java/org/jabref/logic/importer/SearchBasedFetcher.java +++ b/src/main/java/org/jabref/logic/importer/SearchBasedFetcher.java @@ -2,6 +2,7 @@ import java.util.List; +import org.jabref.logic.importer.fetcher.ComplexSearchQuery; import org.jabref.model.entry.BibEntry; /** @@ -17,4 +18,15 @@ public interface SearchBasedFetcher extends WebFetcher { * @return a list of {@link BibEntry}, which are matched by the query (may be empty) */ List performSearch(String query) throws FetcherException; + + /** + * This method is used to send complex queries using fielded search. + * + * @param complexSearchQuery the search query defining all fielded search parameters + * @return a list of {@link BibEntry}, which are matched by the query (may be empty) + */ + default List performComplexSearch(ComplexSearchQuery complexSearchQuery) throws FetcherException { + // Default Implementation behaves like perform search using the default field as query + return performSearch(complexSearchQuery.getDefaultField().orElse("")); + } } diff --git a/src/main/java/org/jabref/logic/importer/SearchBasedParserFetcher.java b/src/main/java/org/jabref/logic/importer/SearchBasedParserFetcher.java index ad1a81e3658..24b0e2c84d5 100644 --- a/src/main/java/org/jabref/logic/importer/SearchBasedParserFetcher.java +++ b/src/main/java/org/jabref/logic/importer/SearchBasedParserFetcher.java @@ -8,6 +8,7 @@ import java.util.Collections; import java.util.List; +import org.jabref.logic.importer.fetcher.ComplexSearchQuery; import org.jabref.model.cleanup.Formatter; import org.jabref.model.entry.BibEntry; import org.jabref.model.strings.StringUtil; @@ -22,6 +23,7 @@ public interface SearchBasedParserFetcher extends SearchBasedFetcher { /** * Constructs a URL based on the query. 
+ * * @param query the search query */ URL getURLForQuery(String query) throws URISyntaxException, MalformedURLException, FetcherException; @@ -31,24 +33,6 @@ public interface SearchBasedParserFetcher extends SearchBasedFetcher { */ Parser getParser(); - /** - * Performs a cleanup of the fetched entry. - * - * Only systematic errors of the fetcher should be corrected here - * (i.e. if information is consistently contained in the wrong field or the wrong format) - * but not cosmetic issues which may depend on the user's taste (for example, LateX code vs HTML in the abstract). - * - * Try to reuse existing {@link Formatter} for the cleanup. For example, - * {@code new FieldFormatterCleanup(StandardField.TITLE, new RemoveBracesFormatter()).cleanup(entry);} - * - * By default, no cleanup is done. - * - * @param entry the entry to be cleaned-up - */ - default void doPostCleanup(BibEntry entry) { - // Do nothing by default - } - @Override default List performSearch(String query) throws FetcherException { if (StringUtil.isBlank(query)) { @@ -71,4 +55,49 @@ default List performSearch(String query) throws FetcherException { throw new FetcherException("An internal parser error occurred", e); } } + + /** + * This method is used to send queries with advanced URL parameters. + * This method is necessary as the performSearch method does not support certain URL parameters that are used for + * fielded search, such as a title, author, or year parameter. + * + * @param complexSearchQuery the search query defining all fielded search parameters + */ + @Override + default List performComplexSearch(ComplexSearchQuery complexSearchQuery) throws FetcherException { + try (InputStream stream = getUrlDownload(getComplexQueryURL(complexSearchQuery)).asInputStream()) { + List fetchedEntries = getParser().parseEntries(stream); + fetchedEntries.forEach(this::doPostCleanup); + return fetchedEntries; + } catch (URISyntaxException e) { + throw new FetcherException("Search URI is malformed", e); + } catch (IOException e) { + // TODO: Catch HTTP Response 401/403 errors and report that user has no rights to access resource + throw new FetcherException("A network error occurred", e); + } catch (ParseException e) { + throw new FetcherException("An internal parser error occurred", e); + } + } + + default URL getComplexQueryURL(ComplexSearchQuery complexSearchQuery) throws URISyntaxException, MalformedURLException, FetcherException { + // Default Implementation behaves like getURLForQuery using the default field as query + return this.getURLForQuery(complexSearchQuery.getDefaultField().orElse("")); + } + + /** + * Performs a cleanup of the fetched entry. + * + * Only systematic errors of the fetcher should be corrected here + * (i.e. if information is consistently contained in the wrong field or the wrong format) + * but not cosmetic issues which may depend on the user's taste (for example, LateX code vs HTML in the abstract). + * + * Try to reuse existing {@link Formatter} for the cleanup. For example, + * {@code new FieldFormatterCleanup(StandardField.TITLE, new RemoveBracesFormatter()).cleanup(entry);} + * + * By default, no cleanup is done. 
+ * @param entry the entry to be cleaned-up + */ + default void doPostCleanup(BibEntry entry) { + // Do nothing by default + } } diff --git a/src/main/java/org/jabref/logic/importer/fetcher/ArXiv.java b/src/main/java/org/jabref/logic/importer/fetcher/ArXiv.java index 463c10a83b1..7fc3fd866c9 100644 --- a/src/main/java/org/jabref/logic/importer/fetcher/ArXiv.java +++ b/src/main/java/org/jabref/logic/importer/fetcher/ArXiv.java @@ -158,7 +158,7 @@ private List searchForEntries(String searchQuery) throws FetcherExce } private List queryApi(String searchQuery, List ids, int start, int maxResults) - throws FetcherException { + throws FetcherException { Document result = callApi(searchQuery, ids, start, maxResults); List entries = XMLUtil.asList(result.getElementsByTagName("entry")); @@ -255,6 +255,25 @@ public List performSearch(String query) throws FetcherException { .collect(Collectors.toList()); } + /** + * Constructs a complex query string using the field prefixes specified at https://arxiv.org/help/api/user-manual + * + * @param complexSearchQuery the search query defining all fielded search parameters + * @return A list of entries matching the complex query + */ + @Override + public List performComplexSearch(ComplexSearchQuery complexSearchQuery) throws FetcherException { + List searchTerms = new ArrayList<>(); + complexSearchQuery.getAuthors().ifPresent(authors -> authors.forEach(author -> searchTerms.add("au:" + author))); + complexSearchQuery.getTitlePhrases().ifPresent(title -> searchTerms.add("ti:" + title)); + complexSearchQuery.getJournal().ifPresent(journal -> searchTerms.add("jr:" + journal)); + // Since ArXiv API does not support year search, we ignore the year related terms + complexSearchQuery.getToYear().ifPresent(year -> searchTerms.add(year.toString())); + complexSearchQuery.getDefaultField().ifPresent(defaultField -> searchTerms.add(defaultField)); + String complexQueryString = String.join(" AND ", searchTerms); + return performSearch(complexQueryString); + } + @Override public Optional performSearchById(String identifier) throws FetcherException { return searchForEntryById(identifier) diff --git a/src/main/java/org/jabref/logic/importer/fetcher/CiteSeer.java b/src/main/java/org/jabref/logic/importer/fetcher/CiteSeer.java index 9586fb6c519..77c386a75be 100644 --- a/src/main/java/org/jabref/logic/importer/fetcher/CiteSeer.java +++ b/src/main/java/org/jabref/logic/importer/fetcher/CiteSeer.java @@ -59,7 +59,6 @@ public Parser getParser() { // So we extract the data string from the tags and pass the content to the COinS parser return inputStream -> { String response = new BufferedReader(new InputStreamReader(inputStream)).lines().collect(Collectors.joining(OS.NEWLINE)); - List entries = new ArrayList<>(); CoinsParser parser = new CoinsParser(); Pattern pattern = Pattern.compile(""); diff --git a/src/main/java/org/jabref/logic/importer/fetcher/ComplexSearchQuery.java b/src/main/java/org/jabref/logic/importer/fetcher/ComplexSearchQuery.java new file mode 100644 index 00000000000..f08a1a59418 --- /dev/null +++ b/src/main/java/org/jabref/logic/importer/fetcher/ComplexSearchQuery.java @@ -0,0 +1,166 @@ +package org.jabref.logic.importer.fetcher; + +import java.util.ArrayList; +import java.util.List; +import java.util.Objects; +import java.util.Optional; + +import org.jabref.model.strings.StringUtil; + +public class ComplexSearchQuery { + // Field for non-fielded search + private final String defaultField; + private final List authors; + private final List titlePhrases; + private 
final Integer fromYear; + private final Integer toYear; + private final Integer singleYear; + private final String journal; + + private ComplexSearchQuery(String defaultField, List authors, List titlePhrases, Integer fromYear, Integer toYear, Integer singleYear, String journal) { + this.defaultField = defaultField; + this.authors = authors; + this.titlePhrases = titlePhrases; + this.fromYear = fromYear; + // Some APIs do not support, or not fully support, year based search. In these cases, the non applicable parameters are ignored. + this.toYear = toYear; + this.journal = journal; + this.singleYear = singleYear; + } + + public Optional getDefaultField() { + return Optional.ofNullable(defaultField); + } + + public Optional> getAuthors() { + return Optional.ofNullable(authors); + } + + public Optional> getTitlePhrases() { + return Optional.ofNullable(titlePhrases); + } + + public Optional getFromYear() { + return Optional.ofNullable(fromYear); + } + + public Optional getToYear() { + return Optional.ofNullable(toYear); + } + + public Optional getSingleYear() { + return Optional.ofNullable(singleYear); + } + + public Optional getJournal() { + return Optional.ofNullable(journal); + } + + public static ComplexSearchQueryBuilder builder() { + return new ComplexSearchQueryBuilder(); + } + + public static class ComplexSearchQueryBuilder { + private String defaultField; + private List authors; + private List titlePhrases; + private String journal; + private Integer fromYear; + private Integer toYear; + private Integer singleYear; + + public ComplexSearchQueryBuilder() { + } + + public ComplexSearchQueryBuilder defaultField(String defaultField) { + if (Objects.requireNonNull(defaultField).isBlank()) { + throw new IllegalArgumentException("Parameter must not be blank"); + } + this.defaultField = defaultField; + return this; + } + + /** + * Adds author and wraps it in quotes + */ + public ComplexSearchQueryBuilder author(String author) { + if (Objects.requireNonNull(author).isBlank()) { + throw new IllegalArgumentException("Parameter must not be blank"); + } + if (Objects.isNull(authors)) { + this.authors = new ArrayList<>(); + } + // Strip all quotes before wrapping + this.authors.add(String.format("\"%s\"", author.replace("\"", ""))); + return this; + } + + /** + * Adds title phrase and wraps it in quotes + */ + public ComplexSearchQueryBuilder titlePhrase(String titlePhrase) { + if (Objects.requireNonNull(titlePhrase).isBlank()) { + throw new IllegalArgumentException("Parameter must not be blank"); + } + if (Objects.isNull(titlePhrases)) { + this.titlePhrases = new ArrayList<>(); + } + // Strip all quotes before wrapping + this.titlePhrases.add(String.format("\"%s\"", titlePhrase.replace("\"", ""))); + return this; + } + + public ComplexSearchQueryBuilder fromYearAndToYear(Integer fromYear, Integer toYear) { + if (Objects.nonNull(singleYear)) { + throw new IllegalArgumentException("You can not use single year and year range search."); + } + this.fromYear = Objects.requireNonNull(fromYear); + this.toYear = Objects.requireNonNull(toYear); + return this; + } + + public ComplexSearchQueryBuilder singleYear(Integer singleYear) { + if (Objects.nonNull(fromYear) || Objects.nonNull(toYear)) { + throw new IllegalArgumentException("You can not use single year and year range search."); + } + this.singleYear = Objects.requireNonNull(singleYear); + return this; + } + + public ComplexSearchQueryBuilder journal(String journal) { + if (Objects.requireNonNull(journal).isBlank()) { + throw new 
IllegalArgumentException("Parameter must not be blank"); + } + this.journal = String.format("\"%s\"", journal.replace("\"", "")); + return this; + } + + /** + * Instantiates the ComplexSearchQuery from the provided builder parameters. + * If all text and year fields are empty, an IllegalStateException is thrown. + * + * @return ComplexSearchQuery instance with the fields set to the values defined in the builder instance. + * @throws IllegalStateException An IllegalStateException is thrown in case all text and year search fields are empty. + * See: https://softwareengineering.stackexchange.com/questions/241309/builder-pattern-when-to-fail/241320#241320 + */ + public ComplexSearchQuery build() throws IllegalStateException { + if (textSearchFieldsAndYearFieldsAreEmpty()) { + throw new IllegalStateException("At least one text field has to be set"); + } + return new ComplexSearchQuery(defaultField, authors, titlePhrases, fromYear, toYear, singleYear, journal); + } + + private boolean textSearchFieldsAndYearFieldsAreEmpty() { + return StringUtil.isBlank(defaultField) && this.stringListIsBlank(titlePhrases) && + this.stringListIsBlank(authors) && StringUtil.isBlank(journal) && yearFieldsAreEmpty(); + } + + private boolean yearFieldsAreEmpty() { + return Objects.isNull(singleYear) && Objects.isNull(fromYear) && Objects.isNull(toYear); + } + + private boolean stringListIsBlank(List stringList) { + return Objects.isNull(stringList) || stringList.stream().allMatch(String::isBlank); + } + } +} diff --git a/src/main/java/org/jabref/logic/importer/fetcher/CompositeSearchBasedFetcher.java b/src/main/java/org/jabref/logic/importer/fetcher/CompositeSearchBasedFetcher.java index 95ea76e66eb..b0dbf1c5184 100644 --- a/src/main/java/org/jabref/logic/importer/fetcher/CompositeSearchBasedFetcher.java +++ b/src/main/java/org/jabref/logic/importer/fetcher/CompositeSearchBasedFetcher.java @@ -8,7 +8,9 @@ import org.jabref.logic.help.HelpFile; import org.jabref.logic.importer.FetcherException; +import org.jabref.logic.importer.ImportCleanup; import org.jabref.logic.importer.SearchBasedFetcher; +import org.jabref.model.database.BibDatabaseMode; import org.jabref.model.entry.BibEntry; import org.slf4j.Logger; @@ -35,16 +37,20 @@ public CompositeSearchBasedFetcher(Set searchBasedFetchers, @Override public List performSearch(String query) { - return fetchers.stream().flatMap(searchBasedFetcher -> { - try { - return searchBasedFetcher.performSearch(query).stream(); - } catch (FetcherException e) { - LOGGER.warn(String.format("%s API request failed", searchBasedFetcher.getName()), e); - return Stream.empty(); - } - }).parallel() - .limit(maximumNumberOfReturnedResults) - .collect(Collectors.toList()); + ImportCleanup cleanup = new ImportCleanup(BibDatabaseMode.BIBTEX); + // All entries have to be converted into one format, because the fetchers may return BibTeX as well as BibLaTeX entries + return fetchers.parallelStream() + .flatMap(searchBasedFetcher -> { + try { + return searchBasedFetcher.performSearch(query).stream(); + } catch (FetcherException e) { + LOGGER.warn(String.format("%s API request failed", searchBasedFetcher.getName()), e); + return Stream.empty(); + } + }) + .limit(maximumNumberOfReturnedResults) + .map(cleanup::doPostCleanup) + .collect(Collectors.toList()); } @Override diff --git a/src/main/java/org/jabref/logic/importer/fetcher/DoiFetcher.java b/src/main/java/org/jabref/logic/importer/fetcher/DoiFetcher.java index 02d7812b638..b22f73f496e 100644 --- a/src/main/java/org/jabref/logic/importer/fetcher/DoiFetcher.java +++
b/src/main/java/org/jabref/logic/importer/fetcher/DoiFetcher.java @@ -55,7 +55,6 @@ public Optional getHelpPage() { @Override public Optional performSearchById(String identifier) throws FetcherException { Optional doi = DOI.parse(identifier); - try { if (doi.isPresent()) { Optional fetchedEntry; @@ -124,5 +123,4 @@ public Optional getAgency(DOI doi) throws IOException { return agency; } - } diff --git a/src/main/java/org/jabref/logic/importer/fetcher/GoogleScholar.java b/src/main/java/org/jabref/logic/importer/fetcher/GoogleScholar.java index 2154325aa3c..ebfb5f7e3a7 100644 --- a/src/main/java/org/jabref/logic/importer/fetcher/GoogleScholar.java +++ b/src/main/java/org/jabref/logic/importer/fetcher/GoogleScholar.java @@ -161,6 +161,56 @@ public List performSearch(String query) throws FetcherException { } } + @Override + public List performComplexSearch(ComplexSearchQuery complexSearchQuery) throws FetcherException { + try { + obtainAndModifyCookie(); + List foundEntries = new ArrayList<>(10); + + URIBuilder uriBuilder = new URIBuilder(BASIC_SEARCH_URL); + uriBuilder.addParameter("hl", "en"); + uriBuilder.addParameter("btnG", "Search"); + uriBuilder.addParameter("q", constructComplexQueryString(complexSearchQuery)); + complexSearchQuery.getFromYear().ifPresent(year -> uriBuilder.addParameter("as_ylo", year.toString())); + complexSearchQuery.getToYear().ifPresent(year -> uriBuilder.addParameter("as_yhi", year.toString())); + complexSearchQuery.getSingleYear().ifPresent(year -> { + uriBuilder.addParameter("as_ylo", year.toString()); + uriBuilder.addParameter("as_yhi", year.toString()); + }); + + addHitsFromQuery(foundEntries, uriBuilder.toString()); + + if (foundEntries.size() == 10) { + uriBuilder.addParameter("start", "10"); + addHitsFromQuery(foundEntries, uriBuilder.toString()); + } + + return foundEntries; + } catch (URISyntaxException e) { + throw new FetcherException("Error while fetching from " + getName(), e); + } catch (IOException e) { + // If there are too many requests from the same IP address, Google answers with a 503 and redirects to a captcha challenge + // The caught IOException looks for example like this: + // java.io.IOException: Server returned HTTP response code: 503 for URL: https://ipv4.google.com/sorry/index?continue=https://scholar.google.com/scholar%3Fhl%3Den%26btnG%3DSearch%26q%3Dbpmn&hl=en&q=CGMSBI0NBDkYuqy9wAUiGQDxp4NLQCWbIEY1HjpH5zFJhv4ANPGdWj0 + if (e.getMessage().contains("Server returned HTTP response code: 503 for URL")) { + throw new FetcherException("Fetching from Google Scholar failed.", + Localization.lang("This might be caused by reaching the traffic limitation of Google Scholar (see 'Help' for details)."), e); + } else { + throw new FetcherException("Error while fetching from " + getName(), e); + } + } + } + + private String constructComplexQueryString(ComplexSearchQuery complexSearchQuery) { + List searchTerms = new ArrayList<>(); + complexSearchQuery.getDefaultField().ifPresent(defaultField -> searchTerms.add(defaultField)); + complexSearchQuery.getAuthors().ifPresent(authors -> authors.forEach(author -> searchTerms.add("author:" + author))); + complexSearchQuery.getTitlePhrases().ifPresent(phrases -> searchTerms.add("allintitle:" + String.join(" ", phrases))); + complexSearchQuery.getJournal().ifPresent(journal -> searchTerms.add("source:" + journal)); + // API automatically ANDs the terms + return String.join(" ", searchTerms); + } + private void addHitsFromQuery(List entryList, String queryURL) throws IOException, FetcherException { String
content = new URLDownload(queryURL).asString(); diff --git a/src/main/java/org/jabref/logic/importer/fetcher/IEEE.java b/src/main/java/org/jabref/logic/importer/fetcher/IEEE.java index 92074712488..6c87d9c7b35 100644 --- a/src/main/java/org/jabref/logic/importer/fetcher/IEEE.java +++ b/src/main/java/org/jabref/logic/importer/fetcher/IEEE.java @@ -6,7 +6,6 @@ import java.net.MalformedURLException; import java.net.URISyntaxException; import java.net.URL; -import java.nio.file.Path; import java.util.ArrayList; import java.util.List; import java.util.Objects; @@ -67,18 +66,10 @@ private static BibEntry parseJsonRespone(JSONObject jsonEntry, Character keyword BibEntry entry = new BibEntry(); switch (jsonEntry.optString("content_type")) { - case "Books": - entry.setType(StandardEntryType.Book); - break; - case "Conferences": - entry.setType(StandardEntryType.InProceedings); - break; - case "Courses": - entry.setType(StandardEntryType.Misc); - break; - default: - entry.setType(StandardEntryType.Article); - break; + case "Books" -> entry.setType(StandardEntryType.Book); + case "Conferences" -> entry.setType(StandardEntryType.InProceedings); + case "Courses" -> entry.setType(StandardEntryType.Misc); + default -> entry.setType(StandardEntryType.Article); } entry.setField(StandardField.ABSTRACT, jsonEntry.optString("abstract")); @@ -115,7 +106,11 @@ private static BibEntry parseJsonRespone(JSONObject jsonEntry, Character keyword entry.setField(StandardField.ISBN, jsonEntry.optString("isbn")); entry.setField(StandardField.ISSN, jsonEntry.optString("issn")); entry.setField(StandardField.ISSUE, jsonEntry.optString("issue")); - entry.addFile(new LinkedFile("", Path.of(jsonEntry.optString("pdf_url")), "PDF")); + try { + entry.addFile(new LinkedFile(new URL(jsonEntry.optString("pdf_url")), "PDF")); + } catch (MalformedURLException e) { + LOGGER.error("Fetched PDF URL String is malformed."); + } entry.setField(StandardField.JOURNALTITLE, jsonEntry.optString("publication_title")); entry.setField(StandardField.DATE, jsonEntry.optString("publication_date")); entry.setField(StandardField.EVENTTITLEADDON, jsonEntry.optString("conference_location")); @@ -236,4 +231,25 @@ public String getName() { public Optional getHelpPage() { return Optional.of(HelpFile.FETCHER_IEEEXPLORE); } + + @Override + public URL getComplexQueryURL(ComplexSearchQuery complexSearchQuery) throws URISyntaxException, MalformedURLException { + URIBuilder uriBuilder = new URIBuilder("https://ieeexploreapi.ieee.org/api/v1/search/articles"); + uriBuilder.addParameter("apikey", API_KEY); + complexSearchQuery.getDefaultField().ifPresent(defaultField -> uriBuilder.addParameter("querytext", defaultField)); + complexSearchQuery.getAuthors().ifPresent(authors -> + uriBuilder.addParameter("author", String.join(" AND ", authors))); + complexSearchQuery.getTitlePhrases().ifPresent(articleTitlePhrases -> + uriBuilder.addParameter("article_title", String.join(" AND ", articleTitlePhrases))); + complexSearchQuery.getJournal().ifPresent(journalTitle -> uriBuilder.addParameter("publication_title", journalTitle)); + complexSearchQuery.getFromYear().map(String::valueOf).ifPresent(year -> uriBuilder.addParameter("start_year", year)); + complexSearchQuery.getToYear().map(String::valueOf).ifPresent(year -> uriBuilder.addParameter("end_year", year)); + complexSearchQuery.getSingleYear().map(String::valueOf).ifPresent(year -> { + uriBuilder.addParameter("start_year", year); + uriBuilder.addParameter("end_year", year); + }); + + URLDownload.bypassSSLVerification(); + 
return uriBuilder.build().toURL(); + } } diff --git a/src/main/java/org/jabref/logic/importer/fetcher/IsbnViaOttoBibFetcher.java b/src/main/java/org/jabref/logic/importer/fetcher/IsbnViaOttoBibFetcher.java index 79081628d6e..28402523e19 100644 --- a/src/main/java/org/jabref/logic/importer/fetcher/IsbnViaOttoBibFetcher.java +++ b/src/main/java/org/jabref/logic/importer/fetcher/IsbnViaOttoBibFetcher.java @@ -64,6 +64,7 @@ public Optional performSearchById(String identifier) throws FetcherExc } catch (ParseException e) { throw new FetcherException("An internal parser error occurred", e); } + entry.ifPresent(bibEntry -> doPostCleanup(bibEntry)); return entry; } } diff --git a/src/main/java/org/jabref/logic/importer/fetcher/Medra.java b/src/main/java/org/jabref/logic/importer/fetcher/Medra.java index a7f18942888..6e6256f00ae 100644 --- a/src/main/java/org/jabref/logic/importer/fetcher/Medra.java +++ b/src/main/java/org/jabref/logic/importer/fetcher/Medra.java @@ -122,5 +122,4 @@ public URL getUrlForIdentifier(String identifier) throws URISyntaxException, Mal public void doPostCleanup(BibEntry entry) { new DoiCleanup().cleanup(entry); } - } diff --git a/src/main/java/org/jabref/logic/importer/fetcher/SpringerFetcher.java b/src/main/java/org/jabref/logic/importer/fetcher/SpringerFetcher.java index d27ec8998e7..9cdddcfb0fc 100644 --- a/src/main/java/org/jabref/logic/importer/fetcher/SpringerFetcher.java +++ b/src/main/java/org/jabref/logic/importer/fetcher/SpringerFetcher.java @@ -164,6 +164,22 @@ public URL getURLForQuery(String query) throws URISyntaxException, MalformedURLE return uriBuilder.build().toURL(); } + @Override + public URL getComplexQueryURL(ComplexSearchQuery complexSearchQuery) throws URISyntaxException, MalformedURLException, FetcherException { + return getURLForQuery(constructComplexQueryString(complexSearchQuery)); + } + + private String constructComplexQueryString(ComplexSearchQuery complexSearchQuery) { + List searchTerms = new ArrayList<>(); + complexSearchQuery.getAuthors().ifPresent(authors -> authors.forEach(author -> searchTerms.add("name:" + author))); + complexSearchQuery.getTitlePhrases().ifPresent(titlePhrases -> titlePhrases.forEach(title -> searchTerms.add("title:" + title))); + complexSearchQuery.getJournal().ifPresent(journal -> searchTerms.add("journal:" + journal)); + // Since the Springer API does not support year range search, we ignore fromYear and toYear.
+ complexSearchQuery.getSingleYear().ifPresent(year -> searchTerms.add("year:" + year.toString())); + complexSearchQuery.getDefaultField().ifPresent(defaultField -> searchTerms.add(defaultField)); + return String.join(" AND ", searchTerms); + } + @Override public Parser getParser() { return inputStream -> { diff --git a/src/test/java/org/jabref/logic/importer/WebFetchersTest.java b/src/test/java/org/jabref/logic/importer/WebFetchersTest.java index 003e89e1b9e..1911796ee64 100644 --- a/src/test/java/org/jabref/logic/importer/WebFetchersTest.java +++ b/src/test/java/org/jabref/logic/importer/WebFetchersTest.java @@ -79,6 +79,7 @@ void getSearchBasedFetchersReturnsAllFetcherDerivingFromSearchBasedFetcher() thr ClassInfoList controlClasses = scanResult.getClassesImplementing(SearchBasedFetcher.class.getCanonicalName()); Set> expected = new HashSet<>(controlClasses.loadClasses()); + // Remove interfaces expected.remove(SearchBasedParserFetcher.class); // Remove ACM, because it doesn't work currently diff --git a/src/test/java/org/jabref/logic/importer/fetcher/ArXivTest.java b/src/test/java/org/jabref/logic/importer/fetcher/ArXivTest.java index 8bee0365c5b..6c3458475b2 100644 --- a/src/test/java/org/jabref/logic/importer/fetcher/ArXivTest.java +++ b/src/test/java/org/jabref/logic/importer/fetcher/ArXivTest.java @@ -3,10 +3,12 @@ import java.io.IOException; import java.net.URL; import java.util.Collections; +import java.util.List; import java.util.Optional; import org.jabref.logic.importer.FetcherException; import org.jabref.logic.importer.ImportFormatPreferences; +import org.jabref.logic.importer.SearchBasedFetcher; import org.jabref.model.entry.BibEntry; import org.jabref.model.entry.field.StandardField; import org.jabref.model.entry.identifier.ArXivIdentifier; @@ -14,16 +16,18 @@ import org.jabref.testutils.category.FetcherTest; import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; @FetcherTest -class ArXivTest { - private ArXiv finder; +class ArXivTest implements SearchBasedFetcherCapabilityTest { + private ArXiv fetcher; private BibEntry entry; private BibEntry sliceTheoremPaper; @@ -31,30 +35,28 @@ class ArXivTest { void setUp() { ImportFormatPreferences importFormatPreferences = mock(ImportFormatPreferences.class); when(importFormatPreferences.getKeywordSeparator()).thenReturn(','); - finder = new ArXiv(importFormatPreferences); + fetcher = new ArXiv(importFormatPreferences); entry = new BibEntry(); - - sliceTheoremPaper = new BibEntry(); - sliceTheoremPaper.setType(StandardEntryType.Article); - sliceTheoremPaper.setField(StandardField.AUTHOR, "Tobias Diez"); - sliceTheoremPaper.setField(StandardField.TITLE, "Slice theorem for Fréchet group actions and covariant symplectic field theory"); - sliceTheoremPaper.setField(StandardField.DATE, "2014-05-09"); - sliceTheoremPaper.setField(StandardField.ABSTRACT, "A general slice theorem for the action of a Fr\\'echet Lie group on a Fr\\'echet manifolds is established. The Nash-Moser theorem provides the fundamental tool to generalize the result of Palais to this infinite-dimensional setting. 
The presented slice theorem is illustrated by its application to gauge theories: the action of the gauge transformation group admits smooth slices at every point and thus the gauge orbit space is stratified by Fr\\'echet manifolds. Furthermore, a covariant and symplectic formulation of classical field theory is proposed and extensively discussed. At the root of this novel framework is the incorporation of field degrees of freedom F and spacetime M into the product manifold F * M. The induced bigrading of differential forms is used in order to carry over the usual symplectic theory to this new setting. The examples of the Klein-Gordon field and general Yang-Mills theory illustrate that the presented approach conveniently handles the occurring symmetries."); - sliceTheoremPaper.setField(StandardField.EPRINT, "1405.2249"); - sliceTheoremPaper.setField(StandardField.FILE, ":http\\://arxiv.org/pdf/1405.2249v1:PDF"); - sliceTheoremPaper.setField(StandardField.EPRINTTYPE, "arXiv"); - sliceTheoremPaper.setField(StandardField.EPRINTCLASS, "math-ph"); - sliceTheoremPaper.setField(StandardField.KEYWORDS, "math-ph, math.DG, math.MP, math.SG, 58B99, 58Z05, 58B25, 22E65, 58D19, 53D20, 53D42"); + sliceTheoremPaper = new BibEntry(StandardEntryType.Article) + .withField(StandardField.AUTHOR, "Tobias Diez") + .withField(StandardField.TITLE, "Slice theorem for Fréchet group actions and covariant symplectic field theory") + .withField(StandardField.DATE, "2014-05-09") + .withField(StandardField.ABSTRACT, "A general slice theorem for the action of a Fr\\'echet Lie group on a Fr\\'echet manifolds is established. The Nash-Moser theorem provides the fundamental tool to generalize the result of Palais to this infinite-dimensional setting. The presented slice theorem is illustrated by its application to gauge theories: the action of the gauge transformation group admits smooth slices at every point and thus the gauge orbit space is stratified by Fr\\'echet manifolds. Furthermore, a covariant and symplectic formulation of classical field theory is proposed and extensively discussed. At the root of this novel framework is the incorporation of field degrees of freedom F and spacetime M into the product manifold F * M. The induced bigrading of differential forms is used in order to carry over the usual symplectic theory to this new setting. 
The examples of the Klein-Gordon field and general Yang-Mills theory illustrate that the presented approach conveniently handles the occurring symmetries.") + .withField(StandardField.EPRINT, "1405.2249") + .withField(StandardField.FILE, ":http\\://arxiv.org/pdf/1405.2249v1:PDF") + .withField(StandardField.EPRINTTYPE, "arXiv") + .withField(StandardField.EPRINTCLASS, "math-ph") + .withField(StandardField.KEYWORDS, "math-ph, math.DG, math.MP, math.SG, 58B99, 58Z05, 58B25, 22E65, 58D19, 53D20, 53D42"); } @Test void findFullTextForEmptyEntryResultsEmptyOptional() throws IOException { - assertEquals(Optional.empty(), finder.findFullText(entry)); + assertEquals(Optional.empty(), fetcher.findFullText(entry)); } @Test void findFullTextRejectsNullParameter() { - assertThrows(NullPointerException.class, () -> finder.findFullText(null)); + assertThrows(NullPointerException.class, () -> fetcher.findFullText(null)); } @Test @@ -62,19 +64,19 @@ void findFullTextByDOI() throws IOException { entry.setField(StandardField.DOI, "10.1529/biophysj.104.047340"); entry.setField(StandardField.TITLE, "Pause Point Spectra in DNA Constant-Force Unzipping"); - assertEquals(Optional.of(new URL("http://arxiv.org/pdf/cond-mat/0406246v1")), finder.findFullText(entry)); + assertEquals(Optional.of(new URL("http://arxiv.org/pdf/cond-mat/0406246v1")), fetcher.findFullText(entry)); } @Test void findFullTextByEprint() throws IOException { entry.setField(StandardField.EPRINT, "1603.06570"); - assertEquals(Optional.of(new URL("http://arxiv.org/pdf/1603.06570v1")), finder.findFullText(entry)); + assertEquals(Optional.of(new URL("http://arxiv.org/pdf/1603.06570v1")), fetcher.findFullText(entry)); } @Test void findFullTextByEprintWithPrefix() throws IOException { entry.setField(StandardField.EPRINT, "arXiv:1603.06570"); - assertEquals(Optional.of(new URL("http://arxiv.org/pdf/1603.06570v1")), finder.findFullText(entry)); + assertEquals(Optional.of(new URL("http://arxiv.org/pdf/1603.06570v1")), fetcher.findFullText(entry)); } @Test @@ -82,14 +84,14 @@ void findFullTextByEprintWithUnknownDOI() throws IOException { entry.setField(StandardField.DOI, "10.1529/unknown"); entry.setField(StandardField.EPRINT, "1603.06570"); - assertEquals(Optional.of(new URL("http://arxiv.org/pdf/1603.06570v1")), finder.findFullText(entry)); + assertEquals(Optional.of(new URL("http://arxiv.org/pdf/1603.06570v1")), fetcher.findFullText(entry)); } @Test void findFullTextByTitle() throws IOException { entry.setField(StandardField.TITLE, "Pause Point Spectra in DNA Constant-Force Unzipping"); - assertEquals(Optional.of(new URL("http://arxiv.org/pdf/cond-mat/0406246v1")), finder.findFullText(entry)); + assertEquals(Optional.of(new URL("http://arxiv.org/pdf/cond-mat/0406246v1")), fetcher.findFullText(entry)); } @Test @@ -97,19 +99,19 @@ void findFullTextByTitleAndPartOfAuthor() throws IOException { entry.setField(StandardField.TITLE, "Pause Point Spectra in DNA Constant-Force Unzipping"); entry.setField(StandardField.AUTHOR, "Weeks and Lucks"); - assertEquals(Optional.of(new URL("http://arxiv.org/pdf/cond-mat/0406246v1")), finder.findFullText(entry)); + assertEquals(Optional.of(new URL("http://arxiv.org/pdf/cond-mat/0406246v1")), fetcher.findFullText(entry)); } @Test void notFindFullTextByUnknownDOI() throws IOException { entry.setField(StandardField.DOI, "10.1529/unknown"); - assertEquals(Optional.empty(), finder.findFullText(entry)); + assertEquals(Optional.empty(), fetcher.findFullText(entry)); } @Test void notFindFullTextByUnknownId() throws IOException { 
entry.setField(StandardField.EPRINT, "1234.12345"); - assertEquals(Optional.empty(), finder.findFullText(entry)); + assertEquals(Optional.empty(), fetcher.findFullText(entry)); } @Test @@ -117,106 +119,175 @@ void findFullTextByDOINotAvailableInCatalog() throws IOException { entry.setField(StandardField.DOI, "10.1016/0370-2693(77)90015-6"); entry.setField(StandardField.TITLE, "Superspace formulation of supergravity"); - assertEquals(Optional.empty(), finder.findFullText(entry)); + assertEquals(Optional.empty(), fetcher.findFullText(entry)); } @Test void findFullTextEntityWithoutDoi() throws IOException { - assertEquals(Optional.empty(), finder.findFullText(entry)); + assertEquals(Optional.empty(), fetcher.findFullText(entry)); } @Test void findFullTextTrustLevel() { - assertEquals(TrustLevel.PREPRINT, finder.getTrustLevel()); + assertEquals(TrustLevel.PREPRINT, fetcher.getTrustLevel()); } @Test void searchEntryByPartOfTitle() throws Exception { assertEquals(Collections.singletonList(sliceTheoremPaper), - finder.performSearch("ti:\"slice theorem for Frechet\"")); + fetcher.performSearch("ti:\"slice theorem for Frechet\"")); } @Test void searchEntryByPartOfTitleWithAcuteAccent() throws Exception { assertEquals(Collections.singletonList(sliceTheoremPaper), - finder.performSearch("ti:\"slice theorem for Fréchet\"")); + fetcher.performSearch("ti:\"slice theorem for Fréchet\"")); } @Test void searchEntryByOldId() throws Exception { - BibEntry expected = new BibEntry(); - expected.setType(StandardEntryType.Article); - expected.setField(StandardField.AUTHOR, "H1 Collaboration"); - expected.setField(StandardField.TITLE, "Multi-Electron Production at High Transverse Momenta in ep Collisions at HERA"); - expected.setField(StandardField.DATE, "2003-07-07"); - expected.setField(StandardField.ABSTRACT, "Multi-electron production is studied at high electron transverse momentum in positron- and electron-proton collisions using the H1 detector at HERA. The data correspond to an integrated luminosity of 115 pb-1. Di-electron and tri-electron event yields are measured. Cross sections are derived in a restricted phase space region dominated by photon-photon collisions. In general good agreement is found with the Standard Model predictions. However, for electron pair invariant masses above 100 GeV, three di-electron events and three tri-electron events are observed, compared to Standard Model expectations of 0.30 \\pm 0.04 and 0.23 \\pm 0.04, respectively."); - expected.setField(StandardField.EPRINT, "hep-ex/0307015"); - expected.setField(StandardField.FILE, ":http\\://arxiv.org/pdf/hep-ex/0307015v1:PDF"); - expected.setField(StandardField.EPRINTTYPE, "arXiv"); - expected.setField(StandardField.EPRINTCLASS, "hep-ex"); - expected.setField(StandardField.KEYWORDS, "hep-ex"); - expected.setField(StandardField.DOI, "10.1140/epjc/s2003-01326-x"); - expected.setField(StandardField.JOURNALTITLE, "Eur.Phys.J.C31:17-29,2003"); - - assertEquals(Optional.of(expected), finder.performSearchById("hep-ex/0307015")); + BibEntry expected = new BibEntry(StandardEntryType.Article) + .withField(StandardField.AUTHOR, "H1 Collaboration") + .withField(StandardField.TITLE, "Multi-Electron Production at High Transverse Momenta in ep Collisions at HERA") + .withField(StandardField.DATE, "2003-07-07") + .withField(StandardField.ABSTRACT, "Multi-electron production is studied at high electron transverse momentum in positron- and electron-proton collisions using the H1 detector at HERA. 
The data correspond to an integrated luminosity of 115 pb-1. Di-electron and tri-electron event yields are measured. Cross sections are derived in a restricted phase space region dominated by photon-photon collisions. In general good agreement is found with the Standard Model predictions. However, for electron pair invariant masses above 100 GeV, three di-electron events and three tri-electron events are observed, compared to Standard Model expectations of 0.30 \\pm 0.04 and 0.23 \\pm 0.04, respectively.") + .withField(StandardField.EPRINT, "hep-ex/0307015") + .withField(StandardField.FILE, ":http\\://arxiv.org/pdf/hep-ex/0307015v1:PDF") + .withField(StandardField.EPRINTTYPE, "arXiv") + .withField(StandardField.EPRINTCLASS, "hep-ex") + .withField(StandardField.KEYWORDS, "hep-ex") + .withField(StandardField.DOI, "10.1140/epjc/s2003-01326-x") + .withField(StandardField.JOURNALTITLE, "Eur.Phys.J.C31:17-29,2003"); + + assertEquals(Optional.of(expected), fetcher.performSearchById("hep-ex/0307015")); } @Test void searchEntryByIdWith4DigitsAndVersion() throws Exception { - assertEquals(Optional.of(sliceTheoremPaper), finder.performSearchById("1405.2249v1")); + assertEquals(Optional.of(sliceTheoremPaper), fetcher.performSearchById("1405.2249v1")); } @Test void searchEntryByIdWith4Digits() throws Exception { - assertEquals(Optional.of(sliceTheoremPaper), finder.performSearchById("1405.2249")); + assertEquals(Optional.of(sliceTheoremPaper), fetcher.performSearchById("1405.2249")); } @Test void searchEntryByIdWith4DigitsAndPrefix() throws Exception { - assertEquals(Optional.of(sliceTheoremPaper), finder.performSearchById("arXiv:1405.2249")); + assertEquals(Optional.of(sliceTheoremPaper), fetcher.performSearchById("arXiv:1405.2249")); } @Test void searchEntryByIdWith4DigitsAndPrefixAndNotTrimmed() throws Exception { - assertEquals(Optional.of(sliceTheoremPaper), finder.performSearchById("arXiv : 1405. 2249")); + assertEquals(Optional.of(sliceTheoremPaper), fetcher.performSearchById("arXiv : 1405. 
2249")); } @Test void searchEntryByIdWith5Digits() throws Exception { assertEquals(Optional.of( "An Optimal Convergence Theorem for Mean Curvature Flow of Arbitrary Codimension in Hyperbolic Spaces"), - finder.performSearchById("1503.06747").flatMap(entry -> entry.getField(StandardField.TITLE))); + fetcher.performSearchById("1503.06747").flatMap(entry -> entry.getField(StandardField.TITLE))); } @Test void searchWithMalformedIdThrowsException() throws Exception { - assertThrows(FetcherException.class, () -> finder.performSearchById("123412345")); + assertThrows(FetcherException.class, () -> fetcher.performSearchById("123412345")); } @Test void searchIdentifierForSlicePaper() throws Exception { sliceTheoremPaper.clearField(StandardField.EPRINT); - assertEquals(ArXivIdentifier.parse("1405.2249"), finder.findIdentifier(sliceTheoremPaper)); + assertEquals(ArXivIdentifier.parse("1405.2249"), fetcher.findIdentifier(sliceTheoremPaper)); } @Test void searchEmptyId() throws Exception { - assertEquals(Optional.empty(), finder.performSearchById("")); + assertEquals(Optional.empty(), fetcher.performSearchById("")); } @Test void searchWithHttpUrl() throws Exception { - assertEquals(Optional.of(sliceTheoremPaper), finder.performSearchById("http://arxiv.org/abs/1405.2249")); + assertEquals(Optional.of(sliceTheoremPaper), fetcher.performSearchById("http://arxiv.org/abs/1405.2249")); } @Test void searchWithHttpsUrl() throws Exception { - assertEquals(Optional.of(sliceTheoremPaper), finder.performSearchById("https://arxiv.org/abs/1405.2249")); + assertEquals(Optional.of(sliceTheoremPaper), fetcher.performSearchById("https://arxiv.org/abs/1405.2249")); } @Test void searchWithHttpsUrlNotTrimmed() throws Exception { - assertEquals(Optional.of(sliceTheoremPaper), finder.performSearchById("https : // arxiv . org / abs / 1405 . 2249 ")); + assertEquals(Optional.of(sliceTheoremPaper), fetcher.performSearchById("https : // arxiv . org / abs / 1405 . 2249 ")); + } + + @Override + public SearchBasedFetcher getFetcher() { + return fetcher; + } + + @Override + public List getTestAuthors() { + return List.of("\"Tobias Diez\""); + } + + @Disabled("Is not supported by the current API") + @Test + @Override + public void supportsYearSearch() throws Exception { + } + + @Disabled("Is not supported by the current API") + @Test + @Override + public void supportsYearRangeSearch() throws Exception { + + } + + @Override + public String getTestJournal() { + return "\"Journal of Geometry and Physics (2013)\""; + } + + @Test + public void supportsPhraseSearch() throws Exception { + BibEntry expected = new BibEntry(StandardEntryType.Article) + .withField(StandardField.AUTHOR, "Tobias Büscher and Angel L. Diez and Gerhard Gompper and Jens Elgeti") + .withField(StandardField.TITLE, "Instability and fingering of interfaces in growing tissue") + .withField(StandardField.DATE, "2020-03-10") + .withField(StandardField.ABSTRACT, "Interfaces in tissues are ubiquitous, both between tissue and environment as well as between populations of different cell types. The propagation of an interface can be driven mechanically. % e.g. by a difference in the respective homeostatic stress of the different cell types. Computer simulations of growing tissues are employed to study the stability of the interface between two tissues on a substrate. 
From a mechanical perspective, the dynamics and stability of this system is controlled mainly by four parameters of the respective tissues: (i) the homeostatic stress (ii) cell motility (iii) tissue viscosity and (iv) substrate friction. For propagation driven by a difference in homeostatic stress, the interface is stable for tissue-specific substrate friction even for very large differences of homeostatic stress; however, it becomes unstable above a critical stress difference when the tissue with the larger homeostatic stress has a higher viscosity. A small difference in directed bulk motility between the two tissues suffices to result in propagation with a stable interface, even for otherwise identical tissues. Larger differences in motility force, however, result in a finite-wavelength instability of the interface. Interestingly, the instability is apparently bound by nonlinear effects and the amplitude of the interface undulations only grows to a finite value in time.") + .withField(StandardField.EPRINT, "2003.04601") + .withField(StandardField.FILE, ":http\\://arxiv.org/pdf/2003.04601v1:PDF") + .withField(StandardField.EPRINTTYPE, "arXiv") + .withField(StandardField.EPRINTCLASS, "q-bio.TO") + .withField(StandardField.KEYWORDS, "q-bio.TO"); + + List resultWithPhraseSearch = fetcher.performSearch("au:\"Tobias Diez\""); + List resultWithOutPhraseSearch = fetcher.performSearch("au:Tobias Diez"); + // Ensure that phrase search result is just a subset of the default search result + assertTrue(resultWithOutPhraseSearch.containsAll(resultWithPhraseSearch)); + resultWithOutPhraseSearch.removeAll(resultWithPhraseSearch); + + // There is only a single paper found by searching for Tobias Diez as author that is not authored by "Tobias Diez". + assertEquals(Collections.singletonList(expected), resultWithOutPhraseSearch); + } + + @Test + public void supportsBooleanANDSearch() throws Exception { + BibEntry expected = new BibEntry(StandardEntryType.Article) + .withField(StandardField.AUTHOR, "Tobias Büscher and Angel L. Diez and Gerhard Gompper and Jens Elgeti") + .withField(StandardField.TITLE, "Instability and fingering of interfaces in growing tissue") + .withField(StandardField.DATE, "2020-03-10") + .withField(StandardField.ABSTRACT, "Interfaces in tissues are ubiquitous, both between tissue and environment as well as between populations of different cell types. The propagation of an interface can be driven mechanically. % e.g. by a difference in the respective homeostatic stress of the different cell types. Computer simulations of growing tissues are employed to study the stability of the interface between two tissues on a substrate. From a mechanical perspective, the dynamics and stability of this system is controlled mainly by four parameters of the respective tissues: (i) the homeostatic stress (ii) cell motility (iii) tissue viscosity and (iv) substrate friction. For propagation driven by a difference in homeostatic stress, the interface is stable for tissue-specific substrate friction even for very large differences of homeostatic stress; however, it becomes unstable above a critical stress difference when the tissue with the larger homeostatic stress has a higher viscosity. A small difference in directed bulk motility between the two tissues suffices to result in propagation with a stable interface, even for otherwise identical tissues. Larger differences in motility force, however, result in a finite-wavelength instability of the interface. 
Interestingly, the instability is apparently bound by nonlinear effects and the amplitude of the interface undulations only grows to a finite value in time.") + .withField(StandardField.EPRINT, "2003.04601") + .withField(StandardField.FILE, ":http\\://arxiv.org/pdf/2003.04601v1:PDF") + .withField(StandardField.EPRINTTYPE, "arXiv") + .withField(StandardField.EPRINTCLASS, "q-bio.TO") + .withField(StandardField.KEYWORDS, "q-bio.TO"); + + List result = fetcher.performSearch("au:\"Tobias Büscher\" AND ti:\"Instability and fingering of interfaces\""); + + // There is only one paper authored by Tobias Büscher with that phrase in the title + assertEquals(Collections.singletonList(expected), result); } } diff --git a/src/test/java/org/jabref/logic/importer/fetcher/CiteSeerTest.java b/src/test/java/org/jabref/logic/importer/fetcher/CiteSeerTest.java index e0417421a89..7d8d2242d04 100644 --- a/src/test/java/org/jabref/logic/importer/fetcher/CiteSeerTest.java +++ b/src/test/java/org/jabref/logic/importer/fetcher/CiteSeerTest.java @@ -41,7 +41,7 @@ void searchByQueryFindsEntry2() throws Exception { .withField(StandardField.TITLE, "Coping Theory and Research: Past Present and Future") .withField(StandardField.DOI, "10.1.1.115.9665") .withField(StandardField.YEAR, "1993") - .withField(StandardField.JOURNALTITLE, "PSYCHOSOMATIC MEDICINE"); + .withField(StandardField.JOURNAL, "PSYCHOSOMATIC MEDICINE"); List fetchedEntries = fetcher.performSearch("doi:10.1.1.115.9665"); assertEquals(Collections.singletonList(expected), fetchedEntries); diff --git a/src/test/java/org/jabref/logic/importer/fetcher/CollectionOfComputerScienceBibliographiesFetcherTest.java b/src/test/java/org/jabref/logic/importer/fetcher/CollectionOfComputerScienceBibliographiesFetcherTest.java index 3fa4637dff7..73ccf743b4d 100644 --- a/src/test/java/org/jabref/logic/importer/fetcher/CollectionOfComputerScienceBibliographiesFetcherTest.java +++ b/src/test/java/org/jabref/logic/importer/fetcher/CollectionOfComputerScienceBibliographiesFetcherTest.java @@ -53,7 +53,7 @@ public void performSearchReturnsMatchingMultipleEntries() throws FetcherExceptio BibEntry firstBibEntry = new BibEntry(StandardEntryType.InProceedings) .withCiteKey("conf/ecsa/OlssonEW17") .withField(StandardField.AUTHOR, "Tobias Olsson and Morgan Ericsson and Anna Wingkvist") - .withField(StandardField.EDITOR, "Rog{\\~A}{\\copyright}rio de Lemos") + .withField(StandardField.EDITOR, "Rog{\\'e}rio de Lemos") .withField(StandardField.ISBN, "978-1-4503-5217-8") .withField(StandardField.PAGES, "152--158") .withField(StandardField.PUBLISHER, "ACM") @@ -66,7 +66,7 @@ public void performSearchReturnsMatchingMultipleEntries() throws FetcherExceptio BibEntry secondBibEntry = new BibEntry(StandardEntryType.Article) .withCiteKey("oai:DiVA.org:lnu-68408") - .withField(new UnknownField("identifier"), "urn:isbn:978-1-4503-5217-8; doi:10.1145/3129790.3129810; ISI:000426556400034") + .withField(new UnknownField("identifier"), "urn:isbn:978-1-4503-5217-8; doi:10.1145/3129790.3129810; ISI:000426556400034; Scopus 2-s2.0-85037741580") .withField(new UnknownField("subject"), "Software Architecture; Code Churn; Open Source; Architecrual Erosion; Technical Debt; Software Engineering; Programvaruteknik") .withField(new UnknownField("relation"), "ACM International Conference Proceeding Series; ECSA '17~Proceedings of the 11th European Conference on Software Architecture : Companion Proceedings, p. 
152-158") .withField(StandardField.ABSTRACT, "The open source application JabRef has existed since" + @@ -103,7 +103,6 @@ public void performSearchReturnsMatchingMultipleEntries() throws FetcherExceptio .withField(StandardField.LANGUAGE, "eng") .withField(StandardField.AUTHOR, "Tobias Olsson and Morgan Ericsson and Anna Wingkvist") .withField(StandardField.YEAR, "2017"); - // Checking entries in the set as the query is generic and returns a changing result set assertTrue(searchResult.contains(firstBibEntry)); assertTrue(searchResult.contains(secondBibEntry)); diff --git a/src/test/java/org/jabref/logic/importer/fetcher/CompositeSearchBasedFetcherTest.java b/src/test/java/org/jabref/logic/importer/fetcher/CompositeSearchBasedFetcherTest.java index 50866933d9d..fb4ca119f7d 100644 --- a/src/test/java/org/jabref/logic/importer/fetcher/CompositeSearchBasedFetcherTest.java +++ b/src/test/java/org/jabref/logic/importer/fetcher/CompositeSearchBasedFetcherTest.java @@ -9,8 +9,10 @@ import org.jabref.logic.bibtex.FieldContentFormatterPreferences; import org.jabref.logic.importer.FetcherException; +import org.jabref.logic.importer.ImportCleanup; import org.jabref.logic.importer.ImportFormatPreferences; import org.jabref.logic.importer.SearchBasedFetcher; +import org.jabref.model.database.BibDatabaseMode; import org.jabref.model.entry.BibEntry; import org.jabref.testutils.category.FetcherTest; @@ -61,11 +63,14 @@ public void performSearchOnEmptyQuery(Set fetchers) { @MethodSource("performSearchParameters") public void performSearchOnNonEmptyQuery(Set fetchers) { CompositeSearchBasedFetcher compositeFetcher = new CompositeSearchBasedFetcher(fetchers, Integer.MAX_VALUE); + ImportCleanup cleanup = new ImportCleanup(BibDatabaseMode.BIBTEX); List compositeResult = compositeFetcher.performSearch("quantum"); for (SearchBasedFetcher fetcher : fetchers) { try { - Assertions.assertTrue(compositeResult.containsAll(fetcher.performSearch("quantum"))); + List fetcherResult = fetcher.performSearch("quantum"); + fetcherResult.forEach(cleanup::doPostCleanup); + Assertions.assertTrue(compositeResult.containsAll(fetcherResult)); } catch (FetcherException e) { /* We catch the Fetcher exception here, since the failing fetcher also fails in the CompositeFetcher * and just leads to no additional results in the returned list. 
Therefore the test should not fail diff --git a/src/test/java/org/jabref/logic/importer/fetcher/DoiFetcherTest.java b/src/test/java/org/jabref/logic/importer/fetcher/DoiFetcherTest.java index ade75d2c307..b2772ef56bf 100644 --- a/src/test/java/org/jabref/logic/importer/fetcher/DoiFetcherTest.java +++ b/src/test/java/org/jabref/logic/importer/fetcher/DoiFetcherTest.java @@ -67,7 +67,6 @@ public void setUp() { .withField(StandardField.DOI, "10.3303/CET1977146") .withField(StandardField.JOURNAL, "Chemical Engineering Transactions") .withField(StandardField.PAGES, "871-876") - .withField(StandardField.URL, "http://doi.org/10.3303/CET1977146") .withField(StandardField.VOLUME, "77"); } diff --git a/src/test/java/org/jabref/logic/importer/fetcher/GoogleScholarTest.java b/src/test/java/org/jabref/logic/importer/fetcher/GoogleScholarTest.java index 5d86db68a47..f32e0674f23 100644 --- a/src/test/java/org/jabref/logic/importer/fetcher/GoogleScholarTest.java +++ b/src/test/java/org/jabref/logic/importer/fetcher/GoogleScholarTest.java @@ -9,6 +9,7 @@ import org.jabref.logic.bibtex.FieldContentFormatterPreferences; import org.jabref.logic.importer.FetcherException; import org.jabref.logic.importer.ImportFormatPreferences; +import org.jabref.logic.importer.SearchBasedFetcher; import org.jabref.model.entry.BibEntry; import org.jabref.model.entry.field.StandardField; import org.jabref.model.entry.types.StandardEntryType; @@ -23,7 +24,7 @@ import static org.mockito.Mockito.when; @FetcherTest -class GoogleScholarTest { +class GoogleScholarTest implements SearchBasedFetcherCapabilityTest { private GoogleScholar finder; private BibEntry entry; @@ -79,4 +80,19 @@ void findManyEntries() throws FetcherException { assertEquals(20, foundEntries.size()); } + + @Override + public SearchBasedFetcher getFetcher() { + return finder; + } + + @Override + public List getTestAuthors() { + return List.of("Mittermeier", "Myers"); + } + + @Override + public String getTestJournal() { + return "Nature"; + } } diff --git a/src/test/java/org/jabref/logic/importer/fetcher/IEEETest.java b/src/test/java/org/jabref/logic/importer/fetcher/IEEETest.java index 4d6e7e21657..9aab79989ba 100644 --- a/src/test/java/org/jabref/logic/importer/fetcher/IEEETest.java +++ b/src/test/java/org/jabref/logic/importer/fetcher/IEEETest.java @@ -1,13 +1,12 @@ package org.jabref.logic.importer.fetcher; -import java.io.IOException; import java.net.URL; import java.util.Collections; import java.util.List; import java.util.Optional; -import org.jabref.logic.importer.FetcherException; import org.jabref.logic.importer.ImportFormatPreferences; +import org.jabref.logic.importer.SearchBasedFetcher; import org.jabref.model.entry.BibEntry; import org.jabref.model.entry.field.StandardField; import org.jabref.model.entry.types.StandardEntryType; @@ -21,7 +20,7 @@ import static org.mockito.Mockito.when; @FetcherTest -class IEEETest { +class IEEETest implements SearchBasedFetcherCapabilityTest { private IEEE fetcher; private BibEntry entry; @@ -31,107 +30,113 @@ void setUp() { ImportFormatPreferences importFormatPreferences = mock(ImportFormatPreferences.class); when(importFormatPreferences.getKeywordSeparator()).thenReturn(','); fetcher = new IEEE(importFormatPreferences); - entry = new BibEntry(); } @Test - void findByDOI() throws IOException { + void findByDOI() throws Exception { entry.setField(StandardField.DOI, "10.1109/ACCESS.2016.2535486"); - assertEquals(Optional.of(new 
URL("https://ieeexplore.ieee.org/ielx7/6287639/7419931/07421926.pdf?tp=&arnumber=7421926&isnumber=7419931&ref=")), - fetcher.findFullText(entry)); + fetcher.findFullText(entry)); } @Test - void findByDocumentUrl() throws IOException { + void findByDocumentUrl() throws Exception { entry.setField(StandardField.URL, "https://ieeexplore.ieee.org/document/7421926/"); assertEquals(Optional.of(new URL("https://ieeexplore.ieee.org/ielx7/6287639/7419931/07421926.pdf?tp=&arnumber=7421926&isnumber=7419931&ref=")), - fetcher.findFullText(entry)); + fetcher.findFullText(entry)); } @Test - void findByURL() throws IOException { + void findByURL() throws Exception { entry.setField(StandardField.URL, "https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=7421926&ref="); - assertEquals(Optional.of(new URL("https://ieeexplore.ieee.org/ielx7/6287639/7419931/07421926.pdf?tp=&arnumber=7421926&isnumber=7419931&ref=")), - fetcher.findFullText(entry)); + fetcher.findFullText(entry)); } @Test - void findByOldURL() throws IOException { + void findByOldURL() throws Exception { entry.setField(StandardField.URL, "https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=7421926"); - assertEquals(Optional.of(new URL("https://ieeexplore.ieee.org/ielx7/6287639/7419931/07421926.pdf?tp=&arnumber=7421926&isnumber=7419931&ref=")), - fetcher.findFullText(entry)); + fetcher.findFullText(entry)); } @Test - void findByDOIButNotURL() throws IOException { + void findByDOIButNotURL() throws Exception { entry.setField(StandardField.DOI, "10.1109/ACCESS.2016.2535486"); entry.setField(StandardField.URL, "http://dx.doi.org/10.1109/ACCESS.2016.2535486"); - assertEquals(Optional.of(new URL("https://ieeexplore.ieee.org/ielx7/6287639/7419931/07421926.pdf?tp=&arnumber=7421926&isnumber=7419931&ref=")), - fetcher.findFullText(entry)); + fetcher.findFullText(entry)); } @Test - void notFoundByURL() throws IOException { + void notFoundByURL() throws Exception { entry.setField(StandardField.URL, "http://dx.doi.org/10.1109/ACCESS.2016.2535486"); - assertEquals(Optional.empty(), fetcher.findFullText(entry)); } @Test - void notFoundByDOI() throws IOException { + void notFoundByDOI() throws Exception { entry.setField(StandardField.DOI, "10.1021/bk-2006-WWW.ch014"); - assertEquals(Optional.empty(), fetcher.findFullText(entry)); } @Test - void searchResultHasNoKeywordTerms() throws FetcherException { - BibEntry expected = new BibEntry(StandardEntryType.Article); - - expected.setField(StandardField.AUTHOR, "Shatakshi Jha and Ikhlaq Hussain and Bhim Singh and Sukumar Mishra"); - expected.setField(StandardField.DATE, "25 2 2019"); - expected.setField(StandardField.YEAR, "2019"); - expected.setField(StandardField.DOI, "10.1049/iet-rpg.2018.5648"); - expected.setField(StandardField.FILE, ":https\\://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8636659:PDF"); - expected.setField(StandardField.ISSUE, "3"); - expected.setField(StandardField.ISSN, "1752-1424"); - expected.setField(StandardField.JOURNALTITLE, "IET Renewable Power Generation"); - expected.setField(StandardField.PAGES, "418--426"); - expected.setField(StandardField.PUBLISHER, "IET"); - expected.setField(StandardField.TITLE, "Optimal operation of PV-DG-battery based microgrid with power quality conditioner"); - expected.setField(StandardField.VOLUME, "13"); + void searchResultHasNoKeywordTerms() throws Exception { + BibEntry expected = new BibEntry(StandardEntryType.Article) + .withField(StandardField.AUTHOR, "Shatakshi Jha and Ikhlaq Hussain and Bhim Singh and Sukumar Mishra") + 
.withField(StandardField.DATE, "25 2 2019") + .withField(StandardField.YEAR, "2019") + .withField(StandardField.DOI, "10.1049/iet-rpg.2018.5648") + .withField(StandardField.FILE, ":https\\://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8636659:PDF") + .withField(StandardField.ISSUE, "3") + .withField(StandardField.ISSN, "1752-1424") + .withField(StandardField.JOURNALTITLE, "IET Renewable Power Generation") + .withField(StandardField.PAGES, "418--426") + .withField(StandardField.PUBLISHER, "IET") + .withField(StandardField.TITLE, "Optimal operation of PV-DG-battery based microgrid with power quality conditioner") + .withField(StandardField.VOLUME, "13"); List fetchedEntries = fetcher.performSearch("8636659"); // article number fetchedEntries.forEach(entry -> entry.clearField(StandardField.ABSTRACT)); // Remove abstract due to copyright); assertEquals(Collections.singletonList(expected), fetchedEntries); - } @Test void searchByQueryFindsEntry() throws Exception { - BibEntry expected = new BibEntry(StandardEntryType.InProceedings); - expected.setField(StandardField.AUTHOR, "Igor Steinmacher and Tayana Uchoa Conte and Christoph Treude and Marco Aurélio Gerosa"); - expected.setField(StandardField.DATE, "14-22 May 2016"); - expected.setField(StandardField.YEAR, "2016"); - expected.setField(StandardField.EVENTDATE, "14-22 May 2016"); - expected.setField(StandardField.EVENTTITLEADDON, "Austin, TX"); - expected.setField(StandardField.LOCATION, "Austin, TX"); - expected.setField(StandardField.DOI, "10.1145/2884781.2884806"); - expected.setField(StandardField.JOURNALTITLE, "2016 IEEE/ACM 38th International Conference on Software Engineering (ICSE)"); - expected.setField(StandardField.PAGES, "273--284"); - expected.setField(StandardField.ISBN, "978-1-5090-2071-3"); - expected.setField(StandardField.ISSN, "1558-1225"); - expected.setField(StandardField.PUBLISHER, "IEEE"); - expected.setField(StandardField.KEYWORDS, "Portals, Documentation, Computer bugs, Joining processes, Industries, Open source software, Newcomers, Newbies, Novices, Beginners, Open Source Software, Barriers, Obstacles, Onboarding, Joining Process"); - expected.setField(StandardField.TITLE, "Overcoming Open Source Project Entry Barriers with a Portal for Newcomers"); - expected.setField(StandardField.FILE, ":https\\://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=7886910:PDF"); - expected.setField(StandardField.ABSTRACT, "Community-based Open Source Software (OSS) projects are usually self-organized and dynamic, receiving contributions from distributed volunteers. Newcomer are important to the survival, long-term success, and continuity of these communities. However, newcomers face many barriers when making their first contribution to an OSS project, leading in many cases to dropouts. Therefore, a major challenge for OSS projects is to provide ways to support newcomers during their first contribution. In this paper, we propose and evaluate FLOSScoach, a portal created to support newcomers to OSS projects. FLOSScoach was designed based on a conceptual model of barriers created in our previous work. To evaluate the portal, we conducted a study with 65 students, relying on qualitative data from diaries, self-efficacy questionnaires, and the Technology Acceptance Model. The results indicate that FLOSScoach played an important role in guiding newcomers and in lowering barriers related to the orientation and contribution process, whereas it was not effective in lowering technical barriers. 
We also found that FLOSScoach is useful, easy to use, and increased newcomers' confidence to contribute. Our results can help project maintainers on deciding the points that need more attention in order to help OSS project newcomers overcome entry barriers."); + BibEntry expected = new BibEntry(StandardEntryType.InProceedings) + .withField(StandardField.AUTHOR, "Igor Steinmacher and Tayana Uchoa Conte and Christoph Treude and Marco Aurélio Gerosa") + .withField(StandardField.DATE, "14-22 May 2016") + .withField(StandardField.YEAR, "2016") + .withField(StandardField.EVENTDATE, "14-22 May 2016") + .withField(StandardField.EVENTTITLEADDON, "Austin, TX") + .withField(StandardField.LOCATION, "Austin, TX") + .withField(StandardField.DOI, "10.1145/2884781.2884806") + .withField(StandardField.JOURNALTITLE, "2016 IEEE/ACM 38th International Conference on Software Engineering (ICSE)") + .withField(StandardField.PAGES, "273--284") + .withField(StandardField.ISBN, "978-1-5090-2071-3") + .withField(StandardField.ISSN, "1558-1225") + .withField(StandardField.PUBLISHER, "IEEE") + .withField(StandardField.KEYWORDS, "Portals, Documentation, Computer bugs, Joining processes, Industries, Open source software, Newcomers, Newbies, Novices, Beginners, Open Source Software, Barriers, Obstacles, Onboarding, Joining Process") + .withField(StandardField.TITLE, "Overcoming Open Source Project Entry Barriers with a Portal for Newcomers") + .withField(StandardField.FILE, ":https\\://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=7886910:PDF") + .withField(StandardField.ABSTRACT, "Community-based Open Source Software (OSS) projects are usually self-organized and dynamic, receiving contributions from distributed volunteers. Newcomer are important to the survival, long-term success, and continuity of these communities. However, newcomers face many barriers when making their first contribution to an OSS project, leading in many cases to dropouts. Therefore, a major challenge for OSS projects is to provide ways to support newcomers during their first contribution. In this paper, we propose and evaluate FLOSScoach, a portal created to support newcomers to OSS projects. FLOSScoach was designed based on a conceptual model of barriers created in our previous work. To evaluate the portal, we conducted a study with 65 students, relying on qualitative data from diaries, self-efficacy questionnaires, and the Technology Acceptance Model. The results indicate that FLOSScoach played an important role in guiding newcomers and in lowering barriers related to the orientation and contribution process, whereas it was not effective in lowering technical barriers. We also found that FLOSScoach is useful, easy to use, and increased newcomers' confidence to contribute. 
Our results can help project maintainers on deciding the points that need more attention in order to help OSS project newcomers overcome entry barriers."); List fetchedEntries = fetcher.performSearch("Overcoming Open Source Project Entry Barriers with a Portal for Newcomers"); assertEquals(Collections.singletonList(expected), fetchedEntries); } + + @Override + public SearchBasedFetcher getFetcher() { + return fetcher; + } + + @Override + public List getTestAuthors() { + return List.of("Igor Steinmacher", "Tayana Uchoa Conte", "Christoph Treude", "Marco Aurélio Gerosa"); + } + + @Override + public String getTestJournal() { + return "IET Renewable Power Generation"; + } } diff --git a/src/test/java/org/jabref/logic/importer/fetcher/SearchBasedFetcherCapabilityTest.java b/src/test/java/org/jabref/logic/importer/fetcher/SearchBasedFetcherCapabilityTest.java new file mode 100644 index 00000000000..7f7a4a69cdf --- /dev/null +++ b/src/test/java/org/jabref/logic/importer/fetcher/SearchBasedFetcherCapabilityTest.java @@ -0,0 +1,117 @@ +package org.jabref.logic.importer.fetcher; + +import java.util.Collections; +import java.util.List; +import java.util.Optional; +import java.util.stream.Collectors; + +import org.jabref.logic.importer.ImportCleanup; +import org.jabref.logic.importer.SearchBasedFetcher; +import org.jabref.model.database.BibDatabaseMode; +import org.jabref.model.entry.BibEntry; +import org.jabref.model.entry.field.StandardField; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Defines the set of capability tests, each of which tests a given search capability, e.g. author-based search. + * The idea is to code the capabilities of a fetcher into Java code. + * This way, a) the capabilities of a fetcher are checked automatically (because they can change from time to time on the provider side) + * and b) the queries sent to the fetchers can be debugged directly, without having to go through the fetcher code. + */ +interface SearchBasedFetcherCapabilityTest { + + /** + * Test whether the library API supports author field search. + */ + @Test + default void supportsAuthorSearch() throws Exception { + ComplexSearchQuery.ComplexSearchQueryBuilder builder = ComplexSearchQuery.builder(); + getTestAuthors().forEach(builder::author); + + List result = getFetcher().performComplexSearch(builder.build()); + new ImportCleanup(BibDatabaseMode.BIBTEX).doPostCleanup(result); + + assertFalse(result.isEmpty()); + result.forEach(bibEntry -> { + String author = bibEntry.getField(StandardField.AUTHOR).orElse(""); + + // The co-author lists differ between papers, thus we only check that each queried author is present + getTestAuthors().forEach(expectedAuthor -> Assertions.assertTrue(author.contains(expectedAuthor.replace("\"", "")))); + }); + } + + /** + * Test whether the library API supports year field search.
+ */ + @Test + default void supportsYearSearch() throws Exception { + ComplexSearchQuery.ComplexSearchQueryBuilder builder = ComplexSearchQuery.builder(); + builder.singleYear(getTestYear()); + + List result = getFetcher().performComplexSearch(builder.build()); + new ImportCleanup(BibDatabaseMode.BIBTEX).doPostCleanup(result); + List differentYearsInResult = result.stream() + .map(bibEntry -> bibEntry.getField(StandardField.YEAR)) + .filter(Optional::isPresent) + .map(Optional::get) + .distinct() + .collect(Collectors.toList()); + + assertFalse(result.isEmpty()); + assertEquals(Collections.singletonList(getTestYear().toString()), differentYearsInResult); + } + + /** + * Test whether the library API supports year range search. + */ + @Test + default void supportsYearRangeSearch() throws Exception { + ComplexSearchQuery.ComplexSearchQueryBuilder builder = ComplexSearchQuery.builder(); + List yearsInYearRange = List.of("2018", "2019", "2020"); + builder.fromYearAndToYear(2018, 2020); + + List result = getFetcher().performComplexSearch(builder.build()); + new ImportCleanup(BibDatabaseMode.BIBTEX).doPostCleanup(result); + List differentYearsInResult = result.stream() + .map(bibEntry -> bibEntry.getField(StandardField.YEAR)) + .filter(Optional::isPresent) + .map(Optional::get) + .distinct() + .collect(Collectors.toList()); + assertFalse(result.isEmpty()); + assertTrue(yearsInYearRange.containsAll(differentYearsInResult)); + } + + /** + * Test whether the library API supports journal based search. + */ + @Test + default void supportsJournalSearch() throws Exception { + ComplexSearchQuery.ComplexSearchQueryBuilder builder = ComplexSearchQuery.builder(); + builder.journal(getTestJournal()); + List result = getFetcher().performComplexSearch(builder.build()); + new ImportCleanup(BibDatabaseMode.BIBTEX).doPostCleanup(result); + + assertFalse(result.isEmpty()); + result.forEach(bibEntry -> { + String journal = bibEntry.getField(StandardField.JOURNAL).orElse(""); + assertTrue(journal.contains(getTestJournal().replace("\"", ""))); + }); + } + + SearchBasedFetcher getFetcher(); + + List getTestAuthors(); + + String getTestJournal(); + + default Integer getTestYear() { + return 2016; + } +} diff --git a/src/test/java/org/jabref/logic/importer/fetcher/SpringerFetcherTest.java b/src/test/java/org/jabref/logic/importer/fetcher/SpringerFetcherTest.java index ee1d3a76c4d..dcdbf257848 100644 --- a/src/test/java/org/jabref/logic/importer/fetcher/SpringerFetcherTest.java +++ b/src/test/java/org/jabref/logic/importer/fetcher/SpringerFetcherTest.java @@ -3,7 +3,9 @@ import java.util.Collections; import java.util.List; import java.util.Optional; +import java.util.stream.Collectors; +import org.jabref.logic.importer.SearchBasedFetcher; import org.jabref.model.entry.BibEntry; import org.jabref.model.entry.field.StandardField; import org.jabref.model.entry.types.StandardEntryType; @@ -11,13 +13,16 @@ import org.jabref.testutils.category.FetcherTest; import kong.unirest.json.JSONObject; +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; @FetcherTest -class SpringerFetcherTest { +class SpringerFetcherTest implements SearchBasedFetcherCapabilityTest { SpringerFetcher fetcher; @@ -29,21 +34,21 @@ void setUp() { @DisabledOnCIServer("Disable on CI Server to not hit the API call limit") @Test void 
searchByQueryFindsEntry() throws Exception { - BibEntry expected = new BibEntry(StandardEntryType.Article); - expected.setField(StandardField.AUTHOR, "Steinmacher, Igor and Gerosa, Marco and Conte, Tayana U. and Redmiles, David F."); - expected.setField(StandardField.DATE, "2019-04-15"); - expected.setField(StandardField.DOI, "10.1007/s10606-018-9335-z"); - expected.setField(StandardField.ISSN, "0925-9724"); - expected.setField(StandardField.JOURNAL, "Computer Supported Cooperative Work (CSCW)"); - expected.setField(StandardField.MONTH, "#apr#"); - expected.setField(StandardField.PAGES, "247--290"); - expected.setField(StandardField.NUMBER, "1-2"); - expected.setField(StandardField.VOLUME, "28"); - expected.setField(StandardField.PUBLISHER, "Springer"); - expected.setField(StandardField.TITLE, "Overcoming Social Barriers When Contributing to Open Source Software Projects"); - expected.setField(StandardField.YEAR, "2019"); - expected.setField(StandardField.FILE, "online:http\\://link.springer.com/openurl/pdf?id=doi\\:10.1007/s10606-018-9335-z:PDF"); - expected.setField(StandardField.ABSTRACT, "An influx of newcomers is critical to the survival, long-term success, and continuity of many Open Source Software (OSS) community-based projects. However, newcomers face many barriers when making their first contribution, leading in many cases to dropouts. Due to the collaborative nature of community-based OSS projects, newcomers may be susceptible to social barriers, such as communication breakdowns and reception issues. In this article, we report a two-phase study aimed at better understanding social barriers faced by newcomers. In the first phase, we qualitatively analyzed the literature and data collected from practitioners to identify barriers that hinder newcomers’ first contribution. We designed a model composed of 58 barriers, including 13 social barriers. In the second phase, based on the barriers model, we developed FLOSScoach, a portal to support newcomers making their first contribution. We evaluated the portal in a diary-based study and found that the portal guided the newcomers and reduced the need for communication. Our results provide insights for communities that want to support newcomers and lay a foundation for building better onboarding tools. The contributions of this paper include identifying and gathering empirical evidence of social barriers faced by newcomers; understanding how social barriers can be reduced or avoided by using a portal that organizes proper information for newcomers (FLOSScoach); presenting guidelines for communities and newcomers on how to reduce or avoid social barriers; and identifying new streams of research."); + BibEntry expected = new BibEntry(StandardEntryType.Article) + .withField(StandardField.AUTHOR, "Steinmacher, Igor and Gerosa, Marco and Conte, Tayana U. 
and Redmiles, David F.") + .withField(StandardField.DATE, "2019-04-15") + .withField(StandardField.DOI, "10.1007/s10606-018-9335-z") + .withField(StandardField.ISSN, "0925-9724") + .withField(StandardField.JOURNAL, "Computer Supported Cooperative Work (CSCW)") + .withField(StandardField.MONTH, "#apr#") + .withField(StandardField.PAGES, "247--290") + .withField(StandardField.NUMBER, "1-2") + .withField(StandardField.VOLUME, "28") + .withField(StandardField.PUBLISHER, "Springer") + .withField(StandardField.TITLE, "Overcoming Social Barriers When Contributing to Open Source Software Projects") + .withField(StandardField.YEAR, "2019") + .withField(StandardField.FILE, "online:http\\://link.springer.com/openurl/pdf?id=doi\\:10.1007/s10606-018-9335-z:PDF") + .withField(StandardField.ABSTRACT, "An influx of newcomers is critical to the survival, long-term success, and continuity of many Open Source Software (OSS) community-based projects. However, newcomers face many barriers when making their first contribution, leading in many cases to dropouts. Due to the collaborative nature of community-based OSS projects, newcomers may be susceptible to social barriers, such as communication breakdowns and reception issues. In this article, we report a two-phase study aimed at better understanding social barriers faced by newcomers. In the first phase, we qualitatively analyzed the literature and data collected from practitioners to identify barriers that hinder newcomers’ first contribution. We designed a model composed of 58 barriers, including 13 social barriers. In the second phase, based on the barriers model, we developed FLOSScoach, a portal to support newcomers making their first contribution. We evaluated the portal in a diary-based study and found that the portal guided the newcomers and reduced the need for communication. Our results provide insights for communities that want to support newcomers and lay a foundation for building better onboarding tools. The contributions of this paper include identifying and gathering empirical evidence of social barriers faced by newcomers; understanding how social barriers can be reduced or avoided by using a portal that organizes proper information for newcomers (FLOSScoach); presenting guidelines for communities and newcomers on how to reduce or avoid social barriers; and identifying new streams of research."); List fetchedEntries = fetcher.performSearch("JabRef Social Barriers Steinmacher"); assertEquals(Collections.singletonList(expected), fetchedEntries); @@ -78,4 +83,65 @@ void testSpringerJSONToBibtex() { void searchByEmptyQueryFindsNothing() throws Exception { assertEquals(Collections.emptyList(), fetcher.performSearch("")); } + + @Test + @Disabled("Year range search is not natively supported by the API, but can be emulated by multiple single year searches.") + @Override + public void supportsYearRangeSearch() throws Exception { + } + + @Test + public void supportsPhraseSearch() throws Exception { + // Normal search should match due to Redmiles, Elissa M., phrase search on the other hand should not find it. + BibEntry expected = new BibEntry(StandardEntryType.InCollection) + .withField(StandardField.AUTHOR, "Booth, Kayla M. and Dosono, Bryan and Redmiles, Elissa M. 
and Morales, Miraida and Depew, Michael and Farzan, Rosta and Herman, Everett and Trahan, Keith and Tananis, Cindy") + .withField(StandardField.DATE, "2018-01-01") + .withField(StandardField.DOI, "10.1007/978-3-319-78105-1_75") + .withField(StandardField.ISBN, "978-3-319-78104-4") + .withField(StandardField.MONTH, "#jan#") + .withField(StandardField.PUBLISHER, "Springer") + .withField(StandardField.BOOKTITLE, "Transforming Digital Worlds") + .withField(StandardField.TITLE, "Diversifying the Next Generation of Information Scientists: Six Years of Implementation and Outcomes for a Year-Long REU Program") + .withField(StandardField.YEAR, "2018") + .withField(StandardField.FILE, "online:http\\://link.springer.com/openurl/pdf?id=doi\\:10.1007/978-3-319-78105-1_75:PDF") + .withField(StandardField.ABSTRACT, "The iSchool Inclusion Institute (i3) is a Research Experience for Undergraduates (REU) program in the US designed to address underrepresentation in the information sciences. i3 is a year-long, cohort-based program that prepares undergraduate students for graduate school in information science and is rooted in a research and leadership development curriculum. Using data from six years of i3 cohorts, we present in this paper a qualitative and quantitative evaluation of the program in terms of student learning, research production, and graduate school enrollment. We find that students who participate in i3 report significant learning gains in information-science- and graduate-school-related areas and that 52% of i3 participants enroll in graduate school, over 2 $$\\times $$ × the national average. Based on these and additional results, we distill recommendations for future implementations of similar programs to address underrepresentation in information science."); + + List resultPhrase = fetcher.performSearch("name:\"Redmiles David\""); + List result = fetcher.performSearch("name:Redmiles David"); + + // Phrase search should be a subset of the normal search result. + Assertions.assertTrue(result.containsAll(resultPhrase)); + result.removeAll(resultPhrase); + Assertions.assertEquals(Collections.singletonList(expected), result); + } + + @Test + public void supportsBooleanANDSearch() throws Exception { + List resultJustByAuthor = fetcher.performSearch("name:\"Redmiles, David\""); + List result = fetcher.performSearch("name:\"Redmiles, David\" AND journal:Computer Supported Cooperative Work"); + + Assertions.assertTrue(resultJustByAuthor.containsAll(result)); + List allEntriesFromCSCW = result.stream() + .filter(bibEntry -> bibEntry.getField(StandardField.JOURNAL).orElse("").equals("Computer Supported Cooperative Work (CSCW)")) + .collect(Collectors.toList()); + allEntriesFromCSCW.stream() + .map(bibEntry -> bibEntry.getField(StandardField.AUTHOR)) + .filter(Optional::isPresent) + .map(Optional::get).forEach(authorField -> assertTrue(authorField.contains("Redmiles"))); + } + + @Override + public SearchBasedFetcher getFetcher() { + return fetcher; + } + + @Override + public List getTestAuthors() { + return List.of("\"Steinmacher, Igor\"", "\"Gerosa, Marco\"", "\"Conte, Tayana U.\""); + } + + @Override + public String getTestJournal() { + return "\"Clinical Research in Cardiology\""; + } }