Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix #10498 Create Fetcher and Transformer for ScholarArchive #10549

Merged
merged 13 commits into from
Nov 6, 2023
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
package org.jabref.logic.importer.fetcher;

import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;

import org.jabref.logic.importer.FetcherException;
import org.jabref.logic.importer.PagedSearchBasedParserFetcher;
import org.jabref.logic.importer.ParseException;
import org.jabref.logic.importer.Parser;
import org.jabref.logic.importer.fetcher.transformers.ScholarArchiveQueryTransformer;
import org.jabref.logic.importer.util.JsonReader;
import org.jabref.model.entry.AuthorList;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.field.StandardField;
import org.jabref.model.entry.types.EntryType;
import org.jabref.model.entry.types.StandardEntryType;

import kong.unirest.json.JSONArray;
import kong.unirest.json.JSONException;
import kong.unirest.json.JSONObject;
import org.apache.http.client.utils.URIBuilder;
import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class ScholarArchiveFetcher implements PagedSearchBasedParserFetcher {

// Define a constant for the fetcher name.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remove that comment; the next line already states the same thing.

public static final String FETCHER_NAME = "ScholarArchive";

// Initialize the logger for this class.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remove that comment; the next line already states the same thing.

private static final Logger LOGGER = LoggerFactory.getLogger(ScholarArchiveFetcher.class);

// Base address of the ScholarArchive search endpoint; getURLForQuery appends the query parameters to it.
private static final String API_URL = "https://scholar.archive.org/search";

/**
* Gets the query URL by luceneQuery and pageNumber.
*
* @param luceneQuery the search query
* @param pageNumber the number of the page indexed from 0
* @return URL
Siedlerchr marked this conversation as resolved.
Show resolved Hide resolved
*/
@Override
public URL getURLForQuery(QueryNode luceneQuery, int pageNumber) throws URISyntaxException, MalformedURLException, FetcherException {
URIBuilder uriBuilder = new URIBuilder(API_URL);

// Add search query parameter to the URL.
Siedlerchr marked this conversation as resolved.
Show resolved Hide resolved
uriBuilder.addParameter("q", new ScholarArchiveQueryTransformer().transformLuceneQuery(luceneQuery).orElse(""));

// Add page number and page size parameters to the URL.
Siedlerchr marked this conversation as resolved.
Show resolved Hide resolved
uriBuilder.addParameter("from", String.valueOf(getPageSize() * pageNumber));
uriBuilder.addParameter("size", String.valueOf(getPageSize()));

// Specify the response format as JSON.
Siedlerchr marked this conversation as resolved.
Show resolved Hide resolved
uriBuilder.addParameter("format", "json");

// Build the URL.
Siedlerchr marked this conversation as resolved.
Show resolved Hide resolved
return uriBuilder.build().toURL();
}

/**
 * Provides the parser that turns a ScholarArchive JSON response into a list of entries.
 * <p>
 * The response is read as a single JSON object. Every element of its {@code results} array is
 * converted with {@code parseJSONtoBibtex}; a response without a {@code results} key yields an
 * empty list. If {@code results} does not have the expected JSON structure, the parser fails
 * with a {@link ParseException}.
 *
 * @return a parser producing one {@link BibEntry} per element of {@code results}
 */
@Override
public Parser getParser() {
    return inputStream -> {
        // JsonReader already hands back a JSONObject, so it is used directly instead of being
        // wrapped in a second JSONObject.
        JSONObject response = JsonReader.toJsonObject(inputStream);

        List<BibEntry> entries = new ArrayList<>();
        if (!response.has("results")) {
            return entries;
        }

        try {
            JSONArray results = response.getJSONArray("results");
            for (int i = 0; i < results.length(); i++) {
                entries.add(parseJSONtoBibtex(results.getJSONObject(i)));
            }
        } catch (JSONException exception) {
            // Raised when "results" is not an array or one of its elements is not an object.
            throw new ParseException("ScholarArchive API JSON format has changed", exception);
        }

        return entries;
    };
}

/**
* Gets he name of fetcher
*
* @return The fetcher name
*/
Siedlerchr marked this conversation as resolved.
Show resolved Hide resolved
@Override
public String getName() {
return FETCHER_NAME;
}

/**
* Parse from Json object that contain one article to BibEntry
*
* @param jsonEntry the search query
* @return BibEntry
* @throws ParseException
Siedlerchr marked this conversation as resolved.
Show resolved Hide resolved
*/
private BibEntry parseJSONtoBibtex(JSONObject jsonEntry) throws ParseException {
try {
BibEntry entry = new BibEntry();
EntryType entryType = StandardEntryType.InCollection;
JSONObject biblio = jsonEntry.optJSONObject("biblio");
JSONObject abstracts = jsonEntry.optJSONObject("abstracts");

// publication type
String type = biblio.optString("release_type");
entry.setField(StandardField.TYPE, type);
if (type.toLowerCase().contains("book")) {
entryType = StandardEntryType.Book;
} else if (type.toLowerCase().contains("article")) {
entryType = StandardEntryType.Article;
}
entry.setType(entryType);

entry.setField(StandardField.TITLE, biblio.optString("title"));
entry.setField(StandardField.JOURNAL, biblio.optString("container_name"));
entry.setField(StandardField.DOI, biblio.optString("doi"));
entry.setField(StandardField.ISSUE, biblio.optString("issue"));
entry.setField(StandardField.LANGUAGE, biblio.optString("lang_code"));
entry.setField(StandardField.PUBLISHER, biblio.optString("publisher"));

entry.setField(StandardField.YEAR, String.valueOf(biblio.optInt("release_year")));
entry.setField(StandardField.VOLUME, String.valueOf(biblio.optInt("volume_int")));
entry.setField(StandardField.ABSTRACT, abstracts.optString("body"));

// Date
String dateString = biblio.optString("date");
entry.setField(StandardField.DATE, dateString);

// Authors
if (biblio.has("contrib_names")) {
JSONArray authors = biblio.getJSONArray("contrib_names");
List<String> authorList = new ArrayList<>();
for (int i = 0; i < authors.length(); i++) {
authorList.add(authors.getString(i));
}
AuthorList parsedAuthors = AuthorList.parse(String.join(" and ", authorList));
entry.setField(StandardField.AUTHOR, String.join(" and ", parsedAuthors.getAsFirstLastNamesWithAnd()));
} else {
LOGGER.debug("No author found.");
}

// ISSN
Siedlerchr marked this conversation as resolved.
Show resolved Hide resolved
if (biblio.has("issns")) {
JSONArray issn = biblio.getJSONArray("issns");
List<String> issnList = new ArrayList<>();
for (int i = 0; i < issn.length(); i++) {
issnList.add(issn.getString(i));
}
entry.setField(StandardField.ISSN, String.join(" ", issnList));
} else {
LOGGER.debug("No issns found.");
}

return entry;
} catch (JSONException exception) {
throw new ParseException("ScholarArchive API JSON format has changed", exception);
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
package org.jabref.logic.importer.fetcher.transformers;

/**
* This class extends the AbstractQueryTransformer to provide specific implementations
* for transforming standard queries into ones suitable for the Scholar Archive's unique format.
*/
public class ScholarArchiveQueryTransformer extends AbstractQueryTransformer {

/**
* Returns the operator for logical "AND" used in the Scholar Archive query language.
*
* @return A string representing the logical "AND" operator.
*/
Siedlerchr marked this conversation as resolved.
Show resolved Hide resolved
@Override
protected String getLogicalAndOperator() {
return " AND ";
}

/**
* Returns the operator for logical "OR" used in the Scholar Archive query language.
*
* @return A string representing the logical "OR" operator.
*/
@Override
protected String getLogicalOrOperator() {
return " OR ";
}

/**
* Returns the operator for logical "NOT" used in the Scholar Archive query language.
*
* @return A string representing the logical "NOT" operator.
*/
@Override
protected String getLogicalNotOperator() {
return "NOT ";
}

/**
* Transforms the author query segment into a 'contrib_names' key-value pair for the Scholar Archive query.
*
* @param author the author's name to be searched in the Scholar Archive.
* @return A string query segment representing the author search criterion.
*/
@Override
protected String handleAuthor(String author) {
return createKeyValuePair("contrib_names", author);
}

/**
* Transforms the title query segment into a 'title' key-value pair for the Scholar Archive query.
*
* @param title the title of the work to be searched in the Scholar Archive.
* @return A string query segment representing the title search criterion.
*/
@Override
protected String handleTitle(String title) {
return createKeyValuePair("title", title);
}

/**
* Transforms the journal title query segment into a 'container_name' key-value pair for the Scholar Archive query.
*
* @param journalTitle the name of the journal to be searched in the Scholar Archive.
* @return A string query segment representing the journal title search criterion.
*/
@Override
protected String handleJournal(String journalTitle) {
return createKeyValuePair("container_name", journalTitle);
}

/**
* Handles the year query by formatting it specifically for a range search in the Scholar Archive.
* This method is for an exact year match.
*
* @param year the publication year to be searched in the Scholar Archive.
* @return A string query segment formatted for the year search.
*/
@Override
protected String handleYear(String year) {
return "publication.startDate:[" + year + " TO " + year + "]";
}

/**
* Handles a year range query, transforming it for the Scholar Archive's query format.
* If only a start year is provided, the range will extend to the current year.
*
* @param yearRange the range of years to be searched in the Scholar Archive, usually in the format "startYear-endYear".
* @return A string query segment formatted for the year range search.
*/
@Override
protected String handleYearRange(String yearRange) {
// This method presumably parses the year range into individual components.
Siedlerchr marked this conversation as resolved.
Show resolved Hide resolved
parseYearRange(yearRange);
if (endYear == Integer.MAX_VALUE) {
// If no specific end year is set, it assumes the range extends to the current year.
return yearRange;
}
// Formats the year range for inclusion in the Scholar Archive query.
Siedlerchr marked this conversation as resolved.
Show resolved Hide resolved
return "publication.startDate:[" + startYear + " TO " + endYear + "]";
}
}




Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
package org.jabref.logic.importer.fetcher;

import java.util.Collections;
import java.util.List;

import org.jabref.logic.importer.FetcherException;
import org.jabref.logic.importer.ImportFormatPreferences;
import org.jabref.logic.importer.Parser;
import org.jabref.logic.importer.SearchBasedParserFetcher;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.field.StandardField;
import org.jabref.model.entry.types.StandardEntryType;
import org.jabref.testutils.category.FetcherTest;

import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.mockito.Answers;
import org.mockito.Mock;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
import static org.mockito.MockitoAnnotations.openMocks;

@FetcherTest
public class ScholarArchiveFetcherTest {
private ScholarArchiveFetcher fetcher;
private BibEntry bibEntry;

@Mock
private ImportFormatPreferences preferences;

@BeforeEach
public void setUp() {
openMocks(this);
fetcher = new ScholarArchiveFetcher();
bibEntry = new BibEntry(StandardEntryType.Article)
.withField(StandardField.TITLE, "Article title")
.withField(StandardField.AUTHOR, "Sam Liu");
}

@Test
public void getNameReturnsCorrectName() {
assertEquals("ScholarArchive", fetcher.getName());
}

@Test
public void getParserReturnsNonNullParser() {
Parser parser = fetcher.getParser();
assertEquals(Parser.class, parser.getClass());
}

@Test
public void performSearchReturnsExpectedResults() throws FetcherException {
Siedlerchr marked this conversation as resolved.
Show resolved Hide resolved
SearchBasedParserFetcher fetcherMock = mock(SearchBasedParserFetcher.class, Answers.RETURNS_DEEP_STUBS);
when(fetcherMock.performSearch("query")).thenReturn(Collections.singletonList(bibEntry));
List<BibEntry> fetchedEntries = fetcher.performSearch("query");
assertEquals(Collections.singletonList(bibEntry), fetchedEntries);
}
}



Loading