-
-
Notifications
You must be signed in to change notification settings - Fork 2.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix #10498 Create Fetcher and Transformer for ScholarArchive #10549
Merged
Merged
Changes from 4 commits
Commits
Show all changes
13 commits
Select commit
Hold shift + click to select a range
1249d7d
Create Fetcher and Transformer for ScholarArchive
u7156540 18512df
Merge branch 'main' into ScholarArchiveFetcher
liyou969 505bcef
Finish change requirement including code style, testing, some error ,…
u7156540 951c8b6
Merge remote-tracking branch 'origin/ScholarArchiveFetcher' into Scho…
u7156540 c2467cd
Merge remote-tracking branch 'upstream/main' into ScholarArchiveFetcher
Siedlerchr d036a40
Finish and fix archive scholar fetcher
Siedlerchr 4692998
add url
Siedlerchr 272c5fc
fix arch
Siedlerchr 6d510f7
fix test
Siedlerchr 878efac
fix var name
Siedlerchr fcac446
remove comments
Siedlerchr 0d2fe08
add changelog
Siedlerchr e97ea04
fuck this changelog
Siedlerchr File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
171 changes: 171 additions & 0 deletions
171
src/main/java/org/jabref/logic/importer/fetcher/ScholarArchiveFetcher.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,171 @@ | ||
package org.jabref.logic.importer.fetcher; | ||
|
||
import java.net.MalformedURLException; | ||
import java.net.URISyntaxException; | ||
import java.net.URL; | ||
import java.util.ArrayList; | ||
import java.util.List; | ||
|
||
import org.jabref.logic.importer.FetcherException; | ||
import org.jabref.logic.importer.PagedSearchBasedParserFetcher; | ||
import org.jabref.logic.importer.ParseException; | ||
import org.jabref.logic.importer.Parser; | ||
import org.jabref.logic.importer.fetcher.transformers.ScholarArchiveQueryTransformer; | ||
import org.jabref.logic.importer.util.JsonReader; | ||
import org.jabref.model.entry.AuthorList; | ||
import org.jabref.model.entry.BibEntry; | ||
import org.jabref.model.entry.field.StandardField; | ||
import org.jabref.model.entry.types.EntryType; | ||
import org.jabref.model.entry.types.StandardEntryType; | ||
|
||
import kong.unirest.json.JSONArray; | ||
import kong.unirest.json.JSONException; | ||
import kong.unirest.json.JSONObject; | ||
import org.apache.http.client.utils.URIBuilder; | ||
import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; | ||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
|
||
public class ScholarArchiveFetcher implements PagedSearchBasedParserFetcher { | ||
|
||
// Define a constant for the fetcher name. | ||
public static final String FETCHER_NAME = "ScholarArchive"; | ||
|
||
// Initialize the logger for this class. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Remove that comment. Next line states some. |
||
private static final Logger LOGGER = LoggerFactory.getLogger(ScholarArchiveFetcher.class); | ||
|
||
// Define the API URL for ScholarArchive. | ||
private static final String API_URL = "https://scholar.archive.org/search"; | ||
|
||
/** | ||
* Gets the query URL by luceneQuery and pageNumber. | ||
* | ||
* @param luceneQuery the search query | ||
* @param pageNumber the number of the page indexed from 0 | ||
* @return URL | ||
Siedlerchr marked this conversation as resolved.
Show resolved
Hide resolved
|
||
*/ | ||
@Override | ||
public URL getURLForQuery(QueryNode luceneQuery, int pageNumber) throws URISyntaxException, MalformedURLException, FetcherException { | ||
URIBuilder uriBuilder = new URIBuilder(API_URL); | ||
|
||
// Add search query parameter to the URL. | ||
Siedlerchr marked this conversation as resolved.
Show resolved
Hide resolved
|
||
uriBuilder.addParameter("q", new ScholarArchiveQueryTransformer().transformLuceneQuery(luceneQuery).orElse("")); | ||
|
||
// Add page number and page size parameters to the URL. | ||
Siedlerchr marked this conversation as resolved.
Show resolved
Hide resolved
|
||
uriBuilder.addParameter("from", String.valueOf(getPageSize() * pageNumber)); | ||
uriBuilder.addParameter("size", String.valueOf(getPageSize())); | ||
|
||
// Specify the response format as JSON. | ||
Siedlerchr marked this conversation as resolved.
Show resolved
Hide resolved
|
||
uriBuilder.addParameter("format", "json"); | ||
|
||
// Build the URL. | ||
Siedlerchr marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return uriBuilder.build().toURL(); | ||
} | ||
|
||
/** | ||
* Gets the list of BibEntry by given Json response from scholar archive fetcher API | ||
* | ||
* @return Parser, list of BibEntry | ||
*/ | ||
@Override | ||
public Parser getParser() { | ||
return inputStream -> { | ||
// Read the API response. | ||
JSONObject response = JsonReader.toJsonObject(inputStream); | ||
|
||
// Parse the JSON response into a list of BibEntry objects. | ||
JSONObject jsonObject = new JSONObject(response); | ||
List<BibEntry> entries = new ArrayList<>(); | ||
if (jsonObject.has("results")) { | ||
JSONArray results = jsonObject.getJSONArray("results"); | ||
for (int i = 0; i < results.length(); i++) { | ||
JSONObject jsonEntry = results.getJSONObject(i); | ||
BibEntry entry = parseJSONtoBibtex(jsonEntry); | ||
entries.add(entry); | ||
} | ||
} | ||
|
||
return entries; | ||
}; | ||
} | ||
|
||
/** | ||
* Gets he name of fetcher | ||
* | ||
* @return The fetcher name | ||
*/ | ||
Siedlerchr marked this conversation as resolved.
Show resolved
Hide resolved
|
||
@Override | ||
public String getName() { | ||
return FETCHER_NAME; | ||
} | ||
|
||
/** | ||
* Parse from Json object that contain one article to BibEntry | ||
* | ||
* @param jsonEntry the search query | ||
* @return BibEntry | ||
* @throws ParseException | ||
Siedlerchr marked this conversation as resolved.
Show resolved
Hide resolved
|
||
*/ | ||
private BibEntry parseJSONtoBibtex(JSONObject jsonEntry) throws ParseException { | ||
try { | ||
BibEntry entry = new BibEntry(); | ||
EntryType entryType = StandardEntryType.InCollection; | ||
JSONObject biblio = jsonEntry.optJSONObject("biblio"); | ||
JSONObject abstracts = jsonEntry.optJSONObject("abstracts"); | ||
|
||
// publication type | ||
String type = biblio.optString("release_type"); | ||
entry.setField(StandardField.TYPE, type); | ||
if (type.toLowerCase().contains("book")) { | ||
entryType = StandardEntryType.Book; | ||
} else if (type.toLowerCase().contains("article")) { | ||
entryType = StandardEntryType.Article; | ||
} | ||
entry.setType(entryType); | ||
|
||
entry.setField(StandardField.TITLE, biblio.optString("title")); | ||
entry.setField(StandardField.JOURNAL, biblio.optString("container_name")); | ||
entry.setField(StandardField.DOI, biblio.optString("doi")); | ||
entry.setField(StandardField.ISSUE, biblio.optString("issue")); | ||
entry.setField(StandardField.LANGUAGE, biblio.optString("lang_code")); | ||
entry.setField(StandardField.PUBLISHER, biblio.optString("publisher")); | ||
|
||
entry.setField(StandardField.YEAR, String.valueOf(biblio.optInt("release_year"))); | ||
entry.setField(StandardField.VOLUME, String.valueOf(biblio.optInt("volume_int"))); | ||
entry.setField(StandardField.ABSTRACT, abstracts.optString("body")); | ||
|
||
// Date | ||
String dateString = biblio.optString("date"); | ||
entry.setField(StandardField.DATE, dateString); | ||
|
||
// Authors | ||
if (biblio.has("contrib_names")) { | ||
JSONArray authors = biblio.getJSONArray("contrib_names"); | ||
List<String> authorList = new ArrayList<>(); | ||
for (int i = 0; i < authors.length(); i++) { | ||
authorList.add(authors.getString(i)); | ||
} | ||
AuthorList parsedAuthors = AuthorList.parse(String.join(" and ", authorList)); | ||
entry.setField(StandardField.AUTHOR, String.join(" and ", parsedAuthors.getAsFirstLastNamesWithAnd())); | ||
} else { | ||
LOGGER.debug("No author found."); | ||
} | ||
|
||
// ISSN | ||
Siedlerchr marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if (biblio.has("issns")) { | ||
JSONArray issn = biblio.getJSONArray("issns"); | ||
List<String> issnList = new ArrayList<>(); | ||
for (int i = 0; i < issn.length(); i++) { | ||
issnList.add(issn.getString(i)); | ||
} | ||
entry.setField(StandardField.ISSN, String.join(" ", issnList)); | ||
} else { | ||
LOGGER.debug("No issns found."); | ||
} | ||
|
||
return entry; | ||
} catch (JSONException exception) { | ||
throw new ParseException("ScholarArchive API JSON format has changed", exception); | ||
} | ||
} | ||
} |
106 changes: 106 additions & 0 deletions
106
...n/java/org/jabref/logic/importer/fetcher/transformers/ScholarArchiveQueryTransformer.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,106 @@ | ||
package org.jabref.logic.importer.fetcher.transformers; | ||
|
||
/** | ||
* This class extends the AbstractQueryTransformer to provide specific implementations | ||
* for transforming standard queries into ones suitable for the Scholar Archive's unique format. | ||
*/ | ||
public class ScholarArchiveQueryTransformer extends AbstractQueryTransformer { | ||
|
||
/** | ||
* Returns the operator for logical "AND" used in the Scholar Archive query language. | ||
* | ||
* @return A string representing the logical "AND" operator. | ||
*/ | ||
Siedlerchr marked this conversation as resolved.
Show resolved
Hide resolved
|
||
@Override | ||
protected String getLogicalAndOperator() { | ||
return " AND "; | ||
} | ||
|
||
/** | ||
* Returns the operator for logical "OR" used in the Scholar Archive query language. | ||
* | ||
* @return A string representing the logical "OR" operator. | ||
*/ | ||
@Override | ||
protected String getLogicalOrOperator() { | ||
return " OR "; | ||
} | ||
|
||
/** | ||
* Returns the operator for logical "NOT" used in the Scholar Archive query language. | ||
* | ||
* @return A string representing the logical "NOT" operator. | ||
*/ | ||
@Override | ||
protected String getLogicalNotOperator() { | ||
return "NOT "; | ||
} | ||
|
||
/** | ||
* Transforms the author query segment into a 'contrib_names' key-value pair for the Scholar Archive query. | ||
* | ||
* @param author the author's name to be searched in the Scholar Archive. | ||
* @return A string query segment representing the author search criterion. | ||
*/ | ||
@Override | ||
protected String handleAuthor(String author) { | ||
return createKeyValuePair("contrib_names", author); | ||
} | ||
|
||
/** | ||
* Transforms the title query segment into a 'title' key-value pair for the Scholar Archive query. | ||
* | ||
* @param title the title of the work to be searched in the Scholar Archive. | ||
* @return A string query segment representing the title search criterion. | ||
*/ | ||
@Override | ||
protected String handleTitle(String title) { | ||
return createKeyValuePair("title", title); | ||
} | ||
|
||
/** | ||
* Transforms the journal title query segment into a 'container_name' key-value pair for the Scholar Archive query. | ||
* | ||
* @param journalTitle the name of the journal to be searched in the Scholar Archive. | ||
* @return A string query segment representing the journal title search criterion. | ||
*/ | ||
@Override | ||
protected String handleJournal(String journalTitle) { | ||
return createKeyValuePair("container_name", journalTitle); | ||
} | ||
|
||
/** | ||
* Handles the year query by formatting it specifically for a range search in the Scholar Archive. | ||
* This method is for an exact year match. | ||
* | ||
* @param year the publication year to be searched in the Scholar Archive. | ||
* @return A string query segment formatted for the year search. | ||
*/ | ||
@Override | ||
protected String handleYear(String year) { | ||
return "publication.startDate:[" + year + " TO " + year + "]"; | ||
} | ||
|
||
/** | ||
* Handles a year range query, transforming it for the Scholar Archive's query format. | ||
* If only a start year is provided, the range will extend to the current year. | ||
* | ||
* @param yearRange the range of years to be searched in the Scholar Archive, usually in the format "startYear-endYear". | ||
* @return A string query segment formatted for the year range search. | ||
*/ | ||
@Override | ||
protected String handleYearRange(String yearRange) { | ||
// This method presumably parses the year range into individual components. | ||
Siedlerchr marked this conversation as resolved.
Show resolved
Hide resolved
|
||
parseYearRange(yearRange); | ||
if (endYear == Integer.MAX_VALUE) { | ||
// If no specific end year is set, it assumes the range extends to the current year. | ||
return yearRange; | ||
} | ||
// Formats the year range for inclusion in the Scholar Archive query. | ||
Siedlerchr marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return "publication.startDate:[" + startYear + " TO " + endYear + "]"; | ||
} | ||
} | ||
|
||
|
||
|
||
|
63 changes: 63 additions & 0 deletions
63
src/test/java/org/jabref/logic/importer/fetcher/ScholarArchiveFetcherTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
package org.jabref.logic.importer.fetcher; | ||
|
||
import java.util.Collections; | ||
import java.util.List; | ||
|
||
import org.jabref.logic.importer.FetcherException; | ||
import org.jabref.logic.importer.ImportFormatPreferences; | ||
import org.jabref.logic.importer.Parser; | ||
import org.jabref.logic.importer.SearchBasedParserFetcher; | ||
import org.jabref.model.entry.BibEntry; | ||
import org.jabref.model.entry.field.StandardField; | ||
import org.jabref.model.entry.types.StandardEntryType; | ||
import org.jabref.testutils.category.FetcherTest; | ||
|
||
import org.junit.jupiter.api.BeforeEach; | ||
import org.junit.jupiter.api.Test; | ||
import org.mockito.Answers; | ||
import org.mockito.Mock; | ||
|
||
import static org.junit.jupiter.api.Assertions.assertEquals; | ||
import static org.mockito.Mockito.mock; | ||
import static org.mockito.Mockito.when; | ||
import static org.mockito.MockitoAnnotations.openMocks; | ||
|
||
@FetcherTest | ||
public class ScholarArchiveFetcherTest { | ||
private ScholarArchiveFetcher fetcher; | ||
private BibEntry bibEntry; | ||
|
||
@Mock | ||
private ImportFormatPreferences preferences; | ||
|
||
@BeforeEach | ||
public void setUp() { | ||
openMocks(this); | ||
fetcher = new ScholarArchiveFetcher(); | ||
bibEntry = new BibEntry(StandardEntryType.Article) | ||
.withField(StandardField.TITLE, "Article title") | ||
.withField(StandardField.AUTHOR, "Sam Liu"); | ||
} | ||
|
||
@Test | ||
public void getNameReturnsCorrectName() { | ||
assertEquals("ScholarArchive", fetcher.getName()); | ||
} | ||
|
||
@Test | ||
public void getParserReturnsNonNullParser() { | ||
Parser parser = fetcher.getParser(); | ||
assertEquals(Parser.class, parser.getClass()); | ||
} | ||
|
||
@Test | ||
public void performSearchReturnsExpectedResults() throws FetcherException { | ||
Siedlerchr marked this conversation as resolved.
Show resolved
Hide resolved
|
||
SearchBasedParserFetcher fetcherMock = mock(SearchBasedParserFetcher.class, Answers.RETURNS_DEEP_STUBS); | ||
when(fetcherMock.performSearch("query")).thenReturn(Collections.singletonList(bibEntry)); | ||
List<BibEntry> fetchedEntries = fetcher.performSearch("query"); | ||
assertEquals(Collections.singletonList(bibEntry), fetchedEntries); | ||
} | ||
} | ||
|
||
|
||
|
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Remove that comment. The next line states the same.