Skip to content

Commit

Permalink
Fix for MathSciNet search (#11055)
Browse files Browse the repository at this point in the history
* Fix MathSciNet parser

* Conformed with stylechecks

* Fix Objects import missing

* Run  gradle reWriteRun

* Update BibEntry type set (via constructor)

Co-authored-by: Oliver Kopp <[email protected]>

* Apply review changes

* Update value:String to value:Optional<String>

Co-authored-by: Oliver Kopp <[email protected]>

* Change value setting to lambda form

Co-authored-by: Oliver Kopp <[email protected]>

* Update missing Optional.of()

Co-authored-by: Oliver Kopp <[email protected]>

* Update instanceof pattern matching syntax, removed explicit casts

* applied second round of review changes

* Changes as per third review round

* Readd bibtex parsing
fix tests, rename resources

---------

Co-authored-by: Oliver Kopp <[email protected]>
Co-authored-by: Siedlerchr <[email protected]>
  • Loading branch information
3 people authored Mar 21, 2024
1 parent 7bb9339 commit 295035a
Show file tree
Hide file tree
Showing 4 changed files with 1,478 additions and 20 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ Note that this project **does not** adhere to [Semantic Versioning](https://semv
- We fixed an issue where JabRef could not parse absolute file paths from Zotero exports. [#10959](https://github.com/JabRef/jabref/issues/10959)
- We fixed an issue where an exception occurred when toggling between "Live" or "Locked" in the internal Document Viewer. [#10935](https://github.com/JabRef/jabref/issues/10935)
- Fixed an issue on Windows where the browser extension reported failure to send an entry to JabRef even though it was sent properly. [JabRef-Browser-Extension#493](https://github.com/JabRef/JabRef-Browser-Extension/issues/493)
- We fixed an issue where JabRef would throw an error when using MathSciNet search, as it was unable to parse the fetched JSON correctly. [#10996](https://github.com/JabRef/jabref/issues/10996)

### Removed

Expand Down
145 changes: 135 additions & 10 deletions src/main/java/org/jabref/logic/importer/fetcher/MathSciNet.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,14 @@
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

import org.jabref.logic.cleanup.DoiCleanup;
import org.jabref.logic.cleanup.FieldFormatterCleanup;
Expand All @@ -29,14 +32,16 @@
import org.jabref.model.entry.field.AMSField;
import org.jabref.model.entry.field.StandardField;
import org.jabref.model.entry.field.UnknownField;
import org.jabref.model.entry.identifier.DOI;
import org.jabref.model.entry.types.StandardEntryType;
import org.jabref.model.util.DummyFileUpdateMonitor;

import kong.unirest.JsonNode;
import kong.unirest.json.JSONArray;
import kong.unirest.json.JSONException;
import kong.unirest.json.JSONObject;
import org.apache.http.client.utils.URIBuilder;
import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode;
import org.jbibtex.TokenMgrException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

Expand All @@ -45,6 +50,17 @@
*/
public class MathSciNet implements SearchBasedParserFetcher, EntryBasedParserFetcher, IdBasedParserFetcher {
private static final Logger LOGGER = LoggerFactory.getLogger(MathSciNet.class);

/**
 * Maps a BibTeX field to the sequence of JSON keys that is followed, starting at a single
 * result item, to reach the value for that field. The paths are walked by
 * {@code getOthers} when {@code jsonItemToBibEntry} builds an entry.
 */
private static final Map<StandardField, List<String>> FIELD_MAPPINGS = Map.of(
StandardField.TITLE, List.of("titles", "title"),
StandardField.YEAR, List.of("issue", "issue", "pubYear"),
StandardField.JOURNAL, List.of("issue", "issue", "journal", "shortTitle"),
StandardField.VOLUME, List.of("issue", "issue", "volume"),
StandardField.NUMBER, List.of("issue", "issue", "number"),
StandardField.PAGES, List.of("paging", "paging", "text"),
StandardField.ISSN, List.of("issue", "issue", "journal", "issn")
);

private final ImportFormatPreferences preferences;

public MathSciNet(ImportFormatPreferences preferences) {
Expand Down Expand Up @@ -102,34 +118,144 @@ public URL getUrlForIdentifier(String identifier) throws URISyntaxException, Mal
public Parser getParser() {
// Returns a parser that turns a MathSciNet JSON response into a list of BibEntry objects.
return inputStream -> {
// Read the complete response body into one string before interpreting it as JSON.
// NOTE(review): the InputStreamReader is created without an explicit charset, so the
// platform default is used - confirm this matches the encoding the server sends.
String response = new BufferedReader(new InputStreamReader(inputStream)).lines().collect(Collectors.joining(OS.NEWLINE));
List<BibEntry> entries = new ArrayList<>();
BibtexParser bibtexParser = new BibtexParser(preferences, new DummyFileUpdateMonitor());

List<BibEntry> entries = new ArrayList<>();
try {
// Depending on the type of query we might get either a json object or directly a json array
JsonNode node = new JsonNode(response);

// Top-level array: elements carry a BibTeX string in their "bib" member.
if (node.isArray()) {
JSONArray entriesArray = node.getArray();
for (int i = 0; i < entriesArray.length(); i++) {
String bibTexFormat = entriesArray.getJSONObject(i).getString("bib");
entries.addAll(bibtexParser.parseEntries(bibTexFormat));
JSONObject entryObject = entriesArray.getJSONObject(i);
if (entryObject.has("bib")) {
String bibTexFormat = entriesArray.getJSONObject(i).getString("bib");
entries.addAll(bibtexParser.parseEntries(bibTexFormat));
}
}
} else {
var element = node.getObject();
JSONArray entriesArray = element.getJSONObject("all").getJSONArray("results");
for (int i = 0; i < entriesArray.length(); i++) {
String bibTexFormat = entriesArray.getJSONObject(i).getString("bibTexFormat");
entries.addAll(bibtexParser.parseEntries(bibTexFormat));

// Object with an "all" member: BibTeX strings are read from all.results[i].bibTexFormat.
if (element.has("all")) {
JSONArray entriesArray = element.getJSONObject("all").getJSONArray("results");
for (int i = 0; i < entriesArray.length(); i++) {
String bibTexFormat = entriesArray.getJSONObject(i).getString("bibTexFormat");
entries.addAll(bibtexParser.parseEntries(bibTexFormat));
}
// Object with a top-level "results" member: each item is converted from its JSON
// properties by jsonItemToBibEntry instead of being parsed as BibTeX.
} else if (element.has("results")) {
JSONArray entriesArray = element.getJSONArray("results");
for (int i = 0; i < entriesArray.length(); i++) {
JSONObject entryObject = entriesArray.getJSONObject(i);
BibEntry bibEntry = jsonItemToBibEntry(entryObject);
entries.add(bibEntry);
}
}
}
} catch (JSONException | TokenMgrException e) {
} catch (JSONException | ParseException e) {
// Log the failure, then rethrow it wrapped in a ParseException with the cause attached.
LOGGER.error("An error occurred while parsing fetched data", e);
throw new ParseException("Error when parsing entry", e);
}
return entries;
};
}

/**
 * Builds an article entry from one item of the JSON "results" array.
 *
 * @param item the JSON object describing a single publication
 * @return the entry populated with every field that could be read from {@code item}
 * @throws ParseException if reading the JSON raises a {@link JSONException}
 */
private BibEntry jsonItemToBibEntry(JSONObject item) throws ParseException {
    try {
        BibEntry result = new BibEntry(StandardEntryType.Article);

        // Authors and keywords have dedicated extraction helpers
        toAuthors(item.optJSONArray("authors"))
                .ifPresent(authorList -> result.setField(StandardField.AUTHOR, authorList));
        getKeywords(item.optJSONObject("primaryClass"))
                .ifPresent(description -> result.setField(StandardField.KEYWORDS, description));

        // Every other nested value is looked up through its configured key path
        FIELD_MAPPINGS.forEach((field, jsonPath) ->
                getOthers(item, jsonPath).ifPresent(text -> result.setField(field, text)));

        // articleUrl and mrnumber are flat string properties and are read directly
        String articleUrl = item.optString("articleUrl", "");
        if (!articleUrl.isEmpty()) {
            try {
                DOI.parse(articleUrl)
                   .ifPresent(validDoi -> result.setField(StandardField.DOI, validDoi.getNormalized()));
            } catch (IllegalArgumentException e) {
                // Parsing threw: keep the raw value rather than losing it
                result.setField(StandardField.DOI, articleUrl);
            }
        }

        String mathReviewsNumber = item.optString("mrnumber", "");
        if (!mathReviewsNumber.isEmpty()) {
            result.setField(StandardField.MR_NUMBER, mathReviewsNumber);
        }

        return result;
    } catch (JSONException exception) {
        throw new ParseException("MathSciNet API JSON format has changed", exception);
    }
}

/**
 * Joins the {@code name} properties of the given author objects with {@code " and "}.
 * <p>
 * Authors without a usable name are skipped so that no dangling separators are produced,
 * and an empty result is reported as {@link Optional#empty()} instead of an empty string.
 *
 * @param authors the JSON array of author objects; may be {@code null}
 * @return the joined author string, or empty if the array is {@code null} or yields no names
 */
private Optional<String> toAuthors(JSONArray authors) {
    if (authors == null) {
        return Optional.empty();
    }

    String authorsString = IntStream.range(0, authors.length())
            .mapToObj(authors::getJSONObject)
            .map(author -> fixStringEncoding(author.optString("name", "")))
            // a missing or blank name would otherwise show up as "A and  and B"
            .filter(name -> !name.isBlank())
            .collect(Collectors.joining(" and "));

    return authorsString.isEmpty() ? Optional.empty() : Optional.of(authorsString);
}

/**
 * Reads the {@code description} property of the primary classification object.
 *
 * @param primaryClass the classification object; may be {@code null}
 * @return the description, or empty if the object or the property is absent
 */
private Optional<String> getKeywords(JSONObject primaryClass) {
    return Optional.ofNullable(primaryClass)
                   .map(classification -> classification.optString("description", null));
}

/**
 * Follows {@code keys} through nested JSON objects and arrays, starting at {@code item},
 * and returns the value found at the end of the path.
 * <p>
 * The value is only returned when the complete path could be resolved. If a step hits a
 * missing member, a scalar, or an array step whose key is not an integer index, the
 * result is empty rather than an intermediate value or an exception.
 *
 * @param item the JSON object to start from
 * @param keys the member names (or array indices, written as decimal strings) to follow
 * @return the string value, or the decimal form of an integer value, at the end of the
 *         path; empty if the path cannot be resolved or ends in any other type
 */
private Optional<String> getOthers(JSONObject item, List<String> keys) {
    Object value = item;
    for (String key : keys) {
        if (value instanceof JSONObject obj) {
            value = obj.opt(key);
        } else if (value instanceof JSONArray arr) {
            try {
                value = arr.opt(Integer.parseInt(key));
            } catch (NumberFormatException e) {
                // the path addresses a named member, but the data holds an array here
                return Optional.empty();
            }
        } else {
            // path not fully resolved: do not hand back whatever was reached so far
            return Optional.empty();
        }
    }

    if (value instanceof String stringValue) {
        return Optional.of(fixStringEncoding(stringValue));
    } else if (value instanceof Integer intValue) {
        return Optional.of(intValue.toString());
    }

    return Optional.empty();
}

/**
 * Repairs text whose UTF-8 bytes were decoded as ISO-8859-1, which shows up as anomalous
 * characters in place of accented letters.
 * <p>
 * The conversion is only applied when it is lossless and meaningful: every character must
 * fit into ISO-8859-1 and the resulting bytes must form valid UTF-8. Any other input is
 * taken to be correctly decoded already and is returned unchanged, so that it is not
 * turned into {@code ?} or U+FFFD replacement characters.
 *
 * @param value the possibly mis-decoded text
 * @return the repaired text, or {@code value} itself if no repair applies
 */
private String fixStringEncoding(String value) {
    // A character above U+00FF cannot stem from an ISO-8859-1 decoding step
    if (value.chars().anyMatch(character -> character > 0xFF)) {
        return value;
    }

    byte[] rawBytes = value.getBytes(StandardCharsets.ISO_8859_1);
    try {
        return StandardCharsets.UTF_8.newDecoder()
                                     .onMalformedInput(CodingErrorAction.REPORT)
                                     .onUnmappableCharacter(CodingErrorAction.REPORT)
                                     .decode(ByteBuffer.wrap(rawBytes))
                                     .toString();
    } catch (CharacterCodingException e) {
        // Not valid UTF-8, hence not mis-decoded UTF-8: keep the text as it is
        return value;
    }
}

@Override
public void doPostCleanup(BibEntry entry) {
new MoveFieldCleanup(AMSField.FJOURNAL, StandardField.JOURNAL).cleanup(entry);
Expand All @@ -142,4 +268,3 @@ public void doPostCleanup(BibEntry entry) {
entry.setCommentsBeforeEntry("");
}
}

33 changes: 23 additions & 10 deletions src/test/java/org/jabref/logic/importer/fetcher/MathSciNetTest.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package org.jabref.logic.importer.fetcher;

import java.util.Collections;
import java.io.InputStream;
import java.util.List;
import java.util.Optional;

Expand All @@ -22,15 +22,13 @@

@FetcherTest
class MathSciNetTest {

MathSciNet fetcher;
private BibEntry ratiuEntry;

@BeforeEach
void setUp() throws Exception {
ImportFormatPreferences importFormatPreferences = mock(ImportFormatPreferences.class, Answers.RETURNS_DEEP_STUBS);
when(importFormatPreferences.bibEntryPreferences().getKeywordSeparator()).thenReturn(',');

fetcher = new MathSciNet(importFormatPreferences);

ratiuEntry = new BibEntry();
Expand All @@ -43,9 +41,9 @@ void setUp() throws Exception {
ratiuEntry.setField(StandardField.YEAR, "2016");
ratiuEntry.setField(StandardField.NUMBER, "3");
ratiuEntry.setField(StandardField.PAGES, "571--589");
ratiuEntry.setField(StandardField.ISSN, "1422-6928,1422-6952");
ratiuEntry.setField(StandardField.KEYWORDS, "76A15 (35A01 35A02 35K61 82D30)");
ratiuEntry.setField(StandardField.MR_NUMBER, "3537908");
ratiuEntry.setField(StandardField.ISSN, "1422-6928,1422-6952");
ratiuEntry.setField(StandardField.DOI, "10.1007/s00021-016-0250-0");
}

Expand All @@ -57,7 +55,7 @@ void searchByEntryFindsEntry() throws Exception {
searchEntry.setField(StandardField.JOURNAL, "fluid");

List<BibEntry> fetchedEntries = fetcher.performSearch(searchEntry);
assertEquals(Collections.singletonList(ratiuEntry), fetchedEntries);
assertEquals(List.of(ratiuEntry), fetchedEntries);
}

@Test
Expand All @@ -67,7 +65,7 @@ void searchByIdInEntryFindsEntry() throws Exception {
searchEntry.setField(StandardField.MR_NUMBER, "3537908");

List<BibEntry> fetchedEntries = fetcher.performSearch(searchEntry);
assertEquals(Collections.singletonList(ratiuEntry), fetchedEntries);
assertEquals(List.of(ratiuEntry), fetchedEntries);
}

@Test
Expand All @@ -79,9 +77,24 @@ void searchByQueryFindsEntry() throws Exception {
}

@Test
@DisabledOnCIServer("CI server has no subscription to MathSciNet and thus gets 401 response")
void searchByIdFindsEntry() throws Exception {
Optional<BibEntry> fetchedEntry = fetcher.performSearchById("3537908");
assertEquals(Optional.of(ratiuEntry), fetchedEntry);
void getParser() throws Exception {
    // The entry expected at the first position of the bundled JSON sample
    BibEntry expectedEntry = new BibEntry(StandardEntryType.Article)
            .withField(StandardField.TITLE, "On the weights of general MDS codes")
            .withField(StandardField.AUTHOR, "Alderson, Tim L.")
            .withField(StandardField.YEAR, "2020")
            .withField(StandardField.JOURNAL, "IEEE Trans. Inform. Theory")
            .withField(StandardField.VOLUME, "66")
            .withField(StandardField.NUMBER, "9")
            .withField(StandardField.PAGES, "5414--5418")
            .withField(StandardField.MR_NUMBER, "4158623")
            .withField(StandardField.KEYWORDS, "Bounds on codes")
            .withField(StandardField.DOI, "10.1109/TIT.2020.2977319");

    try (InputStream jsonStream = MathSciNetTest.class.getResourceAsStream("mathscinet.json")) {
        List<BibEntry> parsedEntries = fetcher.getParser().parseEntries(jsonStream);

        assertEquals(Optional.of(expectedEntry), parsedEntries.stream().findFirst());
    }
}
}
Loading

0 comments on commit 295035a

Please sign in to comment.