Skip to content

Commit

Permalink
Merge pull request #8149 from JabRef/fixArxivIdParsing
Browse files Browse the repository at this point in the history
Fix ArXiVIdentifier -- dot is required and not arbitrary character
  • Loading branch information
Siedlerchr authored Oct 17, 2021
2 parents c5b5698 + b3a9663 commit eea27ab
Show file tree
Hide file tree
Showing 9 changed files with 110 additions and 227 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve
- The metadata-to-pdf actions now also embeds the bibfile to the PDF. [#8037](https://github.com/JabRef/jabref/pull/8037)
- The snap was updated to use the core20 base and to use lzo compression for better startup performance [#8109](https://github.com/JabRef/jabref/pull/8109)
- We improved the Drag and Drop behavior in the "Customize Entry Types" Dialog [#6338](https://github.com/JabRef/jabref/issues/6338)
- When determining the URL of an ArXiV eprint, the URL now points to the version [#8149](https://github.com/JabRef/jabref/pull/8149)

### Fixed

Expand Down
9 changes: 7 additions & 2 deletions src/main/java/org/jabref/gui/desktop/JabRefDesktop.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@
import org.jabref.model.database.BibDatabaseContext;
import org.jabref.model.entry.field.Field;
import org.jabref.model.entry.field.StandardField;
import org.jabref.model.entry.identifier.ArXivIdentifier;
import org.jabref.model.entry.identifier.DOI;
import org.jabref.model.entry.identifier.Eprint;
import org.jabref.model.util.FileHelper;
import org.jabref.preferences.PreferencesService;

Expand Down Expand Up @@ -84,7 +84,12 @@ public static void openExternalViewer(BibDatabaseContext databaseContext,
openDoi(link);
return;
} else if (StandardField.EPRINT.equals(field)) {
link = Eprint.build(link).map(Eprint::getURIAsASCIIString).orElse(link);
link = ArXivIdentifier.parse(link)
.map(ArXivIdentifier::getExternalURI)
.filter(Optional::isPresent)
.map(Optional::get)
.map(URI::toASCIIString)
.orElse(link);
// should be opened in browser
field = StandardField.URL;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@

import org.jabref.model.entry.field.Field;
import org.jabref.model.entry.field.StandardField;
import org.jabref.model.entry.identifier.ArXivIdentifier;
import org.jabref.model.entry.identifier.DOI;
import org.jabref.model.entry.identifier.Eprint;
import org.jabref.model.entry.identifier.ISBN;
import org.jabref.model.entry.identifier.Identifier;
import org.jabref.model.entry.identifier.MathSciNetId;
Expand All @@ -28,7 +28,7 @@ private static Function<String, Optional<? extends Identifier>> getParserForFiel
} else if (StandardField.ISBN.equals(field)) {
return ISBN::parse;
} else if (StandardField.EPRINT.equals(field)) {
return Eprint::build;
return ArXivIdentifier::parse;
} else if (StandardField.MR_NUMBER.equals(field)) {
return MathSciNetId::parse;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,36 +37,34 @@ public class ArXivIdentifier implements Identifier {

/**
 * Tries to interpret the given text as an arXiv identifier.
 * <p>
 * Two notations are attempted in order: the numeric one (for example {@code 0706.0001v1},
 * optionally followed by a {@code [classification]}) and the legacy one (for example
 * {@code math.GT/0309136}, optionally followed by a {@code vN} version suffix). Either may be
 * preceded by {@code ARXIV_PREFIX} and a colon. The whole input has to match; an identifier
 * embedded in other text is not accepted.
 *
 * @param value the text to parse; all space characters are removed before matching
 * @return the parsed identifier, or {@link Optional#empty()} if neither notation matches
 */
public static Optional<ArXivIdentifier> parse(String value) {
// Strip every space first so that padded input can still match as a whole.
String identifier = value.replaceAll(" ", "");
Pattern identifierPattern = Pattern.compile("(" + ARXIV_PREFIX + ")?\\s?:?\\s?(?<id>\\d{4}.\\d{4,5})(v(?<version>\\d+))?\\s?(\\[(?<classification>\\S+)\\])?");
Pattern identifierPattern = Pattern.compile("(" + ARXIV_PREFIX + ")?\\s?:?\\s?(?<id>\\d{4}\\.\\d{4,5})(v(?<version>\\d+))?\\s?(\\[(?<classification>\\S+)\\])?");
Matcher identifierMatcher = identifierPattern.matcher(identifier);
// matches() only succeeds when the pattern covers the complete input.
if (identifierMatcher.matches()) {
String id = identifierMatcher.group("id");
String classification = identifierMatcher.group("classification");
if (classification == null) {
classification = "";
}
String version = identifierMatcher.group("version");
if (version == null) {
version = "";
}
return Optional.of(new ArXivIdentifier(id, version, classification));
return getArXivIdentifier(identifierMatcher);
}

// Legacy notation: the classification is part of the id itself, followed by '/' and seven digits.
Pattern oldIdentifierPattern = Pattern.compile("(" + ARXIV_PREFIX + ")?\\s?:?\\s?(?<id>(?<classification>[a-z\\-]+(\\.[A-Z]{2})?)/\\d{7})(v(?<version>\\d+))?");
Matcher oldIdentifierMatcher = oldIdentifierPattern.matcher(identifier);
if (oldIdentifierMatcher.matches()) {
String id = oldIdentifierMatcher.group("id");
String classification = oldIdentifierMatcher.group("classification");
String version = oldIdentifierMatcher.group("version");
if (version == null) {
version = "";
}
return Optional.of(new ArXivIdentifier(id, version, classification));
return getArXivIdentifier(oldIdentifierMatcher);
}

return Optional.empty();
}

/**
 * Creates an identifier from the named groups {@code id}, {@code version} and
 * {@code classification} of a matcher that has already matched successfully.
 *
 * @param matcher a matcher in a matched state whose pattern declares all three named groups
 * @return the identifier wrapped in an {@link Optional}; never empty
 */
private static Optional<ArXivIdentifier> getArXivIdentifier(Matcher matcher) {
    // An optional group that did not take part in the match is reported as null;
    // the identifier stores the empty string in that case.
    String versionOrEmpty = Optional.ofNullable(matcher.group("version")).orElse("");
    String classificationOrEmpty = Optional.ofNullable(matcher.group("classification")).orElse("");
    return Optional.of(new ArXivIdentifier(matcher.group("id"), versionOrEmpty, classificationOrEmpty));
}

public Optional<String> getClassification() {
if (classification.isEmpty()) {
return Optional.empty();
Expand Down Expand Up @@ -123,7 +121,7 @@ public String getNormalizedWithoutVersion() {
@Override
public Optional<URI> getExternalURI() {
try {
return Optional.of(new URI("https://arxiv.org/abs/" + identifier));
return Optional.of(new URI("https://arxiv.org/abs/" + getNormalized()));
} catch (URISyntaxException e) {
return Optional.empty();
}
Expand Down
142 changes: 0 additions & 142 deletions src/main/java/org/jabref/model/entry/identifier/Eprint.java

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import java.util.List;
import java.util.Optional;

import org.jabref.logic.importer.FetcherException;
import org.jabref.logic.importer.ImportFormatPreferences;
import org.jabref.logic.importer.PagedSearchBasedFetcher;
import org.jabref.logic.importer.SearchBasedFetcher;
Expand Down Expand Up @@ -223,7 +222,7 @@ void searchEntryByIdWith5Digits() throws Exception {

@Test
void searchWithMalformedIdThrowsException() throws Exception {
assertThrows(FetcherException.class, () -> fetcher.performSearchById("123412345"));
assertEquals(Optional.empty(), fetcher.performSearchById("123412345"));
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.field.StandardField;
import org.jabref.model.entry.types.StandardEntryType;
import org.jabref.support.DisabledOnCIServer;
import org.jabref.testutils.category.FetcherTest;

import org.junit.jupiter.api.BeforeEach;
Expand All @@ -30,6 +31,7 @@
import static org.mockito.Mockito.mock;

@FetcherTest
@DisabledOnCIServer("eprint.iacr.org blocks with 500 when there are too many calls from the same IP address.")
public class IacrEprintFetcherTest {

private IacrEprintFetcher fetcher;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package org.jabref.model.entry.identifier;

import java.net.URI;
import java.util.Optional;

import org.junit.jupiter.api.Test;
Expand Down Expand Up @@ -50,6 +51,18 @@ void parseOldIdentifier() throws Exception {
assertEquals(Optional.of(new ArXivIdentifier("math.GT/0309136", "math.GT")), parsed);
}

@Test
public void acceptLegacyEprint() {
    // Legacy ids have the shape "<classification>/<seven digits>"; the classification is part of the id.
    ArXivIdentifier expected = new ArXivIdentifier("astro-ph.GT/1234567", "astro-ph.GT");
    assertEquals(Optional.of(expected), ArXivIdentifier.parse("astro-ph.GT/1234567"));
}

@Test
public void acceptLegacyMathEprint() {
    // A legacy classification may come without the ".XX" subject suffix.
    ArXivIdentifier expected = new ArXivIdentifier("math/1234567", "math");
    assertEquals(Optional.of(expected), ArXivIdentifier.parse("math/1234567"));
}

@Test
void parseOldIdentifierWithArXivPrefix() throws Exception {
Optional<ArXivIdentifier> parsed = ArXivIdentifier.parse("arXiv:math.GT/0309136");
Expand Down Expand Up @@ -91,4 +104,71 @@ void parseOldUrlWithVersion() throws Exception {

assertEquals(Optional.of(new ArXivIdentifier("hep-ex/0307015", "1", "hep-ex")), parsed);
}

@Test
void fourDigitDateIsInvalidInLegacyFormat() throws Exception {
    // "2017/1118" has neither a letter-based classification nor seven digits after the slash.
    Optional<ArXivIdentifier> result = ArXivIdentifier.parse("2017/1118");
    assertEquals(Optional.empty(), result);
}

@Test
public void acceptPlainEprint() {
    // Bare numeric id without prefix, version or classification.
    ArXivIdentifier expected = new ArXivIdentifier("0706.0001");
    assertEquals(Optional.of(expected), ArXivIdentifier.parse("0706.0001"));
}

@Test
public void acceptPlainEprintWithVersion() {
    Optional<ArXivIdentifier> parsed = ArXivIdentifier.parse("0706.0001v1");
    // The parser captures only the digits after the "v", so the expected version is "1", not "v1".
    assertEquals(Optional.of(new ArXivIdentifier("0706.0001", "1", "")), parsed);
}

@Test
public void acceptArxivPrefix() {
    Optional<ArXivIdentifier> parsed = ArXivIdentifier.parse("arXiv:0706.0001v1");
    // The parser captures only the digits after the "v", so the expected version is "1", not "v1".
    assertEquals(Optional.of(new ArXivIdentifier("0706.0001", "1", "")), parsed);
}

@Test
public void ignoreLeadingAndTrailingWhitespaces() {
    // parse() removes all space characters before matching, so surrounding blanks are irrelevant.
    Optional<ArXivIdentifier> parsed = ArXivIdentifier.parse(" 0706.0001v1 ");
    // The parser captures only the digits after the "v", so the expected version is "1", not "v1".
    assertEquals(Optional.of(new ArXivIdentifier("0706.0001", "1", "")), parsed);
}

@Test
public void rejectEmbeddedEprint() {
    // The parser requires the whole input to be an identifier; surrounding text must make it fail.
    Optional<ArXivIdentifier> result = ArXivIdentifier.parse("other stuff 0706.0001v1 end");
    assertEquals(Optional.empty(), result);
}

@Test
public void rejectInvalidEprint() {
    // A URL-looking string that carries no identifier at all.
    Optional<ArXivIdentifier> result = ArXivIdentifier.parse("https://thisisnouri");
    assertEquals(Optional.empty(), result);
}

@Test
public void acceptUrlHttpEprint() {
    Optional<ArXivIdentifier> parsed = ArXivIdentifier.parse("http://arxiv.org/abs/0706.0001v1");
    // The parser captures only the digits after the "v", so the expected version is "1", not "v1".
    assertEquals(Optional.of(new ArXivIdentifier("0706.0001", "1", "")), parsed);
}

@Test
public void acceptUrlHttpsEprint() {
    Optional<ArXivIdentifier> parsed = ArXivIdentifier.parse("https://arxiv.org/abs/0706.0001v1");
    // The parser captures only the digits after the "v", so the expected version is "1", not "v1".
    assertEquals(Optional.of(new ArXivIdentifier("0706.0001", "1", "")), parsed);
}

@Test
public void rejectUrlOtherDomainEprint() {
    // Same path as a real abs URL but on a foreign host; expected to be rejected.
    Optional<ArXivIdentifier> result = ArXivIdentifier.parse("https://asdf.org/abs/0706.0001v1");
    assertEquals(Optional.empty(), result);
}

@Test
public void constructCorrectURLForEprint() throws Exception {
    // flatMap instead of Optional.get(): an unexpected parse failure then shows up as a
    // readable assertion mismatch rather than a NoSuchElementException.
    Optional<URI> externalUri = ArXivIdentifier.parse("0706.0001v1").flatMap(ArXivIdentifier::getExternalURI);
    // The external link is expected to include the version suffix.
    assertEquals(Optional.of(new URI("https://arxiv.org/abs/0706.0001v1")), externalUri);
}

}
Loading

0 comments on commit eea27ab

Please sign in to comment.