Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixing journal abbreviation roundtrip #9453

Merged
merged 15 commits into from
Dec 15, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve
- We fixed an issue where font size preferences did not apply correctly to preference dialog window and the menu bar. [#8386](https://github.com/JabRef/jabref/issues/8386) and [#9279](https://github.com/JabRef/jabref/issues/9279)
- We fixed an issue that JabRef displayed the wrong group tree after loading. [koppor#637](https://github.com/koppor/jabref/issues/637)
- We fixed an issue when using an unsafe character in the citation key, the auto-linking feature fails to link files. [#9267](https://github.com/JabRef/jabref/issues/9267)
- We fixed an issue where a known journal's medline/dot-less abbreviation did not switch to the full name. [#9370](https://github.com/JabRef/jabref/issues/9370)

### Removed

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import java.util.Optional;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import org.h2.mvstore.MVMap;
Expand All @@ -17,6 +18,8 @@
* A repository for all journal abbreviations, including add and find methods.
*/
public class JournalAbbreviationRepository {
static final Pattern DOT = Pattern.compile("\\.");
static final Pattern QUESTION_MARK = Pattern.compile("\\?");

private final MVMap<String, String> fullToAbbreviation;
private final MVMap<String, String> abbreviationToFull;
Expand Down Expand Up @@ -49,14 +52,20 @@ private static boolean isMatchedAbbreviated(String name, Abbreviation abbreviati
* Letters) or its abbreviated form (e.g. Phys. Rev. Lett.).
*/
public boolean isKnownName(String journalName) {
    // Names containing a question mark are rejected outright.
    if (QUESTION_MARK.matcher(journalName).find()) {
        return false;
    }

    // Turn escaped ampersands ("\&") back into plain "&" before any lookup.
    // A literal replace is used on purpose: Matcher.quoteReplacement is meant for
    // replacement strings, not for building the regex argument of replaceAll.
    String journal = journalName.trim().replace("\\&", "&");

    // User-defined abbreviations are consulted first.
    if (customAbbreviations.stream().anyMatch(abbreviation -> isMatched(journal, abbreviation))) {
        return true;
    }

    // Built-in lists: exact full name or exact abbreviation.
    if (fullToAbbreviation.containsKey(journal) || abbreviationToFull.containsKey(journal)) {
        return true;
    }

    // Last resort: treat the input as a dot-less abbreviation. An empty name is never
    // looked up this way, because an empty search pattern would match arbitrary keys.
    return !journal.isEmpty() && !findDottedAbbrFromDotless(journal).isEmpty();
}

/**
Expand All @@ -66,8 +75,40 @@ public boolean isKnownName(String journalName) {
public boolean isAbbreviatedName(String journalName) {
String journal = journalName.trim();

// journal abbreviation must be at least 2 words
boolean isMoreThanTwoWords = journalName.split(" ").length >= 2;

return customAbbreviations.stream().anyMatch(abbreviation -> isMatchedAbbreviated(journal, abbreviation))
|| abbreviationToFull.containsKey(journal);
|| abbreviationToFull.containsKey(journal)
|| (isMoreThanTwoWords && findDottedAbbrFromDotless(journal).length() > 0);
}

public String findDottedAbbrFromDotless(String journalName) {
// check for at least one "?"
if (QUESTION_MARK.matcher(journalName).find()) {
return "UNKNOWN";
}

String foundKey = "";

// check for a dot-less abbreviation
if (!DOT.matcher(journalName).find()) {
// use dot-less abbr to find full name using regex
String[] journalSplit = journalName.split(" ");

for (int i = 0; i < journalSplit.length; i++) {
String word = journalSplit[i] + "[\\.\\s]*";
K5qu4r3d marked this conversation as resolved.
Show resolved Hide resolved
journalSplit[i] = word;
}

String joined = String.join("", journalSplit);

foundKey = abbreviationToFull.keySet().stream()
.filter(s -> Pattern.compile(joined).matcher(s).find())
.collect(Collectors.joining());
}

return foundKey;
}

/**
Expand All @@ -87,7 +128,17 @@ public Optional<Abbreviation> get(String input) {

return Optional.ofNullable(fullToAbbreviation.get(journal))
.map(abbreviation -> new Abbreviation(journal, abbreviation))
.or(() -> Optional.ofNullable(abbreviationToFull.get(journal)).map(fullName -> new Abbreviation(fullName, journal)));
.or(() -> {
String abbr = "";

// check for dot-less abbr
if (isKnownName(journal) && isAbbreviatedName(journal)) {
abbr = findDottedAbbrFromDotless(journal);
}

return Optional.ofNullable(abbreviationToFull.get(abbr.equals("") ? journal : abbr))
.map(fullName -> new Abbreviation(fullName, journal));
});
}

public void addCustomAbbreviation(Abbreviation abbreviation) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -252,4 +252,19 @@ void testUnabbreviateWithJournalExistsAndFJournalExists() {
.withField(StandardField.JOURNAL, "ACS Applied Materials & Interfaces");
assertEquals(expectedAbbreviatedJournalEntry, abbreviatedJournalEntry);
}

@Test
void testJournalDotlessAbbreviation() {
    // Entry whose journal field holds an abbreviation written without dots
    BibEntry entry = new BibEntry(StandardEntryType.Article);
    entry.setField(StandardField.JOURNAL, "ACS Appl Mater Interfaces");

    // Unabbreviate the journal field using the built-in abbreviation repository
    JournalAbbreviationRepository repository = JournalAbbreviationLoader.loadBuiltInRepository();
    UndoableUnabbreviator unabbreviator = new UndoableUnabbreviator(repository);
    unabbreviator.unabbreviate(new BibDatabase(), entry, StandardField.JOURNAL, new CompoundEdit());

    // The journal field is expected to hold the full journal name afterwards
    BibEntry expected = new BibEntry(StandardEntryType.Article)
            .withField(StandardField.JOURNAL, "ACS Applied Materials & Interfaces");
    assertEquals(expected, entry);
}
}