Skip to content

Commit

Permalink
Fixing journal abbreviation roundtrip (#9453)
Browse files Browse the repository at this point in the history
* checking for dot-less/medline abbreviations

* fixed regex

* fixed indentation

* Move code for checking dot-less abbreviations into separate method

* Add checks for dot-less abbreviations

* Add test case for dot-less abbreviations

* Update CHANGELOG.md

* Add new static class variable for Pattern

* Add checks for "?" journal names

* Add check requiring an abbreviation to have at least 2 words
  • Loading branch information
K5qu4r3d authored Dec 15, 2022
1 parent fe16336 commit 55a819f
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 3 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve
- We fixed an issue where font size preferences did not apply correctly to preference dialog window and the menu bar. [#8386](https://github.com/JabRef/jabref/issues/8386) and [#9279](https://github.com/JabRef/jabref/issues/9279)
- We fixed an issue that JabRef displayed the wrong group tree after loading. [koppor#637](https://github.com/koppor/jabref/issues/637)
- We fixed an issue when using an unsafe character in the citation key, the auto-linking feature fails to link files. [#9267](https://github.com/JabRef/jabref/issues/9267)
- We fixed an issue where a known journal's Medline/dot-less abbreviation did not switch to the full name. [#9370](https://github.com/JabRef/jabref/issues/9370)

### Removed

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import java.util.Optional;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import org.h2.mvstore.MVMap;
Expand All @@ -17,6 +18,8 @@
* A repository for all journal abbreviations, including add and find methods.
*/
public class JournalAbbreviationRepository {
static final Pattern DOT = Pattern.compile("\\.");
static final Pattern QUESTION_MARK = Pattern.compile("\\?");

private final MVMap<String, String> fullToAbbreviation;
private final MVMap<String, String> abbreviationToFull;
Expand Down Expand Up @@ -49,14 +52,20 @@ private static boolean isMatchedAbbreviated(String name, Abbreviation abbreviati
* Letters) or its abbreviated form (e.g. Phys. Rev. Lett.).
*/
public boolean isKnownName(String journalName) {
    // Names containing at least one "?" are never reported as known.
    if (QUESTION_MARK.matcher(journalName).find()) {
        return false;
    }

    // Replace every escaped ampersand ("\&") by a plain "&" before any lookup.
    // A literal replace is used here: no regex is needed for a fixed two-character sequence.
    String journal = journalName.trim().replace("\\&", "&");

    // Custom abbreviations are consulted first.
    if (customAbbreviations.stream().anyMatch(abbreviation -> isMatched(journal, abbreviation))) {
        return true;
    }

    // Otherwise the name is known if it is a stored full name, a stored abbreviation,
    // or a dot-less spelling for which the dot-less lookup returns a non-empty result.
    return fullToAbbreviation.containsKey(journal)
            || abbreviationToFull.containsKey(journal)
            || !findDottedAbbrFromDotless(journal).isEmpty();
}

/**
Expand All @@ -66,8 +75,40 @@ public boolean isKnownName(String journalName) {
public boolean isAbbreviatedName(String journalName) {
String journal = journalName.trim();

// journal abbreviation must be at least 2 words
boolean isMoreThanTwoWords = journalName.split(" ").length >= 2;

return customAbbreviations.stream().anyMatch(abbreviation -> isMatchedAbbreviated(journal, abbreviation))
|| abbreviationToFull.containsKey(journal);
|| abbreviationToFull.containsKey(journal)
|| (isMoreThanTwoWords && findDottedAbbrFromDotless(journal).length() > 0);
}

/**
 * Searches the stored abbreviations for entries that correspond to a dot-less spelling
 * (e.g. "ACS Appl Mater Interfaces").
 * <p>
 * Each space-separated word of {@code journalName} is matched literally and may be followed by
 * any run of dots and whitespace in the stored abbreviation. A stored abbreviation counts as a
 * match when this sequence is found anywhere inside it.
 *
 * @param journalName the journal name to look up
 * @return {@code "UNKNOWN"} if the name contains a "?"; the empty string if the name contains a
 *         dot, is blank, or matches no stored abbreviation; otherwise the matching stored
 *         abbreviation. NOTE(review): if several stored abbreviations match, they are concatenated
 *         without a separator, so the result is then not a valid key - confirm whether callers
 *         rely on this.
 */
public String findDottedAbbrFromDotless(String journalName) {
    // check for at least one "?"
    if (QUESTION_MARK.matcher(journalName).find()) {
        return "UNKNOWN";
    }

    // Only dot-less names are looked up. A blank name would produce a pattern that
    // matches every stored abbreviation, so it is rejected as well.
    if (DOT.matcher(journalName).find() || journalName.trim().isEmpty()) {
        return "";
    }

    // Build the pattern once. Every word is quoted so that regex metacharacters in a journal
    // name (parentheses, brackets, "+", ...) are matched literally instead of being
    // interpreted or causing a PatternSyntaxException.
    StringBuilder regex = new StringBuilder();
    for (String word : journalName.split(" ")) {
        regex.append(Pattern.quote(word)).append("[\\.\\s]*");
    }
    Pattern dotlessPattern = Pattern.compile(regex.toString());

    return abbreviationToFull.keySet().stream()
            .filter(key -> dotlessPattern.matcher(key).find())
            .collect(Collectors.joining());
}

/**
Expand All @@ -87,7 +128,17 @@ public Optional<Abbreviation> get(String input) {

return Optional.ofNullable(fullToAbbreviation.get(journal))
.map(abbreviation -> new Abbreviation(journal, abbreviation))
.or(() -> Optional.ofNullable(abbreviationToFull.get(journal)).map(fullName -> new Abbreviation(fullName, journal)));
.or(() -> {
String abbr = "";

// check for dot-less abbr
if (isKnownName(journal) && isAbbreviatedName(journal)) {
abbr = findDottedAbbrFromDotless(journal);
}

return Optional.ofNullable(abbreviationToFull.get(abbr.equals("") ? journal : abbr))
.map(fullName -> new Abbreviation(fullName, journal));
});
}

public void addCustomAbbreviation(Abbreviation abbreviation) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -252,4 +252,19 @@ void testUnabbreviateWithJournalExistsAndFJournalExists() {
.withField(StandardField.JOURNAL, "ACS Applied Materials & Interfaces");
assertEquals(expectedAbbreviatedJournalEntry, abbreviatedJournalEntry);
}

@Test
void testJournalDotlessAbbreviation() {
    // Unabbreviator backed by the built-in abbreviation list.
    JournalAbbreviationRepository repository = JournalAbbreviationLoader.loadBuiltInRepository();
    UndoableUnabbreviator unabbreviator = new UndoableUnabbreviator(repository);
    BibDatabase database = new BibDatabase();

    // Entry whose journal field holds an abbreviation written without dots.
    BibEntry entry = new BibEntry(StandardEntryType.Article);
    entry.setField(StandardField.JOURNAL, "ACS Appl Mater Interfaces");

    // After unabbreviating, the journal field is expected to hold the full journal name.
    BibEntry expected = new BibEntry(StandardEntryType.Article)
            .withField(StandardField.JOURNAL, "ACS Applied Materials & Interfaces");

    unabbreviator.unabbreviate(database, entry, StandardField.JOURNAL, new CompoundEdit());

    assertEquals(expected, entry);
}
}

0 comments on commit 55a819f

Please sign in to comment.