From 8afa8b6195ffeb5f8f3d5743196992fffffa1390 Mon Sep 17 00:00:00 2001 From: kkeomalaythong Date: Mon, 5 Dec 2022 13:51:53 -0500 Subject: [PATCH 01/10] checking for dot-less/medline abbreviations --- .../JournalAbbreviationRepository.java | 32 ++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/jabref/logic/journals/JournalAbbreviationRepository.java b/src/main/java/org/jabref/logic/journals/JournalAbbreviationRepository.java index 825bd706e27..0d6bd89cb71 100644 --- a/src/main/java/org/jabref/logic/journals/JournalAbbreviationRepository.java +++ b/src/main/java/org/jabref/logic/journals/JournalAbbreviationRepository.java @@ -8,6 +8,7 @@ import java.util.Optional; import java.util.Set; import java.util.regex.Matcher; +import java.util.regex.Pattern; import java.util.stream.Collectors; import org.h2.mvstore.MVMap; @@ -87,7 +88,36 @@ public Optional get(String input) { return Optional.ofNullable(fullToAbbreviation.get(journal)) .map(abbreviation -> new Abbreviation(journal, abbreviation)) - .or(() -> Optional.ofNullable(abbreviationToFull.get(journal)).map(fullName -> new Abbreviation(fullName, journal))); + .or(() -> { + // check for dot-less abbr + Pattern p = Pattern.compile("\\."); + Matcher m = p.matcher(journal); + boolean hasDots = m.find(); + String foundKey = ""; + + if (!hasDots) { + // use dot-less abbr to find full name using regex + String[] journalSplit = journal.split(" "); + + for (int i = 0; i < journalSplit.length; i++) { + String word = "(" + journalSplit[i] + ")+[\\.\\s]*"; + journalSplit[i] = word; + } + + String joined = String.join("", journalSplit); + + foundKey = abbreviationToFull.keySet().stream() + .filter( + s -> Pattern.compile(joined) + .matcher(s) + .find() + ) + .collect(Collectors.joining()); + } + + return Optional.ofNullable(abbreviationToFull.get(foundKey.equals("") ? 
journal : foundKey)) + .map(fullName -> new Abbreviation(fullName, journal)); + }); } public void addCustomAbbreviation(Abbreviation abbreviation) { From 9dd542b9f8ecac24eae94edbe008886902f9aa48 Mon Sep 17 00:00:00 2001 From: kkeomalaythong Date: Mon, 5 Dec 2022 23:07:35 -0500 Subject: [PATCH 02/10] fixed regex --- .../jabref/logic/journals/JournalAbbreviationRepository.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/jabref/logic/journals/JournalAbbreviationRepository.java b/src/main/java/org/jabref/logic/journals/JournalAbbreviationRepository.java index 0d6bd89cb71..313ad9fe85b 100644 --- a/src/main/java/org/jabref/logic/journals/JournalAbbreviationRepository.java +++ b/src/main/java/org/jabref/logic/journals/JournalAbbreviationRepository.java @@ -100,7 +100,7 @@ public Optional get(String input) { String[] journalSplit = journal.split(" "); for (int i = 0; i < journalSplit.length; i++) { - String word = "(" + journalSplit[i] + ")+[\\.\\s]*"; + String word = journalSplit[i] + "[\\.\\s]*"; journalSplit[i] = word; } From b201559e75e806102feed7ac18e5245f24b31780 Mon Sep 17 00:00:00 2001 From: kkeomalaythong Date: Mon, 5 Dec 2022 23:12:00 -0500 Subject: [PATCH 03/10] fixed indentation --- .../logic/journals/JournalAbbreviationRepository.java | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/main/java/org/jabref/logic/journals/JournalAbbreviationRepository.java b/src/main/java/org/jabref/logic/journals/JournalAbbreviationRepository.java index 313ad9fe85b..7923c7b44de 100644 --- a/src/main/java/org/jabref/logic/journals/JournalAbbreviationRepository.java +++ b/src/main/java/org/jabref/logic/journals/JournalAbbreviationRepository.java @@ -107,12 +107,8 @@ public Optional get(String input) { String joined = String.join("", journalSplit); foundKey = abbreviationToFull.keySet().stream() - .filter( - s -> Pattern.compile(joined) - .matcher(s) - .find() - ) - .collect(Collectors.joining()); + .filter(s -> 
Pattern.compile(joined).matcher(s).find()) + .collect(Collectors.joining()); } return Optional.ofNullable(abbreviationToFull.get(foundKey.equals("") ? journal : foundKey)) From cf634a23f714873d7a5afbb6cd4b28268f8f0e7d Mon Sep 17 00:00:00 2001 From: kkeomalaythong Date: Tue, 6 Dec 2022 21:47:37 -0500 Subject: [PATCH 04/10] Move code for checking dot-less abbreviations into separate method --- .../JournalAbbreviationRepository.java | 48 +++++++++++-------- 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/src/main/java/org/jabref/logic/journals/JournalAbbreviationRepository.java b/src/main/java/org/jabref/logic/journals/JournalAbbreviationRepository.java index 7923c7b44de..2eeff22ef7a 100644 --- a/src/main/java/org/jabref/logic/journals/JournalAbbreviationRepository.java +++ b/src/main/java/org/jabref/logic/journals/JournalAbbreviationRepository.java @@ -71,6 +71,29 @@ public boolean isAbbreviatedName(String journalName) { || abbreviationToFull.containsKey(journal); } + public String findDottedAbbrFromDotless(String journalName) { + String foundKey = ""; + + // check for a dot-less abbreviation + if (!Pattern.compile("\\.").matcher(journalName).find()) { + // use dot-less abbr to find full name using regex + String[] journalSplit = journalName.split(" "); + + for (int i = 0; i < journalSplit.length; i++) { + String word = journalSplit[i] + "[\\.\\s]*"; + journalSplit[i] = word; + } + + String joined = String.join("", journalSplit); + + foundKey = abbreviationToFull.keySet().stream() + .filter(s -> Pattern.compile(joined).matcher(s).find()) + .collect(Collectors.joining()); + } + + return foundKey; + } + /** * Attempts to get the abbreviation of the journal given. 
* @@ -89,29 +112,14 @@ public Optional get(String input) { return Optional.ofNullable(fullToAbbreviation.get(journal)) .map(abbreviation -> new Abbreviation(journal, abbreviation)) .or(() -> { - // check for dot-less abbr - Pattern p = Pattern.compile("\\."); - Matcher m = p.matcher(journal); - boolean hasDots = m.find(); - String foundKey = ""; - - if (!hasDots) { - // use dot-less abbr to find full name using regex - String[] journalSplit = journal.split(" "); + String abbr = ""; - for (int i = 0; i < journalSplit.length; i++) { - String word = journalSplit[i] + "[\\.\\s]*"; - journalSplit[i] = word; - } - - String joined = String.join("", journalSplit); - - foundKey = abbreviationToFull.keySet().stream() - .filter(s -> Pattern.compile(joined).matcher(s).find()) - .collect(Collectors.joining()); + // check for dot-less abbr + if (isKnownName(journal) && isAbbreviatedName(journal)) { + abbr = findDottedAbbrFromDotless(journal); } - return Optional.ofNullable(abbreviationToFull.get(foundKey.equals("") ? journal : foundKey)) + return Optional.ofNullable(abbreviationToFull.get(abbr.equals("") ? 
journal : abbr)) .map(fullName -> new Abbreviation(fullName, journal)); }); } From 1bf6cf336a23700853ce7e1ffdb4c02b172b387b Mon Sep 17 00:00:00 2001 From: kkeomalaythong Date: Tue, 6 Dec 2022 21:48:07 -0500 Subject: [PATCH 05/10] Add checks for dot-less abbreviations --- .../jabref/logic/journals/JournalAbbreviationRepository.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/jabref/logic/journals/JournalAbbreviationRepository.java b/src/main/java/org/jabref/logic/journals/JournalAbbreviationRepository.java index 2eeff22ef7a..41001eb8584 100644 --- a/src/main/java/org/jabref/logic/journals/JournalAbbreviationRepository.java +++ b/src/main/java/org/jabref/logic/journals/JournalAbbreviationRepository.java @@ -57,7 +57,8 @@ public boolean isKnownName(String journalName) { return true; } - return fullToAbbreviation.containsKey(journal) || abbreviationToFull.containsKey(journal); + return fullToAbbreviation.containsKey(journal) || abbreviationToFull.containsKey(journal) + || findDottedAbbrFromDotless(journal).length() > 0; } /** @@ -68,7 +69,7 @@ public boolean isAbbreviatedName(String journalName) { String journal = journalName.trim(); return customAbbreviations.stream().anyMatch(abbreviation -> isMatchedAbbreviated(journal, abbreviation)) - || abbreviationToFull.containsKey(journal); + || abbreviationToFull.containsKey(journal) || findDottedAbbrFromDotless(journal).length() > 0; } public String findDottedAbbrFromDotless(String journalName) { From 9776fe07f4b5593bd397636924e625734b4341d2 Mon Sep 17 00:00:00 2001 From: kkeomalaythong Date: Tue, 6 Dec 2022 21:50:06 -0500 Subject: [PATCH 06/10] Add test case for dot-less abbreviations --- .../JournalAbbreviationRepositoryTest.java | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/test/java/org/jabref/logic/journals/JournalAbbreviationRepositoryTest.java b/src/test/java/org/jabref/logic/journals/JournalAbbreviationRepositoryTest.java index 
fe724edc847..66f8567709b 100644 --- a/src/test/java/org/jabref/logic/journals/JournalAbbreviationRepositoryTest.java +++ b/src/test/java/org/jabref/logic/journals/JournalAbbreviationRepositoryTest.java @@ -252,4 +252,19 @@ void testUnabbreviateWithJournalExistsAndFJournalExists() { .withField(StandardField.JOURNAL, "ACS Applied Materials & Interfaces"); assertEquals(expectedAbbreviatedJournalEntry, abbreviatedJournalEntry); } + + @Test + void testJournalDotlessAbbreviation() { + BibDatabase bibDatabase = new BibDatabase(); + JournalAbbreviationRepository journalAbbreviationRepository = JournalAbbreviationLoader.loadBuiltInRepository(); + UndoableUnabbreviator undoableUnabbreviator = new UndoableUnabbreviator(journalAbbreviationRepository); + + BibEntry abbreviatedJournalEntry = new BibEntry(StandardEntryType.Article); + abbreviatedJournalEntry.setField(StandardField.JOURNAL, "ACS Appl Mater Interfaces"); + + undoableUnabbreviator.unabbreviate(bibDatabase, abbreviatedJournalEntry, StandardField.JOURNAL, new CompoundEdit()); + BibEntry expectedAbbreviatedJournalEntry = new BibEntry(StandardEntryType.Article) + .withField(StandardField.JOURNAL, "ACS Applied Materials & Interfaces"); + assertEquals(expectedAbbreviatedJournalEntry, abbreviatedJournalEntry); + } } From cf3224d9b259859c2c40560c8c13c8a0449a3184 Mon Sep 17 00:00:00 2001 From: kkeomalaythong Date: Tue, 13 Dec 2022 12:35:38 -0500 Subject: [PATCH 07/10] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 615c4b88c48..a5b72c78bf4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -92,6 +92,7 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve - We fixed an issue where journal abbreviations would not abbreviate journal titles with escaped ampersands (\\&). 
[#8948](https://github.com/JabRef/jabref/issues/8948) - We fixed an issue where font size preferences did not apply correctly to preference dialog window and the menu bar. [#8386](https://github.com/JabRef/jabref/issues/8386) and [#9279](https://github.com/JabRef/jabref/issues/9279) - We fixed an issue when using an unsafe character in the citation key, the auto-linking feature fails to link files. [#9267](https://github.com/JabRef/jabref/issues/9267) +- We fixed an issue where the MEDLINE/dot-less abbreviation of a known journal was not unabbreviated to its full name. [#9370](https://github.com/JabRef/jabref/issues/9370) ### Removed From 490abe22cfab4e1fe8f0e3488cc67362b450c816 Mon Sep 17 00:00:00 2001 From: kkeomalaythong Date: Wed, 14 Dec 2022 09:46:42 -0500 Subject: [PATCH 08/10] Add new static class variable for Pattern --- .../jabref/logic/journals/JournalAbbreviationRepository.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/jabref/logic/journals/JournalAbbreviationRepository.java b/src/main/java/org/jabref/logic/journals/JournalAbbreviationRepository.java index 41001eb8584..343dfe7cd3a 100644 --- a/src/main/java/org/jabref/logic/journals/JournalAbbreviationRepository.java +++ b/src/main/java/org/jabref/logic/journals/JournalAbbreviationRepository.java @@ -18,6 +18,7 @@ * A repository for all journal abbreviations, including add and find methods. 
*/ public class JournalAbbreviationRepository { + static final Pattern DOT = Pattern.compile("\\."); private final MVMap fullToAbbreviation; private final MVMap abbreviationToFull; @@ -76,7 +77,7 @@ public String findDottedAbbrFromDotless(String journalName) { String foundKey = ""; // check for a dot-less abbreviation - if (!Pattern.compile("\\.").matcher(journalName).find()) { + if (!DOT.matcher(journalName).find()) { // use dot-less abbr to find full name using regex String[] journalSplit = journalName.split(" "); From d47bd5d6bfcaf6592ddff3710dbe0b403e4308f2 Mon Sep 17 00:00:00 2001 From: kkeomalaythong Date: Wed, 14 Dec 2022 10:17:23 -0500 Subject: [PATCH 09/10] Add checks for "?" journal names --- .../logic/journals/JournalAbbreviationRepository.java | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/main/java/org/jabref/logic/journals/JournalAbbreviationRepository.java b/src/main/java/org/jabref/logic/journals/JournalAbbreviationRepository.java index 343dfe7cd3a..bec6189f3af 100644 --- a/src/main/java/org/jabref/logic/journals/JournalAbbreviationRepository.java +++ b/src/main/java/org/jabref/logic/journals/JournalAbbreviationRepository.java @@ -19,6 +19,7 @@ */ public class JournalAbbreviationRepository { static final Pattern DOT = Pattern.compile("\\."); + static final Pattern QUESTION_MARK = Pattern.compile("\\?"); private final MVMap fullToAbbreviation; private final MVMap abbreviationToFull; @@ -51,6 +52,11 @@ private static boolean isMatchedAbbreviated(String name, Abbreviation abbreviati * Letters) or its abbreviated form (e.g. Phys. Rev. Lett.). */ public boolean isKnownName(String journalName) { + // check for at least one "?" 
+ if (QUESTION_MARK.matcher(journalName).find()) { + return false; + } + String journal = journalName.trim().replaceAll(Matcher.quoteReplacement("\\&"), "&"); boolean isKnown = customAbbreviations.stream().anyMatch(abbreviation -> isMatched(journal, abbreviation)); @@ -74,6 +80,11 @@ public boolean isAbbreviatedName(String journalName) { } public String findDottedAbbrFromDotless(String journalName) { + // check for at least one "?" + if (QUESTION_MARK.matcher(journalName).find()) { + return "UNKNOWN"; + } + String foundKey = ""; // check for a dot-less abbreviation From cf70e0fd4931132493dc67a50d5d0586ca868a99 Mon Sep 17 00:00:00 2001 From: kkeomalaythong Date: Thu, 15 Dec 2022 10:59:45 -0500 Subject: [PATCH 10/10] Add check requiring abbreviation be at least 2 words --- .../logic/journals/JournalAbbreviationRepository.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/jabref/logic/journals/JournalAbbreviationRepository.java b/src/main/java/org/jabref/logic/journals/JournalAbbreviationRepository.java index bec6189f3af..0437e1ae2ae 100644 --- a/src/main/java/org/jabref/logic/journals/JournalAbbreviationRepository.java +++ b/src/main/java/org/jabref/logic/journals/JournalAbbreviationRepository.java @@ -75,8 +75,12 @@ public boolean isKnownName(String journalName) { public boolean isAbbreviatedName(String journalName) { String journal = journalName.trim(); + // journal abbreviation must be at least 2 words + boolean isMoreThanTwoWords = journalName.split(" ").length >= 2; + return customAbbreviations.stream().anyMatch(abbreviation -> isMatchedAbbreviated(journal, abbreviation)) - || abbreviationToFull.containsKey(journal) || findDottedAbbrFromDotless(journal).length() > 0; + || abbreviationToFull.containsKey(journal) + || (isMoreThanTwoWords && findDottedAbbrFromDotless(journal).length() > 0); } public String findDottedAbbrFromDotless(String journalName) {