diff --git a/src/main/java/org/jabref/model/strings/StringUtil.java b/src/main/java/org/jabref/model/strings/StringUtil.java index 37267ce92cf..0e34790549f 100644 --- a/src/main/java/org/jabref/model/strings/StringUtil.java +++ b/src/main/java/org/jabref/model/strings/StringUtil.java @@ -7,6 +7,7 @@ import java.io.InputStreamReader; import java.net.URL; import java.net.URLConnection; +import java.text.Normalizer; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -587,7 +588,12 @@ public static String limitStringLength(String s, int maxLength) { * accept. The basis for replacement is the HashMap UnicodeToReadableCharMap. */ public static String replaceSpecialCharacters(String s) { - String result = s; + /* Some Unicode characters can be encoded in multiple ways. This uses NFC + * to re-encode the characters so that these characters can be found. + * Most people expect Unicode to work similarly to NFC, i.e., if characters look the same, it is likely that they are equivalent. + * Hence, if someone debugs issues in the `UNICODE_CHAR_MAP`, they will expect NFC. + * A more holistic approach should likely start with the compatibility equivalence. 
*/ + String result = Normalizer.normalize(s, Normalizer.Form.NFC); for (Map.Entry chrAndReplace : UNICODE_CHAR_MAP.entrySet()) { result = result.replace(chrAndReplace.getKey(), chrAndReplace.getValue()); } diff --git a/src/test/java/org/jabref/logic/citationkeypattern/CitationKeyGeneratorTest.java b/src/test/java/org/jabref/logic/citationkeypattern/CitationKeyGeneratorTest.java index 7305bc43f66..e31a0776772 100644 --- a/src/test/java/org/jabref/logic/citationkeypattern/CitationKeyGeneratorTest.java +++ b/src/test/java/org/jabref/logic/citationkeypattern/CitationKeyGeneratorTest.java @@ -1094,4 +1094,11 @@ void generateKeyCorrectKeyLengthWithAuthNofMthAndUnicode() { assertEquals(4, generateKey(bibEntry, "[auth4_1]").length()); } + + @Test + void generateKeyWithNonNormalizedUnicode() { + BibEntry bibEntry = new BibEntry().withField(StandardField.TITLE, "Modèle et outil pour soutenir la scénarisation pédagogique de MOOC connectivistes"); + + assertEquals("Modele", generateKey(bibEntry, "[veryshorttitle]")); + } } diff --git a/src/test/java/org/jabref/model/strings/StringUtilTest.java b/src/test/java/org/jabref/model/strings/StringUtilTest.java index 3fa176e208b..6eab1079076 100644 --- a/src/test/java/org/jabref/model/strings/StringUtilTest.java +++ b/src/test/java/org/jabref/model/strings/StringUtilTest.java @@ -299,6 +299,11 @@ void testReplaceSpecialCharacters() { assertEquals("aaAeoeeee", StringUtil.replaceSpecialCharacters("åÄöéèë")); } + @Test + void replaceSpecialCharactersWithNonNormalizedUnicode() { + assertEquals("Modele", StringUtil.replaceSpecialCharacters("Modèle")); + } + @Test void testRepeatSpaces() { assertEquals("", StringUtil.repeatSpaces(0));