diff --git a/src/main/java/apoc/text/Strings.java b/src/main/java/apoc/text/Strings.java index 36640554eb..f5044e4994 100644 --- a/src/main/java/apoc/text/Strings.java +++ b/src/main/java/apoc/text/Strings.java @@ -224,7 +224,7 @@ public String urldecode(@Name("text") String text) { } } - private static Pattern cleanPattern = Pattern.compile("[^A-Za-z0-9]+"); + private static Pattern cleanPattern = Pattern.compile("[^\\p{L}\\p{N}]+"); private static Pattern specialCharPattern = Pattern.compile("\\p{IsM}+"); private static String[][] UMLAUT_REPLACEMENTS = { { new String("Ä"), "Ae" }, diff --git a/src/test/java/apoc/text/StringsTest.java b/src/test/java/apoc/text/StringsTest.java index 228a913790..c8214c5db5 100644 --- a/src/test/java/apoc/text/StringsTest.java +++ b/src/test/java/apoc/text/StringsTest.java @@ -157,6 +157,18 @@ public void testJoinWithNull() throws Exception { row -> assertEquals(null, row.get("value"))); } + @Test public void testCleanNonLatin() throws Exception { + testCall(db, + "RETURN apoc.text.clean('А .::Б В=Г Д-Е Ж%Ѕ Ꙁ И І К Л М Н О П+++Р С Т ОУ Ф Х Ѡ Ц Ч,.,.Ш Щ Ъ ЪІ Ь Ѣ Ꙗ Ѥ Ю Ѫ Ѭ Ѧ Ѩ Ѯ Ѱ Ѳ Ѵ Ҁ') AS value", + row -> assertEquals("абвгдежѕꙁиіклмнопрстоуфхѡцчшщъъіьѣꙗѥюѫѭѧѩѯѱѳѵҁ", row.get("value"))); + } + + @Test public void testCleanNonLatinChinese() throws Exception { + testCall(db, + "RETURN apoc.text.clean('桃 .::山= 區 %') AS value", + row -> assertEquals("桃山區", row.get("value"))); + } + @Test public void testCompareCleaned() throws Exception { String string1 = "&N[]eo 4 #J-(3.0) "; String string2 = " neo4j-<30"; @@ -368,7 +380,7 @@ public void testRegexGroupsForNPE() { testCall(db, "RETURN apoc.text.regexGroups(null,'(\\\\w+)') AS result", row -> { }); testCall(db, "RETURN apoc.text.regexGroups('abc',null) AS result", row -> { }); } - + @Test public void testSlug() { testCall(db, "RETURN apoc.text.slug('a-b','-') AS value", row -> assertEquals("a-b", row.get("value")));