Skip to content

Commit

Permalink
fixes #744 - apoc.text.clean strips utf8 characters (incl. cyrillic/c…
Browse files Browse the repository at this point in the history
…hinese/japanese symbols) (#869)
  • Loading branch information
AngeloBusato authored and jexp committed Aug 8, 2018
1 parent 42e81be commit bdd9646
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 2 deletions.
2 changes: 1 addition & 1 deletion src/main/java/apoc/text/Strings.java
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ public String urldecode(@Name("text") String text) {
}
}

- private static Pattern cleanPattern = Pattern.compile("[^A-Za-z0-9]+");
+ private static Pattern cleanPattern = Pattern.compile("[^\\p{L}\\p{N}]+");
private static Pattern specialCharPattern = Pattern.compile("\\p{IsM}+");
private static String[][] UMLAUT_REPLACEMENTS = {
{ new String("Ä"), "Ae" },
Expand Down
14 changes: 13 additions & 1 deletion src/test/java/apoc/text/StringsTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,18 @@ public void testJoinWithNull() throws Exception {
row -> assertEquals(null, row.get("value")));
}

/**
 * Checks apoc.text.clean on a Cyrillic input that is interspersed with
 * spaces and punctuation: the expected result keeps every letter, in
 * lower case, and drops all separators.
 */
@Test
public void testCleanNonLatin() throws Exception {
    final String query =
            "RETURN apoc.text.clean('А .::Б В=Г Д-Е Ж%Ѕ Ꙁ И І К Л М Н О П+++Р С Т ОУ Ф Х Ѡ Ц Ч,.,.Ш Щ Ъ ЪІ Ь Ѣ Ꙗ Ѥ Ю Ѫ Ѭ Ѧ Ѩ Ѯ Ѱ Ѳ Ѵ Ҁ') AS value";
    final String expected = "абвгдежѕꙁиіклмнопрстоуфхѡцчшщъъіьѣꙗѥюѫѭѧѩѯѱѳѵҁ";

    testCall(db, query, row -> {
        Object actual = row.get("value");
        assertEquals(expected, actual);
    });
}

/**
 * Checks apoc.text.clean on Chinese characters mixed with spaces and
 * punctuation: the expected result contains only the three characters.
 */
@Test
public void testCleanNonLatinChinese() throws Exception {
    final String query = "RETURN apoc.text.clean('桃 .::山= 區 %') AS value";
    final String expected = "桃山區";

    testCall(db, query, row -> {
        Object actual = row.get("value");
        assertEquals(expected, actual);
    });
}

@Test public void testCompareCleaned() throws Exception {
String string1 = "&N[]eo 4 #J-(3.0) ";
String string2 = " neo4j-<30";
Expand Down Expand Up @@ -368,7 +380,7 @@ public void testRegexGroupsForNPE() {
testCall(db, "RETURN apoc.text.regexGroups(null,'<link (\\\\w+)>(\\\\w+)</link>') AS result", row -> { });
testCall(db, "RETURN apoc.text.regexGroups('abc',null) AS result", row -> { });
}

@Test
public void testSlug() {
testCall(db, "RETURN apoc.text.slug('a-b','-') AS value", row -> assertEquals("a-b", row.get("value")));
Expand Down

0 comments on commit bdd9646

Please sign in to comment.