From d3f63acffc9c75ecf68bc60db44596a9e821220c Mon Sep 17 00:00:00 2001 From: Gemma Lamont Date: Tue, 13 Aug 2024 09:08:50 +0200 Subject: [PATCH 1/3] [7hH8ZRTU] Add new regex matcher by group names --- core/src/main/java/apoc/text/Strings.java | 48 ++++++ core/src/test/java/apoc/text/StringsTest.java | 145 +++++++++++++++++- .../test/java/apoc/it/core/ApocSplitTest.java | 1 + 3 files changed, 191 insertions(+), 3 deletions(-) diff --git a/core/src/main/java/apoc/text/Strings.java b/core/src/main/java/apoc/text/Strings.java index a84fb746c..8d50fbfca 100644 --- a/core/src/main/java/apoc/text/Strings.java +++ b/core/src/main/java/apoc/text/Strings.java @@ -33,6 +33,7 @@ import java.util.Collection; import java.util.Collections; import java.util.Comparator; +import java.util.HashMap; import java.util.List; import java.util.Locale; import java.util.Map; @@ -42,6 +43,7 @@ import java.util.function.Supplier; import java.util.regex.Matcher; import java.util.regex.Pattern; +import java.util.regex.PatternSyntaxException; import java.util.stream.Collectors; import java.util.stream.StreamSupport; import org.apache.commons.lang3.StringUtils; @@ -182,6 +184,52 @@ public List> regexGroups(final @Name("text") String text, final @Na } } + @UserFunction("apoc.text.regexGroupsByName") + @Description("Returns all groups with their group name matching the given regular expression in the given text.") + public List> regexGroupsByName( + final @Name("text") String text, final @Name("regex") String regex) { + if (text == null || regex == null) { + return Collections.EMPTY_LIST; + } else { + List> result = new ArrayList<>(); + try { + final Pattern pattern = Pattern.compile(regex); + + final Matcher matcher = pattern.matcher(text); + List namedGroups = getNamedGroups(regex); + while (matcher.find()) { + Map matchGroupResult = new HashMap<>(); + matchGroupResult.put("group", matcher.group()); + Map matches = new HashMap<>(); + for (String groupName : namedGroups) { + String match = 
matcher.group(groupName); + if (match != null) { + matches.put(groupName, match); + } + } + matchGroupResult.put("matches", matches); + result.add(matchGroupResult); + } + } catch (PatternSyntaxException e) { + throw new RuntimeException("Invalid regex pattern: " + e.getMessage()); + } + return result; + } + } + + private List getNamedGroups(String text) { + List namedGroups = new ArrayList<>(); + // A group name is a letter followed by letters/digits; matching only that skips lookbehinds "(?<=" / "(?<!". + Matcher mG = Pattern.compile("\\(\\?<([a-zA-Z][a-zA-Z0-9]*)>").matcher(text); + + while (mG.find()) { + for (int i = 1; i <= mG.groupCount(); i++) { + namedGroups.add(mG.group(i)); + } + } + return namedGroups; + } + @UserFunction("apoc.text.join") @Description("Joins the given `STRING` values using the given delimiter.") public String join(final @Name("texts") List texts, final @Name("delimiter") String delimiter) { diff --git a/core/src/test/java/apoc/text/StringsTest.java b/core/src/test/java/apoc/text/StringsTest.java index cb3a6c848..8494d8e48 100644 --- a/core/src/test/java/apoc/text/StringsTest.java +++ b/core/src/test/java/apoc/text/StringsTest.java @@ -24,6 +24,8 @@ import static java.lang.Math.toIntExact; import static java.util.Arrays.asList; import static java.util.Collections.singletonList; +import static junit.framework.TestCase.assertEquals; +import static junit.framework.TestCase.assertTrue; import static org.junit.Assert.*; import apoc.util.TestUtil; @@ -35,6 +37,7 @@ import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; +import org.apache.commons.lang3.exception.ExceptionUtils; import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.ClassRule; @@ -67,9 +70,12 @@ public static void teardown() { @Test public void testIndexOfSubstring() { - String query = "WITH 'Hello World!' as text\n" - + "WITH text, size(text) as len, apoc.text.indexOf(text, 'World',3) as index\n" - + "RETURN substring(text, case index when -1 then len-1 else index end, len) as value;\n"; + String query = + """ + WITH 'Hello World!'
as text + WITH text, size(text) as len, apoc.text.indexOf(text, 'World',3) as index + RETURN substring(text, case index when -1 then len-1 else index end, len) as value; + """; testCall(db, query, (row) -> assertEquals("World!", row.get("value"))); } @@ -532,6 +538,139 @@ public void testRegexGroups() { }); } + @Test + public void singleGroupNyName() { + testResult( + db, + "RETURN apoc.text.regexGroupsByName('tenable_asset','(?\\w+)\\_(?\\w+)') AS result", + result -> { + final List r = Iterators.single(result.columnAs("result")); + + List> expected = new ArrayList<>(List.of(Map.of( + "group", + "tenable_asset", + "matches", + Map.of("firstPart", "tenable", "secondPart", "asset")))); + assertTrue(r.containsAll(expected)); + }); + } + + @Test + public void multipleGroupsNyName() { + testResult( + db, + "RETURN apoc.text.regexGroupsByName('abc yyy1 def yyy2','\\\\w+)>(?\\\\w+)') AS result", + result -> { + final List r = Iterators.single(result.columnAs("result")); + + List> expected = new ArrayList<>(List.of( + Map.of( + "group", + "yyy1", + "matches", + Map.of("firstPart", "xxx1", "secondPart", "yyy1")), + Map.of( + "group", + "yyy2", + "matches", + Map.of("firstPart", "xxx2", "secondPart", "yyy2")))); + assertTrue(r.containsAll(expected)); + }); + } + + @Test + public void groupNyNameWithMissingFirstGroup() { + testResult( + db, + "RETURN apoc.text.regexGroupsByName('_asset','(?\\w+)?\\_(?\\w+)') AS result", + result -> { + final List r = Iterators.single(result.columnAs("result")); + + List> expected = new ArrayList<>( + List.of(Map.of("group", "_asset", "matches", Map.of("secondPart", "asset")))); + assertTrue(r.containsAll(expected)); + }); + } + + @Test + public void groupNyNameWithMissingSecondGroup() { + testResult( + db, + "RETURN apoc.text.regexGroupsByName('asset_','(?\\w+)?\\_(?\\w+)?') AS result", + result -> { + final List r = Iterators.single(result.columnAs("result")); + + List> expected = new ArrayList<>( + List.of(Map.of("group", "asset_", 
"matches", Map.of("firstPart", "asset")))); + assertTrue(r.containsAll(expected)); + }); + } + + @Test + public void groupNyNameNoMatches() { + testResult( + db, + "RETURN apoc.text.regexGroupsByName('hello','(?\\w+)?\\_(?\\w+)?') AS result", + result -> { + final List r = Iterators.single(result.columnAs("result")); + + List> expected = new ArrayList<>(); + assertTrue(r.containsAll(expected)); + }); + } + + @Test + public void groupNyNameWithInvalidPattern1() { + QueryExecutionException e = assertThrows( + QueryExecutionException.class, + () -> testCall( + db, + "RETURN apoc.text.regexGroupsByName('asset_','(?\\w+)?\\_(?\\w+)?') AS result", + (r) -> {})); + Throwable except = ExceptionUtils.getRootCause(e); + assertTrue(except instanceof RuntimeException); + assertEquals( + """ + Invalid regex pattern: Named capturing group is already defined near index 32 + (?\\w+)?\\_(?\\w+)? + ^""", + except.getMessage()); + } + + @Test + public void groupNyNameWithInvalidPattern2() { + QueryExecutionException e = assertThrows( + QueryExecutionException.class, + () -> testCall(db, "RETURN apoc.text.regexGroupsByName('asset_','(? {})); + Throwable except = ExceptionUtils.getRootCause(e); + assertTrue(except instanceof RuntimeException); + assertEquals( + """ + Invalid regex pattern: named capturing group is missing trailing '>' near index 12 + (? 
{ + final List r = Iterators.single(result.columnAs("result")); + + List> expected = new ArrayList<>(); + assertTrue(r.containsAll(expected)); + }); + } + + @Test + public void testRegexGroupsByNameForNPE() { + // throws no exception + testCall( + db, + "RETURN apoc.text.regexGroupsByName(null,'\\\\w+)>(?\\\\w+)') AS result", + row -> {}); + testCall(db, "RETURN apoc.text.regexGroupsByName('abc',null) AS result", row -> {}); + } + @Test public void testRegexGroupsForNPE() { // throws no exception diff --git a/it/src/test/java/apoc/it/core/ApocSplitTest.java b/it/src/test/java/apoc/it/core/ApocSplitTest.java index 4ac749c1f..6cb332f76 100644 --- a/it/src/test/java/apoc/it/core/ApocSplitTest.java +++ b/it/src/test/java/apoc/it/core/ApocSplitTest.java @@ -403,6 +403,7 @@ public class ApocSplitTest { "apoc.text.regreplace", "apoc.text.split", "apoc.text.regexGroups", + "apoc.text.regexGroupsByName", "apoc.text.join", "apoc.text.clean", "apoc.text.compareCleaned", From c969993eb8f9f9c2170d768246466c62093bdc6f Mon Sep 17 00:00:00 2001 From: Gemma Lamont Date: Wed, 14 Aug 2024 09:34:39 +0200 Subject: [PATCH 2/3] [7hH8ZRTU] Bump version --- build.gradle | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/build.gradle b/build.gradle index d650d81fa..a911dad1c 100644 --- a/build.gradle +++ b/build.gradle @@ -14,7 +14,7 @@ downloadLicenses { allprojects { group = 'org.neo4j.procedure' - version = System.getenv('APOC_VERSION') ? System.getenv('APOC_VERSION') : '5.23.0' + version = System.getenv('APOC_VERSION') ? System.getenv('APOC_VERSION') : '5.24.0' archivesBaseName = 'apoc' description = """neo4j-apoc-procedures""" } @@ -100,8 +100,8 @@ subprojects { // neo4jDockerImage system property is used in TestContainerUtil systemProperties 'user.language': 'en', 'user.country': 'US', - 'neo4jDockerImage': project.hasProperty("neo4jDockerEeOverride") ? 
project.getProperty("neo4jDockerEeOverride") : 'neo4j:5.23.0-enterprise-debian', - 'neo4jCommunityDockerImage': project.hasProperty("neo4jDockerCeOverride") ? project.getProperty("neo4jDockerCeOverride") : 'neo4j:5.23.0-debian', + 'neo4jDockerImage': project.hasProperty("neo4jDockerEeOverride") ? project.getProperty("neo4jDockerEeOverride") : 'neo4j:5.24.0-enterprise-debian', + 'neo4jCommunityDockerImage': project.hasProperty("neo4jDockerCeOverride") ? project.getProperty("neo4jDockerCeOverride") : 'neo4j:5.24.0-debian', 'coreDir': 'core', 'testDockerBundle': project.hasProperty("testDockerBundle") ? true : false @@ -169,7 +169,7 @@ apply from: "licenses-source-header.gradle" ext { publicDir = "${project.rootDir}" - neo4jVersionEffective = project.hasProperty("neo4jVersionOverride") ? project.getProperty("neo4jVersionOverride") : "5.23.0" + neo4jVersionEffective = project.hasProperty("neo4jVersionOverride") ? project.getProperty("neo4jVersionOverride") : "5.24.0" testContainersVersion = '1.19.1' apacheArrowVersion = '15.0.0' } From 343a9422ae3cd66ff5c4c160b87937b1aa6df1ab Mon Sep 17 00:00:00 2001 From: Gemma Lamont Date: Thu, 15 Aug 2024 07:46:24 +0200 Subject: [PATCH 3/3] [7hH8ZRTU] Fix typo --- core/src/test/java/apoc/text/StringsTest.java | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/core/src/test/java/apoc/text/StringsTest.java b/core/src/test/java/apoc/text/StringsTest.java index 8494d8e48..f1dfaa4ff 100644 --- a/core/src/test/java/apoc/text/StringsTest.java +++ b/core/src/test/java/apoc/text/StringsTest.java @@ -539,7 +539,7 @@ public void testRegexGroups() { } @Test - public void singleGroupNyName() { + public void singleGroupByName() { testResult( db, "RETURN apoc.text.regexGroupsByName('tenable_asset','(?\\w+)\\_(?\\w+)') AS result", @@ -556,7 +556,7 @@ public void singleGroupNyName() { } @Test - public void multipleGroupsNyName() { + public void multipleGroupsByName() { testResult( db, "RETURN 
apoc.text.regexGroupsByName('abc yyy1 def yyy2','\\\\w+)>(?\\\\w+)') AS result", @@ -579,7 +579,7 @@ public void multipleGroupsNyName() { } @Test - public void groupNyNameWithMissingFirstGroup() { + public void groupByNameWithMissingFirstGroup() { testResult( db, "RETURN apoc.text.regexGroupsByName('_asset','(?\\w+)?\\_(?\\w+)') AS result", @@ -593,7 +593,7 @@ public void groupNyNameWithMissingFirstGroup() { } @Test - public void groupNyNameWithMissingSecondGroup() { + public void groupByNameWithMissingSecondGroup() { testResult( db, "RETURN apoc.text.regexGroupsByName('asset_','(?\\w+)?\\_(?\\w+)?') AS result", @@ -607,7 +607,7 @@ public void groupNyNameWithMissingSecondGroup() { } @Test - public void groupNyNameNoMatches() { + public void groupByNameNoMatches() { testResult( db, "RETURN apoc.text.regexGroupsByName('hello','(?\\w+)?\\_(?\\w+)?') AS result", @@ -620,7 +620,7 @@ public void groupNyNameNoMatches() { } @Test - public void groupNyNameWithInvalidPattern1() { + public void groupByNameWithInvalidPattern1() { QueryExecutionException e = assertThrows( QueryExecutionException.class, () -> testCall( @@ -638,7 +638,7 @@ public void groupNyNameWithInvalidPattern1() { } @Test - public void groupNyNameWithInvalidPattern2() { + public void groupByNameWithInvalidPattern2() { QueryExecutionException e = assertThrows( QueryExecutionException.class, () -> testCall(db, "RETURN apoc.text.regexGroupsByName('asset_','(? {})); @@ -652,7 +652,7 @@ public void groupNyNameWithInvalidPattern2() { } @Test - public void groupNyNameWithNoGroupNames() { + public void groupByNameWithNoGroupNames() { testResult(db, "RETURN apoc.text.regexGroupsByName('asset_','(\\w+)?\\_(\\w+)?') AS result", result -> { final List r = Iterators.single(result.columnAs("result"));