Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[7hH8ZRTU] Add new regex matcher by group names #654

Merged
merged 3 commits into from
Aug 15, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ downloadLicenses {

allprojects {
group = 'org.neo4j.procedure'
version = System.getenv('APOC_VERSION') ? System.getenv('APOC_VERSION') : '5.23.0'
version = System.getenv('APOC_VERSION') ? System.getenv('APOC_VERSION') : '5.24.0'
archivesBaseName = 'apoc'
description = """neo4j-apoc-procedures"""
}
Expand Down Expand Up @@ -100,8 +100,8 @@ subprojects {
// neo4jDockerImage system property is used in TestContainerUtil
systemProperties 'user.language': 'en',
'user.country': 'US',
'neo4jDockerImage': project.hasProperty("neo4jDockerEeOverride") ? project.getProperty("neo4jDockerEeOverride") : 'neo4j:5.23.0-enterprise-debian',
'neo4jCommunityDockerImage': project.hasProperty("neo4jDockerCeOverride") ? project.getProperty("neo4jDockerCeOverride") : 'neo4j:5.23.0-debian',
'neo4jDockerImage': project.hasProperty("neo4jDockerEeOverride") ? project.getProperty("neo4jDockerEeOverride") : 'neo4j:5.24.0-enterprise-debian',
'neo4jCommunityDockerImage': project.hasProperty("neo4jDockerCeOverride") ? project.getProperty("neo4jDockerCeOverride") : 'neo4j:5.24.0-debian',
'coreDir': 'core',
'testDockerBundle': project.hasProperty("testDockerBundle") ? true : false

Expand Down Expand Up @@ -169,7 +169,7 @@ apply from: "licenses-source-header.gradle"

ext {
publicDir = "${project.rootDir}"
neo4jVersionEffective = project.hasProperty("neo4jVersionOverride") ? project.getProperty("neo4jVersionOverride") : "5.23.0"
neo4jVersionEffective = project.hasProperty("neo4jVersionOverride") ? project.getProperty("neo4jVersionOverride") : "5.24.0"
testContainersVersion = '1.19.1'
apacheArrowVersion = '15.0.0'
}
48 changes: 48 additions & 0 deletions core/src/main/java/apoc/text/Strings.java
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
Expand All @@ -42,6 +43,7 @@
import java.util.function.Supplier;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;
import org.apache.commons.lang3.StringUtils;
Expand Down Expand Up @@ -182,6 +184,52 @@ public List<List<String>> regexGroups(final @Name("text") String text, final @Na
}
}

/**
 * Finds every match of {@code regex} in {@code text} and reports, for each match, the matched
 * text together with the values captured by the regex's named groups.
 *
 * <p>Each element of the returned list is a map with two entries:
 * <ul>
 *   <li>{@code "group"} - the complete text of the match;</li>
 *   <li>{@code "matches"} - a map from group name to captured text. Named groups that did not
 *       participate in the match are omitted.</li>
 * </ul>
 *
 * @param text the text to search; may be {@code null}
 * @param regex the regular expression to apply; may be {@code null}
 * @return one map per match in order of occurrence; an empty list when either argument is
 *     {@code null} or when nothing matches
 * @throws RuntimeException if {@code regex} is not a valid regular expression
 */
@UserFunction("apoc.text.regexGroupsByName")
@Description("Returns all groups with their group name matching the given regular expression in the given text.")
public List<Map<String, Object>> regexGroupsByName(
        final @Name("text") String text, final @Name("regex") String regex) {
    if (text == null || regex == null) {
        // Type-safe replacement for the raw Collections.EMPTY_LIST constant.
        return Collections.emptyList();
    }

    final Pattern pattern;
    try {
        pattern = Pattern.compile(regex);
    } catch (PatternSyntaxException e) {
        // Intentionally not chained as a cause: consumers that unwrap the root cause of the
        // query failure rely on finding this exception, with this prefixed message, there.
        throw new RuntimeException("Invalid regex pattern: " + e.getMessage());
    }

    final List<String> namedGroups = getNamedGroups(regex);
    final Matcher matcher = pattern.matcher(text);
    final List<Map<String, Object>> result = new ArrayList<>();
    while (matcher.find()) {
        Map<String, Object> matches = new HashMap<>();
        for (String groupName : namedGroups) {
            String match = matcher.group(groupName);
            // A null capture means the (optional) group took no part in this match.
            if (match != null) {
                matches.put(groupName, match);
            }
        }

        Map<String, Object> matchGroupResult = new HashMap<>();
        matchGroupResult.put("group", matcher.group());
        matchGroupResult.put("matches", matches);
        result.add(matchGroupResult);
    }
    return result;
}

/**
 * Matches the opening of a named capturing group, {@code (?<name>}, capturing the name.
 * The name is limited to what {@link Pattern} accepts (an ASCII letter followed by ASCII letters
 * or digits) so that look-behind constructs {@code (?<=...)} / {@code (?<!...)} never qualify.
 */
private static final Pattern NAMED_GROUP_DECLARATION = Pattern.compile("\\(\\?<([a-zA-Z][a-zA-Z0-9]*)>");

/**
 * Extracts the names of the named capturing groups declared in a regular expression.
 *
 * <p>This is a textual scan of the pattern source. A candidate is ignored when its opening
 * parenthesis is escaped by a backslash (an odd number of backslashes directly precede it),
 * because it then denotes a literal {@code (} rather than a group. Text inside character
 * classes or {@code \Q...\E} quotes is not recognised as such and is scanned like the rest.
 *
 * @param text the source of the regular expression; must not be {@code null}
 * @return the group names in order of appearance; empty when the pattern declares none
 */
private static List<String> getNamedGroups(String text) {
    List<String> namedGroups = new ArrayList<>();

    Matcher declaration = NAMED_GROUP_DECLARATION.matcher(text);
    while (declaration.find()) {
        // Count the backslashes immediately before the '(' to tell "\(" (literal) from "\\(" (group).
        int precedingBackslashes = 0;
        for (int i = declaration.start() - 1; i >= 0 && text.charAt(i) == '\\'; i--) {
            precedingBackslashes++;
        }
        if (precedingBackslashes % 2 == 0) {
            namedGroups.add(declaration.group(1));
        }
    }
    return namedGroups;
}

@UserFunction("apoc.text.join")
@Description("Joins the given `STRING` values using the given delimiter.")
public String join(final @Name("texts") List<String> texts, final @Name("delimiter") String delimiter) {
Expand Down
145 changes: 142 additions & 3 deletions core/src/test/java/apoc/text/StringsTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
import static java.lang.Math.toIntExact;
import static java.util.Arrays.asList;
import static java.util.Collections.singletonList;
import static junit.framework.TestCase.assertEquals;
import static junit.framework.TestCase.assertTrue;
import static org.junit.Assert.*;

import apoc.util.TestUtil;
Expand All @@ -35,6 +37,7 @@
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang3.exception.ExceptionUtils;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.ClassRule;
Expand Down Expand Up @@ -67,9 +70,12 @@ public static void teardown() {

@Test
public void testIndexOfSubstring() {
String query = "WITH 'Hello World!' as text\n"
+ "WITH text, size(text) as len, apoc.text.indexOf(text, 'World',3) as index\n"
+ "RETURN substring(text, case index when -1 then len-1 else index end, len) as value;\n";
String query =
"""
WITH 'Hello World!' as text
WITH text, size(text) as len, apoc.text.indexOf(text, 'World',3) as index
RETURN substring(text, case index when -1 then len-1 else index end, len) as value;
""";
testCall(db, query, (row) -> assertEquals("World!", row.get("value")));
}

Expand Down Expand Up @@ -532,6 +538,139 @@ public void testRegexGroups() {
});
}

@Test
public void singleGroupNyName() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
public void singleGroupNyName() {
public void singleGroupByName() {

testResult(
db,
"RETURN apoc.text.regexGroupsByName('tenable_asset','(?<firstPart>\\w+)\\_(?<secondPart>\\w+)') AS result",
result -> {
final List<Object> r = Iterators.single(result.columnAs("result"));

List<Map<String, Object>> expected = new ArrayList<>(List.of(Map.of(
"group",
"tenable_asset",
"matches",
Map.of("firstPart", "tenable", "secondPart", "asset"))));
assertTrue(r.containsAll(expected));
});
}

@Test
public void multipleGroupsNyName() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
public void multipleGroupsNyName() {
public void multipleGroupsByName() {

testResult(
db,
"RETURN apoc.text.regexGroupsByName('abc <link xxx1>yyy1</link> def <link xxx2>yyy2</link>','<link (?<firstPart>\\\\w+)>(?<secondPart>\\\\w+)</link>') AS result",
result -> {
final List<Object> r = Iterators.single(result.columnAs("result"));

List<Map<String, Object>> expected = new ArrayList<>(List.of(
Map.of(
"group",
"<link xxx1>yyy1</link>",
"matches",
Map.of("firstPart", "xxx1", "secondPart", "yyy1")),
Map.of(
"group",
"<link xxx2>yyy2</link>",
"matches",
Map.of("firstPart", "xxx2", "secondPart", "yyy2"))));
assertTrue(r.containsAll(expected));
});
}

@Test
public void groupNyNameWithMissingFirstGroup() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
public void groupNyNameWithMissingFirstGroup() {
public void groupByNameWithMissingFirstGroup() {

testResult(
db,
"RETURN apoc.text.regexGroupsByName('_asset','(?<firstPart>\\w+)?\\_(?<secondPart>\\w+)') AS result",
result -> {
final List<Object> r = Iterators.single(result.columnAs("result"));

List<Map<String, Object>> expected = new ArrayList<>(
List.of(Map.of("group", "_asset", "matches", Map.of("secondPart", "asset"))));
assertTrue(r.containsAll(expected));
});
}

@Test
public void groupNyNameWithMissingSecondGroup() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
public void groupNyNameWithMissingSecondGroup() {
public void groupByNameWithMissingSecondGroup() {

testResult(
db,
"RETURN apoc.text.regexGroupsByName('asset_','(?<firstPart>\\w+)?\\_(?<secondPart>\\w+)?') AS result",
result -> {
final List<Object> r = Iterators.single(result.columnAs("result"));

List<Map<String, Object>> expected = new ArrayList<>(
List.of(Map.of("group", "asset_", "matches", Map.of("firstPart", "asset"))));
assertTrue(r.containsAll(expected));
});
}

@Test
public void groupNyNameNoMatches() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
public void groupNyNameNoMatches() {
public void groupByNameNoMatches() {

testResult(
db,
"RETURN apoc.text.regexGroupsByName('hello','(?<firstPart>\\w+)?\\_(?<secondPart>\\w+)?') AS result",
result -> {
final List<Object> r = Iterators.single(result.columnAs("result"));

List<Map<String, Object>> expected = new ArrayList<>();
assertTrue(r.containsAll(expected));
});
}

@Test
public void groupNyNameWithInvalidPattern1() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
public void groupNyNameWithInvalidPattern1() {
public void groupByNameWithInvalidPattern1() {

QueryExecutionException e = assertThrows(
QueryExecutionException.class,
() -> testCall(
db,
"RETURN apoc.text.regexGroupsByName('asset_','(?<firstPart>\\w+)?\\_(?<firstPart>\\w+)?') AS result",
(r) -> {}));
Throwable except = ExceptionUtils.getRootCause(e);
assertTrue(except instanceof RuntimeException);
assertEquals(
"""
Invalid regex pattern: Named capturing group <firstPart> is already defined near index 32
(?<firstPart>\\w+)?\\_(?<firstPart>\\w+)?
^""",
except.getMessage());
}

@Test
public void groupNyNameWithInvalidPattern2() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
public void groupNyNameWithInvalidPattern2() {
public void groupByNameWithInvalidPattern2() {

QueryExecutionException e = assertThrows(
QueryExecutionException.class,
() -> testCall(db, "RETURN apoc.text.regexGroupsByName('asset_','(?<firstPart') AS result", (r) -> {}));
Throwable except = ExceptionUtils.getRootCause(e);
assertTrue(except instanceof RuntimeException);
assertEquals(
"""
Invalid regex pattern: named capturing group is missing trailing '>' near index 12
(?<firstPart""",
except.getMessage());
}

@Test
public void groupNyNameWithNoGroupNames() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
public void groupNyNameWithNoGroupNames() {
public void groupByNameWithNoGroupNames() {

testResult(db, "RETURN apoc.text.regexGroupsByName('asset_','(\\w+)?\\_(\\w+)?') AS result", result -> {
final List<Object> r = Iterators.single(result.columnAs("result"));

List<List<Object>> expected = new ArrayList<>();
assertTrue(r.containsAll(expected));
});
}

/**
 * Smoke test: calling {@code apoc.text.regexGroupsByName} with a {@code null} text or a
 * {@code null} regex must complete without throwing. The returned row is not inspected.
 */
@Test
public void testRegexGroupsByNameForNPE() {
    final String nullTextQuery =
            "RETURN apoc.text.regexGroupsByName(null,'<link (?<firstPart>\\\\w+)>(?<secondPart>\\\\w+)</link>') AS result";
    final String nullRegexQuery = "RETURN apoc.text.regexGroupsByName('abc',null) AS result";

    // Neither call is expected to raise; the callbacks are intentionally empty.
    testCall(db, nullTextQuery, ignored -> {});
    testCall(db, nullRegexQuery, ignored -> {});
}

@Test
public void testRegexGroupsForNPE() {
// throws no exception
Expand Down
1 change: 1 addition & 0 deletions it/src/test/java/apoc/it/core/ApocSplitTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,7 @@ public class ApocSplitTest {
"apoc.text.regreplace",
"apoc.text.split",
"apoc.text.regexGroups",
"apoc.text.regexGroupsByName",
"apoc.text.join",
"apoc.text.clean",
"apoc.text.compareCleaned",
Expand Down
Loading