diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index 5b846545906..8e61b89c285 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -4040,16 +4040,17 @@ void testStringFindOperations() { @Test void testExtractRe() { - ColumnVector input = ColumnVector.fromStrings("a1", "b2", "c3", null); - Table expected = new Table.TestBuilder() - .column("a", "b", null, null) - .column("1", "2", null, null) - .build(); - try (Table found = input.extractRe("([ab])(\\d)")) { - assertTablesAreEqual(expected, found); - } - try (Table found = input.extractRe(new RegexProgram("([ab])(\\d)"))) { - assertTablesAreEqual(expected, found); + try (ColumnVector input = ColumnVector.fromStrings("a1", "b2", "c3", null); + Table expected = new Table.TestBuilder() + .column("a", "b", null, null) + .column("1", "2", null, null) + .build()) { + try (Table found = input.extractRe("([ab])(\\d)")) { + assertTablesAreEqual(expected, found); + } + try (Table found = input.extractRe(new RegexProgram("([ab])(\\d)"))) { + assertTablesAreEqual(expected, found); + } } } @@ -4057,36 +4058,37 @@ void testExtractRe() { void testExtractAllRecord() { String pattern = "([ab])(\\d)"; RegexProgram regexProg = new RegexProgram(pattern); - ColumnVector v = ColumnVector.fromStrings("a1", "b2", "c3", null, "a1b1c3a2"); - ColumnVector expectedIdx0 = ColumnVector.fromLists( - new HostColumnVector.ListType(true, - new HostColumnVector.BasicType(true, DType.STRING)), - Arrays.asList("a1"), - Arrays.asList("b2"), - Arrays.asList(), - null, - Arrays.asList("a1", "b1", "a2")); - ColumnVector expectedIdx12 = ColumnVector.fromLists( - new HostColumnVector.ListType(true, - new HostColumnVector.BasicType(true, DType.STRING)), - Arrays.asList("a", "1"), - Arrays.asList("b", "2"), - null, - null, - Arrays.asList("a", "1", "b", "1", "a", "2")); - try (ColumnVector resultIdx0 = v.extractAllRecord(pattern, 0); - ColumnVector resultIdx1 = v.extractAllRecord(pattern, 1); - ColumnVector resultIdx2 = v.extractAllRecord(pattern, 2)) { - assertColumnsAreEqual(expectedIdx0, resultIdx0); - assertColumnsAreEqual(expectedIdx12, resultIdx1); - assertColumnsAreEqual(expectedIdx12, resultIdx2); - } - try (ColumnVector resultIdx0 = v.extractAllRecord(regexProg, 0); - ColumnVector resultIdx1 = v.extractAllRecord(regexProg, 1); - ColumnVector resultIdx2 = v.extractAllRecord(regexProg, 2)) { - assertColumnsAreEqual(expectedIdx0, resultIdx0); - assertColumnsAreEqual(expectedIdx12, resultIdx1); - assertColumnsAreEqual(expectedIdx12, resultIdx2); + try (ColumnVector v = ColumnVector.fromStrings("a1", "b2", "c3", null, "a1b1c3a2"); + ColumnVector expectedIdx0 = ColumnVector.fromLists( + new HostColumnVector.ListType(true, + new HostColumnVector.BasicType(true, DType.STRING)), + Arrays.asList("a1"), + Arrays.asList("b2"), + Arrays.asList(), + null, + Arrays.asList("a1", "b1", "a2")); + ColumnVector expectedIdx12 = ColumnVector.fromLists( + new HostColumnVector.ListType(true, + new HostColumnVector.BasicType(true, DType.STRING)), + Arrays.asList("a", "1"), + Arrays.asList("b", "2"), + null, + null, + Arrays.asList("a", "1", "b", "1", "a", "2"))) { + try (ColumnVector resultIdx0 = v.extractAllRecord(pattern, 0); + ColumnVector resultIdx1 = v.extractAllRecord(pattern, 1); + ColumnVector resultIdx2 = v.extractAllRecord(pattern, 2)) { + assertColumnsAreEqual(expectedIdx0, resultIdx0); + assertColumnsAreEqual(expectedIdx12, resultIdx1); + assertColumnsAreEqual(expectedIdx12, resultIdx2); + } + try (ColumnVector resultIdx0 = v.extractAllRecord(regexProg, 0); + ColumnVector resultIdx1 = v.extractAllRecord(regexProg, 1); + ColumnVector resultIdx2 = v.extractAllRecord(regexProg, 2)) { + assertColumnsAreEqual(expectedIdx0, resultIdx0); + assertColumnsAreEqual(expectedIdx12, resultIdx1); + assertColumnsAreEqual(expectedIdx12, resultIdx2); + } } } @@ -4100,34 +4102,35 @@ void testMatchesRe() { RegexProgram regexProg2 = new RegexProgram(patternString2, CaptureGroups.NON_CAPTURE); RegexProgram regexProg3 = new RegexProgram(patternString3, CaptureGroups.NON_CAPTURE); RegexProgram regexProg4 = new RegexProgram(patternString4, CaptureGroups.NON_CAPTURE); - ColumnVector testStrings = ColumnVector.fromStrings("", null, "abCD", "ovér the", - "lazy @dog", "1234", "00:0:00"); - ColumnVector expected1 = ColumnVector.fromBoxedBooleans(false, null, false, false, false, - true, true); - ColumnVector expected2 = ColumnVector.fromBoxedBooleans(false, null, false, false, true, - false, false); - ColumnVector expected3 = ColumnVector.fromBoxedBooleans(true, null, true, true, true, - true, true); - try (ColumnVector res1 = testStrings.matchesRe(patternString1); - ColumnVector res2 = testStrings.matchesRe(patternString2); - ColumnVector res3 = testStrings.matchesRe(patternString3)) { - assertColumnsAreEqual(expected1, res1); - assertColumnsAreEqual(expected2, res2); - assertColumnsAreEqual(expected3, res3); - } - try (ColumnVector res1 = testStrings.matchesRe(regexProg1); - ColumnVector res2 = testStrings.matchesRe(regexProg2); - ColumnVector res3 = testStrings.matchesRe(regexProg3)) { - assertColumnsAreEqual(expected1, res1); - assertColumnsAreEqual(expected2, res2); - assertColumnsAreEqual(expected3, res3); + try (ColumnVector testStrings = ColumnVector.fromStrings("", null, "abCD", "ovér the", + "lazy @dog", "1234", "00:0:00"); + ColumnVector expected1 = ColumnVector.fromBoxedBooleans(false, null, false, false, false, + true, true); + ColumnVector expected2 = ColumnVector.fromBoxedBooleans(false, null, false, false, true, + false, false); + ColumnVector expected3 = ColumnVector.fromBoxedBooleans(true, null, true, true, true, + true, true)) { + try (ColumnVector res1 = testStrings.matchesRe(patternString1); + ColumnVector res2 = testStrings.matchesRe(patternString2); + ColumnVector res3 = testStrings.matchesRe(patternString3)) { + assertColumnsAreEqual(expected1, res1); + assertColumnsAreEqual(expected2, res2); + assertColumnsAreEqual(expected3, res3); + } + try (ColumnVector res1 = testStrings.matchesRe(regexProg1); + ColumnVector res2 = testStrings.matchesRe(regexProg2); + ColumnVector res3 = testStrings.matchesRe(regexProg3)) { + assertColumnsAreEqual(expected1, res1); + assertColumnsAreEqual(expected2, res2); + assertColumnsAreEqual(expected3, res3); + } + assertThrows(AssertionError.class, () -> { + try (ColumnVector res = testStrings.matchesRe(patternString4)) {} + }); + assertThrows(AssertionError.class, () -> { + try (ColumnVector res = testStrings.matchesRe(regexProg4)) {} + }); } - assertThrows(AssertionError.class, () -> { - try (ColumnVector res = testStrings.matchesRe(patternString4)) {} - }); - assertThrows(AssertionError.class, () -> { - try (ColumnVector res = testStrings.matchesRe(regexProg4)) {} - }); } @Test @@ -4140,35 +4143,38 @@ void testContainsRe() { RegexProgram regexProg2 = new RegexProgram(patternString2, CaptureGroups.NON_CAPTURE); RegexProgram regexProg3 = new RegexProgram(patternString3, CaptureGroups.NON_CAPTURE); RegexProgram regexProg4 = new RegexProgram(patternString4, CaptureGroups.NON_CAPTURE); - ColumnVector testStrings = ColumnVector.fromStrings(null, "abCD", "ovér the", - "lazy @dog", "1234", "00:0:00", "abc1234abc", "there @are 2 lazy @dogs"); - ColumnVector expected1 = ColumnVector.fromBoxedBooleans(null, false, false, false, - true, true, true, true); - ColumnVector expected2 = ColumnVector.fromBoxedBooleans(null, false, false, true, - false, false, false, true); - ColumnVector expected3 = ColumnVector.fromBoxedBooleans(null, true, true, true, - true, true, true, true); - try (ColumnVector res1 = testStrings.containsRe(patternString1); - ColumnVector res2 = testStrings.containsRe(patternString2); - ColumnVector res3 = testStrings.containsRe(patternString3)) { - assertColumnsAreEqual(expected1, res1); - assertColumnsAreEqual(expected2, res2); - assertColumnsAreEqual(expected3, res3); + try (ColumnVector testStrings = ColumnVector.fromStrings(null, "abCD", "ovér the", + "lazy @dog", "1234", "00:0:00", "abc1234abc", "there @are 2 lazy @dogs"); + ColumnVector expected1 = ColumnVector.fromBoxedBooleans(null, false, false, false, + true, true, true, true); + ColumnVector expected2 = ColumnVector.fromBoxedBooleans(null, false, false, true, + false, false, false, true); + ColumnVector expected3 = ColumnVector.fromBoxedBooleans(null, true, true, true, + true, true, true, true)) { + try (ColumnVector res1 = testStrings.containsRe(patternString1); + ColumnVector res2 = testStrings.containsRe(patternString2); + ColumnVector res3 = testStrings.containsRe(patternString3)) { + assertColumnsAreEqual(expected1, res1); + assertColumnsAreEqual(expected2, res2); + assertColumnsAreEqual(expected3, res3); + } + try (ColumnVector res1 = testStrings.containsRe(regexProg1); + ColumnVector res2 = testStrings.containsRe(regexProg2); + ColumnVector res3 = testStrings.containsRe(regexProg3)) { + assertColumnsAreEqual(expected1, res1); + assertColumnsAreEqual(expected2, res2); + assertColumnsAreEqual(expected3, res3); + } } - try (ColumnVector res1 = testStrings.containsRe(regexProg1); - ColumnVector res2 = testStrings.containsRe(regexProg2); - ColumnVector res3 = testStrings.containsRe(regexProg3)) { - assertColumnsAreEqual(expected1, res1); - assertColumnsAreEqual(expected2, res2); - assertColumnsAreEqual(expected3, res3); + try (ColumnVector testStringsError = ColumnVector.fromStrings("", null, "abCD", "ovér the", + "lazy @dog", "1234", "00:0:00", "abc1234abc", "there @are 2 lazy @dogs")) { + assertThrows(AssertionError.class, () -> { + try (ColumnVector res = testStringsError.containsRe(patternString4)) {} + }); + assertThrows(AssertionError.class, () -> { + try (ColumnVector res = testStringsError.containsRe(regexProg4)) {} + }); } - ColumnVector testStringsError = ColumnVector.fromStrings("", null, "abCD", "ovér the", - "lazy @dog", "1234", "00:0:00", "abc1234abc", "there @are 2 lazy @dogs"); - assertThrows(AssertionError.class, () -> { - try (ColumnVector res = testStringsError.containsRe(patternString4)) {}}); - assertThrows(AssertionError.class, () -> { - try (ColumnVector res = testStringsError.containsRe(regexProg4)) {} - }); } @Test @@ -4441,30 +4447,32 @@ void testsubstring() { @Test void testExtractListElements() { - ColumnVector v = ColumnVector.fromStrings("Héllo there", "thésé", null, "", "ARé some", "test strings"); - ColumnVector expected = ColumnVector.fromStrings("Héllo", "thésé", null, "", "ARé", "test"); - try (ColumnVector list = v.stringSplitRecord(" "); - ColumnVector result = list.extractListElement(0)) { - assertColumnsAreEqual(expected, result); - } - try (ColumnVector list = v.stringSplitRecord(new RegexProgram(" ", CaptureGroups.NON_CAPTURE)); - ColumnVector result = list.extractListElement(0)) { - assertColumnsAreEqual(expected, result); + try (ColumnVector v = ColumnVector.fromStrings("Héllo there", "thésé", null, "", "ARé some", "test strings"); + ColumnVector expected = ColumnVector.fromStrings("Héllo", "thésé", null, "", "ARé", "test")) { + try (ColumnVector list = v.stringSplitRecord(" "); + ColumnVector result = list.extractListElement(0)) { + assertColumnsAreEqual(expected, result); + } + try (ColumnVector list = v.stringSplitRecord(new RegexProgram(" ", CaptureGroups.NON_CAPTURE)); + ColumnVector result = list.extractListElement(0)) { + assertColumnsAreEqual(expected, result); + } } } @Test void testExtractListElementsV() { - ColumnVector v = ColumnVector.fromStrings("Héllo there", "thésé", null, "", "ARé some", "test strings"); - ColumnVector indices = ColumnVector.fromInts(0, 2, 0, 0, 1, -1); - ColumnVector expected = ColumnVector.fromStrings("Héllo", null, null, "", "some", "strings"); - try (ColumnVector list = v.stringSplitRecord(" "); - ColumnVector result = list.extractListElement(indices)) { - assertColumnsAreEqual(expected, result); - } - try (ColumnVector list = v.stringSplitRecord(new RegexProgram(" ", CaptureGroups.NON_CAPTURE)); - ColumnVector result = list.extractListElement(indices)) { - assertColumnsAreEqual(expected, result); + try (ColumnVector v = ColumnVector.fromStrings("Héllo there", "thésé", null, "", "ARé some", "test strings"); + ColumnVector indices = ColumnVector.fromInts(0, 2, 0, 0, 1, -1); + ColumnVector expected = ColumnVector.fromStrings("Héllo", null, null, "", "some", "strings")) { + try (ColumnVector list = v.stringSplitRecord(" "); + ColumnVector result = list.extractListElement(indices)) { + assertColumnsAreEqual(expected, result); + } + try (ColumnVector list = v.stringSplitRecord(new RegexProgram(" ", CaptureGroups.NON_CAPTURE)); + ColumnVector result = list.extractListElement(indices)) { + assertColumnsAreEqual(expected, result); + } } } @@ -4992,26 +5000,27 @@ void testReverseList() { void testStringSplit() { String pattern = " "; RegexProgram regexProg = new RegexProgram(pattern, CaptureGroups.NON_CAPTURE); - ColumnVector v = ColumnVector.fromStrings("Héllo there all", "thésé", null, "", - "ARé some things", "test strings here"); - Table expectedSplitLimit2 = new Table.TestBuilder() - .column("Héllo", "thésé", null, "", "ARé", "test") - .column("there all", null, null, null, "some things", "strings here") - .build(); - Table expectedSplitAll = new Table.TestBuilder() - .column("Héllo", "thésé", null, "", "ARé", "test") - .column("there", null, null, null, "some", "strings") - .column("all", null, null, null, "things", "here") - .build(); - try (Table resultSplitLimit2 = v.stringSplit(pattern, 2); - Table resultSplitAll = v.stringSplit(pattern)) { - assertTablesAreEqual(expectedSplitLimit2, resultSplitLimit2); - assertTablesAreEqual(expectedSplitAll, resultSplitAll); - } - try (Table resultSplitLimit2 = v.stringSplit(regexProg, 2); - Table resultSplitAll = v.stringSplit(regexProg)) { - assertTablesAreEqual(expectedSplitLimit2, resultSplitLimit2); - assertTablesAreEqual(expectedSplitAll, resultSplitAll); + try (ColumnVector v = ColumnVector.fromStrings("Héllo there all", "thésé", null, "", + "ARé some things", "test strings here"); + Table expectedSplitLimit2 = new Table.TestBuilder() + .column("Héllo", "thésé", null, "", "ARé", "test") + .column("there all", null, null, null, "some things", "strings here") + .build(); + Table expectedSplitAll = new Table.TestBuilder() + .column("Héllo", "thésé", null, "", "ARé", "test") + .column("there", null, null, null, "some", "strings") + .column("all", null, null, null, "things", "here") + .build()) { + try (Table resultSplitLimit2 = v.stringSplit(pattern, 2); + Table resultSplitAll = v.stringSplit(pattern)) { + assertTablesAreEqual(expectedSplitLimit2, resultSplitLimit2); + assertTablesAreEqual(expectedSplitAll, resultSplitAll); + } + try (Table resultSplitLimit2 = v.stringSplit(regexProg, 2); + Table resultSplitAll = v.stringSplit(regexProg)) { + assertTablesAreEqual(expectedSplitLimit2, resultSplitLimit2); + assertTablesAreEqual(expectedSplitAll, resultSplitAll); + } } } @@ -5019,26 +5028,27 @@ void testStringSplit() { void testStringSplitByRegularExpression() { String pattern = "[_ ]"; RegexProgram regexProg = new RegexProgram(pattern, CaptureGroups.NON_CAPTURE); - ColumnVector v = ColumnVector.fromStrings("Héllo_there all", "thésé", null, "", - "ARé some_things", "test_strings_here"); - Table expectedSplitLimit2 = new Table.TestBuilder() - .column("Héllo", "thésé", null, "", "ARé", "test") - .column("there all", null, null, null, "some_things", "strings_here") - .build(); - Table expectedSplitAll = new Table.TestBuilder() - .column("Héllo", "thésé", null, "", "ARé", "test") - .column("there", null, null, null, "some", "strings") - .column("all", null, null, null, "things", "here") - .build(); - try (Table resultSplitLimit2 = v.stringSplit(pattern, 2, true); - Table resultSplitAll = v.stringSplit(pattern, true)) { - assertTablesAreEqual(expectedSplitLimit2, resultSplitLimit2); - assertTablesAreEqual(expectedSplitAll, resultSplitAll); - } - try (Table resultSplitLimit2 = v.stringSplit(regexProg, 2); - Table resultSplitAll = v.stringSplit(regexProg)) { - assertTablesAreEqual(expectedSplitLimit2, resultSplitLimit2); - assertTablesAreEqual(expectedSplitAll, resultSplitAll); + try (ColumnVector v = ColumnVector.fromStrings("Héllo_there all", "thésé", null, "", + "ARé some_things", "test_strings_here"); + Table expectedSplitLimit2 = new Table.TestBuilder() + .column("Héllo", "thésé", null, "", "ARé", "test") + .column("there all", null, null, null, "some_things", "strings_here") + .build(); + Table expectedSplitAll = new Table.TestBuilder() + .column("Héllo", "thésé", null, "", "ARé", "test") + .column("there", null, null, null, "some", "strings") + .column("all", null, null, null, "things", "here") + .build()) { + try (Table resultSplitLimit2 = v.stringSplit(pattern, 2, true); + Table resultSplitAll = v.stringSplit(pattern, true)) { + assertTablesAreEqual(expectedSplitLimit2, resultSplitLimit2); + assertTablesAreEqual(expectedSplitAll, resultSplitAll); + } + try (Table resultSplitLimit2 = v.stringSplit(regexProg, 2); + Table resultSplitAll = v.stringSplit(regexProg)) { + assertTablesAreEqual(expectedSplitLimit2, resultSplitLimit2); + assertTablesAreEqual(expectedSplitAll, resultSplitAll); + } } } @@ -5046,35 +5056,36 @@ void testStringSplitByRegularExpression() { void testStringSplitRecord() { String pattern = " "; RegexProgram regexProg = new RegexProgram(pattern, CaptureGroups.NON_CAPTURE); - ColumnVector v = ColumnVector.fromStrings("Héllo there all", "thésé", null, "", - "ARé some things", "test strings here"); - ColumnVector expectedSplitLimit2 = ColumnVector.fromLists( - new HostColumnVector.ListType(true, - new HostColumnVector.BasicType(true, DType.STRING)), - Arrays.asList("Héllo", "there all"), - Arrays.asList("thésé"), - null, - Arrays.asList(""), - Arrays.asList("ARé", "some things"), - Arrays.asList("test", "strings here")); - ColumnVector expectedSplitAll = ColumnVector.fromLists( - new HostColumnVector.ListType(true, - new HostColumnVector.BasicType(true, DType.STRING)), - Arrays.asList("Héllo", "there", "all"), - Arrays.asList("thésé"), - null, - Arrays.asList(""), - Arrays.asList("ARé", "some", "things"), - Arrays.asList("test", "strings", "here")); - try (ColumnVector resultSplitLimit2 = v.stringSplitRecord(pattern, 2); - ColumnVector resultSplitAll = v.stringSplitRecord(pattern)) { - assertColumnsAreEqual(expectedSplitLimit2, resultSplitLimit2); - assertColumnsAreEqual(expectedSplitAll, resultSplitAll); - } - try (ColumnVector resultSplitLimit2 = v.stringSplitRecord(regexProg, 2); - ColumnVector resultSplitAll = v.stringSplitRecord(regexProg)) { - assertColumnsAreEqual(expectedSplitLimit2, resultSplitLimit2); - assertColumnsAreEqual(expectedSplitAll, resultSplitAll); + try (ColumnVector v = ColumnVector.fromStrings("Héllo there all", "thésé", null, "", + "ARé some things", "test strings here"); + ColumnVector expectedSplitLimit2 = ColumnVector.fromLists( + new HostColumnVector.ListType(true, + new HostColumnVector.BasicType(true, DType.STRING)), + Arrays.asList("Héllo", "there all"), + Arrays.asList("thésé"), + null, + Arrays.asList(""), + Arrays.asList("ARé", "some things"), + Arrays.asList("test", "strings here")); + ColumnVector expectedSplitAll = ColumnVector.fromLists( + new HostColumnVector.ListType(true, + new HostColumnVector.BasicType(true, DType.STRING)), + Arrays.asList("Héllo", "there", "all"), + Arrays.asList("thésé"), + null, + Arrays.asList(""), + Arrays.asList("ARé", "some", "things"), + Arrays.asList("test", "strings", "here"))) { + try (ColumnVector resultSplitLimit2 = v.stringSplitRecord(pattern, 2); + ColumnVector resultSplitAll = v.stringSplitRecord(pattern)) { + assertColumnsAreEqual(expectedSplitLimit2, resultSplitLimit2); + assertColumnsAreEqual(expectedSplitAll, resultSplitAll); + } + try (ColumnVector resultSplitLimit2 = v.stringSplitRecord(regexProg, 2); + ColumnVector resultSplitAll = v.stringSplitRecord(regexProg)) { + assertColumnsAreEqual(expectedSplitLimit2, resultSplitLimit2); + assertColumnsAreEqual(expectedSplitAll, resultSplitAll); + } } } @@ -5082,35 +5093,36 @@ void testStringSplitRecord() { void testStringSplitRecordByRegularExpression() { String pattern = "[_ ]"; RegexProgram regexProg = new RegexProgram(pattern, CaptureGroups.NON_CAPTURE); - ColumnVector v = ColumnVector.fromStrings("Héllo_there all", "thésé", null, "", - "ARé some_things", "test_strings_here"); - ColumnVector expectedSplitLimit2 = ColumnVector.fromLists( - new HostColumnVector.ListType(true, - new HostColumnVector.BasicType(true, DType.STRING)), - Arrays.asList("Héllo", "there all"), - Arrays.asList("thésé"), - null, - Arrays.asList(""), - Arrays.asList("ARé", "some_things"), - Arrays.asList("test", "strings_here")); - ColumnVector expectedSplitAll = ColumnVector.fromLists( - new HostColumnVector.ListType(true, - new HostColumnVector.BasicType(true, DType.STRING)), - Arrays.asList("Héllo", "there", "all"), - Arrays.asList("thésé"), - null, - Arrays.asList(""), - Arrays.asList("ARé", "some", "things"), - Arrays.asList("test", "strings", "here")); - try (ColumnVector resultSplitLimit2 = v.stringSplitRecord(pattern, 2, true); - ColumnVector resultSplitAll = v.stringSplitRecord(pattern, true)) { - assertColumnsAreEqual(expectedSplitLimit2, resultSplitLimit2); - assertColumnsAreEqual(expectedSplitAll, resultSplitAll); - } - try (ColumnVector resultSplitLimit2 = v.stringSplitRecord(regexProg, 2); - ColumnVector resultSplitAll = v.stringSplitRecord(regexProg)) { - assertColumnsAreEqual(expectedSplitLimit2, resultSplitLimit2); - assertColumnsAreEqual(expectedSplitAll, resultSplitAll); + try (ColumnVector v = ColumnVector.fromStrings("Héllo_there all", "thésé", null, "", + "ARé some_things", "test_strings_here"); + ColumnVector expectedSplitLimit2 = ColumnVector.fromLists( + new HostColumnVector.ListType(true, + new HostColumnVector.BasicType(true, DType.STRING)), + Arrays.asList("Héllo", "there all"), + Arrays.asList("thésé"), + null, + Arrays.asList(""), + Arrays.asList("ARé", "some_things"), + Arrays.asList("test", "strings_here")); + ColumnVector expectedSplitAll = ColumnVector.fromLists( + new HostColumnVector.ListType(true, + new HostColumnVector.BasicType(true, DType.STRING)), + Arrays.asList("Héllo", "there", "all"), + Arrays.asList("thésé"), + null, + Arrays.asList(""), + Arrays.asList("ARé", "some", "things"), + Arrays.asList("test", "strings", "here"))) { + try (ColumnVector resultSplitLimit2 = v.stringSplitRecord(pattern, 2, true); + ColumnVector resultSplitAll = v.stringSplitRecord(pattern, true)) { + assertColumnsAreEqual(expectedSplitLimit2, resultSplitLimit2); + assertColumnsAreEqual(expectedSplitAll, resultSplitAll); + } + try (ColumnVector resultSplitLimit2 = v.stringSplitRecord(regexProg, 2); + ColumnVector resultSplitAll = v.stringSplitRecord(regexProg)) { + assertColumnsAreEqual(expectedSplitLimit2, resultSplitLimit2); + assertColumnsAreEqual(expectedSplitAll, resultSplitAll); + } } } @@ -5159,40 +5171,41 @@ void teststringReplaceThrowsException() { @Test void testReplaceRegex() { - ColumnVector v = ColumnVector.fromStrings("title and Title with title", "nothing", null, "Title"); - Scalar repl = Scalar.fromString("Repl"); - String pattern = "[tT]itle"; - RegexProgram regexProg = new RegexProgram(pattern, CaptureGroups.NON_CAPTURE); - try (ColumnVector actual = v.replaceRegex(pattern, repl); - ColumnVector expected = - ColumnVector.fromStrings("Repl and Repl with Repl", "nothing", null, "Repl")) { - assertColumnsAreEqual(expected, actual); - } + try (ColumnVector v = ColumnVector.fromStrings("title and Title with title", "nothing", null, "Title"); + Scalar repl = Scalar.fromString("Repl")) { + String pattern = "[tT]itle"; + RegexProgram regexProg = new RegexProgram(pattern, CaptureGroups.NON_CAPTURE); + try (ColumnVector actual = v.replaceRegex(pattern, repl); + ColumnVector expected = + ColumnVector.fromStrings("Repl and Repl with Repl", "nothing", null, "Repl")) { + assertColumnsAreEqual(expected, actual); + } - try (ColumnVector actual = v.replaceRegex(pattern, repl, 0)) { - assertColumnsAreEqual(v, actual); - } + try (ColumnVector actual = v.replaceRegex(pattern, repl, 0)) { + assertColumnsAreEqual(v, actual); + } - try (ColumnVector actual = v.replaceRegex(pattern, repl, 1); - ColumnVector expected = - ColumnVector.fromStrings("Repl and Title with title", "nothing", null, "Repl")) { - assertColumnsAreEqual(expected, actual); - } + try (ColumnVector actual = v.replaceRegex(pattern, repl, 1); + ColumnVector expected = + ColumnVector.fromStrings("Repl and Title with title", "nothing", null, "Repl")) { + assertColumnsAreEqual(expected, actual); + } - try (ColumnVector actual = v.replaceRegex(regexProg, repl); - ColumnVector expected = - ColumnVector.fromStrings("Repl and Repl with Repl", "nothing", null, "Repl")) { - assertColumnsAreEqual(expected, actual); - } + try (ColumnVector actual = v.replaceRegex(regexProg, repl); + ColumnVector expected = + ColumnVector.fromStrings("Repl and Repl with Repl", "nothing", null, "Repl")) { + assertColumnsAreEqual(expected, actual); + } - try (ColumnVector actual = v.replaceRegex(regexProg, repl, 0)) { - assertColumnsAreEqual(v, actual); - } + try (ColumnVector actual = v.replaceRegex(regexProg, repl, 0)) { + assertColumnsAreEqual(v, actual); + } - try (ColumnVector actual = v.replaceRegex(regexProg, repl, 1); - ColumnVector expected = - ColumnVector.fromStrings("Repl and Title with title", "nothing", null, "Repl")) { - assertColumnsAreEqual(expected, actual); + try (ColumnVector actual = v.replaceRegex(regexProg, repl, 1); + ColumnVector expected = + ColumnVector.fromStrings("Repl and Title with title", "nothing", null, "Repl")) { + assertColumnsAreEqual(expected, actual); + } } }