From ffc15c38f97c230f70fb328e569e7ab1400ee23b Mon Sep 17 00:00:00 2001 From: Cindy Jiang Date: Mon, 6 Feb 2023 14:38:01 -0800 Subject: [PATCH 1/7] added splitting regex program apis and tests Signed-off-by: Cindy Jiang --- .../main/java/ai/rapids/cudf/ColumnView.java | 111 +++++++++++++++--- java/src/main/native/src/ColumnViewJni.cpp | 72 ++++++------ .../java/ai/rapids/cudf/ColumnVectorTest.java | 71 +++++++---- 3 files changed, 178 insertions(+), 76 deletions(-) diff --git a/java/src/main/java/ai/rapids/cudf/ColumnView.java b/java/src/main/java/ai/rapids/cudf/ColumnView.java index 4daa3c17cfc..3b2ce941a8b 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnView.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnView.java @@ -2531,12 +2531,34 @@ public final ColumnVector stringLocate(Scalar substring, int start, int end) { * regular expression pattern or just by a string literal delimiter. * @return list of strings columns as a table. */ + @Deprecated public final Table stringSplit(String pattern, int limit, boolean splitByRegex) { + if (splitByRegex) { + return stringSplit(new RegexProgram(pattern, CaptureGroups.NON_CAPTURE), limit); + } else { + return stringSplit(pattern, limit); + } + } + + /** + * Returns a list of columns by splitting each string using the specified regex program. The + * number of rows in the output columns will be the same as the input column. Null entries + * are added for a row where split results have been exhausted. Null input entries result in + * all nulls in the corresponding rows of the output columns. + * + * @param regexProg the regex program with UTF-8 encoded string identifying the split pattern + * for each input string. + * @param limit the maximum size of the list resulting from splitting each input string, + * or -1 for all possible splits. Note that limit = 0 (all possible splits without + * trailing empty strings) and limit = 1 (no split at all) are not supported. + * @return list of strings columns as a table. + */ + public final Table stringSplit(RegexProgram regexProg, int limit) { assert type.equals(DType.STRING) : "column type must be a String"; - assert pattern != null : "pattern is null"; - assert pattern.length() > 0 : "empty pattern is not supported"; + assert regexProg != null : "regex program is null"; assert limit != 0 && limit != 1 : "split limit == 0 and limit == 1 are not supported"; - return new Table(stringSplit(this.getNativeView(), pattern, limit, splitByRegex)); + return new Table(stringSplit(this.getNativeView(), regexProg.pattern(), regexProg.combinedFlags(), + regexProg.capture().nativeId, limit, true)); } /** @@ -2550,6 +2572,7 @@ public final Table stringSplit(String pattern, int limit, boolean splitByRegex) * regular expression pattern or just by a string literal delimiter. * @return list of strings columns as a table. */ + @Deprecated public final Table stringSplit(String pattern, boolean splitByRegex) { return stringSplit(pattern, -1, splitByRegex); } @@ -2567,7 +2590,11 @@ public final Table stringSplit(String pattern, boolean splitByRegex) { * @return list of strings columns as a table. */ public final Table stringSplit(String delimiter, int limit) { - return stringSplit(delimiter, limit, false); + assert type.equals(DType.STRING) : "column type must be a String"; + assert delimiter != null : "delimiter is null"; + assert limit != 0 && limit != 1 : "split limit == 0 and limit == 1 are not supported"; + return new Table(stringSplit(this.getNativeView(), delimiter, RegexFlag.DEFAULT.nativeId, + CaptureGroups.NON_CAPTURE.nativeId, limit, false)); } /** @@ -2580,7 +2607,21 @@ public final Table stringSplit(String delimiter, int limit) { * @return list of strings columns as a table. */ public final Table stringSplit(String delimiter) { - return stringSplit(delimiter, -1, false); + return stringSplit(delimiter, -1); + } + + /** + * Returns a list of columns by splitting each string using the specified regex program with + * string literal delimiter. The number of rows in the output columns will be the same as the + * input column. Null entries are added for a row where split results have been exhausted. + * Null input entries result in all nulls in the corresponding rows of the output columns. + * + * @param regexProg the regex program with UTF-8 encoded string identifying the split pattern + * for each input string. + * @return list of strings columns as a table. + */ + public final Table stringSplit(RegexProgram regexProg) { + return stringSplit(regexProg, -1); } /** @@ -2595,13 +2636,33 @@ public final Table stringSplit(String delimiter) { * regular expression pattern or just by a string literal delimiter. * @return a LIST column of string elements. */ + @Deprecated public final ColumnVector stringSplitRecord(String pattern, int limit, boolean splitByRegex) { + if (splitByRegex) { + return stringSplitRecord(new RegexProgram(pattern, CaptureGroups.NON_CAPTURE), limit); + } else { + return stringSplitRecord(pattern, limit); + } + } + + /** + * Returns a column that are lists of strings in which each list is made by splitting the + * corresponding input string using the specified regex program pattern. + * + * @param regexProg the regex program with UTF-8 encoded string identifying the split pattern + * for each input string. + * @param limit the maximum size of the list resulting from splitting each input string, + * or -1 for all possible splits. Note that limit = 0 (all possible splits without + * trailing empty strings) and limit = 1 (no split at all) are not supported. + * @return a LIST column of string elements. + */ + public final ColumnVector stringSplitRecord(RegexProgram regexProg, int limit) { assert type.equals(DType.STRING) : "column type must be String"; - assert pattern != null : "pattern is null"; - assert pattern.length() > 0 : "empty pattern is not supported"; + assert regexProg != null : "regex program is null"; assert limit != 0 && limit != 1 : "split limit == 0 and limit == 1 are not supported"; return new ColumnVector( - stringSplitRecord(this.getNativeView(), pattern, limit, splitByRegex)); + stringSplitRecord(this.getNativeView(), regexProg.pattern(), regexProg.combinedFlags(), + regexProg.capture().nativeId, limit, true)); } /** @@ -2613,6 +2674,7 @@ public final ColumnVector stringSplitRecord(String pattern, int limit, boolean s * regular expression pattern or just by a string literal delimiter. * @return a LIST column of string elements. */ + @Deprecated public final ColumnVector stringSplitRecord(String pattern, boolean splitByRegex) { return stringSplitRecord(pattern, -1, splitByRegex); } @@ -2628,7 +2690,12 @@ public final ColumnVector stringSplitRecord(String pattern, boolean splitByRegex * @return a LIST column of string elements. */ public final ColumnVector stringSplitRecord(String delimiter, int limit) { - return stringSplitRecord(delimiter, limit, false); + assert type.equals(DType.STRING) : "column type must be String"; + assert delimiter != null : "delimiter is null"; + assert limit != 0 && limit != 1 : "split limit == 0 and limit == 1 are not supported"; + return new ColumnVector( + stringSplitRecord(this.getNativeView(), delimiter, RegexFlag.DEFAULT.nativeId, + CaptureGroups.NON_CAPTURE.nativeId, limit, false)); } /** @@ -2639,7 +2706,19 @@ public final ColumnVector stringSplitRecord(String delimiter, int limit) { * @return a LIST column of string elements. */ public final ColumnVector stringSplitRecord(String delimiter) { - return stringSplitRecord(delimiter, -1, false); + return stringSplitRecord(delimiter, -1); + } + + /** + * Returns a column that are lists of strings in which each list is made by splitting the + * corresponding input string using the specified regex program with string literal delimiter. + * + * @param regexProg the regex program with UTF-8 encoded string identifying the split pattern + * for each input string. + * @return a LIST column of string elements. + */ + public final ColumnVector stringSplitRecord(RegexProgram regexProg) { + return stringSplitRecord(regexProg, -1); } /** @@ -3965,14 +4044,16 @@ private static native long repeatStringsWithColumnRepeatTimes(long stringsHandle * * @param nativeHandle native handle of the input strings column that being operated on. * @param pattern UTF-8 encoded string identifying the split pattern for each input string. + * @param flags regex flags setting. + * @param capture capture groups setting. * @param limit the maximum size of the list resulting from splitting each input string, * or -1 for all possible splits. Note that limit = 0 (all possible splits without * trailing empty strings) and limit = 1 (no split at all) are not supported. * @param splitByRegex a boolean flag indicating whether the input strings will be split by a * regular expression pattern or just by a string literal delimiter. */ - private static native long[] stringSplit(long nativeHandle, String pattern, int limit, - boolean splitByRegex); + private static native long[] stringSplit(long nativeHandle, String pattern, int flags, + int capture, int limit, boolean splitByRegex); /** * Returns a column that are lists of strings in which each list is made by splitting the @@ -3980,14 +4061,16 @@ private static native long[] stringSplit(long nativeHandle, String pattern, int * * @param nativeHandle native handle of the input strings column that being operated on. * @param pattern UTF-8 encoded string identifying the split pattern for each input string. + * @param flags regex flags setting. + * @param capture capture groups setting. * @param limit the maximum size of the list resulting from splitting each input string, * or -1 for all possible splits. Note that limit = 0 (all possible splits without * trailing empty strings) and limit = 1 (no split at all) are not supported. * @param splitByRegex a boolean flag indicating whether the input strings will be split by a * regular expression pattern or just by a string literal delimiter. */ - private static native long stringSplitRecord(long nativeHandle, String pattern, int limit, - boolean splitByRegex); + private static native long stringSplitRecord(long nativeHandle, String pattern, int flags, + int capture, int limit, boolean splitByRegex); /** * Native method to calculate substring from a given string column. 0 indexing. diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp index bfa3fa0a522..efd6a46b9f2 100644 --- a/java/src/main/native/src/ColumnViewJni.cpp +++ b/java/src/main/native/src/ColumnViewJni.cpp @@ -679,11 +679,9 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_reverseStringsOrLists(JNI CATCH_STD(env, 0); } -JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_ColumnView_stringSplit(JNIEnv *env, jclass, - jlong input_handle, - jstring pattern_obj, - jint limit, - jboolean split_by_regex) { +JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_ColumnView_stringSplit( + JNIEnv *env, jclass, jlong input_handle, jstring pattern_obj, jint regex_flags, + jint capture_groups, jint limit, jboolean split_by_regex) { JNI_NULL_CHECK(env, input_handle, "input_handle is null", 0); if (limit == 0 || limit == 1) { @@ -697,31 +695,28 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_ColumnView_stringSplit(JNIEnv * try { cudf::jni::auto_set_device(env); - auto const input = reinterpret_cast(input_handle); - auto const strs_input = cudf::strings_column_view{*input}; - + auto const column_view = reinterpret_cast(input_handle); + auto const strings_column = cudf::strings_column_view{*column_view}; auto const pattern_jstr = cudf::jni::native_jstring(env, pattern_obj); - if (pattern_jstr.is_empty()) { - // Java's split API produces different behaviors than cudf when splitting with empty - // pattern. - JNI_THROW_NEW(env, "java/lang/IllegalArgumentException", "Empty pattern is not supported", 0); - } - auto const pattern = std::string(pattern_jstr.get(), pattern_jstr.size_bytes()); auto const max_split = limit > 1 ? limit - 1 : limit; - auto result = split_by_regex ? - cudf::strings::split_re(strs_input, pattern, max_split) : - cudf::strings::split(strs_input, cudf::string_scalar{pattern}, max_split); - return cudf::jni::convert_table_for_return(env, std::move(result)); + if (split_by_regex) { + auto const flags = static_cast(regex_flags); + auto const groups = static_cast(capture_groups); + auto const regex_prog = cudf::strings::regex_program::create(pattern, flags, groups); + auto result = cudf::strings::split_re(strings_column, *regex_prog, max_split); + return cudf::jni::convert_table_for_return(env, std::move(result)); + } else { + auto result = cudf::strings::split(strings_column, cudf::string_scalar{pattern}, max_split); + return cudf::jni::convert_table_for_return(env, std::move(result)); + } } CATCH_STD(env, 0); } -JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_stringSplitRecord(JNIEnv *env, jclass, - jlong input_handle, - jstring pattern_obj, - jint limit, - jboolean split_by_regex) { +JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_stringSplitRecord( + JNIEnv *env, jclass, jlong input_handle, jstring pattern_obj, jint regex_flags, + jint capture_groups, jint limit, jboolean split_by_regex) { JNI_NULL_CHECK(env, input_handle, "input_handle is null", 0); if (limit == 0 || limit == 1) { @@ -735,23 +730,26 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_stringSplitRecord(JNIEnv try { cudf::jni::auto_set_device(env); - auto const input = reinterpret_cast(input_handle); - auto const strs_input = cudf::strings_column_view{*input}; - + auto const column_view = reinterpret_cast(input_handle); + auto const strings_column = cudf::strings_column_view{*column_view}; auto const pattern_jstr = cudf::jni::native_jstring(env, pattern_obj); - if (pattern_jstr.is_empty()) { - // Java's split API produces different behaviors than cudf when splitting with empty - // pattern. - JNI_THROW_NEW(env, "java/lang/IllegalArgumentException", "Empty pattern is not supported", 0); - } - auto const pattern = std::string(pattern_jstr.get(), pattern_jstr.size_bytes()); auto const max_split = limit > 1 ? limit - 1 : limit; - auto result = - split_by_regex ? - cudf::strings::split_record_re(strs_input, pattern, max_split) : - cudf::strings::split_record(strs_input, cudf::string_scalar{pattern}, max_split); - return release_as_jlong(result); + if (split_by_regex) { + auto const flags = static_cast(regex_flags); + auto const groups = static_cast(capture_groups); + auto const regex_prog = cudf::strings::regex_program::create(pattern, flags, groups); + auto result = cudf::strings::split_record_re(strings_column, *regex_prog, max_split); + return release_as_jlong(result); + } else { + auto result = cudf::strings::split_record(strings_column, cudf::string_scalar{pattern}, max_split); + return release_as_jlong(result); + } + // auto result = + // split_by_regex ? + // cudf::strings::split_record_re(strs_input, pattern, max_split) : + // cudf::strings::split_record(strs_input, cudf::string_scalar{pattern}, max_split); + // return release_as_jlong(result); } CATCH_STD(env, 0); } diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index 46264b7d668..c3af5eb7b68 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -4989,29 +4989,37 @@ void testReverseList() { @Test void testStringSplit() { String pattern = " "; + RegexProgram regexProg = new RegexProgram(pattern, CaptureGroups.NON_CAPTURE); try (ColumnVector v = ColumnVector.fromStrings("Héllo there all", "thésé", null, "", - "ARé some things", "test strings here"); + "ARé some things", "test strings here"); Table expectedSplitLimit2 = new Table.TestBuilder() - .column("Héllo", "thésé", null, "", "ARé", "test") - .column("there all", null, null, null, "some things", "strings here") - .build(); + .column("Héllo", "thésé", null, "", "ARé", "test") + .column("there all", null, null, null, "some things", "strings here") + .build(); Table expectedSplitAll = new Table.TestBuilder() - .column("Héllo", "thésé", null, "", "ARé", "test") - .column("there", null, null, null, "some", "strings") - .column("all", null, null, null, "things", "here") - .build(); - Table resultSplitLimit2 = v.stringSplit(pattern, 2); - Table resultSplitAll = v.stringSplit(pattern)) { - assertTablesAreEqual(expectedSplitLimit2, resultSplitLimit2); - assertTablesAreEqual(expectedSplitAll, resultSplitAll); + .column("Héllo", "thésé", null, "", "ARé", "test") + .column("there", null, null, null, "some", "strings") + .column("all", null, null, null, "things", "here") + .build()) { + try (Table resultSplitLimit2 = v.stringSplit(pattern, 2); + Table resultSplitAll = v.stringSplit(pattern)) { + assertTablesAreEqual(expectedSplitLimit2, resultSplitLimit2); + assertTablesAreEqual(expectedSplitAll, resultSplitAll); + } + try (Table resultSplitLimit2 = v.stringSplit(regexProg, 2); + Table resultSplitAll = v.stringSplit(regexProg)) { + assertTablesAreEqual(expectedSplitLimit2, resultSplitLimit2); + assertTablesAreEqual(expectedSplitAll, resultSplitAll); + } } } @Test void testStringSplitByRegularExpression() { String pattern = "[_ ]"; + RegexProgram regexProg = new RegexProgram(pattern, CaptureGroups.NON_CAPTURE); try (ColumnVector v = ColumnVector.fromStrings("Héllo_there all", "thésé", null, "", - "ARé some_things", "test_strings_here"); + "ARé some_things", "test_strings_here"); Table expectedSplitLimit2 = new Table.TestBuilder() .column("Héllo", "thésé", null, "", "ARé", "test") .column("there all", null, null, null, "some_things", "strings_here") @@ -5020,11 +5028,17 @@ void testStringSplitByRegularExpression() { .column("Héllo", "thésé", null, "", "ARé", "test") .column("there", null, null, null, "some", "strings") .column("all", null, null, null, "things", "here") - .build(); - Table resultSplitLimit2 = v.stringSplit(pattern, 2, true); - Table resultSplitAll = v.stringSplit(pattern, true)) { - assertTablesAreEqual(expectedSplitLimit2, resultSplitLimit2); - assertTablesAreEqual(expectedSplitAll, resultSplitAll); + .build()) { + try (Table resultSplitLimit2 = v.stringSplit(pattern, 2, true); + Table resultSplitAll = v.stringSplit(pattern, true)) { + assertTablesAreEqual(expectedSplitLimit2, resultSplitLimit2); + assertTablesAreEqual(expectedSplitAll, resultSplitAll); + } + try (Table resultSplitLimit2 = v.stringSplit(regexProg, 2); + Table resultSplitAll = v.stringSplit(regexProg)) { + assertTablesAreEqual(expectedSplitLimit2, resultSplitLimit2); + assertTablesAreEqual(expectedSplitAll, resultSplitAll); + } } } @@ -5032,7 +5046,7 @@ void testStringSplitByRegularExpression() { void testStringSplitRecord() { String pattern = " "; try (ColumnVector v = ColumnVector.fromStrings("Héllo there all", "thésé", null, "", - "ARé some things", "test strings here"); + "ARé some things", "test strings here"); ColumnVector expectedSplitLimit2 = ColumnVector.fromLists( new HostColumnVector.ListType(true, new HostColumnVector.BasicType(true, DType.STRING)), @@ -5061,8 +5075,9 @@ void testStringSplitRecord() { @Test void testStringSplitRecordByRegularExpression() { String pattern = "[_ ]"; + RegexProgram regexProg = new RegexProgram(pattern, CaptureGroups.NON_CAPTURE); try (ColumnVector v = ColumnVector.fromStrings("Héllo_there all", "thésé", null, "", - "ARé some_things", "test_strings_here"); + "ARé some_things", "test_strings_here"); ColumnVector expectedSplitLimit2 = ColumnVector.fromLists( new HostColumnVector.ListType(true, new HostColumnVector.BasicType(true, DType.STRING)), @@ -5080,11 +5095,17 @@ void testStringSplitRecordByRegularExpression() { null, Arrays.asList(""), Arrays.asList("ARé", "some", "things"), - Arrays.asList("test", "strings", "here")); - ColumnVector resultSplitLimit2 = v.stringSplitRecord(pattern, 2, true); - ColumnVector resultSplitAll = v.stringSplitRecord(pattern, true)) { - assertColumnsAreEqual(expectedSplitLimit2, resultSplitLimit2); - assertColumnsAreEqual(expectedSplitAll, resultSplitAll); + Arrays.asList("test", "strings", "here"))) { + try (ColumnVector resultSplitLimit2 = v.stringSplitRecord(pattern, 2, true); + ColumnVector resultSplitAll = v.stringSplitRecord(pattern, true)) { + assertColumnsAreEqual(expectedSplitLimit2, resultSplitLimit2); + assertColumnsAreEqual(expectedSplitAll, resultSplitAll); + } + try (ColumnVector resultSplitLimit2 = v.stringSplitRecord(regexProg, 2); + ColumnVector resultSplitAll = v.stringSplitRecord(regexProg)) { + assertColumnsAreEqual(expectedSplitLimit2, resultSplitLimit2); + assertColumnsAreEqual(expectedSplitAll, resultSplitAll); + } } } From 47185407cb26ee5282121b5d73adfbb11748830a Mon Sep 17 00:00:00 2001 From: Cindy Jiang Date: Mon, 6 Feb 2023 15:41:07 -0800 Subject: [PATCH 2/7] fixed code formatting Signed-off-by: Cindy Jiang --- java/src/main/native/src/ColumnViewJni.cpp | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp index efd6a46b9f2..0ae1a1ed210 100644 --- a/java/src/main/native/src/ColumnViewJni.cpp +++ b/java/src/main/native/src/ColumnViewJni.cpp @@ -742,14 +742,10 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_stringSplitRecord( auto result = cudf::strings::split_record_re(strings_column, *regex_prog, max_split); return release_as_jlong(result); } else { - auto result = cudf::strings::split_record(strings_column, cudf::string_scalar{pattern}, max_split); + auto result = + cudf::strings::split_record(strings_column, cudf::string_scalar{pattern}, max_split); return release_as_jlong(result); } - // auto result = - // split_by_regex ? - // cudf::strings::split_record_re(strs_input, pattern, max_split) : - // cudf::strings::split_record(strs_input, cudf::string_scalar{pattern}, max_split); - // return release_as_jlong(result); } CATCH_STD(env, 0); } From 0376ce0868d866154cb06b930ff91c49f4333ee4 Mon Sep 17 00:00:00 2001 From: Cindy Jiang Date: Tue, 7 Feb 2023 11:45:32 -0800 Subject: [PATCH 3/7] refactored stringSplit and stringSplitRecord native methods Signed-off-by: Cindy Jiang --- .../main/java/ai/rapids/cudf/ColumnView.java | 81 ++++++++++------- java/src/main/native/src/ColumnViewJni.cpp | 87 ++++++++++++++----- 2 files changed, 115 insertions(+), 53 deletions(-) diff --git a/java/src/main/java/ai/rapids/cudf/ColumnView.java b/java/src/main/java/ai/rapids/cudf/ColumnView.java index 3b2ce941a8b..371cd25c4d1 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnView.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnView.java @@ -2541,8 +2541,8 @@ public final Table stringSplit(String pattern, int limit, boolean splitByRegex) } /** - * Returns a list of columns by splitting each string using the specified regex program. The - * number of rows in the output columns will be the same as the input column. Null entries + * Returns a list of columns by splitting each string using the specified regex program pattern. + * The number of rows in the output columns will be the same as the input column. Null entries * are added for a row where split results have been exhausted. Null input entries result in * all nulls in the corresponding rows of the output columns. * @@ -2557,8 +2557,8 @@ public final Table stringSplit(RegexProgram regexProg, int limit) { assert type.equals(DType.STRING) : "column type must be a String"; assert regexProg != null : "regex program is null"; assert limit != 0 && limit != 1 : "split limit == 0 and limit == 1 are not supported"; - return new Table(stringSplit(this.getNativeView(), regexProg.pattern(), regexProg.combinedFlags(), - regexProg.capture().nativeId, limit, true)); + return new Table(stringSplitRe(this.getNativeView(), regexProg.pattern(), regexProg.combinedFlags(), + regexProg.capture().nativeId, limit)); } /** @@ -2593,8 +2593,7 @@ public final Table stringSplit(String delimiter, int limit) { assert type.equals(DType.STRING) : "column type must be a String"; assert delimiter != null : "delimiter is null"; assert limit != 0 && limit != 1 : "split limit == 0 and limit == 1 are not supported"; - return new Table(stringSplit(this.getNativeView(), delimiter, RegexFlag.DEFAULT.nativeId, - CaptureGroups.NON_CAPTURE.nativeId, limit, false)); + return new Table(stringSplit(this.getNativeView(), delimiter, limit)); } /** @@ -2611,10 +2610,10 @@ public final Table stringSplit(String delimiter) { } /** - * Returns a list of columns by splitting each string using the specified regex program with - * string literal delimiter. The number of rows in the output columns will be the same as the - * input column. Null entries are added for a row where split results have been exhausted. - * Null input entries result in all nulls in the corresponding rows of the output columns. + * Returns a list of columns by splitting each string using the specified regex program pattern. + * The number of rows in the output columns will be the same as the input column. Null entries + * are added for a row where split results have been exhausted. Null input entries result in + * all nulls in the corresponding rows of the output columns. * * @param regexProg the regex program with UTF-8 encoded string identifying the split pattern * for each input string. @@ -2661,8 +2660,8 @@ public final ColumnVector stringSplitRecord(RegexProgram regexProg, int limit) { assert regexProg != null : "regex program is null"; assert limit != 0 && limit != 1 : "split limit == 0 and limit == 1 are not supported"; return new ColumnVector( - stringSplitRecord(this.getNativeView(), regexProg.pattern(), regexProg.combinedFlags(), - regexProg.capture().nativeId, limit, true)); + stringSplitRecordRe(this.getNativeView(), regexProg.pattern(), regexProg.combinedFlags(), + regexProg.capture().nativeId, limit)); } /** @@ -2693,9 +2692,7 @@ public final ColumnVector stringSplitRecord(String delimiter, int limit) { assert type.equals(DType.STRING) : "column type must be String"; assert delimiter != null : "delimiter is null"; assert limit != 0 && limit != 1 : "split limit == 0 and limit == 1 are not supported"; - return new ColumnVector( - stringSplitRecord(this.getNativeView(), delimiter, RegexFlag.DEFAULT.nativeId, - CaptureGroups.NON_CAPTURE.nativeId, limit, false)); + return new ColumnVector(stringSplitRecord(this.getNativeView(), delimiter, limit)); } /** @@ -2711,7 +2708,7 @@ public final ColumnVector stringSplitRecord(String delimiter) { /** * Returns a column that are lists of strings in which each list is made by splitting the - * corresponding input string using the specified regex program with string literal delimiter. + * corresponding input string using the specified regex program pattern. * * @param regexProg the regex program with UTF-8 encoded string identifying the split pattern * for each input string. @@ -4037,40 +4034,64 @@ private static native long repeatStringsWithColumnRepeatTimes(long stringsHandle private static native long substringLocate(long columnView, long substringScalar, int start, int end); /** - * Returns a list of columns by splitting each string using the specified pattern. The number of - * rows in the output columns will be the same as the input column. Null entries are added for a - * row where split results have been exhausted. Null input entries result in all nulls in the - * corresponding rows of the output columns. + * Returns a list of columns by splitting each string using the specified string literal + * delimiter. The number of rows in the output columns will be the same as the input column. + * Null entries are added for a row where split results have been exhausted. Null input entries + * result in all nulls in the corresponding rows of the output columns. * * @param nativeHandle native handle of the input strings column that being operated on. - * @param pattern UTF-8 encoded string identifying the split pattern for each input string. + * @param delimiter UTF-8 encoded string identifying the split delimiter for each input string. + * @param limit the maximum size of the list resulting from splitting each input string, + * or -1 for all possible splits. Note that limit = 0 (all possible splits without + * trailing empty strings) and limit = 1 (no split at all) are not supported. + */ + private static native long[] stringSplit(long nativeHandle, String delimiter, int limit); + + /** + * Returns a list of columns by splitting each string using the specified regular expression + * pattern. The number of rows in the output columns will be the same as the input column. + * Null entries are added for a row where split results have been exhausted. Null input entries + * result in all nulls in the corresponding rows of the output columns. + * + * @param nativeHandle native handle of the input strings column that being operated on. + * @param pattern UTF-8 encoded string identifying the split regular expression pattern for + * each input string. * @param flags regex flags setting. * @param capture capture groups setting. * @param limit the maximum size of the list resulting from splitting each input string, * or -1 for all possible splits. Note that limit = 0 (all possible splits without * trailing empty strings) and limit = 1 (no split at all) are not supported. - * @param splitByRegex a boolean flag indicating whether the input strings will be split by a - * regular expression pattern or just by a string literal delimiter. */ - private static native long[] stringSplit(long nativeHandle, String pattern, int flags, - int capture, int limit, boolean splitByRegex); + private static native long[] stringSplitRe(long nativeHandle, String pattern, int flags, + int capture, int limit); /** * Returns a column that are lists of strings in which each list is made by splitting the * corresponding input string using the specified string literal delimiter. * * @param nativeHandle native handle of the input strings column that being operated on. - * @param pattern UTF-8 encoded string identifying the split pattern for each input string. + * @param delimiter UTF-8 encoded string identifying the split delimiter for each input string. + * @param limit the maximum size of the list resulting from splitting each input string, + * or -1 for all possible splits. Note that limit = 0 (all possible splits without + * trailing empty strings) and limit = 1 (no split at all) are not supported. + */ + private static native long stringSplitRecord(long nativeHandle, String delimiter, int limit); + + /** + * Returns a column that are lists of strings in which each list is made by splitting the + * corresponding input string using the specified regular expression pattern. + * + * @param nativeHandle native handle of the input strings column that being operated on. + * @param pattern UTF-8 encoded string identifying the split regular expression pattern for + * each input string. * @param flags regex flags setting. * @param capture capture groups setting. * @param limit the maximum size of the list resulting from splitting each input string, * or -1 for all possible splits. Note that limit = 0 (all possible splits without * trailing empty strings) and limit = 1 (no split at all) are not supported. - * @param splitByRegex a boolean flag indicating whether the input strings will be split by a - * regular expression pattern or just by a string literal delimiter. */ - private static native long stringSplitRecord(long nativeHandle, String pattern, int flags, - int capture, int limit, boolean splitByRegex); + private static native long stringSplitRecordRe(long nativeHandle, String pattern, int flags, + int capture, int limit); /** * Native method to calculate substring from a given string column. 0 indexing. diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp index 0ae1a1ed210..a5f8ee14252 100644 --- a/java/src/main/native/src/ColumnViewJni.cpp +++ b/java/src/main/native/src/ColumnViewJni.cpp @@ -680,8 +680,34 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_reverseStringsOrLists(JNI } JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_ColumnView_stringSplit( + JNIEnv *env, jclass, jlong input_handle, jstring delimiter_obj, jint limit) { + JNI_NULL_CHECK(env, input_handle, "input_handle is null", 0); + + if (limit == 0 || limit == 1) { + // Cannot achieve the results of splitting with limit == 0 or limit == 1. + // This is because cudf operates on a different parameter (`max_split`) which is converted from + // limit. When limit == 0 or limit == 1, max_split will be non-positive and will result in an + // unlimited split. + JNI_THROW_NEW(env, "java/lang/IllegalArgumentException", + "limit == 0 and limit == 1 are not supported", 0); + } + + try { + cudf::jni::auto_set_device(env); + auto const column_view = reinterpret_cast(input_handle); + auto const strings_column = cudf::strings_column_view{*column_view}; + auto const delimiter_jstr = cudf::jni::native_jstring(env, delimiter_obj); + auto const delimiter = std::string(delimiter_jstr.get(), delimiter_jstr.size_bytes()); + auto const max_split = limit > 1 ? limit - 1 : limit; + auto result = cudf::strings::split(strings_column, cudf::string_scalar{delimiter}, max_split); + return cudf::jni::convert_table_for_return(env, std::move(result)); + } + CATCH_STD(env, 0); +} + +JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_ColumnView_stringSplitRe( JNIEnv *env, jclass, jlong input_handle, jstring pattern_obj, jint regex_flags, - jint capture_groups, jint limit, jboolean split_by_regex) { + jint capture_groups, jint limit) { JNI_NULL_CHECK(env, input_handle, "input_handle is null", 0); if (limit == 0 || limit == 1) { @@ -700,23 +726,44 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_ColumnView_stringSplit( auto const pattern_jstr = cudf::jni::native_jstring(env, pattern_obj); auto const pattern = std::string(pattern_jstr.get(), pattern_jstr.size_bytes()); auto const max_split = limit > 1 ? limit - 1 : limit; - if (split_by_regex) { - auto const flags = static_cast(regex_flags); - auto const groups = static_cast(capture_groups); - auto const regex_prog = cudf::strings::regex_program::create(pattern, flags, groups); - auto result = cudf::strings::split_re(strings_column, *regex_prog, max_split); - return cudf::jni::convert_table_for_return(env, std::move(result)); - } else { - auto result = cudf::strings::split(strings_column, cudf::string_scalar{pattern}, max_split); - return cudf::jni::convert_table_for_return(env, std::move(result)); - } + auto const flags = static_cast(regex_flags); + auto const groups = static_cast(capture_groups); + auto const regex_prog = cudf::strings::regex_program::create(pattern, flags, groups); + auto result = cudf::strings::split_re(strings_column, *regex_prog, max_split); + return cudf::jni::convert_table_for_return(env, std::move(result)); } CATCH_STD(env, 0); } JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_stringSplitRecord( + JNIEnv *env, jclass, jlong input_handle, jstring delimiter_obj, jint limit) { + JNI_NULL_CHECK(env, input_handle, "input_handle is null", 0); + + if (limit == 0 || limit == 1) { + // Cannot achieve the results of splitting with limit == 0 or limit == 1. + // This is because cudf operates on a different parameter (`max_split`) which is converted from + // limit. When limit == 0 or limit == 1, max_split will be non-positive and will result in an + // unlimited split. + JNI_THROW_NEW(env, "java/lang/IllegalArgumentException", + "limit == 0 and limit == 1 are not supported", 0); + } + + try { + cudf::jni::auto_set_device(env); + auto const column_view = reinterpret_cast(input_handle); + auto const strings_column = cudf::strings_column_view{*column_view}; + auto const delimiter_jstr = cudf::jni::native_jstring(env, delimiter_obj); + auto const delimiter = std::string(delimiter_jstr.get(), delimiter_jstr.size_bytes()); + auto const max_split = limit > 1 ? limit - 1 : limit; + auto result = cudf::strings::split_record(strings_column, cudf::string_scalar{delimiter}, max_split); + return release_as_jlong(result); + } + CATCH_STD(env, 0); +} + +JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_stringSplitRecordRe( JNIEnv *env, jclass, jlong input_handle, jstring pattern_obj, jint regex_flags, - jint capture_groups, jint limit, jboolean split_by_regex) { + jint capture_groups, jint limit) { JNI_NULL_CHECK(env, input_handle, "input_handle is null", 0); if (limit == 0 || limit == 1) { @@ -735,17 +782,11 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_stringSplitRecord( auto const pattern_jstr = cudf::jni::native_jstring(env, pattern_obj); auto const pattern = std::string(pattern_jstr.get(), pattern_jstr.size_bytes()); auto const max_split = limit > 1 ? limit - 1 : limit; - if (split_by_regex) { - auto const flags = static_cast(regex_flags); - auto const groups = static_cast(capture_groups); - auto const regex_prog = cudf::strings::regex_program::create(pattern, flags, groups); - auto result = cudf::strings::split_record_re(strings_column, *regex_prog, max_split); - return release_as_jlong(result); - } else { - auto result = - cudf::strings::split_record(strings_column, cudf::string_scalar{pattern}, max_split); - return release_as_jlong(result); - } + auto const flags = static_cast(regex_flags); + auto const groups = static_cast(capture_groups); + auto const regex_prog = cudf::strings::regex_program::create(pattern, flags, groups); + auto result = cudf::strings::split_record_re(strings_column, *regex_prog, max_split); + return release_as_jlong(result); } CATCH_STD(env, 0); } From 5dd8f0f2d40274410a722439932f177173daca3c Mon Sep 17 00:00:00 2001 From: Cindy Jiang Date: Tue, 7 Feb 2023 11:46:44 -0800 Subject: [PATCH 4/7] fixed code formatting Signed-off-by: Cindy Jiang --- java/src/main/java/ai/rapids/cudf/ColumnView.java | 4 ++-- java/src/main/native/src/ColumnViewJni.cpp | 15 ++++++++++----- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/java/src/main/java/ai/rapids/cudf/ColumnView.java b/java/src/main/java/ai/rapids/cudf/ColumnView.java index 371cd25c4d1..26882d90f3a 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnView.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnView.java @@ -4046,7 +4046,7 @@ private static native long repeatStringsWithColumnRepeatTimes(long stringsHandle * trailing empty strings) and limit = 1 (no split at all) are not supported. */ private static native long[] stringSplit(long nativeHandle, String delimiter, int limit); - + /** * Returns a list of columns by splitting each string using the specified regular expression * pattern. The number of rows in the output columns will be the same as the input column. @@ -4076,7 +4076,7 @@ private static native long[] stringSplitRe(long nativeHandle, String pattern, in * trailing empty strings) and limit = 1 (no split at all) are not supported. */ private static native long stringSplitRecord(long nativeHandle, String delimiter, int limit); - + /** * Returns a column that are lists of strings in which each list is made by splitting the * corresponding input string using the specified regular expression pattern. diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp index a5f8ee14252..501db23fa85 100644 --- a/java/src/main/native/src/ColumnViewJni.cpp +++ b/java/src/main/native/src/ColumnViewJni.cpp @@ -679,8 +679,10 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_reverseStringsOrLists(JNI CATCH_STD(env, 0); } -JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_ColumnView_stringSplit( - JNIEnv *env, jclass, jlong input_handle, jstring delimiter_obj, jint limit) { +JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_ColumnView_stringSplit(JNIEnv *env, jclass, + jlong input_handle, + jstring delimiter_obj, + jint limit) { JNI_NULL_CHECK(env, input_handle, "input_handle is null", 0); if (limit == 0 || limit == 1) { @@ -735,8 +737,10 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_ColumnView_stringSplitRe( CATCH_STD(env, 0); } -JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_stringSplitRecord( - JNIEnv *env, jclass, jlong input_handle, jstring delimiter_obj, jint limit) { +JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_stringSplitRecord(JNIEnv *env, jclass, + jlong input_handle, + jstring delimiter_obj, + jint limit) { JNI_NULL_CHECK(env, input_handle, "input_handle is null", 0); if (limit == 0 || limit == 1) { @@ -755,7 +759,8 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_stringSplitRecord( auto const delimiter_jstr = cudf::jni::native_jstring(env, delimiter_obj); auto const delimiter = std::string(delimiter_jstr.get(), delimiter_jstr.size_bytes()); auto const max_split = limit > 1 ? limit - 1 : limit; - auto result = cudf::strings::split_record(strings_column, cudf::string_scalar{delimiter}, max_split); + auto result = + cudf::strings::split_record(strings_column, cudf::string_scalar{delimiter}, max_split); return release_as_jlong(result); } CATCH_STD(env, 0); From 3643269ccf7e33d064ec735c270fd4e755c8154c Mon Sep 17 00:00:00 2001 From: Cindy Jiang Date: Tue, 7 Feb 2023 12:12:32 -0800 Subject: [PATCH 5/7] removed incorrect stringsplit tests Signed-off-by: Cindy Jiang --- .../java/ai/rapids/cudf/ColumnVectorTest.java | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index c3af5eb7b68..99c3b25a4de 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -4989,7 +4989,6 @@ void testReverseList() { @Test void testStringSplit() { String pattern = " "; - RegexProgram regexProg = new RegexProgram(pattern, CaptureGroups.NON_CAPTURE); try (ColumnVector v = ColumnVector.fromStrings("Héllo there all", "thésé", null, "", "ARé some things", "test strings here"); Table expectedSplitLimit2 = new Table.TestBuilder() @@ -5000,17 +4999,11 @@ void testStringSplit() { .column("Héllo", "thésé", null, "", "ARé", "test") .column("there", null, null, null, "some", "strings") .column("all", null, null, null, "things", "here") - .build()) { - try (Table resultSplitLimit2 = v.stringSplit(pattern, 2); - Table resultSplitAll = v.stringSplit(pattern)) { - assertTablesAreEqual(expectedSplitLimit2, resultSplitLimit2); - assertTablesAreEqual(expectedSplitAll, resultSplitAll); - } - try (Table resultSplitLimit2 = v.stringSplit(regexProg, 2); - Table resultSplitAll = v.stringSplit(regexProg)) { - assertTablesAreEqual(expectedSplitLimit2, resultSplitLimit2); - assertTablesAreEqual(expectedSplitAll, resultSplitAll); - } + .build(); + Table resultSplitLimit2 = v.stringSplit(pattern, 2); + Table resultSplitAll = v.stringSplit(pattern)) { + assertTablesAreEqual(expectedSplitLimit2, resultSplitLimit2); + assertTablesAreEqual(expectedSplitAll, resultSplitAll); } } From 70de2b5aa03c88b9858dafda60420a2b02908ab9 Mon Sep 17 00:00:00 2001 From: Cindy Jiang Date: Tue, 7 Feb 2023 12:14:28 -0800 Subject: [PATCH 6/7] fixed indentation Signed-off-by: Cindy Jiang --- java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index 99c3b25a4de..ab4baf74277 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -5000,8 +5000,8 @@ void testStringSplit() { .column("there", null, null, null, "some", "strings") .column("all", null, null, null, "things", "here") .build(); - Table resultSplitLimit2 = v.stringSplit(pattern, 2); - Table resultSplitAll = v.stringSplit(pattern)) { + Table resultSplitLimit2 = v.stringSplit(pattern, 2); + Table resultSplitAll = v.stringSplit(pattern)) { assertTablesAreEqual(expectedSplitLimit2, resultSplitLimit2); assertTablesAreEqual(expectedSplitAll, resultSplitAll); } From 465ea42dcb7e4986696ddeab367ec6cd618e5ddb Mon Sep 17 00:00:00 2001 From: Cindy Jiang Date: Tue, 7 Feb 2023 14:05:16 -0800 Subject: [PATCH 7/7] fixed variable naming and comments Signed-off-by: Cindy Jiang --- .../src/main/java/ai/rapids/cudf/ColumnView.java | 8 ++++---- java/src/main/native/src/ColumnViewJni.cpp | 16 ++++++++-------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/java/src/main/java/ai/rapids/cudf/ColumnView.java b/java/src/main/java/ai/rapids/cudf/ColumnView.java index 26882d90f3a..2d0bf28225f 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnView.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnView.java @@ -2543,7 +2543,7 @@ public final Table stringSplit(String pattern, int limit, boolean splitByRegex) /** * Returns a list of columns by splitting each string using the specified regex program pattern. * The number of rows in the output columns will be the same as the input column. Null entries - * are added for a row where split results have been exhausted. Null input entries result in + * are added for the rows where split results have been exhausted. Null input entries result in * all nulls in the corresponding rows of the output columns. * * @param regexProg the regex program with UTF-8 encoded string identifying the split pattern @@ -2612,7 +2612,7 @@ public final Table stringSplit(String delimiter) { /** * Returns a list of columns by splitting each string using the specified regex program pattern. * The number of rows in the output columns will be the same as the input column. Null entries - * are added for a row where split results have been exhausted. Null input entries result in + * are added for the rows where split results have been exhausted. Null input entries result in * all nulls in the corresponding rows of the output columns. * * @param regexProg the regex program with UTF-8 encoded string identifying the split pattern @@ -4036,7 +4036,7 @@ private static native long repeatStringsWithColumnRepeatTimes(long stringsHandle /** * Returns a list of columns by splitting each string using the specified string literal * delimiter. The number of rows in the output columns will be the same as the input column. - * Null entries are added for a row where split results have been exhausted. Null input entries + * Null entries are added for the rows where split results have been exhausted. Null input entries * result in all nulls in the corresponding rows of the output columns. * * @param nativeHandle native handle of the input strings column that being operated on. @@ -4050,7 +4050,7 @@ private static native long repeatStringsWithColumnRepeatTimes(long stringsHandle /** * Returns a list of columns by splitting each string using the specified regular expression * pattern. The number of rows in the output columns will be the same as the input column. - * Null entries are added for a row where split results have been exhausted. Null input entries + * Null entries are added for the rows where split results have been exhausted. Null input entries * result in all nulls in the corresponding rows of the output columns. * * @param nativeHandle native handle of the input strings column that being operated on. diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp index 501db23fa85..958efd364ed 100644 --- a/java/src/main/native/src/ColumnViewJni.cpp +++ b/java/src/main/native/src/ColumnViewJni.cpp @@ -696,8 +696,8 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_ColumnView_stringSplit(JNIEnv * try { cudf::jni::auto_set_device(env); - auto const column_view = reinterpret_cast(input_handle); - auto const strings_column = cudf::strings_column_view{*column_view}; + auto const input = reinterpret_cast(input_handle); + auto const strings_column = cudf::strings_column_view{*input}; auto const delimiter_jstr = cudf::jni::native_jstring(env, delimiter_obj); auto const delimiter = std::string(delimiter_jstr.get(), delimiter_jstr.size_bytes()); auto const max_split = limit > 1 ? limit - 1 : limit; @@ -723,8 +723,8 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_ColumnView_stringSplitRe( try { cudf::jni::auto_set_device(env); - auto const column_view = reinterpret_cast(input_handle); - auto const strings_column = cudf::strings_column_view{*column_view}; + auto const input = reinterpret_cast(input_handle); + auto const strings_column = cudf::strings_column_view{*input}; auto const pattern_jstr = cudf::jni::native_jstring(env, pattern_obj); auto const pattern = std::string(pattern_jstr.get(), pattern_jstr.size_bytes()); auto const max_split = limit > 1 ? limit - 1 : limit; @@ -754,8 +754,8 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_stringSplitRecord(JNIEnv try { cudf::jni::auto_set_device(env); - auto const column_view = reinterpret_cast(input_handle); - auto const strings_column = cudf::strings_column_view{*column_view}; + auto const input = reinterpret_cast(input_handle); + auto const strings_column = cudf::strings_column_view{*input}; auto const delimiter_jstr = cudf::jni::native_jstring(env, delimiter_obj); auto const delimiter = std::string(delimiter_jstr.get(), delimiter_jstr.size_bytes()); auto const max_split = limit > 1 ? limit - 1 : limit; @@ -782,8 +782,8 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_stringSplitRecordRe( try { cudf::jni::auto_set_device(env); - auto const column_view = reinterpret_cast(input_handle); - auto const strings_column = cudf::strings_column_view{*column_view}; + auto const input = reinterpret_cast(input_handle); + auto const strings_column = cudf::strings_column_view{*input}; auto const pattern_jstr = cudf::jni::native_jstring(env, pattern_obj); auto const pattern = std::string(pattern_jstr.get(), pattern_jstr.size_bytes()); auto const max_split = limit > 1 ? limit - 1 : limit;