From ffc15c38f97c230f70fb328e569e7ab1400ee23b Mon Sep 17 00:00:00 2001
From: Cindy Jiang <cindyj@nvidia.com>
Date: Mon, 6 Feb 2023 14:38:01 -0800
Subject: [PATCH 1/7] added RegexProgram-based string split APIs and tests

Signed-off-by: Cindy Jiang <cindyj@nvidia.com>
---
 .../main/java/ai/rapids/cudf/ColumnView.java  | 111 +++++++++++++++---
 java/src/main/native/src/ColumnViewJni.cpp    |  72 ++++++------
 .../java/ai/rapids/cudf/ColumnVectorTest.java |  71 +++++++----
 3 files changed, 178 insertions(+), 76 deletions(-)

diff --git a/java/src/main/java/ai/rapids/cudf/ColumnView.java b/java/src/main/java/ai/rapids/cudf/ColumnView.java
index 4daa3c17cfc..3b2ce941a8b 100644
--- a/java/src/main/java/ai/rapids/cudf/ColumnView.java
+++ b/java/src/main/java/ai/rapids/cudf/ColumnView.java
@@ -2531,12 +2531,34 @@ public final ColumnVector stringLocate(Scalar substring, int start, int end) {
    *                     regular expression pattern or just by a string literal delimiter.
    * @return list of strings columns as a table.
    */
+  @Deprecated
   public final Table stringSplit(String pattern, int limit, boolean splitByRegex) {
+    if (splitByRegex) {
+      return stringSplit(new RegexProgram(pattern, CaptureGroups.NON_CAPTURE), limit);
+    } else {
+      return stringSplit(pattern, limit);
+    }
+  }
+
+  /**
+   * Returns a list of columns by splitting each string using the specified regex program. The
+   * number of rows in the output columns will be the same as the input column. Null entries
+   * are added for a row where split results have been exhausted. Null input entries result in
+   * all nulls in the corresponding rows of the output columns.
+   *
+   * @param regexProg the regex program with UTF-8 encoded string identifying the split pattern
+   *                  for each input string.
+   * @param limit the maximum size of the list resulting from splitting each input string,
+   *              or -1 for all possible splits. Note that limit = 0 (all possible splits without
+   *              trailing empty strings) and limit = 1 (no split at all) are not supported.
+   * @return list of strings columns as a table.
+   */
+  public final Table stringSplit(RegexProgram regexProg, int limit) {
     assert type.equals(DType.STRING) : "column type must be a String";
-    assert pattern != null : "pattern is null";
-    assert pattern.length() > 0 : "empty pattern is not supported";
+    assert regexProg != null : "regex program is null";
     assert limit != 0 && limit != 1 : "split limit == 0 and limit == 1 are not supported";
-    return new Table(stringSplit(this.getNativeView(), pattern, limit, splitByRegex));
+    return new Table(stringSplit(this.getNativeView(), regexProg.pattern(), regexProg.combinedFlags(),
+                                 regexProg.capture().nativeId, limit, true));
   }
 
   /**
@@ -2550,6 +2572,7 @@ public final Table stringSplit(String pattern, int limit, boolean splitByRegex)
    *                     regular expression pattern or just by a string literal delimiter.
    * @return list of strings columns as a table.
    */
+  @Deprecated
   public final Table stringSplit(String pattern, boolean splitByRegex) {
     return stringSplit(pattern, -1, splitByRegex);
   }
@@ -2567,7 +2590,11 @@ public final Table stringSplit(String pattern, boolean splitByRegex) {
    * @return list of strings columns as a table.
    */
   public final Table stringSplit(String delimiter, int limit) {
-    return stringSplit(delimiter, limit, false);
+    assert type.equals(DType.STRING) : "column type must be a String";
+    assert delimiter != null : "delimiter is null";
+    assert limit != 0 && limit != 1 : "split limit == 0 and limit == 1 are not supported";
+    return new Table(stringSplit(this.getNativeView(), delimiter, RegexFlag.DEFAULT.nativeId,
+                                 CaptureGroups.NON_CAPTURE.nativeId, limit, false));
   }
 
   /**
@@ -2580,7 +2607,21 @@ public final Table stringSplit(String delimiter, int limit) {
    * @return list of strings columns as a table.
    */
   public final Table stringSplit(String delimiter) {
-    return stringSplit(delimiter, -1, false);
+    return stringSplit(delimiter, -1);
+  }
+
+  /**
+   * Returns a list of columns by splitting each string using the specified regex program with
+   * string literal delimiter. The number of rows in the output columns will be the same as the
+   * input column. Null entries are added for a row where split results have been exhausted.
+   * Null input entries result in all nulls in the corresponding rows of the output columns.
+   *
+   * @param regexProg the regex program with UTF-8 encoded string identifying the split pattern
+   *                  for each input string.
+   * @return list of strings columns as a table.
+   */
+  public final Table stringSplit(RegexProgram regexProg) {
+    return stringSplit(regexProg, -1);
   }
 
   /**
@@ -2595,13 +2636,33 @@ public final Table stringSplit(String delimiter) {
    *                     regular expression pattern or just by a string literal delimiter.
    * @return a LIST column of string elements.
    */
+  @Deprecated
   public final ColumnVector stringSplitRecord(String pattern, int limit, boolean splitByRegex) {
+    if (splitByRegex) {
+      return stringSplitRecord(new RegexProgram(pattern, CaptureGroups.NON_CAPTURE), limit);
+    } else {
+      return stringSplitRecord(pattern, limit);
+    }
+  }
+
+  /**
+   * Returns a column that are lists of strings in which each list is made by splitting the
+   * corresponding input string using the specified regex program pattern.
+   *
+   * @param regexProg the regex program with UTF-8 encoded string identifying the split pattern
+   *                  for each input string.
+   * @param limit the maximum size of the list resulting from splitting each input string,
+   *              or -1 for all possible splits. Note that limit = 0 (all possible splits without
+   *              trailing empty strings) and limit = 1 (no split at all) are not supported.
+   * @return a LIST column of string elements.
+   */
+  public final ColumnVector stringSplitRecord(RegexProgram regexProg, int limit) {
     assert type.equals(DType.STRING) : "column type must be String";
-    assert pattern != null : "pattern is null";
-    assert pattern.length() > 0 : "empty pattern is not supported";
+    assert regexProg != null : "regex program is null";
     assert limit != 0 && limit != 1 : "split limit == 0 and limit == 1 are not supported";
     return new ColumnVector(
-        stringSplitRecord(this.getNativeView(), pattern, limit, splitByRegex));
+        stringSplitRecord(this.getNativeView(), regexProg.pattern(), regexProg.combinedFlags(),
+                          regexProg.capture().nativeId, limit, true));
   }
 
   /**
@@ -2613,6 +2674,7 @@ public final ColumnVector stringSplitRecord(String pattern, int limit, boolean s
    *                     regular expression pattern or just by a string literal delimiter.
    * @return a LIST column of string elements.
    */
+  @Deprecated
   public final ColumnVector stringSplitRecord(String pattern, boolean splitByRegex) {
     return stringSplitRecord(pattern, -1, splitByRegex);
   }
@@ -2628,7 +2690,12 @@ public final ColumnVector stringSplitRecord(String pattern, boolean splitByRegex
    * @return a LIST column of string elements.
    */
   public final ColumnVector stringSplitRecord(String delimiter, int limit) {
-    return stringSplitRecord(delimiter, limit, false);
+    assert type.equals(DType.STRING) : "column type must be String";
+    assert delimiter != null : "delimiter is null";
+    assert limit != 0 && limit != 1 : "split limit == 0 and limit == 1 are not supported";
+    return new ColumnVector(
+        stringSplitRecord(this.getNativeView(), delimiter, RegexFlag.DEFAULT.nativeId,
+                          CaptureGroups.NON_CAPTURE.nativeId, limit, false));
   }
 
   /**
@@ -2639,7 +2706,19 @@ public final ColumnVector stringSplitRecord(String delimiter, int limit) {
    * @return a LIST column of string elements.
    */
   public final ColumnVector stringSplitRecord(String delimiter) {
-    return stringSplitRecord(delimiter, -1, false);
+    return stringSplitRecord(delimiter, -1);
+  }
+
+  /**
+   * Returns a column that are lists of strings in which each list is made by splitting the
+   * corresponding input string using the specified regex program with string literal delimiter.
+   *
+   * @param regexProg the regex program with UTF-8 encoded string identifying the split pattern
+   *                  for each input string.
+   * @return a LIST column of string elements.
+   */
+  public final ColumnVector stringSplitRecord(RegexProgram regexProg) {
+    return stringSplitRecord(regexProg, -1);
   }
 
   /**
@@ -3965,14 +4044,16 @@ private static native long repeatStringsWithColumnRepeatTimes(long stringsHandle
    *
    * @param nativeHandle native handle of the input strings column that being operated on.
    * @param pattern UTF-8 encoded string identifying the split pattern for each input string.
+   * @param flags regex flags setting.
+   * @param capture capture groups setting.
    * @param limit the maximum size of the list resulting from splitting each input string,
    *              or -1 for all possible splits. Note that limit = 0 (all possible splits without
    *              trailing empty strings) and limit = 1 (no split at all) are not supported.
    * @param splitByRegex a boolean flag indicating whether the input strings will be split by a
    *                     regular expression pattern or just by a string literal delimiter.
    */
-  private static native long[] stringSplit(long nativeHandle, String pattern, int limit,
-                                           boolean splitByRegex);
+  private static native long[] stringSplit(long nativeHandle, String pattern, int flags,
+                                           int capture, int limit, boolean splitByRegex);
 
   /**
    * Returns a column that are lists of strings in which each list is made by splitting the
@@ -3980,14 +4061,16 @@ private static native long[] stringSplit(long nativeHandle, String pattern, int
    *
    * @param nativeHandle native handle of the input strings column that being operated on.
    * @param pattern UTF-8 encoded string identifying the split pattern for each input string.
+   * @param flags regex flags setting.
+   * @param capture capture groups setting.
    * @param limit the maximum size of the list resulting from splitting each input string,
    *              or -1 for all possible splits. Note that limit = 0 (all possible splits without
    *              trailing empty strings) and limit = 1 (no split at all) are not supported.
    * @param splitByRegex a boolean flag indicating whether the input strings will be split by a
    *                     regular expression pattern or just by a string literal delimiter.
    */
-  private static native long stringSplitRecord(long nativeHandle, String pattern, int limit,
-                                               boolean splitByRegex);
+  private static native long stringSplitRecord(long nativeHandle, String pattern, int flags,
+                                               int capture, int limit, boolean splitByRegex);
 
   /**
    * Native method to calculate substring from a given string column. 0 indexing.
diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp
index bfa3fa0a522..efd6a46b9f2 100644
--- a/java/src/main/native/src/ColumnViewJni.cpp
+++ b/java/src/main/native/src/ColumnViewJni.cpp
@@ -679,11 +679,9 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_reverseStringsOrLists(JNI
   CATCH_STD(env, 0);
 }
 
-JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_ColumnView_stringSplit(JNIEnv *env, jclass,
-                                                                        jlong input_handle,
-                                                                        jstring pattern_obj,
-                                                                        jint limit,
-                                                                        jboolean split_by_regex) {
+JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_ColumnView_stringSplit(
+    JNIEnv *env, jclass, jlong input_handle, jstring pattern_obj, jint regex_flags,
+    jint capture_groups, jint limit, jboolean split_by_regex) {
   JNI_NULL_CHECK(env, input_handle, "input_handle is null", 0);
 
   if (limit == 0 || limit == 1) {
@@ -697,31 +695,28 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_ColumnView_stringSplit(JNIEnv *
 
   try {
     cudf::jni::auto_set_device(env);
-    auto const input = reinterpret_cast<cudf::column_view *>(input_handle);
-    auto const strs_input = cudf::strings_column_view{*input};
-
+    auto const column_view = reinterpret_cast<cudf::column_view const *>(input_handle);
+    auto const strings_column = cudf::strings_column_view{*column_view};
     auto const pattern_jstr = cudf::jni::native_jstring(env, pattern_obj);
-    if (pattern_jstr.is_empty()) {
-      // Java's split API produces different behaviors than cudf when splitting with empty
-      // pattern.
-      JNI_THROW_NEW(env, "java/lang/IllegalArgumentException", "Empty pattern is not supported", 0);
-    }
-
     auto const pattern = std::string(pattern_jstr.get(), pattern_jstr.size_bytes());
     auto const max_split = limit > 1 ? limit - 1 : limit;
-    auto result = split_by_regex ?
-                      cudf::strings::split_re(strs_input, pattern, max_split) :
-                      cudf::strings::split(strs_input, cudf::string_scalar{pattern}, max_split);
-    return cudf::jni::convert_table_for_return(env, std::move(result));
+    if (split_by_regex) {
+      auto const flags = static_cast<cudf::strings::regex_flags>(regex_flags);
+      auto const groups = static_cast<cudf::strings::capture_groups>(capture_groups);
+      auto const regex_prog = cudf::strings::regex_program::create(pattern, flags, groups);
+      auto result = cudf::strings::split_re(strings_column, *regex_prog, max_split);
+      return cudf::jni::convert_table_for_return(env, std::move(result));
+    } else {
+      auto result = cudf::strings::split(strings_column, cudf::string_scalar{pattern}, max_split);
+      return cudf::jni::convert_table_for_return(env, std::move(result));
+    }
   }
   CATCH_STD(env, 0);
 }
 
-JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_stringSplitRecord(JNIEnv *env, jclass,
-                                                                         jlong input_handle,
-                                                                         jstring pattern_obj,
-                                                                         jint limit,
-                                                                         jboolean split_by_regex) {
+JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_stringSplitRecord(
+    JNIEnv *env, jclass, jlong input_handle, jstring pattern_obj, jint regex_flags,
+    jint capture_groups, jint limit, jboolean split_by_regex) {
   JNI_NULL_CHECK(env, input_handle, "input_handle is null", 0);
 
   if (limit == 0 || limit == 1) {
@@ -735,23 +730,26 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_stringSplitRecord(JNIEnv
 
   try {
     cudf::jni::auto_set_device(env);
-    auto const input = reinterpret_cast<cudf::column_view *>(input_handle);
-    auto const strs_input = cudf::strings_column_view{*input};
-
+    auto const column_view = reinterpret_cast<cudf::column_view const *>(input_handle);
+    auto const strings_column = cudf::strings_column_view{*column_view};
     auto const pattern_jstr = cudf::jni::native_jstring(env, pattern_obj);
-    if (pattern_jstr.is_empty()) {
-      // Java's split API produces different behaviors than cudf when splitting with empty
-      // pattern.
-      JNI_THROW_NEW(env, "java/lang/IllegalArgumentException", "Empty pattern is not supported", 0);
-    }
-
     auto const pattern = std::string(pattern_jstr.get(), pattern_jstr.size_bytes());
     auto const max_split = limit > 1 ? limit - 1 : limit;
-    auto result =
-        split_by_regex ?
-            cudf::strings::split_record_re(strs_input, pattern, max_split) :
-            cudf::strings::split_record(strs_input, cudf::string_scalar{pattern}, max_split);
-    return release_as_jlong(result);
+    if (split_by_regex) {
+      auto const flags = static_cast<cudf::strings::regex_flags>(regex_flags);
+      auto const groups = static_cast<cudf::strings::capture_groups>(capture_groups);
+      auto const regex_prog = cudf::strings::regex_program::create(pattern, flags, groups);
+      auto result = cudf::strings::split_record_re(strings_column, *regex_prog, max_split);
+      return release_as_jlong(result);
+    } else {
+      auto result = cudf::strings::split_record(strings_column, cudf::string_scalar{pattern}, max_split);
+      return release_as_jlong(result);
+    }
+    // auto result =
+    //     split_by_regex ?
+    //         cudf::strings::split_record_re(strs_input, pattern, max_split) :
+    //         cudf::strings::split_record(strs_input, cudf::string_scalar{pattern}, max_split);
+    // return release_as_jlong(result);
   }
   CATCH_STD(env, 0);
 }
diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java
index 46264b7d668..c3af5eb7b68 100644
--- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java
+++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java
@@ -4989,29 +4989,37 @@ void testReverseList() {
   @Test
   void testStringSplit() {
     String pattern = " ";
+    RegexProgram regexProg = new RegexProgram(pattern, CaptureGroups.NON_CAPTURE);
     try (ColumnVector v = ColumnVector.fromStrings("Héllo there all", "thésé", null, "",
-        "ARé some things", "test strings here");
+             "ARé some things", "test strings here");
          Table expectedSplitLimit2 = new Table.TestBuilder()
-         .column("Héllo", "thésé", null, "", "ARé", "test")
-         .column("there all", null, null, null, "some things", "strings here")
-         .build();
+             .column("Héllo", "thésé", null, "", "ARé", "test")
+             .column("there all", null, null, null, "some things", "strings here")
+             .build();
          Table expectedSplitAll = new Table.TestBuilder()
-         .column("Héllo", "thésé", null, "", "ARé", "test")
-         .column("there", null, null, null, "some", "strings")
-         .column("all", null, null, null, "things", "here")
-         .build();
-         Table resultSplitLimit2 = v.stringSplit(pattern, 2);
-         Table resultSplitAll = v.stringSplit(pattern)) {
-          assertTablesAreEqual(expectedSplitLimit2, resultSplitLimit2);
-          assertTablesAreEqual(expectedSplitAll, resultSplitAll);
+             .column("Héllo", "thésé", null, "", "ARé", "test")
+             .column("there", null, null, null, "some", "strings")
+             .column("all", null, null, null, "things", "here")
+             .build()) {
+      try (Table resultSplitLimit2 = v.stringSplit(pattern, 2);
+           Table resultSplitAll = v.stringSplit(pattern)) {
+        assertTablesAreEqual(expectedSplitLimit2, resultSplitLimit2);
+        assertTablesAreEqual(expectedSplitAll, resultSplitAll);
+      }
+      try (Table resultSplitLimit2 = v.stringSplit(regexProg, 2);
+           Table resultSplitAll = v.stringSplit(regexProg)) {
+        assertTablesAreEqual(expectedSplitLimit2, resultSplitLimit2);
+        assertTablesAreEqual(expectedSplitAll, resultSplitAll);
+      }
     }
   }
 
   @Test
   void testStringSplitByRegularExpression() {
     String pattern = "[_ ]";
+    RegexProgram regexProg = new RegexProgram(pattern, CaptureGroups.NON_CAPTURE);
     try (ColumnVector v = ColumnVector.fromStrings("Héllo_there all", "thésé", null, "",
-        "ARé some_things", "test_strings_here");
+             "ARé some_things", "test_strings_here");
          Table expectedSplitLimit2 = new Table.TestBuilder()
              .column("Héllo", "thésé", null, "", "ARé", "test")
              .column("there all", null, null, null, "some_things", "strings_here")
@@ -5020,11 +5028,17 @@ void testStringSplitByRegularExpression() {
              .column("Héllo", "thésé", null, "", "ARé", "test")
              .column("there", null, null, null, "some", "strings")
              .column("all", null, null, null, "things", "here")
-             .build();
-         Table resultSplitLimit2 = v.stringSplit(pattern, 2, true);
-         Table resultSplitAll = v.stringSplit(pattern, true)) {
-      assertTablesAreEqual(expectedSplitLimit2, resultSplitLimit2);
-      assertTablesAreEqual(expectedSplitAll, resultSplitAll);
+             .build()) {
+      try (Table resultSplitLimit2 = v.stringSplit(pattern, 2, true);
+           Table resultSplitAll = v.stringSplit(pattern, true)) {
+        assertTablesAreEqual(expectedSplitLimit2, resultSplitLimit2);
+        assertTablesAreEqual(expectedSplitAll, resultSplitAll);
+      }
+      try (Table resultSplitLimit2 = v.stringSplit(regexProg, 2);
+           Table resultSplitAll = v.stringSplit(regexProg)) {
+        assertTablesAreEqual(expectedSplitLimit2, resultSplitLimit2);
+        assertTablesAreEqual(expectedSplitAll, resultSplitAll);
+      }
     }
   }
 
@@ -5032,7 +5046,7 @@ void testStringSplitByRegularExpression() {
   void testStringSplitRecord() {
     String pattern = " ";
     try (ColumnVector v = ColumnVector.fromStrings("Héllo there all", "thésé", null, "",
-        "ARé some things", "test strings here");
+             "ARé some things", "test strings here");
          ColumnVector expectedSplitLimit2 = ColumnVector.fromLists(
              new HostColumnVector.ListType(true,
                  new HostColumnVector.BasicType(true, DType.STRING)),
@@ -5061,8 +5075,9 @@ void testStringSplitRecord() {
   @Test
   void testStringSplitRecordByRegularExpression() {
     String pattern = "[_ ]";
+    RegexProgram regexProg = new RegexProgram(pattern, CaptureGroups.NON_CAPTURE);
     try (ColumnVector v = ColumnVector.fromStrings("Héllo_there all", "thésé", null, "",
-        "ARé some_things", "test_strings_here");
+             "ARé some_things", "test_strings_here");
          ColumnVector expectedSplitLimit2 = ColumnVector.fromLists(
              new HostColumnVector.ListType(true,
                  new HostColumnVector.BasicType(true, DType.STRING)),
@@ -5080,11 +5095,17 @@ void testStringSplitRecordByRegularExpression() {
              null,
              Arrays.asList(""),
              Arrays.asList("ARé", "some", "things"),
-             Arrays.asList("test", "strings", "here"));
-         ColumnVector resultSplitLimit2 = v.stringSplitRecord(pattern, 2, true);
-         ColumnVector resultSplitAll = v.stringSplitRecord(pattern, true)) {
-      assertColumnsAreEqual(expectedSplitLimit2, resultSplitLimit2);
-      assertColumnsAreEqual(expectedSplitAll, resultSplitAll);
+             Arrays.asList("test", "strings", "here"))) {
+      try (ColumnVector resultSplitLimit2 = v.stringSplitRecord(pattern, 2, true);
+           ColumnVector resultSplitAll = v.stringSplitRecord(pattern, true)) {
+        assertColumnsAreEqual(expectedSplitLimit2, resultSplitLimit2);
+        assertColumnsAreEqual(expectedSplitAll, resultSplitAll);
+      }
+      try (ColumnVector resultSplitLimit2 = v.stringSplitRecord(regexProg, 2);
+           ColumnVector resultSplitAll = v.stringSplitRecord(regexProg)) {
+        assertColumnsAreEqual(expectedSplitLimit2, resultSplitLimit2);
+        assertColumnsAreEqual(expectedSplitAll, resultSplitAll);
+      }
     }
   }
 

From 47185407cb26ee5282121b5d73adfbb11748830a Mon Sep 17 00:00:00 2001
From: Cindy Jiang <cindyj@nvidia.com>
Date: Mon, 6 Feb 2023 15:41:07 -0800
Subject: [PATCH 2/7] fixed code formatting and removed commented-out code

Signed-off-by: Cindy Jiang <cindyj@nvidia.com>
---
 java/src/main/native/src/ColumnViewJni.cpp | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp
index efd6a46b9f2..0ae1a1ed210 100644
--- a/java/src/main/native/src/ColumnViewJni.cpp
+++ b/java/src/main/native/src/ColumnViewJni.cpp
@@ -742,14 +742,10 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_stringSplitRecord(
       auto result = cudf::strings::split_record_re(strings_column, *regex_prog, max_split);
       return release_as_jlong(result);
     } else {
-      auto result = cudf::strings::split_record(strings_column, cudf::string_scalar{pattern}, max_split);
+      auto result =
+          cudf::strings::split_record(strings_column, cudf::string_scalar{pattern}, max_split);
       return release_as_jlong(result);
     }
-    // auto result =
-    //     split_by_regex ?
-    //         cudf::strings::split_record_re(strs_input, pattern, max_split) :
-    //         cudf::strings::split_record(strs_input, cudf::string_scalar{pattern}, max_split);
-    // return release_as_jlong(result);
   }
   CATCH_STD(env, 0);
 }

From 0376ce0868d866154cb06b930ff91c49f4333ee4 Mon Sep 17 00:00:00 2001
From: Cindy Jiang <cindyj@nvidia.com>
Date: Tue, 7 Feb 2023 11:45:32 -0800
Subject: [PATCH 3/7] split stringSplit/stringSplitRecord native methods into
 literal and regex variants

Signed-off-by: Cindy Jiang <cindyj@nvidia.com>
---
 .../main/java/ai/rapids/cudf/ColumnView.java  | 81 ++++++++++-------
 java/src/main/native/src/ColumnViewJni.cpp    | 87 ++++++++++++++-----
 2 files changed, 115 insertions(+), 53 deletions(-)

diff --git a/java/src/main/java/ai/rapids/cudf/ColumnView.java b/java/src/main/java/ai/rapids/cudf/ColumnView.java
index 3b2ce941a8b..371cd25c4d1 100644
--- a/java/src/main/java/ai/rapids/cudf/ColumnView.java
+++ b/java/src/main/java/ai/rapids/cudf/ColumnView.java
@@ -2541,8 +2541,8 @@ public final Table stringSplit(String pattern, int limit, boolean splitByRegex)
   }
 
   /**
-   * Returns a list of columns by splitting each string using the specified regex program. The
-   * number of rows in the output columns will be the same as the input column. Null entries
+   * Returns a list of columns by splitting each string using the specified regex program pattern.
+   * The number of rows in the output columns will be the same as the input column. Null entries
    * are added for a row where split results have been exhausted. Null input entries result in
    * all nulls in the corresponding rows of the output columns.
    *
@@ -2557,8 +2557,8 @@ public final Table stringSplit(RegexProgram regexProg, int limit) {
     assert type.equals(DType.STRING) : "column type must be a String";
     assert regexProg != null : "regex program is null";
     assert limit != 0 && limit != 1 : "split limit == 0 and limit == 1 are not supported";
-    return new Table(stringSplit(this.getNativeView(), regexProg.pattern(), regexProg.combinedFlags(),
-                                 regexProg.capture().nativeId, limit, true));
+    return new Table(stringSplitRe(this.getNativeView(), regexProg.pattern(), regexProg.combinedFlags(),
+                                   regexProg.capture().nativeId, limit));
   }
 
   /**
@@ -2593,8 +2593,7 @@ public final Table stringSplit(String delimiter, int limit) {
     assert type.equals(DType.STRING) : "column type must be a String";
     assert delimiter != null : "delimiter is null";
     assert limit != 0 && limit != 1 : "split limit == 0 and limit == 1 are not supported";
-    return new Table(stringSplit(this.getNativeView(), delimiter, RegexFlag.DEFAULT.nativeId,
-                                 CaptureGroups.NON_CAPTURE.nativeId, limit, false));
+    return new Table(stringSplit(this.getNativeView(), delimiter, limit));
   }
 
   /**
@@ -2611,10 +2610,10 @@ public final Table stringSplit(String delimiter) {
   }
 
   /**
-   * Returns a list of columns by splitting each string using the specified regex program with
-   * string literal delimiter. The number of rows in the output columns will be the same as the
-   * input column. Null entries are added for a row where split results have been exhausted.
-   * Null input entries result in all nulls in the corresponding rows of the output columns.
+   * Returns a list of columns by splitting each string using the specified regex program pattern.
+   * The number of rows in the output columns will be the same as the input column. Null entries
+   * are added for a row where split results have been exhausted. Null input entries result in
+   * all nulls in the corresponding rows of the output columns.
    *
    * @param regexProg the regex program with UTF-8 encoded string identifying the split pattern
    *                  for each input string.
@@ -2661,8 +2660,8 @@ public final ColumnVector stringSplitRecord(RegexProgram regexProg, int limit) {
     assert regexProg != null : "regex program is null";
     assert limit != 0 && limit != 1 : "split limit == 0 and limit == 1 are not supported";
     return new ColumnVector(
-        stringSplitRecord(this.getNativeView(), regexProg.pattern(), regexProg.combinedFlags(),
-                          regexProg.capture().nativeId, limit, true));
+        stringSplitRecordRe(this.getNativeView(), regexProg.pattern(), regexProg.combinedFlags(),
+                            regexProg.capture().nativeId, limit));
   }
 
   /**
@@ -2693,9 +2692,7 @@ public final ColumnVector stringSplitRecord(String delimiter, int limit) {
     assert type.equals(DType.STRING) : "column type must be String";
     assert delimiter != null : "delimiter is null";
     assert limit != 0 && limit != 1 : "split limit == 0 and limit == 1 are not supported";
-    return new ColumnVector(
-        stringSplitRecord(this.getNativeView(), delimiter, RegexFlag.DEFAULT.nativeId,
-                          CaptureGroups.NON_CAPTURE.nativeId, limit, false));
+    return new ColumnVector(stringSplitRecord(this.getNativeView(), delimiter, limit));
   }
 
   /**
@@ -2711,7 +2708,7 @@ public final ColumnVector stringSplitRecord(String delimiter) {
 
   /**
    * Returns a column that are lists of strings in which each list is made by splitting the
-   * corresponding input string using the specified regex program with string literal delimiter.
+   * corresponding input string using the specified regex program pattern.
    *
    * @param regexProg the regex program with UTF-8 encoded string identifying the split pattern
    *                  for each input string.
@@ -4037,40 +4034,64 @@ private static native long repeatStringsWithColumnRepeatTimes(long stringsHandle
   private static native long substringLocate(long columnView, long substringScalar, int start, int end);
 
   /**
-   * Returns a list of columns by splitting each string using the specified pattern. The number of
-   * rows in the output columns will be the same as the input column. Null entries are added for a
-   * row where split results have been exhausted. Null input entries result in all nulls in the
-   * corresponding rows of the output columns.
+   * Returns a list of columns by splitting each string using the specified string literal
+   * delimiter. The number of rows in the output columns will be the same as the input column.
+   * Null entries are added for a row where split results have been exhausted. Null input entries
+   * result in all nulls in the corresponding rows of the output columns.
    *
    * @param nativeHandle native handle of the input strings column that being operated on.
-   * @param pattern UTF-8 encoded string identifying the split pattern for each input string.
+   * @param delimiter UTF-8 encoded string identifying the split delimiter for each input string.
+   * @param limit the maximum size of the list resulting from splitting each input string,
+   *              or -1 for all possible splits. Note that limit = 0 (all possible splits without
+   *              trailing empty strings) and limit = 1 (no split at all) are not supported.
+   */
+  private static native long[] stringSplit(long nativeHandle, String delimiter, int limit);
+  
+  /**
+   * Returns a list of columns by splitting each string using the specified regular expression
+   * pattern. The number of rows in the output columns will be the same as the input column.
+   * Null entries are added for a row where split results have been exhausted. Null input entries
+   * result in all nulls in the corresponding rows of the output columns.
+   *
+   * @param nativeHandle native handle of the input strings column that being operated on.
+   * @param pattern UTF-8 encoded string identifying the split regular expression pattern for
+   *                each input string.
    * @param flags regex flags setting.
    * @param capture capture groups setting.
    * @param limit the maximum size of the list resulting from splitting each input string,
    *              or -1 for all possible splits. Note that limit = 0 (all possible splits without
    *              trailing empty strings) and limit = 1 (no split at all) are not supported.
-   * @param splitByRegex a boolean flag indicating whether the input strings will be split by a
-   *                     regular expression pattern or just by a string literal delimiter.
    */
-  private static native long[] stringSplit(long nativeHandle, String pattern, int flags,
-                                           int capture, int limit, boolean splitByRegex);
+  private static native long[] stringSplitRe(long nativeHandle, String pattern, int flags,
+                                             int capture, int limit);
 
   /**
    * Returns a column that are lists of strings in which each list is made by splitting the
    * corresponding input string using the specified string literal delimiter.
    *
    * @param nativeHandle native handle of the input strings column that being operated on.
-   * @param pattern UTF-8 encoded string identifying the split pattern for each input string.
+   * @param delimiter UTF-8 encoded string identifying the split delimiter for each input string.
+   * @param limit the maximum size of the list resulting from splitting each input string,
+   *              or -1 for all possible splits. Note that limit = 0 (all possible splits without
+   *              trailing empty strings) and limit = 1 (no split at all) are not supported.
+   */
+  private static native long stringSplitRecord(long nativeHandle, String delimiter, int limit);
+  
+  /**
+   * Returns a column that are lists of strings in which each list is made by splitting the
+   * corresponding input string using the specified regular expression pattern.
+   *
+   * @param nativeHandle native handle of the input strings column that is being operated on.
+   * @param pattern UTF-8 encoded string identifying the split regular expression pattern for
+   *                each input string.
    * @param flags regex flags setting.
    * @param capture capture groups setting.
    * @param limit the maximum size of the list resulting from splitting each input string,
    *              or -1 for all possible splits. Note that limit = 0 (all possible splits without
    *              trailing empty strings) and limit = 1 (no split at all) are not supported.
-   * @param splitByRegex a boolean flag indicating whether the input strings will be split by a
-   *                     regular expression pattern or just by a string literal delimiter.
    */
-  private static native long stringSplitRecord(long nativeHandle, String pattern, int flags,
-                                               int capture, int limit, boolean splitByRegex);
+  private static native long stringSplitRecordRe(long nativeHandle, String pattern, int flags,
+                                                 int capture, int limit);
 
   /**
    * Native method to calculate substring from a given string column. 0 indexing.
diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp
index 0ae1a1ed210..a5f8ee14252 100644
--- a/java/src/main/native/src/ColumnViewJni.cpp
+++ b/java/src/main/native/src/ColumnViewJni.cpp
@@ -680,8 +680,34 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_reverseStringsOrLists(JNI
 }
 
 JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_ColumnView_stringSplit(
+    JNIEnv *env, jclass, jlong input_handle, jstring delimiter_obj, jint limit) {
+  JNI_NULL_CHECK(env, input_handle, "input_handle is null", 0);
+
+  if (limit == 0 || limit == 1) {
+    // Cannot achieve the results of splitting with limit == 0 or limit == 1.
+    // This is because cudf operates on a different parameter (`max_split`) which is converted from
+    // limit. When limit == 0 or limit == 1, max_split will be non-positive and will result in an
+    // unlimited split.
+    JNI_THROW_NEW(env, "java/lang/IllegalArgumentException",
+                  "limit == 0 and limit == 1 are not supported", 0);
+  }
+
+  try {
+    cudf::jni::auto_set_device(env);
+    auto const column_view = reinterpret_cast<cudf::column_view const *>(input_handle);
+    auto const strings_column = cudf::strings_column_view{*column_view};
+    auto const delimiter_jstr = cudf::jni::native_jstring(env, delimiter_obj);
+    auto const delimiter = std::string(delimiter_jstr.get(), delimiter_jstr.size_bytes());
+    auto const max_split = limit > 1 ? limit - 1 : limit;
+    auto result = cudf::strings::split(strings_column, cudf::string_scalar{delimiter}, max_split);
+    return cudf::jni::convert_table_for_return(env, std::move(result));
+  }
+  CATCH_STD(env, 0);
+}
+
+JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_ColumnView_stringSplitRe(
     JNIEnv *env, jclass, jlong input_handle, jstring pattern_obj, jint regex_flags,
-    jint capture_groups, jint limit, jboolean split_by_regex) {
+    jint capture_groups, jint limit) {
   JNI_NULL_CHECK(env, input_handle, "input_handle is null", 0);
 
   if (limit == 0 || limit == 1) {
@@ -700,23 +726,44 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_ColumnView_stringSplit(
     auto const pattern_jstr = cudf::jni::native_jstring(env, pattern_obj);
     auto const pattern = std::string(pattern_jstr.get(), pattern_jstr.size_bytes());
     auto const max_split = limit > 1 ? limit - 1 : limit;
-    if (split_by_regex) {
-      auto const flags = static_cast<cudf::strings::regex_flags>(regex_flags);
-      auto const groups = static_cast<cudf::strings::capture_groups>(capture_groups);
-      auto const regex_prog = cudf::strings::regex_program::create(pattern, flags, groups);
-      auto result = cudf::strings::split_re(strings_column, *regex_prog, max_split);
-      return cudf::jni::convert_table_for_return(env, std::move(result));
-    } else {
-      auto result = cudf::strings::split(strings_column, cudf::string_scalar{pattern}, max_split);
-      return cudf::jni::convert_table_for_return(env, std::move(result));
-    }
+    auto const flags = static_cast<cudf::strings::regex_flags>(regex_flags);
+    auto const groups = static_cast<cudf::strings::capture_groups>(capture_groups);
+    auto const regex_prog = cudf::strings::regex_program::create(pattern, flags, groups);
+    auto result = cudf::strings::split_re(strings_column, *regex_prog, max_split);
+    return cudf::jni::convert_table_for_return(env, std::move(result));
   }
   CATCH_STD(env, 0);
 }
 
 JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_stringSplitRecord(
+    JNIEnv *env, jclass, jlong input_handle, jstring delimiter_obj, jint limit) {
+  JNI_NULL_CHECK(env, input_handle, "input_handle is null", 0);
+
+  if (limit == 0 || limit == 1) {
+    // Cannot achieve the results of splitting with limit == 0 or limit == 1.
+    // This is because cudf operates on a different parameter (`max_split`) which is converted from
+    // limit. When limit == 0 or limit == 1, max_split will be non-positive and will result in an
+    // unlimited split.
+    JNI_THROW_NEW(env, "java/lang/IllegalArgumentException",
+                  "limit == 0 and limit == 1 are not supported", 0);
+  }
+
+  try {
+    cudf::jni::auto_set_device(env);
+    auto const column_view = reinterpret_cast<cudf::column_view const *>(input_handle);
+    auto const strings_column = cudf::strings_column_view{*column_view};
+    auto const delimiter_jstr = cudf::jni::native_jstring(env, delimiter_obj);
+    auto const delimiter = std::string(delimiter_jstr.get(), delimiter_jstr.size_bytes());
+    auto const max_split = limit > 1 ? limit - 1 : limit;
+    auto result = cudf::strings::split_record(strings_column, cudf::string_scalar{delimiter}, max_split);
+    return release_as_jlong(result);
+  }
+  CATCH_STD(env, 0);
+}
+
+JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_stringSplitRecordRe(
     JNIEnv *env, jclass, jlong input_handle, jstring pattern_obj, jint regex_flags,
-    jint capture_groups, jint limit, jboolean split_by_regex) {
+    jint capture_groups, jint limit) {
   JNI_NULL_CHECK(env, input_handle, "input_handle is null", 0);
 
   if (limit == 0 || limit == 1) {
@@ -735,17 +782,11 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_stringSplitRecord(
     auto const pattern_jstr = cudf::jni::native_jstring(env, pattern_obj);
     auto const pattern = std::string(pattern_jstr.get(), pattern_jstr.size_bytes());
     auto const max_split = limit > 1 ? limit - 1 : limit;
-    if (split_by_regex) {
-      auto const flags = static_cast<cudf::strings::regex_flags>(regex_flags);
-      auto const groups = static_cast<cudf::strings::capture_groups>(capture_groups);
-      auto const regex_prog = cudf::strings::regex_program::create(pattern, flags, groups);
-      auto result = cudf::strings::split_record_re(strings_column, *regex_prog, max_split);
-      return release_as_jlong(result);
-    } else {
-      auto result =
-          cudf::strings::split_record(strings_column, cudf::string_scalar{pattern}, max_split);
-      return release_as_jlong(result);
-    }
+    auto const flags = static_cast<cudf::strings::regex_flags>(regex_flags);
+    auto const groups = static_cast<cudf::strings::capture_groups>(capture_groups);
+    auto const regex_prog = cudf::strings::regex_program::create(pattern, flags, groups);
+    auto result = cudf::strings::split_record_re(strings_column, *regex_prog, max_split);
+    return release_as_jlong(result);
   }
   CATCH_STD(env, 0);
 }

From 5dd8f0f2d40274410a722439932f177173daca3c Mon Sep 17 00:00:00 2001
From: Cindy Jiang <cindyj@nvidia.com>
Date: Tue, 7 Feb 2023 11:46:44 -0800
Subject: [PATCH 4/7] fixed code formatting

Signed-off-by: Cindy Jiang <cindyj@nvidia.com>
---
 java/src/main/java/ai/rapids/cudf/ColumnView.java |  4 ++--
 java/src/main/native/src/ColumnViewJni.cpp        | 15 ++++++++++-----
 2 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/java/src/main/java/ai/rapids/cudf/ColumnView.java b/java/src/main/java/ai/rapids/cudf/ColumnView.java
index 371cd25c4d1..26882d90f3a 100644
--- a/java/src/main/java/ai/rapids/cudf/ColumnView.java
+++ b/java/src/main/java/ai/rapids/cudf/ColumnView.java
@@ -4046,7 +4046,7 @@ private static native long repeatStringsWithColumnRepeatTimes(long stringsHandle
    *              trailing empty strings) and limit = 1 (no split at all) are not supported.
    */
   private static native long[] stringSplit(long nativeHandle, String delimiter, int limit);
-  
+
   /**
    * Returns a list of columns by splitting each string using the specified regular expression
    * pattern. The number of rows in the output columns will be the same as the input column.
@@ -4076,7 +4076,7 @@ private static native long[] stringSplitRe(long nativeHandle, String pattern, in
    *              trailing empty strings) and limit = 1 (no split at all) are not supported.
    */
   private static native long stringSplitRecord(long nativeHandle, String delimiter, int limit);
-  
+
   /**
    * Returns a column that are lists of strings in which each list is made by splitting the
    * corresponding input string using the specified regular expression pattern.
diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp
index a5f8ee14252..501db23fa85 100644
--- a/java/src/main/native/src/ColumnViewJni.cpp
+++ b/java/src/main/native/src/ColumnViewJni.cpp
@@ -679,8 +679,10 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_reverseStringsOrLists(JNI
   CATCH_STD(env, 0);
 }
 
-JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_ColumnView_stringSplit(
-    JNIEnv *env, jclass, jlong input_handle, jstring delimiter_obj, jint limit) {
+JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_ColumnView_stringSplit(JNIEnv *env, jclass,
+                                                                        jlong input_handle,
+                                                                        jstring delimiter_obj,
+                                                                        jint limit) {
   JNI_NULL_CHECK(env, input_handle, "input_handle is null", 0);
 
   if (limit == 0 || limit == 1) {
@@ -735,8 +737,10 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_ColumnView_stringSplitRe(
   CATCH_STD(env, 0);
 }
 
-JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_stringSplitRecord(
-    JNIEnv *env, jclass, jlong input_handle, jstring delimiter_obj, jint limit) {
+JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_stringSplitRecord(JNIEnv *env, jclass,
+                                                                         jlong input_handle,
+                                                                         jstring delimiter_obj,
+                                                                         jint limit) {
   JNI_NULL_CHECK(env, input_handle, "input_handle is null", 0);
 
   if (limit == 0 || limit == 1) {
@@ -755,7 +759,8 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_stringSplitRecord(
     auto const delimiter_jstr = cudf::jni::native_jstring(env, delimiter_obj);
     auto const delimiter = std::string(delimiter_jstr.get(), delimiter_jstr.size_bytes());
     auto const max_split = limit > 1 ? limit - 1 : limit;
-    auto result = cudf::strings::split_record(strings_column, cudf::string_scalar{delimiter}, max_split);
+    auto result =
+        cudf::strings::split_record(strings_column, cudf::string_scalar{delimiter}, max_split);
     return release_as_jlong(result);
   }
   CATCH_STD(env, 0);

From 3643269ccf7e33d064ec735c270fd4e755c8154c Mon Sep 17 00:00:00 2001
From: Cindy Jiang <cindyj@nvidia.com>
Date: Tue, 7 Feb 2023 12:12:32 -0800
Subject: [PATCH 5/7] removed incorrect stringsplit tests

Signed-off-by: Cindy Jiang <cindyj@nvidia.com>
---
 .../java/ai/rapids/cudf/ColumnVectorTest.java   | 17 +++++------------
 1 file changed, 5 insertions(+), 12 deletions(-)

diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java
index c3af5eb7b68..99c3b25a4de 100644
--- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java
+++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java
@@ -4989,7 +4989,6 @@ void testReverseList() {
   @Test
   void testStringSplit() {
     String pattern = " ";
-    RegexProgram regexProg = new RegexProgram(pattern, CaptureGroups.NON_CAPTURE);
     try (ColumnVector v = ColumnVector.fromStrings("Héllo there all", "thésé", null, "",
              "ARé some things", "test strings here");
          Table expectedSplitLimit2 = new Table.TestBuilder()
@@ -5000,17 +4999,11 @@ void testStringSplit() {
              .column("Héllo", "thésé", null, "", "ARé", "test")
              .column("there", null, null, null, "some", "strings")
              .column("all", null, null, null, "things", "here")
-             .build()) {
-      try (Table resultSplitLimit2 = v.stringSplit(pattern, 2);
-           Table resultSplitAll = v.stringSplit(pattern)) {
-        assertTablesAreEqual(expectedSplitLimit2, resultSplitLimit2);
-        assertTablesAreEqual(expectedSplitAll, resultSplitAll);
-      }
-      try (Table resultSplitLimit2 = v.stringSplit(regexProg, 2);
-           Table resultSplitAll = v.stringSplit(regexProg)) {
-        assertTablesAreEqual(expectedSplitLimit2, resultSplitLimit2);
-        assertTablesAreEqual(expectedSplitAll, resultSplitAll);
-      }
+             .build();
+          Table resultSplitLimit2 = v.stringSplit(pattern, 2);
+          Table resultSplitAll = v.stringSplit(pattern)) {
+      assertTablesAreEqual(expectedSplitLimit2, resultSplitLimit2);
+      assertTablesAreEqual(expectedSplitAll, resultSplitAll);
     }
   }
 

From 70de2b5aa03c88b9858dafda60420a2b02908ab9 Mon Sep 17 00:00:00 2001
From: Cindy Jiang <cindyj@nvidia.com>
Date: Tue, 7 Feb 2023 12:14:28 -0800
Subject: [PATCH 6/7] fixed indentation

Signed-off-by: Cindy Jiang <cindyj@nvidia.com>
---
 java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java
index 99c3b25a4de..ab4baf74277 100644
--- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java
+++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java
@@ -5000,8 +5000,8 @@ void testStringSplit() {
              .column("there", null, null, null, "some", "strings")
              .column("all", null, null, null, "things", "here")
              .build();
-          Table resultSplitLimit2 = v.stringSplit(pattern, 2);
-          Table resultSplitAll = v.stringSplit(pattern)) {
+         Table resultSplitLimit2 = v.stringSplit(pattern, 2);
+         Table resultSplitAll = v.stringSplit(pattern)) {
       assertTablesAreEqual(expectedSplitLimit2, resultSplitLimit2);
       assertTablesAreEqual(expectedSplitAll, resultSplitAll);
     }

From 465ea42dcb7e4986696ddeab367ec6cd618e5ddb Mon Sep 17 00:00:00 2001
From: Cindy Jiang <cindyj@nvidia.com>
Date: Tue, 7 Feb 2023 14:05:16 -0800
Subject: [PATCH 7/7] fixed variable naming and comments

Signed-off-by: Cindy Jiang <cindyj@nvidia.com>
---
 .../src/main/java/ai/rapids/cudf/ColumnView.java |  8 ++++----
 java/src/main/native/src/ColumnViewJni.cpp       | 16 ++++++++--------
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/java/src/main/java/ai/rapids/cudf/ColumnView.java b/java/src/main/java/ai/rapids/cudf/ColumnView.java
index 26882d90f3a..2d0bf28225f 100644
--- a/java/src/main/java/ai/rapids/cudf/ColumnView.java
+++ b/java/src/main/java/ai/rapids/cudf/ColumnView.java
@@ -2543,7 +2543,7 @@ public final Table stringSplit(String pattern, int limit, boolean splitByRegex)
   /**
    * Returns a list of columns by splitting each string using the specified regex program pattern.
    * The number of rows in the output columns will be the same as the input column. Null entries
-   * are added for a row where split results have been exhausted. Null input entries result in
+   * are added for the rows where split results have been exhausted. Null input entries result in
    * all nulls in the corresponding rows of the output columns.
    *
    * @param regexProg the regex program with UTF-8 encoded string identifying the split pattern
@@ -2612,7 +2612,7 @@ public final Table stringSplit(String delimiter) {
   /**
    * Returns a list of columns by splitting each string using the specified regex program pattern.
    * The number of rows in the output columns will be the same as the input column. Null entries
-   * are added for a row where split results have been exhausted. Null input entries result in
+   * are added for the rows where split results have been exhausted. Null input entries result in
    * all nulls in the corresponding rows of the output columns.
    *
    * @param regexProg the regex program with UTF-8 encoded string identifying the split pattern
@@ -4036,7 +4036,7 @@ private static native long repeatStringsWithColumnRepeatTimes(long stringsHandle
   /**
    * Returns a list of columns by splitting each string using the specified string literal
    * delimiter. The number of rows in the output columns will be the same as the input column.
-   * Null entries are added for a row where split results have been exhausted. Null input entries
+   * Null entries are added for the rows where split results have been exhausted. Null input entries
    * result in all nulls in the corresponding rows of the output columns.
    *
    * @param nativeHandle native handle of the input strings column that being operated on.
@@ -4050,7 +4050,7 @@ private static native long repeatStringsWithColumnRepeatTimes(long stringsHandle
   /**
    * Returns a list of columns by splitting each string using the specified regular expression
    * pattern. The number of rows in the output columns will be the same as the input column.
-   * Null entries are added for a row where split results have been exhausted. Null input entries
+   * Null entries are added for the rows where split results have been exhausted. Null input entries
    * result in all nulls in the corresponding rows of the output columns.
    *
    * @param nativeHandle native handle of the input strings column that being operated on.
diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp
index 501db23fa85..958efd364ed 100644
--- a/java/src/main/native/src/ColumnViewJni.cpp
+++ b/java/src/main/native/src/ColumnViewJni.cpp
@@ -696,8 +696,8 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_ColumnView_stringSplit(JNIEnv *
 
   try {
     cudf::jni::auto_set_device(env);
-    auto const column_view = reinterpret_cast<cudf::column_view const *>(input_handle);
-    auto const strings_column = cudf::strings_column_view{*column_view};
+    auto const input = reinterpret_cast<cudf::column_view const *>(input_handle);
+    auto const strings_column = cudf::strings_column_view{*input};
     auto const delimiter_jstr = cudf::jni::native_jstring(env, delimiter_obj);
     auto const delimiter = std::string(delimiter_jstr.get(), delimiter_jstr.size_bytes());
     auto const max_split = limit > 1 ? limit - 1 : limit;
@@ -723,8 +723,8 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_ColumnView_stringSplitRe(
 
   try {
     cudf::jni::auto_set_device(env);
-    auto const column_view = reinterpret_cast<cudf::column_view const *>(input_handle);
-    auto const strings_column = cudf::strings_column_view{*column_view};
+    auto const input = reinterpret_cast<cudf::column_view const *>(input_handle);
+    auto const strings_column = cudf::strings_column_view{*input};
     auto const pattern_jstr = cudf::jni::native_jstring(env, pattern_obj);
     auto const pattern = std::string(pattern_jstr.get(), pattern_jstr.size_bytes());
     auto const max_split = limit > 1 ? limit - 1 : limit;
@@ -754,8 +754,8 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_stringSplitRecord(JNIEnv
 
   try {
     cudf::jni::auto_set_device(env);
-    auto const column_view = reinterpret_cast<cudf::column_view const *>(input_handle);
-    auto const strings_column = cudf::strings_column_view{*column_view};
+    auto const input = reinterpret_cast<cudf::column_view const *>(input_handle);
+    auto const strings_column = cudf::strings_column_view{*input};
     auto const delimiter_jstr = cudf::jni::native_jstring(env, delimiter_obj);
     auto const delimiter = std::string(delimiter_jstr.get(), delimiter_jstr.size_bytes());
     auto const max_split = limit > 1 ? limit - 1 : limit;
@@ -782,8 +782,8 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_stringSplitRecordRe(
 
   try {
     cudf::jni::auto_set_device(env);
-    auto const column_view = reinterpret_cast<cudf::column_view const *>(input_handle);
-    auto const strings_column = cudf::strings_column_view{*column_view};
+    auto const input = reinterpret_cast<cudf::column_view const *>(input_handle);
+    auto const strings_column = cudf::strings_column_view{*input};
     auto const pattern_jstr = cudf::jni::native_jstring(env, pattern_obj);
     auto const pattern = std::string(pattern_jstr.get(), pattern_jstr.size_bytes());
     auto const max_split = limit > 1 ? limit - 1 : limit;