From 48ae91159375808a516feff113840c059f0a17ba Mon Sep 17 00:00:00 2001 From: Liangcai Li Date: Thu, 10 Nov 2022 16:49:32 +0800 Subject: [PATCH 1/5] Add JNI for substring without 'end' parameter. cudf substring has its own behavior when end is -1, which is different from that in JNI, so JNI should not override it. Signed-off-by: Liangcai Li --- java/src/main/java/ai/rapids/cudf/ColumnView.java | 14 +++++++++++--- java/src/main/native/src/ColumnViewJni.cpp | 15 +++++++++++++-- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/java/src/main/java/ai/rapids/cudf/ColumnView.java b/java/src/main/java/ai/rapids/cudf/ColumnView.java index e639320b028..d18f13ed717 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnView.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnView.java @@ -2632,12 +2632,13 @@ public final ColumnVector stringSplitRecord(String delimiter) { /** * Returns a new strings column that contains substrings of the strings in the provided column. - * Overloading subString to support if end index is not provided. Appending -1 to indicate to - * read until end of string. + * The character positions to retrieve in each string are `[start, <the string end>)`. + * * @param start first character index to begin the substring(inclusive). */ public final ColumnVector substring(int start) { - return substring(start, -1); + assert type.equals(DType.STRING) : "column type must be a String"; + return new ColumnVector(substringS(getNativeView(), start)); } /** @@ -3983,6 +3984,13 @@ private static native long stringSplitRecord(long nativeHandle, String pattern, */ private static native long substring(long columnView, int start, int end) throws CudfException; + /** + * Native method to calculate substring from a given string column. 0 indexing. + * @param columnView native handle of the cudf::column_view being operated on. + * @param start first character index to begin the substring(inclusive). 
+ */ + private static native long substringS(long columnView, int start) throws CudfException; + /** * Native method to calculate substring from a given string column. * @param columnView native handle of the cudf::column_view being operated on. diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp index 9d442772261..5bbc78a347b 100644 --- a/java/src/main/native/src/ColumnViewJni.cpp +++ b/java/src/main/native/src/ColumnViewJni.cpp @@ -1397,6 +1397,18 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_binaryOpVS(JNIEnv *env, j CATCH_STD(env, 0); } +JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_substringS(JNIEnv *env, jclass, + jlong column_view, jint start) { + JNI_NULL_CHECK(env, column_view, "column is null", 0); + try { + cudf::jni::auto_set_device(env); + cudf::column_view *cv = reinterpret_cast<cudf::column_view *>(column_view); + cudf::strings_column_view scv(*cv); + return release_as_jlong(cudf::strings::slice_strings(scv, start)); + } + CATCH_STD(env, 0); +} + JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_substring(JNIEnv *env, jclass, jlong column_view, jint start, jint end) { @@ -1405,8 +1417,7 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_substring(JNIEnv *env, jc cudf::jni::auto_set_device(env); cudf::column_view *cv = reinterpret_cast<cudf::column_view *>(column_view); cudf::strings_column_view scv(*cv); - return release_as_jlong((end == -1 ? 
cudf::strings::slice_strings(scv, start) : - cudf::strings::slice_strings(scv, start, end))); + return release_as_jlong(cudf::strings::slice_strings(scv, start, end)); } CATCH_STD(env, 0); } From 5b92a09b8d17bf39f62834a04e4432f7b1246ac2 Mon Sep 17 00:00:00 2001 From: Liangcai Li Date: Thu, 10 Nov 2022 17:06:06 +0800 Subject: [PATCH 2/5] fix a style issue Signed-off-by: Liangcai Li --- java/src/main/native/src/ColumnViewJni.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp index 5bbc78a347b..d4f3ea21ba6 100644 --- a/java/src/main/native/src/ColumnViewJni.cpp +++ b/java/src/main/native/src/ColumnViewJni.cpp @@ -1398,7 +1398,7 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_binaryOpVS(JNIEnv *env, j } JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_substringS(JNIEnv *env, jclass, - jlong column_view, jint start) { + jlong column_view, jint start) { JNI_NULL_CHECK(env, column_view, "column is null", 0); try { cudf::jni::auto_set_device(env); From c029add228a743f116bf25303889aed4bcef173b Mon Sep 17 00:00:00 2001 From: Liangcai Li Date: Fri, 11 Nov 2022 09:41:02 +0800 Subject: [PATCH 3/5] Update java/src/main/java/ai/rapids/cudf/ColumnView.java Co-authored-by: Nghia Truong --- java/src/main/java/ai/rapids/cudf/ColumnView.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/src/main/java/ai/rapids/cudf/ColumnView.java b/java/src/main/java/ai/rapids/cudf/ColumnView.java index d18f13ed717..a21ef614281 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnView.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnView.java @@ -3987,7 +3987,7 @@ private static native long stringSplitRecord(long nativeHandle, String pattern, /** * Native method to calculate substring from a given string column. 0 indexing. * @param columnView native handle of the cudf::column_view being operated on. 
- * @param start first character index to begin the substring(inclusive). + * @param start first character index to begin the substrings (inclusive). */ private static native long substringS(long columnView, int start) throws CudfException; From 545dd9697af76cf359fba5114599843f336eab7c Mon Sep 17 00:00:00 2001 From: Liangcai Li Date: Fri, 11 Nov 2022 09:41:11 +0800 Subject: [PATCH 4/5] Update java/src/main/java/ai/rapids/cudf/ColumnView.java Co-authored-by: Nghia Truong --- java/src/main/java/ai/rapids/cudf/ColumnView.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/src/main/java/ai/rapids/cudf/ColumnView.java b/java/src/main/java/ai/rapids/cudf/ColumnView.java index a21ef614281..57849b9ba0a 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnView.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnView.java @@ -3985,7 +3985,7 @@ private static native long stringSplitRecord(long nativeHandle, String pattern, private static native long substring(long columnView, int start, int end) throws CudfException; /** - * Native method to calculate substring from a given string column. 0 indexing. + * Native method to extract substrings from a given strings column. * @param columnView native handle of the cudf::column_view being operated on. * @param start first character index to begin the substrings (inclusive). 
*/ From e32893b822844b05498d2b020848ceee79ec1a42 Mon Sep 17 00:00:00 2001 From: Liangcai Li Date: Fri, 11 Nov 2022 09:41:17 +0800 Subject: [PATCH 5/5] Update java/src/main/native/src/ColumnViewJni.cpp Co-authored-by: Nghia Truong --- java/src/main/native/src/ColumnViewJni.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp index d4f3ea21ba6..4acc14c760c 100644 --- a/java/src/main/native/src/ColumnViewJni.cpp +++ b/java/src/main/native/src/ColumnViewJni.cpp @@ -1398,12 +1398,12 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_binaryOpVS(JNIEnv *env, j } JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_substringS(JNIEnv *env, jclass, - jlong column_view, jint start) { - JNI_NULL_CHECK(env, column_view, "column is null", 0); + jlong cv_handle, jint start) { + JNI_NULL_CHECK(env, cv_handle, "column is null", 0); try { cudf::jni::auto_set_device(env); - cudf::column_view *cv = reinterpret_cast<cudf::column_view *>(column_view); - cudf::strings_column_view scv(*cv); + auto const cv = reinterpret_cast<cudf::column_view const *>(cv_handle); + auto const scv = cudf::strings_column_view{*cv}; return release_as_jlong(cudf::strings::slice_strings(scv, start)); } CATCH_STD(env, 0);