From 3894427ecd6b6682eeb2d6c542667dea00fa5e6e Mon Sep 17 00:00:00 2001
From: Liangcai Li
Date: Fri, 11 Nov 2022 13:10:51 +0800
Subject: [PATCH] Add JNI for `substring` without 'end' parameter. (#12113)

Authors:
  - Liangcai Li (https://github.com/firestarman)

Approvers:
  - Robert (Bobby) Evans (https://github.com/revans2)
  - Nghia Truong (https://github.com/ttnghia)

URL: https://github.com/rapidsai/cudf/pull/12113
---
 java/src/main/java/ai/rapids/cudf/ColumnView.java | 14 +++++++++++---
 java/src/main/native/src/ColumnViewJni.cpp        | 15 +++++++++++++--
 2 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/java/src/main/java/ai/rapids/cudf/ColumnView.java b/java/src/main/java/ai/rapids/cudf/ColumnView.java
index e639320b028..57849b9ba0a 100644
--- a/java/src/main/java/ai/rapids/cudf/ColumnView.java
+++ b/java/src/main/java/ai/rapids/cudf/ColumnView.java
@@ -2632,12 +2632,13 @@ public final ColumnVector stringSplitRecord(String delimiter) {
 
   /**
    * Returns a new strings column that contains substrings of the strings in the provided column.
-   * Overloading subString to support if end index is not provided. Appending -1 to indicate to
-   * read until end of string.
+   * The character positions to retrieve in each string are `[start, <the string end>)`..
+   *
    * @param start first character index to begin the substring(inclusive).
    */
   public final ColumnVector substring(int start) {
-    return substring(start, -1);
+    assert type.equals(DType.STRING) : "column type must be a String";
+    return new ColumnVector(substringS(getNativeView(), start));
   }
 
   /**
@@ -3983,6 +3984,13 @@ private static native long stringSplitRecord(long nativeHandle, String pattern,
    */
   private static native long substring(long columnView, int start, int end) throws CudfException;
 
+  /**
+   * Native method to extract substrings from a given strings column.
+   * @param columnView native handle of the cudf::column_view being operated on.
+   * @param start first character index to begin the substrings (inclusive).
+   */
+  private static native long substringS(long columnView, int start) throws CudfException;
+
   /**
    * Native method to calculate substring from a given string column.
    * @param columnView native handle of the cudf::column_view being operated on.
diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp
index 9d442772261..4acc14c760c 100644
--- a/java/src/main/native/src/ColumnViewJni.cpp
+++ b/java/src/main/native/src/ColumnViewJni.cpp
@@ -1397,6 +1397,18 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_binaryOpVS(JNIEnv *env, j
   CATCH_STD(env, 0);
 }
 
+JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_substringS(JNIEnv *env, jclass,
+                                                                  jlong cv_handle, jint start) {
+  JNI_NULL_CHECK(env, cv_handle, "column is null", 0);
+  try {
+    cudf::jni::auto_set_device(env);
+    auto const cv = reinterpret_cast<cudf::column_view const *>(cv_handle);
+    auto const scv = cudf::strings_column_view{*cv};
+    return release_as_jlong(cudf::strings::slice_strings(scv, start));
+  }
+  CATCH_STD(env, 0);
+}
+
 JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_substring(JNIEnv *env, jclass,
                                                                  jlong column_view, jint start,
                                                                  jint end) {
@@ -1405,8 +1417,7 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_substring(JNIEnv *env, jc
     cudf::jni::auto_set_device(env);
     cudf::column_view *cv = reinterpret_cast<cudf::column_view *>(column_view);
     cudf::strings_column_view scv(*cv);
-    return release_as_jlong((end == -1 ? cudf::strings::slice_strings(scv, start) :
-                                         cudf::strings::slice_strings(scv, start, end)));
+    return release_as_jlong(cudf::strings::slice_strings(scv, start, end));
   }
   CATCH_STD(env, 0);
 }