Skip to content

Commit

Permalink
Add JNI for strings::code_points (#14533)
Browse files Browse the repository at this point in the history
This adds the JNI bindings for strings::code_points, exposing the API to Java.

It will be useful for NVIDIA/spark-rapids#9585

Authors:
  - Haoyang Li (https://github.com/thirtiseven)
  - Chong Gao (https://github.com/res-life)

Approvers:
  - Jason Lowe (https://github.com/jlowe)
  - Nghia Truong (https://github.com/ttnghia)

URL: #14533
  • Loading branch information
thirtiseven authored Dec 12, 2023
1 parent 2318548 commit f8e891f
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 0 deletions.
12 changes: 12 additions & 0 deletions java/src/main/java/ai/rapids/cudf/ColumnView.java
Original file line number Diff line number Diff line change
Expand Up @@ -373,6 +373,16 @@ public final ColumnVector getByteCount() {
return new ColumnVector(byteCount(getNativeView()));
}

/**
 * Produce the integer code point value of every character in every string of this column.
 * <p>
 * The result is one flat column holding an entry per character, in row order; null rows and
 * empty strings contribute no entries. Each value is the integer form of the character's UTF-8
 * encoding, so a multi-byte character such as 'é' yields 50089 (0xC3A9) rather than its Unicode
 * scalar value.
 * <p>
 * This column is expected to be of type STRING; that is checked with an assertion only.
 *
 * @return ColumnVector, with code point integer values for each character as INT32
 */
public final ColumnVector codePoints() {
  assert type.equals(DType.STRING) : "type has to be a String";
  // The native call hands back a handle to a newly created column; wrap it so Java owns it.
  long codePointsHandle = codePoints(getNativeView());
  return new ColumnVector(codePointsHandle);
}

/**
* Get the number of elements for each list. Null lists will have a value of null.
* @return the number of elements in each list as an INT32 value.
Expand Down Expand Up @@ -4510,6 +4520,8 @@ private static native long stringReplaceWithBackrefs(long columnView, String pat

private static native long byteCount(long viewHandle) throws CudfException;

/**
 * Native binding behind {@link #codePoints()}; implemented in ColumnViewJni.cpp as
 * Java_ai_rapids_cudf_ColumnView_codePoints, which forwards to cudf::strings::code_points.
 *
 * @param viewHandle native handle of the column view to read; the native side rejects a
 *                   null (0) handle with the message "input column is null"
 * @return native handle of the resulting column, which the caller wraps in a ColumnVector
 */
private static native long codePoints(long viewHandle);

private static native long extractListElement(long nativeView, int index);

private static native long extractListElementV(long nativeView, long indicesView);
Expand Down
11 changes: 11 additions & 0 deletions java/src/main/native/src/ColumnViewJni.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -895,6 +895,17 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_byteCount(JNIEnv *env, jc
CATCH_STD(env, 0);
}

// JNI entry point for ai.rapids.cudf.ColumnView.codePoints(long).
// Interprets view_handle as a cudf::column_view pointer, runs cudf::strings::code_points over it
// and hands the resulting column back to Java as a jlong handle.
JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_codePoints(JNIEnv *env, jclass clazz,
                                                                  jlong view_handle) {
  JNI_NULL_CHECK(env, view_handle, "input column is null", 0);
  try {
    cudf::jni::auto_set_device(env);
    auto const *column = reinterpret_cast<cudf::column_view const *>(view_handle);
    // Wrap the generic view in the strings-specific view that code_points operates on.
    cudf::strings_column_view const strings{*column};
    return release_as_jlong(cudf::strings::code_points(strings));
  }
  CATCH_STD(env, 0);
}

JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_findAndReplaceAll(JNIEnv *env, jclass clazz,
jlong old_values_handle,
jlong new_values_handle,
Expand Down
9 changes: 9 additions & 0 deletions java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -2169,6 +2169,15 @@ void testGetByteCount() {
}
}

/**
 * Checks codePoints() on a column mixing ASCII strings, a null row, an empty string and a
 * string of multi-byte characters. The expected output is flat: null and empty rows add nothing.
 */
@Test
void testCodePoints() {
  try (ColumnVector input = ColumnVector.fromStrings("eee", "bb", null, "", "aa", "bbb", "ééé");
       ColumnVector actual = input.codePoints();
       ColumnVector expected = ColumnVector.fromBoxedInts(
           101, 101, 101,          // "eee"
           98, 98,                 // "bb"
           97, 97,                 // "aa"
           98, 98, 98,             // "bbb"
           50089, 50089, 50089)) { // "ééé": each 'é' is reported as 0xC3A9
    assertColumnsAreEqual(expected, actual);
  }
}

@Test
void testEmptyStringColumnOpts() {
try (ColumnVector cv = ColumnVector.fromStrings()) {
Expand Down

0 comments on commit f8e891f

Please sign in to comment.