rapidsai · rapids-bot · Dec 12, 2023 · Nov 30, 2023 · Dec 1, 2023 · Dec 5, 2023
@@ -373,6 +373,16 @@ public final ColumnVector getByteCount() {
     return new ColumnVector(byteCount(getNativeView()));
   }
 
+  /**
+   * Get the code point values (integers) for each character of each string.
+   * Output is one flat column: null and empty strings add no values, and 'é' maps to 50089.
+   * @return ColumnVector, with code point integer values for each character as INT32
+   */
+  public final ColumnVector codePoints() {
+    assert type.equals(DType.STRING) : "type has to be a String"; // checked only with -ea
+    return new ColumnVector(codePoints(getNativeView()));
+  }
+
   /**
    * Get the number of elements for each list. Null lists will have a value of null.
    * @return the number of elements in each list as an INT32 value.
@@ -4510,6 +4520,8 @@ private static native long stringReplaceWithBackrefs(long columnView, String pat
 
   private static native long byteCount(long viewHandle) throws CudfException;
 
+  private static native long codePoints(long viewHandle); // JNI entry: Java_ai_rapids_cudf_ColumnView_codePoints
+
   private static native long extractListElement(long nativeView, int index);
 
   private static native long extractListElementV(long nativeView, long indicesView);

@@ -895,6 +895,17 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_byteCount(JNIEnv *env, jc
   CATCH_STD(env, 0);
 }
 
+JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_codePoints(JNIEnv *env, jclass clazz,
+                                                                  jlong view_handle) {
+  JNI_NULL_CHECK(env, view_handle, "input column is null", 0); // presumably bails out with 0 on a null handle
+  try { // computes the code points of a strings column and hands the result back as a jlong handle
+    cudf::jni::auto_set_device(env);
+    auto const input = reinterpret_cast<cudf::column_view const*>(view_handle); // handle is a column_view pointer
+    return release_as_jlong(cudf::strings::code_points(cudf::strings_column_view{*input}));
+  }
+  CATCH_STD(env, 0); // NOTE(review): presumably rethrows C++ exceptions into Java and returns 0 - confirm
+}
+
 JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_findAndReplaceAll(JNIEnv *env, jclass clazz,
                                                                          jlong old_values_handle,
                                                                          jlong new_values_handle,

@@ -2169,6 +2169,15 @@ void testGetByteCount() {
     }
   }
 
+  @Test
+  void testCodePoints() { // expects one int per character; the null and "" rows contribute nothing
+    try (ColumnVector cv = ColumnVector.fromStrings("eee", "bb", null, "", "aa", "bbb", "ééé");
+         ColumnVector codePoints = cv.codePoints(); // 50089 below == 0xC3A9, the UTF-8 bytes of 'é'
+         ColumnVector expected = ColumnVector.fromBoxedInts(101, 101, 101, 98, 98, 97, 97, 98, 98, 98, 50089, 50089, 50089)) {
+      assertColumnsAreEqual(expected, codePoints);
+    }
+  }
+
   @Test
   void testEmptyStringColumnOpts() {
     try (ColumnVector cv = ColumnVector.fromStrings()) {