Skip to content

Commit

Permalink
JNI support for capitalize (#8624)
Browse files Browse the repository at this point in the history
Add JNI for the string operator `capitalize`.

Signed-off-by: Firestarman <[email protected]>

Authors:
  - Liangcai Li (https://github.com/firestarman)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Jason Lowe (https://github.com/jlowe)
  - Robert (Bobby) Evans (https://github.com/revans2)

URL: #8624
  • Loading branch information
firestarman authored Jul 2, 2021
1 parent a0b0eab commit fba09e6
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 0 deletions.
29 changes: 29 additions & 0 deletions java/src/main/java/ai/rapids/cudf/ColumnView.java
Original file line number Diff line number Diff line change
Expand Up @@ -1497,6 +1497,33 @@ public final ColumnVector toTitle() {
assert type.equals(DType.STRING);
return new ColumnVector(title(getNativeView()));
}

/**
 * Capitalizes the strings of this column and returns them as a new column.
 *
 * When `delimiters` is an empty string, only the first character of each row is
 * capitalized. Otherwise, a non-delimiter character is capitalized after any
 * delimiter character is found.
 *
 * Example:
 *     input = ["tesT1", "a Test", "Another Test", "a\tb"];
 *     delimiters = ""
 *     output is ["Test1", "A test", "Another test", "A\tb"]
 *     delimiters = " "
 *     output is ["Test1", "A Test", "Another Test", "A\tb"]
 *
 * Null string entries produce null entries in the output column.
 *
 * @param delimiters Characters used for identifying words to capitalize. Should not be null.
 * @return a column of capitalized strings. Users should close the returned column.
 * @throws IllegalArgumentException if this column or the `delimiters` scalar is not
 *         of string type.
 */
public final ColumnVector capitalize(Scalar delimiters) {
  // Guard clause: reject anything that is not (string column, string scalar).
  if (!DType.STRING.equals(type) || !DType.STRING.equals(delimiters.getType())) {
    throw new IllegalArgumentException("Both input column and delimiters scalar should be" +
        " string type. But got column: " + type + ", scalar: " + delimiters.getType());
  }
  long resultHandle = capitalize(getNativeView(), delimiters.getScalarHandle());
  return new ColumnVector(resultHandle);
}
/////////////////////////////////////////////////////////////////////////////
// TYPE CAST
/////////////////////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -3322,6 +3349,8 @@ private static native long clamper(long nativeView, long loScalarHandle, long lo

protected static native long title(long handle);

/**
 * Native entry point backing {@link #capitalize(Scalar)}.
 *
 * @param strsColHandle native handle of the strings column view to capitalize.
 * @param delimitersHandle native handle of the string scalar holding the delimiters.
 * @return native handle of the newly created column of capitalized strings.
 */
private static native long capitalize(long strsColHandle, long delimitersHandle);

private static native long makeStructView(long[] handles, long rowCount);

private static native long isTimestamp(long nativeView, String format);
Expand Down
17 changes: 17 additions & 0 deletions java/src/main/native/src/ColumnViewJni.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1793,6 +1793,23 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_title(JNIEnv *env, jobjec
CATCH_STD(env, 0);
}

// JNI binding for ai.rapids.cudf.ColumnView.capitalize(long, long).
//
// Interprets `strs_handle` as a pointer to a cudf::column_view and
// `delimiters_handle` as a pointer to a cudf::string_scalar, runs
// cudf::strings::capitalize on them, and hands ownership of the resulting
// column back to the caller as a jlong-encoded pointer.
JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_capitalize(JNIEnv *env, jobject j_object,
                                                                  jlong strs_handle,
                                                                  jlong delimiters_handle) {
  // Both handles are validated before any of them is dereferenced.
  JNI_NULL_CHECK(env, strs_handle, "native view handle is null", 0)
  JNI_NULL_CHECK(env, delimiters_handle, "delimiters scalar handle is null", 0)

  try {
    cudf::jni::auto_set_device(env);
    auto *input_view = reinterpret_cast<cudf::column_view *>(strs_handle);
    auto *delimiters = reinterpret_cast<cudf::string_scalar *>(delimiters_handle);
    // release() gives up ownership of the new column; the raw pointer is returned as the handle.
    return reinterpret_cast<jlong>(cudf::strings::capitalize(*input_view, *delimiters).release());
  }
  CATCH_STD(env, 0);
}

JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_makeStructView(JNIEnv *env, jobject j_object,
jlongArray handles,
jlong row_count) {
Expand Down
25 changes: 25 additions & 0 deletions java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -4458,6 +4458,31 @@ void testStringTitlize() {
}
}

/**
 * Capitalizes `input` with the given delimiters and checks the outcome against
 * the expected strings. Every native resource created here is closed on exit.
 */
private static void assertCapitalizeResult(ColumnVector input, String delimiterChars,
                                           String... expectedStrings) {
  try (Scalar delimiters = Scalar.fromString(delimiterChars);
       ColumnVector actual = input.capitalize(delimiters);
       ColumnVector wanted = ColumnVector.fromStrings(expectedStrings)) {
    assertColumnsAreEqual(wanted, actual);
  }
}

@Test
void testStringCapitalize() {
  try (ColumnVector source = ColumnVector.fromStrings("s Park", "S\nqL", "lower \tcase",
                                                      null, "", "UPPER\rCASE")) {
    // Empty delimiters: only the first character of each row is capitalized.
    assertCapitalizeResult(source, "",
        "S park", "S\nql", "Lower \tcase", null, "", "Upper\rcase");
    // Single space delimiter.
    assertCapitalizeResult(source, " ",
        "S Park", "S\nql", "Lower \tcase", null, "", "Upper\rcase");
    // Space, tab and newline delimiters ('\r' is not one of them).
    assertCapitalizeResult(source, " \t\n",
        "S Park", "S\nQl", "Lower \tCase", null, "", "Upper\rcase");
  }
}

@Test
void testNansToNulls() {
Float[] floats = new Float[]{1.2f, Float.POSITIVE_INFINITY, Float.NEGATIVE_INFINITY, null,
Expand Down

0 comments on commit fba09e6

Please sign in to comment.