Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce GetJsonObjectOptions in getJSONObject Java API #14956

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 20 additions & 2 deletions java/src/main/java/ai/rapids/cudf/ColumnView.java
Original file line number Diff line number Diff line change
Expand Up @@ -2978,6 +2978,24 @@ public final ColumnVector repeatStrings(ColumnView repeatTimes) {
repeatTimes.getNativeView()));
}

/**
 * Apply a JSONPath string to all rows in an input strings column.
 *
 * Applies a JSONPath string to an incoming strings column where each row in the column
 * is a valid json string. The output is returned by row as a strings column.
 *
 * For reference, https://tools.ietf.org/id/draft-goessner-dispatch-jsonpath-00.html
 * Note: Only implements the operators: $ . [] *
 *
 * @param path The JSONPath string to be applied to each row
 * @param options The GetJsonObjectOptions to control get_json_object behaviour
 * @return new strings ColumnVector containing the retrieved json object strings
 */
public final ColumnVector getJSONObject(Scalar path, GetJsonObjectOptions options) {
  assert(type.equals(DType.STRING)) : "column type must be a String";
  // The native entry point takes primitives only, so the options object is unpacked here.
  return new ColumnVector(getJSONObject(
      getNativeView(),
      path.getScalarHandle(),
      options.isAllowSingleQuotes(),
      options.isStripQuotesFromSingleStrings(),
      options.isMissingFieldsAsNulls()));
}

/**
* Apply a JSONPath string to all rows in an input strings column.
*
Expand All @@ -2992,7 +3010,7 @@ public final ColumnVector repeatStrings(ColumnView repeatTimes) {
*/
public final ColumnVector getJSONObject(Scalar path) {
assert(type.equals(DType.STRING)) : "column type must be a String";
return new ColumnVector(getJSONObject(getNativeView(), path.getScalarHandle()));
return getJSONObject(path, GetJsonObjectOptions.DEFAULT);
}

/**
Expand Down Expand Up @@ -4194,7 +4212,7 @@ private static native long repeatStringsWithColumnRepeatTimes(long stringsHandle
long repeatTimesHandle);


private static native long getJSONObject(long viewHandle, long scalarHandle) throws CudfException;
private static native long getJSONObject(long viewHandle, long scalarHandle, boolean allowSingleQuotes, boolean stripQuotesFromSingleStrings, boolean missingFieldsAsNulls) throws CudfException;

/**
* Native method to parse and convert a timestamp column vector to string column vector. A unix
Expand Down
75 changes: 75 additions & 0 deletions java/src/main/java/ai/rapids/cudf/GetJsonObjectOptions.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
/*
*
* Copyright (c) 2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/

package ai.rapids.cudf;

/**
 * Immutable set of flags passed to {@code ColumnView.getJSONObject(Scalar, GetJsonObjectOptions)}.
 *
 * Instances are created through {@link #builder()}; {@link #DEFAULT} holds the
 * builder's default values (allowSingleQuotes = false,
 * stripQuotesFromSingleStrings = true, missingFieldsAsNulls = false).
 */
public final class GetJsonObjectOptions {

  /**
   * Shared instance carrying the builder defaults. Declared final so the
   * process-wide default cannot be reassigned by callers.
   */
  public static final GetJsonObjectOptions DEFAULT = new GetJsonObjectOptions.Builder().build();

  private final boolean allowSingleQuotes;
  private final boolean stripQuotesFromSingleStrings;
  private final boolean missingFieldsAsNulls;

  private GetJsonObjectOptions(Builder builder) {
    this.allowSingleQuotes = builder.allowSingleQuotes;
    this.stripQuotesFromSingleStrings = builder.stripQuotesFromSingleStrings;
    this.missingFieldsAsNulls = builder.missingFieldsAsNulls;
  }

  /**
   * @return the value of the allowSingleQuotes flag
   */
  public boolean isAllowSingleQuotes() {
    return allowSingleQuotes;
  }

  /**
   * @return the value of the stripQuotesFromSingleStrings flag
   */
  public boolean isStripQuotesFromSingleStrings() {
    return stripQuotesFromSingleStrings;
  }

  /**
   * @return the value of the missingFieldsAsNulls flag
   */
  public boolean isMissingFieldsAsNulls() {
    return missingFieldsAsNulls;
  }

  /**
   * @return a new Builder pre-populated with the default flag values
   */
  public static Builder builder() {
    return new Builder();
  }

  /**
   * Mutable builder for {@link GetJsonObjectOptions}. Each setter returns this
   * builder so calls can be chained.
   */
  public static final class Builder {
    private boolean allowSingleQuotes = false;
    private boolean stripQuotesFromSingleStrings = true;
    private boolean missingFieldsAsNulls = false;

    /**
     * @param allowSingleQuotes new value for the allowSingleQuotes flag
     * @return this builder
     */
    public Builder allowSingleQuotes(boolean allowSingleQuotes) {
      this.allowSingleQuotes = allowSingleQuotes;
      return this;
    }

    /**
     * @param stripQuotesFromSingleStrings new value for the stripQuotesFromSingleStrings flag
     * @return this builder
     */
    public Builder stripQuotesFromSingleStrings(boolean stripQuotesFromSingleStrings) {
      this.stripQuotesFromSingleStrings = stripQuotesFromSingleStrings;
      return this;
    }

    /**
     * @param missingFieldsAsNulls new value for the missingFieldsAsNulls flag
     * @return this builder
     */
    public Builder missingFieldsAsNulls(boolean missingFieldsAsNulls) {
      this.missingFieldsAsNulls = missingFieldsAsNulls;
      return this;
    }

    /**
     * @return an immutable GetJsonObjectOptions snapshot of the current builder state
     */
    public GetJsonObjectOptions build() {
      return new GetJsonObjectOptions(this);
    }
  }
}
12 changes: 8 additions & 4 deletions java/src/main/native/src/ColumnViewJni.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2436,9 +2436,9 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_copyColumnViewToCV(JNIEnv
CATCH_STD(env, 0)
}

JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_getJSONObject(JNIEnv *env, jclass,
jlong j_view_handle,
jlong j_scalar_handle) {
JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_getJSONObject(
JNIEnv *env, jclass, jlong j_view_handle, jlong j_scalar_handle, jboolean allow_single_quotes,
jboolean strip_quotes_from_single_strings, jboolean missing_fields_as_nulls) {

JNI_NULL_CHECK(env, j_view_handle, "view cannot be null", 0);
JNI_NULL_CHECK(env, j_scalar_handle, "path cannot be null", 0);
Expand All @@ -2448,7 +2448,11 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_getJSONObject(JNIEnv *env
cudf::column_view *n_column_view = reinterpret_cast<cudf::column_view *>(j_view_handle);
cudf::strings_column_view n_strings_col_view(*n_column_view);
cudf::string_scalar *n_scalar_path = reinterpret_cast<cudf::string_scalar *>(j_scalar_handle);
return release_as_jlong(cudf::get_json_object(n_strings_col_view, *n_scalar_path));
auto options = cudf::get_json_object_options{};
options.set_allow_single_quotes(allow_single_quotes);
options.set_strip_quotes_from_single_strings(strip_quotes_from_single_strings);
options.set_missing_fields_as_nulls(missing_fields_as_nulls);
SurajAralihalli marked this conversation as resolved.
Show resolved Hide resolved
return release_as_jlong(cudf::get_json_object(n_strings_col_view, *n_scalar_path, options));
}
CATCH_STD(env, 0)
}
Expand Down
16 changes: 16 additions & 0 deletions java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -6379,6 +6379,7 @@ void testGetJSONObject() {
" }\n" +
"}";


try (ColumnVector json = ColumnVector.fromStrings(jsonString, jsonString);
ColumnVector expectedAuthors = ColumnVector.fromStrings("[\"Nigel Rees\",\"Evelyn " +
"Waugh\",\"Herman Melville\",\"J. R. R. Tolkien\"]", "[\"Nigel Rees\",\"Evelyn " +
Expand All @@ -6389,6 +6390,21 @@ void testGetJSONObject() {
}
}

@Test
void testGetJSONObjectWithSingleQuotes() {
  // Input document uses single quotes for both key and value: {'a': 'A"'}
  final String singleQuotedJson = "{" +
      "\'a\': \'A\"\'" +
      "}";
  final GetJsonObjectOptions singleQuoteOptions =
      GetJsonObjectOptions.builder()
          .allowSingleQuotes(true)
          .build();

  try (ColumnVector input = ColumnVector.fromStrings(singleQuotedJson, singleQuotedJson);
       ColumnVector expected = ColumnVector.fromStrings("A\"", "A\"");
       Scalar jsonPath = Scalar.fromString("$.a");
       ColumnVector actual = input.getJSONObject(jsonPath, singleQuoteOptions)) {
    assertColumnsAreEqual(expected, actual);
  }
}

@Test
void testMakeStructEmpty() {
final int numRows = 10;
Expand Down
Loading