From 39f5846a765e8f327c364bf2e8b22cfa12470f2c Mon Sep 17 00:00:00 2001 From: Suraj Aralihalli Date: Thu, 1 Feb 2024 14:48:21 -0800 Subject: [PATCH 1/5] add options to getJSONObject --- .../main/java/ai/rapids/cudf/ColumnView.java | 7 +-- .../ai/rapids/cudf/GetJsonObjectOptions.java | 45 +++++++++++++++++++ java/src/main/native/src/ColumnViewJni.cpp | 10 +++-- 3 files changed, 55 insertions(+), 7 deletions(-) create mode 100644 java/src/main/java/ai/rapids/cudf/GetJsonObjectOptions.java diff --git a/java/src/main/java/ai/rapids/cudf/ColumnView.java b/java/src/main/java/ai/rapids/cudf/ColumnView.java index 8eabed7f364..b4e42d9de73 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnView.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnView.java @@ -2988,11 +2988,12 @@ public final ColumnVector repeatStrings(ColumnView repeatTimes) { * Note: Only implements the operators: $ . [] * * * @param path The JSONPath string to be applied to each row + * @param options The GetJsonObjectOptions to control get_json_object behaviour * @return new strings ColumnVector containing the retrieved json object strings */ - public final ColumnVector getJSONObject(Scalar path) { + public final ColumnVector getJSONObject(Scalar path, GetJsonObjectOptions options) { assert(type.equals(DType.STRING)) : "column type must be a String"; - return new ColumnVector(getJSONObject(getNativeView(), path.getScalarHandle())); + return new ColumnVector(getJSONObject(getNativeView(), path.getScalarHandle(), options.isAllowSingleQuotes(), options.isStripQuotesFromSingleStrings(), options.isMissingFieldsAsNulls())); } /** @@ -4194,7 +4195,7 @@ private static native long repeatStringsWithColumnRepeatTimes(long stringsHandle long repeatTimesHandle); - private static native long getJSONObject(long viewHandle, long scalarHandle) throws CudfException; + private static native long getJSONObject(long viewHandle, long scalarHandle, boolean allowSingleQuotes, boolean stripQuotesFromSingleStrings, boolean 
missingFieldsAsNulls) throws CudfException; /** * Native method to parse and convert a timestamp column vector to string column vector. A unix diff --git a/java/src/main/java/ai/rapids/cudf/GetJsonObjectOptions.java b/java/src/main/java/ai/rapids/cudf/GetJsonObjectOptions.java new file mode 100644 index 00000000000..32c4f47154f --- /dev/null +++ b/java/src/main/java/ai/rapids/cudf/GetJsonObjectOptions.java @@ -0,0 +1,45 @@ +package ai.rapids.cudf; + +public class GetJsonObjectOptions { + private boolean allowSingleQuotes; + private boolean stripQuotesFromSingleStrings; + private boolean missingFieldsAsNulls; + + // Constructor with parameters to set boolean values + public GetJsonObjectOptions(boolean allowSingleQuotes, boolean stripQuotesFromSingleStrings, boolean missingFieldsAsNulls) { + this.allowSingleQuotes = allowSingleQuotes; + this.stripQuotesFromSingleStrings = stripQuotesFromSingleStrings; + this.missingFieldsAsNulls = missingFieldsAsNulls; + } + + public GetJsonObjectOptions() { + this(false, true, false); // Calls parameterized constructor with default values + } + + // Getter and setter methods for allowSingleQuotes + public boolean isAllowSingleQuotes() { + return allowSingleQuotes; + } + + public void setAllowSingleQuotes(boolean allowSingleQuotes) { + this.allowSingleQuotes = allowSingleQuotes; + } + + // Getter and setter methods for stripQuotesFromSingleStrings + public boolean isStripQuotesFromSingleStrings() { + return stripQuotesFromSingleStrings; + } + + public void setStripQuotesFromSingleStrings(boolean stripQuotesFromSingleStrings) { + this.stripQuotesFromSingleStrings = stripQuotesFromSingleStrings; + } + + // Getter and setter methods for missingFieldsAsNulls + public boolean isMissingFieldsAsNulls() { + return missingFieldsAsNulls; + } + + public void setMissingFieldsAsNulls(boolean missingFieldsAsNulls) { + this.missingFieldsAsNulls = missingFieldsAsNulls; + } +} diff --git a/java/src/main/native/src/ColumnViewJni.cpp 
b/java/src/main/native/src/ColumnViewJni.cpp index 47dc802cd49..7bc361e795c 100644 --- a/java/src/main/native/src/ColumnViewJni.cpp +++ b/java/src/main/native/src/ColumnViewJni.cpp @@ -2436,9 +2436,9 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_copyColumnViewToCV(JNIEnv CATCH_STD(env, 0) } -JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_getJSONObject(JNIEnv *env, jclass, - jlong j_view_handle, - jlong j_scalar_handle) { +JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_getJSONObject( + JNIEnv *env, jclass, jlong j_view_handle, jlong j_scalar_handle, jboolean allow_single_quotes, + jboolean strip_quotes_from_single_strings, jboolean missing_fields_as_nulls) { JNI_NULL_CHECK(env, j_view_handle, "view cannot be null", 0); JNI_NULL_CHECK(env, j_scalar_handle, "path cannot be null", 0); @@ -2448,7 +2448,9 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_getJSONObject(JNIEnv *env cudf::column_view *n_column_view = reinterpret_cast(j_view_handle); cudf::strings_column_view n_strings_col_view(*n_column_view); cudf::string_scalar *n_scalar_path = reinterpret_cast(j_scalar_handle); - return release_as_jlong(cudf::get_json_object(n_strings_col_view, *n_scalar_path)); + const auto options = get_json_object_options{ + allow_single_quotes, strip_quotes_from_single_strings, missing_fields_as_nulls}; + return release_as_jlong(cudf::get_json_object(n_strings_col_view, *n_scalar_path, options)); } CATCH_STD(env, 0) } From 721b5369447e1e8af027f50c5102ad67c821657c Mon Sep 17 00:00:00 2001 From: Suraj Aralihalli Date: Thu, 1 Feb 2024 15:12:02 -0800 Subject: [PATCH 2/5] fix errors --- .../ai/rapids/cudf/GetJsonObjectOptions.java | 26 +++++++++++++------ java/src/main/native/src/ColumnViewJni.cpp | 6 +++-- 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/java/src/main/java/ai/rapids/cudf/GetJsonObjectOptions.java b/java/src/main/java/ai/rapids/cudf/GetJsonObjectOptions.java index 32c4f47154f..1dd70128ef0 100644 --- 
a/java/src/main/java/ai/rapids/cudf/GetJsonObjectOptions.java +++ b/java/src/main/java/ai/rapids/cudf/GetJsonObjectOptions.java @@ -1,3 +1,21 @@ +/* + * + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + package ai.rapids.cudf; public class GetJsonObjectOptions { @@ -5,18 +23,12 @@ public class GetJsonObjectOptions { private boolean stripQuotesFromSingleStrings; private boolean missingFieldsAsNulls; - // Constructor with parameters to set boolean values public GetJsonObjectOptions(boolean allowSingleQuotes, boolean stripQuotesFromSingleStrings, boolean missingFieldsAsNulls) { this.allowSingleQuotes = allowSingleQuotes; this.stripQuotesFromSingleStrings = stripQuotesFromSingleStrings; this.missingFieldsAsNulls = missingFieldsAsNulls; } - public GetJsonObjectOptions() { - this(false, true, false); // Calls parameterized constructor with default values - } - - // Getter and setter methods for allowSingleQuotes public boolean isAllowSingleQuotes() { return allowSingleQuotes; } @@ -25,7 +37,6 @@ public void setAllowSingleQuotes(boolean allowSingleQuotes) { this.allowSingleQuotes = allowSingleQuotes; } - // Getter and setter methods for stripQuotesFromSingleStrings public boolean isStripQuotesFromSingleStrings() { return stripQuotesFromSingleStrings; } @@ -34,7 +45,6 @@ public void setStripQuotesFromSingleStrings(boolean stripQuotesFromSingleStrings this.stripQuotesFromSingleStrings = 
stripQuotesFromSingleStrings; } - // Getter and setter methods for missingFieldsAsNulls public boolean isMissingFieldsAsNulls() { return missingFieldsAsNulls; } diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp index 7bc361e795c..1c4eb8a83ab 100644 --- a/java/src/main/native/src/ColumnViewJni.cpp +++ b/java/src/main/native/src/ColumnViewJni.cpp @@ -2448,8 +2448,10 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_getJSONObject( cudf::column_view *n_column_view = reinterpret_cast(j_view_handle); cudf::strings_column_view n_strings_col_view(*n_column_view); cudf::string_scalar *n_scalar_path = reinterpret_cast(j_scalar_handle); - const auto options = get_json_object_options{ - allow_single_quotes, strip_quotes_from_single_strings, missing_fields_as_nulls}; + auto options = cudf::get_json_object_options{}; + options.set_allow_single_quotes(allow_single_quotes); + options.set_strip_quotes_from_single_strings(strip_quotes_from_single_strings); + options.set_missing_fields_as_nulls(missing_fields_as_nulls); return release_as_jlong(cudf::get_json_object(n_strings_col_view, *n_scalar_path, options)); } CATCH_STD(env, 0) From e4663b0074e35b476a493cbff510687b03f1b8c2 Mon Sep 17 00:00:00 2001 From: Suraj Aralihalli Date: Thu, 1 Feb 2024 19:12:44 -0800 Subject: [PATCH 3/5] use builder pattern --- .../ai/rapids/cudf/GetJsonObjectOptions.java | 58 +++++++++++++------ .../java/ai/rapids/cudf/ColumnVectorTest.java | 18 +++++- 2 files changed, 56 insertions(+), 20 deletions(-) diff --git a/java/src/main/java/ai/rapids/cudf/GetJsonObjectOptions.java b/java/src/main/java/ai/rapids/cudf/GetJsonObjectOptions.java index 1dd70128ef0..5f9a174b2d3 100644 --- a/java/src/main/java/ai/rapids/cudf/GetJsonObjectOptions.java +++ b/java/src/main/java/ai/rapids/cudf/GetJsonObjectOptions.java @@ -18,38 +18,58 @@ package ai.rapids.cudf; -public class GetJsonObjectOptions { - private boolean allowSingleQuotes; - private boolean 
stripQuotesFromSingleStrings; - private boolean missingFieldsAsNulls; - - public GetJsonObjectOptions(boolean allowSingleQuotes, boolean stripQuotesFromSingleStrings, boolean missingFieldsAsNulls) { - this.allowSingleQuotes = allowSingleQuotes; - this.stripQuotesFromSingleStrings = stripQuotesFromSingleStrings; - this.missingFieldsAsNulls = missingFieldsAsNulls; +public final class GetJsonObjectOptions { + + public static GetJsonObjectOptions DEFAULT = new GetJsonObjectOptions.Builder().build(); + + private final boolean allowSingleQuotes; + private final boolean stripQuotesFromSingleStrings; + private final boolean missingFieldsAsNulls; + + private GetJsonObjectOptions(Builder builder) { + this.allowSingleQuotes = builder.allowSingleQuotes; + this.stripQuotesFromSingleStrings = builder.stripQuotesFromSingleStrings; + this.missingFieldsAsNulls = builder.missingFieldsAsNulls; } public boolean isAllowSingleQuotes() { return allowSingleQuotes; } - public void setAllowSingleQuotes(boolean allowSingleQuotes) { - this.allowSingleQuotes = allowSingleQuotes; - } - public boolean isStripQuotesFromSingleStrings() { return stripQuotesFromSingleStrings; } - public void setStripQuotesFromSingleStrings(boolean stripQuotesFromSingleStrings) { - this.stripQuotesFromSingleStrings = stripQuotesFromSingleStrings; - } - public boolean isMissingFieldsAsNulls() { return missingFieldsAsNulls; } - public void setMissingFieldsAsNulls(boolean missingFieldsAsNulls) { - this.missingFieldsAsNulls = missingFieldsAsNulls; + public static Builder builder() { + return new Builder(); + } + + public static final class Builder { + private boolean allowSingleQuotes = false; + private boolean stripQuotesFromSingleStrings = true; + private boolean missingFieldsAsNulls = false; + + public Builder allowSingleQuotes(boolean allowSingleQuotes) { + this.allowSingleQuotes = allowSingleQuotes; + return this; + } + + public Builder stripQuotesFromSingleStrings(boolean stripQuotesFromSingleStrings) { + 
this.stripQuotesFromSingleStrings = stripQuotesFromSingleStrings; + return this; + } + + public Builder missingFieldsAsNulls(boolean missingFieldsAsNulls) { + this.missingFieldsAsNulls = missingFieldsAsNulls; + return this; + } + + public GetJsonObjectOptions build() { + return new GetJsonObjectOptions(this); + } } } diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index dfead3716ee..aa284bdea34 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -6379,16 +6379,32 @@ void testGetJSONObject() { " }\n" + "}"; + try (ColumnVector json = ColumnVector.fromStrings(jsonString, jsonString); ColumnVector expectedAuthors = ColumnVector.fromStrings("[\"Nigel Rees\",\"Evelyn " + "Waugh\",\"Herman Melville\",\"J. R. R. Tolkien\"]", "[\"Nigel Rees\",\"Evelyn " + "Waugh\",\"Herman Melville\",\"J. R. R. Tolkien\"]"); Scalar path = Scalar.fromString("$.store.book[*].author"); - ColumnVector gotAuthors = json.getJSONObject(path)) { + ColumnVector gotAuthors = json.getJSONObject(path, GetJsonObjectOptions.DEFAULT)) { assertColumnsAreEqual(expectedAuthors, gotAuthors); } } + @Test + void testGetJSONObjectWithSingleQuotes() { + String jsonString = "{" + + "\'a\': \'A\"\'" + + "}"; + + GetJsonObjectOptions options = GetJsonObjectOptions.builder().allowSingleQuotes(true).build(); + try (ColumnVector json = ColumnVector.fromStrings(jsonString, jsonString); + ColumnVector expectedAuthors = ColumnVector.fromStrings("A\"", "A\""); + Scalar path = Scalar.fromString("$.a"); + ColumnVector gotAuthors = json.getJSONObject(path, options)) { + assertColumnsAreEqual(expectedAuthors, gotAuthors); + } +} + @Test void testMakeStructEmpty() { final int numRows = 10; From 60614f579d315c233bf5c296b8a9ace908cf5963 Mon Sep 17 00:00:00 2001 From: Suraj Aralihalli Date: Thu, 8 Feb 2024 13:54:24 -0800 Subject: [PATCH 4/5] add backwards compatibility 
Signed-off-by: Suraj Aralihalli --- .../main/java/ai/rapids/cudf/ColumnView.java | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/java/src/main/java/ai/rapids/cudf/ColumnView.java b/java/src/main/java/ai/rapids/cudf/ColumnView.java index b4e42d9de73..997ff77bae3 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnView.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnView.java @@ -2996,6 +2996,23 @@ public final ColumnVector getJSONObject(Scalar path, GetJsonObjectOptions option return new ColumnVector(getJSONObject(getNativeView(), path.getScalarHandle(), options.isAllowSingleQuotes(), options.isStripQuotesFromSingleStrings(), options.isMissingFieldsAsNulls())); } + /** + * Apply a JSONPath string to all rows in an input strings column. + * + * Applies a JSONPath string to an incoming strings column where each row in the column + * is a valid json string. The output is returned by row as a strings column. + * + * For reference, https://tools.ietf.org/id/draft-goessner-dispatch-jsonpath-00.html + * Note: Only implements the operators: $ . [] * + * + * @param path The JSONPath string to be applied to each row + * @return new strings ColumnVector containing the retrieved json object strings + */ + public final ColumnVector getJSONObject(Scalar path) { + assert(type.equals(DType.STRING)) : "column type must be a String"; + return getJSONObject(path, GetJsonObjectOptions.DEFAULT); + } + /** * Returns a new strings column where target string within each string is replaced with the specified * replacement string. 
From 9b1aef9a6e779a83f819d091dbf3f0ebe3a75593 Mon Sep 17 00:00:00 2001 From: Suraj Aralihalli Date: Thu, 8 Feb 2024 13:58:36 -0800 Subject: [PATCH 5/5] modify test to use old getJSONObject Signed-off-by: Suraj Aralihalli --- java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index aa284bdea34..75573046af2 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -6385,7 +6385,7 @@ void testGetJSONObject() { "Waugh\",\"Herman Melville\",\"J. R. R. Tolkien\"]", "[\"Nigel Rees\",\"Evelyn " + "Waugh\",\"Herman Melville\",\"J. R. R. Tolkien\"]"); Scalar path = Scalar.fromString("$.store.book[*].author"); - ColumnVector gotAuthors = json.getJSONObject(path, GetJsonObjectOptions.DEFAULT)) { + ColumnVector gotAuthors = json.getJSONObject(path)) { assertColumnsAreEqual(expectedAuthors, gotAuthors); } }