From 56fe5dbb32bb6d4f4ab954e1abea9218fed404e1 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Wed, 1 Nov 2023 09:19:29 -0400 Subject: [PATCH] Expose stream parameter to get_json_object API (#14297) Add stream parameter to public APIs `cudf::get_json_object()` Also removed the API from the `strings` namespace since it does not fit with the other strings library functions. This resulted in updating the source file locations as well. Reference #13744 Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Karthikeyan (https://github.com/karthikeyann) - GALI PREM SAGAR (https://github.com/galipremsagar) - Bradley Dice (https://github.com/bdice) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/14297 --- cpp/CMakeLists.txt | 2 +- cpp/benchmarks/CMakeLists.txt | 2 +- cpp/benchmarks/{string => json}/json.cu | 4 +- cpp/include/cudf/{strings => json}/json.hpp | 13 +-- cpp/include/cudf/strings/detail/json.hpp | 43 -------- cpp/include/doxygen_groups.h | 5 +- cpp/src/{strings => }/json/json_path.cu | 19 ++-- cpp/tests/CMakeLists.txt | 5 +- cpp/tests/{strings => json}/json_tests.cpp | 108 +++++++++----------- java/src/main/native/src/ColumnViewJni.cpp | 4 +- python/cudf/cudf/_lib/cpp/strings/json.pxd | 4 +- 11 files changed, 81 insertions(+), 128 deletions(-) rename cpp/benchmarks/{string => json}/json.cu (98%) rename cpp/include/cudf/{strings => json}/json.hpp (94%) delete mode 100644 cpp/include/cudf/strings/detail/json.hpp rename cpp/src/{strings => }/json/json_path.cu (98%) rename cpp/tests/{strings => json}/json_tests.cpp (84%) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index f7662006cac..dc12564c656 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -440,6 +440,7 @@ add_library( src/join/mixed_join_size_kernel_nulls.cu src/join/mixed_join_size_kernels_semi.cu src/join/semi_join.cu + src/json/json_path.cu src/lists/contains.cu src/lists/combine/concatenate_list_elements.cu src/lists/combine/concatenate_rows.cu @@ -571,7 +572,6 @@ add_library( src/strings/filter_chars.cu src/strings/like.cu src/strings/padding.cu - src/strings/json/json_path.cu src/strings/regex/regcomp.cpp src/strings/regex/regexec.cpp src/strings/regex/regex_program.cpp diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index a3e2b4ed6db..6858a3fc69f 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -320,7 +320,7 @@ ConfigureNVBench( # ################################################################################################## # * json benchmark ------------------------------------------------------------------- -ConfigureBench(JSON_BENCH string/json.cu) +ConfigureBench(JSON_BENCH json/json.cu) ConfigureNVBench(FST_NVBENCH io/fst.cu) ConfigureNVBench(JSON_READER_NVBENCH io/json/nested_json.cpp io/json/json_reader_input.cpp) ConfigureNVBench(JSON_WRITER_NVBENCH io/json/json_writer.cpp) diff --git a/cpp/benchmarks/string/json.cu b/cpp/benchmarks/json/json.cu similarity index 98% rename from cpp/benchmarks/string/json.cu rename to cpp/benchmarks/json/json.cu index 7e89edf3e17..5dc30aebe38 100644 --- a/cpp/benchmarks/string/json.cu +++ b/cpp/benchmarks/json/json.cu @@ -21,9 +21,9 @@ #include #include +#include #include #include -#include #include #include #include @@ -196,7 +196,7 @@ void BM_case(benchmark::State& state, std::string query_arg) for (auto _ : state) { cuda_event_timer raii(state, true); - auto result = cudf::strings::get_json_object(scv, json_path); + auto result = cudf::get_json_object(scv, json_path); CUDF_CUDA_TRY(cudaStreamSynchronize(0)); } diff --git a/cpp/include/cudf/strings/json.hpp b/cpp/include/cudf/json/json.hpp similarity index 94% rename from cpp/include/cudf/strings/json.hpp rename to cpp/include/cudf/json/json.hpp index 8fabee6b9a5..944e0c26dd6 100644 --- a/cpp/include/cudf/strings/json.hpp +++ b/cpp/include/cudf/json/json.hpp @@ -16,16 +16,16 @@ #pragma once #include +#include #include #include namespace cudf { -namespace strings { /** - * @addtogroup strings_json + * @addtogroup json_object * @{ * @file */ @@ -155,20 +155,21 @@ class get_json_object_options { * https://tools.ietf.org/id/draft-goessner-dispatch-jsonpath-00.html * Implements only the operators: $ . [] * * + * @throw std::invalid_argument if provided an invalid operator or an empty name + * * @param col The input strings column. Each row must contain a valid json string * @param json_path The JSONPath string to be applied to each row * @param options Options for controlling the behavior of the function - * @param mr Resource for allocating device memory. + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Resource for allocating device memory * @return New strings column containing the retrieved json object strings - * - * @throw std::invalid_argument if provided an invalid operator or an empty name */ std::unique_ptr get_json_object( cudf::strings_column_view const& col, cudf::string_scalar const& json_path, get_json_object_options options = get_json_object_options{}, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group -} // namespace strings } // namespace cudf diff --git a/cpp/include/cudf/strings/detail/json.hpp b/cpp/include/cudf/strings/detail/json.hpp deleted file mode 100644 index 0fb06d36570..00000000000 --- a/cpp/include/cudf/strings/detail/json.hpp +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include -#include -#include - -#include - -namespace cudf { -namespace strings { -namespace detail { - -/** - * @copydoc cudf::strings::get_json_object - * - * @param stream CUDA stream used for device memory operations and kernel launches - */ -std::unique_ptr get_json_object(cudf::strings_column_view const& col, - cudf::string_scalar const& json_path, - cudf::strings::get_json_object_options options, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); - -} // namespace detail -} // namespace strings -} // namespace cudf diff --git a/cpp/include/doxygen_groups.h b/cpp/include/doxygen_groups.h index 4da2807bbe6..8845b84613d 100644 --- a/cpp/include/doxygen_groups.h +++ b/cpp/include/doxygen_groups.h @@ -130,7 +130,6 @@ * @defgroup strings_replace Replacing * @defgroup strings_split Splitting * @defgroup strings_extract Extracting - * @defgroup strings_json JSON * @defgroup strings_regex Regex * @} * @defgroup dictionary_apis Dictionary @@ -146,6 +145,10 @@ * @defgroup io_datasources Data Sources * @defgroup io_datasinks Data Sinks * @} + * @defgroup json_apis JSON + * @{ + * @defgroup json_object JSON Path + * @} * @defgroup lists_apis Lists * @{ * @defgroup lists_combine Combining diff --git a/cpp/src/strings/json/json_path.cu b/cpp/src/json/json_path.cu similarity index 98% rename from cpp/src/strings/json/json_path.cu rename to cpp/src/json/json_path.cu index c56752f5429..8217e34723c 100644 --- a/cpp/src/strings/json/json_path.cu +++ b/cpp/src/json/json_path.cu @@ -20,9 +20,9 @@ #include #include #include +#include #include #include -#include #include #include #include @@ -41,7 +41,6 @@ #include namespace cudf { -namespace strings { namespace detail { namespace { @@ -224,7 +223,9 @@ enum json_element_type { NONE, OBJECT, ARRAY, VALUE }; class json_state : private parser { public: __device__ json_state() : parser() {} - __device__ json_state(char const* _input, int64_t _input_len, get_json_object_options _options) + __device__ json_state(char const* _input, + int64_t _input_len, + cudf::get_json_object_options _options) : parser(_input, _input_len), options(_options) @@ -956,9 +957,6 @@ __launch_bounds__(block_size) __global__ } } -/** - * @copydoc cudf::strings::detail::get_json_object - */ std::unique_ptr get_json_object(cudf::strings_column_view const& col, cudf::string_scalar const& json_path, get_json_object_options options, @@ -1011,7 +1009,7 @@ std::unique_ptr get_json_object(cudf::strings_column_view const& c cudf::detail::get_value(offsets_view, col.size(), stream); // allocate output string column - auto chars = create_chars_child_column(output_size, stream, mr); + auto chars = cudf::strings::detail::create_chars_child_column(output_size, stream, mr); // potential optimization : if we know that all outputs are valid, we could skip creating // the validity mask altogether @@ -1041,17 +1039,14 @@ std::unique_ptr get_json_object(cudf::strings_column_view const& c } // namespace } // namespace detail -/** - * @copydoc cudf::strings::get_json_object - */ std::unique_ptr get_json_object(cudf::strings_column_view const& col, cudf::string_scalar const& json_path, get_json_object_options options, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::get_json_object(col, json_path, options, cudf::get_default_stream(), mr); + return detail::get_json_object(col, json_path, options, stream, mr); } -} // namespace strings } // namespace cudf diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index f856d106d03..e966ef3fb04 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -523,7 +523,6 @@ ConfigureTest( strings/format_lists_tests.cpp strings/integers_tests.cpp strings/ipv4_tests.cpp - strings/json_tests.cpp strings/like_tests.cpp strings/pad_tests.cpp strings/repeat_strings_tests.cpp @@ -537,6 +536,10 @@ ConfigureTest( strings/urls_tests.cpp ) +# ################################################################################################## +# * json path test -------------------------------------------------------------------------------- +ConfigureTest(JSON_PATH_TEST json/json_tests.cpp) + # ################################################################################################## # * structs test ---------------------------------------------------------------------------------- ConfigureTest(STRUCTS_TEST structs/structs_column_tests.cpp structs/utilities_tests.cpp) diff --git a/cpp/tests/strings/json_tests.cpp b/cpp/tests/json/json_tests.cpp similarity index 84% rename from cpp/tests/strings/json_tests.cpp rename to cpp/tests/json/json_tests.cpp index d74bb9258fa..a03880eef5d 100644 --- a/cpp/tests/strings/json_tests.cpp +++ b/cpp/tests/json/json_tests.cpp @@ -14,8 +14,8 @@ * limitations under the License. */ +#include #include -#include #include #include @@ -85,7 +85,7 @@ TEST_F(JsonPathTests, GetJsonObjectRootOp) // root cudf::test::strings_column_wrapper input{json_string}; std::string json_path("$"); - auto result_raw = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path); + auto result_raw = cudf::get_json_object(cudf::strings_column_view(input), json_path); auto result = drop_whitespace(*result_raw); auto expected = drop_whitespace(input); @@ -98,7 +98,7 @@ TEST_F(JsonPathTests, GetJsonObjectChildOp) { cudf::test::strings_column_wrapper input{json_string}; std::string json_path("$.store"); - auto result_raw = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path); + auto result_raw = cudf::get_json_object(cudf::strings_column_view(input), json_path); auto result = drop_whitespace(*result_raw); // clang-format off @@ -147,7 +147,7 @@ TEST_F(JsonPathTests, GetJsonObjectChildOp) { cudf::test::strings_column_wrapper input{json_string}; std::string json_path("$.store.book"); - auto result_raw = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path); + auto result_raw = cudf::get_json_object(cudf::strings_column_view(input), json_path); auto result = drop_whitespace(*result_raw); // clang-format off @@ -193,7 +193,7 @@ TEST_F(JsonPathTests, GetJsonObjectWildcardOp) { cudf::test::strings_column_wrapper input{json_string}; std::string json_path("$.store.*"); - auto result_raw = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path); + auto result_raw = cudf::get_json_object(cudf::strings_column_view(input), json_path); auto result = drop_whitespace(*result_raw); // clang-format off @@ -242,7 +242,7 @@ TEST_F(JsonPathTests, GetJsonObjectWildcardOp) { cudf::test::strings_column_wrapper input{json_string}; std::string json_path("*"); - auto result_raw = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path); + auto result_raw = cudf::get_json_object(cudf::strings_column_view(input), json_path); auto result = drop_whitespace(*result_raw); // clang-format off @@ -297,7 +297,7 @@ TEST_F(JsonPathTests, GetJsonObjectSubscriptOp) { cudf::test::strings_column_wrapper input{json_string}; std::string json_path("$.store.book[2]"); - auto result_raw = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path); + auto result_raw = cudf::get_json_object(cudf::strings_column_view(input), json_path); auto result = drop_whitespace(*result_raw); // clang-format off @@ -319,7 +319,7 @@ TEST_F(JsonPathTests, GetJsonObjectSubscriptOp) { cudf::test::strings_column_wrapper input{json_string}; std::string json_path("$.store['bicycle']"); - auto result_raw = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path); + auto result_raw = cudf::get_json_object(cudf::strings_column_view(input), json_path); auto result = drop_whitespace(*result_raw); // clang-format off @@ -338,7 +338,7 @@ TEST_F(JsonPathTests, GetJsonObjectSubscriptOp) { cudf::test::strings_column_wrapper input{json_string}; std::string json_path("$.store.book[*]"); - auto result_raw = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path); + auto result_raw = cudf::get_json_object(cudf::strings_column_view(input), json_path); auto result = drop_whitespace(*result_raw); // clang-format off @@ -387,7 +387,7 @@ TEST_F(JsonPathTests, GetJsonObjectFilter) { cudf::test::strings_column_wrapper input{json_string}; std::string json_path("$.store.book[*]['isbn']"); - auto result_raw = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path); + auto result_raw = cudf::get_json_object(cudf::strings_column_view(input), json_path); auto result = drop_whitespace(*result_raw); cudf::test::strings_column_wrapper expected_raw{R"(["0-553-21311-3","0-395-19395-8"])"}; @@ -399,7 +399,7 @@ TEST_F(JsonPathTests, GetJsonObjectFilter) { cudf::test::strings_column_wrapper input{json_string}; std::string json_path("$.store.book[*].category"); - auto result_raw = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path); + auto result_raw = cudf::get_json_object(cudf::strings_column_view(input), json_path); auto result = drop_whitespace(*result_raw); cudf::test::strings_column_wrapper expected_raw{ @@ -412,7 +412,7 @@ TEST_F(JsonPathTests, GetJsonObjectFilter) { cudf::test::strings_column_wrapper input{json_string}; std::string json_path("$.store.book[*].title"); - auto result_raw = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path); + auto result_raw = cudf::get_json_object(cudf::strings_column_view(input), json_path); auto result = drop_whitespace(*result_raw); cudf::test::strings_column_wrapper expected_raw{ @@ -425,7 +425,7 @@ TEST_F(JsonPathTests, GetJsonObjectFilter) { cudf::test::strings_column_wrapper input{json_string}; std::string json_path("$.store.book.*.price"); - auto result_raw = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path); + auto result_raw = cudf::get_json_object(cudf::strings_column_view(input), json_path); auto result = drop_whitespace(*result_raw); cudf::test::strings_column_wrapper expected_raw{"[8.95,12.99,8.99,22.99]"}; @@ -440,7 +440,7 @@ TEST_F(JsonPathTests, GetJsonObjectFilter) // spark: fiction cudf::test::strings_column_wrapper input{json_string}; std::string json_path("$.store.book[2].category"); - auto result_raw = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path); + auto result_raw = cudf::get_json_object(cudf::strings_column_view(input), json_path); auto result = drop_whitespace(*result_raw); cudf::test::strings_column_wrapper expected_raw{"fiction"}; @@ -457,7 +457,7 @@ TEST_F(JsonPathTests, GetJsonObjectNullInputs) cudf::test::strings_column_wrapper input({str, str, str, str}, {1, 0, 1, 0}); std::string json_path("$.a"); - auto result_raw = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path); + auto result_raw = cudf::get_json_object(cudf::strings_column_view(input), json_path); auto result = drop_whitespace(*result_raw); cudf::test::strings_column_wrapper expected_raw({"b", "", "b", ""}, {1, 0, 1, 0}); @@ -473,7 +473,7 @@ TEST_F(JsonPathTests, GetJsonObjectEmptyQuery) { cudf::test::strings_column_wrapper input{R"({"a" : "b"})"}; std::string json_path(""); - auto result = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path); + auto result = cudf::get_json_object(cudf::strings_column_view(input), json_path); cudf::test::strings_column_wrapper expected({""}, {0}); @@ -487,7 +487,7 @@ TEST_F(JsonPathTests, GetJsonObjectEmptyInputsAndOutputs) { cudf::test::strings_column_wrapper input{""}; std::string json_path("$"); - auto result = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path); + auto result = cudf::get_json_object(cudf::strings_column_view(input), json_path); cudf::test::strings_column_wrapper expected({""}, {0}); @@ -500,7 +500,7 @@ TEST_F(JsonPathTests, GetJsonObjectEmptyInputsAndOutputs) { cudf::test::strings_column_wrapper input{R"({"store": { "bicycle" : "" } })"}; std::string json_path("$.store.bicycle"); - auto result = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path); + auto result = cudf::get_json_object(cudf::strings_column_view(input), json_path); cudf::test::strings_column_wrapper expected({""}, {1}); @@ -512,7 +512,7 @@ TEST_F(JsonPathTests, GetJsonObjectEmptyInput) { cudf::test::strings_column_wrapper input{}; std::string json_path("$"); - auto result = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path); + auto result = cudf::get_json_object(cudf::strings_column_view(input), json_path); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, input); } @@ -525,7 +525,7 @@ TEST_F(JsonPathTests, GetJsonObjectIllegalQuery) cudf::test::strings_column_wrapper input{R"({"a": "b"})"}; std::string json_path("$$"); auto query = [&]() { - auto result = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path); + auto result = cudf::get_json_object(cudf::strings_column_view(input), json_path); }; EXPECT_THROW(query(), cudf::logic_error); } @@ -535,7 +535,7 @@ TEST_F(JsonPathTests, GetJsonObjectIllegalQuery) cudf::test::strings_column_wrapper input{R"({"a": "b"})"}; std::string json_path("$[auh46h-]"); auto query = [&]() { - auto result = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path); + auto result = cudf::get_json_object(cudf::strings_column_view(input), json_path); }; EXPECT_THROW(query(), cudf::logic_error); } @@ -545,7 +545,7 @@ TEST_F(JsonPathTests, GetJsonObjectIllegalQuery) cudf::test::strings_column_wrapper input{R"({"a": "b"})"}; std::string json_path("$[[]]"); auto query = [&]() { - auto result = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path); + auto result = cudf::get_json_object(cudf::strings_column_view(input), json_path); }; EXPECT_THROW(query(), cudf::logic_error); } @@ -555,7 +555,7 @@ TEST_F(JsonPathTests, GetJsonObjectIllegalQuery) cudf::test::strings_column_wrapper input{R"({"a": "b"})"}; std::string json_path("$[-1]"); auto query = [&]() { - auto result = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path); + auto result = cudf::get_json_object(cudf::strings_column_view(input), json_path); }; EXPECT_THROW(query(), cudf::logic_error); } @@ -565,7 +565,7 @@ TEST_F(JsonPathTests, GetJsonObjectIllegalQuery) cudf::test::strings_column_wrapper input{R"({"a": "b"})"}; std::string json_path("."); auto query = [&]() { - auto result = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path); + auto result = cudf::get_json_object(cudf::strings_column_view(input), json_path); }; EXPECT_THROW(query(), std::invalid_argument); } @@ -574,7 +574,7 @@ TEST_F(JsonPathTests, GetJsonObjectIllegalQuery) cudf::test::strings_column_wrapper input{R"({"a": "b"})"}; std::string json_path("]["); auto query = [&]() { - auto result = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path); + auto result = cudf::get_json_object(cudf::strings_column_view(input), json_path); }; EXPECT_THROW(query(), std::invalid_argument); } @@ -583,7 +583,7 @@ TEST_F(JsonPathTests, GetJsonObjectIllegalQuery) cudf::test::strings_column_wrapper input{R"({"a": "b"})"}; std::string json_path("6hw6,56i3"); auto query = [&]() { - auto result = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path); + auto result = cudf::get_json_object(cudf::strings_column_view(input), json_path); }; EXPECT_THROW(query(), std::invalid_argument); } @@ -596,7 +596,7 @@ TEST_F(JsonPathTests, GetJsonObjectInvalidQuery) { cudf::test::strings_column_wrapper input{R"({"a": "b"})"}; std::string json_path("$[*].c"); - auto result = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path); + auto result = cudf::get_json_object(cudf::strings_column_view(input), json_path); cudf::test::strings_column_wrapper expected({""}, {0}); @@ -607,7 +607,7 @@ TEST_F(JsonPathTests, GetJsonObjectInvalidQuery) { cudf::test::strings_column_wrapper input{R"({"a": "b"})"}; std::string json_path("$[*].c[2]"); - auto result = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path); + auto result = cudf::get_json_object(cudf::strings_column_view(input), json_path); cudf::test::strings_column_wrapper expected({""}, {0}); @@ -618,7 +618,7 @@ TEST_F(JsonPathTests, GetJsonObjectInvalidQuery) { cudf::test::strings_column_wrapper input{json_string}; std::string json_path("$.store.book.price"); - auto result = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path); + auto result = cudf::get_json_object(cudf::strings_column_view(input), json_path); cudf::test::strings_column_wrapper expected({""}, {0}); @@ -629,7 +629,7 @@ TEST_F(JsonPathTests, GetJsonObjectInvalidQuery) { cudf::test::strings_column_wrapper input{json_string}; std::string json_path("$.store.book[4]"); - auto result = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path); + auto result = cudf::get_json_object(cudf::strings_column_view(input), json_path); cudf::test::strings_column_wrapper expected({""}, {0}); @@ -672,7 +672,7 @@ TEST_F(JsonPathTests, MixedOutput) cudf::test::strings_column_wrapper input(input_strings.begin(), input_strings.end()); { std::string json_path("$.a"); - auto result = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path); + auto result = cudf::get_json_object(cudf::strings_column_view(input), json_path); // clang-format off cudf::test::strings_column_wrapper expected({ @@ -694,7 +694,7 @@ TEST_F(JsonPathTests, MixedOutput) { std::string json_path("$.a[1]"); - auto result = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path); + auto result = cudf::get_json_object(cudf::strings_column_view(input), json_path); // clang-format off cudf::test::strings_column_wrapper expected({ @@ -713,7 +713,7 @@ TEST_F(JsonPathTests, MixedOutput) { std::string json_path("$.a.b"); - auto result = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path); + auto result = cudf::get_json_object(cudf::strings_column_view(input), json_path); // clang-format off cudf::test::strings_column_wrapper expected({ @@ -731,7 +731,7 @@ TEST_F(JsonPathTests, MixedOutput) { std::string json_path("$.a[*]"); - auto result = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path); + auto result = cudf::get_json_object(cudf::strings_column_view(input), json_path); // clang-format off cudf::test::strings_column_wrapper expected({ @@ -752,7 +752,7 @@ TEST_F(JsonPathTests, MixedOutput) { std::string json_path("$.a.b[*]"); - auto result = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path); + auto result = cudf::get_json_object(cudf::strings_column_view(input), json_path); // clang-format off cudf::test::strings_column_wrapper expected({ @@ -779,13 +779,12 @@ TEST_F(JsonPathTests, StripQuotes) std::string str("{\"a\" : \"b\"}"); cudf::test::strings_column_wrapper input({str, str}); - cudf::strings::get_json_object_options options; + cudf::get_json_object_options options; options.set_strip_quotes_from_single_strings(false); std::string json_path("$.a"); - auto result_raw = - cudf::strings::get_json_object(cudf::strings_column_view(input), json_path, options); - auto result = drop_whitespace(*result_raw); + auto result_raw = cudf::get_json_object(cudf::strings_column_view(input), json_path, options); + auto result = drop_whitespace(*result_raw); cudf::test::strings_column_wrapper expected_raw({"\"b\"", "\"b\""}); auto expected = drop_whitespace(expected_raw); @@ -798,11 +797,10 @@ TEST_F(JsonPathTests, StripQuotes) cudf::test::strings_column_wrapper input{R"({"store": { "bicycle" : "" } })"}; std::string json_path("$.store.bicycle"); - cudf::strings::get_json_object_options options; + cudf::get_json_object_options options; options.set_strip_quotes_from_single_strings(true); - auto result = - cudf::strings::get_json_object(cudf::strings_column_view(input), json_path, options); + auto result = cudf::get_json_object(cudf::strings_column_view(input), json_path, options); cudf::test::strings_column_wrapper expected({""}); @@ -859,11 +857,10 @@ TEST_F(JsonPathTests, AllowSingleQuotes) { std::string json_path("$.a"); - cudf::strings::get_json_object_options options; + cudf::get_json_object_options options; options.set_allow_single_quotes(true); - auto result = - cudf::strings::get_json_object(cudf::strings_column_view(input), json_path, options); + auto result = cudf::get_json_object(cudf::strings_column_view(input), json_path, options); // clang-format off cudf::test::strings_column_wrapper expected({ @@ -903,11 +900,10 @@ TEST_F(JsonPathTests, StringsWithSpecialChars) { std::string json_path("$.item"); - cudf::strings::get_json_object_options options; + cudf::get_json_object_options options; options.set_allow_single_quotes(true); - auto result = - cudf::strings::get_json_object(cudf::strings_column_view(input), json_path, options); + auto result = cudf::get_json_object(cudf::strings_column_view(input), json_path, options); // clang-format off cudf::test::strings_column_wrapper expected({ @@ -929,11 +925,10 @@ TEST_F(JsonPathTests, StringsWithSpecialChars) { std::string json_path("$.a"); - cudf::strings::get_json_object_options options; + cudf::get_json_object_options options; options.set_allow_single_quotes(true); - auto result = - cudf::strings::get_json_object(cudf::strings_column_view(input), json_path, options); + auto result = cudf::get_json_object(cudf::strings_column_view(input), json_path, options); // clang-format off cudf::test::strings_column_wrapper expected({ @@ -962,11 +957,10 @@ TEST_F(JsonPathTests, EscapeSequences) { std::string json_path("$.a"); - cudf::strings::get_json_object_options options; + cudf::get_json_object_options options; options.set_allow_single_quotes(true); - auto result = - cudf::strings::get_json_object(cudf::strings_column_view(input), json_path, options); + auto result = cudf::get_json_object(cudf::strings_column_view(input), json_path, options); // clang-format off cudf::test::strings_column_wrapper expected({ @@ -998,12 +992,12 @@ TEST_F(JsonPathTests, MissingFieldsAsNulls) auto const& missing_fields_output, bool default_valid = true) { cudf::test::strings_column_wrapper input{input_string}; - cudf::strings::get_json_object_options options; + cudf::get_json_object_options options; // Test default behavior options.set_missing_fields_as_nulls(false); auto const default_result = - cudf::strings::get_json_object(cudf::strings_column_view(input), {json_path_string}, options); + cudf::get_json_object(cudf::strings_column_view(input), {json_path_string}, options); cudf::test::strings_column_wrapper default_expected({default_output}, {default_valid}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(default_expected, *default_result); @@ -1011,7 +1005,7 @@ TEST_F(JsonPathTests, MissingFieldsAsNulls) // Test with missing fields as null options.set_missing_fields_as_nulls(true); auto const missing_fields_result = - cudf::strings::get_json_object(cudf::strings_column_view(input), {json_path_string}, options); + cudf::get_json_object(cudf::strings_column_view(input), {json_path_string}, options); cudf::test::strings_column_wrapper missing_fields_expected({missing_fields_output}, {1}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(missing_fields_expected, *missing_fields_result); diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp index 462f0d8eac9..7a626daff1f 100644 --- a/java/src/main/native/src/ColumnViewJni.cpp +++ b/java/src/main/native/src/ColumnViewJni.cpp @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -62,7 +63,6 @@ #include #include #include -#include #include #include #include @@ -2443,7 +2443,7 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_getJSONObject(JNIEnv *env cudf::column_view *n_column_view = reinterpret_cast(j_view_handle); cudf::strings_column_view n_strings_col_view(*n_column_view); cudf::string_scalar *n_scalar_path = reinterpret_cast(j_scalar_handle); - return release_as_jlong(cudf::strings::get_json_object(n_strings_col_view, *n_scalar_path)); + return release_as_jlong(cudf::get_json_object(n_strings_col_view, *n_scalar_path)); } CATCH_STD(env, 0) } diff --git a/python/cudf/cudf/_lib/cpp/strings/json.pxd b/python/cudf/cudf/_lib/cpp/strings/json.pxd index a017e1c5382..eed627c96b5 100644 --- a/python/cudf/cudf/_lib/cpp/strings/json.pxd +++ b/python/cudf/cudf/_lib/cpp/strings/json.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2022, NVIDIA CORPORATION. +# Copyright (c) 2021-2023, NVIDIA CORPORATION. from libcpp cimport bool from libcpp.memory cimport unique_ptr @@ -9,7 +9,7 @@ from cudf._lib.cpp.column.column_view cimport column_view from cudf._lib.cpp.scalar.scalar cimport scalar, string_scalar -cdef extern from "cudf/strings/json.hpp" namespace "cudf::strings" nogil: +cdef extern from "cudf/json/json.hpp" namespace "cudf" nogil: cdef cppclass get_json_object_options: get_json_object_options() except + # getters