From 13e5be3cd109268b4f80a2333c140bb43591d451 Mon Sep 17 00:00:00 2001 From: Shruti Shivakumar Date: Fri, 20 Oct 2023 23:39:21 +0000 Subject: [PATCH 01/10] Added streams to JSON reader and writer api; added tests --- cpp/include/cudf/io/json.hpp | 4 ++ cpp/src/io/functions.cpp | 12 +++-- cpp/tests/CMakeLists.txt | 6 +++ cpp/tests/io/json_test.cpp | 3 +- cpp/tests/io/json_writer.cpp | 51 ++++++++++++------- cpp/tests/streams/io/functions_test.cpp | 67 +++++++++++++++++++++++++ 6 files changed, 121 insertions(+), 22 deletions(-) create mode 100644 cpp/tests/streams/io/functions_test.cpp diff --git a/cpp/include/cudf/io/json.hpp b/cpp/include/cudf/io/json.hpp index d408d249a7f..4689bfa0d67 100644 --- a/cpp/include/cudf/io/json.hpp +++ b/cpp/include/cudf/io/json.hpp @@ -512,6 +512,7 @@ class json_reader_options_builder { * @endcode * * @param options Settings for controlling reading behavior + * @param stream Cuda stream to use for device memory operations * @param mr Device memory resource used to allocate device memory of the table in the returned * table_with_metadata. * @@ -519,6 +520,7 @@ class json_reader_options_builder { */ table_with_metadata read_json( json_reader_options options, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group @@ -861,9 +863,11 @@ class json_writer_options_builder { * @endcode * * @param options Settings for controlling writing behavior + * @param stream Cuda stream to use for device memory operations * @param mr Device memory resource to use for device memory allocation */ void write_json(json_writer_options const& options, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group diff --git a/cpp/src/io/functions.cpp b/cpp/src/io/functions.cpp index 726442d752e..aaae439c586 100644 --- a/cpp/src/io/functions.cpp +++ b/cpp/src/io/functions.cpp @@ -200,7 +200,9 @@ compression_type infer_compression_type(compression_type compression, source_inf return compression_type::NONE; } -table_with_metadata read_json(json_reader_options options, rmm::mr::device_memory_resource* mr) +table_with_metadata read_json(json_reader_options options, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); @@ -210,10 +212,12 @@ table_with_metadata read_json(json_reader_options options, rmm::mr::device_memor options.get_byte_range_offset(), options.get_byte_range_size_with_padding()); - return json::detail::read_json(datasources, options, cudf::get_default_stream(), mr); + return json::detail::read_json(datasources, options, stream, mr); } -void write_json(json_writer_options const& options, rmm::mr::device_memory_resource* mr) +void write_json(json_writer_options const& options, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { auto sinks = make_datasinks(options.get_sink()); CUDF_EXPECTS(sinks.size() == 1, "Multiple sinks not supported for JSON writing"); @@ -222,7 +226,7 @@ void write_json(json_writer_options const& options, rmm::mr::device_memory_resou sinks[0].get(), options.get_table(), options, - cudf::get_default_stream(), + stream, mr); } diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 3e30db7abcb..c8d0dafe99a 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -646,6 +646,12 @@ ConfigureTest( ConfigureTest(STREAM_SORTING_TEST streams/sorting_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_TEXT_TEST streams/text/ngrams_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_LISTS_TEST streams/lists_test.cpp STREAM_MODE testing) +ConfigureTest( + STREAM_IO_TEST + streams/io/functions_test.cpp + STREAM_MODE + testing +) # ################################################################################################## # Install tests #################################################################################### diff --git a/cpp/tests/io/json_test.cpp b/cpp/tests/io/json_test.cpp index 2ddb0b76544..a85e1e1830a 100644 --- a/cpp/tests/io/json_test.cpp +++ b/cpp/tests/io/json_test.cpp @@ -1422,7 +1422,8 @@ TEST_F(JsonReaderTest, JsonLongString) .lines(true) .na_rep("null"); - cudf::io::write_json(options_builder.build(), rmm::mr::get_current_device_resource()); + cudf::io::write_json( + options_builder.build(), cudf::get_default_stream(), rmm::mr::get_current_device_resource()); cudf::table_view const expected = tbl_view; std::map types; diff --git a/cpp/tests/io/json_writer.cpp b/cpp/tests/io/json_writer.cpp index 3a4074c02ad..b4a76b73142 100644 --- a/cpp/tests/io/json_writer.cpp +++ b/cpp/tests/io/json_writer.cpp @@ -49,14 +49,16 @@ TEST_F(JsonWriterTest, EmptyInput) .build(); // Empty columns in table - cudf::io::write_json(out_options, rmm::mr::get_current_device_resource()); + cudf::io::write_json( + out_options, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); std::string const expected = R"([])"; EXPECT_EQ(expected, std::string(out_buffer.data(), out_buffer.size())); // Empty columns in table - JSON Lines out_buffer.clear(); out_options.enable_lines(true); - cudf::io::write_json(out_options, rmm::mr::get_current_device_resource()); + cudf::io::write_json( + out_options, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); std::string const expected_lines = "\n"; EXPECT_EQ(expected_lines, std::string(out_buffer.data(), out_buffer.size())); @@ -64,7 +66,8 @@ TEST_F(JsonWriterTest, EmptyInput) cudf::table_view tbl_view2{}; out_options.set_table(tbl_view2); out_buffer.clear(); - cudf::io::write_json(out_options, rmm::mr::get_current_device_resource()); + cudf::io::write_json( + out_options, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); EXPECT_EQ(expected_lines, std::string(out_buffer.data(), out_buffer.size())); } @@ -89,16 +92,19 @@ TEST_F(JsonWriterTest, ErrorCases) .build(); // not enough column names - EXPECT_THROW(cudf::io::write_json(out_options, rmm::mr::get_current_device_resource()), + EXPECT_THROW(cudf::io::write_json( + out_options, cudf::get_default_stream(), rmm::mr::get_current_device_resource()), cudf::logic_error); mt.schema_info.emplace_back("int16"); out_options.set_metadata(mt); - EXPECT_NO_THROW(cudf::io::write_json(out_options, rmm::mr::get_current_device_resource())); + EXPECT_NO_THROW(cudf::io::write_json( + out_options, cudf::get_default_stream(), rmm::mr::get_current_device_resource())); // chunk_rows must be at least 8 out_options.set_rows_per_chunk(0); - EXPECT_THROW(cudf::io::write_json(out_options, rmm::mr::get_current_device_resource()), + EXPECT_THROW(cudf::io::write_json( + out_options, cudf::get_default_stream(), rmm::mr::get_current_device_resource()), cudf::logic_error); } @@ -121,7 +127,8 @@ TEST_F(JsonWriterTest, PlainTable) .lines(false) .na_rep("null"); - cudf::io::write_json(options_builder.build(), rmm::mr::get_current_device_resource()); + cudf::io::write_json( + options_builder.build(), cudf::get_default_stream(), rmm::mr::get_current_device_resource()); std::string const expected = R"([{"col1":"a","col2":"d","int":1,"float":1.5,"int16":null},{"col1":"b","col2":"e","int":2,"float":2.5,"int16":2},{"col1":"c","col2":"f","int":3,"float":3.5,"int16":null}])"; @@ -151,7 +158,8 @@ TEST_F(JsonWriterTest, SimpleNested) .lines(true) .na_rep("null"); - cudf::io::write_json(options_builder.build(), rmm::mr::get_current_device_resource()); + cudf::io::write_json( + options_builder.build(), cudf::get_default_stream(), rmm::mr::get_current_device_resource()); std::string const expected = R"({"a":1,"b":2,"c":{"d":3},"f":5.5,"g":[1]} {"a":6,"b":7,"c":{"d":8},"f":10.5} {"a":1,"b":2,"c":{"e":4},"f":5.5,"g":[2,null]} @@ -183,7 +191,8 @@ TEST_F(JsonWriterTest, MixedNested) .lines(false) .na_rep("null"); - cudf::io::write_json(options_builder.build(), rmm::mr::get_current_device_resource()); + cudf::io::write_json( + options_builder.build(), cudf::get_default_stream(), rmm::mr::get_current_device_resource()); std::string const expected = R"([{"a":1,"b":2,"c":{"d":[3]},"f":5.5,"g":[{"h":1}]},)" R"({"a":6,"b":7,"c":{"d":[8]},"f":10.5},)" @@ -216,7 +225,8 @@ TEST_F(JsonWriterTest, WriteReadNested) .na_rep("null") .build(); - cudf::io::write_json(out_options, rmm::mr::get_current_device_resource()); + cudf::io::write_json( + out_options, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); std::string const expected = R"({"a":1,"b":2,"c":{"d":3},"f":5.5,"g":[1]} {"a":6,"b":7,"c":{"d":8},"f":10.5} {"a":1,"b":2,"c":{"e":4},"f":5.5,"g":[2,null]} @@ -291,7 +301,8 @@ TEST_F(JsonWriterTest, WriteReadNested) mt.schema_info[2].children.clear(); out_options.set_metadata(mt); out_buffer.clear(); - cudf::io::write_json(out_options, rmm::mr::get_current_device_resource()); + cudf::io::write_json( + out_options, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); in_options = cudf::io::json_reader_options::builder( cudf::io::source_info{out_buffer.data(), out_buffer.size()}) @@ -314,7 +325,8 @@ TEST_F(JsonWriterTest, WriteReadNested) // without column names out_options.set_metadata(cudf::io::table_metadata{}); out_buffer.clear(); - cudf::io::write_json(out_options, rmm::mr::get_current_device_resource()); + cudf::io::write_json( + out_options, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); in_options = cudf::io::json_reader_options::builder( cudf::io::source_info{out_buffer.data(), out_buffer.size()}) .lines(true) @@ -352,7 +364,8 @@ TEST_F(JsonWriterTest, SpecialChars) .na_rep("null") .build(); - cudf::io::write_json(out_options, rmm::mr::get_current_device_resource()); + cudf::io::write_json( + out_options, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); std::string const expected = R"({"\"a\"":1,"'b'":"abcd"} {"\"a\"":6,"'b'":"b\b\f\n\r\t"} {"\"a\"":1,"'b'":"\"c\""} @@ -385,7 +398,8 @@ TEST_F(JsonWriterTest, NullList) .lines(true) .na_rep("null"); - cudf::io::write_json(options_builder.build(), rmm::mr::get_current_device_resource()); + cudf::io::write_json( + options_builder.build(), cudf::get_default_stream(), rmm::mr::get_current_device_resource()); std::string const expected = R"({"a":[null],"b":[[1,2,3],[null],[null,null,null],[4,null,5]]} {"a":[2,null,null,3],"b":null} {"a":[null,null,4],"b":[[2,null],null]} @@ -424,7 +438,8 @@ TEST_F(JsonWriterTest, ChunkedNested) .na_rep("null") .rows_per_chunk(8); - cudf::io::write_json(options_builder.build(), rmm::mr::get_current_device_resource()); + cudf::io::write_json( + options_builder.build(), cudf::get_default_stream(), rmm::mr::get_current_device_resource()); std::string const expected = R"({"a":1,"b":-2,"c":{},"e":[{"f":1}]} {"a":2,"b":-2,"c":{}} @@ -480,7 +495,8 @@ TEST_F(JsonWriterTest, StructAllNullCombinations) .lines(true) .na_rep("null"); - cudf::io::write_json(options_builder.build(), rmm::mr::get_current_device_resource()); + cudf::io::write_json( + options_builder.build(), cudf::get_default_stream(), rmm::mr::get_current_device_resource()); std::string const expected = R"({} {"e":1} {"d":1} @@ -542,7 +558,8 @@ TEST_F(JsonWriterTest, Unicode) .lines(true) .na_rep("null"); - cudf::io::write_json(options_builder.build(), rmm::mr::get_current_device_resource()); + cudf::io::write_json( + options_builder.build(), cudf::get_default_stream(), rmm::mr::get_current_device_resource()); std::string const expected = R"({"col1":"\"\\\/\b\f\n\r\t","col2":"C\u10ae\u226a\u31f3\u434f\u51f9\u6ca6\u738b\u8fbf\u9fb8\ua057\ubbdc\uc2a4\ud3f6\ue4fe\ufd20","int16":null} diff --git a/cpp/tests/streams/io/functions_test.cpp b/cpp/tests/streams/io/functions_test.cpp new file mode 100644 index 00000000000..b1301f60337 --- /dev/null +++ b/cpp/tests/streams/io/functions_test.cpp @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include + +class FunctionsTest : public cudf::test::BaseFixture {}; + +//.dtypes(std::vector{dtype(), dtype()}) +TEST_F(FunctionsTest, JSONreader) +{ + std::string data = "[1, 1.1]\n[2, 2.2]\n[3, 3.3]\n"; + cudf::io::json_reader_options in_options = + cudf::io::json_reader_options::builder(cudf::io::source_info{data.data(), data.size()}) + .dtypes(std::vector{cudf::data_type{cudf::type_id::INT32}, + cudf::data_type{cudf::type_id::FLOAT64}}) + .lines(true) + .legacy(true); + cudf::io::table_with_metadata result = cudf::io::read_json( + in_options, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); +} + +TEST_F(FunctionsTest, JSONwriter) +{ + cudf::test::strings_column_wrapper col1{"a", "b", "c"}; + cudf::test::strings_column_wrapper col2{"d", "e", "f"}; + cudf::test::fixed_width_column_wrapper col3{1, 2, 3}; + cudf::test::fixed_width_column_wrapper col4{1.5, 2.5, 3.5}; + cudf::test::fixed_width_column_wrapper col5{{1, 2, 3}, + cudf::test::iterators::nulls_at({0, 2})}; + cudf::table_view tbl_view{{col1, col2, col3, col4, col5}}; + cudf::io::table_metadata mt{{{"col1"}, {"col2"}, {"int"}, {"float"}, {"int16"}}}; + + std::vector out_buffer; + auto destination = cudf::io::sink_info(&out_buffer); + auto options_builder = cudf::io::json_writer_options_builder(destination, tbl_view) + .include_nulls(true) + .metadata(mt) + .lines(false) + .na_rep("null"); + + cudf::io::write_json( + options_builder.build(), cudf::get_default_stream(), rmm::mr::get_current_device_resource()); +} From c324948a3354c935af0247ca57b251e447d234a2 Mon Sep 17 00:00:00 2001 From: Shruti Shivakumar Date: Mon, 23 Oct 2023 17:40:07 +0000 Subject: [PATCH 02/10] code linting fix for CMakeLists --- cpp/tests/CMakeLists.txt | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index c8d0dafe99a..88f9bad8ad5 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -646,12 +646,7 @@ ConfigureTest( ConfigureTest(STREAM_SORTING_TEST streams/sorting_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_TEXT_TEST streams/text/ngrams_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_LISTS_TEST streams/lists_test.cpp STREAM_MODE testing) -ConfigureTest( - STREAM_IO_TEST - streams/io/functions_test.cpp - STREAM_MODE - testing -) +ConfigureTest(STREAM_IO_TEST streams/io/functions_test.cpp STREAM_MODE testing) # ################################################################################################## # Install tests #################################################################################### From 3b8e9827aa983479da27a1ddb31a04501be8d43b Mon Sep 17 00:00:00 2001 From: Shruti Shivakumar Date: Mon, 23 Oct 2023 21:55:39 +0000 Subject: [PATCH 03/10] docstring fix; default stream update in tests --- cpp/include/cudf/io/json.hpp | 4 +- cpp/tests/io/json_test.cpp | 6 ++- cpp/tests/io/json_writer.cpp | 66 ++++++++++++++----------- cpp/tests/streams/io/functions_test.cpp | 8 +-- 4 files changed, 49 insertions(+), 35 deletions(-) diff --git a/cpp/include/cudf/io/json.hpp b/cpp/include/cudf/io/json.hpp index 4689bfa0d67..bcb9677e718 100644 --- a/cpp/include/cudf/io/json.hpp +++ b/cpp/include/cudf/io/json.hpp @@ -512,7 +512,7 @@ class json_reader_options_builder { * @endcode * * @param options Settings for controlling reading behavior - * @param stream Cuda stream to use for device memory operations + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate device memory of the table in the returned * table_with_metadata. * @@ -863,7 +863,7 @@ class json_writer_options_builder { * @endcode * * @param options Settings for controlling writing behavior - * @param stream Cuda stream to use for device memory operations + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource to use for device memory allocation */ void write_json(json_writer_options const& options, diff --git a/cpp/tests/io/json_test.cpp b/cpp/tests/io/json_test.cpp index c6dc9442399..a2db2d69984 100644 --- a/cpp/tests/io/json_test.cpp +++ b/cpp/tests/io/json_test.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -1422,8 +1423,9 @@ TEST_F(JsonReaderTest, JsonLongString) .lines(true) .na_rep("null"); - cudf::io::write_json( - options_builder.build(), cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + cudf::io::write_json(options_builder.build(), + cudf::test::get_default_stream(), + rmm::mr::get_current_device_resource()); cudf::table_view const expected = tbl_view; std::map types; diff --git a/cpp/tests/io/json_writer.cpp b/cpp/tests/io/json_writer.cpp index b4a76b73142..a85a696565b 100644 --- a/cpp/tests/io/json_writer.cpp +++ b/cpp/tests/io/json_writer.cpp @@ -16,6 +16,7 @@ #include #include +#include #include #include @@ -50,7 +51,7 @@ TEST_F(JsonWriterTest, EmptyInput) // Empty columns in table cudf::io::write_json( - out_options, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + out_options, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource()); std::string const expected = R"([])"; EXPECT_EQ(expected, std::string(out_buffer.data(), out_buffer.size())); @@ -58,7 +59,7 @@ TEST_F(JsonWriterTest, EmptyInput) out_buffer.clear(); out_options.enable_lines(true); cudf::io::write_json( - out_options, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + out_options, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource()); std::string const expected_lines = "\n"; EXPECT_EQ(expected_lines, std::string(out_buffer.data(), out_buffer.size())); @@ -67,7 +68,7 @@ TEST_F(JsonWriterTest, EmptyInput) out_options.set_table(tbl_view2); out_buffer.clear(); cudf::io::write_json( - out_options, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + out_options, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource()); EXPECT_EQ(expected_lines, std::string(out_buffer.data(), out_buffer.size())); } @@ -92,20 +93,22 @@ TEST_F(JsonWriterTest, ErrorCases) .build(); // not enough column names - EXPECT_THROW(cudf::io::write_json( - out_options, cudf::get_default_stream(), rmm::mr::get_current_device_resource()), - cudf::logic_error); + EXPECT_THROW( + cudf::io::write_json( + out_options, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource()), + cudf::logic_error); mt.schema_info.emplace_back("int16"); out_options.set_metadata(mt); EXPECT_NO_THROW(cudf::io::write_json( - out_options, cudf::get_default_stream(), rmm::mr::get_current_device_resource())); + out_options, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource())); // chunk_rows must be at least 8 out_options.set_rows_per_chunk(0); - EXPECT_THROW(cudf::io::write_json( - out_options, cudf::get_default_stream(), rmm::mr::get_current_device_resource()), - cudf::logic_error); + EXPECT_THROW( + cudf::io::write_json( + out_options, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource()), + cudf::logic_error); } TEST_F(JsonWriterTest, PlainTable) @@ -127,8 +130,9 @@ TEST_F(JsonWriterTest, PlainTable) .lines(false) .na_rep("null"); - cudf::io::write_json( - options_builder.build(), cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + cudf::io::write_json(options_builder.build(), + cudf::test::get_default_stream(), + rmm::mr::get_current_device_resource()); std::string const expected = R"([{"col1":"a","col2":"d","int":1,"float":1.5,"int16":null},{"col1":"b","col2":"e","int":2,"float":2.5,"int16":2},{"col1":"c","col2":"f","int":3,"float":3.5,"int16":null}])"; @@ -158,8 +162,9 @@ TEST_F(JsonWriterTest, SimpleNested) .lines(true) .na_rep("null"); - cudf::io::write_json( - options_builder.build(), cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + cudf::io::write_json(options_builder.build(), + cudf::test::get_default_stream(), + rmm::mr::get_current_device_resource()); std::string const expected = R"({"a":1,"b":2,"c":{"d":3},"f":5.5,"g":[1]} {"a":6,"b":7,"c":{"d":8},"f":10.5} {"a":1,"b":2,"c":{"e":4},"f":5.5,"g":[2,null]} @@ -191,8 +196,9 @@ TEST_F(JsonWriterTest, MixedNested) .lines(false) .na_rep("null"); - cudf::io::write_json( - options_builder.build(), cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + cudf::io::write_json(options_builder.build(), + cudf::test::get_default_stream(), + rmm::mr::get_current_device_resource()); std::string const expected = R"([{"a":1,"b":2,"c":{"d":[3]},"f":5.5,"g":[{"h":1}]},)" R"({"a":6,"b":7,"c":{"d":[8]},"f":10.5},)" @@ -226,7 +232,7 @@ TEST_F(JsonWriterTest, WriteReadNested) .build(); cudf::io::write_json( - out_options, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + out_options, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource()); std::string const expected = R"({"a":1,"b":2,"c":{"d":3},"f":5.5,"g":[1]} {"a":6,"b":7,"c":{"d":8},"f":10.5} {"a":1,"b":2,"c":{"e":4},"f":5.5,"g":[2,null]} @@ -302,7 +308,7 @@ TEST_F(JsonWriterTest, WriteReadNested) out_options.set_metadata(mt); out_buffer.clear(); cudf::io::write_json( - out_options, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + out_options, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource()); in_options = cudf::io::json_reader_options::builder( cudf::io::source_info{out_buffer.data(), out_buffer.size()}) @@ -326,7 +332,7 @@ TEST_F(JsonWriterTest, WriteReadNested) out_options.set_metadata(cudf::io::table_metadata{}); out_buffer.clear(); cudf::io::write_json( - out_options, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + out_options, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource()); in_options = cudf::io::json_reader_options::builder( cudf::io::source_info{out_buffer.data(), out_buffer.size()}) .lines(true) @@ -365,7 +371,7 @@ TEST_F(JsonWriterTest, SpecialChars) .build(); cudf::io::write_json( - out_options, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + out_options, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource()); std::string const expected = R"({"\"a\"":1,"'b'":"abcd"} {"\"a\"":6,"'b'":"b\b\f\n\r\t"} {"\"a\"":1,"'b'":"\"c\""} @@ -398,8 +404,9 @@ TEST_F(JsonWriterTest, NullList) .lines(true) .na_rep("null"); - cudf::io::write_json( - options_builder.build(), cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + cudf::io::write_json(options_builder.build(), + cudf::test::get_default_stream(), + rmm::mr::get_current_device_resource()); std::string const expected = R"({"a":[null],"b":[[1,2,3],[null],[null,null,null],[4,null,5]]} {"a":[2,null,null,3],"b":null} {"a":[null,null,4],"b":[[2,null],null]} @@ -438,8 +445,9 @@ TEST_F(JsonWriterTest, ChunkedNested) .na_rep("null") .rows_per_chunk(8); - cudf::io::write_json( - options_builder.build(), cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + cudf::io::write_json(options_builder.build(), + cudf::test::get_default_stream(), + rmm::mr::get_current_device_resource()); std::string const expected = R"({"a":1,"b":-2,"c":{},"e":[{"f":1}]} {"a":2,"b":-2,"c":{}} @@ -495,8 +503,9 @@ TEST_F(JsonWriterTest, StructAllNullCombinations) .lines(true) .na_rep("null"); - cudf::io::write_json( - options_builder.build(), cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + cudf::io::write_json(options_builder.build(), + cudf::test::get_default_stream(), + rmm::mr::get_current_device_resource()); std::string const expected = R"({} {"e":1} {"d":1} @@ -558,8 +567,9 @@ TEST_F(JsonWriterTest, Unicode) .lines(true) .na_rep("null"); - cudf::io::write_json( - options_builder.build(), cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + cudf::io::write_json(options_builder.build(), + cudf::test::get_default_stream(), + rmm::mr::get_current_device_resource()); std::string const expected = R"({"col1":"\"\\\/\b\f\n\r\t","col2":"C\u10ae\u226a\u31f3\u434f\u51f9\u6ca6\u738b\u8fbf\u9fb8\ua057\ubbdc\uc2a4\ud3f6\ue4fe\ufd20","int16":null} diff --git a/cpp/tests/streams/io/functions_test.cpp b/cpp/tests/streams/io/functions_test.cpp index b1301f60337..de9c26a2e75 100644 --- a/cpp/tests/streams/io/functions_test.cpp +++ b/cpp/tests/streams/io/functions_test.cpp @@ -22,6 +22,7 @@ #include #include +#include #include #include @@ -40,7 +41,7 @@ TEST_F(FunctionsTest, JSONreader) .lines(true) .legacy(true); cudf::io::table_with_metadata result = cudf::io::read_json( - in_options, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + in_options, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource()); } TEST_F(FunctionsTest, JSONwriter) @@ -62,6 +63,7 @@ TEST_F(FunctionsTest, JSONwriter) .lines(false) .na_rep("null"); - cudf::io::write_json( - options_builder.build(), cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + cudf::io::write_json(options_builder.build(), + cudf::test::get_default_stream(), + rmm::mr::get_current_device_resource()); } From d3344c572a2661c99d47620b5bfbbaf37d3b4580 Mon Sep 17 00:00:00 2001 From: Shruti Shivakumar Date: Tue, 24 Oct 2023 23:33:12 +0000 Subject: [PATCH 04/10] added copy constructor to column_to_strings_fn struct --- cpp/src/io/json/write_json.cu | 14 ++++++++++++++ cpp/tests/streams/io/functions_test.cpp | 4 ++++ 2 files changed, 18 insertions(+) diff --git a/cpp/src/io/json/write_json.cu b/cpp/src/io/json/write_json.cu index 2d363c51fce..76c9e3cba5c 100644 --- a/cpp/src/io/json/write_json.cu +++ b/cpp/src/io/json/write_json.cu @@ -504,6 +504,20 @@ struct column_to_strings_fn { { } + column_to_strings_fn(column_to_strings_fn const& other) + : options_{other.options_}, + stream_{other.stream_}, + mr_{other.mr_}, + narep(other.narep, other.stream_), + struct_value_separator(other.struct_value_separator, other.stream_), + struct_row_begin_wrap(other.struct_row_begin_wrap, other.stream_), + struct_row_end_wrap(other.struct_row_end_wrap, other.stream_), + list_value_separator(other.list_value_separator, other.stream_), + list_row_begin_wrap(other.list_row_begin_wrap, other.stream_), + list_row_end_wrap(other.list_row_end_wrap, other.stream_) + { + } + // unsupported type of column: template std::enable_if_t(), std::unique_ptr> operator()( diff --git a/cpp/tests/streams/io/functions_test.cpp b/cpp/tests/streams/io/functions_test.cpp index de9c26a2e75..81bcf99168d 100644 --- a/cpp/tests/streams/io/functions_test.cpp +++ b/cpp/tests/streams/io/functions_test.cpp @@ -54,6 +54,10 @@ TEST_F(FunctionsTest, JSONwriter) cudf::test::iterators::nulls_at({0, 2})}; cudf::table_view tbl_view{{col1, col2, col3, col4, col5}}; cudf::io::table_metadata mt{{{"col1"}, {"col2"}, {"int"}, {"float"}, {"int16"}}}; + /* + cudf::table_view tbl_view{{col1}}; + cudf::io::table_metadata mt{{{"col1"}}}; + */ std::vector out_buffer; auto destination = cudf::io::sink_info(&out_buffer); From 3b8b0315ee13d37d59dfde8c2ebebd15358e43de Mon Sep 17 00:00:00 2001 From: Shruti Shivakumar Date: Wed, 25 Oct 2023 15:49:11 +0000 Subject: [PATCH 05/10] removed unused default stream headers from JSON code --- cpp/src/io/json/nested_json.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/cpp/src/io/json/nested_json.hpp b/cpp/src/io/json/nested_json.hpp index 3bbfc4b5f83..8d89f4ff927 100644 --- a/cpp/src/io/json/nested_json.hpp +++ b/cpp/src/io/json/nested_json.hpp @@ -20,7 +20,6 @@ #include #include #include -#include #include #include From 80c5a66b5fc1c9a0922df6b5f5b5d0c452133f7d Mon Sep 17 00:00:00 2001 From: Shruti Shivakumar Date: Wed, 25 Oct 2023 18:35:49 +0000 Subject: [PATCH 06/10] define/delete constructors in column_to_strings_fn struct --- cpp/src/io/json/write_json.cu | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/cpp/src/io/json/write_json.cu b/cpp/src/io/json/write_json.cu index 76c9e3cba5c..c98e93aec49 100644 --- a/cpp/src/io/json/write_json.cu +++ b/cpp/src/io/json/write_json.cu @@ -518,6 +518,11 @@ struct column_to_strings_fn { { } + ~column_to_strings_fn() = default; + column_to_strings_fn& operator=(column_to_strings_fn const&) = delete; + column_to_strings_fn(column_to_strings_fn&&) = default; + column_to_strings_fn& operator=(column_to_strings_fn&&) = default; + // unsupported type of column: template std::enable_if_t(), std::unique_ptr> operator()( From 1d87bfce563d4cc54cc85ba2d0979baf9c0474f5 Mon Sep 17 00:00:00 2001 From: Shruti Shivakumar Date: Wed, 25 Oct 2023 20:08:17 +0000 Subject: [PATCH 07/10] removed unused default stream header --- cpp/include/cudf/io/detail/json.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/cpp/include/cudf/io/detail/json.hpp b/cpp/include/cudf/io/detail/json.hpp index 6930a4fdb25..d0a9543397d 100644 --- a/cpp/include/cudf/io/detail/json.hpp +++ b/cpp/include/cudf/io/detail/json.hpp @@ -17,7 +17,6 @@ #pragma once #include -#include #include From bcc226cd64d8caf4453a4b9dd2ffeb1a3795da8f Mon Sep 17 00:00:00 2001 From: Shruti Shivakumar Date: Thu, 26 Oct 2023 20:02:53 +0000 Subject: [PATCH 08/10] cleanup of rmm default argument --- cpp/tests/streams/io/functions_test.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/cpp/tests/streams/io/functions_test.cpp b/cpp/tests/streams/io/functions_test.cpp index 81bcf99168d..d547831d503 100644 --- a/cpp/tests/streams/io/functions_test.cpp +++ b/cpp/tests/streams/io/functions_test.cpp @@ -40,8 +40,8 @@ TEST_F(FunctionsTest, JSONreader) cudf::data_type{cudf::type_id::FLOAT64}}) .lines(true) .legacy(true); - cudf::io::table_with_metadata result = cudf::io::read_json( - in_options, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource()); + cudf::io::table_with_metadata result = + cudf::io::read_json(in_options, cudf::test::get_default_stream()); } TEST_F(FunctionsTest, JSONwriter) @@ -67,7 +67,5 @@ TEST_F(FunctionsTest, JSONwriter) .lines(false) .na_rep("null"); - cudf::io::write_json(options_builder.build(), - cudf::test::get_default_stream(), - rmm::mr::get_current_device_resource()); + cudf::io::write_json(options_builder.build(), cudf::test::get_default_stream()); } From b219b13ae2f1ad60aadaf290ba77867bf6995c7c Mon Sep 17 00:00:00 2001 From: Shruti Shivakumar Date: Fri, 27 Oct 2023 19:12:38 +0000 Subject: [PATCH 09/10] applying patch https://github.com/rapidsai/cudf/pull/14313/files#r1374549500; deleting copy and move assignment constructors --- cpp/src/io/json/write_json.cu | 78 +++++++++++++++-------------------- 1 file changed, 34 insertions(+), 44 deletions(-) diff --git a/cpp/src/io/json/write_json.cu b/cpp/src/io/json/write_json.cu index c98e93aec49..c211d17f13a 100644 --- a/cpp/src/io/json/write_json.cu +++ b/cpp/src/io/json/write_json.cu @@ -504,24 +504,11 @@ struct column_to_strings_fn { { } - column_to_strings_fn(column_to_strings_fn const& other) - : options_{other.options_}, - stream_{other.stream_}, - mr_{other.mr_}, - narep(other.narep, other.stream_), - struct_value_separator(other.struct_value_separator, other.stream_), - struct_row_begin_wrap(other.struct_row_begin_wrap, other.stream_), - struct_row_end_wrap(other.struct_row_end_wrap, other.stream_), - list_value_separator(other.list_value_separator, other.stream_), - list_row_begin_wrap(other.list_row_begin_wrap, other.stream_), - list_row_end_wrap(other.list_row_end_wrap, other.stream_) - { - } - ~column_to_strings_fn() = default; + column_to_strings_fn(column_to_strings_fn const&) = delete; column_to_strings_fn& operator=(column_to_strings_fn const&) = delete; - column_to_strings_fn(column_to_strings_fn&&) = default; - column_to_strings_fn& operator=(column_to_strings_fn&&) = default; + column_to_strings_fn(column_to_strings_fn&&) = delete; + column_to_strings_fn& operator=(column_to_strings_fn&&) = delete; // unsupported type of column: template @@ -633,17 +620,18 @@ struct column_to_strings_fn { auto child_string_with_null = [&]() { if (child_view.type().id() == type_id::STRUCT) { - return (*this).template operator()( - child_view, - children_names.size() > child_index ? children_names[child_index].children - : std::vector{}); - } else if (child_view.type().id() == type_id::LIST) { - return (*this).template operator()(child_view, + return this->template operator()(child_view, children_names.size() > child_index ? children_names[child_index].children : std::vector{}); + } else if (child_view.type().id() == type_id::LIST) { + return this->template operator()(child_view, + children_names.size() > child_index + ? children_names[child_index].children + : std::vector{}); } else { - return cudf::type_dispatcher(child_view.type(), *this, child_view); + return cudf::type_dispatcher( + child_view.type(), *this, child_view); } }; auto new_offsets = cudf::lists::detail::get_normalized_offsets( @@ -698,27 +686,29 @@ struct column_to_strings_fn { // auto i_col_begin = thrust::make_zip_iterator(thrust::counting_iterator(0), column_begin); - std::transform(i_col_begin, - i_col_begin + num_columns, - std::back_inserter(str_column_vec), - [this, &children_names](auto const& i_current_col) { - auto const i = thrust::get<0>(i_current_col); - auto const& current_col = thrust::get<1>(i_current_col); - // Struct needs children's column names - if (current_col.type().id() == type_id::STRUCT) { - return (*this).template operator()( - current_col, - children_names.size() > i ? children_names[i].children - : std::vector{}); - } else if (current_col.type().id() == type_id::LIST) { - return (*this).template operator()( - current_col, - children_names.size() > i ? children_names[i].children - : std::vector{}); - } else { - return cudf::type_dispatcher(current_col.type(), *this, current_col); - } - }); + std::transform( + i_col_begin, + i_col_begin + num_columns, + std::back_inserter(str_column_vec), + [this, &children_names](auto const& i_current_col) { + auto const i = thrust::get<0>(i_current_col); + auto const& current_col = thrust::get<1>(i_current_col); + // Struct needs children's column names + if (current_col.type().id() == type_id::STRUCT) { + return this->template operator()(current_col, + children_names.size() > i + ? children_names[i].children + : std::vector{}); + } else if (current_col.type().id() == type_id::LIST) { + return this->template operator()(current_col, + children_names.size() > i + ? children_names[i].children + : std::vector{}); + } else { + return cudf::type_dispatcher( + current_col.type(), *this, current_col); + } + }); // create string table view from str_column_vec: // From 13022da1454390e006d6d4032ffd9c6675965ea5 Mon Sep 17 00:00:00 2001 From: Shruti Shivakumar Date: Fri, 27 Oct 2023 19:20:40 +0000 Subject: [PATCH 10/10] json test cleanup; separating tests --- cpp/tests/CMakeLists.txt | 2 +- .../streams/io/{functions_test.cpp => json_test.cpp} | 11 +++-------- 2 files changed, 4 insertions(+), 9 deletions(-) rename cpp/tests/streams/io/{functions_test.cpp => json_test.cpp} (89%) diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 38134f7c465..e9a9c725de3 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -649,7 +649,7 @@ ConfigureTest( ConfigureTest(STREAM_SORTING_TEST streams/sorting_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_TEXT_TEST streams/text/ngrams_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_LISTS_TEST streams/lists_test.cpp STREAM_MODE testing) -ConfigureTest(STREAM_IO_TEST streams/io/functions_test.cpp STREAM_MODE testing) +ConfigureTest(STREAM_JSONIO_TEST streams/io/json_test.cpp STREAM_MODE testing) # ################################################################################################## # Install tests #################################################################################### diff --git a/cpp/tests/streams/io/functions_test.cpp b/cpp/tests/streams/io/json_test.cpp similarity index 89% rename from cpp/tests/streams/io/functions_test.cpp rename to cpp/tests/streams/io/json_test.cpp index d547831d503..80619d4d58c 100644 --- a/cpp/tests/streams/io/functions_test.cpp +++ b/cpp/tests/streams/io/json_test.cpp @@ -28,10 +28,9 @@ #include #include -class FunctionsTest : public cudf::test::BaseFixture {}; +class JSONTest : public cudf::test::BaseFixture {}; -//.dtypes(std::vector{dtype(), dtype()}) -TEST_F(FunctionsTest, JSONreader) +TEST_F(JSONTest, JSONreader) { std::string data = "[1, 1.1]\n[2, 2.2]\n[3, 3.3]\n"; cudf::io::json_reader_options in_options = @@ -44,7 +43,7 @@ TEST_F(FunctionsTest, JSONreader) cudf::io::read_json(in_options, cudf::test::get_default_stream()); } -TEST_F(FunctionsTest, JSONwriter) +TEST_F(JSONTest, JSONwriter) { cudf::test::strings_column_wrapper col1{"a", "b", "c"}; cudf::test::strings_column_wrapper col2{"d", "e", "f"}; @@ -54,10 +53,6 @@ TEST_F(FunctionsTest, JSONwriter) cudf::test::iterators::nulls_at({0, 2})}; cudf::table_view tbl_view{{col1, col2, col3, col4, col5}}; cudf::io::table_metadata mt{{{"col1"}, {"col2"}, {"int"}, {"float"}, {"int16"}}}; - /* - cudf::table_view tbl_view{{col1}}; - cudf::io::table_metadata mt{{{"col1"}}}; - */ std::vector out_buffer; auto destination = cudf::io::sink_info(&out_buffer);