From ac2fcee15a1db8c7e440d17dee93787128f328f7 Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Tue, 14 Feb 2023 02:09:35 +0530 Subject: [PATCH 1/3] fix bug in all-null list due to join_list_elements special handling --- cpp/src/io/json/write_json.cu | 37 +++++++++++++++++++++-------------- cpp/tests/io/json_writer.cpp | 32 ++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 15 deletions(-) diff --git a/cpp/src/io/json/write_json.cu b/cpp/src/io/json/write_json.cu index 9849629015d..1e6bb6c89ad 100644 --- a/cpp/src/io/json/write_json.cu +++ b/cpp/src/io/json/write_json.cu @@ -417,21 +417,28 @@ struct column_to_strings_fn { auto child_view = lists_column_view(column).get_sliced_child(stream_); auto constexpr child_index = lists_column_view::child_column_index; auto list_string = [&]() { - auto child_string = [&]() { - if (child_view.type().id() == type_id::STRUCT) { - return (*this).template operator()( - child_view, - children_names.size() > child_index ? children_names[child_index].children - : std::vector{}); - } else if (child_view.type().id() == type_id::LIST) { - return (*this).template operator()( - child_view, - children_names.size() > child_index ? children_names[child_index].children - : std::vector{}); - } else { - return cudf::type_dispatcher(child_view.type(), *this, child_view); - } - }(); + // nulls are replaced due to special handling of all-null lists as empty lists + // by join_list_elements + auto child_string = cudf::strings::detail::replace_nulls( + [&]() { + if (child_view.type().id() == type_id::STRUCT) { + return (*this).template operator()( + child_view, + children_names.size() > child_index ? children_names[child_index].children + : std::vector{}); + } else if (child_view.type().id() == type_id::LIST) { + return (*this).template operator()( + child_view, + children_names.size() > child_index ? children_names[child_index].children + : std::vector{}); + } else { + return cudf::type_dispatcher(child_view.type(), *this, child_view); + } + }() + ->view(), + narep, + stream_, + mr_); auto const list_child_string = column_view(column.type(), column.size(), diff --git a/cpp/tests/io/json_writer.cpp b/cpp/tests/io/json_writer.cpp index d129ed306e4..702315d6a97 100644 --- a/cpp/tests/io/json_writer.cpp +++ b/cpp/tests/io/json_writer.cpp @@ -362,4 +362,36 @@ TEST_F(JsonWriterTest, SpecialChars) EXPECT_EQ(expected, output_string); } +TEST_F(JsonWriterTest, NullList) +{ + std::string const data = R"( +{"a": [null], "b": [[1, 2, 3], [null], [null, null, null], [4, null, 5]]} +{"a": [2, null, null, 3] , "b": null} +{"a": [null, null, 4], "b": [[2, null], null]} +{"a": [5, null, null], "b": [null, [3, 4, 5]]} )"; + cudf::io::json_reader_options in_options = + cudf::io::json_reader_options::builder(cudf::io::source_info{data.data(), data.size()}) + .lines(true); + + cudf::io::table_with_metadata result = cudf::io::read_json(in_options); + cudf::table_view tbl_view = result.tbl->view(); + cudf::io::table_metadata mt{result.metadata}; + + std::vector out_buffer; + auto destination = cudf::io::sink_info(&out_buffer); + auto options_builder = cudf::io::json_writer_options_builder(destination, tbl_view) + .include_nulls(true) + .metadata(mt) + .lines(true) + .na_rep("null"); + + cudf::io::write_json(options_builder.build(), rmm::mr::get_current_device_resource()); + std::string const expected = R"({"a":[null],"b":[[1,2,3],[null],[null,null,null],[4,null,5]]} +{"a":[2,null,null,3],"b":null} +{"a":[null,null,4],"b":[[2,null],null]} +{"a":[5,null,null],"b":[null,[3,4,5]]} +)"; + EXPECT_EQ(expected, std::string(out_buffer.data(), out_buffer.size())); +} + CUDF_TEST_PROGRAM_MAIN() From 8acb3c3648d7881313492afeb2c3e5f1e7395486 Mon Sep 17 00:00:00 2001 From: Karthikeyan <6488848+karthikeyann@users.noreply.github.com> Date: Wed, 15 Feb 2023 18:58:09 +0530 Subject: [PATCH 2/3] Update cpp/src/io/json/write_json.cu --- cpp/src/io/json/write_json.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/io/json/write_json.cu b/cpp/src/io/json/write_json.cu index 1e6bb6c89ad..e9deb88cb52 100644 --- a/cpp/src/io/json/write_json.cu +++ b/cpp/src/io/json/write_json.cu @@ -438,7 +438,7 @@ struct column_to_strings_fn { ->view(), narep, stream_, - mr_); + rmm::mr::get_current_device_resource()); auto const list_child_string = column_view(column.type(), column.size(), From a27938f5c360ac28300552c455d1ac06dab8b50d Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Thu, 16 Feb 2023 22:26:04 +0530 Subject: [PATCH 3/3] split for readability --- cpp/src/io/json/write_json.cu | 35 ++++++++++++++++------------------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/cpp/src/io/json/write_json.cu b/cpp/src/io/json/write_json.cu index e9deb88cb52..a7ae4d3bdd1 100644 --- a/cpp/src/io/json/write_json.cu +++ b/cpp/src/io/json/write_json.cu @@ -419,26 +419,23 @@ struct column_to_strings_fn { auto list_string = [&]() { // nulls are replaced due to special handling of all-null lists as empty lists // by join_list_elements + auto child_string_with_null = [&]() { + if (child_view.type().id() == type_id::STRUCT) { + return (*this).template operator()( + child_view, + children_names.size() > child_index ? children_names[child_index].children + : std::vector{}); + } else if (child_view.type().id() == type_id::LIST) { + return (*this).template operator()( + child_view, + children_names.size() > child_index ? children_names[child_index].children + : std::vector{}); + } else { + return cudf::type_dispatcher(child_view.type(), *this, child_view); + } + }; auto child_string = cudf::strings::detail::replace_nulls( - [&]() { - if (child_view.type().id() == type_id::STRUCT) { - return (*this).template operator()( - child_view, - children_names.size() > child_index ? children_names[child_index].children - : std::vector{}); - } else if (child_view.type().id() == type_id::LIST) { - return (*this).template operator()( - child_view, - children_names.size() > child_index ? children_names[child_index].children - : std::vector{}); - } else { - return cudf::type_dispatcher(child_view.type(), *this, child_view); - } - }() - ->view(), - narep, - stream_, - rmm::mr::get_current_device_resource()); + child_string_with_null()->view(), narep, stream_, rmm::mr::get_current_device_resource()); auto const list_child_string = column_view(column.type(), column.size(),