From a00295a4db71c7c0c1396d2c0539eecee74a4ac5 Mon Sep 17 00:00:00 2001 From: Karthikeyan <6488848+karthikeyann@users.noreply.github.com> Date: Fri, 17 Feb 2023 01:37:56 +0530 Subject: [PATCH] Fix bug in all-null list due to join_list_elements special handling (#12767) All-null list is considered as empty list in `join_list_elements`. So, nulls of children are replaced by `narep` before passing to `join_list_elements` API. Related https://github.com/rapidsai/cudf/issues/12766 Authors: - Karthikeyan (https://github.com/karthikeyann) Approvers: - David Wendt (https://github.com/davidwendt) - Nghia Truong (https://github.com/ttnghia) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/12767 --- cpp/src/io/json/write_json.cu | 8 ++++++-- cpp/tests/io/json_writer.cpp | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/cpp/src/io/json/write_json.cu b/cpp/src/io/json/write_json.cu index 9849629015d..a7ae4d3bdd1 100644 --- a/cpp/src/io/json/write_json.cu +++ b/cpp/src/io/json/write_json.cu @@ -417,7 +417,9 @@ struct column_to_strings_fn { auto child_view = lists_column_view(column).get_sliced_child(stream_); auto constexpr child_index = lists_column_view::child_column_index; auto list_string = [&]() { - auto child_string = [&]() { + // nulls are replaced due to special handling of all-null lists as empty lists + // by join_list_elements + auto child_string_with_null = [&]() { if (child_view.type().id() == type_id::STRUCT) { return (*this).template operator()<cudf::struct_view>( child_view, @@ -431,7 +433,9 @@ struct column_to_strings_fn { } else { return cudf::type_dispatcher(child_view.type(), *this, child_view); } - }(); + }; + auto child_string = cudf::strings::detail::replace_nulls( + child_string_with_null()->view(), narep, stream_, rmm::mr::get_current_device_resource()); auto const list_child_string = column_view(column.type(), column.size(), diff --git a/cpp/tests/io/json_writer.cpp 
b/cpp/tests/io/json_writer.cpp index d129ed306e4..702315d6a97 100644 --- a/cpp/tests/io/json_writer.cpp +++ b/cpp/tests/io/json_writer.cpp @@ -362,4 +362,36 @@ TEST_F(JsonWriterTest, SpecialChars) EXPECT_EQ(expected, output_string); } +TEST_F(JsonWriterTest, NullList) +{ + std::string const data = R"( +{"a": [null], "b": [[1, 2, 3], [null], [null, null, null], [4, null, 5]]} +{"a": [2, null, null, 3] , "b": null} +{"a": [null, null, 4], "b": [[2, null], null]} +{"a": [5, null, null], "b": [null, [3, 4, 5]]} )"; + cudf::io::json_reader_options in_options = + cudf::io::json_reader_options::builder(cudf::io::source_info{data.data(), data.size()}) + .lines(true); + + cudf::io::table_with_metadata result = cudf::io::read_json(in_options); + cudf::table_view tbl_view = result.tbl->view(); + cudf::io::table_metadata mt{result.metadata}; + + std::vector<char> out_buffer; + auto destination = cudf::io::sink_info(&out_buffer); + auto options_builder = cudf::io::json_writer_options_builder(destination, tbl_view) + .include_nulls(true) + .metadata(mt) + .lines(true) + .na_rep("null"); + + cudf::io::write_json(options_builder.build(), rmm::mr::get_current_device_resource()); + std::string const expected = R"({"a":[null],"b":[[1,2,3],[null],[null,null,null],[4,null,5]]} +{"a":[2,null,null,3],"b":null} +{"a":[null,null,4],"b":[[2,null],null]} +{"a":[5,null,null],"b":[null,[3,4,5]]} +)"; + EXPECT_EQ(expected, std::string(out_buffer.data(), out_buffer.size())); +} + CUDF_TEST_PROGRAM_MAIN()