Skip to content

Commit

Permalink
Fix bug in all-null list due to join_list_elements special handling (#…
Browse files Browse the repository at this point in the history
…12767)

All-null list is considered as empty list in `join_list_elements`. So, nulls of children are replaced by `narep` before passing to `join_list_elements` API.
Related #12766

Authors:
  - Karthikeyan (https://github.com/karthikeyann)

Approvers:
  - David Wendt (https://github.com/davidwendt)
  - Nghia Truong (https://github.com/ttnghia)
  - Bradley Dice (https://github.com/bdice)

URL: #12767
  • Loading branch information
karthikeyann authored Feb 16, 2023
1 parent 7190e33 commit a00295a
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 2 deletions.
8 changes: 6 additions & 2 deletions cpp/src/io/json/write_json.cu
Original file line number Diff line number Diff line change
Expand Up @@ -417,7 +417,9 @@ struct column_to_strings_fn {
auto child_view = lists_column_view(column).get_sliced_child(stream_);
auto constexpr child_index = lists_column_view::child_column_index;
auto list_string = [&]() {
auto child_string = [&]() {
// nulls are replaced due to special handling of all-null lists as empty lists
// by join_list_elements
auto child_string_with_null = [&]() {
if (child_view.type().id() == type_id::STRUCT) {
return (*this).template operator()<cudf::struct_view>(
child_view,
Expand All @@ -431,7 +433,9 @@ struct column_to_strings_fn {
} else {
return cudf::type_dispatcher(child_view.type(), *this, child_view);
}
}();
};
auto child_string = cudf::strings::detail::replace_nulls(
child_string_with_null()->view(), narep, stream_, rmm::mr::get_current_device_resource());
auto const list_child_string =
column_view(column.type(),
column.size(),
Expand Down
32 changes: 32 additions & 0 deletions cpp/tests/io/json_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -362,4 +362,36 @@ TEST_F(JsonWriterTest, SpecialChars)
EXPECT_EQ(expected, output_string);
}

TEST_F(JsonWriterTest, NullList)
{
std::string const data = R"(
{"a": [null], "b": [[1, 2, 3], [null], [null, null, null], [4, null, 5]]}
{"a": [2, null, null, 3] , "b": null}
{"a": [null, null, 4], "b": [[2, null], null]}
{"a": [5, null, null], "b": [null, [3, 4, 5]]} )";
cudf::io::json_reader_options in_options =
cudf::io::json_reader_options::builder(cudf::io::source_info{data.data(), data.size()})
.lines(true);

cudf::io::table_with_metadata result = cudf::io::read_json(in_options);
cudf::table_view tbl_view = result.tbl->view();
cudf::io::table_metadata mt{result.metadata};

std::vector<char> out_buffer;
auto destination = cudf::io::sink_info(&out_buffer);
auto options_builder = cudf::io::json_writer_options_builder(destination, tbl_view)
.include_nulls(true)
.metadata(mt)
.lines(true)
.na_rep("null");

cudf::io::write_json(options_builder.build(), rmm::mr::get_current_device_resource());
std::string const expected = R"({"a":[null],"b":[[1,2,3],[null],[null,null,null],[4,null,5]]}
{"a":[2,null,null,3],"b":null}
{"a":[null,null,4],"b":[[2,null],null]}
{"a":[5,null,null],"b":[null,[3,4,5]]}
)";
EXPECT_EQ(expected, std::string(out_buffer.data(), out_buffer.size()));
}

CUDF_TEST_PROGRAM_MAIN()

0 comments on commit a00295a

Please sign in to comment.