Skip to content

Commit

Permalink
Preserve column hierarchy when getting NULL row from LIST column (#…
Browse files Browse the repository at this point in the history
…8206)

This PR fixes a bug introduced in #8071, when `get_element` retrieves a NULL row in a nested column, the scalar returned not only should be `is_valid() == false`, but also should preserve the column hierarchy of the row-data, even they are invalid. Because depending libraries may use the column hierarchy to deduce the nested type of the column.

This PR also reverts `make_default_constructed_scalar` API for `LIST` type. A `LIST` type scalar should have complete column hierarchy as part of its type information. There isn't enough information provided to the API to construct that.

Another tiny addition: instead of hard coding the position of child column, use `list_column_view::child_column_index` intead.

Authors:
  - Michael Wang (https://github.com/isVoid)

Approvers:
  - Conor Hoekstra (https://github.com/codereport)
  - Paul Taylor (https://github.com/trxcllnt)

URL: #8206
  • Loading branch information
isVoid authored May 24, 2021
1 parent 7e725b5 commit 7eaf3d7
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 21 deletions.
9 changes: 6 additions & 3 deletions cpp/src/copying/get_element.cu
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,8 @@ struct get_element_functor {
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource())
{
bool valid = is_element_valid_sync(input, index, stream);
bool valid = is_element_valid_sync(input, index, stream);
auto const child_col_idx = lists_column_view::child_column_index;

if (valid) {
lists_column_view lcv(input);
Expand All @@ -134,9 +135,11 @@ struct get_element_functor {
lists::detail::copy_slice(lcv, index, index + 1, stream, mr)->release();
// Construct scalar with row data
return std::make_unique<list_scalar>(
std::move(*row_slice_contents.children[1]), valid, stream, mr);
std::move(*row_slice_contents.children[child_col_idx]), valid, stream, mr);
} else {
return make_default_constructed_scalar(data_type(type_id::LIST));
auto empty_row_contents = empty_like(input)->release();
return std::make_unique<list_scalar>(
std::move(*empty_row_contents.children[child_col_idx]), valid, stream, mr);
}
}

Expand Down
2 changes: 1 addition & 1 deletion cpp/src/scalar/scalar_factories.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ template <>
std::unique_ptr<cudf::scalar> default_scalar_functor::operator()<list_view>(
rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr)
{
return std::make_unique<list_scalar>(column(), false, stream, mr);
CUDF_FAIL("list_view type not supported");
}

template <>
Expand Down
74 changes: 57 additions & 17 deletions cpp/tests/copying/get_value_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -223,12 +223,17 @@ TYPED_TEST(ListGetFixedWidthValueTest, NonNestedGetNonNullEmpty)
TYPED_TEST(ListGetFixedWidthValueTest, NonNestedGetNull)
{
using LCW = cudf::test::lists_column_wrapper<TypeParam, int32_t>;
using FCW = cudf::test::fixed_width_column_wrapper<TypeParam>;

LCW col({LCW{1, 2, 34}, LCW{}, LCW{1}, LCW{}}, this->odds_valid());
size_type index = 2;

auto s = get_element(col, index);
auto s = get_element(col, index);
auto typed_s = static_cast<list_scalar const *>(s.get());

EXPECT_FALSE(s->is_valid());
// Test preserve column hierarchy
CUDF_TEST_EXPECT_COLUMNS_EQUAL(typed_s->view(), FCW{});
}

TYPED_TEST(ListGetFixedWidthValueTest, NestedGetNonNullNonEmpty)
Expand Down Expand Up @@ -301,7 +306,9 @@ TYPED_TEST(ListGetFixedWidthValueTest, NestedGetNonNullEmpty)

TYPED_TEST(ListGetFixedWidthValueTest, NestedGetNull)
{
using LCW = cudf::test::lists_column_wrapper<TypeParam, int32_t>;
using LCW = cudf::test::lists_column_wrapper<TypeParam, int32_t>;
using FCW = cudf::test::fixed_width_column_wrapper<TypeParam>;
using offset_t = cudf::test::fixed_width_column_wrapper<offset_type>;

std::vector<valid_type> valid{1, 0, 1, 0};
// clang-format off
Expand All @@ -315,9 +322,15 @@ TYPED_TEST(ListGetFixedWidthValueTest, NestedGetNull)
// clang-format on
size_type index = 1;

auto s = get_element(col, index);
auto s = get_element(col, index);
auto typed_s = static_cast<list_scalar const *>(s.get());

auto expected_data =
make_lists_column(0, offset_t{}.release(), FCW{}.release(), 0, rmm::device_buffer{});

EXPECT_FALSE(s->is_valid());
// Test preserve column hierarchy
CUDF_TEST_EXPECT_COLUMNS_EQUAL(typed_s->view(), *expected_data);
}

struct ListGetStringValueTest : public BaseFixture {
Expand Down Expand Up @@ -363,15 +376,18 @@ TEST_F(ListGetStringValueTest, NonNestedGetNonNullEmpty)

TEST_F(ListGetStringValueTest, NonNestedGetNull)
{
using LCW = cudf::test::lists_column_wrapper<string_view>;
using LCW = cudf::test::lists_column_wrapper<string_view>;
using StringCW = strings_column_wrapper;

std::vector<valid_type> valid{1, 0, 0, 1};
LCW col({LCW{"aaa", "Héllo"}, LCW{}, LCW{""}, LCW{"42"}}, valid.begin());
size_type index = 2;

auto s = get_element(col, index);
auto s = get_element(col, index);
auto typed_s = static_cast<list_scalar const *>(s.get());

EXPECT_FALSE(s->is_valid());
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(typed_s->view(), StringCW{});
}

TEST_F(ListGetStringValueTest, NestedGetNonNullNonEmpty)
Expand Down Expand Up @@ -446,7 +462,9 @@ TEST_F(ListGetStringValueTest, NestedGetNonNullEmpty)

TEST_F(ListGetStringValueTest, NestedGetNull)
{
using LCW = cudf::test::lists_column_wrapper<string_view>;
using LCW = cudf::test::lists_column_wrapper<string_view>;
using offset_t = cudf::test::fixed_width_column_wrapper<offset_type>;
using StringCW = cudf::test::strings_column_wrapper;

std::vector<valid_type> valid{0, 0, 1, 1};
// clang-format off
Expand All @@ -458,11 +476,16 @@ TEST_F(ListGetStringValueTest, NestedGetNull)
LCW{}
}, valid.begin());
// clang-format on
LCW expected_data{};
size_type index = 0;

auto s = get_element(col, index);
auto s = get_element(col, index);
auto typed_s = static_cast<list_scalar const *>(s.get());

auto expected_data =
make_lists_column(0, offset_t{}.release(), StringCW{}.release(), 0, rmm::device_buffer{});

EXPECT_FALSE(s->is_valid());
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*expected_data, typed_s->view());
}

/**
Expand All @@ -480,10 +503,11 @@ struct ListGetStructValueTest : public BaseFixture {
* in `initializer_list`. However this is an expensive function because it repeatedly
* calls `cudf::set_null_mask` for each row.
*/
std::unique_ptr<cudf::column> make_test_lists_column(size_type num_lists,
fixed_width_column_wrapper<int32_t> offsets,
std::unique_ptr<cudf::column> child,
std::initializer_list<valid_type> null_mask)
std::unique_ptr<cudf::column> make_test_lists_column(
size_type num_lists,
fixed_width_column_wrapper<offset_type> offsets,
std::unique_ptr<cudf::column> child,
std::initializer_list<valid_type> null_mask)
{
size_type null_count = num_lists - std::accumulate(null_mask.begin(), null_mask.end(), 0);
auto d_null_mask = cudf::create_null_mask(
Expand Down Expand Up @@ -619,7 +643,7 @@ TYPED_TEST(ListGetStructValueTest, NonNestedGetNonNullEmpty)
// 3-rows
// [{1, NULL, NULL}, NULL]
// [{3, "xyz", [3, 8, 4]}]
// [] <- get_element(0)
// [] <- get_element(2)

auto list_column = this->make_test_lists_column(3, {0, 2, 3, 3}, this->leaf_data(), {1, 1, 1});
size_type index = 2;
Expand All @@ -638,15 +662,21 @@ TYPED_TEST(ListGetStructValueTest, NonNestedGetNonNullEmpty)
TYPED_TEST(ListGetStructValueTest, NonNestedGetNull)
{
// 2-rows
// NULL <- get_element(0)
// NULL <- get_element(0)
// [{3, "xyz", [3, 8, 4]}]

using valid_t = std::vector<valid_type>;

auto list_column = this->make_test_lists_column(2, {0, 2, 3}, this->leaf_data(), {0, 1});
size_type index = 0;

auto s = get_element(list_column->view(), index);
auto s = get_element(list_column->view(), index);
auto typed_s = static_cast<list_scalar const *>(s.get());

auto expected_data = this->make_test_structs_column({}, {}, {}, valid_t{}.begin());

EXPECT_FALSE(s->is_valid());
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(typed_s->view(), expected_data);
}

TYPED_TEST(ListGetStructValueTest, NestedGetNonNullNonEmpty)
Expand Down Expand Up @@ -716,7 +746,7 @@ TYPED_TEST(ListGetStructValueTest, NestedGetNonNullEmpty)
{
// 3-rows
// [[{1, NULL, NULL}, NULL]]
// [] <- get_element(1)
// [] <- get_element(1)
// [[{3, "xyz", [3, 8, 4]}]]

auto list_column = this->make_test_lists_column(2, {0, 2, 3}, this->leaf_data(), {1, 1});
Expand All @@ -741,15 +771,25 @@ TYPED_TEST(ListGetStructValueTest, NestedGetNull)
// 3-rows
// [[{1, NULL, NULL}, NULL]]
// []
// NULL <- get_element(1)
// NULL <- get_element(2)

using valid_t = std::vector<valid_type>;
using offset_t = cudf::test::fixed_width_column_wrapper<offset_type>;

auto list_column = this->make_test_lists_column(2, {0, 2, 3}, this->leaf_data(), {1, 1});
auto list_column_nested =
this->make_test_lists_column(3, {0, 1, 1, 2}, std::move(list_column), {1, 1, 0});

size_type index = 2;
auto s = get_element(list_column_nested->view(), index);
auto typed_s = static_cast<list_scalar const *>(s.get());

auto nested = this->make_test_structs_column({}, {}, {}, valid_t{}.begin());
auto expected_data =
make_lists_column(0, offset_t{}.release(), nested.release(), 0, rmm::device_buffer{});

EXPECT_FALSE(s->is_valid());
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*expected_data, typed_s->view());
}

} // namespace test
Expand Down

0 comments on commit 7eaf3d7

Please sign in to comment.