Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Preserve column hierarchy when getting NULL row from LIST column #8206

Merged
merged 7 commits into from
May 24, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions cpp/src/copying/get_element.cu
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,8 @@ struct get_element_functor {
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource())
{
bool valid = is_element_valid_sync(input, index, stream);
bool valid = is_element_valid_sync(input, index, stream);
auto const child_col_idx = lists_column_view::child_column_index;

if (valid) {
lists_column_view lcv(input);
Expand All @@ -134,9 +135,11 @@ struct get_element_functor {
lists::detail::copy_slice(lcv, index, index + 1, stream, mr)->release();
// Construct scalar with row data
return std::make_unique<list_scalar>(
std::move(*row_slice_contents.children[1]), valid, stream, mr);
std::move(*row_slice_contents.children[child_col_idx]), valid, stream, mr);
} else {
return make_default_constructed_scalar(data_type(type_id::LIST));
auto empty_row_contents = empty_like(input)->release();
return std::make_unique<list_scalar>(
std::move(*empty_row_contents.children[child_col_idx]), valid, stream, mr);
}
}

Expand Down
2 changes: 1 addition & 1 deletion cpp/src/scalar/scalar_factories.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ template <>
std::unique_ptr<cudf::scalar> default_scalar_functor::operator()<list_view>(
rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr)
{
return std::make_unique<list_scalar>(column(), false, stream, mr);
CUDF_FAIL("list_view type not supported");
}

template <>
Expand Down
74 changes: 57 additions & 17 deletions cpp/tests/copying/get_value_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -223,12 +223,17 @@ TYPED_TEST(ListGetFixedWidthValueTest, NonNestedGetNonNullEmpty)
TYPED_TEST(ListGetFixedWidthValueTest, NonNestedGetNull)
{
using LCW = cudf::test::lists_column_wrapper<TypeParam, int32_t>;
using FCW = cudf::test::fixed_width_column_wrapper<TypeParam>;

LCW col({LCW{1, 2, 34}, LCW{}, LCW{1}, LCW{}}, this->odds_valid());
size_type index = 2;

auto s = get_element(col, index);
auto s = get_element(col, index);
auto typed_s = static_cast<list_scalar const *>(s.get());

EXPECT_FALSE(s->is_valid());
// Test preserve column hierarchy
CUDF_TEST_EXPECT_COLUMNS_EQUAL(typed_s->view(), FCW{});
}

TYPED_TEST(ListGetFixedWidthValueTest, NestedGetNonNullNonEmpty)
Expand Down Expand Up @@ -301,7 +306,9 @@ TYPED_TEST(ListGetFixedWidthValueTest, NestedGetNonNullEmpty)

TYPED_TEST(ListGetFixedWidthValueTest, NestedGetNull)
{
using LCW = cudf::test::lists_column_wrapper<TypeParam, int32_t>;
using LCW = cudf::test::lists_column_wrapper<TypeParam, int32_t>;
using FCW = cudf::test::fixed_width_column_wrapper<TypeParam>;
using offset_t = cudf::test::fixed_width_column_wrapper<offset_type>;

std::vector<valid_type> valid{1, 0, 1, 0};
// clang-format off
Expand All @@ -315,9 +322,15 @@ TYPED_TEST(ListGetFixedWidthValueTest, NestedGetNull)
// clang-format on
size_type index = 1;

auto s = get_element(col, index);
auto s = get_element(col, index);
auto typed_s = static_cast<list_scalar const *>(s.get());

auto expected_data =
make_lists_column(0, offset_t{}.release(), FCW{}.release(), 0, rmm::device_buffer{});

EXPECT_FALSE(s->is_valid());
// Test preserve column hierarchy
CUDF_TEST_EXPECT_COLUMNS_EQUAL(typed_s->view(), *expected_data);
}

struct ListGetStringValueTest : public BaseFixture {
Expand Down Expand Up @@ -363,15 +376,18 @@ TEST_F(ListGetStringValueTest, NonNestedGetNonNullEmpty)

TEST_F(ListGetStringValueTest, NonNestedGetNull)
{
using LCW = cudf::test::lists_column_wrapper<string_view>;
using LCW = cudf::test::lists_column_wrapper<string_view>;
using StringCW = strings_column_wrapper;

std::vector<valid_type> valid{1, 0, 0, 1};
LCW col({LCW{"aaa", "Héllo"}, LCW{}, LCW{""}, LCW{"42"}}, valid.begin());
size_type index = 2;

auto s = get_element(col, index);
auto s = get_element(col, index);
auto typed_s = static_cast<list_scalar const *>(s.get());

EXPECT_FALSE(s->is_valid());
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(typed_s->view(), StringCW{});
}

TEST_F(ListGetStringValueTest, NestedGetNonNullNonEmpty)
Expand Down Expand Up @@ -446,7 +462,9 @@ TEST_F(ListGetStringValueTest, NestedGetNonNullEmpty)

TEST_F(ListGetStringValueTest, NestedGetNull)
{
using LCW = cudf::test::lists_column_wrapper<string_view>;
using LCW = cudf::test::lists_column_wrapper<string_view>;
using offset_t = cudf::test::fixed_width_column_wrapper<offset_type>;
using StringCW = cudf::test::strings_column_wrapper;

std::vector<valid_type> valid{0, 0, 1, 1};
// clang-format off
Expand All @@ -458,11 +476,16 @@ TEST_F(ListGetStringValueTest, NestedGetNull)
LCW{}
}, valid.begin());
// clang-format on
LCW expected_data{};
size_type index = 0;

auto s = get_element(col, index);
auto s = get_element(col, index);
auto typed_s = static_cast<list_scalar const *>(s.get());

auto expected_data =
make_lists_column(0, offset_t{}.release(), StringCW{}.release(), 0, rmm::device_buffer{});

EXPECT_FALSE(s->is_valid());
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*expected_data, typed_s->view());
}

/**
Expand All @@ -480,10 +503,11 @@ struct ListGetStructValueTest : public BaseFixture {
* in `initializer_list`. However this is an expensive function because it repeatedly
* calls `cudf::set_null_mask` for each row.
*/
std::unique_ptr<cudf::column> make_test_lists_column(size_type num_lists,
fixed_width_column_wrapper<int32_t> offsets,
std::unique_ptr<cudf::column> child,
std::initializer_list<valid_type> null_mask)
std::unique_ptr<cudf::column> make_test_lists_column(
size_type num_lists,
fixed_width_column_wrapper<offset_type> offsets,
std::unique_ptr<cudf::column> child,
std::initializer_list<valid_type> null_mask)
{
size_type null_count = num_lists - std::accumulate(null_mask.begin(), null_mask.end(), 0);
auto d_null_mask = cudf::create_null_mask(
Expand Down Expand Up @@ -619,7 +643,7 @@ TYPED_TEST(ListGetStructValueTest, NonNestedGetNonNullEmpty)
// 3-rows
// [{1, NULL, NULL}, NULL]
// [{3, "xyz", [3, 8, 4]}]
// [] <- get_element(0)
// [] <- get_element(2)

auto list_column = this->make_test_lists_column(3, {0, 2, 3, 3}, this->leaf_data(), {1, 1, 1});
size_type index = 2;
Expand All @@ -638,15 +662,21 @@ TYPED_TEST(ListGetStructValueTest, NonNestedGetNonNullEmpty)
TYPED_TEST(ListGetStructValueTest, NonNestedGetNull)
{
// 2-rows
// NULL <- get_element(0)
// NULL <- get_element(0)
// [{3, "xyz", [3, 8, 4]}]

using valid_t = std::vector<valid_type>;

auto list_column = this->make_test_lists_column(2, {0, 2, 3}, this->leaf_data(), {0, 1});
size_type index = 0;

auto s = get_element(list_column->view(), index);
auto s = get_element(list_column->view(), index);
auto typed_s = static_cast<list_scalar const *>(s.get());

auto expected_data = this->make_test_structs_column({}, {}, {}, valid_t{}.begin());

EXPECT_FALSE(s->is_valid());
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(typed_s->view(), expected_data);
}

TYPED_TEST(ListGetStructValueTest, NestedGetNonNullNonEmpty)
Expand Down Expand Up @@ -716,7 +746,7 @@ TYPED_TEST(ListGetStructValueTest, NestedGetNonNullEmpty)
{
// 3-rows
// [[{1, NULL, NULL}, NULL]]
// [] <- get_element(1)
// [] <- get_element(1)
// [[{3, "xyz", [3, 8, 4]}]]

auto list_column = this->make_test_lists_column(2, {0, 2, 3}, this->leaf_data(), {1, 1});
Expand All @@ -741,15 +771,25 @@ TYPED_TEST(ListGetStructValueTest, NestedGetNull)
// 3-rows
// [[{1, NULL, NULL}, NULL]]
// []
// NULL <- get_element(1)
// NULL <- get_element(2)

using valid_t = std::vector<valid_type>;
using offset_t = cudf::test::fixed_width_column_wrapper<offset_type>;

auto list_column = this->make_test_lists_column(2, {0, 2, 3}, this->leaf_data(), {1, 1});
auto list_column_nested =
this->make_test_lists_column(3, {0, 1, 1, 2}, std::move(list_column), {1, 1, 0});

size_type index = 2;
auto s = get_element(list_column_nested->view(), index);
auto typed_s = static_cast<list_scalar const *>(s.get());

auto nested = this->make_test_structs_column({}, {}, {}, valid_t{}.begin());
auto expected_data =
make_lists_column(0, offset_t{}.release(), nested.release(), 0, rmm::device_buffer{});

EXPECT_FALSE(s->is_valid());
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*expected_data, typed_s->view());
}

} // namespace test
Expand Down