Skip to content

Commit

Permalink
Handle sliced structs properly in pack/contiguous_split. (#8739)
Browse files Browse the repository at this point in the history
Pre-sliced struct columns (those with a positive offset) were not being handled correctly.

Dependent PR has been merged.

Authors:
  - https://github.com/nvdbaranec

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Robert Maynard (https://github.com/robertmaynard)
  - Vukasin Milovanovic (https://github.com/vuule)

URL: #8739
  • Loading branch information
nvdbaranec authored Jul 19, 2021
1 parent 43de98a commit bb9b1be
Show file tree
Hide file tree
Showing 3 changed files with 262 additions and 41 deletions.
44 changes: 35 additions & 9 deletions cpp/src/copying/contiguous_split.cu
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/detail/utilities/cuda.cuh>
#include <cudf/lists/lists_column_view.hpp>
#include <cudf/structs/structs_column_view.hpp>
#include <cudf/table/table_view.hpp>
#include <cudf/utilities/bit.hpp>

Expand Down Expand Up @@ -595,8 +596,15 @@ std::pair<src_buf_info*, size_type> buf_info_functor::operator()<cudf::struct_vi
offset_stack_pos += offset_depth;

// recurse on children
return setup_source_buf_info(col.child_begin(),
col.child_end(),
cudf::structs_column_view scv(col);
std::vector<column_view> sliced_children;
sliced_children.reserve(scv.num_children());
std::transform(thrust::make_counting_iterator(0),
thrust::make_counting_iterator(scv.num_children()),
std::back_inserter(sliced_children),
[&scv](size_type child_index) { return scv.get_sliced_child(child_index); });
return setup_source_buf_info(sliced_children.begin(),
sliced_children.end(),
head,
current,
offset_stack_pos,
Expand Down Expand Up @@ -783,17 +791,35 @@ std::vector<packed_table> contiguous_split(cudf::table_view const& input,

// if inputs are empty, just return num_partitions empty tables
if (input.column(0).size() == 0) {
// sanitize the inputs (to handle corner cases like sliced tables)
std::vector<std::unique_ptr<column>> empty_columns;
empty_columns.reserve(input.num_columns());
std::transform(
input.begin(), input.end(), std::back_inserter(empty_columns), [](column_view const& col) {
return cudf::empty_like(col);
});
std::vector<cudf::column_view> empty_column_views;
empty_column_views.reserve(input.num_columns());
std::transform(empty_columns.begin(),
empty_columns.end(),
std::back_inserter(empty_column_views),
[](std::unique_ptr<column> const& col) { return col->view(); });
table_view empty_inputs(empty_column_views);

// build the empty results
std::vector<packed_table> result;
result.reserve(num_partitions);
auto iter = thrust::make_counting_iterator(0);
std::transform(
iter, iter + num_partitions, std::back_inserter(result), [&input](int partition_index) {
return packed_table{input,
packed_columns{std::make_unique<packed_columns::metadata>(pack_metadata(
input, static_cast<uint8_t const*>(nullptr), 0)),
std::make_unique<rmm::device_buffer>()}};
});
std::transform(iter,
iter + num_partitions,
std::back_inserter(result),
[&empty_inputs](int partition_index) {
return packed_table{
empty_inputs,
packed_columns{std::make_unique<packed_columns::metadata>(pack_metadata(
empty_inputs, static_cast<uint8_t const*>(nullptr), 0)),
std::make_unique<rmm::device_buffer>()}};
});

return result;
}
Expand Down
97 changes: 73 additions & 24 deletions cpp/tests/copying/pack_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -444,32 +444,59 @@ TEST_F(PackUnpackTest, NestedEmpty)

TEST_F(PackUnpackTest, NestedSliced)
{
auto valids =
cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2 == 0; });
// list
{
auto valids =
cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2 == 0; });

using LCW = cudf::test::lists_column_wrapper<int>;
using LCW = cudf::test::lists_column_wrapper<int>;

cudf::test::lists_column_wrapper<int> col0{{{{1, 2, 3}, valids}, {4, 5}},
{{LCW{}, LCW{}, {7, 8}, LCW{}}, valids},
{{6, 12}},
{{{7, 8}, {{9, 10, 11}, valids}, LCW{}}, valids},
{{LCW{}, {-1, -2, -3, -4, -5}}, valids},
{LCW{}},
{{-10}, {-100, -200}}};

cudf::test::strings_column_wrapper col1{
"Vimes", "Carrot", "Angua", "Cheery", "Detritus", "Slant", "Fred"};
cudf::test::fixed_width_column_wrapper<float> col2{1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f};

std::vector<std::unique_ptr<cudf::column>> children;
children.push_back(std::make_unique<cudf::column>(col2));
children.push_back(std::make_unique<cudf::column>(col0));
children.push_back(std::make_unique<cudf::column>(col1));
auto col3 = cudf::make_structs_column(
static_cast<cudf::column_view>(col0).size(), std::move(children), 0, rmm::device_buffer{});

cudf::table_view t({col0, col1, col2, *col3});
this->run_test(t);
cudf::test::lists_column_wrapper<int> col0{{{{1, 2, 3}, valids}, {4, 5}},
{{LCW{}, LCW{}, {7, 8}, LCW{}}, valids},
{{6, 12}},
{{{7, 8}, {{9, 10, 11}, valids}, LCW{}}, valids},
{{LCW{}, {-1, -2, -3, -4, -5}}, valids},
{LCW{}},
{{-10}, {-100, -200}}};

cudf::test::strings_column_wrapper col1{
"Vimes", "Carrot", "Angua", "Cheery", "Detritus", "Slant", "Fred"};
cudf::test::fixed_width_column_wrapper<float> col2{1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f};

std::vector<std::unique_ptr<cudf::column>> children;
children.push_back(std::make_unique<cudf::column>(col2));
children.push_back(std::make_unique<cudf::column>(col0));
children.push_back(std::make_unique<cudf::column>(col1));
auto col3 = cudf::make_structs_column(
static_cast<cudf::column_view>(col0).size(), std::move(children), 0, rmm::device_buffer{});

cudf::table_view t({col0, col1, col2, *col3});
this->run_test(t);
}

// struct
{
cudf::test::fixed_width_column_wrapper<int> a{0, 1, 2, 3, 4, 5, 6, 7};
cudf::test::fixed_width_column_wrapper<float> b{{0, -1, -2, -3, -4, -5, -6, -7},
{1, 1, 1, 0, 0, 0, 0, 1}};
cudf::test::strings_column_wrapper c{{"abc", "def", "ghi", "jkl", "mno", "", "st", "uvwx"},
{0, 0, 1, 1, 1, 1, 1, 1}};
std::vector<bool> list_validity{1, 0, 1, 0, 1, 0, 1, 1};
cudf::test::lists_column_wrapper<int16_t> d{
{{0, 1}, {2, 3, 4}, {5, 6}, {7}, {8, 9, 10}, {11, 12}, {}, {15, 16, 17}},
list_validity.begin()};
cudf::test::fixed_width_column_wrapper<int> _a{10, 20, 30, 40, 50, 60, 70, 80};
cudf::test::fixed_width_column_wrapper<float> _b{-10, -20, -30, -40, -50, -60, -70, -80};
cudf::test::strings_column_wrapper _c{"aa", "", "ccc", "dddd", "eeeee", "f", "gg", "hhh"};
cudf::test::structs_column_wrapper e({_a, _b, _c}, {1, 1, 1, 0, 1, 1, 1, 0});
cudf::test::structs_column_wrapper s({a, b, c, d, e}, {1, 1, 0, 1, 1, 1, 1, 1});

auto split = cudf::split(s, {2, 5});

this->run_test(cudf::table_view({split[0]}));
this->run_test(cudf::table_view({split[1]}));
this->run_test(cudf::table_view({split[2]}));
}
}

TEST_F(PackUnpackTest, EmptyTable)
Expand All @@ -490,6 +517,28 @@ TEST_F(PackUnpackTest, EmptyTable)
}
}

TEST_F(PackUnpackTest, SlicedEmpty)
{
// empty sliced column. this is specifically testing the corner case:
// - a sliced column of size 0
// - having children that are of size > 0
//
cudf::test::strings_column_wrapper a{"abc", "def", "ghi", "jkl", "mno", "", "st", "uvwx"};
cudf::test::lists_column_wrapper<int> b{
{0, 1}, {2}, {3, 4, 5}, {6, 7}, {8, 9}, {10}, {11, 12}, {13, 14}};
cudf::test::fixed_width_column_wrapper<float> c{0, 1, 2, 3, 4, 5, 6, 7};
cudf::test::strings_column_wrapper _a{"abc", "def", "ghi", "jkl", "mno", "", "st", "uvwx"};
cudf::test::lists_column_wrapper<float> _b{
{0, 1}, {2}, {3, 4, 5}, {6, 7}, {8, 9}, {10}, {11, 12}, {13, 14}};
cudf::test::fixed_width_column_wrapper<float> _c{0, 1, 2, 3, 4, 5, 6, 7};
cudf::test::structs_column_wrapper d({_a, _b, _c});

cudf::table_view t({a, b, c, d});

auto sliced = cudf::split(t, {0});
this->run_test(sliced[0]);
}

// clang-format on

} // namespace test
Expand Down
162 changes: 154 additions & 8 deletions cpp/tests/copying/split_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1525,24 +1525,25 @@ TEST_F(ContiguousSplitTableCornerCases, PreSplitTable)
auto col3 = cudf::make_structs_column(
static_cast<cudf::column_view>(col0).size(), std::move(children), 0, rmm::device_buffer{});

cudf::table_view t({col0, col1, col2, *col3});
auto pre_split = cudf::split(t, {1});

{
cudf::table_view t({col0, col1, col2, *col3});
std::vector<cudf::size_type> splits{1, 4};

auto result = cudf::contiguous_split(t, splits);
auto expected = cudf::split(t, splits);
auto result = cudf::contiguous_split(pre_split[1], splits);
auto expected = cudf::split(pre_split[1], splits);

for (size_t index = 0; index < expected.size(); index++) {
CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected[index], result[index].table);
}
}

{
cudf::table_view t({col0, col1, col2, *col3});
std::vector<cudf::size_type> splits{0, 6};
std::vector<cudf::size_type> splits{0, 5};

auto result = cudf::contiguous_split(t, splits);
auto expected = cudf::split(t, splits);
auto result = cudf::contiguous_split(pre_split[1], splits);
auto expected = cudf::split(pre_split[1], splits);

for (size_t index = 0; index < expected.size(); index++) {
CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected[index], result[index].table);
Expand Down Expand Up @@ -1593,6 +1594,118 @@ TEST_F(ContiguousSplitTableCornerCases, PreSplitTableLarge)
}
}

TEST_F(ContiguousSplitTableCornerCases, PreSplitList)
{
// list<list<int>>
{
cudf::test::lists_column_wrapper<int> list{{{1, 2}, {3, 4}},
{{5, 6}, {7}, {8, 9, 10}},
{{11, 12}, {13}},
{{14, 15, 16}, {17, 18}, {}},
{{-1, -2, -3}, {-4, -5, -6, -7}},
{{-8, -9}, {-10, -11}},
{{-12, -13}, {-14}, {-15, -16}},
{{-17, -18}, {}, {-19, -20}}};
auto pre_split = cudf::split(list, {2});

cudf::table_view t({pre_split[1]});
auto result = cudf::contiguous_split(t, {3, 4});
auto expected = cudf::split(t, {3, 4});

auto iter = thrust::make_counting_iterator(0);
std::for_each(iter, iter + expected.size(), [&](cudf::size_type index) {
CUDF_TEST_EXPECT_TABLES_EQUAL(result[index].table, expected[index]);
});
}

// list<struct<float>>
{
cudf::test::fixed_width_column_wrapper<cudf::offset_type> offsets{
0, 2, 5, 7, 10, 12, 14, 17, 20};
cudf::test::fixed_width_column_wrapper<float> floats{1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
11, 12, 13, 14, 15, 16, 17, 18, 19, 20};
cudf::test::structs_column_wrapper data({floats});

auto list =
cudf::make_lists_column(8, offsets.release(), data.release(), 0, rmm::device_buffer{});

auto pre_split = cudf::split(*list, {2});

cudf::table_view t({pre_split[1]});
auto result = cudf::contiguous_split(t, {3, 4});
auto expected = cudf::split(t, {3, 4});

auto iter = thrust::make_counting_iterator(0);
std::for_each(iter, iter + expected.size(), [&](cudf::size_type index) {
CUDF_TEST_EXPECT_TABLES_EQUAL(result[index].table, expected[index]);
});
}
}

TEST_F(ContiguousSplitTableCornerCases, PreSplitStructs)
{
// includes struct<list>
{
cudf::test::fixed_width_column_wrapper<int> a{0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
cudf::test::fixed_width_column_wrapper<float> b{{0, -1, -2, -3, -4, -5, -6, -7, -8, -9},
{1, 1, 1, 0, 0, 0, 0, 1, 1, 1}};
cudf::test::strings_column_wrapper c{
{"abc", "def", "ghi", "jkl", "mno", "", "st", "uvwx", "yy", "zzzz"},
{0, 0, 1, 1, 1, 1, 1, 1, 1, 1}};
std::vector<bool> list_validity{1, 0, 1, 0, 1, 0, 1, 1, 1, 1};
cudf::test::lists_column_wrapper<int16_t> d{
{{0, 1}, {2, 3, 4}, {5, 6}, {7}, {8, 9, 10}, {11, 12}, {}, {15, 16, 17}, {18, 19}, {20}},
list_validity.begin()};
cudf::test::fixed_width_column_wrapper<int> _a{10, 20, 30, 40, 50, 60, 70, 80, 90, 100};
cudf::test::fixed_width_column_wrapper<float> _b{
-10, -20, -30, -40, -50, -60, -70, -80, -90, -100};
cudf::test::strings_column_wrapper _c{
"aa", "", "ccc", "dddd", "eeeee", "f", "gg", "hhh", "i", "jjj"};
cudf::test::structs_column_wrapper e({_a, _b, _c}, {1, 1, 1, 0, 1, 1, 1, 0, 1, 1});
cudf::test::structs_column_wrapper s({a, b, c, d, e}, {1, 1, 0, 1, 1, 1, 1, 1, 1, 1});

auto pre_split = cudf::split(s, {4});

auto iter = thrust::make_counting_iterator(0);
std::for_each(iter, iter + pre_split.size(), [&](cudf::size_type index) {
cudf::table_view t({pre_split[index]});
auto result = cudf::contiguous_split(t, {1});
auto expected = cudf::split(t, {1});

CUDF_TEST_EXPECT_TABLES_EQUIVALENT(result[0].table, expected[0]);
CUDF_TEST_EXPECT_TABLES_EQUIVALENT(result[1].table, expected[1]);
});
}

// struct<list<struct>>
{
cudf::test::fixed_width_column_wrapper<cudf::offset_type> offsets{
0, 2, 5, 7, 10, 12, 14, 17, 20};
cudf::test::fixed_width_column_wrapper<float> floats{1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
11, 12, 13, 14, 15, 16, 17, 18, 19, 20};
cudf::test::structs_column_wrapper data({floats});
auto list =
cudf::make_lists_column(8, offsets.release(), data.release(), 0, rmm::device_buffer{});
cudf::test::strings_column_wrapper strings{"a", "bb", "ccc", "dddd", "", "e", "ff", "ggg"};

std::vector<std::unique_ptr<cudf::column>> struct_children;
struct_children.push_back(std::move(list));
struct_children.push_back(strings.release());
cudf::test::structs_column_wrapper col(std::move(struct_children));

auto pre_split = cudf::split(col, {2});

cudf::table_view t({pre_split[1]});
auto result = cudf::contiguous_split(t, {3, 4});
auto expected = cudf::split(t, {3, 4});

auto iter = thrust::make_counting_iterator(0);
std::for_each(iter, iter + expected.size(), [&](cudf::size_type index) {
CUDF_TEST_EXPECT_TABLES_EQUAL(result[index].table, expected[index]);
});
}
}

TEST_F(ContiguousSplitTableCornerCases, NestedEmpty)
{
// this produces an empty strings column with no children,
Expand Down Expand Up @@ -1687,7 +1800,40 @@ TEST_F(ContiguousSplitTableCornerCases, NestedEmpty)
}
}

TEST_F(ContiguousSplitTableCornerCases, MalformedColumns) {}
TEST_F(ContiguousSplitTableCornerCases, SplitEmpty)
{
// empty sliced column. this is specifically testing the corner case:
// - a sliced column of size 0
// - having children that are of size > 0
//
cudf::test::strings_column_wrapper a{"abc", "def", "ghi", "jkl", "mno", "", "st", "uvwx"};
cudf::test::lists_column_wrapper<int> b{
{0, 1}, {2}, {3, 4, 5}, {6, 7}, {8, 9}, {10}, {11, 12}, {13, 14}};
cudf::test::fixed_width_column_wrapper<float> c{0, 1, 2, 3, 4, 5, 6, 7};
cudf::test::strings_column_wrapper _a{"abc", "def", "ghi", "jkl", "mno", "", "st", "uvwx"};
cudf::test::lists_column_wrapper<float> _b{
{0, 1}, {2}, {3, 4, 5}, {6, 7}, {8, 9}, {10}, {11, 12}, {13, 14}};
cudf::test::fixed_width_column_wrapper<float> _c{0, 1, 2, 3, 4, 5, 6, 7};
cudf::test::structs_column_wrapper d({_a, _b, _c});

cudf::table_view t({a, b, c, d});

auto sliced = cudf::split(t, {0});

{
auto result = cudf::contiguous_split(sliced[0], {});
CUDF_TEST_EXPECT_TABLES_EQUIVALENT(sliced[0], result[0].table);
}

{
auto result = cudf::contiguous_split(sliced[0], {0});
CUDF_TEST_EXPECT_TABLES_EQUIVALENT(sliced[0], result[0].table);
}

{
EXPECT_THROW(cudf::contiguous_split(sliced[0], {1}), cudf::logic_error);
}
}

struct ContiguousSplitNestedTypesTest : public cudf::test::BaseFixture {
};
Expand Down

0 comments on commit bb9b1be

Please sign in to comment.