Skip to content

Commit

Permalink
Handle pre-sliced nested columns in contiguous_split. (#8391)
Browse files Browse the repository at this point in the history
The computation that applied offsets down a hierarchy of nested columns was starting with the wrong base offset in the case of nested columns.

Note:  marking as Do Not Merge for the time being so it gets some more non-CI testing.  Specifically: this PR #8153

Authors:
  - https://github.com/nvdbaranec

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Devavret Makkar (https://github.com/devavret)
  - MithunR (https://github.com/mythrocks)

URL: #8391
  • Loading branch information
nvdbaranec authored Jun 14, 2021
1 parent 20f08da commit d30695c
Show file tree
Hide file tree
Showing 3 changed files with 86 additions and 3 deletions.
9 changes: 6 additions & 3 deletions cpp/src/copying/contiguous_split.cu
Original file line number Diff line number Diff line change
Expand Up @@ -886,13 +886,15 @@ std::vector<packed_table> contiguous_split(cudf::table_view const& input,
size_type* offset_stack = &d_offset_stack[stack_pos];
int parent_offsets_index = src_info.parent_offsets_index;
int stack_size = 0;
int root_column_offset = src_info.column_offset;
while (parent_offsets_index >= 0) {
offset_stack[stack_size++] = parent_offsets_index;
root_column_offset = d_src_buf_info[parent_offsets_index].column_offset;
parent_offsets_index = d_src_buf_info[parent_offsets_index].parent_offsets_index;
}
// make sure to include the -column- offset in our calculations
int row_start = d_indices[split_index] + src_info.column_offset;
int row_end = d_indices[split_index + 1] + src_info.column_offset;
// make sure to include the -column- offset on the root column in our calculation.
int row_start = d_indices[split_index] + root_column_offset;
int row_end = d_indices[split_index + 1] + root_column_offset;
while (stack_size > 0) {
stack_size--;
auto const offsets = d_src_buf_info[offset_stack[stack_size]].offsets;
Expand Down Expand Up @@ -923,6 +925,7 @@ std::vector<packed_table> contiguous_split(cudf::table_view const& input,
int const element_size = cudf::type_dispatcher(data_type{src_info.type}, size_of_helper{});
std::size_t const bytes =
static_cast<std::size_t>(num_elements) * static_cast<std::size_t>(element_size);

return dst_buf_info{_round_up_safe(bytes, 64),
num_elements,
element_size,
Expand Down
29 changes: 29 additions & 0 deletions cpp/tests/copying/pack_tests.cu
Original file line number Diff line number Diff line change
Expand Up @@ -418,6 +418,35 @@ TEST_F(PackUnpackTest, NestedEmpty)
this->run_test(src_table);
}
}

TEST_F(PackUnpackTest, NestedSliced)
{
auto valids =
cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2 == 0; });

using LCW = cudf::test::lists_column_wrapper<int>;

cudf::test::lists_column_wrapper<int> col0{ {{{1, 2, 3}, valids}, {4, 5}},
{{LCW{}, LCW{}, {7, 8}, LCW{}}, valids},
{{6, 12}},
{{{7, 8}, {{9, 10, 11}, valids}, LCW{}}, valids},
{{LCW{}, {-1, -2, -3, -4, -5}}, valids},
{LCW{}},
{{-10}, {-100, -200}} };

cudf::test::strings_column_wrapper col1{"Vimes", "Carrot", "Angua", "Cheery", "Detritus", "Slant", "Fred"};
cudf::test::fixed_width_column_wrapper<float> col2{ 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f };

std::vector<std::unique_ptr<cudf::column>> children;
children.push_back(std::make_unique<cudf::column>(col2));
children.push_back(std::make_unique<cudf::column>(col0));
children.push_back(std::make_unique<cudf::column>(col1));
auto col3 = cudf::make_structs_column(static_cast<cudf::column_view>(col0).size(), std::move(children), 0, rmm::device_buffer{});

cudf::table_view t({col0, col1, col2, *col3});
this->run_test(t);
}

// clang-format on

} // namespace test
Expand Down
51 changes: 51 additions & 0 deletions cpp/tests/copying/split_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1498,6 +1498,57 @@ TEST_F(ContiguousSplitTableCornerCases, MixedColumnTypes)
}

TEST_F(ContiguousSplitTableCornerCases, PreSplitTable)
{
auto valids =
cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2 == 0; });

using LCW = cudf::test::lists_column_wrapper<int>;

cudf::test::lists_column_wrapper<int> col0{{{{1, 2, 3}, valids}, {4, 5}},
{{LCW{}, LCW{}, {7, 8}, LCW{}}, valids},
{{{6}}},
{{{7, 8}, {{9, 10, 11}, valids}, LCW{}}, valids},
{{LCW{}, {-1, -2, -3, -4, -5}}, valids},
{LCW{}},
{{-10}, {-100, -200}}};

cudf::test::strings_column_wrapper col1{
"Vimes", "Carrot", "Angua", "Cheery", "Detritus", "Slant", "Fred"};
cudf::test::fixed_width_column_wrapper<float> col2{1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f};

std::vector<std::unique_ptr<cudf::column>> children;
children.push_back(std::make_unique<cudf::column>(col2));
children.push_back(std::make_unique<cudf::column>(col0));
children.push_back(std::make_unique<cudf::column>(col1));
auto col3 = cudf::make_structs_column(
static_cast<cudf::column_view>(col0).size(), std::move(children), 0, rmm::device_buffer{});

{
cudf::table_view t({col0, col1, col2, *col3});
std::vector<cudf::size_type> splits{1, 4};

auto result = cudf::contiguous_split(t, splits);
auto expected = cudf::split(t, splits);

for (size_t index = 0; index < expected.size(); index++) {
CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected[index], result[index].table);
}
}

{
cudf::table_view t({col0, col1, col2, *col3});
std::vector<cudf::size_type> splits{0, 6};

auto result = cudf::contiguous_split(t, splits);
auto expected = cudf::split(t, splits);

for (size_t index = 0; index < expected.size(); index++) {
CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected[index], result[index].table);
}
}
}

TEST_F(ContiguousSplitTableCornerCases, PreSplitTableLarge)
{
// test splitting a table that is already split (has an offset)
cudf::size_type start = 0;
Expand Down

0 comments on commit d30695c

Please sign in to comment.