Skip to content

Commit

Permalink
Fix struct row comparator's exception on empty structs (#10604)
Browse files Browse the repository at this point in the history
Fixes #10603

This PR is to fix a bug of the optimized struct row comparator. For now,  the struct row comparator assumes that structs being compared are non-empty, so it fails when comparing empty structs.

Authors:
  - Alfred Xu (https://github.com/sperlingxx)
  - Devavret Makkar (https://github.com/devavret)

Approvers:
  - Devavret Makkar (https://github.com/devavret)
  - Nghia Truong (https://github.com/ttnghia)

URL: #10604
  • Loading branch information
sperlingxx authored Apr 8, 2022
1 parent 7643a0b commit bc43e6a
Show file tree
Hide file tree
Showing 4 changed files with 88 additions and 43 deletions.
2 changes: 1 addition & 1 deletion cpp/benchmarks/sort/sort_structs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,5 +80,5 @@ void nvbench_sort_struct(nvbench::state& state)
NVBENCH_BENCH(nvbench_sort_struct)
.set_name("sort_struct")
.add_int64_power_of_two_axis("NumRows", {10, 18, 26})
.add_int64_axis("Depth", {1, 8})
.add_int64_axis("Depth", {0, 1, 8})
.add_int64_axis("Nulls", {0, 1});
6 changes: 5 additions & 1 deletion cpp/include/cudf/table/experimental/row_operators.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,11 @@ class device_row_comparator {
return cuda::std::make_pair(state, depth);
}

// Structs have been modified to only have 1 child when using this.
if (lcol.num_child_columns() == 0) {
return cuda::std::make_pair(weak_ordering::EQUIVALENT, depth);
}

// Non-empty structs have been modified to only have 1 child when using this.
lcol = lcol.children()[0];
rcol = rcol.children()[0];
++depth;
Expand Down
69 changes: 28 additions & 41 deletions cpp/src/table/row_operators.cu
Original file line number Diff line number Diff line change
Expand Up @@ -85,57 +85,44 @@ auto decompose_structs(table_view table,
auto const& col = table.column(col_idx);
if (is_nested(col.type())) {
// convert and insert
std::vector<column_view> r_verticalized_columns;
std::vector<int> r_verticalized_col_depths;
std::vector<column_view> flattened;
std::vector<int> depths;
// TODO: Here I added a bogus leaf column at the beginning to help in the while loop below.
// Refactor the while loop so that it can handle the last case.
flattened.push_back(make_empty_column(type_id::INT32)->view());
std::function<void(column_view const&, int)> recursive_child = [&](column_view const& c,
int depth) {
flattened.push_back(c);
depths.push_back(depth);
if (c.type().id() == type_id::STRUCT) {
for (int child_idx = 0; child_idx < c.num_children(); ++child_idx) {
auto scol = structs_column_view(c);
recursive_child(scol.get_sliced_child(child_idx), depth + 1);
std::vector<std::vector<column_view>> flattened;
std::function<void(column_view const&, std::vector<column_view>*, int)> recursive_child =
[&](column_view const& c, std::vector<column_view>* branch, int depth) {
branch->push_back(c);
if (c.type().id() == type_id::STRUCT) {
for (int child_idx = 0; child_idx < c.num_children(); ++child_idx) {
auto scol = structs_column_view(c);
if (child_idx > 0) {
verticalized_col_depths.push_back(depth + 1);
branch = &flattened.emplace_back();
}
recursive_child(scol.get_sliced_child(child_idx), branch, depth + 1);
}
}
}
};
recursive_child(col, 0);
int curr_col_idx = flattened.size() - 1;
column_view curr_col = flattened[curr_col_idx];
while (curr_col_idx > 0) {
auto const& prev_col = flattened[curr_col_idx - 1];
if (not is_nested(prev_col.type())) {
// We hit a column that's a leaf so seal this hierarchy
r_verticalized_columns.push_back(curr_col);
r_verticalized_col_depths.push_back(depths[curr_col_idx - 1]);
curr_col = prev_col;
} else {
curr_col = column_view(prev_col.type(),
prev_col.size(),
};
auto& branch = flattened.emplace_back();
verticalized_col_depths.push_back(0);
recursive_child(col, &branch, 0);

for (auto const& branch : flattened) {
column_view curr_col = branch.back();
for (auto it = branch.crbegin() + 1; it < branch.crend(); ++it) {
curr_col = column_view(it->type(),
it->size(),
nullptr,
prev_col.null_mask(),
it->null_mask(),
UNKNOWN_NULL_COUNT,
prev_col.offset(),
it->offset(),
{curr_col});
}
--curr_col_idx;
verticalized_columns.push_back(curr_col);
}
verticalized_columns.insert(
verticalized_columns.end(), r_verticalized_columns.rbegin(), r_verticalized_columns.rend());
verticalized_col_depths.insert(verticalized_col_depths.end(),
r_verticalized_col_depths.rbegin(),
r_verticalized_col_depths.rend());
if (not column_order.empty()) {
new_column_order.insert(
new_column_order.end(), r_verticalized_columns.size(), column_order[col_idx]);
new_column_order.insert(new_column_order.end(), flattened.size(), column_order[col_idx]);
}
if (not null_precedence.empty()) {
new_null_precedence.insert(
new_null_precedence.end(), r_verticalized_columns.size(), null_precedence[col_idx]);
new_null_precedence.end(), flattened.size(), null_precedence[col_idx]);
}
} else {
verticalized_columns.push_back(col);
Expand Down
54 changes: 54 additions & 0 deletions cpp/tests/sort/sort_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -807,6 +807,60 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointSortedOrderGather)
CUDF_TEST_EXPECT_TABLES_EQUAL(sorted_table, sorted->view());
}

struct SortCornerTest : public BaseFixture {
};

TEST_F(SortCornerTest, WithEmptyStructColumn)
{
using int_col = fixed_width_column_wrapper<int32_t>;

// struct{}, int, int
int_col col_for_mask{{0, 0, 0, 0, 0, 0}, {1, 0, 1, 1, 1, 1}};
auto null_mask = cudf::copy_bitmask(col_for_mask.release()->view());
auto struct_col = cudf::make_structs_column(6, {}, UNKNOWN_NULL_COUNT, std::move(null_mask));

int_col col1{{1, 2, 3, 1, 2, 3}};
int_col col2{{1, 1, 1, 2, 2, 2}};
table_view input{{struct_col->view(), col1, col2}};

int_col expected{{1, 0, 3, 4, 2, 5}};
std::vector<order> column_order{order::ASCENDING, order::ASCENDING, order::ASCENDING};
auto got = sorted_order(input, column_order);
CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, got->view());

// struct{struct{}, int}
int_col col3{{0, 1, 2, 3, 4, 5}};
std::vector<std::unique_ptr<cudf::column>> child_columns;
child_columns.push_back(std::move(struct_col));
child_columns.push_back(col3.release());
auto struct_col2 =
cudf::make_structs_column(6, std::move(child_columns), 0, rmm::device_buffer{});
table_view input2{{struct_col2->view()}};

int_col expected2{{5, 4, 3, 2, 0, 1}};
auto got2 = sorted_order(input2, {order::DESCENDING});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected2, got2->view());

// struct{struct{}, struct{int}}
int_col col_for_mask2{{0, 0, 0, 0, 0, 0}, {1, 0, 1, 1, 0, 1}};
auto null_mask2 = cudf::copy_bitmask(col_for_mask2.release()->view());
std::vector<std::unique_ptr<cudf::column>> child_columns2;
auto child_col_1 = cudf::make_structs_column(6, {}, UNKNOWN_NULL_COUNT, std::move(null_mask2));
child_columns2.push_back(std::move(child_col_1));
int_col col4{{5, 4, 3, 2, 1, 0}};
std::vector<std::unique_ptr<cudf::column>> grand_child;
grand_child.push_back(std::move(col4.release()));
auto child_col_2 = cudf::make_structs_column(6, std::move(grand_child), 0, rmm::device_buffer{});
child_columns2.push_back(std::move(child_col_2));
auto struct_col3 =
cudf::make_structs_column(6, std::move(child_columns2), 0, rmm::device_buffer{});
table_view input3{{struct_col3->view()}};

int_col expected3{{4, 1, 5, 3, 2, 0}};
auto got3 = sorted_order(input3, {order::ASCENDING});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected3, got3->view());
};

} // namespace test
} // namespace cudf

Expand Down

0 comments on commit bc43e6a

Please sign in to comment.