diff --git a/cpp/benchmarks/sort/sort_structs.cpp b/cpp/benchmarks/sort/sort_structs.cpp index d2d8e12c377..81f7ad8a4c1 100644 --- a/cpp/benchmarks/sort/sort_structs.cpp +++ b/cpp/benchmarks/sort/sort_structs.cpp @@ -80,5 +80,5 @@ void nvbench_sort_struct(nvbench::state& state) NVBENCH_BENCH(nvbench_sort_struct) .set_name("sort_struct") .add_int64_power_of_two_axis("NumRows", {10, 18, 26}) - .add_int64_axis("Depth", {1, 8}) + .add_int64_axis("Depth", {0, 1, 8}) .add_int64_axis("Nulls", {0, 1}); diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index 8b330c1bd10..0fb1ad7ca68 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -197,7 +197,11 @@ class device_row_comparator { return cuda::std::make_pair(state, depth); } - // Structs have been modified to only have 1 child when using this. + if (lcol.num_child_columns() == 0) { + return cuda::std::make_pair(weak_ordering::EQUIVALENT, depth); + } + + // Non-empty structs have been modified to only have 1 child when using this. lcol = lcol.children()[0]; rcol = rcol.children()[0]; ++depth; diff --git a/cpp/src/table/row_operators.cu b/cpp/src/table/row_operators.cu index 0a9396ccdf7..a0400133c68 100644 --- a/cpp/src/table/row_operators.cu +++ b/cpp/src/table/row_operators.cu @@ -85,57 +85,44 @@ auto decompose_structs(table_view table, auto const& col = table.column(col_idx); if (is_nested(col.type())) { // convert and insert - std::vector r_verticalized_columns; - std::vector r_verticalized_col_depths; - std::vector flattened; - std::vector depths; - // TODO: Here I added a bogus leaf column at the beginning to help in the while loop below. - // Refactor the while loop so that it can handle the last case. - flattened.push_back(make_empty_column(type_id::INT32)->view()); - std::function recursive_child = [&](column_view const& c, - int depth) { - flattened.push_back(c); - depths.push_back(depth); - if (c.type().id() == type_id::STRUCT) { - for (int child_idx = 0; child_idx < c.num_children(); ++child_idx) { - auto scol = structs_column_view(c); - recursive_child(scol.get_sliced_child(child_idx), depth + 1); + std::vector> flattened; + std::function*, int)> recursive_child = + [&](column_view const& c, std::vector* branch, int depth) { + branch->push_back(c); + if (c.type().id() == type_id::STRUCT) { + for (int child_idx = 0; child_idx < c.num_children(); ++child_idx) { + auto scol = structs_column_view(c); + if (child_idx > 0) { + verticalized_col_depths.push_back(depth + 1); + branch = &flattened.emplace_back(); + } + recursive_child(scol.get_sliced_child(child_idx), branch, depth + 1); + } } - } - }; - recursive_child(col, 0); - int curr_col_idx = flattened.size() - 1; - column_view curr_col = flattened[curr_col_idx]; - while (curr_col_idx > 0) { - auto const& prev_col = flattened[curr_col_idx - 1]; - if (not is_nested(prev_col.type())) { - // We hit a column that's a leaf so seal this hierarchy - r_verticalized_columns.push_back(curr_col); - r_verticalized_col_depths.push_back(depths[curr_col_idx - 1]); - curr_col = prev_col; - } else { - curr_col = column_view(prev_col.type(), - prev_col.size(), + }; + auto& branch = flattened.emplace_back(); + verticalized_col_depths.push_back(0); + recursive_child(col, &branch, 0); + + for (auto const& branch : flattened) { + column_view curr_col = branch.back(); + for (auto it = branch.crbegin() + 1; it < branch.crend(); ++it) { + curr_col = column_view(it->type(), + it->size(), nullptr, - prev_col.null_mask(), + it->null_mask(), UNKNOWN_NULL_COUNT, - prev_col.offset(), + it->offset(), {curr_col}); } - --curr_col_idx; + verticalized_columns.push_back(curr_col); } - verticalized_columns.insert( - verticalized_columns.end(), r_verticalized_columns.rbegin(), r_verticalized_columns.rend()); - verticalized_col_depths.insert(verticalized_col_depths.end(), - r_verticalized_col_depths.rbegin(), - r_verticalized_col_depths.rend()); if (not column_order.empty()) { - new_column_order.insert( - new_column_order.end(), r_verticalized_columns.size(), column_order[col_idx]); + new_column_order.insert(new_column_order.end(), flattened.size(), column_order[col_idx]); } if (not null_precedence.empty()) { new_null_precedence.insert( - new_null_precedence.end(), r_verticalized_columns.size(), null_precedence[col_idx]); + new_null_precedence.end(), flattened.size(), null_precedence[col_idx]); } } else { verticalized_columns.push_back(col); diff --git a/cpp/tests/sort/sort_test.cpp b/cpp/tests/sort/sort_test.cpp index ed86277cd2b..be463f8b012 100644 --- a/cpp/tests/sort/sort_test.cpp +++ b/cpp/tests/sort/sort_test.cpp @@ -804,6 +804,60 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointSortedOrderGather) CUDF_TEST_EXPECT_TABLES_EQUAL(sorted_table, sorted->view()); } +struct SortCornerTest : public BaseFixture { +}; + +TEST_F(SortCornerTest, WithEmptyStructColumn) +{ + using int_col = fixed_width_column_wrapper; + + // struct{}, int, int + int_col col_for_mask{{0, 0, 0, 0, 0, 0}, {1, 0, 1, 1, 1, 1}}; + auto null_mask = cudf::copy_bitmask(col_for_mask.release()->view()); + auto struct_col = cudf::make_structs_column(6, {}, UNKNOWN_NULL_COUNT, std::move(null_mask)); + + int_col col1{{1, 2, 3, 1, 2, 3}}; + int_col col2{{1, 1, 1, 2, 2, 2}}; + table_view input{{struct_col->view(), col1, col2}}; + + int_col expected{{1, 0, 3, 4, 2, 5}}; + std::vector column_order{order::ASCENDING, order::ASCENDING, order::ASCENDING}; + auto got = sorted_order(input, column_order); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, got->view()); + + // struct{struct{}, int} + int_col col3{{0, 1, 2, 3, 4, 5}}; + std::vector> child_columns; + child_columns.push_back(std::move(struct_col)); + child_columns.push_back(col3.release()); + auto struct_col2 = + cudf::make_structs_column(6, std::move(child_columns), 0, rmm::device_buffer{}); + table_view input2{{struct_col2->view()}}; + + int_col expected2{{5, 4, 3, 2, 0, 1}}; + auto got2 = sorted_order(input2, {order::DESCENDING}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected2, got2->view()); + + // struct{struct{}, struct{int}} + int_col col_for_mask2{{0, 0, 0, 0, 0, 0}, {1, 0, 1, 1, 0, 1}}; + auto null_mask2 = cudf::copy_bitmask(col_for_mask2.release()->view()); + std::vector> child_columns2; + auto child_col_1 = cudf::make_structs_column(6, {}, UNKNOWN_NULL_COUNT, std::move(null_mask2)); + child_columns2.push_back(std::move(child_col_1)); + int_col col4{{5, 4, 3, 2, 1, 0}}; + std::vector> grand_child; + grand_child.push_back(std::move(col4.release())); + auto child_col_2 = cudf::make_structs_column(6, std::move(grand_child), 0, rmm::device_buffer{}); + child_columns2.push_back(std::move(child_col_2)); + auto struct_col3 = + cudf::make_structs_column(6, std::move(child_columns2), 0, rmm::device_buffer{}); + table_view input3{{struct_col3->view()}}; + + int_col expected3{{4, 1, 5, 3, 2, 0}}; + auto got3 = sorted_order(input3, {order::ASCENDING}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected3, got3->view()); +}; + } // namespace test } // namespace cudf