Skip to content

Commit

Permalink
Fix segmented_reduce on empty column with non-empty offsets (#10876)
Browse files Browse the repository at this point in the history
Fixes `cudf::segmented_reduce` where the input `values` column is empty but the `offsets` are not. In this case, the `offsets` vector `{0,0}` specifies an empty segment which should result in a single null row. The logic has been fixed and new gtest cases have been added.

Closes #10556

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - Jason Lowe (https://github.com/jlowe)
  - Nghia Truong (https://github.com/ttnghia)

URL: #10876
  • Loading branch information
davidwendt authored May 18, 2022
1 parent 7b393a7 commit ac77940
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 4 deletions.
1 change: 0 additions & 1 deletion cpp/src/reductions/segmented_reductions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,6 @@ std::unique_ptr<column> segmented_reduce(column_view const& segmented_values,
rmm::mr::device_memory_resource* mr)
{
CUDF_EXPECTS(offsets.size() > 0, "`offsets` should have at least 1 element.");
if (segmented_values.is_empty()) { return empty_like(segmented_values); }

return aggregation_dispatcher(
agg.kind,
Expand Down
2 changes: 0 additions & 2 deletions cpp/tests/reductions/rank_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,6 @@ struct TypedRankScanTest : BaseScanTest<T> {
std::unique_ptr<scan_aggregation> const& agg)
{
auto col_out = cudf::scan(input, agg, INCLUSIVE_SCAN, INCLUDE_NULLS);
std::cout << "expect type: " << static_cast<int>(expect_vals.type().id()) << std::endl;
std::cout << "out type: " << static_cast<int>(col_out->type().id()) << std::endl;
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expect_vals, col_out->view());
}
};
Expand Down
60 changes: 59 additions & 1 deletion cpp/tests/reductions/segmented_reduction_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -323,7 +323,7 @@ TYPED_TEST(SegmentedReductionTest, AllIncludeNulls)
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, expect);
}

TEST_F(SegmentedReductionTestUntyped, PartialSegmentReudction)
TEST_F(SegmentedReductionTestUntyped, PartialSegmentReduction)
{
// Segmented reduction allows offsets only specify part of the input columns.
// [1], [2, 3], [4]
Expand Down Expand Up @@ -387,6 +387,43 @@ TEST_F(SegmentedReductionTestUntyped, ReduceEmptyColumn)
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, expect);
}

TEST_F(SegmentedReductionTestUntyped, EmptyInputWithOffsets)
{
auto input = fixed_width_column_wrapper<int32_t>{};
auto offsets = std::vector<size_type>{0, 0, 0, 0, 0, 0};
auto d_offsets = thrust::device_vector<size_type>(offsets);
auto expect = fixed_width_column_wrapper<int32_t>{{XXX, XXX, XXX, XXX, XXX}, {0, 0, 0, 0, 0}};

auto aggregates =
std::vector<std::unique_ptr<cudf::segmented_reduce_aggregation,
std::default_delete<cudf::segmented_reduce_aggregation>>>();
aggregates.push_back(std::move(make_max_aggregation<segmented_reduce_aggregation>()));
aggregates.push_back(std::move(make_min_aggregation<segmented_reduce_aggregation>()));
aggregates.push_back(std::move(make_sum_aggregation<segmented_reduce_aggregation>()));
aggregates.push_back(std::move(make_product_aggregation<segmented_reduce_aggregation>()));

auto output_type = data_type{type_to_id<int32_t>()};
for (auto&& agg : aggregates) {
auto result = segmented_reduce(input, d_offsets, *agg, output_type, null_policy::EXCLUDE);
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expect);
}

auto expect_bool = fixed_width_column_wrapper<bool>{{XXX, XXX, XXX, XXX, XXX}, {0, 0, 0, 0, 0}};

auto result = segmented_reduce(input,
d_offsets,
*make_any_aggregation<segmented_reduce_aggregation>(),
data_type{type_id::BOOL8},
null_policy::INCLUDE);
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expect_bool);
result = segmented_reduce(input,
d_offsets,
*make_all_aggregation<segmented_reduce_aggregation>(),
data_type{type_id::BOOL8},
null_policy::INCLUDE);
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expect_bool);
}

template <typename T>
struct SegmentedReductionFixedPointTest : public cudf::test::BaseFixture {
};
Expand Down Expand Up @@ -714,6 +751,27 @@ TEST_F(SegmentedReductionStringTest, MinExcludeNulls)
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, expect);
}

TEST_F(SegmentedReductionStringTest, EmptyInputWithOffsets)
{
auto input = strings_column_wrapper{};
auto offsets = std::vector<size_type>{0, 0, 0, 0};
auto d_offsets = thrust::device_vector<size_type>(offsets);
auto expect = strings_column_wrapper({XXX, XXX, XXX}, {0, 0, 0});

auto result = segmented_reduce(input,
d_offsets,
*make_max_aggregation<segmented_reduce_aggregation>(),
data_type{type_id::STRING},
null_policy::EXCLUDE);
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expect);
result = segmented_reduce(input,
d_offsets,
*make_min_aggregation<segmented_reduce_aggregation>(),
data_type{type_id::STRING},
null_policy::INCLUDE);
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expect);
}

#undef XXX

} // namespace test
Expand Down

0 comments on commit ac77940

Please sign in to comment.