From fda458ac07fa1a04dea2d96d1b15d4fa35bdd121 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Tue, 17 May 2022 10:32:17 -0400 Subject: [PATCH 1/4] Fix reduction on empty column with non-empty offsets --- cpp/src/reductions/segmented_reductions.cpp | 1 - cpp/tests/reductions/rank_tests.cpp | 2 - .../reductions/segmented_reduction_tests.cpp | 68 ++++++++++++++++++- 3 files changed, 67 insertions(+), 4 deletions(-) diff --git a/cpp/src/reductions/segmented_reductions.cpp b/cpp/src/reductions/segmented_reductions.cpp index 415f5ae488e..8487af69db2 100644 --- a/cpp/src/reductions/segmented_reductions.cpp +++ b/cpp/src/reductions/segmented_reductions.cpp @@ -81,7 +81,6 @@ std::unique_ptr segmented_reduce(column_view const& segmented_values, rmm::mr::device_memory_resource* mr) { CUDF_EXPECTS(offsets.size() > 0, "`offsets` should have at least 1 element."); - if (segmented_values.is_empty()) { return empty_like(segmented_values); } return aggregation_dispatcher( agg.kind, diff --git a/cpp/tests/reductions/rank_tests.cpp b/cpp/tests/reductions/rank_tests.cpp index 3bf2899ce2f..5e90e5cfed8 100644 --- a/cpp/tests/reductions/rank_tests.cpp +++ b/cpp/tests/reductions/rank_tests.cpp @@ -55,8 +55,6 @@ struct TypedRankScanTest : BaseScanTest { std::unique_ptr const& agg) { auto col_out = cudf::scan(input, agg, INCLUSIVE_SCAN, INCLUDE_NULLS); - std::cout << "expect type: " << static_cast(expect_vals.type().id()) << std::endl; - std::cout << "out type: " << static_cast(col_out->type().id()) << std::endl; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expect_vals, col_out->view()); } }; diff --git a/cpp/tests/reductions/segmented_reduction_tests.cpp b/cpp/tests/reductions/segmented_reduction_tests.cpp index 8a9a8fb549e..7e7361e5fac 100644 --- a/cpp/tests/reductions/segmented_reduction_tests.cpp +++ b/cpp/tests/reductions/segmented_reduction_tests.cpp @@ -323,7 +323,7 @@ TYPED_TEST(SegmentedReductionTest, AllIncludeNulls) CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, expect); } -TEST_F(SegmentedReductionTestUntyped, PartialSegmentReudction) +TEST_F(SegmentedReductionTestUntyped, PartialSegmentReduction) { // Segmented reduction allows offsets only specify part of the input columns. // [1], [2, 3], [4] @@ -387,6 +387,47 @@ TEST_F(SegmentedReductionTestUntyped, ReduceEmptyColumn) CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, expect); } +TEST_F(SegmentedReductionTestUntyped, EmptyInputWithOffsets) +{ + // values: {} + // offsets: {0, 0} + // outputs: {XXX} + // output nullmask: {0} + auto input = fixed_width_column_wrapper{}; + auto offsets = std::vector{0, 0}; + auto d_offsets = thrust::device_vector(offsets); + auto expect = fixed_width_column_wrapper{{XXX}, {0}}; + + auto aggregates = + std::vector>>(); + aggregates.push_back(std::move(make_max_aggregation())); + aggregates.push_back(std::move(make_min_aggregation())); + aggregates.push_back(std::move(make_sum_aggregation())); + aggregates.push_back(std::move(make_product_aggregation())); + + auto output_type = data_type{type_to_id()}; + for (auto&& agg : aggregates) { + auto result = segmented_reduce(input, d_offsets, *agg, output_type, null_policy::EXCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expect); + } + + auto expect_bool = fixed_width_column_wrapper{{XXX}, {0}}; + + auto result = segmented_reduce(input, + d_offsets, + *make_any_aggregation(), + data_type{type_id::BOOL8}, + null_policy::INCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expect_bool); + result = segmented_reduce(input, + d_offsets, + *make_all_aggregation(), + data_type{type_id::BOOL8}, + null_policy::INCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expect_bool); +} + // String min/max test grid // Segment: Length 0, length 1, length 2 // Element nulls: No nulls, all nulls, some nulls @@ -508,6 +549,31 @@ TEST_F(SegmentedReductionStringTest, MinExcludeNulls) CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, expect); } +TEST_F(SegmentedReductionStringTest, EmptyInputWithOffsets) +{ + // values: {} + // offsets: {0, 0} + // outputs: {XXX} + // output nullmask: {0} + auto input = strings_column_wrapper{}; + auto offsets = std::vector{0, 0}; + auto d_offsets = thrust::device_vector(offsets); + auto expect = strings_column_wrapper({XXX}, {0}); + + auto result = segmented_reduce(input, + d_offsets, + *make_max_aggregation(), + data_type{type_id::STRING}, + null_policy::EXCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expect); + result = segmented_reduce(input, + d_offsets, + *make_min_aggregation(), + data_type{type_id::STRING}, + null_policy::INCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expect); +} + #undef XXX } // namespace test From 095caa3018745d028f114613c62e2d4fc625b2ca Mon Sep 17 00:00:00 2001 From: David Wendt Date: Tue, 17 May 2022 15:31:36 -0400 Subject: [PATCH 2/4] add multiple empty segments to test --- cpp/tests/reductions/segmented_reduction_tests.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/tests/reductions/segmented_reduction_tests.cpp b/cpp/tests/reductions/segmented_reduction_tests.cpp index 7e7361e5fac..017a858e9d7 100644 --- a/cpp/tests/reductions/segmented_reduction_tests.cpp +++ b/cpp/tests/reductions/segmented_reduction_tests.cpp @@ -556,9 +556,9 @@ TEST_F(SegmentedReductionStringTest, EmptyInputWithOffsets) // outputs: {XXX} // output nullmask: {0} auto input = strings_column_wrapper{}; - auto offsets = std::vector{0, 0}; + auto offsets = std::vector{0, 0, 0, 0}; auto d_offsets = thrust::device_vector(offsets); - auto expect = strings_column_wrapper({XXX}, {0}); + auto expect = strings_column_wrapper({XXX, XXX, XXX}, {0, 0, 0}); auto result = segmented_reduce(input, d_offsets, From d53b3f36987960b6915da016e7f54ba41bd394d9 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Tue, 17 May 2022 15:34:37 -0400 Subject: [PATCH 3/4] remove incorrect comment --- cpp/tests/reductions/segmented_reduction_tests.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/cpp/tests/reductions/segmented_reduction_tests.cpp b/cpp/tests/reductions/segmented_reduction_tests.cpp index 017a858e9d7..0b625984c7c 100644 --- a/cpp/tests/reductions/segmented_reduction_tests.cpp +++ b/cpp/tests/reductions/segmented_reduction_tests.cpp @@ -551,10 +551,6 @@ TEST_F(SegmentedReductionStringTest, MinExcludeNulls) TEST_F(SegmentedReductionStringTest, EmptyInputWithOffsets) { - // values: {} - // offsets: {0, 0} - // outputs: {XXX} - // output nullmask: {0} auto input = strings_column_wrapper{}; auto offsets = std::vector{0, 0, 0, 0}; auto d_offsets = thrust::device_vector(offsets); From 889e4ee2f91aceec1be6c82fc6aa69fd3952e20c Mon Sep 17 00:00:00 2001 From: David Wendt Date: Wed, 18 May 2022 08:58:31 -0400 Subject: [PATCH 4/4] added a few more offsets --- cpp/tests/reductions/segmented_reduction_tests.cpp | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/cpp/tests/reductions/segmented_reduction_tests.cpp b/cpp/tests/reductions/segmented_reduction_tests.cpp index fd243d53dcc..3fa369d6a53 100644 --- a/cpp/tests/reductions/segmented_reduction_tests.cpp +++ b/cpp/tests/reductions/segmented_reduction_tests.cpp @@ -389,14 +389,10 @@ TEST_F(SegmentedReductionTestUntyped, ReduceEmptyColumn) TEST_F(SegmentedReductionTestUntyped, EmptyInputWithOffsets) { - // values: {} - // offsets: {0, 0} - // outputs: {XXX} - // output nullmask: {0} auto input = fixed_width_column_wrapper{}; - auto offsets = std::vector{0, 0}; + auto offsets = std::vector{0, 0, 0, 0, 0, 0}; auto d_offsets = thrust::device_vector(offsets); - auto expect = fixed_width_column_wrapper{{XXX}, {0}}; + auto expect = fixed_width_column_wrapper{{XXX, XXX, XXX, XXX, XXX}, {0, 0, 0, 0, 0}}; auto aggregates = std::vector{{XXX}, {0}}; + auto expect_bool = fixed_width_column_wrapper{{XXX, XXX, XXX, XXX, XXX}, {0, 0, 0, 0, 0}}; auto result = segmented_reduce(input, d_offsets,