From 068542a498455f94571ed4b853e0f3e0d4a50e1c Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Wed, 16 Mar 2022 15:30:29 -0700 Subject: [PATCH 1/8] Passes compile, initial layout for how test should look like --- .../reductions/segmented_reduction_tests.cpp | 59 ++++++++++++++++++- 1 file changed, 58 insertions(+), 1 deletion(-) diff --git a/cpp/tests/reductions/segmented_reduction_tests.cpp b/cpp/tests/reductions/segmented_reduction_tests.cpp index 3a432cce801..de8c3256abb 100644 --- a/cpp/tests/reductions/segmented_reduction_tests.cpp +++ b/cpp/tests/reductions/segmented_reduction_tests.cpp @@ -19,9 +19,11 @@ #include #include +#include #include #include +#include #include namespace cudf { @@ -36,7 +38,7 @@ struct SegmentedReductionTest : public cudf::test::BaseFixture { struct SegmentedReductionTestUntyped : public cudf::test::BaseFixture { }; -TYPED_TEST_CASE(SegmentedReductionTest, NumericTypes); +TYPED_TEST_SUITE(SegmentedReductionTest, NumericTypes); TYPED_TEST(SegmentedReductionTest, SumExcludeNulls) { @@ -385,6 +387,61 @@ TEST_F(SegmentedReductionTestUntyped, ReduceEmptyColumn) CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, expect); } +int32_t pow10(int32_t exponent) { return exponent == 0 ? 1 : 10 * pow10(exponent - 1); } + +template +struct SegmentedReductionFixedPointTest : public cudf::test::BaseFixture { + public: + std::vector scale_list_by_pow10(std::vector input, + int32_t exponent) + { + std::vector result(input.size()); + std::transform(input.begin(), input.end(), result.begin(), [&exponent](auto x) { + return x * pow10(exponent); + }); + return result; + } +}; + +TYPED_TEST_SUITE(SegmentedReductionFixedPointTest, cudf::test::FixedPointTypes); + +TYPED_TEST(SegmentedReductionFixedPointTest, ProductIncludeNullsZeroInputScale) +{ + // [1, 2, 3], [1], [], [2, NULL, 3], [NULL], [NULL, NULL] | scale: 0 + // values: {1, 2, 3, 1, 2, XXX, 3, XXX, XXX, XXX} + // offsets: {0, 3, 4, 4, 7, 8, 10} + // nullmask:{1, 1, 1, 1, 1, 0, 1, 0, 0, 0} + // output_dtype: decimalXX, scale: -1, 0, 1 + // outputs: {6, 1, XXX, XXX, XXX, XXX} + // output nullmask: {1, 1, 0, 0, 0, 0} + + using DecimalXX = TypeParam; + + for (int output_scale : {-1, 0, 1}) { + fixed_point_column_wrapper input{ + {1, 2, 3, 1, 2, XXX, 3, XXX, XXX, XXX}, + {true, true, true, true, true, false, true, false, false, false}, + numeric::scale_type(0)}; + fixed_width_column_wrapper offsets{0, 3, 4, 4, 7, 8, 10}; + + data_type output_dtype{type_to_id(), numeric::scale_type{output_scale}}; + + auto result_rep = this->scale_list_by_pow10({6, 1, XXX, XXX, XXX, XXX}, -output_scale); + fixed_point_column_wrapper expect{ + result_rep.begin(), + result_rep.end(), + {true, true, false, false, false, false}, + numeric::scale_type(output_scale)}; + + auto res = segmented_reduce(input, + column_view(offsets), + *make_product_aggregation(), + output_dtype, + null_policy::INCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, expect); + } +} + #undef XXX } // namespace test From 66bdb752397fb13560462771d5a39b16ee417d58 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Wed, 16 Mar 2022 15:33:01 -0700 Subject: [PATCH 2/8] remove unused include --- cpp/tests/reductions/segmented_reduction_tests.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/cpp/tests/reductions/segmented_reduction_tests.cpp b/cpp/tests/reductions/segmented_reduction_tests.cpp index de8c3256abb..613ca9b4ab5 100644 --- a/cpp/tests/reductions/segmented_reduction_tests.cpp +++ b/cpp/tests/reductions/segmented_reduction_tests.cpp @@ -23,7 +23,6 @@ #include #include -#include #include namespace cudf { From 9bdcc6591e19de5a6ae179f0e283110b512336b0 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Wed, 16 Mar 2022 15:40:35 -0700 Subject: [PATCH 3/8] Fix exponent bug --- cpp/tests/reductions/segmented_reduction_tests.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/tests/reductions/segmented_reduction_tests.cpp b/cpp/tests/reductions/segmented_reduction_tests.cpp index 613ca9b4ab5..59598d63641 100644 --- a/cpp/tests/reductions/segmented_reduction_tests.cpp +++ b/cpp/tests/reductions/segmented_reduction_tests.cpp @@ -396,7 +396,7 @@ struct SegmentedReductionFixedPointTest : public cudf::test::BaseFixture { { std::vector result(input.size()); std::transform(input.begin(), input.end(), result.begin(), [&exponent](auto x) { - return x * pow10(exponent); + return exponent >= 0 ? x * pow10(exponent) : x / pow10(-exponent); }); return result; } @@ -404,7 +404,7 @@ struct SegmentedReductionFixedPointTest : public cudf::test::BaseFixture { TYPED_TEST_SUITE(SegmentedReductionFixedPointTest, cudf::test::FixedPointTypes); -TYPED_TEST(SegmentedReductionFixedPointTest, ProductIncludeNullsZeroInputScale) +TYPED_TEST(SegmentedReductionFixedPointTest, ProductIncludeNulls) { // [1, 2, 3], [1], [], [2, NULL, 3], [NULL], [NULL, NULL] | scale: 0 // values: {1, 2, 3, 1, 2, XXX, 3, XXX, XXX, XXX} From 3ddad323dc3fe5a752b8e5fcccf5c01170819cd6 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Thu, 17 Mar 2022 16:44:43 -0700 Subject: [PATCH 4/8] Fix to test for max aggregation and add string type test --- .../reductions/segmented_reduction_tests.cpp | 42 ++++++++++++++++--- 1 file changed, 37 insertions(+), 5 deletions(-) diff --git a/cpp/tests/reductions/segmented_reduction_tests.cpp b/cpp/tests/reductions/segmented_reduction_tests.cpp index 59598d63641..01d9474ac19 100644 --- a/cpp/tests/reductions/segmented_reduction_tests.cpp +++ b/cpp/tests/reductions/segmented_reduction_tests.cpp @@ -14,6 +14,7 @@ * limitations under the License. */ +#include "cudf_test/column_utilities.hpp" #include #include #include @@ -404,14 +405,14 @@ struct SegmentedReductionFixedPointTest : public cudf::test::BaseFixture { TYPED_TEST_SUITE(SegmentedReductionFixedPointTest, cudf::test::FixedPointTypes); -TYPED_TEST(SegmentedReductionFixedPointTest, ProductIncludeNulls) +TYPED_TEST(SegmentedReductionFixedPointTest, MaxIncludeNullsScaleZero) { // [1, 2, 3], [1], [], [2, NULL, 3], [NULL], [NULL, NULL] | scale: 0 // values: {1, 2, 3, 1, 2, XXX, 3, XXX, XXX, XXX} - // offsets: {0, 3, 4, 4, 7, 8, 10} // nullmask:{1, 1, 1, 1, 1, 0, 1, 0, 0, 0} + // offsets: {0, 3, 4, 4, 7, 8, 10} // output_dtype: decimalXX, scale: -1, 0, 1 - // outputs: {6, 1, XXX, XXX, XXX, XXX} + // outputs: {3, 1, XXX, XXX, XXX, XXX} // output nullmask: {1, 1, 0, 0, 0, 0} using DecimalXX = TypeParam; @@ -425,7 +426,7 @@ TYPED_TEST(SegmentedReductionFixedPointTest, ProductIncludeNulls) data_type output_dtype{type_to_id(), numeric::scale_type{output_scale}}; - auto result_rep = this->scale_list_by_pow10({6, 1, XXX, XXX, XXX, XXX}, -output_scale); + auto result_rep = this->scale_list_by_pow10({3, 1, XXX, XXX, XXX, XXX}, -output_scale); fixed_point_column_wrapper expect{ result_rep.begin(), result_rep.end(), @@ -434,13 +435,44 @@ TYPED_TEST(SegmentedReductionFixedPointTest, ProductIncludeNulls) auto res = segmented_reduce(input, column_view(offsets), - *make_product_aggregation(), + *make_max_aggregation(), output_dtype, null_policy::INCLUDE); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, expect); } } +struct SegmentedReductionStringTest : public cudf::test::BaseFixture { +}; + +TEST_F(SegmentedReductionStringTest, MaxIncludeNulls) +{ + // ['world'], ['cudf', NULL, 'cuml'], ['hello', 'rapids', 'ai'], [], [NULL], [NULL, NULL] + // values: {"world", "cudf", XXX, "cuml", "hello", "rapids", "ai", XXX, XXX, XXX} + // nullmask:{1, 1, 0, 1, 1, 1, 1, 0, 0, 0} + // offsets: {0, 1, 4, 7, 7, 8, 10} + // output_dtype: string dtype + // outputs: {"world", XXX, "rapids", XXX, XXX, XXX} + // output nullmask: {1, 0, 1, 0, 0, 0} + + strings_column_wrapper input{ + {"world", "cudf", XXX, "cuml", "hello", "rapids", "ai", XXX, XXX, XXX}, + {true, true, false, true, true, true, true, false, false, false}}; + fixed_width_column_wrapper offsets{0, 1, 4, 7, 7, 8, 10}; + data_type output_dtype{type_id::STRING}; + + strings_column_wrapper expect{{"world", XXX, "rapids", XXX, XXX, XXX}, + {true, false, true, false, false, false}}; + + auto res = segmented_reduce(input, + column_view(offsets), + *make_max_aggregation(), + output_dtype, + null_policy::INCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, expect); +} +} + #undef XXX } // namespace test From 7c5411348278480d6d86e4b54a6a352795b5e55e Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Mon, 21 Mar 2022 11:43:58 -0700 Subject: [PATCH 5/8] Add detail name space to cudf::reduction to make consistent with segmented reduction Co-authored-by: Bradley Dice --- cpp/src/reductions/all.cu | 11 ++++++----- cpp/src/reductions/any.cu | 11 ++++++----- cpp/src/reductions/max.cu | 13 +++++++------ cpp/src/reductions/min.cu | 13 +++++++------ cpp/src/reductions/product.cu | 4 ++-- cpp/src/reductions/simple.cuh | 16 +++++++++------- cpp/src/reductions/sum.cu | 4 ++-- cpp/src/reductions/sum_of_squares.cu | 4 ++-- .../reductions/segmented_reduction_tests.cpp | 3 +-- 9 files changed, 42 insertions(+), 37 deletions(-) diff --git a/cpp/src/reductions/all.cu b/cpp/src/reductions/all.cu index 3a076c3b780..b43df279393 100644 --- a/cpp/src/reductions/all.cu +++ b/cpp/src/reductions/all.cu @@ -88,11 +88,12 @@ std::unique_ptr all(column_view const& col, dictionary_column_view(col).keys().type(), detail::all_fn{}, col, stream, mr); } // dispatch for non-dictionary types - return cudf::type_dispatcher(col.type(), - simple::bool_result_element_dispatcher{}, - col, - stream, - mr); + return cudf::type_dispatcher( + col.type(), + simple::detail::bool_result_element_dispatcher{}, + col, + stream, + mr); } } // namespace reduction diff --git a/cpp/src/reductions/any.cu b/cpp/src/reductions/any.cu index 1eb080cfe20..bad7d581255 100644 --- a/cpp/src/reductions/any.cu +++ b/cpp/src/reductions/any.cu @@ -88,11 +88,12 @@ std::unique_ptr any(column_view const& col, dictionary_column_view(col).keys().type(), detail::any_fn{}, col, stream, mr); } // dispatch for non-dictionary types - return cudf::type_dispatcher(col.type(), - simple::bool_result_element_dispatcher{}, - col, - stream, - mr); + return cudf::type_dispatcher( + col.type(), + simple::detail::bool_result_element_dispatcher{}, + col, + stream, + mr); } } // namespace reduction diff --git a/cpp/src/reductions/max.cu b/cpp/src/reductions/max.cu index dd283d86d3b..4adf35414dd 100644 --- a/cpp/src/reductions/max.cu +++ b/cpp/src/reductions/max.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -34,11 +34,12 @@ std::unique_ptr max(column_view const& col, auto const dispatch_type = cudf::is_dictionary(col.type()) ? cudf::dictionary_column_view(col).indices().type() : col.type(); - return cudf::type_dispatcher(dispatch_type, - simple::same_element_type_dispatcher{}, - col, - stream, - mr); + return cudf::type_dispatcher( + dispatch_type, + simple::detail::same_element_type_dispatcher{}, + col, + stream, + mr); } } // namespace reduction diff --git a/cpp/src/reductions/min.cu b/cpp/src/reductions/min.cu index 5e1301b2904..ac9bdfe9cdc 100644 --- a/cpp/src/reductions/min.cu +++ b/cpp/src/reductions/min.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -32,11 +32,12 @@ std::unique_ptr min(column_view const& col, auto const dispatch_type = cudf::is_dictionary(col.type()) ? cudf::dictionary_column_view(col).indices().type() : col.type(); - return cudf::type_dispatcher(dispatch_type, - simple::same_element_type_dispatcher{}, - col, - stream, - mr); + return cudf::type_dispatcher( + dispatch_type, + simple::detail::same_element_type_dispatcher{}, + col, + stream, + mr); } } // namespace reduction diff --git a/cpp/src/reductions/product.cu b/cpp/src/reductions/product.cu index 30342bc4728..5caf498712a 100644 --- a/cpp/src/reductions/product.cu +++ b/cpp/src/reductions/product.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -30,7 +30,7 @@ std::unique_ptr product(column_view const& col, { return cudf::type_dispatcher( cudf::is_dictionary(col.type()) ? dictionary_column_view(col).keys().type() : col.type(), - simple::element_type_dispatcher{}, + simple::detail::element_type_dispatcher{}, col, output_dtype, stream, diff --git a/cpp/src/reductions/simple.cuh b/cpp/src/reductions/simple.cuh index e5303246452..807462d742f 100644 --- a/cpp/src/reductions/simple.cuh +++ b/cpp/src/reductions/simple.cuh @@ -38,6 +38,7 @@ namespace cudf { namespace reduction { namespace simple { +namespace detail { /** * @brief Reduction for 'sum', 'product', 'min', 'max', 'sum of squares' * which directly compute the reduction by a single step reduction call @@ -64,11 +65,11 @@ std::unique_ptr simple_reduction(column_view const& col, if (col.has_nulls()) { auto f = simple_op.template get_null_replacing_element_transformer(); auto it = thrust::make_transform_iterator(dcol->pair_begin(), f); - return detail::reduce(it, col.size(), simple_op, stream, mr); + return cudf::reduction::detail::reduce(it, col.size(), simple_op, stream, mr); } else { auto f = simple_op.template get_element_transformer(); auto it = thrust::make_transform_iterator(dcol->begin(), f); - return detail::reduce(it, col.size(), simple_op, stream, mr); + return cudf::reduction::detail::reduce(it, col.size(), simple_op, stream, mr); } }(); @@ -102,11 +103,11 @@ std::unique_ptr fixed_point_reduction(column_view const& col, if (col.has_nulls()) { auto f = simple_op.template get_null_replacing_element_transformer(); auto it = thrust::make_transform_iterator(dcol->pair_begin(), f); - return detail::reduce(it, col.size(), simple_op, stream, mr); + return cudf::reduction::detail::reduce(it, col.size(), simple_op, stream, mr); } else { auto f = simple_op.template get_element_transformer(); auto it = thrust::make_transform_iterator(dcol->begin(), f); - return detail::reduce(it, col.size(), simple_op, stream, mr); + return cudf::reduction::detail::reduce(it, col.size(), simple_op, stream, mr); } }(); @@ -149,7 +150,7 @@ std::unique_ptr dictionary_reduction(column_view const& col, auto p = cudf::dictionary::detail::make_dictionary_pair_iterator(*dcol, col.has_nulls()); auto it = thrust::make_transform_iterator(p, f); - return detail::reduce(it, col.size(), simple_op, stream, mr); + return cudf::reduction::detail::reduce(it, col.size(), simple_op, stream, mr); }(); // set scalar is valid @@ -310,9 +311,9 @@ struct same_element_type_dispatcher { rmm::mr::device_memory_resource* mr) { if (!cudf::is_dictionary(col.type())) { - return simple::simple_reduction(col, stream, mr); + return simple_reduction(col, stream, mr); } - auto index = simple::simple_reduction( + auto index = simple_reduction( dictionary_column_view(col).get_indices_annotated(), stream, rmm::mr::get_current_device_resource()); @@ -442,6 +443,7 @@ struct element_type_dispatcher { } }; +} // namespace detail } // namespace simple } // namespace reduction } // namespace cudf diff --git a/cpp/src/reductions/sum.cu b/cpp/src/reductions/sum.cu index 8bc157668f4..2db19939bd5 100644 --- a/cpp/src/reductions/sum.cu +++ b/cpp/src/reductions/sum.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -30,7 +30,7 @@ std::unique_ptr sum(column_view const& col, { return cudf::type_dispatcher( cudf::is_dictionary(col.type()) ? dictionary_column_view(col).keys().type() : col.type(), - simple::element_type_dispatcher{}, + simple::detail::element_type_dispatcher{}, col, output_dtype, stream, diff --git a/cpp/src/reductions/sum_of_squares.cu b/cpp/src/reductions/sum_of_squares.cu index eca6aa0d1d9..a3e9368bb02 100644 --- a/cpp/src/reductions/sum_of_squares.cu +++ b/cpp/src/reductions/sum_of_squares.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -30,7 +30,7 @@ std::unique_ptr sum_of_squares(column_view const& col, { return cudf::type_dispatcher( cudf::is_dictionary(col.type()) ? dictionary_column_view(col).keys().type() : col.type(), - simple::element_type_dispatcher{}, + simple::detail::element_type_dispatcher{}, col, output_dtype, stream, diff --git a/cpp/tests/reductions/segmented_reduction_tests.cpp b/cpp/tests/reductions/segmented_reduction_tests.cpp index 01d9474ac19..a5053311a7a 100644 --- a/cpp/tests/reductions/segmented_reduction_tests.cpp +++ b/cpp/tests/reductions/segmented_reduction_tests.cpp @@ -14,8 +14,8 @@ * limitations under the License. */ -#include "cudf_test/column_utilities.hpp" #include +#include #include #include @@ -471,7 +471,6 @@ TEST_F(SegmentedReductionStringTest, MaxIncludeNulls) null_policy::INCLUDE); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, expect); } -} #undef XXX From d0b0f8bb3f4a325b55d400ce6e45af0628291aa8 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Mon, 21 Mar 2022 12:06:11 -0700 Subject: [PATCH 6/8] Add detail namespace, and docstring cleanups for compound reductions. Co-authored-by: Bradley Dice --- .../cudf/detail/reduction_functions.hpp | 4 ++++ cpp/src/reductions/compound.cuh | 8 ++++--- cpp/src/reductions/mean.cu | 19 +++++++++------ cpp/src/reductions/std.cu | 24 +++++++++---------- cpp/src/reductions/var.cu | 24 +++++++++---------- 5 files changed, 45 insertions(+), 34 deletions(-) diff --git a/cpp/include/cudf/detail/reduction_functions.hpp b/cpp/include/cudf/detail/reduction_functions.hpp index ccec4bf8a6c..3a6113e66ce 100644 --- a/cpp/include/cudf/detail/reduction_functions.hpp +++ b/cpp/include/cudf/detail/reduction_functions.hpp @@ -192,6 +192,8 @@ std::unique_ptr mean( * * @param col input column to compute variance. * @param output_dtype data type of return type and typecast elements of input column. + * @param ddof Delta degrees of freedom. The divisor used is N - ddof, where N represents the number + * of elements. * @param stream CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource used to allocate the returned scalar's device memory. * @return Variance as scalar of type `output_dtype`. @@ -213,6 +215,8 @@ std::unique_ptr variance( * * @param col input column to compute standard deviation. * @param output_dtype data type of return type and typecast elements of input column. + * @param ddof Delta degrees of freedom. The divisor used is N - ddof, where N represents the number + * of elements. * @param stream CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource used to allocate the returned scalar's device memory. * @return Standard deviation as scalar of type `output_dtype`. diff --git a/cpp/src/reductions/compound.cuh b/cpp/src/reductions/compound.cuh index c60c819f8e2..f223f846bb1 100644 --- a/cpp/src/reductions/compound.cuh +++ b/cpp/src/reductions/compound.cuh @@ -25,6 +25,7 @@ namespace cudf { namespace reduction { namespace compound { +namespace detail { /** * @brief Multi-step reduction for operations such as mean and variance, and * standard deviation. @@ -61,19 +62,19 @@ std::unique_ptr compound_reduction(column_view const& col, auto it = thrust::make_transform_iterator( dcol->pair_begin(), compound_op.template get_null_replacing_element_transformer()); - result = detail::reduce( + result = cudf::reduction::detail::reduce( it, col.size(), compound_op, valid_count, ddof, stream, mr); } else { auto it = thrust::make_transform_iterator( dcol->begin(), compound_op.template get_element_transformer()); - result = detail::reduce( + result = cudf::reduction::detail::reduce( it, col.size(), compound_op, valid_count, ddof, stream, mr); } } else { auto it = thrust::make_transform_iterator( cudf::dictionary::detail::make_dictionary_pair_iterator(*dcol, col.has_nulls()), compound_op.template get_null_replacing_element_transformer()); - result = detail::reduce( + result = cudf::reduction::detail::reduce( it, col.size(), compound_op, valid_count, ddof, stream, mr); } @@ -152,6 +153,7 @@ struct element_type_dispatcher { } }; +} // namespace detail } // namespace compound } // namespace reduction } // namespace cudf diff --git a/cpp/src/reductions/mean.cu b/cpp/src/reductions/mean.cu index ca341090b9f..e4b5f754b9b 100644 --- a/cpp/src/reductions/mean.cu +++ b/cpp/src/reductions/mean.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -// The translation unit for reduction `mean` #include #include @@ -21,14 +20,20 @@ #include -std::unique_ptr cudf::reduction::mean(column_view const& col, - cudf::data_type const output_dtype, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +namespace cudf { +namespace reduction { + +std::unique_ptr mean(column_view const& col, + cudf::data_type const output_dtype, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { - using reducer = cudf::reduction::compound::element_type_dispatcher; + using reducer = compound::detail::element_type_dispatcher; auto col_type = cudf::is_dictionary(col.type()) ? dictionary_column_view(col).keys().type() : col.type(); return cudf::type_dispatcher( col_type, reducer(), col, output_dtype, /* ddof is not used for mean*/ 1, stream, mr); } + +} // namespace reduction +} // namespace cudf diff --git a/cpp/src/reductions/std.cu b/cpp/src/reductions/std.cu index 3c7a05abd4e..bb29e5cd030 100644 --- a/cpp/src/reductions/std.cu +++ b/cpp/src/reductions/std.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -// The translation unit for reduction `standard deviation` #include #include @@ -21,21 +20,19 @@ #include -// @param[in] ddof Delta Degrees of Freedom used for `std`, `var`. -// The divisor used in calculations is N - ddof, where N -// represents the number of elements. +namespace cudf { +namespace reduction { -std::unique_ptr cudf::reduction::standard_deviation( - column_view const& col, - cudf::data_type const output_dtype, - cudf::size_type ddof, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +std::unique_ptr standard_deviation(column_view const& col, + cudf::data_type const output_dtype, + cudf::size_type ddof, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { // TODO: add cuda version check when the fix is available #if !defined(__CUDACC_DEBUG__) using reducer = - cudf::reduction::compound::element_type_dispatcher; + compound::detail::element_type_dispatcher; auto col_type = cudf::is_dictionary(col.type()) ? dictionary_column_view(col).keys().type() : col.type(); return cudf::type_dispatcher(col_type, reducer(), col, output_dtype, ddof, stream, mr); @@ -45,3 +42,6 @@ std::unique_ptr cudf::reduction::standard_deviation( CUDF_FAIL("var/std reductions are not supported at debug build."); #endif } + +} // namespace reduction +} // namespace cudf diff --git a/cpp/src/reductions/var.cu b/cpp/src/reductions/var.cu index 2565e472661..2df653858b0 100644 --- a/cpp/src/reductions/var.cu +++ b/cpp/src/reductions/var.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,27 +14,24 @@ * limitations under the License. */ -// The translation unit for reduction `variance` - #include #include #include #include -// @param[in] ddof Delta Degrees of Freedom used for `std`, `var`. -// The divisor used in calculations is N - ddof, where N -// represents the number of elements. +namespace cudf { +namespace reduction { -std::unique_ptr cudf::reduction::variance(column_view const& col, - cudf::data_type const output_dtype, - cudf::size_type ddof, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +std::unique_ptr variance(column_view const& col, + cudf::data_type const output_dtype, + cudf::size_type ddof, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { // TODO: add cuda version check when the fix is available #if !defined(__CUDACC_DEBUG__) - using reducer = cudf::reduction::compound::element_type_dispatcher; + using reducer = compound::detail::element_type_dispatcher; auto col_type = cudf::is_dictionary(col.type()) ? dictionary_column_view(col).keys().type() : col.type(); return cudf::type_dispatcher(col_type, reducer(), col, output_dtype, ddof, stream, mr); @@ -44,3 +41,6 @@ std::unique_ptr cudf::reduction::variance(column_view const& col, CUDF_FAIL("var/std reductions are not supported at debug build."); #endif } + +} // namespace reduction +} // namespace cudf From d8df93abc2b07d3506b3da93928aeeb8c39f1410 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Mon, 21 Mar 2022 12:11:46 -0700 Subject: [PATCH 7/8] Docstring fixes for compound.cuh Co-authored-by: Bradley Dice --- cpp/src/reductions/compound.cuh | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/cpp/src/reductions/compound.cuh b/cpp/src/reductions/compound.cuh index f223f846bb1..89a95f5138c 100644 --- a/cpp/src/reductions/compound.cuh +++ b/cpp/src/reductions/compound.cuh @@ -27,21 +27,19 @@ namespace reduction { namespace compound { namespace detail { /** - * @brief Multi-step reduction for operations such as mean and variance, and - * standard deviation. + * @brief Multi-step reduction for operations such as mean, variance, and standard deviation. * - * @param[in] col input column view - * @param[in] ddof `Delta Degrees of Freedom` used for `std`, `var`. - * The divisor used in calculations is N - ddof, where N - * represents the number of elements. - * @param[in] stream CUDA stream used for device memory operations and kernel launches. - * @param[in] mr Device memory resource used to allocate the returned scalar's device memory - * @return Output scalar in device memory + * @tparam ElementType the input column data-type + * @tparam ResultType the output data-type + * @tparam Op the compound operator derived from `cudf::reduction::op::compound_op` * - * @tparam ElementType the input column cudf dtype - * @tparam ResultType the output cudf dtype - * @tparam Op the compound operator derived from - * `cudf::reduction::op::compound_op` + * @param col input column view + * @param output_dtype data type of return type and typecast elements of input column. + * @param ddof Delta degrees of freedom used for standard deviation and variance. The divisor used + * is N - ddof, where N represents the number of elements. + * @param stream CUDA stream used for device memory operations and kernel launches. + * @param mr Device memory resource used to allocate the returned scalar's device memory + * @return Output scalar in device memory */ template std::unique_ptr compound_reduction(column_view const& col, From 707ae8ea3fdb9e939f9e38e703a484e9100e2842 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Mon, 21 Mar 2022 14:01:09 -0700 Subject: [PATCH 8/8] Revert work on segmented_reduction test. --- .../reductions/segmented_reduction_tests.cpp | 89 +------------------ 1 file changed, 1 insertion(+), 88 deletions(-) diff --git a/cpp/tests/reductions/segmented_reduction_tests.cpp b/cpp/tests/reductions/segmented_reduction_tests.cpp index a5053311a7a..3a432cce801 100644 --- a/cpp/tests/reductions/segmented_reduction_tests.cpp +++ b/cpp/tests/reductions/segmented_reduction_tests.cpp @@ -15,12 +15,10 @@ */ #include -#include #include #include #include -#include #include #include @@ -38,7 +36,7 @@ struct SegmentedReductionTest : public cudf::test::BaseFixture { struct SegmentedReductionTestUntyped : public cudf::test::BaseFixture { }; -TYPED_TEST_SUITE(SegmentedReductionTest, NumericTypes); +TYPED_TEST_CASE(SegmentedReductionTest, NumericTypes); TYPED_TEST(SegmentedReductionTest, SumExcludeNulls) { @@ -387,91 +385,6 @@ TEST_F(SegmentedReductionTestUntyped, ReduceEmptyColumn) CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, expect); } -int32_t pow10(int32_t exponent) { return exponent == 0 ? 1 : 10 * pow10(exponent - 1); } - -template -struct SegmentedReductionFixedPointTest : public cudf::test::BaseFixture { - public: - std::vector scale_list_by_pow10(std::vector input, - int32_t exponent) - { - std::vector result(input.size()); - std::transform(input.begin(), input.end(), result.begin(), [&exponent](auto x) { - return exponent >= 0 ? x * pow10(exponent) : x / pow10(-exponent); - }); - return result; - } -}; - -TYPED_TEST_SUITE(SegmentedReductionFixedPointTest, cudf::test::FixedPointTypes); - -TYPED_TEST(SegmentedReductionFixedPointTest, MaxIncludeNullsScaleZero) -{ - // [1, 2, 3], [1], [], [2, NULL, 3], [NULL], [NULL, NULL] | scale: 0 - // values: {1, 2, 3, 1, 2, XXX, 3, XXX, XXX, XXX} - // nullmask:{1, 1, 1, 1, 1, 0, 1, 0, 0, 0} - // offsets: {0, 3, 4, 4, 7, 8, 10} - // output_dtype: decimalXX, scale: -1, 0, 1 - // outputs: {3, 1, XXX, XXX, XXX, XXX} - // output nullmask: {1, 1, 0, 0, 0, 0} - - using DecimalXX = TypeParam; - - for (int output_scale : {-1, 0, 1}) { - fixed_point_column_wrapper input{ - {1, 2, 3, 1, 2, XXX, 3, XXX, XXX, XXX}, - {true, true, true, true, true, false, true, false, false, false}, - numeric::scale_type(0)}; - fixed_width_column_wrapper offsets{0, 3, 4, 4, 7, 8, 10}; - - data_type output_dtype{type_to_id(), numeric::scale_type{output_scale}}; - - auto result_rep = this->scale_list_by_pow10({3, 1, XXX, XXX, XXX, XXX}, -output_scale); - fixed_point_column_wrapper expect{ - result_rep.begin(), - result_rep.end(), - {true, true, false, false, false, false}, - numeric::scale_type(output_scale)}; - - auto res = segmented_reduce(input, - column_view(offsets), - *make_max_aggregation(), - output_dtype, - null_policy::INCLUDE); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, expect); - } -} - -struct SegmentedReductionStringTest : public cudf::test::BaseFixture { -}; - -TEST_F(SegmentedReductionStringTest, MaxIncludeNulls) -{ - // ['world'], ['cudf', NULL, 'cuml'], ['hello', 'rapids', 'ai'], [], [NULL], [NULL, NULL] - // values: {"world", "cudf", XXX, "cuml", "hello", "rapids", "ai", XXX, XXX, XXX} - // nullmask:{1, 1, 0, 1, 1, 1, 1, 0, 0, 0} - // offsets: {0, 1, 4, 7, 7, 8, 10} - // output_dtype: string dtype - // outputs: {"world", XXX, "rapids", XXX, XXX, XXX} - // output nullmask: {1, 0, 1, 0, 0, 0} - - strings_column_wrapper input{ - {"world", "cudf", XXX, "cuml", "hello", "rapids", "ai", XXX, XXX, XXX}, - {true, true, false, true, true, true, true, false, false, false}}; - fixed_width_column_wrapper offsets{0, 1, 4, 7, 7, 8, 10}; - data_type output_dtype{type_id::STRING}; - - strings_column_wrapper expect{{"world", XXX, "rapids", XXX, XXX, XXX}, - {true, false, true, false, false, false}}; - - auto res = segmented_reduce(input, - column_view(offsets), - *make_max_aggregation(), - output_dtype, - null_policy::INCLUDE); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, expect); -} - #undef XXX } // namespace test