From 182ee2c4964bccaf2334bfb176ba0ae646b3deab Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Fri, 3 Feb 2023 08:49:30 -0500 Subject: [PATCH] Add compound aggregations to cudf::segmented_reduce (#12573) Adds mean, variance, and standard deviation aggregation support to `cudf::segmented_reduce`. These are compound (multi-step) aggregations and are modeled after the same aggregations supported but `cudf::reduce`. Once this approved and merged, the visitor pattern for this approach will be reworked for both `cudf::reduce` and `cudf::segmented_reduce` as per [#10432](https://github.com/rapidsai/cudf/issues/10432#issuecomment-1067560695). The source tree for `src/reductions` has been adjusted to put all segmented-reduce source files into `src/reductions/segmented` and removing the `segmented_` prefix from those file names. Also, the segmented-reduce functions have been moved from `cudf/detail/reduction_functions.hpp` into their own `cudf/detail/segmented_reduction_functions.hpp`. Likewise, the segmented-reduce CUB calls have been moved from `cudf/detail/reduction.cuh` to the new `cudf/detail/segmented_reduction.cuh` to help minimize including CUB headers. Additionally, the sum-of-squares aggregation is also included since it was a simple reduction only requiring the appropriate aggregation class registration and source file. Finally, gtests are added for these new types. The compound types only support floating-point outputs. Follow on PRs will address the visitor pattern already mentioned above as well as additional data types. Discussion on additional aggregations will occur in the reference issue #10432. Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Robert Maynard (https://github.com/robertmaynard) - AJ Schmidt (https://github.com/ajschmidt8) - Mike Wilson (https://github.com/hyperbolic2346) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/12573 --- conda/recipes/libcudf/meta.yaml | 1 + cpp/CMakeLists.txt | 19 +- .../cudf/detail/aggregation/aggregation.hpp | 10 +- cpp/include/cudf/detail/reduction.cuh | 91 +---- .../cudf/detail/reduction_functions.hpp | 172 +-------- .../cudf/detail/segmented_reduction.cuh | 198 ++++++++++ .../detail/segmented_reduction_functions.hpp | 338 ++++++++++++++++++ cpp/src/aggregation/aggregation.cpp | 10 +- .../stream_compaction/apply_boolean_mask.cu | 4 +- cpp/src/reductions/compound.cuh | 12 +- cpp/src/reductions/minmax.cu | 2 +- .../{segmented_all.cu => segmented/all.cu} | 4 +- .../{segmented_any.cu => segmented/any.cu} | 4 +- cpp/src/reductions/segmented/compound.cuh | 182 ++++++++++ .../{segmented_max.cu => segmented/max.cu} | 4 +- cpp/src/reductions/segmented/mean.cu | 40 +++ .../{segmented_min.cu => segmented/min.cu} | 4 +- .../product.cu} | 4 +- .../reductions.cpp} | 22 +- .../simple.cuh} | 83 +---- cpp/src/reductions/segmented/std.cu | 41 +++ .../{segmented_sum.cu => segmented/sum.cu} | 4 +- .../reductions/segmented/sum_of_squares.cu | 39 ++ .../reductions/segmented/update_validity.cu | 49 +++ .../reductions/segmented/update_validity.hpp | 58 +++ cpp/src/reductions/segmented/var.cu | 40 +++ cpp/src/reductions/std.cu | 4 +- cpp/src/reductions/var.cu | 4 +- .../reductions/segmented_reduction_tests.cpp | 240 ++++++++++++- 29 files changed, 1318 insertions(+), 365 deletions(-) create mode 100644 cpp/include/cudf/detail/segmented_reduction.cuh create mode 100644 cpp/include/cudf/detail/segmented_reduction_functions.hpp rename cpp/src/reductions/{segmented_all.cu => segmented/all.cu} (94%) rename cpp/src/reductions/{segmented_any.cu => segmented/any.cu} (94%) create mode 100644 cpp/src/reductions/segmented/compound.cuh rename cpp/src/reductions/{segmented_max.cu => segmented/max.cu} (94%) create mode 100644 cpp/src/reductions/segmented/mean.cu rename cpp/src/reductions/{segmented_min.cu => segmented/min.cu} (94%) rename cpp/src/reductions/{segmented_product.cu => segmented/product.cu} (94%) rename cpp/src/reductions/{segmented_reductions.cpp => segmented/reductions.cpp} (87%) rename cpp/src/reductions/{simple_segmented.cuh => segmented/simple.cuh} (84%) create mode 100644 cpp/src/reductions/segmented/std.cu rename cpp/src/reductions/{segmented_sum.cu => segmented/sum.cu} (95%) create mode 100644 cpp/src/reductions/segmented/sum_of_squares.cu create mode 100644 cpp/src/reductions/segmented/update_validity.cu create mode 100644 cpp/src/reductions/segmented/update_validity.hpp create mode 100644 cpp/src/reductions/segmented/var.cu diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml index 7372d4d4de6..911080ebdb6 100644 --- a/conda/recipes/libcudf/meta.yaml +++ b/conda/recipes/libcudf/meta.yaml @@ -110,6 +110,7 @@ outputs: - test -f $PREFIX/include/cudf/detail/scan.hpp - test -f $PREFIX/include/cudf/detail/scatter.hpp - test -f $PREFIX/include/cudf/detail/search.hpp + - test -f $PREFIX/include/cudf/detail/segmented_reduction_functions.hpp - test -f $PREFIX/include/cudf/detail/sequence.hpp - test -f $PREFIX/include/cudf/detail/sorting.hpp - test -f $PREFIX/include/cudf/detail/stream_compaction.hpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 9a9b5d1e5ed..19c118016bf 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -441,13 +441,18 @@ add_library( src/reductions/scan/scan.cpp src/reductions/scan/scan_exclusive.cu src/reductions/scan/scan_inclusive.cu - src/reductions/segmented_all.cu - src/reductions/segmented_any.cu - src/reductions/segmented_max.cu - src/reductions/segmented_min.cu - src/reductions/segmented_product.cu - src/reductions/segmented_reductions.cpp - src/reductions/segmented_sum.cu + src/reductions/segmented/all.cu + src/reductions/segmented/any.cu + src/reductions/segmented/max.cu + src/reductions/segmented/mean.cu + src/reductions/segmented/min.cu + src/reductions/segmented/product.cu + src/reductions/segmented/reductions.cpp + src/reductions/segmented/std.cu + src/reductions/segmented/sum.cu + src/reductions/segmented/sum_of_squares.cu + src/reductions/segmented/update_validity.cu + src/reductions/segmented/var.cu src/reductions/std.cu src/reductions/sum.cu src/reductions/sum_of_squares.cu diff --git a/cpp/include/cudf/detail/aggregation/aggregation.hpp b/cpp/include/cudf/detail/aggregation/aggregation.hpp index 360c314f2db..e269d4d2e13 100644 --- a/cpp/include/cudf/detail/aggregation/aggregation.hpp +++ b/cpp/include/cudf/detail/aggregation/aggregation.hpp @@ -292,7 +292,9 @@ class all_aggregation final : public reduce_aggregation, public segmented_reduce /** * @brief Derived class for specifying a sum_of_squares aggregation */ -class sum_of_squares_aggregation final : public groupby_aggregation, public reduce_aggregation { +class sum_of_squares_aggregation final : public groupby_aggregation, + public reduce_aggregation, + public segmented_reduce_aggregation { public: sum_of_squares_aggregation() : aggregation(SUM_OF_SQUARES) {} @@ -313,7 +315,8 @@ class sum_of_squares_aggregation final : public groupby_aggregation, public redu */ class mean_aggregation final : public rolling_aggregation, public groupby_aggregation, - public reduce_aggregation { + public reduce_aggregation, + public segmented_reduce_aggregation { public: mean_aggregation() : aggregation(MEAN) {} @@ -353,7 +356,8 @@ class m2_aggregation : public groupby_aggregation { */ class std_var_aggregation : public rolling_aggregation, public groupby_aggregation, - public reduce_aggregation { + public reduce_aggregation, + public segmented_reduce_aggregation { public: size_type _ddof; ///< Delta degrees of freedom diff --git a/cpp/include/cudf/detail/reduction.cuh b/cpp/include/cudf/detail/reduction.cuh index f1fe0d2e1a9..9dc3b996afc 100644 --- a/cpp/include/cudf/detail/reduction.cuh +++ b/cpp/include/cudf/detail/reduction.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,7 +16,7 @@ #pragma once -#include "reduction_operators.cuh" +#include #include #include @@ -27,7 +27,6 @@ #include #include -#include #include #include @@ -229,92 +228,6 @@ std::unique_ptr reduce(InputIterator d_in, return std::unique_ptr(result); } -/** - * @brief Compute the specified simple reduction over each of the segments in the - * input range of elements. - * - * @tparam InputIterator the input column iterator - * @tparam OffsetIterator the offset column iterator - * @tparam OutputIterator the output column iterator - * @tparam BinaryOp the device binary operator used to reduce - * @tparam OutputType the output type of reduction - * - * @param[in] d_in the begin iterator to input - * @param[in] d_offset_begin the begin iterator to offset - * @param[in] d_offset_end the end iterator to offset. Note: This is - * num_segments+1 elements past `d_offset_begin`. - * @param[out] d_out the begin iterator to output - * @param[in] binary_op the reduction operator - * @param[in] identity the identity element of the reduction operator - * @param[in] initial_value Initial value of the reduction - * @param[in] stream CUDA stream used for device memory operations and kernel launches - * - */ -template ::type, - typename std::enable_if_t() && - !cudf::is_fixed_point()>* = nullptr> -void segmented_reduce(InputIterator d_in, - OffsetIterator d_offset_begin, - OffsetIterator d_offset_end, - OutputIterator d_out, - BinaryOp binary_op, - OutputType initial_value, - rmm::cuda_stream_view stream) -{ - auto const num_segments = static_cast(std::distance(d_offset_begin, d_offset_end)) - 1; - - // Allocate temporary storage - rmm::device_buffer d_temp_storage; - size_t temp_storage_bytes = 0; - cub::DeviceSegmentedReduce::Reduce(d_temp_storage.data(), - temp_storage_bytes, - d_in, - d_out, - num_segments, - d_offset_begin, - d_offset_begin + 1, - binary_op, - initial_value, - stream.value()); - d_temp_storage = rmm::device_buffer{temp_storage_bytes, stream}; - - // Run reduction - cub::DeviceSegmentedReduce::Reduce(d_temp_storage.data(), - temp_storage_bytes, - d_in, - d_out, - num_segments, - d_offset_begin, - d_offset_begin + 1, - binary_op, - initial_value, - stream.value()); -} - -template ::type, - typename std::enable_if_t() && - !cudf::is_fixed_point())>* = nullptr> -void segmented_reduce(InputIterator, - OffsetIterator, - OffsetIterator, - OutputIterator, - BinaryOp, - OutputType, - rmm::cuda_stream_view) -{ - CUDF_FAIL( - "Unsupported data types called on segmented_reduce. Only numeric and chrono types are " - "supported."); -} - } // namespace detail } // namespace reduction } // namespace cudf diff --git a/cpp/include/cudf/detail/reduction_functions.hpp b/cpp/include/cudf/detail/reduction_functions.hpp index a2de286f283..1f892bb90c5 100644 --- a/cpp/include/cudf/detail/reduction_functions.hpp +++ b/cpp/include/cudf/detail/reduction_functions.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -216,7 +216,7 @@ std::unique_ptr mean( std::unique_ptr variance( column_view const& col, data_type const output_dtype, - cudf::size_type ddof, + size_type ddof, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); @@ -239,7 +239,7 @@ std::unique_ptr variance( std::unique_ptr standard_deviation( column_view const& col, data_type const output_dtype, - cudf::size_type ddof, + size_type ddof, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); @@ -338,171 +338,5 @@ std::unique_ptr merge_sets( rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); -/** - * @brief Compute sum of each segment in input column. - * - * If an input segment is empty, the segment result is null. - * - * @throw cudf::logic_error if input column type is not convertible to `output_dtype`. - * @throw cudf::logic_error if `output_dtype` is not an arithmetic type. - * - * @param col Input column to compute sum - * @param offsets Indices to identify segment boundaries - * @param output_dtype Data type of return type and typecast elements of input column - * @param null_handling If `null_policy::INCLUDE`, all elements in a segment must be valid for the - * reduced value to be valid. If `null_policy::EXCLUDE`, the reduced value is valid if any element - * in the segment is valid. - * @param init Initial value of each sum - * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource used to allocate the returned column's device memory - * @return Sums of segments in type `output_dtype` - */ -std::unique_ptr segmented_sum( - column_view const& col, - device_span offsets, - data_type const output_dtype, - null_policy null_handling, - std::optional> init, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - -/** - * @brief Computes product of each segment in input column. - * - * If an input segment is empty, the segment result is null. - * - * @throw cudf::logic_error if input column type is not convertible to `output_dtype`. - * @throw cudf::logic_error if `output_dtype` is not an arithmetic type. - * - * @param col Input column to compute product - * @param offsets Indices to identify segment boundaries - * @param output_dtype data type of return type and typecast elements of input column - * @param null_handling If `null_policy::INCLUDE`, all elements in a segment must be valid for the - * reduced value to be valid. If `null_policy::EXCLUDE`, the reduced value is valid if any element - * in the segment is valid. - * @param init Initial value of each product - * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource used to allocate the returned scalar's device memory - * @return Product as scalar of type `output_dtype` - */ -std::unique_ptr segmented_product( - column_view const& col, - device_span offsets, - data_type const output_dtype, - null_policy null_handling, - std::optional> init, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - -/** - * @brief Compute minimum of each segment in input column. - * - * If an input segment is empty, the segment result is null. - * - * @throw cudf::logic_error if input column type is convertible to `output_dtype`. - * - * @param col Input column to compute minimum - * @param offsets Indices to identify segment boundaries - * @param output_dtype Data type of return type and typecast elements of input column - * @param null_handling If `null_policy::INCLUDE`, all elements in a segment must be valid for the - * reduced value to be valid. If `null_policy::EXCLUDE`, the reduced value is valid if any element - * in the segment is valid. - * @param init Initial value of each minimum - * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource used to allocate the returned scalar's device memory - * @return Minimums of segments in type `output_dtype` - */ -std::unique_ptr segmented_min( - column_view const& col, - device_span offsets, - data_type const output_dtype, - null_policy null_handling, - std::optional> init, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - -/** - * @brief Compute maximum of each segment in input column. - * - * If an input segment is empty, the segment result is null. - * - * @throw cudf::logic_error if input column type is convertible to `output_dtype`. - * - * @param col Input column to compute maximum - * @param offsets Indices to identify segment boundaries - * @param output_dtype Data type of return type and typecast elements of input column - * @param null_handling If `null_policy::INCLUDE`, all elements in a segment must be valid for the - * reduced value to be valid. If `null_policy::EXCLUDE`, the reduced value is valid if any element - * in the segment is valid. - * @param init Initial value of each maximum - * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource used to allocate the returned scalar's device memory - * @return Maximums of segments in type `output_dtype` - */ -std::unique_ptr segmented_max( - column_view const& col, - device_span offsets, - data_type const output_dtype, - null_policy null_handling, - std::optional> init, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - -/** - * @brief Compute if any of the values in the segment are true when typecasted to bool. - * - * If an input segment is empty, the segment result is null. - * - * @throw cudf::logic_error if input column type is not convertible to bool. - * @throw cudf::logic_error if `output_dtype` is not bool8. - * - * @param col Input column to compute any - * @param offsets Indices to identify segment boundaries - * @param output_dtype Data type of return type and typecast elements of input column - * @param null_handling If `null_policy::INCLUDE`, all elements in a segment must be valid for the - * reduced value to be valid. If `null_policy::EXCLUDE`, the reduced value is valid if any element - * in the segment is valid. - * @param init Initial value of each any - * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource used to allocate the returned scalar's device memory - * @return Column of bool8 for the results of the segments - */ -std::unique_ptr segmented_any( - column_view const& col, - device_span offsets, - data_type const output_dtype, - null_policy null_handling, - std::optional> init, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - -/** - * @brief Compute if all of the values in the segment are true when typecasted to bool. - * - * If an input segment is empty, the segment result is null. - * - * @throw cudf::logic_error if input column type is not convertible to bool. - * @throw cudf::logic_error if `output_dtype` is not bool8. - * - * @param col Input column to compute all - * @param offsets Indices to identify segment boundaries - * @param output_dtype Data type of return type and typecast elements of input column - * @param null_handling If `null_policy::INCLUDE`, all elements in a segment must be valid for the - * reduced value to be valid. If `null_policy::EXCLUDE`, the reduced value is valid if any element - * in the segment is valid. - * @param init Initial value of each all - * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource used to allocate the returned scalar's device memory - * @return Column of bool8 for the results of the segments - */ -std::unique_ptr segmented_all( - column_view const& col, - device_span offsets, - data_type const output_dtype, - null_policy null_handling, - std::optional> init, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - } // namespace reduction } // namespace cudf diff --git a/cpp/include/cudf/detail/segmented_reduction.cuh b/cpp/include/cudf/detail/segmented_reduction.cuh new file mode 100644 index 00000000000..9a49c1abe38 --- /dev/null +++ b/cpp/include/cudf/detail/segmented_reduction.cuh @@ -0,0 +1,198 @@ +/* + * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#include +#include +#include + +#include + +#include +#include + +namespace cudf { +namespace reduction { +namespace detail { + +/** + * @brief Compute the specified simple reduction over each of the segments in the + * input range of elements + * + * @tparam InputIterator Input iterator type + * @tparam OffsetIterator Offset iterator type + * @tparam OutputIterator Output iterator type + * @tparam BinaryOp Binary operator used for reduce + * @tparam OutputType The output type derived from the OutputIterator + * + * @param d_in Input data iterator + * @param d_offset_begin Begin iterator to segment indices + * @param d_offset_end End iterator to segment indices + * @param d_out Output data iterator + * @param binary_op The reduction operator + * @param initial_value Initial value of the reduction + * @param stream CUDA stream used for device memory operations and kernel launches + * + */ +template ::type, + typename std::enable_if_t() && + !cudf::is_fixed_point()>* = nullptr> +void segmented_reduce(InputIterator d_in, + OffsetIterator d_offset_begin, + OffsetIterator d_offset_end, + OutputIterator d_out, + BinaryOp binary_op, + OutputType initial_value, + rmm::cuda_stream_view stream) +{ + auto const num_segments = static_cast(std::distance(d_offset_begin, d_offset_end)) - 1; + + // Allocate temporary storage + size_t temp_storage_bytes = 0; + cub::DeviceSegmentedReduce::Reduce(nullptr, + temp_storage_bytes, + d_in, + d_out, + num_segments, + d_offset_begin, + d_offset_begin + 1, + binary_op, + initial_value, + stream.value()); + auto d_temp_storage = rmm::device_buffer{temp_storage_bytes, stream}; + + // Run reduction + cub::DeviceSegmentedReduce::Reduce(d_temp_storage.data(), + temp_storage_bytes, + d_in, + d_out, + num_segments, + d_offset_begin, + d_offset_begin + 1, + binary_op, + initial_value, + stream.value()); +} + +template ::type, + typename std::enable_if_t() && + !cudf::is_fixed_point())>* = nullptr> +void segmented_reduce(InputIterator, + OffsetIterator, + OffsetIterator, + OutputIterator, + BinaryOp, + OutputType, + rmm::cuda_stream_view) +{ + CUDF_FAIL( + "Unsupported data types called on segmented_reduce. Only numeric and chrono types are " + "supported."); +} + +/** + * @brief Compute reduction by the compound operator (reduce and transform) + * + * The reduction operator must have an `intermediate::compute_result()` method. + * This method performs reduction using binary operator `Op::Op` and calculates the + * result to `OutputType` using `compute_result()` through the transform method. + * + * @tparam Op Reduction operator + * @tparam InputIterator Input iterator type + * @tparam OffsetIterator Offsets iterator type + * @tparam OutputIterator Output iterator type + * + * @param d_in Input data iterator + * @param d_offset_begin Begin iterator to segment indices + * @param d_offset_end End iterator to segment indices + * @param d_out Output data iterator + * @param op The reduction operator + * @param ddof Delta degrees of freedom used for standard deviation and variance + * @param d_valid_counts Number of valid values per segment + * @param stream CUDA stream used for device memory operations and kernel launches + */ +template +void segmented_reduce(InputIterator d_in, + OffsetIterator d_offset_begin, + OffsetIterator d_offset_end, + OutputIterator d_out, + op::compound_op op, + size_type ddof, + size_type* d_valid_counts, + rmm::cuda_stream_view stream) +{ + using OutputType = typename thrust::iterator_value::type; + using IntermediateType = typename thrust::iterator_value::type; + auto num_segments = static_cast(std::distance(d_offset_begin, d_offset_end)); + auto const binary_op = op.get_binary_op(); + auto const initial_value = op.template get_identity(); + + rmm::device_uvector intermediate_result{static_cast(num_segments), + stream}; + + // Allocate temporary storage + size_t temp_storage_bytes = 0; + cub::DeviceSegmentedReduce::Reduce(nullptr, + temp_storage_bytes, + d_in, + intermediate_result.data(), + num_segments, + d_offset_begin, + d_offset_begin + 1, + binary_op, + initial_value, + stream.value()); + auto d_temp_storage = rmm::device_buffer{temp_storage_bytes, stream}; + + // Run reduction + cub::DeviceSegmentedReduce::Reduce(d_temp_storage.data(), + temp_storage_bytes, + d_in, + intermediate_result.data(), + num_segments, + d_offset_begin, + d_offset_begin + 1, + binary_op, + initial_value, + stream.value()); + + // compute the result value from intermediate value in device + thrust::transform( + rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(num_segments), + d_out, + [ir = intermediate_result.data(), op, d_valid_counts, ddof] __device__(auto idx) { + auto const count = d_valid_counts[idx]; + return count > 0 ? op.template compute_result(ir[idx], count, ddof) + : OutputType{0}; + }); +} + +} // namespace detail +} // namespace reduction +} // namespace cudf diff --git a/cpp/include/cudf/detail/segmented_reduction_functions.hpp b/cpp/include/cudf/detail/segmented_reduction_functions.hpp new file mode 100644 index 00000000000..7b5628fa49a --- /dev/null +++ b/cpp/include/cudf/detail/segmented_reduction_functions.hpp @@ -0,0 +1,338 @@ +/* + * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include + +#include + +#include + +namespace cudf { +namespace reduction { + +/** + * @brief Compute sum of each segment in the input column + * + * If an input segment is empty, the segment result is null. + * + * If `null_handling==null_policy::INCLUDE`, all elements in a segment must be valid + * for the reduced value to be valid. + * If `null_handling==null_policy::EXCLUDE`, the reduced value is valid if any element + * in the segment is valid. + * + * @throw cudf::logic_error if input column type is not convertible to `output_dtype`. + * @throw cudf::logic_error if `output_dtype` is not an arithmetic type. + * + * @param col Input column data + * @param offsets Indices to identify segment boundaries within input `col` + * @param output_dtype Data type of the output column + * @param null_handling Specifies how null elements are processed for each segment + * @param init Initial value of each segment + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return Sums of segments as type `output_dtype` + */ +std::unique_ptr segmented_sum( + column_view const& col, + device_span offsets, + data_type const output_dtype, + null_policy null_handling, + std::optional> init, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Computes product of each segment in the input column + * + * If an input segment is empty, the segment result is null. + * + * If `null_handling==null_policy::INCLUDE`, all elements in a segment must be valid + * for the reduced value to be valid. + * If `null_handling==null_policy::EXCLUDE`, the reduced value is valid if any element + * in the segment is valid. + * + * @throw cudf::logic_error if input column type is not convertible to `output_dtype`. + * @throw cudf::logic_error if `output_dtype` is not an arithmetic type. + * + * @param col Input column data + * @param offsets Indices to identify segment boundaries within input `col` + * @param output_dtype Data type of the output column + * @param null_handling Specifies how null elements are processed for each segment + * @param init Initial value of each segment + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return Product of segments as type `output_dtype` + */ +std::unique_ptr segmented_product( + column_view const& col, + device_span offsets, + data_type const output_dtype, + null_policy null_handling, + std::optional> init, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Compute minimum of each segment in the input column + * + * If an input segment is empty, the segment result is null. + * + * If `null_handling==null_policy::INCLUDE`, all elements in a segment must be valid + * for the reduced value to be valid. + * If `null_handling==null_policy::EXCLUDE`, the reduced value is valid if any element + * in the segment is valid. + * + * @throw cudf::logic_error if input column type is not convertible to `output_dtype`. + * + * @param col Input column data + * @param offsets Indices to identify segment boundaries within input `col` + * @param output_dtype Data type of the output column + * @param null_handling Specifies how null elements are processed for each segment + * @param init Initial value of each segment + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return Minimums of segments as type `output_dtype` + */ +std::unique_ptr segmented_min( + column_view const& col, + device_span offsets, + data_type const output_dtype, + null_policy null_handling, + std::optional> init, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Compute maximum of each segment in the input column + * + * If an input segment is empty, the segment result is null. + * + * If `null_handling==null_policy::INCLUDE`, all elements in a segment must be valid + * for the reduced value to be valid. + * If `null_handling==null_policy::EXCLUDE`, the reduced value is valid if any element + * in the segment is valid. + * + * @throw cudf::logic_error if input column type is not convertible to `output_dtype`. + * + * @param col Input column data + * @param offsets Indices to identify segment boundaries within input `col` + * @param output_dtype Data type of the output column + * @param null_handling Specifies how null elements are processed for each segment + * @param init Initial value of each segment + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return Maximums of segments as type `output_dtype` + */ +std::unique_ptr segmented_max( + column_view const& col, + device_span offsets, + data_type const output_dtype, + null_policy null_handling, + std::optional> init, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Compute if any of the values in the segment are true when typecasted to bool + * + * If an input segment is empty, the segment result is null. + * + * If `null_handling==null_policy::INCLUDE`, all elements in a segment must be valid + * for the reduced value to be valid. + * If `null_handling==null_policy::EXCLUDE`, the reduced value is valid if any element + * in the segment is valid. + * + * @throw cudf::logic_error if input column type is not convertible to bool. + * @throw cudf::logic_error if `output_dtype` is not BOOL8. + * + * @param col Input column data + * @param offsets Indices to identify segment boundaries within input `col` + * @param output_dtype Data type of the output column + * @param null_handling Specifies how null elements are processed for each segment + * @param init Initial value of each segment + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return Column of type BOOL8 for the results of the segments + */ +std::unique_ptr segmented_any( + column_view const& col, + device_span offsets, + data_type const output_dtype, + null_policy null_handling, + std::optional> init, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Compute if all of the values in the segment are true when typecasted to bool + * + * If an input segment is empty, the segment result is null. + * + * If `null_handling==null_policy::INCLUDE`, all elements in a segment must be valid + * for the reduced value to be valid. + * If `null_handling==null_policy::EXCLUDE`, the reduced value is valid if any element + * in the segment is valid. + * + * @throw cudf::logic_error if input column type is not convertible to bool. + * @throw cudf::logic_error if `output_dtype` is not BOOL8. + * + * @param col Input column data + * @param offsets Indices to identify segment boundaries within input `col` + * @param output_dtype Data type of the output column + * @param null_handling Specifies how null elements are processed for each segment + * @param init Initial value of each segment + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return Column of BOOL8 for the results of the segments + */ +std::unique_ptr segmented_all( + column_view const& col, + device_span offsets, + data_type const output_dtype, + null_policy null_handling, + std::optional> init, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Computes mean of elements of segments in the input column + * + * If input segment is empty, the segment result is null. + * + * If `null_handling==null_policy::INCLUDE`, all elements in a segment must be valid + * for the reduced value to be valid. + * If `null_handling==null_policy::EXCLUDE`, the reduced value is valid if any element + * in the segment is valid. + * + * @throw cudf::logic_error if input column type is not arithmetic type + * @throw cudf::logic_error if `output_dtype` is not floating point type + * + * @param col Input column data + * @param offsets Indices to identify segment boundaries within input `col` + * @param output_dtype Data type of the output column + * @param null_handling Specifies how null elements are processed for each segment + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return Column of `output_dtype` for the reduction results of the segments + */ +std::unique_ptr segmented_mean( + column_view const& col, + device_span offsets, + data_type const output_dtype, + null_policy null_handling, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Computes sum of squares of elements of segments in the input column + * + * If input segment is empty, the segment result is null. + * + * If `null_handling==null_policy::INCLUDE`, all elements in a segment must be valid + * for the reduced value to be valid. + * If `null_handling==null_policy::EXCLUDE`, the reduced value is valid if any element + * in the segment is valid. + * + * @throw cudf::logic_error if input column type is not arithmetic type + * @throw cudf::logic_error if `output_dtype` is not an arithmetic type + * + * @param col Input column data + * @param offsets Indices to identify segment boundaries within input `col` + * @param output_dtype Data type of the output column + * @param null_handling Specifies how null elements are processed for each segment + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return Column of `output_dtype` for the reduction results of the segments + */ +std::unique_ptr segmented_sum_of_squares( + column_view const& col, + device_span offsets, + data_type const output_dtype, + null_policy null_handling, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Computes the standard deviation of elements of segments in the input column + * + * If input segment is empty, the segment result is null. + * + * If `null_handling==null_policy::INCLUDE`, all elements in a segment must be valid + * for the reduced value to be valid. + * If `null_handling==null_policy::EXCLUDE`, the reduced value is valid if any element + * in the segment is valid. + * + * @throw cudf::logic_error if input column type is not arithmetic type + * @throw cudf::logic_error if `output_dtype` is not floating point type + * + * @param col Input column data + * @param offsets Indices to identify segment boundaries within input `col` + * @param output_dtype Data type of the output column + * @param null_handling Specifies how null elements are processed for each segment + * @param ddof Delta degrees of freedom. + * The divisor used is N - ddof, where N the number of elements in each segment + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return Column of `output_dtype` for the reduction results of the segments + */ +std::unique_ptr segmented_standard_deviation( + column_view const& col, + device_span offsets, + data_type const output_dtype, + null_policy null_handling, + size_type ddof, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Computes the variance of elements of segments in the input column + * + * If input segment is empty, the segment result is null. + * + * If `null_handling==null_policy::INCLUDE`, all elements in a segment must be valid + * for the reduced value to be valid. + * If `null_handling==null_policy::EXCLUDE`, the reduced value is valid if any element + * in the segment is valid. + * + * @throw cudf::logic_error if input column type is not arithmetic type + * @throw cudf::logic_error if `output_dtype` is not floating point type + * + * @param col Input column data + * @param offsets Indices to identify segment boundaries within input `col` + * @param output_dtype Data type of the output column + * @param null_handling Specifies how null elements are processed for each segment + * @param ddof Delta degrees of freedom. + * The divisor used is N - ddof, where N the number of elements in each segment + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return Column of `output_dtype` for the reduction results of the segments + */ +std::unique_ptr segmented_variance( + column_view const& col, + device_span offsets, + data_type const output_dtype, + null_policy null_handling, + size_type ddof, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +} // namespace reduction +} // namespace cudf diff --git a/cpp/src/aggregation/aggregation.cpp b/cpp/src/aggregation/aggregation.cpp index 6dd014970c7..07c53b3a421 100644 --- a/cpp/src/aggregation/aggregation.cpp +++ b/cpp/src/aggregation/aggregation.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -492,6 +492,8 @@ template std::unique_ptr make_sum_of_squares_aggregation make_sum_of_squares_aggregation(); template std::unique_ptr make_sum_of_squares_aggregation(); +template std::unique_ptr +make_sum_of_squares_aggregation(); /// Factory to create a MEAN aggregation template @@ -503,6 +505,8 @@ template std::unique_ptr make_mean_aggregation(); template std::unique_ptr make_mean_aggregation(); template std::unique_ptr make_mean_aggregation(); template std::unique_ptr make_mean_aggregation(); +template std::unique_ptr +make_mean_aggregation(); /// Factory to create a M2 aggregation template @@ -526,6 +530,8 @@ template std::unique_ptr make_variance_aggregation make_variance_aggregation( size_type ddof); +template std::unique_ptr +make_variance_aggregation(size_type ddof); /// Factory to create a STD aggregation template @@ -540,6 +546,8 @@ template std::unique_ptr make_std_aggregation make_std_aggregation( size_type ddof); +template std::unique_ptr +make_std_aggregation(size_type ddof); /// Factory to create a MEDIAN aggregation template diff --git a/cpp/src/lists/stream_compaction/apply_boolean_mask.cu b/cpp/src/lists/stream_compaction/apply_boolean_mask.cu index c1c17dc0688..5acb1cb8849 100644 --- a/cpp/src/lists/stream_compaction/apply_boolean_mask.cu +++ b/cpp/src/lists/stream_compaction/apply_boolean_mask.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,8 +20,8 @@ #include #include #include -#include #include +#include #include #include #include diff --git a/cpp/src/reductions/compound.cuh b/cpp/src/reductions/compound.cuh index f901ebd2c24..9458ae2d581 100644 --- a/cpp/src/reductions/compound.cuh +++ b/cpp/src/reductions/compound.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -46,7 +46,7 @@ namespace detail { template std::unique_ptr compound_reduction(column_view const& col, data_type const output_dtype, - cudf::size_type ddof, + size_type ddof, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { @@ -99,7 +99,7 @@ struct result_type_dispatcher { template ()>* = nullptr> std::unique_ptr operator()(column_view const& col, cudf::data_type const output_dtype, - cudf::size_type ddof, + size_type ddof, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { @@ -109,7 +109,7 @@ struct result_type_dispatcher { template ()>* = nullptr> std::unique_ptr operator()(column_view const& col, cudf::data_type const output_dtype, - cudf::size_type ddof, + size_type ddof, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { @@ -132,7 +132,7 @@ struct element_type_dispatcher { template ()>* = nullptr> std::unique_ptr operator()(column_view const& col, cudf::data_type const output_dtype, - cudf::size_type ddof, + size_type ddof, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { @@ -143,7 +143,7 @@ struct element_type_dispatcher { template ()>* = nullptr> std::unique_ptr operator()(column_view const& col, cudf::data_type const output_dtype, - cudf::size_type ddof, + size_type ddof, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { diff --git a/cpp/src/reductions/minmax.cu b/cpp/src/reductions/minmax.cu index 42288a67242..b1d951a783b 100644 --- a/cpp/src/reductions/minmax.cu +++ b/cpp/src/reductions/minmax.cu @@ -74,7 +74,7 @@ template ::type> rmm::device_scalar reduce_device(InputIterator d_in, - cudf::size_type num_items, + size_type num_items, Op binary_op, rmm::cuda_stream_view stream) { diff --git a/cpp/src/reductions/segmented_all.cu b/cpp/src/reductions/segmented/all.cu similarity index 94% rename from cpp/src/reductions/segmented_all.cu rename to cpp/src/reductions/segmented/all.cu index 4536785fe82..f75fcd8066c 100644 --- a/cpp/src/reductions/segmented_all.cu +++ b/cpp/src/reductions/segmented/all.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "simple_segmented.cuh" +#include "simple.cuh" #include diff --git a/cpp/src/reductions/segmented_any.cu b/cpp/src/reductions/segmented/any.cu similarity index 94% rename from cpp/src/reductions/segmented_any.cu rename to cpp/src/reductions/segmented/any.cu index cc50eb1e1f4..6a4fc70d438 100644 --- a/cpp/src/reductions/segmented_any.cu +++ b/cpp/src/reductions/segmented/any.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "simple_segmented.cuh" +#include "simple.cuh" #include diff --git a/cpp/src/reductions/segmented/compound.cuh b/cpp/src/reductions/segmented/compound.cuh new file mode 100644 index 00000000000..dc8a995d1b0 --- /dev/null +++ b/cpp/src/reductions/segmented/compound.cuh @@ -0,0 +1,182 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "update_validity.hpp" + +#include +#include +#include +#include +#include + +#include +#include + +namespace cudf { +namespace reduction { +namespace compound { +namespace detail { +/** + * @brief Multi-step reduction for operations such as mean, variance, and standard deviation. + * + * @tparam InputType the input column data-type + * @tparam ResultType the output data-type + * @tparam Op the compound operator derived from `cudf::reduction::op::compound_op` + * + * @param col Input column view + * @param offsets Indices identifying segments + * @param null_handling Indicates if null elements should be included in the reduction + * @param ddof Delta degrees of freedom used for standard deviation and variance. + * The divisor used is N - ddof, where N represents the number of elements. + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned scalar's device memory + * @return Segmented reduce result + */ +template +std::unique_ptr compound_segmented_reduction(column_view const& col, + device_span offsets, + null_policy null_handling, + size_type ddof, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + auto d_col = cudf::column_device_view::create(col, stream); + auto compound_op = Op{}; + auto const num_segments = offsets.size() - 1; + + auto result = make_fixed_width_column( + data_type{type_to_id()}, num_segments, mask_state::UNALLOCATED, stream, mr); + auto out_itr = result->mutable_view().template begin(); + + // Compute valid counts + rmm::device_uvector valid_counts(num_segments, stream); + if (col.has_nulls() && (null_handling == null_policy::EXCLUDE)) { + auto valid_fn = [] __device__(auto p) -> size_type { return static_cast(p.second); }; + auto itr = thrust::make_transform_iterator(d_col->pair_begin(), valid_fn); + cudf::reduction::detail::segmented_reduce(itr, + offsets.begin(), + offsets.end(), + valid_counts.data(), + thrust::plus{}, + 0, + stream); + } else { + thrust::adjacent_difference( + rmm::exec_policy(stream), offsets.begin() + 1, offsets.end(), valid_counts.begin()); + } + + // Run segmented reduction + if (col.has_nulls()) { + auto nrt = compound_op.template get_null_replacing_element_transformer(); + auto itr = thrust::make_transform_iterator(d_col->pair_begin(), nrt); + cudf::reduction::detail::segmented_reduce( + itr, offsets.begin(), offsets.end(), out_itr, compound_op, ddof, valid_counts.data(), stream); + } else { + auto et = compound_op.template get_element_transformer(); + auto itr = thrust::make_transform_iterator(d_col->begin(), et); + cudf::reduction::detail::segmented_reduce( + itr, offsets.begin(), offsets.end(), out_itr, compound_op, ddof, valid_counts.data(), stream); + } + + // Compute the output null mask + cudf::reduction::detail::segmented_update_validity( + *result, col, offsets, null_handling, std::nullopt, stream, mr); + + return result; +}; + +template +struct compound_float_output_dispatcher { + private: + template + static constexpr bool is_supported_v() + { + return std::is_floating_point_v; + } + + public: + template ()>* = nullptr> + std::unique_ptr operator()(column_view const& col, + device_span offsets, + null_policy null_handling, + size_type ddof, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + { + return compound_segmented_reduction( + col, offsets, null_handling, ddof, stream, mr); + } + + template ()>* = nullptr> + std::unique_ptr operator()(column_view const&, + device_span, + null_policy, + size_type, + rmm::cuda_stream_view, + rmm::mr::device_memory_resource*) + { + CUDF_FAIL("Unsupported output data type"); + } +}; + +template +struct compound_segmented_dispatcher { + private: + template + static constexpr bool is_supported_v() + { + return std::is_arithmetic_v; + } + + public: + template ()>* = nullptr> + std::unique_ptr operator()(column_view const& col, + device_span offsets, + cudf::data_type const output_dtype, + null_policy null_handling, + size_type ddof, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + { + return cudf::type_dispatcher(output_dtype, + compound_float_output_dispatcher(), + col, + offsets, + null_handling, + ddof, + stream, + mr); + } + + template ()>* = nullptr> + std::unique_ptr operator()(column_view const&, + device_span, + cudf::data_type const, + null_policy, + size_type, + rmm::cuda_stream_view, + rmm::mr::device_memory_resource*) + { + CUDF_FAIL("Compound operators are not supported for non-arithmetic types"); + } +}; + +} // namespace detail +} // namespace compound +} // namespace reduction +} // namespace cudf diff --git a/cpp/src/reductions/segmented_max.cu b/cpp/src/reductions/segmented/max.cu similarity index 94% rename from cpp/src/reductions/segmented_max.cu rename to cpp/src/reductions/segmented/max.cu index 494aff66797..d72b65301c1 100644 --- a/cpp/src/reductions/segmented_max.cu +++ b/cpp/src/reductions/segmented/max.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "simple_segmented.cuh" +#include "simple.cuh" #include diff --git a/cpp/src/reductions/segmented/mean.cu b/cpp/src/reductions/segmented/mean.cu new file mode 100644 index 00000000000..b7a5bfa43d6 --- /dev/null +++ b/cpp/src/reductions/segmented/mean.cu @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "compound.cuh" + +#include + +#include + +namespace cudf { +namespace reduction { + +std::unique_ptr segmented_mean(column_view const& col, + device_span offsets, + cudf::data_type const output_dtype, + null_policy null_handling, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + using reducer = compound::detail::compound_segmented_dispatcher; + constexpr size_type ddof = 1; // ddof for mean calculation + return cudf::type_dispatcher( + col.type(), reducer{}, col, offsets, output_dtype, null_handling, ddof, stream, mr); +} + +} // namespace reduction +} // namespace cudf diff --git a/cpp/src/reductions/segmented_min.cu b/cpp/src/reductions/segmented/min.cu similarity index 94% rename from cpp/src/reductions/segmented_min.cu rename to cpp/src/reductions/segmented/min.cu index dee6a989ad6..b7fbedf2690 100644 --- a/cpp/src/reductions/segmented_min.cu +++ b/cpp/src/reductions/segmented/min.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "simple_segmented.cuh" +#include "simple.cuh" #include diff --git a/cpp/src/reductions/segmented_product.cu b/cpp/src/reductions/segmented/product.cu similarity index 94% rename from cpp/src/reductions/segmented_product.cu rename to cpp/src/reductions/segmented/product.cu index 25b31d117b3..d5442126660 100644 --- a/cpp/src/reductions/segmented_product.cu +++ b/cpp/src/reductions/segmented/product.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "simple_segmented.cuh" +#include "simple.cuh" #include diff --git a/cpp/src/reductions/segmented_reductions.cpp b/cpp/src/reductions/segmented/reductions.cpp similarity index 87% rename from cpp/src/reductions/segmented_reductions.cpp rename to cpp/src/reductions/segmented/reductions.cpp index 6ccc87291cc..1de55b371b3 100644 --- a/cpp/src/reductions/segmented_reductions.cpp +++ b/cpp/src/reductions/segmented/reductions.cpp @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include @@ -65,7 +65,7 @@ struct segmented_reduce_dispatch_functor { } template - std::unique_ptr operator()() + std::unique_ptr operator()(segmented_reduce_aggregation const& agg) { switch (k) { case segmented_reduce_aggregation::SUM: @@ -86,6 +86,21 @@ struct segmented_reduce_dispatch_functor { case segmented_reduce_aggregation::ALL: return reduction::segmented_all( col, offsets, output_dtype, null_handling, init, stream, mr); + case segmented_reduce_aggregation::SUM_OF_SQUARES: + return reduction::segmented_sum_of_squares( + col, offsets, output_dtype, null_handling, stream, mr); + case segmented_reduce_aggregation::MEAN: + return reduction::segmented_mean(col, offsets, output_dtype, null_handling, stream, mr); + case aggregation::VARIANCE: { + auto var_agg = static_cast(agg); + return reduction::segmented_variance( + col, offsets, output_dtype, null_handling, var_agg._ddof, stream, mr); + } + case aggregation::STD: { + auto var_agg = static_cast(agg); + return reduction::segmented_standard_deviation( + col, offsets, output_dtype, null_handling, var_agg._ddof, stream, mr); + } default: CUDF_FAIL("Unsupported aggregation type."); // TODO: Add support for compound_ops. GH #10432 @@ -115,7 +130,8 @@ std::unique_ptr segmented_reduce(column_view const& segmented_values, return aggregation_dispatcher( agg.kind, segmented_reduce_dispatch_functor{ - segmented_values, offsets, output_dtype, null_handling, init, stream, mr}); + segmented_values, offsets, output_dtype, null_handling, init, stream, mr}, + agg); } } // namespace detail diff --git a/cpp/src/reductions/simple_segmented.cuh b/cpp/src/reductions/segmented/simple.cuh similarity index 84% rename from cpp/src/reductions/simple_segmented.cuh rename to cpp/src/reductions/segmented/simple.cuh index 2b31252c18c..fb080ebf67c 100644 --- a/cpp/src/reductions/simple_segmented.cuh +++ b/cpp/src/reductions/segmented/simple.cuh @@ -16,17 +16,16 @@ #pragma once +#include "update_validity.hpp" + #include #include #include -#include -#include -#include +#include #include #include #include #include -#include #include #include #include @@ -47,7 +46,7 @@ namespace simple { namespace detail { /** - * @brief Segment reduction for 'sum', 'product', 'min', 'max', 'sum of squares' + * @brief Segment reduction for 'sum', 'product', 'min', 'max', 'sum of squares', etc * which directly compute the reduction by a single step reduction call. * * @tparam InputType the input column data-type @@ -56,9 +55,7 @@ namespace detail { * @param col Input column of data to reduce * @param offsets Indices to segment boundaries - * @param null_handling If `null_policy::INCLUDE`, all elements in a segment must be valid for the - * reduced value to be valid. If `null_policy::EXCLUDE`, the reduced value is valid if any element - * in the segment is valid. + * @param null_handling How null entries are processed within each segment * @param init Optional initial value of the reduction * @param stream Used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory @@ -73,7 +70,6 @@ std::unique_ptr simple_segmented_reduction( rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - // reduction by iterator auto dcol = cudf::column_device_view::create(col, stream); auto simple_op = Op{}; auto const num_segments = offsets.size() - 1; @@ -110,20 +106,8 @@ std::unique_ptr simple_segmented_reduction( } // Compute the output null mask - auto const bitmask = col.null_mask(); - auto const first_bit_indices_begin = offsets.begin(); - auto const first_bit_indices_end = offsets.end() - 1; - auto const last_bit_indices_begin = first_bit_indices_begin + 1; - auto [output_null_mask, output_null_count] = cudf::detail::segmented_null_mask_reduction( - bitmask, - first_bit_indices_begin, - first_bit_indices_end, - last_bit_indices_begin, - null_handling, - init.has_value() ? std::optional(init.value().get().is_valid()) : std::nullopt, - stream, - mr); - result->set_null_mask(std::move(output_null_mask), output_null_count); + cudf::reduction::detail::segmented_update_validity( + *result, col, offsets, null_handling, init, stream, mr); return result; } @@ -139,9 +123,7 @@ std::unique_ptr simple_segmented_reduction( * @param col Input column of data to reduce * @param offsets Indices to segment boundaries - * @param null_handling If `null_policy::INCLUDE`, all elements in a segment must be valid for the - * reduced value to be valid. If `null_policy::EXCLUDE`, the reduced value is valid if any element - * in the segment is valid. + * @param null_handling How null entries are processed within each segment * @param stream Used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory * @return Output column in device memory @@ -184,39 +166,10 @@ std::unique_ptr string_segmented_reduction(column_view const& col, stream, mr) ->release()[0]); - auto [segmented_null_mask, segmented_null_count] = - cudf::detail::segmented_null_mask_reduction(col.null_mask(), - offsets.begin(), - offsets.end() - 1, - offsets.begin() + 1, - null_handling, - std::nullopt, - stream, - mr); - - // If the segmented null mask contains any null values, the segmented null mask - // must be combined with the result null mask. - if (segmented_null_count > 0) { - if (result->null_count() == 0) { - // The result has no nulls. Use the segmented null mask. - result->set_null_mask(std::move(segmented_null_mask), segmented_null_count); - } else { - // Compute the logical AND of the segmented output null mask and the - // result null mask to update the result null mask and null count. - auto result_mview = result->mutable_view(); - std::vector masks{static_cast(result_mview.null_mask()), - static_cast(segmented_null_mask.data())}; - std::vector begin_bits{0, 0}; - auto const valid_count = cudf::detail::inplace_bitmask_and( - device_span(static_cast(result_mview.null_mask()), - num_bitmask_words(result->size())), - masks, - begin_bits, - result->size(), - stream); - result->set_null_count(result->size() - valid_count); - } - } + + // Compute the output null mask + cudf::reduction::detail::segmented_update_validity( + *result, col, offsets, null_handling, std::nullopt, stream, mr); return result; } @@ -242,9 +195,7 @@ std::unique_ptr string_segmented_reduction(column_view const& col, * @param col Input column of data to reduce * @param offsets Indices to segment boundaries - * @param null_handling If `null_policy::INCLUDE`, all elements in a segment must be valid for the - * reduced value to be valid. If `null_policy::EXCLUDE`, the reduced value is valid if any element - * in the segment is valid. + * @param null_handling How null entries are processed within each segment * @param stream Used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory * @return Output column in device memory @@ -390,7 +341,7 @@ struct same_column_type_dispatcher { /** * @brief Call reduce and return a column of the type specified. * - * This is used by operations sum(), product(), and sum_of_squares(). + * This is used by operations such as sum(), product(), sum_of_squares(), etc * It only supports numeric types. If the output type is not the * same as the input type, an extra cast operation may occur. * @@ -456,10 +407,8 @@ struct column_type_dispatcher { * @tparam ElementType The input column type or key type * @param col Input column (must be numeric) * @param offsets Indices to segment boundaries - * @param output_type Requested type of the scalar result - * @param null_handling If `null_policy::INCLUDE`, all elements in a segment must be valid for the - * reduced value to be valid. If `null_policy::EXCLUDE`, the reduced value is valid if any element - * in the segment is valid. + * @param output_type Requested type of the output column + * @param null_handling How null entries are processed within each segment * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned scalar's device memory */ diff --git a/cpp/src/reductions/segmented/std.cu b/cpp/src/reductions/segmented/std.cu new file mode 100644 index 00000000000..6af5a9cf9b6 --- /dev/null +++ b/cpp/src/reductions/segmented/std.cu @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "compound.cuh" + +#include + +#include + +namespace cudf { +namespace reduction { + +std::unique_ptr segmented_standard_deviation(column_view const& col, + device_span offsets, + cudf::data_type const output_dtype, + null_policy null_handling, + size_type ddof, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + using reducer = + compound::detail::compound_segmented_dispatcher; + return cudf::type_dispatcher( + col.type(), reducer(), col, offsets, output_dtype, null_handling, ddof, stream, mr); +} + +} // namespace reduction +} // namespace cudf diff --git a/cpp/src/reductions/segmented_sum.cu b/cpp/src/reductions/segmented/sum.cu similarity index 95% rename from cpp/src/reductions/segmented_sum.cu rename to cpp/src/reductions/segmented/sum.cu index 4caaa727371..0cb8decdc58 100644 --- a/cpp/src/reductions/segmented_sum.cu +++ b/cpp/src/reductions/segmented/sum.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "simple_segmented.cuh" +#include "simple.cuh" #include diff --git a/cpp/src/reductions/segmented/sum_of_squares.cu b/cpp/src/reductions/segmented/sum_of_squares.cu new file mode 100644 index 00000000000..1ee4f992b6d --- /dev/null +++ b/cpp/src/reductions/segmented/sum_of_squares.cu @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "simple.cuh" + +#include + +#include + +namespace cudf { +namespace reduction { + +std::unique_ptr segmented_sum_of_squares(column_view const& col, + device_span offsets, + cudf::data_type const output_dtype, + null_policy null_handling, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + using reducer = simple::detail::column_type_dispatcher; + return cudf::type_dispatcher( + col.type(), reducer{}, col, offsets, output_dtype, null_handling, std::nullopt, stream, mr); +} + +} // namespace reduction +} // namespace cudf diff --git a/cpp/src/reductions/segmented/update_validity.cu b/cpp/src/reductions/segmented/update_validity.cu new file mode 100644 index 00000000000..8d70f961b63 --- /dev/null +++ b/cpp/src/reductions/segmented/update_validity.cu @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "update_validity.hpp" + +#include +#include +#include + +namespace cudf { +namespace reduction { +namespace detail { + +void segmented_update_validity(column& result, + column_view const& col, + device_span offsets, + null_policy null_handling, + std::optional> init, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + auto [output_null_mask, output_null_count] = cudf::detail::segmented_null_mask_reduction( + col.null_mask(), + offsets.begin(), + offsets.end() - 1, + offsets.begin() + 1, + null_handling, + init.has_value() ? std::optional(init.value().get().is_valid()) : std::nullopt, + stream, + mr); + result.set_null_mask(std::move(output_null_mask), output_null_count); +} + +} // namespace detail +} // namespace reduction +} // namespace cudf diff --git a/cpp/src/reductions/segmented/update_validity.hpp b/cpp/src/reductions/segmented/update_validity.hpp new file mode 100644 index 00000000000..0003b98308a --- /dev/null +++ b/cpp/src/reductions/segmented/update_validity.hpp @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include + +#include + +#include + +namespace cudf { +namespace reduction { +namespace detail { + +/** + * @brief Compute the validity mask and set it on the result column + * + * If `null_handling == null_policy::INCLUDE`, all elements in a segment must be valid for the + * reduced value to be valid. + * If `null_handling == null_policy::EXCLUDE`, the reduced value is valid if any element + * in the segment is valid. + * + * @param result Result of segmented reduce to update the null mask + * @param col Input column before reduce + * @param offsets Indices to segment boundaries + * @param null_handling How null entries are processed within each segment + * @param init Optional initial value + * @param stream Used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +void segmented_update_validity(column& result, + column_view const& col, + device_span offsets, + null_policy null_handling, + std::optional> init, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); + +} // namespace detail +} // namespace reduction +} // namespace cudf diff --git a/cpp/src/reductions/segmented/var.cu b/cpp/src/reductions/segmented/var.cu new file mode 100644 index 00000000000..84adf353700 --- /dev/null +++ b/cpp/src/reductions/segmented/var.cu @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "compound.cuh" + +#include + +#include + +namespace cudf { +namespace reduction { + +std::unique_ptr segmented_variance(column_view const& col, + device_span offsets, + cudf::data_type const output_dtype, + null_policy null_handling, + size_type ddof, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + using reducer = compound::detail::compound_segmented_dispatcher; + return cudf::type_dispatcher( + col.type(), reducer(), col, offsets, output_dtype, null_handling, ddof, stream, mr); +} + +} // namespace reduction +} // namespace cudf diff --git a/cpp/src/reductions/std.cu b/cpp/src/reductions/std.cu index bb29e5cd030..e9ba75f68e6 100644 --- a/cpp/src/reductions/std.cu +++ b/cpp/src/reductions/std.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,7 +25,7 @@ namespace reduction { std::unique_ptr standard_deviation(column_view const& col, cudf::data_type const output_dtype, - cudf::size_type ddof, + size_type ddof, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { diff --git a/cpp/src/reductions/var.cu b/cpp/src/reductions/var.cu index 2df653858b0..4d86918d6c6 100644 --- a/cpp/src/reductions/var.cu +++ b/cpp/src/reductions/var.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,7 +25,7 @@ namespace reduction { std::unique_ptr variance(column_view const& col, cudf::data_type const output_dtype, - cudf::size_type ddof, + size_type ddof, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { diff --git a/cpp/tests/reductions/segmented_reduction_tests.cpp b/cpp/tests/reductions/segmented_reduction_tests.cpp index 74e69d2fb05..b4873a14509 100644 --- a/cpp/tests/reductions/segmented_reduction_tests.cpp +++ b/cpp/tests/reductions/segmented_reduction_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,6 +24,10 @@ #include #include +#include +#include +#include + #define XXX 0 // null placeholder template @@ -770,6 +774,240 @@ TEST_F(SegmentedReductionTestUntyped, NonNullableInput) CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, null_init_expect); } +TEST_F(SegmentedReductionTestUntyped, Mean) +{ + auto const input = + cudf::test::fixed_width_column_wrapper{10, 20, 30, 40, 50, 60, 70, 80, 90}; + auto const offsets = std::vector{0, 1, 1, 4, 9}; + auto const d_offsets = + cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const agg = cudf::make_mean_aggregation(); + auto const output_type = cudf::data_type{cudf::type_id::FLOAT32}; + + auto const expected = + cudf::test::fixed_width_column_wrapper{{10, 0, 30, 70}, {1, 0, 1, 1}}; + auto result = + cudf::segmented_reduce(input, d_offsets, *agg, output_type, cudf::null_policy::INCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected); + result = cudf::segmented_reduce(input, d_offsets, *agg, output_type, cudf::null_policy::EXCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected); +} + +TEST_F(SegmentedReductionTestUntyped, MeanNulls) +{ + auto const input = cudf::test::fixed_width_column_wrapper( + {10, 20, 30, 40, 50, 60, 0, 80, 90}, {1, 1, 1, 1, 1, 1, 0, 1, 1}); + auto const offsets = std::vector{0, 1, 1, 4, 9}; + auto const d_offsets = + cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const agg = cudf::make_mean_aggregation(); + auto const output_type = cudf::data_type{cudf::type_id::FLOAT64}; + + auto expected = cudf::test::fixed_width_column_wrapper{{10, 0, 30, 70}, {1, 0, 1, 1}}; + auto result = + cudf::segmented_reduce(input, d_offsets, *agg, output_type, cudf::null_policy::EXCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected); + + expected = cudf::test::fixed_width_column_wrapper{{10, 0, 30, 0}, {1, 0, 1, 0}}; + result = cudf::segmented_reduce(input, d_offsets, *agg, output_type, cudf::null_policy::INCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected); +} + +TEST_F(SegmentedReductionTestUntyped, SumOfSquares) +{ + auto const input = + cudf::test::fixed_width_column_wrapper{10, 20, 30, 40, 50, 60, 70, 80, 90}; + auto const offsets = std::vector{0, 1, 1, 4, 9}; + auto const d_offsets = + cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const agg = cudf::make_sum_of_squares_aggregation(); + auto const output_type = cudf::data_type{cudf::type_id::INT32}; + + auto const expected = + cudf::test::fixed_width_column_wrapper{{100, 0, 2900, 25500}, {1, 0, 1, 1}}; + + auto result = + cudf::segmented_reduce(input, d_offsets, *agg, output_type, cudf::null_policy::INCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected); + result = cudf::segmented_reduce(input, d_offsets, *agg, output_type, cudf::null_policy::EXCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected); +} + +TEST_F(SegmentedReductionTestUntyped, SumOfSquaresNulls) +{ + auto const input = cudf::test::fixed_width_column_wrapper( + {10, 20, 30, 40, 50, 60, 0, 80, 90}, {1, 1, 1, 1, 1, 1, 0, 1, 1}); + auto const offsets = std::vector{0, 1, 1, 4, 9}; + auto const d_offsets = + cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const agg = cudf::make_sum_of_squares_aggregation(); + auto const output_type = cudf::data_type{cudf::type_id::INT64}; + + auto expected = + cudf::test::fixed_width_column_wrapper{{100, 0, 2900, 20600}, {1, 0, 1, 1}}; + auto result = + cudf::segmented_reduce(input, d_offsets, *agg, output_type, cudf::null_policy::EXCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected); + + expected = cudf::test::fixed_width_column_wrapper{{100, 0, 2900, 0}, {1, 0, 1, 0}}; + result = cudf::segmented_reduce(input, d_offsets, *agg, output_type, cudf::null_policy::INCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected); +} + +TEST_F(SegmentedReductionTestUntyped, StandardDeviation) +{ + constexpr float NaN{std::numeric_limits::quiet_NaN()}; + auto const input = + cudf::test::fixed_width_column_wrapper{10, 20, 30, 40, 50, 60, 70, 80, 90}; + auto const offsets = std::vector{0, 1, 1, 4, 9}; + auto const d_offsets = + cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const agg = cudf::make_std_aggregation(); + auto const output_type = cudf::data_type{cudf::type_id::FLOAT32}; + + auto expected = cudf::test::fixed_width_column_wrapper{ + {NaN, 0.f, 10.f, static_cast(std::sqrt(250.))}, {1, 0, 1, 1}}; + auto result = + cudf::segmented_reduce(input, d_offsets, *agg, output_type, cudf::null_policy::INCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected); + result = cudf::segmented_reduce(input, d_offsets, *agg, output_type, cudf::null_policy::EXCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected); +} + +TEST_F(SegmentedReductionTestUntyped, StandardDeviationNulls) +{ + constexpr double NaN{std::numeric_limits::quiet_NaN()}; + auto const input = cudf::test::fixed_width_column_wrapper( + {10, 0, 20, 30, 54, 63, 0, 72, 81}, {1, 0, 1, 1, 1, 1, 0, 1, 1}); + auto const offsets = std::vector{0, 1, 1, 4, 9}; + auto const d_offsets = + cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const agg = cudf::make_std_aggregation(); + auto const output_type = cudf::data_type{cudf::type_id::FLOAT64}; + + auto expected = cudf::test::fixed_width_column_wrapper{ + {NaN, 0., std::sqrt(50.), std::sqrt(135.)}, {1, 0, 1, 1}}; + auto result = + cudf::segmented_reduce(input, d_offsets, *agg, output_type, cudf::null_policy::EXCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected); + + expected = cudf::test::fixed_width_column_wrapper{{NaN, 0., 0., 0.}, {1, 0, 0, 0}}; + result = cudf::segmented_reduce(input, d_offsets, *agg, output_type, cudf::null_policy::INCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected); +} + +TEST_F(SegmentedReductionTestUntyped, Variance) +{ + constexpr float NaN{std::numeric_limits::quiet_NaN()}; + auto const input = + cudf::test::fixed_width_column_wrapper{10, 20, 30, 40, 50, 60, 70, 80, 90}; + auto const offsets = std::vector{0, 1, 1, 4, 9}; + auto const d_offsets = + cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const agg = cudf::make_variance_aggregation(); + auto const output_type = cudf::data_type{cudf::type_id::FLOAT32}; + + auto expected = + cudf::test::fixed_width_column_wrapper{{NaN, 0.f, 100.f, 250.f}, {1, 0, 1, 1}}; + auto result = + cudf::segmented_reduce(input, d_offsets, *agg, output_type, cudf::null_policy::INCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected); + result = cudf::segmented_reduce(input, d_offsets, *agg, output_type, cudf::null_policy::EXCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected); +} + +TEST_F(SegmentedReductionTestUntyped, VarianceNulls) +{ + constexpr double NaN{std::numeric_limits::quiet_NaN()}; + auto const input = cudf::test::fixed_width_column_wrapper( + {10, 0, 20, 30, 54, 63, 0, 72, 81}, {1, 0, 1, 1, 1, 1, 0, 1, 1}); + auto const offsets = std::vector{0, 1, 1, 4, 9}; + auto const d_offsets = + cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const agg = cudf::make_variance_aggregation(); + auto const output_type = cudf::data_type{cudf::type_id::FLOAT64}; + + auto expected = + cudf::test::fixed_width_column_wrapper{{NaN, 0., 50., 135.}, {1, 0, 1, 1}}; + auto result = + cudf::segmented_reduce(input, d_offsets, *agg, output_type, cudf::null_policy::EXCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected); + + expected = cudf::test::fixed_width_column_wrapper{{NaN, 0., 0., 0.}, {1, 0, 0, 0}}; + result = cudf::segmented_reduce(input, d_offsets, *agg, output_type, cudf::null_policy::INCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected); +} + +TEST_F(SegmentedReductionTestUntyped, Errors) +{ + auto const input = cudf::test::fixed_width_column_wrapper( + {10, 0, 20, 30, 54, 63, 0, 72, 81}, {1, 0, 1, 1, 1, 1, 0, 1, 1}); + auto const offsets = std::vector{0, 1, 1, 4, 9}; + auto const d_offsets = + cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const null_policy = cudf::null_policy::EXCLUDE; + auto const output_type = cudf::data_type{cudf::type_id::TIMESTAMP_DAYS}; + auto const str_input = + cudf::test::strings_column_wrapper({"10", "0", "20", "30", "54", "63", "", "72", "81"}); + + auto const sum_agg = cudf::make_sum_aggregation(); + EXPECT_THROW(cudf::segmented_reduce(input, d_offsets, *sum_agg, output_type, null_policy), + cudf::logic_error); + EXPECT_THROW(cudf::segmented_reduce(str_input, d_offsets, *sum_agg, output_type, null_policy), + cudf::logic_error); + + auto const product_agg = cudf::make_product_aggregation(); + EXPECT_THROW(cudf::segmented_reduce(input, d_offsets, *product_agg, output_type, null_policy), + cudf::logic_error); + EXPECT_THROW(cudf::segmented_reduce(str_input, d_offsets, *product_agg, output_type, null_policy), + cudf::logic_error); + + auto const min_agg = cudf::make_min_aggregation(); + EXPECT_THROW(cudf::segmented_reduce(input, d_offsets, *min_agg, output_type, null_policy), + cudf::logic_error); + + auto const max_agg = cudf::make_max_aggregation(); + EXPECT_THROW(cudf::segmented_reduce(input, d_offsets, *max_agg, output_type, null_policy), + cudf::logic_error); + + auto const any_agg = cudf::make_any_aggregation(); + EXPECT_THROW(cudf::segmented_reduce(input, d_offsets, *any_agg, output_type, null_policy), + cudf::logic_error); + EXPECT_THROW(cudf::segmented_reduce(str_input, d_offsets, *any_agg, output_type, null_policy), + cudf::logic_error); + + auto const all_agg = cudf::make_all_aggregation(); + EXPECT_THROW(cudf::segmented_reduce(input, d_offsets, *all_agg, output_type, null_policy), + cudf::logic_error); + EXPECT_THROW(cudf::segmented_reduce(str_input, d_offsets, *all_agg, output_type, null_policy), + cudf::logic_error); + + auto const mean_agg = cudf::make_mean_aggregation(); + EXPECT_THROW(cudf::segmented_reduce(input, d_offsets, *mean_agg, output_type, null_policy), + cudf::logic_error); + EXPECT_THROW(cudf::segmented_reduce(str_input, d_offsets, *mean_agg, output_type, null_policy), + cudf::logic_error); + + auto const std_agg = cudf::make_std_aggregation(); + EXPECT_THROW(cudf::segmented_reduce(input, d_offsets, *std_agg, output_type, null_policy), + cudf::logic_error); + EXPECT_THROW(cudf::segmented_reduce(str_input, d_offsets, *std_agg, output_type, null_policy), + cudf::logic_error); + + auto const var_agg = cudf::make_variance_aggregation(); + EXPECT_THROW(cudf::segmented_reduce(input, d_offsets, *var_agg, output_type, null_policy), + cudf::logic_error); + EXPECT_THROW(cudf::segmented_reduce(str_input, d_offsets, *var_agg, output_type, null_policy), + cudf::logic_error); + + auto const squares_agg = + cudf::make_sum_of_squares_aggregation(); + EXPECT_THROW(cudf::segmented_reduce(input, d_offsets, *squares_agg, output_type, null_policy), + cudf::logic_error); + EXPECT_THROW(cudf::segmented_reduce(str_input, d_offsets, *squares_agg, output_type, null_policy), + cudf::logic_error); +} + TEST_F(SegmentedReductionTestUntyped, ReduceEmptyColumn) { auto const input = cudf::test::fixed_width_column_wrapper{};