From a9fdb94fbe3d3093cf82b65be0eaa4b7dcc8af82 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Wed, 18 Jan 2023 14:57:33 -0500 Subject: [PATCH 01/12] Add compound aggregations to cudf::segmented_reduce --- conda/recipes/libcudf/meta.yaml | 1 + cpp/CMakeLists.txt | 18 +- .../cudf/detail/aggregation/aggregation.hpp | 12 +- cpp/include/cudf/detail/reduction.cuh | 89 +---- .../cudf/detail/reduction_functions.hpp | 168 +-------- .../cudf/detail/segmented_reduction.cuh | 200 +++++++++++ .../detail/segmented_reduction_functions.hpp | 320 ++++++++++++++++++ cpp/src/aggregation/aggregation.cpp | 10 +- .../stream_compaction/apply_boolean_mask.cu | 4 +- .../{segmented_all.cu => segmented/all.cu} | 4 +- .../{segmented_any.cu => segmented/any.cu} | 4 +- cpp/src/reductions/segmented/compound.cuh | 193 +++++++++++ .../{segmented_max.cu => segmented/max.cu} | 4 +- cpp/src/reductions/segmented/mean.cu | 46 +++ .../{segmented_min.cu => segmented/min.cu} | 4 +- .../product.cu} | 4 +- .../reductions.cpp} | 22 +- .../simple.cuh} | 2 +- cpp/src/reductions/segmented/std.cu | 41 +++ .../{segmented_sum.cu => segmented/sum.cu} | 4 +- .../reductions/segmented/sum_of_squares.cu | 39 +++ cpp/src/reductions/segmented/var.cu | 40 +++ .../reductions/segmented_reduction_tests.cpp | 74 +++- 23 files changed, 1017 insertions(+), 286 deletions(-) create mode 100644 cpp/include/cudf/detail/segmented_reduction.cuh create mode 100644 cpp/include/cudf/detail/segmented_reduction_functions.hpp rename cpp/src/reductions/{segmented_all.cu => segmented/all.cu} (94%) rename cpp/src/reductions/{segmented_any.cu => segmented/any.cu} (94%) create mode 100644 cpp/src/reductions/segmented/compound.cuh rename cpp/src/reductions/{segmented_max.cu => segmented/max.cu} (94%) create mode 100644 cpp/src/reductions/segmented/mean.cu rename cpp/src/reductions/{segmented_min.cu => segmented/min.cu} (94%) rename cpp/src/reductions/{segmented_product.cu => segmented/product.cu} (94%) rename cpp/src/reductions/{segmented_reductions.cpp => segmented/reductions.cpp} (87%) rename cpp/src/reductions/{simple_segmented.cuh => segmented/simple.cuh} (99%) create mode 100644 cpp/src/reductions/segmented/std.cu rename cpp/src/reductions/{segmented_sum.cu => segmented/sum.cu} (95%) create mode 100644 cpp/src/reductions/segmented/sum_of_squares.cu create mode 100644 cpp/src/reductions/segmented/var.cu diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml index fce3b848372..ab9c7be80bc 100644 --- a/conda/recipes/libcudf/meta.yaml +++ b/conda/recipes/libcudf/meta.yaml @@ -109,6 +109,7 @@ outputs: - test -f $PREFIX/include/cudf/detail/scan.hpp - test -f $PREFIX/include/cudf/detail/scatter.hpp - test -f $PREFIX/include/cudf/detail/search.hpp + - test -f $PREFIX/include/cudf/detail/segmented_reduction_functions.hpp - test -f $PREFIX/include/cudf/detail/sequence.hpp - test -f $PREFIX/include/cudf/detail/sorting.hpp - test -f $PREFIX/include/cudf/detail/stream_compaction.hpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index b5fc6592418..502a585781b 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -440,13 +440,17 @@ add_library( src/reductions/scan/scan.cpp src/reductions/scan/scan_exclusive.cu src/reductions/scan/scan_inclusive.cu - src/reductions/segmented_all.cu - src/reductions/segmented_any.cu - src/reductions/segmented_max.cu - src/reductions/segmented_min.cu - src/reductions/segmented_product.cu - src/reductions/segmented_reductions.cpp - src/reductions/segmented_sum.cu + src/reductions/segmented/all.cu + src/reductions/segmented/any.cu + src/reductions/segmented/max.cu + src/reductions/segmented/mean.cu + src/reductions/segmented/min.cu + src/reductions/segmented/product.cu + src/reductions/segmented/reductions.cpp + src/reductions/segmented/std.cu + src/reductions/segmented/sum.cu + src/reductions/segmented/sum_of_squares.cu + src/reductions/segmented/var.cu src/reductions/std.cu src/reductions/sum.cu src/reductions/sum_of_squares.cu diff --git a/cpp/include/cudf/detail/aggregation/aggregation.hpp b/cpp/include/cudf/detail/aggregation/aggregation.hpp index 75027c78a68..fb4db338e33 100644 --- a/cpp/include/cudf/detail/aggregation/aggregation.hpp +++ b/cpp/include/cudf/detail/aggregation/aggregation.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -292,7 +292,9 @@ class all_aggregation final : public reduce_aggregation, public segmented_reduce /** * @brief Derived class for specifying a sum_of_squares aggregation */ -class sum_of_squares_aggregation final : public groupby_aggregation, public reduce_aggregation { +class sum_of_squares_aggregation final : public groupby_aggregation, + public reduce_aggregation, + public segmented_reduce_aggregation { public: sum_of_squares_aggregation() : aggregation(SUM_OF_SQUARES) {} @@ -313,7 +315,8 @@ class sum_of_squares_aggregation final : public groupby_aggregation, public redu */ class mean_aggregation final : public rolling_aggregation, public groupby_aggregation, - public reduce_aggregation { + public reduce_aggregation, + public segmented_reduce_aggregation { public: mean_aggregation() : aggregation(MEAN) {} @@ -353,7 +356,8 @@ class m2_aggregation : public groupby_aggregation { */ class std_var_aggregation : public rolling_aggregation, public groupby_aggregation, - public reduce_aggregation { + public reduce_aggregation, + public segmented_reduce_aggregation { public: size_type _ddof; ///< Delta degrees of freedom diff --git a/cpp/include/cudf/detail/reduction.cuh b/cpp/include/cudf/detail/reduction.cuh index f1fe0d2e1a9..7267d0b3524 100644 --- a/cpp/include/cudf/detail/reduction.cuh +++ b/cpp/include/cudf/detail/reduction.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -27,7 +27,6 @@ #include #include -#include #include #include @@ -229,92 +228,6 @@ std::unique_ptr reduce(InputIterator d_in, return std::unique_ptr(result); } -/** - * @brief Compute the specified simple reduction over each of the segments in the - * input range of elements. - * - * @tparam InputIterator the input column iterator - * @tparam OffsetIterator the offset column iterator - * @tparam OutputIterator the output column iterator - * @tparam BinaryOp the device binary operator used to reduce - * @tparam OutputType the output type of reduction - * - * @param[in] d_in the begin iterator to input - * @param[in] d_offset_begin the begin iterator to offset - * @param[in] d_offset_end the end iterator to offset. Note: This is - * num_segments+1 elements past `d_offset_begin`. - * @param[out] d_out the begin iterator to output - * @param[in] binary_op the reduction operator - * @param[in] identity the identity element of the reduction operator - * @param[in] initial_value Initial value of the reduction - * @param[in] stream CUDA stream used for device memory operations and kernel launches - * - */ -template ::type, - typename std::enable_if_t() && - !cudf::is_fixed_point()>* = nullptr> -void segmented_reduce(InputIterator d_in, - OffsetIterator d_offset_begin, - OffsetIterator d_offset_end, - OutputIterator d_out, - BinaryOp binary_op, - OutputType initial_value, - rmm::cuda_stream_view stream) -{ - auto const num_segments = static_cast(std::distance(d_offset_begin, d_offset_end)) - 1; - - // Allocate temporary storage - rmm::device_buffer d_temp_storage; - size_t temp_storage_bytes = 0; - cub::DeviceSegmentedReduce::Reduce(d_temp_storage.data(), - temp_storage_bytes, - d_in, - d_out, - num_segments, - d_offset_begin, - d_offset_begin + 1, - binary_op, - initial_value, - stream.value()); - d_temp_storage = rmm::device_buffer{temp_storage_bytes, stream}; - - // Run reduction - cub::DeviceSegmentedReduce::Reduce(d_temp_storage.data(), - temp_storage_bytes, - d_in, - d_out, - num_segments, - d_offset_begin, - d_offset_begin + 1, - binary_op, - initial_value, - stream.value()); -} - -template ::type, - typename std::enable_if_t() && - !cudf::is_fixed_point())>* = nullptr> -void segmented_reduce(InputIterator, - OffsetIterator, - OffsetIterator, - OutputIterator, - BinaryOp, - OutputType, - rmm::cuda_stream_view) -{ - CUDF_FAIL( - "Unsupported data types called on segmented_reduce. Only numeric and chrono types are " - "supported."); -} - } // namespace detail } // namespace reduction } // namespace cudf diff --git a/cpp/include/cudf/detail/reduction_functions.hpp b/cpp/include/cudf/detail/reduction_functions.hpp index a2de286f283..6f1630c52eb 100644 --- a/cpp/include/cudf/detail/reduction_functions.hpp +++ b/cpp/include/cudf/detail/reduction_functions.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -338,171 +338,5 @@ std::unique_ptr merge_sets( rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); -/** - * @brief Compute sum of each segment in input column. - * - * If an input segment is empty, the segment result is null. - * - * @throw cudf::logic_error if input column type is not convertible to `output_dtype`. - * @throw cudf::logic_error if `output_dtype` is not an arithmetic type. - * - * @param col Input column to compute sum - * @param offsets Indices to identify segment boundaries - * @param output_dtype Data type of return type and typecast elements of input column - * @param null_handling If `null_policy::INCLUDE`, all elements in a segment must be valid for the - * reduced value to be valid. If `null_policy::EXCLUDE`, the reduced value is valid if any element - * in the segment is valid. - * @param init Initial value of each sum - * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource used to allocate the returned column's device memory - * @return Sums of segments in type `output_dtype` - */ -std::unique_ptr segmented_sum( - column_view const& col, - device_span offsets, - data_type const output_dtype, - null_policy null_handling, - std::optional> init, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - -/** - * @brief Computes product of each segment in input column. - * - * If an input segment is empty, the segment result is null. - * - * @throw cudf::logic_error if input column type is not convertible to `output_dtype`. - * @throw cudf::logic_error if `output_dtype` is not an arithmetic type. - * - * @param col Input column to compute product - * @param offsets Indices to identify segment boundaries - * @param output_dtype data type of return type and typecast elements of input column - * @param null_handling If `null_policy::INCLUDE`, all elements in a segment must be valid for the - * reduced value to be valid. If `null_policy::EXCLUDE`, the reduced value is valid if any element - * in the segment is valid. - * @param init Initial value of each product - * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource used to allocate the returned scalar's device memory - * @return Product as scalar of type `output_dtype` - */ -std::unique_ptr segmented_product( - column_view const& col, - device_span offsets, - data_type const output_dtype, - null_policy null_handling, - std::optional> init, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - -/** - * @brief Compute minimum of each segment in input column. - * - * If an input segment is empty, the segment result is null. - * - * @throw cudf::logic_error if input column type is convertible to `output_dtype`. - * - * @param col Input column to compute minimum - * @param offsets Indices to identify segment boundaries - * @param output_dtype Data type of return type and typecast elements of input column - * @param null_handling If `null_policy::INCLUDE`, all elements in a segment must be valid for the - * reduced value to be valid. If `null_policy::EXCLUDE`, the reduced value is valid if any element - * in the segment is valid. - * @param init Initial value of each minimum - * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource used to allocate the returned scalar's device memory - * @return Minimums of segments in type `output_dtype` - */ -std::unique_ptr segmented_min( - column_view const& col, - device_span offsets, - data_type const output_dtype, - null_policy null_handling, - std::optional> init, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - -/** - * @brief Compute maximum of each segment in input column. - * - * If an input segment is empty, the segment result is null. - * - * @throw cudf::logic_error if input column type is convertible to `output_dtype`. - * - * @param col Input column to compute maximum - * @param offsets Indices to identify segment boundaries - * @param output_dtype Data type of return type and typecast elements of input column - * @param null_handling If `null_policy::INCLUDE`, all elements in a segment must be valid for the - * reduced value to be valid. If `null_policy::EXCLUDE`, the reduced value is valid if any element - * in the segment is valid. - * @param init Initial value of each maximum - * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource used to allocate the returned scalar's device memory - * @return Maximums of segments in type `output_dtype` - */ -std::unique_ptr segmented_max( - column_view const& col, - device_span offsets, - data_type const output_dtype, - null_policy null_handling, - std::optional> init, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - -/** - * @brief Compute if any of the values in the segment are true when typecasted to bool. - * - * If an input segment is empty, the segment result is null. - * - * @throw cudf::logic_error if input column type is not convertible to bool. - * @throw cudf::logic_error if `output_dtype` is not bool8. - * - * @param col Input column to compute any - * @param offsets Indices to identify segment boundaries - * @param output_dtype Data type of return type and typecast elements of input column - * @param null_handling If `null_policy::INCLUDE`, all elements in a segment must be valid for the - * reduced value to be valid. If `null_policy::EXCLUDE`, the reduced value is valid if any element - * in the segment is valid. - * @param init Initial value of each any - * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource used to allocate the returned scalar's device memory - * @return Column of bool8 for the results of the segments - */ -std::unique_ptr segmented_any( - column_view const& col, - device_span offsets, - data_type const output_dtype, - null_policy null_handling, - std::optional> init, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - -/** - * @brief Compute if all of the values in the segment are true when typecasted to bool. - * - * If an input segment is empty, the segment result is null. - * - * @throw cudf::logic_error if input column type is not convertible to bool. - * @throw cudf::logic_error if `output_dtype` is not bool8. - * - * @param col Input column to compute all - * @param offsets Indices to identify segment boundaries - * @param output_dtype Data type of return type and typecast elements of input column - * @param null_handling If `null_policy::INCLUDE`, all elements in a segment must be valid for the - * reduced value to be valid. If `null_policy::EXCLUDE`, the reduced value is valid if any element - * in the segment is valid. - * @param init Initial value of each all - * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource used to allocate the returned scalar's device memory - * @return Column of bool8 for the results of the segments - */ -std::unique_ptr segmented_all( - column_view const& col, - device_span offsets, - data_type const output_dtype, - null_policy null_handling, - std::optional> init, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - } // namespace reduction } // namespace cudf diff --git a/cpp/include/cudf/detail/segmented_reduction.cuh b/cpp/include/cudf/detail/segmented_reduction.cuh new file mode 100644 index 00000000000..171f1b414cb --- /dev/null +++ b/cpp/include/cudf/detail/segmented_reduction.cuh @@ -0,0 +1,200 @@ +/* + * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "reduction_operators.cuh" + +#include +#include +#include + +#include + +#include +#include + +namespace cudf { +namespace reduction { +namespace detail { + +/** + * @brief Compute the specified simple reduction over each of the segments in the + * input range of elements + * + * @tparam InputIterator Input iterator type + * @tparam OffsetIterator Offset iterator type + * @tparam OutputIterator Output iterator type + * @tparam BinaryOp Binary operator used for reduce + * @tparam OutputType The output type derived from the OutputIterator + * + * @param d_in Input data iterator + * @param d_offset_begin Begin iterator to segment indices + * @param d_offset_end End iterator to segment indices + * @param d_out Output data iterator + * @param binary_op The reduction operator + * @param initial_value Initial value of the reduction + * @param stream CUDA stream used for device memory operations and kernel launches + * + */ +template ::type, + typename std::enable_if_t() && + !cudf::is_fixed_point()>* = nullptr> +void segmented_reduce(InputIterator d_in, + OffsetIterator d_offset_begin, + OffsetIterator d_offset_end, + OutputIterator d_out, + BinaryOp binary_op, + OutputType initial_value, + rmm::cuda_stream_view stream) +{ + auto const num_segments = static_cast(std::distance(d_offset_begin, d_offset_end)) - 1; + + // Allocate temporary storage + rmm::device_buffer d_temp_storage; + size_t temp_storage_bytes = 0; + cub::DeviceSegmentedReduce::Reduce(d_temp_storage.data(), + temp_storage_bytes, + d_in, + d_out, + num_segments, + d_offset_begin, + d_offset_begin + 1, + binary_op, + initial_value, + stream.value()); + d_temp_storage = rmm::device_buffer{temp_storage_bytes, stream}; + + // Run reduction + cub::DeviceSegmentedReduce::Reduce(d_temp_storage.data(), + temp_storage_bytes, + d_in, + d_out, + num_segments, + d_offset_begin, + d_offset_begin + 1, + binary_op, + initial_value, + stream.value()); +} + +template ::type, + typename std::enable_if_t() && + !cudf::is_fixed_point())>* = nullptr> +void segmented_reduce(InputIterator, + OffsetIterator, + OffsetIterator, + OutputIterator, + BinaryOp, + OutputType, + rmm::cuda_stream_view) +{ + CUDF_FAIL( + "Unsupported data types called on segmented_reduce. Only numeric and chrono types are " + "supported."); +} + +/** + * @brief Compute reduction by the compound operator (reduce and transform) + * + * The reduction operator must have an `intermediate::compute_result()` method. + * This method performs reduction using binary operator `Op::Op` and calculates the + * result to `OutputType` using `compute_result()` through the transform method. + * + * @tparam Op Reduction operator + * @tparam InputIterator Input iterator type + * @tparam OffsetIterator Offsets iterator type + * @tparam OutputIterator Output iterator type + * + * @param d_in Input data iterator + * @param d_offset_begin Begin iterator to segment indices + * @param d_offset_end End iterator to segment indices + * @param d_out Output data iterator + * @param op The reduction operator + * @param ddof Delta degrees of freedom used for standard deviation and variance + * @param d_valid_counts Number of valid values per segment + * @param stream CUDA stream used for device memory operations and kernel launches + */ +template +void segmented_reduce(InputIterator d_in, + OffsetIterator d_offset_begin, + OffsetIterator d_offset_end, + OutputIterator d_out, + op::compound_op op, + cudf::size_type ddof, + size_type* d_valid_counts, + rmm::cuda_stream_view stream) +{ + using OutputType = typename thrust::iterator_value::type; + using IntermediateType = typename thrust::iterator_value::type; + auto num_segments = static_cast(std::distance(d_offset_begin, d_offset_end)); + auto const binary_op = op.get_binary_op(); + auto const initial_value = op.template get_identity(); + + rmm::device_uvector intermediate_result{static_cast(num_segments), + stream}; + + // Allocate temporary storage + rmm::device_buffer d_temp_storage; + size_t temp_storage_bytes = 0; + cub::DeviceSegmentedReduce::Reduce(d_temp_storage.data(), + temp_storage_bytes, + d_in, + intermediate_result.data(), + num_segments, + d_offset_begin, + d_offset_begin + 1, + binary_op, + initial_value, + stream.value()); + d_temp_storage = rmm::device_buffer{temp_storage_bytes, stream}; + + // Run reduction + cub::DeviceSegmentedReduce::Reduce(d_temp_storage.data(), + temp_storage_bytes, + d_in, + intermediate_result.data(), + num_segments, + d_offset_begin, + d_offset_begin + 1, + binary_op, + initial_value, + stream.value()); + + // compute the result value from intermediate value in device + thrust::transform( + rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(num_segments), + d_out, + [ir = intermediate_result.data(), op, d_valid_counts, ddof] __device__(auto idx) { + auto const count = d_valid_counts[idx]; + return count > 0 ? op.template compute_result(ir[idx], count, ddof) + : OutputType{0}; + }); +} + +} // namespace detail +} // namespace reduction +} // namespace cudf diff --git a/cpp/include/cudf/detail/segmented_reduction_functions.hpp b/cpp/include/cudf/detail/segmented_reduction_functions.hpp new file mode 100644 index 00000000000..0a790b2d5a6 --- /dev/null +++ b/cpp/include/cudf/detail/segmented_reduction_functions.hpp @@ -0,0 +1,320 @@ +/* + * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include + +#include + +#include + +namespace cudf { +namespace reduction { + +/** + * @brief Compute sum of each segment in input column. + * + * If an input segment is empty, the segment result is null. + * + * @throw cudf::logic_error if input column type is not convertible to `output_dtype`. + * @throw cudf::logic_error if `output_dtype` is not an arithmetic type. + * + * @param col Input column to compute sum + * @param offsets Indices to identify segment boundaries + * @param output_dtype Data type of return type and typecast elements of input column + * @param null_handling If `null_policy::INCLUDE`, all elements in a segment must be valid for the + * reduced value to be valid. If `null_policy::EXCLUDE`, the reduced value is valid if any element + * in the segment is valid. + * @param init Initial value of each sum + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return Sums of segments in type `output_dtype` + */ +std::unique_ptr segmented_sum( + column_view const& col, + device_span offsets, + data_type const output_dtype, + null_policy null_handling, + std::optional> init, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Computes product of each segment in input column. + * + * If an input segment is empty, the segment result is null. + * + * @throw cudf::logic_error if input column type is not convertible to `output_dtype`. + * @throw cudf::logic_error if `output_dtype` is not an arithmetic type. + * + * @param col Input column to compute product + * @param offsets Indices to identify segment boundaries + * @param output_dtype data type of return type and typecast elements of input column + * @param null_handling If `null_policy::INCLUDE`, all elements in a segment must be valid for the + * reduced value to be valid. If `null_policy::EXCLUDE`, the reduced value is valid if any element + * in the segment is valid. + * @param init Initial value of each product + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned scalar's device memory + * @return Product as scalar of type `output_dtype` + */ +std::unique_ptr segmented_product( + column_view const& col, + device_span offsets, + data_type const output_dtype, + null_policy null_handling, + std::optional> init, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Compute minimum of each segment in input column. + * + * If an input segment is empty, the segment result is null. + * + * @throw cudf::logic_error if input column type is convertible to `output_dtype`. + * + * @param col Input column to compute minimum + * @param offsets Indices to identify segment boundaries + * @param output_dtype Data type of return type and typecast elements of input column + * @param null_handling If `null_policy::INCLUDE`, all elements in a segment must be valid for the + * reduced value to be valid. If `null_policy::EXCLUDE`, the reduced value is valid if any element + * in the segment is valid. + * @param init Initial value of each minimum + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned scalar's device memory + * @return Minimums of segments in type `output_dtype` + */ +std::unique_ptr segmented_min( + column_view const& col, + device_span offsets, + data_type const output_dtype, + null_policy null_handling, + std::optional> init, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Compute maximum of each segment in input column. + * + * If an input segment is empty, the segment result is null. + * + * @throw cudf::logic_error if input column type is convertible to `output_dtype`. + * + * @param col Input column to compute maximum + * @param offsets Indices to identify segment boundaries + * @param output_dtype Data type of return type and typecast elements of input column + * @param null_handling If `null_policy::INCLUDE`, all elements in a segment must be valid for the + * reduced value to be valid. If `null_policy::EXCLUDE`, the reduced value is valid if any element + * in the segment is valid. + * @param init Initial value of each maximum + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned scalar's device memory + * @return Maximums of segments in type `output_dtype` + */ +std::unique_ptr segmented_max( + column_view const& col, + device_span offsets, + data_type const output_dtype, + null_policy null_handling, + std::optional> init, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Compute if any of the values in the segment are true when typecasted to bool. + * + * If an input segment is empty, the segment result is null. + * + * @throw cudf::logic_error if input column type is not convertible to bool. + * @throw cudf::logic_error if `output_dtype` is not bool8. + * + * @param col Input column to compute any + * @param offsets Indices to identify segment boundaries + * @param output_dtype Data type of return type and typecast elements of input column + * @param null_handling If `null_policy::INCLUDE`, all elements in a segment must be valid for the + * reduced value to be valid. If `null_policy::EXCLUDE`, the reduced value is valid if any element + * in the segment is valid. + * @param init Initial value of each any + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned scalar's device memory + * @return Column of bool8 for the results of the segments + */ +std::unique_ptr segmented_any( + column_view const& col, + device_span offsets, + data_type const output_dtype, + null_policy null_handling, + std::optional> init, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Compute if all of the values in the segment are true when typecasted to bool. + * + * If an input segment is empty, the segment result is null. + * + * @throw cudf::logic_error if input column type is not convertible to bool. + * @throw cudf::logic_error if `output_dtype` is not bool8. + * + * @param col Input column to compute all + * @param offsets Indices to identify segment boundaries + * @param output_dtype Data type of return type and typecast elements of input column + * @param null_handling If `null_policy::INCLUDE`, all elements in a segment must be valid for the + * reduced value to be valid. If `null_policy::EXCLUDE`, the reduced value is valid if any element + * in the segment is valid. + * @param init Initial value of each all + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned scalar's device memory + * @return Column of bool8 for the results of the segments + */ +std::unique_ptr segmented_all( + column_view const& col, + device_span offsets, + data_type const output_dtype, + null_policy null_handling, + std::optional> init, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Computes mean of elements of segments in the input column + * + * If input segment is empty, the segment result is null. + * + * If `null_handling==null_policy::INCLUDE`, all elements in a segment must be valid + * for the reduced value to be valid. + * If `null_handling==null_policy::EXCLUDE`, the reduced value is valid if any element + * in the segment is valid. + * + * @throw cudf::logic_error if input column type is not arithmetic type + * @throw cudf::logic_error if `output_dtype` is not floating point type + * + * @param col Input column data + * @param offsets Indices to identify segment boundaries within input `col` + * @param output_dtype Data type of the output column + * @param null_handling Specifies how null elements are processed for each segment + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned scalar's device memory + * @return Column of `output_dtype` for the reduction results of the segments + */ +std::unique_ptr segmented_mean( + column_view const& col, + device_span offsets, + data_type const output_dtype, + null_policy null_handling, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Computes sum of squares of elements of segments in the input column + * + * If input segment is empty, the segment result is null. + * + * If `null_handling==null_policy::INCLUDE`, all elements in a segment must be valid + * for the reduced value to be valid. + * If `null_handling==null_policy::EXCLUDE`, the reduced value is valid if any element + * in the segment is valid. + * + * @throw cudf::logic_error if input column type is not arithmetic type + * @throw cudf::logic_error if `output_dtype` is not floating point type + * + * @param col Input column data + * @param offsets Indices to identify segment boundaries within input `col` + * @param output_dtype Data type of the output column + * @param null_handling Specifies how null elements are processed for each segment + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned scalar's device memory + * @return Column of `output_dtype` for the reduction results of the segments + */ +std::unique_ptr segmented_sum_of_squares( + column_view const& col, + device_span offsets, + data_type const output_dtype, + null_policy null_handling, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Computes the standard deviation of elements of segments in the input column + * + * If input segment is empty, the segment result is null. + * + * If `null_handling==null_policy::INCLUDE`, all elements in a segment must be valid + * for the reduced value to be valid. + * If `null_handling==null_policy::EXCLUDE`, the reduced value is valid if any element + * in the segment is valid. + * + * @throw cudf::logic_error if input column type is not arithmetic type + * @throw cudf::logic_error if `output_dtype` is not floating point type + * + * @param col Input column data + * @param offsets Indices to identify segment boundaries within input `col` + * @param output_dtype Data type of the output column + * @param null_handling Specifies how null elements are processed for each segment + * @param ddof Delta degrees of freedom. + * The divisor used is N - ddof, where N the number of elements in each segment + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned scalar's device memory + * @return Column of `output_dtype` for the reduction results of the segments + */ +std::unique_ptr segmented_standard_deviation( + column_view const& col, + device_span offsets, + data_type const output_dtype, + null_policy null_handling, + cudf::size_type ddof, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Computes the variance of elements of segments in the input column + * + * If input segment is empty, the segment result is null. + * + * If `null_handling==null_policy::INCLUDE`, all elements in a segment must be valid + * for the reduced value to be valid. + * If `null_handling==null_policy::EXCLUDE`, the reduced value is valid if any element + * in the segment is valid. + * + * @throw cudf::logic_error if input column type is not arithmetic type + * @throw cudf::logic_error if `output_dtype` is not floating point type + * + * @param col Input column data + * @param offsets Indices to identify segment boundaries within input `col` + * @param output_dtype Data type of the output column + * @param null_handling Specifies how null elements are processed for each segment + * @param ddof Delta degrees of freedom. + * The divisor used is N - ddof, where N the number of elements in each segment + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned scalar's device memory + * @return Column of `output_dtype` for the reduction results of the segments + */ +std::unique_ptr segmented_variance( + column_view const& col, + device_span offsets, + data_type const output_dtype, + null_policy null_handling, + cudf::size_type ddof, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +} // namespace reduction +} // namespace cudf diff --git a/cpp/src/aggregation/aggregation.cpp b/cpp/src/aggregation/aggregation.cpp index 6dd014970c7..07c53b3a421 100644 --- a/cpp/src/aggregation/aggregation.cpp +++ b/cpp/src/aggregation/aggregation.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -492,6 +492,8 @@ template std::unique_ptr make_sum_of_squares_aggregation make_sum_of_squares_aggregation(); template std::unique_ptr make_sum_of_squares_aggregation(); +template std::unique_ptr +make_sum_of_squares_aggregation(); /// Factory to create a MEAN aggregation template @@ -503,6 +505,8 @@ template std::unique_ptr make_mean_aggregation(); template std::unique_ptr make_mean_aggregation(); template std::unique_ptr make_mean_aggregation(); template std::unique_ptr make_mean_aggregation(); +template std::unique_ptr +make_mean_aggregation(); /// Factory to create a M2 aggregation template @@ -526,6 +530,8 @@ template std::unique_ptr make_variance_aggregation make_variance_aggregation( size_type ddof); +template std::unique_ptr +make_variance_aggregation(size_type ddof); /// Factory to create a STD aggregation template @@ -540,6 +546,8 @@ template std::unique_ptr make_std_aggregation make_std_aggregation( size_type ddof); +template std::unique_ptr +make_std_aggregation(size_type ddof); /// Factory to create a MEDIAN aggregation template diff --git a/cpp/src/lists/stream_compaction/apply_boolean_mask.cu b/cpp/src/lists/stream_compaction/apply_boolean_mask.cu index c1c17dc0688..5acb1cb8849 100644 --- a/cpp/src/lists/stream_compaction/apply_boolean_mask.cu +++ b/cpp/src/lists/stream_compaction/apply_boolean_mask.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,8 +20,8 @@ #include #include #include -#include #include +#include #include #include #include diff --git a/cpp/src/reductions/segmented_all.cu b/cpp/src/reductions/segmented/all.cu similarity index 94% rename from cpp/src/reductions/segmented_all.cu rename to cpp/src/reductions/segmented/all.cu index 4536785fe82..f75fcd8066c 100644 --- a/cpp/src/reductions/segmented_all.cu +++ b/cpp/src/reductions/segmented/all.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "simple_segmented.cuh" +#include "simple.cuh" #include diff --git a/cpp/src/reductions/segmented_any.cu b/cpp/src/reductions/segmented/any.cu similarity index 94% rename from cpp/src/reductions/segmented_any.cu rename to cpp/src/reductions/segmented/any.cu index cc50eb1e1f4..6a4fc70d438 100644 --- a/cpp/src/reductions/segmented_any.cu +++ b/cpp/src/reductions/segmented/any.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "simple_segmented.cuh" +#include "simple.cuh" #include diff --git a/cpp/src/reductions/segmented/compound.cuh b/cpp/src/reductions/segmented/compound.cuh new file mode 100644 index 00000000000..09a4188426b --- /dev/null +++ b/cpp/src/reductions/segmented/compound.cuh @@ -0,0 +1,193 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include + +#include +#include + +namespace cudf { +namespace reduction { +namespace compound { +namespace detail { +/** + * @brief Multi-step reduction for operations such as mean, variance, and standard deviation. + * + * @tparam InputType the input column data-type + * @tparam ResultType the output data-type + * @tparam Op the compound operator derived from `cudf::reduction::op::compound_op` + * + * @param col Input column view + * @param offsets Indices identifying segments + * @param null_handling Indicates if null elements should be included in the reduction + * @param ddof Delta degrees of freedom used for standard deviation and variance. + * The divisor used is N - ddof, where N represents the number of elements. + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned scalar's device memory + * @return Segmented reduce result + */ +template +std::unique_ptr compound_segmented_reduction(column_view const& col, + device_span offsets, + null_policy null_handling, + cudf::size_type ddof, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + auto d_col = cudf::column_device_view::create(col, stream); + auto compound_op = Op{}; + auto const num_segments = offsets.size() - 1; + + auto result = make_fixed_width_column( + data_type{type_to_id()}, num_segments, mask_state::UNALLOCATED, stream, mr); + auto out_itr = result->mutable_view().template begin(); + + // Compute valid counts + rmm::device_uvector valid_counts(num_segments, stream); + if (col.has_nulls() && (null_handling == null_policy::EXCLUDE)) { + auto valid_fn = [] __device__(auto p) -> size_type { return static_cast(p.second); }; + auto itr = thrust::make_transform_iterator(d_col->pair_begin(), valid_fn); + cudf::reduction::detail::segmented_reduce(itr, + offsets.begin(), + offsets.end(), + valid_counts.data(), + thrust::plus{}, + 0, + stream); + } else { + thrust::adjacent_difference( + rmm::exec_policy(stream), offsets.begin() + 1, offsets.end(), valid_counts.begin()); + } + + // Run segmented reduction + if (col.has_nulls()) { + auto nrt = compound_op.template get_null_replacing_element_transformer(); + auto itr = thrust::make_transform_iterator(d_col->pair_begin(), nrt); + cudf::reduction::detail::segmented_reduce( + itr, offsets.begin(), offsets.end(), out_itr, compound_op, ddof, valid_counts.data(), stream); + } else { + auto et = compound_op.template get_element_transformer(); + auto itr = thrust::make_transform_iterator(d_col->begin(), et); + cudf::reduction::detail::segmented_reduce( + itr, offsets.begin(), offsets.end(), out_itr, compound_op, ddof, valid_counts.data(), stream); + } + + // Compute the output null mask + auto const bitmask = col.null_mask(); + auto const first_bit_indices_begin = offsets.begin(); + auto const first_bit_indices_end = offsets.end() - 1; + auto const last_bit_indices_begin = first_bit_indices_begin + 1; + auto [output_null_mask, output_null_count] = + cudf::detail::segmented_null_mask_reduction(bitmask, + first_bit_indices_begin, + first_bit_indices_end, + last_bit_indices_begin, + null_handling, + std::nullopt, + stream, + mr); + result->set_null_mask(std::move(output_null_mask), output_null_count); + return result; +}; + +template +struct segmented_result_dispatcher { + private: + template + static constexpr bool is_supported_v() + { + // the operator `mean`, `var`, `std` only accepts + // floating points as output dtype + return std::is_floating_point_v; + } + + public: + template ()>* = nullptr> + std::unique_ptr operator()(column_view const& col, + device_span offsets, + null_policy null_handling, + cudf::size_type ddof, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + { + return compound_segmented_reduction( + col, offsets, null_handling, ddof, stream, mr); + } + + template ()>* = nullptr> + std::unique_ptr operator()(column_view const&, + device_span, + null_policy, + cudf::size_type, + rmm::cuda_stream_view, + rmm::mr::device_memory_resource*) + { + CUDF_FAIL("Unsupported output data type"); + } +}; + +template +struct compound_segmented_dispatcher { + private: + template + static constexpr bool is_supported_v() + { + return std::is_arithmetic_v; + } + + public: + template ()>* = nullptr> + std::unique_ptr operator()(column_view const& col, + device_span offsets, + cudf::data_type const output_dtype, + null_policy null_handling, + cudf::size_type ddof, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + { + return cudf::type_dispatcher(output_dtype, + segmented_result_dispatcher(), + col, + offsets, + null_handling, + ddof, + stream, + mr); + } + + template ()>* = nullptr> + std::unique_ptr operator()(column_view const&, + device_span, + cudf::data_type const, + null_policy, + cudf::size_type, + rmm::cuda_stream_view, + rmm::mr::device_memory_resource*) + { + CUDF_FAIL("Compound operators are not supported for non-arithmetic types"); + } +}; + +} // namespace detail +} // namespace compound +} // namespace reduction +} // namespace cudf diff --git a/cpp/src/reductions/segmented_max.cu b/cpp/src/reductions/segmented/max.cu similarity index 94% rename from cpp/src/reductions/segmented_max.cu rename to cpp/src/reductions/segmented/max.cu index 494aff66797..d72b65301c1 100644 --- a/cpp/src/reductions/segmented_max.cu +++ b/cpp/src/reductions/segmented/max.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "simple_segmented.cuh" +#include "simple.cuh" #include diff --git a/cpp/src/reductions/segmented/mean.cu b/cpp/src/reductions/segmented/mean.cu new file mode 100644 index 00000000000..26c679442e3 --- /dev/null +++ b/cpp/src/reductions/segmented/mean.cu @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "compound.cuh" + +#include + +#include + +namespace cudf { +namespace reduction { + +std::unique_ptr segmented_mean(column_view const& col, + device_span offsets, + cudf::data_type const output_dtype, + null_policy null_handling, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + using reducer = compound::detail::compound_segmented_dispatcher; + return cudf::type_dispatcher(col.type(), + reducer{}, + col, + offsets, + output_dtype, + null_handling, + 1, // ddof is not used for mean + stream, + mr); +} + +} // namespace reduction +} // namespace cudf diff --git a/cpp/src/reductions/segmented_min.cu b/cpp/src/reductions/segmented/min.cu similarity index 94% rename from cpp/src/reductions/segmented_min.cu rename to cpp/src/reductions/segmented/min.cu index dee6a989ad6..b7fbedf2690 100644 --- a/cpp/src/reductions/segmented_min.cu +++ b/cpp/src/reductions/segmented/min.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "simple_segmented.cuh" +#include "simple.cuh" #include diff --git a/cpp/src/reductions/segmented_product.cu b/cpp/src/reductions/segmented/product.cu similarity index 94% rename from cpp/src/reductions/segmented_product.cu rename to cpp/src/reductions/segmented/product.cu index 25b31d117b3..d5442126660 100644 --- a/cpp/src/reductions/segmented_product.cu +++ b/cpp/src/reductions/segmented/product.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "simple_segmented.cuh" +#include "simple.cuh" #include diff --git a/cpp/src/reductions/segmented_reductions.cpp b/cpp/src/reductions/segmented/reductions.cpp similarity index 87% rename from cpp/src/reductions/segmented_reductions.cpp rename to cpp/src/reductions/segmented/reductions.cpp index 6ccc87291cc..1de55b371b3 100644 --- a/cpp/src/reductions/segmented_reductions.cpp +++ b/cpp/src/reductions/segmented/reductions.cpp @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include @@ -65,7 +65,7 @@ struct segmented_reduce_dispatch_functor { } template - std::unique_ptr operator()() + std::unique_ptr operator()(segmented_reduce_aggregation const& agg) { switch (k) { case segmented_reduce_aggregation::SUM: @@ -86,6 +86,21 @@ struct segmented_reduce_dispatch_functor { case segmented_reduce_aggregation::ALL: return reduction::segmented_all( col, offsets, output_dtype, null_handling, init, stream, mr); + case segmented_reduce_aggregation::SUM_OF_SQUARES: + return reduction::segmented_sum_of_squares( + col, offsets, output_dtype, null_handling, stream, mr); + case segmented_reduce_aggregation::MEAN: + return reduction::segmented_mean(col, offsets, output_dtype, null_handling, stream, mr); + case aggregation::VARIANCE: { + auto var_agg = static_cast(agg); + return reduction::segmented_variance( + col, offsets, output_dtype, null_handling, var_agg._ddof, stream, mr); + } + case aggregation::STD: { + auto var_agg = static_cast(agg); + return reduction::segmented_standard_deviation( + col, offsets, output_dtype, null_handling, var_agg._ddof, stream, mr); + } default: CUDF_FAIL("Unsupported aggregation type."); // TODO: Add support for compound_ops. GH #10432 @@ -115,7 +130,8 @@ std::unique_ptr segmented_reduce(column_view const& segmented_values, return aggregation_dispatcher( agg.kind, segmented_reduce_dispatch_functor{ - segmented_values, offsets, output_dtype, null_handling, init, stream, mr}); + segmented_values, offsets, output_dtype, null_handling, init, stream, mr}, + agg); } } // namespace detail diff --git a/cpp/src/reductions/simple_segmented.cuh b/cpp/src/reductions/segmented/simple.cuh similarity index 99% rename from cpp/src/reductions/simple_segmented.cuh rename to cpp/src/reductions/segmented/simple.cuh index 2b31252c18c..cc734cffdc0 100644 --- a/cpp/src/reductions/simple_segmented.cuh +++ b/cpp/src/reductions/segmented/simple.cuh @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/cpp/src/reductions/segmented/std.cu b/cpp/src/reductions/segmented/std.cu new file mode 100644 index 00000000000..16144ab7a72 --- /dev/null +++ b/cpp/src/reductions/segmented/std.cu @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "compound.cuh" + +#include + +#include + +namespace cudf { +namespace reduction { + +std::unique_ptr segmented_standard_deviation(column_view const& col, + device_span offsets, + cudf::data_type const output_dtype, + null_policy null_handling, + cudf::size_type ddof, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + using reducer = + compound::detail::compound_segmented_dispatcher; + return cudf::type_dispatcher( + col.type(), reducer(), col, offsets, output_dtype, null_handling, ddof, stream, mr); +} + +} // namespace reduction +} // namespace cudf diff --git a/cpp/src/reductions/segmented_sum.cu b/cpp/src/reductions/segmented/sum.cu similarity index 95% rename from cpp/src/reductions/segmented_sum.cu rename to cpp/src/reductions/segmented/sum.cu index 4caaa727371..0cb8decdc58 100644 --- a/cpp/src/reductions/segmented_sum.cu +++ b/cpp/src/reductions/segmented/sum.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "simple_segmented.cuh" +#include "simple.cuh" #include diff --git a/cpp/src/reductions/segmented/sum_of_squares.cu b/cpp/src/reductions/segmented/sum_of_squares.cu new file mode 100644 index 00000000000..1ee4f992b6d --- /dev/null +++ b/cpp/src/reductions/segmented/sum_of_squares.cu @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "simple.cuh" + +#include + +#include + +namespace cudf { +namespace reduction { + +std::unique_ptr segmented_sum_of_squares(column_view const& col, + device_span offsets, + cudf::data_type const output_dtype, + null_policy null_handling, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + using reducer = simple::detail::column_type_dispatcher; + return cudf::type_dispatcher( + col.type(), reducer{}, col, offsets, output_dtype, null_handling, std::nullopt, stream, mr); +} + +} // namespace reduction +} // namespace cudf diff --git a/cpp/src/reductions/segmented/var.cu b/cpp/src/reductions/segmented/var.cu new file mode 100644 index 00000000000..d69a67f649c --- /dev/null +++ b/cpp/src/reductions/segmented/var.cu @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "compound.cuh" + +#include + +#include + +namespace cudf { +namespace reduction { + +std::unique_ptr segmented_variance(column_view const& col, + device_span offsets, + cudf::data_type const output_dtype, + null_policy null_handling, + cudf::size_type ddof, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + using reducer = compound::detail::compound_segmented_dispatcher; + return cudf::type_dispatcher( + col.type(), reducer(), col, offsets, output_dtype, null_handling, ddof, stream, mr); +} + +} // namespace reduction +} // namespace cudf diff --git a/cpp/tests/reductions/segmented_reduction_tests.cpp b/cpp/tests/reductions/segmented_reduction_tests.cpp index 74e69d2fb05..804ea406deb 100644 --- a/cpp/tests/reductions/segmented_reduction_tests.cpp +++ b/cpp/tests/reductions/segmented_reduction_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,6 +24,8 @@ #include #include +#include + #define XXX 0 // null placeholder template @@ -770,6 +772,76 @@ TEST_F(SegmentedReductionTestUntyped, NonNullableInput) CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, null_init_expect); } +TEST_F(SegmentedReductionTestUntyped, Mean) +{ + auto const input = + cudf::test::fixed_width_column_wrapper{10, 20, 30, 40, 50, 60, 70, 80, 90}; + auto const offsets = std::vector{0, 1, 1, 4, 9}; + auto const d_offsets = + cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const expected = + cudf::test::fixed_width_column_wrapper{{10, 0, 30, 70}, {true, false, true, true}}; + + auto result = + cudf::segmented_reduce(input, + d_offsets, + *cudf::make_mean_aggregation(), + cudf::data_type{cudf::type_id::FLOAT32}, + cudf::null_policy::INCLUDE); + + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected); +} + +TEST_F(SegmentedReductionTestUntyped, SumOfSquares) +{ + auto const input = + cudf::test::fixed_width_column_wrapper{10, 20, 30, 40, 50, 60, 70, 80, 90}; + auto const offsets = std::vector{0, 1, 1, 4, 9}; + auto const d_offsets = + cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const expected = cudf::test::fixed_width_column_wrapper{{100, 0, 2900, 25500}, + {true, false, true, true}}; + + auto result = cudf::segmented_reduce( + input, + d_offsets, + *cudf::make_sum_of_squares_aggregation(), + cudf::data_type{cudf::type_id::FLOAT64}, + cudf::null_policy::INCLUDE); + + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected); +} + +TEST_F(SegmentedReductionTestUntyped, StdVar) +{ + constexpr float NaN{std::numeric_limits::quiet_NaN()}; + auto const input = + cudf::test::fixed_width_column_wrapper{10, 20, 30, 40, 50, 60, 70, 80, 90}; + auto const offsets = std::vector{0, 1, 1, 4, 9}; + auto const d_offsets = + cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + + auto expected = cudf::test::fixed_width_column_wrapper{{NaN, 0.f, 10.f, 15.811388f}, + {true, false, true, true}}; + auto result = + cudf::segmented_reduce(input, + d_offsets, + *cudf::make_std_aggregation(), + cudf::data_type{cudf::type_id::FLOAT32}, + cudf::null_policy::INCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected); + + expected = cudf::test::fixed_width_column_wrapper{{NaN, 0.f, 100.f, 250.f}, + {true, false, true, true}}; + result = + cudf::segmented_reduce(input, + d_offsets, + *cudf::make_variance_aggregation(), + cudf::data_type{cudf::type_id::FLOAT32}, + cudf::null_policy::INCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected); +} + TEST_F(SegmentedReductionTestUntyped, ReduceEmptyColumn) { auto const input = cudf::test::fixed_width_column_wrapper{}; From 26d2f0d1ea653620a28611bc860a563b24c8519b Mon Sep 17 00:00:00 2001 From: David Wendt Date: Thu, 19 Jan 2023 10:00:04 -0500 Subject: [PATCH 02/12] add gtests with nulls include/exclude --- .../reductions/segmented_reduction_tests.cpp | 141 ++++++++++++++++-- 1 file changed, 131 insertions(+), 10 deletions(-) diff --git a/cpp/tests/reductions/segmented_reduction_tests.cpp b/cpp/tests/reductions/segmented_reduction_tests.cpp index 804ea406deb..3a45c1a91b1 100644 --- a/cpp/tests/reductions/segmented_reduction_tests.cpp +++ b/cpp/tests/reductions/segmented_reduction_tests.cpp @@ -780,7 +780,7 @@ TEST_F(SegmentedReductionTestUntyped, Mean) auto const d_offsets = cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); auto const expected = - cudf::test::fixed_width_column_wrapper{{10, 0, 30, 70}, {true, false, true, true}}; + cudf::test::fixed_width_column_wrapper{{10, 0, 30, 70}, {1, 0, 1, 1}}; auto result = cudf::segmented_reduce(input, @@ -792,6 +792,33 @@ TEST_F(SegmentedReductionTestUntyped, Mean) CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected); } +TEST_F(SegmentedReductionTestUntyped, MeanNulls) +{ + auto const input = cudf::test::fixed_width_column_wrapper( + {10, 0, 30, 40, 50, 60, 0, 80, 90}, {1, 0, 1, 1, 1, 1, 0, 1, 1}); + auto const offsets = std::vector{0, 1, 1, 4, 9}; + auto const d_offsets = + cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + + auto expected = cudf::test::fixed_width_column_wrapper{{10, 0, 35, 70}, {1, 0, 1, 1}}; + auto result = + cudf::segmented_reduce(input, + d_offsets, + *cudf::make_mean_aggregation(), + cudf::data_type{cudf::type_id::FLOAT64}, + cudf::null_policy::EXCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected); + + expected = cudf::test::fixed_width_column_wrapper{{10, 0, 0, 0}, {1, 0, 0, 0}}; + result = + cudf::segmented_reduce(input, + d_offsets, + *cudf::make_mean_aggregation(), + cudf::data_type{cudf::type_id::FLOAT64}, + cudf::null_policy::INCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected); +} + TEST_F(SegmentedReductionTestUntyped, SumOfSquares) { auto const input = @@ -799,20 +826,48 @@ TEST_F(SegmentedReductionTestUntyped, SumOfSquares) auto const offsets = std::vector{0, 1, 1, 4, 9}; auto const d_offsets = cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); - auto const expected = cudf::test::fixed_width_column_wrapper{{100, 0, 2900, 25500}, - {true, false, true, true}}; + auto const expected = + cudf::test::fixed_width_column_wrapper{{100, 0, 2900, 25500}, {1, 0, 1, 1}}; auto result = cudf::segmented_reduce( input, d_offsets, *cudf::make_sum_of_squares_aggregation(), - cudf::data_type{cudf::type_id::FLOAT64}, + cudf::data_type{cudf::type_id::INT32}, cudf::null_policy::INCLUDE); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected); } -TEST_F(SegmentedReductionTestUntyped, StdVar) +TEST_F(SegmentedReductionTestUntyped, SumOfSquaresNulls) +{ + auto const input = cudf::test::fixed_width_column_wrapper( + {10, 0, 30, 40, 50, 60, 0, 80, 90}, {1, 0, 1, 1, 1, 1, 0, 1, 1}); + auto const offsets = std::vector{0, 1, 1, 4, 9}; + auto const d_offsets = + cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + + auto expected = + cudf::test::fixed_width_column_wrapper{{100, 0, 2500, 20600}, {1, 0, 1, 1}}; + auto result = cudf::segmented_reduce( + input, + d_offsets, + *cudf::make_sum_of_squares_aggregation(), + cudf::data_type{cudf::type_id::INT64}, + cudf::null_policy::EXCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected); + + expected = cudf::test::fixed_width_column_wrapper{{100, 0, 0, 0}, {1, 0, 0, 0}}; + result = cudf::segmented_reduce( + input, + d_offsets, + *cudf::make_sum_of_squares_aggregation(), + cudf::data_type{cudf::type_id::INT64}, + cudf::null_policy::INCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected); +} + +TEST_F(SegmentedReductionTestUntyped, StandardDeviation) { constexpr float NaN{std::numeric_limits::quiet_NaN()}; auto const input = @@ -821,8 +876,8 @@ TEST_F(SegmentedReductionTestUntyped, StdVar) auto const d_offsets = cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); - auto expected = cudf::test::fixed_width_column_wrapper{{NaN, 0.f, 10.f, 15.811388f}, - {true, false, true, true}}; + auto expected = cudf::test::fixed_width_column_wrapper{ + {NaN, 0.f, 10.f, static_cast(std::sqrt(250.))}, {1, 0, 1, 1}}; auto result = cudf::segmented_reduce(input, d_offsets, @@ -830,10 +885,48 @@ TEST_F(SegmentedReductionTestUntyped, StdVar) cudf::data_type{cudf::type_id::FLOAT32}, cudf::null_policy::INCLUDE); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected); +} - expected = cudf::test::fixed_width_column_wrapper{{NaN, 0.f, 100.f, 250.f}, - {true, false, true, true}}; - result = +TEST_F(SegmentedReductionTestUntyped, StandardDeviationNulls) +{ + constexpr double NaN{std::numeric_limits::quiet_NaN()}; + auto const input = cudf::test::fixed_width_column_wrapper( + {10, 0, 20, 30, 54, 63, 0, 72, 81}, {1, 0, 1, 1, 1, 1, 0, 1, 1}); + auto const offsets = std::vector{0, 1, 1, 4, 9}; + auto const d_offsets = + cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + + auto expected = cudf::test::fixed_width_column_wrapper{ + {NaN, 0., std::sqrt(50.), std::sqrt(135.)}, {1, 0, 1, 1}}; + auto result = + cudf::segmented_reduce(input, + d_offsets, + *cudf::make_std_aggregation(), + cudf::data_type{cudf::type_id::FLOAT64}, + cudf::null_policy::EXCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected); + + expected = cudf::test::fixed_width_column_wrapper{{NaN, 0., 0., 0.}, {1, 0, 0, 0}}; + result = cudf::segmented_reduce(input, + d_offsets, + *cudf::make_std_aggregation(), + cudf::data_type{cudf::type_id::FLOAT64}, + cudf::null_policy::INCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected); +} + +TEST_F(SegmentedReductionTestUntyped, Variance) +{ + constexpr float NaN{std::numeric_limits::quiet_NaN()}; + auto const input = + cudf::test::fixed_width_column_wrapper{10, 20, 30, 40, 50, 60, 70, 80, 90}; + auto const offsets = std::vector{0, 1, 1, 4, 9}; + auto const d_offsets = + cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + + auto expected = + cudf::test::fixed_width_column_wrapper{{NaN, 0.f, 100.f, 250.f}, {1, 0, 1, 1}}; + auto result = cudf::segmented_reduce(input, d_offsets, *cudf::make_variance_aggregation(), @@ -842,6 +935,34 @@ TEST_F(SegmentedReductionTestUntyped, StdVar) CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected); } +TEST_F(SegmentedReductionTestUntyped, VarianceNulls) +{ + constexpr double NaN{std::numeric_limits::quiet_NaN()}; + auto const input = cudf::test::fixed_width_column_wrapper( + {10, 0, 20, 30, 54, 63, 0, 72, 81}, {1, 0, 1, 1, 1, 1, 0, 1, 1}); + auto const offsets = std::vector{0, 1, 1, 4, 9}; + auto const d_offsets = + cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + + auto expected = + cudf::test::fixed_width_column_wrapper{{NaN, 0., 50., 135.}, {1, 0, 1, 1}}; + auto result = + cudf::segmented_reduce(input, + d_offsets, + *cudf::make_variance_aggregation(), + cudf::data_type{cudf::type_id::FLOAT64}, + cudf::null_policy::EXCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected); + + expected = cudf::test::fixed_width_column_wrapper{{NaN, 0., 0., 0.}, {1, 0, 0, 0}}; + result = cudf::segmented_reduce(input, + d_offsets, + *cudf::make_std_aggregation(), + cudf::data_type{cudf::type_id::FLOAT64}, + cudf::null_policy::INCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected); +} + TEST_F(SegmentedReductionTestUntyped, ReduceEmptyColumn) { auto const input = cudf::test::fixed_width_column_wrapper{}; From ebc41a124abe8d7389fd9459b7321a5df1d31084 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Thu, 19 Jan 2023 11:17:22 -0500 Subject: [PATCH 03/12] reduce number of nulls in new gtests --- cpp/tests/reductions/segmented_reduction_tests.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cpp/tests/reductions/segmented_reduction_tests.cpp b/cpp/tests/reductions/segmented_reduction_tests.cpp index 3a45c1a91b1..3d63b821523 100644 --- a/cpp/tests/reductions/segmented_reduction_tests.cpp +++ b/cpp/tests/reductions/segmented_reduction_tests.cpp @@ -795,12 +795,12 @@ TEST_F(SegmentedReductionTestUntyped, Mean) TEST_F(SegmentedReductionTestUntyped, MeanNulls) { auto const input = cudf::test::fixed_width_column_wrapper( - {10, 0, 30, 40, 50, 60, 0, 80, 90}, {1, 0, 1, 1, 1, 1, 0, 1, 1}); + {10, 20, 30, 40, 50, 60, 0, 80, 90}, {1, 1, 1, 1, 1, 1, 0, 1, 1}); auto const offsets = std::vector{0, 1, 1, 4, 9}; auto const d_offsets = cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); - auto expected = cudf::test::fixed_width_column_wrapper{{10, 0, 35, 70}, {1, 0, 1, 1}}; + auto expected = cudf::test::fixed_width_column_wrapper{{10, 0, 30, 70}, {1, 0, 1, 1}}; auto result = cudf::segmented_reduce(input, d_offsets, @@ -809,7 +809,7 @@ TEST_F(SegmentedReductionTestUntyped, MeanNulls) cudf::null_policy::EXCLUDE); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected); - expected = cudf::test::fixed_width_column_wrapper{{10, 0, 0, 0}, {1, 0, 0, 0}}; + expected = cudf::test::fixed_width_column_wrapper{{10, 0, 30, 0}, {1, 0, 1, 0}}; result = cudf::segmented_reduce(input, d_offsets, @@ -842,13 +842,13 @@ TEST_F(SegmentedReductionTestUntyped, SumOfSquares) TEST_F(SegmentedReductionTestUntyped, SumOfSquaresNulls) { auto const input = cudf::test::fixed_width_column_wrapper( - {10, 0, 30, 40, 50, 60, 0, 80, 90}, {1, 0, 1, 1, 1, 1, 0, 1, 1}); + {10, 20, 30, 40, 50, 60, 0, 80, 90}, {1, 1, 1, 1, 1, 1, 0, 1, 1}); auto const offsets = std::vector{0, 1, 1, 4, 9}; auto const d_offsets = cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); auto expected = - cudf::test::fixed_width_column_wrapper{{100, 0, 2500, 20600}, {1, 0, 1, 1}}; + cudf::test::fixed_width_column_wrapper{{100, 0, 2900, 20600}, {1, 0, 1, 1}}; auto result = cudf::segmented_reduce( input, d_offsets, @@ -857,7 +857,7 @@ TEST_F(SegmentedReductionTestUntyped, SumOfSquaresNulls) cudf::null_policy::EXCLUDE); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected); - expected = cudf::test::fixed_width_column_wrapper{{100, 0, 0, 0}, {1, 0, 0, 0}}; + expected = cudf::test::fixed_width_column_wrapper{{100, 0, 2900, 0}, {1, 0, 1, 0}}; result = cudf::segmented_reduce( input, d_offsets, From ca27894f64a709e0dadd7aae2db7f39d2ccf1b5f Mon Sep 17 00:00:00 2001 From: David Wendt Date: Tue, 24 Jan 2023 14:31:40 -0500 Subject: [PATCH 04/12] update include statements --- cpp/include/cudf/detail/reduction.cuh | 2 +- cpp/include/cudf/detail/segmented_reduction.cuh | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/include/cudf/detail/reduction.cuh b/cpp/include/cudf/detail/reduction.cuh index 7267d0b3524..9dc3b996afc 100644 --- a/cpp/include/cudf/detail/reduction.cuh +++ b/cpp/include/cudf/detail/reduction.cuh @@ -16,7 +16,7 @@ #pragma once -#include "reduction_operators.cuh" +#include #include #include diff --git a/cpp/include/cudf/detail/segmented_reduction.cuh b/cpp/include/cudf/detail/segmented_reduction.cuh index 171f1b414cb..26bf755334d 100644 --- a/cpp/include/cudf/detail/segmented_reduction.cuh +++ b/cpp/include/cudf/detail/segmented_reduction.cuh @@ -16,7 +16,7 @@ #pragma once -#include "reduction_operators.cuh" +#include #include #include @@ -142,7 +142,7 @@ void segmented_reduce(InputIterator d_in, OffsetIterator d_offset_end, OutputIterator d_out, op::compound_op op, - cudf::size_type ddof, + size_type ddof, size_type* d_valid_counts, rmm::cuda_stream_view stream) { From c1f0939781de73e5e565e0d3807c72fc3cd007cf Mon Sep 17 00:00:00 2001 From: David Wendt Date: Tue, 24 Jan 2023 14:31:57 -0500 Subject: [PATCH 05/12] update doxygen for consistency --- .../detail/segmented_reduction_functions.hpp | 152 ++++++++++-------- 1 file changed, 85 insertions(+), 67 deletions(-) diff --git a/cpp/include/cudf/detail/segmented_reduction_functions.hpp b/cpp/include/cudf/detail/segmented_reduction_functions.hpp index 0a790b2d5a6..54751c00ccf 100644 --- a/cpp/include/cudf/detail/segmented_reduction_functions.hpp +++ b/cpp/include/cudf/detail/segmented_reduction_functions.hpp @@ -29,23 +29,26 @@ namespace cudf { namespace reduction { /** - * @brief Compute sum of each segment in input column. + * @brief Compute sum of each segment in the input column * * If an input segment is empty, the segment result is null. * + * If `null_handling==null_policy::INCLUDE`, all elements in a segment must be valid + * for the reduced value to be valid. + * If `null_handling==null_policy::EXCLUDE`, the reduced value is valid if any element + * in the segment is valid. + * * @throw cudf::logic_error if input column type is not convertible to `output_dtype`. * @throw cudf::logic_error if `output_dtype` is not an arithmetic type. * - * @param col Input column to compute sum - * @param offsets Indices to identify segment boundaries - * @param output_dtype Data type of return type and typecast elements of input column - * @param null_handling If `null_policy::INCLUDE`, all elements in a segment must be valid for the - * reduced value to be valid. If `null_policy::EXCLUDE`, the reduced value is valid if any element - * in the segment is valid. - * @param init Initial value of each sum + * @param col Input column data + * @param offsets Indices to identify segment boundaries within input `col` + * @param output_dtype Data type of the output column + * @param null_handling Specifies how null elements are processed for each segment + * @param init Initial value of each segment * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory - * @return Sums of segments in type `output_dtype` + * @return Sums of segments as type `output_dtype` */ std::unique_ptr segmented_sum( column_view const& col, @@ -57,23 +60,26 @@ std::unique_ptr segmented_sum( rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @brief Computes product of each segment in input column. + * @brief Computes product of each segment in the input column * * If an input segment is empty, the segment result is null. * + * If `null_handling==null_policy::INCLUDE`, all elements in a segment must be valid + * for the reduced value to be valid. + * If `null_handling==null_policy::EXCLUDE`, the reduced value is valid if any element + * in the segment is valid. + * * @throw cudf::logic_error if input column type is not convertible to `output_dtype`. * @throw cudf::logic_error if `output_dtype` is not an arithmetic type. * - * @param col Input column to compute product - * @param offsets Indices to identify segment boundaries - * @param output_dtype data type of return type and typecast elements of input column - * @param null_handling If `null_policy::INCLUDE`, all elements in a segment must be valid for the - * reduced value to be valid. If `null_policy::EXCLUDE`, the reduced value is valid if any element - * in the segment is valid. - * @param init Initial value of each product + * @param col Input column data + * @param offsets Indices to identify segment boundaries within input `col` + * @param output_dtype Data type of the output column + * @param null_handling Specifies how null elements are processed for each segment + * @param init Initial value of each segment * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource used to allocate the returned scalar's device memory - * @return Product as scalar of type `output_dtype` + * @param mr Device memory resource used to allocate the returned column's device memory + * @return Product of segments as type `output_dtype` */ std::unique_ptr segmented_product( column_view const& col, @@ -85,22 +91,25 @@ std::unique_ptr segmented_product( rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @brief Compute minimum of each segment in input column. + * @brief Compute minimum of each segment in the input column * * If an input segment is empty, the segment result is null. * - * @throw cudf::logic_error if input column type is convertible to `output_dtype`. - * - * @param col Input column to compute minimum - * @param offsets Indices to identify segment boundaries - * @param output_dtype Data type of return type and typecast elements of input column - * @param null_handling If `null_policy::INCLUDE`, all elements in a segment must be valid for the - * reduced value to be valid. If `null_policy::EXCLUDE`, the reduced value is valid if any element + * If `null_handling==null_policy::INCLUDE`, all elements in a segment must be valid + * for the reduced value to be valid. + * If `null_handling==null_policy::EXCLUDE`, the reduced value is valid if any element * in the segment is valid. - * @param init Initial value of each minimum + * + * @throw cudf::logic_error if input column type is not convertible to `output_dtype`. + * + * @param col Input column data + * @param offsets Indices to identify segment boundaries within input `col` + * @param output_dtype Data type of the output column + * @param null_handling Specifies how null elements are processed for each segment + * @param init Initial value of each segment * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource used to allocate the returned scalar's device memory - * @return Minimums of segments in type `output_dtype` + * @param mr Device memory resource used to allocate the returned column's device memory + * @return Minimums of segments as type `output_dtype` */ std::unique_ptr segmented_min( column_view const& col, @@ -112,22 +121,25 @@ std::unique_ptr segmented_min( rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @brief Compute maximum of each segment in input column. + * @brief Compute maximum of each segment in the input column * * If an input segment is empty, the segment result is null. * - * @throw cudf::logic_error if input column type is convertible to `output_dtype`. - * - * @param col Input column to compute maximum - * @param offsets Indices to identify segment boundaries - * @param output_dtype Data type of return type and typecast elements of input column - * @param null_handling If `null_policy::INCLUDE`, all elements in a segment must be valid for the - * reduced value to be valid. If `null_policy::EXCLUDE`, the reduced value is valid if any element + * If `null_handling==null_policy::INCLUDE`, all elements in a segment must be valid + * for the reduced value to be valid. + * If `null_handling==null_policy::EXCLUDE`, the reduced value is valid if any element * in the segment is valid. - * @param init Initial value of each maximum + * + * @throw cudf::logic_error if input column type is not convertible to `output_dtype`. + * + * @param col Input column data + * @param offsets Indices to identify segment boundaries within input `col` + * @param output_dtype Data type of the output column + * @param null_handling Specifies how null elements are processed for each segment + * @param init Initial value of each segment * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource used to allocate the returned scalar's device memory - * @return Maximums of segments in type `output_dtype` + * @param mr Device memory resource used to allocate the returned column's device memory + * @return Maximums of segments as type `output_dtype` */ std::unique_ptr segmented_max( column_view const& col, @@ -139,23 +151,26 @@ std::unique_ptr segmented_max( rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @brief Compute if any of the values in the segment are true when typecasted to bool. + * @brief Compute if any of the values in the segment are true when typecasted to bool * * If an input segment is empty, the segment result is null. * + * If `null_handling==null_policy::INCLUDE`, all elements in a segment must be valid + * for the reduced value to be valid. + * If `null_handling==null_policy::EXCLUDE`, the reduced value is valid if any element + * in the segment is valid. + * * @throw cudf::logic_error if input column type is not convertible to bool. - * @throw cudf::logic_error if `output_dtype` is not bool8. + * @throw cudf::logic_error if `output_dtype` is not BOOL8. * - * @param col Input column to compute any - * @param offsets Indices to identify segment boundaries - * @param output_dtype Data type of return type and typecast elements of input column - * @param null_handling If `null_policy::INCLUDE`, all elements in a segment must be valid for the - * reduced value to be valid. If `null_policy::EXCLUDE`, the reduced value is valid if any element - * in the segment is valid. - * @param init Initial value of each any + * @param col Input column data + * @param offsets Indices to identify segment boundaries within input `col` + * @param output_dtype Data type of the output column + * @param null_handling Specifies how null elements are processed for each segment + * @param init Initial value of each segment * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource used to allocate the returned scalar's device memory - * @return Column of bool8 for the results of the segments + * @param mr Device memory resource used to allocate the returned column's device memory + * @return Column of type BOOL8 for the results of the segments */ std::unique_ptr segmented_any( column_view const& col, @@ -167,23 +182,26 @@ std::unique_ptr segmented_any( rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @brief Compute if all of the values in the segment are true when typecasted to bool. + * @brief Compute if all of the values in the segment are true when typecasted to bool * * If an input segment is empty, the segment result is null. * + * If `null_handling==null_policy::INCLUDE`, all elements in a segment must be valid + * for the reduced value to be valid. + * If `null_handling==null_policy::EXCLUDE`, the reduced value is valid if any element + * in the segment is valid. + * * @throw cudf::logic_error if input column type is not convertible to bool. - * @throw cudf::logic_error if `output_dtype` is not bool8. + * @throw cudf::logic_error if `output_dtype` is not BOOL8. * - * @param col Input column to compute all - * @param offsets Indices to identify segment boundaries - * @param output_dtype Data type of return type and typecast elements of input column - * @param null_handling If `null_policy::INCLUDE`, all elements in a segment must be valid for the - * reduced value to be valid. If `null_policy::EXCLUDE`, the reduced value is valid if any element - * in the segment is valid. - * @param init Initial value of each all + * @param col Input column data + * @param offsets Indices to identify segment boundaries within input `col` + * @param output_dtype Data type of the output column + * @param null_handling Specifies how null elements are processed for each segment + * @param init Initial value of each segment * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource used to allocate the returned scalar's device memory - * @return Column of bool8 for the results of the segments + * @param mr Device memory resource used to allocate the returned column's device memory + * @return Column of BOOL8 for the results of the segments */ std::unique_ptr segmented_all( column_view const& col, @@ -212,7 +230,7 @@ std::unique_ptr segmented_all( * @param output_dtype Data type of the output column * @param null_handling Specifies how null elements are processed for each segment * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource used to allocate the returned scalar's device memory + * @param mr Device memory resource used to allocate the returned column's device memory * @return Column of `output_dtype` for the reduction results of the segments */ std::unique_ptr segmented_mean( @@ -241,7 +259,7 @@ std::unique_ptr segmented_mean( * @param output_dtype Data type of the output column * @param null_handling Specifies how null elements are processed for each segment * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource used to allocate the returned scalar's device memory + * @param mr Device memory resource used to allocate the returned column's device memory * @return Column of `output_dtype` for the reduction results of the segments */ std::unique_ptr segmented_sum_of_squares( @@ -272,7 +290,7 @@ std::unique_ptr segmented_sum_of_squares( * @param ddof Delta degrees of freedom. * The divisor used is N - ddof, where N the number of elements in each segment * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource used to allocate the returned scalar's device memory + * @param mr Device memory resource used to allocate the returned column's device memory * @return Column of `output_dtype` for the reduction results of the segments */ std::unique_ptr segmented_standard_deviation( @@ -304,7 +322,7 @@ std::unique_ptr segmented_standard_deviation( * @param ddof Delta degrees of freedom. * The divisor used is N - ddof, where N the number of elements in each segment * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource used to allocate the returned scalar's device memory + * @param mr Device memory resource used to allocate the returned column's device memory * @return Column of `output_dtype` for the reduction results of the segments */ std::unique_ptr segmented_variance( From cdc2f9c2fb444b401ef01335c069b3ebdba56a18 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Tue, 24 Jan 2023 14:32:43 -0500 Subject: [PATCH 06/12] remove unneeded namespace specification --- cpp/src/reductions/compound.cuh | 10 +- cpp/src/reductions/minmax.cu | 2 +- cpp/src/reductions/segmented/compound.cuh | 16 +- cpp/src/reductions/segmented/mean.cu | 12 +- cpp/src/reductions/segmented/std.cu | 2 +- cpp/src/reductions/segmented/var.cu | 2 +- cpp/src/reductions/std.cu | 2 +- cpp/src/reductions/var.cu | 2 +- .../reductions/segmented_reduction_tests.cpp | 139 +++++++++--------- 9 files changed, 92 insertions(+), 95 deletions(-) diff --git a/cpp/src/reductions/compound.cuh b/cpp/src/reductions/compound.cuh index f901ebd2c24..cd47402c437 100644 --- a/cpp/src/reductions/compound.cuh +++ b/cpp/src/reductions/compound.cuh @@ -46,7 +46,7 @@ namespace detail { template std::unique_ptr compound_reduction(column_view const& col, data_type const output_dtype, - cudf::size_type ddof, + size_type ddof, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { @@ -99,7 +99,7 @@ struct result_type_dispatcher { template ()>* = nullptr> std::unique_ptr operator()(column_view const& col, cudf::data_type const output_dtype, - cudf::size_type ddof, + size_type ddof, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { @@ -109,7 +109,7 @@ struct result_type_dispatcher { template ()>* = nullptr> std::unique_ptr operator()(column_view const& col, cudf::data_type const output_dtype, - cudf::size_type ddof, + size_type ddof, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { @@ -132,7 +132,7 @@ struct element_type_dispatcher { template ()>* = nullptr> std::unique_ptr operator()(column_view const& col, cudf::data_type const output_dtype, - cudf::size_type ddof, + size_type ddof, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { @@ -143,7 +143,7 @@ struct element_type_dispatcher { template ()>* = nullptr> std::unique_ptr operator()(column_view const& col, cudf::data_type const output_dtype, - cudf::size_type ddof, + size_type ddof, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { diff --git a/cpp/src/reductions/minmax.cu b/cpp/src/reductions/minmax.cu index 42288a67242..b1d951a783b 100644 --- a/cpp/src/reductions/minmax.cu +++ b/cpp/src/reductions/minmax.cu @@ -74,7 +74,7 @@ template ::type> rmm::device_scalar reduce_device(InputIterator d_in, - cudf::size_type num_items, + size_type num_items, Op binary_op, rmm::cuda_stream_view stream) { diff --git a/cpp/src/reductions/segmented/compound.cuh b/cpp/src/reductions/segmented/compound.cuh index 09a4188426b..6d13fe6e9f1 100644 --- a/cpp/src/reductions/segmented/compound.cuh +++ b/cpp/src/reductions/segmented/compound.cuh @@ -49,7 +49,7 @@ template std::unique_ptr compound_segmented_reduction(column_view const& col, device_span offsets, null_policy null_handling, - cudf::size_type ddof, + size_type ddof, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { @@ -110,13 +110,11 @@ std::unique_ptr compound_segmented_reduction(column_view const& col, }; template -struct segmented_result_dispatcher { +struct compound_float_output_dispatcher { private: template static constexpr bool is_supported_v() { - // the operator `mean`, `var`, `std` only accepts - // floating points as output dtype return std::is_floating_point_v; } @@ -125,7 +123,7 @@ struct segmented_result_dispatcher { std::unique_ptr operator()(column_view const& col, device_span offsets, null_policy null_handling, - cudf::size_type ddof, + size_type ddof, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { @@ -137,7 +135,7 @@ struct segmented_result_dispatcher { std::unique_ptr operator()(column_view const&, device_span, null_policy, - cudf::size_type, + size_type, rmm::cuda_stream_view, rmm::mr::device_memory_resource*) { @@ -160,12 +158,12 @@ struct compound_segmented_dispatcher { device_span offsets, cudf::data_type const output_dtype, null_policy null_handling, - cudf::size_type ddof, + size_type ddof, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { return cudf::type_dispatcher(output_dtype, - segmented_result_dispatcher(), + compound_float_output_dispatcher(), col, offsets, null_handling, @@ -179,7 +177,7 @@ struct compound_segmented_dispatcher { device_span, cudf::data_type const, null_policy, - cudf::size_type, + size_type, rmm::cuda_stream_view, rmm::mr::device_memory_resource*) { diff --git a/cpp/src/reductions/segmented/mean.cu b/cpp/src/reductions/segmented/mean.cu index 26c679442e3..b7a5bfa43d6 100644 --- a/cpp/src/reductions/segmented/mean.cu +++ b/cpp/src/reductions/segmented/mean.cu @@ -31,15 +31,9 @@ std::unique_ptr segmented_mean(column_view const& col, rmm::mr::device_memory_resource* mr) { using reducer = compound::detail::compound_segmented_dispatcher; - return cudf::type_dispatcher(col.type(), - reducer{}, - col, - offsets, - output_dtype, - null_handling, - 1, // ddof is not used for mean - stream, - mr); + constexpr size_type ddof = 1; // ddof for mean calculation + return cudf::type_dispatcher( + col.type(), reducer{}, col, offsets, output_dtype, null_handling, ddof, stream, mr); } } // namespace reduction diff --git a/cpp/src/reductions/segmented/std.cu b/cpp/src/reductions/segmented/std.cu index 16144ab7a72..6af5a9cf9b6 100644 --- a/cpp/src/reductions/segmented/std.cu +++ b/cpp/src/reductions/segmented/std.cu @@ -27,7 +27,7 @@ std::unique_ptr segmented_standard_deviation(column_view const& co device_span offsets, cudf::data_type const output_dtype, null_policy null_handling, - cudf::size_type ddof, + size_type ddof, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { diff --git a/cpp/src/reductions/segmented/var.cu b/cpp/src/reductions/segmented/var.cu index d69a67f649c..84adf353700 100644 --- a/cpp/src/reductions/segmented/var.cu +++ b/cpp/src/reductions/segmented/var.cu @@ -27,7 +27,7 @@ std::unique_ptr segmented_variance(column_view const& col, device_span offsets, cudf::data_type const output_dtype, null_policy null_handling, - cudf::size_type ddof, + size_type ddof, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { diff --git a/cpp/src/reductions/std.cu b/cpp/src/reductions/std.cu index bb29e5cd030..e03cd04de97 100644 --- a/cpp/src/reductions/std.cu +++ b/cpp/src/reductions/std.cu @@ -25,7 +25,7 @@ namespace reduction { std::unique_ptr standard_deviation(column_view const& col, cudf::data_type const output_dtype, - cudf::size_type ddof, + size_type ddof, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { diff --git a/cpp/src/reductions/var.cu b/cpp/src/reductions/var.cu index 2df653858b0..7a2f427893f 100644 --- a/cpp/src/reductions/var.cu +++ b/cpp/src/reductions/var.cu @@ -25,7 +25,7 @@ namespace reduction { std::unique_ptr variance(column_view const& col, cudf::data_type const output_dtype, - cudf::size_type ddof, + size_type ddof, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { diff --git a/cpp/tests/reductions/segmented_reduction_tests.cpp b/cpp/tests/reductions/segmented_reduction_tests.cpp index 3d63b821523..ac2ff42bbd7 100644 --- a/cpp/tests/reductions/segmented_reduction_tests.cpp +++ b/cpp/tests/reductions/segmented_reduction_tests.cpp @@ -779,16 +779,15 @@ TEST_F(SegmentedReductionTestUntyped, Mean) auto const offsets = std::vector{0, 1, 1, 4, 9}; auto const d_offsets = cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const agg = cudf::make_mean_aggregation(); + auto const output_type = cudf::data_type{cudf::type_id::FLOAT32}; + auto const expected = cudf::test::fixed_width_column_wrapper{{10, 0, 30, 70}, {1, 0, 1, 1}}; - auto result = - cudf::segmented_reduce(input, - d_offsets, - *cudf::make_mean_aggregation(), - cudf::data_type{cudf::type_id::FLOAT32}, - cudf::null_policy::INCLUDE); - + cudf::segmented_reduce(input, d_offsets, *agg, output_type, cudf::null_policy::INCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected); + result = cudf::segmented_reduce(input, d_offsets, *agg, output_type, cudf::null_policy::EXCLUDE); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected); } @@ -799,23 +798,16 @@ TEST_F(SegmentedReductionTestUntyped, MeanNulls) auto const offsets = std::vector{0, 1, 1, 4, 9}; auto const d_offsets = cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const agg = cudf::make_mean_aggregation(); + auto const output_type = cudf::data_type{cudf::type_id::FLOAT64}; auto expected = cudf::test::fixed_width_column_wrapper{{10, 0, 30, 70}, {1, 0, 1, 1}}; auto result = - cudf::segmented_reduce(input, - d_offsets, - *cudf::make_mean_aggregation(), - cudf::data_type{cudf::type_id::FLOAT64}, - cudf::null_policy::EXCLUDE); + cudf::segmented_reduce(input, d_offsets, *agg, output_type, cudf::null_policy::EXCLUDE); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected); expected = cudf::test::fixed_width_column_wrapper{{10, 0, 30, 0}, {1, 0, 1, 0}}; - result = - cudf::segmented_reduce(input, - d_offsets, - *cudf::make_mean_aggregation(), - cudf::data_type{cudf::type_id::FLOAT64}, - cudf::null_policy::INCLUDE); + result = cudf::segmented_reduce(input, d_offsets, *agg, output_type, cudf::null_policy::INCLUDE); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected); } @@ -826,16 +818,16 @@ TEST_F(SegmentedReductionTestUntyped, SumOfSquares) auto const offsets = std::vector{0, 1, 1, 4, 9}; auto const d_offsets = cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const agg = cudf::make_sum_of_squares_aggregation(); + auto const output_type = cudf::data_type{cudf::type_id::INT32}; + auto const expected = cudf::test::fixed_width_column_wrapper{{100, 0, 2900, 25500}, {1, 0, 1, 1}}; - auto result = cudf::segmented_reduce( - input, - d_offsets, - *cudf::make_sum_of_squares_aggregation(), - cudf::data_type{cudf::type_id::INT32}, - cudf::null_policy::INCLUDE); - + auto result = + cudf::segmented_reduce(input, d_offsets, *agg, output_type, cudf::null_policy::INCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected); + result = cudf::segmented_reduce(input, d_offsets, *agg, output_type, cudf::null_policy::EXCLUDE); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected); } @@ -846,24 +838,17 @@ TEST_F(SegmentedReductionTestUntyped, SumOfSquaresNulls) auto const offsets = std::vector{0, 1, 1, 4, 9}; auto const d_offsets = cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const agg = cudf::make_sum_of_squares_aggregation(); + auto const output_type = cudf::data_type{cudf::type_id::INT64}; auto expected = cudf::test::fixed_width_column_wrapper{{100, 0, 2900, 20600}, {1, 0, 1, 1}}; - auto result = cudf::segmented_reduce( - input, - d_offsets, - *cudf::make_sum_of_squares_aggregation(), - cudf::data_type{cudf::type_id::INT64}, - cudf::null_policy::EXCLUDE); + auto result = + cudf::segmented_reduce(input, d_offsets, *agg, output_type, cudf::null_policy::EXCLUDE); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected); expected = cudf::test::fixed_width_column_wrapper{{100, 0, 2900, 0}, {1, 0, 1, 0}}; - result = cudf::segmented_reduce( - input, - d_offsets, - *cudf::make_sum_of_squares_aggregation(), - cudf::data_type{cudf::type_id::INT64}, - cudf::null_policy::INCLUDE); + result = cudf::segmented_reduce(input, d_offsets, *agg, output_type, cudf::null_policy::INCLUDE); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected); } @@ -875,15 +860,15 @@ TEST_F(SegmentedReductionTestUntyped, StandardDeviation) auto const offsets = std::vector{0, 1, 1, 4, 9}; auto const d_offsets = cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const agg = cudf::make_std_aggregation(); + auto const output_type = cudf::data_type{cudf::type_id::FLOAT32}; auto expected = cudf::test::fixed_width_column_wrapper{ {NaN, 0.f, 10.f, static_cast(std::sqrt(250.))}, {1, 0, 1, 1}}; auto result = - cudf::segmented_reduce(input, - d_offsets, - *cudf::make_std_aggregation(), - cudf::data_type{cudf::type_id::FLOAT32}, - cudf::null_policy::INCLUDE); + cudf::segmented_reduce(input, d_offsets, *agg, output_type, cudf::null_policy::INCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected); + result = cudf::segmented_reduce(input, d_offsets, *agg, output_type, cudf::null_policy::EXCLUDE); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected); } @@ -895,23 +880,17 @@ TEST_F(SegmentedReductionTestUntyped, StandardDeviationNulls) auto const offsets = std::vector{0, 1, 1, 4, 9}; auto const d_offsets = cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const agg = cudf::make_std_aggregation(); + auto const output_type = cudf::data_type{cudf::type_id::FLOAT64}; auto expected = cudf::test::fixed_width_column_wrapper{ {NaN, 0., std::sqrt(50.), std::sqrt(135.)}, {1, 0, 1, 1}}; auto result = - cudf::segmented_reduce(input, - d_offsets, - *cudf::make_std_aggregation(), - cudf::data_type{cudf::type_id::FLOAT64}, - cudf::null_policy::EXCLUDE); + cudf::segmented_reduce(input, d_offsets, *agg, output_type, cudf::null_policy::EXCLUDE); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected); expected = cudf::test::fixed_width_column_wrapper{{NaN, 0., 0., 0.}, {1, 0, 0, 0}}; - result = cudf::segmented_reduce(input, - d_offsets, - *cudf::make_std_aggregation(), - cudf::data_type{cudf::type_id::FLOAT64}, - cudf::null_policy::INCLUDE); + result = cudf::segmented_reduce(input, d_offsets, *agg, output_type, cudf::null_policy::INCLUDE); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected); } @@ -923,15 +902,15 @@ TEST_F(SegmentedReductionTestUntyped, Variance) auto const offsets = std::vector{0, 1, 1, 4, 9}; auto const d_offsets = cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const agg = cudf::make_variance_aggregation(); + auto const output_type = cudf::data_type{cudf::type_id::FLOAT32}; auto expected = cudf::test::fixed_width_column_wrapper{{NaN, 0.f, 100.f, 250.f}, {1, 0, 1, 1}}; auto result = - cudf::segmented_reduce(input, - d_offsets, - *cudf::make_variance_aggregation(), - cudf::data_type{cudf::type_id::FLOAT32}, - cudf::null_policy::INCLUDE); + cudf::segmented_reduce(input, d_offsets, *agg, output_type, cudf::null_policy::INCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected); + result = cudf::segmented_reduce(input, d_offsets, *agg, output_type, cudf::null_policy::EXCLUDE); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected); } @@ -943,26 +922,52 @@ TEST_F(SegmentedReductionTestUntyped, VarianceNulls) auto const offsets = std::vector{0, 1, 1, 4, 9}; auto const d_offsets = cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const agg = cudf::make_variance_aggregation(); + auto const output_type = cudf::data_type{cudf::type_id::FLOAT64}; auto expected = cudf::test::fixed_width_column_wrapper{{NaN, 0., 50., 135.}, {1, 0, 1, 1}}; auto result = - cudf::segmented_reduce(input, - d_offsets, - *cudf::make_variance_aggregation(), - cudf::data_type{cudf::type_id::FLOAT64}, - cudf::null_policy::EXCLUDE); + cudf::segmented_reduce(input, d_offsets, *agg, output_type, cudf::null_policy::EXCLUDE); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected); expected = cudf::test::fixed_width_column_wrapper{{NaN, 0., 0., 0.}, {1, 0, 0, 0}}; - result = cudf::segmented_reduce(input, - d_offsets, - *cudf::make_std_aggregation(), - cudf::data_type{cudf::type_id::FLOAT64}, - cudf::null_policy::INCLUDE); + result = cudf::segmented_reduce(input, d_offsets, *agg, output_type, cudf::null_policy::INCLUDE); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected); } +TEST_F(SegmentedReductionTestUntyped, Errors) +{ + auto const input = cudf::test::fixed_width_column_wrapper( + {10, 0, 20, 30, 54, 63, 0, 72, 81}, {1, 0, 1, 1, 1, 1, 0, 1, 1}); + auto const offsets = std::vector{0, 1, 1, 4, 9}; + auto const d_offsets = + cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const bad_output_type = cudf::data_type{cudf::type_id::INT64}; + + EXPECT_THROW( + cudf::segmented_reduce(input, + d_offsets, + *cudf::make_mean_aggregation(), + bad_output_type, + cudf::null_policy::EXCLUDE), + cudf::logic_error); + EXPECT_THROW( + cudf::segmented_reduce(input, + d_offsets, + *cudf::make_std_aggregation(), + bad_output_type, + cudf::null_policy::EXCLUDE), + cudf::logic_error); + EXPECT_THROW( + cudf::segmented_reduce(input, + d_offsets, + *cudf::make_variance_aggregation(), + bad_output_type, + cudf::null_policy::EXCLUDE), + cudf::logic_error); +} + TEST_F(SegmentedReductionTestUntyped, ReduceEmptyColumn) { auto const input = cudf::test::fixed_width_column_wrapper{}; From 306497989ff5c884543d1eccf181e0087f453c5f Mon Sep 17 00:00:00 2001 From: David Wendt Date: Tue, 24 Jan 2023 14:58:19 -0500 Subject: [PATCH 07/12] add more error gtests --- .../reductions/segmented_reduction_tests.cpp | 84 ++++++++++++++----- 1 file changed, 61 insertions(+), 23 deletions(-) diff --git a/cpp/tests/reductions/segmented_reduction_tests.cpp b/cpp/tests/reductions/segmented_reduction_tests.cpp index ac2ff42bbd7..63f003b0805 100644 --- a/cpp/tests/reductions/segmented_reduction_tests.cpp +++ b/cpp/tests/reductions/segmented_reduction_tests.cpp @@ -943,29 +943,67 @@ TEST_F(SegmentedReductionTestUntyped, Errors) auto const offsets = std::vector{0, 1, 1, 4, 9}; auto const d_offsets = cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); - auto const bad_output_type = cudf::data_type{cudf::type_id::INT64}; - - EXPECT_THROW( - cudf::segmented_reduce(input, - d_offsets, - *cudf::make_mean_aggregation(), - bad_output_type, - cudf::null_policy::EXCLUDE), - cudf::logic_error); - EXPECT_THROW( - cudf::segmented_reduce(input, - d_offsets, - *cudf::make_std_aggregation(), - bad_output_type, - cudf::null_policy::EXCLUDE), - cudf::logic_error); - EXPECT_THROW( - cudf::segmented_reduce(input, - d_offsets, - *cudf::make_variance_aggregation(), - bad_output_type, - cudf::null_policy::EXCLUDE), - cudf::logic_error); + auto const null_policy = cudf::null_policy::EXCLUDE; + auto const output_type = cudf::data_type{cudf::type_id::TIMESTAMP_DAYS}; + auto const str_input = + cudf::test::strings_column_wrapper({"10", "0", "20", "30", "54", "63", "", "72", "81"}); + + auto const sum_agg = cudf::make_sum_aggregation(); + EXPECT_THROW(cudf::segmented_reduce(input, d_offsets, *sum_agg, output_type, null_policy), + cudf::logic_error); + EXPECT_THROW(cudf::segmented_reduce(str_input, d_offsets, *sum_agg, output_type, null_policy), + cudf::logic_error); + + auto const product_agg = cudf::make_product_aggregation(); + EXPECT_THROW(cudf::segmented_reduce(input, d_offsets, *product_agg, output_type, null_policy), + cudf::logic_error); + EXPECT_THROW(cudf::segmented_reduce(str_input, d_offsets, *product_agg, output_type, null_policy), + cudf::logic_error); + + auto const min_agg = cudf::make_min_aggregation(); + EXPECT_THROW(cudf::segmented_reduce(input, d_offsets, *min_agg, output_type, null_policy), + cudf::logic_error); + + auto const max_agg = cudf::make_max_aggregation(); + EXPECT_THROW(cudf::segmented_reduce(input, d_offsets, *max_agg, output_type, null_policy), + cudf::logic_error); + + auto const any_agg = cudf::make_any_aggregation(); + EXPECT_THROW(cudf::segmented_reduce(input, d_offsets, *any_agg, output_type, null_policy), + cudf::logic_error); + EXPECT_THROW(cudf::segmented_reduce(str_input, d_offsets, *any_agg, output_type, null_policy), + cudf::logic_error); + + auto const all_agg = cudf::make_all_aggregation(); + EXPECT_THROW(cudf::segmented_reduce(input, d_offsets, *all_agg, output_type, null_policy), + cudf::logic_error); + EXPECT_THROW(cudf::segmented_reduce(str_input, d_offsets, *all_agg, output_type, null_policy), + cudf::logic_error); + + auto const mean_agg = cudf::make_mean_aggregation(); + EXPECT_THROW(cudf::segmented_reduce(input, d_offsets, *mean_agg, output_type, null_policy), + cudf::logic_error); + EXPECT_THROW(cudf::segmented_reduce(str_input, d_offsets, *mean_agg, output_type, null_policy), + cudf::logic_error); + + auto const std_agg = cudf::make_std_aggregation(); + EXPECT_THROW(cudf::segmented_reduce(input, d_offsets, *std_agg, output_type, null_policy), + cudf::logic_error); + EXPECT_THROW(cudf::segmented_reduce(str_input, d_offsets, *std_agg, output_type, null_policy), + cudf::logic_error); + + auto const var_agg = cudf::make_variance_aggregation(); + EXPECT_THROW(cudf::segmented_reduce(input, d_offsets, *var_agg, output_type, null_policy), + cudf::logic_error); + EXPECT_THROW(cudf::segmented_reduce(str_input, d_offsets, *var_agg, output_type, null_policy), + cudf::logic_error); + + auto const squares_agg = + cudf::make_sum_of_squares_aggregation(); + EXPECT_THROW(cudf::segmented_reduce(input, d_offsets, *squares_agg, output_type, null_policy), + cudf::logic_error); + EXPECT_THROW(cudf::segmented_reduce(str_input, d_offsets, *squares_agg, output_type, null_policy), + cudf::logic_error); } TEST_F(SegmentedReductionTestUntyped, ReduceEmptyColumn) From 7436879707f74cbe4ebd2e0d4a3e0e52955e31d1 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Tue, 24 Jan 2023 15:00:10 -0500 Subject: [PATCH 08/12] fix copyright year --- cpp/src/reductions/compound.cuh | 2 +- cpp/src/reductions/std.cu | 2 +- cpp/src/reductions/var.cu | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/src/reductions/compound.cuh b/cpp/src/reductions/compound.cuh index cd47402c437..9458ae2d581 100644 --- a/cpp/src/reductions/compound.cuh +++ b/cpp/src/reductions/compound.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/reductions/std.cu b/cpp/src/reductions/std.cu index e03cd04de97..e9ba75f68e6 100644 --- a/cpp/src/reductions/std.cu +++ b/cpp/src/reductions/std.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/reductions/var.cu b/cpp/src/reductions/var.cu index 7a2f427893f..4d86918d6c6 100644 --- a/cpp/src/reductions/var.cu +++ b/cpp/src/reductions/var.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. From 105fe62c8b3048a30a9717121a39c847690cc402 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Tue, 24 Jan 2023 17:22:19 -0500 Subject: [PATCH 09/12] remove unneeded include --- cpp/include/cudf/detail/segmented_reduction_functions.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/include/cudf/detail/segmented_reduction_functions.hpp b/cpp/include/cudf/detail/segmented_reduction_functions.hpp index 54751c00ccf..6b31cb8a825 100644 --- a/cpp/include/cudf/detail/segmented_reduction_functions.hpp +++ b/cpp/include/cudf/detail/segmented_reduction_functions.hpp @@ -16,8 +16,8 @@ #pragma once +#include #include -#include #include #include From d35c4750de73e40cee590bc7bcc07bcbe9ec5bb9 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Fri, 27 Jan 2023 19:41:01 -0500 Subject: [PATCH 10/12] refactor validity-mask logic into separate source file --- cpp/CMakeLists.txt | 1 + cpp/src/reductions/segmented/simple.cuh | 101 ++++++------------ .../reductions/segmented/update_validity.cu | 55 ++++++++++ 3 files changed, 91 insertions(+), 66 deletions(-) create mode 100644 cpp/src/reductions/segmented/update_validity.cu diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 5d73bfe247d..81a2b40f38b 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -450,6 +450,7 @@ add_library( src/reductions/segmented/std.cu src/reductions/segmented/sum.cu src/reductions/segmented/sum_of_squares.cu + src/reductions/segmented/update_validity.cu src/reductions/segmented/var.cu src/reductions/std.cu src/reductions/sum.cu diff --git a/cpp/src/reductions/segmented/simple.cuh b/cpp/src/reductions/segmented/simple.cuh index cc734cffdc0..44bb097fe28 100644 --- a/cpp/src/reductions/segmented/simple.cuh +++ b/cpp/src/reductions/segmented/simple.cuh @@ -19,14 +19,11 @@ #include #include #include -#include -#include #include #include #include #include #include -#include #include #include #include @@ -47,7 +44,31 @@ namespace simple { namespace detail { /** - * @brief Segment reduction for 'sum', 'product', 'min', 'max', 'sum of squares' + * @brief Compute the validity mask and set it on the result column + * + * If `null_handling == null_policy::INCLUDE`, all elements in a segment must be valid for the + * reduced value to be valid. + * If `null_handling == null_policy::EXCLUDE`, the reduced value is valid if any element + * in the segment is valid. + * + * @param result Result of segmented reduce to update the null mask + * @param col Input column before reduce + * @param offsets Indices to segment boundaries + * @param null_handling How null entries are processed within each segment + * @param init Optional initial value + * @param stream Used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +void update_validity(column& result, + column_view const& col, + device_span offsets, + null_policy null_handling, + std::optional> init, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); + +/** + * @brief Segment reduction for 'sum', 'product', 'min', 'max', 'sum of squares', etc * which directly compute the reduction by a single step reduction call. * * @tparam InputType the input column data-type @@ -56,9 +77,7 @@ namespace detail { * @param col Input column of data to reduce * @param offsets Indices to segment boundaries - * @param null_handling If `null_policy::INCLUDE`, all elements in a segment must be valid for the - * reduced value to be valid. If `null_policy::EXCLUDE`, the reduced value is valid if any element - * in the segment is valid. + * @param null_handling How null entries are processed within each segment * @param init Optional initial value of the reduction * @param stream Used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory @@ -73,7 +92,6 @@ std::unique_ptr simple_segmented_reduction( rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - // reduction by iterator auto dcol = cudf::column_device_view::create(col, stream); auto simple_op = Op{}; auto const num_segments = offsets.size() - 1; @@ -110,20 +128,7 @@ std::unique_ptr simple_segmented_reduction( } // Compute the output null mask - auto const bitmask = col.null_mask(); - auto const first_bit_indices_begin = offsets.begin(); - auto const first_bit_indices_end = offsets.end() - 1; - auto const last_bit_indices_begin = first_bit_indices_begin + 1; - auto [output_null_mask, output_null_count] = cudf::detail::segmented_null_mask_reduction( - bitmask, - first_bit_indices_begin, - first_bit_indices_end, - last_bit_indices_begin, - null_handling, - init.has_value() ? std::optional(init.value().get().is_valid()) : std::nullopt, - stream, - mr); - result->set_null_mask(std::move(output_null_mask), output_null_count); + update_validity(*result, col, offsets, null_handling, init, stream, mr); return result; } @@ -139,9 +144,7 @@ std::unique_ptr simple_segmented_reduction( * @param col Input column of data to reduce * @param offsets Indices to segment boundaries - * @param null_handling If `null_policy::INCLUDE`, all elements in a segment must be valid for the - * reduced value to be valid. If `null_policy::EXCLUDE`, the reduced value is valid if any element - * in the segment is valid. + * @param null_handling How null entries are processed within each segment * @param stream Used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory * @return Output column in device memory @@ -184,39 +187,9 @@ std::unique_ptr string_segmented_reduction(column_view const& col, stream, mr) ->release()[0]); - auto [segmented_null_mask, segmented_null_count] = - cudf::detail::segmented_null_mask_reduction(col.null_mask(), - offsets.begin(), - offsets.end() - 1, - offsets.begin() + 1, - null_handling, - std::nullopt, - stream, - mr); - - // If the segmented null mask contains any null values, the segmented null mask - // must be combined with the result null mask. - if (segmented_null_count > 0) { - if (result->null_count() == 0) { - // The result has no nulls. Use the segmented null mask. - result->set_null_mask(std::move(segmented_null_mask), segmented_null_count); - } else { - // Compute the logical AND of the segmented output null mask and the - // result null mask to update the result null mask and null count. - auto result_mview = result->mutable_view(); - std::vector masks{static_cast(result_mview.null_mask()), - static_cast(segmented_null_mask.data())}; - std::vector begin_bits{0, 0}; - auto const valid_count = cudf::detail::inplace_bitmask_and( - device_span(static_cast(result_mview.null_mask()), - num_bitmask_words(result->size())), - masks, - begin_bits, - result->size(), - stream); - result->set_null_count(result->size() - valid_count); - } - } + + // Compute the output null mask + update_validity(*result, col, offsets, null_handling, std::nullopt, stream, mr); return result; } @@ -242,9 +215,7 @@ std::unique_ptr string_segmented_reduction(column_view const& col, * @param col Input column of data to reduce * @param offsets Indices to segment boundaries - * @param null_handling If `null_policy::INCLUDE`, all elements in a segment must be valid for the - * reduced value to be valid. If `null_policy::EXCLUDE`, the reduced value is valid if any element - * in the segment is valid. + * @param null_handling How null entries are processed within each segment * @param stream Used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory * @return Output column in device memory @@ -390,7 +361,7 @@ struct same_column_type_dispatcher { /** * @brief Call reduce and return a column of the type specified. * - * This is used by operations sum(), product(), and sum_of_squares(). + * This is used by operations such as sum(), product(), sum_of_squares(), etc * It only supports numeric types. If the output type is not the * same as the input type, an extra cast operation may occur. * @@ -456,10 +427,8 @@ struct column_type_dispatcher { * @tparam ElementType The input column type or key type * @param col Input column (must be numeric) * @param offsets Indices to segment boundaries - * @param output_type Requested type of the scalar result - * @param null_handling If `null_policy::INCLUDE`, all elements in a segment must be valid for the - * reduced value to be valid. If `null_policy::EXCLUDE`, the reduced value is valid if any element - * in the segment is valid. + * @param output_type Requested type of the output column + * @param null_handling How null entries are processed within each segment * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned scalar's device memory */ diff --git a/cpp/src/reductions/segmented/update_validity.cu b/cpp/src/reductions/segmented/update_validity.cu new file mode 100644 index 00000000000..68b658f411b --- /dev/null +++ b/cpp/src/reductions/segmented/update_validity.cu @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include + +#include + +#include + +namespace cudf { +namespace reduction { +namespace simple { +namespace detail { + +void update_validity(column& result, + column_view const& col, + device_span offsets, + null_policy null_handling, + std::optional> init, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + auto [output_null_mask, output_null_count] = cudf::detail::segmented_null_mask_reduction( + col.null_mask(), + offsets.begin(), + offsets.end() - 1, + offsets.begin() + 1, + null_handling, + init.has_value() ? std::optional(init.value().get().is_valid()) : std::nullopt, + stream, + mr); + result.set_null_mask(std::move(output_null_mask), output_null_count); +} + +} // namespace detail +} // namespace simple +} // namespace reduction +} // namespace cudf From 5bedc8488776e98089df2d6a301fcc3d5ef3436b Mon Sep 17 00:00:00 2001 From: David Wendt Date: Thu, 2 Feb 2023 13:43:53 -0500 Subject: [PATCH 11/12] additional refactor for update_validity --- .../cudf/detail/reduction_functions.hpp | 4 +- .../cudf/detail/segmented_reduction.cuh | 10 ++-- .../detail/segmented_reduction_functions.hpp | 6 +- cpp/src/reductions/segmented/compound.cuh | 19 ++---- cpp/src/reductions/segmented/simple.cuh | 31 ++-------- .../reductions/segmented/update_validity.cu | 10 +--- .../reductions/segmented/update_validity.hpp | 58 +++++++++++++++++++ .../reductions/segmented_reduction_tests.cpp | 4 +- 8 files changed, 82 insertions(+), 60 deletions(-) create mode 100644 cpp/src/reductions/segmented/update_validity.hpp diff --git a/cpp/include/cudf/detail/reduction_functions.hpp b/cpp/include/cudf/detail/reduction_functions.hpp index 6f1630c52eb..1f892bb90c5 100644 --- a/cpp/include/cudf/detail/reduction_functions.hpp +++ b/cpp/include/cudf/detail/reduction_functions.hpp @@ -216,7 +216,7 @@ std::unique_ptr mean( std::unique_ptr variance( column_view const& col, data_type const output_dtype, - cudf::size_type ddof, + size_type ddof, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); @@ -239,7 +239,7 @@ std::unique_ptr variance( std::unique_ptr standard_deviation( column_view const& col, data_type const output_dtype, - cudf::size_type ddof, + size_type ddof, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); diff --git a/cpp/include/cudf/detail/segmented_reduction.cuh b/cpp/include/cudf/detail/segmented_reduction.cuh index 26bf755334d..9a49c1abe38 100644 --- a/cpp/include/cudf/detail/segmented_reduction.cuh +++ b/cpp/include/cudf/detail/segmented_reduction.cuh @@ -68,9 +68,8 @@ void segmented_reduce(InputIterator d_in, auto const num_segments = static_cast(std::distance(d_offset_begin, d_offset_end)) - 1; // Allocate temporary storage - rmm::device_buffer d_temp_storage; size_t temp_storage_bytes = 0; - cub::DeviceSegmentedReduce::Reduce(d_temp_storage.data(), + cub::DeviceSegmentedReduce::Reduce(nullptr, temp_storage_bytes, d_in, d_out, @@ -80,7 +79,7 @@ void segmented_reduce(InputIterator d_in, binary_op, initial_value, stream.value()); - d_temp_storage = rmm::device_buffer{temp_storage_bytes, stream}; + auto d_temp_storage = rmm::device_buffer{temp_storage_bytes, stream}; // Run reduction cub::DeviceSegmentedReduce::Reduce(d_temp_storage.data(), @@ -156,9 +155,8 @@ void segmented_reduce(InputIterator d_in, stream}; // Allocate temporary storage - rmm::device_buffer d_temp_storage; size_t temp_storage_bytes = 0; - cub::DeviceSegmentedReduce::Reduce(d_temp_storage.data(), + cub::DeviceSegmentedReduce::Reduce(nullptr, temp_storage_bytes, d_in, intermediate_result.data(), @@ -168,7 +166,7 @@ void segmented_reduce(InputIterator d_in, binary_op, initial_value, stream.value()); - d_temp_storage = rmm::device_buffer{temp_storage_bytes, stream}; + auto d_temp_storage = rmm::device_buffer{temp_storage_bytes, stream}; // Run reduction cub::DeviceSegmentedReduce::Reduce(d_temp_storage.data(), diff --git a/cpp/include/cudf/detail/segmented_reduction_functions.hpp b/cpp/include/cudf/detail/segmented_reduction_functions.hpp index 6b31cb8a825..7b5628fa49a 100644 --- a/cpp/include/cudf/detail/segmented_reduction_functions.hpp +++ b/cpp/include/cudf/detail/segmented_reduction_functions.hpp @@ -252,7 +252,7 @@ std::unique_ptr segmented_mean( * in the segment is valid. * * @throw cudf::logic_error if input column type is not arithmetic type - * @throw cudf::logic_error if `output_dtype` is not floating point type + * @throw cudf::logic_error if `output_dtype` is not an arithmetic type * * @param col Input column data * @param offsets Indices to identify segment boundaries within input `col` @@ -298,7 +298,7 @@ std::unique_ptr segmented_standard_deviation( device_span offsets, data_type const output_dtype, null_policy null_handling, - cudf::size_type ddof, + size_type ddof, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); @@ -330,7 +330,7 @@ std::unique_ptr segmented_variance( device_span offsets, data_type const output_dtype, null_policy null_handling, - cudf::size_type ddof, + size_type ddof, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); diff --git a/cpp/src/reductions/segmented/compound.cuh b/cpp/src/reductions/segmented/compound.cuh index 6d13fe6e9f1..ffde1fc306b 100644 --- a/cpp/src/reductions/segmented/compound.cuh +++ b/cpp/src/reductions/segmented/compound.cuh @@ -16,6 +16,8 @@ #pragma once +#include "update_validity.hpp" + #include #include #include @@ -92,20 +94,9 @@ std::unique_ptr compound_segmented_reduction(column_view const& col, } // Compute the output null mask - auto const bitmask = col.null_mask(); - auto const first_bit_indices_begin = offsets.begin(); - auto const first_bit_indices_end = offsets.end() - 1; - auto const last_bit_indices_begin = first_bit_indices_begin + 1; - auto [output_null_mask, output_null_count] = - cudf::detail::segmented_null_mask_reduction(bitmask, - first_bit_indices_begin, - first_bit_indices_end, - last_bit_indices_begin, - null_handling, - std::nullopt, - stream, - mr); - result->set_null_mask(std::move(output_null_mask), output_null_count); + cudf::reduction::detail::update_validity( + *result, col, offsets, null_handling, std::nullopt, stream, mr); + return result; }; diff --git a/cpp/src/reductions/segmented/simple.cuh b/cpp/src/reductions/segmented/simple.cuh index 44bb097fe28..54827177d4f 100644 --- a/cpp/src/reductions/segmented/simple.cuh +++ b/cpp/src/reductions/segmented/simple.cuh @@ -16,6 +16,8 @@ #pragma once +#include "update_validity.hpp" + #include #include #include @@ -43,30 +45,6 @@ namespace reduction { namespace simple { namespace detail { -/** - * @brief Compute the validity mask and set it on the result column - * - * If `null_handling == null_policy::INCLUDE`, all elements in a segment must be valid for the - * reduced value to be valid. - * If `null_handling == null_policy::EXCLUDE`, the reduced value is valid if any element - * in the segment is valid. - * - * @param result Result of segmented reduce to update the null mask - * @param col Input column before reduce - * @param offsets Indices to segment boundaries - * @param null_handling How null entries are processed within each segment - * @param init Optional initial value - * @param stream Used for device memory operations and kernel launches - * @param mr Device memory resource used to allocate the returned column's device memory - */ -void update_validity(column& result, - column_view const& col, - device_span offsets, - null_policy null_handling, - std::optional> init, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); - /** * @brief Segment reduction for 'sum', 'product', 'min', 'max', 'sum of squares', etc * which directly compute the reduction by a single step reduction call. @@ -128,7 +106,7 @@ std::unique_ptr simple_segmented_reduction( } // Compute the output null mask - update_validity(*result, col, offsets, null_handling, init, stream, mr); + cudf::reduction::detail::update_validity(*result, col, offsets, null_handling, init, stream, mr); return result; } @@ -189,7 +167,8 @@ std::unique_ptr string_segmented_reduction(column_view const& col, ->release()[0]); // Compute the output null mask - update_validity(*result, col, offsets, null_handling, std::nullopt, stream, mr); + cudf::reduction::detail::update_validity( + *result, col, offsets, null_handling, std::nullopt, stream, mr); return result; } diff --git a/cpp/src/reductions/segmented/update_validity.cu b/cpp/src/reductions/segmented/update_validity.cu index 68b658f411b..ca075d5895a 100644 --- a/cpp/src/reductions/segmented/update_validity.cu +++ b/cpp/src/reductions/segmented/update_validity.cu @@ -14,19 +14,14 @@ * limitations under the License. */ -#include -#include +#include "update_validity.hpp" + #include #include #include -#include - -#include - namespace cudf { namespace reduction { -namespace simple { namespace detail { void update_validity(column& result, @@ -50,6 +45,5 @@ void update_validity(column& result, } } // namespace detail -} // namespace simple } // namespace reduction } // namespace cudf diff --git a/cpp/src/reductions/segmented/update_validity.hpp b/cpp/src/reductions/segmented/update_validity.hpp new file mode 100644 index 00000000000..b86729451f7 --- /dev/null +++ b/cpp/src/reductions/segmented/update_validity.hpp @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include + +#include + +#include + +namespace cudf { +namespace reduction { +namespace detail { + +/** + * @brief Compute the validity mask and set it on the result column + * + * If `null_handling == null_policy::INCLUDE`, all elements in a segment must be valid for the + * reduced value to be valid. + * If `null_handling == null_policy::EXCLUDE`, the reduced value is valid if any element + * in the segment is valid. + * + * @param result Result of segmented reduce to update the null mask + * @param col Input column before reduce + * @param offsets Indices to segment boundaries + * @param null_handling How null entries are processed within each segment + * @param init Optional initial value + * @param stream Used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +void update_validity(column& result, + column_view const& col, + device_span offsets, + null_policy null_handling, + std::optional> init, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); + +} // namespace detail +} // namespace reduction +} // namespace cudf diff --git a/cpp/tests/reductions/segmented_reduction_tests.cpp b/cpp/tests/reductions/segmented_reduction_tests.cpp index 63f003b0805..b4873a14509 100644 --- a/cpp/tests/reductions/segmented_reduction_tests.cpp +++ b/cpp/tests/reductions/segmented_reduction_tests.cpp @@ -24,7 +24,9 @@ #include #include -#include +#include +#include +#include #define XXX 0 // null placeholder From 2d2587b6fd6c931ebb68a5190a56bf9591c74d2e Mon Sep 17 00:00:00 2001 From: David Wendt Date: Thu, 2 Feb 2023 20:06:26 -0500 Subject: [PATCH 12/12] rename update-validity --- cpp/src/reductions/segmented/compound.cuh | 2 +- cpp/src/reductions/segmented/simple.cuh | 5 +++-- cpp/src/reductions/segmented/update_validity.cu | 14 +++++++------- cpp/src/reductions/segmented/update_validity.hpp | 14 +++++++------- 4 files changed, 18 insertions(+), 17 deletions(-) diff --git a/cpp/src/reductions/segmented/compound.cuh b/cpp/src/reductions/segmented/compound.cuh index ffde1fc306b..dc8a995d1b0 100644 --- a/cpp/src/reductions/segmented/compound.cuh +++ b/cpp/src/reductions/segmented/compound.cuh @@ -94,7 +94,7 @@ std::unique_ptr compound_segmented_reduction(column_view const& col, } // Compute the output null mask - cudf::reduction::detail::update_validity( + cudf::reduction::detail::segmented_update_validity( *result, col, offsets, null_handling, std::nullopt, stream, mr); return result; diff --git a/cpp/src/reductions/segmented/simple.cuh b/cpp/src/reductions/segmented/simple.cuh index 54827177d4f..fb080ebf67c 100644 --- a/cpp/src/reductions/segmented/simple.cuh +++ b/cpp/src/reductions/segmented/simple.cuh @@ -106,7 +106,8 @@ std::unique_ptr simple_segmented_reduction( } // Compute the output null mask - cudf::reduction::detail::update_validity(*result, col, offsets, null_handling, init, stream, mr); + cudf::reduction::detail::segmented_update_validity( + *result, col, offsets, null_handling, init, stream, mr); return result; } @@ -167,7 +168,7 @@ std::unique_ptr string_segmented_reduction(column_view const& col, ->release()[0]); // Compute the output null mask - cudf::reduction::detail::update_validity( + cudf::reduction::detail::segmented_update_validity( *result, col, offsets, null_handling, std::nullopt, stream, mr); return result; diff --git a/cpp/src/reductions/segmented/update_validity.cu b/cpp/src/reductions/segmented/update_validity.cu index ca075d5895a..8d70f961b63 100644 --- a/cpp/src/reductions/segmented/update_validity.cu +++ b/cpp/src/reductions/segmented/update_validity.cu @@ -24,13 +24,13 @@ namespace cudf { namespace reduction { namespace detail { -void update_validity(column& result, - column_view const& col, - device_span offsets, - null_policy null_handling, - std::optional> init, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +void segmented_update_validity(column& result, + column_view const& col, + device_span offsets, + null_policy null_handling, + std::optional> init, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { auto [output_null_mask, output_null_count] = cudf::detail::segmented_null_mask_reduction( col.null_mask(), diff --git a/cpp/src/reductions/segmented/update_validity.hpp b/cpp/src/reductions/segmented/update_validity.hpp index b86729451f7..0003b98308a 100644 --- a/cpp/src/reductions/segmented/update_validity.hpp +++ b/cpp/src/reductions/segmented/update_validity.hpp @@ -45,13 +45,13 @@ namespace detail { * @param stream Used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory */ -void update_validity(column& result, - column_view const& col, - device_span offsets, - null_policy null_handling, - std::optional> init, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); +void segmented_update_validity(column& result, + column_view const& col, + device_span offsets, + null_policy null_handling, + std::optional> init, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); } // namespace detail } // namespace reduction