From 91129078e5146ea551e3cdf5d4a701b62addc1c3 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Fri, 29 Apr 2022 15:05:01 -0700 Subject: [PATCH] Support Segmented Min/Max Reduction on String Type (#10447) This PR adds `min/max` segmented reduction to string type. Part of https://github.com/rapidsai/cudf/issues/10417 Authors: - Michael Wang (https://github.com/isVoid) - Bradley Dice (https://github.com/bdice) Approvers: - Vukasin Milovanovic (https://github.com/vuule) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/10447 --- cpp/include/cudf/detail/reduction.cuh | 27 ++-- .../detail/utilities/element_argminmax.cuh | 61 ++++++++ .../sort/group_single_pass_reduction_util.cuh | 36 +---- cpp/src/reductions/simple_segmented.cuh | 136 +++++++++++++++++- .../reductions/segmented_reduction_tests.cpp | 121 ++++++++++++++++ 5 files changed, 331 insertions(+), 50 deletions(-) create mode 100644 cpp/include/cudf/detail/utilities/element_argminmax.cuh diff --git a/cpp/include/cudf/detail/reduction.cuh b/cpp/include/cudf/detail/reduction.cuh index 76afbf7e4b8..023d83f3c24 100644 --- a/cpp/include/cudf/detail/reduction.cuh +++ b/cpp/include/cudf/detail/reduction.cuh @@ -227,36 +227,36 @@ std::unique_ptr reduce(InputIterator d_in, * @brief Compute the specified simple reduction over each of the segments in the * input range of elements. * - * @tparam Op the reduction operator with device binary operator * @tparam InputIterator the input column iterator * @tparam OffsetIterator the offset column iterator + * @tparam BinaryOp the device binary operator used to reduce * @tparam OutputType the output type of reduction * * @param[in] d_in the begin iterator to input * @param[in] d_offset the begin iterator to offset * @param[in] num_segments the number of segments - * @param[in] sop the reduction operator + * @param[in] binary_op the reduction operator + * @param[in] identity the identity element of the reduction operator * @param[in] stream CUDA stream used for device memory operations and kernel launches. * @param[in] mr Device memory resource used to allocate the returned column's device * memory * @returns Output column in device memory * */ -template ::type, typename std::enable_if_t() && - not cudf::is_fixed_point()>* = nullptr> + !cudf::is_fixed_point()>* = nullptr> std::unique_ptr segmented_reduce(InputIterator d_in, OffsetIterator d_offset, cudf::size_type num_segments, - op::simple_op sop, + BinaryOp binary_op, + OutputType identity, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - auto binary_op = sop.get_binary_op(); - auto identity = sop.template get_identity(); auto dev_result = make_fixed_width_column( data_type{type_to_id()}, num_segments, mask_state::UNALLOCATED, stream, mr); auto dev_result_mview = dev_result->mutable_view(); @@ -291,16 +291,17 @@ std::unique_ptr segmented_reduce(InputIterator d_in, return dev_result; } -template ::type, - typename std::enable_if_t() || - is_fixed_point()>* = nullptr> + typename std::enable_if_t() && + !cudf::is_fixed_point())>* = nullptr> std::unique_ptr segmented_reduce(InputIterator, OffsetIterator, cudf::size_type, - op::simple_op, + BinaryOp, + OutputType, rmm::cuda_stream_view, rmm::mr::device_memory_resource*) { diff --git a/cpp/include/cudf/detail/utilities/element_argminmax.cuh b/cpp/include/cudf/detail/utilities/element_argminmax.cuh new file mode 100644 index 00000000000..45b56278dba --- /dev/null +++ b/cpp/include/cudf/detail/utilities/element_argminmax.cuh @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include + +#include + +namespace cudf { +namespace detail { + +/** + * @brief Binary `argmin`/`argmax` operator + * + * @tparam T Type of the underlying column. Must support '<' operator. + */ +template +struct element_argminmax_fn { + column_device_view const d_col; + bool const has_nulls; + bool const arg_min; + + __device__ inline auto operator()(size_type const& lhs_idx, size_type const& rhs_idx) const + { + // The extra bounds checking is due to issue github.com/rapidsai/cudf/9156 and + // github.com/NVIDIA/thrust/issues/1525 + // where invalid random values may be passed here by thrust::reduce_by_key + auto out_of_bound_or_null = [this] __device__(size_type const& idx) { + return idx < 0 || idx >= this->d_col.size() || + (this->has_nulls && this->d_col.is_null_nocheck(idx)); + }; + if (out_of_bound_or_null(lhs_idx)) { return rhs_idx; } + if (out_of_bound_or_null(rhs_idx)) { return lhs_idx; } + + // Return `lhs_idx` iff: + // row(lhs_idx) < row(rhs_idx) and finding ArgMin, or + // row(lhs_idx) >= row(rhs_idx) and finding ArgMax. + auto const less = d_col.element(lhs_idx) < d_col.element(rhs_idx); + return less == arg_min ? lhs_idx : rhs_idx; + } +}; + +} // namespace detail +} // namespace cudf diff --git a/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh b/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh index 8e1463f7964..93d5e6c032c 100644 --- a/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh +++ b/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -40,37 +41,6 @@ namespace cudf { namespace groupby { namespace detail { -/** - * @brief Binary operator with index values into the input column. - * - * @tparam T Type of the underlying column. Must support '<' operator. - */ -template -struct element_arg_minmax_fn { - column_device_view const d_col; - bool const has_nulls; - bool const arg_min; - - __device__ inline auto operator()(size_type const& lhs_idx, size_type const& rhs_idx) const - { - // The extra bounds checking is due to issue github.com/rapidsai/cudf/9156 and - // github.com/NVIDIA/thrust/issues/1525 - // where invalid random values may be passed here by thrust::reduce_by_key - if (lhs_idx < 0 || lhs_idx >= d_col.size() || (has_nulls && d_col.is_null_nocheck(lhs_idx))) { - return rhs_idx; - } - if (rhs_idx < 0 || rhs_idx >= d_col.size() || (has_nulls && d_col.is_null_nocheck(rhs_idx))) { - return lhs_idx; - } - - // Return `lhs_idx` iff: - // row(lhs_idx) < row(rhs_idx) and finding ArgMin, or - // row(lhs_idx) >= row(rhs_idx) and finding ArgMax. - auto const less = d_col.element(lhs_idx) < d_col.element(rhs_idx); - return less == arg_min ? lhs_idx : rhs_idx; - } -}; - /** * @brief Value accessor for column which supports dictionary column too. * @@ -211,8 +181,8 @@ struct group_reduction_functor(0); - auto const binop = - element_arg_minmax_fn{*d_values_ptr, values.has_nulls(), K == aggregation::ARGMIN}; + auto const binop = cudf::detail::element_argminmax_fn{ + *d_values_ptr, values.has_nulls(), K == aggregation::ARGMIN}; do_reduction(count_iter, result_begin, binop); } else { using OpType = cudf::detail::corresponding_operator_t; diff --git a/cpp/src/reductions/simple_segmented.cuh b/cpp/src/reductions/simple_segmented.cuh index 99837e67398..7796794502d 100644 --- a/cpp/src/reductions/simple_segmented.cuh +++ b/cpp/src/reductions/simple_segmented.cuh @@ -16,12 +16,15 @@ #pragma once +#include #include +#include #include #include #include #include #include +#include #include #include #include @@ -31,9 +34,12 @@ #include +#include #include #include +#include + namespace cudf { namespace reduction { namespace simple { @@ -70,18 +76,21 @@ std::unique_ptr simple_segmented_reduction(column_view const& col, auto simple_op = Op{}; size_type num_segments = offsets.size() - 1; + auto binary_op = simple_op.get_binary_op(); + auto identity = simple_op.template get_identity(); + // TODO: Explore rewriting null_replacing_element_transformer/element_transformer with nullate auto result = [&] { if (col.has_nulls()) { auto f = simple_op.template get_null_replacing_element_transformer(); auto it = thrust::make_transform_iterator(dcol->pair_begin(), f); return cudf::reduction::detail::segmented_reduce( - it, offsets.begin(), num_segments, simple_op, stream, mr); + it, offsets.begin(), num_segments, binary_op, identity, stream, mr); } else { auto f = simple_op.template get_element_transformer(); auto it = thrust::make_transform_iterator(dcol->begin(), f); return cudf::reduction::detail::segmented_reduce( - it, offsets.begin(), num_segments, simple_op, stream, mr); + it, offsets.begin(), num_segments, binary_op, identity, stream, mr); } }(); @@ -103,6 +112,112 @@ std::unique_ptr simple_segmented_reduction(column_view const& col, return result; } +/** + * @brief String segmented reduction for 'min', 'max'. + * + * This algorithm uses argmin/argmax as a custom comparator to build a gather + * map, then builds the output. + * + * @tparam InputType the input column data-type + * @tparam Op the operator of cudf::reduction::op:: + + * @param col Input column of data to reduce. + * @param offsets Indices to segment boundaries. + * @param null_handling If `null_policy::INCLUDE`, all elements in a segment + * must be valid for the reduced value to be valid. If `null_policy::EXCLUDE`, + * the reduced value is valid if any element in the segment is valid. + * @param stream Used for device memory operations and kernel launches. + * @param mr Device memory resource used to allocate the returned column's device memory + * @return Output column in device memory + */ + +template || + std::is_same_v)> +std::unique_ptr string_segmented_reduction(column_view const& col, + device_span offsets, + null_policy null_handling, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + // Pass to simple_segmented_reduction, get indices to gather, perform gather here. + auto device_col = cudf::column_device_view::create(col, stream); + + auto it = thrust::make_counting_iterator(0); + auto const num_segments = static_cast(offsets.size()) - 1; + + bool constexpr is_argmin = std::is_same_v; + auto string_comparator = + cudf::detail::element_argminmax_fn{*device_col, col.has_nulls(), is_argmin}; + auto constexpr identity = + is_argmin ? cudf::detail::ARGMIN_SENTINEL : cudf::detail::ARGMAX_SENTINEL; + + auto gather_map = + cudf::reduction::detail::segmented_reduce(it, + offsets.begin(), + num_segments, + string_comparator, + identity, + stream, + rmm::mr::get_current_device_resource()); + auto result = std::move(cudf::detail::gather(table_view{{col}}, + *gather_map, + cudf::out_of_bounds_policy::NULLIFY, + cudf::detail::negative_index_policy::NOT_ALLOWED, + stream, + mr) + ->release()[0]); + auto const [segmented_null_mask, segmented_null_count] = + cudf::detail::segmented_null_mask_reduction(col.null_mask(), + offsets.begin(), + offsets.end() - 1, + offsets.begin() + 1, + null_handling, + stream, + mr); + + // If the segmented null mask contains any null values, the segmented null mask + // must be combined with the result null mask. + if (segmented_null_count > 0) { + if (result->null_count() == 0) { + // The result has no nulls. Use the segmented null mask. + result->set_null_mask(segmented_null_mask, segmented_null_count, stream); + } else { + // Compute the logical AND of the segmented output null mask and the + // result null mask to update the result null mask and null count. + auto result_mview = result->mutable_view(); + std::vector masks{static_cast(result_mview.null_mask()), + static_cast(segmented_null_mask.data())}; + std::vector begin_bits{0, 0}; + auto const valid_count = cudf::detail::inplace_bitmask_and( + device_span(static_cast(result_mview.null_mask()), + num_bitmask_words(result->size())), + masks, + begin_bits, + result->size(), + stream, + mr); + result->set_null_count(result->size() - valid_count); + } + } + + return result; +} + +template () && + !std::is_same_v())> +std::unique_ptr string_segmented_reduction(column_view const& col, + device_span offsets, + null_policy null_handling, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FAIL("Segmented reduction on string column only supports min and max reduction."); +} + /** * @brief Call reduce and return a column of type bool. * @@ -153,7 +268,9 @@ struct same_column_type_dispatcher { } public: - template ()>* = nullptr> + template () && + !std::is_same_v)> std::unique_ptr operator()(column_view const& col, device_span offsets, null_policy null_handling, @@ -164,7 +281,18 @@ struct same_column_type_dispatcher { col, offsets, null_handling, stream, mr); } - template ()>* = nullptr> + template () && std::is_same_v)> + std::unique_ptr operator()(column_view const& col, + device_span offsets, + null_policy null_handling, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + { + return string_segmented_reduction(col, offsets, null_handling, stream, mr); + } + + template ())> std::unique_ptr operator()(column_view const&, device_span, null_policy, diff --git a/cpp/tests/reductions/segmented_reduction_tests.cpp b/cpp/tests/reductions/segmented_reduction_tests.cpp index f750c432efb..8a9a8fb549e 100644 --- a/cpp/tests/reductions/segmented_reduction_tests.cpp +++ b/cpp/tests/reductions/segmented_reduction_tests.cpp @@ -387,6 +387,127 @@ TEST_F(SegmentedReductionTestUntyped, ReduceEmptyColumn) CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, expect); } +// String min/max test grid +// Segment: Length 0, length 1, length 2 +// Element nulls: No nulls, all nulls, some nulls +// String: Empty string, +// Position of the min/max: start of segment, end of segment +// Include null, exclude null + +#undef XXX +#define XXX "" // null placeholder + +struct SegmentedReductionStringTest : public cudf::test::BaseFixture { + std::pair> input() + { + return std::pair( + strings_column_wrapper{ + {"world", "cudf", XXX, "", "rapids", "i am", "ai", "apples", "zebras", XXX, XXX, XXX}, + {true, true, false, true, true, true, true, true, true, false, false, false}}, + fixed_width_column_wrapper{0, 1, 4, 7, 9, 9, 10, 12}); + } +}; + +TEST_F(SegmentedReductionStringTest, MaxIncludeNulls) +{ + // data: ['world'], ['cudf', NULL, ''], ['rapids', 'i am', 'ai'], ['apples', 'zebras'], + // [], [NULL], [NULL, NULL] + // values: {"world", "cudf", XXX, "", "rapids", "i am", "ai", "apples", "zebras", XXX, XXX, XXX} + // nullmask:{1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0} + // offsets: {0, 1, 4, 7, 9, 9, 10, 12} + // output_dtype: string dtype + // outputs: {"world", XXX, "rapids", "zebras", XXX, XXX, XXX} + // output nullmask: {1, 0, 1, 1, 0, 0, 0} + + auto const [input, offsets] = this->input(); + data_type output_dtype{type_id::STRING}; + + strings_column_wrapper expect{{"world", XXX, "rapids", "zebras", XXX, XXX, XXX}, + {true, false, true, true, false, false, false}}; + + auto res = segmented_reduce(input, + column_view(offsets), + *make_max_aggregation(), + output_dtype, + null_policy::INCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, expect); +} + +TEST_F(SegmentedReductionStringTest, MaxExcludeNulls) +{ + // data: ['world'], ['cudf', NULL, ''], ['rapids', 'i am', 'ai'], ['apples', 'zebras'], + // [], [NULL], [NULL, NULL] + // values: {"world", "cudf", XXX, "", "rapids", "i am", "ai", "apples", "zebras", XXX, XXX, XXX} + // nullmask:{1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0} + // offsets: {0, 1, 4, 7, 9, 9, 10, 12} + // output_dtype: string dtype + // outputs: {"world", "cudf", "rapids", "zebras", XXX, XXX, XXX} + // output nullmask: {1, 1, 1, 1, 0, 0, 0} + + auto const [input, offsets] = this->input(); + data_type output_dtype{type_id::STRING}; + + strings_column_wrapper expect{{"world", "cudf", "rapids", "zebras", XXX, XXX, XXX}, + {true, true, true, true, false, false, false}}; + + auto res = segmented_reduce(input, + column_view(offsets), + *make_max_aggregation(), + output_dtype, + null_policy::EXCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, expect); +} + +TEST_F(SegmentedReductionStringTest, MinIncludeNulls) +{ + // data: ['world'], ['cudf', NULL, ''], ['rapids', 'i am', 'ai'], ['apples', 'zebras'], + // [], [NULL], [NULL, NULL] + // values: {"world", "cudf", XXX, "", "rapids", "i am", "ai", "apples", "zebras", XXX, XXX, XXX} + // nullmask:{1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0} + // offsets: {0, 1, 4, 7, 9, 9, 10, 12} + // output_dtype: string dtype + // outputs: {"world", XXX, "ai", "apples", XXX, XXX, XXX} + // output nullmask: {1, 0, 1, 1, 0, 0, 0} + + auto const [input, offsets] = this->input(); + data_type output_dtype{type_id::STRING}; + + strings_column_wrapper expect{{"world", XXX, "ai", "apples", XXX, XXX, XXX}, + {true, false, true, true, false, false, false}}; + + auto res = segmented_reduce(input, + column_view(offsets), + *make_min_aggregation(), + output_dtype, + null_policy::INCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, expect); +} + +TEST_F(SegmentedReductionStringTest, MinExcludeNulls) +{ + // data: ['world'], ['cudf', NULL, ''], ['rapids', 'i am', 'ai'], ['apples', 'zebras'], + // [], [NULL], [NULL, NULL] + // values: {"world", "cudf", XXX, "", "rapids", "i am", "ai", "apples", "zebras", XXX, XXX, XXX} + // nullmask:{1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0} + // offsets: {0, 1, 4, 7, 9, 9, 10, 12} + // output_dtype: string dtype + // outputs: {"world", "", "ai", "apples", XXX, XXX, XXX} + // output nullmask: {1, 1, 1, 1, 0, 0, 0} + + auto const [input, offsets] = this->input(); + data_type output_dtype{type_id::STRING}; + + strings_column_wrapper expect{{"world", "", "ai", "apples", XXX, XXX, XXX}, + {true, true, true, true, false, false, false}}; + + auto res = segmented_reduce(input, + column_view(offsets), + *make_min_aggregation(), + output_dtype, + null_policy::EXCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, expect); +} + #undef XXX } // namespace test