Support Segmented Min/Max Reduction on String Type (#10447)
This PR adds `min`/`max` segmented reduction support for the string type.

Part of #10417
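
For orientation, here is a rough usage sketch (not part of this commit) of how the new string path might be exercised through the public `cudf::segmented_reduce` API. The exact signature and argument order, and the test column wrapper, are assumptions based on the cudf release of this era.

#include <cudf/aggregation.hpp>
#include <cudf/column/column.hpp>
#include <cudf/reduction.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/span.hpp>
#include <cudf_test/column_wrapper.hpp>

#include <thrust/device_vector.h>

#include <memory>
#include <vector>

// Reduce each segment of a STRING column to its maximum element.
std::unique_ptr<cudf::column> max_per_segment_example()
{
  // Three segments over six strings: [0,2), [2,5), [5,6)
  auto const input = cudf::test::strings_column_wrapper{
    "apple", "banana", "cherry", "date", "elderberry", "fig"};
  auto const host_offsets = std::vector<cudf::size_type>{0, 2, 5, 6};
  auto const offsets      = thrust::device_vector<cudf::size_type>(host_offsets);

  // MIN/MAX segmented reduction on strings is what this PR enables.
  return cudf::segmented_reduce(
    input,
    cudf::device_span<cudf::size_type const>{offsets.data().get(), offsets.size()},
    *cudf::make_max_aggregation<cudf::segmented_reduce_aggregation>(),
    cudf::data_type{cudf::type_id::STRING},
    cudf::null_policy::EXCLUDE);
}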

Authors:
  - Michael Wang (https://github.com/isVoid)
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - Vukasin Milovanovic (https://github.com/vuule)
  - Bradley Dice (https://github.com/bdice)

URL: #10447
isVoid authored Apr 29, 2022
1 parent 9b8d26f commit 9112907
Showing 5 changed files with 331 additions and 50 deletions.
27 changes: 14 additions & 13 deletions cpp/include/cudf/detail/reduction.cuh
@@ -227,36 +227,36 @@ std::unique_ptr<scalar> reduce(InputIterator d_in,
* @brief Compute the specified simple reduction over each of the segments in the
* input range of elements.
*
* @tparam Op the reduction operator with device binary operator
* @tparam InputIterator the input column iterator
* @tparam OffsetIterator the offset column iterator
* @tparam BinaryOp the device binary operator used to reduce
* @tparam OutputType the output type of reduction
*
* @param[in] d_in the begin iterator to input
* @param[in] d_offset the begin iterator to offset
* @param[in] num_segments the number of segments
* @param[in] sop the reduction operator
* @param[in] binary_op the reduction operator
* @param[in] identity the identity element of the reduction operator
* @param[in] stream CUDA stream used for device memory operations and kernel launches.
* @param[in] mr Device memory resource used to allocate the returned column's device
* memory
* @returns Output column in device memory
*
*/
template <typename Op,
typename InputIterator,
template <typename InputIterator,
typename OffsetIterator,
typename BinaryOp,
typename OutputType = typename thrust::iterator_value<InputIterator>::type,
typename std::enable_if_t<is_fixed_width<OutputType>() &&
not cudf::is_fixed_point<OutputType>()>* = nullptr>
!cudf::is_fixed_point<OutputType>()>* = nullptr>
std::unique_ptr<column> segmented_reduce(InputIterator d_in,
OffsetIterator d_offset,
cudf::size_type num_segments,
op::simple_op<Op> sop,
BinaryOp binary_op,
OutputType identity,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
auto binary_op = sop.get_binary_op();
auto identity = sop.template get_identity<OutputType>();
auto dev_result = make_fixed_width_column(
data_type{type_to_id<OutputType>()}, num_segments, mask_state::UNALLOCATED, stream, mr);
auto dev_result_mview = dev_result->mutable_view();
@@ -291,16 +291,17 @@ std::unique_ptr<column> segmented_reduce(InputIterator d_in,
return dev_result;
}

template <typename Op,
typename InputIterator,
template <typename InputIterator,
typename OffsetIterator,
typename BinaryOp,
typename OutputType = typename thrust::iterator_value<InputIterator>::type,
typename std::enable_if_t<not is_fixed_width<OutputType>() ||
is_fixed_point<OutputType>()>* = nullptr>
typename std::enable_if_t<!(is_fixed_width<OutputType>() &&
!cudf::is_fixed_point<OutputType>())>* = nullptr>
std::unique_ptr<column> segmented_reduce(InputIterator,
OffsetIterator,
cudf::size_type,
op::simple_op<Op>,
BinaryOp,
OutputType,
rmm::cuda_stream_view,
rmm::mr::device_memory_resource*)
{
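The interface change above decouples this helper from `op::simple_op`: callers now supply any device binary operator together with an explicit identity value. Below is a host-side analogue of those semantics (illustration only, not from this commit): each segment [d_offset[i], d_offset[i+1]) is folded with binary_op starting from identity, so empty segments yield the identity.

#include <cstddef>
#include <vector>

template <typename T, typename BinaryOp>
std::vector<T> segmented_reduce_host(std::vector<T> const& in,
                                     std::vector<int> const& offsets,
                                     BinaryOp binary_op,
                                     T identity)
{
  std::vector<T> out;
  for (std::size_t seg = 0; seg + 1 < offsets.size(); ++seg) {
    T acc = identity;  // an empty segment reduces to the identity element
    for (int i = offsets[seg]; i < offsets[seg + 1]; ++i) { acc = binary_op(acc, in[i]); }
    out.push_back(acc);
  }
  return out;
}

// e.g. per-segment minimum: binary_op = [](int a, int b) { return a < b ? a : b; },
// identity = std::numeric_limits<int>::max().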
61 changes: 61 additions & 0 deletions cpp/include/cudf/detail/utilities/element_argminmax.cuh
@@ -0,0 +1,61 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <cudf/column/column_device_view.cuh>
#include <cudf/types.hpp>
#include <cudf/utilities/error.hpp>
#include <cudf/utilities/traits.hpp>

#include <type_traits>

namespace cudf {
namespace detail {

/**
* @brief Binary `argmin`/`argmax` operator
*
* @tparam T Type of the underlying column. Must support '<' operator.
*/
template <typename T>
struct element_argminmax_fn {
column_device_view const d_col;
bool const has_nulls;
bool const arg_min;

__device__ inline auto operator()(size_type const& lhs_idx, size_type const& rhs_idx) const
{
// The extra bounds checking is due to issue github.com/rapidsai/cudf/9156 and
// github.com/NVIDIA/thrust/issues/1525
// where invalid random values may be passed here by thrust::reduce_by_key
auto out_of_bound_or_null = [this] __device__(size_type const& idx) {
return idx < 0 || idx >= this->d_col.size() ||
(this->has_nulls && this->d_col.is_null_nocheck(idx));
};
if (out_of_bound_or_null(lhs_idx)) { return rhs_idx; }
if (out_of_bound_or_null(rhs_idx)) { return lhs_idx; }

// Return `lhs_idx` iff:
// row(lhs_idx) < row(rhs_idx) and finding ArgMin, or
// row(lhs_idx) >= row(rhs_idx) and finding ArgMax.
auto const less = d_col.element<T>(lhs_idx) < d_col.element<T>(rhs_idx);
return less == arg_min ? lhs_idx : rhs_idx;
}
};

} // namespace detail
} // namespace cudf
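
For illustration, a host-side analogue (hypothetical helper, not part of this commit) of the selection rule above: keep the left index exactly when `row(lhs) < row(rhs)` agrees with the arg_min flag, and let out-of-range indices (the sentinels `thrust::reduce_by_key` may feed in) lose the comparison.

#include <string>
#include <vector>

// One comparator step; folding it over 0, 1, 2, ... yields the argmin
// (arg_min == true) or argmax (arg_min == false) of the whole range.
inline int argminmax_step(std::vector<std::string> const& rows, int lhs, int rhs, bool arg_min)
{
  auto out_of_bound = [&](int idx) { return idx < 0 || idx >= static_cast<int>(rows.size()); };
  if (out_of_bound(lhs)) { return rhs; }
  if (out_of_bound(rhs)) { return lhs; }
  bool const less = rows[lhs] < rows[rhs];
  return less == arg_min ? lhs : rhs;  // same trick as element_argminmax_fn
}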
36 changes: 3 additions & 33 deletions cpp/src/groupby/sort/group_single_pass_reduction_util.cuh
@@ -23,6 +23,7 @@
#include <cudf/column/column_view.hpp>
#include <cudf/detail/aggregation/aggregation.cuh>
#include <cudf/detail/iterator.cuh>
#include <cudf/detail/utilities/element_argminmax.cuh>
#include <cudf/detail/valid_if.cuh>
#include <cudf/table/row_operators.cuh>
#include <cudf/types.hpp>
@@ -40,37 +41,6 @@ namespace cudf {
namespace groupby {
namespace detail {

/**
* @brief Binary operator with index values into the input column.
*
* @tparam T Type of the underlying column. Must support '<' operator.
*/
template <typename T>
struct element_arg_minmax_fn {
column_device_view const d_col;
bool const has_nulls;
bool const arg_min;

__device__ inline auto operator()(size_type const& lhs_idx, size_type const& rhs_idx) const
{
// The extra bounds checking is due to issue github.com/rapidsai/cudf/9156 and
// github.com/NVIDIA/thrust/issues/1525
// where invalid random values may be passed here by thrust::reduce_by_key
if (lhs_idx < 0 || lhs_idx >= d_col.size() || (has_nulls && d_col.is_null_nocheck(lhs_idx))) {
return rhs_idx;
}
if (rhs_idx < 0 || rhs_idx >= d_col.size() || (has_nulls && d_col.is_null_nocheck(rhs_idx))) {
return lhs_idx;
}

// Return `lhs_idx` iff:
// row(lhs_idx) < row(rhs_idx) and finding ArgMin, or
// row(lhs_idx) >= row(rhs_idx) and finding ArgMax.
auto const less = d_col.element<T>(lhs_idx) < d_col.element<T>(rhs_idx);
return less == arg_min ? lhs_idx : rhs_idx;
}
};

/**
* @brief Value accessor for column which supports dictionary column too.
*
@@ -211,8 +181,8 @@ struct group_reduction_functor<K, T, std::enable_if_t<is_group_reduction_support

if constexpr (K == aggregation::ARGMAX || K == aggregation::ARGMIN) {
auto const count_iter = thrust::make_counting_iterator<ResultType>(0);
auto const binop =
element_arg_minmax_fn<T>{*d_values_ptr, values.has_nulls(), K == aggregation::ARGMIN};
auto const binop = cudf::detail::element_argminmax_fn<T>{
*d_values_ptr, values.has_nulls(), K == aggregation::ARGMIN};
do_reduction(count_iter, result_begin, binop);
} else {
using OpType = cudf::detail::corresponding_operator_t<K>;
136 changes: 132 additions & 4 deletions cpp/src/reductions/simple_segmented.cuh
@@ -16,12 +16,15 @@

#pragma once

#include <cudf/detail/aggregation/aggregation.hpp>
#include <cudf/detail/copy.hpp>
#include <cudf/detail/gather.hpp>
#include <cudf/detail/null_mask.cuh>
#include <cudf/detail/null_mask.hpp>
#include <cudf/detail/reduction.cuh>
#include <cudf/detail/unary.hpp>
#include <cudf/detail/utilities/cuda.cuh>
#include <cudf/detail/utilities/element_argminmax.cuh>
#include <cudf/detail/valid_if.cuh>
#include <cudf/null_mask.hpp>
#include <cudf/types.hpp>
@@ -31,9 +34,12 @@

#include <rmm/cuda_stream_view.hpp>

#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/iterator/zip_iterator.h>

#include <type_traits>

namespace cudf {
namespace reduction {
namespace simple {
@@ -70,18 +76,21 @@ std::unique_ptr<column> simple_segmented_reduction(column_view const& col,
auto simple_op = Op{};
size_type num_segments = offsets.size() - 1;

auto binary_op = simple_op.get_binary_op();
auto identity = simple_op.template get_identity<ResultType>();

// TODO: Explore rewriting null_replacing_element_transformer/element_transformer with nullate
auto result = [&] {
if (col.has_nulls()) {
auto f = simple_op.template get_null_replacing_element_transformer<ResultType>();
auto it = thrust::make_transform_iterator(dcol->pair_begin<InputType, true>(), f);
return cudf::reduction::detail::segmented_reduce(
it, offsets.begin(), num_segments, simple_op, stream, mr);
it, offsets.begin(), num_segments, binary_op, identity, stream, mr);
} else {
auto f = simple_op.template get_element_transformer<ResultType>();
auto it = thrust::make_transform_iterator(dcol->begin<InputType>(), f);
return cudf::reduction::detail::segmented_reduce(
it, offsets.begin(), num_segments, simple_op, stream, mr);
it, offsets.begin(), num_segments, binary_op, identity, stream, mr);
}
}();

@@ -103,6 +112,112 @@ std::unique_ptr<column> simple_segmented_reduction(column_view const& col,
return result;
}

/**
* @brief String segmented reduction for 'min', 'max'.
*
* This algorithm uses argmin/argmax as a custom comparator to build a gather
* map, then builds the output.
*
* @tparam InputType the input column data-type
* @tparam Op the operator of cudf::reduction::op::
* @param col Input column of data to reduce.
* @param offsets Indices to segment boundaries.
* @param null_handling If `null_policy::INCLUDE`, all elements in a segment
* must be valid for the reduced value to be valid. If `null_policy::EXCLUDE`,
* the reduced value is valid if any element in the segment is valid.
* @param stream Used for device memory operations and kernel launches.
* @param mr Device memory resource used to allocate the returned column's device memory
* @return Output column in device memory
*/

template <typename InputType,
typename Op,
CUDF_ENABLE_IF(std::is_same_v<Op, cudf::reduction::op::min> ||
std::is_same_v<Op, cudf::reduction::op::max>)>
std::unique_ptr<column> string_segmented_reduction(column_view const& col,
device_span<size_type const> offsets,
null_policy null_handling,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
// Pass to simple_segmented_reduction, get indices to gather, perform gather here.
auto device_col = cudf::column_device_view::create(col, stream);

auto it = thrust::make_counting_iterator(0);
auto const num_segments = static_cast<size_type>(offsets.size()) - 1;

bool constexpr is_argmin = std::is_same_v<Op, cudf::reduction::op::min>;
auto string_comparator =
cudf::detail::element_argminmax_fn<InputType>{*device_col, col.has_nulls(), is_argmin};
auto constexpr identity =
is_argmin ? cudf::detail::ARGMIN_SENTINEL : cudf::detail::ARGMAX_SENTINEL;

auto gather_map =
cudf::reduction::detail::segmented_reduce(it,
offsets.begin(),
num_segments,
string_comparator,
identity,
stream,
rmm::mr::get_current_device_resource());
auto result = std::move(cudf::detail::gather(table_view{{col}},
*gather_map,
cudf::out_of_bounds_policy::NULLIFY,
cudf::detail::negative_index_policy::NOT_ALLOWED,
stream,
mr)
->release()[0]);
auto const [segmented_null_mask, segmented_null_count] =
cudf::detail::segmented_null_mask_reduction(col.null_mask(),
offsets.begin(),
offsets.end() - 1,
offsets.begin() + 1,
null_handling,
stream,
mr);

// If the segmented null mask contains any null values, the segmented null mask
// must be combined with the result null mask.
if (segmented_null_count > 0) {
if (result->null_count() == 0) {
// The result has no nulls. Use the segmented null mask.
result->set_null_mask(segmented_null_mask, segmented_null_count, stream);
} else {
// Compute the logical AND of the segmented output null mask and the
// result null mask to update the result null mask and null count.
auto result_mview = result->mutable_view();
std::vector masks{static_cast<bitmask_type const*>(result_mview.null_mask()),
static_cast<bitmask_type const*>(segmented_null_mask.data())};
std::vector<size_type> begin_bits{0, 0};
auto const valid_count = cudf::detail::inplace_bitmask_and(
device_span<bitmask_type>(static_cast<bitmask_type*>(result_mview.null_mask()),
num_bitmask_words(result->size())),
masks,
begin_bits,
result->size(),
stream,
mr);
result->set_null_count(result->size() - valid_count);
}
}

return result;
}

template <typename InputType,
typename Op,
CUDF_ENABLE_IF(!std::is_same_v<Op, cudf::reduction::op::min> &&
!std::is_same_v<Op, cudf::reduction::op::max>)>
std::unique_ptr<column> string_segmented_reduction(column_view const& col,
device_span<size_type const> offsets,
null_policy null_handling,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FAIL("Segmented reduction on string column only supports min and max reduction.");
}

/**
* @brief Call reduce and return a column of type bool.
*
Expand Down Expand Up @@ -153,7 +268,9 @@ struct same_column_type_dispatcher {
}

public:
template <typename ElementType, std::enable_if_t<is_supported<ElementType>()>* = nullptr>
template <typename ElementType,
CUDF_ENABLE_IF(is_supported<ElementType>() &&
!std::is_same_v<ElementType, string_view>)>
std::unique_ptr<column> operator()(column_view const& col,
device_span<size_type const> offsets,
null_policy null_handling,
@@ -164,7 +281,18 @@
col, offsets, null_handling, stream, mr);
}

template <typename ElementType, std::enable_if_t<not is_supported<ElementType>()>* = nullptr>
template <typename ElementType,
CUDF_ENABLE_IF(is_supported<ElementType>() && std::is_same_v<ElementType, string_view>)>
std::unique_ptr<column> operator()(column_view const& col,
device_span<size_type const> offsets,
null_policy null_handling,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
return string_segmented_reduction<ElementType, Op>(col, offsets, null_handling, stream, mr);
}

template <typename ElementType, CUDF_ENABLE_IF(!is_supported<ElementType>())>
std::unique_ptr<column> operator()(column_view const&,
device_span<size_type const>,
null_policy,
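For illustration, a host-side analogue (hypothetical, not part of this commit) of the gather-map algorithm documented for `string_segmented_reduction` above: reduce each segment to a winning row index with the argmin/argmax comparator, then gather the strings at those indices, turning out-of-range sentinel indices into nulls much like `out_of_bounds_policy::NULLIFY`.

#include <cstddef>
#include <optional>
#include <string>
#include <vector>

std::vector<std::optional<std::string>> segmented_max_strings(std::vector<std::string> const& col,
                                                              std::vector<int> const& offsets)
{
  std::vector<std::optional<std::string>> out;
  for (std::size_t seg = 0; seg + 1 < offsets.size(); ++seg) {
    int best = -1;  // sentinel, standing in for ARGMAX_SENTINEL on an empty segment
    for (int i = offsets[seg]; i < offsets[seg + 1]; ++i) {
      if (best < 0 || col[best] < col[i]) { best = i; }  // argmax step
    }
    // Gather: an invalid index becomes a null result for this segment.
    out.push_back(best < 0 ? std::optional<std::string>{} : std::optional<std::string>{col[best]});
  }
  return out;
}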