Skip to content

Commit

Permalink
Fix call to thrust::reduce_by_key in argmin/argmax libcudf groupby (#…
Browse files Browse the repository at this point in the history
…9263)

Closes #9156

This PR simplifies the parameters when calling thrust::reduce_by_key for the argmin/argmax aggregations in cudf::groupby. The illegalMemoryAccess found in #9156 was due to invalid data being passed from thrust::reduce_by_key through to the BinaryPredicate function as documented in NVIDIA/thrust#1525

The invalid data being passed is only a real issue for strings columns where the device pointer was neither nullptr nor a valid address. The new logic provides only size_type values to thrust::reduce_by_key so invalid values can only be out-of-bounds for the input column which is easily checked before retrieving the string_view objects within the ArgMin and ArgMax operators.

This the same as #9244 but based on 21.10

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Devavret Makkar (https://github.com/devavret)
  - Nghia Truong (https://github.com/ttnghia)
  - Robert Maynard (https://github.com/robertmaynard)

URL: #9263
  • Loading branch information
davidwendt authored Sep 22, 2021
1 parent ba2cbd9 commit 9da7c01
Showing 1 changed file with 30 additions and 66 deletions.
96 changes: 30 additions & 66 deletions cpp/src/groupby/sort/group_single_pass_reduction_util.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -31,77 +31,50 @@
#include <thrust/functional.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/discard_iterator.h>
#include <thrust/iterator/transform_output_iterator.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/reduce.h>

namespace cudf {
namespace groupby {
namespace detail {

// ArgMin binary operator with tuple of (value, index)
/**
* @brief ArgMin binary operator with index values into input column.
*
* @tparam T Type of the underlying column. Must support '<' operator.
*/
template <typename T>
struct ArgMin {
CUDA_HOST_DEVICE_CALLABLE auto operator()(thrust::tuple<T, size_type> const& lhs,
thrust::tuple<T, size_type> const& rhs) const
{
if (thrust::get<1>(lhs) == cudf::detail::ARGMIN_SENTINEL)
return rhs;
else if (thrust::get<1>(rhs) == cudf::detail::ARGMIN_SENTINEL)
return lhs;
else
return thrust::get<0>(lhs) < thrust::get<0>(rhs) ? lhs : rhs;
}
};

// ArgMax binary operator with tuple of (value, index)
template <typename T>
struct ArgMax {
CUDA_HOST_DEVICE_CALLABLE auto operator()(thrust::tuple<T, size_type> const& lhs,
thrust::tuple<T, size_type> const& rhs) const
{
if (thrust::get<1>(lhs) == cudf::detail::ARGMIN_SENTINEL)
return rhs;
else if (thrust::get<1>(rhs) == cudf::detail::ARGMIN_SENTINEL)
return lhs;
else
return thrust::get<0>(lhs) > thrust::get<0>(rhs) ? lhs : rhs;
}
};

struct get_tuple_second_element {
template <typename T>
__device__ size_type operator()(thrust::tuple<T, size_type> const& rhs) const
column_device_view const d_col;
CUDA_DEVICE_CALLABLE auto operator()(size_type const& lhs, size_type const& rhs) const
{
return thrust::get<1>(rhs);
// The extra bounds checking is due to issue github.com/rapidsai/cudf/9156 and
// github.com/NVIDIA/thrust/issues/1525
// where invalid random values may be passed here by thrust::reduce_by_key
if (lhs < 0 || lhs >= d_col.size() || d_col.is_null(lhs)) { return rhs; }
if (rhs < 0 || rhs >= d_col.size() || d_col.is_null(rhs)) { return lhs; }
return d_col.element<T>(lhs) < d_col.element<T>(rhs) ? lhs : rhs;
}
};

/**
* @brief Functor to store the boolean value to null mask.
* @brief ArgMax binary operator with index values into input column.
*
* @tparam T Type of the underlying column. Must support '<' operator.
*/
struct bool_to_nullmask {
mutable_column_device_view d_result;
__device__ void operator()(size_type i, bool rhs)
template <typename T>
struct ArgMax {
column_device_view const d_col;
CUDA_DEVICE_CALLABLE auto operator()(size_type const& lhs, size_type const& rhs) const
{
if (rhs) {
d_result.set_valid(i);
} else {
d_result.set_null(i);
}
// The extra bounds checking is due to issue github.com/rapidsai/cudf/9156 and
// github.com/NVIDIA/thrust/issues/1525
// where invalid random values may be passed here by thrust::reduce_by_key
if (lhs < 0 || lhs >= d_col.size() || d_col.is_null(lhs)) { return rhs; }
if (rhs < 0 || rhs >= d_col.size() || d_col.is_null(rhs)) { return lhs; }
return d_col.element<T>(rhs) < d_col.element<T>(lhs) ? lhs : rhs;
}
};

/**
* @brief Returns index for non-null element, and SENTINEL for null element in a column.
*
*/
struct null_as_sentinel {
column_device_view const col;
size_type const SENTINEL;
__device__ size_type operator()(size_type i) const { return col.is_null(i) ? SENTINEL : i; }
};

/**
* @brief Value accessor for column which supports dictionary column too.
*
Expand Down Expand Up @@ -191,25 +164,16 @@ struct reduce_functor {
auto resultview = mutable_column_device_view::create(result->mutable_view(), stream);
auto valuesview = column_device_view::create(values, stream);
if constexpr (K == aggregation::ARGMAX || K == aggregation::ARGMIN) {
constexpr auto SENTINEL =
(K == aggregation::ARGMAX ? cudf::detail::ARGMAX_SENTINEL : cudf::detail::ARGMIN_SENTINEL);
auto idx_begin =
cudf::detail::make_counting_transform_iterator(0, null_as_sentinel{*valuesview, SENTINEL});
// dictionary keys are sorted, so dictionary32 index comparison is enough.
auto column_begin = valuesview->begin<DeviceType>();
auto begin = thrust::make_zip_iterator(thrust::make_tuple(column_begin, idx_begin));
auto result_begin = thrust::make_transform_output_iterator(resultview->begin<ResultDType>(),
get_tuple_second_element{});
using OpType =
std::conditional_t<(K == aggregation::ARGMAX), ArgMax<DeviceType>, ArgMin<DeviceType>>;
thrust::reduce_by_key(rmm::exec_policy(stream),
group_labels.data(),
group_labels.data() + group_labels.size(),
begin,
thrust::make_counting_iterator<ResultType>(0),
thrust::make_discard_iterator(),
result_begin,
thrust::equal_to<size_type>{},
OpType{});
resultview->begin<ResultType>(),
thrust::equal_to<ResultType>{},
OpType{*valuesview});
} else {
auto init = OpType::template identity<DeviceType>();
auto begin = cudf::detail::make_counting_transform_iterator(
Expand Down

0 comments on commit 9da7c01

Please sign in to comment.