From 9da7c01bf394243ae37319277e83a8edda3b4c70 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Wed, 22 Sep 2021 12:37:56 -0400 Subject: [PATCH] Fix call to thrust::reduce_by_key in argmin/argmax libcudf groupby (#9263) Closes #9156 This PR simplifies the parameters when calling thrust::reduce_by_key for the argmin/argmax aggregations in cudf::groupby. The illegalMemoryAccess found in #9156 was due to invalid data being passed from thrust::reduce_by_key through to the BinaryPredicate function as documented in NVIDIA/thrust#1525 The invalid data being passed is only a real issue for strings columns where the device pointer was neither nullptr nor a valid address. The new logic provides only size_type values to thrust::reduce_by_key so invalid values can only be out-of-bounds for the input column which is easily checked before retrieving the string_view objects within the ArgMin and ArgMax operators. This the same as #9244 but based on 21.10 Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Devavret Makkar (https://github.com/devavret) - Nghia Truong (https://github.com/ttnghia) - Robert Maynard (https://github.com/robertmaynard) URL: https://github.com/rapidsai/cudf/pull/9263 --- .../sort/group_single_pass_reduction_util.cuh | 96 ++++++------------- 1 file changed, 30 insertions(+), 66 deletions(-) diff --git a/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh b/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh index 8eccadd653e..db2ae5b5d8e 100644 --- a/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh +++ b/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh @@ -31,77 +31,50 @@ #include #include #include -#include -#include #include namespace cudf { namespace groupby { namespace detail { -// ArgMin binary operator with tuple of (value, index) +/** + * @brief ArgMin binary operator with index values into input column. + * + * @tparam T Type of the underlying column. Must support '<' operator. + */ template struct ArgMin { - CUDA_HOST_DEVICE_CALLABLE auto operator()(thrust::tuple const& lhs, - thrust::tuple const& rhs) const - { - if (thrust::get<1>(lhs) == cudf::detail::ARGMIN_SENTINEL) - return rhs; - else if (thrust::get<1>(rhs) == cudf::detail::ARGMIN_SENTINEL) - return lhs; - else - return thrust::get<0>(lhs) < thrust::get<0>(rhs) ? lhs : rhs; - } -}; - -// ArgMax binary operator with tuple of (value, index) -template -struct ArgMax { - CUDA_HOST_DEVICE_CALLABLE auto operator()(thrust::tuple const& lhs, - thrust::tuple const& rhs) const - { - if (thrust::get<1>(lhs) == cudf::detail::ARGMIN_SENTINEL) - return rhs; - else if (thrust::get<1>(rhs) == cudf::detail::ARGMIN_SENTINEL) - return lhs; - else - return thrust::get<0>(lhs) > thrust::get<0>(rhs) ? lhs : rhs; - } -}; - -struct get_tuple_second_element { - template - __device__ size_type operator()(thrust::tuple const& rhs) const + column_device_view const d_col; + CUDA_DEVICE_CALLABLE auto operator()(size_type const& lhs, size_type const& rhs) const { - return thrust::get<1>(rhs); + // The extra bounds checking is due to issue github.com/rapidsai/cudf/9156 and + // github.com/NVIDIA/thrust/issues/1525 + // where invalid random values may be passed here by thrust::reduce_by_key + if (lhs < 0 || lhs >= d_col.size() || d_col.is_null(lhs)) { return rhs; } + if (rhs < 0 || rhs >= d_col.size() || d_col.is_null(rhs)) { return lhs; } + return d_col.element(lhs) < d_col.element(rhs) ? lhs : rhs; } }; /** - * @brief Functor to store the boolean value to null mask. + * @brief ArgMax binary operator with index values into input column. + * + * @tparam T Type of the underlying column. Must support '<' operator. */ -struct bool_to_nullmask { - mutable_column_device_view d_result; - __device__ void operator()(size_type i, bool rhs) +template +struct ArgMax { + column_device_view const d_col; + CUDA_DEVICE_CALLABLE auto operator()(size_type const& lhs, size_type const& rhs) const { - if (rhs) { - d_result.set_valid(i); - } else { - d_result.set_null(i); - } + // The extra bounds checking is due to issue github.com/rapidsai/cudf/9156 and + // github.com/NVIDIA/thrust/issues/1525 + // where invalid random values may be passed here by thrust::reduce_by_key + if (lhs < 0 || lhs >= d_col.size() || d_col.is_null(lhs)) { return rhs; } + if (rhs < 0 || rhs >= d_col.size() || d_col.is_null(rhs)) { return lhs; } + return d_col.element(rhs) < d_col.element(lhs) ? lhs : rhs; } }; -/** - * @brief Returns index for non-null element, and SENTINEL for null element in a column. - * - */ -struct null_as_sentinel { - column_device_view const col; - size_type const SENTINEL; - __device__ size_type operator()(size_type i) const { return col.is_null(i) ? SENTINEL : i; } -}; - /** * @brief Value accessor for column which supports dictionary column too. * @@ -191,25 +164,16 @@ struct reduce_functor { auto resultview = mutable_column_device_view::create(result->mutable_view(), stream); auto valuesview = column_device_view::create(values, stream); if constexpr (K == aggregation::ARGMAX || K == aggregation::ARGMIN) { - constexpr auto SENTINEL = - (K == aggregation::ARGMAX ? cudf::detail::ARGMAX_SENTINEL : cudf::detail::ARGMIN_SENTINEL); - auto idx_begin = - cudf::detail::make_counting_transform_iterator(0, null_as_sentinel{*valuesview, SENTINEL}); - // dictionary keys are sorted, so dictionary32 index comparison is enough. - auto column_begin = valuesview->begin(); - auto begin = thrust::make_zip_iterator(thrust::make_tuple(column_begin, idx_begin)); - auto result_begin = thrust::make_transform_output_iterator(resultview->begin(), - get_tuple_second_element{}); using OpType = std::conditional_t<(K == aggregation::ARGMAX), ArgMax, ArgMin>; thrust::reduce_by_key(rmm::exec_policy(stream), group_labels.data(), group_labels.data() + group_labels.size(), - begin, + thrust::make_counting_iterator(0), thrust::make_discard_iterator(), - result_begin, - thrust::equal_to{}, - OpType{}); + resultview->begin(), + thrust::equal_to{}, + OpType{*valuesview}); } else { auto init = OpType::template identity(); auto begin = cudf::detail::make_counting_transform_iterator(