-
Notifications
You must be signed in to change notification settings - Fork 915
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix call to thrust::reduce_by_key in argmin/argmax libcudf groupby #9263
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -31,77 +31,50 @@ | |
#include <thrust/functional.h> | ||
#include <thrust/iterator/counting_iterator.h> | ||
#include <thrust/iterator/discard_iterator.h> | ||
#include <thrust/iterator/transform_output_iterator.h> | ||
#include <thrust/iterator/zip_iterator.h> | ||
#include <thrust/reduce.h> | ||
|
||
namespace cudf { | ||
namespace groupby { | ||
namespace detail { | ||
|
||
// ArgMin binary operator with tuple of (value, index) | ||
/** | ||
* @brief ArgMin binary operator with index values into input column. | ||
* | ||
* @tparam T Type of the underlying column. Must support '<' operator. | ||
*/ | ||
template <typename T> | ||
struct ArgMin { | ||
CUDA_HOST_DEVICE_CALLABLE auto operator()(thrust::tuple<T, size_type> const& lhs, | ||
thrust::tuple<T, size_type> const& rhs) const | ||
{ | ||
if (thrust::get<1>(lhs) == cudf::detail::ARGMIN_SENTINEL) | ||
return rhs; | ||
else if (thrust::get<1>(rhs) == cudf::detail::ARGMIN_SENTINEL) | ||
return lhs; | ||
else | ||
return thrust::get<0>(lhs) < thrust::get<0>(rhs) ? lhs : rhs; | ||
} | ||
}; | ||
|
||
// ArgMax binary operator with tuple of (value, index) | ||
template <typename T> | ||
struct ArgMax { | ||
CUDA_HOST_DEVICE_CALLABLE auto operator()(thrust::tuple<T, size_type> const& lhs, | ||
thrust::tuple<T, size_type> const& rhs) const | ||
{ | ||
if (thrust::get<1>(lhs) == cudf::detail::ARGMIN_SENTINEL) | ||
return rhs; | ||
else if (thrust::get<1>(rhs) == cudf::detail::ARGMIN_SENTINEL) | ||
return lhs; | ||
else | ||
return thrust::get<0>(lhs) > thrust::get<0>(rhs) ? lhs : rhs; | ||
} | ||
}; | ||
|
||
struct get_tuple_second_element { | ||
template <typename T> | ||
__device__ size_type operator()(thrust::tuple<T, size_type> const& rhs) const | ||
column_device_view const d_col; | ||
CUDA_DEVICE_CALLABLE auto operator()(size_type const& lhs, size_type const& rhs) const | ||
{ | ||
return thrust::get<1>(rhs); | ||
// The extra bounds checking is due to issue github.com/rapidsai/cudf/9156 and | ||
// github.com/NVIDIA/thrust/issues/1525 | ||
// where invalid random values may be passed here by thrust::reduce_by_key | ||
if (lhs < 0 || lhs >= d_col.size() || d_col.is_null(lhs)) { return rhs; } | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What if There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think the bounds checking is necessary seeing as this gets its values from a counting iterator which is always less than the size of For null, I do think it's ok to return the index of a null element. If both are null then either can be returned, the winning idx will later be removed when compared against an idx corresponding to valid value. And if the entire group contains nulls then it'll be nullified in the group mask generation step. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The bounds checking is necessary because the two issues mentioned in the comment above.The There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah I see. So bound check here is only to ensure the line 55 below works correctly, not to ensure the output to be used correctly later (since it is already handled). |
||
if (rhs < 0 || rhs >= d_col.size() || d_col.is_null(rhs)) { return lhs; } | ||
return d_col.element<T>(lhs) < d_col.element<T>(rhs) ? lhs : rhs; | ||
} | ||
}; | ||
|
||
/** | ||
* @brief Functor to store the boolean value to null mask. | ||
* @brief ArgMax binary operator with index values into input column. | ||
* | ||
* @tparam T Type of the underlying column. Must support '<' operator. | ||
*/ | ||
struct bool_to_nullmask { | ||
mutable_column_device_view d_result; | ||
__device__ void operator()(size_type i, bool rhs) | ||
template <typename T> | ||
struct ArgMax { | ||
column_device_view const d_col; | ||
CUDA_DEVICE_CALLABLE auto operator()(size_type const& lhs, size_type const& rhs) const | ||
{ | ||
if (rhs) { | ||
d_result.set_valid(i); | ||
} else { | ||
d_result.set_null(i); | ||
} | ||
// The extra bounds checking is due to issue github.com/rapidsai/cudf/9156 and | ||
// github.com/NVIDIA/thrust/issues/1525 | ||
// where invalid random values may be passed here by thrust::reduce_by_key | ||
if (lhs < 0 || lhs >= d_col.size() || d_col.is_null(lhs)) { return rhs; } | ||
if (rhs < 0 || rhs >= d_col.size() || d_col.is_null(rhs)) { return lhs; } | ||
return d_col.element<T>(rhs) < d_col.element<T>(lhs) ? lhs : rhs; | ||
} | ||
}; | ||
|
||
/** | ||
* @brief Returns index for non-null element, and SENTINEL for null element in a column. | ||
* | ||
*/ | ||
struct null_as_sentinel { | ||
column_device_view const col; | ||
size_type const SENTINEL; | ||
__device__ size_type operator()(size_type i) const { return col.is_null(i) ? SENTINEL : i; } | ||
}; | ||
|
||
/** | ||
* @brief Value accessor for column which supports dictionary column too. | ||
* | ||
|
@@ -191,25 +164,16 @@ struct reduce_functor { | |
auto resultview = mutable_column_device_view::create(result->mutable_view(), stream); | ||
auto valuesview = column_device_view::create(values, stream); | ||
if constexpr (K == aggregation::ARGMAX || K == aggregation::ARGMIN) { | ||
constexpr auto SENTINEL = | ||
(K == aggregation::ARGMAX ? cudf::detail::ARGMAX_SENTINEL : cudf::detail::ARGMIN_SENTINEL); | ||
auto idx_begin = | ||
cudf::detail::make_counting_transform_iterator(0, null_as_sentinel{*valuesview, SENTINEL}); | ||
// dictionary keys are sorted, so dictionary32 index comparison is enough. | ||
auto column_begin = valuesview->begin<DeviceType>(); | ||
auto begin = thrust::make_zip_iterator(thrust::make_tuple(column_begin, idx_begin)); | ||
auto result_begin = thrust::make_transform_output_iterator(resultview->begin<ResultDType>(), | ||
get_tuple_second_element{}); | ||
using OpType = | ||
std::conditional_t<(K == aggregation::ARGMAX), ArgMax<DeviceType>, ArgMin<DeviceType>>; | ||
thrust::reduce_by_key(rmm::exec_policy(stream), | ||
group_labels.data(), | ||
group_labels.data() + group_labels.size(), | ||
begin, | ||
thrust::make_counting_iterator<ResultType>(0), | ||
thrust::make_discard_iterator(), | ||
result_begin, | ||
thrust::equal_to<size_type>{}, | ||
OpType{}); | ||
resultview->begin<ResultType>(), | ||
thrust::equal_to<ResultType>{}, | ||
OpType{*valuesview}); | ||
} else { | ||
auto init = OpType::template identity<DeviceType>(); | ||
auto begin = cudf::detail::make_counting_transform_iterator( | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is there ever valid random values?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It could happen. The data could be randomly valid. Either way, the
thrust::reduce_by_key
ignores the result -- it is just trying to fill a block/warp with minimal divergence.