Skip to content

Commit

Permalink
Accept const refs instead of const unique_ptr refs in reduce and scan…
Browse files Browse the repository at this point in the history
… APIs. (#11960)

There is almost never a good reason to pass arguments as `unique_ptr<T> const&`. Since those arguments cannot be modified, the only use case is accessing the underlying pointer, at which point the function better communicates its intent by accepting the underlying pointer/reference as an argument instead and is also more flexible as a result.

Resolves #10393

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - Nghia Truong (https://github.com/ttnghia)

URL: #11960
  • Loading branch information
vyasr authored Oct 21, 2022
1 parent dec8bde commit 9c06330
Show file tree
Hide file tree
Showing 21 changed files with 478 additions and 471 deletions.
2 changes: 1 addition & 1 deletion cpp/benchmarks/reduction/anyall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ void BM_reduction_anyall(benchmark::State& state,

for (auto _ : state) {
cuda_event_timer timer(state, true);
auto result = cudf::reduce(*values, agg, output_dtype);
auto result = cudf::reduce(*values, *agg, output_dtype);
}
}

Expand Down
2 changes: 1 addition & 1 deletion cpp/benchmarks/reduction/dictionary.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ void BM_reduction_dictionary(benchmark::State& state,

for (auto _ : state) {
cuda_event_timer timer(state, true);
auto result = cudf::reduce(*values, agg, output_dtype);
auto result = cudf::reduce(*values, *agg, output_dtype);
}
}

Expand Down
2 changes: 1 addition & 1 deletion cpp/benchmarks/reduction/reduce.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ void BM_reduction(benchmark::State& state, std::unique_ptr<cudf::reduce_aggregat

for (auto _ : state) {
cuda_event_timer timer(state, true);
auto result = cudf::reduce(*input_column, agg, output_dtype);
auto result = cudf::reduce(*input_column, *agg, output_dtype);
}
}

Expand Down
2 changes: 1 addition & 1 deletion cpp/benchmarks/reduction/scan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ static void BM_reduction_scan(benchmark::State& state, bool include_nulls)
for (auto _ : state) {
cuda_event_timer timer(state, true);
auto result = cudf::scan(
*column, cudf::make_min_aggregation<cudf::scan_aggregation>(), cudf::scan_type::INCLUSIVE);
*column, *cudf::make_min_aggregation<cudf::scan_aggregation>(), cudf::scan_type::INCLUSIVE);
}
}

Expand Down
8 changes: 4 additions & 4 deletions cpp/include/cudf/detail/scan.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ namespace detail {
* `agg` is not Min or Max.
*
* @param input The input column view for the scan.
* @param agg unique_ptr to aggregation operator applied by the scan.
* @param agg Aggregation operator applied by the scan
* @param null_handling Exclude null values when computing the result if null_policy::EXCLUDE.
* Include nulls if null_policy::INCLUDE. Any operation with a null results in
* a null.
Expand All @@ -47,7 +47,7 @@ namespace detail {
* @returns Column with scan results.
*/
std::unique_ptr<column> scan_exclusive(column_view const& input,
std::unique_ptr<scan_aggregation> const& agg,
scan_aggregation const& agg,
null_policy null_handling,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr);
Expand All @@ -64,7 +64,7 @@ std::unique_ptr<column> scan_exclusive(column_view const& input,
* but the `agg` is not Min or Max.
*
* @param input The input column view for the scan.
* @param agg unique_ptr to aggregation operator applied by the scan.
* @param agg Aggregation operator applied by the scan
* @param null_handling Exclude null values when computing the result if null_policy::EXCLUDE.
* Include nulls if null_policy::INCLUDE. Any operation with a null results in
* a null.
Expand All @@ -73,7 +73,7 @@ std::unique_ptr<column> scan_exclusive(column_view const& input,
* @returns Column with scan results.
*/
std::unique_ptr<column> scan_inclusive(column_view const& input,
std::unique_ptr<scan_aggregation> const& agg,
scan_aggregation const& agg,
null_policy null_handling,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr);
Expand Down
6 changes: 3 additions & 3 deletions cpp/include/cudf/reduction.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ enum class scan_type : bool { INCLUSIVE, EXCLUSIVE };
*/
std::unique_ptr<scalar> reduce(
column_view const& col,
std::unique_ptr<reduce_aggregation> const& agg,
reduce_aggregation const& agg,
data_type output_dtype,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

Expand All @@ -89,7 +89,7 @@ std::unique_ptr<scalar> reduce(
*/
std::unique_ptr<scalar> reduce(
column_view const& col,
std::unique_ptr<reduce_aggregation> const& agg,
reduce_aggregation const& agg,
data_type output_dtype,
std::optional<std::reference_wrapper<scalar const>> init,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
Expand Down Expand Up @@ -187,7 +187,7 @@ std::unique_ptr<column> segmented_reduce(
*/
std::unique_ptr<column> scan(
const column_view& input,
std::unique_ptr<scan_aggregation> const& agg,
scan_aggregation const& agg,
scan_type inclusive,
null_policy null_handling = null_policy::EXCLUDE,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
Expand Down
63 changes: 31 additions & 32 deletions cpp/src/reductions/reductions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ struct reduce_dispatch_functor {
}

template <aggregation::Kind k>
std::unique_ptr<scalar> operator()(std::unique_ptr<reduce_aggregation> const& agg)
std::unique_ptr<scalar> operator()(reduce_aggregation const& agg)
{
switch (k) {
case aggregation::SUM: return reduction::sum(col, output_dtype, init, stream, mr);
Expand All @@ -62,12 +62,12 @@ struct reduce_dispatch_functor {
return reduction::sum_of_squares(col, output_dtype, stream, mr);
case aggregation::MEAN: return reduction::mean(col, output_dtype, stream, mr);
case aggregation::VARIANCE: {
auto var_agg = dynamic_cast<var_aggregation const*>(agg.get());
return reduction::variance(col, output_dtype, var_agg->_ddof, stream, mr);
auto var_agg = static_cast<var_aggregation const&>(agg);
return reduction::variance(col, output_dtype, var_agg._ddof, stream, mr);
}
case aggregation::STD: {
auto var_agg = dynamic_cast<std_aggregation const*>(agg.get());
return reduction::standard_deviation(col, output_dtype, var_agg->_ddof, stream, mr);
auto var_agg = static_cast<std_aggregation const&>(agg);
return reduction::standard_deviation(col, output_dtype, var_agg._ddof, stream, mr);
}
case aggregation::MEDIAN: {
auto sorted_indices = sorted_order(table_view{{col}}, {}, {null_order::AFTER}, stream);
Expand All @@ -78,60 +78,59 @@ struct reduce_dispatch_functor {
return get_element(*col_ptr, 0, stream, mr);
}
case aggregation::QUANTILE: {
auto quantile_agg = dynamic_cast<quantile_aggregation const*>(agg.get());
CUDF_EXPECTS(quantile_agg->_quantiles.size() == 1,
auto quantile_agg = static_cast<quantile_aggregation const&>(agg);
CUDF_EXPECTS(quantile_agg._quantiles.size() == 1,
"Reduction quantile accepts only one quantile value");
auto sorted_indices = sorted_order(table_view{{col}}, {}, {null_order::AFTER}, stream);
auto valid_sorted_indices =
split(*sorted_indices, {col.size() - col.null_count()}, stream)[0];

auto col_ptr = quantile(col,
quantile_agg->_quantiles,
quantile_agg->_interpolation,
quantile_agg._quantiles,
quantile_agg._interpolation,
valid_sorted_indices,
true,
stream);
return get_element(*col_ptr, 0, stream, mr);
}
case aggregation::NUNIQUE: {
auto nunique_agg = dynamic_cast<nunique_aggregation const*>(agg.get());
auto nunique_agg = static_cast<nunique_aggregation const&>(agg);
return make_fixed_width_scalar(
detail::distinct_count(
col, nunique_agg->_null_handling, nan_policy::NAN_IS_VALID, stream),
detail::distinct_count(col, nunique_agg._null_handling, nan_policy::NAN_IS_VALID, stream),
stream,
mr);
}
case aggregation::NTH_ELEMENT: {
auto nth_agg = dynamic_cast<nth_element_aggregation const*>(agg.get());
return reduction::nth_element(col, nth_agg->_n, nth_agg->_null_handling, stream, mr);
auto nth_agg = static_cast<nth_element_aggregation const&>(agg);
return reduction::nth_element(col, nth_agg._n, nth_agg._null_handling, stream, mr);
}
case aggregation::COLLECT_LIST: {
auto col_agg = dynamic_cast<collect_list_aggregation const*>(agg.get());
return reduction::collect_list(col, col_agg->_null_handling, stream, mr);
auto col_agg = static_cast<collect_list_aggregation const&>(agg);
return reduction::collect_list(col, col_agg._null_handling, stream, mr);
}
case aggregation::COLLECT_SET: {
auto col_agg = dynamic_cast<collect_set_aggregation const*>(agg.get());
auto col_agg = static_cast<collect_set_aggregation const&>(agg);
return reduction::collect_set(
col, col_agg->_null_handling, col_agg->_nulls_equal, col_agg->_nans_equal, stream, mr);
col, col_agg._null_handling, col_agg._nulls_equal, col_agg._nans_equal, stream, mr);
}
case aggregation::MERGE_LISTS: {
return reduction::merge_lists(col, stream, mr);
}
case aggregation::MERGE_SETS: {
auto col_agg = dynamic_cast<merge_sets_aggregation const*>(agg.get());
return reduction::merge_sets(col, col_agg->_nulls_equal, col_agg->_nans_equal, stream, mr);
auto col_agg = static_cast<merge_sets_aggregation const&>(agg);
return reduction::merge_sets(col, col_agg._nulls_equal, col_agg._nans_equal, stream, mr);
}
case aggregation::TDIGEST: {
CUDF_EXPECTS(output_dtype.id() == type_id::STRUCT,
"Tdigest aggregations expect output type to be STRUCT");
auto td_agg = dynamic_cast<tdigest_aggregation const*>(agg.get());
return detail::tdigest::reduce_tdigest(col, td_agg->max_centroids, stream, mr);
auto td_agg = static_cast<tdigest_aggregation const&>(agg);
return detail::tdigest::reduce_tdigest(col, td_agg.max_centroids, stream, mr);
}
case aggregation::MERGE_TDIGEST: {
CUDF_EXPECTS(output_dtype.id() == type_id::STRUCT,
"Tdigest aggregations expect output type to be STRUCT");
auto td_agg = dynamic_cast<merge_tdigest_aggregation const*>(agg.get());
return detail::tdigest::reduce_merge_tdigest(col, td_agg->max_centroids, stream, mr);
auto td_agg = static_cast<merge_tdigest_aggregation const&>(agg);
return detail::tdigest::reduce_merge_tdigest(col, td_agg.max_centroids, stream, mr);
}
default: CUDF_FAIL("Unsupported reduction operator");
}
Expand All @@ -140,24 +139,24 @@ struct reduce_dispatch_functor {

std::unique_ptr<scalar> reduce(
column_view const& col,
std::unique_ptr<reduce_aggregation> const& agg,
reduce_aggregation const& agg,
data_type output_dtype,
std::optional<std::reference_wrapper<scalar const>> init,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
{
CUDF_EXPECTS(!init.has_value() || col.type() == init.value().get().type(),
"column and initial value must be the same type");
if (init.has_value() && !(agg->kind == aggregation::SUM || agg->kind == aggregation::PRODUCT ||
agg->kind == aggregation::MIN || agg->kind == aggregation::MAX ||
agg->kind == aggregation::ANY || agg->kind == aggregation::ALL)) {
if (init.has_value() && !(agg.kind == aggregation::SUM || agg.kind == aggregation::PRODUCT ||
agg.kind == aggregation::MIN || agg.kind == aggregation::MAX ||
agg.kind == aggregation::ANY || agg.kind == aggregation::ALL)) {
CUDF_FAIL(
"Initial value is only supported for SUM, PRODUCT, MIN, MAX, ANY, and ALL aggregation types");
}
// Returns default scalar if input column is non-valid. In terms of nested columns, we need to
// handcraft the default scalar with input column.
if (col.size() <= col.null_count()) {
if (agg->kind == aggregation::TDIGEST || agg->kind == aggregation::MERGE_TDIGEST) {
if (agg.kind == aggregation::TDIGEST || agg.kind == aggregation::MERGE_TDIGEST) {
return detail::tdigest::make_empty_tdigest_scalar();
}
if (col.type().id() == type_id::EMPTY || col.type() != output_dtype) {
Expand All @@ -176,12 +175,12 @@ std::unique_ptr<scalar> reduce(
}

return aggregation_dispatcher(
agg->kind, reduce_dispatch_functor{col, output_dtype, init, stream, mr}, agg);
agg.kind, reduce_dispatch_functor{col, output_dtype, init, stream, mr}, agg);
}
} // namespace detail

std::unique_ptr<scalar> reduce(column_view const& col,
std::unique_ptr<reduce_aggregation> const& agg,
reduce_aggregation const& agg,
data_type output_dtype,
rmm::mr::device_memory_resource* mr)
{
Expand All @@ -190,7 +189,7 @@ std::unique_ptr<scalar> reduce(column_view const& col,
}

std::unique_ptr<scalar> reduce(column_view const& col,
std::unique_ptr<reduce_aggregation> const& agg,
reduce_aggregation const& agg,
data_type output_dtype,
std::optional<std::reference_wrapper<scalar const>> init,
rmm::mr::device_memory_resource* mr)
Expand Down
8 changes: 4 additions & 4 deletions cpp/src/reductions/scan/scan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,16 +25,16 @@ namespace cudf {

namespace detail {
std::unique_ptr<column> scan(column_view const& input,
std::unique_ptr<scan_aggregation> const& agg,
scan_aggregation const& agg,
scan_type inclusive,
null_policy null_handling,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
if (agg->kind == aggregation::RANK) {
if (agg.kind == aggregation::RANK) {
CUDF_EXPECTS(inclusive == scan_type::INCLUSIVE,
"Rank aggregation operator requires an inclusive scan");
auto const& rank_agg = dynamic_cast<cudf::detail::rank_aggregation const&>(*agg);
auto const& rank_agg = static_cast<cudf::detail::rank_aggregation const&>(agg);
if (rank_agg._method == rank_method::MIN) {
if (rank_agg._percentage == rank_percentage::NONE) {
return inclusive_rank_scan(input, stream, mr);
Expand All @@ -55,7 +55,7 @@ std::unique_ptr<column> scan(column_view const& input,
} // namespace detail

std::unique_ptr<column> scan(column_view const& input,
std::unique_ptr<scan_aggregation> const& agg,
scan_aggregation const& agg,
scan_type inclusive,
null_policy null_handling,
rmm::mr::device_memory_resource* mr)
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/reductions/scan/scan.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,12 @@ rmm::device_buffer mask_scan(column_view const& input_view,

template <template <typename> typename DispatchFn>
std::unique_ptr<column> scan_agg_dispatch(const column_view& input,
std::unique_ptr<scan_aggregation> const& agg,
scan_aggregation const& agg,
null_policy null_handling,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
switch (agg->kind) {
switch (agg.kind) {
case aggregation::SUM:
return type_dispatcher<dispatch_storage_type>(
input.type(), DispatchFn<DeviceSum>(), input, null_handling, stream, mr);
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/reductions/scan/scan_exclusive.cu
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ struct scan_dispatcher {
} // namespace

std::unique_ptr<column> scan_exclusive(const column_view& input,
std::unique_ptr<scan_aggregation> const& agg,
scan_aggregation const& agg,
null_policy null_handling,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/reductions/scan/scan_inclusive.cu
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ struct scan_dispatcher {

std::unique_ptr<column> scan_inclusive(
column_view const& input,
std::unique_ptr<scan_aggregation> const& agg,
scan_aggregation const& agg,
null_policy null_handling,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
Expand Down
2 changes: 1 addition & 1 deletion cpp/tests/quantiles/percentile_approx_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ void percentile_approx_test(column_view const& _keys,
// result is a scalar, but we want to extract out the underlying column
auto scalar_result =
cudf::reduce(values,
cudf::make_tdigest_aggregation<cudf::reduce_aggregation>(delta),
*cudf::make_tdigest_aggregation<cudf::reduce_aggregation>(delta),
data_type{type_id::STRUCT});
auto tbl = static_cast<cudf::struct_scalar const*>(scalar_result.get())->view();
std::vector<std::unique_ptr<cudf::column>> cols;
Expand Down
Loading

0 comments on commit 9c06330

Please sign in to comment.