From 9c06330363db4da99803a3728b8bf44f9829f0b9 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 21 Oct 2022 08:23:14 -0700 Subject: [PATCH] Accept const refs instead of const unique_ptr refs in reduce and scan APIs. (#11960) There is almost never a good reason to pass arguments as `unique_ptr const&`. Since those arguments cannot be modified, the only use case is accessing the underlying pointer, at which point the function better communicates its intent by accepting the underlying pointer/reference as an argument instead and is also more flexible as a result. Resolves #10393 Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Bradley Dice (https://github.com/bdice) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/11960 --- cpp/benchmarks/reduction/anyall.cpp | 2 +- cpp/benchmarks/reduction/dictionary.cpp | 2 +- cpp/benchmarks/reduction/reduce.cpp | 2 +- cpp/benchmarks/reduction/scan.cpp | 2 +- cpp/include/cudf/detail/scan.hpp | 8 +- cpp/include/cudf/reduction.hpp | 6 +- cpp/src/reductions/reductions.cpp | 63 +- cpp/src/reductions/scan/scan.cpp | 8 +- cpp/src/reductions/scan/scan.cuh | 4 +- cpp/src/reductions/scan/scan_exclusive.cu | 2 +- cpp/src/reductions/scan/scan_inclusive.cu | 2 +- cpp/tests/quantiles/percentile_approx_test.cu | 2 +- cpp/tests/reductions/collect_ops_tests.cpp | 22 +- cpp/tests/reductions/list_rank_test.cpp | 16 +- cpp/tests/reductions/rank_tests.cpp | 62 +- cpp/tests/reductions/reduction_tests.cpp | 565 +++++++++--------- cpp/tests/reductions/scan_tests.cpp | 155 +++-- cpp/tests/reductions/tdigest_tests.cu | 6 +- java/src/main/native/src/ColumnViewJni.cpp | 10 +- python/cudf/cudf/_lib/cpp/reduce.pxd | 4 +- python/cudf/cudf/_lib/reduce.pyx | 6 +- 21 files changed, 478 insertions(+), 471 deletions(-) diff --git a/cpp/benchmarks/reduction/anyall.cpp b/cpp/benchmarks/reduction/anyall.cpp index 80a85b0f217..755fa1ca2ad 100644 --- a/cpp/benchmarks/reduction/anyall.cpp +++ b/cpp/benchmarks/reduction/anyall.cpp @@ -41,7 +41,7 @@ void BM_reduction_anyall(benchmark::State& state, for (auto _ : state) { cuda_event_timer timer(state, true); - auto result = cudf::reduce(*values, agg, output_dtype); + auto result = cudf::reduce(*values, *agg, output_dtype); } } diff --git a/cpp/benchmarks/reduction/dictionary.cpp b/cpp/benchmarks/reduction/dictionary.cpp index 219564d6b5c..8f2f0be33ca 100644 --- a/cpp/benchmarks/reduction/dictionary.cpp +++ b/cpp/benchmarks/reduction/dictionary.cpp @@ -51,7 +51,7 @@ void BM_reduction_dictionary(benchmark::State& state, for (auto _ : state) { cuda_event_timer timer(state, true); - auto result = cudf::reduce(*values, agg, output_dtype); + auto result = cudf::reduce(*values, *agg, output_dtype); } } diff --git a/cpp/benchmarks/reduction/reduce.cpp b/cpp/benchmarks/reduction/reduce.cpp index 4e354352c11..4dfa7f0bbdc 100644 --- a/cpp/benchmarks/reduction/reduce.cpp +++ b/cpp/benchmarks/reduction/reduce.cpp @@ -45,7 +45,7 @@ void BM_reduction(benchmark::State& state, std::unique_ptr(), cudf::scan_type::INCLUSIVE); + *column, *cudf::make_min_aggregation(), cudf::scan_type::INCLUSIVE); } } diff --git a/cpp/include/cudf/detail/scan.hpp b/cpp/include/cudf/detail/scan.hpp index 13dddd3b0c8..f4b2d51d0cb 100644 --- a/cpp/include/cudf/detail/scan.hpp +++ b/cpp/include/cudf/detail/scan.hpp @@ -38,7 +38,7 @@ namespace detail { * `agg` is not Min or Max. * * @param input The input column view for the scan. - * @param agg unique_ptr to aggregation operator applied by the scan. + * @param agg Aggregation operator applied by the scan * @param null_handling Exclude null values when computing the result if null_policy::EXCLUDE. * Include nulls if null_policy::INCLUDE. Any operation with a null results in * a null. @@ -47,7 +47,7 @@ namespace detail { * @returns Column with scan results. */ std::unique_ptr scan_exclusive(column_view const& input, - std::unique_ptr const& agg, + scan_aggregation const& agg, null_policy null_handling, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); @@ -64,7 +64,7 @@ std::unique_ptr scan_exclusive(column_view const& input, * but the `agg` is not Min or Max. * * @param input The input column view for the scan. - * @param agg unique_ptr to aggregation operator applied by the scan. + * @param agg Aggregation operator applied by the scan * @param null_handling Exclude null values when computing the result if null_policy::EXCLUDE. * Include nulls if null_policy::INCLUDE. Any operation with a null results in * a null. @@ -73,7 +73,7 @@ std::unique_ptr scan_exclusive(column_view const& input, * @returns Column with scan results. */ std::unique_ptr scan_inclusive(column_view const& input, - std::unique_ptr const& agg, + scan_aggregation const& agg, null_policy null_handling, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); diff --git a/cpp/include/cudf/reduction.hpp b/cpp/include/cudf/reduction.hpp index 083892aa856..7aa7ada6896 100644 --- a/cpp/include/cudf/reduction.hpp +++ b/cpp/include/cudf/reduction.hpp @@ -72,7 +72,7 @@ enum class scan_type : bool { INCLUSIVE, EXCLUSIVE }; */ std::unique_ptr reduce( column_view const& col, - std::unique_ptr const& agg, + reduce_aggregation const& agg, data_type output_dtype, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); @@ -89,7 +89,7 @@ std::unique_ptr reduce( */ std::unique_ptr reduce( column_view const& col, - std::unique_ptr const& agg, + reduce_aggregation const& agg, data_type output_dtype, std::optional> init, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); @@ -187,7 +187,7 @@ std::unique_ptr segmented_reduce( */ std::unique_ptr scan( const column_view& input, - std::unique_ptr const& agg, + scan_aggregation const& agg, scan_type inclusive, null_policy null_handling = null_policy::EXCLUDE, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); diff --git a/cpp/src/reductions/reductions.cpp b/cpp/src/reductions/reductions.cpp index 4166becbf4d..a7d7e14a193 100644 --- a/cpp/src/reductions/reductions.cpp +++ b/cpp/src/reductions/reductions.cpp @@ -49,7 +49,7 @@ struct reduce_dispatch_functor { } template - std::unique_ptr operator()(std::unique_ptr const& agg) + std::unique_ptr operator()(reduce_aggregation const& agg) { switch (k) { case aggregation::SUM: return reduction::sum(col, output_dtype, init, stream, mr); @@ -62,12 +62,12 @@ struct reduce_dispatch_functor { return reduction::sum_of_squares(col, output_dtype, stream, mr); case aggregation::MEAN: return reduction::mean(col, output_dtype, stream, mr); case aggregation::VARIANCE: { - auto var_agg = dynamic_cast(agg.get()); - return reduction::variance(col, output_dtype, var_agg->_ddof, stream, mr); + auto var_agg = static_cast(agg); + return reduction::variance(col, output_dtype, var_agg._ddof, stream, mr); } case aggregation::STD: { - auto var_agg = dynamic_cast(agg.get()); - return reduction::standard_deviation(col, output_dtype, var_agg->_ddof, stream, mr); + auto var_agg = static_cast(agg); + return reduction::standard_deviation(col, output_dtype, var_agg._ddof, stream, mr); } case aggregation::MEDIAN: { auto sorted_indices = sorted_order(table_view{{col}}, {}, {null_order::AFTER}, stream); @@ -78,60 +78,59 @@ struct reduce_dispatch_functor { return get_element(*col_ptr, 0, stream, mr); } case aggregation::QUANTILE: { - auto quantile_agg = dynamic_cast(agg.get()); - CUDF_EXPECTS(quantile_agg->_quantiles.size() == 1, + auto quantile_agg = static_cast(agg); + CUDF_EXPECTS(quantile_agg._quantiles.size() == 1, "Reduction quantile accepts only one quantile value"); auto sorted_indices = sorted_order(table_view{{col}}, {}, {null_order::AFTER}, stream); auto valid_sorted_indices = split(*sorted_indices, {col.size() - col.null_count()}, stream)[0]; auto col_ptr = quantile(col, - quantile_agg->_quantiles, - quantile_agg->_interpolation, + quantile_agg._quantiles, + quantile_agg._interpolation, valid_sorted_indices, true, stream); return get_element(*col_ptr, 0, stream, mr); } case aggregation::NUNIQUE: { - auto nunique_agg = dynamic_cast(agg.get()); + auto nunique_agg = static_cast(agg); return make_fixed_width_scalar( - detail::distinct_count( - col, nunique_agg->_null_handling, nan_policy::NAN_IS_VALID, stream), + detail::distinct_count(col, nunique_agg._null_handling, nan_policy::NAN_IS_VALID, stream), stream, mr); } case aggregation::NTH_ELEMENT: { - auto nth_agg = dynamic_cast(agg.get()); - return reduction::nth_element(col, nth_agg->_n, nth_agg->_null_handling, stream, mr); + auto nth_agg = static_cast(agg); + return reduction::nth_element(col, nth_agg._n, nth_agg._null_handling, stream, mr); } case aggregation::COLLECT_LIST: { - auto col_agg = dynamic_cast(agg.get()); - return reduction::collect_list(col, col_agg->_null_handling, stream, mr); + auto col_agg = static_cast(agg); + return reduction::collect_list(col, col_agg._null_handling, stream, mr); } case aggregation::COLLECT_SET: { - auto col_agg = dynamic_cast(agg.get()); + auto col_agg = static_cast(agg); return reduction::collect_set( - col, col_agg->_null_handling, col_agg->_nulls_equal, col_agg->_nans_equal, stream, mr); + col, col_agg._null_handling, col_agg._nulls_equal, col_agg._nans_equal, stream, mr); } case aggregation::MERGE_LISTS: { return reduction::merge_lists(col, stream, mr); } case aggregation::MERGE_SETS: { - auto col_agg = dynamic_cast(agg.get()); - return reduction::merge_sets(col, col_agg->_nulls_equal, col_agg->_nans_equal, stream, mr); + auto col_agg = static_cast(agg); + return reduction::merge_sets(col, col_agg._nulls_equal, col_agg._nans_equal, stream, mr); } case aggregation::TDIGEST: { CUDF_EXPECTS(output_dtype.id() == type_id::STRUCT, "Tdigest aggregations expect output type to be STRUCT"); - auto td_agg = dynamic_cast(agg.get()); - return detail::tdigest::reduce_tdigest(col, td_agg->max_centroids, stream, mr); + auto td_agg = static_cast(agg); + return detail::tdigest::reduce_tdigest(col, td_agg.max_centroids, stream, mr); } case aggregation::MERGE_TDIGEST: { CUDF_EXPECTS(output_dtype.id() == type_id::STRUCT, "Tdigest aggregations expect output type to be STRUCT"); - auto td_agg = dynamic_cast(agg.get()); - return detail::tdigest::reduce_merge_tdigest(col, td_agg->max_centroids, stream, mr); + auto td_agg = static_cast(agg); + return detail::tdigest::reduce_merge_tdigest(col, td_agg.max_centroids, stream, mr); } default: CUDF_FAIL("Unsupported reduction operator"); } @@ -140,7 +139,7 @@ struct reduce_dispatch_functor { std::unique_ptr reduce( column_view const& col, - std::unique_ptr const& agg, + reduce_aggregation const& agg, data_type output_dtype, std::optional> init, rmm::cuda_stream_view stream = cudf::get_default_stream(), @@ -148,16 +147,16 @@ std::unique_ptr reduce( { CUDF_EXPECTS(!init.has_value() || col.type() == init.value().get().type(), "column and initial value must be the same type"); - if (init.has_value() && !(agg->kind == aggregation::SUM || agg->kind == aggregation::PRODUCT || - agg->kind == aggregation::MIN || agg->kind == aggregation::MAX || - agg->kind == aggregation::ANY || agg->kind == aggregation::ALL)) { + if (init.has_value() && !(agg.kind == aggregation::SUM || agg.kind == aggregation::PRODUCT || + agg.kind == aggregation::MIN || agg.kind == aggregation::MAX || + agg.kind == aggregation::ANY || agg.kind == aggregation::ALL)) { CUDF_FAIL( "Initial value is only supported for SUM, PRODUCT, MIN, MAX, ANY, and ALL aggregation types"); } // Returns default scalar if input column is non-valid. In terms of nested columns, we need to // handcraft the default scalar with input column. if (col.size() <= col.null_count()) { - if (agg->kind == aggregation::TDIGEST || agg->kind == aggregation::MERGE_TDIGEST) { + if (agg.kind == aggregation::TDIGEST || agg.kind == aggregation::MERGE_TDIGEST) { return detail::tdigest::make_empty_tdigest_scalar(); } if (col.type().id() == type_id::EMPTY || col.type() != output_dtype) { @@ -176,12 +175,12 @@ std::unique_ptr reduce( } return aggregation_dispatcher( - agg->kind, reduce_dispatch_functor{col, output_dtype, init, stream, mr}, agg); + agg.kind, reduce_dispatch_functor{col, output_dtype, init, stream, mr}, agg); } } // namespace detail std::unique_ptr reduce(column_view const& col, - std::unique_ptr const& agg, + reduce_aggregation const& agg, data_type output_dtype, rmm::mr::device_memory_resource* mr) { @@ -190,7 +189,7 @@ std::unique_ptr reduce(column_view const& col, } std::unique_ptr reduce(column_view const& col, - std::unique_ptr const& agg, + reduce_aggregation const& agg, data_type output_dtype, std::optional> init, rmm::mr::device_memory_resource* mr) diff --git a/cpp/src/reductions/scan/scan.cpp b/cpp/src/reductions/scan/scan.cpp index c0b787b3a1d..2871ee283ba 100644 --- a/cpp/src/reductions/scan/scan.cpp +++ b/cpp/src/reductions/scan/scan.cpp @@ -25,16 +25,16 @@ namespace cudf { namespace detail { std::unique_ptr scan(column_view const& input, - std::unique_ptr const& agg, + scan_aggregation const& agg, scan_type inclusive, null_policy null_handling, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - if (agg->kind == aggregation::RANK) { + if (agg.kind == aggregation::RANK) { CUDF_EXPECTS(inclusive == scan_type::INCLUSIVE, "Rank aggregation operator requires an inclusive scan"); - auto const& rank_agg = dynamic_cast(*agg); + auto const& rank_agg = static_cast(agg); if (rank_agg._method == rank_method::MIN) { if (rank_agg._percentage == rank_percentage::NONE) { return inclusive_rank_scan(input, stream, mr); @@ -55,7 +55,7 @@ std::unique_ptr scan(column_view const& input, } // namespace detail std::unique_ptr scan(column_view const& input, - std::unique_ptr const& agg, + scan_aggregation const& agg, scan_type inclusive, null_policy null_handling, rmm::mr::device_memory_resource* mr) diff --git a/cpp/src/reductions/scan/scan.cuh b/cpp/src/reductions/scan/scan.cuh index 127f2ae95b4..2ad6124cdd0 100644 --- a/cpp/src/reductions/scan/scan.cuh +++ b/cpp/src/reductions/scan/scan.cuh @@ -35,12 +35,12 @@ rmm::device_buffer mask_scan(column_view const& input_view, template