From 245e68c9fff679c3f5cb11aa32c4e2bd4be97515 Mon Sep 17 00:00:00 2001 From: Dave Baranec Date: Tue, 22 Feb 2022 11:44:57 -0600 Subject: [PATCH 01/19] Add scan_aggregation and reduce_aggregations. C++ side only. --- cpp/include/cudf/aggregation.hpp | 22 + .../cudf/detail/aggregation/aggregation.hpp | 50 +- cpp/include/cudf/detail/scan.hpp | 4 +- cpp/include/cudf/reduction.hpp | 4 +- cpp/src/aggregation/aggregation.cpp | 28 +- cpp/src/reductions/reductions.cpp | 6 +- cpp/src/reductions/scan/scan.cpp | 2 +- cpp/src/reductions/scan/scan.cuh | 2 +- cpp/src/reductions/scan/scan_exclusive.cu | 2 +- cpp/src/reductions/scan/scan_inclusive.cu | 2 +- cpp/tests/reductions/rank_tests.cpp | 8 +- cpp/tests/reductions/reduction_tests.cpp | 651 +++++++++++------- cpp/tests/reductions/scan_tests.cpp | 303 +++++--- 13 files changed, 704 insertions(+), 380 deletions(-) diff --git a/cpp/include/cudf/aggregation.hpp b/cpp/include/cudf/aggregation.hpp index fb5b968671f..9dc8f1750ed 100644 --- a/cpp/include/cudf/aggregation.hpp +++ b/cpp/include/cudf/aggregation.hpp @@ -148,6 +148,28 @@ class groupby_scan_aggregation : public virtual aggregation { groupby_scan_aggregation() {} }; +/** + * @brief Derived class intended for reduction usage. + */ +class reduce_aggregation : public virtual aggregation { + public: + ~reduce_aggregation() override = default; + + protected: + reduce_aggregation() {} +}; + +/** + * @brief Derived class intended for scan usage. + */ +class scan_aggregation : public virtual aggregation { + public: + ~scan_aggregation() override = default; + + protected: + scan_aggregation() {} +}; + enum class udf_type : bool { CUDA, PTX }; enum class correlation_type : int32_t { PEARSON, KENDALL, SPEARMAN }; diff --git a/cpp/include/cudf/detail/aggregation/aggregation.hpp b/cpp/include/cudf/detail/aggregation/aggregation.hpp index a234f4d3715..9fa93edee77 100644 --- a/cpp/include/cudf/detail/aggregation/aggregation.hpp +++ b/cpp/include/cudf/detail/aggregation/aggregation.hpp @@ -147,7 +147,9 @@ class aggregation_finalizer { // Declares the interface for the finalizer */ class sum_aggregation final : public rolling_aggregation, public groupby_aggregation, - public groupby_scan_aggregation { + public groupby_scan_aggregation, + public reduce_aggregation, + public scan_aggregation { public: sum_aggregation() : aggregation(SUM) {} @@ -166,7 +168,9 @@ class sum_aggregation final : public rolling_aggregation, /** * @brief Derived class for specifying a product aggregation */ -class product_aggregation final : public groupby_aggregation { +class product_aggregation final : public groupby_aggregation, + public reduce_aggregation, + public scan_aggregation { public: product_aggregation() : aggregation(PRODUCT) {} @@ -187,7 +191,9 @@ class product_aggregation final : public groupby_aggregation { */ class min_aggregation final : public rolling_aggregation, public groupby_aggregation, - public groupby_scan_aggregation { + public groupby_scan_aggregation, + public reduce_aggregation, + public scan_aggregation { public: min_aggregation() : aggregation(MIN) {} @@ -208,7 +214,9 @@ class min_aggregation final : public rolling_aggregation, */ class max_aggregation final : public rolling_aggregation, public groupby_aggregation, - public groupby_scan_aggregation { + public groupby_scan_aggregation, + public reduce_aggregation, + public scan_aggregation { public: max_aggregation() : aggregation(MAX) {} @@ -248,7 +256,7 @@ class count_aggregation final : public rolling_aggregation, /** * @brief Derived class for specifying an any aggregation */ -class any_aggregation final : public aggregation { +class any_aggregation final : public reduce_aggregation { public: any_aggregation() : aggregation(ANY) {} @@ -267,7 +275,7 @@ class any_aggregation final : public aggregation { /** * @brief Derived class for specifying an all aggregation */ -class all_aggregation final : public aggregation { +class all_aggregation final : public reduce_aggregation { public: all_aggregation() : aggregation(ALL) {} @@ -286,7 +294,7 @@ class all_aggregation final : public aggregation { /** * @brief Derived class for specifying a sum_of_squares aggregation */ -class sum_of_squares_aggregation final : public groupby_aggregation { +class sum_of_squares_aggregation final : public groupby_aggregation, public reduce_aggregation { public: sum_of_squares_aggregation() : aggregation(SUM_OF_SQUARES) {} @@ -305,7 +313,9 @@ class sum_of_squares_aggregation final : public groupby_aggregation { /** * @brief Derived class for specifying a mean aggregation */ -class mean_aggregation final : public rolling_aggregation, public groupby_aggregation { +class mean_aggregation final : public rolling_aggregation, + public groupby_aggregation, + public reduce_aggregation { public: mean_aggregation() : aggregation(MEAN) {} @@ -343,7 +353,9 @@ class m2_aggregation : public groupby_aggregation { /** * @brief Derived class for specifying a standard deviation/variance aggregation */ -class std_var_aggregation : public rolling_aggregation, public groupby_aggregation { +class std_var_aggregation : public rolling_aggregation, + public groupby_aggregation, + public reduce_aggregation { public: size_type _ddof; ///< Delta degrees of freedom @@ -415,7 +427,7 @@ class std_aggregation final : public std_var_aggregation { /** * @brief Derived class for specifying a median aggregation */ -class median_aggregation final : public groupby_aggregation { +class median_aggregation final : public groupby_aggregation, public reduce_aggregation { public: median_aggregation() : aggregation(MEDIAN) {} @@ -434,7 +446,7 @@ class median_aggregation final : public groupby_aggregation { /** * @brief Derived class for specifying a quantile aggregation */ -class quantile_aggregation final : public groupby_aggregation { +class quantile_aggregation final : public groupby_aggregation, public reduce_aggregation { public: quantile_aggregation(std::vector const& q, interpolation i) : aggregation{QUANTILE}, _quantiles{q}, _interpolation{i} @@ -521,7 +533,7 @@ class argmin_aggregation final : public rolling_aggregation, public groupby_aggr /** * @brief Derived class for specifying a nunique aggregation */ -class nunique_aggregation final : public groupby_aggregation { +class nunique_aggregation final : public groupby_aggregation, public reduce_aggregation { public: nunique_aggregation(null_policy null_handling) : aggregation{NUNIQUE}, _null_handling{null_handling} @@ -560,7 +572,7 @@ class nunique_aggregation final : public groupby_aggregation { /** * @brief Derived class for specifying a nth element aggregation */ -class nth_element_aggregation final : public groupby_aggregation { +class nth_element_aggregation final : public groupby_aggregation, public reduce_aggregation { public: nth_element_aggregation(size_type n, null_policy null_handling) : aggregation{NTH_ELEMENT}, _n{n}, _null_handling{null_handling} @@ -622,7 +634,9 @@ class row_number_aggregation final : public rolling_aggregation { /** * @brief Derived class for specifying a rank aggregation */ -class rank_aggregation final : public rolling_aggregation, public groupby_scan_aggregation { +class rank_aggregation final : public rolling_aggregation, + public groupby_scan_aggregation, + public scan_aggregation { public: rank_aggregation() : aggregation{RANK} {} @@ -641,7 +655,9 @@ class rank_aggregation final : public rolling_aggregation, public groupby_scan_a /** * @brief Derived class for specifying a dense rank aggregation */ -class dense_rank_aggregation final : public rolling_aggregation, public groupby_scan_aggregation { +class dense_rank_aggregation final : public rolling_aggregation, + public groupby_scan_aggregation, + public scan_aggregation { public: dense_rank_aggregation() : aggregation{DENSE_RANK} {} @@ -657,7 +673,9 @@ class dense_rank_aggregation final : public rolling_aggregation, public groupby_ void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); } }; -class percent_rank_aggregation final : public rolling_aggregation, public groupby_scan_aggregation { +class percent_rank_aggregation final : public rolling_aggregation, + public groupby_scan_aggregation, + public scan_aggregation { public: percent_rank_aggregation() : aggregation{PERCENT_RANK} {} diff --git a/cpp/include/cudf/detail/scan.hpp b/cpp/include/cudf/detail/scan.hpp index 36dce6caf0b..fc829617c2d 100644 --- a/cpp/include/cudf/detail/scan.hpp +++ b/cpp/include/cudf/detail/scan.hpp @@ -47,7 +47,7 @@ namespace detail { * @returns Column with scan results. */ std::unique_ptr scan_exclusive(column_view const& input, - std::unique_ptr const& agg, + std::unique_ptr const& agg, null_policy null_handling, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); @@ -73,7 +73,7 @@ std::unique_ptr scan_exclusive(column_view const& input, * @returns Column with scan results. */ std::unique_ptr scan_inclusive(column_view const& input, - std::unique_ptr const& agg, + std::unique_ptr const& agg, null_policy null_handling, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); diff --git a/cpp/include/cudf/reduction.hpp b/cpp/include/cudf/reduction.hpp index d094118293b..ea18fb2f1cb 100644 --- a/cpp/include/cudf/reduction.hpp +++ b/cpp/include/cudf/reduction.hpp @@ -65,7 +65,7 @@ enum class scan_type : bool { INCLUSIVE, EXCLUSIVE }; */ std::unique_ptr reduce( column_view const& col, - std::unique_ptr const& agg, + std::unique_ptr const& agg, data_type output_dtype, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); @@ -89,7 +89,7 @@ std::unique_ptr reduce( */ std::unique_ptr scan( const column_view& input, - std::unique_ptr const& agg, + std::unique_ptr const& agg, scan_type inclusive, null_policy null_handling = null_policy::EXCLUDE, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); diff --git a/cpp/src/aggregation/aggregation.cpp b/cpp/src/aggregation/aggregation.cpp index 8e2a167f7b2..48a9f6752eb 100644 --- a/cpp/src/aggregation/aggregation.cpp +++ b/cpp/src/aggregation/aggregation.cpp @@ -417,6 +417,8 @@ template std::unique_ptr make_sum_aggregation(); template std::unique_ptr make_sum_aggregation(); template std::unique_ptr make_sum_aggregation(); template std::unique_ptr make_sum_aggregation(); +template std::unique_ptr make_sum_aggregation(); +template std::unique_ptr make_sum_aggregation(); /// Factory to create a PRODUCT aggregation template @@ -426,6 +428,8 @@ std::unique_ptr make_product_aggregation() } template std::unique_ptr make_product_aggregation(); template std::unique_ptr make_product_aggregation(); +template std::unique_ptr make_product_aggregation(); +template std::unique_ptr make_product_aggregation(); /// Factory to create a MIN aggregation template @@ -437,6 +441,8 @@ template std::unique_ptr make_min_aggregation(); template std::unique_ptr make_min_aggregation(); template std::unique_ptr make_min_aggregation(); template std::unique_ptr make_min_aggregation(); +template std::unique_ptr make_min_aggregation(); +template std::unique_ptr make_min_aggregation(); /// Factory to create a MAX aggregation template @@ -448,6 +454,8 @@ template std::unique_ptr make_max_aggregation(); template std::unique_ptr make_max_aggregation(); template std::unique_ptr make_max_aggregation(); template std::unique_ptr make_max_aggregation(); +template std::unique_ptr make_max_aggregation(); +template std::unique_ptr make_max_aggregation(); /// Factory to create a COUNT aggregation template @@ -472,7 +480,7 @@ std::unique_ptr make_any_aggregation() { return std::make_unique(); } -template std::unique_ptr make_any_aggregation(); +template std::unique_ptr make_any_aggregation(); /// Factory to create a ALL aggregation template @@ -480,7 +488,7 @@ std::unique_ptr make_all_aggregation() { return std::make_unique(); } -template std::unique_ptr make_all_aggregation(); +template std::unique_ptr make_all_aggregation(); /// Factory to create a SUM_OF_SQUARES aggregation template @@ -491,6 +499,7 @@ std::unique_ptr make_sum_of_squares_aggregation() template std::unique_ptr make_sum_of_squares_aggregation(); template std::unique_ptr make_sum_of_squares_aggregation(); +template std::unique_ptr make_sum_of_squares_aggregation(); /// Factory to create a MEAN aggregation template @@ -501,6 +510,7 @@ std::unique_ptr make_mean_aggregation() template std::unique_ptr make_mean_aggregation(); template std::unique_ptr make_mean_aggregation(); template std::unique_ptr make_mean_aggregation(); +template std::unique_ptr make_mean_aggregation(); /// Factory to create a M2 aggregation template @@ -522,6 +532,8 @@ template std::unique_ptr make_variance_aggregation make_variance_aggregation( size_type ddof); +template std::unique_ptr make_variance_aggregation( + size_type ddof); /// Factory to create a STD aggregation template @@ -534,6 +546,8 @@ template std::unique_ptr make_std_aggregation make_std_aggregation( size_type ddof); +template std::unique_ptr make_std_aggregation( + size_type ddof); /// Factory to create a MEDIAN aggregation template @@ -543,6 +557,7 @@ std::unique_ptr make_median_aggregation() } template std::unique_ptr make_median_aggregation(); template std::unique_ptr make_median_aggregation(); +template std::unique_ptr make_median_aggregation(); /// Factory to create a QUANTILE aggregation template @@ -555,6 +570,8 @@ template std::unique_ptr make_quantile_aggregation( std::vector const& quantiles, interpolation interp); template std::unique_ptr make_quantile_aggregation( std::vector const& quantiles, interpolation interp); +template std::unique_ptr make_quantile_aggregation( + std::vector const& quantiles, interpolation interp); /// Factory to create an ARGMAX aggregation template @@ -586,6 +603,8 @@ template std::unique_ptr make_nunique_aggregation( null_policy null_handling); template std::unique_ptr make_nunique_aggregation( null_policy null_handling); +template std::unique_ptr make_nunique_aggregation( + null_policy null_handling); /// Factory to create an NTH_ELEMENT aggregation template @@ -597,6 +616,8 @@ template std::unique_ptr make_nth_element_aggregation( size_type n, null_policy null_handling); template std::unique_ptr make_nth_element_aggregation( size_type n, null_policy null_handling); +template std::unique_ptr make_nth_element_aggregation( + size_type n, null_policy null_handling); /// Factory to create a ROW_NUMBER aggregation template @@ -616,6 +637,7 @@ std::unique_ptr make_rank_aggregation() template std::unique_ptr make_rank_aggregation(); template std::unique_ptr make_rank_aggregation(); +template std::unique_ptr make_rank_aggregation(); /// Factory to create a DENSE_RANK aggregation template @@ -626,6 +648,7 @@ std::unique_ptr make_dense_rank_aggregation() template std::unique_ptr make_dense_rank_aggregation(); template std::unique_ptr make_dense_rank_aggregation(); +template std::unique_ptr make_dense_rank_aggregation(); /// Factory to create a PERCENT_RANK aggregation template @@ -636,6 +659,7 @@ std::unique_ptr make_percent_rank_aggregation() template std::unique_ptr make_percent_rank_aggregation(); template std::unique_ptr make_percent_rank_aggregation(); +template std::unique_ptr make_percent_rank_aggregation(); /// Factory to create a COLLECT_LIST aggregation template diff --git a/cpp/src/reductions/reductions.cpp b/cpp/src/reductions/reductions.cpp index 234eaf51f96..3558b5348ea 100644 --- a/cpp/src/reductions/reductions.cpp +++ b/cpp/src/reductions/reductions.cpp @@ -45,7 +45,7 @@ struct reduce_dispatch_functor { } template - std::unique_ptr operator()(std::unique_ptr const& agg) + std::unique_ptr operator()(std::unique_ptr const& agg) { switch (k) { case aggregation::SUM: return reduction::sum(col, output_dtype, stream, mr); break; @@ -109,7 +109,7 @@ struct reduce_dispatch_functor { std::unique_ptr reduce( column_view const& col, - std::unique_ptr const& agg, + std::unique_ptr const& agg, data_type output_dtype, rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) @@ -129,7 +129,7 @@ std::unique_ptr reduce( } // namespace detail std::unique_ptr reduce(column_view const& col, - std::unique_ptr const& agg, + std::unique_ptr const& agg, data_type output_dtype, rmm::mr::device_memory_resource* mr) { diff --git a/cpp/src/reductions/scan/scan.cpp b/cpp/src/reductions/scan/scan.cpp index d73fc862380..52aaad5ddcf 100644 --- a/cpp/src/reductions/scan/scan.cpp +++ b/cpp/src/reductions/scan/scan.cpp @@ -25,7 +25,7 @@ namespace cudf { std::unique_ptr scan(column_view const& input, - std::unique_ptr const& agg, + std::unique_ptr const& agg, scan_type inclusive, null_policy null_handling, rmm::mr::device_memory_resource* mr) diff --git a/cpp/src/reductions/scan/scan.cuh b/cpp/src/reductions/scan/scan.cuh index 84387aba914..00746aa2d22 100644 --- a/cpp/src/reductions/scan/scan.cuh +++ b/cpp/src/reductions/scan/scan.cuh @@ -35,7 +35,7 @@ rmm::device_buffer mask_scan(column_view const& input_view, template