diff --git a/cpp/benchmarks/groupby/group_nth_benchmark.cu b/cpp/benchmarks/groupby/group_nth_benchmark.cu index 9765a4a265c..c6dbffb162e 100644 --- a/cpp/benchmarks/groupby/group_nth_benchmark.cu +++ b/cpp/benchmarks/groupby/group_nth_benchmark.cu @@ -63,7 +63,7 @@ void BM_pre_sorted_nth(benchmark::State& state) std::vector requests; requests.emplace_back(cudf::groupby::aggregation_request()); requests[0].values = vals; - requests[0].aggregations.push_back(cudf::make_nth_element_aggregation(-1)); + requests[0].aggregations.push_back(cudf::make_nth_element_aggregation(-1)); for (auto _ : state) { cuda_event_timer timer(state, true); diff --git a/cpp/examples/basic/src/process_csv.cpp b/cpp/examples/basic/src/process_csv.cpp index 2467c97393b..cd469af0036 100644 --- a/cpp/examples/basic/src/process_csv.cpp +++ b/cpp/examples/basic/src/process_csv.cpp @@ -25,7 +25,7 @@ void write_csv(cudf::table_view const& tbl_view, std::string const& file_path) } std::vector make_single_aggregation_request( - std::unique_ptr&& agg, cudf::column_view value) + std::unique_ptr&& agg, cudf::column_view value) { std::vector requests; requests.emplace_back(cudf::groupby::aggregation_request()); @@ -42,7 +42,7 @@ std::unique_ptr average_closing_price(cudf::table_view stock_info_t // Compute the average of each company's closing price with entire column cudf::groupby::groupby grpby_obj(keys); - auto requests = make_single_aggregation_request(cudf::make_mean_aggregation(), val); + auto requests = make_single_aggregation_request(cudf::make_mean_aggregation(), val); auto agg_results = grpby_obj.aggregate(requests); diff --git a/cpp/include/cudf/aggregation.hpp b/cpp/include/cudf/aggregation.hpp index 7ac3638b21c..ff665e2706a 100644 --- a/cpp/include/cudf/aggregation.hpp +++ b/cpp/include/cudf/aggregation.hpp @@ -106,8 +106,7 @@ class aggregation { }; /** - * @brief Derived class intended for enforcing operation-specific restrictions - * when calling various cudf functions. + * @brief Derived class intended for rolling_window specific aggregation usage. * * As an example, rolling_window will only accept rolling_aggregation inputs, * and the appropriate derived classes (sum_aggregation, mean_aggregation, etc) @@ -121,6 +120,28 @@ class rolling_aggregation : public virtual aggregation { rolling_aggregation() {} }; +/** + * @brief Derived class intended for groupby specific aggregation usage. + */ +class groupby_aggregation : public virtual aggregation { + public: + ~groupby_aggregation() = default; + + protected: + groupby_aggregation() {} +}; + +/** + * @brief Derived class intended for groupby specific scan usage. + */ +class groupby_scan_aggregation : public virtual aggregation { + public: + ~groupby_scan_aggregation() = default; + + protected: + groupby_scan_aggregation() {} +}; + enum class udf_type : bool { CUDA, PTX }; /// Factory to create a SUM aggregation diff --git a/cpp/include/cudf/detail/aggregation/aggregation.hpp b/cpp/include/cudf/detail/aggregation/aggregation.hpp index 163ad3e480f..4e4c63ae517 100644 --- a/cpp/include/cudf/detail/aggregation/aggregation.hpp +++ b/cpp/include/cudf/detail/aggregation/aggregation.hpp @@ -130,7 +130,9 @@ class aggregation_finalizer { // Declares the interface for the finalizer /** * @brief Derived class for specifying a sum aggregation */ -class sum_aggregation final : public rolling_aggregation { +class sum_aggregation final : public rolling_aggregation, + public groupby_aggregation, + public groupby_scan_aggregation { public: sum_aggregation() : aggregation(SUM) {} @@ -149,7 +151,7 @@ class sum_aggregation final : public rolling_aggregation { /** * @brief Derived class for specifying a product aggregation */ -class product_aggregation final : public aggregation { +class product_aggregation final : public groupby_aggregation { public: product_aggregation() : aggregation(PRODUCT) {} @@ -168,7 +170,9 @@ class product_aggregation final : public aggregation { /** * @brief Derived class for specifying a min aggregation */ -class min_aggregation final : public rolling_aggregation { +class min_aggregation final : public rolling_aggregation, + public groupby_aggregation, + public groupby_scan_aggregation { public: min_aggregation() : aggregation(MIN) {} @@ -187,7 +191,9 @@ class min_aggregation final : public rolling_aggregation { /** * @brief Derived class for specifying a max aggregation */ -class max_aggregation final : public rolling_aggregation { +class max_aggregation final : public rolling_aggregation, + public groupby_aggregation, + public groupby_scan_aggregation { public: max_aggregation() : aggregation(MAX) {} @@ -206,7 +212,9 @@ class max_aggregation final : public rolling_aggregation { /** * @brief Derived class for specifying a count aggregation */ -class count_aggregation final : public rolling_aggregation { +class count_aggregation final : public rolling_aggregation, + public groupby_aggregation, + public groupby_scan_aggregation { public: count_aggregation(aggregation::Kind kind) : aggregation(kind) {} @@ -263,7 +271,7 @@ class all_aggregation final : public aggregation { /** * @brief Derived class for specifying a sum_of_squares aggregation */ -class sum_of_squares_aggregation final : public aggregation { +class sum_of_squares_aggregation final : public groupby_aggregation { public: sum_of_squares_aggregation() : aggregation(SUM_OF_SQUARES) {} @@ -282,7 +290,7 @@ class sum_of_squares_aggregation final : public aggregation { /** * @brief Derived class for specifying a mean aggregation */ -class mean_aggregation final : public rolling_aggregation { +class mean_aggregation final : public rolling_aggregation, public groupby_aggregation { public: mean_aggregation() : aggregation(MEAN) {} @@ -301,7 +309,7 @@ class mean_aggregation final : public rolling_aggregation { /** * @brief Derived class for specifying a m2 aggregation */ -class m2_aggregation : public aggregation { +class m2_aggregation : public groupby_aggregation { public: m2_aggregation() : aggregation{M2} {} @@ -320,7 +328,7 @@ class m2_aggregation : public aggregation { /** * @brief Derived class for specifying a standard deviation/variance aggregation */ -class std_var_aggregation : public aggregation { +class std_var_aggregation : public groupby_aggregation { public: size_type _ddof; ///< Delta degrees of freedom @@ -339,7 +347,6 @@ class std_var_aggregation : public aggregation { CUDF_EXPECTS(k == aggregation::STD or k == aggregation::VARIANCE, "std_var_aggregation can accept only STD, VARIANCE"); } - size_type hash_impl() const { return std::hash{}(_ddof); } }; @@ -348,7 +355,10 @@ class std_var_aggregation : public aggregation { */ class var_aggregation final : public std_var_aggregation { public: - var_aggregation(size_type ddof) : std_var_aggregation{aggregation::VARIANCE, ddof} {} + var_aggregation(size_type ddof) + : aggregation{aggregation::VARIANCE}, std_var_aggregation{aggregation::VARIANCE, ddof} + { + } std::unique_ptr clone() const override { @@ -367,7 +377,10 @@ class var_aggregation final : public std_var_aggregation { */ class std_aggregation final : public std_var_aggregation { public: - std_aggregation(size_type ddof) : std_var_aggregation{aggregation::STD, ddof} {} + std_aggregation(size_type ddof) + : aggregation{aggregation::STD}, std_var_aggregation{aggregation::STD, ddof} + { + } std::unique_ptr clone() const override { @@ -384,7 +397,7 @@ class std_aggregation final : public std_var_aggregation { /** * @brief Derived class for specifying a median aggregation */ -class median_aggregation final : public aggregation { +class median_aggregation final : public groupby_aggregation { public: median_aggregation() : aggregation(MEDIAN) {} @@ -403,7 +416,7 @@ class median_aggregation final : public aggregation { /** * @brief Derived class for specifying a quantile aggregation */ -class quantile_aggregation final : public aggregation { +class quantile_aggregation final : public groupby_aggregation { public: quantile_aggregation(std::vector const& q, interpolation i) : aggregation{QUANTILE}, _quantiles{q}, _interpolation{i} @@ -449,7 +462,7 @@ class quantile_aggregation final : public aggregation { /** * @brief Derived class for specifying an argmax aggregation */ -class argmax_aggregation final : public rolling_aggregation { +class argmax_aggregation final : public rolling_aggregation, public groupby_aggregation { public: argmax_aggregation() : aggregation(ARGMAX) {} @@ -468,7 +481,7 @@ class argmax_aggregation final : public rolling_aggregation { /** * @brief Derived class for specifying an argmin aggregation */ -class argmin_aggregation final : public rolling_aggregation { +class argmin_aggregation final : public rolling_aggregation, public groupby_aggregation { public: argmin_aggregation() : aggregation(ARGMIN) {} @@ -487,7 +500,7 @@ class argmin_aggregation final : public rolling_aggregation { /** * @brief Derived class for specifying a nunique aggregation */ -class nunique_aggregation final : public aggregation { +class nunique_aggregation final : public groupby_aggregation { public: nunique_aggregation(null_policy null_handling) : aggregation{NUNIQUE}, _null_handling{null_handling} @@ -523,7 +536,7 @@ class nunique_aggregation final : public aggregation { /** * @brief Derived class for specifying a nth element aggregation */ -class nth_element_aggregation final : public aggregation { +class nth_element_aggregation final : public groupby_aggregation { public: nth_element_aggregation(size_type n, null_policy null_handling) : aggregation{NTH_ELEMENT}, _n{n}, _null_handling{null_handling} @@ -582,7 +595,7 @@ class row_number_aggregation final : public rolling_aggregation { /** * @brief Derived class for specifying a rank aggregation */ -class rank_aggregation final : public rolling_aggregation { +class rank_aggregation final : public rolling_aggregation, public groupby_scan_aggregation { public: rank_aggregation() : aggregation{RANK} {} @@ -601,7 +614,7 @@ class rank_aggregation final : public rolling_aggregation { /** * @brief Derived class for specifying a dense rank aggregation */ -class dense_rank_aggregation final : public rolling_aggregation { +class dense_rank_aggregation final : public rolling_aggregation, public groupby_scan_aggregation { public: dense_rank_aggregation() : aggregation{DENSE_RANK} {} @@ -620,7 +633,7 @@ class dense_rank_aggregation final : public rolling_aggregation { /** * @brief Derived aggregation class for specifying COLLECT_LIST aggregation */ -class collect_list_aggregation final : public rolling_aggregation { +class collect_list_aggregation final : public rolling_aggregation, public groupby_aggregation { public: explicit collect_list_aggregation(null_policy null_handling = null_policy::INCLUDE) : aggregation{COLLECT_LIST}, _null_handling{null_handling} @@ -656,7 +669,7 @@ class collect_list_aggregation final : public rolling_aggregation { /** * @brief Derived aggregation class for specifying COLLECT_SET aggregation */ -class collect_set_aggregation final : public rolling_aggregation { +class collect_set_aggregation final : public rolling_aggregation, public groupby_aggregation { public: explicit collect_set_aggregation(null_policy null_handling = null_policy::INCLUDE, null_equality nulls_equal = null_equality::EQUAL, @@ -795,7 +808,7 @@ class udf_aggregation final : public rolling_aggregation { /** * @brief Derived aggregation class for specifying MERGE_LISTS aggregation */ -class merge_lists_aggregation final : public aggregation { +class merge_lists_aggregation final : public groupby_aggregation { public: explicit merge_lists_aggregation() : aggregation{MERGE_LISTS} {} @@ -814,7 +827,7 @@ class merge_lists_aggregation final : public aggregation { /** * @brief Derived aggregation class for specifying MERGE_SETS aggregation */ -class merge_sets_aggregation final : public aggregation { +class merge_sets_aggregation final : public groupby_aggregation { public: explicit merge_sets_aggregation(null_equality nulls_equal, nan_equality nans_equal) : aggregation{MERGE_SETS}, _nulls_equal(nulls_equal), _nans_equal(nans_equal) @@ -855,7 +868,7 @@ class merge_sets_aggregation final : public aggregation { /** * @brief Derived aggregation class for specifying MERGE_M2 aggregation */ -class merge_m2_aggregation final : public aggregation { +class merge_m2_aggregation final : public groupby_aggregation { public: explicit merge_m2_aggregation() : aggregation{MERGE_M2} {} diff --git a/cpp/include/cudf/groupby.hpp b/cpp/include/cudf/groupby.hpp index 5656b38a0ef..3b8354ebc9f 100644 --- a/cpp/include/cudf/groupby.hpp +++ b/cpp/include/cudf/groupby.hpp @@ -56,8 +56,23 @@ class sort_groupby_helper; * `values.size()` column must equal `keys.num_rows()`. */ struct aggregation_request { - column_view values; ///< The elements to aggregate - std::vector> aggregations; ///< Desired aggregations + column_view values; ///< The elements to aggregate + std::vector> aggregations; ///< Desired aggregations +}; + +/** + * @brief Request for groupby aggregation(s) for scanning a column. + * + * The group membership of each `value[i]` is determined by the corresponding + * row `i` in the original order of `keys` used to construct the + * `groupby`. I.e., for each `aggregation`, `values[i]` is aggregated with all + * other `values[j]` where rows `i` and `j` in `keys` are equivalent. + * + * `values.size()` column must equal `keys.num_rows()`. + */ +struct scan_request { + column_view values; ///< The elements to aggregate + std::vector> aggregations; ///< Desired aggregations }; /** @@ -222,7 +237,7 @@ class groupby { * specified in `requests`. */ std::pair, std::vector> scan( - host_span requests, + host_span requests, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -388,7 +403,7 @@ class groupby { rmm::mr::device_memory_resource* mr); std::pair, std::vector> sort_scan( - host_span requests, + host_span requests, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); }; diff --git a/cpp/src/aggregation/aggregation.cpp b/cpp/src/aggregation/aggregation.cpp index 016f2367139..f0c522257fb 100644 --- a/cpp/src/aggregation/aggregation.cpp +++ b/cpp/src/aggregation/aggregation.cpp @@ -362,6 +362,8 @@ std::unique_ptr make_sum_aggregation() } template std::unique_ptr make_sum_aggregation(); template std::unique_ptr make_sum_aggregation(); +template std::unique_ptr make_sum_aggregation(); +template std::unique_ptr make_sum_aggregation(); /// Factory to create a PRODUCT aggregation template @@ -370,6 +372,7 @@ std::unique_ptr make_product_aggregation() return std::make_unique(); } template std::unique_ptr make_product_aggregation(); +template std::unique_ptr make_product_aggregation(); /// Factory to create a MIN aggregation template @@ -379,6 +382,8 @@ std::unique_ptr make_min_aggregation() } template std::unique_ptr make_min_aggregation(); template std::unique_ptr make_min_aggregation(); +template std::unique_ptr make_min_aggregation(); +template std::unique_ptr make_min_aggregation(); /// Factory to create a MAX aggregation template @@ -388,6 +393,8 @@ std::unique_ptr make_max_aggregation() } template std::unique_ptr make_max_aggregation(); template std::unique_ptr make_max_aggregation(); +template std::unique_ptr make_max_aggregation(); +template std::unique_ptr make_max_aggregation(); /// Factory to create a COUNT aggregation template @@ -401,6 +408,10 @@ template std::unique_ptr make_count_aggregation( null_policy null_handling); template std::unique_ptr make_count_aggregation( null_policy null_handling); +template std::unique_ptr make_count_aggregation( + null_policy null_handling); +template std::unique_ptr make_count_aggregation( + null_policy null_handling); /// Factory to create a ANY aggregation template @@ -425,6 +436,8 @@ std::unique_ptr make_sum_of_squares_aggregation() return std::make_unique(); } template std::unique_ptr make_sum_of_squares_aggregation(); +template std::unique_ptr +make_sum_of_squares_aggregation(); /// Factory to create a MEAN aggregation template @@ -434,6 +447,7 @@ std::unique_ptr make_mean_aggregation() } template std::unique_ptr make_mean_aggregation(); template std::unique_ptr make_mean_aggregation(); +template std::unique_ptr make_mean_aggregation(); /// Factory to create a M2 aggregation template @@ -442,6 +456,7 @@ std::unique_ptr make_m2_aggregation() return std::make_unique(); } template std::unique_ptr make_m2_aggregation(); +template std::unique_ptr make_m2_aggregation(); /// Factory to create a VARIANCE aggregation template @@ -450,6 +465,8 @@ std::unique_ptr make_variance_aggregation(size_type ddof) return std::make_unique(ddof); } template std::unique_ptr make_variance_aggregation(size_type ddof); +template std::unique_ptr make_variance_aggregation( + size_type ddof); /// Factory to create a STD aggregation template @@ -458,6 +475,8 @@ std::unique_ptr make_std_aggregation(size_type ddof) return std::make_unique(ddof); } template std::unique_ptr make_std_aggregation(size_type ddof); +template std::unique_ptr make_std_aggregation( + size_type ddof); /// Factory to create a MEDIAN aggregation template @@ -466,6 +485,7 @@ std::unique_ptr make_median_aggregation() return std::make_unique(); } template std::unique_ptr make_median_aggregation(); +template std::unique_ptr make_median_aggregation(); /// Factory to create a QUANTILE aggregation template @@ -475,6 +495,8 @@ std::unique_ptr make_quantile_aggregation(std::vector const& q, in } template std::unique_ptr make_quantile_aggregation( std::vector const& q, interpolation i); +template std::unique_ptr make_quantile_aggregation( + std::vector const& q, interpolation i); /// Factory to create an ARGMAX aggregation template @@ -484,6 +506,7 @@ std::unique_ptr make_argmax_aggregation() } template std::unique_ptr make_argmax_aggregation(); template std::unique_ptr make_argmax_aggregation(); +template std::unique_ptr make_argmax_aggregation(); /// Factory to create an ARGMIN aggregation template @@ -493,6 +516,7 @@ std::unique_ptr make_argmin_aggregation() } template std::unique_ptr make_argmin_aggregation(); template std::unique_ptr make_argmin_aggregation(); +template std::unique_ptr make_argmin_aggregation(); /// Factory to create an NUNIQUE aggregation template @@ -502,6 +526,8 @@ std::unique_ptr make_nunique_aggregation(null_policy null_handling) } template std::unique_ptr make_nunique_aggregation( null_policy null_handling); +template std::unique_ptr make_nunique_aggregation( + null_policy null_handling); /// Factory to create an NTH_ELEMENT aggregation template @@ -511,6 +537,8 @@ std::unique_ptr make_nth_element_aggregation(size_type n, null_policy null } template std::unique_ptr make_nth_element_aggregation( size_type n, null_policy null_handling); +template std::unique_ptr make_nth_element_aggregation( + size_type n, null_policy null_handling); /// Factory to create a ROW_NUMBER aggregation template @@ -528,6 +556,8 @@ std::unique_ptr make_rank_aggregation() return std::make_unique(); } template std::unique_ptr make_rank_aggregation(); +template std::unique_ptr +make_rank_aggregation(); /// Factory to create a DENSE_RANK aggregation template @@ -536,6 +566,8 @@ std::unique_ptr make_dense_rank_aggregation() return std::make_unique(); } template std::unique_ptr make_dense_rank_aggregation(); +template std::unique_ptr +make_dense_rank_aggregation(); /// Factory to create a COLLECT_LIST aggregation template @@ -547,6 +579,8 @@ template std::unique_ptr make_collect_list_aggregation null_policy null_handling); template std::unique_ptr make_collect_list_aggregation( null_policy null_handling); +template std::unique_ptr make_collect_list_aggregation( + null_policy null_handling); /// Factory to create a COLLECT_SET aggregation template @@ -560,6 +594,8 @@ template std::unique_ptr make_collect_set_aggregation( null_policy null_handling, null_equality nulls_equal, nan_equality nans_equal); template std::unique_ptr make_collect_set_aggregation( null_policy null_handling, null_equality nulls_equal, nan_equality nans_equal); +template std::unique_ptr make_collect_set_aggregation( + null_policy null_handling, null_equality nulls_equal, nan_equality nans_equal); /// Factory to create a LAG aggregation template @@ -605,6 +641,7 @@ std::unique_ptr make_merge_lists_aggregation() return std::make_unique(); } template std::unique_ptr make_merge_lists_aggregation(); +template std::unique_ptr make_merge_lists_aggregation(); /// Factory to create a MERGE_SETS aggregation template @@ -615,6 +652,8 @@ std::unique_ptr make_merge_sets_aggregation(null_equality nulls_equal, } template std::unique_ptr make_merge_sets_aggregation(null_equality, nan_equality); +template std::unique_ptr make_merge_sets_aggregation( + null_equality, nan_equality); /// Factory to create a MERGE_M2 aggregation template @@ -623,6 +662,7 @@ std::unique_ptr make_merge_m2_aggregation() return std::make_unique(); } template std::unique_ptr make_merge_m2_aggregation(); +template std::unique_ptr make_merge_m2_aggregation(); namespace detail { namespace { diff --git a/cpp/src/groupby/common/utils.hpp b/cpp/src/groupby/common/utils.hpp index e8d5c60f81a..3da20fb9af3 100644 --- a/cpp/src/groupby/common/utils.hpp +++ b/cpp/src/groupby/common/utils.hpp @@ -24,8 +24,10 @@ namespace cudf { namespace groupby { namespace detail { -inline std::vector extract_results( - host_span requests, cudf::detail::result_cache& cache) + +template +inline std::vector extract_results(host_span requests, + cudf::detail::result_cache& cache) { std::vector results(requests.size()); diff --git a/cpp/src/groupby/groupby.cu b/cpp/src/groupby/groupby.cu index 8c43c071a85..a26d69e3d46 100644 --- a/cpp/src/groupby/groupby.cu +++ b/cpp/src/groupby/groupby.cu @@ -120,7 +120,8 @@ struct empty_column_constructor { }; /// Make an empty table with appropriate types for requested aggs -auto empty_results(host_span requests) +template +auto empty_results(host_span requests) { std::vector empty_results; @@ -144,7 +145,8 @@ auto empty_results(host_span requests) } /// Verifies the agg requested on the request's values is valid -void verify_valid_requests(host_span requests) +template +void verify_valid_requests(host_span requests) { CUDF_EXPECTS( std::all_of( @@ -184,7 +186,7 @@ std::pair, std::vector> groupby::aggr // Compute scan requests std::pair, std::vector> groupby::scan( - host_span requests, rmm::mr::device_memory_resource* mr) + host_span requests, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); CUDF_EXPECTS( diff --git a/cpp/src/groupby/sort/scan.cpp b/cpp/src/groupby/sort/scan.cpp index 450a8313402..c43df77bb5e 100644 --- a/cpp/src/groupby/sort/scan.cpp +++ b/cpp/src/groupby/sort/scan.cpp @@ -152,7 +152,7 @@ void scan_result_functor::operator()(aggregation const& // Sort-based groupby std::pair, std::vector> groupby::sort_scan( - host_span requests, + host_span requests, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { diff --git a/cpp/src/io/json/reader_impl.cu b/cpp/src/io/json/reader_impl.cu index a8f117c22bf..85608a0984a 100644 --- a/cpp/src/io/json/reader_impl.cu +++ b/cpp/src/io/json/reader_impl.cu @@ -87,12 +87,12 @@ std::unique_ptr aggregate_keys_info(std::unique_ptr
info) auto const info_view = info->view(); std::vector requests; requests.emplace_back(groupby::aggregation_request{info_view.column(0)}); - requests.back().aggregations.emplace_back(make_min_aggregation()); - requests.back().aggregations.emplace_back(make_nth_element_aggregation(0)); + requests.back().aggregations.emplace_back(make_min_aggregation()); + requests.back().aggregations.emplace_back(make_nth_element_aggregation(0)); requests.emplace_back(groupby::aggregation_request{info_view.column(1)}); - requests.back().aggregations.emplace_back(make_min_aggregation()); - requests.back().aggregations.emplace_back(make_nth_element_aggregation(0)); + requests.back().aggregations.emplace_back(make_min_aggregation()); + requests.back().aggregations.emplace_back(make_nth_element_aggregation(0)); // Aggregate by hash values groupby::groupby gb_obj( diff --git a/cpp/tests/groupby/argmax_tests.cpp b/cpp/tests/groupby/argmax_tests.cpp index 6bf627d7b78..7cf693f7b08 100644 --- a/cpp/tests/groupby/argmax_tests.cpp +++ b/cpp/tests/groupby/argmax_tests.cpp @@ -47,10 +47,10 @@ TYPED_TEST(groupby_argmax_test, basic) fixed_width_column_wrapper expect_keys{1, 2, 3}; fixed_width_column_wrapper expect_vals{0, 1, 2}; - auto agg = cudf::make_argmax_aggregation(); + auto agg = cudf::make_argmax_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); - auto agg2 = cudf::make_argmax_aggregation(); + auto agg2 = cudf::make_argmax_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg2), force_use_sort_impl::YES); } @@ -67,10 +67,10 @@ TYPED_TEST(groupby_argmax_test, zero_valid_keys) fixed_width_column_wrapper expect_keys{}; fixed_width_column_wrapper expect_vals{}; - auto agg = cudf::make_argmax_aggregation(); + auto agg = cudf::make_argmax_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); - auto agg2 = cudf::make_argmax_aggregation(); + auto agg2 = cudf::make_argmax_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg2), force_use_sort_impl::YES); } @@ -87,10 +87,10 @@ TYPED_TEST(groupby_argmax_test, zero_valid_values) fixed_width_column_wrapper expect_keys{1}; fixed_width_column_wrapper expect_vals({0}, all_nulls()); - auto agg = cudf::make_argmax_aggregation(); + auto agg = cudf::make_argmax_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); - auto agg2 = cudf::make_argmax_aggregation(); + auto agg2 = cudf::make_argmax_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg2), force_use_sort_impl::YES); } @@ -111,10 +111,10 @@ TYPED_TEST(groupby_argmax_test, null_keys_and_values) // {6, 3, 5, 4, 0, 2, 1, -} fixed_width_column_wrapper expect_vals({3, 4, 7, 0}, {1, 1, 1, 0}); - auto agg = cudf::make_argmax_aggregation(); + auto agg = cudf::make_argmax_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); - auto agg2 = cudf::make_argmax_aggregation(); + auto agg2 = cudf::make_argmax_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg2), force_use_sort_impl::YES); } @@ -132,10 +132,10 @@ TEST_F(groupby_argmax_string_test, basic) fixed_width_column_wrapper expect_keys{1, 2, 3}; fixed_width_column_wrapper expect_vals({0, 4, 2}); - auto agg = cudf::make_argmax_aggregation(); + auto agg = cudf::make_argmax_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); - auto agg2 = cudf::make_argmax_aggregation(); + auto agg2 = cudf::make_argmax_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg2), force_use_sort_impl::YES); } @@ -150,10 +150,10 @@ TEST_F(groupby_argmax_string_test, zero_valid_values) fixed_width_column_wrapper expect_keys{1}; fixed_width_column_wrapper expect_vals({0}, all_nulls()); - auto agg = cudf::make_argmax_aggregation(); + auto agg = cudf::make_argmax_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); - auto agg2 = cudf::make_argmax_aggregation(); + auto agg2 = cudf::make_argmax_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg2), force_use_sort_impl::YES); } @@ -172,12 +172,13 @@ TEST_F(groupby_dictionary_argmax_test, basic) fixed_width_column_wrapper expect_vals({ 0, 4, 2 }); // clang-format on - test_single_agg(keys, vals, expect_keys, expect_vals, cudf::make_argmax_aggregation()); + test_single_agg( + keys, vals, expect_keys, expect_vals, cudf::make_argmax_aggregation()); test_single_agg(keys, vals, expect_keys, expect_vals, - cudf::make_argmax_aggregation(), + cudf::make_argmax_aggregation(), force_use_sort_impl::YES); } diff --git a/cpp/tests/groupby/argmin_tests.cpp b/cpp/tests/groupby/argmin_tests.cpp index d192c1b21b1..915575546c9 100644 --- a/cpp/tests/groupby/argmin_tests.cpp +++ b/cpp/tests/groupby/argmin_tests.cpp @@ -47,10 +47,10 @@ TYPED_TEST(groupby_argmin_test, basic) fixed_width_column_wrapper expect_keys{1, 2, 3}; fixed_width_column_wrapper expect_vals{6, 9, 8}; - auto agg = cudf::make_argmin_aggregation(); + auto agg = cudf::make_argmin_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); - auto agg2 = cudf::make_argmin_aggregation(); + auto agg2 = cudf::make_argmin_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg2), force_use_sort_impl::YES); } @@ -67,10 +67,10 @@ TYPED_TEST(groupby_argmin_test, zero_valid_keys) fixed_width_column_wrapper expect_keys{}; fixed_width_column_wrapper expect_vals{}; - auto agg = cudf::make_argmin_aggregation(); + auto agg = cudf::make_argmin_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); - auto agg2 = cudf::make_argmin_aggregation(); + auto agg2 = cudf::make_argmin_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg2), force_use_sort_impl::YES); } @@ -87,10 +87,10 @@ TYPED_TEST(groupby_argmin_test, zero_valid_values) fixed_width_column_wrapper expect_keys{1}; fixed_width_column_wrapper expect_vals({0}, all_nulls()); - auto agg = cudf::make_argmin_aggregation(); + auto agg = cudf::make_argmin_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); - auto agg2 = cudf::make_argmin_aggregation(); + auto agg2 = cudf::make_argmin_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg2), force_use_sort_impl::YES); } @@ -111,11 +111,11 @@ TYPED_TEST(groupby_argmin_test, null_keys_and_values) // { 9, 6, 8, 5, 0, 7, 1, -} fixed_width_column_wrapper expect_vals({3, 9, 8, 0}, {1, 1, 1, 0}); - auto agg = cudf::make_argmin_aggregation(); + auto agg = cudf::make_argmin_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); // TODO: explore making this a gtest parameter - auto agg2 = cudf::make_argmin_aggregation(); + auto agg2 = cudf::make_argmin_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg2), force_use_sort_impl::YES); } @@ -133,10 +133,10 @@ TEST_F(groupby_argmin_string_test, basic) fixed_width_column_wrapper expect_keys{1, 2, 3}; fixed_width_column_wrapper expect_vals({3, 5, 7}); - auto agg = cudf::make_argmin_aggregation(); + auto agg = cudf::make_argmin_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); - auto agg2 = cudf::make_argmin_aggregation(); + auto agg2 = cudf::make_argmin_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg2), force_use_sort_impl::YES); } @@ -151,10 +151,10 @@ TEST_F(groupby_argmin_string_test, zero_valid_values) fixed_width_column_wrapper expect_keys{1}; fixed_width_column_wrapper expect_vals({0}, all_nulls()); - auto agg = cudf::make_argmin_aggregation(); + auto agg = cudf::make_argmin_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); - auto agg2 = cudf::make_argmin_aggregation(); + auto agg2 = cudf::make_argmin_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg2), force_use_sort_impl::YES); } @@ -173,12 +173,13 @@ TEST_F(groupby_dictionary_argmin_test, basic) fixed_width_column_wrapper expect_vals({ 3, 5, 7 }); // clang-format on - test_single_agg(keys, vals, expect_keys, expect_vals, cudf::make_argmin_aggregation()); + test_single_agg( + keys, vals, expect_keys, expect_vals, cudf::make_argmin_aggregation()); test_single_agg(keys, vals, expect_keys, expect_vals, - cudf::make_argmin_aggregation(), + cudf::make_argmin_aggregation(), force_use_sort_impl::YES); } diff --git a/cpp/tests/groupby/collect_list_tests.cpp b/cpp/tests/groupby/collect_list_tests.cpp index 43c62743b9f..009917dabae 100644 --- a/cpp/tests/groupby/collect_list_tests.cpp +++ b/cpp/tests/groupby/collect_list_tests.cpp @@ -45,7 +45,7 @@ TYPED_TEST(groupby_collect_list_test, CollectWithoutNulls) fixed_width_column_wrapper expect_keys{1, 2}; lists_column_wrapper expect_vals{{1, 2, 3}, {4, 5, 6}}; - auto agg = cudf::make_collect_list_aggregation(); + auto agg = cudf::make_collect_list_aggregation(); test_single_agg(keys, values, expect_keys, expect_vals, std::move(agg)); } @@ -64,7 +64,7 @@ TYPED_TEST(groupby_collect_list_test, CollectWithNulls) lists_column_wrapper expect_vals{ {{1, 2}, validity.begin()}, {{3, 4}, validity.begin()}, {{5, 6}, validity.begin()}}; - auto agg = cudf::make_collect_list_aggregation(); + auto agg = cudf::make_collect_list_aggregation(); test_single_agg(keys, values, expect_keys, expect_vals, std::move(agg)); } @@ -82,7 +82,7 @@ TYPED_TEST(groupby_collect_list_test, CollectWithNullExclusion) lists_column_wrapper expect_vals{{2}, {4}, {}, {8, 9}}; - auto agg = cudf::make_collect_list_aggregation(null_policy::EXCLUDE); + auto agg = cudf::make_collect_list_aggregation(null_policy::EXCLUDE); test_single_agg(keys, values, expect_keys, expect_vals, std::move(agg)); } @@ -97,7 +97,7 @@ TYPED_TEST(groupby_collect_list_test, CollectOnEmptyInput) fixed_width_column_wrapper expect_keys{}; lists_column_wrapper expect_vals{}; - auto agg = cudf::make_collect_list_aggregation(null_policy::EXCLUDE); + auto agg = cudf::make_collect_list_aggregation(null_policy::EXCLUDE); test_single_agg(keys, values, expect_keys, expect_vals, std::move(agg)); } @@ -116,7 +116,7 @@ TYPED_TEST(groupby_collect_list_test, CollectLists) lists_column_wrapper expect_vals{ {{1, 2}, {3, 4}}, {{5, 6, 7}, LCW{}}, {{9, 10}, {11}}}; - auto agg = cudf::make_collect_list_aggregation(); + auto agg = cudf::make_collect_list_aggregation(); test_single_agg(keys, values, expect_keys, expect_vals, std::move(agg)); } @@ -135,7 +135,7 @@ TYPED_TEST(groupby_collect_list_test, CollectListsWithNullExclusion) LCW expect_vals{{{1, 2}}, {LCW{}}, {{9, 10}, {11}}, {}}; - auto agg = cudf::make_collect_list_aggregation(null_policy::EXCLUDE); + auto agg = cudf::make_collect_list_aggregation(null_policy::EXCLUDE); test_single_agg(keys, values, expect_keys, expect_vals, std::move(agg)); } @@ -158,7 +158,7 @@ TYPED_TEST(groupby_collect_list_test, CollectOnEmptyInputLists) auto expect_values = cudf::make_lists_column(0, make_empty_column(offsets), std::move(expect_child), 0, {}); - auto agg = cudf::make_collect_list_aggregation(); + auto agg = cudf::make_collect_list_aggregation(); test_single_agg(keys, values->view(), expect_keys, expect_values->view(), std::move(agg)); } @@ -190,7 +190,7 @@ TYPED_TEST(groupby_collect_list_test, CollectOnEmptyInputListsOfStructs) auto expect_values = cudf::make_lists_column( 0, make_empty_column(data_type{type_to_id()}), std::move(expect_child), 0, {}); - auto agg = cudf::make_collect_list_aggregation(); + auto agg = cudf::make_collect_list_aggregation(); test_single_agg(keys, values->view(), expect_keys, expect_values->view(), std::move(agg)); } @@ -212,8 +212,11 @@ TYPED_TEST(groupby_collect_list_test, dictionary) 0, rmm::device_buffer{}); - test_single_agg( - keys, vals, expect_keys, expect_vals->view(), cudf::make_collect_list_aggregation()); + test_single_agg(keys, + vals, + expect_keys, + expect_vals->view(), + cudf::make_collect_list_aggregation()); } } // namespace test diff --git a/cpp/tests/groupby/collect_set_tests.cpp b/cpp/tests/groupby/collect_set_tests.cpp index 2f89b04c745..198caabfca9 100644 --- a/cpp/tests/groupby/collect_set_tests.cpp +++ b/cpp/tests/groupby/collect_set_tests.cpp @@ -33,16 +33,20 @@ namespace test { #define VALIDITY std::initializer_list struct CollectSetTest : public cudf::test::BaseFixture { - static auto collect_set() { return cudf::make_collect_set_aggregation(); } + static auto collect_set() + { + return cudf::make_collect_set_aggregation(); + } static auto collect_set_null_unequal() { - return cudf::make_collect_set_aggregation(null_policy::INCLUDE, null_equality::UNEQUAL); + return cudf::make_collect_set_aggregation(null_policy::INCLUDE, + null_equality::UNEQUAL); } static auto collect_set_null_exclude() { - return cudf::make_collect_set_aggregation(null_policy::EXCLUDE); + return cudf::make_collect_set_aggregation(null_policy::EXCLUDE); } }; @@ -174,7 +178,7 @@ TEST_F(CollectSetTest, FloatsWithNaN) vals, keys_expected, vals_expected, - cudf::make_collect_set_aggregation( + cudf::make_collect_set_aggregation( null_policy::INCLUDE, null_equality::EQUAL, nan_equality::ALL_EQUAL)); // null unequal with nan equal vals_expected = { @@ -183,7 +187,7 @@ TEST_F(CollectSetTest, FloatsWithNaN) vals, keys_expected, vals_expected, - cudf::make_collect_set_aggregation( + cudf::make_collect_set_aggregation( null_policy::INCLUDE, null_equality::UNEQUAL, nan_equality::ALL_EQUAL)); } diff --git a/cpp/tests/groupby/count_scan_tests.cpp b/cpp/tests/groupby/count_scan_tests.cpp index 9740bfa1954..62e8b11241d 100644 --- a/cpp/tests/groupby/count_scan_tests.cpp +++ b/cpp/tests/groupby/count_scan_tests.cpp @@ -53,11 +53,11 @@ TYPED_TEST(groupby_count_scan_test, basic) result_wrapper expect_vals{0, 1, 2, 0, 1, 2, 3, 0, 1, 2}; // clang-format on - auto agg1 = cudf::make_count_aggregation(); + auto agg1 = cudf::make_count_aggregation(); CUDF_EXPECT_THROW_MESSAGE(test_single_scan(keys, vals, expect_keys, expect_vals, std::move(agg1)), "Unsupported groupby scan aggregation"); - auto agg2 = cudf::make_count_aggregation(null_policy::INCLUDE); + auto agg2 = cudf::make_count_aggregation(null_policy::INCLUDE); test_single_scan(keys, vals, expect_keys, expect_vals, std::move(agg2)); } @@ -74,10 +74,10 @@ TYPED_TEST(groupby_count_scan_test, empty_cols) result_wrapper expect_vals; // clang-format on - auto agg1 = cudf::make_count_aggregation(); + auto agg1 = cudf::make_count_aggregation(); EXPECT_NO_THROW(test_single_scan(keys, vals, expect_keys, expect_vals, std::move(agg1))); - auto agg2 = cudf::make_count_aggregation(null_policy::INCLUDE); + auto agg2 = cudf::make_count_aggregation(null_policy::INCLUDE); test_single_scan(keys, vals, expect_keys, expect_vals, std::move(agg2)); } @@ -94,7 +94,7 @@ TYPED_TEST(groupby_count_scan_test, zero_valid_keys) result_wrapper expect_vals{}; // clang-format on - auto agg2 = cudf::make_count_aggregation(null_policy::INCLUDE); + auto agg2 = cudf::make_count_aggregation(null_policy::INCLUDE); test_single_scan(keys, vals, expect_keys, expect_vals, std::move(agg2)); } @@ -111,7 +111,7 @@ TYPED_TEST(groupby_count_scan_test, zero_valid_values) result_wrapper expect_vals{0, 1, 2}; // clang-format on - auto agg2 = cudf::make_count_aggregation(null_policy::INCLUDE); + auto agg2 = cudf::make_count_aggregation(null_policy::INCLUDE); test_single_scan(keys, vals, expect_keys, expect_vals, std::move(agg2)); } @@ -130,7 +130,7 @@ TYPED_TEST(groupby_count_scan_test, null_keys_and_values) result_wrapper expect_vals{0, 1, 2, 0, 1, 2, 3, 0, 1, 0}; // clang-format on - auto agg2 = cudf::make_count_aggregation(null_policy::INCLUDE); + auto agg2 = cudf::make_count_aggregation(null_policy::INCLUDE); test_single_scan(keys, vals, expect_keys, expect_vals, std::move(agg2)); } @@ -151,7 +151,7 @@ TEST_F(groupby_count_scan_string_test, basic) result_wrapper expect_vals{0, 0, 0, 1, 0, 1}; // clang-format on - auto agg2 = cudf::make_count_aggregation(null_policy::INCLUDE); + auto agg2 = cudf::make_count_aggregation(null_policy::INCLUDE); test_single_scan(keys, vals, expect_keys, expect_vals, std::move(agg2)); } @@ -182,10 +182,14 @@ TYPED_TEST(FixedPointTestBothReps, GroupByCountScan) // clang-format on CUDF_EXPECT_THROW_MESSAGE( - test_single_scan(keys, vals, expect_keys, expect_vals, cudf::make_count_aggregation()), + test_single_scan(keys, + vals, + expect_keys, + expect_vals, + cudf::make_count_aggregation()), "Unsupported groupby scan aggregation"); - auto agg2 = cudf::make_count_aggregation(null_policy::INCLUDE); + auto agg2 = cudf::make_count_aggregation(null_policy::INCLUDE); test_single_scan(keys, vals, expect_keys, expect_vals, std::move(agg2)); } @@ -205,11 +209,14 @@ TEST_F(groupby_dictionary_count_scan_test, basic) result_wrapper expect_vals{0, 0, 0, 1, 0, 1}; // clang-format on - auto agg1 = cudf::make_count_aggregation(); + auto agg1 = cudf::make_count_aggregation(); CUDF_EXPECT_THROW_MESSAGE(test_single_scan(keys, vals, expect_keys, expect_vals, std::move(agg1)), "Unsupported groupby scan aggregation"); - test_single_scan( - keys, vals, expect_keys, expect_vals, cudf::make_count_aggregation(null_policy::INCLUDE)); + test_single_scan(keys, + vals, + expect_keys, + expect_vals, + cudf::make_count_aggregation(null_policy::INCLUDE)); } } // namespace test diff --git a/cpp/tests/groupby/count_tests.cpp b/cpp/tests/groupby/count_tests.cpp index 2d45de04607..cbb821767c9 100644 --- a/cpp/tests/groupby/count_tests.cpp +++ b/cpp/tests/groupby/count_tests.cpp @@ -45,13 +45,13 @@ TYPED_TEST(groupby_count_test, basic) fixed_width_column_wrapper expect_keys{1, 2, 3}; fixed_width_column_wrapper expect_vals{3, 4, 3}; - auto agg = cudf::make_count_aggregation(); + auto agg = cudf::make_count_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); - auto agg1 = cudf::make_count_aggregation(); + auto agg1 = cudf::make_count_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg1), force_use_sort_impl::YES); - auto agg2 = cudf::make_count_aggregation(null_policy::INCLUDE); + auto agg2 = cudf::make_count_aggregation(null_policy::INCLUDE); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg2)); } @@ -66,10 +66,10 @@ TYPED_TEST(groupby_count_test, empty_cols) fixed_width_column_wrapper expect_keys{}; fixed_width_column_wrapper expect_vals; - auto agg = cudf::make_count_aggregation(); + auto agg = cudf::make_count_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); - auto agg1 = cudf::make_count_aggregation(); + auto agg1 = cudf::make_count_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg1), force_use_sort_impl::YES); } @@ -84,13 +84,13 @@ TYPED_TEST(groupby_count_test, zero_valid_keys) fixed_width_column_wrapper expect_keys{}; fixed_width_column_wrapper expect_vals{}; - auto agg = cudf::make_count_aggregation(); + auto agg = cudf::make_count_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); - auto agg1 = cudf::make_count_aggregation(); + auto agg1 = cudf::make_count_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg1), force_use_sort_impl::YES); - auto agg2 = cudf::make_count_aggregation(null_policy::INCLUDE); + auto agg2 = cudf::make_count_aggregation(null_policy::INCLUDE); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg2)); } @@ -105,14 +105,14 @@ TYPED_TEST(groupby_count_test, zero_valid_values) fixed_width_column_wrapper expect_keys{1}; fixed_width_column_wrapper expect_vals{0}; - auto agg = cudf::make_count_aggregation(); + auto agg = cudf::make_count_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); - auto agg1 = cudf::make_count_aggregation(); + auto agg1 = cudf::make_count_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg1), force_use_sort_impl::YES); fixed_width_column_wrapper expect_vals2{3}; - auto agg2 = cudf::make_count_aggregation(null_policy::INCLUDE); + auto agg2 = cudf::make_count_aggregation(null_policy::INCLUDE); test_single_agg(keys, vals, expect_keys, expect_vals2, std::move(agg2)); } @@ -133,14 +133,14 @@ TYPED_TEST(groupby_count_test, null_keys_and_values) fixed_width_column_wrapper expect_vals({2, 3, 2, 0}); // clang-format on - auto agg = cudf::make_count_aggregation(); + auto agg = cudf::make_count_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); - auto agg1 = cudf::make_count_aggregation(); + auto agg1 = cudf::make_count_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg1), force_use_sort_impl::YES); fixed_width_column_wrapper expect_vals2{3, 4, 2, 1}; - auto agg2 = cudf::make_count_aggregation(null_policy::INCLUDE); + auto agg2 = cudf::make_count_aggregation(null_policy::INCLUDE); test_single_agg(keys, vals, expect_keys, expect_vals2, std::move(agg2)); } @@ -160,10 +160,10 @@ TEST_F(groupby_count_string_test, basic) fixed_width_column_wrapper expect_keys{0, 1, 3, 5}; fixed_width_column_wrapper expect_vals{1, 1, 2, 2}; - auto agg = cudf::make_count_aggregation(); + auto agg = cudf::make_count_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); - auto agg1 = cudf::make_count_aggregation(); + auto agg1 = cudf::make_count_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg1), force_use_sort_impl::YES); } // clang-format on @@ -191,13 +191,13 @@ TYPED_TEST(FixedPointTestBothReps, GroupByCount) auto const expect_keys = fixed_width_column_wrapper{1, 2, 3}; auto const expect_vals = fixed_width_column_wrapper{3, 4, 3}; - auto agg = cudf::make_count_aggregation(); + auto agg = cudf::make_count_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); - auto agg1 = cudf::make_count_aggregation(); + auto agg1 = cudf::make_count_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg1), force_use_sort_impl::YES); - auto agg2 = cudf::make_count_aggregation(null_policy::INCLUDE); + auto agg2 = cudf::make_count_aggregation(null_policy::INCLUDE); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg2)); } @@ -216,9 +216,14 @@ TEST_F(groupby_dictionary_count_test, basic) fixed_width_column_wrapper expect_vals{1, 1, 2, 2}; // clang-format on - test_single_agg(keys, vals, expect_keys, expect_vals, cudf::make_count_aggregation()); test_single_agg( - keys, vals, expect_keys, expect_vals, cudf::make_count_aggregation(), force_use_sort_impl::YES); + keys, vals, expect_keys, expect_vals, cudf::make_count_aggregation()); + test_single_agg(keys, + vals, + expect_keys, + expect_vals, + cudf::make_count_aggregation(), + force_use_sort_impl::YES); } } // namespace test diff --git a/cpp/tests/groupby/groupby_test_util.hpp b/cpp/tests/groupby/groupby_test_util.hpp index 9a083ac8e74..542205b5b51 100644 --- a/cpp/tests/groupby/groupby_test_util.hpp +++ b/cpp/tests/groupby/groupby_test_util.hpp @@ -63,7 +63,7 @@ inline void test_single_agg(column_view const& keys, column_view const& values, column_view const& expect_keys, column_view const& expect_vals, - std::unique_ptr&& agg, + std::unique_ptr&& agg, force_use_sort_impl use_sort = force_use_sort_impl::NO, null_policy include_null_keys = null_policy::EXCLUDE, sorted keys_are_sorted = sorted::NO, @@ -78,7 +78,7 @@ inline void test_single_agg(column_view const& keys, if (use_sort == force_use_sort_impl::YES) { // WAR to force groupby to use sort implementation - requests[0].aggregations.push_back(make_nth_element_aggregation(0)); + requests[0].aggregations.push_back(make_nth_element_aggregation(0)); } groupby::groupby gb_obj( @@ -105,14 +105,14 @@ inline void test_single_scan(column_view const& keys, column_view const& values, column_view const& expect_keys, column_view const& expect_vals, - std::unique_ptr&& agg, + std::unique_ptr&& agg, null_policy include_null_keys = null_policy::EXCLUDE, sorted keys_are_sorted = sorted::NO, std::vector const& column_order = {}, std::vector const& null_precedence = {}) { - std::vector requests; - requests.emplace_back(groupby::aggregation_request()); + std::vector requests; + requests.emplace_back(groupby::scan_request()); requests[0].values = values; requests[0].aggregations.push_back(std::move(agg)); diff --git a/cpp/tests/groupby/keys_tests.cpp b/cpp/tests/groupby/keys_tests.cpp index 91db37a5ff6..683eeb7eb01 100644 --- a/cpp/tests/groupby/keys_tests.cpp +++ b/cpp/tests/groupby/keys_tests.cpp @@ -50,7 +50,7 @@ TYPED_TEST(groupby_keys_test, basic) fixed_width_column_wrapper expect_vals { 3, 4, 3 }; // clang-format on - auto agg = cudf::make_count_aggregation(); + auto agg = cudf::make_count_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); } @@ -68,7 +68,7 @@ TYPED_TEST(groupby_keys_test, zero_valid_keys) fixed_width_column_wrapper expect_vals { }; // clang-format on - auto agg = cudf::make_count_aggregation(); + auto agg = cudf::make_count_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); } @@ -89,7 +89,7 @@ TYPED_TEST(groupby_keys_test, some_null_keys) fixed_width_column_wrapper expect_vals { 3, 4, 2, 1}; // clang-format on - auto agg = cudf::make_count_aggregation(); + auto agg = cudf::make_count_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); } @@ -111,7 +111,7 @@ TYPED_TEST(groupby_keys_test, include_null_keys) fixed_width_column_wrapper expect_vals { 9, 19, 10, 4, 7}; // clang-format on - auto agg = cudf::make_sum_aggregation(); + auto agg = cudf::make_sum_aggregation(); test_single_agg(keys, vals, expect_keys, @@ -135,7 +135,7 @@ TYPED_TEST(groupby_keys_test, pre_sorted_keys) fixed_width_column_wrapper expect_vals { 3, 18, 24, 4}; // clang-format on - auto agg = cudf::make_sum_aggregation(); + auto agg = cudf::make_sum_aggregation(); test_single_agg(keys, vals, expect_keys, @@ -160,7 +160,7 @@ TYPED_TEST(groupby_keys_test, pre_sorted_keys_descending) fixed_width_column_wrapper expect_vals { 0, 6, 22, 21 }; // clang-format on - auto agg = cudf::make_sum_aggregation(); + auto agg = cudf::make_sum_aggregation(); test_single_agg(keys, vals, expect_keys, @@ -187,7 +187,7 @@ TYPED_TEST(groupby_keys_test, pre_sorted_keys_nullable) fixed_width_column_wrapper expect_vals { 3, 15, 17, 4}; // clang-format on - auto agg = cudf::make_sum_aggregation(); + auto agg = cudf::make_sum_aggregation(); test_single_agg(keys, vals, expect_keys, @@ -215,7 +215,7 @@ TYPED_TEST(groupby_keys_test, pre_sorted_keys_nulls_before_include_nulls) fixed_width_column_wrapper expect_vals { 3, 7, 11, 7, 17, 4}; // clang-format on - auto agg = cudf::make_sum_aggregation(); + auto agg = cudf::make_sum_aggregation(); test_single_agg(keys, vals, expect_keys, @@ -234,10 +234,11 @@ TYPED_TEST(groupby_keys_test, mismatch_num_rows) fixed_width_column_wrapper keys{1, 2, 3}; fixed_width_column_wrapper vals{0, 1, 2, 3, 4}; - auto agg = cudf::make_count_aggregation(); + auto agg = cudf::make_count_aggregation(); CUDF_EXPECT_THROW_MESSAGE(test_single_agg(keys, vals, keys, vals, std::move(agg)), "Size mismatch between request values and groupby keys."); - CUDF_EXPECT_THROW_MESSAGE(test_single_scan(keys, vals, keys, vals, std::move(agg)), + auto agg2 = cudf::make_count_aggregation(); + CUDF_EXPECT_THROW_MESSAGE(test_single_scan(keys, vals, keys, vals, std::move(agg2)), "Size mismatch between request values and groupby keys."); } @@ -257,7 +258,7 @@ TEST_F(groupby_string_keys_test, basic) fixed_width_column_wrapper expect_vals { 9, 19, 17 }; // clang-format on - auto agg = cudf::make_sum_aggregation(); + auto agg = cudf::make_sum_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); } // clang-format on @@ -278,9 +279,14 @@ TEST_F(groupby_dictionary_keys_test, basic) fixed_width_column_wrapper expect_vals({ 9, 19, 17 }); // clang-format on - test_single_agg(keys, vals, expect_keys, expect_vals, cudf::make_sum_aggregation()); test_single_agg( - keys, vals, expect_keys, expect_vals, cudf::make_sum_aggregation(), force_use_sort_impl::YES); + keys, vals, expect_keys, expect_vals, cudf::make_sum_aggregation()); + test_single_agg(keys, + vals, + expect_keys, + expect_vals, + cudf::make_sum_aggregation(), + force_use_sort_impl::YES); } } // namespace test diff --git a/cpp/tests/groupby/m2_tests.cpp b/cpp/tests/groupby/m2_tests.cpp index 7b338a0d9b8..be7d6c1ce05 100644 --- a/cpp/tests/groupby/m2_tests.cpp +++ b/cpp/tests/groupby/m2_tests.cpp @@ -44,7 +44,7 @@ auto compute_M2(cudf::column_view const& keys, cudf::column_view const& values) std::vector requests; requests.emplace_back(cudf::groupby::aggregation_request()); requests[0].values = values; - requests[0].aggregations.emplace_back(cudf::make_m2_aggregation()); + requests[0].aggregations.emplace_back(cudf::make_m2_aggregation()); auto gb_obj = cudf::groupby::groupby(cudf::table_view({keys})); auto result = gb_obj.aggregate(requests); diff --git a/cpp/tests/groupby/max_scan_tests.cpp b/cpp/tests/groupby/max_scan_tests.cpp index 70a48da69e8..4d83dc9f7ba 100644 --- a/cpp/tests/groupby/max_scan_tests.cpp +++ b/cpp/tests/groupby/max_scan_tests.cpp @@ -55,7 +55,7 @@ TYPED_TEST(groupby_max_scan_test, basic) result_wrapper expect_vals({5, 8, 8, 6, 9, 9, 9, 7, 7, 7}); // clang-format on - auto agg = cudf::make_max_aggregation(); + auto agg = cudf::make_max_aggregation(); test_single_scan(keys, vals, expect_keys, expect_vals, std::move(agg)); } @@ -70,7 +70,7 @@ TYPED_TEST(groupby_max_scan_test, empty_cols) key_wrapper expect_keys{}; result_wrapper expect_vals{}; - auto agg = cudf::make_max_aggregation(); + auto agg = cudf::make_max_aggregation(); test_single_scan(keys, vals, expect_keys, expect_vals, std::move(agg)); } @@ -87,7 +87,7 @@ TYPED_TEST(groupby_max_scan_test, zero_valid_keys) result_wrapper expect_vals{}; // clang-format on - auto agg = cudf::make_max_aggregation(); + auto agg = cudf::make_max_aggregation(); test_single_scan(keys, vals, expect_keys, expect_vals, std::move(agg)); } @@ -104,7 +104,7 @@ TYPED_TEST(groupby_max_scan_test, zero_valid_values) result_wrapper expect_vals({-1, -1, -1}, all_nulls()); // clang-format on - auto agg = cudf::make_max_aggregation(); + auto agg = cudf::make_max_aggregation(); test_single_scan(keys, vals, expect_keys, expect_vals, std::move(agg)); } @@ -124,7 +124,7 @@ TYPED_TEST(groupby_max_scan_test, null_keys_and_values) { 0, 1, 1, 1, 1, 0, 1, 1, 1, 0}); // clang-format on - auto agg = cudf::make_max_aggregation(); + auto agg = cudf::make_max_aggregation(); test_single_scan(keys, vals, expect_keys, expect_vals, std::move(agg)); } @@ -152,7 +152,7 @@ TYPED_TEST(FixedPointTestBothReps, GroupBySortMaxScanDecimalAsValue) auto const expect_vals_max = fp_wrapper{{5, 8, 8, 6, 9, 9, 9, 7, 7, 7}, scale}; // clang-format on - auto agg = cudf::make_max_aggregation(); + auto agg = cudf::make_max_aggregation(); test_single_scan(keys, vals, expect_keys, expect_vals_max, std::move(agg)); } } diff --git a/cpp/tests/groupby/max_tests.cpp b/cpp/tests/groupby/max_tests.cpp index b5710d3f4bc..a1e34b625e8 100644 --- a/cpp/tests/groupby/max_tests.cpp +++ b/cpp/tests/groupby/max_tests.cpp @@ -46,10 +46,10 @@ TYPED_TEST(groupby_max_test, basic) fixed_width_column_wrapper expect_keys{1, 2, 3}; fixed_width_column_wrapper expect_vals({6, 9, 8}); - auto agg = cudf::make_max_aggregation(); + auto agg = cudf::make_max_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); - auto agg2 = cudf::make_max_aggregation(); + auto agg2 = cudf::make_max_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg2), force_use_sort_impl::YES); } @@ -64,10 +64,10 @@ TYPED_TEST(groupby_max_test, empty_cols) fixed_width_column_wrapper expect_keys{}; fixed_width_column_wrapper expect_vals{}; - auto agg = cudf::make_max_aggregation(); + auto agg = cudf::make_max_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); - auto agg2 = cudf::make_max_aggregation(); + auto agg2 = cudf::make_max_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg2), force_use_sort_impl::YES); } @@ -82,10 +82,10 @@ TYPED_TEST(groupby_max_test, zero_valid_keys) fixed_width_column_wrapper expect_keys{}; fixed_width_column_wrapper expect_vals{}; - auto agg = cudf::make_max_aggregation(); + auto agg = cudf::make_max_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); - auto agg2 = cudf::make_max_aggregation(); + auto agg2 = cudf::make_max_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg2), force_use_sort_impl::YES); } @@ -100,10 +100,10 @@ TYPED_TEST(groupby_max_test, zero_valid_values) fixed_width_column_wrapper expect_keys{1}; fixed_width_column_wrapper expect_vals({0}, all_nulls()); - auto agg = cudf::make_max_aggregation(); + auto agg = cudf::make_max_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); - auto agg2 = cudf::make_max_aggregation(); + auto agg2 = cudf::make_max_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg2), force_use_sort_impl::YES); } @@ -122,10 +122,10 @@ TYPED_TEST(groupby_max_test, null_keys_and_values) // { 0, 3, 1, 4, 5, 2, 8, -} fixed_width_column_wrapper expect_vals({3, 5, 8, 0}, {1, 1, 1, 0}); - auto agg = cudf::make_max_aggregation(); + auto agg = cudf::make_max_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); - auto agg2 = cudf::make_max_aggregation(); + auto agg2 = cudf::make_max_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg2), force_use_sort_impl::YES); } @@ -140,10 +140,10 @@ TEST_F(groupby_max_string_test, basic) fixed_width_column_wrapper expect_keys{1, 2, 3}; strings_column_wrapper expect_vals({"año", "zit", "₹1"}); - auto agg = cudf::make_max_aggregation(); + auto agg = cudf::make_max_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); - auto agg2 = cudf::make_max_aggregation(); + auto agg2 = cudf::make_max_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg2), force_use_sort_impl::YES); } @@ -155,10 +155,10 @@ TEST_F(groupby_max_string_test, zero_valid_values) fixed_width_column_wrapper expect_keys{1}; strings_column_wrapper expect_vals({""}, all_nulls()); - auto agg = cudf::make_max_aggregation(); + auto agg = cudf::make_max_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); - auto agg2 = cudf::make_max_aggregation(); + auto agg2 = cudf::make_max_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg2), force_use_sort_impl::YES); } @@ -187,7 +187,7 @@ TEST_F(groupby_max_string_test, max_sorted_strings) // fixed_width_column_wrapper expect_argmax( // {6, 10, 14, 18, 22, 26, 30, 34, 38, 42, -1}, // {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0}); - auto agg = cudf::make_max_aggregation(); + auto agg = cudf::make_max_aggregation(); test_single_agg(keys, vals, expect_keys, @@ -214,12 +214,16 @@ TEST_F(groupby_dictionary_max_test, basic) auto expect_vals = cudf::dictionary::set_keys(expect_vals_w, vals.keys()); - test_single_agg(keys, vals, expect_keys, expect_vals->view(), cudf::make_max_aggregation()); test_single_agg(keys, vals, expect_keys, expect_vals->view(), - cudf::make_max_aggregation(), + cudf::make_max_aggregation()); + test_single_agg(keys, + vals, + expect_keys, + expect_vals->view(), + cudf::make_max_aggregation(), force_use_sort_impl::YES); } @@ -247,7 +251,7 @@ TYPED_TEST(FixedPointTestBothReps, GroupBySortMaxDecimalAsValue) auto const expect_keys = fixed_width_column_wrapper{1, 2, 3}; auto const expect_vals_max = fp_wrapper{{6, 9, 8}, scale}; - auto agg3 = cudf::make_max_aggregation(); + auto agg3 = cudf::make_max_aggregation(); test_single_agg( keys, vals, expect_keys, expect_vals_max, std::move(agg3), force_use_sort_impl::YES); } @@ -271,7 +275,7 @@ TYPED_TEST(FixedPointTestBothReps, GroupByHashMaxDecimalAsValue) auto const expect_keys = fixed_width_column_wrapper{1, 2, 3}; auto const expect_vals_max = fp_wrapper{{6, 9, 8}, scale}; - auto agg7 = cudf::make_max_aggregation(); + auto agg7 = cudf::make_max_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals_max, std::move(agg7)); } } diff --git a/cpp/tests/groupby/mean_tests.cpp b/cpp/tests/groupby/mean_tests.cpp index bac95b11e81..613e1555b79 100644 --- a/cpp/tests/groupby/mean_tests.cpp +++ b/cpp/tests/groupby/mean_tests.cpp @@ -67,7 +67,7 @@ TYPED_TEST(groupby_mean_test, basic) fixed_width_column_wrapper expect_vals(expect_v.cbegin(), expect_v.cend()); // clang-format on - auto agg = cudf::make_mean_aggregation(); + auto agg = cudf::make_mean_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); } @@ -82,7 +82,7 @@ TYPED_TEST(groupby_mean_test, empty_cols) fixed_width_column_wrapper expect_keys{}; fixed_width_column_wrapper expect_vals{}; - auto agg = cudf::make_mean_aggregation(); + auto agg = cudf::make_mean_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); } @@ -97,7 +97,7 @@ TYPED_TEST(groupby_mean_test, zero_valid_keys) fixed_width_column_wrapper expect_keys{}; fixed_width_column_wrapper expect_vals{}; - auto agg = cudf::make_mean_aggregation(); + auto agg = cudf::make_mean_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); } @@ -112,7 +112,7 @@ TYPED_TEST(groupby_mean_test, zero_valid_values) fixed_width_column_wrapper expect_keys{1}; fixed_width_column_wrapper expect_vals({0}, all_nulls()); - auto agg = cudf::make_mean_aggregation(); + auto agg = cudf::make_mean_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); } @@ -135,7 +135,7 @@ TYPED_TEST(groupby_mean_test, null_keys_and_values) fixed_width_column_wrapper expect_vals(expect_v.cbegin(), expect_v.cend(), {1, 1, 1, 0}); // clang-format on - auto agg = cudf::make_mean_aggregation(); + auto agg = cudf::make_mean_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); } // clang-format on @@ -156,7 +156,8 @@ TEST_F(groupby_dictionary_mean_test, basic) fixed_width_column_wrapper expect_vals({9. / 3, 19. / 4, 17. / 3}); // clang-format on - test_single_agg(keys, vals, expect_keys, expect_vals, cudf::make_mean_aggregation()); + test_single_agg( + keys, vals, expect_keys, expect_vals, cudf::make_mean_aggregation()); } } // namespace test diff --git a/cpp/tests/groupby/median_tests.cpp b/cpp/tests/groupby/median_tests.cpp index 18979820911..86d89325401 100644 --- a/cpp/tests/groupby/median_tests.cpp +++ b/cpp/tests/groupby/median_tests.cpp @@ -51,7 +51,7 @@ TYPED_TEST(groupby_median_test, basic) fixed_width_column_wrapper expect_vals({3., 4.5, 7.}, no_nulls()); // clang-format on - auto agg = cudf::make_median_aggregation(); + auto agg = cudf::make_median_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); } @@ -66,7 +66,7 @@ TYPED_TEST(groupby_median_test, empty_cols) fixed_width_column_wrapper expect_keys{}; fixed_width_column_wrapper expect_vals{}; - auto agg = cudf::make_median_aggregation(); + auto agg = cudf::make_median_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); } @@ -81,7 +81,7 @@ TYPED_TEST(groupby_median_test, zero_valid_keys) fixed_width_column_wrapper expect_keys{}; fixed_width_column_wrapper expect_vals{}; - auto agg = cudf::make_median_aggregation(); + auto agg = cudf::make_median_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); } @@ -96,7 +96,7 @@ TYPED_TEST(groupby_median_test, zero_valid_values) fixed_width_column_wrapper expect_keys{1}; fixed_width_column_wrapper expect_vals({0}, all_nulls()); - auto agg = cudf::make_median_aggregation(); + auto agg = cudf::make_median_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); } @@ -115,7 +115,7 @@ TYPED_TEST(groupby_median_test, null_keys_and_values) // { 3, 6, 1, 4, 9, 2, 8, -} fixed_width_column_wrapper expect_vals({4.5, 4., 5., 0.}, {1, 1, 1, 0}); - auto agg = cudf::make_median_aggregation(); + auto agg = cudf::make_median_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); } @@ -134,7 +134,8 @@ TYPED_TEST(groupby_median_test, dictionary) fixed_width_column_wrapper expect_vals({3., 4.5, 7. }, no_nulls()); // clang-format on - test_single_agg(keys, vals, expect_keys, expect_vals, cudf::make_median_aggregation()); + test_single_agg( + keys, vals, expect_keys, expect_vals, cudf::make_median_aggregation()); } } // namespace test diff --git a/cpp/tests/groupby/merge_lists_tests.cpp b/cpp/tests/groupby/merge_lists_tests.cpp index 29c6185e3a5..b6b1d1a1720 100644 --- a/cpp/tests/groupby/merge_lists_tests.cpp +++ b/cpp/tests/groupby/merge_lists_tests.cpp @@ -42,7 +42,8 @@ auto merge_lists(vcol_views const& keys_cols, vcol_views const& values_cols) std::vector requests; requests.emplace_back(cudf::groupby::aggregation_request()); requests[0].values = *values; - requests[0].aggregations.emplace_back(cudf::make_merge_lists_aggregation()); + requests[0].aggregations.emplace_back( + cudf::make_merge_lists_aggregation()); auto gb_obj = cudf::groupby::groupby(cudf::table_view({*keys})); auto result = gb_obj.aggregate(requests); diff --git a/cpp/tests/groupby/merge_m2_tests.cpp b/cpp/tests/groupby/merge_m2_tests.cpp index 3ec8bfec774..60067e78022 100644 --- a/cpp/tests/groupby/merge_m2_tests.cpp +++ b/cpp/tests/groupby/merge_m2_tests.cpp @@ -59,9 +59,9 @@ auto compute_partial_results(cudf::column_view const& keys, cudf::column_view co std::vector requests; requests.emplace_back(cudf::groupby::aggregation_request()); requests[0].values = values; - requests[0].aggregations.emplace_back(cudf::make_count_aggregation()); - requests[0].aggregations.emplace_back(cudf::make_mean_aggregation()); - requests[0].aggregations.emplace_back(cudf::make_m2_aggregation()); + requests[0].aggregations.emplace_back(cudf::make_count_aggregation()); + requests[0].aggregations.emplace_back(cudf::make_mean_aggregation()); + requests[0].aggregations.emplace_back(cudf::make_m2_aggregation()); auto gb_obj = cudf::groupby::groupby(cudf::table_view({keys})); auto [out_keys, out_results] = gb_obj.aggregate(requests); @@ -88,7 +88,8 @@ auto merge_M2(vcol_views const& keys_cols, vcol_views const& values_cols) std::vector requests; requests.emplace_back(cudf::groupby::aggregation_request()); requests[0].values = *values; - requests[0].aggregations.emplace_back(cudf::make_merge_m2_aggregation()); + requests[0].aggregations.emplace_back( + cudf::make_merge_m2_aggregation()); auto gb_obj = cudf::groupby::groupby(cudf::table_view({*keys})); auto result = gb_obj.aggregate(requests); diff --git a/cpp/tests/groupby/merge_sets_tests.cpp b/cpp/tests/groupby/merge_sets_tests.cpp index ee4f61bf44f..5a65774b430 100644 --- a/cpp/tests/groupby/merge_sets_tests.cpp +++ b/cpp/tests/groupby/merge_sets_tests.cpp @@ -42,7 +42,8 @@ auto merge_sets(vcol_views const& keys_cols, vcol_views const& values_cols) std::vector requests; requests.emplace_back(cudf::groupby::aggregation_request()); requests[0].values = *values; - requests[0].aggregations.emplace_back(cudf::make_merge_sets_aggregation()); + requests[0].aggregations.emplace_back( + cudf::make_merge_sets_aggregation()); auto gb_obj = cudf::groupby::groupby(cudf::table_view({*keys})); auto result = gb_obj.aggregate(requests); diff --git a/cpp/tests/groupby/min_scan_tests.cpp b/cpp/tests/groupby/min_scan_tests.cpp index ef548407761..452f70eaf16 100644 --- a/cpp/tests/groupby/min_scan_tests.cpp +++ b/cpp/tests/groupby/min_scan_tests.cpp @@ -53,7 +53,7 @@ TYPED_TEST(groupby_min_scan_test, basic) result_wrapper expect_vals({5, 5, 1, 6, 6, 0, 0, 7, 2, 2}); // clang-format on - auto agg = cudf::make_min_aggregation(); + auto agg = cudf::make_min_aggregation(); test_single_scan(keys, vals, expect_keys, expect_vals, std::move(agg)); } @@ -68,7 +68,7 @@ TYPED_TEST(groupby_min_scan_test, empty_cols) key_wrapper expect_keys{}; result_wrapper expect_vals{}; - auto agg = cudf::make_min_aggregation(); + auto agg = cudf::make_min_aggregation(); test_single_scan(keys, vals, expect_keys, expect_vals, std::move(agg)); } @@ -85,7 +85,7 @@ TYPED_TEST(groupby_min_scan_test, zero_valid_keys) result_wrapper expect_vals{}; // clang-format on - auto agg = cudf::make_min_aggregation(); + auto agg = cudf::make_min_aggregation(); test_single_scan(keys, vals, expect_keys, expect_vals, std::move(agg)); } @@ -102,7 +102,7 @@ TYPED_TEST(groupby_min_scan_test, zero_valid_values) result_wrapper expect_vals({-1, -1, -1}, all_nulls()); // clang-format on - auto agg = cudf::make_min_aggregation(); + auto agg = cudf::make_min_aggregation(); test_single_scan(keys, vals, expect_keys, expect_vals, std::move(agg)); } @@ -122,7 +122,7 @@ TYPED_TEST(groupby_min_scan_test, null_keys_and_values) { 0, 1, 1, 1, 1, 0, 1, 1, 1, 0}); // clang-format on - auto agg = cudf::make_min_aggregation(); + auto agg = cudf::make_min_aggregation(); test_single_scan(keys, vals, expect_keys, expect_vals, std::move(agg)); } @@ -137,7 +137,7 @@ TEST_F(groupby_min_scan_string_test, basic) key_wrapper expect_keys{1, 1, 1, 2, 2, 2, 2, 3, 3, 3}; strings_column_wrapper expect_vals; - auto agg = cudf::make_min_aggregation(); + auto agg = cudf::make_min_aggregation(); CUDF_EXPECT_THROW_MESSAGE(test_single_scan(keys, vals, expect_keys, expect_vals, std::move(agg)), "Unsupported groupby scan type-agg combination"); } @@ -167,7 +167,7 @@ TYPED_TEST(FixedPointTestBothReps, GroupBySortMinScanDecimalAsValue) auto const expect_vals_min = fp_wrapper{{5, 5, 1, 6, 6, 0, 0, 7, 2, 2}, scale}; // clang-format on - auto agg = cudf::make_min_aggregation(); + auto agg = cudf::make_min_aggregation(); test_single_scan(keys, vals, expect_keys, expect_vals_min, std::move(agg)); } } diff --git a/cpp/tests/groupby/min_tests.cpp b/cpp/tests/groupby/min_tests.cpp index 1544e867595..59e9d540709 100644 --- a/cpp/tests/groupby/min_tests.cpp +++ b/cpp/tests/groupby/min_tests.cpp @@ -46,10 +46,10 @@ TYPED_TEST(groupby_min_test, basic) fixed_width_column_wrapper expect_keys{1, 2, 3}; fixed_width_column_wrapper expect_vals({0, 1, 2}); - auto agg = cudf::make_min_aggregation(); + auto agg = cudf::make_min_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); - auto agg2 = cudf::make_min_aggregation(); + auto agg2 = cudf::make_min_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg2), force_use_sort_impl::YES); } @@ -64,10 +64,10 @@ TYPED_TEST(groupby_min_test, empty_cols) fixed_width_column_wrapper expect_keys{}; fixed_width_column_wrapper expect_vals{}; - auto agg = cudf::make_min_aggregation(); + auto agg = cudf::make_min_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); - auto agg2 = cudf::make_min_aggregation(); + auto agg2 = cudf::make_min_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg2), force_use_sort_impl::YES); } @@ -82,10 +82,10 @@ TYPED_TEST(groupby_min_test, zero_valid_keys) fixed_width_column_wrapper expect_keys{}; fixed_width_column_wrapper expect_vals{}; - auto agg = cudf::make_min_aggregation(); + auto agg = cudf::make_min_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); - auto agg2 = cudf::make_min_aggregation(); + auto agg2 = cudf::make_min_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg2), force_use_sort_impl::YES); } @@ -100,10 +100,10 @@ TYPED_TEST(groupby_min_test, zero_valid_values) fixed_width_column_wrapper expect_keys{1}; fixed_width_column_wrapper expect_vals({0}, all_nulls()); - auto agg = cudf::make_min_aggregation(); + auto agg = cudf::make_min_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); - auto agg2 = cudf::make_min_aggregation(); + auto agg2 = cudf::make_min_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg2), force_use_sort_impl::YES); } @@ -122,10 +122,10 @@ TYPED_TEST(groupby_min_test, null_keys_and_values) // { 3, 6, 1, 4, 9, 2, 8, -} fixed_width_column_wrapper expect_vals({3, 1, 2, 0}, {1, 1, 1, 0}); - auto agg = cudf::make_min_aggregation(); + auto agg = cudf::make_min_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); - auto agg2 = cudf::make_min_aggregation(); + auto agg2 = cudf::make_min_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg2), force_use_sort_impl::YES); } @@ -140,10 +140,10 @@ TEST_F(groupby_min_string_test, basic) fixed_width_column_wrapper expect_keys{1, 2, 3}; strings_column_wrapper expect_vals({"aaa", "bat", "$1"}); - auto agg = cudf::make_min_aggregation(); + auto agg = cudf::make_min_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); - auto agg2 = cudf::make_min_aggregation(); + auto agg2 = cudf::make_min_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg2), force_use_sort_impl::YES); } @@ -155,10 +155,10 @@ TEST_F(groupby_min_string_test, zero_valid_values) fixed_width_column_wrapper expect_keys{1}; strings_column_wrapper expect_vals({""}, all_nulls()); - auto agg = cudf::make_min_aggregation(); + auto agg = cudf::make_min_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); - auto agg2 = cudf::make_min_aggregation(); + auto agg2 = cudf::make_min_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg2), force_use_sort_impl::YES); } @@ -187,7 +187,7 @@ TEST_F(groupby_min_string_test, min_sorted_strings) // fixed_width_column_wrapper expect_argmin( // {6, 10, 14, 18, 22, 26, 30, 34, 38, 42, -1}, // {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0}); - auto agg = cudf::make_min_aggregation(); + auto agg = cudf::make_min_aggregation(); test_single_agg(keys, vals, expect_keys, @@ -214,12 +214,16 @@ TEST_F(groupby_dictionary_min_test, basic) auto expect_vals = cudf::dictionary::set_keys(expect_vals_w, vals.keys()); - test_single_agg(keys, vals, expect_keys, expect_vals->view(), cudf::make_min_aggregation()); test_single_agg(keys, vals, expect_keys, expect_vals->view(), - cudf::make_min_aggregation(), + cudf::make_min_aggregation()); + test_single_agg(keys, + vals, + expect_keys, + expect_vals->view(), + cudf::make_min_aggregation(), force_use_sort_impl::YES); } @@ -246,7 +250,7 @@ TYPED_TEST(FixedPointTestBothReps, GroupBySortMinDecimalAsValue) auto const expect_keys = fixed_width_column_wrapper{1, 2, 3}; auto const expect_vals_min = fp_wrapper{{0, 1, 2}, scale}; - auto agg2 = cudf::make_min_aggregation(); + auto agg2 = cudf::make_min_aggregation(); test_single_agg( keys, vals, expect_keys, expect_vals_min, std::move(agg2), force_use_sort_impl::YES); } @@ -270,7 +274,7 @@ TYPED_TEST(FixedPointTestBothReps, GroupByHashMinDecimalAsValue) auto const expect_keys = fixed_width_column_wrapper{1, 2, 3}; auto const expect_vals_min = fp_wrapper{{0, 1, 2}, scale}; - auto agg6 = cudf::make_min_aggregation(); + auto agg6 = cudf::make_min_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals_min, std::move(agg6)); } } diff --git a/cpp/tests/groupby/nth_element_tests.cpp b/cpp/tests/groupby/nth_element_tests.cpp index d5029147906..22f1e14815f 100644 --- a/cpp/tests/groupby/nth_element_tests.cpp +++ b/cpp/tests/groupby/nth_element_tests.cpp @@ -50,15 +50,15 @@ TYPED_TEST(groupby_nth_element_test, basic) fixed_width_column_wrapper expect_keys{1, 2, 3}; //groupby.first() - auto agg = cudf::make_nth_element_aggregation(0); + auto agg = cudf::make_nth_element_aggregation(0); fixed_width_column_wrapper expect_vals0({0, 1, 2}); test_single_agg(keys, vals, expect_keys, expect_vals0, std::move(agg)); - agg = cudf::make_nth_element_aggregation(1); + agg = cudf::make_nth_element_aggregation(1); fixed_width_column_wrapper expect_vals1({3, 4, 7}); test_single_agg(keys, vals, expect_keys, expect_vals1, std::move(agg)); - agg = cudf::make_nth_element_aggregation(2); + agg = cudf::make_nth_element_aggregation(2); fixed_width_column_wrapper expect_vals2({6, 5, 8}); test_single_agg(keys, vals, expect_keys, expect_vals2, std::move(agg)); } @@ -75,7 +75,7 @@ TYPED_TEST(groupby_nth_element_test, empty_cols) fixed_width_column_wrapper expect_keys{}; fixed_width_column_wrapper expect_vals{}; - auto agg = cudf::make_nth_element_aggregation(0); + auto agg = cudf::make_nth_element_aggregation(0); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); } @@ -90,7 +90,7 @@ TYPED_TEST(groupby_nth_element_test, basic_out_of_bounds) fixed_width_column_wrapper expect_keys{1, 2, 3}; - auto agg = cudf::make_nth_element_aggregation(3); + auto agg = cudf::make_nth_element_aggregation(3); fixed_width_column_wrapper expect_vals({0, 9, 0}, {0, 1, 0}); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); } @@ -109,15 +109,15 @@ TYPED_TEST(groupby_nth_element_test, negative) fixed_width_column_wrapper expect_keys{1, 2, 3}; //groupby.last() - auto agg = cudf::make_nth_element_aggregation(-1); + auto agg = cudf::make_nth_element_aggregation(-1); fixed_width_column_wrapper expect_vals0({6, 9, 8}); test_single_agg(keys, vals, expect_keys, expect_vals0, std::move(agg)); - agg = cudf::make_nth_element_aggregation(-2); + agg = cudf::make_nth_element_aggregation(-2); fixed_width_column_wrapper expect_vals1({3, 5, 7}); test_single_agg(keys, vals, expect_keys, expect_vals1, std::move(agg)); - agg = cudf::make_nth_element_aggregation(-3); + agg = cudf::make_nth_element_aggregation(-3); fixed_width_column_wrapper expect_vals2({0, 4, 2}); test_single_agg(keys, vals, expect_keys, expect_vals2, std::move(agg)); } @@ -133,7 +133,7 @@ TYPED_TEST(groupby_nth_element_test, negative_out_of_bounds) fixed_width_column_wrapper expect_keys{1, 2, 3}; - auto agg = cudf::make_nth_element_aggregation(-4); + auto agg = cudf::make_nth_element_aggregation(-4); fixed_width_column_wrapper expect_vals({0, 1, 0}, {0, 1, 0}); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); } @@ -150,7 +150,7 @@ TYPED_TEST(groupby_nth_element_test, zero_valid_keys) fixed_width_column_wrapper expect_keys{}; fixed_width_column_wrapper expect_vals{}; - auto agg = cudf::make_nth_element_aggregation(0); + auto agg = cudf::make_nth_element_aggregation(0); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); } @@ -166,7 +166,7 @@ TYPED_TEST(groupby_nth_element_test, zero_valid_values) fixed_width_column_wrapper expect_keys{1}; fixed_width_column_wrapper expect_vals({3}, all_nulls()); - auto agg = cudf::make_nth_element_aggregation(0); + auto agg = cudf::make_nth_element_aggregation(0); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); } @@ -186,7 +186,7 @@ TYPED_TEST(groupby_nth_element_test, null_keys_and_values) //vals {-,3,6, 1,4,-,9, 2,8, -} fixed_width_column_wrapper expect_vals({-1, 1, 2, -1}, {0, 1, 1, 0}); - auto agg = cudf::make_nth_element_aggregation(0); + auto agg = cudf::make_nth_element_aggregation(0); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); } @@ -206,7 +206,7 @@ TYPED_TEST(groupby_nth_element_test, null_keys_and_values_out_of_bounds) // value, null, out, out fixed_width_column_wrapper expect_vals({6, -1, -1, -1}, {1, 0, 0, 0}); - auto agg = cudf::make_nth_element_aggregation(2); + auto agg = cudf::make_nth_element_aggregation(2); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); } @@ -237,18 +237,18 @@ TYPED_TEST(groupby_nth_element_test, exclude_nulls) fixed_width_column_wrapper expect_vals1({6, 4, 2, -1}, {1, 1, 1, 0}); fixed_width_column_wrapper expect_vals2({-1, 9, 8, -1}, {0, 1, 1, 0}); - auto agg = cudf::make_nth_element_aggregation(0, cudf::null_policy::INCLUDE); + auto agg = cudf::make_nth_element_aggregation(0, cudf::null_policy::INCLUDE); test_single_agg(keys, vals, expect_keys, expect_nuls0, std::move(agg)); - agg = cudf::make_nth_element_aggregation(1, cudf::null_policy::INCLUDE); + agg = cudf::make_nth_element_aggregation(1, cudf::null_policy::INCLUDE); test_single_agg(keys, vals, expect_keys, expect_nuls1, std::move(agg)); - agg = cudf::make_nth_element_aggregation(2, cudf::null_policy::INCLUDE); + agg = cudf::make_nth_element_aggregation(2, cudf::null_policy::INCLUDE); test_single_agg(keys, vals, expect_keys, expect_nuls2, std::move(agg)); - agg = cudf::make_nth_element_aggregation(0, cudf::null_policy::EXCLUDE); + agg = cudf::make_nth_element_aggregation(0, cudf::null_policy::EXCLUDE); test_single_agg(keys, vals, expect_keys, expect_vals0, std::move(agg)); - agg = cudf::make_nth_element_aggregation(1, cudf::null_policy::EXCLUDE); + agg = cudf::make_nth_element_aggregation(1, cudf::null_policy::EXCLUDE); test_single_agg(keys, vals, expect_keys, expect_vals1, std::move(agg)); - agg = cudf::make_nth_element_aggregation(2, cudf::null_policy::EXCLUDE); + agg = cudf::make_nth_element_aggregation(2, cudf::null_policy::EXCLUDE); test_single_agg(keys, vals, expect_keys, expect_vals2, std::move(agg)); } @@ -282,18 +282,18 @@ TYPED_TEST(groupby_nth_element_test, exclude_nulls_negative_index) fixed_width_column_wrapper expect_vals1({3, 4, 2, -1}, {1, 1, 1, 0}); fixed_width_column_wrapper expect_vals2({-1, 1, 2, -1}, {0, 1, 1, 0}); - auto agg = cudf::make_nth_element_aggregation(-1, cudf::null_policy::INCLUDE); + auto agg = cudf::make_nth_element_aggregation(-1, cudf::null_policy::INCLUDE); test_single_agg(keys, vals, expect_keys, expect_nuls0, std::move(agg)); - agg = cudf::make_nth_element_aggregation(-2, cudf::null_policy::INCLUDE); + agg = cudf::make_nth_element_aggregation(-2, cudf::null_policy::INCLUDE); test_single_agg(keys, vals, expect_keys, expect_nuls1, std::move(agg)); - agg = cudf::make_nth_element_aggregation(-3, cudf::null_policy::INCLUDE); + agg = cudf::make_nth_element_aggregation(-3, cudf::null_policy::INCLUDE); test_single_agg(keys, vals, expect_keys, expect_nuls2, std::move(agg)); - agg = cudf::make_nth_element_aggregation(-1, cudf::null_policy::EXCLUDE); + agg = cudf::make_nth_element_aggregation(-1, cudf::null_policy::EXCLUDE); test_single_agg(keys, vals, expect_keys, expect_vals0, std::move(agg)); - agg = cudf::make_nth_element_aggregation(-2, cudf::null_policy::EXCLUDE); + agg = cudf::make_nth_element_aggregation(-2, cudf::null_policy::EXCLUDE); test_single_agg(keys, vals, expect_keys, expect_vals1, std::move(agg)); - agg = cudf::make_nth_element_aggregation(-3, cudf::null_policy::EXCLUDE); + agg = cudf::make_nth_element_aggregation(-3, cudf::null_policy::EXCLUDE); test_single_agg(keys, vals, expect_keys, expect_vals2, std::move(agg)); } @@ -312,38 +312,38 @@ TEST_F(groupby_nth_element_string_test, basic_string) fixed_width_column_wrapper expect_keys{1, 2, 3}; //groupby.first() - auto agg = cudf::make_nth_element_aggregation(0); + auto agg = cudf::make_nth_element_aggregation(0); strings_column_wrapper expect_vals0{"ABCD", "1", "2"}; test_single_agg(keys, vals, expect_keys, expect_vals0, std::move(agg)); - agg = cudf::make_nth_element_aggregation(1); + agg = cudf::make_nth_element_aggregation(1); strings_column_wrapper expect_vals1{"3", "4", "7"}; test_single_agg(keys, vals, expect_keys, expect_vals1, std::move(agg)); - agg = cudf::make_nth_element_aggregation(2); + agg = cudf::make_nth_element_aggregation(2); strings_column_wrapper expect_vals2{"6", "5", "8"}; test_single_agg(keys, vals, expect_keys, expect_vals2, std::move(agg)); //+ve out of bounds - agg = cudf::make_nth_element_aggregation(3); + agg = cudf::make_nth_element_aggregation(3); strings_column_wrapper expect_vals3{{"", "9", ""}, {0, 1, 0}}; test_single_agg(keys, vals, expect_keys, expect_vals3, std::move(agg)); //groupby.last() - agg = cudf::make_nth_element_aggregation(-1); + agg = cudf::make_nth_element_aggregation(-1); strings_column_wrapper expect_vals4{"6", "9", "8"}; test_single_agg(keys, vals, expect_keys, expect_vals4, std::move(agg)); - agg = cudf::make_nth_element_aggregation(-2); + agg = cudf::make_nth_element_aggregation(-2); strings_column_wrapper expect_vals5{"3", "5", "7"}; test_single_agg(keys, vals, expect_keys, expect_vals5, std::move(agg)); - agg = cudf::make_nth_element_aggregation(-3); + agg = cudf::make_nth_element_aggregation(-3); strings_column_wrapper expect_vals6{"ABCD", "4", "2"}; test_single_agg(keys, vals, expect_keys, expect_vals6, std::move(agg)); //-ve out of bounds - agg = cudf::make_nth_element_aggregation(-4); + agg = cudf::make_nth_element_aggregation(-4); strings_column_wrapper expect_vals7{{"", "1", ""}, {0, 1, 0}}; test_single_agg(keys, vals, expect_keys, expect_vals7, std::move(agg)); } @@ -361,8 +361,11 @@ TEST_F(groupby_nth_element_string_test, dictionary) auto expect_vals = cudf::dictionary::set_keys(expect_vals_w, vals.keys()); - test_single_agg( - keys, vals, expect_keys, expect_vals->view(), cudf::make_nth_element_aggregation(2)); + test_single_agg(keys, + vals, + expect_keys, + expect_vals->view(), + cudf::make_nth_element_aggregation(2)); } template @@ -384,8 +387,11 @@ TYPED_TEST(groupby_nth_element_lists_test, Basics) auto expected_keys = fixed_width_column_wrapper{1, 2, 3}; auto expected_values = lists{{1, 2}, {5, 6, 7}, {9, 10}}; - test_single_agg( - keys, values, expected_keys, expected_values, cudf::make_nth_element_aggregation(0)); + test_single_agg(keys, + values, + expected_keys, + expected_values, + cudf::make_nth_element_aggregation(0)); } TYPED_TEST(groupby_nth_element_lists_test, EmptyInput) @@ -401,8 +407,11 @@ TYPED_TEST(groupby_nth_element_lists_test, EmptyInput) auto expected_keys = fixed_width_column_wrapper{}; auto expected_values = lists{}; - test_single_agg( - keys, values, expected_keys, expected_values, cudf::make_nth_element_aggregation(2)); + test_single_agg(keys, + values, + expected_keys, + expected_values, + cudf::make_nth_element_aggregation(2)); } } // namespace test diff --git a/cpp/tests/groupby/nunique_tests.cpp b/cpp/tests/groupby/nunique_tests.cpp index 089ca8805d4..88a6a1c903b 100644 --- a/cpp/tests/groupby/nunique_tests.cpp +++ b/cpp/tests/groupby/nunique_tests.cpp @@ -49,7 +49,7 @@ TYPED_TEST(groupby_nunique_test, basic) fixed_width_column_wrapper expect_bool_vals{2, 1, 1}; // clang-format on - auto agg = cudf::make_nunique_aggregation(); + auto agg = cudf::make_nunique_aggregation(); if (std::is_same()) test_single_agg(keys, vals, expect_keys, expect_bool_vals, std::move(agg)); else @@ -67,7 +67,7 @@ TYPED_TEST(groupby_nunique_test, empty_cols) fixed_width_column_wrapper expect_keys{}; fixed_width_column_wrapper expect_vals{}; - auto agg = cudf::make_nunique_aggregation(); + auto agg = cudf::make_nunique_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); } @@ -83,7 +83,7 @@ TYPED_TEST(groupby_nunique_test, basic_duplicates) fixed_width_column_wrapper expect_vals{2, 4, 1}; fixed_width_column_wrapper expect_bool_vals{2, 1, 1}; - auto agg = cudf::make_nunique_aggregation(); + auto agg = cudf::make_nunique_aggregation(); if (std::is_same()) test_single_agg(keys, vals, expect_keys, expect_bool_vals, std::move(agg)); else @@ -101,7 +101,7 @@ TYPED_TEST(groupby_nunique_test, zero_valid_keys) fixed_width_column_wrapper expect_keys{}; fixed_width_column_wrapper expect_vals{}; - auto agg = cudf::make_nunique_aggregation(); + auto agg = cudf::make_nunique_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); } @@ -116,7 +116,7 @@ TYPED_TEST(groupby_nunique_test, zero_valid_values) fixed_width_column_wrapper expect_keys{1}; fixed_width_column_wrapper expect_vals{0}; - auto agg = cudf::make_nunique_aggregation(); + auto agg = cudf::make_nunique_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); } @@ -136,7 +136,7 @@ TYPED_TEST(groupby_nunique_test, null_keys_and_values) fixed_width_column_wrapper expect_vals{2, 3, 2, 0}; fixed_width_column_wrapper expect_bool_vals{1, 1, 1, 0}; - auto agg = cudf::make_nunique_aggregation(); + auto agg = cudf::make_nunique_aggregation(); if (std::is_same()) test_single_agg(keys, vals, expect_keys, expect_bool_vals, std::move(agg)); else @@ -160,7 +160,7 @@ TYPED_TEST(groupby_nunique_test, null_keys_and_values_with_duplicates) fixed_width_column_wrapper expect_vals{2, 3, 2, 0}; fixed_width_column_wrapper expect_bool_vals{1, 1, 1, 0}; - auto agg = cudf::make_nunique_aggregation(); + auto agg = cudf::make_nunique_aggregation(); if (std::is_same()) test_single_agg(keys, vals, expect_keys, expect_bool_vals, std::move(agg)); else @@ -184,7 +184,7 @@ TYPED_TEST(groupby_nunique_test, include_nulls) fixed_width_column_wrapper expect_vals{3, 4, 2, 1}; fixed_width_column_wrapper expect_bool_vals{2, 2, 1, 1}; - auto agg = cudf::make_nunique_aggregation(null_policy::INCLUDE); + auto agg = cudf::make_nunique_aggregation(null_policy::INCLUDE); if (std::is_same()) test_single_agg(keys, vals, expect_keys, expect_bool_vals, std::move(agg)); else @@ -213,8 +213,11 @@ TYPED_TEST(groupby_nunique_test, dictionary) cudf::column_view expect_vals = (std::is_same()) ? cudf::column_view{expect_bool_vals} : cudf::column_view{expect_fixed_vals}; - test_single_agg( - keys, vals, expect_keys, expect_vals, cudf::make_nunique_aggregation(null_policy::INCLUDE)); + test_single_agg(keys, + vals, + expect_keys, + expect_vals, + cudf::make_nunique_aggregation(null_policy::INCLUDE)); } } // namespace test diff --git a/cpp/tests/groupby/product_tests.cpp b/cpp/tests/groupby/product_tests.cpp index eaa2cc07ff8..047bf856493 100644 --- a/cpp/tests/groupby/product_tests.cpp +++ b/cpp/tests/groupby/product_tests.cpp @@ -51,7 +51,11 @@ TYPED_TEST(groupby_product_test, basic) fixed_width_column_wrapper expect_vals({ 0., 180., 112. }, no_nulls()); // clang-format on - test_single_agg(keys, vals, expect_keys, expect_vals, cudf::make_product_aggregation()); + test_single_agg(keys, + vals, + expect_keys, + expect_vals, + cudf::make_product_aggregation()); } TYPED_TEST(groupby_product_test, empty_cols) @@ -65,7 +69,11 @@ TYPED_TEST(groupby_product_test, empty_cols) fixed_width_column_wrapper expect_keys{}; fixed_width_column_wrapper expect_vals{}; - test_single_agg(keys, vals, expect_keys, expect_vals, cudf::make_product_aggregation()); + test_single_agg(keys, + vals, + expect_keys, + expect_vals, + cudf::make_product_aggregation()); } TYPED_TEST(groupby_product_test, zero_valid_keys) @@ -79,7 +87,11 @@ TYPED_TEST(groupby_product_test, zero_valid_keys) fixed_width_column_wrapper expect_keys{}; fixed_width_column_wrapper expect_vals{}; - test_single_agg(keys, vals, expect_keys, expect_vals, cudf::make_product_aggregation()); + test_single_agg(keys, + vals, + expect_keys, + expect_vals, + cudf::make_product_aggregation()); } TYPED_TEST(groupby_product_test, zero_valid_values) @@ -93,7 +105,11 @@ TYPED_TEST(groupby_product_test, zero_valid_values) fixed_width_column_wrapper expect_keys{1}; fixed_width_column_wrapper expect_vals({0}, all_nulls()); - test_single_agg(keys, vals, expect_keys, expect_vals, cudf::make_product_aggregation()); + test_single_agg(keys, + vals, + expect_keys, + expect_vals, + cudf::make_product_aggregation()); } TYPED_TEST(groupby_product_test, null_keys_and_values) @@ -114,7 +130,11 @@ TYPED_TEST(groupby_product_test, null_keys_and_values) { 1, 1, 1, 0}); // clang-format on - test_single_agg(keys, vals, expect_keys, expect_vals, cudf::make_product_aggregation()); + test_single_agg(keys, + vals, + expect_keys, + expect_vals, + cudf::make_product_aggregation()); } TYPED_TEST(groupby_product_test, dictionary) @@ -132,7 +152,11 @@ TYPED_TEST(groupby_product_test, dictionary) fixed_width_column_wrapper expect_vals({ 0., 180., 112. }, no_nulls()); // clang-format on - test_single_agg(keys, vals, expect_keys, expect_vals, cudf::make_product_aggregation()); + test_single_agg(keys, + vals, + expect_keys, + expect_vals, + cudf::make_product_aggregation()); } TYPED_TEST(groupby_product_test, dictionary_with_nulls) @@ -151,7 +175,11 @@ TYPED_TEST(groupby_product_test, dictionary_with_nulls) fixed_width_column_wrapper expect_vals({ 0., 180., 56. }, no_nulls()); // clang-format on - test_single_agg(keys, vals, expect_keys, expect_vals, cudf::make_product_aggregation()); + test_single_agg(keys, + vals, + expect_keys, + expect_vals, + cudf::make_product_aggregation()); } } // namespace test diff --git a/cpp/tests/groupby/quantile_tests.cpp b/cpp/tests/groupby/quantile_tests.cpp index a82dae9edcb..43b065ee4d3 100644 --- a/cpp/tests/groupby/quantile_tests.cpp +++ b/cpp/tests/groupby/quantile_tests.cpp @@ -51,7 +51,7 @@ TYPED_TEST(groupby_quantile_test, basic) fixed_width_column_wrapper expect_vals({3., 4.5, 7.}, no_nulls()); // clang-format on - auto agg = cudf::make_quantile_aggregation({0.5}, interpolation::LINEAR); + auto agg = cudf::make_quantile_aggregation({0.5}, interpolation::LINEAR); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); } @@ -66,7 +66,7 @@ TYPED_TEST(groupby_quantile_test, empty_cols) fixed_width_column_wrapper expect_keys{}; fixed_width_column_wrapper expect_vals{}; - auto agg = cudf::make_quantile_aggregation({0.5}, interpolation::LINEAR); + auto agg = cudf::make_quantile_aggregation({0.5}, interpolation::LINEAR); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); } @@ -81,7 +81,7 @@ TYPED_TEST(groupby_quantile_test, zero_valid_keys) fixed_width_column_wrapper expect_keys{}; fixed_width_column_wrapper expect_vals{}; - auto agg = cudf::make_quantile_aggregation({0.5}, interpolation::LINEAR); + auto agg = cudf::make_quantile_aggregation({0.5}, interpolation::LINEAR); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); } @@ -96,7 +96,7 @@ TYPED_TEST(groupby_quantile_test, zero_valid_values) fixed_width_column_wrapper expect_keys{1}; fixed_width_column_wrapper expect_vals({0}, all_nulls()); - auto agg = cudf::make_quantile_aggregation({0.5}, interpolation::LINEAR); + auto agg = cudf::make_quantile_aggregation({0.5}, interpolation::LINEAR); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); } @@ -115,7 +115,7 @@ TYPED_TEST(groupby_quantile_test, null_keys_and_values) // { 3, 6, 1, 4, 9, 2, 8, -} fixed_width_column_wrapper expect_vals({4.5, 4., 5., 0.}, {1, 1, 1, 0}); - auto agg = cudf::make_quantile_aggregation({0.5}, interpolation::LINEAR); + auto agg = cudf::make_quantile_aggregation({0.5}, interpolation::LINEAR); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); } @@ -134,7 +134,8 @@ TYPED_TEST(groupby_quantile_test, multiple_quantile) fixed_width_column_wrapper expect_vals({1.5, 4.5, 3.25, 6., 4.5, 7.5}, no_nulls()); // clang-format on - auto agg = cudf::make_quantile_aggregation({0.25, 0.75}, interpolation::LINEAR); + auto agg = + cudf::make_quantile_aggregation({0.25, 0.75}, interpolation::LINEAR); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg), force_use_sort_impl::YES); } @@ -152,27 +153,27 @@ TYPED_TEST(groupby_quantile_test, interpolation_types) // {0, 3, 6, 1, 4, 5, 9, 2, 7} fixed_width_column_wrapper expect_vals1({2.4, 4.2, 4.}, no_nulls()); - auto agg1 = cudf::make_quantile_aggregation({0.4}, interpolation::LINEAR); + auto agg1 = cudf::make_quantile_aggregation({0.4}, interpolation::LINEAR); test_single_agg(keys, vals, expect_keys, expect_vals1, std::move(agg1)); // {0, 3, 6, 1, 4, 5, 9, 2, 7} fixed_width_column_wrapper expect_vals2({3, 4, 2}, no_nulls()); - auto agg2 = cudf::make_quantile_aggregation({0.4}, interpolation::NEAREST); + auto agg2 = cudf::make_quantile_aggregation({0.4}, interpolation::NEAREST); test_single_agg(keys, vals, expect_keys, expect_vals2, std::move(agg2)); // {0, 3, 6, 1, 4, 5, 9, 2, 7} fixed_width_column_wrapper expect_vals3({0, 4, 2}, no_nulls()); - auto agg3 = cudf::make_quantile_aggregation({0.4}, interpolation::LOWER); + auto agg3 = cudf::make_quantile_aggregation({0.4}, interpolation::LOWER); test_single_agg(keys, vals, expect_keys, expect_vals3, std::move(agg3)); // {0, 3, 6, 1, 4, 5, 9, 2, 7} fixed_width_column_wrapper expect_vals4({3, 5, 7}, no_nulls()); - auto agg4 = cudf::make_quantile_aggregation({0.4}, interpolation::HIGHER); + auto agg4 = cudf::make_quantile_aggregation({0.4}, interpolation::HIGHER); test_single_agg(keys, vals, expect_keys, expect_vals4, std::move(agg4)); // {0, 3, 6, 1, 4, 5, 9, 2, 7} fixed_width_column_wrapper expect_vals5({1.5, 4.5, 4.5}, no_nulls()); - auto agg5 = cudf::make_quantile_aggregation({0.4}, interpolation::MIDPOINT); + auto agg5 = cudf::make_quantile_aggregation({0.4}, interpolation::MIDPOINT); test_single_agg(keys, vals, expect_keys, expect_vals5, std::move(agg5)); // clang-format on } @@ -192,11 +193,12 @@ TYPED_TEST(groupby_quantile_test, dictionary) fixed_width_column_wrapper expect_vals({3., 4.5, 7.}, no_nulls()); // clang-format on - test_single_agg(keys, - vals, - expect_keys, - expect_vals, - cudf::make_quantile_aggregation({0.5}, interpolation::LINEAR)); + test_single_agg( + keys, + vals, + expect_keys, + expect_vals, + cudf::make_quantile_aggregation({0.5}, interpolation::LINEAR)); } } // namespace test diff --git a/cpp/tests/groupby/rank_scan_tests.cpp b/cpp/tests/groupby/rank_scan_tests.cpp index 51c4c1e63c2..37e75e2e906 100644 --- a/cpp/tests/groupby/rank_scan_tests.cpp +++ b/cpp/tests/groupby/rank_scan_tests.cpp @@ -39,11 +39,16 @@ inline void test_pair_rank_scans(column_view const& keys, order, keys, expected_dense, - make_dense_rank_aggregation(), + make_dense_rank_aggregation(), + null_policy::INCLUDE, + sorted::YES); + test_single_scan(keys, + order, + keys, + expected_rank, + make_rank_aggregation(), null_policy::INCLUDE, sorted::YES); - test_single_scan( - keys, order, keys, expected_rank, make_rank_aggregation(), null_policy::INCLUDE, sorted::YES); } struct groupby_rank_scan_test : public BaseFixture { @@ -201,11 +206,11 @@ TYPED_TEST(typed_groupby_rank_scan_test, mixedStructs) auto expected_rank_vals = fixed_width_column_wrapper{1, 1, 3, 3, 5, 6, 1, 1, 3, 1, 1, 3}; - std::vector requests; - requests.emplace_back(groupby::aggregation_request()); + std::vector requests; + requests.emplace_back(groupby::scan_request()); requests[0].values = *struct_col; - requests[0].aggregations.push_back(make_dense_rank_aggregation()); - requests[0].aggregations.push_back(make_rank_aggregation()); + requests[0].aggregations.push_back(make_dense_rank_aggregation()); + requests[0].aggregations.push_back(make_rank_aggregation()); groupby::groupby gb_obj(table_view({keys}), null_policy::INCLUDE, sorted::YES); auto result = gb_obj.scan(requests); @@ -377,34 +382,61 @@ TEST_F(groupby_rank_scan_test_failures, test_exception_triggers) fixed_width_column_wrapper col{3, 3, 1}; CUDF_EXPECT_THROW_MESSAGE( - test_single_scan( - keys, col, keys, col, make_dense_rank_aggregation(), null_policy::INCLUDE, sorted::NO), + test_single_scan(keys, + col, + keys, + col, + make_dense_rank_aggregation(), + null_policy::INCLUDE, + sorted::NO), "Dense rank aggregate in groupby scan requires the keys to be presorted"); - CUDF_EXPECT_THROW_MESSAGE( - test_single_scan( - keys, col, keys, col, make_rank_aggregation(), null_policy::INCLUDE, sorted::NO), - "Rank aggregate in groupby scan requires the keys to be presorted"); + CUDF_EXPECT_THROW_MESSAGE(test_single_scan(keys, + col, + keys, + col, + make_rank_aggregation(), + null_policy::INCLUDE, + sorted::NO), + "Rank aggregate in groupby scan requires the keys to be presorted"); CUDF_EXPECT_THROW_MESSAGE( - test_single_scan( - keys, col, keys, col, make_dense_rank_aggregation(), null_policy::EXCLUDE, sorted::YES), + test_single_scan(keys, + col, + keys, + col, + make_dense_rank_aggregation(), + null_policy::EXCLUDE, + sorted::YES), "Dense rank aggregate in groupby scan requires the keys to be presorted"); - CUDF_EXPECT_THROW_MESSAGE( - test_single_scan( - keys, col, keys, col, make_rank_aggregation(), null_policy::EXCLUDE, sorted::YES), - "Rank aggregate in groupby scan requires the keys to be presorted"); + CUDF_EXPECT_THROW_MESSAGE(test_single_scan(keys, + col, + keys, + col, + make_rank_aggregation(), + null_policy::EXCLUDE, + sorted::YES), + "Rank aggregate in groupby scan requires the keys to be presorted"); CUDF_EXPECT_THROW_MESSAGE( - test_single_scan( - keys, col, keys, col, make_dense_rank_aggregation(), null_policy::EXCLUDE, sorted::NO), + test_single_scan(keys, + col, + keys, + col, + make_dense_rank_aggregation(), + null_policy::EXCLUDE, + sorted::NO), "Dense rank aggregate in groupby scan requires the keys to be presorted"); - CUDF_EXPECT_THROW_MESSAGE( - test_single_scan( - keys, col, keys, col, make_rank_aggregation(), null_policy::EXCLUDE, sorted::NO), - "Rank aggregate in groupby scan requires the keys to be presorted"); + CUDF_EXPECT_THROW_MESSAGE(test_single_scan(keys, + col, + keys, + col, + make_rank_aggregation(), + null_policy::EXCLUDE, + sorted::NO), + "Rank aggregate in groupby scan requires the keys to be presorted"); } } // namespace test diff --git a/cpp/tests/groupby/std_tests.cpp b/cpp/tests/groupby/std_tests.cpp index c771971ad9a..e2edabf3e8f 100644 --- a/cpp/tests/groupby/std_tests.cpp +++ b/cpp/tests/groupby/std_tests.cpp @@ -53,7 +53,7 @@ TYPED_TEST(groupby_std_test, basic) fixed_width_column_wrapper expect_vals({3., sqrt(131./12), sqrt(31./3)}, no_nulls()); // clang-format on - auto agg = cudf::make_std_aggregation(); + auto agg = cudf::make_std_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); } @@ -68,7 +68,7 @@ TYPED_TEST(groupby_std_test, empty_cols) fixed_width_column_wrapper expect_keys{}; fixed_width_column_wrapper expect_vals{}; - auto agg = cudf::make_std_aggregation(); + auto agg = cudf::make_std_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); } @@ -83,7 +83,7 @@ TYPED_TEST(groupby_std_test, zero_valid_keys) fixed_width_column_wrapper expect_keys{}; fixed_width_column_wrapper expect_vals{}; - auto agg = cudf::make_std_aggregation(); + auto agg = cudf::make_std_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); } @@ -98,7 +98,7 @@ TYPED_TEST(groupby_std_test, zero_valid_values) fixed_width_column_wrapper expect_keys{1}; fixed_width_column_wrapper expect_vals({0}, all_nulls()); - auto agg = cudf::make_std_aggregation(); + auto agg = cudf::make_std_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); } @@ -118,7 +118,7 @@ TYPED_TEST(groupby_std_test, null_keys_and_values) fixed_width_column_wrapper expect_vals({3 / sqrt(2), 7 / sqrt(3), 3 * sqrt(2), 0.}, {1, 1, 1, 0}); - auto agg = cudf::make_std_aggregation(); + auto agg = cudf::make_std_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); } @@ -137,7 +137,7 @@ TYPED_TEST(groupby_std_test, ddof_non_default) // { 3, 6, 1, 4, 9, 2, 8, 3} fixed_width_column_wrapper expect_vals({0., 7 * sqrt(2. / 3), 0., 0.}, {0, 1, 0, 0}); - auto agg = cudf::make_std_aggregation(2); + auto agg = cudf::make_std_aggregation(2); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); } @@ -156,7 +156,8 @@ TYPED_TEST(groupby_std_test, dictionary) fixed_width_column_wrapper expect_vals({3., sqrt(131./12), sqrt(31./3)}, no_nulls()); // clang-format on - test_single_agg(keys, vals, expect_keys, expect_vals, cudf::make_std_aggregation()); + test_single_agg( + keys, vals, expect_keys, expect_vals, cudf::make_std_aggregation()); } } // namespace test diff --git a/cpp/tests/groupby/sum_of_squares_tests.cpp b/cpp/tests/groupby/sum_of_squares_tests.cpp index 12b044c7382..0dab2c6483e 100644 --- a/cpp/tests/groupby/sum_of_squares_tests.cpp +++ b/cpp/tests/groupby/sum_of_squares_tests.cpp @@ -49,7 +49,7 @@ TYPED_TEST(groupby_sum_of_squares_test, basic) // { 0, 3, 6, 1, 4, 5, 9, 2, 7, 8} fixed_width_column_wrapper expect_vals({45., 123., 117.}, no_nulls()); - auto agg = cudf::make_sum_of_squares_aggregation(); + auto agg = cudf::make_sum_of_squares_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); } @@ -64,7 +64,7 @@ TYPED_TEST(groupby_sum_of_squares_test, empty_cols) fixed_width_column_wrapper expect_keys{}; fixed_width_column_wrapper expect_vals{}; - auto agg = cudf::make_sum_of_squares_aggregation(); + auto agg = cudf::make_sum_of_squares_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); } @@ -79,7 +79,7 @@ TYPED_TEST(groupby_sum_of_squares_test, zero_valid_keys) fixed_width_column_wrapper expect_keys{}; fixed_width_column_wrapper expect_vals{}; - auto agg = cudf::make_sum_of_squares_aggregation(); + auto agg = cudf::make_sum_of_squares_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); } @@ -94,7 +94,7 @@ TYPED_TEST(groupby_sum_of_squares_test, zero_valid_values) fixed_width_column_wrapper expect_keys{1}; fixed_width_column_wrapper expect_vals({0}, all_nulls()); - auto agg = cudf::make_sum_of_squares_aggregation(); + auto agg = cudf::make_sum_of_squares_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); } @@ -113,7 +113,7 @@ TYPED_TEST(groupby_sum_of_squares_test, null_keys_and_values) // { 3, 6, 1, 4, 9, 2, 8, 3} fixed_width_column_wrapper expect_vals({45., 98., 68., 9.}, {1, 1, 1, 0}); - auto agg = cudf::make_sum_of_squares_aggregation(); + auto agg = cudf::make_sum_of_squares_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); } @@ -132,7 +132,11 @@ TYPED_TEST(groupby_sum_of_squares_test, dictionary) fixed_width_column_wrapper expect_vals({45., 123., 117. }, no_nulls()); // clang-format on - test_single_agg(keys, vals, expect_keys, expect_vals, cudf::make_sum_of_squares_aggregation()); + test_single_agg(keys, + vals, + expect_keys, + expect_vals, + cudf::make_sum_of_squares_aggregation()); } } // namespace test diff --git a/cpp/tests/groupby/sum_scan_tests.cpp b/cpp/tests/groupby/sum_scan_tests.cpp index 2f1928747ae..86fc0238597 100644 --- a/cpp/tests/groupby/sum_scan_tests.cpp +++ b/cpp/tests/groupby/sum_scan_tests.cpp @@ -57,7 +57,7 @@ TYPED_TEST(groupby_sum_scan_test, basic) // {0, 3, 6, 1, 4, 5, 9, 2, 7, 8} result_wrapper expect_vals{0, 3, 9, 1, 5, 10, 19, 2, 9, 17}; // clang-format on - auto agg = cudf::make_sum_aggregation(); + auto agg = cudf::make_sum_aggregation(); test_single_scan(keys, vals, expect_keys, expect_vals, std::move(agg)); } @@ -74,7 +74,7 @@ TYPED_TEST(groupby_sum_scan_test, empty_cols) result_wrapper expect_vals{}; // clang-format on - auto agg = cudf::make_sum_aggregation(); + auto agg = cudf::make_sum_aggregation(); test_single_scan(keys, vals, expect_keys, expect_vals, std::move(agg)); } @@ -91,7 +91,7 @@ TYPED_TEST(groupby_sum_scan_test, zero_valid_keys) result_wrapper expect_vals{}; // clang-format on - auto agg = cudf::make_sum_aggregation(); + auto agg = cudf::make_sum_aggregation(); test_single_scan(keys, vals, expect_keys, expect_vals, std::move(agg)); } @@ -108,7 +108,7 @@ TYPED_TEST(groupby_sum_scan_test, zero_valid_values) result_wrapper expect_vals({3, 4, 5}, all_nulls()); // clang-format on - auto agg = cudf::make_sum_aggregation(); + auto agg = cudf::make_sum_aggregation(); test_single_scan(keys, vals, expect_keys, expect_vals, std::move(agg)); } @@ -128,7 +128,7 @@ TYPED_TEST(groupby_sum_scan_test, null_keys_and_values) { 0, 1, 1, 1, 1, 0, 1, 1, 1, 0}); // clang-format on - auto agg = cudf::make_sum_aggregation(); + auto agg = cudf::make_sum_aggregation(); test_single_scan(keys, vals, expect_keys, expect_vals, std::move(agg)); } @@ -156,7 +156,7 @@ TYPED_TEST(FixedPointTestBothReps, GroupBySortSumScanDecimalAsValue) auto const expect_vals_sum = out_fp_wrapper{{0, 3, 9, 1, 5, 10, 19, 2, 9, 17}, scale}; // clang-format on - auto agg2 = cudf::make_sum_aggregation(); + auto agg2 = cudf::make_sum_aggregation(); test_single_scan(keys, vals, expect_keys, expect_vals_sum, std::move(agg2)); } } diff --git a/cpp/tests/groupby/sum_tests.cpp b/cpp/tests/groupby/sum_tests.cpp index 458937ff2e4..5c935ee5a9d 100644 --- a/cpp/tests/groupby/sum_tests.cpp +++ b/cpp/tests/groupby/sum_tests.cpp @@ -49,10 +49,10 @@ TYPED_TEST(groupby_sum_test, basic) fixed_width_column_wrapper expect_keys{1, 2, 3}; fixed_width_column_wrapper expect_vals{9, 19, 17}; - auto agg = cudf::make_sum_aggregation(); + auto agg = cudf::make_sum_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); - auto agg2 = cudf::make_sum_aggregation(); + auto agg2 = cudf::make_sum_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg2), force_use_sort_impl::YES); } @@ -67,10 +67,10 @@ TYPED_TEST(groupby_sum_test, empty_cols) fixed_width_column_wrapper expect_keys{}; fixed_width_column_wrapper expect_vals{}; - auto agg = cudf::make_sum_aggregation(); + auto agg = cudf::make_sum_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); - auto agg2 = cudf::make_sum_aggregation(); + auto agg2 = cudf::make_sum_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg2), force_use_sort_impl::YES); } @@ -85,10 +85,10 @@ TYPED_TEST(groupby_sum_test, zero_valid_keys) fixed_width_column_wrapper expect_keys{}; fixed_width_column_wrapper expect_vals{}; - auto agg = cudf::make_sum_aggregation(); + auto agg = cudf::make_sum_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); - auto agg2 = cudf::make_sum_aggregation(); + auto agg2 = cudf::make_sum_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg2), force_use_sort_impl::YES); } @@ -103,10 +103,10 @@ TYPED_TEST(groupby_sum_test, zero_valid_values) fixed_width_column_wrapper expect_keys{1}; fixed_width_column_wrapper expect_vals({0}, all_nulls()); - auto agg = cudf::make_sum_aggregation(); + auto agg = cudf::make_sum_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); - auto agg2 = cudf::make_sum_aggregation(); + auto agg2 = cudf::make_sum_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg2), force_use_sort_impl::YES); } @@ -125,10 +125,10 @@ TYPED_TEST(groupby_sum_test, null_keys_and_values) // { 3, 6, 1, 4, 9, 2, 8, -} fixed_width_column_wrapper expect_vals({9, 14, 10, 0}, {1, 1, 1, 0}); - auto agg = cudf::make_sum_aggregation(); + auto agg = cudf::make_sum_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); - auto agg2 = cudf::make_sum_aggregation(); + auto agg2 = cudf::make_sum_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg2), force_use_sort_impl::YES); } // clang-format on @@ -146,9 +146,14 @@ TYPED_TEST(groupby_sum_test, dictionary) fixed_width_column_wrapper expect_vals{ 9, 19, 17}; // clang-format on - test_single_agg(keys, vals, expect_keys, expect_vals, cudf::make_sum_aggregation()); test_single_agg( - keys, vals, expect_keys, expect_vals, cudf::make_sum_aggregation(), force_use_sort_impl::YES); + keys, vals, expect_keys, expect_vals, cudf::make_sum_aggregation()); + test_single_agg(keys, + vals, + expect_keys, + expect_vals, + cudf::make_sum_aggregation(), + force_use_sort_impl::YES); } template @@ -176,11 +181,11 @@ TYPED_TEST(FixedPointTestBothReps, GroupBySortSumDecimalAsValue) auto const expect_keys = fixed_width_column_wrapper{1, 2, 3}; auto const expect_vals_sum = fp64_wrapper{{9, 19, 17}, scale}; - auto agg1 = cudf::make_sum_aggregation(); + auto agg1 = cudf::make_sum_aggregation(); test_single_agg( keys, vals, expect_keys, expect_vals_sum, std::move(agg1), force_use_sort_impl::YES); - auto agg4 = cudf::make_product_aggregation(); + auto agg4 = cudf::make_product_aggregation(); EXPECT_THROW( test_single_agg(keys, vals, expect_keys, {}, std::move(agg4), force_use_sort_impl::YES), cudf::logic_error); @@ -206,10 +211,10 @@ TYPED_TEST(FixedPointTestBothReps, GroupByHashSumDecimalAsValue) auto const expect_keys = fixed_width_column_wrapper{1, 2, 3}; auto const expect_vals_sum = fp64_wrapper{{9, 19, 17}, scale}; - auto agg5 = cudf::make_sum_aggregation(); + auto agg5 = cudf::make_sum_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals_sum, std::move(agg5)); - auto agg8 = cudf::make_product_aggregation(); + auto agg8 = cudf::make_product_aggregation(); EXPECT_THROW(test_single_agg(keys, vals, expect_keys, {}, std::move(agg8)), cudf::logic_error); } } diff --git a/cpp/tests/groupby/var_tests.cpp b/cpp/tests/groupby/var_tests.cpp index c3fc781801d..68ccf791960 100644 --- a/cpp/tests/groupby/var_tests.cpp +++ b/cpp/tests/groupby/var_tests.cpp @@ -53,7 +53,7 @@ TYPED_TEST(groupby_var_test, basic) fixed_width_column_wrapper expect_vals({9., 131. / 12, 31. / 3}, no_nulls()); // clang-format on - auto agg = cudf::make_variance_aggregation(); + auto agg = cudf::make_variance_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); } @@ -68,7 +68,7 @@ TYPED_TEST(groupby_var_test, empty_cols) fixed_width_column_wrapper expect_keys{}; fixed_width_column_wrapper expect_vals{}; - auto agg = cudf::make_variance_aggregation(); + auto agg = cudf::make_variance_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); } @@ -83,7 +83,7 @@ TYPED_TEST(groupby_var_test, zero_valid_keys) fixed_width_column_wrapper expect_keys{}; fixed_width_column_wrapper expect_vals{}; - auto agg = cudf::make_variance_aggregation(); + auto agg = cudf::make_variance_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); } @@ -98,7 +98,7 @@ TYPED_TEST(groupby_var_test, zero_valid_values) fixed_width_column_wrapper expect_keys{1}; fixed_width_column_wrapper expect_vals({0}, all_nulls()); - auto agg = cudf::make_variance_aggregation(); + auto agg = cudf::make_variance_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); } @@ -119,7 +119,7 @@ TYPED_TEST(groupby_var_test, null_keys_and_values) fixed_width_column_wrapper expect_vals({4.5, 49. / 3, 18., 0.}, {1, 1, 1, 0}); // clang-format on - auto agg = cudf::make_variance_aggregation(); + auto agg = cudf::make_variance_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); } @@ -141,7 +141,7 @@ TYPED_TEST(groupby_var_test, ddof_non_default) {0, 1, 0, 0}); // clang-format on - auto agg = cudf::make_variance_aggregation(2); + auto agg = cudf::make_variance_aggregation(2); test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); } @@ -160,7 +160,11 @@ TYPED_TEST(groupby_var_test, dictionary) fixed_width_column_wrapper expect_vals({9., 131./12, 31./3 }, no_nulls()); // clang-format on - test_single_agg(keys, vals, expect_keys, expect_vals, cudf::make_variance_aggregation()); + test_single_agg(keys, + vals, + expect_keys, + expect_vals, + cudf::make_variance_aggregation()); } } // namespace test diff --git a/java/src/main/native/src/TableJni.cpp b/java/src/main/native/src/TableJni.cpp index f9cd81e7e97..595bc1df151 100644 --- a/java/src/main/native/src/TableJni.cpp +++ b/java/src/main/native/src/TableJni.cpp @@ -2345,11 +2345,19 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_groupByAggregate( for (int i = 0; i < n_values.size(); i++) { cudf::groupby::aggregation_request req; int col_index = n_values[i]; + + cudf::groupby_aggregation *agg = + dynamic_cast(n_agg_instances[i]); + JNI_ARG_CHECK(env, agg != nullptr, "aggregation is not an instance of groupby_aggregation", + nullptr); + std::unique_ptr cloned( + dynamic_cast(agg->clone().release())); + if (col_index == previous_index) { - requests.back().aggregations.push_back(n_agg_instances[i]->clone()); + requests.back().aggregations.push_back(std::move(cloned)); } else { req.values = n_input_table->column(col_index); - req.aggregations.push_back(n_agg_instances[i]->clone()); + req.aggregations.push_back(std::move(cloned)); requests.push_back(std::move(req)); } previous_index = col_index; @@ -2401,17 +2409,25 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_groupByScan( // Aggregates are passed in already grouped by column, so we just need to fill it in // as we go. - std::vector requests; + std::vector requests; int previous_index = -1; for (int i = 0; i < n_values.size(); i++) { - cudf::groupby::aggregation_request req; + cudf::groupby::scan_request req; int col_index = n_values[i]; + + cudf::groupby_scan_aggregation *agg = + dynamic_cast(n_agg_instances[i]); + JNI_ARG_CHECK(env, agg != nullptr, + "aggregation is not an instance of groupby_scan_aggregation", nullptr); + std::unique_ptr cloned( + dynamic_cast(agg->clone().release())); + if (col_index == previous_index) { - requests.back().aggregations.push_back(n_agg_instances[i]->clone()); + requests.back().aggregations.push_back(std::move(cloned)); } else { req.values = n_input_table->column(col_index); - req.aggregations.push_back(n_agg_instances[i]->clone()); + req.aggregations.push_back(std::move(cloned)); requests.push_back(std::move(req)); } previous_index = col_index; diff --git a/python/cudf/cudf/_lib/aggregation.pxd b/python/cudf/cudf/_lib/aggregation.pxd index f608dab3fe1..84bcaed1b36 100644 --- a/python/cudf/cudf/_lib/aggregation.pxd +++ b/python/cudf/cudf/_lib/aggregation.pxd @@ -2,7 +2,12 @@ from libcpp.memory cimport unique_ptr -from cudf._lib.cpp.aggregation cimport aggregation, rolling_aggregation +from cudf._lib.cpp.aggregation cimport ( + aggregation, + groupby_aggregation, + groupby_scan_aggregation, + rolling_aggregation, +) cdef class Aggregation: @@ -11,5 +16,13 @@ cdef class Aggregation: cdef class RollingAggregation: cdef unique_ptr[rolling_aggregation] c_obj +cdef class GroupbyAggregation: + cdef unique_ptr[groupby_aggregation] c_obj + +cdef class GroupbyScanAggregation: + cdef unique_ptr[groupby_scan_aggregation] c_obj + cdef Aggregation make_aggregation(op, kwargs=*) cdef RollingAggregation make_rolling_aggregation(op, kwargs=*) +cdef GroupbyAggregation make_groupby_aggregation(op, kwargs=*) +cdef GroupbyScanAggregation make_groupby_scan_aggregation(op, kwargs=*) diff --git a/python/cudf/cudf/_lib/aggregation.pyx b/python/cudf/cudf/_lib/aggregation.pyx index 211d7c996cb..da407cdbfa8 100644 --- a/python/cudf/cudf/_lib/aggregation.pyx +++ b/python/cudf/cudf/_lib/aggregation.pyx @@ -461,6 +461,299 @@ cdef class RollingAggregation: )) return agg +cdef class GroupbyAggregation: + """A Cython wrapper for groupby aggregations. + + **This class should never be instantiated using a standard constructor, + only using one of its many factories.** These factories handle mapping + different cudf operations to their libcudf analogs, e.g. + `cudf.DataFrame.idxmin` -> `libcudf.argmin`. Additionally, they perform + any additional configuration needed to translate Python arguments into + their corresponding C++ types (for instance, C++ enumerations used for + flag arguments). The factory approach is necessary to support operations + like `df.agg(lambda x: x.sum())`; such functions are called with this + class as an argument to generation the desired aggregation. + """ + @property + def kind(self): + return AggregationKind(self.c_obj.get()[0].kind).name + + @classmethod + def sum(cls): + cdef GroupbyAggregation agg = cls() + agg.c_obj = move( + libcudf_aggregation.make_sum_aggregation[groupby_aggregation]()) + return agg + + @classmethod + def min(cls): + cdef GroupbyAggregation agg = cls() + agg.c_obj = move( + libcudf_aggregation.make_min_aggregation[groupby_aggregation]()) + return agg + + @classmethod + def max(cls): + cdef GroupbyAggregation agg = cls() + agg.c_obj = move( + libcudf_aggregation.make_max_aggregation[groupby_aggregation]()) + return agg + + @classmethod + def idxmin(cls): + cdef GroupbyAggregation agg = cls() + agg.c_obj = move( + libcudf_aggregation.make_argmin_aggregation[ + groupby_aggregation]()) + return agg + + @classmethod + def idxmax(cls): + cdef GroupbyAggregation agg = cls() + agg.c_obj = move( + libcudf_aggregation.make_argmax_aggregation[ + groupby_aggregation]()) + return agg + + @classmethod + def mean(cls): + cdef GroupbyAggregation agg = cls() + agg.c_obj = move( + libcudf_aggregation.make_mean_aggregation[groupby_aggregation]()) + return agg + + @classmethod + def count(cls, dropna=True): + cdef libcudf_types.null_policy c_null_handling + if dropna: + c_null_handling = libcudf_types.null_policy.EXCLUDE + else: + c_null_handling = libcudf_types.null_policy.INCLUDE + + cdef GroupbyAggregation agg = cls() + agg.c_obj = move( + libcudf_aggregation.make_count_aggregation[groupby_aggregation]( + c_null_handling + )) + return agg + + @classmethod + def size(cls): + cdef GroupbyAggregation agg = cls() + agg.c_obj = move( + libcudf_aggregation.make_count_aggregation[groupby_aggregation]( + ( + NullHandling.INCLUDE) + )) + return agg + + @classmethod + def collect(cls): + cdef GroupbyAggregation agg = cls() + agg.c_obj = move( + libcudf_aggregation. + make_collect_list_aggregation[groupby_aggregation]()) + return agg + + @classmethod + def nunique(cls): + cdef GroupbyAggregation agg = cls() + agg.c_obj = move( + libcudf_aggregation. + make_nunique_aggregation[groupby_aggregation]()) + return agg + + @classmethod + def nth(cls, libcudf_types.size_type size): + cdef GroupbyAggregation agg = cls() + agg.c_obj = move( + libcudf_aggregation. + make_nth_element_aggregation[groupby_aggregation](size)) + return agg + + @classmethod + def product(cls): + cdef GroupbyAggregation agg = cls() + agg.c_obj = move( + libcudf_aggregation. + make_product_aggregation[groupby_aggregation]()) + return agg + prod = product + + @classmethod + def sum_of_squares(cls): + cdef GroupbyAggregation agg = cls() + agg.c_obj = move( + libcudf_aggregation. + make_sum_of_squares_aggregation[groupby_aggregation]() + ) + return agg + + @classmethod + def var(cls, ddof=1): + cdef GroupbyAggregation agg = cls() + agg.c_obj = move( + libcudf_aggregation. + make_variance_aggregation[groupby_aggregation](ddof)) + return agg + + @classmethod + def std(cls, ddof=1): + cdef GroupbyAggregation agg = cls() + agg.c_obj = move( + libcudf_aggregation. + make_std_aggregation[groupby_aggregation](ddof)) + return agg + + @classmethod + def median(cls): + cdef GroupbyAggregation agg = cls() + agg.c_obj = move( + libcudf_aggregation. + make_median_aggregation[groupby_aggregation]()) + return agg + + @classmethod + def quantile(cls, q=0.5, interpolation="linear"): + cdef GroupbyAggregation agg = cls() + + if not pd.api.types.is_list_like(q): + q = [q] + + cdef vector[double] c_q = q + cdef libcudf_types.interpolation c_interp = ( + ( + ( + Interpolation[interpolation.upper()] + ) + ) + ) + agg.c_obj = move( + libcudf_aggregation.make_quantile_aggregation[groupby_aggregation]( + c_q, c_interp) + ) + return agg + + @classmethod + def unique(cls): + cdef GroupbyAggregation agg = cls() + agg.c_obj = move( + libcudf_aggregation. + make_collect_set_aggregation[groupby_aggregation]()) + return agg + + @classmethod + def first(cls): + cdef GroupbyAggregation agg = cls() + agg.c_obj = move( + libcudf_aggregation. + make_nth_element_aggregation[groupby_aggregation]( + 0, + ( + NullHandling.EXCLUDE + ) + ) + ) + return agg + + @classmethod + def last(cls): + cdef GroupbyAggregation agg = cls() + agg.c_obj = move( + libcudf_aggregation. + make_nth_element_aggregation[groupby_aggregation]( + -1, + ( + NullHandling.EXCLUDE + ) + ) + ) + return agg + +cdef class GroupbyScanAggregation: + """A Cython wrapper for groupby scan aggregations. + + **This class should never be instantiated using a standard constructor, + only using one of its many factories.** These factories handle mapping + different cudf operations to their libcudf analogs, e.g. + `cudf.DataFrame.idxmin` -> `libcudf.argmin`. Additionally, they perform + any additional configuration needed to translate Python arguments into + their corresponding C++ types (for instance, C++ enumerations used for + flag arguments). The factory approach is necessary to support operations + like `df.agg(lambda x: x.sum())`; such functions are called with this + class as an argument to generation the desired aggregation. + """ + @property + def kind(self): + return AggregationKind(self.c_obj.get()[0].kind).name + + @classmethod + def sum(cls): + cdef GroupbyScanAggregation agg = cls() + agg.c_obj = move( + libcudf_aggregation. + make_sum_aggregation[groupby_scan_aggregation]()) + return agg + + @classmethod + def min(cls): + cdef GroupbyScanAggregation agg = cls() + agg.c_obj = move( + libcudf_aggregation. + make_min_aggregation[groupby_scan_aggregation]()) + return agg + + @classmethod + def max(cls): + cdef GroupbyScanAggregation agg = cls() + agg.c_obj = move( + libcudf_aggregation. + make_max_aggregation[groupby_scan_aggregation]()) + return agg + + @classmethod + def count(cls, dropna=True): + cdef libcudf_types.null_policy c_null_handling + if dropna: + c_null_handling = libcudf_types.null_policy.EXCLUDE + else: + c_null_handling = libcudf_types.null_policy.INCLUDE + + cdef GroupbyScanAggregation agg = cls() + agg.c_obj = move( + libcudf_aggregation. + make_count_aggregation[groupby_scan_aggregation](c_null_handling)) + return agg + + @classmethod + def size(cls): + cdef GroupbyScanAggregation agg = cls() + agg.c_obj = move( + libcudf_aggregation. + make_count_aggregation[groupby_scan_aggregation]( + ( + NullHandling.INCLUDE) + )) + return agg + + @classmethod + def cumcount(cls): + cdef GroupbyScanAggregation agg = cls() + agg.c_obj = move( + libcudf_aggregation. + make_count_aggregation[groupby_scan_aggregation]( + libcudf_types.null_policy.INCLUDE + )) + return agg + + # scan aggregations + # TODO: update this after adding per algorithm aggregation derived types + # https://github.com/rapidsai/cudf/issues/7106 + cumsum = sum + cummin = min + cummax = max + + cdef Aggregation make_aggregation(op, kwargs=None): r""" Parameters @@ -536,3 +829,79 @@ cdef RollingAggregation make_rolling_aggregation(op, kwargs=None): else: raise TypeError(f"Unknown aggregation {op}") return agg + +cdef GroupbyAggregation make_groupby_aggregation(op, kwargs=None): + r""" + Parameters + ---------- + op : str or callable + If callable, must meet one of the following requirements: + + * Is of the form lambda x: x.agg(*args, **kwargs), where + `agg` is the name of a supported aggregation. Used to + to specify aggregations that take arguments, e.g., + `lambda x: x.quantile(0.5)`. + * Is a user defined aggregation function that operates on + group values. In this case, the output dtype must be + specified in the `kwargs` dictionary. + \*\*kwargs : dict, optional + Any keyword arguments to be passed to the op. + + Returns + ------- + GroupbyAggregation + """ + if kwargs is None: + kwargs = {} + + cdef GroupbyAggregation agg + if isinstance(op, str): + agg = getattr(GroupbyAggregation, op)(**kwargs) + elif callable(op): + if op is list: + agg = GroupbyAggregation.collect() + elif "dtype" in kwargs: + agg = GroupbyAggregation.from_udf(op, **kwargs) + else: + agg = op(GroupbyAggregation) + else: + raise TypeError(f"Unknown aggregation {op}") + return agg + +cdef GroupbyScanAggregation make_groupby_scan_aggregation(op, kwargs=None): + r""" + Parameters + ---------- + op : str or callable + If callable, must meet one of the following requirements: + + * Is of the form lambda x: x.agg(*args, **kwargs), where + `agg` is the name of a supported aggregation. Used to + to specify aggregations that take arguments, e.g., + `lambda x: x.quantile(0.5)`. + * Is a user defined aggregation function that operates on + group values. In this case, the output dtype must be + specified in the `kwargs` dictionary. + \*\*kwargs : dict, optional + Any keyword arguments to be passed to the op. + + Returns + ------- + GroupbyScanAggregation + """ + if kwargs is None: + kwargs = {} + + cdef GroupbyScanAggregation agg + if isinstance(op, str): + agg = getattr(GroupbyScanAggregation, op)(**kwargs) + elif callable(op): + if op is list: + agg = GroupbyScanAggregation.collect() + elif "dtype" in kwargs: + agg = GroupbyScanAggregation.from_udf(op, **kwargs) + else: + agg = op(GroupbyScanAggregation) + else: + raise TypeError(f"Unknown aggregation {op}") + return agg diff --git a/python/cudf/cudf/_lib/cpp/aggregation.pxd b/python/cudf/cudf/_lib/cpp/aggregation.pxd index 6daee5077ed..13bfa49057c 100644 --- a/python/cudf/cudf/_lib/cpp/aggregation.pxd +++ b/python/cudf/cudf/_lib/cpp/aggregation.pxd @@ -43,6 +43,12 @@ cdef extern from "cudf/aggregation.hpp" namespace "cudf" nogil: cdef cppclass rolling_aggregation: aggregation.Kind kind + cdef cppclass groupby_aggregation: + aggregation.Kind kind + + cdef cppclass groupby_scan_aggregation: + aggregation.Kind kind + ctypedef enum udf_type: CUDA 'cudf::udf_type::CUDA' PTX 'cudf::udf_type::PTX' diff --git a/python/cudf/cudf/_lib/cpp/groupby.pxd b/python/cudf/cudf/_lib/cpp/groupby.pxd index 2d8f251799d..2ecdf76842f 100644 --- a/python/cudf/cudf/_lib/cpp/groupby.pxd +++ b/python/cudf/cudf/_lib/cpp/groupby.pxd @@ -5,7 +5,10 @@ from libcpp.memory cimport unique_ptr from libcpp.pair cimport pair from libcpp.vector cimport vector -from cudf._lib.cpp.aggregation cimport aggregation +from cudf._lib.cpp.aggregation cimport ( + groupby_aggregation, + groupby_scan_aggregation, +) from cudf._lib.cpp.column.column cimport column from cudf._lib.cpp.column.column_view cimport column_view from cudf._lib.cpp.libcpp.functional cimport reference_wrapper @@ -26,7 +29,12 @@ cdef extern from "cudf/groupby.hpp" \ cdef cppclass aggregation_request: aggregation_request() except + column_view values - vector[unique_ptr[aggregation]] aggregations + vector[unique_ptr[groupby_aggregation]] aggregations + + cdef cppclass scan_request: + scan_request() except + + column_view values + vector[unique_ptr[groupby_scan_aggregation]] aggregations cdef cppclass aggregation_result: vector[unique_ptr[column]] results @@ -76,7 +84,7 @@ cdef extern from "cudf/groupby.hpp" \ unique_ptr[table], vector[aggregation_result] ] scan( - const vector[aggregation_request]& requests, + const vector[scan_request]& requests, ) except + pair[ diff --git a/python/cudf/cudf/_lib/groupby.pyx b/python/cudf/cudf/_lib/groupby.pyx index ed9820300d8..d7416625248 100644 --- a/python/cudf/cudf/_lib/groupby.pyx +++ b/python/cudf/cudf/_lib/groupby.pyx @@ -32,7 +32,12 @@ from cudf._lib.scalar import as_device_scalar cimport cudf._lib.cpp.groupby as libcudf_groupby cimport cudf._lib.cpp.types as libcudf_types -from cudf._lib.aggregation cimport Aggregation, make_aggregation +from cudf._lib.aggregation cimport ( + GroupbyAggregation, + GroupbyScanAggregation, + make_groupby_aggregation, + make_groupby_scan_aggregation, +) from cudf._lib.cpp.column.column cimport column from cudf._lib.cpp.column.column_view cimport column_view from cudf._lib.cpp.libcpp.functional cimport reference_wrapper @@ -105,30 +110,13 @@ cdef class GroupBy: ) return grouped_keys, grouped_values, c_group_offsets - def aggregate(self, Table values, aggregations): - """ - Parameters - ---------- - values : Table - aggregations - A dict mapping column names in `Table` to a list of aggregations - to perform on that column - - Each aggregation may be specified as: - - a string (e.g., "max") - - a lambda/function - - Returns - ------- - Table of aggregated values - """ + def aggregate_internal(self, Table values, aggregations): from cudf.core.column_accessor import ColumnAccessor cdef vector[libcudf_groupby.aggregation_request] c_agg_requests cdef libcudf_groupby.aggregation_request c_agg_request cdef Column col - cdef Aggregation agg_obj + cdef GroupbyAggregation agg_obj - cdef bool scan = _is_all_scan_aggregate(aggregations) allow_empty = all(len(v) == 0 for v in aggregations.values()) included_aggregations = defaultdict(list) @@ -154,7 +142,7 @@ cdef class GroupBy: c_agg_request = move(libcudf_groupby.aggregation_request()) for agg in aggs: - agg_obj = make_aggregation(agg) + agg_obj = make_groupby_aggregation(agg) if (valid_aggregations == "ALL" or agg_obj.kind in valid_aggregations): included_aggregations[col_name].append(agg) @@ -175,30 +163,90 @@ cdef class GroupBy: vector[libcudf_groupby.aggregation_result] ] c_result - try: - with nogil: - if scan: - c_result = move( - self.c_obj.get()[0].scan( - c_agg_requests - ) - ) - else: - c_result = move( - self.c_obj.get()[0].aggregate( - c_agg_requests - ) + with nogil: + c_result = move( + self.c_obj.get()[0].aggregate( + c_agg_requests + ) + ) + + grouped_keys, _ = data_from_unique_ptr( + move(c_result.first), + column_names=self.keys._column_names + ) + + result_data = ColumnAccessor(multiindex=True) + # Note: This loop relies on the included_aggregations dict being + # insertion ordered to map results to requested aggregations by index. + for i, col_name in enumerate(included_aggregations): + for j, agg_name in enumerate(included_aggregations[col_name]): + if callable(agg_name): + agg_name = agg_name.__name__ + result_data[(col_name, agg_name)] = ( + Column.from_unique_ptr(move(c_result.second[i].results[j])) + ) + + return result_data, cudf.Index._from_data(grouped_keys) + + def scan_internal(self, Table values, aggregations): + from cudf.core.column_accessor import ColumnAccessor + cdef vector[libcudf_groupby.scan_request] c_agg_requests + cdef libcudf_groupby.scan_request c_agg_request + cdef Column col + cdef GroupbyScanAggregation agg_obj + + allow_empty = all(len(v) == 0 for v in aggregations.values()) + + included_aggregations = defaultdict(list) + for i, (col_name, aggs) in enumerate(aggregations.items()): + col = values._data[col_name] + dtype = col.dtype + + valid_aggregations = ( + _LIST_AGGS if is_list_dtype(dtype) + else _STRING_AGGS if is_string_dtype(dtype) + else _CATEGORICAL_AGGS if is_categorical_dtype(dtype) + else _STRUCT_AGGS if is_struct_dtype(dtype) + else _INTERVAL_AGGS if is_interval_dtype(dtype) + else _DECIMAL_AGGS if is_decimal_dtype(dtype) + else "ALL" + ) + if (valid_aggregations is _DECIMAL_AGGS + and rmm._cuda.gpu.runtimeGetVersion() < 11000): + raise RuntimeError( + "Decimal aggregations are only supported on CUDA >= 11 " + "due to an nvcc compiler bug." + ) + + c_agg_request = move(libcudf_groupby.scan_request()) + for agg in aggs: + agg_obj = make_groupby_scan_aggregation(agg) + if (valid_aggregations == "ALL" + or agg_obj.kind in valid_aggregations): + included_aggregations[col_name].append(agg) + c_agg_request.aggregations.push_back( + move(agg_obj.c_obj) ) - except RuntimeError as e: - # TODO: remove this try..except after - # https://github.com/rapidsai/cudf/issues/7611 - # is resolved - if ("make_empty_column") in str(e): - raise NotImplementedError( - "Aggregation not supported for empty columns" - ) from e - else: - raise + if not c_agg_request.aggregations.empty(): + c_agg_request.values = col.view() + c_agg_requests.push_back( + move(c_agg_request) + ) + + if c_agg_requests.empty() and not allow_empty: + raise DataError("All requested aggregations are unsupported.") + + cdef pair[ + unique_ptr[table], + vector[libcudf_groupby.aggregation_result] + ] c_result + + with nogil: + c_result = move( + self.c_obj.get()[0].scan( + c_agg_requests + ) + ) grouped_keys, _ = data_from_unique_ptr( move(c_result.first), @@ -218,6 +266,28 @@ cdef class GroupBy: return result_data, cudf.Index._from_data(grouped_keys) + def aggregate(self, Table values, aggregations): + """ + Parameters + ---------- + values : Table + aggregations + A dict mapping column names in `Table` to a list of aggregations + to perform on that column + + Each aggregation may be specified as: + - a string (e.g., "max") + - a lambda/function + + Returns + ------- + Table of aggregated values + """ + if _is_all_scan_aggregate(aggregations): + return self.scan_internal(values, aggregations) + + return self.aggregate_internal(values, aggregations) + def shift(self, Table values, int periods, list fill_values): cdef table_view view = values.view() cdef size_type num_col = view.num_columns()