Direct SeriesGroupBy.aggregate to SeriesGroupBy.agg (#14971)

Calling `SeriesGroupBy.aggregate` is currently dispatched to `GroupBy.agg` instead of `SeriesGroupBy.agg`. As a result, `SeriesGroupBy.aggregate` produces a `DataFrame` in many cases where it *should* produce a `Series`. This PR corrects the underlying problem by adding an `aggregate = agg` alias directly after the `agg` definition, so that both names resolve to the same method.

Authors:
- Richard (Rick) Zamora (https://github.com/rjzamora)

Approvers:
- Bradley Dice (https://github.com/bdice)
- Matthew Roeschke (https://github.com/mroeschke)

URL: #14971
rapidsai · Feb 6, 2024 · 20ed009 · 20ed009
1 parent 6b989f4
commit 20ed009
Show file tree

Hide file tree

Showing 2 changed files with 13 additions and 1 deletion.
diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py
@@ -2640,6 +2640,8 @@ def agg(self, func):
 
         return result
 
+    aggregate = agg
+
     def apply(self, func, *args):
         result = super().apply(func, *args)
 

diff --git a/python/cudf/cudf/tests/groupby/test_agg.py b/python/cudf/cudf/tests/groupby/test_agg.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2023, NVIDIA CORPORATION.
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
 import numpy as np
 import pytest
 
@@ -16,3 +16,13 @@ def test_agg_count_dtype(empty):
         df = df.iloc[:0]
     result = df.groupby("a").agg({"c": "count"})
     assert result["c"].dtype == np.dtype("int64")
+
+
+@pytest.mark.parametrize("attr", ["agg", "aggregate"])
+def test_series_agg(attr):
+    df = cudf.DataFrame({"a": [1, 2, 1, 2], "b": [0, 0, 0, 0]})
+    pdf = df.to_pandas()
+    agg = getattr(df.groupby("a")["a"], attr)("count")
+    pd_agg = getattr(pdf.groupby(["a"])["a"], attr)("count")
+
+    assert agg.ndim == pd_agg.ndim