# Patch summary (commentary only; text before the first "diff --git" header
# is not part of any hunk).
#
# groupby.py, inside `agg`: the `agg_kind in {"COUNT", "SIZE"}` check is moved
# from an `elif` placed after the `self.obj.empty ...` branch to the first
# branch of the chain. COUNT/SIZE results are therefore cast to "int64" before
# the empty-object branch (which casts to `orig_dtype`) is considered.
#
# tests/groupby/test_agg.py (new file): `test_agg_count_dtype` groups a small
# frame by "a", aggregates string column "c" with "count", and asserts the
# result dtype is int64 for both an empty (`df.iloc[:0]`) and a non-empty frame.
#
# NOTE(review): this patch was recovered from a copy whose line breaks and
# leading whitespace had been collapsed; the indentation below is reconstructed
# from the code's nesting -- confirm against the target file before applying.
diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py
index c48e5109ff2..73e6774f5ce 100644
--- a/python/cudf/cudf/core/groupby/groupby.py
+++ b/python/cudf/cudf/core/groupby/groupby.py
@@ -592,7 +592,9 @@ def agg(self, func):
                     # Structs lose their labels which we reconstruct here
                     col = col._with_type_metadata(cudf.ListDtype(orig_dtype))
 
-                if (
+                if agg_kind in {"COUNT", "SIZE"}:
+                    data[key] = col.astype("int64")
+                elif (
                     self.obj.empty
                     and (
                         isinstance(agg_name, str)
@@ -609,8 +611,6 @@ def agg(self, func):
                     )
                 ):
                     data[key] = col.astype(orig_dtype)
-                elif agg_kind in {"COUNT", "SIZE"}:
-                    data[key] = col.astype("int64")
                 else:
                     data[key] = col
         data = ColumnAccessor(data, multiindex=multilevel)
diff --git a/python/cudf/cudf/tests/groupby/test_agg.py b/python/cudf/cudf/tests/groupby/test_agg.py
new file mode 100644
index 00000000000..7919ee4a9f1
--- /dev/null
+++ b/python/cudf/cudf/tests/groupby/test_agg.py
@@ -0,0 +1,18 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.
+import numpy as np
+import pytest
+
+import cudf
+
+
+@pytest.mark.parametrize(
+    "empty",
+    [True, False],
+    ids=["empty", "nonempty"],
+)
+def test_agg_count_dtype(empty):
+    df = cudf.DataFrame({"a": [1, 2, 1], "c": ["a", "b", "c"]})
+    if empty:
+        df = df.iloc[:0]
+    result = df.groupby("a").agg({"c": "count"})
+    assert result["c"].dtype == np.dtype("int64")