From 2ce8c87b712695b68b767c5fb51d61f4ed7908f3 Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell <lmitchell@nvidia.com>
Date: Wed, 22 Nov 2023 16:13:11 +0000
Subject: [PATCH 1/2] Add test of #14200

---
 python/cudf/cudf/tests/groupby/test_agg.py | 26 ++++++++++++++++++++++
 1 file changed, 26 insertions(+)
 create mode 100644 python/cudf/cudf/tests/groupby/test_agg.py

diff --git a/python/cudf/cudf/tests/groupby/test_agg.py b/python/cudf/cudf/tests/groupby/test_agg.py
new file mode 100644
index 00000000000..692c6dc75be
--- /dev/null
+++ b/python/cudf/cudf/tests/groupby/test_agg.py
@@ -0,0 +1,26 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.
+import numpy as np
+import pytest
+
+import cudf
+
+
+@pytest.mark.parametrize(
+    "empty",
+    [
+        pytest.param(
+            True,
+            marks=pytest.mark.xfail(
+                reason="https://github.com/rapidsai/cudf/issues/14200"
+            ),
+        ),
+        False,
+    ],
+    ids=["empty", "nonempty"],
+)
+def test_agg_count_dtype(empty):
+    df = cudf.DataFrame({"a": [1, 2, 1], "c": ["a", "b", "c"]})
+    if empty:
+        df = df.iloc[:0]
+    result = df.groupby("a").agg({"c": "count"})
+    assert result["c"].dtype == np.dtype("int64")

From fb3a0ba98f7bb9ec774c85928a13b678515f5314 Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell <lmitchell@nvidia.com>
Date: Wed, 22 Nov 2023 16:15:26 +0000
Subject: [PATCH 2/2] Correct dtype of count aggregations on empty dataframes

A count aggregation should always return an int64 column, even if the
grouped dataframe is empty. Previously we did not do this because the
short-circuiting for empty inputs was hit before handling the count
case. Fix this by reordering the conditions.

- Closes #14200
---
 python/cudf/cudf/core/groupby/groupby.py   |  6 +++---
 python/cudf/cudf/tests/groupby/test_agg.py | 10 +---------
 2 files changed, 4 insertions(+), 12 deletions(-)

diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py
index c48e5109ff2..73e6774f5ce 100644
--- a/python/cudf/cudf/core/groupby/groupby.py
+++ b/python/cudf/cudf/core/groupby/groupby.py
@@ -592,7 +592,9 @@ def agg(self, func):
                     # Structs lose their labels which we reconstruct here
                     col = col._with_type_metadata(cudf.ListDtype(orig_dtype))
 
-                if (
+                if agg_kind in {"COUNT", "SIZE"}:
+                    data[key] = col.astype("int64")
+                elif (
                     self.obj.empty
                     and (
                         isinstance(agg_name, str)
@@ -609,8 +611,6 @@ def agg(self, func):
                     )
                 ):
                     data[key] = col.astype(orig_dtype)
-                elif agg_kind in {"COUNT", "SIZE"}:
-                    data[key] = col.astype("int64")
                 else:
                     data[key] = col
         data = ColumnAccessor(data, multiindex=multilevel)
diff --git a/python/cudf/cudf/tests/groupby/test_agg.py b/python/cudf/cudf/tests/groupby/test_agg.py
index 692c6dc75be..7919ee4a9f1 100644
--- a/python/cudf/cudf/tests/groupby/test_agg.py
+++ b/python/cudf/cudf/tests/groupby/test_agg.py
@@ -7,15 +7,7 @@
 
 @pytest.mark.parametrize(
     "empty",
-    [
-        pytest.param(
-            True,
-            marks=pytest.mark.xfail(
-                reason="https://github.com/rapidsai/cudf/issues/14200"
-            ),
-        ),
-        False,
-    ],
+    [True, False],
     ids=["empty", "nonempty"],
 )
 def test_agg_count_dtype(empty):