rapidsai · rapids-bot · May 11, 2022 · May 11, 2022 · May 11, 2022 · May 11, 2022
@@ -89,7 +89,16 @@ def count(self, split_every=None, split_out=1):
         return groupby_agg(
             self.obj,
             self.by,
-            {c: "count" for c in self.obj.columns if c not in self.by},
+            {
+                c: "count"
+                for c in self.obj.columns
+                if c
+                not in (
+                    self.by
+                    if isinstance(self.by, (tuple, list))
+                    else [self.by]
+                )
+            },
             split_every=split_every,
             split_out=split_out,
             sep=self.sep,
@@ -104,7 +113,16 @@ def mean(self, split_every=None, split_out=1):
         return groupby_agg(
             self.obj,
             self.by,
-            {c: "mean" for c in self.obj.columns if c not in self.by},
+            {
+                c: "mean"
+                for c in self.obj.columns
+                if c
+                not in (
+                    self.by
+                    if isinstance(self.by, (tuple, list))
+                    else [self.by]
+                )
+            },
             split_every=split_every,
             split_out=split_out,
             sep=self.sep,
@@ -119,7 +137,16 @@ def std(self, split_every=None, split_out=1):
         return groupby_agg(
             self.obj,
             self.by,
-            {c: "std" for c in self.obj.columns if c not in self.by},
+            {
+                c: "std"
+                for c in self.obj.columns
+                if c
+                not in (
+                    self.by
+                    if isinstance(self.by, (tuple, list))
+                    else [self.by]
+                )
+            },
             split_every=split_every,
             split_out=split_out,
             sep=self.sep,
@@ -134,7 +161,16 @@ def var(self, split_every=None, split_out=1):
         return groupby_agg(
             self.obj,
             self.by,
-            {c: "var" for c in self.obj.columns if c not in self.by},
+            {
+                c: "var"
+                for c in self.obj.columns
+                if c
+                not in (
+                    self.by
+                    if isinstance(self.by, (tuple, list))
+                    else [self.by]
+                )
+            },
             split_every=split_every,
             split_out=split_out,
             sep=self.sep,
@@ -149,7 +185,16 @@ def sum(self, split_every=None, split_out=1):
         return groupby_agg(
             self.obj,
             self.by,
-            {c: "sum" for c in self.obj.columns if c not in self.by},
+            {
+                c: "sum"
+                for c in self.obj.columns
+                if c
+                not in (
+                    self.by
+                    if isinstance(self.by, (tuple, list))
+                    else [self.by]
+                )
+            },
             split_every=split_every,
             split_out=split_out,
             sep=self.sep,
@@ -164,7 +209,16 @@ def min(self, split_every=None, split_out=1):
         return groupby_agg(
             self.obj,
             self.by,
-            {c: "min" for c in self.obj.columns if c not in self.by},
+            {
+                c: "min"
+                for c in self.obj.columns
+                if c
+                not in (
+                    self.by
+                    if isinstance(self.by, (tuple, list))
+                    else [self.by]
+                )
+            },
             split_every=split_every,
             split_out=split_out,
             sep=self.sep,
@@ -179,7 +233,16 @@ def max(self, split_every=None, split_out=1):
         return groupby_agg(
             self.obj,
             self.by,
-            {c: "max" for c in self.obj.columns if c not in self.by},
+            {
+                c: "max"
+                for c in self.obj.columns
+                if c
+                not in (
+                    self.by
+                    if isinstance(self.by, (tuple, list))
+                    else [self.by]
+                )
+            },
             split_every=split_every,
             split_out=split_out,
             sep=self.sep,
@@ -194,7 +257,16 @@ def collect(self, split_every=None, split_out=1):
         return groupby_agg(
             self.obj,
             self.by,
-            {c: "collect" for c in self.obj.columns if c not in self.by},
+            {
+                c: "collect"
+                for c in self.obj.columns
+                if c
+                not in (
+                    self.by
+                    if isinstance(self.by, (tuple, list))
+                    else [self.by]
+                )
+            },
             split_every=split_every,
             split_out=split_out,
             sep=self.sep,
@@ -209,7 +281,16 @@ def first(self, split_every=None, split_out=1):
         return groupby_agg(
             self.obj,
             self.by,
-            {c: "first" for c in self.obj.columns if c not in self.by},
+            {
+                c: "first"
+                for c in self.obj.columns
+                if c
+                not in (
+                    self.by
+                    if isinstance(self.by, (tuple, list))
+                    else [self.by]
+                )
+            },
             split_every=split_every,
             split_out=split_out,
             sep=self.sep,
@@ -224,7 +305,16 @@ def last(self, split_every=None, split_out=1):
         return groupby_agg(
             self.obj,
             self.by,
-            {c: "last" for c in self.obj.columns if c not in self.by},
+            {
+                c: "last"
+                for c in self.obj.columns
+                if c
+                not in (
+                    self.by
+                    if isinstance(self.by, (tuple, list))
+                    else [self.by]
+                )
+            },
             split_every=split_every,
             split_out=split_out,
             sep=self.sep,

@@ -20,18 +20,19 @@ def test_groupby_basic(series, aggregation):
     np.random.seed(0)
     pdf = pd.DataFrame(
         {
-            "x": np.random.randint(0, 5, size=10000),
+            "xx": np.random.randint(0, 5, size=10000),
+            "x": np.random.normal(size=10000),
             "y": np.random.normal(size=10000),
         }
     )
 
     gdf = cudf.DataFrame.from_pandas(pdf)
-    gdf_grouped = gdf.groupby("x")
-    ddf_grouped = dask_cudf.from_cudf(gdf, npartitions=5).groupby("x")
+    gdf_grouped = gdf.groupby("xx")
+    ddf_grouped = dask_cudf.from_cudf(gdf, npartitions=5).groupby("xx")
 
     if series:
-        gdf_grouped = gdf_grouped.x
-        ddf_grouped = ddf_grouped.x
+        gdf_grouped = gdf_grouped.xx
+        ddf_grouped = ddf_grouped.xx
 
     a = getattr(gdf_grouped, aggregation)()
     b = getattr(ddf_grouped, aggregation)().compute()
@@ -41,8 +42,8 @@ def test_groupby_basic(series, aggregation):
     else:
         dd.assert_eq(a, b)
 
-    a = gdf_grouped.agg({"x": aggregation})
-    b = ddf_grouped.agg({"x": aggregation}).compute()
+    a = gdf_grouped.agg({"xx": aggregation})
+    b = ddf_grouped.agg({"xx": aggregation}).compute()
 
     if aggregation == "count":
         dd.assert_eq(a, b, check_dtype=False)