Skip to content

Commit

Permalink
Fix groupby failures in dask_cudf CI (#11561)
Browse files Browse the repository at this point in the history
Dask-cudf groupby tests *should* be failing as a result of dask/dask#9302 (see [failures](https://gpuci.gpuopenanalytics.com/job/rapidsai/job/gpuci/job/cudf/job/prb/job/cudf-gpu-test/CUDA=11.5,GPU_LABEL=driver-495,LINUX_VER=ubuntu20.04,PYTHON=3.9/9946/) in #11565 is merged - where dask/main is being installed correctly).  This PR updates the dask_cudf groupby code to fix these failures.

Authors:
  - Richard (Rick) Zamora (https://github.com/rjzamora)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: #11561
  • Loading branch information
rjzamora authored Aug 19, 2022
1 parent 5ffee5c commit f42d117
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 6 deletions.
18 changes: 14 additions & 4 deletions python/dask_cudf/dask_cudf/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ def last(self, split_every=None, split_out=1):
)

@_dask_cudf_nvtx_annotate
def aggregate(self, arg, split_every=None, split_out=1):
def aggregate(self, arg, split_every=None, split_out=1, shuffle=None):
if arg == "size":
return self.size()

Expand All @@ -274,7 +274,12 @@ def aggregate(self, arg, split_every=None, split_out=1):
)

return super().aggregate(
arg, split_every=split_every, split_out=split_out
arg,
split_every=split_every,
split_out=split_out,
# TODO: Change following line to `shuffle=shuffle,`
# when dask_cudf is pinned to dask>2022.8.0
**({} if shuffle is None else {"shuffle": shuffle}),
)


Expand Down Expand Up @@ -436,7 +441,7 @@ def last(self, split_every=None, split_out=1):
)[self._slice]

@_dask_cudf_nvtx_annotate
def aggregate(self, arg, split_every=None, split_out=1):
def aggregate(self, arg, split_every=None, split_out=1, shuffle=None):
if arg == "size":
return self.size()

Expand All @@ -459,7 +464,12 @@ def aggregate(self, arg, split_every=None, split_out=1):
)[self._slice]

return super().aggregate(
arg, split_every=split_every, split_out=split_out
arg,
split_every=split_every,
split_out=split_out,
# TODO: Change following line to `shuffle=shuffle,`
# when dask_cudf is pinned to dask>2022.8.0
**({} if shuffle is None else {"shuffle": shuffle}),
)


Expand Down
4 changes: 2 additions & 2 deletions python/dask_cudf/dask_cudf/tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -575,12 +575,12 @@ def test_groupby_categorical_key():
ddf = gddf.to_dask_dataframe()

got = (
gddf.groupby("name")
gddf.groupby("name", sort=True)
.agg({"x": ["mean", "max"], "y": ["mean", "count"]})
.compute()
)
expect = (
ddf.groupby("name")
ddf.groupby("name", sort=True)
.agg({"x": ["mean", "max"], "y": ["mean", "count"]})
.compute()
)
Expand Down

0 comments on commit f42d117

Please sign in to comment.