From bcf01240c941b876944f75cd20d7abba2c299b1a Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Thu, 4 Jan 2024 00:57:00 -0800 Subject: [PATCH 1/4] Update to Dask's `shuffle_method` kwarg https://github.com/dask/dask/pull/10738 has deprecated the `shuffle` kwarg in favor of `shuffle_method` which now raises a `FutureWarning`. This change transitions to the new kwarg. --- python/dask_cudf/dask_cudf/core.py | 38 ++++++++++----- python/dask_cudf/dask_cudf/groupby.py | 48 +++++++++++-------- python/dask_cudf/dask_cudf/sorting.py | 18 +++---- python/dask_cudf/dask_cudf/tests/test_core.py | 6 +-- .../dask_cudf/tests/test_distributed.py | 6 +-- .../dask_cudf/dask_cudf/tests/test_groupby.py | 24 ++++++---- python/dask_cudf/dask_cudf/tests/test_sort.py | 4 +- 7 files changed, 86 insertions(+), 58 deletions(-) diff --git a/python/dask_cudf/dask_cudf/core.py b/python/dask_cudf/dask_cudf/core.py index ecdc566037d..a4c6588e328 100644 --- a/python/dask_cudf/dask_cudf/core.py +++ b/python/dask_cudf/dask_cudf/core.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2023, NVIDIA CORPORATION. +# Copyright (c) 2018-2024, NVIDIA CORPORATION. import math import textwrap @@ -112,16 +112,19 @@ def do_apply_rows(df, func, incols, outcols, kwargs): ) @_dask_cudf_nvtx_annotate - def merge(self, other, shuffle=None, **kwargs): + def merge(self, other, shuffle_method=None, **kwargs): on = kwargs.pop("on", None) if isinstance(on, tuple): on = list(on) return super().merge( - other, on=on, shuffle=_get_shuffle_type(shuffle), **kwargs + other, + on=on, + shuffle_method=_get_shuffle_type(shuffle_method), + **kwargs, ) @_dask_cudf_nvtx_annotate - def join(self, other, shuffle=None, **kwargs): + def join(self, other, shuffle_method=None, **kwargs): # CuDF doesn't support "right" join yet how = kwargs.pop("how", "left") if how == "right": @@ -131,12 +134,21 @@ def join(self, other, shuffle=None, **kwargs): if isinstance(on, tuple): on = list(on) return super().join( - other, how=how, on=on, shuffle=_get_shuffle_type(shuffle), **kwargs + other, + how=how, + on=on, + shuffle_method=_get_shuffle_type(shuffle_method), + **kwargs, ) @_dask_cudf_nvtx_annotate def set_index( - self, other, sorted=False, divisions=None, shuffle=None, **kwargs + self, + other, + sorted=False, + divisions=None, + shuffle_method=None, + **kwargs, ): pre_sorted = sorted @@ -153,7 +165,7 @@ def set_index( # Let upstream-dask handle "pre-sorted" case if pre_sorted: - return dd.shuffle.set_sorted_index( + return dd.shuffle_method.set_sorted_index( self, other, divisions=divisions, **kwargs ) @@ -172,7 +184,7 @@ def set_index( divisions=divisions, set_divisions=True, ignore_index=True, - shuffle=shuffle, + shuffle_method=shuffle_method, ) # Ignore divisions if its a dataframe @@ -199,7 +211,7 @@ def set_index( return super().set_index( other, sorted=pre_sorted, - shuffle=_get_shuffle_type(shuffle), + shuffle_method=_get_shuffle_type(shuffle_method), divisions=divisions, **kwargs, ) @@ -216,7 +228,7 @@ def sort_values( na_position="last", sort_function=None, sort_function_kwargs=None, - shuffle=None, + shuffle_method=None, **kwargs, ): if kwargs: @@ -233,7 +245,7 @@ def sort_values( ignore_index=ignore_index, ascending=ascending, na_position=na_position, - shuffle=shuffle, + shuffle_method=shuffle_method, sort_function=sort_function, sort_function_kwargs=sort_function_kwargs, ) @@ -287,10 +299,10 @@ def var( return _parallel_var(self, meta, skipna, split_every, out) @_dask_cudf_nvtx_annotate - def shuffle(self, *args, shuffle=None, **kwargs): + def shuffle(self, *args, shuffle_method=None, **kwargs): """Wraps dask.dataframe DataFrame.shuffle method""" return super().shuffle( - *args, shuffle=_get_shuffle_type(shuffle), **kwargs + *args, shuffle_method=_get_shuffle_type(shuffle_method), **kwargs ) @_dask_cudf_nvtx_annotate diff --git a/python/dask_cudf/dask_cudf/groupby.py b/python/dask_cudf/dask_cudf/groupby.py index b1fdf443a17..8a5ad2e49bf 100644 --- a/python/dask_cudf/dask_cudf/groupby.py +++ b/python/dask_cudf/dask_cudf/groupby.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. from functools import wraps from typing import Set @@ -190,7 +190,9 @@ def last(self, split_every=None, split_out=1): ) @_dask_cudf_nvtx_annotate - def aggregate(self, arg, split_every=None, split_out=1, shuffle=None): + def aggregate( + self, arg, split_every=None, split_out=1, shuffle_method=None + ): if arg == "size": return self.size() @@ -211,7 +213,7 @@ def aggregate(self, arg, split_every=None, split_out=1, shuffle=None): sep=self.sep, sort=self.sort, as_index=self.as_index, - shuffle=shuffle, + shuffle_method=shuffle_method, **self.dropna, ) @@ -219,7 +221,7 @@ def aggregate(self, arg, split_every=None, split_out=1, shuffle=None): arg, split_every=split_every, split_out=split_out, - shuffle=shuffle, + shuffle_method=shuffle_method, ) @@ -331,7 +333,9 @@ def last(self, split_every=None, split_out=1): )[self._slice] @_dask_cudf_nvtx_annotate - def aggregate(self, arg, split_every=None, split_out=1, shuffle=None): + def aggregate( + self, arg, split_every=None, split_out=1, shuffle_method=None + ): if arg == "size": return self.size() @@ -342,14 +346,14 @@ def aggregate(self, arg, split_every=None, split_out=1, shuffle=None): if _groupby_optimized(self) and _aggs_optimized(arg, OPTIMIZED_AGGS): return _make_groupby_agg_call( - self, arg, split_every, split_out, shuffle + self, arg, split_every, split_out, shuffle_method )[self._slice] return super().aggregate( arg, split_every=split_every, split_out=split_out, - shuffle=shuffle, + shuffle_method=shuffle_method, ) @@ -364,7 +368,7 @@ def _shuffle_aggregate( split_out, token=None, sort=None, - shuffle=None, + shuffle_method=None, ): # Shuffle-based groupby aggregation # NOTE: This function is the dask_cudf version of @@ -391,7 +395,7 @@ def _shuffle_aggregate( .sort_values( gb_cols, ignore_index=True, - shuffle=shuffle, + shuffle_method=shuffle_method, ) .map_partitions( aggregate, @@ -405,7 +409,7 @@ def _shuffle_aggregate( gb_cols, npartitions=shuffle_npartitions, ignore_index=True, - shuffle=shuffle, + shuffle_method=shuffle_method, ).map_partitions( aggregate, meta=aggregate(chunked._meta, **aggregate_kwargs), @@ -429,7 +433,7 @@ def groupby_agg( sep="___", sort=False, as_index=True, - shuffle=None, + shuffle_method=None, ): """Optimized groupby aggregation for Dask-CuDF. @@ -450,7 +454,7 @@ def groupby_agg( sort : bool Sort the group keys, better performance is obtained when not sorting. - shuffle : str (optional) + shuffle_method : str (optional) Control how shuffling of the DataFrame is performed. sep : str Internal usage. @@ -575,12 +579,12 @@ def groupby_agg( "aggs_renames": aggs_renames, } - # Use shuffle=True for split_out>1 - if sort and split_out > 1 and shuffle is None: - shuffle = "tasks" + # Use shuffle_method=True for split_out>1 + if sort and split_out > 1 and shuffle_method is None: + shuffle_method = "tasks" # Check if we are using the shuffle-based algorithm - if shuffle: + if shuffle_method: # Shuffle-based aggregation return _shuffle_aggregate( ddf, @@ -593,7 +597,9 @@ def groupby_agg( split_out, token="cudf-aggregate", sort=sort, - shuffle=shuffle if isinstance(shuffle, str) else None, + shuffle_method=shuffle_method + if isinstance(shuffle_method, str) + else None, ) # Deal with sort/shuffle defaults @@ -602,7 +608,7 @@ def groupby_agg( "dask-cudf's groupby algorithm does not yet support " "`sort=True` when `split_out>1`, unless a shuffle-based " "algorithm is used. Please use `split_out=1`, group " - "with `sort=False`, or set `shuffle=True`." + "with `sort=False`, or set `shuffle_method=True`." ) # Determine required columns to enable column projection @@ -629,7 +635,9 @@ def groupby_agg( @_dask_cudf_nvtx_annotate -def _make_groupby_agg_call(gb, aggs, split_every, split_out, shuffle=None): +def _make_groupby_agg_call( + gb, aggs, split_every, split_out, shuffle_method=None +): """Helper method to consolidate the common `groupby_agg` call for all aggregations in one place """ @@ -643,7 +651,7 @@ def _make_groupby_agg_call(gb, aggs, split_every, split_out, shuffle=None): sep=gb.sep, sort=gb.sort, as_index=gb.as_index, - shuffle=shuffle, + shuffle_method=shuffle_method, **gb.dropna, ) diff --git a/python/dask_cudf/dask_cudf/sorting.py b/python/dask_cudf/dask_cudf/sorting.py index 27ba82c390c..153fee0b0e0 100644 --- a/python/dask_cudf/dask_cudf/sorting.py +++ b/python/dask_cudf/dask_cudf/sorting.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. from collections.abc import Iterator @@ -239,7 +239,7 @@ def sort_values( ignore_index=False, ascending=True, na_position="last", - shuffle=None, + shuffle_method=None, sort_function=None, sort_function_kwargs=None, ): @@ -295,7 +295,7 @@ def sort_values( "_partitions", max_branch=max_branch, npartitions=len(divisions) - 1, - shuffle=_get_shuffle_type(shuffle), + shuffle_method=_get_shuffle_type(shuffle_method), ignore_index=ignore_index, ).drop(columns=["_partitions"]) df3.divisions = (None,) * (df3.npartitions + 1) @@ -320,14 +320,14 @@ def get_default_shuffle_method(): return default -def _get_shuffle_type(shuffle): - # Utility to set the shuffle-kwarg default +def _get_shuffle_type(shuffle_method): + # Utility to set the shuffle_method-kwarg default # and to validate user-specified options - shuffle = shuffle or get_default_shuffle_method() - if shuffle not in _SHUFFLE_SUPPORT: + shuffle_method = shuffle_method or get_default_shuffle_method() + if shuffle_method not in _SHUFFLE_SUPPORT: raise ValueError( "Dask-cudf only supports the following shuffle " - f"methods: {_SHUFFLE_SUPPORT}. Got shuffle={shuffle}" + f"methods: {_SHUFFLE_SUPPORT}. Got shuffle_method={shuffle_method}" ) - return shuffle + return shuffle_method diff --git a/python/dask_cudf/dask_cudf/tests/test_core.py b/python/dask_cudf/dask_cudf/tests/test_core.py index 7f8876c8564..63fd6599496 100644 --- a/python/dask_cudf/dask_cudf/tests/test_core.py +++ b/python/dask_cudf/dask_cudf/tests/test_core.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2023, NVIDIA CORPORATION. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. import random @@ -331,10 +331,10 @@ def test_rearrange_by_divisions(nelem, index): divisions = (0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20) expect = dd.shuffle.rearrange_by_divisions( - ddf1, "x", divisions=divisions, shuffle="tasks" + ddf1, "x", divisions=divisions, shuffle_method="tasks" ) result = dd.shuffle.rearrange_by_divisions( - gdf1, "x", divisions=divisions, shuffle="tasks" + gdf1, "x", divisions=divisions, shuffle_method="tasks" ) dd.assert_eq(expect, result) diff --git a/python/dask_cudf/dask_cudf/tests/test_distributed.py b/python/dask_cudf/dask_cudf/tests/test_distributed.py index db3f3695648..39eadb45c91 100644 --- a/python/dask_cudf/dask_cudf/tests/test_distributed.py +++ b/python/dask_cudf/dask_cudf/tests/test_distributed.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. import numba.cuda import pytest @@ -80,7 +80,7 @@ def test_str_series_roundtrip(): def test_p2p_shuffle(): - # Check that we can use `shuffle="p2p"` + # Check that we can use `shuffle_method="p2p"` with dask_cuda.LocalCUDACluster(n_workers=1) as cluster: with Client(cluster): ddf = ( @@ -93,7 +93,7 @@ def test_p2p_shuffle(): .to_backend("cudf") ) dd.assert_eq( - ddf.sort_values("x", shuffle="p2p").compute(), + ddf.sort_values("x", shuffle_method="p2p").compute(), ddf.compute().sort_values("x"), check_index=False, ) diff --git a/python/dask_cudf/dask_cudf/tests/test_groupby.py b/python/dask_cudf/dask_cudf/tests/test_groupby.py index 84a821aaf79..b40bab6da91 100644 --- a/python/dask_cudf/dask_cudf/tests/test_groupby.py +++ b/python/dask_cudf/dask_cudf/tests/test_groupby.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2023, NVIDIA CORPORATION. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. import numpy as np import pandas as pd @@ -834,26 +834,34 @@ def test_groupby_shuffle(): # Sorted aggregation, single-partition output # (sort=True, split_out=1) - got = gddf.groupby("a", sort=True).agg(spec, shuffle=True, split_out=1) + got = gddf.groupby("a", sort=True).agg( + spec, shuffle_method=True, split_out=1 + ) dd.assert_eq(expect, got) # Sorted aggregation, multi-partition output # (sort=True, split_out=2) - got = gddf.groupby("a", sort=True).agg(spec, shuffle=True, split_out=2) + got = gddf.groupby("a", sort=True).agg( + spec, shuffle_method=True, split_out=2 + ) dd.assert_eq(expect, got) # Un-sorted aggregation, single-partition output # (sort=False, split_out=1) - got = gddf.groupby("a", sort=False).agg(spec, shuffle=True, split_out=1) + got = gddf.groupby("a", sort=False).agg( + spec, shuffle_method=True, split_out=1 + ) dd.assert_eq(expect.sort_index(), got.compute().sort_index()) # Un-sorted aggregation, multi-partition output # (sort=False, split_out=2) - # NOTE: `shuffle=True` should be default + # NOTE: `shuffle_method=True` should be default got = gddf.groupby("a", sort=False).agg(spec, split_out=2) dd.assert_eq(expect, got.compute().sort_index()) - # Sorted aggregation fails with split_out>1 when shuffle is False - # (sort=True, split_out=2, shuffle=False) + # Sorted aggregation fails with split_out>1 when shuffle_method is False + # (sort=True, split_out=2, shuffle_method=False) with pytest.raises(ValueError): - gddf.groupby("a", sort=True).agg(spec, shuffle=False, split_out=2) + gddf.groupby("a", sort=True).agg( + spec, shuffle_method=False, split_out=2 + ) diff --git a/python/dask_cudf/dask_cudf/tests/test_sort.py b/python/dask_cudf/dask_cudf/tests/test_sort.py index e58255cda06..8cf621da1bf 100644 --- a/python/dask_cudf/dask_cudf/tests/test_sort.py +++ b/python/dask_cudf/dask_cudf/tests/test_sort.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2023, NVIDIA CORPORATION. +# Copyright (c) 2019-2024, NVIDIA CORPORATION. import cupy as cp import numpy as np @@ -123,5 +123,5 @@ def test_disk_shuffle(): pytest.skip("need a version of dask that has partd_encode_dispatch") df = cudf.DataFrame({"a": [1, 2, 3] * 20, "b": [4, 5, 6, 7] * 15}) ddf = dd.from_pandas(df, npartitions=4) - got = dd.DataFrame.shuffle(ddf, "a", shuffle="disk") + got = dd.DataFrame.shuffle(ddf, "a", shuffle_method="disk") dd.assert_eq(got, df) From 3876950443cf0f9336bccfa9854e18e8e469b0d2 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Thu, 4 Jan 2024 03:16:40 -0800 Subject: [PATCH 2/4] Fix method name changed by mistake --- python/dask_cudf/dask_cudf/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/dask_cudf/dask_cudf/core.py b/python/dask_cudf/dask_cudf/core.py index a4c6588e328..597ca10f4f3 100644 --- a/python/dask_cudf/dask_cudf/core.py +++ b/python/dask_cudf/dask_cudf/core.py @@ -165,7 +165,7 @@ def set_index( # Let upstream-dask handle "pre-sorted" case if pre_sorted: - return dd.shuffle_method.set_sorted_index( + return dd.shuffle.set_sorted_index( self, other, divisions=divisions, **kwargs ) From 137ffe68d6f39807c51d84cf9b0b2c378349341f Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Thu, 4 Jan 2024 03:17:34 -0800 Subject: [PATCH 3/4] Fix `aggregate` whose `shuffle` argument hasn't been deprecated --- python/dask_cudf/dask_cudf/groupby.py | 16 +++++--------- .../dask_cudf/dask_cudf/tests/test_groupby.py | 22 ++++++------------- 2 files changed, 13 insertions(+), 25 deletions(-) diff --git a/python/dask_cudf/dask_cudf/groupby.py b/python/dask_cudf/dask_cudf/groupby.py index 8a5ad2e49bf..fde78cd4450 100644 --- a/python/dask_cudf/dask_cudf/groupby.py +++ b/python/dask_cudf/dask_cudf/groupby.py @@ -190,9 +190,7 @@ def last(self, split_every=None, split_out=1): ) @_dask_cudf_nvtx_annotate - def aggregate( - self, arg, split_every=None, split_out=1, shuffle_method=None - ): + def aggregate(self, arg, split_every=None, split_out=1, shuffle=None): if arg == "size": return self.size() @@ -213,7 +211,7 @@ def aggregate( sep=self.sep, sort=self.sort, as_index=self.as_index, - shuffle_method=shuffle_method, + shuffle_method=shuffle, **self.dropna, ) @@ -221,7 +219,7 @@ def aggregate( arg, split_every=split_every, split_out=split_out, - shuffle_method=shuffle_method, + shuffle=shuffle, ) @@ -333,9 +331,7 @@ def last(self, split_every=None, split_out=1): )[self._slice] @_dask_cudf_nvtx_annotate - def aggregate( - self, arg, split_every=None, split_out=1, shuffle_method=None - ): + def aggregate(self, arg, split_every=None, split_out=1, shuffle=None): if arg == "size": return self.size() @@ -346,14 +342,14 @@ def aggregate( if _groupby_optimized(self) and _aggs_optimized(arg, OPTIMIZED_AGGS): return _make_groupby_agg_call( - self, arg, split_every, split_out, shuffle_method + self, arg, split_every, split_out, shuffle )[self._slice] return super().aggregate( arg, split_every=split_every, split_out=split_out, - shuffle_method=shuffle_method, + shuffle=shuffle, ) diff --git a/python/dask_cudf/dask_cudf/tests/test_groupby.py b/python/dask_cudf/dask_cudf/tests/test_groupby.py index b40bab6da91..996457e4861 100644 --- a/python/dask_cudf/dask_cudf/tests/test_groupby.py +++ b/python/dask_cudf/dask_cudf/tests/test_groupby.py @@ -834,34 +834,26 @@ def test_groupby_shuffle(): # Sorted aggregation, single-partition output # (sort=True, split_out=1) - got = gddf.groupby("a", sort=True).agg( - spec, shuffle_method=True, split_out=1 - ) + got = gddf.groupby("a", sort=True).agg(spec, shuffle=True, split_out=1) dd.assert_eq(expect, got) # Sorted aggregation, multi-partition output # (sort=True, split_out=2) - got = gddf.groupby("a", sort=True).agg( - spec, shuffle_method=True, split_out=2 - ) + got = gddf.groupby("a", sort=True).agg(spec, shuffle=True, split_out=2) dd.assert_eq(expect, got) # Un-sorted aggregation, single-partition output # (sort=False, split_out=1) - got = gddf.groupby("a", sort=False).agg( - spec, shuffle_method=True, split_out=1 - ) + got = gddf.groupby("a", sort=False).agg(spec, shuffle=True, split_out=1) dd.assert_eq(expect.sort_index(), got.compute().sort_index()) # Un-sorted aggregation, multi-partition output # (sort=False, split_out=2) - # NOTE: `shuffle_method=True` should be default + # NOTE: `shuffle=True` should be default got = gddf.groupby("a", sort=False).agg(spec, split_out=2) dd.assert_eq(expect, got.compute().sort_index()) - # Sorted aggregation fails with split_out>1 when shuffle_method is False - # (sort=True, split_out=2, shuffle_method=False) + # Sorted aggregation fails with split_out>1 when shuffle is False + # (sort=True, split_out=2, shuffle=False) with pytest.raises(ValueError): - gddf.groupby("a", sort=True).agg( - spec, shuffle_method=False, split_out=2 - ) + gddf.groupby("a", sort=True).agg(spec, shuffle=False, split_out=2) From cdb298b80f89ace961a3716da779774bb2d9a34e Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Thu, 4 Jan 2024 06:55:17 -0800 Subject: [PATCH 4/4] Rename `_get_shuffle_type` to `_get_shuffle_method` --- python/dask_cudf/dask_cudf/core.py | 10 +++++----- python/dask_cudf/dask_cudf/sorting.py | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/python/dask_cudf/dask_cudf/core.py b/python/dask_cudf/dask_cudf/core.py index 597ca10f4f3..cd764381b3a 100644 --- a/python/dask_cudf/dask_cudf/core.py +++ b/python/dask_cudf/dask_cudf/core.py @@ -26,7 +26,7 @@ from dask_cudf import sorting from dask_cudf.accessors import ListMethods, StructMethods -from dask_cudf.sorting import _get_shuffle_type +from dask_cudf.sorting import _get_shuffle_method class _Frame(dd.core._Frame, OperatorMethodMixin): @@ -119,7 +119,7 @@ def merge(self, other, shuffle_method=None, **kwargs): return super().merge( other, on=on, - shuffle_method=_get_shuffle_type(shuffle_method), + shuffle_method=_get_shuffle_method(shuffle_method), **kwargs, ) @@ -137,7 +137,7 @@ def join(self, other, shuffle_method=None, **kwargs): other, how=how, on=on, - shuffle_method=_get_shuffle_type(shuffle_method), + shuffle_method=_get_shuffle_method(shuffle_method), **kwargs, ) @@ -211,7 +211,7 @@ def set_index( return super().set_index( other, sorted=pre_sorted, - shuffle_method=_get_shuffle_type(shuffle_method), + shuffle_method=_get_shuffle_method(shuffle_method), divisions=divisions, **kwargs, ) @@ -302,7 +302,7 @@ def var( def shuffle(self, *args, shuffle_method=None, **kwargs): """Wraps dask.dataframe DataFrame.shuffle method""" return super().shuffle( - *args, shuffle_method=_get_shuffle_type(shuffle_method), **kwargs + *args, shuffle_method=_get_shuffle_method(shuffle_method), **kwargs ) @_dask_cudf_nvtx_annotate diff --git a/python/dask_cudf/dask_cudf/sorting.py b/python/dask_cudf/dask_cudf/sorting.py index 153fee0b0e0..f89682c092a 100644 --- a/python/dask_cudf/dask_cudf/sorting.py +++ b/python/dask_cudf/dask_cudf/sorting.py @@ -295,7 +295,7 @@ def sort_values( "_partitions", max_branch=max_branch, npartitions=len(divisions) - 1, - shuffle_method=_get_shuffle_type(shuffle_method), + shuffle_method=_get_shuffle_method(shuffle_method), ignore_index=ignore_index, ).drop(columns=["_partitions"]) df3.divisions = (None,) * (df3.npartitions + 1) @@ -320,7 +320,7 @@ def get_default_shuffle_method(): return default -def _get_shuffle_type(shuffle_method): +def _get_shuffle_method(shuffle_method): # Utility to set the shuffle_method-kwarg default # and to validate user-specified options shuffle_method = shuffle_method or get_default_shuffle_method()