From 2e85a3f090ab3293d151fd130cd24c89983917f3 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Thu, 26 Oct 2023 08:16:17 -0500 Subject: [PATCH] Unpin dask and distributed for 23.12 development (#14320) This PR relaxes the `dask` and `distributed` version pins for `23.12` development. Authors: - GALI PREM SAGAR (https://github.com/galipremsagar) Approvers: - https://github.com/jakirkham - Peter Andreas Entschev (https://github.com/pentschev) - Ray Douglass (https://github.com/raydouglass) --- ci/test_wheel_dask_cudf.sh | 2 +- .../all_cuda-118_arch-x86_64.yaml | 6 +- .../all_cuda-120_arch-x86_64.yaml | 6 +- conda/recipes/custreamz/meta.yaml | 6 +- conda/recipes/dask-cudf/meta.yaml | 12 +-- conda/recipes/dask-cudf/run_test.sh | 2 +- dependencies.yaml | 6 +- python/dask_cudf/dask_cudf/backends.py | 80 ++++++++----------- python/dask_cudf/pyproject.toml | 4 +- 9 files changed, 57 insertions(+), 67 deletions(-) diff --git a/ci/test_wheel_dask_cudf.sh b/ci/test_wheel_dask_cudf.sh index 8c4ab696249..f89aa43c20a 100755 --- a/ci/test_wheel_dask_cudf.sh +++ b/ci/test_wheel_dask_cudf.sh @@ -11,7 +11,7 @@ RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from python -m pip install --no-deps ./local-cudf-dep/cudf*.whl # Always install latest dask for testing -python -m pip install git+https://github.com/dask/dask.git@2023.9.2 git+https://github.com/dask/distributed.git@2023.9.2 git+https://github.com/rapidsai/dask-cuda.git@branch-23.12 +python -m pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/dask-cuda.git@branch-23.12 # echo to expand wildcard before adding `[extra]` requires for pip python -m pip install $(echo ./dist/dask_cudf*.whl)[test] diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index d847690a48b..623f79b7c34 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ 
b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -24,10 +24,10 @@ dependencies: - cupy>=12.0.0 - cxx-compiler - cython>=3.0.0 -- dask-core==2023.9.2 +- dask-core>=2023.9.2 - dask-cuda==23.12.* -- dask==2023.9.2 -- distributed==2023.9.2 +- dask>=2023.9.2 +- distributed>=2023.9.2 - dlpack>=0.5,<0.6.0a0 - doxygen=1.9.1 - fastavro>=0.22.9 diff --git a/conda/environments/all_cuda-120_arch-x86_64.yaml b/conda/environments/all_cuda-120_arch-x86_64.yaml index 163e82b1325..770095ad088 100644 --- a/conda/environments/all_cuda-120_arch-x86_64.yaml +++ b/conda/environments/all_cuda-120_arch-x86_64.yaml @@ -26,10 +26,10 @@ dependencies: - cupy>=12.0.0 - cxx-compiler - cython>=3.0.0 -- dask-core==2023.9.2 +- dask-core>=2023.9.2 - dask-cuda==23.12.* -- dask==2023.9.2 -- distributed==2023.9.2 +- dask>=2023.9.2 +- distributed>=2023.9.2 - dlpack>=0.5,<0.6.0a0 - doxygen=1.9.1 - fastavro>=0.22.9 diff --git a/conda/recipes/custreamz/meta.yaml b/conda/recipes/custreamz/meta.yaml index 233d51baf31..c5d14f1c884 100644 --- a/conda/recipes/custreamz/meta.yaml +++ b/conda/recipes/custreamz/meta.yaml @@ -45,9 +45,9 @@ requirements: - streamz - cudf ={{ version }} - cudf_kafka ={{ version }} - - dask ==2023.9.2 - - dask-core ==2023.9.2 - - distributed ==2023.9.2 + - dask >=2023.9.2 + - dask-core >=2023.9.2 + - distributed >=2023.9.2 - python-confluent-kafka >=1.9.0,<1.10.0a0 - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }} diff --git a/conda/recipes/dask-cudf/meta.yaml b/conda/recipes/dask-cudf/meta.yaml index 4c8af071074..444a9850c74 100644 --- a/conda/recipes/dask-cudf/meta.yaml +++ b/conda/recipes/dask-cudf/meta.yaml @@ -38,16 +38,16 @@ requirements: host: - python - cudf ={{ version }} - - dask ==2023.9.2 - - dask-core ==2023.9.2 - - distributed ==2023.9.2 + - dask >=2023.9.2 + - dask-core >=2023.9.2 + - distributed >=2023.9.2 - cuda-version ={{ cuda_version }} run: - python - cudf ={{ version }} - - dask ==2023.9.2 - - dask-core ==2023.9.2 - - distributed ==2023.9.2 + 
- dask >=2023.9.2 + - dask-core >=2023.9.2 + - distributed >=2023.9.2 - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }} test: diff --git a/conda/recipes/dask-cudf/run_test.sh b/conda/recipes/dask-cudf/run_test.sh index c79c014a89a..e7238d00f2b 100644 --- a/conda/recipes/dask-cudf/run_test.sh +++ b/conda/recipes/dask-cudf/run_test.sh @@ -18,7 +18,7 @@ if [ "${ARCH}" = "aarch64" ]; then fi # Dask & Distributed option to install main(nightly) or `conda-forge` packages. -export INSTALL_DASK_MAIN=0 +export INSTALL_DASK_MAIN=1 # Dask version to install when `INSTALL_DASK_MAIN=0` export DASK_STABLE_VERSION="2023.9.2" diff --git a/dependencies.yaml b/dependencies.yaml index da3ba0e5108..59755c31e92 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -478,12 +478,12 @@ dependencies: common: - output_types: [conda, requirements, pyproject] packages: - - dask==2023.9.2 - - distributed==2023.9.2 + - dask>=2023.9.2 + - distributed>=2023.9.2 - output_types: conda packages: - cupy>=12.0.0 - - dask-core==2023.9.2 # dask-core in conda is the actual package & dask is the meta package + - dask-core>=2023.9.2 # dask-core in conda is the actual package & dask is the meta package - output_types: pyproject packages: - &cudf cudf==23.12.* diff --git a/python/dask_cudf/dask_cudf/backends.py b/python/dask_cudf/dask_cudf/backends.py index 7b35c71ff09..65d9c438fba 100644 --- a/python/dask_cudf/dask_cudf/backends.py +++ b/python/dask_cudf/dask_cudf/backends.py @@ -12,6 +12,7 @@ import dask.dataframe as dd from dask import config +from dask.array.dispatch import percentile_lookup from dask.dataframe.backends import ( DataFrameBackendEntrypoint, PandasBackendEntrypoint, @@ -320,56 +321,45 @@ def get_grouper_cudf(obj): return cudf.core.groupby.Grouper -try: - try: - from dask.array.dispatch import percentile_lookup - except ImportError: - from dask.dataframe.dispatch import ( - percentile_dispatch as percentile_lookup, - ) - - @percentile_lookup.register((cudf.Series, 
cp.ndarray, cudf.BaseIndex)) - @_dask_cudf_nvtx_annotate - def percentile_cudf(a, q, interpolation="linear"): - # Cudf dispatch to the equivalent of `np.percentile`: - # https://numpy.org/doc/stable/reference/generated/numpy.percentile.html - a = cudf.Series(a) - # a is series. - n = len(a) - if not len(a): - return None, n - if isinstance(q, Iterator): - q = list(q) - - if cudf.api.types.is_categorical_dtype(a.dtype): - result = cp.percentile(a.cat.codes, q, interpolation=interpolation) - - return ( - pd.Categorical.from_codes( - result, a.dtype.categories, a.dtype.ordered - ), - n, - ) - if np.issubdtype(a.dtype, np.datetime64): - result = a.quantile( - [i / 100.0 for i in q], interpolation=interpolation - ) +@percentile_lookup.register((cudf.Series, cp.ndarray, cudf.BaseIndex)) +@_dask_cudf_nvtx_annotate +def percentile_cudf(a, q, interpolation="linear"): + # Cudf dispatch to the equivalent of `np.percentile`: + # https://numpy.org/doc/stable/reference/generated/numpy.percentile.html + a = cudf.Series(a) + # a is series. 
+ n = len(a) + if not len(a): + return None, n + if isinstance(q, Iterator): + q = list(q) + + if cudf.api.types.is_categorical_dtype(a.dtype): + result = cp.percentile(a.cat.codes, q, interpolation=interpolation) - if q[0] == 0: - # https://github.com/dask/dask/issues/6864 - result[0] = min(result[0], a.min()) - return result.to_pandas(), n - if not np.issubdtype(a.dtype, np.number): - interpolation = "nearest" return ( - a.quantile( - [i / 100.0 for i in q], interpolation=interpolation - ).to_pandas(), + pd.Categorical.from_codes( + result, a.dtype.categories, a.dtype.ordered + ), n, ) + if np.issubdtype(a.dtype, np.datetime64): + result = a.quantile( + [i / 100.0 for i in q], interpolation=interpolation + ) -except ImportError: - pass + if q[0] == 0: + # https://github.com/dask/dask/issues/6864 + result[0] = min(result[0], a.min()) + return result.to_pandas(), n + if not np.issubdtype(a.dtype, np.number): + interpolation = "nearest" + return ( + a.quantile( + [i / 100.0 for i in q], interpolation=interpolation + ).to_pandas(), + n, + ) @pyarrow_schema_dispatch.register((cudf.DataFrame,)) diff --git a/python/dask_cudf/pyproject.toml b/python/dask_cudf/pyproject.toml index 0a6e776e0f9..8461c51c573 100644 --- a/python/dask_cudf/pyproject.toml +++ b/python/dask_cudf/pyproject.toml @@ -20,8 +20,8 @@ requires-python = ">=3.9" dependencies = [ "cudf==23.12.*", "cupy-cuda11x>=12.0.0", - "dask==2023.9.2", - "distributed==2023.9.2", + "dask>=2023.9.2", + "distributed>=2023.9.2", "fsspec>=0.6.0", "numpy>=1.21,<1.25", "pandas>=1.3,<1.6.0dev0",