diff --git a/ci/benchmark/build.sh b/ci/benchmark/build.sh index 31d080e95d7..a773ec6ec62 100755 --- a/ci/benchmark/build.sh +++ b/ci/benchmark/build.sh @@ -37,7 +37,7 @@ export GBENCH_BENCHMARKS_DIR="$WORKSPACE/cpp/build/gbenchmarks/" export LIBCUDF_KERNEL_CACHE_PATH="$HOME/.jitify-cache" # Dask & Distributed option to install main(nightly) or `conda-forge` packages. -export INSTALL_DASK_MAIN=0 +export INSTALL_DASK_MAIN=1 function remove_libcudf_kernel_cache_dir { EXITCODE=$? @@ -82,8 +82,8 @@ if [[ "${INSTALL_DASK_MAIN}" == 1 ]]; then gpuci_logger "gpuci_mamba_retry update dask" gpuci_mamba_retry update dask else - gpuci_logger "gpuci_mamba_retry install conda-forge::dask==2022.03.0 conda-forge::distributed==2022.03.0 conda-forge::dask-core==2022.03.0 --force-reinstall" - gpuci_mamba_retry install conda-forge::dask==2022.03.0 conda-forge::distributed==2022.03.0 conda-forge::dask-core==2022.03.0 --force-reinstall + gpuci_logger "gpuci_mamba_retry install conda-forge::dask>=2022.03.0 conda-forge::distributed>=2022.03.0 conda-forge::dask-core>=2022.03.0 --force-reinstall" + gpuci_mamba_retry install conda-forge::dask>=2022.03.0 conda-forge::distributed>=2022.03.0 conda-forge::dask-core>=2022.03.0 --force-reinstall fi # Install the master version of streamz diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 08f9034357a..4e52044ffb1 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -31,7 +31,7 @@ export GIT_DESCRIBE_TAG=`git describe --tags` export MINOR_VERSION=`echo $GIT_DESCRIBE_TAG | grep -o -E '([0-9]+\.[0-9]+)'` # Dask & Distributed option to install main(nightly) or `conda-forge` packages. 
-export INSTALL_DASK_MAIN=0 +export INSTALL_DASK_MAIN=1 # ucx-py version export UCX_PY_VERSION='0.26.*' @@ -112,8 +112,8 @@ function install_dask { gpuci_mamba_retry update dask conda list else - gpuci_logger "gpuci_mamba_retry install conda-forge::dask==2022.03.0 conda-forge::distributed==2022.03.0 conda-forge::dask-core==2022.03.0 --force-reinstall" - gpuci_mamba_retry install conda-forge::dask==2022.03.0 conda-forge::distributed==2022.03.0 conda-forge::dask-core==2022.03.0 --force-reinstall + gpuci_logger "gpuci_mamba_retry install conda-forge::dask>=2022.03.0 conda-forge::distributed>=2022.03.0 conda-forge::dask-core>=2022.03.0 --force-reinstall" + gpuci_mamba_retry install conda-forge::dask>=2022.03.0 conda-forge::distributed>=2022.03.0 conda-forge::dask-core>=2022.03.0 --force-reinstall fi # Install the main version of streamz gpuci_logger "Install the main version of streamz" diff --git a/conda/environments/cudf_dev_cuda11.5.yml b/conda/environments/cudf_dev_cuda11.5.yml index a085f1ee6c5..6bea7b2623b 100644 --- a/conda/environments/cudf_dev_cuda11.5.yml +++ b/conda/environments/cudf_dev_cuda11.5.yml @@ -43,8 +43,8 @@ dependencies: - pydocstyle=6.1.1 - typing_extensions - pre-commit - - dask==2022.03.0 - - distributed==2022.03.0 + - dask>=2022.03.0 + - distributed>=2022.03.0 - streamz - arrow-cpp=7.0.0 - dlpack>=0.5,<0.6.0a0 diff --git a/conda/recipes/custreamz/meta.yaml b/conda/recipes/custreamz/meta.yaml index 313b8982b43..a067ff210c9 100644 --- a/conda/recipes/custreamz/meta.yaml +++ b/conda/recipes/custreamz/meta.yaml @@ -32,8 +32,8 @@ requirements: - python - streamz - cudf {{ version }} - - dask==2022.03.0 - - distributed==2022.03.0 + - dask>=2022.03.0 + - distributed>=2022.03.0 - python-confluent-kafka >=1.7.0,<1.8.0a0 - cudf_kafka {{ version }} diff --git a/conda/recipes/dask-cudf/meta.yaml b/conda/recipes/dask-cudf/meta.yaml index 125b7d995f5..97d2249b63b 100644 --- a/conda/recipes/dask-cudf/meta.yaml +++ b/conda/recipes/dask-cudf/meta.yaml @@ 
-27,14 +27,14 @@ requirements: host: - python - cudf {{ version }} - - dask==2022.03.0 - - distributed==2022.03.0 + - dask>=2022.03.0 + - distributed>=2022.03.0 - cudatoolkit {{ cuda_version }} run: - python - cudf {{ version }} - - dask==2022.03.0 - - distributed==2022.03.0 + - dask>=2022.03.0 + - distributed>=2022.03.0 - {{ pin_compatible('cudatoolkit', max_pin='x', min_pin='x') }} test: # [linux64] diff --git a/python/dask_cudf/dask_cudf/backends.py b/python/dask_cudf/dask_cudf/backends.py index 36e3416c8a3..ac600c73285 100644 --- a/python/dask_cudf/dask_cudf/backends.py +++ b/python/dask_cudf/dask_cudf/backends.py @@ -142,10 +142,12 @@ def meta_nonempty_cudf(x): res = cudf.DataFrame(index=idx) for col in x._data.names: dtype = str(x._data[col].dtype) - if dtype in ("list", "struct"): - # Not possible to hash and store list & struct types - # as they can contain different levels of nesting or - # fields. + if dtype in ("list", "struct", "category"): + # 1. Not possible to hash and store list & struct types + # as they can contain different levels of nesting or + # fields. + # 2. Not possible to hash `category` types as + # they often have underlying types specific to them. 
res._data[col] = _get_non_empty_data(x._data[col]) else: if dtype not in columns_with_dtype: diff --git a/python/dask_cudf/dask_cudf/sorting.py b/python/dask_cudf/dask_cudf/sorting.py index 5b286b0ff3d..880e2365fe6 100644 --- a/python/dask_cudf/dask_cudf/sorting.py +++ b/python/dask_cudf/dask_cudf/sorting.py @@ -294,6 +294,6 @@ def sort_values( df4 = df3.map_partitions(sort_function, **sort_kwargs) if not isinstance(divisions, gd.DataFrame) and set_divisions: # Can't have multi-column divisions elsewhere in dask (yet) - df4.divisions = methods.tolist(divisions) + df4.divisions = tuple(methods.tolist(divisions)) return df4 diff --git a/python/dask_cudf/setup.py b/python/dask_cudf/setup.py index 5b5a3646700..5a8b2d1b216 100644 --- a/python/dask_cudf/setup.py +++ b/python/dask_cudf/setup.py @@ -10,8 +10,8 @@ install_requires = [ "cudf", - "dask==2022.03.0", - "distributed==2022.03.0", + "dask>=2022.03.0", + "distributed>=2022.03.0", "fsspec>=0.6.0", "numpy", "pandas>=1.0,<1.4.0dev0",