From e97fc1cf7ef0b408492eb988050d10da8c802d1f Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Wed, 26 May 2021 14:37:29 -0500 Subject: [PATCH] Add backward compatibility for `dask-cudf` to work with other versions of `dask` (#8368) The current `dask-cudf` includes changes that work with the latest `dask` changes. But when an existing release version of `dask` is used with `dask-cudf`, it will fail to import some of the dispatches needed for `make_meta` & `make_meta_util`. This PR handles those import errors and puts a fallback in place. Authors: - GALI PREM SAGAR (https://github.com/galipremsagar) Approvers: - https://github.com/jakirkham - Keith Kraus (https://github.com/kkraus14) - Ray Douglass (https://github.com/raydouglass) URL: https://github.com/rapidsai/cudf/pull/8368 --- conda/environments/cudf_dev_cuda11.0.yml | 4 ++-- conda/environments/cudf_dev_cuda11.2.yml | 4 ++-- conda/recipes/custreamz/meta.yaml | 4 ++-- conda/recipes/dask-cudf/meta.yaml | 8 ++++---- python/custreamz/dev_requirements.txt | 4 ++-- python/dask_cudf/dask_cudf/backends.py | 6 +++++- python/dask_cudf/dask_cudf/core.py | 14 ++++++++++---- python/dask_cudf/dask_cudf/tests/test_core.py | 14 ++++++++++++-- python/dask_cudf/dev_requirements.txt | 4 ++-- python/dask_cudf/setup.py | 8 ++++---- 10 files changed, 45 insertions(+), 25 deletions(-) diff --git a/conda/environments/cudf_dev_cuda11.0.yml b/conda/environments/cudf_dev_cuda11.0.yml index e61b76145e3..fceafca4991 100644 --- a/conda/environments/cudf_dev_cuda11.0.yml +++ b/conda/environments/cudf_dev_cuda11.0.yml @@ -41,8 +41,8 @@ dependencies: - mypy=0.782 - typing_extensions - pre_commit - - dask==2021.4.0 - - distributed>=2.22.0,<=2021.4.0 + - dask>=2021.4.0,<=2021.5.0 + - distributed>=2.22.0,<=2021.5.0 - streamz - dlpack>=0.5,<0.6.0a0 - arrow-cpp=1.0.1 diff --git a/conda/environments/cudf_dev_cuda11.2.yml b/conda/environments/cudf_dev_cuda11.2.yml index 3ad4bf3d4e1..1b19bdb6a87 100644 --- 
a/conda/environments/cudf_dev_cuda11.2.yml +++ b/conda/environments/cudf_dev_cuda11.2.yml @@ -41,8 +41,8 @@ dependencies: - mypy=0.782 - typing_extensions - pre_commit - - dask==2021.4.0 - - distributed>=2.22.0,<=2021.4.0 + - dask>=2021.4.0,<=2021.5.0 + - distributed>=2.22.0,<=2021.5.0 - streamz - dlpack>=0.5,<0.6.0a0 - arrow-cpp=1.0.1 diff --git a/conda/recipes/custreamz/meta.yaml b/conda/recipes/custreamz/meta.yaml index bb5186d7057..94dda92ba1b 100644 --- a/conda/recipes/custreamz/meta.yaml +++ b/conda/recipes/custreamz/meta.yaml @@ -31,8 +31,8 @@ requirements: - python - streamz - cudf {{ version }} - - dask >=2.22.0,<=2021.4.0 - - distributed >=2.22.0,<=2021.4.0 + - dask>=2021.4.0,<=2021.5.0 + - distributed>=2.22.0,<=2021.5.0 - python-confluent-kafka - cudf_kafka {{ version }} diff --git a/conda/recipes/dask-cudf/meta.yaml b/conda/recipes/dask-cudf/meta.yaml index 14376f54ba1..8132ccc5a3d 100644 --- a/conda/recipes/dask-cudf/meta.yaml +++ b/conda/recipes/dask-cudf/meta.yaml @@ -26,13 +26,13 @@ requirements: host: - python - cudf {{ version }} - - dask==2021.4.0 - - distributed >=2.22.0,<=2021.4.0 + - dask>=2021.4.0,<=2021.5.0 + - distributed>=2.22.0,<=2021.5.0 run: - python - cudf {{ version }} - - dask==2021.4.0 - - distributed >=2.22.0,<=2021.4.0 + - dask>=2021.4.0,<=2021.5.0 + - distributed>=2.22.0,<=2021.5.0 test: requires: diff --git a/python/custreamz/dev_requirements.txt b/python/custreamz/dev_requirements.txt index 4234d7ee2ab..c39cf33d386 100644 --- a/python/custreamz/dev_requirements.txt +++ b/python/custreamz/dev_requirements.txt @@ -3,8 +3,8 @@ flake8==3.8.3 black==19.10b0 isort==5.0.7 -dask==2021.4.0 -distributed>=2.22.0,<=2021.4.0 +dask>=2021.4.0,<=2021.5.0 +distributed>=2.22.0,<=2021.5.0 streamz python-confluent-kafka pytest diff --git a/python/dask_cudf/dask_cudf/backends.py b/python/dask_cudf/dask_cudf/backends.py index 27ed7070b6d..a958f0a934b 100644 --- a/python/dask_cudf/dask_cudf/backends.py +++ b/python/dask_cudf/dask_cudf/backends.py @@ 
-19,9 +19,13 @@ is_arraylike, is_scalar, make_meta, - make_meta_obj, ) +try: + from dask.dataframe.utils import make_meta_obj as make_meta_obj +except ImportError: + from dask.dataframe.utils import make_meta as make_meta_obj + import cudf from cudf.utils.dtypes import is_string_dtype diff --git a/python/dask_cudf/dask_cudf/core.py b/python/dask_cudf/dask_cudf/core.py index 67b03a5890c..1f1af7ab07e 100644 --- a/python/dask_cudf/dask_cudf/core.py +++ b/python/dask_cudf/dask_cudf/core.py @@ -1,4 +1,5 @@ -# Copyright (c) 2018-2020, NVIDIA CORPORATION. +# Copyright (c) 2018-2021, NVIDIA CORPORATION. + import math import warnings from distutils.version import LooseVersion @@ -25,6 +26,11 @@ from dask_cudf import sorting from dask_cudf.accessors import ListMethods +try: + from dask.dataframe.utils import make_meta_util as dask_make_meta +except ImportError: + from dask.dataframe.core import make_meta as dask_make_meta + DASK_VERSION = LooseVersion(dask.__version__) @@ -72,7 +78,7 @@ def __init__(self, dsk, name, meta, divisions): dsk = HighLevelGraph.from_collections(name, dsk, dependencies=[]) self.dask = dsk self._name = name - meta = dd.utils.make_meta_util(meta) + meta = dask_make_meta(meta) if not isinstance(meta, self._partition_type): raise TypeError( f"Expected meta to specify type " @@ -115,7 +121,7 @@ def assigner(df, k, v): out[k] = v return out - meta = assigner(self._meta, k, dd.utils.make_meta_util(v)) + meta = assigner(self._meta, k, dask_make_meta(v)) return self.map_partitions(assigner, k, v, meta=meta) def apply_rows(self, func, incols, outcols, kwargs=None, cache_key=None): @@ -677,7 +683,7 @@ def reduction( if meta is None: meta_chunk = _emulate(apply, chunk, args, chunk_kwargs) meta = _emulate(apply, aggregate, [[meta_chunk]], aggregate_kwargs) - meta = dd.utils.make_meta_util(meta) + meta = dask_make_meta(meta) graph = HighLevelGraph.from_collections(b, dsk, dependencies=args) return dd.core.new_dd_object(graph, b, meta, (None, None)) diff --git 
a/python/dask_cudf/dask_cudf/tests/test_core.py b/python/dask_cudf/dask_cudf/tests/test_core.py index 0db985bb4db..7e24d829745 100644 --- a/python/dask_cudf/dask_cudf/tests/test_core.py +++ b/python/dask_cudf/dask_cudf/tests/test_core.py @@ -10,7 +10,12 @@ import dask from dask import dataframe as dd from dask.dataframe.core import meta_nonempty -from dask.dataframe.utils import make_meta_util + +try: + from dask.dataframe.utils import make_meta_util as dask_make_meta +except ImportError: + from dask.dataframe.core import make_meta as dask_make_meta + from dask.utils import M import cudf @@ -639,7 +644,7 @@ def test_make_meta_backends(index): df = df.set_index(index) # Check "empty" metadata types - chk_meta = make_meta_util(df) + chk_meta = dask_make_meta(df) dd.assert_eq(chk_meta.dtypes, df.dtypes) # Check "non-empty" metadata types @@ -781,6 +786,11 @@ def test_index_map_partitions(): def test_correct_meta(): + try: + from dask.dataframe.utils import make_meta_util # noqa: F401 + except ImportError: + pytest.skip("need make_meta_util to be preset") + # Need these local imports in this specific order. # For context: https://github.com/rapidsai/cudf/issues/7946 import pandas as pd diff --git a/python/dask_cudf/dev_requirements.txt b/python/dask_cudf/dev_requirements.txt index 59aca76ae58..665104cb20e 100644 --- a/python/dask_cudf/dev_requirements.txt +++ b/python/dask_cudf/dev_requirements.txt @@ -1,7 +1,7 @@ # Copyright (c) 2021, NVIDIA CORPORATION. 
-dask==2021.4.0 -distributed>=2.22.0,<=2021.4.0 +dask>=2021.4.0,<=2021.5.0 +distributed>=2.22.0,<=2021.5.0 fsspec>=0.6.0 numba>=0.53.1 numpy diff --git a/python/dask_cudf/setup.py b/python/dask_cudf/setup.py index 93f10fbcf48..011fb823e68 100644 --- a/python/dask_cudf/setup.py +++ b/python/dask_cudf/setup.py @@ -10,8 +10,8 @@ install_requires = [ "cudf", - "dask==2021.4.0", - "distributed>=2.22.0,<=2021.4.0", + "dask>=2021.4.0,<=2021.5.0", + "distributed>=2.22.0,<=2021.5.0", "fsspec>=0.6.0", "numpy", "pandas>=1.0,<1.3.0dev0", @@ -23,8 +23,8 @@ "pandas>=1.0,<1.3.0dev0", "pytest", "numba>=0.53.1", - "dask==2021.4.0", - "distributed>=2.22.0,<=2021.4.0", + "dask>=2021.4.0,<=2021.5.0", + "distributed>=2.22.0,<=2021.5.0", ] }