Skip to content

Commit

Permalink
Add backward compatibility for dask-cudf to work with other version…
Browse files Browse the repository at this point in the history
…s of `dask` (#8368)

The current `dask-cudf` includes latest changes that work with latest `dask` changes. But when an existing release version of `dask` is used with `dask-cudf` it will fail to import some of the dispatches needed for `make_meta` & `make_meta_util`.. This PR will handle those import errors and have a fallback in place.

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - https://github.com/jakirkham
  - Keith Kraus (https://github.com/kkraus14)
  - Ray Douglass (https://github.com/raydouglass)

URL: #8368
  • Loading branch information
galipremsagar authored May 26, 2021
1 parent fa6e7e0 commit e97fc1c
Show file tree
Hide file tree
Showing 10 changed files with 45 additions and 25 deletions.
4 changes: 2 additions & 2 deletions conda/environments/cudf_dev_cuda11.0.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ dependencies:
- mypy=0.782
- typing_extensions
- pre_commit
- dask==2021.4.0
- distributed>=2.22.0,<=2021.4.0
- dask>=2021.4.0,<=2021.5.0
- distributed>=2.22.0,<=2021.5.0
- streamz
- dlpack>=0.5,<0.6.0a0
- arrow-cpp=1.0.1
Expand Down
4 changes: 2 additions & 2 deletions conda/environments/cudf_dev_cuda11.2.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ dependencies:
- mypy=0.782
- typing_extensions
- pre_commit
- dask==2021.4.0
- distributed>=2.22.0,<=2021.4.0
- dask>=2021.4.0,<=2021.5.0
- distributed>=2.22.0,<=2021.5.0
- streamz
- dlpack>=0.5,<0.6.0a0
- arrow-cpp=1.0.1
Expand Down
4 changes: 2 additions & 2 deletions conda/recipes/custreamz/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ requirements:
- python
- streamz
- cudf {{ version }}
- dask >=2.22.0,<=2021.4.0
- distributed >=2.22.0,<=2021.4.0
- dask>=2021.4.0,<=2021.5.0
- distributed>=2.22.0,<=2021.5.0
- python-confluent-kafka
- cudf_kafka {{ version }}

Expand Down
8 changes: 4 additions & 4 deletions conda/recipes/dask-cudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,13 @@ requirements:
host:
- python
- cudf {{ version }}
- dask==2021.4.0
- distributed >=2.22.0,<=2021.4.0
- dask>=2021.4.0,<=2021.5.0
- distributed>=2.22.0,<=2021.5.0
run:
- python
- cudf {{ version }}
- dask==2021.4.0
- distributed >=2.22.0,<=2021.4.0
- dask>=2021.4.0,<=2021.5.0
- distributed>=2.22.0,<=2021.5.0

test:
requires:
Expand Down
4 changes: 2 additions & 2 deletions python/custreamz/dev_requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
flake8==3.8.3
black==19.10b0
isort==5.0.7
dask==2021.4.0
distributed>=2.22.0,<=2021.4.0
dask>=2021.4.0,<=2021.5.0
distributed>=2.22.0,<=2021.5.0
streamz
python-confluent-kafka
pytest
Expand Down
6 changes: 5 additions & 1 deletion python/dask_cudf/dask_cudf/backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,13 @@
is_arraylike,
is_scalar,
make_meta,
make_meta_obj,
)

try:
from dask.dataframe.utils import make_meta_obj as make_meta_obj
except ImportError:
from dask.dataframe.utils import make_meta as make_meta_obj

import cudf
from cudf.utils.dtypes import is_string_dtype

Expand Down
14 changes: 10 additions & 4 deletions python/dask_cudf/dask_cudf/core.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# Copyright (c) 2018-2020, NVIDIA CORPORATION.
# Copyright (c) 2018-2021, NVIDIA CORPORATION.

import math
import warnings
from distutils.version import LooseVersion
Expand All @@ -25,6 +26,11 @@
from dask_cudf import sorting
from dask_cudf.accessors import ListMethods

try:
from dask.dataframe.utils import make_meta_util as dask_make_meta
except ImportError:
from dask.dataframe.core import make_meta as dask_make_meta

DASK_VERSION = LooseVersion(dask.__version__)


Expand Down Expand Up @@ -72,7 +78,7 @@ def __init__(self, dsk, name, meta, divisions):
dsk = HighLevelGraph.from_collections(name, dsk, dependencies=[])
self.dask = dsk
self._name = name
meta = dd.utils.make_meta_util(meta)
meta = dask_make_meta(meta)
if not isinstance(meta, self._partition_type):
raise TypeError(
f"Expected meta to specify type "
Expand Down Expand Up @@ -115,7 +121,7 @@ def assigner(df, k, v):
out[k] = v
return out

meta = assigner(self._meta, k, dd.utils.make_meta_util(v))
meta = assigner(self._meta, k, dask_make_meta(v))
return self.map_partitions(assigner, k, v, meta=meta)

def apply_rows(self, func, incols, outcols, kwargs=None, cache_key=None):
Expand Down Expand Up @@ -677,7 +683,7 @@ def reduction(
if meta is None:
meta_chunk = _emulate(apply, chunk, args, chunk_kwargs)
meta = _emulate(apply, aggregate, [[meta_chunk]], aggregate_kwargs)
meta = dd.utils.make_meta_util(meta)
meta = dask_make_meta(meta)

graph = HighLevelGraph.from_collections(b, dsk, dependencies=args)
return dd.core.new_dd_object(graph, b, meta, (None, None))
Expand Down
14 changes: 12 additions & 2 deletions python/dask_cudf/dask_cudf/tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,12 @@
import dask
from dask import dataframe as dd
from dask.dataframe.core import meta_nonempty
from dask.dataframe.utils import make_meta_util

try:
from dask.dataframe.utils import make_meta_util as dask_make_meta
except ImportError:
from dask.dataframe.core import make_meta as dask_make_meta

from dask.utils import M

import cudf
Expand Down Expand Up @@ -639,7 +644,7 @@ def test_make_meta_backends(index):
df = df.set_index(index)

# Check "empty" metadata types
chk_meta = make_meta_util(df)
chk_meta = dask_make_meta(df)
dd.assert_eq(chk_meta.dtypes, df.dtypes)

# Check "non-empty" metadata types
Expand Down Expand Up @@ -781,6 +786,11 @@ def test_index_map_partitions():


def test_correct_meta():
try:
from dask.dataframe.utils import make_meta_util # noqa: F401
except ImportError:
pytest.skip("need make_meta_util to be preset")

# Need these local imports in this specific order.
# For context: https://github.com/rapidsai/cudf/issues/7946
import pandas as pd
Expand Down
4 changes: 2 additions & 2 deletions python/dask_cudf/dev_requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Copyright (c) 2021, NVIDIA CORPORATION.

dask==2021.4.0
distributed>=2.22.0,<=2021.4.0
dask>=2021.4.0,<=2021.5.0
distributed>=2.22.0,<=2021.5.0
fsspec>=0.6.0
numba>=0.53.1
numpy
Expand Down
8 changes: 4 additions & 4 deletions python/dask_cudf/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@

install_requires = [
"cudf",
"dask==2021.4.0",
"distributed>=2.22.0,<=2021.4.0",
"dask>=2021.4.0,<=2021.5.0",
"distributed>=2.22.0,<=2021.5.0",
"fsspec>=0.6.0",
"numpy",
"pandas>=1.0,<1.3.0dev0",
Expand All @@ -23,8 +23,8 @@
"pandas>=1.0,<1.3.0dev0",
"pytest",
"numba>=0.53.1",
"dask==2021.4.0",
"distributed>=2.22.0,<=2021.4.0",
"dask>=2021.4.0,<=2021.5.0",
"distributed>=2.22.0,<=2021.5.0",
]
}

Expand Down

0 comments on commit e97fc1c

Please sign in to comment.