diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 96b62e185b3..79005193b4e 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -1954,10 +1954,12 @@ def _repeat( @_cudf_nvtx_annotate @_warn_no_dask_cudf def __dask_tokenize__(self): + from dask.base import normalize_token + return [ type(self), - self._dtypes, - self.to_pandas(), + normalize_token(self._dtypes), + normalize_token(self.to_pandas()), ] diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index 15277ff5586..0a0cefde9cd 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -6176,11 +6176,13 @@ def convert_dtypes( @_warn_no_dask_cudf def __dask_tokenize__(self): + from dask.base import normalize_token + return [ type(self), - self._dtypes, - self.index, - self.hash_values().values_host, + normalize_token(self._dtypes), + normalize_token(self.index), + normalize_token(self.hash_values().values_host), ] diff --git a/python/dask_cudf/dask_cudf/tests/test_dispatch.py b/python/dask_cudf/dask_cudf/tests/test_dispatch.py index c64e25fd437..76703206726 100644 --- a/python/dask_cudf/dask_cudf/tests/test_dispatch.py +++ b/python/dask_cudf/dask_cudf/tests/test_dispatch.py @@ -1,4 +1,6 @@ -# Copyright (c) 2021-2023, NVIDIA CORPORATION. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. + +from datetime import datetime import numpy as np import pandas as pd @@ -82,6 +84,16 @@ def test_deterministic_tokenize(index): assert tokenize(df2) == tokenize(df2) +def test_deterministic_tokenize_multiindex(): + dt = datetime.strptime("1995-03-15", "%Y-%m-%d") + index = cudf.MultiIndex( + levels=[[1, 2], [dt]], + codes=[[0, 1], [0, 0]], + ) + df = cudf.DataFrame(index=index) + assert tokenize(df) == tokenize(df) + + @pytest.mark.parametrize("preserve_index", [True, False]) def test_pyarrow_schema_dispatch(preserve_index): from dask.dataframe.dispatch import (