Fix categorical-accessor support and testing in dask-cudf #15591
@@ -111,7 +111,8 @@ def test_categorical_accessor_initialization2(data):
         dsr.cat


-@xfail_dask_expr("TODO: Unexplained dask-expr failure")
+# TODO: Remove this once we are pinned to dask>=2024.5.0
+@xfail_dask_expr("Requires: https://github.com/dask/dask/pull/11059")
Review thread on this change (inline code references were lost in extraction and are marked […]):

Review comment: Wonder if the […]. That way, in addition to leaving this TODO, we could also do something like […].

Reply: Yeah, I was thinking the same thing. I was actually going to submit a dedicated PR to revise the […].

Reply: Okay - thanks again for the suggestion. The […]
 @pytest.mark.parametrize("data", [data_cat_1()])
 def test_categorical_basic(data):
     cat = data.copy()
@@ -203,7 +204,6 @@ def test_categorical_compare_unordered(data):
         dsr < dsr


-@xfail_dask_expr("TODO: Unexplained dask-expr failure")
 @pytest.mark.parametrize("data", [data_cat_3()])
 def test_categorical_compare_ordered(data):
     cat1 = data[0].copy()
@@ -274,7 +274,6 @@ def test_categorical_categories():
     )


-@xfail_dask_expr("TODO: Unexplained dask-expr failure")
 def test_categorical_as_known():
     df = dask_cudf.from_cudf(DataFrame({"col_1": [0, 1, 2, 3]}), npartitions=2)
     df["col_1"] = df["col_1"].astype("category")
@@ -283,7 +282,19 @@ def test_categorical_as_known():
     pdf = dd.from_pandas(pd.DataFrame({"col_1": [0, 1, 2, 3]}), npartitions=2)
     pdf["col_1"] = pdf["col_1"].astype("category")
     expected = pdf["col_1"].cat.as_known()
-    dd.assert_eq(expected, actual)
+
+    # Note: Categories may be ordered differently in
+    # cudf and pandas. Therefore, we need to compare
+    # the global set of categories (before and after
+    # calling `compute`), then we need to check that
+    # the initial order of rows was preserved.
+    assert set(expected.cat.categories) == set(
+        actual.cat.categories.values_host
+    )
+    assert set(expected.compute().cat.categories) == set(
+        actual.compute().cat.categories.values_host
+    )
+    dd.assert_eq(expected, actual.astype(expected.dtype))


 def test_str_slice():
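The set-based comparison used in the test above can be reproduced with plain pandas. The following is an illustrative sketch (not part of the PR, and no cudf objects involved): two categorical series can hold the same values in the same row order while their dtypes enumerate the categories in different orders, so the robust check is to compare the category *sets* and then cast to a common dtype before an element-wise comparison.

```python
import pandas as pd

# Hypothetical example (not from the PR): same values and row order,
# but the categories are enumerated in a different order -- as can
# happen when cudf and pandas discover categories independently.
a = pd.Series(["b", "a", "c"], dtype=pd.CategoricalDtype(["a", "b", "c"]))
b = pd.Series(["b", "a", "c"], dtype=pd.CategoricalDtype(["c", "b", "a"]))

# The underlying integer codes differ, so a code-wise comparison
# would be misleading ...
assert list(a.cat.codes) != list(b.cat.codes)

# ... but the global *set* of categories matches,
assert set(a.cat.categories) == set(b.cat.categories)

# and casting one side to the other's dtype confirms that the
# initial order of rows was preserved.
assert a.equals(b.astype(a.dtype))
```

This mirrors the structure of the new assertions in `test_categorical_as_known`, minus the `values_host` transfer that is specific to cudf's device-resident index.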
Review comment:
I'm honestly not sure why the `Frame.__dask_tokenize__` definition (which looks very similar) isn't being used. Whatever the reason may be, `test_categorical_compare_ordered` fails without this fix, because different `Series` objects end up being tokenized to the same value, and the corresponding expressions are cached between tests. (General message: unique/proper tokenization is very important when query-planning is active.)

Review comment (@mroeschke):
Looks like before this was using `IndexedFrame.__dask_tokenize__`, where the data was being hashed as `self.hash_values().values_host` as opposed to `self.to_pandas()` (there might be a difference here for categoricals). Might it be a better fix if `IndexedFrame.__dask_tokenize__` used `to_pandas()` instead of `self.hash_values`? Additionally, if we want to use this fix as-is, I think we would also need to incorporate `self.index`.

Author reply:
Aha! Thanks for pointing that out, @mroeschke! This is actually a problem we ran into before and fixed in `Frame.__dask_tokenize__`. It turns out that `normalize_token(self._dtypes)` doesn't work very well. The more reliable thing to do is actually use `str(self._dtypes)`. With that said, dtypes with many categories may not be completely/well represented by `str(self._dtypes)`. Therefore, I just added an extra line to explicitly normalize the actual `categories` for each categorical dtype.

I think you are right that this is probably the safest and most robust thing to do. However, I am still hesitant to remove the `hash_values` code path. Right now, we avoid moving more than two columns (the hashed values and the index) to host memory when a cudf object is tokenized. The overhead difference may not be dramatic, but it would be nice to avoid moving the whole thing to pandas.
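The tokenization strategy discussed above can be sketched with plain pandas. Everything below is hypothetical: `sketch_tokenize` is not the real `Frame.__dask_tokenize__` and uses no cudf or dask APIs. It only illustrates the two ideas from the thread: `str(dtypes)` alone can under-represent categorical dtypes (the category lists collapse to the word "category"), so the categories are normalized explicitly; and per-row hashes plus the index are digested instead of round-tripping the full data through pandas.

```python
import hashlib

import pandas as pd


def sketch_tokenize(df: pd.DataFrame) -> str:
    """Deterministic token for a DataFrame (illustrative sketch only)."""
    h = hashlib.sha256()
    # str(dtypes) pins down the column names and dtype names, but two
    # categorical dtypes with different category lists both stringify
    # to "category" here ...
    h.update(str(df.dtypes).encode())
    # ... so explicitly normalize the categories of each categorical
    # dtype, mirroring the extra line added in this PR.
    for dtype in df.dtypes:
        if isinstance(dtype, pd.CategoricalDtype):
            h.update(repr(list(dtype.categories)).encode())
    # Digest per-row value hashes plus the index, rather than moving
    # the whole object to pandas (mirrors the hash_values idea).
    h.update(pd.util.hash_pandas_object(df, index=True).values.tobytes())
    return h.hexdigest()


# Same values, different category order -> different tokens, which is
# what prevents distinct Series from colliding in the expression cache.
t1 = sketch_tokenize(pd.DataFrame({"x": pd.Categorical(["a"], categories=["a", "b"])}))
t2 = sketch_tokenize(pd.DataFrame({"x": pd.Categorical(["a"], categories=["b", "a"])}))
assert t1 != t2
```

The design trade-off is the one named in the thread: hashing values keeps at most two columns' worth of data on the host, at the cost of having to normalize dtype metadata (like categories) separately and correctly.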