diff --git a/python/cudf/cudf/tests/test_multiindex.py b/python/cudf/cudf/tests/test_multiindex.py index 465cf36e1f3..80f44065e9f 100644 --- a/python/cudf/cudf/tests/test_multiindex.py +++ b/python/cudf/cudf/tests/test_multiindex.py @@ -5,7 +5,9 @@ """ import itertools import operator +import pickle import re +from io import BytesIO import cupy as cp import numpy as np @@ -1553,3 +1555,49 @@ def test_multiIndex_duplicate_names(): ) assert_eq(gi, pi) + + +@pytest.mark.parametrize( + "names", + [ + ["a", "b", "c"], + [None, None, None], + ["aa", "aa", "aa"], + ["bb", "aa", "aa"], + None, + ], +) +def test_pickle_roundtrip_multiIndex(names): + df = cudf.DataFrame( + { + "one": [1, 2, 3], + "two": [True, False, True], + "three": ["ab", "cd", "ef"], + "four": [0.2, 0.1, -10.2], + } + ) + expected_df = df.set_index(["one", "two", "three"]) + expected_df.index.names = names + local_file = BytesIO() + + pickle.dump(expected_df, local_file) + local_file.seek(0) + actual_df = pickle.load(local_file) + assert_eq(expected_df, actual_df) + + +def test_difference(): + midx = cudf.MultiIndex( + levels=[[1, 3, 4, 5], [1, 2, 5]], + codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]], + names=["x", "y"], + ) + midx2 = cudf.MultiIndex( + levels=[[1, 3, 4, 5], [1, 2, 5]], + codes=[[0, 0, 1, 2, 3, 3], [0, 2, 1, 1, 0, 2]], + names=["x", "y"], + ) + + expected = midx2.to_pandas().difference(midx.to_pandas()) + actual = midx2.difference(midx) + assert_eq(expected, actual)