diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py
index a82735be901..e6cee7c1038 100644
--- a/python/cudf/cudf/core/multiindex.py
+++ b/python/cudf/cudf/core/multiindex.py
@@ -190,6 +190,18 @@ def names(self):
     def names(self, value):
         value = [None] * self.nlevels if value is None else value
         assert len(value) == self.nlevels
+
+        if len(value) == len(set(value)):
+            # IMPORTANT: if the provided names are unique,
+            # we reconstruct self._data with the names as keys.
+            # If they are not unique, the keys of self._data
+            # and self._names will be different, which can lead
+            # to unexpected behaviour in some cases. This is
+            # definitely buggy, but we can't disallow non-unique
+            # names either...
+            self._data = self._data._create_unsafe(
+                dict(zip(value, self._data.values()))
+            )
         self._names = pd.core.indexes.frozen.FrozenList(value)
 
     def rename(self, names, inplace=False):
diff --git a/python/cudf/cudf/tests/test_joining.py b/python/cudf/cudf/tests/test_joining.py
index 50141428b02..9164bfe98d1 100644
--- a/python/cudf/cudf/tests/test_joining.py
+++ b/python/cudf/cudf/tests/test_joining.py
@@ -1725,3 +1725,17 @@ def test_merge_with_lists(how):
     got = gd_left.merge(gd_right, on="a")
 
     assert_join_results_equal(expect, got, how=how)
+
+
+def test_join_renamed_index():
+    """Merging on a MultiIndex renamed via ``.names`` matches pandas."""
+    df = cudf.DataFrame(
+        {0: [1, 2, 3, 4, 5], 1: [1, 2, 3, 4, 5], "c": [1, 2, 3, 4, 5]}
+    ).set_index([0, 1])
+    df.index.names = ["a", "b"]  # must also re-key df._index._data
+
+    expect = df.to_pandas().merge(
+        df.to_pandas(), left_index=True, right_index=True
+    )
+    got = df.merge(df, left_index=True, right_index=True, how="inner")
+    assert_join_results_equal(expect, got, how="inner")