From 3e25394b0db2eee6af66df748d2e078c97015cba Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Thu, 8 Aug 2024 14:18:52 -0700
Subject: [PATCH] Disallow indexing by selecting duplicate labels

---
 python/cudf/cudf/core/column_accessor.py | 4 ++++
 python/cudf/cudf/tests/test_indexing.py  | 8 ++++++++
 2 files changed, 12 insertions(+)

diff --git a/python/cudf/cudf/core/column_accessor.py b/python/cudf/cudf/core/column_accessor.py
index 819d351b2c4..83596704672 100644
--- a/python/cudf/cudf/core/column_accessor.py
+++ b/python/cudf/cudf/core/column_accessor.py
@@ -530,6 +530,10 @@ def _select_by_label_list_like(self, key: Any) -> ColumnAccessor:
             )
         else:
             data = {k: self._grouped_data[k] for k in key}
+            if len(data) != len(key):
+                raise ValueError(
+                    "Selecting duplicate column labels is not supported."
+                )
         if self.multiindex:
             data = dict(_to_flat_dict_inner(data))
         return self.__class__(
diff --git a/python/cudf/cudf/tests/test_indexing.py b/python/cudf/cudf/tests/test_indexing.py
index 7005cbc6834..716b4dc6acd 100644
--- a/python/cudf/cudf/tests/test_indexing.py
+++ b/python/cudf/cudf/tests/test_indexing.py
@@ -2361,3 +2361,11 @@ def test_sliced_categorical_as_ordered():
         name="a",
     )
     assert_eq(result, expected)
+
+
+def test_duplicate_labels_raises():
+    df = cudf.DataFrame([[1, 2]], columns=["a", "b"])
+    with pytest.raises(ValueError):
+        df[["a", "a"]]
+    with pytest.raises(ValueError):
+        df.loc[:, ["a", "a"]]