From 0cd58fbec63d5e461b487e7e37aa9942ebe0f116 Mon Sep 17 00:00:00 2001
From: AmirAli Mirian <37371367+amiralimi@users.noreply.github.com>
Date: Thu, 25 Jan 2024 11:40:04 -0500
Subject: [PATCH] Fix index difference to follow the pandas format (#14789)

This PR fixes a bug in `Index.difference`: the function kept duplicate elements in its result, whereas pandas removes them. The existing tests had no inputs containing duplicates, so this PR also adds new test cases, including the one from the original issue.

- closes #14489

Authors:
  - AmirAli Mirian (https://github.com/amiralimi)
  - Ashwin Srinath (https://github.com/shwina)

Approvers:
  - Lawrence Mitchell (https://github.com/wence-)

URL: https://github.com/rapidsai/cudf/pull/14789
---
 python/cudf/cudf/core/_base_index.py | 4 ++--
 python/cudf/cudf/tests/test_index.py | 4 +++-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/python/cudf/cudf/core/_base_index.py b/python/cudf/cudf/core/_base_index.py
index 2aef77b6c99..d7d8e26db1b 100644
--- a/python/cudf/cudf/core/_base_index.py
+++ b/python/cudf/cudf/core/_base_index.py
@@ -1040,11 +1040,11 @@ def difference(self, other, sort=None):
         res_name = _get_result_name(self.name, other.name)
 
         if is_mixed_with_object_dtype(self, other):
-            difference = self.copy()
+            difference = self.copy().unique()
         else:
             other = other.copy(deep=False)
             difference = cudf.core.index._index_from_data(
-                cudf.DataFrame._from_data({"None": self._column})
+                cudf.DataFrame._from_data({"None": self._column.unique()})
                 .merge(
                     cudf.DataFrame._from_data({"None": other._column}),
                     how="leftanti",
diff --git a/python/cudf/cudf/tests/test_index.py b/python/cudf/cudf/tests/test_index.py
index a480a4624f7..e0a369d8d91 100644
--- a/python/cudf/cudf/tests/test_index.py
+++ b/python/cudf/cudf/tests/test_index.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2018-2023, NVIDIA CORPORATION.
+# Copyright (c) 2018-2024, NVIDIA CORPORATION.
 
 """
 Test related to Index
@@ -803,6 +803,7 @@ def test_index_to_series(data):
         pd.Series(["1", "2", "a", "3", None], dtype="category"),
         range(0, 10),
         [],
+        [1, 1, 2, 2],
     ],
 )
 @pytest.mark.parametrize(
@@ -819,6 +820,7 @@ def test_index_to_series(data):
         range(2, 4),
         pd.Series(["1", "a", "3", None], dtype="category"),
         [],
+        [2],
     ],
 )
 @pytest.mark.parametrize("sort", [None, False])