From 675af5401833287dac7e491bb1d6fa79c863bc0f Mon Sep 17 00:00:00 2001
From: Will Ayd <william.ayd@icloud.com>
Date: Thu, 29 Mar 2018 18:35:24 -0700
Subject: [PATCH 1/3] Deprecated Index.get_duplicates()

---
 doc/source/whatsnew/v0.23.0.txt   |  1 +
 pandas/core/indexes/base.py       | 13 +++++++++++--
 pandas/tests/indexes/test_base.py |  5 +++++
 3 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
index e83f149db1f18..cb3544b5df3a6 100644
--- a/doc/source/whatsnew/v0.23.0.txt
+++ b/doc/source/whatsnew/v0.23.0.txt
@@ -831,6 +831,7 @@ Deprecations
 - ``pandas.tseries.plotting.tsplot`` is deprecated. Use :func:`Series.plot` instead (:issue:`18627`)
 - ``Index.summary()`` is deprecated and will be removed in a future version (:issue:`18217`)
 - ``NDFrame.get_ftype_counts()`` is deprecated and will be removed in a future version (:issue:`18243`)
+- ``Index.get_duplicates()`` is deprecated and will be removed in a future version. Use ``idx[idx.duplicated()].unique()`` instead (:issue:`20239`)
 
 .. _whatsnew_0230.prior_deprecations:
 
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 12bb09e8f8a8a..51b3b682fd4a5 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -1824,6 +1824,9 @@ def get_duplicates(self):
         Returns a sorted list of index elements which appear more than once in
         the index.
 
+        .. deprecated:: 0.23.0
+            Use idx[idx.duplicated()].unique() instead
+
         Returns
         -------
         array-like
@@ -1870,14 +1873,20 @@ def get_duplicates(self):
         >>> pd.Index(dates).get_duplicates()
         DatetimeIndex([], dtype='datetime64[ns]', freq=None)
         """
+        warnings.warn("'get_duplicates' is deprecated and will be removed in "
+                      "a future release. You can use "
+                      "idx[idx.duplicated()].unique() instead",
+                      FutureWarning, stacklevel=2)
+
+        return self._get_duplicates()
+
+    def _get_duplicates(self):
         from collections import defaultdict
         counter = defaultdict(lambda: 0)
         for k in self.values:
             counter[k] += 1
         return sorted(k for k, v in compat.iteritems(counter) if v > 1)
 
-    _get_duplicates = get_duplicates
-
     def _cleanup(self):
         self._engine.clear_mapping()
 
diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py
index ff9c86fbfe384..d396d3b7e0036 100644
--- a/pandas/tests/indexes/test_base.py
+++ b/pandas/tests/indexes/test_base.py
@@ -2061,6 +2061,11 @@ def test_cached_properties_not_settable(self):
         with tm.assert_raises_regex(AttributeError, "Can't set attribute"):
             idx.is_unique = False
 
+    def test_get_duplicates_deprecated(self):
+        idx = pd.Index([1, 2, 3])
+        with tm.assert_produces_warning(FutureWarning):
+            idx.get_duplicates()
+
 
 class TestMixedIntIndex(Base):
     # Mostly the tests from common.py for which the results differ

From ed58eec355350f2005bc1ccbee7ee46b465a0791 Mon Sep 17 00:00:00 2001
From: Will Ayd <william.ayd@icloud.com>
Date: Sun, 1 Apr 2018 09:30:04 -0700
Subject: [PATCH 2/3] Return Index from get_duplicates() and update tests

---
 pandas/core/indexes/base.py        | 9 +--------
 pandas/tests/indexes/test_multi.py | 6 ++++--
 2 files changed, 5 insertions(+), 10 deletions(-)

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 51b3b682fd4a5..6f7da6ae6c3d2 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -1878,14 +1878,7 @@ def get_duplicates(self):
                       "idx[idx.duplicated()].unique() instead",
                       FutureWarning, stacklevel=2)
 
-        return self._get_duplicates()
-
-    def _get_duplicates(self):
-        from collections import defaultdict
-        counter = defaultdict(lambda: 0)
-        for k in self.values:
-            counter[k] += 1
-        return sorted(k for k, v in compat.iteritems(counter) if v > 1)
+        return self[self.duplicated()].unique()
 
     def _cleanup(self):
         self._engine.clear_mapping()
diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py
index 984f37042d600..f99b94216aa33 100644
--- a/pandas/tests/indexes/test_multi.py
+++ b/pandas/tests/indexes/test_multi.py
@@ -2413,7 +2413,8 @@ def check(nlevels, with_nulls):
         for a in [101, 102]:
             mi = MultiIndex.from_arrays([[101, a], [3.5, np.nan]])
             assert not mi.has_duplicates
-            assert mi.get_duplicates() == []
+            assert mi.get_duplicates().equals(
+                MultiIndex.from_arrays([[], []]))
             tm.assert_numpy_array_equal(mi.duplicated(), np.zeros(
                 2, dtype='bool'))
 
@@ -2425,7 +2426,8 @@ def check(nlevels, with_nulls):
                                 labels=np.random.permutation(list(lab)).T)
                 assert len(mi) == (n + 1) * (m + 1)
                 assert not mi.has_duplicates
-                assert mi.get_duplicates() == []
+                assert mi.get_duplicates().equals(
+                    MultiIndex.from_arrays([[], []]))
                 tm.assert_numpy_array_equal(mi.duplicated(), np.zeros(
                     len(mi), dtype='bool'))
 

From a1c5e51298715fec33e377e16575cca0f03b681e Mon Sep 17 00:00:00 2001
From: Will Ayd <william.ayd@icloud.com>
Date: Sun, 1 Apr 2018 09:44:26 -0700
Subject: [PATCH 3/3] Remove internal uses of Index.get_duplicates()

---
 pandas/core/frame.py                             |  5 +++--
 pandas/core/indexes/datetimelike.py              |  4 ----
 pandas/core/reshape/concat.py                    |  2 +-
 pandas/tests/indexes/datetimes/test_datetime.py  |  6 +++++-
 pandas/tests/indexes/test_multi.py               | 16 ++++++++++++----
 .../tests/indexes/timedeltas/test_timedelta.py   |  7 ++++++-
 6 files changed, 27 insertions(+), 13 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 35f3a7c20e270..11b9d93a27284 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -3840,8 +3840,9 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
         index = _ensure_index_from_sequences(arrays, names)
 
         if verify_integrity and not index.is_unique:
-            duplicates = index.get_duplicates()
-            raise ValueError('Index has duplicate keys: %s' % duplicates)
+            duplicates = index[index.duplicated()].unique()
+            raise ValueError('Index has duplicate keys: {duplicates!s}'.format(
+                duplicates=duplicates))
 
         for c in to_remove:
             del frame[c]
diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py
index b906ea0f4784c..ae6bd80de5d12 100644
--- a/pandas/core/indexes/datetimelike.py
+++ b/pandas/core/indexes/datetimelike.py
@@ -501,10 +501,6 @@ def take(self, indices, axis=0, allow_fill=True,
         freq = self.freq if isinstance(self, ABCPeriodIndex) else None
         return self._shallow_copy(taken, freq=freq)
 
-    def get_duplicates(self):
-        values = Index.get_duplicates(self)
-        return self._simple_new(values)
-
     _can_hold_na = True
 
     _na_value = NaT
diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py
index 20f4384a3d698..6e564975f34cd 100644
--- a/pandas/core/reshape/concat.py
+++ b/pandas/core/reshape/concat.py
@@ -504,7 +504,7 @@ def _get_concat_axis(self):
     def _maybe_check_integrity(self, concat_index):
         if self.verify_integrity:
             if not concat_index.is_unique:
-                overlap = concat_index.get_duplicates()
+                overlap = concat_index[concat_index.duplicated()].unique()
                 raise ValueError('Indexes have overlapping values: '
                                  '{overlap!s}'.format(overlap=overlap))
 
diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py
index 51788b3e25507..b3aab6dba796c 100644
--- a/pandas/tests/indexes/datetimes/test_datetime.py
+++ b/pandas/tests/indexes/datetimes/test_datetime.py
@@ -1,3 +1,4 @@
+import warnings
 
 import pytest
 
@@ -178,7 +179,10 @@ def test_get_duplicates(self):
         idx = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-02',
                              '2000-01-03', '2000-01-03', '2000-01-04'])
 
-        result = idx.get_duplicates()
+        with warnings.catch_warnings(record=True):
+            # Deprecated - see GH20239
+            result = idx.get_duplicates()
+
         ex = DatetimeIndex(['2000-01-02', '2000-01-03'])
         tm.assert_index_equal(result, ex)
 
diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py
index f99b94216aa33..0ae4b43575f66 100644
--- a/pandas/tests/indexes/test_multi.py
+++ b/pandas/tests/indexes/test_multi.py
@@ -2413,8 +2413,12 @@ def check(nlevels, with_nulls):
         for a in [101, 102]:
             mi = MultiIndex.from_arrays([[101, a], [3.5, np.nan]])
             assert not mi.has_duplicates
-            assert mi.get_duplicates().equals(
-                MultiIndex.from_arrays([[], []]))
+
+            with warnings.catch_warnings(record=True):
+                # Deprecated - see GH20239
+                assert mi.get_duplicates().equals(MultiIndex.from_arrays(
+                    [[], []]))
+
             tm.assert_numpy_array_equal(mi.duplicated(), np.zeros(
                 2, dtype='bool'))
 
@@ -2426,8 +2430,12 @@ def check(nlevels, with_nulls):
                                 labels=np.random.permutation(list(lab)).T)
                 assert len(mi) == (n + 1) * (m + 1)
                 assert not mi.has_duplicates
-                assert mi.get_duplicates().equals(
-                    MultiIndex.from_arrays([[], []]))
+
+                with warnings.catch_warnings(record=True):
+                    # Deprecated - see GH20239
+                    assert mi.get_duplicates().equals(MultiIndex.from_arrays(
+                        [[], []]))
+
                 tm.assert_numpy_array_equal(mi.duplicated(), np.zeros(
                     len(mi), dtype='bool'))
 
diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py
index 4692b6d675e6b..d7745ffd94cd9 100644
--- a/pandas/tests/indexes/timedeltas/test_timedelta.py
+++ b/pandas/tests/indexes/timedeltas/test_timedelta.py
@@ -1,3 +1,5 @@
+import warnings
+
 import pytest
 
 import numpy as np
@@ -145,7 +147,10 @@ def test_get_duplicates(self):
         idx = TimedeltaIndex(['1 day', '2 day', '2 day', '3 day', '3day',
                               '4day'])
 
-        result = idx.get_duplicates()
+        with warnings.catch_warnings(record=True):
+            # Deprecated - see GH20239
+            result = idx.get_duplicates()
+
         ex = TimedeltaIndex(['2 day', '3day'])
         tm.assert_index_equal(result, ex)