From d110ec79cbb068f2fb8cea54f9573f8474be0ded Mon Sep 17 00:00:00 2001 From: gfyoung Date: Thu, 16 Mar 2017 14:03:37 -0400 Subject: [PATCH] Drop take_last kwarg from method signatures Affected methods: 1) nlargest 2) nsmallest 3) duplicated 4) drop_duplicates xref gh-10236, gh-10792, gh-10920. --- asv_bench/benchmarks/series_methods.py | 12 ++--- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/base.py | 6 --- pandas/core/frame.py | 6 --- pandas/core/groupby.py | 6 +-- pandas/core/series.py | 4 -- pandas/indexes/base.py | 4 -- pandas/indexes/category.py | 2 - pandas/indexes/multi.py | 2 - pandas/tests/frame/test_analytics.py | 75 -------------------------- pandas/tests/groupby/test_groupby.py | 4 -- pandas/tests/series/test_analytics.py | 33 ------------ pandas/tests/test_base.py | 16 ------ pandas/tests/test_multilevel.py | 11 ---- vb_suite/series_methods.py | 16 +++--- 15 files changed, 16 insertions(+), 182 deletions(-) diff --git a/asv_bench/benchmarks/series_methods.py b/asv_bench/benchmarks/series_methods.py index 413c4e044fd3a..c66654ee1e006 100644 --- a/asv_bench/benchmarks/series_methods.py +++ b/asv_bench/benchmarks/series_methods.py @@ -68,8 +68,8 @@ def setup(self): self.s4 = self.s3.astype('object') def time_series_nlargest1(self): - self.s1.nlargest(3, take_last=True) - self.s1.nlargest(3, take_last=False) + self.s1.nlargest(3, keep='last') + self.s1.nlargest(3, keep='first') class series_nlargest2(object): @@ -83,8 +83,8 @@ def setup(self): self.s4 = self.s3.astype('object') def time_series_nlargest2(self): - self.s2.nlargest(3, take_last=True) - self.s2.nlargest(3, take_last=False) + self.s2.nlargest(3, keep='last') + self.s2.nlargest(3, keep='first') class series_nsmallest2(object): @@ -98,8 +98,8 @@ def setup(self): self.s4 = self.s3.astype('object') def time_series_nsmallest2(self): - self.s2.nsmallest(3, take_last=True) - self.s2.nsmallest(3, take_last=False) + self.s2.nsmallest(3, keep='last') + self.s2.nsmallest(3, keep='first') class series_dropna_int64(object): diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index a56212328f5c3..52eeced15401e 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -769,6 +769,7 @@ Removal of prior version deprecations/changes in favor of ``iloc`` and ``iat`` as explained :ref:`here ` (:issue:`10711`). - The deprecated ``DataFrame.iterkv()`` has been removed in favor of ``DataFrame.iteritems()`` (:issue:`10711`) - The ``Categorical`` constructor has dropped the ``name`` parameter (:issue:`10632`) +- The ``take_last`` parameter has been dropped from ``duplicated()``, ``drop_duplicates()``, ``nlargest()``, and ``nsmallest()`` methods (:issue:`10236`, :issue:`10792`, :issue:`10920`) .. _whatsnew_0200.performance: diff --git a/pandas/core/base.py b/pandas/core/base.py index d7c9e35ab6a51..bde60be3ddcff 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -1065,7 +1065,6 @@ def searchsorted(self, value, side='left', sorter=None): - ``first`` : Drop duplicates except for the first occurrence. - ``last`` : Drop duplicates except for the last occurrence. - False : Drop all duplicates. - take_last : deprecated %(inplace)s Returns @@ -1073,8 +1072,6 @@ def searchsorted(self, value, side='left', sorter=None): deduplicated : %(klass)s """) - @deprecate_kwarg('take_last', 'keep', mapping={True: 'last', - False: 'first'}) @Appender(_shared_docs['drop_duplicates'] % _indexops_doc_kwargs) def drop_duplicates(self, keep='first', inplace=False): inplace = validate_bool_kwarg(inplace, 'inplace') @@ -1100,15 +1097,12 @@ def drop_duplicates(self, keep='first', inplace=False): - ``last`` : Mark duplicates as ``True`` except for the last occurrence. - False : Mark all duplicates as ``True``. - take_last : deprecated Returns ------- duplicated : %(duplicated)s """) - @deprecate_kwarg('take_last', 'keep', mapping={True: 'last', - False: 'first'}) @Appender(_shared_docs['duplicated'] % _indexops_doc_kwargs) def duplicated(self, keep='first'): from pandas.core.algorithms import duplicated diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 987eb10101f12..a11be53adf959 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3169,8 +3169,6 @@ def dropna(self, axis=0, how='any', thresh=None, subset=None, else: return result - @deprecate_kwarg('take_last', 'keep', mapping={True: 'last', - False: 'first'}) def drop_duplicates(self, subset=None, keep='first', inplace=False): """ Return DataFrame with duplicate rows removed, optionally only @@ -3185,7 +3183,6 @@ def drop_duplicates(self, subset=None, keep='first', inplace=False): - ``first`` : Drop duplicates except for the first occurrence. - ``last`` : Drop duplicates except for the last occurrence. - False : Drop all duplicates. - take_last : deprecated inplace : boolean, default False Whether to drop duplicates in place or to return a copy @@ -3203,8 +3200,6 @@ def drop_duplicates(self, subset=None, keep='first', inplace=False): else: return self[-duplicated] - @deprecate_kwarg('take_last', 'keep', mapping={True: 'last', - False: 'first'}) def duplicated(self, subset=None, keep='first'): """ Return boolean Series denoting duplicate rows, optionally only @@ -3221,7 +3216,6 @@ def duplicated(self, subset=None, keep='first'): - ``last`` : Mark duplicates as ``True`` except for the last occurrence. - False : Mark all duplicates as ``True``. - take_last : deprecated Returns ------- diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 7a017ffae284c..fad7f87bdb77c 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -3025,16 +3025,12 @@ def nunique(self, dropna=True): index=ri, name=self.name) - @deprecate_kwarg('take_last', 'keep', - mapping={True: 'last', False: 'first'}) @Appender(Series.nlargest.__doc__) def nlargest(self, n=5, keep='first'): - # ToDo: When we remove deprecate_kwargs, we can remote these methods + # TODO: When we remove deprecate_kwargs, we can remove these methods # and include nlargest and nsmallest to _series_apply_whitelist return self.apply(lambda x: x.nlargest(n=n, keep=keep)) - @deprecate_kwarg('take_last', 'keep', - mapping={True: 'last', False: 'first'}) @Appender(Series.nsmallest.__doc__) def nsmallest(self, n=5, keep='first'): return self.apply(lambda x: x.nsmallest(n=n, keep=keep)) diff --git a/pandas/core/series.py b/pandas/core/series.py index cfa25ca1299eb..bfa26af2f8c1d 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1211,14 +1211,10 @@ def unique(self): return result.asobject.values return result - @deprecate_kwarg('take_last', 'keep', mapping={True: 'last', - False: 'first'}) @Appender(base._shared_docs['drop_duplicates'] % _shared_doc_kwargs) def drop_duplicates(self, keep='first', inplace=False): return super(Series, self).drop_duplicates(keep=keep, inplace=inplace) - @deprecate_kwarg('take_last', 'keep', mapping={True: 'last', - False: 'first'}) @Appender(base._shared_docs['duplicated'] % _shared_doc_kwargs) def duplicated(self, keep='first'): return super(Series, self).duplicated(keep=keep) diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index 5b942e2565c29..381e4d5caa8ac 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -3500,14 +3500,10 @@ def unique(self): result = super(Index, self).unique() return self._shallow_copy(result) - @deprecate_kwarg('take_last', 'keep', mapping={True: 'last', - False: 'first'}) @Appender(base._shared_docs['drop_duplicates'] % _index_doc_kwargs) def drop_duplicates(self, keep='first'): return super(Index, self).drop_duplicates(keep=keep) - @deprecate_kwarg('take_last', 'keep', mapping={True: 'last', - False: 'first'}) @Appender(base._shared_docs['duplicated'] % _index_doc_kwargs) def duplicated(self, keep='first'): return super(Index, self).duplicated(keep=keep) diff --git a/pandas/indexes/category.py b/pandas/indexes/category.py index 923dd4ec785c5..2542b64858526 100644 --- a/pandas/indexes/category.py +++ b/pandas/indexes/category.py @@ -301,8 +301,6 @@ def unique(self): return self._shallow_copy(result, categories=result.categories, ordered=result.ordered) - @deprecate_kwarg('take_last', 'keep', mapping={True: 'last', - False: 'first'}) @Appender(base._shared_docs['duplicated'] % _index_doc_kwargs) def duplicated(self, keep='first'): from pandas._libs.hashtable import duplicated_int64 diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py index 1c1609fed1dd1..978492131ca89 100644 --- a/pandas/indexes/multi.py +++ b/pandas/indexes/multi.py @@ -755,8 +755,6 @@ def f(k, stringify): for k, stringify in zip(key, self._have_mixed_levels)]) return hash_tuples(key) - @deprecate_kwarg('take_last', 'keep', mapping={True: 'last', - False: 'first'}) @Appender(base._shared_docs['duplicated'] % _index_doc_kwargs) def duplicated(self, keep='first'): from pandas.core.sorting import get_group_index diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 6c917444f9f43..4fb1d2222fa06 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -1381,12 +1381,6 @@ def test_drop_duplicates(self): tm.assert_frame_equal(result, expected) self.assertEqual(len(result), 0) - # deprecate take_last - with tm.assert_produces_warning(FutureWarning): - result = df.drop_duplicates('AAA', take_last=True) - expected = df.loc[[6, 7]] - tm.assert_frame_equal(result, expected) - # multi column expected = df.loc[[0, 1, 2, 3]] result = df.drop_duplicates(np.array(['AAA', 'B'])) @@ -1402,12 +1396,6 @@ def test_drop_duplicates(self): expected = df.loc[[0]] tm.assert_frame_equal(result, expected) - # deprecate take_last - with tm.assert_produces_warning(FutureWarning): - result = df.drop_duplicates(('AAA', 'B'), take_last=True) - expected = df.loc[[0, 5, 6, 7]] - tm.assert_frame_equal(result, expected) - # consider everything df2 = df.loc[:, ['AAA', 'B', 'C']] @@ -1424,13 +1412,6 @@ def test_drop_duplicates(self): expected = df2.drop_duplicates(['AAA', 'B'], keep=False) tm.assert_frame_equal(result, expected) - # deprecate take_last - with tm.assert_produces_warning(FutureWarning): - result = df2.drop_duplicates(take_last=True) - with tm.assert_produces_warning(FutureWarning): - expected = df2.drop_duplicates(['AAA', 'B'], take_last=True) - tm.assert_frame_equal(result, expected) - # integers result = df.drop_duplicates('C') expected = df.iloc[[0, 2]] @@ -1529,12 +1510,6 @@ def test_drop_duplicates_tuple(self): self.assertEqual(len(result), 0) tm.assert_frame_equal(result, expected) - # deprecate take_last - with tm.assert_produces_warning(FutureWarning): - result = df.drop_duplicates(('AA', 'AB'), take_last=True) - expected = df.loc[[6, 7]] - tm.assert_frame_equal(result, expected) - # multi column expected = df.loc[[0, 1, 2, 3]] result = df.drop_duplicates((('AA', 'AB'), 'B')) @@ -1563,12 +1538,6 @@ def test_drop_duplicates_NA(self): tm.assert_frame_equal(result, expected) self.assertEqual(len(result), 0) - # deprecate take_last - with tm.assert_produces_warning(FutureWarning): - result = df.drop_duplicates('A', take_last=True) - expected = df.loc[[1, 6, 7]] - tm.assert_frame_equal(result, expected) - # multi column result = df.drop_duplicates(['A', 'B']) expected = df.loc[[0, 2, 3, 6]] @@ -1582,12 +1551,6 @@ def test_drop_duplicates_NA(self): expected = df.loc[[6]] tm.assert_frame_equal(result, expected) - # deprecate take_last - with tm.assert_produces_warning(FutureWarning): - result = df.drop_duplicates(['A', 'B'], take_last=True) - expected = df.loc[[1, 5, 6, 7]] - tm.assert_frame_equal(result, expected) - # nan df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'bar', 'foo'], @@ -1610,12 +1573,6 @@ def test_drop_duplicates_NA(self): tm.assert_frame_equal(result, expected) self.assertEqual(len(result), 0) - # deprecate take_last - with tm.assert_produces_warning(FutureWarning): - result = df.drop_duplicates('C', take_last=True) - expected = df.loc[[3, 7]] - tm.assert_frame_equal(result, expected) - # multi column result = df.drop_duplicates(['C', 'B']) expected = df.loc[[0, 1, 2, 4]] @@ -1629,12 +1586,6 @@ def test_drop_duplicates_NA(self): expected = df.loc[[1]] tm.assert_frame_equal(result, expected) - # deprecate take_last - with tm.assert_produces_warning(FutureWarning): - result = df.drop_duplicates(['C', 'B'], take_last=True) - expected = df.loc[[1, 3, 6, 7]] - tm.assert_frame_equal(result, expected) - def test_drop_duplicates_NA_for_take_all(self): # none df = DataFrame({'A': [None, None, 'foo', 'bar', @@ -1697,14 +1648,6 @@ def test_drop_duplicates_inplace(self): tm.assert_frame_equal(result, expected) self.assertEqual(len(df), 0) - # deprecate take_last - df = orig.copy() - with tm.assert_produces_warning(FutureWarning): - df.drop_duplicates('A', take_last=True, inplace=True) - expected = orig.loc[[6, 7]] - result = df - tm.assert_frame_equal(result, expected) - # multi column df = orig.copy() df.drop_duplicates(['A', 'B'], inplace=True) @@ -1724,14 +1667,6 @@ def test_drop_duplicates_inplace(self): result = df tm.assert_frame_equal(result, expected) - # deprecate take_last - df = orig.copy() - with tm.assert_produces_warning(FutureWarning): - df.drop_duplicates(['A', 'B'], take_last=True, inplace=True) - expected = orig.loc[[0, 5, 6, 7]] - result = df - tm.assert_frame_equal(result, expected) - # consider everything orig2 = orig.loc[:, ['A', 'B', 'C']].copy() @@ -1754,17 +1689,7 @@ def test_drop_duplicates_inplace(self): result = df2 tm.assert_frame_equal(result, expected) - # deprecate take_last - df2 = orig2.copy() - with tm.assert_produces_warning(FutureWarning): - df2.drop_duplicates(take_last=True, inplace=True) - with tm.assert_produces_warning(FutureWarning): - expected = orig2.drop_duplicates(['A', 'B'], take_last=True) - result = df2 - tm.assert_frame_equal(result, expected) - # Rounding - def test_round(self): # GH 2665 diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index c25974c94bfd1..240e943d1d9fb 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -4025,8 +4025,6 @@ def test_nlargest(self): 3, 2, 1, 3, 3, 2 ], index=MultiIndex.from_arrays([list('aaabbb'), [2, 3, 1, 6, 5, 7]])) assert_series_equal(gb.nlargest(3, keep='last'), e) - with tm.assert_produces_warning(FutureWarning): - assert_series_equal(gb.nlargest(3, take_last=True), e) def test_nsmallest(self): a = Series([1, 3, 5, 7, 2, 9, 0, 4, 6, 10]) @@ -4044,8 +4042,6 @@ def test_nsmallest(self): 0, 1, 1, 0, 1, 2 ], index=MultiIndex.from_arrays([list('aaabbb'), [4, 1, 0, 9, 8, 7]])) assert_series_equal(gb.nsmallest(3, keep='last'), e) - with tm.assert_produces_warning(FutureWarning): - assert_series_equal(gb.nsmallest(3, take_last=True), e) def test_transform_doesnt_clobber_ints(self): # GH 7972 diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index c2543581dca50..dc71fafb1094f 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -917,17 +917,6 @@ def test_drop_duplicates(self): sc.drop_duplicates(keep='last', inplace=True) assert_series_equal(sc, s[~expected]) - # deprecate take_last - with tm.assert_produces_warning(FutureWarning): - assert_series_equal(s.duplicated(take_last=True), expected) - with tm.assert_produces_warning(FutureWarning): - assert_series_equal( - s.drop_duplicates(take_last=True), s[~expected]) - sc = s.copy() - with tm.assert_produces_warning(FutureWarning): - sc.drop_duplicates(take_last=True, inplace=True) - assert_series_equal(sc, s[~expected]) - expected = Series([False, False, True, True]) assert_series_equal(s.duplicated(keep=False), expected) assert_series_equal(s.drop_duplicates(keep=False), s[~expected]) @@ -951,17 +940,6 @@ def test_drop_duplicates(self): sc.drop_duplicates(keep='last', inplace=True) assert_series_equal(sc, s[~expected]) - # deprecate take_last - with tm.assert_produces_warning(FutureWarning): - assert_series_equal(s.duplicated(take_last=True), expected) - with tm.assert_produces_warning(FutureWarning): - assert_series_equal( - s.drop_duplicates(take_last=True), s[~expected]) - sc = s.copy() - with tm.assert_produces_warning(FutureWarning): - sc.drop_duplicates(take_last=True, inplace=True) - assert_series_equal(sc, s[~expected]) - expected = Series([False, True, True, False, True, True, False]) assert_series_equal(s.duplicated(keep=False), expected) assert_series_equal(s.drop_duplicates(keep=False), s[~expected]) @@ -1443,18 +1421,7 @@ def test_nsmallest_nlargest(self): for s in s_list: assert_series_equal(s.nsmallest(2), s.iloc[[2, 1]]) - assert_series_equal(s.nsmallest(2, keep='last'), s.iloc[[2, 3]]) - with tm.assert_produces_warning(FutureWarning): - assert_series_equal( - s.nsmallest(2, take_last=True), s.iloc[[2, 3]]) - - assert_series_equal(s.nlargest(3), s.iloc[[4, 0, 1]]) - - assert_series_equal(s.nlargest(3, keep='last'), s.iloc[[4, 0, 3]]) - with tm.assert_produces_warning(FutureWarning): - assert_series_equal( - s.nlargest(3, take_last=True), s.iloc[[4, 0, 3]]) empty = s.iloc[0:0] assert_series_equal(s.nsmallest(0), empty) diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 1d4dddf6477df..68db0d19344b9 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -816,15 +816,6 @@ def test_duplicated_drop_duplicates_index(self): result = idx.drop_duplicates(keep='last') tm.assert_index_equal(result, idx[~expected]) - # deprecate take_last - with tm.assert_produces_warning(FutureWarning): - duplicated = idx.duplicated(take_last=True) - tm.assert_numpy_array_equal(duplicated, expected) - self.assertTrue(duplicated.dtype == bool) - with tm.assert_produces_warning(FutureWarning): - result = idx.drop_duplicates(take_last=True) - tm.assert_index_equal(result, idx[~expected]) - base = [False] * len(original) + [True, True] base[3] = True base[5] = True @@ -867,13 +858,6 @@ def test_duplicated_drop_duplicates_index(self): tm.assert_series_equal(s.drop_duplicates(keep='last'), s[~np.array(base)]) - # deprecate take_last - with tm.assert_produces_warning(FutureWarning): - tm.assert_series_equal( - s.duplicated(take_last=True), expected) - with tm.assert_produces_warning(FutureWarning): - tm.assert_series_equal(s.drop_duplicates(take_last=True), - s[~np.array(base)]) base = [False] * len(original) + [True, True] base[3] = True base[5] = True diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index d7b115d808312..fd5421abc89ad 100755 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -2037,17 +2037,6 @@ def test_duplicated_drop_duplicates(self): expected = MultiIndex.from_arrays(([2, 3, 2, 3], [1, 1, 2, 2])) tm.assert_index_equal(idx.drop_duplicates(keep=False), expected) - # deprecate take_last - expected = np.array([True, False, False, False, False, False]) - with tm.assert_produces_warning(FutureWarning): - duplicated = idx.duplicated(take_last=True) - tm.assert_numpy_array_equal(duplicated, expected) - self.assertTrue(duplicated.dtype == bool) - expected = MultiIndex.from_arrays(([2, 3, 1, 2, 3], [1, 1, 1, 2, 2])) - with tm.assert_produces_warning(FutureWarning): - tm.assert_index_equal( - idx.drop_duplicates(take_last=True), expected) - def test_multiindex_set_index(self): # segfault in #3308 d = {'t1': [2, 2.5, 3], 't2': [4, 5, 6]} diff --git a/vb_suite/series_methods.py b/vb_suite/series_methods.py index cd8688495fa09..c545f419c2dec 100644 --- a/vb_suite/series_methods.py +++ b/vb_suite/series_methods.py @@ -12,22 +12,22 @@ s4 = s3.astype('object') """ -series_nlargest1 = Benchmark('s1.nlargest(3, take_last=True);' - 's1.nlargest(3, take_last=False)', +series_nlargest1 = Benchmark("s1.nlargest(3, keep='last');" + "s1.nlargest(3, keep='first')", setup, start_date=datetime(2014, 1, 25)) -series_nlargest2 = Benchmark('s2.nlargest(3, take_last=True);' - 's2.nlargest(3, take_last=False)', +series_nlargest2 = Benchmark("s2.nlargest(3, keep='last');" + "s2.nlargest(3, keep='first')", setup, start_date=datetime(2014, 1, 25)) -series_nsmallest2 = Benchmark('s1.nsmallest(3, take_last=True);' - 's1.nsmallest(3, take_last=False)', +series_nsmallest2 = Benchmark("s1.nsmallest(3, keep='last');" + "s1.nsmallest(3, keep='first')", setup, start_date=datetime(2014, 1, 25)) -series_nsmallest2 = Benchmark('s2.nsmallest(3, take_last=True);' - 's2.nsmallest(3, take_last=False)', +series_nsmallest2 = Benchmark("s2.nsmallest(3, keep='last');" + "s2.nsmallest(3, keep='first')", setup, start_date=datetime(2014, 1, 25))