From bfa4bf6e21640bcd4317264ce0ce8339e3054ca0 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Sun, 27 Aug 2017 03:21:55 -0700 Subject: [PATCH] DEPR: Deprecate convert parameter in take xref gh-16948. The parameter is not respected, nor is it a parameter in many 'take' implementations. --- doc/source/whatsnew/v0.21.0.txt | 2 +- pandas/core/frame.py | 12 +++---- pandas/core/generic.py | 24 +++++++++----- pandas/core/groupby.py | 8 ++--- pandas/core/indexing.py | 17 +++++----- pandas/core/series.py | 33 ++++++++++++++----- pandas/core/sparse/series.py | 28 ++++++++++++++-- .../tests/frame/test_axis_select_reindex.py | 8 +++-- pandas/tests/indexing/test_loc.py | 4 +-- pandas/tests/series/test_indexing.py | 17 ++++++++++ pandas/tests/sparse/test_series.py | 3 ++ 11 files changed, 113 insertions(+), 43 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index fcadd26156b1d4..80dd085a568847 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -300,7 +300,7 @@ Other API Changes Deprecations ~~~~~~~~~~~~ - :func:`read_excel()` has deprecated ``sheetname`` in favor of ``sheet_name`` for consistency with ``.to_excel()`` (:issue:`10559`). - +- The ``convert`` parameter has been deprecated in the ``.take()`` method, as it was not being respected (:issue:`16948`) - ``pd.options.html.border`` has been deprecated in favor of ``pd.options.display.html.border`` (:issue:`15793`). .. _whatsnew_0210.prior_deprecations: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b5b3df64d24c0b..0ebae0c99390df 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2033,7 +2033,7 @@ def _ixs(self, i, axis=0): return self.loc[:, lab_slice] else: if isinstance(label, Index): - return self.take(i, axis=1, convert=True) + return self.take(i, axis=1) index_len = len(self.index) @@ -2115,10 +2115,10 @@ def _getitem_array(self, key): # be reindexed to match DataFrame rows key = check_bool_indexer(self.index, key) indexer = key.nonzero()[0] - return self.take(indexer, axis=0, convert=False) + return self.take(indexer, axis=0) else: indexer = self.loc._convert_to_indexer(key, axis=1) - return self.take(indexer, axis=1, convert=True) + return self.take(indexer, axis=1) def _getitem_multilevel(self, key): loc = self.columns.get_loc(key) @@ -3354,7 +3354,7 @@ def dropna(self, axis=0, how='any', thresh=None, subset=None, else: raise TypeError('must specify how or thresh') - result = self.take(mask.nonzero()[0], axis=axis, convert=False) + result = self.take(mask.nonzero()[0], axis=axis) if inplace: self._update_inplace(result) @@ -3490,7 +3490,7 @@ def trans(v): new_data = self._data.take(indexer, axis=self._get_block_manager_axis(axis), - convert=False, verify=False) + verify=False) if inplace: return self._update_inplace(new_data) @@ -3551,7 +3551,7 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, baxis = self._get_block_manager_axis(axis) new_data = self._data.take(indexer, axis=baxis, - convert=False, verify=False) + verify=False) # reconstruct axis if needed new_data.axes[baxis] = new_data.axes[baxis]._sort_levels_monotonic() diff --git a/pandas/core/generic.py b/pandas/core/generic.py index f8366c804e3e79..d533869b32f0f8 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1819,7 +1819,7 @@ def _iget_item_cache(self, item): if ax.is_unique: lower = self._get_item_cache(ax[item]) else: - lower = self.take(item, axis=self._info_axis_number, convert=True) + lower = self.take(item, axis=self._info_axis_number) return lower def _box_item_values(self, key, values): @@ -2074,9 +2074,9 @@ def take(self, indices, axis=0, convert=True, is_copy=True, **kwargs): The axis on which to select elements. "0" means that we are selecting rows, "1" means that we are selecting columns, etc. convert : bool, default True - Whether to convert negative indices to positive ones, just as with - indexing into Python lists. For example, if `-1` was passed in, - this index would be converted ``n - 1``. + .. deprecated:: 0.21.0 + + This parameter is not respected by the function. is_copy : bool, default True Whether to return a copy of the original object or not. @@ -2134,9 +2134,15 @@ class max_speed """ nv.validate_take(tuple(), kwargs) self._consolidate_inplace() + + if not convert: + msg = ("The 'convert' parameter is deprecated " + "and will be removed in a future version.") + warnings.warn(msg, FutureWarning, stacklevel=2) + new_data = self._data.take(indices, axis=self._get_block_manager_axis(axis), - convert=True, verify=True) + verify=True) result = self._constructor(new_data).__finalize__(self) # maybe set copy if we didn't actually change the index @@ -2245,9 +2251,9 @@ def xs(self, key, axis=0, level=None, drop_level=True): if isinstance(loc, np.ndarray): if loc.dtype == np.bool_: inds, = loc.nonzero() - return self.take(inds, axis=axis, convert=False) + return self.take(inds, axis=axis) else: - return self.take(loc, axis=axis, convert=True) + return self.take(loc, axis=axis) if not is_scalar(loc): new_index = self.index[loc] @@ -5066,7 +5072,7 @@ def at_time(self, time, asof=False): """ try: indexer = self.index.indexer_at_time(time, asof=asof) - return self.take(indexer, convert=False) + return self.take(indexer) except AttributeError: raise TypeError('Index must be DatetimeIndex') @@ -5090,7 +5096,7 @@ def between_time(self, start_time, end_time, include_start=True, indexer = self.index.indexer_between_time( start_time, end_time, include_start=include_start, include_end=include_end) - return self.take(indexer, convert=False) + return self.take(indexer) except AttributeError: raise TypeError('Index must be DatetimeIndex') diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index c23b00dc740a43..896f51a8022879 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -317,7 +317,7 @@ def _set_grouper(self, obj, sort=False): indexer = self.indexer = ax.argsort(kind='mergesort') ax = ax.take(indexer) obj = obj.take(indexer, axis=self.axis, - convert=False, is_copy=False) + is_copy=False) self.obj = obj self.grouper = ax @@ -640,7 +640,7 @@ def get_group(self, name, obj=None): if not len(inds): raise KeyError(name) - return obj.take(inds, axis=self.axis, convert=False) + return obj.take(inds, axis=self.axis) def __iter__(self): """ @@ -2190,7 +2190,7 @@ def _aggregate_series_fast(self, obj, func): # avoids object / Series creation overhead dummy = obj._get_values(slice(None, 0)).to_dense() indexer = get_group_index_sorter(group_index, ngroups) - obj = obj.take(indexer, convert=False).to_dense() + obj = obj.take(indexer).to_dense() group_index = algorithms.take_nd( group_index, indexer, allow_fill=False) grouper = lib.SeriesGrouper(obj, func, group_index, ngroups, @@ -4419,7 +4419,7 @@ def __iter__(self): yield i, self._chop(sdata, slice(start, end)) def _get_sorted_data(self): - return self.data.take(self.sort_idx, axis=self.axis, convert=False) + return self.data.take(self.sort_idx, axis=self.axis) def _chop(self, sdata, slice_obj): return sdata.iloc[slice_obj] diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 6b9ad5cd2d93b7..09d46c1ecf758a 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1092,7 +1092,7 @@ def _getitem_iterable(self, key, axis=0): if is_bool_indexer(key): key = check_bool_indexer(labels, key) inds, = key.nonzero() - return self.obj.take(inds, axis=axis, convert=False) + return self.obj.take(inds, axis=axis) else: # Have the index compute an indexer or return None # if it cannot handle; we only act on all found values @@ -1125,15 +1125,14 @@ def _getitem_iterable(self, key, axis=0): keyarr) if new_indexer is not None: - result = self.obj.take(indexer[indexer != -1], axis=axis, - convert=False) + result = self.obj.take(indexer[indexer != -1], axis=axis) result = result._reindex_with_indexers( {axis: [new_target, new_indexer]}, copy=True, allow_dups=True) else: - result = self.obj.take(indexer, axis=axis, convert=False) + result = self.obj.take(indexer, axis=axis) return result @@ -1263,7 +1262,7 @@ def _get_slice_axis(self, slice_obj, axis=0): if isinstance(indexer, slice): return self._slice(indexer, axis=axis, kind='iloc') else: - return self.obj.take(indexer, axis=axis, convert=False) + return self.obj.take(indexer, axis=axis) class _IXIndexer(_NDFrameIndexer): @@ -1348,7 +1347,7 @@ def _getbool_axis(self, key, axis=0): key = check_bool_indexer(labels, key) inds, = key.nonzero() try: - return self.obj.take(inds, axis=axis, convert=False) + return self.obj.take(inds, axis=axis) except Exception as detail: raise self._exception(detail) @@ -1365,7 +1364,7 @@ def _get_slice_axis(self, slice_obj, axis=0): if isinstance(indexer, slice): return self._slice(indexer, axis=axis, kind='iloc') else: - return self.obj.take(indexer, axis=axis, convert=False) + return self.obj.take(indexer, axis=axis) class _LocIndexer(_LocationIndexer): @@ -1703,7 +1702,7 @@ def _get_slice_axis(self, slice_obj, axis=0): if isinstance(slice_obj, slice): return self._slice(slice_obj, axis=axis, kind='iloc') else: - return self.obj.take(slice_obj, axis=axis, convert=False) + return self.obj.take(slice_obj, axis=axis) def _get_list_axis(self, key, axis=0): """ @@ -1719,7 +1718,7 @@ def _get_list_axis(self, key, axis=0): Series object """ try: - return self.obj.take(key, axis=axis, convert=False) + return self.obj.take(key, axis=axis) except IndexError: # re-raise with different error message raise IndexError("positional indexers are out-of-bounds") diff --git a/pandas/core/series.py b/pandas/core/series.py index 75dc3d6403650c..340cfcbc2c270c 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -49,7 +49,7 @@ standardize_mapping) from pandas.core.index import (Index, MultiIndex, InvalidIndexError, Float64Index, _ensure_index) -from pandas.core.indexing import check_bool_indexer, maybe_convert_indices +from pandas.core.indexing import check_bool_indexer from pandas.core import generic, base from pandas.core.internals import SingleBlockManager from pandas.core.categorical import Categorical, CategoricalAccessor @@ -2561,27 +2561,44 @@ def memory_usage(self, index=True, deep=False): def take(self, indices, axis=0, convert=True, is_copy=False, **kwargs): """ - return Series corresponding to requested indices + Return a Series corresponding to requested indices. Parameters ---------- - indices : list / array of ints - convert : translate negative to positive indices (default) + indices : array-like + An array of ints indicating which positions to take. + axis : int, default 0 + The axis on which to select elements. This implementation + ignores this parameter because there is only one axis. + convert : bool, default True + .. deprecated:: 0.21.0 + + Whether to translate negative to positive indices. In the future, + this function will always translate negative to positive indices. + + is_copy : bool, default False + Whether to return a copy of the original object or not. This + implementation ignores this parameter. Returns ------- taken : Series + An array-like containing the elements taken from the object. - See also + See Also -------- numpy.ndarray.take + numpy.take """ + if kwargs: nv.validate_take(tuple(), kwargs) - # check/convert indicies here - if convert: - indices = maybe_convert_indices(indices, len(self._get_axis(axis))) + # Check / convert indices here. + if not convert: + msg = ("The 'convert' parameter is deprecated " + "and will be removed in a future version.") + warnings.warn(msg, FutureWarning, stacklevel=2) indices = _ensure_platform_int(indices) new_index = self.index.take(indices) diff --git a/pandas/core/sparse/series.py b/pandas/core/sparse/series.py index 99aec2dd115697..0d449b78233f1a 100644 --- a/pandas/core/sparse/series.py +++ b/pandas/core/sparse/series.py @@ -604,14 +604,38 @@ def sparse_reindex(self, new_index): def take(self, indices, axis=0, convert=True, *args, **kwargs): """ - Sparse-compatible version of ndarray.take + Sparse-compatible version of ndarray.take. + + Parameters + ---------- + indices : array-like + An array of ints indicating which positions to take. + axis : int, default 0 + The axis on which to select elements. This implementation + ignores this parameter because there is only one axis. + convert : bool, default True + .. deprecated:: 0.21.0 + + This parameter is not respected by the function. Returns ------- - taken : ndarray + taken : numpy.ndarray + An array-like containing the elements taken from the object. + + See Also + -------- + numpy.ndarray.take + numpy.take """ convert = nv.validate_take_with_convert(convert, args, kwargs) + + if not convert: + msg = ("The 'convert' parameter is deprecated " + "and will be removed in a future version.") + warnings.warn(msg, FutureWarning, stacklevel=2) + new_values = SparseArray.take(self.values, indices) new_index = self.index.take(indices) return self._constructor(new_values, diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py index e76869bf6712b5..3790301c46a5da 100644 --- a/pandas/tests/frame/test_axis_select_reindex.py +++ b/pandas/tests/frame/test_axis_select_reindex.py @@ -787,7 +787,7 @@ def test_take(self): expected = df.loc[:, ['D', 'B', 'C', 'A']] assert_frame_equal(result, expected, check_names=False) - # neg indicies + # negative indices order = [2, 1, -1] for df in [self.frame]: @@ -795,6 +795,10 @@ def test_take(self): expected = df.reindex(df.index.take(order)) assert_frame_equal(result, expected) + with tm.assert_produces_warning(FutureWarning): + result = df.take(order, convert=False, axis=0) + assert_frame_equal(result, expected) + # axis = 1 result = df.take(order, axis=1) expected = df.loc[:, ['C', 'B', 'D']] @@ -819,7 +823,7 @@ def test_take(self): expected = df.loc[:, ['foo', 'B', 'C', 'A', 'D']] assert_frame_equal(result, expected) - # neg indicies + # negative indices order = [4, 1, -2] for df in [self.mixed_frame]: diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 3e863a59df67e6..17316a714e2609 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -581,11 +581,11 @@ def gen_test(l, l2): def gen_expected(df, mask): l = len(mask) - return pd.concat([df.take([0], convert=False), + return pd.concat([df.take([0]), DataFrame(np.ones((l, len(columns))), index=[0] * l, columns=columns), - df.take(mask[1:], convert=False)]) + df.take(mask[1:])]) df = gen_test(900, 100) assert not df.index.is_unique diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py index 45a92f6d6f50b0..1740be08d5a6c3 100644 --- a/pandas/tests/series/test_indexing.py +++ b/pandas/tests/series/test_indexing.py @@ -1066,6 +1066,23 @@ def test_setitem_with_tz_dst(self): s.iloc[[1, 2]] = vals tm.assert_series_equal(s, exp) + def test_take(self): + s = Series([-1, 5, 6, 2, 4]) + + actual = s.take([1, 3, 4]) + expected = Series([5, 2, 4], index=[1, 3, 4]) + tm.assert_series_equal(actual, expected) + + actual = s.take([-1, 3, 4]) + expected = Series([4, 2, 4], index=[4, 3, 4]) + tm.assert_series_equal(actual, expected) + + pytest.raises(IndexError, s.take, [1, 10]) + pytest.raises(IndexError, s.take, [2, 5]) + + with tm.assert_produces_warning(FutureWarning): + s.take([-1, 3, 4], convert=False) + def test_where(self): s = Series(np.random.randn(5)) cond = s > 0 diff --git a/pandas/tests/sparse/test_series.py b/pandas/tests/sparse/test_series.py index b44314d4e733be..799c1de2fcbea0 100644 --- a/pandas/tests/sparse/test_series.py +++ b/pandas/tests/sparse/test_series.py @@ -520,6 +520,9 @@ def _compare(idx): exp = pd.Series(np.repeat(nan, 5)) tm.assert_series_equal(sp.take([0, 1, 2, 3, 4]), exp) + with tm.assert_produces_warning(FutureWarning): + sp.take([1, 5], convert=False) + def test_numpy_take(self): sp = SparseSeries([1.0, 2.0, 3.0]) indices = [1, 2]