From d3a19121790a517c03755fc01f74ab149d9c77f5 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 21 May 2019 08:23:51 +0200 Subject: [PATCH] DEPR: deprecate SparseArray.values (#26421) --- doc/source/whatsnew/v0.25.0.rst | 4 +- pandas/_libs/reduction.pyx | 19 ++++++--- pandas/_libs/src/ujson/python/objToJSON.c | 26 ++++++++++-- pandas/core/arrays/sparse.py | 49 +++++++---------------- pandas/core/internals/managers.py | 4 +- pandas/core/ops.py | 4 +- pandas/core/sparse/frame.py | 2 +- pandas/tests/arrays/sparse/test_array.py | 44 +++++++++++--------- pandas/tests/sparse/series/test_series.py | 6 +-- pandas/util/testing.py | 2 +- setup.cfg | 3 ++ 11 files changed, 93 insertions(+), 70 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 30bc332f8a04b..91b70334dc9bc 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -260,8 +260,10 @@ Deprecations - The deprecated ``.ix[]`` indexer now raises a more visible FutureWarning instead of DeprecationWarning (:issue:`26438`). - Deprecated the ``units=M`` (months) and ``units=Y`` (year) parameters for ``units`` of :func:`pandas.to_timedelta`, :func:`pandas.Timedelta` and :func:`pandas.TimedeltaIndex` (:issue:`16344`) +- The :attr:`SparseArray.values` attribute is deprecated. You can use ``np.asarray(...)`` or + the :meth:`SparseArray.to_dense` method instead (:issue:`26421`). - The functions :func:`pandas.to_datetime` and :func:`pandas.to_timedelta` have deprecated the ``box`` keyword. Instead, use :meth:`to_numpy` or :meth:`Timestamp.to_datetime64` or :meth:`Timedelta.to_timedelta64`. (:issue:`24416`) -- The :meth:`DataFrame.compound` and :meth:`Series.compound` methods are deprecated and will be removed in a future version. +- The :meth:`DataFrame.compound` and :meth:`Series.compound` methods are deprecated and will be removed in a future version (:issue:`26405`). .. 
_whatsnew_0250.prior_deprecations: diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index 6c69f7669bee5..739ac0ed397ca 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -15,7 +15,7 @@ from numpy cimport (ndarray, cnp.import_array() cimport pandas._libs.util as util -from pandas._libs.lib import maybe_convert_objects +from pandas._libs.lib import maybe_convert_objects, values_from_object cdef _get_result_array(object obj, Py_ssize_t size, Py_ssize_t cnt): @@ -28,6 +28,14 @@ cdef _get_result_array(object obj, Py_ssize_t size, Py_ssize_t cnt): return np.empty(size, dtype='O') +cdef bint _is_sparse_array(object obj): + # TODO can be removed once SparseArray.values is removed (GH26421) + if hasattr(obj, '_subtyp'): + if obj._subtyp == 'sparse_array': + return True + return False + + cdef class Reducer: """ Performs generic reduction operation on a C or Fortran-contiguous ndarray @@ -146,7 +154,8 @@ cdef class Reducer: else: res = self.f(chunk) - if hasattr(res, 'values') and util.is_array(res.values): + if (not _is_sparse_array(res) and hasattr(res, 'values') + and util.is_array(res.values)): res = res.values if i == 0: result = _get_result_array(res, @@ -432,7 +441,8 @@ cdef class SeriesGrouper: cdef inline _extract_result(object res): """ extract the result object, it might be a 0-dim ndarray or a len-1 0-dim, or a scalar """ - if hasattr(res, 'values') and util.is_array(res.values): + if (not _is_sparse_array(res) and hasattr(res, 'values') + and util.is_array(res.values)): res = res.values if not np.isscalar(res): if util.is_array(res): @@ -635,8 +645,7 @@ def reduce(arr, f, axis=0, dummy=None, labels=None): raise Exception('Cannot use shortcut') # pass as an ndarray - if hasattr(labels, 'values'): - labels = labels.values + labels = values_from_object(labels) reducer = Reducer(arr, f, axis=axis, dummy=dummy, labels=labels) return reducer.get_result() diff --git a/pandas/_libs/src/ujson/python/objToJSON.c
b/pandas/_libs/src/ujson/python/objToJSON.c index 52788f85ff71e..cc87d95bf35d8 100644 --- a/pandas/_libs/src/ujson/python/objToJSON.c +++ b/pandas/_libs/src/ujson/python/objToJSON.c @@ -210,9 +210,29 @@ static TypeContext *createTypeContext(void) { return pc; } + +static int is_sparse_array(PyObject *obj) { + // TODO can be removed again once SparseArray.values is removed (GH26421) + if (PyObject_HasAttrString(obj, "_subtyp")) { + PyObject *_subtype = PyObject_GetAttrString(obj, "_subtyp"); + PyObject *sparse_array = PyUnicode_FromString("sparse_array"); + int ret = PyUnicode_Compare(_subtype, sparse_array); + + if (ret == 0) { + return 1; + } + } + return 0; +} + + static PyObject *get_values(PyObject *obj) { - PyObject *values = PyObject_GetAttrString(obj, "values"); - PRINTMARK(); + PyObject *values = NULL; + + if (!is_sparse_array(obj)) { + values = PyObject_GetAttrString(obj, "values"); + PRINTMARK(); + } if (values && !PyArray_CheckExact(values)) { @@ -220,7 +240,7 @@ static PyObject *get_values(PyObject *obj) { values = PyObject_CallMethod(values, "to_numpy", NULL); } - if (PyObject_HasAttrString(values, "values")) { + if (!is_sparse_array(values) && PyObject_HasAttrString(values, "values")) { PyObject *subvals = get_values(values); PyErr_Clear(); PRINTMARK(); diff --git a/pandas/core/arrays/sparse.py b/pandas/core/arrays/sparse.py index 2aaa00ff01355..7a66e0ff33cc7 100644 --- a/pandas/core/arrays/sparse.py +++ b/pandas/core/arrays/sparse.py @@ -21,11 +21,10 @@ from pandas.core.dtypes.base import ExtensionDtype from pandas.core.dtypes.cast import ( astype_nansafe, construct_1d_arraylike_from_scalar, find_common_type, - infer_dtype_from_scalar, maybe_convert_platform) + infer_dtype_from_scalar) from pandas.core.dtypes.common import ( is_array_like, is_bool_dtype, is_datetime64_any_dtype, is_dtype_equal, - is_integer, is_list_like, is_object_dtype, is_scalar, is_string_dtype, - pandas_dtype) + is_integer, is_object_dtype, is_scalar, is_string_dtype, 
pandas_dtype) from pandas.core.dtypes.dtypes import register_extension_dtype from pandas.core.dtypes.generic import ( ABCIndexClass, ABCSeries, ABCSparseArray, ABCSparseSeries) @@ -890,7 +889,16 @@ def npoints(self): def values(self): """ Dense values + + .. deprecated:: 0.25.0 + + Use ``np.asarray(...)`` or the ``.to_dense()`` method instead. """ + msg = ( + "The SparseArray.values attribute is deprecated and will be " + "removed in a future version. You can use `np.asarray(...)` or " + "the `.to_dense()` method instead.") + warnings.warn(msg, FutureWarning, stacklevel=2) return self.to_dense() def isna(self): @@ -1076,7 +1084,7 @@ def __getitem__(self, key): if is_integer(key): return self._get_val_at(key) elif isinstance(key, tuple): - data_slice = self.values[key] + data_slice = self.to_dense()[key] elif isinstance(key, slice): # special case to preserve dtypes if key == slice(None): @@ -1635,7 +1643,7 @@ def __array_wrap__(self, array, context=None): from pandas.core.dtypes.generic import ABCSparseSeries ufunc, inputs, _ = context - inputs = tuple(x.values if isinstance(x, ABCSparseSeries) else x + inputs = tuple(x.to_dense() if isinstance(x, ABCSparseSeries) else x for x in inputs) return self.__array_ufunc__(ufunc, '__call__', *inputs) @@ -1854,37 +1862,10 @@ def _maybe_to_sparse(array): array must be SparseSeries or SparseArray """ if isinstance(array, ABCSparseSeries): - array = array.values.copy() + array = array.array.copy() return array -def _sanitize_values(arr): - """ - return an ndarray for our input, - in a platform independent manner - """ - - if hasattr(arr, 'values'): - arr = arr.values - else: - - # scalar - if is_scalar(arr): - arr = [arr] - - # ndarray - if isinstance(arr, np.ndarray): - pass - - elif is_list_like(arr) and len(arr) > 0: - arr = maybe_convert_platform(arr) - - else: - arr = np.asarray(arr) - - return arr - - def make_sparse(arr, kind='block', fill_value=None, dtype=None, copy=False): """ Convert ndarray to sparse format @@ 
-1902,7 +1883,7 @@ def make_sparse(arr, kind='block', fill_value=None, dtype=None, copy=False): (sparse_values, index, fill_value) : (ndarray, SparseIndex, Scalar) """ - arr = _sanitize_values(arr) + arr = com.values_from_object(arr) if arr.ndim > 1: raise TypeError("expected dimension <= 1 data") diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index e5abac5d7a94f..96a672b60da70 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -375,8 +375,8 @@ def apply(self, f, axes=None, filter=None, do_integrity_check=False, # with a .values attribute. aligned_args = {k: kwargs[k] for k in align_keys - if hasattr(kwargs[k], 'values') and - not isinstance(kwargs[k], ABCExtensionArray)} + if not isinstance(kwargs[k], ABCExtensionArray) and + hasattr(kwargs[k], 'values')} for b in self.blocks: if filter is not None: diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 0996eab4befa7..86a255321f827 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -2272,10 +2272,10 @@ def _cast_sparse_series_op(left, right, opname): # TODO: This should be moved to the array? 
if is_integer_dtype(left) and is_integer_dtype(right): # series coerces to float64 if result should have NaN/inf - if opname in ('floordiv', 'mod') and (right.values == 0).any(): + if opname in ('floordiv', 'mod') and (right.to_dense() == 0).any(): left = left.astype(SparseDtype(np.float64, left.fill_value)) right = right.astype(SparseDtype(np.float64, right.fill_value)) - elif opname in ('rfloordiv', 'rmod') and (left.values == 0).any(): + elif opname in ('rfloordiv', 'rmod') and (left.to_dense() == 0).any(): left = left.astype(SparseDtype(np.float64, left.fill_value)) right = right.astype(SparseDtype(np.float64, right.fill_value)) diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index 79b3a622ad72e..d21a809d7246d 100644 --- a/pandas/core/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -627,7 +627,7 @@ def _reindex_index(self, index, method, copy, level, fill_value=np.nan, # .take returns SparseArray new = values.take(indexer) if need_mask: - new = new.values + new = new.to_dense() # convert integer to float if necessary. 
need to do a lot # more than that, handle boolean etc also new, fill_value = maybe_upcast(new, fill_value=fill_value) diff --git a/pandas/tests/arrays/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py index e09f4e2ccc59c..aa364870c7e60 100644 --- a/pandas/tests/arrays/sparse/test_array.py +++ b/pandas/tests/arrays/sparse/test_array.py @@ -433,9 +433,9 @@ def test_constructor_bool(self): tm.assert_numpy_array_equal(arr.sp_index.indices, np.array([2, 3], np.int32)) - for dense in [arr.to_dense(), arr.values]: - assert dense.dtype == bool - tm.assert_numpy_array_equal(dense, data) + dense = arr.to_dense() + assert dense.dtype == bool + tm.assert_numpy_array_equal(dense, data) def test_constructor_bool_fill_value(self): arr = SparseArray([True, False, True], dtype=None) @@ -463,9 +463,9 @@ def test_constructor_float32(self): tm.assert_numpy_array_equal(arr.sp_index.indices, np.array([0, 2], dtype=np.int32)) - for dense in [arr.to_dense(), arr.values]: - assert dense.dtype == np.float32 - tm.assert_numpy_array_equal(dense, data) + dense = arr.to_dense() + assert dense.dtype == np.float32 + tm.assert_numpy_array_equal(dense, data) def test_astype(self): # float -> float @@ -514,7 +514,7 @@ def test_astype_all(self, any_real_dtype): assert res.dtype == SparseDtype(typ, 1) assert res.sp_values.dtype == typ - tm.assert_numpy_array_equal(np.asarray(res.values), + tm.assert_numpy_array_equal(np.asarray(res.to_dense()), vals.astype(typ)) @pytest.mark.parametrize('array, dtype, expected', [ @@ -596,7 +596,6 @@ def test_copy_shallow(self): assert arr2.sp_index is self.arr.sp_index def test_values_asarray(self): - assert_almost_equal(self.arr.values, self.arr_data) assert_almost_equal(self.arr.to_dense(), self.arr_data) @pytest.mark.parametrize('data,shape,dtype', [ @@ -627,7 +626,7 @@ def test_dense_repr(self, vals, fill_value, method): def test_getitem(self): def _checkit(i): - assert_almost_equal(self.arr[i], self.arr.values[i]) + assert_almost_equal(self.arr[i], 
self.arr.to_dense()[i]) for i in range(len(self.arr)): _checkit(i) @@ -641,11 +640,11 @@ def test_getitem_arraylike_mask(self): def test_getslice(self): result = self.arr[:-3] - exp = SparseArray(self.arr.values[:-3]) + exp = SparseArray(self.arr.to_dense()[:-3]) tm.assert_sp_array_equal(result, exp) result = self.arr[-4:] - exp = SparseArray(self.arr.values[-4:]) + exp = SparseArray(self.arr.to_dense()[-4:]) tm.assert_sp_array_equal(result, exp) # two corner cases from Series @@ -654,7 +653,7 @@ def test_getslice(self): tm.assert_sp_array_equal(result, exp) result = self.arr[:-12] - exp = SparseArray(self.arr.values[:0]) + exp = SparseArray(self.arr.to_dense()[:0]) tm.assert_sp_array_equal(result, exp) def test_getslice_tuple(self): @@ -702,16 +701,16 @@ def test_binary_operators(self, op): def _check_op(op, first, second): res = op(first, second) - exp = SparseArray(op(first.values, second.values), + exp = SparseArray(op(first.to_dense(), second.to_dense()), fill_value=first.fill_value) assert isinstance(res, SparseArray) - assert_almost_equal(res.values, exp.values) + assert_almost_equal(res.to_dense(), exp.to_dense()) - res2 = op(first, second.values) + res2 = op(first, second.to_dense()) assert isinstance(res2, SparseArray) tm.assert_sp_array_equal(res, res2) - res3 = op(first.values, second) + res3 = op(first.to_dense(), second) assert isinstance(res3, SparseArray) tm.assert_sp_array_equal(res, res3) @@ -720,13 +719,13 @@ def _check_op(op, first, second): # Ignore this if the actual op raises (e.g. pow). 
try: - exp = op(first.values, 4) + exp = op(first.to_dense(), 4) exp_fv = op(first.fill_value, 4) except ValueError: pass else: assert_almost_equal(res4.fill_value, exp_fv) - assert_almost_equal(res4.values, exp) + assert_almost_equal(res4.to_dense(), exp) with np.errstate(all="ignore"): for first_arr, second_arr in [(arr1, arr2), (farr1, farr2)]: @@ -1230,3 +1229,12 @@ def test_map_missing(): result = arr.map({0: 10, 1: 11}) tm.assert_sp_array_equal(result, expected) + + +def test_deprecated_values(): + arr = SparseArray([0, 1, 2]) + + with tm.assert_produces_warning(FutureWarning): + result = arr.values + + tm.assert_numpy_array_equal(result, arr.to_dense()) diff --git a/pandas/tests/sparse/series/test_series.py b/pandas/tests/sparse/series/test_series.py index 88921cf932140..004a382f9067c 100644 --- a/pandas/tests/sparse/series/test_series.py +++ b/pandas/tests/sparse/series/test_series.py @@ -255,7 +255,7 @@ def test_constructor(self): assert isinstance(self.iseries.sp_index, IntIndex) assert self.zbseries.fill_value == 0 - tm.assert_numpy_array_equal(self.zbseries.values.values, + tm.assert_numpy_array_equal(self.zbseries.values.to_dense(), self.bseries.to_dense().fillna(0).values) # pass SparseSeries @@ -322,7 +322,7 @@ def test_constructor_ndarray(self): def test_constructor_nonnan(self): arr = [0, 0, 0, nan, nan] sp_series = SparseSeries(arr, fill_value=0) - tm.assert_numpy_array_equal(sp_series.values.values, np.array(arr)) + tm.assert_numpy_array_equal(sp_series.values.to_dense(), np.array(arr)) assert len(sp_series) == 5 assert sp_series.shape == (5, ) @@ -514,7 +514,7 @@ def _compare(idx): sparse_result = sp.take(idx) assert isinstance(sparse_result, SparseSeries) tm.assert_almost_equal(dense_result, - sparse_result.values.values) + sparse_result.values.to_dense()) _compare([1., 2., 3., 4., 5., 0.]) _compare([7, 2, 9, 0, 4]) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index da92540e2b86f..92d450140a891 100644 --- 
a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -1403,7 +1403,7 @@ def assert_sp_array_equal(left, right, check_dtype=True, check_kind=True, assert_attr_equal('fill_value', left, right) if check_dtype: assert_attr_equal('dtype', left, right) - assert_numpy_array_equal(left.values, right.values, + assert_numpy_array_equal(left.to_dense(), right.to_dense(), check_dtype=check_dtype) diff --git a/setup.cfg b/setup.cfg index 160784a8b5b65..c0833c5609bea 100644 --- a/setup.cfg +++ b/setup.cfg @@ -69,6 +69,9 @@ markers = doctest_optionflags = NORMALIZE_WHITESPACE IGNORE_EXCEPTION_DETAIL addopts = --strict-data-files xfail_strict = True +filterwarnings = + error:The SparseArray:FutureWarning + [coverage:run] branch = False