Skip to content

Commit

Permalink
DEPR: pd.value_counts (#53493)
Browse files Browse the repository at this point in the history
* DEPR: pd.value_counts

* update docs

* suppress warning

* suppress warning

* suppress warning

* suppress warning

* pyright ignore

* pylint ignore

* warn inside value_counts
  • Loading branch information
jbrockmendel authored Jun 12, 2023
1 parent d7af4db commit 8b6a244
Show file tree
Hide file tree
Showing 9 changed files with 63 additions and 23 deletions.
4 changes: 1 addition & 3 deletions doc/source/user_guide/basics.rst
Original file line number Diff line number Diff line change
Expand Up @@ -675,7 +675,7 @@ matching index:
Value counts (histogramming) / mode
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

The :meth:`~Series.value_counts` Series method and top-level function computes a histogram
The :meth:`~Series.value_counts` Series method computes a histogram
of a 1D array of values. To count the values in a regular array, wrap it in a :class:`Series` first:

.. ipython:: python
Expand All @@ -684,7 +684,6 @@ of a 1D array of values. It can also be used as a function on regular arrays:
data
s = pd.Series(data)
s.value_counts()
pd.value_counts(data)
The :meth:`~DataFrame.value_counts` method can be used to count combinations across multiple columns.
By default all columns are used but a subset can be selected using the ``subset`` argument.
Expand Down Expand Up @@ -733,7 +732,6 @@ normally distributed data into equal-size quartiles like so:
arr = np.random.randn(30)
factor = pd.qcut(arr, [0, 0.25, 0.5, 0.75, 1])
factor
pd.value_counts(factor)
We can also pass infinite values to define the bins:

Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,7 @@ Deprecations
- Deprecated unused "closed" and "normalize" keywords in the :class:`DatetimeIndex` constructor (:issue:`52628`)
- Deprecated unused "closed" keyword in the :class:`TimedeltaIndex` constructor (:issue:`52628`)
- Deprecated logical operation between two non boolean :class:`Series` with different indexes always coercing the result to bool dtype. In a future version, this will maintain the return type of the inputs. (:issue:`52500`, :issue:`52538`)
- Deprecated :func:`value_counts`; use ``pd.Series(obj).value_counts()`` instead (:issue:`47862`)
- Deprecated :meth:`Series.first` and :meth:`DataFrame.first` (please create a mask and filter using ``.loc`` instead) (:issue:`45908`)
- Deprecated allowing ``downcast`` keyword other than ``None``, ``False``, "infer", or a dict with these as values in :meth:`Series.fillna`, :meth:`DataFrame.fillna` (:issue:`40988`)
- Deprecated allowing arbitrary ``fill_value`` in :class:`SparseDtype`, in a future version the ``fill_value`` will need to be compatible with the ``dtype.subtype``, either a scalar that can be held by that subtype or ``NaN`` for integer or bool subtypes (:issue:`23124`)
Expand Down
29 changes: 27 additions & 2 deletions pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -838,6 +838,31 @@ def value_counts(
-------
Series
"""
warnings.warn(
# GH#53493
"pandas.value_counts is deprecated and will be removed in a "
"future version. Use pd.Series(obj).value_counts() instead.",
FutureWarning,
stacklevel=find_stack_level(),
)
return value_counts_internal(
values,
sort=sort,
ascending=ascending,
normalize=normalize,
bins=bins,
dropna=dropna,
)


def value_counts_internal(
values,
sort: bool = True,
ascending: bool = False,
normalize: bool = False,
bins=None,
dropna: bool = True,
) -> Series:
from pandas import (
Index,
Series,
Expand Down Expand Up @@ -1678,8 +1703,8 @@ def union_with_duplicates(
"""
from pandas import Series

l_count = value_counts(lvals, dropna=False)
r_count = value_counts(rvals, dropna=False)
l_count = value_counts_internal(lvals, dropna=False)
r_count = value_counts_internal(rvals, dropna=False)
l_count, r_count = l_count.align(r_count, fill_value=0)
final_count = np.maximum(l_count.values, r_count.values)
final_count = Series(final_count, index=l_count.index, dtype="int", copy=False)
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/_mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
from pandas.core.algorithms import (
take,
unique,
value_counts,
value_counts_internal as value_counts,
)
from pandas.core.array_algos.quantile import quantile_with_mask
from pandas.core.array_algos.transforms import shift
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@
isin,
take,
unique,
value_counts,
value_counts_internal as value_counts,
)
from pandas.core.arrays.base import (
ExtensionArray,
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/string_.py
Original file line number Diff line number Diff line change
Expand Up @@ -496,7 +496,7 @@ def max(self, axis=None, skipna: bool = True, **kwargs) -> Scalar:
return self._wrap_reduction_result(axis, result)

def value_counts(self, dropna: bool = True) -> Series:
from pandas import value_counts
from pandas.core.algorithms import value_counts_internal as value_counts

result = value_counts(self._ndarray, dropna=dropna).astype("Int64")
result.index = result.index.astype(self.dtype)
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -958,7 +958,7 @@ def value_counts(
NaN 1
Name: count, dtype: int64
"""
return algorithms.value_counts(
return algorithms.value_counts_internal(
self,
sort=sort,
ascending=ascending,
Expand Down
3 changes: 1 addition & 2 deletions pandas/tests/extension/decimal/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
is_scalar,
)
from pandas.core import arraylike
from pandas.core.algorithms import value_counts_internal as value_counts
from pandas.core.arraylike import OpsMixin
from pandas.core.arrays import (
ExtensionArray,
Expand Down Expand Up @@ -273,8 +274,6 @@ def convert_values(param):
return np.asarray(res, dtype=bool)

def value_counts(self, dropna: bool = True):
from pandas.core.algorithms import value_counts

return value_counts(self.to_numpy(), dropna=dropna)


Expand Down
41 changes: 29 additions & 12 deletions pandas/tests/test_algos.py
Original file line number Diff line number Diff line change
Expand Up @@ -1181,21 +1181,26 @@ def test_value_counts(self):
factor = cut(arr, 4)

# assert isinstance(factor, n)
result = algos.value_counts(factor)
msg = "pandas.value_counts is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = algos.value_counts(factor)
breaks = [-1.194, -0.535, 0.121, 0.777, 1.433]
index = IntervalIndex.from_breaks(breaks).astype(CDT(ordered=True))
expected = Series([1, 1, 1, 1], index=index, name="count")
tm.assert_series_equal(result.sort_index(), expected.sort_index())

def test_value_counts_bins(self):
s = [1, 2, 3, 4]
result = algos.value_counts(s, bins=1)
msg = "pandas.value_counts is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = algos.value_counts(s, bins=1)
expected = Series(
[4], index=IntervalIndex.from_tuples([(0.996, 4.0)]), name="count"
)
tm.assert_series_equal(result, expected)

result = algos.value_counts(s, bins=2, sort=False)
with tm.assert_produces_warning(FutureWarning, match=msg):
result = algos.value_counts(s, bins=2, sort=False)
expected = Series(
[2, 2],
index=IntervalIndex.from_tuples([(0.996, 2.5), (2.5, 4.0)]),
Expand All @@ -1204,31 +1209,40 @@ def test_value_counts_bins(self):
tm.assert_series_equal(result, expected)

def test_value_counts_dtypes(self):
result = algos.value_counts(np.array([1, 1.0]))
msg2 = "pandas.value_counts is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg2):
result = algos.value_counts(np.array([1, 1.0]))
assert len(result) == 1

result = algos.value_counts(np.array([1, 1.0]), bins=1)
with tm.assert_produces_warning(FutureWarning, match=msg2):
result = algos.value_counts(np.array([1, 1.0]), bins=1)
assert len(result) == 1

result = algos.value_counts(Series([1, 1.0, "1"])) # object
with tm.assert_produces_warning(FutureWarning, match=msg2):
result = algos.value_counts(Series([1, 1.0, "1"])) # object
assert len(result) == 2

msg = "bins argument only works with numeric data"
with pytest.raises(TypeError, match=msg):
algos.value_counts(np.array(["1", 1], dtype=object), bins=1)
with tm.assert_produces_warning(FutureWarning, match=msg2):
algos.value_counts(np.array(["1", 1], dtype=object), bins=1)

def test_value_counts_nat(self):
td = Series([np.timedelta64(10000), NaT], dtype="timedelta64[ns]")
dt = to_datetime(["NaT", "2014-01-01"])

msg = "pandas.value_counts is deprecated"

for s in [td, dt]:
vc = algos.value_counts(s)
vc_with_na = algos.value_counts(s, dropna=False)
with tm.assert_produces_warning(FutureWarning, match=msg):
vc = algos.value_counts(s)
vc_with_na = algos.value_counts(s, dropna=False)
assert len(vc) == 1
assert len(vc_with_na) == 2

exp_dt = Series({Timestamp("2014-01-01 00:00:00"): 1}, name="count")
tm.assert_series_equal(algos.value_counts(dt), exp_dt)
with tm.assert_produces_warning(FutureWarning, match=msg):
tm.assert_series_equal(algos.value_counts(dt), exp_dt)
# TODO same for (timedelta)

def test_value_counts_datetime_outofbounds(self):
Expand Down Expand Up @@ -1388,13 +1402,16 @@ def test_value_counts_normalized(self, dtype):
def test_value_counts_uint64(self):
arr = np.array([2**63], dtype=np.uint64)
expected = Series([1], index=[2**63], name="count")
result = algos.value_counts(arr)
msg = "pandas.value_counts is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = algos.value_counts(arr)

tm.assert_series_equal(result, expected)

arr = np.array([-1, 2**63], dtype=object)
expected = Series([1, 1], index=[-1, 2**63], name="count")
result = algos.value_counts(arr)
with tm.assert_produces_warning(FutureWarning, match=msg):
result = algos.value_counts(arr)

tm.assert_series_equal(result, expected)

Expand Down

0 comments on commit 8b6a244

Please sign in to comment.