diff --git a/doc/source/user_guide/basics.rst b/doc/source/user_guide/basics.rst index eec33afb9ab95..65892f01326e4 100644 --- a/doc/source/user_guide/basics.rst +++ b/doc/source/user_guide/basics.rst @@ -675,7 +675,7 @@ matching index: Value counts (histogramming) / mode ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The :meth:`~Series.value_counts` Series method and top-level function computes a histogram -of a 1D array of values. It can also be used as a function on regular arrays: +The :meth:`~Series.value_counts` Series method computes a histogram +of a 1D array of values: .. ipython:: python @@ -684,7 +684,6 @@ of a 1D array of values. It can also be used as a function on regular arrays: data s = pd.Series(data) s.value_counts() - pd.value_counts(data) The :meth:`~DataFrame.value_counts` method can be used to count combinations across multiple columns. By default all columns are used but a subset can be selected using the ``subset`` argument. @@ -733,7 +732,6 @@ normally distributed data into equal-size quartiles like so: arr = np.random.randn(30) factor = pd.qcut(arr, [0, 0.25, 0.5, 0.75, 1]) factor - pd.value_counts(factor) We can also pass infinite values to define the bins: diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index a435358d787d5..43d1ec7f22fac 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -276,6 +276,7 @@ Deprecations - Deprecated unused "closed" and "normalize" keywords in the :class:`DatetimeIndex` constructor (:issue:`52628`) - Deprecated unused "closed" keyword in the :class:`TimedeltaIndex` constructor (:issue:`52628`) - Deprecated logical operation between two non boolean :class:`Series` with different indexes always coercing the result to bool dtype. In a future version, this will maintain the return type of the inputs. 
(:issue:`52500`, :issue:`52538`) +- Deprecated :func:`value_counts`, use ``pd.Series(obj).value_counts()`` instead (:issue:`47862`) - Deprecated :meth:`Series.first` and :meth:`DataFrame.first` (please create a mask and filter using ``.loc`` instead) (:issue:`45908`) - Deprecated allowing ``downcast`` keyword other than ``None``, ``False``, "infer", or a dict with these as values in :meth:`Series.fillna`, :meth:`DataFrame.fillna` (:issue:`40988`) - Deprecated allowing arbitrary ``fill_value`` in :class:`SparseDtype`, in a future version the ``fill_value`` will need to be compatible with the ``dtype.subtype``, either a scalar that can be held by that subtype or ``NaN`` for integer or bool subtypes (:issue:`23124`) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 3a0fa1261701c..c3574829f9b0e 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -838,6 +838,31 @@ def value_counts( ------- Series """ + warnings.warn( + # GH#53493 + "pandas.value_counts is deprecated and will be removed in a " + "future version. 
Use pd.Series(obj).value_counts() instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return value_counts_internal( + values, + sort=sort, + ascending=ascending, + normalize=normalize, + bins=bins, + dropna=dropna, + ) + + +def value_counts_internal( + values, + sort: bool = True, + ascending: bool = False, + normalize: bool = False, + bins=None, + dropna: bool = True, +) -> Series: from pandas import ( Index, Series, @@ -1678,8 +1703,8 @@ def union_with_duplicates( """ from pandas import Series - l_count = value_counts(lvals, dropna=False) - r_count = value_counts(rvals, dropna=False) + l_count = value_counts_internal(lvals, dropna=False) + r_count = value_counts_internal(rvals, dropna=False) l_count, r_count = l_count.align(r_count, fill_value=0) final_count = np.maximum(l_count.values, r_count.values) final_count = Series(final_count, index=l_count.index, dtype="int", copy=False) diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index d93ecc087844c..0100c17805d76 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -48,7 +48,7 @@ from pandas.core.algorithms import ( take, unique, - value_counts, + value_counts_internal as value_counts, ) from pandas.core.array_algos.quantile import quantile_with_mask from pandas.core.array_algos.transforms import shift diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 2842d8267b7c6..7f874a07341eb 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -79,7 +79,7 @@ isin, take, unique, - value_counts, + value_counts_internal as value_counts, ) from pandas.core.arrays.base import ( ExtensionArray, diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index d2cd183f26173..a6579879cab96 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -496,7 +496,7 @@ def max(self, axis=None, skipna: bool = True, **kwargs) -> Scalar: return 
self._wrap_reduction_result(axis, result) def value_counts(self, dropna: bool = True) -> Series: - from pandas import value_counts + from pandas.core.algorithms import value_counts_internal as value_counts result = value_counts(self._ndarray, dropna=dropna).astype("Int64") result.index = result.index.astype(self.dtype) diff --git a/pandas/core/base.py b/pandas/core/base.py index f66abaa17d8a7..d4a808f4d7dd1 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -958,7 +958,7 @@ def value_counts( NaN 1 Name: count, dtype: int64 """ - return algorithms.value_counts( + return algorithms.value_counts_internal( self, sort=sort, ascending=ascending, diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index 3e495e9ac6814..393c01488c234 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -25,6 +25,7 @@ is_scalar, ) from pandas.core import arraylike +from pandas.core.algorithms import value_counts_internal as value_counts from pandas.core.arraylike import OpsMixin from pandas.core.arrays import ( ExtensionArray, @@ -273,8 +274,6 @@ def convert_values(param): return np.asarray(res, dtype=bool) def value_counts(self, dropna: bool = True): - from pandas.core.algorithms import value_counts - return value_counts(self.to_numpy(), dropna=dropna) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 6d09488df06e2..86142d9fe4d95 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -1181,7 +1181,9 @@ def test_value_counts(self): factor = cut(arr, 4) # assert isinstance(factor, n) - result = algos.value_counts(factor) + msg = "pandas.value_counts is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = algos.value_counts(factor) breaks = [-1.194, -0.535, 0.121, 0.777, 1.433] index = IntervalIndex.from_breaks(breaks).astype(CDT(ordered=True)) expected = Series([1, 1, 1, 1], index=index, name="count") @@ -1189,13 
+1191,16 @@ def test_value_counts(self): def test_value_counts_bins(self): s = [1, 2, 3, 4] - result = algos.value_counts(s, bins=1) + msg = "pandas.value_counts is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = algos.value_counts(s, bins=1) expected = Series( [4], index=IntervalIndex.from_tuples([(0.996, 4.0)]), name="count" ) tm.assert_series_equal(result, expected) - result = algos.value_counts(s, bins=2, sort=False) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = algos.value_counts(s, bins=2, sort=False) expected = Series( [2, 2], index=IntervalIndex.from_tuples([(0.996, 2.5), (2.5, 4.0)]), @@ -1204,31 +1209,40 @@ def test_value_counts_bins(self): tm.assert_series_equal(result, expected) def test_value_counts_dtypes(self): - result = algos.value_counts(np.array([1, 1.0])) + msg2 = "pandas.value_counts is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg2): + result = algos.value_counts(np.array([1, 1.0])) assert len(result) == 1 - result = algos.value_counts(np.array([1, 1.0]), bins=1) + with tm.assert_produces_warning(FutureWarning, match=msg2): + result = algos.value_counts(np.array([1, 1.0]), bins=1) assert len(result) == 1 - result = algos.value_counts(Series([1, 1.0, "1"])) # object + with tm.assert_produces_warning(FutureWarning, match=msg2): + result = algos.value_counts(Series([1, 1.0, "1"])) # object assert len(result) == 2 msg = "bins argument only works with numeric data" with pytest.raises(TypeError, match=msg): - algos.value_counts(np.array(["1", 1], dtype=object), bins=1) + with tm.assert_produces_warning(FutureWarning, match=msg2): + algos.value_counts(np.array(["1", 1], dtype=object), bins=1) def test_value_counts_nat(self): td = Series([np.timedelta64(10000), NaT], dtype="timedelta64[ns]") dt = to_datetime(["NaT", "2014-01-01"]) + msg = "pandas.value_counts is deprecated" + for s in [td, dt]: - vc = algos.value_counts(s) - vc_with_na = algos.value_counts(s, 
dropna=False) + with tm.assert_produces_warning(FutureWarning, match=msg): + vc = algos.value_counts(s) + vc_with_na = algos.value_counts(s, dropna=False) assert len(vc) == 1 assert len(vc_with_na) == 2 exp_dt = Series({Timestamp("2014-01-01 00:00:00"): 1}, name="count") - tm.assert_series_equal(algos.value_counts(dt), exp_dt) + with tm.assert_produces_warning(FutureWarning, match=msg): + tm.assert_series_equal(algos.value_counts(dt), exp_dt) # TODO same for (timedelta) def test_value_counts_datetime_outofbounds(self): @@ -1388,13 +1402,16 @@ def test_value_counts_normalized(self, dtype): def test_value_counts_uint64(self): arr = np.array([2**63], dtype=np.uint64) expected = Series([1], index=[2**63], name="count") - result = algos.value_counts(arr) + msg = "pandas.value_counts is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = algos.value_counts(arr) tm.assert_series_equal(result, expected) arr = np.array([-1, 2**63], dtype=object) expected = Series([1, 1], index=[-1, 2**63], name="count") - result = algos.value_counts(arr) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = algos.value_counts(arr) tm.assert_series_equal(result, expected)