DEPR: pd.value_counts (#53493)

* DEPR: pd.value_counts * update docs * suppress warning * suppress warning * suppress warning * suppress warning * pyright ignore * pylint ignore * warn inside value_counts
pandas-dev · Jun 12, 2023 · 8b6a244 · 8b6a244
1 parent d7af4db
commit 8b6a244
Show file tree

Hide file tree

Showing 9 changed files with 63 additions and 23 deletions.
diff --git a/doc/source/user_guide/basics.rst b/doc/source/user_guide/basics.rst
@@ -675,7 +675,7 @@ matching index:
 Value counts (histogramming) / mode
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-The :meth:`~Series.value_counts` Series method and top-level function computes a histogram
+The :meth:`~Series.value_counts` Series method computes a histogram
 of a 1D array of values. It can also be used as a function on regular arrays:
 
 .. ipython:: python
@@ -684,7 +684,6 @@ of a 1D array of values. It can also be used as a function on regular arrays:
    data
    s = pd.Series(data)
    s.value_counts()
-   pd.value_counts(data)
 
 The :meth:`~DataFrame.value_counts` method can be used to count combinations across multiple columns.
 By default all columns are used but a subset can be selected using the ``subset`` argument.
@@ -733,7 +732,6 @@ normally distributed data into equal-size quartiles like so:
    arr = np.random.randn(30)
    factor = pd.qcut(arr, [0, 0.25, 0.5, 0.75, 1])
    factor
-   pd.value_counts(factor)
 
 We can also pass infinite values to define the bins:
 

diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
@@ -276,6 +276,7 @@ Deprecations
 - Deprecated unused "closed" and "normalize" keywords in the :class:`DatetimeIndex` constructor (:issue:`52628`)
 - Deprecated unused "closed" keyword in the :class:`TimedeltaIndex` constructor (:issue:`52628`)
 - Deprecated logical operation between two non boolean :class:`Series` with different indexes always coercing the result to bool dtype. In a future version, this will maintain the return type of the inputs. (:issue:`52500`, :issue:`52538`)
+- Deprecated :func:`value_counts`, use ``pd.Series(obj).value_counts()`` instead (:issue:`47862`)
 - Deprecated :meth:`Series.first` and :meth:`DataFrame.first` (please create a mask and filter using ``.loc`` instead) (:issue:`45908`)
 - Deprecated allowing ``downcast`` keyword other than ``None``, ``False``, "infer", or a dict with these as values in :meth:`Series.fillna`, :meth:`DataFrame.fillna` (:issue:`40988`)
 - Deprecated allowing arbitrary ``fill_value`` in :class:`SparseDtype`, in a future version the ``fill_value`` will need to be compatible with the ``dtype.subtype``, either a scalar that can be held by that subtype or ``NaN`` for integer or bool subtypes (:issue:`23124`)

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
@@ -838,6 +838,31 @@ def value_counts(
     -------
     Series
     """
+    warnings.warn(
+        # GH#53493
+        "pandas.value_counts is deprecated and will be removed in a "
+        "future version. Use pd.Series(obj).value_counts() instead.",
+        FutureWarning,
+        stacklevel=find_stack_level(),
+    )
+    return value_counts_internal(
+        values,
+        sort=sort,
+        ascending=ascending,
+        normalize=normalize,
+        bins=bins,
+        dropna=dropna,
+    )
+
+
+def value_counts_internal(
+    values,
+    sort: bool = True,
+    ascending: bool = False,
+    normalize: bool = False,
+    bins=None,
+    dropna: bool = True,
+) -> Series:
     from pandas import (
         Index,
         Series,
@@ -1678,8 +1703,8 @@ def union_with_duplicates(
     """
     from pandas import Series
 
-    l_count = value_counts(lvals, dropna=False)
-    r_count = value_counts(rvals, dropna=False)
+    l_count = value_counts_internal(lvals, dropna=False)
+    r_count = value_counts_internal(rvals, dropna=False)
     l_count, r_count = l_count.align(r_count, fill_value=0)
     final_count = np.maximum(l_count.values, r_count.values)
     final_count = Series(final_count, index=l_count.index, dtype="int", copy=False)

diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py
@@ -48,7 +48,7 @@
 from pandas.core.algorithms import (
     take,
     unique,
-    value_counts,
+    value_counts_internal as value_counts,
 )
 from pandas.core.array_algos.quantile import quantile_with_mask
 from pandas.core.array_algos.transforms import shift

diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py
@@ -79,7 +79,7 @@
     isin,
     take,
     unique,
-    value_counts,
+    value_counts_internal as value_counts,
 )
 from pandas.core.arrays.base import (
     ExtensionArray,

diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py
@@ -496,7 +496,7 @@ def max(self, axis=None, skipna: bool = True, **kwargs) -> Scalar:
         return self._wrap_reduction_result(axis, result)
 
     def value_counts(self, dropna: bool = True) -> Series:
-        from pandas import value_counts
+        from pandas.core.algorithms import value_counts_internal as value_counts
 
         result = value_counts(self._ndarray, dropna=dropna).astype("Int64")
         result.index = result.index.astype(self.dtype)

diff --git a/pandas/core/base.py b/pandas/core/base.py
@@ -958,7 +958,7 @@ def value_counts(
         NaN    1
         Name: count, dtype: int64
         """
-        return algorithms.value_counts(
+        return algorithms.value_counts_internal(
             self,
             sort=sort,
             ascending=ascending,

diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py
@@ -25,6 +25,7 @@
     is_scalar,
 )
 from pandas.core import arraylike
+from pandas.core.algorithms import value_counts_internal as value_counts
 from pandas.core.arraylike import OpsMixin
 from pandas.core.arrays import (
     ExtensionArray,
@@ -273,8 +274,6 @@ def convert_values(param):
         return np.asarray(res, dtype=bool)
 
     def value_counts(self, dropna: bool = True):
-        from pandas.core.algorithms import value_counts
-
         return value_counts(self.to_numpy(), dropna=dropna)
 
 

diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py
@@ -1181,21 +1181,26 @@ def test_value_counts(self):
         factor = cut(arr, 4)
 
         # assert isinstance(factor, n)
-        result = algos.value_counts(factor)
+        msg = "pandas.value_counts is deprecated"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            result = algos.value_counts(factor)
         breaks = [-1.194, -0.535, 0.121, 0.777, 1.433]
         index = IntervalIndex.from_breaks(breaks).astype(CDT(ordered=True))
         expected = Series([1, 1, 1, 1], index=index, name="count")
         tm.assert_series_equal(result.sort_index(), expected.sort_index())
 
     def test_value_counts_bins(self):
         s = [1, 2, 3, 4]
-        result = algos.value_counts(s, bins=1)
+        msg = "pandas.value_counts is deprecated"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            result = algos.value_counts(s, bins=1)
         expected = Series(
             [4], index=IntervalIndex.from_tuples([(0.996, 4.0)]), name="count"
         )
         tm.assert_series_equal(result, expected)
 
-        result = algos.value_counts(s, bins=2, sort=False)
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            result = algos.value_counts(s, bins=2, sort=False)
         expected = Series(
             [2, 2],
             index=IntervalIndex.from_tuples([(0.996, 2.5), (2.5, 4.0)]),
@@ -1204,31 +1209,40 @@ def test_value_counts_bins(self):
         tm.assert_series_equal(result, expected)
 
     def test_value_counts_dtypes(self):
-        result = algos.value_counts(np.array([1, 1.0]))
+        msg2 = "pandas.value_counts is deprecated"
+        with tm.assert_produces_warning(FutureWarning, match=msg2):
+            result = algos.value_counts(np.array([1, 1.0]))
         assert len(result) == 1
 
-        result = algos.value_counts(np.array([1, 1.0]), bins=1)
+        with tm.assert_produces_warning(FutureWarning, match=msg2):
+            result = algos.value_counts(np.array([1, 1.0]), bins=1)
         assert len(result) == 1
 
-        result = algos.value_counts(Series([1, 1.0, "1"]))  # object
+        with tm.assert_produces_warning(FutureWarning, match=msg2):
+            result = algos.value_counts(Series([1, 1.0, "1"]))  # object
         assert len(result) == 2
 
         msg = "bins argument only works with numeric data"
         with pytest.raises(TypeError, match=msg):
-            algos.value_counts(np.array(["1", 1], dtype=object), bins=1)
+            with tm.assert_produces_warning(FutureWarning, match=msg2):
+                algos.value_counts(np.array(["1", 1], dtype=object), bins=1)
 
     def test_value_counts_nat(self):
         td = Series([np.timedelta64(10000), NaT], dtype="timedelta64[ns]")
         dt = to_datetime(["NaT", "2014-01-01"])
 
+        msg = "pandas.value_counts is deprecated"
+
         for s in [td, dt]:
-            vc = algos.value_counts(s)
-            vc_with_na = algos.value_counts(s, dropna=False)
+            with tm.assert_produces_warning(FutureWarning, match=msg):
+                vc = algos.value_counts(s)
+                vc_with_na = algos.value_counts(s, dropna=False)
             assert len(vc) == 1
             assert len(vc_with_na) == 2
 
         exp_dt = Series({Timestamp("2014-01-01 00:00:00"): 1}, name="count")
-        tm.assert_series_equal(algos.value_counts(dt), exp_dt)
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            tm.assert_series_equal(algos.value_counts(dt), exp_dt)
         # TODO same for (timedelta)
 
     def test_value_counts_datetime_outofbounds(self):
@@ -1388,13 +1402,16 @@ def test_value_counts_normalized(self, dtype):
     def test_value_counts_uint64(self):
         arr = np.array([2**63], dtype=np.uint64)
         expected = Series([1], index=[2**63], name="count")
-        result = algos.value_counts(arr)
+        msg = "pandas.value_counts is deprecated"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            result = algos.value_counts(arr)
 
         tm.assert_series_equal(result, expected)
 
         arr = np.array([-1, 2**63], dtype=object)
         expected = Series([1, 1], index=[-1, 2**63], name="count")
-        result = algos.value_counts(arr)
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            result = algos.value_counts(arr)
 
         tm.assert_series_equal(result, expected)