From f1a78d5a9f271c3107de35c18564c919eadd2713 Mon Sep 17 00:00:00 2001 From: Jordan Murphy <35613487+jordan-d-murphy@users.noreply.github.com> Date: Mon, 26 Feb 2024 15:34:41 -0700 Subject: [PATCH] Doc: resolve GL08 for pandas.core.groupby.SeriesGroupBy.value_counts (#57609) --- ci/code_checks.sh | 1 - pandas/core/groupby/generic.py | 85 ++++++++++++++++++++++++++++++++-- 2 files changed, 82 insertions(+), 4 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 6de0a8c2ca325..22c3a71812de7 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -178,7 +178,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then pandas.Timestamp.tzinfo\ pandas.Timestamp.value\ pandas.Timestamp.year\ - pandas.core.groupby.SeriesGroupBy.value_counts\ pandas.tseries.offsets.BQuarterBegin.is_anchored\ pandas.tseries.offsets.BQuarterBegin.is_on_offset\ pandas.tseries.offsets.BQuarterBegin.n\ diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 3b7963693bcae..a604283f3d078 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -801,6 +801,85 @@ def value_counts( bins=None, dropna: bool = True, ) -> Series | DataFrame: + """ + Return a Series or DataFrame containing counts of unique rows. + + .. versionadded:: 1.4.0 + + Parameters + ---------- + normalize : bool, default False + Return proportions rather than frequencies. + sort : bool, default True + Sort by frequencies. + ascending : bool, default False + Sort in ascending order. + bins : int or list of ints, optional + Rather than count values, group them into half-open bins, + a convenience for pd.cut, only works with numeric data. + dropna : bool, default True + Don't include counts of rows that contain NA values. + + Returns + ------- + Series or DataFrame + Series if the groupby ``as_index`` is True, otherwise DataFrame. + + See Also + -------- + Series.value_counts: Equivalent method on Series. + DataFrame.value_counts: Equivalent method on DataFrame. + DataFrameGroupBy.value_counts: Equivalent method on DataFrameGroupBy. + + Notes + ----- + - If the groupby ``as_index`` is True then the returned Series will have a + MultiIndex with one level per input column. + - If the groupby ``as_index`` is False then the returned DataFrame will have an + additional column with the value_counts. The column is labelled 'count' or + 'proportion', depending on the ``normalize`` parameter. + + By default, rows that contain any NA values are omitted from + the result. + + By default, the result will be in descending order so that the + first element of each group is the most frequently-occurring row. + + Examples + -------- + >>> s = pd.Series( + ... [1, 1, 2, 3, 2, 3, 3, 1, 1, 3, 3, 3], + ... index=["A", "A", "A", "A", "A", "A", "B", "B", "B", "B", "B", "B"], + ... ) + >>> s + A 1 + A 1 + A 2 + A 3 + A 2 + A 3 + B 3 + B 1 + B 1 + B 3 + B 3 + B 3 + dtype: int64 + >>> g1 = s.groupby(s.index) + >>> g1.value_counts(bins=2) + A (0.997, 2.0] 4 + (2.0, 3.0] 2 + B (2.0, 3.0] 4 + (0.997, 2.0] 2 + Name: count, dtype: int64 + >>> g1.value_counts(normalize=True) + A 1 0.333333 + 2 0.333333 + 3 0.333333 + B 3 0.666667 + 1 0.333333 + Name: proportion, dtype: float64 + """ name = "proportion" if normalize else "count" if bins is None: @@ -2303,7 +2382,7 @@ def value_counts( Returns ------- Series or DataFrame - Series if the groupby as_index is True, otherwise DataFrame. + Series if the groupby ``as_index`` is True, otherwise DataFrame. See Also -------- @@ -2313,9 +2392,9 @@ def value_counts( Notes ----- - - If the groupby as_index is True then the returned Series will have a + - If the groupby ``as_index`` is True then the returned Series will have a MultiIndex with one level per input column. - - If the groupby as_index is False then the returned DataFrame will have an + - If the groupby ``as_index`` is False then the returned DataFrame will have an additional column with the value_counts. The column is labelled 'count' or 'proportion', depending on the ``normalize`` parameter.