From 594933324aa464bce3443e44c25da753e5c29a4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dea=20Mar=C3=ADa=20L=C3=A9on?= Date: Fri, 30 Jun 2023 11:46:38 +0200 Subject: [PATCH] DOC: Fixing EX01 - Added examples (#53930) * Examples Resampler.ohlc, prod, size, sem, std, sum, var * Updated code_checks.sh * Corrected sum docstring * corrected default no, removed print --------- Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- ci/code_checks.sh | 8 --- pandas/core/groupby/groupby.py | 42 +++++++++++ pandas/core/resample.py | 128 ++++++++++++++++++++++++++++++++- 3 files changed, 169 insertions(+), 9 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index cc7c64f001809..a0fb7ab6e411d 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -118,14 +118,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then pandas.io.stata.StataReader.value_labels \ pandas.io.stata.StataReader.variable_labels \ pandas.io.stata.StataWriter.write_file \ - pandas.core.resample.Resampler.ohlc \ - pandas.core.resample.Resampler.prod \ - pandas.core.resample.Resampler.size \ - pandas.core.resample.Resampler.sem \ - pandas.core.resample.Resampler.std \ - pandas.core.resample.Resampler.sum \ - pandas.core.resample.Resampler.var \ - pandas.core.resample.Resampler.quantile \ pandas.describe_option \ pandas.reset_option \ pandas.get_option \ diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index fa7f75bc41b9c..678ab7444bb58 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -2795,6 +2795,20 @@ def sem(self, ddof: int = 1, numeric_only: bool = False): a 1 1.5 4.5 2 0.5 2.0 + + For Resampler: + + >>> ser = pd.Series([1, 3, 2, 4, 3, 8], + ... index=pd.DatetimeIndex(['2023-01-01', + ... '2023-01-10', + ... '2023-01-15', + ... '2023-02-01', + ... '2023-02-10', + ... '2023-02-15'])) + >>> ser.resample('MS').sem() + 2023-01-01 0.577350 + 2023-02-01 1.527525 + Freq: MS, dtype: float64 """ if numeric_only and self.obj.ndim == 1 and not is_numeric_dtype(self.obj.dtype): raise TypeError( @@ -2851,6 +2865,20 @@ def size(self) -> DataFrame | Series: 1 2 7 1 dtype: int64 + + For Resampler: + + >>> ser = pd.Series([1, 2, 3], index=pd.DatetimeIndex( + ... ['2023-01-01', '2023-01-15', '2023-02-01'])) + >>> ser + 2023-01-01 1 + 2023-01-15 2 + 2023-02-01 3 + dtype: int64 + >>> ser.resample('MS').size() + 2023-01-01 2 + 2023-02-01 1 + Freq: MS, dtype: int64 """ result = self.grouper.size() @@ -3303,6 +3331,20 @@ def ohlc(self) -> DataFrame: open high low close open high low close CAC 2.3 4.5 1.0 1.0 9.0 9.4 1.0 1.0 SPX 1.2 8.9 1.2 2.0 3.4 8.8 3.4 8.2 + + For Resampler: + + >>> ser = pd.Series([1, 3, 2, 4, 3, 5], + ... index=pd.DatetimeIndex(['2023-01-01', + ... '2023-01-10', + ... '2023-01-15', + ... '2023-02-01', + ... '2023-02-10', + ... '2023-02-15'])) + >>> ser.resample('MS').ohlc() + open high low close + 2023-01-01 1 3 1 2 + 2023-02-01 4 5 3 5 """ if self.obj.ndim == 1: obj = self._selected_obj diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 501b530d80535..3916f90e1f0b2 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1109,11 +1109,46 @@ def sum( *args, **kwargs, ): + """ + Compute sum of group values. + + Parameters + ---------- + numeric_only : bool, default False + Include only float, int, boolean columns. + + .. versionchanged:: 2.0.0 + + numeric_only no longer accepts ``None``. + + min_count : int, default 0 + The required number of valid values to perform the operation. If fewer + than ``min_count`` non-NA values are present the result will be NA. + + Returns + ------- + Series or DataFrame + Computed sum of values within each group. + + Examples + -------- + >>> ser = pd.Series([1, 2, 3, 4], index=pd.DatetimeIndex( + ... ['2023-01-01', '2023-01-15', '2023-02-01', '2023-02-15'])) + >>> ser + 2023-01-01 1 + 2023-01-15 2 + 2023-02-01 3 + 2023-02-15 4 + dtype: int64 + >>> ser.resample('MS').sum() + 2023-01-01 3 + 2023-02-01 7 + Freq: MS, dtype: int64 + """ maybe_warn_args_and_kwargs(type(self), "sum", args, kwargs) nv.validate_resampler_func("sum", args, kwargs) return self._downsample("sum", numeric_only=numeric_only, min_count=min_count) - @doc(GroupBy.prod) def prod( self, numeric_only: bool = False, @@ -1121,6 +1156,42 @@ def prod( *args, **kwargs, ): + """ + Compute prod of group values. + + Parameters + ---------- + numeric_only : bool, default False + Include only float, int, boolean columns. + + .. versionchanged:: 2.0.0 + + numeric_only no longer accepts ``None``. + + min_count : int, default 0 + The required number of valid values to perform the operation. If fewer + than ``min_count`` non-NA values are present the result will be NA. + + Returns + ------- + Series or DataFrame + Computed prod of values within each group. + + Examples + -------- + >>> ser = pd.Series([1, 2, 3, 4], index=pd.DatetimeIndex( + ... ['2023-01-01', '2023-01-15', '2023-02-01', '2023-02-15'])) + >>> ser + 2023-01-01 1 + 2023-01-15 2 + 2023-02-01 3 + 2023-02-15 4 + dtype: int64 + >>> ser.resample('MS').prod() + 2023-01-01 2 + 2023-02-01 12 + Freq: MS, dtype: int64 + """ maybe_warn_args_and_kwargs(type(self), "prod", args, kwargs) nv.validate_resampler_func("prod", args, kwargs) return self._downsample("prod", numeric_only=numeric_only, min_count=min_count) @@ -1292,6 +1363,21 @@ def std( ------- DataFrame or Series Standard deviation of values within each group. + + Examples + -------- + + >>> ser = pd.Series([1, 3, 2, 4, 3, 8], + ... index=pd.DatetimeIndex(['2023-01-01', + ... '2023-01-10', + ... '2023-01-15', + ... '2023-02-01', + ... '2023-02-10', + ... '2023-02-15'])) + >>> ser.resample('MS').std() + 2023-01-01 1.000000 + 2023-02-01 2.645751 + Freq: MS, dtype: float64 """ maybe_warn_args_and_kwargs(type(self), "std", args, kwargs) nv.validate_resampler_func("std", args, kwargs) @@ -1325,6 +1411,26 @@ def var( ------- DataFrame or Series Variance of values within each group. + + Examples + -------- + + >>> ser = pd.Series([1, 3, 2, 4, 3, 8], + ... index=pd.DatetimeIndex(['2023-01-01', + ... '2023-01-10', + ... '2023-01-15', + ... '2023-02-01', + ... '2023-02-10', + ... '2023-02-15'])) + >>> ser.resample('MS').var() + 2023-01-01 1.0 + 2023-02-01 7.0 + Freq: MS, dtype: float64 + + >>> ser.resample('MS').var(ddof=0) + 2023-01-01 0.666667 + 2023-02-01 4.666667 + Freq: MS, dtype: float64 """ maybe_warn_args_and_kwargs(type(self), "var", args, kwargs) nv.validate_resampler_func("var", args, kwargs) @@ -1421,6 +1527,26 @@ def quantile(self, q: float | AnyArrayLike = 0.5, **kwargs): DataFrameGroupBy.quantile Return a DataFrame, where the columns are groupby columns, and the values are its quantiles. + + Examples + -------- + + >>> ser = pd.Series([1, 3, 2, 4, 3, 8], + ... index=pd.DatetimeIndex(['2023-01-01', + ... '2023-01-10', + ... '2023-01-15', + ... '2023-02-01', + ... '2023-02-10', + ... '2023-02-15'])) + >>> ser.resample('MS').quantile() + 2023-01-01 2.0 + 2023-02-01 4.0 + Freq: MS, dtype: float64 + + >>> ser.resample('MS').quantile(.25) + 2023-01-01 1.5 + 2023-02-01 3.5 + Freq: MS, dtype: float64 """ return self._downsample("quantile", q=q, **kwargs)