Skip to content

Commit

Permalink
DOC: Fixing EX01 - Added examples (#53930)
Browse files Browse the repository at this point in the history
* Examples for Resampler.ohlc, prod, size, sem, std, sum, var, quantile

* Updated code_checks.sh

* Corrected sum docstring

* corrected default no, removed print

---------

Co-authored-by: Matthew Roeschke <[email protected]>
  • Loading branch information
DeaMariaLeon and mroeschke authored Jun 30, 2023
1 parent ad17842 commit 5949333
Show file tree
Hide file tree
Showing 3 changed files with 169 additions and 9 deletions.
8 changes: 0 additions & 8 deletions ci/code_checks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -118,14 +118,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
pandas.io.stata.StataReader.value_labels \
pandas.io.stata.StataReader.variable_labels \
pandas.io.stata.StataWriter.write_file \
pandas.core.resample.Resampler.ohlc \
pandas.core.resample.Resampler.prod \
pandas.core.resample.Resampler.size \
pandas.core.resample.Resampler.sem \
pandas.core.resample.Resampler.std \
pandas.core.resample.Resampler.sum \
pandas.core.resample.Resampler.var \
pandas.core.resample.Resampler.quantile \
pandas.describe_option \
pandas.reset_option \
pandas.get_option \
Expand Down
42 changes: 42 additions & 0 deletions pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -2795,6 +2795,20 @@ def sem(self, ddof: int = 1, numeric_only: bool = False):
a
1 1.5 4.5
2 0.5 2.0
For Resampler:
>>> ser = pd.Series([1, 3, 2, 4, 3, 8],
... index=pd.DatetimeIndex(['2023-01-01',
... '2023-01-10',
... '2023-01-15',
... '2023-02-01',
... '2023-02-10',
... '2023-02-15']))
>>> ser.resample('MS').sem()
2023-01-01 0.577350
2023-02-01 1.527525
Freq: MS, dtype: float64
"""
if numeric_only and self.obj.ndim == 1 and not is_numeric_dtype(self.obj.dtype):
raise TypeError(
Expand Down Expand Up @@ -2851,6 +2865,20 @@ def size(self) -> DataFrame | Series:
1 2
7 1
dtype: int64
For Resampler:
>>> ser = pd.Series([1, 2, 3], index=pd.DatetimeIndex(
... ['2023-01-01', '2023-01-15', '2023-02-01']))
>>> ser
2023-01-01 1
2023-01-15 2
2023-02-01 3
dtype: int64
>>> ser.resample('MS').size()
2023-01-01 2
2023-02-01 1
Freq: MS, dtype: int64
"""
result = self.grouper.size()

Expand Down Expand Up @@ -3303,6 +3331,20 @@ def ohlc(self) -> DataFrame:
open high low close open high low close
CAC 2.3 4.5 1.0 1.0 9.0 9.4 1.0 1.0
SPX 1.2 8.9 1.2 2.0 3.4 8.8 3.4 8.2
For Resampler:
>>> ser = pd.Series([1, 3, 2, 4, 3, 5],
... index=pd.DatetimeIndex(['2023-01-01',
... '2023-01-10',
... '2023-01-15',
... '2023-02-01',
... '2023-02-10',
... '2023-02-15']))
>>> ser.resample('MS').ohlc()
open high low close
2023-01-01 1 3 1 2
2023-02-01 4 5 3 5
"""
if self.obj.ndim == 1:
obj = self._selected_obj
Expand Down
128 changes: 127 additions & 1 deletion pandas/core/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -1109,18 +1109,89 @@ def sum(
*args,
**kwargs,
):
"""
Compute sum of group values.
Parameters
----------
numeric_only : bool, default False
Include only float, int, boolean columns.
.. versionchanged:: 2.0.0
numeric_only no longer accepts ``None``.
min_count : int, default 0
The required number of valid values to perform the operation. If fewer
than ``min_count`` non-NA values are present the result will be NA.
Returns
-------
Series or DataFrame
Computed sum of values within each group.
Examples
--------
>>> ser = pd.Series([1, 2, 3, 4], index=pd.DatetimeIndex(
... ['2023-01-01', '2023-01-15', '2023-02-01', '2023-02-15']))
>>> ser
2023-01-01 1
2023-01-15 2
2023-02-01 3
2023-02-15 4
dtype: int64
>>> ser.resample('MS').sum()
2023-01-01 3
2023-02-01 7
Freq: MS, dtype: int64
"""
maybe_warn_args_and_kwargs(type(self), "sum", args, kwargs)
nv.validate_resampler_func("sum", args, kwargs)
return self._downsample("sum", numeric_only=numeric_only, min_count=min_count)

@doc(GroupBy.prod)
def prod(
self,
numeric_only: bool = False,
min_count: int = 0,
*args,
**kwargs,
):
"""
Compute product of group values.
Parameters
----------
numeric_only : bool, default False
Include only float, int, boolean columns.
.. versionchanged:: 2.0.0
numeric_only no longer accepts ``None``.
min_count : int, default 0
The required number of valid values to perform the operation. If fewer
than ``min_count`` non-NA values are present the result will be NA.
Returns
-------
Series or DataFrame
Computed product of values within each group.
Examples
--------
>>> ser = pd.Series([1, 2, 3, 4], index=pd.DatetimeIndex(
... ['2023-01-01', '2023-01-15', '2023-02-01', '2023-02-15']))
>>> ser
2023-01-01 1
2023-01-15 2
2023-02-01 3
2023-02-15 4
dtype: int64
>>> ser.resample('MS').prod()
2023-01-01 2
2023-02-01 12
Freq: MS, dtype: int64
"""
maybe_warn_args_and_kwargs(type(self), "prod", args, kwargs)
nv.validate_resampler_func("prod", args, kwargs)
return self._downsample("prod", numeric_only=numeric_only, min_count=min_count)
Expand Down Expand Up @@ -1292,6 +1363,21 @@ def std(
-------
DataFrame or Series
Standard deviation of values within each group.
Examples
--------
>>> ser = pd.Series([1, 3, 2, 4, 3, 8],
... index=pd.DatetimeIndex(['2023-01-01',
... '2023-01-10',
... '2023-01-15',
... '2023-02-01',
... '2023-02-10',
... '2023-02-15']))
>>> ser.resample('MS').std()
2023-01-01 1.000000
2023-02-01 2.645751
Freq: MS, dtype: float64
"""
maybe_warn_args_and_kwargs(type(self), "std", args, kwargs)
nv.validate_resampler_func("std", args, kwargs)
Expand Down Expand Up @@ -1325,6 +1411,26 @@ def var(
-------
DataFrame or Series
Variance of values within each group.
Examples
--------
>>> ser = pd.Series([1, 3, 2, 4, 3, 8],
... index=pd.DatetimeIndex(['2023-01-01',
... '2023-01-10',
... '2023-01-15',
... '2023-02-01',
... '2023-02-10',
... '2023-02-15']))
>>> ser.resample('MS').var()
2023-01-01 1.0
2023-02-01 7.0
Freq: MS, dtype: float64
>>> ser.resample('MS').var(ddof=0)
2023-01-01 0.666667
2023-02-01 4.666667
Freq: MS, dtype: float64
"""
maybe_warn_args_and_kwargs(type(self), "var", args, kwargs)
nv.validate_resampler_func("var", args, kwargs)
Expand Down Expand Up @@ -1421,6 +1527,26 @@ def quantile(self, q: float | AnyArrayLike = 0.5, **kwargs):
DataFrameGroupBy.quantile
Return a DataFrame, where the columns are groupby columns,
and the values are its quantiles.
Examples
--------
>>> ser = pd.Series([1, 3, 2, 4, 3, 8],
... index=pd.DatetimeIndex(['2023-01-01',
... '2023-01-10',
... '2023-01-15',
... '2023-02-01',
... '2023-02-10',
... '2023-02-15']))
>>> ser.resample('MS').quantile()
2023-01-01 2.0
2023-02-01 4.0
Freq: MS, dtype: float64
>>> ser.resample('MS').quantile(.25)
2023-01-01 1.5
2023-02-01 3.5
Freq: MS, dtype: float64
"""
return self._downsample("quantile", q=q, **kwargs)

Expand Down

0 comments on commit 5949333

Please sign in to comment.