Skip to content

Commit

Permalink
API: rolling.apply will pass Series to function (pandas-dev#20584)
Browse files Browse the repository at this point in the history
  • Loading branch information
jreback authored Apr 16, 2018
1 parent da33359 commit 4a34497
Show file tree
Hide file tree
Showing 5 changed files with 479 additions and 306 deletions.
32 changes: 32 additions & 0 deletions doc/source/whatsnew/v0.23.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,35 @@ The :func:`get_dummies` now accepts a ``dtype`` argument, which specifies a dtyp
pd.get_dummies(df, columns=['c'], dtype=bool).dtypes


.. _whatsnew_0230.enhancements.window_raw:

Rolling/Expanding.apply() accepts a ``raw`` keyword to pass a ``Series`` to the function
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

:func:`Series.rolling().apply() <pandas.core.window.Rolling.apply>`, :func:`DataFrame.rolling().apply() <pandas.core.window.Rolling.apply>`,
:func:`Series.expanding().apply() <pandas.core.window.Expanding.apply>`, and :func:`DataFrame.expanding().apply() <pandas.core.window.Expanding.apply>` have gained a ``raw=None`` parameter.
This is similar to :func:`DataFrame.apply`. This parameter, if ``True``, allows one to send an ``np.ndarray`` to the applied function. If ``False``, a ``Series`` will be passed. The
default is ``None``, which preserves backward compatibility, so this will default to ``True``, sending an ``np.ndarray``.
In a future version the default will be changed to ``False``, sending a ``Series``. (:issue:`5071`, :issue:`20584`)

.. ipython:: python

s = pd.Series(np.arange(5), np.arange(5) + 1)
s

Pass a ``Series``:

.. ipython:: python

s.rolling(2, min_periods=1).apply(lambda x: x.iloc[-1], raw=False)

Mimic the original behavior of passing an ndarray:

.. ipython:: python

s.rolling(2, min_periods=1).apply(lambda x: x[-1], raw=True)


.. _whatsnew_0230.enhancements.merge_on_columns_and_levels:

Merging on a combination of columns and index levels
Expand Down Expand Up @@ -817,6 +846,7 @@ Other API Changes
- :func:`DatetimeIndex.strftime` and :func:`PeriodIndex.strftime` now return an ``Index`` instead of a numpy array to be consistent with similar accessors (:issue:`20127`)
- Constructing a Series from a list of length 1 no longer broadcasts this list when a longer index is specified (:issue:`19714`, :issue:`20391`).
- :func:`DataFrame.to_dict` with ``orient='index'`` no longer casts int columns to float for a DataFrame with only int and float columns (:issue:`18580`)
- A user-defined-function that is passed to :func:`Series.rolling().aggregate() <pandas.core.window.Rolling.aggregate>`, :func:`DataFrame.rolling().aggregate() <pandas.core.window.Rolling.aggregate>`, or its expanding cousins, will now *always* be passed a ``Series``, rather than an ``np.ndarray``; only ``.apply()`` has the ``raw`` keyword, see :ref:`here <whatsnew_0230.enhancements.window_raw>`. This is consistent with the signatures of ``.aggregate()`` across pandas (:issue:`20584`)

.. _whatsnew_0230.deprecations:

Expand Down Expand Up @@ -845,6 +875,8 @@ Deprecations
- ``Index.summary()`` is deprecated and will be removed in a future version (:issue:`18217`)
- ``NDFrame.get_ftype_counts()`` is deprecated and will be removed in a future version (:issue:`18243`)
- The ``convert_datetime64`` parameter in :func:`DataFrame.to_records` has been deprecated and will be removed in a future version. The NumPy bug motivating this parameter has been resolved. The default value for this parameter has also changed from ``True`` to ``None`` (:issue:`18160`).
- :func:`Series.rolling().apply() <pandas.core.window.Rolling.apply>`, :func:`DataFrame.rolling().apply() <pandas.core.window.Rolling.apply>`,
:func:`Series.expanding().apply() <pandas.core.window.Expanding.apply>`, and :func:`DataFrame.expanding().apply() <pandas.core.window.Expanding.apply>` have deprecated passing an ``np.ndarray`` by default. One will need to pass the new ``raw`` parameter explicitly to state what is passed to the function (:issue:`20584`)

.. _whatsnew_0230.prior_deprecations:

Expand Down
46 changes: 32 additions & 14 deletions pandas/_libs/window.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1432,39 +1432,44 @@ def roll_quantile(ndarray[float64_t, cast=True] input, int64_t win,
return output


def roll_generic(ndarray[float64_t, cast=True] input,
def roll_generic(object obj,
int64_t win, int64_t minp, object index, object closed,
int offset, object func,
int offset, object func, bint raw,
object args, object kwargs):
cdef:
ndarray[double_t] output, counts, bufarr
ndarray[float64_t, cast=True] arr
float64_t *buf
float64_t *oldbuf
int64_t nobs = 0, i, j, s, e, N
bint is_variable
ndarray[int64_t] start, end

if not input.flags.c_contiguous:
input = input.copy('C')

n = len(input)
n = len(obj)
if n == 0:
return input
return obj

arr = np.asarray(obj)

# ndarray input
if raw:
if not arr.flags.c_contiguous:
arr = arr.copy('C')

counts = roll_sum(np.concatenate([np.isfinite(input).astype(float),
counts = roll_sum(np.concatenate([np.isfinite(arr).astype(float),
np.array([0.] * offset)]),
win, minp, index, closed)[offset:]

start, end, N, win, minp, is_variable = get_window_indexer(input, win,
start, end, N, win, minp, is_variable = get_window_indexer(arr, win,
minp, index,
closed,
floor=0)

output = np.empty(N, dtype=float)

if is_variable:
# variable window arr or series

# variable window
if offset != 0:
raise ValueError("unable to roll_generic with a non-zero offset")

Expand All @@ -1473,7 +1478,20 @@ def roll_generic(ndarray[float64_t, cast=True] input,
e = end[i]

if counts[i] >= minp:
output[i] = func(input[s:e], *args, **kwargs)
if raw:
output[i] = func(arr[s:e], *args, **kwargs)
else:
output[i] = func(obj.iloc[s:e], *args, **kwargs)
else:
output[i] = NaN

elif not raw:
# series
for i from 0 <= i < N:
if counts[i] >= minp:
sl = slice(int_max(i + offset - win + 1, 0),
int_min(i + offset + 1, N))
output[i] = func(obj.iloc[sl], *args, **kwargs)
else:
output[i] = NaN

Expand All @@ -1482,12 +1500,12 @@ def roll_generic(ndarray[float64_t, cast=True] input,
# truncated windows at the beginning, through first full-length window
for i from 0 <= i < (int_min(win, N) - offset):
if counts[i] >= minp:
output[i] = func(input[0: (i + offset + 1)], *args, **kwargs)
output[i] = func(arr[0: (i + offset + 1)], *args, **kwargs)
else:
output[i] = NaN

# remaining full-length windows
buf = <float64_t *> input.data
buf = <float64_t *> arr.data
bufarr = np.empty(win, dtype=float)
oldbuf = <float64_t *> bufarr.data
for i from (win - offset) <= i < (N - offset):
Expand All @@ -1502,7 +1520,7 @@ def roll_generic(ndarray[float64_t, cast=True] input,
# truncated windows at the end
for i from int_max(N - offset, 0) <= i < N:
if counts[i] >= minp:
output[i] = func(input[int_max(i + offset - win + 1, 0): N],
output[i] = func(arr[int_max(i + offset - win + 1, 0): N],
*args,
**kwargs)
else:
Expand Down
2 changes: 2 additions & 0 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -4292,6 +4292,8 @@ def pipe(self, func, *args, **kwargs):
Notes
-----
`agg` is an alias for `aggregate`. Use the alias.
A passed user-defined-function will be passed a Series for evaluation.
""")

_shared_docs['transform'] = ("""
Expand Down
54 changes: 43 additions & 11 deletions pandas/core/window.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,7 @@ def _center_window(self, result, window):
def aggregate(self, arg, *args, **kwargs):
result, how = self._aggregate(arg, *args, **kwargs)
if result is None:
return self.apply(arg, args=args, kwargs=kwargs)
return self.apply(arg, raw=False, args=args, kwargs=kwargs)
return result

agg = aggregate
Expand Down Expand Up @@ -954,23 +954,53 @@ def count(self):
Parameters
----------
func : function
Must produce a single value from an ndarray input
\*args and \*\*kwargs are passed to the function""")
Must produce a single value from an ndarray input if ``raw=True``
or a Series if ``raw=False``
raw : bool, default None
* ``False`` : passes each row or column as a Series to the
function.
* ``True`` or ``None`` : the passed function will receive ndarray
objects instead.
If you are just applying a NumPy reduction function this will
achieve much better performance.
The `raw` parameter is required and will show a FutureWarning if
not passed. In the future `raw` will default to False.
.. versionadded:: 0.23.0
\*args and \*\*kwargs are passed to the function""")

def apply(self, func, raw=None, args=(), kwargs={}):
from pandas import Series

def apply(self, func, args=(), kwargs={}):
# TODO: _level is unused?
_level = kwargs.pop('_level', None) # noqa
window = self._get_window()
offset = _offset(window, self.center)
index, indexi = self._get_index()

# TODO: default is for backward compat
# change to False in the future
if raw is None:
warnings.warn(
"Currently, 'apply' passes the values as ndarrays to the "
"applied function. In the future, this will change to passing "
"it as Series objects. You need to specify 'raw=True' to keep "
"the current behaviour, and you can pass 'raw=False' to "
"silence this warning", FutureWarning, stacklevel=3)
raw = True

def f(arg, window, min_periods, closed):
minp = _use_window(min_periods, window)
return _window.roll_generic(arg, window, minp, indexi, closed,
offset, func, args, kwargs)
if not raw:
arg = Series(arg, index=self.obj.index)
return _window.roll_generic(
arg, window, minp, indexi,
closed, offset, func, raw, args, kwargs)

return self._apply(f, func, args=args, kwargs=kwargs,
center=False)
center=False, raw=raw)

def sum(self, *args, **kwargs):
nv.validate_window_func('sum', args, kwargs)
Expand Down Expand Up @@ -1498,8 +1528,9 @@ def count(self):
@Substitution(name='rolling')
@Appender(_doc_template)
@Appender(_shared_docs['apply'])
def apply(self, func, args=(), kwargs={}):
return super(Rolling, self).apply(func, args=args, kwargs=kwargs)
def apply(self, func, raw=None, args=(), kwargs={}):
return super(Rolling, self).apply(
func, raw=raw, args=args, kwargs=kwargs)

@Substitution(name='rolling')
@Appender(_shared_docs['sum'])
Expand Down Expand Up @@ -1756,8 +1787,9 @@ def count(self, **kwargs):
@Substitution(name='expanding')
@Appender(_doc_template)
@Appender(_shared_docs['apply'])
def apply(self, func, args=(), kwargs={}):
return super(Expanding, self).apply(func, args=args, kwargs=kwargs)
def apply(self, func, raw=None, args=(), kwargs={}):
return super(Expanding, self).apply(
func, raw=raw, args=args, kwargs=kwargs)

@Substitution(name='expanding')
@Appender(_shared_docs['sum'])
Expand Down
Loading

0 comments on commit 4a34497

Please sign in to comment.