diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 1992c27fd11ed..641214550a3b7 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -65,6 +65,35 @@ The :func:`get_dummies` now accepts a ``dtype`` argument, which specifies a dtyp pd.get_dummies(df, columns=['c'], dtype=bool).dtypes +.. _whatsnew_0230.enhancements.window_raw: + +Rolling/Expanding.apply() accepts a ``raw`` keyword to pass a ``Series`` to the function +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:func:`Series.rolling().apply() <pandas.core.window.Rolling.apply>`, :func:`DataFrame.rolling().apply() <pandas.core.window.Rolling.apply>`, +:func:`Series.expanding().apply() <pandas.core.window.Expanding.apply>`, and :func:`DataFrame.expanding().apply() <pandas.core.window.Expanding.apply>` have gained a ``raw=None`` parameter. +This is similar to :func:`DataFrame.apply`. This parameter, if ``True``, allows one to send a ``np.ndarray`` to the applied function. If ``False``, a ``Series`` will be passed. The +default is ``None``, which preserves backward compatibility, so this will default to ``True``, sending an ``np.ndarray``. +In a future version the default will be changed to ``False``, sending a ``Series``. (:issue:`5071`, :issue:`20584`) + +.. ipython:: python + + s = pd.Series(np.arange(5), np.arange(5) + 1) + s + +Pass a ``Series``: + +.. ipython:: python + + s.rolling(2, min_periods=1).apply(lambda x: x.iloc[-1], raw=False) + +Mimic the original behavior of passing an ndarray: + +.. ipython:: python + + s.rolling(2, min_periods=1).apply(lambda x: x[-1], raw=True) + + .. _whatsnew_0230.enhancements.merge_on_columns_and_levels: Merging on a combination of columns and index levels @@ -817,6 +846,7 @@ Other API Changes - :func:`DatetimeIndex.strftime` and :func:`PeriodIndex.strftime` now return an ``Index`` instead of a numpy array to be consistent with similar accessors (:issue:`20127`) - Constructing a Series from a list of length 1 no longer broadcasts this list when a longer index is specified (:issue:`19714`, :issue:`20391`). 
- :func:`DataFrame.to_dict` with ``orient='index'`` no longer casts int columns to float for a DataFrame with only int and float columns (:issue:`18580`) +- A user-defined-function that is passed to :func:`Series.rolling().aggregate() <pandas.core.window.Rolling.aggregate>`, :func:`DataFrame.rolling().aggregate() <pandas.core.window.Rolling.aggregate>`, or their expanding cousins, will now *always* be passed a ``Series``, rather than an ``np.ndarray``; ``.apply()`` only has the ``raw`` keyword, see :ref:`here <whatsnew_0230.enhancements.window_raw>`. This is consistent with the signatures of ``.aggregate()`` across pandas (:issue:`20584`) .. _whatsnew_0230.deprecations: @@ -845,6 +875,8 @@ Deprecations - ``Index.summary()`` is deprecated and will be removed in a future version (:issue:`18217`) - ``NDFrame.get_ftype_counts()`` is deprecated and will be removed in a future version (:issue:`18243`) - The ``convert_datetime64`` parameter in :func:`DataFrame.to_records` has been deprecated and will be removed in a future version. The NumPy bug motivating this parameter has been resolved. The default value for this parameter has also changed from ``True`` to ``None`` (:issue:`18160`). +- :func:`Series.rolling().apply() <pandas.core.window.Rolling.apply>`, :func:`DataFrame.rolling().apply() <pandas.core.window.Rolling.apply>`, + :func:`Series.expanding().apply() <pandas.core.window.Expanding.apply>`, and :func:`DataFrame.expanding().apply() <pandas.core.window.Expanding.apply>` have deprecated passing an ``np.ndarray`` by default. One will need to pass the new ``raw`` parameter to be explicit about what is passed (:issue:`20584`) .. 
_whatsnew_0230.prior_deprecations: diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx index aa13f03d8e9e4..e524f823605a4 100644 --- a/pandas/_libs/window.pyx +++ b/pandas/_libs/window.pyx @@ -1432,30 +1432,35 @@ def roll_quantile(ndarray[float64_t, cast=True] input, int64_t win, return output -def roll_generic(ndarray[float64_t, cast=True] input, +def roll_generic(object obj, int64_t win, int64_t minp, object index, object closed, - int offset, object func, + int offset, object func, bint raw, object args, object kwargs): cdef: ndarray[double_t] output, counts, bufarr + ndarray[float64_t, cast=True] arr float64_t *buf float64_t *oldbuf int64_t nobs = 0, i, j, s, e, N bint is_variable ndarray[int64_t] start, end - if not input.flags.c_contiguous: - input = input.copy('C') - - n = len(input) + n = len(obj) if n == 0: - return input + return obj + + arr = np.asarray(obj) + + # ndarray input + if raw: + if not arr.flags.c_contiguous: + arr = arr.copy('C') - counts = roll_sum(np.concatenate([np.isfinite(input).astype(float), + counts = roll_sum(np.concatenate([np.isfinite(arr).astype(float), np.array([0.] 
* offset)]), win, minp, index, closed)[offset:] - start, end, N, win, minp, is_variable = get_window_indexer(input, win, + start, end, N, win, minp, is_variable = get_window_indexer(arr, win, minp, index, closed, floor=0) @@ -1463,8 +1468,8 @@ def roll_generic(ndarray[float64_t, cast=True] input, output = np.empty(N, dtype=float) if is_variable: + # variable window arr or series - # variable window if offset != 0: raise ValueError("unable to roll_generic with a non-zero offset") @@ -1473,7 +1478,20 @@ def roll_generic(ndarray[float64_t, cast=True] input, e = end[i] if counts[i] >= minp: - output[i] = func(input[s:e], *args, **kwargs) + if raw: + output[i] = func(arr[s:e], *args, **kwargs) + else: + output[i] = func(obj.iloc[s:e], *args, **kwargs) + else: + output[i] = NaN + + elif not raw: + # series + for i from 0 <= i < N: + if counts[i] >= minp: + sl = slice(int_max(i + offset - win + 1, 0), + int_min(i + offset + 1, N)) + output[i] = func(obj.iloc[sl], *args, **kwargs) else: output[i] = NaN @@ -1482,12 +1500,12 @@ def roll_generic(ndarray[float64_t, cast=True] input, # truncated windows at the beginning, through first full-length window for i from 0 <= i < (int_min(win, N) - offset): if counts[i] >= minp: - output[i] = func(input[0: (i + offset + 1)], *args, **kwargs) + output[i] = func(arr[0: (i + offset + 1)], *args, **kwargs) else: output[i] = NaN # remaining full-length windows - buf = input.data + buf = arr.data bufarr = np.empty(win, dtype=float) oldbuf = bufarr.data for i from (win - offset) <= i < (N - offset): @@ -1502,7 +1520,7 @@ def roll_generic(ndarray[float64_t, cast=True] input, # truncated windows at the end for i from int_max(N - offset, 0) <= i < N: if counts[i] >= minp: - output[i] = func(input[int_max(i + offset - win + 1, 0): N], + output[i] = func(arr[int_max(i + offset - win + 1, 0): N], *args, **kwargs) else: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index ae9d160db08e9..d3ab7afc025c9 100644 --- 
a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4292,6 +4292,8 @@ def pipe(self, func, *args, **kwargs): Notes ----- `agg` is an alias for `aggregate`. Use the alias. + + A passed user-defined-function will be passed a Series for evaluation. """) _shared_docs['transform'] = (""" diff --git a/pandas/core/window.py b/pandas/core/window.py index 5cd4fffb5d7dd..f8b5aa292f309 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -314,7 +314,7 @@ def _center_window(self, result, window): def aggregate(self, arg, *args, **kwargs): result, how = self._aggregate(arg, *args, **kwargs) if result is None: - return self.apply(arg, args=args, kwargs=kwargs) + return self.apply(arg, raw=False, args=args, kwargs=kwargs) return result agg = aggregate @@ -954,23 +954,53 @@ def count(self): Parameters ---------- func : function - Must produce a single value from an ndarray input - \*args and \*\*kwargs are passed to the function""") + Must produce a single value from an ndarray input if ``raw=True`` + or a Series if ``raw=False`` + raw : bool, default None + * ``False`` : passes each row or column as a Series to the + function. + * ``True`` or ``None`` : the passed function will receive ndarray + objects instead. + If you are just applying a NumPy reduction function this will + achieve much better performance. + + The `raw` parameter is required and will show a FutureWarning if + not passed. In the future `raw` will default to False. + + .. versionadded:: 0.23.0 + + \*args and \*\*kwargs are passed to the function""") + + def apply(self, func, raw=None, args=(), kwargs={}): + from pandas import Series - def apply(self, func, args=(), kwargs={}): # TODO: _level is unused? 
_level = kwargs.pop('_level', None) # noqa window = self._get_window() offset = _offset(window, self.center) index, indexi = self._get_index() + # TODO: default is for backward compat + # change to False in the future + if raw is None: + warnings.warn( + "Currently, 'apply' passes the values as ndarrays to the " + "applied function. In the future, this will change to passing " + "it as Series objects. You need to specify 'raw=True' to keep " + "the current behaviour, and you can pass 'raw=False' to " + "silence this warning", FutureWarning, stacklevel=3) + raw = True + def f(arg, window, min_periods, closed): minp = _use_window(min_periods, window) - return _window.roll_generic(arg, window, minp, indexi, closed, - offset, func, args, kwargs) + if not raw: + arg = Series(arg, index=self.obj.index) + return _window.roll_generic( + arg, window, minp, indexi, + closed, offset, func, raw, args, kwargs) return self._apply(f, func, args=args, kwargs=kwargs, - center=False) + center=False, raw=raw) def sum(self, *args, **kwargs): nv.validate_window_func('sum', args, kwargs) @@ -1498,8 +1528,9 @@ def count(self): @Substitution(name='rolling') @Appender(_doc_template) @Appender(_shared_docs['apply']) - def apply(self, func, args=(), kwargs={}): - return super(Rolling, self).apply(func, args=args, kwargs=kwargs) + def apply(self, func, raw=None, args=(), kwargs={}): + return super(Rolling, self).apply( + func, raw=raw, args=args, kwargs=kwargs) @Substitution(name='rolling') @Appender(_shared_docs['sum']) @@ -1756,8 +1787,9 @@ def count(self, **kwargs): @Substitution(name='expanding') @Appender(_doc_template) @Appender(_shared_docs['apply']) - def apply(self, func, args=(), kwargs={}): - return super(Expanding, self).apply(func, args=args, kwargs=kwargs) + def apply(self, func, raw=None, args=(), kwargs={}): + return super(Expanding, self).apply( + func, raw=raw, args=args, kwargs=kwargs) @Substitution(name='expanding') @Appender(_shared_docs['sum']) diff --git 
a/pandas/tests/test_window.py b/pandas/tests/test_window.py index dabdb1e8e689c..605230390ff1d 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -29,6 +29,22 @@ def assert_equal(left, right): tm.assert_frame_equal(left, right) +@pytest.fixture(params=[True, False]) +def raw(request): + return request.param + + +@pytest.fixture(params=['triang', 'blackman', 'hamming', 'bartlett', 'bohman', + 'blackmanharris', 'nuttall', 'barthann']) +def win_types(request): + return request.param + + +@pytest.fixture(params=['kaiser', 'gaussian', 'general_gaussian', 'slepian']) +def win_types_special(request): + return request.param + + class Base(object): _nan_locs = np.arange(20, 40) @@ -157,9 +173,16 @@ def test_agg(self): expected.columns = pd.MultiIndex.from_tuples(exp_cols) tm.assert_frame_equal(result, expected, check_like=True) + def test_agg_apply(self, raw): + # passed lambda + df = DataFrame({'A': range(5), 'B': range(0, 10, 2)}) + + r = df.rolling(window=3) + a_sum = r['A'].sum() + result = r.agg({'A': np.sum, 'B': lambda x: np.std(x, ddof=1)}) - rcustom = r['B'].apply(lambda x: np.std(x, ddof=1)) + rcustom = r['B'].apply(lambda x: np.std(x, ddof=1), raw=raw) expected = concat([a_sum, rcustom], axis=1) tm.assert_frame_equal(result, expected, check_like=True) @@ -289,43 +312,51 @@ def setup_method(self, method): self._create_data() @td.skip_if_no_scipy - def test_constructor(self): + @pytest.mark.parametrize( + 'which', ['series', 'frame']) + def test_constructor(self, which): # GH 12669 - for o in [self.series, self.frame]: - c = o.rolling + o = getattr(self, which) + c = o.rolling - # valid - c(win_type='boxcar', window=2, min_periods=1) - c(win_type='boxcar', window=2, min_periods=1, center=True) - c(win_type='boxcar', window=2, min_periods=1, center=False) + # valid + c(win_type='boxcar', window=2, min_periods=1) + c(win_type='boxcar', window=2, min_periods=1, center=True) + c(win_type='boxcar', window=2, min_periods=1, center=False) - for 
wt in ['boxcar', 'triang', 'blackman', 'hamming', 'bartlett', - 'bohman', 'blackmanharris', 'nuttall', 'barthann']: - c(win_type=wt, window=2) + # not valid + for w in [2., 'foo', np.array([2])]: + with pytest.raises(ValueError): + c(win_type='boxcar', window=2, min_periods=w) + with pytest.raises(ValueError): + c(win_type='boxcar', window=2, min_periods=1, center=w) - # not valid - for w in [2., 'foo', np.array([2])]: - with pytest.raises(ValueError): - c(win_type='boxcar', window=2, min_periods=w) - with pytest.raises(ValueError): - c(win_type='boxcar', window=2, min_periods=1, center=w) + for wt in ['foobar', 1]: + with pytest.raises(ValueError): + c(win_type=wt, window=2) - for wt in ['foobar', 1]: - with pytest.raises(ValueError): - c(win_type=wt, window=2) + @td.skip_if_no_scipy + @pytest.mark.parametrize( + 'which', ['series', 'frame']) + def test_constructor_with_win_type(self, which, win_types): + # GH 12669 + o = getattr(self, which) + c = o.rolling + c(win_type=win_types, window=2) - def test_numpy_compat(self): + @pytest.mark.parametrize( + 'method', ['sum', 'mean']) + def test_numpy_compat(self, method): # see gh-12811 w = rwindow.Window(Series([2, 4, 6]), window=[0, 2]) msg = "numpy operations are not valid with window objects" - for func in ('sum', 'mean'): - tm.assert_raises_regex(UnsupportedFunctionCall, msg, - getattr(w, func), 1, 2, 3) - tm.assert_raises_regex(UnsupportedFunctionCall, msg, - getattr(w, func), dtype=np.float64) + tm.assert_raises_regex(UnsupportedFunctionCall, msg, + getattr(w, method), 1, 2, 3) + tm.assert_raises_regex(UnsupportedFunctionCall, msg, + getattr(w, method), dtype=np.float64) class TestRolling(Base): @@ -340,59 +371,65 @@ def test_doc_string(self): df.rolling(2).sum() df.rolling(2, min_periods=1).sum() - def test_constructor(self): + @pytest.mark.parametrize( + 'which', ['series', 'frame']) + def test_constructor(self, which): # GH 12669 - for o in [self.series, self.frame]: - c = o.rolling + o = getattr(self, which) 
+ c = o.rolling - # valid - c(window=2) - c(window=2, min_periods=1) - c(window=2, min_periods=1, center=True) - c(window=2, min_periods=1, center=False) + # valid + c(window=2) + c(window=2, min_periods=1) + c(window=2, min_periods=1, center=True) + c(window=2, min_periods=1, center=False) - # GH 13383 - c(0) - with pytest.raises(ValueError): - c(-1) + # GH 13383 + c(0) + with pytest.raises(ValueError): + c(-1) - # not valid - for w in [2., 'foo', np.array([2])]: - with pytest.raises(ValueError): - c(window=w) - with pytest.raises(ValueError): - c(window=2, min_periods=w) - with pytest.raises(ValueError): - c(window=2, min_periods=1, center=w) + # not valid + for w in [2., 'foo', np.array([2])]: + with pytest.raises(ValueError): + c(window=w) + with pytest.raises(ValueError): + c(window=2, min_periods=w) + with pytest.raises(ValueError): + c(window=2, min_periods=1, center=w) @td.skip_if_no_scipy - def test_constructor_with_win_type(self): + @pytest.mark.parametrize( + 'which', ['series', 'frame']) + def test_constructor_with_win_type(self, which): # GH 13383 - for o in [self.series, self.frame]: - c = o.rolling - c(0, win_type='boxcar') - with pytest.raises(ValueError): - c(-1, win_type='boxcar') + o = getattr(self, which) + c = o.rolling + c(0, win_type='boxcar') + with pytest.raises(ValueError): + c(-1, win_type='boxcar') - def test_constructor_with_timedelta_window(self): + @pytest.mark.parametrize( + 'window', [timedelta(days=3), pd.Timedelta(days=3)]) + def test_constructor_with_timedelta_window(self, window): # GH 15440 n = 10 df = DataFrame({'value': np.arange(n)}, index=pd.date_range('2015-12-24', periods=n, freq="D")) expected_data = np.append([0., 1.], np.arange(3., 27., 3)) - for window in [timedelta(days=3), pd.Timedelta(days=3)]: - result = df.rolling(window=window).sum() - expected = DataFrame({'value': expected_data}, - index=pd.date_range('2015-12-24', periods=n, - freq="D")) - tm.assert_frame_equal(result, expected) - expected = 
df.rolling('3D').sum() - tm.assert_frame_equal(result, expected) + + result = df.rolling(window=window).sum() + expected = DataFrame({'value': expected_data}, + index=pd.date_range('2015-12-24', periods=n, + freq="D")) + tm.assert_frame_equal(result, expected) + expected = df.rolling('3D').sum() + tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( 'window', [timedelta(days=3), pd.Timedelta(days=3), '3D']) - def test_constructor_with_timedelta_window_and_minperiods(self, window): + def test_constructor_timedelta_window_and_minperiods(self, window, raw): # GH 15305 n = 10 df = DataFrame({'value': np.arange(n)}, @@ -402,21 +439,22 @@ def test_constructor_with_timedelta_window_and_minperiods(self, window): index=pd.date_range('2017-08-08', periods=n, freq="D")) result_roll_sum = df.rolling(window=window, min_periods=2).sum() result_roll_generic = df.rolling(window=window, - min_periods=2).apply(sum) + min_periods=2).apply(sum, raw=raw) tm.assert_frame_equal(result_roll_sum, expected) tm.assert_frame_equal(result_roll_generic, expected) - def test_numpy_compat(self): + @pytest.mark.parametrize( + 'method', ['std', 'mean', 'sum', 'max', 'min', 'var']) + def test_numpy_compat(self, method): # see gh-12811 r = rwindow.Rolling(Series([2, 4, 6]), window=2) msg = "numpy operations are not valid with window objects" - for func in ('std', 'mean', 'sum', 'max', 'min', 'var'): - tm.assert_raises_regex(UnsupportedFunctionCall, msg, - getattr(r, func), 1, 2, 3) - tm.assert_raises_regex(UnsupportedFunctionCall, msg, - getattr(r, func), dtype=np.float64) + tm.assert_raises_regex(UnsupportedFunctionCall, msg, + getattr(r, method), 1, 2, 3) + tm.assert_raises_regex(UnsupportedFunctionCall, msg, + getattr(r, method), dtype=np.float64) def test_closed(self): df = DataFrame({'A': [0, 1, 2, 3, 4]}) @@ -483,35 +521,38 @@ def test_doc_string(self): df df.expanding(2).sum() - def test_constructor(self): + @pytest.mark.parametrize( + 'which', ['series', 'frame']) + def 
test_constructor(self, which): # GH 12669 - for o in [self.series, self.frame]: - c = o.expanding + o = getattr(self, which) + c = o.expanding - # valid - c(min_periods=1) - c(min_periods=1, center=True) - c(min_periods=1, center=False) + # valid + c(min_periods=1) + c(min_periods=1, center=True) + c(min_periods=1, center=False) - # not valid - for w in [2., 'foo', np.array([2])]: - with pytest.raises(ValueError): - c(min_periods=w) - with pytest.raises(ValueError): - c(min_periods=1, center=w) + # not valid + for w in [2., 'foo', np.array([2])]: + with pytest.raises(ValueError): + c(min_periods=w) + with pytest.raises(ValueError): + c(min_periods=1, center=w) - def test_numpy_compat(self): + @pytest.mark.parametrize( + 'method', ['std', 'mean', 'sum', 'max', 'min', 'var']) + def test_numpy_compat(self, method): # see gh-12811 e = rwindow.Expanding(Series([2, 4, 6]), window=2) msg = "numpy operations are not valid with window objects" - for func in ('std', 'mean', 'sum', 'max', 'min', 'var'): - tm.assert_raises_regex(UnsupportedFunctionCall, msg, - getattr(e, func), 1, 2, 3) - tm.assert_raises_regex(UnsupportedFunctionCall, msg, - getattr(e, func), dtype=np.float64) + tm.assert_raises_regex(UnsupportedFunctionCall, msg, + getattr(e, method), 1, 2, 3) + tm.assert_raises_regex(UnsupportedFunctionCall, msg, + getattr(e, method), dtype=np.float64) @pytest.mark.parametrize( 'expander', @@ -558,55 +599,58 @@ def test_doc_string(self): df df.ewm(com=0.5).mean() - def test_constructor(self): - for o in [self.series, self.frame]: - c = o.ewm - - # valid - c(com=0.5) - c(span=1.5) - c(alpha=0.5) - c(halflife=0.75) - c(com=0.5, span=None) - c(alpha=0.5, com=None) - c(halflife=0.75, alpha=None) + @pytest.mark.parametrize( + 'which', ['series', 'frame']) + def test_constructor(self, which): + o = getattr(self, which) + c = o.ewm + + # valid + c(com=0.5) + c(span=1.5) + c(alpha=0.5) + c(halflife=0.75) + c(com=0.5, span=None) + c(alpha=0.5, com=None) + c(halflife=0.75, 
alpha=None) + + # not valid: mutually exclusive + with pytest.raises(ValueError): + c(com=0.5, alpha=0.5) + with pytest.raises(ValueError): + c(span=1.5, halflife=0.75) + with pytest.raises(ValueError): + c(alpha=0.5, span=1.5) - # not valid: mutually exclusive - with pytest.raises(ValueError): - c(com=0.5, alpha=0.5) - with pytest.raises(ValueError): - c(span=1.5, halflife=0.75) - with pytest.raises(ValueError): - c(alpha=0.5, span=1.5) + # not valid: com < 0 + with pytest.raises(ValueError): + c(com=-0.5) - # not valid: com < 0 - with pytest.raises(ValueError): - c(com=-0.5) + # not valid: span < 1 + with pytest.raises(ValueError): + c(span=0.5) - # not valid: span < 1 - with pytest.raises(ValueError): - c(span=0.5) + # not valid: halflife <= 0 + with pytest.raises(ValueError): + c(halflife=0) - # not valid: halflife <= 0 + # not valid: alpha <= 0 or alpha > 1 + for alpha in (-0.5, 1.5): with pytest.raises(ValueError): - c(halflife=0) + c(alpha=alpha) - # not valid: alpha <= 0 or alpha > 1 - for alpha in (-0.5, 1.5): - with pytest.raises(ValueError): - c(alpha=alpha) - - def test_numpy_compat(self): + @pytest.mark.parametrize( + 'method', ['std', 'mean', 'var']) + def test_numpy_compat(self, method): # see gh-12811 e = rwindow.EWM(Series([2, 4, 6]), alpha=0.5) msg = "numpy operations are not valid with window objects" - for func in ('std', 'mean', 'var'): - tm.assert_raises_regex(UnsupportedFunctionCall, msg, - getattr(e, func), 1, 2, 3) - tm.assert_raises_regex(UnsupportedFunctionCall, msg, - getattr(e, func), dtype=np.float64) + tm.assert_raises_regex(UnsupportedFunctionCall, msg, + getattr(e, method), 1, 2, 3) + tm.assert_raises_regex(UnsupportedFunctionCall, msg, + getattr(e, method), dtype=np.float64) # gh-12373 : rolling functions error on float32 data @@ -943,11 +987,8 @@ def test_cmov_window_na_min_periods(self): tm.assert_series_equal(xp, rs) @td.skip_if_no_scipy - def test_cmov_window_regular(self): + def test_cmov_window_regular(self, win_types): # GH 
8238 - win_types = ['triang', 'blackman', 'hamming', 'bartlett', 'bohman', - 'blackmanharris', 'nuttall', 'barthann'] - vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48]) xps = { @@ -969,33 +1010,25 @@ def test_cmov_window_regular(self): 14.0825, 11.5675, np.nan, np.nan] } - for wt in win_types: - xp = Series(xps[wt]) - rs = Series(vals).rolling(5, win_type=wt, center=True).mean() - tm.assert_series_equal(xp, rs) + xp = Series(xps[win_types]) + rs = Series(vals).rolling(5, win_type=win_types, center=True).mean() + tm.assert_series_equal(xp, rs) @td.skip_if_no_scipy - def test_cmov_window_regular_linear_range(self): + def test_cmov_window_regular_linear_range(self, win_types): # GH 8238 - win_types = ['triang', 'blackman', 'hamming', 'bartlett', 'bohman', - 'blackmanharris', 'nuttall', 'barthann'] - vals = np.array(range(10), dtype=np.float) xp = vals.copy() xp[:2] = np.nan xp[-2:] = np.nan xp = Series(xp) - for wt in win_types: - rs = Series(vals).rolling(5, win_type=wt, center=True).mean() - tm.assert_series_equal(xp, rs) + rs = Series(vals).rolling(5, win_type=win_types, center=True).mean() + tm.assert_series_equal(xp, rs) @td.skip_if_no_scipy - def test_cmov_window_regular_missing_data(self): + def test_cmov_window_regular_missing_data(self, win_types): # GH 8238 - win_types = ['triang', 'blackman', 'hamming', 'bartlett', 'bohman', - 'blackmanharris', 'nuttall', 'barthann'] - vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, np.nan, 10.63, 14.48]) xps = { @@ -1017,17 +1050,18 @@ def test_cmov_window_regular_missing_data(self): 9.16438, 13.05052, 14.02175, 16.1098, 13.65509] } - for wt in win_types: - xp = Series(xps[wt]) - rs = Series(vals).rolling(5, win_type=wt, min_periods=3).mean() - tm.assert_series_equal(xp, rs) + xp = Series(xps[win_types]) + rs = Series(vals).rolling(5, win_type=win_types, min_periods=3).mean() + tm.assert_series_equal(xp, rs) @td.skip_if_no_scipy - def test_cmov_window_special(self): + def 
test_cmov_window_special(self, win_types_special): # GH 8238 - win_types = ['kaiser', 'gaussian', 'general_gaussian', 'slepian'] - kwds = [{'beta': 1.}, {'std': 1.}, {'power': 2., - 'width': 2.}, {'width': 0.5}] + kwds = { + 'kaiser': {'beta': 1.}, + 'gaussian': {'std': 1.}, + 'general_gaussian': {'power': 2., 'width': 2.}, + 'slepian': {'width': 0.5}} vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48]) @@ -1043,17 +1077,20 @@ def test_cmov_window_special(self): 12.90702, 12.83757, np.nan, np.nan] } - for wt, k in zip(win_types, kwds): - xp = Series(xps[wt]) - rs = Series(vals).rolling(5, win_type=wt, center=True).mean(**k) - tm.assert_series_equal(xp, rs) + xp = Series(xps[win_types_special]) + rs = Series(vals).rolling( + 5, win_type=win_types_special, center=True).mean( + **kwds[win_types_special]) + tm.assert_series_equal(xp, rs) @td.skip_if_no_scipy - def test_cmov_window_special_linear_range(self): + def test_cmov_window_special_linear_range(self, win_types_special): # GH 8238 - win_types = ['kaiser', 'gaussian', 'general_gaussian', 'slepian'] - kwds = [{'beta': 1.}, {'std': 1.}, {'power': 2., - 'width': 2.}, {'width': 0.5}] + kwds = { + 'kaiser': {'beta': 1.}, + 'gaussian': {'std': 1.}, + 'general_gaussian': {'power': 2., 'width': 2.}, + 'slepian': {'width': 0.5}} vals = np.array(range(10), dtype=np.float) xp = vals.copy() @@ -1061,9 +1098,10 @@ def test_cmov_window_special_linear_range(self): xp[-2:] = np.nan xp = Series(xp) - for wt, k in zip(win_types, kwds): - rs = Series(vals).rolling(5, win_type=wt, center=True).mean(**k) - tm.assert_series_equal(xp, rs) + rs = Series(vals).rolling( + 5, win_type=win_types_special, center=True).mean( + **kwds[win_types_special]) + tm.assert_series_equal(xp, rs) def test_rolling_median(self): self._check_moment_func(np.median, name='median') @@ -1150,43 +1188,76 @@ def test_rolling_quantile_param(self): with pytest.raises(TypeError): ser.rolling(3).quantile('foo') - def 
test_rolling_apply(self): + def test_rolling_apply(self, raw): # suppress warnings about empty slices, as we are deliberately testing # with a 0-length Series + with warnings.catch_warnings(): warnings.filterwarnings("ignore", message=".*(empty slice|0 for slice).*", category=RuntimeWarning) - ser = Series([]) - tm.assert_series_equal(ser, - ser.rolling(10).apply(lambda x: x.mean())) - def f(x): return x[np.isfinite(x)].mean() - self._check_moment_func(np.mean, name='apply', func=f) + self._check_moment_func(np.mean, name='apply', func=f, raw=raw) - # GH 8080 + expected = Series([]) + result = expected.rolling(10).apply(lambda x: x.mean(), raw=raw) + tm.assert_series_equal(result, expected) + + # gh-8080 s = Series([None, None, None]) - result = s.rolling(2, min_periods=0).apply(lambda x: len(x)) + result = s.rolling(2, min_periods=0).apply(lambda x: len(x), raw=raw) expected = Series([1., 2., 2.]) tm.assert_series_equal(result, expected) - result = s.rolling(2, min_periods=0).apply(len) + result = s.rolling(2, min_periods=0).apply(len, raw=raw) tm.assert_series_equal(result, expected) - def test_rolling_apply_out_of_bounds(self): - # #1850 + @pytest.mark.parametrize('klass', [Series, DataFrame]) + @pytest.mark.parametrize( + 'method', [lambda x: x.rolling(window=2), lambda x: x.expanding()]) + def test_apply_future_warning(self, klass, method): + + # gh-5071 + s = klass(np.arange(3)) + + with tm.assert_produces_warning(FutureWarning): + method(s).apply(lambda x: len(x)) + + def test_rolling_apply_out_of_bounds(self, raw): + # gh-1850 vals = pd.Series([1, 2, 3, 4]) - result = vals.rolling(10).apply(np.sum) + result = vals.rolling(10).apply(np.sum, raw=raw) assert result.isna().all() - result = vals.rolling(10, min_periods=1).apply(np.sum) + result = vals.rolling(10, min_periods=1).apply(np.sum, raw=raw) expected = pd.Series([1, 3, 6, 10], dtype=float) tm.assert_almost_equal(result, expected) + @pytest.mark.parametrize('window', [2, '2s']) + def 
test_rolling_apply_with_pandas_objects(self, window): + # 5071 + df = pd.DataFrame({'A': np.random.randn(5), + 'B': np.random.randint(0, 10, size=5)}, + index=pd.date_range('20130101', periods=5, freq='s')) + + # we have an equal spaced timeseries index + # so simulate removing the first period + def f(x): + if x.index[0] == df.index[0]: + return np.nan + return x.iloc[-1] + + result = df.rolling(window).apply(f, raw=False) + expected = df.iloc[2:].reindex_like(df) + tm.assert_frame_equal(result, expected) + + with pytest.raises(AttributeError): + df.rolling(window).apply(f, raw=True) + def test_rolling_std(self): self._check_moment_func(lambda x: np.std(x, ddof=1), name='std') @@ -1256,10 +1327,10 @@ def get_result(obj, window, min_periods=None, center=False): frame_result = get_result(self.frame, window=50) assert isinstance(frame_result, DataFrame) - tm.assert_series_equal(frame_result.iloc[-1, :], - self.frame.iloc[-50:, :].apply(static_comp, - axis=0), - check_names=False) + tm.assert_series_equal( + frame_result.iloc[-1, :], + self.frame.iloc[-50:, :].apply(static_comp, axis=0, raw=raw), + check_names=False) # check time_rule works if has_time_rule: @@ -1287,7 +1358,7 @@ def get_result(obj, window, min_periods=None, center=False): static_comp(trunc_series)) tm.assert_series_equal(frame_result.xs(last_date), - trunc_frame.apply(static_comp), + trunc_frame.apply(static_comp, raw=raw), check_names=False) # excluding NaNs correctly @@ -1402,26 +1473,20 @@ def test_ewma(self): result = vals.ewm(span=100, adjust=False).mean().sum() assert np.abs(result - 1) < 1e-2 + @pytest.mark.parametrize('adjust', [True, False]) + @pytest.mark.parametrize('ignore_na', [True, False]) + def test_ewma_cases(self, adjust, ignore_na): + # try adjust/ignore_na args matrix + s = Series([1.0, 2.0, 4.0, 8.0]) - expected = Series([1.0, 1.6, 2.736842, 4.923077]) - for f in [lambda s: s.ewm(com=2.0, adjust=True).mean(), - lambda s: s.ewm(com=2.0, adjust=True, - ignore_na=False).mean(), - 
lambda s: s.ewm(com=2.0, adjust=True, ignore_na=True).mean(), - ]: - result = f(s) - tm.assert_series_equal(result, expected) + if adjust: + expected = Series([1.0, 1.6, 2.736842, 4.923077]) + else: + expected = Series([1.0, 1.333333, 2.222222, 4.148148]) - expected = Series([1.0, 1.333333, 2.222222, 4.148148]) - for f in [lambda s: s.ewm(com=2.0, adjust=False).mean(), - lambda s: s.ewm(com=2.0, adjust=False, - ignore_na=False).mean(), - lambda s: s.ewm(com=2.0, adjust=False, - ignore_na=True).mean(), - ]: - result = f(s) - tm.assert_series_equal(result, expected) + result = s.ewm(com=2.0, adjust=adjust, ignore_na=ignore_na).mean() + tm.assert_series_equal(result, expected) def test_ewma_nan_handling(self): s = Series([1.] + [np.nan] * 5 + [1.]) @@ -1555,14 +1620,13 @@ def test_ewm_domain_checks(self): s.ewm(alpha=1.0) pytest.raises(ValueError, s.ewm, alpha=1.1) - def test_ew_empty_series(self): + @pytest.mark.parametrize('method', ['mean', 'vol', 'var']) + def test_ew_empty_series(self, method): vals = pd.Series([], dtype=np.float64) ewm = vals.ewm(3) - funcs = ['mean', 'vol', 'var'] - for f in funcs: - result = getattr(ewm, f)() - tm.assert_almost_equal(result, vals) + result = getattr(ewm, method)() + tm.assert_almost_equal(result, vals) def _check_ew(self, name=None, preserve_nan=False): series_result = getattr(self.series.ewm(com=10), name)() @@ -2160,7 +2224,7 @@ def test_expanding_consistency(self, min_periods): if name == 'count': expanding_f_result = expanding_f() expanding_apply_f_result = x.expanding( - min_periods=0).apply(func=f) + min_periods=0).apply(func=f, raw=True) else: if name in ['cov', 'corr']: expanding_f_result = expanding_f( @@ -2168,7 +2232,7 @@ def test_expanding_consistency(self, min_periods): else: expanding_f_result = expanding_f() expanding_apply_f_result = x.expanding( - min_periods=min_periods).apply(func=f) + min_periods=min_periods).apply(func=f, raw=True) # GH 9422 if name in ['sum', 'prod']: @@ -2259,7 +2323,7 @@ def 
test_rolling_consistency(self, window, min_periods, center): rolling_f_result = rolling_f() rolling_apply_f_result = x.rolling( window=window, min_periods=0, - center=center).apply(func=f) + center=center).apply(func=f, raw=True) else: if name in ['cov', 'corr']: rolling_f_result = rolling_f( @@ -2268,7 +2332,7 @@ def test_rolling_consistency(self, window, min_periods, center): rolling_f_result = rolling_f() rolling_apply_f_result = x.rolling( window=window, min_periods=min_periods, - center=center).apply(func=f) + center=center).apply(func=f, raw=True) # GH 9422 if name in ['sum', 'prod']: @@ -2348,29 +2412,25 @@ def test_corr_sanity(self): except AssertionError: print(res) - def test_flex_binary_frame(self): - def _check(method): - series = self.frame[1] + @pytest.mark.parametrize('method', ['corr', 'cov']) + def test_flex_binary_frame(self, method): + series = self.frame[1] - res = getattr(series.rolling(window=10), method)(self.frame) - res2 = getattr(self.frame.rolling(window=10), method)(series) - exp = self.frame.apply(lambda x: getattr( - series.rolling(window=10), method)(x)) + res = getattr(series.rolling(window=10), method)(self.frame) + res2 = getattr(self.frame.rolling(window=10), method)(series) + exp = self.frame.apply(lambda x: getattr( + series.rolling(window=10), method)(x)) - tm.assert_frame_equal(res, exp) - tm.assert_frame_equal(res2, exp) + tm.assert_frame_equal(res, exp) + tm.assert_frame_equal(res2, exp) - frame2 = self.frame.copy() - frame2.values[:] = np.random.randn(*frame2.shape) + frame2 = self.frame.copy() + frame2.values[:] = np.random.randn(*frame2.shape) - res3 = getattr(self.frame.rolling(window=10), method)(frame2) - exp = DataFrame(dict((k, getattr(self.frame[k].rolling( - window=10), method)(frame2[k])) for k in self.frame)) - tm.assert_frame_equal(res3, exp) - - methods = ['corr', 'cov'] - for meth in methods: - _check(meth) + res3 = getattr(self.frame.rolling(window=10), method)(frame2) + exp = DataFrame(dict((k, 
getattr(self.frame[k].rolling( + window=10), method)(frame2[k])) for k in self.frame)) + tm.assert_frame_equal(res3, exp) def test_ewmcov(self): self._check_binary_ew('cov') @@ -2417,19 +2477,24 @@ def func(A, B, com, **kwargs): pytest.raises(Exception, func, A, randn(50), 20, min_periods=5) - def test_expanding_apply_args_kwargs(self): + def test_expanding_apply_args_kwargs(self, raw): + def mean_w_arg(x, const): return np.mean(x) + const df = DataFrame(np.random.rand(20, 3)) - expected = df.expanding().apply(np.mean) + 20. + expected = df.expanding().apply(np.mean, raw=raw) + 20. - tm.assert_frame_equal(df.expanding().apply(mean_w_arg, args=(20, )), - expected) - tm.assert_frame_equal(df.expanding().apply(mean_w_arg, - kwargs={'const': 20}), - expected) + result = df.expanding().apply(mean_w_arg, + raw=raw, + args=(20, )) + tm.assert_frame_equal(result, expected) + + result = df.expanding().apply(mean_w_arg, + raw=raw, + kwargs={'const': 20}) + tm.assert_frame_equal(result, expected) def test_expanding_corr(self): A = self.series.dropna() @@ -2539,42 +2604,47 @@ def test_rolling_corr_diff_length(self): result = s1.rolling(window=3, min_periods=2).corr(s2a) tm.assert_series_equal(result, expected) - def test_rolling_functions_window_non_shrinkage(self): + @pytest.mark.parametrize( + 'f', + [ + lambda x: (x.rolling(window=10, min_periods=5) + .cov(x, pairwise=False)), + lambda x: (x.rolling(window=10, min_periods=5) + .corr(x, pairwise=False)), + lambda x: x.rolling(window=10, min_periods=5).max(), + lambda x: x.rolling(window=10, min_periods=5).min(), + lambda x: x.rolling(window=10, min_periods=5).sum(), + lambda x: x.rolling(window=10, min_periods=5).mean(), + lambda x: x.rolling(window=10, min_periods=5).std(), + lambda x: x.rolling(window=10, min_periods=5).var(), + lambda x: x.rolling(window=10, min_periods=5).skew(), + lambda x: x.rolling(window=10, min_periods=5).kurt(), + lambda x: x.rolling( + window=10, min_periods=5).quantile(quantile=0.5), + lambda x: 
x.rolling(window=10, min_periods=5).median(), + lambda x: x.rolling(window=10, min_periods=5).apply( + sum, raw=False), + lambda x: x.rolling(window=10, min_periods=5).apply( + sum, raw=True), + lambda x: x.rolling(win_type='boxcar', + window=10, min_periods=5).mean()]) + def test_rolling_functions_window_non_shrinkage(self, f): # GH 7764 s = Series(range(4)) s_expected = Series(np.nan, index=s.index) df = DataFrame([[1, 5], [3, 2], [3, 9], [-1, 0]], columns=['A', 'B']) df_expected = DataFrame(np.nan, index=df.index, columns=df.columns) - functions = [lambda x: (x.rolling(window=10, min_periods=5) - .cov(x, pairwise=False)), - lambda x: (x.rolling(window=10, min_periods=5) - .corr(x, pairwise=False)), - lambda x: x.rolling(window=10, min_periods=5).max(), - lambda x: x.rolling(window=10, min_periods=5).min(), - lambda x: x.rolling(window=10, min_periods=5).sum(), - lambda x: x.rolling(window=10, min_periods=5).mean(), - lambda x: x.rolling(window=10, min_periods=5).std(), - lambda x: x.rolling(window=10, min_periods=5).var(), - lambda x: x.rolling(window=10, min_periods=5).skew(), - lambda x: x.rolling(window=10, min_periods=5).kurt(), - lambda x: x.rolling( - window=10, min_periods=5).quantile(quantile=0.5), - lambda x: x.rolling(window=10, min_periods=5).median(), - lambda x: x.rolling(window=10, min_periods=5).apply(sum), - lambda x: x.rolling(win_type='boxcar', - window=10, min_periods=5).mean()] - for f in functions: - try: - s_result = f(s) - tm.assert_series_equal(s_result, s_expected) + try: + s_result = f(s) + tm.assert_series_equal(s_result, s_expected) - df_result = f(df) - tm.assert_frame_equal(df_result, df_expected) - except (ImportError): + df_result = f(df) + tm.assert_frame_equal(df_result, df_expected) + except (ImportError): - # scipy needed for rolling_window - continue + # scipy needed for rolling_window + pytest.skip("scipy not available") def test_rolling_functions_window_non_shrinkage_binary(self): @@ -2620,7 +2690,10 @@ def 
test_moment_functions_zero_length(self): lambda x: x.expanding(min_periods=5).kurt(), lambda x: x.expanding(min_periods=5).quantile(0.5), lambda x: x.expanding(min_periods=5).median(), - lambda x: x.expanding(min_periods=5).apply(sum), + lambda x: x.expanding(min_periods=5).apply( + sum, raw=False), + lambda x: x.expanding(min_periods=5).apply( + sum, raw=True), lambda x: x.rolling(window=10).count(), lambda x: x.rolling(window=10, min_periods=5).cov( x, pairwise=False), @@ -2637,7 +2710,10 @@ def test_moment_functions_zero_length(self): lambda x: x.rolling( window=10, min_periods=5).quantile(0.5), lambda x: x.rolling(window=10, min_periods=5).median(), - lambda x: x.rolling(window=10, min_periods=5).apply(sum), + lambda x: x.rolling(window=10, min_periods=5).apply( + sum, raw=False), + lambda x: x.rolling(window=10, min_periods=5).apply( + sum, raw=True), lambda x: x.rolling(win_type='boxcar', window=10, min_periods=5).mean(), ] @@ -2805,20 +2881,25 @@ def expanding_func(x, min_periods=1, center=False, axis=0): return getattr(exp, func)() self._check_expanding(expanding_func, static_comp, preserve_nan=False) - def test_expanding_apply(self): + def test_expanding_apply(self, raw): def expanding_mean(x, min_periods=1): + exp = x.expanding(min_periods=min_periods) - return exp.apply(lambda x: x.mean()) + result = exp.apply(lambda x: x.mean(), raw=raw) + return result - self._check_expanding(expanding_mean, np.mean) + # TODO(jreback), needed to add preserve_nan=False + # here to make this pass + self._check_expanding(expanding_mean, np.mean, preserve_nan=False) ser = Series([]) - tm.assert_series_equal(ser, ser.expanding().apply(lambda x: x.mean())) + tm.assert_series_equal(ser, ser.expanding().apply( + lambda x: x.mean(), raw=raw)) # GH 8080 s = Series([None, None, None]) - result = s.expanding(min_periods=0).apply(lambda x: len(x)) + result = s.expanding(min_periods=0).apply(lambda x: len(x), raw=raw) expected = Series([1., 2., 3.]) tm.assert_series_equal(result, 
expected) @@ -3057,13 +3138,14 @@ def func(x): expected = g.apply(func) tm.assert_series_equal(result, expected) - def test_rolling_apply(self): + def test_rolling_apply(self, raw): g = self.frame.groupby('A') r = g.rolling(window=4) # reduction - result = r.apply(lambda x: x.sum()) - expected = g.apply(lambda x: x.rolling(4).apply(lambda y: y.sum())) + result = r.apply(lambda x: x.sum(), raw=raw) + expected = g.apply( + lambda x: x.rolling(4).apply(lambda y: y.sum(), raw=raw)) tm.assert_frame_equal(result, expected) def test_expanding(self): @@ -3104,13 +3186,14 @@ def func(x): expected = g.apply(func) tm.assert_series_equal(result, expected) - def test_expanding_apply(self): + def test_expanding_apply(self, raw): g = self.frame.groupby('A') r = g.expanding() # reduction - result = r.apply(lambda x: x.sum()) - expected = g.apply(lambda x: x.expanding().apply(lambda y: y.sum())) + result = r.apply(lambda x: x.sum(), raw=raw) + expected = g.apply( + lambda x: x.expanding().apply(lambda y: y.sum(), raw=raw)) tm.assert_frame_equal(result, expected) @@ -3624,22 +3707,22 @@ def test_ragged_max(self): expected['B'] = [0.0, 1, 2, 3, 4] tm.assert_frame_equal(result, expected) - def test_ragged_apply(self): + def test_ragged_apply(self, raw): df = self.ragged f = lambda x: 1 - result = df.rolling(window='1s', min_periods=1).apply(f) + result = df.rolling(window='1s', min_periods=1).apply(f, raw=raw) expected = df.copy() expected['B'] = 1. tm.assert_frame_equal(result, expected) - result = df.rolling(window='2s', min_periods=1).apply(f) + result = df.rolling(window='2s', min_periods=1).apply(f, raw=raw) expected = df.copy() expected['B'] = 1. tm.assert_frame_equal(result, expected) - result = df.rolling(window='5s', min_periods=1).apply(f) + result = df.rolling(window='5s', min_periods=1).apply(f, raw=raw) expected = df.copy() expected['B'] = 1. 
tm.assert_frame_equal(result, expected) @@ -3662,8 +3745,14 @@ def test_all(self): expected = er.quantile(0.5) tm.assert_frame_equal(result, expected) - result = r.apply(lambda x: 1) - expected = er.apply(lambda x: 1) + def test_all_apply(self, raw): + + df = self.regular * 2 + er = df.rolling(window=1) + r = df.rolling(window='1s') + + result = r.apply(lambda x: 1, raw=raw) + expected = er.apply(lambda x: 1, raw=raw) tm.assert_frame_equal(result, expected) def test_all2(self):