From 7cb42b059d5c781257063c66a904e17f455f57c5 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 27 Oct 2019 16:25:43 -0700 Subject: [PATCH 01/28] Have self._get_roll_func validate function import from window.pyx --- pandas/core/window/rolling.py | 86 ++++++++++++++++++----------------- 1 file changed, 45 insertions(+), 41 deletions(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 68eb1f630bfc3..267e67d4d78f8 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -349,36 +349,30 @@ def _center_window(self, result, window) -> np.ndarray: result = np.copy(result[tuple(lead_indexer)]) return result - def _get_roll_func( - self, cfunc: Callable, check_minp: Callable, index: np.ndarray, **kwargs - ) -> Callable: + def _get_roll_func(self, func_name: str) -> Callable: """ Wrap rolling function to check values passed. Parameters ---------- - cfunc : callable + func_name : str Cython function used to calculate rolling statistics - check_minp : callable - function to check minimum period parameter - index : ndarray - used for variable window Returns ------- func : callable """ - - def func(arg, window, min_periods=None, closed=None): - minp = check_minp(min_periods, window) - return cfunc(arg, window, minp, index, closed, **kwargs) - - return func + window_func = getattr(libwindow, func_name, None) + if window_func is None: + raise ValueError( + "we do not support this function " + "in libwindow.{func_name}".format(func_name=func_name) + ) + return window_func def _apply( self, - func: Union[str, Callable], - name: Optional[str] = None, + func: Callable, window: Optional[Union[int, str]] = None, center: Optional[bool] = None, check_minp: Optional[Callable] = None, @@ -391,9 +385,7 @@ def _apply( Parameters ---------- - func : str/callable to apply - name : str, optional - name of this function + func : callable function to apply window : int/str, default to _get_window() window length or offset center : bool, default to self.center @@ -914,13 +906,15 @@ def aggregate(self, func, *args, **kwargs): @Appender(_shared_docs["sum"]) def sum(self, *args, **kwargs): nv.validate_window_func("sum", args, kwargs) - return self._apply("roll_weighted_sum", **kwargs) + window_func = self._get_roll_func("roll_weighted_sum") + return self._apply(window_func, **kwargs) @Substitution(name="window") @Appender(_shared_docs["mean"]) def mean(self, *args, **kwargs): nv.validate_window_func("mean", args, kwargs) - return self._apply("roll_weighted_mean", **kwargs) + window_func = self._get_roll_func("roll_weighted_mean") + return self._apply(window_func, **kwargs) class _Rolling(_Window): @@ -1069,12 +1063,14 @@ def f(arg, window, min_periods, closed): args, kwargs, ) + window_func = self._get_roll_func("roll_generic") - return self._apply(f, func, args=args, kwargs=kwargs, center=False, raw=raw) + return self._apply(window_func, args=args, kwargs=kwargs, center=False, raw=raw) def sum(self, *args, **kwargs): nv.validate_window_func("sum", args, kwargs) - return self._apply("roll_sum", "sum", **kwargs) + window_func = self._get_roll_func("roll_sum") + return self._apply(window_func, **kwargs) _shared_docs["max"] = dedent( """ @@ -1089,7 +1085,8 @@ def sum(self, *args, **kwargs): def max(self, *args, **kwargs): nv.validate_window_func("max", args, kwargs) - return self._apply("roll_max", "max", **kwargs) + window_func = self._get_roll_func("roll_max") + return self._apply(window_func, **kwargs) _shared_docs["min"] = dedent( """ @@ -1130,11 +1127,13 @@ def max(self, *args, **kwargs): def min(self, *args, **kwargs): nv.validate_window_func("min", args, kwargs) - return self._apply("roll_min", "min", **kwargs) + window_func = self._get_roll_func("roll_min") + return self._apply(window_func, **kwargs) def mean(self, *args, **kwargs): nv.validate_window_func("mean", args, kwargs) - return self._apply("roll_mean", "mean", **kwargs) + window_func = self._get_roll_func("roll_mean") + return self._apply(window_func, **kwargs) _shared_docs["median"] = dedent( """ @@ -1174,7 +1173,8 @@ def mean(self, *args, **kwargs): ) def median(self, **kwargs): - return self._apply("roll_median_c", "median", **kwargs) + window_func = self._get_roll_func("roll_median_c") + return self._apply(window_func, **kwargs) _shared_docs["std"] = dedent( """ @@ -1246,10 +1246,9 @@ def f(arg, *args, **kwargs): return _zsqrt( libwindow.roll_var(arg, window, minp, index_as_array, self.closed, ddof) ) + window_func = self._get_roll_func("roll_var") - return self._apply( - f, "std", check_minp=_require_min_periods(1), ddof=ddof, **kwargs - ) + return self._apply(window_func, check_minp=_require_min_periods(1), ddof=ddof, **kwargs) _shared_docs["var"] = dedent( """ @@ -1313,9 +1312,8 @@ def f(arg, *args, **kwargs): def var(self, ddof=1, *args, **kwargs): nv.validate_window_func("var", args, kwargs) - return self._apply( - "roll_var", "var", check_minp=_require_min_periods(1), ddof=ddof, **kwargs - ) + window_func = self._get_roll_func("roll_var") + return self._apply(window_func, check_minp=_require_min_periods(1), ddof=ddof, **kwargs) _shared_docs[ "skew" @@ -1329,9 +1327,8 @@ def var(self, ddof=1, *args, **kwargs): """ def skew(self, **kwargs): - return self._apply( - "roll_skew", "skew", check_minp=_require_min_periods(3), **kwargs - ) + window_func = self._get_roll_func("roll_skew") + return self._apply(window_func, check_minp=_require_min_periods(3), **kwargs) _shared_docs["kurt"] = dedent( """ @@ -1366,9 +1363,8 @@ def skew(self, **kwargs): ) def kurt(self, **kwargs): - return self._apply( - "roll_kurt", "kurt", check_minp=_require_min_periods(4), **kwargs - ) + window_func = self._get_roll_func("roll_kurt") + return self._apply(window_func, check_minp=_require_min_periods(4), **kwargs) _shared_docs["quantile"] = dedent( """ @@ -1451,7 +1447,14 @@ def f(arg, *args, **kwargs): interpolation, ) - return self._apply(f, "quantile", quantile=quantile, **kwargs) + if quantile == 1.0: + window_func = self._get_roll_func("roll_max") + elif quantile == 0.0: + window_func = self._get_roll_func("roll_min") + else: + window_func = self._get_roll_func("roll_quantile") + + return self._apply(window_func, quantile=quantile, **kwargs) _shared_docs[ "cov" @@ -1803,7 +1806,8 @@ def count(self): # different impl for freq counting if self.is_freq_type: - return self._apply("roll_count", "count") + window_func = self._get_roll_func("roll_count") + return self._apply(window_func) return super().count() From dfe6f39c44a0b3e135bc27ada19ff812dc228ded Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 27 Oct 2019 16:41:28 -0700 Subject: [PATCH 02/28] Standardize center arg --- pandas/core/window/rolling.py | 37 ++++++++++++++++------------------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 267e67d4d78f8..7b5a23d9c257c 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -373,8 +373,8 @@ def _get_roll_func(self, func_name: str) -> Callable: def _apply( self, func: Callable, + center: bool, window: Optional[Union[int, str]] = None, - center: Optional[bool] = None, check_minp: Optional[Callable] = None, **kwargs ): @@ -386,9 +386,9 @@ def _apply( Parameters ---------- func : callable function to apply + center : bool window : int/str, default to _get_window() window length or offset - center : bool, default to self.center check_minp : function, default to _use_window **kwargs additional arguments for rolling function and window function @@ -397,9 +397,6 @@ def _apply( ------- y : type of input """ - if center is None: - center = self.center - if check_minp is None: check_minp = _use_window @@ -907,14 +904,14 @@ def aggregate(self, func, *args, **kwargs): def sum(self, *args, **kwargs): nv.validate_window_func("sum", args, kwargs) window_func = self._get_roll_func("roll_weighted_sum") - return self._apply(window_func, **kwargs) + return self._apply(window_func, self.center, **kwargs) @Substitution(name="window") @Appender(_shared_docs["mean"]) def mean(self, *args, **kwargs): nv.validate_window_func("mean", args, kwargs) window_func = self._get_roll_func("roll_weighted_mean") - return self._apply(window_func, **kwargs) + return self._apply(window_func, self.center, **kwargs) class _Rolling(_Window): @@ -1064,13 +1061,13 @@ def f(arg, window, min_periods, closed): kwargs, ) window_func = self._get_roll_func("roll_generic") - - return self._apply(window_func, args=args, kwargs=kwargs, center=False, raw=raw) + # Why do we always pass center=False? + return self._apply(window_func, False, args=args, kwargs=kwargs, raw=raw) def sum(self, *args, **kwargs): nv.validate_window_func("sum", args, kwargs) window_func = self._get_roll_func("roll_sum") - return self._apply(window_func, **kwargs) + return self._apply(window_func, self.center, **kwargs) _shared_docs["max"] = dedent( """ @@ -1086,7 +1083,7 @@ def sum(self, *args, **kwargs): def max(self, *args, **kwargs): nv.validate_window_func("max", args, kwargs) window_func = self._get_roll_func("roll_max") - return self._apply(window_func, **kwargs) + return self._apply(window_func, self.center, **kwargs) _shared_docs["min"] = dedent( """ @@ -1128,12 +1125,12 @@ def max(self, *args, **kwargs): def min(self, *args, **kwargs): nv.validate_window_func("min", args, kwargs) window_func = self._get_roll_func("roll_min") - return self._apply(window_func, **kwargs) + return self._apply(window_func, self.center, **kwargs) def mean(self, *args, **kwargs): nv.validate_window_func("mean", args, kwargs) window_func = self._get_roll_func("roll_mean") - return self._apply(window_func, **kwargs) + return self._apply(window_func, self.center, **kwargs) _shared_docs["median"] = dedent( """ @@ -1174,7 +1171,7 @@ def mean(self, *args, **kwargs): def median(self, **kwargs): window_func = self._get_roll_func("roll_median_c") - return self._apply(window_func, **kwargs) + return self._apply(window_func, self.center, **kwargs) _shared_docs["std"] = dedent( """ @@ -1248,7 +1245,7 @@ def f(arg, *args, **kwargs): ) window_func = self._get_roll_func("roll_var") - return self._apply(window_func, check_minp=_require_min_periods(1), ddof=ddof, **kwargs) + return self._apply(window_func, self.center, check_minp=_require_min_periods(1), ddof=ddof, **kwargs) _shared_docs["var"] = dedent( """ @@ -1313,7 +1310,7 @@ def f(arg, *args, **kwargs): def var(self, ddof=1, *args, **kwargs): nv.validate_window_func("var", args, kwargs) window_func = self._get_roll_func("roll_var") - return self._apply(window_func, check_minp=_require_min_periods(1), ddof=ddof, **kwargs) + return self._apply(window_func, self.center, check_minp=_require_min_periods(1), ddof=ddof, **kwargs) _shared_docs[ "skew" @@ -1328,7 +1325,7 @@ def var(self, ddof=1, *args, **kwargs): def skew(self, **kwargs): window_func = self._get_roll_func("roll_skew") - return self._apply(window_func, check_minp=_require_min_periods(3), **kwargs) + return self._apply(window_func, self.center, check_minp=_require_min_periods(3), **kwargs) _shared_docs["kurt"] = dedent( """ @@ -1364,7 +1361,7 @@ def skew(self, **kwargs): def kurt(self, **kwargs): window_func = self._get_roll_func("roll_kurt") - return self._apply(window_func, check_minp=_require_min_periods(4), **kwargs) + return self._apply(window_func, self.center, check_minp=_require_min_periods(4), **kwargs) _shared_docs["quantile"] = dedent( """ @@ -1454,7 +1451,7 @@ def f(arg, *args, **kwargs): else: window_func = self._get_roll_func("roll_quantile") - return self._apply(window_func, quantile=quantile, **kwargs) + return self._apply(window_func, self.center, quantile=quantile, **kwargs) _shared_docs[ "cov" @@ -1807,7 +1804,7 @@ def count(self): # different impl for freq counting if self.is_freq_type: window_func = self._get_roll_func("roll_count") - return self._apply(window_func) + return self._apply(window_func, self.center) return super().count() From c608b4454362a3e3d431d1feb7a058eb833256f3 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 27 Oct 2019 17:02:21 -0700 Subject: [PATCH 03/28] always call get_window in _apply --- pandas/core/window/rolling.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 7b5a23d9c257c..e1920cd00995a 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -374,7 +374,6 @@ def _apply( self, func: Callable, center: bool, - window: Optional[Union[int, str]] = None, check_minp: Optional[Callable] = None, **kwargs ): @@ -387,8 +386,6 @@ def _apply( ---------- func : callable function to apply center : bool - window : int/str, default to _get_window() - window length or offset check_minp : function, default to _use_window **kwargs additional arguments for rolling function and window function @@ -400,8 +397,8 @@ def _apply( if check_minp is None: check_minp = _use_window - if window is None: - window = self._get_window(**kwargs) + # Returns ndarray if win_type is specified or just an integer + window = self._get_window(**kwargs) blocks, obj = self._create_blocks() block_list = list(blocks) From 45c89c72d53d5b23dfa3288179176699f2733e3d Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 27 Oct 2019 19:45:26 -0700 Subject: [PATCH 04/28] move min_period calculation up the stack --- pandas/core/window/common.py | 20 ++++++++++++++++++++ pandas/core/window/rolling.py | 32 ++++++++++++++++++-------------- 2 files changed, 38 insertions(+), 14 deletions(-) diff --git a/pandas/core/window/common.py b/pandas/core/window/common.py index 0f2920b3558c9..843dadbf35b94 100644 --- a/pandas/core/window/common.py +++ b/pandas/core/window/common.py @@ -1,5 +1,6 @@ """Common utility functions for rolling operations""" from collections import defaultdict +from typing import Optional import warnings import numpy as np @@ -250,6 +251,25 @@ def _use_window(minp, window): return minp +def _calculate_min_periods(window: int, + min_periods: Optional[int], + num_values: int, + required_min_periods: int, + floor: int): + if min_periods is None: + min_periods = window + else: + min_periods = max(required_min_periods, min_periods) + if min_periods > window: + raise ValueError("min_periods {min_periods} must be <= " + "window {window}".format(min_periods=min_periods, window=window)) + elif min_periods > num_values: + min_periods = num_values + 1 + elif min_periods < 0: + raise ValueError('min_periods must be >= 0') + return max(min_periods, floor) + + def _zsqrt(x): with np.errstate(all="ignore"): result = np.sqrt(x) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index e1920cd00995a..d09da1802b639 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -39,6 +39,7 @@ import pandas.core.common as com from pandas.core.index import Index, ensure_index from pandas.core.window.common import ( + _calculate_min_periods, _doc_template, _flex_binary_moment, _GroupByMixin, @@ -374,7 +375,8 @@ def _apply( self, func: Callable, center: bool, - check_minp: Optional[Callable] = None, + require_min_periods: int = 0, + floor: int = 1, **kwargs ): """ @@ -386,7 +388,8 @@ def _apply( ---------- func : callable function to apply center : bool - check_minp : function, default to _use_window + require_min_periods : int + floor: int **kwargs additional arguments for rolling function and window function @@ -394,9 +397,6 @@ def _apply( ------- y : type of input """ - if check_minp is None: - check_minp = _use_window - # Returns ndarray if win_type is specified or just an integer window = self._get_window(**kwargs) @@ -439,18 +439,22 @@ def _apply( additional_nans = np.array([np.NaN] * offset) def calc(x): + x = np.concatenate((x, additional_nans)) + min_periods = _calculate_min_periods(window, self.min_periods, len(x), require_min_periods, floor) return func( - np.concatenate((x, additional_nans)), + x, window, - min_periods=self.min_periods, + min_periods=min_periods, closed=self.closed, ) else: def calc(x): + min_periods = _calculate_min_periods(window, self.min_periods, len(x), + require_min_periods, floor) return func( - x, window, min_periods=self.min_periods, closed=self.closed + x, window, min_periods=min_periods, closed=self.closed ) with np.errstate(all="ignore"): @@ -1059,12 +1063,12 @@ def f(arg, window, min_periods, closed): ) window_func = self._get_roll_func("roll_generic") # Why do we always pass center=False? - return self._apply(window_func, False, args=args, kwargs=kwargs, raw=raw) + return self._apply(window_func, False, floor=0, args=args, kwargs=kwargs, raw=raw) def sum(self, *args, **kwargs): nv.validate_window_func("sum", args, kwargs) window_func = self._get_roll_func("roll_sum") - return self._apply(window_func, self.center, **kwargs) + return self._apply(window_func, self.center, floor=0, **kwargs) _shared_docs["max"] = dedent( """ @@ -1242,7 +1246,7 @@ def f(arg, *args, **kwargs): ) window_func = self._get_roll_func("roll_var") - return self._apply(window_func, self.center, check_minp=_require_min_periods(1), ddof=ddof, **kwargs) + return self._apply(window_func, self.center, require_min_periods=1, ddof=ddof, **kwargs) _shared_docs["var"] = dedent( """ @@ -1307,7 +1311,7 @@ def f(arg, *args, **kwargs): def var(self, ddof=1, *args, **kwargs): nv.validate_window_func("var", args, kwargs) window_func = self._get_roll_func("roll_var") - return self._apply(window_func, self.center, check_minp=_require_min_periods(1), ddof=ddof, **kwargs) + return self._apply(window_func, self.center, require_min_periods=1, ddof=ddof, **kwargs) _shared_docs[ "skew" @@ -1322,7 +1326,7 @@ def var(self, ddof=1, *args, **kwargs): def skew(self, **kwargs): window_func = self._get_roll_func("roll_skew") - return self._apply(window_func, self.center, check_minp=_require_min_periods(3), **kwargs) + return self._apply(window_func, self.center, require_min_periods=3, **kwargs) _shared_docs["kurt"] = dedent( """ @@ -1358,7 +1362,7 @@ def skew(self, **kwargs): def kurt(self, **kwargs): window_func = self._get_roll_func("roll_kurt") - return self._apply(window_func, self.center, check_minp=_require_min_periods(4), **kwargs) + return self._apply(window_func, self.center, require_min_periods=4, **kwargs) _shared_docs["quantile"] = dedent( """ From c288137b2978447d3a926bd6b6bf0473be375772 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 27 Oct 2019 19:46:41 -0700 Subject: [PATCH 05/28] Remove fetching of window function in _apply --- pandas/core/window/rolling.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index d09da1802b639..7483731084be7 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -422,17 +422,6 @@ def _apply( results.append(values.copy()) continue - # if we have a string function name, wrap it - if isinstance(func, str): - cfunc = getattr(libwindow, func, None) - if cfunc is None: - raise ValueError( - "we do not support this function " - "in libwindow.{func}".format(func=func) - ) - - func = self._get_roll_func(cfunc, check_minp, index_as_array, **kwargs) - # calculation function if center: offset = _offset(window, center) From 1d90df6efa6507ee9dd96b110639bfbf0bc3bc9b Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 27 Oct 2019 19:48:01 -0700 Subject: [PATCH 06/28] Black --- pandas/core/window/common.py | 20 ++++++++++++-------- pandas/core/window/rolling.py | 30 +++++++++++++++++------------- 2 files changed, 29 insertions(+), 21 deletions(-) diff --git a/pandas/core/window/common.py b/pandas/core/window/common.py index 843dadbf35b94..e7ee603808adb 100644 --- a/pandas/core/window/common.py +++ b/pandas/core/window/common.py @@ -251,22 +251,26 @@ def _use_window(minp, window): return minp -def _calculate_min_periods(window: int, - min_periods: Optional[int], - num_values: int, - required_min_periods: int, - floor: int): +def _calculate_min_periods( + window: int, + min_periods: Optional[int], + num_values: int, + required_min_periods: int, + floor: int, +): if min_periods is None: min_periods = window else: min_periods = max(required_min_periods, min_periods) if min_periods > window: - raise ValueError("min_periods {min_periods} must be <= " - "window {window}".format(min_periods=min_periods, window=window)) + raise ValueError( + "min_periods {min_periods} must be <= " + "window {window}".format(min_periods=min_periods, window=window) + ) elif min_periods > num_values: min_periods = num_values + 1 elif min_periods < 0: - raise ValueError('min_periods must be >= 0') + raise ValueError("min_periods must be >= 0") return max(min_periods, floor) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 7483731084be7..09383ef8cbf19 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -429,22 +429,18 @@ def _apply( def calc(x): x = np.concatenate((x, additional_nans)) - min_periods = _calculate_min_periods(window, self.min_periods, len(x), require_min_periods, floor) - return func( - x, - window, - min_periods=min_periods, - closed=self.closed, + min_periods = _calculate_min_periods( + window, self.min_periods, len(x), require_min_periods, floor ) + return func(x, window, min_periods=min_periods, closed=self.closed) else: def calc(x): - min_periods = _calculate_min_periods(window, self.min_periods, len(x), - require_min_periods, floor) - return func( - x, window, min_periods=min_periods, closed=self.closed + min_periods = _calculate_min_periods( + window, self.min_periods, len(x), require_min_periods, floor ) + return func(x, window, min_periods=min_periods, closed=self.closed) with np.errstate(all="ignore"): if values.ndim > 1: @@ -1050,9 +1046,12 @@ def f(arg, window, min_periods, closed): args, kwargs, ) + window_func = self._get_roll_func("roll_generic") # Why do we always pass center=False? - return self._apply(window_func, False, floor=0, args=args, kwargs=kwargs, raw=raw) + return self._apply( + window_func, False, floor=0, args=args, kwargs=kwargs, raw=raw + ) def sum(self, *args, **kwargs): nv.validate_window_func("sum", args, kwargs) @@ -1233,9 +1232,12 @@ def f(arg, *args, **kwargs): return _zsqrt( libwindow.roll_var(arg, window, minp, index_as_array, self.closed, ddof) ) + window_func = self._get_roll_func("roll_var") - return self._apply(window_func, self.center, require_min_periods=1, ddof=ddof, **kwargs) + return self._apply( + window_func, self.center, require_min_periods=1, ddof=ddof, **kwargs + ) _shared_docs["var"] = dedent( """ @@ -1300,7 +1302,9 @@ def f(arg, *args, **kwargs): def var(self, ddof=1, *args, **kwargs): nv.validate_window_func("var", args, kwargs) window_func = self._get_roll_func("roll_var") - return self._apply(window_func, self.center, require_min_periods=1, ddof=ddof, **kwargs) + return self._apply( + window_func, self.center, require_min_periods=1, ddof=ddof, **kwargs + ) _shared_docs[ "skew" From 20d4490af222158de21289f18699c17783065c7e Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 28 Oct 2019 22:11:03 -0700 Subject: [PATCH 07/28] Ensure all passed _apply functions are standardized --- pandas/core/window/rolling.py | 88 +++++++++++------------------------ 1 file changed, 26 insertions(+), 62 deletions(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 09383ef8cbf19..2e599d9bde1d3 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -3,6 +3,7 @@ similar to how we have a Groupby object. """ from datetime import timedelta +from functools import partial from textwrap import dedent from typing import Callable, List, Optional, Set, Union import warnings @@ -1014,7 +1015,6 @@ def apply(self, func, raw=None, args=(), kwargs={}): kwargs.pop("_level", None) window = self._get_window() offset = _offset(window, self.center) - index_as_array = self._get_index() # TODO: default is for backward compat # change to False in the future @@ -1030,28 +1030,22 @@ def apply(self, func, raw=None, args=(), kwargs={}): ) raw = True - def f(arg, window, min_periods, closed): - minp = _use_window(min_periods, window) + window_func = partial( + self._get_roll_func("roll_generic"), + args=args, + kwargs=kwargs, + raw=raw, + offset=offset, + func=func, + ) + + def apply_func(values, begin, end, min_periods, raw=raw): if not raw: - arg = Series(arg, index=self.obj.index) - return libwindow.roll_generic( - arg, - window, - minp, - index_as_array, - closed, - offset, - func, - raw, - args, - kwargs, - ) + values = Series(values, index=self.obj.index) + return window_func(values, begin, end, min_periods) - window_func = self._get_roll_func("roll_generic") # Why do we always pass center=False? - return self._apply( - window_func, False, floor=0, args=args, kwargs=kwargs, raw=raw - ) + return self._apply(apply_func, False, floor=0) def sum(self, *args, **kwargs): nv.validate_window_func("sum", args, kwargs) @@ -1224,20 +1218,12 @@ def median(self, **kwargs): def std(self, ddof=1, *args, **kwargs): nv.validate_window_func("std", args, kwargs) - window = self._get_window() - index_as_array = self._get_index() - - def f(arg, *args, **kwargs): - minp = _require_min_periods(1)(self.min_periods, window) - return _zsqrt( - libwindow.roll_var(arg, window, minp, index_as_array, self.closed, ddof) - ) - window_func = self._get_roll_func("roll_var") - return self._apply( - window_func, self.center, require_min_periods=1, ddof=ddof, **kwargs - ) + def zsqrt_func(values, begin, end, min_periods): + return _zsqrt(window_func(values, begin, end, min_periods, ddof=ddof)) + + return self._apply(zsqrt_func, self.center, require_min_periods=1, **kwargs) _shared_docs["var"] = dedent( """ @@ -1301,10 +1287,8 @@ def f(arg, *args, **kwargs): def var(self, ddof=1, *args, **kwargs): nv.validate_window_func("var", args, kwargs) - window_func = self._get_roll_func("roll_var") - return self._apply( - window_func, self.center, require_min_periods=1, ddof=ddof, **kwargs - ) + window_func = partial(self._get_roll_func("roll_var"), ddof=ddof) + return self._apply(window_func, self.center, require_min_periods=1, **kwargs) _shared_docs[ "skew" @@ -1414,38 +1398,18 @@ def kurt(self, **kwargs): ) def quantile(self, quantile, interpolation="linear", **kwargs): - window = self._get_window() - index_as_array = self._get_index() - - def f(arg, *args, **kwargs): - minp = _use_window(self.min_periods, window) - if quantile == 1.0: - return libwindow.roll_max( - arg, window, minp, index_as_array, self.closed - ) - elif quantile == 0.0: - return libwindow.roll_min( - arg, window, minp, index_as_array, self.closed - ) - else: - return libwindow.roll_quantile( - arg, - window, - minp, - index_as_array, - self.closed, - quantile, - interpolation, - ) - if quantile == 1.0: window_func = self._get_roll_func("roll_max") elif quantile == 0.0: window_func = self._get_roll_func("roll_min") else: - window_func = self._get_roll_func("roll_quantile") + window_func = partial( + self._get_roll_func("roll_quantile"), + quantile=quantile, + interpolation=interpolation, + ) - return self._apply(window_func, self.center, quantile=quantile, **kwargs) + return self._apply(window_func, self.center, **kwargs) _shared_docs[ "cov" From 782209f9a1a4ea39c7efc5210ef85f8b6bcc170b Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Tue, 29 Oct 2019 21:40:58 -0700 Subject: [PATCH 08/28] Migrate indexers to their own file --- pandas/_libs/window.pyx | 199 -------------------------------- pandas/_libs/window_indexer.pyx | 186 +++++++++++++++++++++++++++++ pandas/core/window/rolling.py | 9 ++ 3 files changed, 195 insertions(+), 199 deletions(-) create mode 100644 pandas/_libs/window_indexer.pyx diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx index a2096d389823f..bdac816a936f6 100644 --- a/pandas/_libs/window.pyx +++ b/pandas/_libs/window.pyx @@ -96,205 +96,6 @@ def _check_minp(win, minp, N, floor=None): # Physical description: 366 p. # Series: Prentice-Hall Series in Automatic Computation -# ---------------------------------------------------------------------- -# The indexer objects for rolling -# These define start/end indexers to compute offsets - - -cdef class WindowIndexer: - - cdef: - ndarray start, end - int64_t N, minp, win - bint is_variable - - def get_data(self): - return (self.start, self.end, self.N, - self.win, self.minp, - self.is_variable) - - -cdef class MockFixedWindowIndexer(WindowIndexer): - """ - - We are just checking parameters of the indexer, - and returning a consistent API with fixed/variable - indexers. - - Parameters - ---------- - values: ndarray - values data array - win: int64_t - window size - minp: int64_t - min number of obs in a window to consider non-NaN - index: object - index of the values - floor: optional - unit for flooring - left_closed: bint - left endpoint closedness - right_closed: bint - right endpoint closedness - - """ - def __init__(self, ndarray values, int64_t win, int64_t minp, - bint left_closed, bint right_closed, - object index=None, object floor=None): - - assert index is None - self.is_variable = 0 - self.N = len(values) - self.minp = _check_minp(win, minp, self.N, floor=floor) - self.start = np.empty(0, dtype='int64') - self.end = np.empty(0, dtype='int64') - self.win = win - - -cdef class FixedWindowIndexer(WindowIndexer): - """ - create a fixed length window indexer object - that has start & end, that point to offsets in - the index object; these are defined based on the win - arguments - - Parameters - ---------- - values: ndarray - values data array - win: int64_t - window size - minp: int64_t - min number of obs in a window to consider non-NaN - index: object - index of the values - floor: optional - unit for flooring the unit - left_closed: bint - left endpoint closedness - right_closed: bint - right endpoint closedness - - """ - def __init__(self, ndarray values, int64_t win, int64_t minp, - bint left_closed, bint right_closed, - object index=None, object floor=None): - cdef ndarray start_s, start_e, end_s, end_e - - assert index is None - self.is_variable = 0 - self.N = len(values) - self.minp = _check_minp(win, minp, self.N, floor=floor) - - start_s = np.zeros(win, dtype='int64') - start_e = np.arange(win, self.N, dtype='int64') - win + 1 - self.start = np.concatenate([start_s, start_e]) - - end_s = np.arange(win, dtype='int64') + 1 - end_e = start_e + win - self.end = np.concatenate([end_s, end_e]) - self.win = win - - -cdef class VariableWindowIndexer(WindowIndexer): - """ - create a variable length window indexer object - that has start & end, that point to offsets in - the index object; these are defined based on the win - arguments - - Parameters - ---------- - values: ndarray - values data array - win: int64_t - window size - minp: int64_t - min number of obs in a window to consider non-NaN - index: ndarray - index of the values - left_closed: bint - left endpoint closedness - True if the left endpoint is closed, False if open - right_closed: bint - right endpoint closedness - True if the right endpoint is closed, False if open - floor: optional - unit for flooring the unit - """ - def __init__(self, ndarray values, int64_t win, int64_t minp, - bint left_closed, bint right_closed, ndarray index, - object floor=None): - - self.is_variable = 1 - self.N = len(index) - self.minp = _check_minp(win, minp, self.N, floor=floor) - - self.start = np.empty(self.N, dtype='int64') - self.start.fill(-1) - - self.end = np.empty(self.N, dtype='int64') - self.end.fill(-1) - - self.build(index, win, left_closed, right_closed) - - # max window size - self.win = (self.end - self.start).max() - - def build(self, const int64_t[:] index, int64_t win, bint left_closed, - bint right_closed): - - cdef: - ndarray[int64_t] start, end - int64_t start_bound, end_bound, N - Py_ssize_t i, j - - start = self.start - end = self.end - N = self.N - - start[0] = 0 - - # right endpoint is closed - if right_closed: - end[0] = 1 - # right endpoint is open - else: - end[0] = 0 - - with nogil: - - # start is start of slice interval (including) - # end is end of slice interval (not including) - for i in range(1, N): - end_bound = index[i] - start_bound = index[i] - win - - # left endpoint is closed - if left_closed: - start_bound -= 1 - - # advance the start bound until we are - # within the constraint - start[i] = i - for j in range(start[i - 1], i): - if index[j] > start_bound: - start[i] = j - break - - # end bound is previous end - # or current index - if index[end[i - 1]] <= end_bound: - end[i] = i + 1 - else: - end[i] = end[i - 1] - - # right endpoint is open - if not right_closed: - end[i] -= 1 - - def get_window_indexer(values, win, minp, index, closed, floor=None, use_mock=True): """ diff --git a/pandas/_libs/window_indexer.pyx b/pandas/_libs/window_indexer.pyx new file mode 100644 index 0000000000000..266c894fc84bc --- /dev/null +++ b/pandas/_libs/window_indexer.pyx @@ -0,0 +1,186 @@ +# cython: boundscheck=False, wraparound=False, cdivision=True + +import numpy as np +from numpy cimport ndarray, int64_t +cnp.import_array() + +# ---------------------------------------------------------------------- +# The indexer objects for rolling +# These define start/end indexers to compute offsets + + +cpdef class MockFixedWindowIndexer: + """ + + We are just checking parameters of the indexer, + and returning a consistent API with fixed/variable + indexers. + + Parameters + ---------- + values: ndarray + values data array + win: int64_t + window size + minp: int64_t + min number of obs in a window to consider non-NaN + index: object + index of the values + floor: optional + unit for flooring + closed: string + closed behavior + """ + def __init__(self, ndarray values, int64_t win, int64_t minp, + object closed, + object index=None, object floor=None): + + self.start = np.empty(0, dtype='int64') + self.end = np.empty(0, dtype='int64') + + def get_window_bounds(self): + return self.start, self.end + + +cpdef class FixedWindowIndexer: + """ + create a fixed length window indexer object + that has start & end, that point to offsets in + the index object; these are defined based on the win + arguments + + Parameters + ---------- + values: ndarray + values data array + win: int64_t + window size + minp: int64_t + min number of obs in a window to consider non-NaN + index: object + index of the values + floor: optional + unit for flooring the unit + closed: string + closed behavior + """ + def __init__(self, ndarray values, int64_t win, int64_t minp, + object closed, + object index=None, object floor=None): + cdef: + ndarray start_s, start_e, end_s, end_e + int64_t N = len(values) + + start_s = np.zeros(win, dtype='int64') + start_e = np.arange(win, N, dtype='int64') - win + 1 + self.start = np.concatenate([start_s, start_e]) + + end_s = np.arange(win, dtype='int64') + 1 + end_e = start_e + win + self.end = np.concatenate([end_s, end_e]) + + def get_window_bounds(self): + return self.start, self.end + +cpdef class VariableWindowIndexer: + """ + create a variable length window indexer object + that has start & end, that point to offsets in + the index object; these are defined based on the win + arguments + + Parameters + ---------- + values: ndarray + values data array + win: int64_t + window size + minp: int64_t + min number of obs in a window to consider non-NaN + index: ndarray + index of the values + closed: string + closed behavior + floor: optional + unit for flooring the unit + """ + def __init__(self, ndarray values, int64_t win, int64_t minp, + object closed, ndarray index, + object floor=None): + cdef: + bint left_closed = False + bint right_closed = False + int64_t N = len(index) + + # if windows is variable, default is 'right', otherwise default is 'both' + if closed is None: + closed = 'right' if index is not None else 'both' + + if closed in ['right', 'both']: + right_closed = True + + if closed in ['left', 'both']: + left_closed = True + + self.start, self.end = self.build(index, win, left_closed, right_closed, N) + + # TODO: Maybe will need to use this? + # max window size + #self.win = (self.end - self.start).max() + + def build(self, const int64_t[:] index, int64_t win, bint left_closed, + bint right_closed, int64_t N): + + cdef: + ndarray[int64_t] start, end + int64_t start_bound, end_bound, N + Py_ssize_t i, j + + start = np.empty(N, dtype='int64') + start.fill(-1) + end = np.empty(N, dtype='int64') + end.fill(-1) + + start[0] = 0 + + # right endpoint is closed + if right_closed: + end[0] = 1 + # right endpoint is open + else: + end[0] = 0 + + with nogil: + + # start is start of slice interval (including) + # end is end of slice interval (not including) + for i in range(1, N): + end_bound = index[i] + start_bound = index[i] - win + + # left endpoint is closed + if left_closed: + start_bound -= 1 + + # advance the start bound until we are + # within the constraint + start[i] = i + for j in range(start[i - 1], i): + if index[j] > start_bound: + start[i] = j + break + + # end bound is previous end + # or current index + if index[end[i - 1]] <= end_bound: + end[i] = i + 1 + else: + end[i] = end[i - 1] + + # right endpoint is open + if not right_closed: + end[i] -= 1 + return start, end + + def get_window_bounds(self): + return self.start, self.end \ No newline at end of file diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 2e599d9bde1d3..ae864013a8b31 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -11,6 +11,7 @@ import numpy as np import pandas._libs.window as libwindow +import pandas._libs.window_indexer as libwindow_indexer from pandas.compat._optional import import_optional_dependency from pandas.compat.numpy import function as nv from pandas.util._decorators import Appender, Substitution, cache_readonly @@ -372,6 +373,14 @@ def _get_roll_func(self, func_name: str) -> Callable: ) return window_func + def _get_window_indexer(self, index_as_array): + """ + Return an indexer class that will compute the window start and end bounds + """ + if self.is_freq_type: + return libwindow_indexer.VariableWindowIndexer(index_as_array) + return libwindow_indexer.FixedWindowIndexer() + def _apply( self, func: Callable, From 6b1935e2a8cc00895292650fec84e93f8970cab9 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 3 Nov 2019 18:39:07 -0800 Subject: [PATCH 09/28] Integrating indexers --- pandas/_libs/window_indexer.pyx | 24 +++--------------------- pandas/core/window/rolling.py | 13 ++++++++++--- 2 files changed, 13 insertions(+), 24 deletions(-) diff --git a/pandas/_libs/window_indexer.pyx b/pandas/_libs/window_indexer.pyx index 266c894fc84bc..a17989cbb45cc 100644 --- a/pandas/_libs/window_indexer.pyx +++ b/pandas/_libs/window_indexer.pyx @@ -22,18 +22,12 @@ cpdef class MockFixedWindowIndexer: values data array win: int64_t window size - minp: int64_t - min number of obs in a window to consider non-NaN index: object index of the values - floor: optional - unit for flooring closed: string closed behavior """ - def __init__(self, ndarray values, int64_t win, int64_t minp, - object closed, - object index=None, object floor=None): + def __init__(self, ndarray values, int64_t win, object closed, object index=None): self.start = np.empty(0, dtype='int64') self.end = np.empty(0, dtype='int64') @@ -55,18 +49,12 @@ cpdef class FixedWindowIndexer: values data array win: int64_t window size - minp: int64_t - min number of obs in a window to consider non-NaN index: object index of the values - floor: optional - unit for flooring the unit closed: string closed behavior """ - def __init__(self, ndarray values, int64_t win, int64_t minp, - object closed, - object index=None, object floor=None): + def __init__(self, ndarray values, int64_t win, object closed, object index=None): cdef: ndarray start_s, start_e, end_s, end_e int64_t N = len(values) @@ -95,18 +83,12 @@ cpdef class VariableWindowIndexer: values data array win: int64_t window size - minp: int64_t - min number of obs in a window to consider non-NaN index: ndarray index of the values closed: string closed behavior - floor: optional - unit for flooring the unit """ - def __init__(self, ndarray values, int64_t win, int64_t minp, - object closed, ndarray index, - object floor=None): + def __init__(self, ndarray values, int64_t win, object closed, ndarray index): cdef: bint left_closed = False bint right_closed = False diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index ae864013a8b31..586639995ac21 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -373,13 +373,13 @@ def _get_roll_func(self, func_name: str) -> Callable: ) return window_func - def _get_window_indexer(self, index_as_array): + def _get_window_indexer(self): """ Return an indexer class that will compute the window start and end bounds """ if self.is_freq_type: - return libwindow_indexer.VariableWindowIndexer(index_as_array) - return libwindow_indexer.FixedWindowIndexer() + return libwindow_indexer.VariableWindowIndexer + return libwindow_indexer.FixedWindowIndexer def _apply( self, @@ -413,6 +413,7 @@ def _apply( blocks, obj = self._create_blocks() block_list = list(blocks) index_as_array = self._get_index() + window_indexer = self._get_window_indexer() results = [] exclude = [] # type: List[Scalar] @@ -442,6 +443,9 @@ def calc(x): min_periods = _calculate_min_periods( window, self.min_periods, len(x), require_min_periods, floor ) + start, end = window_indexer.get_window_bounds( + x, window, self.closed, index_as_array + ) return func(x, window, min_periods=min_periods, closed=self.closed) else: @@ -450,6 +454,9 @@ def calc(x): min_periods = _calculate_min_periods( window, self.min_periods, len(x), require_min_periods, floor ) + start, end = window_indexer.get_window_bounds( + x, window, self.closed, index_as_array + ) return func(x, window, min_periods=min_periods, closed=self.closed) with np.errstate(all="ignore"): From c7df9626cacce9ee9a627756f0bfde6a9571c0d9 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 3 Nov 2019 21:57:32 -0800 Subject: [PATCH 10/28] Change all cython function signatures --- pandas/_libs/window.pyx | 720 ++++++++++++++++------------------ pandas/core/window/rolling.py | 4 +- 2 files changed, 351 insertions(+), 373 deletions(-) diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx index bdac816a936f6..b689bbee1081d 100644 --- a/pandas/_libs/window.pyx +++ b/pandas/_libs/window.pyx @@ -96,81 +96,21 @@ def _check_minp(win, minp, N, floor=None): # Physical description: 366 p. # Series: Prentice-Hall Series in Automatic Computation -def get_window_indexer(values, win, minp, index, closed, - floor=None, use_mock=True): - """ - return the correct window indexer for the computation - - Parameters - ---------- - values: 1d ndarray - win: integer, window size - minp: integer, minimum periods - index: 1d ndarray, optional - index to the values array - closed: string, default None - {'right', 'left', 'both', 'neither'} - window endpoint closedness. Defaults to 'right' in - VariableWindowIndexer and to 'both' in FixedWindowIndexer - floor: optional - unit for flooring the unit - use_mock: boolean, default True - if we are a fixed indexer, return a mock indexer - instead of the FixedWindow Indexer. This is a type - compat Indexer that allows us to use a standard - code path with all of the indexers. - - - Returns - ------- - tuple of 1d int64 ndarrays of the offsets & data about the window - - """ - - cdef: - bint left_closed = False - bint right_closed = False - - assert closed is None or closed in ['right', 'left', 'both', 'neither'] - - # if windows is variable, default is 'right', otherwise default is 'both' - if closed is None: - closed = 'right' if index is not None else 'both' - - if closed in ['right', 'both']: - right_closed = True - - if closed in ['left', 'both']: - left_closed = True - - if index is not None: - indexer = VariableWindowIndexer(values, win, minp, left_closed, - right_closed, index, floor) - elif use_mock: - indexer = MockFixedWindowIndexer(values, win, minp, left_closed, - right_closed, index, floor) - else: - indexer = FixedWindowIndexer(values, win, minp, left_closed, - right_closed, index, floor) - return indexer.get_data() - # ---------------------------------------------------------------------- # Rolling count # this is only an impl for index not None, IOW, freq aware -def roll_count(ndarray[float64_t] values, int64_t win, int64_t minp, - object index, object closed): +def roll_count(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[int64_t] end, + int64_t minp): cdef: float64_t val, count_x = 0.0 - int64_t s, e, nobs, N + int64_t s, e, nobs, N = len(values) Py_ssize_t i, j ndarray[int64_t] start, end ndarray[float64_t] output - start, end, N, win, minp, _ = get_window_indexer(values, win, - minp, index, closed) output = np.empty(N, dtype=float) with nogil: @@ -245,80 +185,76 @@ cdef inline void remove_sum(float64_t val, sum_x[0] = sum_x[0] - val -def roll_sum(ndarray[float64_t] values, int64_t win, int64_t minp, - object index, object closed): +def roll_sum_variable(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[int64_t] end, + int64_t minp): cdef: - float64_t val, prev_x, sum_x = 0 - int64_t s, e, range_endpoint - int64_t nobs = 0, i, j, N - bint is_variable + float64_t sum_x = 0 + int64_t s, e + int64_t nobs = 0, i, j, N = len(values) ndarray[int64_t] start, end ndarray[float64_t] output - start, end, N, win, minp, is_variable = get_window_indexer(values, win, - minp, index, - closed, - floor=0) output = np.empty(N, dtype=float) - # for performance we are going to iterate - # fixed windows separately, makes the code more complex as we have 2 paths - # but is faster + with nogil: - if is_variable: + for i in range(0, N): + s = start[i] + e = end[i] - # variable window - with nogil: + if i == 0: - for i in range(0, N): - s = start[i] - e = end[i] + # setup + sum_x = 0.0 + nobs = 0 + for j in range(s, e): + add_sum(values[j], &nobs, &sum_x) - if i == 0: + else: - # setup - sum_x = 0.0 - nobs = 0 - for j in range(s, e): - add_sum(values[j], &nobs, &sum_x) + # calculate deletes + for j in range(start[i - 1], s): + remove_sum(values[j], &nobs, &sum_x) - else: + # calculate adds + for j in range(end[i - 1], e): + add_sum(values[j], &nobs, &sum_x) - # calculate deletes - for j in range(start[i - 1], s): - remove_sum(values[j], &nobs, &sum_x) + output[i] = calc_sum(minp, nobs, sum_x) - # calculate adds - for j in range(end[i - 1], e): - add_sum(values[j], &nobs, &sum_x) + return output - output[i] = calc_sum(minp, nobs, sum_x) - else: +def roll_sum_fixed(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[int64_t] end, + int64_t minp, int64_t win): + cdef: + float64_t val, prev_x, sum_x = 0 + int64_t range_endpoint + int64_t nobs = 0, i, N = len(values) + ndarray[float64_t] output - # fixed window + output = np.empty(N, dtype=float) - range_endpoint = int_max(minp, 1) - 1 + range_endpoint = int_max(minp, 1) - 1 - with nogil: + with nogil: - for i in range(0, range_endpoint): - add_sum(values[i], &nobs, &sum_x) - output[i] = NaN + for i in range(0, range_endpoint): + add_sum(values[i], &nobs, &sum_x) + output[i] = NaN - for i in range(range_endpoint, N): - val = values[i] - add_sum(val, &nobs, &sum_x) + for i in range(range_endpoint, N): + val = values[i] + add_sum(val, &nobs, &sum_x) - if i > win - 1: - prev_x = values[i - win] - remove_sum(prev_x, &nobs, &sum_x) + if i > win - 1: + prev_x = values[i - win] + remove_sum(prev_x, &nobs, &sum_x) - output[i] = calc_sum(minp, nobs, sum_x) + output[i] = calc_sum(minp, nobs, sum_x) return output - # ---------------------------------------------------------------------- # Rolling mean @@ -366,77 +302,74 @@ cdef inline void remove_mean(float64_t val, Py_ssize_t *nobs, float64_t *sum_x, neg_ct[0] = neg_ct[0] - 1 -def roll_mean(ndarray[float64_t] values, int64_t win, int64_t minp, - object index, object closed): +def roll_mean_fixed(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[int64_t] end, + int64_t minp, int64_t win): cdef: - float64_t val, prev_x, result, sum_x = 0 - int64_t s, e - bint is_variable - Py_ssize_t nobs = 0, i, j, neg_ct = 0, N - ndarray[int64_t] start, end + float64_t val, prev_x, sum_x = 0 + Py_ssize_t nobs = 0, i, neg_ct = 0, N = len(values) ndarray[float64_t] output - start, end, N, win, minp, is_variable = get_window_indexer(values, win, - minp, index, - closed) output = np.empty(N, dtype=float) - # for performance we are going to iterate - # fixed windows separately, makes the code more complex as we have 2 paths - # but is faster + with nogil: + for i in range(minp - 1): + val = values[i] + add_mean(val, &nobs, &sum_x, &neg_ct) + output[i] = NaN - if is_variable: + for i in range(minp - 1, N): + val = values[i] + add_mean(val, &nobs, &sum_x, &neg_ct) - with nogil: + if i > win - 1: + prev_x = values[i - win] + remove_mean(prev_x, &nobs, &sum_x, &neg_ct) - for i in range(0, N): - s = start[i] - e = end[i] + output[i] = calc_mean(minp, nobs, neg_ct, sum_x) - if i == 0: + return output - # setup - sum_x = 0.0 - nobs = 0 - for j in range(s, e): - val = values[j] - add_mean(val, &nobs, &sum_x, &neg_ct) +def roll_mean_variable(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[int64_t] end, + int64_t minp): + cdef: + float64_t val, sum_x = 0 + int64_t s, e + Py_ssize_t nobs = 0, i, j, neg_ct = 0, N = len(values) + ndarray[float64_t] output - else: + output = np.empty(N, dtype=float) - # calculate deletes - for j in range(start[i - 1], s): - val = values[j] - remove_mean(val, &nobs, &sum_x, &neg_ct) + with nogil: - # calculate adds - for j in range(end[i - 1], e): - val = values[j] - add_mean(val, &nobs, &sum_x, &neg_ct) + for i in range(0, N): + s = start[i] + e = end[i] - output[i] = calc_mean(minp, nobs, neg_ct, sum_x) + if i == 0: - else: + # setup + sum_x = 0.0 + nobs = 0 + for j in range(s, e): + val = values[j] + add_mean(val, &nobs, &sum_x, &neg_ct) - with nogil: - for i in range(minp - 1): - val = values[i] - add_mean(val, &nobs, &sum_x, &neg_ct) - output[i] = NaN + else: - for i in range(minp - 1, N): - val = values[i] - add_mean(val, &nobs, &sum_x, &neg_ct) + # calculate deletes + for j in range(start[i - 1], s): + val = values[j] + remove_mean(val, &nobs, &sum_x, &neg_ct) - if i > win - 1: - prev_x = values[i - win] - remove_mean(prev_x, &nobs, &sum_x, &neg_ct) + # calculate adds + for j in range(end[i - 1], e): + val = values[j] + add_mean(val, &nobs, &sum_x, &neg_ct) - output[i] = calc_mean(minp, nobs, neg_ct, sum_x) + output[i] = calc_mean(minp, nobs, neg_ct, sum_x) return output - # ---------------------------------------------------------------------- # Rolling variance @@ -499,8 +432,8 @@ cdef inline void remove_var(float64_t val, float64_t *nobs, float64_t *mean_x, ssqdm_x[0] = 0 -def roll_var(ndarray[float64_t] values, int64_t win, int64_t minp, - object index, object closed, int ddof=1): +def roll_var_fixed(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[int64_t] end, int64_t minp, + int64_t win, int ddof=1): """ Numerically stable implementation using Welford's method. """ @@ -508,94 +441,102 @@ def roll_var(ndarray[float64_t] values, int64_t win, int64_t minp, float64_t mean_x = 0, ssqdm_x = 0, nobs = 0, float64_t val, prev, delta, mean_x_old int64_t s, e - bint is_variable Py_ssize_t i, j, N ndarray[int64_t] start, end ndarray[float64_t] output - start, end, N, win, minp, is_variable = get_window_indexer(values, win, - minp, index, - closed) output = np.empty(N, dtype=float) # Check for windows larger than array, addresses #7297 win = min(win, N) - # for performance we are going to iterate - # fixed windows separately, makes the code more complex as we - # have 2 paths but is faster + with nogil: - if is_variable: + # Over the first window, observations can only be added, never + # removed + for i in range(win): + add_var(values[i], &nobs, &mean_x, &ssqdm_x) + output[i] = calc_var(minp, ddof, nobs, ssqdm_x) - with nogil: + # a part of Welford's method for the online variance-calculation + # https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance - for i in range(0, N): + # After the first window, observations can both be added and + # removed + for i in range(win, N): + val = values[i] + prev = values[i - win] - s = start[i] - e = end[i] + if notnan(val): + if prev == prev: - # Over the first window, observations can only be added - # never removed - if i == 0: + # Adding one observation and removing another one + delta = val - prev + mean_x_old = mean_x - for j in range(s, e): - add_var(values[j], &nobs, &mean_x, &ssqdm_x) + mean_x += delta / nobs + ssqdm_x += ((nobs - 1) * val + + (nobs + 1) * prev + - 2 * nobs * mean_x_old) * delta / nobs else: + add_var(val, &nobs, &mean_x, &ssqdm_x) + elif prev == prev: + remove_var(prev, &nobs, &mean_x, &ssqdm_x) - # After the first window, observations can both be added - # and removed + output[i] = calc_var(minp, ddof, nobs, ssqdm_x) - # calculate adds - for j in range(end[i - 1], e): - add_var(values[j], &nobs, &mean_x, &ssqdm_x) + return output - # calculate deletes - for j in range(start[i - 1], s): - remove_var(values[j], &nobs, &mean_x, &ssqdm_x) - output[i] = calc_var(minp, ddof, nobs, ssqdm_x) +def roll_var_variable(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[int64_t] end, int64_t minp, + int64_t win, int ddof=1): + """ + Numerically stable implementation using Welford's method. + """ + cdef: + float64_t mean_x = 0, ssqdm_x = 0, nobs = 0, + float64_t val, prev, delta, mean_x_old + int64_t s, e + Py_ssize_t i, j, N + ndarray[int64_t] start, end + ndarray[float64_t] output - else: + output = np.empty(N, dtype=float) + + # Check for windows larger than array, addresses #7297 + win = min(win, N) - with nogil: + with nogil: - # Over the first window, observations can only be added, never - # removed - for i in range(win): - add_var(values[i], &nobs, &mean_x, &ssqdm_x) - output[i] = calc_var(minp, ddof, nobs, ssqdm_x) + for i in range(0, N): - # a part of Welford's method for the online variance-calculation - # https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance + s = start[i] + e = end[i] - # After the first window, observations can both be added and - # removed - for i in range(win, N): - val = values[i] - prev = values[i - win] + # Over the first window, observations can only be added + # never removed + if i == 0: - if notnan(val): - if prev == prev: + for j in range(s, e): + add_var(values[j], &nobs, &mean_x, &ssqdm_x) - # Adding one observation and removing another one - delta = val - prev - mean_x_old = mean_x + else: - mean_x += delta / nobs - ssqdm_x += ((nobs - 1) * val - + (nobs + 1) * prev - - 2 * nobs * mean_x_old) * delta / nobs + # After the first window, observations can both be added + # and removed - else: - add_var(val, &nobs, &mean_x, &ssqdm_x) - elif prev == prev: - remove_var(prev, &nobs, &mean_x, &ssqdm_x) + # calculate adds + for j in range(end[i - 1], e): + add_var(values[j], &nobs, &mean_x, &ssqdm_x) - output[i] = calc_var(minp, ddof, nobs, ssqdm_x) + # calculate deletes + for j in range(start[i - 1], s): + remove_var(values[j], &nobs, &mean_x, &ssqdm_x) - return output + output[i] = calc_var(minp, ddof, nobs, ssqdm_x) + return output # ---------------------------------------------------------------------- # Rolling skewness @@ -663,78 +604,83 @@ cdef inline void remove_skew(float64_t val, int64_t *nobs, xx[0] = xx[0] - val * val xxx[0] = xxx[0] - val * val * val - -def roll_skew(ndarray[float64_t] values, int64_t win, int64_t minp, - object index, object closed): +def roll_skew_fixed(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[int64_t] end, + int64_t minp, int64_t win): cdef: float64_t val, prev float64_t x = 0, xx = 0, xxx = 0 - int64_t nobs = 0, i, j, N + int64_t nobs = 0, i, j, N = len(values) int64_t s, e bint is_variable ndarray[int64_t] start, end ndarray[float64_t] output - start, end, N, win, minp, is_variable = get_window_indexer(values, win, - minp, index, - closed) output = np.empty(N, dtype=float) - if is_variable: + with nogil: + for i in range(minp - 1): + val = values[i] + add_skew(val, &nobs, &x, &xx, &xxx) + output[i] = NaN - with nogil: + for i in range(minp - 1, N): + val = values[i] + add_skew(val, &nobs, &x, &xx, &xxx) - for i in range(0, N): + if i > win - 1: + prev = values[i - win] + remove_skew(prev, &nobs, &x, &xx, &xxx) + + output[i] = calc_skew(minp, nobs, x, xx, xxx) - s = start[i] - e = end[i] + return output - # Over the first window, observations can only be added - # never removed - if i == 0: +def roll_skew_variable(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[int64_t] end, int64_t minp): + cdef: + float64_t val, prev + float64_t x = 0, xx = 0, xxx = 0 + int64_t nobs = 0, i, j, N = len(values) + int64_t s, e + bint is_variable + ndarray[int64_t] start, end + ndarray[float64_t] output - for j in range(s, e): - val = values[j] - add_skew(val, &nobs, &x, &xx, &xxx) + output = np.empty(N, dtype=float) - else: + with nogil: - # After the first window, observations can both be added - # and removed + for i in range(0, N): - # calculate adds - for j in range(end[i - 1], e): - val = values[j] - add_skew(val, &nobs, &x, &xx, &xxx) + s = start[i] + e = end[i] - # calculate deletes - for j in range(start[i - 1], s): - val = values[j] - remove_skew(val, &nobs, &x, &xx, &xxx) + # Over the first window, observations can only be added + # never removed + if i == 0: - output[i] = calc_skew(minp, nobs, x, xx, xxx) + for j in range(s, e): + val = values[j] + add_skew(val, &nobs, &x, &xx, &xxx) - else: + else: - with nogil: - for i in range(minp - 1): - val = values[i] - add_skew(val, &nobs, &x, &xx, &xxx) - output[i] = NaN + # After the first window, observations can both be added + # and removed - for i in range(minp - 1, N): - val = values[i] - add_skew(val, &nobs, &x, &xx, &xxx) + # calculate adds + for j in range(end[i - 1], e): + val = values[j] + add_skew(val, &nobs, &x, &xx, &xxx) - if i > win - 1: - prev = values[i - win] - remove_skew(prev, &nobs, &x, &xx, &xxx) + # calculate deletes + for j in range(start[i - 1], s): + val = values[j] + remove_skew(val, &nobs, &x, &xx, &xxx) - output[i] = calc_skew(minp, nobs, x, xx, xxx) + output[i] = calc_skew(minp, nobs, x, xx, xxx) return output - # ---------------------------------------------------------------------- # Rolling kurtosis @@ -807,70 +753,73 @@ cdef inline void remove_kurt(float64_t val, int64_t *nobs, xxx[0] = xxx[0] - val * val * val xxxx[0] = xxxx[0] - val * val * val * val - -def roll_kurt(ndarray[float64_t] values, int64_t win, int64_t minp, - object index, object closed): +def roll_kurt_fixed(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[int64_t] end, + int64_t minp, int64_t win): cdef: float64_t val, prev float64_t x = 0, xx = 0, xxx = 0, xxxx = 0 int64_t nobs = 0, i, j, N int64_t s, e - bint is_variable ndarray[int64_t] start, end ndarray[float64_t] output - start, end, N, win, minp, is_variable = get_window_indexer(values, win, - minp, index, - closed) output = np.empty(N, dtype=float) - if is_variable: + with nogil: - with nogil: + for i in range(minp - 1): + add_kurt(values[i], &nobs, &x, &xx, &xxx, &xxxx) + output[i] = NaN - for i in range(0, N): + for i in range(minp - 1, N): + add_kurt(values[i], &nobs, &x, &xx, &xxx, &xxxx) - s = start[i] - e = end[i] + if i > win - 1: + prev = values[i - win] + remove_kurt(prev, &nobs, &x, &xx, &xxx, &xxxx) - # Over the first window, observations can only be added - # never removed - if i == 0: + output[i] = calc_kurt(minp, nobs, x, xx, xxx, xxxx) - for j in range(s, e): - add_kurt(values[j], &nobs, &x, &xx, &xxx, &xxxx) + return output - else: +def roll_kurt_variable(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[int64_t] end, int64_t minp): + cdef: + float64_t val, prev + float64_t x = 0, xx = 0, xxx = 0, xxxx = 0 + int64_t nobs = 0, i, j, N = len(values) + ndarray[int64_t] start, end + ndarray[float64_t] output - # After the first window, observations can both be added - # and removed + output = np.empty(N, dtype=float) - # calculate adds - for j in range(end[i - 1], e): - add_kurt(values[j], &nobs, &x, &xx, &xxx, &xxxx) + with nogil: - # calculate deletes - for j in range(start[i - 1], s): - remove_kurt(values[j], &nobs, &x, &xx, &xxx, &xxxx) + for i in range(0, N): - output[i] = calc_kurt(minp, nobs, x, xx, xxx, xxxx) + s = start[i] + e = end[i] - else: + # Over the first window, observations can only be added + # never removed + if i == 0: - with nogil: + for j in range(s, e): + add_kurt(values[j], &nobs, &x, &xx, &xxx, &xxxx) - for i in range(minp - 1): - add_kurt(values[i], &nobs, &x, &xx, &xxx, &xxxx) - output[i] = NaN + else: - for i in range(minp - 1, N): - add_kurt(values[i], &nobs, &x, &xx, &xxx, &xxxx) + # After the first window, observations can both be added + # and removed - if i > win - 1: - prev = values[i - win] - remove_kurt(prev, &nobs, &x, &xx, &xxx, &xxxx) + # calculate adds + for j in range(end[i - 1], e): + add_kurt(values[j], &nobs, &x, &xx, &xxx, &xxxx) - output[i] = calc_kurt(minp, nobs, x, xx, xxx, xxxx) + # calculate deletes + for j in range(start[i - 1], s): + remove_kurt(values[j], &nobs, &x, &xx, &xxx, &xxxx) + + output[i] = calc_kurt(minp, nobs, x, xx, xxx, xxxx) return output @@ -879,8 +828,8 @@ def roll_kurt(ndarray[float64_t] values, int64_t win, int64_t minp, # Rolling median, min, max -def roll_median_c(ndarray[float64_t] values, int64_t win, int64_t minp, - object index, object closed): +def roll_median_c(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[int64_t] end, + int64_t minp, int64_t win): cdef: float64_t val, res, prev bint err = 0, is_variable @@ -894,10 +843,6 @@ def roll_median_c(ndarray[float64_t] values, int64_t win, int64_t minp, # we use the Fixed/Variable Indexer here as the # actual skiplist ops outweigh any window computation costs - start, end, N, win, minp, is_variable = get_window_indexer( - values, win, - minp, index, closed, - use_mock=False) output = np.empty(N, dtype=float) if win == 0: @@ -1012,8 +957,8 @@ cdef inline numeric calc_mm(int64_t minp, Py_ssize_t nobs, return result -def roll_max(ndarray[numeric] values, int64_t win, int64_t minp, - object index, object closed): +def roll_max_fixed(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[int64_t] end, + int64_t minp, int64_t win): """ Moving max of 1d array of any numeric type along axis=0 ignoring NaNs. @@ -1029,11 +974,10 @@ def roll_max(ndarray[numeric] values, int64_t win, int64_t minp, make the interval closed on the right, left, both or neither endpoints """ - return _roll_min_max(values, win, minp, index, closed=closed, is_max=1) + return _roll_min_max_fixed(values, start, end, minp, win, is_max=1) -def roll_min(ndarray[numeric] values, int64_t win, int64_t minp, - object index, object closed): +def roll_max_variable(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[int64_t] end, int64_t minp): """ Moving max of 1d array of any numeric type along axis=0 ignoring NaNs. @@ -1045,43 +989,55 @@ def roll_min(ndarray[numeric] values, int64_t win, int64_t minp, is below this, output a NaN index: ndarray, optional index for window computation + closed: 'right', 'left', 'both', 'neither' + make the interval closed on the right, left, + both or neither endpoints """ - return _roll_min_max(values, win, minp, index, is_max=0, closed=closed) + return _roll_min_max_variable(values, start, end, minp, is_max=1) -cdef _roll_min_max(ndarray[numeric] values, int64_t win, int64_t minp, - object index, object closed, bint is_max): +def roll_min_fixed(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[int64_t] end, + int64_t minp, int64_t win): """ - Moving min/max of 1d array of any numeric type along axis=0 - ignoring NaNs. + Moving max of 1d array of any numeric type along axis=0 ignoring NaNs. + + Parameters + ---------- + values: numpy array + window: int, size of rolling window + minp: if number of observations in window + is below this, output a NaN + index: ndarray, optional + index for window computation """ - cdef: - ndarray[int64_t] starti, endi - int64_t N - bint is_variable + return _roll_min_max_fixed(values, start, end, minp, win, is_max=0) - starti, endi, N, win, minp, is_variable = get_window_indexer( - values, win, - minp, index, closed) - if is_variable: - return _roll_min_max_variable(values, starti, endi, N, win, minp, - is_max) - else: - return _roll_min_max_fixed(values, N, win, minp, is_max) +def roll_min_variable(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[int64_t] end, int64_t minp): + """ + Moving max of 1d array of any numeric type along axis=0 ignoring NaNs. + + Parameters + ---------- + values: numpy array + window: int, size of rolling window + minp: if number of observations in window + is below this, output a NaN + index: ndarray, optional + index for window computation + """ + return _roll_min_max_variable(values, start, end, minp, is_max=0) cdef _roll_min_max_variable(ndarray[numeric] values, ndarray[int64_t] starti, ndarray[int64_t] endi, - int64_t N, - int64_t win, int64_t minp, bint is_max): cdef: numeric ai int64_t i, close_offset, curr_win_size - Py_ssize_t nobs = 0 + Py_ssize_t nobs = 0, N = len(values) deque Q[int64_t] # min/max always the front deque W[int64_t] # track the whole window for nobs compute ndarray[float64_t, ndim=1] output @@ -1156,15 +1112,16 @@ cdef _roll_min_max_variable(ndarray[numeric] values, cdef _roll_min_max_fixed(ndarray[numeric] values, - int64_t N, - int64_t win, + ndarray[int64_t] starti, + ndarray[int64_t] endi, int64_t minp, + int64_t win, bint is_max): cdef: numeric ai bint should_replace int64_t i, removed, window_i, - Py_ssize_t nobs = 0 + Py_ssize_t nobs = 0, N = len(values) int64_t* death numeric* ring numeric* minvalue @@ -1260,8 +1217,8 @@ interpolation_types = { } -def roll_quantile(ndarray[float64_t, cast=True] values, int64_t win, - int64_t minp, object index, object closed, +def roll_quantile(ndarray[float64_t, cast=True] values, ndarray[int64_t] start, + ndarray[int64_t] end, int64_t minp, int64_t win, float64_t quantile, str interpolation): """ O(N log(window)) implementation using skip list @@ -1289,10 +1246,6 @@ def roll_quantile(ndarray[float64_t, cast=True] values, int64_t win, # we use the Fixed/Variable Indexer here as the # actual skiplist ops outweigh any window computation costs - start, end, N, win, minp, is_variable = get_window_indexer( - values, win, - minp, index, closed, - use_mock=False) output = np.empty(N, dtype=float) if win == 0: @@ -1378,11 +1331,11 @@ def roll_quantile(ndarray[float64_t, cast=True] values, int64_t win, return output - -def roll_generic(object obj, - int64_t win, int64_t minp, object index, object closed, - int offset, object func, bint raw, - object args, object kwargs): +def roll_generic_fixed(object obj, + ndarray[int64_t] start, ndarray[int64_t] end, + int64_t minp, int64_t win, + int offset, object func, bint raw, + object args, object kwargs): cdef: ndarray[float64_t] output, counts, bufarr ndarray[float64_t, cast=True] arr @@ -1403,36 +1356,13 @@ def roll_generic(object obj, if not arr.flags.c_contiguous: arr = arr.copy('C') - counts = roll_sum(np.concatenate([np.isfinite(arr).astype(float), - np.array([0.] * offset)]), - win, minp, index, closed)[offset:] - - start, end, N, win, minp, is_variable = get_window_indexer(arr, win, - minp, index, - closed, - floor=0) + counts = roll_sum_fixed(np.concatenate([np.isfinite(arr).astype(float), + np.array([0.] * offset)]), + start, end, minp, win)[offset:] output = np.empty(N, dtype=float) - if is_variable: - # variable window arr or series - - if offset != 0: - raise ValueError("unable to roll_generic with a non-zero offset") - - for i in range(0, N): - s = start[i] - e = end[i] - - if counts[i] >= minp: - if raw: - output[i] = func(arr[s:e], *args, **kwargs) - else: - output[i] = func(obj.iloc[s:e], *args, **kwargs) - else: - output[i] = NaN - - elif not raw: + if not raw: # series for i in range(N): if counts[i] >= minp: @@ -1475,6 +1405,54 @@ def roll_generic(object obj, return output +def roll_generic_variable(object obj, + ndarray[int64_t] start, ndarray[int64_t] end, + int64_t minp, + int offset, object func, bint raw, + object args, object kwargs): + cdef: + ndarray[float64_t] output, counts, bufarr + ndarray[float64_t, cast=True] arr + float64_t *buf + float64_t *oldbuf + int64_t nobs = 0, i, j, s, e, N + bint is_variable + ndarray[int64_t] start, end + + n = len(obj) + if n == 0: + return obj + + arr = np.asarray(obj) + + # ndarray input + if raw: + if not arr.flags.c_contiguous: + arr = arr.copy('C') + + counts = roll_sum_variable(np.concatenate([np.isfinite(arr).astype(float), + np.array([0.] * offset)]), + start, end, minp)[offset:] + + output = np.empty(N, dtype=float) + + if offset != 0: + raise ValueError("unable to roll_generic with a non-zero offset") + + for i in range(0, N): + s = start[i] + e = end[i] + + if counts[i] >= minp: + if raw: + output[i] = func(arr[s:e], *args, **kwargs) + else: + output[i] = func(obj.iloc[s:e], *args, **kwargs) + else: + output[i] = NaN + + return output + # ---------------------------------------------------------------------- # Rolling sum and mean for weighted window diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 586639995ac21..99aee212582b7 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -446,7 +446,7 @@ def calc(x): start, end = window_indexer.get_window_bounds( x, window, self.closed, index_as_array ) - return func(x, window, min_periods=min_periods, closed=self.closed) + return func(x, start, end, min_periods) else: @@ -457,7 +457,7 @@ def calc(x): start, end = window_indexer.get_window_bounds( x, window, self.closed, index_as_array ) - return func(x, window, min_periods=min_periods, closed=self.closed) + return func(x, start, end, min_periods) with np.errstate(all="ignore"): if values.ndim > 1: From 9cc2b03f109d708755a5b9755166b8f7523f92cb Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 4 Nov 2019 22:29:24 -0800 Subject: [PATCH 11/28] Add function to get variable vs fixed function --- pandas/core/window/rolling.py | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 99aee212582b7..33e27f979d59e 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -373,6 +373,16 @@ def _get_roll_func(self, func_name: str) -> Callable: ) return window_func + def _get_cython_func_type(self, func): + """ + Return a variable or fixed cython function type. + + Variable algorithms do not use window while fixed do. + """ + if self.is_freq_type: + return self._get_roll_func("{}_variable".format(func)) + return partial(self._get_roll_func("{}_fixed".format(func)), win=self.window) + def _get_window_indexer(self): """ Return an indexer class that will compute the window start and end bounds @@ -1047,7 +1057,7 @@ def apply(self, func, raw=None, args=(), kwargs={}): raw = True window_func = partial( - self._get_roll_func("roll_generic"), + self._get_cython_func_type("roll_generic"), args=args, kwargs=kwargs, raw=raw, @@ -1065,7 +1075,7 @@ def apply_func(values, begin, end, min_periods, raw=raw): def sum(self, *args, **kwargs): nv.validate_window_func("sum", args, kwargs) - window_func = self._get_roll_func("roll_sum") + window_func = self._get_cython_func_type("roll_sum") return self._apply(window_func, self.center, floor=0, **kwargs) _shared_docs["max"] = dedent( @@ -1081,7 +1091,7 @@ def sum(self, *args, **kwargs): def max(self, *args, **kwargs): nv.validate_window_func("max", args, kwargs) - window_func = self._get_roll_func("roll_max") + window_func = self._get_cython_func_type("roll_max") return self._apply(window_func, self.center, **kwargs) _shared_docs["min"] = dedent( @@ -1123,12 +1133,12 @@ def max(self, *args, **kwargs): def min(self, *args, **kwargs): nv.validate_window_func("min", args, kwargs) - window_func = self._get_roll_func("roll_min") + window_func = self._get_cython_func_type("roll_min") return self._apply(window_func, self.center, **kwargs) def mean(self, *args, **kwargs): nv.validate_window_func("mean", args, kwargs) - window_func = self._get_roll_func("roll_mean") + window_func = self._get_cython_func_type("roll_mean") return self._apply(window_func, self.center, **kwargs) _shared_docs["median"] = dedent( @@ -1170,6 +1180,7 @@ def mean(self, *args, **kwargs): def median(self, **kwargs): window_func = self._get_roll_func("roll_median_c") + window_func = partial(window_func, win=self.window) return self._apply(window_func, self.center, **kwargs) _shared_docs["std"] = dedent( @@ -1234,7 +1245,7 @@ def median(self, **kwargs): def std(self, ddof=1, *args, **kwargs): nv.validate_window_func("std", args, kwargs) - window_func = self._get_roll_func("roll_var") + window_func = self._get_cython_func_type("roll_var") def zsqrt_func(values, begin, end, min_periods): return _zsqrt(window_func(values, begin, end, min_periods, ddof=ddof)) @@ -1303,7 +1314,7 @@ def zsqrt_func(values, begin, end, min_periods): def var(self, ddof=1, *args, **kwargs): nv.validate_window_func("var", args, kwargs) - window_func = partial(self._get_roll_func("roll_var"), ddof=ddof) + window_func = partial(self._get_cython_func_type("roll_var"), ddof=ddof) return self._apply(window_func, self.center, require_min_periods=1, **kwargs) _shared_docs[ @@ -1318,7 +1329,7 @@ def var(self, ddof=1, *args, **kwargs): """ def skew(self, **kwargs): - window_func = self._get_roll_func("roll_skew") + window_func = self._get_cython_func_type("roll_skew") return self._apply(window_func, self.center, require_min_periods=3, **kwargs) _shared_docs["kurt"] = dedent( @@ -1354,7 +1365,7 @@ def skew(self, **kwargs): ) def kurt(self, **kwargs): - window_func = self._get_roll_func("roll_kurt") + window_func = self._get_cython_func_type("roll_kurt") return self._apply(window_func, self.center, require_min_periods=4, **kwargs) _shared_docs["quantile"] = dedent( @@ -1415,9 +1426,9 @@ def kurt(self, **kwargs): def quantile(self, quantile, interpolation="linear", **kwargs): if quantile == 1.0: - window_func = self._get_roll_func("roll_max") + window_func = self._get_cython_func_type("roll_max") elif quantile == 0.0: - window_func = self._get_roll_func("roll_min") + window_func = self._get_cython_func_type("roll_min") else: window_func = partial( self._get_roll_func("roll_quantile"), From ef09f13c4e3e2af200dcd5a4a428222195359b0f Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Tue, 5 Nov 2019 21:24:24 -0800 Subject: [PATCH 12/28] Remove is_variable cdef variable --- pandas/_libs/window.pyx | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx index 75db42554b2c4..23224e5edff15 100644 --- a/pandas/_libs/window.pyx +++ b/pandas/_libs/window.pyx @@ -609,7 +609,6 @@ def roll_skew_fixed(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[i float64_t x = 0, xx = 0, xxx = 0 int64_t nobs = 0, i, j, N = len(values) int64_t s, e - bint is_variable int64_t[:] start, end ndarray[float64_t] output @@ -639,7 +638,6 @@ def roll_skew_variable(ndarray[float64_t] values, ndarray[int64_t] start, ndarra float64_t x = 0, xx = 0, xxx = 0 int64_t nobs = 0, i, j, N = len(values) int64_t s, e - bint is_variable ndarray[int64_t] start, end ndarray[float64_t] output @@ -830,7 +828,7 @@ def roll_median_c(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[int int64_t minp, int64_t win): cdef: float64_t val, res, prev - bint err = 0, is_variable + bint err = 0 int ret = 0 skiplist_t *sl Py_ssize_t i, j @@ -1226,7 +1224,6 @@ def roll_quantile(ndarray[float64_t, cast=True] values, ndarray[int64_t] start, skiplist_t *skiplist int64_t nobs = 0, i, j, s, e, N Py_ssize_t idx - bint is_variable int64_t[:] start, end ndarray[float64_t] output float64_t vlow, vhigh @@ -1340,7 +1337,6 @@ def roll_generic_fixed(object obj, float64_t *buf float64_t *oldbuf int64_t nobs = 0, i, j, s, e, N - bint is_variable int64_t[:] start, end n = len(obj) @@ -1414,7 +1410,6 @@ def roll_generic_variable(object obj, float64_t *buf float64_t *oldbuf int64_t nobs = 0, i, j, s, e, N - bint is_variable ndarray[int64_t] start, end n = len(obj) From 1195461110a4e0edaabc692ba56c2b72c3ba037c Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Tue, 5 Nov 2019 21:41:18 -0800 Subject: [PATCH 13/28] Fix some cython compilation --- pandas/_libs/window.pyx | 28 ++++++++-------------------- 1 file changed, 8 insertions(+), 20 deletions(-) diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx index 23224e5edff15..2106ed7a80d4e 100644 --- a/pandas/_libs/window.pyx +++ b/pandas/_libs/window.pyx @@ -108,7 +108,6 @@ def roll_count(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[int64_ float64_t val, count_x = 0.0 int64_t s, e, nobs, N = len(values) Py_ssize_t i, j - int64_t[:] start, end ndarray[float64_t] output output = np.empty(N, dtype=float) @@ -189,7 +188,6 @@ def roll_sum_variable(ndarray[float64_t] values, ndarray[int64_t] start, ndarray float64_t sum_x = 0 int64_t s, e int64_t nobs = 0, i, j, N = len(values) - int64_t[:] start, end ndarray[float64_t] output output = np.empty(N, dtype=float) @@ -439,8 +437,7 @@ def roll_var_fixed(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[in float64_t mean_x = 0, ssqdm_x = 0, nobs = 0, float64_t val, prev, delta, mean_x_old int64_t s, e - Py_ssize_t i, j, N - int64_t[:] start, end + Py_ssize_t i, j, N = len(values) ndarray[float64_t] output output = np.empty(N, dtype=float) @@ -496,8 +493,7 @@ def roll_var_variable(ndarray[float64_t] values, ndarray[int64_t] start, ndarray float64_t mean_x = 0, ssqdm_x = 0, nobs = 0, float64_t val, prev, delta, mean_x_old int64_t s, e - Py_ssize_t i, j, N - ndarray[int64_t] start, end + Py_ssize_t i, j, N = len(values) ndarray[float64_t] output output = np.empty(N, dtype=float) @@ -609,7 +605,6 @@ def roll_skew_fixed(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[i float64_t x = 0, xx = 0, xxx = 0 int64_t nobs = 0, i, j, N = len(values) int64_t s, e - int64_t[:] start, end ndarray[float64_t] output output = np.empty(N, dtype=float) @@ -638,7 +633,6 @@ def roll_skew_variable(ndarray[float64_t] values, ndarray[int64_t] start, ndarra float64_t x = 0, xx = 0, xxx = 0 int64_t nobs = 0, i, j, N = len(values) int64_t s, e - ndarray[int64_t] start, end ndarray[float64_t] output output = np.empty(N, dtype=float) @@ -754,9 +748,8 @@ def roll_kurt_fixed(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[i cdef: float64_t val, prev float64_t x = 0, xx = 0, xxx = 0, xxxx = 0 - int64_t nobs = 0, i, j, N + int64_t nobs = 0, i, j, N = len(values) int64_t s, e - int64_t[:] start, end ndarray[float64_t] output output = np.empty(N, dtype=float) @@ -782,8 +775,7 @@ def roll_kurt_variable(ndarray[float64_t] values, ndarray[int64_t] start, ndarra cdef: float64_t val, prev float64_t x = 0, xx = 0, xxx = 0, xxxx = 0 - int64_t nobs = 0, i, j, N = len(values) - ndarray[int64_t] start, end + int64_t nobs = 0, i, j, s, e, N = len(values) ndarray[float64_t] output output = np.empty(N, dtype=float) @@ -832,9 +824,8 @@ def roll_median_c(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[int int ret = 0 skiplist_t *sl Py_ssize_t i, j - int64_t nobs = 0, N, s, e + int64_t nobs = 0, N = len(values), s, e int midpoint - int64_t[:] start, end ndarray[float64_t] output # we use the Fixed/Variable Indexer here as the @@ -1222,9 +1213,8 @@ def roll_quantile(ndarray[float64_t, cast=True] values, ndarray[int64_t] start, cdef: float64_t val, prev, midpoint, idx_with_fraction skiplist_t *skiplist - int64_t nobs = 0, i, j, s, e, N + int64_t nobs = 0, i, j, s, e, N = len(values) Py_ssize_t idx - int64_t[:] start, end ndarray[float64_t] output float64_t vlow, vhigh InterpolationType interpolation_type @@ -1336,8 +1326,7 @@ def roll_generic_fixed(object obj, ndarray[float64_t, cast=True] arr float64_t *buf float64_t *oldbuf - int64_t nobs = 0, i, j, s, e, N - int64_t[:] start, end + int64_t nobs = 0, i, j, s, e, N = len(start) n = len(obj) if n == 0: @@ -1409,8 +1398,7 @@ def roll_generic_variable(object obj, ndarray[float64_t, cast=True] arr float64_t *buf float64_t *oldbuf - int64_t nobs = 0, i, j, s, e, N - ndarray[int64_t] start, end + int64_t nobs = 0, i, j, s, e, N = len(start) n = len(obj) if n == 0: From 71e08da9ff0c5ae24cb606f121bfc129638ae3c1 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Tue, 5 Nov 2019 23:07:46 -0800 Subject: [PATCH 14/28] Ensure window_indexer.pyx gets compiled by cython --- pandas/_libs/window_indexer.pyx | 9 ++++----- pandas/core/window/rolling.py | 2 -- setup.py | 2 ++ 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/pandas/_libs/window_indexer.pyx b/pandas/_libs/window_indexer.pyx index a17989cbb45cc..891c4dda2d669 100644 --- a/pandas/_libs/window_indexer.pyx +++ b/pandas/_libs/window_indexer.pyx @@ -2,14 +2,13 @@ import numpy as np from numpy cimport ndarray, int64_t -cnp.import_array() # ---------------------------------------------------------------------- # The indexer objects for rolling # These define start/end indexers to compute offsets -cpdef class MockFixedWindowIndexer: +class MockFixedWindowIndexer: """ We are just checking parameters of the indexer, @@ -36,7 +35,7 @@ cpdef class MockFixedWindowIndexer: return self.start, self.end -cpdef class FixedWindowIndexer: +class FixedWindowIndexer: """ create a fixed length window indexer object that has start & end, that point to offsets in @@ -70,7 +69,7 @@ cpdef class FixedWindowIndexer: def get_window_bounds(self): return self.start, self.end -cpdef class VariableWindowIndexer: +class VariableWindowIndexer: """ create a variable length window indexer object that has start & end, that point to offsets in @@ -115,7 +114,7 @@ cpdef class VariableWindowIndexer: cdef: ndarray[int64_t] start, end - int64_t start_bound, end_bound, N + int64_t start_bound, end_bound Py_ssize_t i, j start = np.empty(N, dtype='int64') diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 33e27f979d59e..8e86273bfde79 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -46,9 +46,7 @@ _flex_binary_moment, _GroupByMixin, _offset, - _require_min_periods, _shared_docs, - _use_window, _zsqrt, ) diff --git a/setup.py b/setup.py index 3dd38bdb6adbb..70dadc4637a50 100755 --- a/setup.py +++ b/setup.py @@ -348,6 +348,7 @@ class CheckSDist(sdist_class): "pandas/_libs/tslibs/resolution.pyx", "pandas/_libs/tslibs/parsing.pyx", "pandas/_libs/tslibs/tzconversion.pyx", + "pandas/_libs/window_indexer.pyx", "pandas/_libs/writers.pyx", "pandas/io/sas/sas.pyx", ] @@ -687,6 +688,7 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): }, "_libs.testing": {"pyxfile": "_libs/testing"}, "_libs.window": {"pyxfile": "_libs/window", "language": "c++", "suffix": ".cpp"}, + "_libs.window_indexer": {"pyxfile": "_libs/window_indexer"}, "_libs.writers": {"pyxfile": "_libs/writers"}, "io.sas._sas": {"pyxfile": "io/sas/sas"}, "io.msgpack._packer": { From 4c1915aec18a9ef90c19d29b68e0070337481335 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 10 Nov 2019 22:48:16 -0800 Subject: [PATCH 15/28] incremental progress on tests --- pandas/core/window/rolling.py | 72 +++++++++++++++++++++-------------- 1 file changed, 43 insertions(+), 29 deletions(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index ee5f163f5dde8..41529f4132299 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -41,11 +41,13 @@ import pandas.core.common as com from pandas.core.index import Index, ensure_index from pandas.core.window.common import ( + WindowGroupByMixin, _calculate_min_periods, _doc_template, _flex_binary_moment, _offset, _shared_docs, + _use_window, _zsqrt, ) @@ -411,6 +413,7 @@ def _apply( center: bool, require_min_periods: int = 0, floor: int = 1, + is_weighted: bool = False, **kwargs ): """ @@ -424,6 +427,7 @@ def _apply( center : bool require_min_periods : int floor: int + is_weighted: bool **kwargs additional arguments for rolling function and window function @@ -458,30 +462,26 @@ def _apply( continue # calculation function - if center: - offset = _offset(window, center) - additional_nans = np.array([np.NaN] * offset) + offset = _offset(window, center) if center else 0 + additional_nans = np.array([np.nan] * offset) + + if not is_weighted: def calc(x): x = np.concatenate((x, additional_nans)) min_periods = _calculate_min_periods( window, self.min_periods, len(x), require_min_periods, floor ) - start, end = window_indexer.get_window_bounds( + start, end = window_indexer( x, window, self.closed, index_as_array - ) + ).get_window_bounds() return func(x, start, end, min_periods) else: def calc(x): - min_periods = _calculate_min_periods( - window, self.min_periods, len(x), require_min_periods, floor - ) - start, end = window_indexer.get_window_bounds( - x, window, self.closed, index_as_array - ) - return func(x, start, end, min_periods) + x = np.concatenate((x, additional_nans)) + return func(x, window, self.min_periods) with np.errstate(all="ignore"): if values.ndim > 1: @@ -992,8 +992,8 @@ def _get_window( # GH #15662. `False` makes symmetric window, rather than periodic. return sig.get_window(win_type, window, False).astype(float) - def _get_roll_func( - self, cfunc: Callable, check_minp: Callable, index: np.ndarray, **kwargs + def _get_weighted_roll_func( + self, cfunc: Callable, check_minp: Callable, **kwargs ) -> Callable: def func(arg, window, min_periods=None, closed=None): minp = check_minp(min_periods, len(window)) @@ -1068,20 +1068,26 @@ def aggregate(self, func, *args, **kwargs): def sum(self, *args, **kwargs): nv.validate_window_func("sum", args, kwargs) window_func = self._get_roll_func("roll_weighted_sum") - return self._apply(window_func, self.center, **kwargs) + window_func = self._get_weighted_roll_func(window_func, _use_window) + return self._apply(window_func, center=self.center, is_weighted=True, **kwargs) @Substitution(name="window") @Appender(_shared_docs["mean"]) def mean(self, *args, **kwargs): nv.validate_window_func("mean", args, kwargs) window_func = self._get_roll_func("roll_weighted_mean") - return self._apply(window_func, self.center, **kwargs) + window_func = self._get_weighted_roll_func(window_func, _use_window) + return self._apply(window_func, center=self.center, is_weighted=True, **kwargs) @Substitution(name="window", versionadded="\n.. versionadded:: 1.0.0\n") @Appender(_shared_docs["var"]) def var(self, ddof=1, *args, **kwargs): nv.validate_window_func("var", args, kwargs) - return self._apply("roll_weighted_var", ddof=ddof, **kwargs) + window_func = self._get_roll_func("roll_weighted_var") + window_func = partial( + self._get_weighted_roll_func(window_func, _use_window), ddof=ddof + ) + return self._apply(window_func, center=self.center, is_weighted=True, **kwargs) @Substitution(name="window", versionadded="\n.. versionadded:: 1.0.0\n") @Appender(_shared_docs["std"]) @@ -1234,12 +1240,12 @@ def apply_func(values, begin, end, min_periods, raw=raw): return window_func(values, begin, end, min_periods) # Why do we always pass center=False? - return self._apply(apply_func, False, floor=0) + return self._apply(apply_func, center=False, floor=0) def sum(self, *args, **kwargs): nv.validate_window_func("sum", args, kwargs) window_func = self._get_cython_func_type("roll_sum") - return self._apply(window_func, self.center, floor=0, **kwargs) + return self._apply(window_func, center=self.center, floor=0, **kwargs) _shared_docs["max"] = dedent( """ @@ -1255,7 +1261,7 @@ def sum(self, *args, **kwargs): def max(self, *args, **kwargs): nv.validate_window_func("max", args, kwargs) window_func = self._get_cython_func_type("roll_max") - return self._apply(window_func, self.center, **kwargs) + return self._apply(window_func, center=self.center, **kwargs) _shared_docs["min"] = dedent( """ @@ -1297,12 +1303,12 @@ def max(self, *args, **kwargs): def min(self, *args, **kwargs): nv.validate_window_func("min", args, kwargs) window_func = self._get_cython_func_type("roll_min") - return self._apply(window_func, self.center, **kwargs) + return self._apply(window_func, center=self.center, **kwargs) def mean(self, *args, **kwargs): nv.validate_window_func("mean", args, kwargs) window_func = self._get_cython_func_type("roll_mean") - return self._apply(window_func, self.center, **kwargs) + return self._apply(window_func, center=self.center, **kwargs) _shared_docs["median"] = dedent( """ @@ -1344,7 +1350,7 @@ def mean(self, *args, **kwargs): def median(self, **kwargs): window_func = self._get_roll_func("roll_median_c") window_func = partial(window_func, win=self.window) - return self._apply(window_func, self.center, **kwargs) + return self._apply(window_func, center=self.center, **kwargs) def std(self, ddof=1, *args, **kwargs): nv.validate_window_func("std", args, kwargs) @@ -1353,12 +1359,16 @@ def std(self, ddof=1, *args, **kwargs): def zsqrt_func(values, begin, end, min_periods): return _zsqrt(window_func(values, begin, end, min_periods, ddof=ddof)) - return self._apply(zsqrt_func, self.center, require_min_periods=1, **kwargs) + return self._apply( + zsqrt_func, center=self.center, require_min_periods=1, **kwargs + ) def var(self, ddof=1, *args, **kwargs): nv.validate_window_func("var", args, kwargs) window_func = partial(self._get_cython_func_type("roll_var"), ddof=ddof) - return self._apply(window_func, self.center, require_min_periods=1, **kwargs) + return self._apply( + window_func, center=self.center, require_min_periods=1, **kwargs + ) _shared_docs[ "skew" @@ -1373,7 +1383,9 @@ def var(self, ddof=1, *args, **kwargs): def skew(self, **kwargs): window_func = self._get_cython_func_type("roll_skew") - return self._apply(window_func, self.center, require_min_periods=3, **kwargs) + return self._apply( + window_func, center=self.center, require_min_periods=3, **kwargs + ) _shared_docs["kurt"] = dedent( """ @@ -1409,7 +1421,9 @@ def skew(self, **kwargs): def kurt(self, **kwargs): window_func = self._get_cython_func_type("roll_kurt") - return self._apply(window_func, self.center, require_min_periods=4, **kwargs) + return self._apply( + window_func, center=self.center, require_min_periods=4, **kwargs + ) _shared_docs["quantile"] = dedent( """ @@ -1479,7 +1493,7 @@ def quantile(self, quantile, interpolation="linear", **kwargs): interpolation=interpolation, ) - return self._apply(window_func, self.center, **kwargs) + return self._apply(window_func, center=self.center, **kwargs) _shared_docs[ "cov" @@ -1835,7 +1849,7 @@ def count(self): # different impl for freq counting if self.is_freq_type: window_func = self._get_roll_func("roll_count") - return self._apply(window_func, self.center) + return self._apply(window_func, center=self.center) return super().count() From 045a2859f8fc542e0b6f9ccae1437f3ca5115b4d Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 11 Nov 2019 23:58:41 -0800 Subject: [PATCH 16/28] Fix more tests --- pandas/core/window/rolling.py | 64 +++++++++++++++++++++++++---------- 1 file changed, 47 insertions(+), 17 deletions(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 41529f4132299..f2089d878637d 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -397,7 +397,9 @@ def _get_cython_func_type(self, func): """ if self.is_freq_type: return self._get_roll_func("{}_variable".format(func)) - return partial(self._get_roll_func("{}_fixed".format(func)), win=self.window) + return partial( + self._get_roll_func("{}_fixed".format(func)), win=self._get_window() + ) def _get_window_indexer(self): """ @@ -414,6 +416,7 @@ def _apply( require_min_periods: int = 0, floor: int = 1, is_weighted: bool = False, + name: Optional[str] = None, **kwargs ): """ @@ -427,7 +430,9 @@ def _apply( center : bool require_min_periods : int floor: int - is_weighted: bool + is_weighted + name: str, + compatibility with groupby.rolling **kwargs additional arguments for rolling function and window function @@ -1069,7 +1074,9 @@ def sum(self, *args, **kwargs): nv.validate_window_func("sum", args, kwargs) window_func = self._get_roll_func("roll_weighted_sum") window_func = self._get_weighted_roll_func(window_func, _use_window) - return self._apply(window_func, center=self.center, is_weighted=True, **kwargs) + return self._apply( + window_func, center=self.center, is_weighted=True, name="sum", **kwargs + ) @Substitution(name="window") @Appender(_shared_docs["mean"]) @@ -1077,7 +1084,9 @@ def mean(self, *args, **kwargs): nv.validate_window_func("mean", args, kwargs) window_func = self._get_roll_func("roll_weighted_mean") window_func = self._get_weighted_roll_func(window_func, _use_window) - return self._apply(window_func, center=self.center, is_weighted=True, **kwargs) + return self._apply( + window_func, center=self.center, is_weighted=True, name="mean", **kwargs + ) @Substitution(name="window", versionadded="\n.. versionadded:: 1.0.0\n") @Appender(_shared_docs["var"]) @@ -1087,13 +1096,15 @@ def var(self, ddof=1, *args, **kwargs): window_func = partial( self._get_weighted_roll_func(window_func, _use_window), ddof=ddof ) - return self._apply(window_func, center=self.center, is_weighted=True, **kwargs) + return self._apply( + window_func, center=self.center, is_weighted=True, name="var", **kwargs + ) @Substitution(name="window", versionadded="\n.. versionadded:: 1.0.0\n") @Appender(_shared_docs["std"]) def std(self, ddof=1, *args, **kwargs): nv.validate_window_func("std", args, kwargs) - return _zsqrt(self.var(ddof=ddof, **kwargs)) + return _zsqrt(self.var(ddof=ddof, name="std", **kwargs)) class _Rolling(_Window): @@ -1208,6 +1219,7 @@ def apply(self, func, raw=None, args=(), kwargs={}): from pandas import Series kwargs.pop("_level", None) + kwargs.pop("floor", None) window = self._get_window() offset = _offset(window, self.center) @@ -1245,7 +1257,10 @@ def apply_func(values, begin, end, min_periods, raw=raw): def sum(self, *args, **kwargs): nv.validate_window_func("sum", args, kwargs) window_func = self._get_cython_func_type("roll_sum") - return self._apply(window_func, center=self.center, floor=0, **kwargs) + kwargs.pop("floor", None) + return self._apply( + window_func, center=self.center, floor=0, name="sum", **kwargs + ) _shared_docs["max"] = dedent( """ @@ -1261,7 +1276,7 @@ def sum(self, *args, **kwargs): def max(self, *args, **kwargs): nv.validate_window_func("max", args, kwargs) window_func = self._get_cython_func_type("roll_max") - return self._apply(window_func, center=self.center, **kwargs) + return self._apply(window_func, center=self.center, name="max", **kwargs) _shared_docs["min"] = dedent( """ @@ -1303,12 +1318,12 @@ def max(self, *args, **kwargs): def min(self, *args, **kwargs): nv.validate_window_func("min", args, kwargs) window_func = self._get_cython_func_type("roll_min") - return self._apply(window_func, center=self.center, **kwargs) + return self._apply(window_func, center=self.center, name="min", **kwargs) def mean(self, *args, **kwargs): nv.validate_window_func("mean", args, kwargs) window_func = self._get_cython_func_type("roll_mean") - return self._apply(window_func, center=self.center, **kwargs) + return self._apply(window_func, center=self.center, name="mean", **kwargs) _shared_docs["median"] = dedent( """ @@ -1350,24 +1365,26 @@ def mean(self, *args, **kwargs): def median(self, **kwargs): window_func = self._get_roll_func("roll_median_c") window_func = partial(window_func, win=self.window) - return self._apply(window_func, center=self.center, **kwargs) + return self._apply(window_func, center=self.center, name="median", **kwargs) def std(self, ddof=1, *args, **kwargs): nv.validate_window_func("std", args, kwargs) + kwargs.pop("require_min_periods", None) window_func = self._get_cython_func_type("roll_var") def zsqrt_func(values, begin, end, min_periods): return _zsqrt(window_func(values, begin, end, min_periods, ddof=ddof)) return self._apply( - zsqrt_func, center=self.center, require_min_periods=1, **kwargs + zsqrt_func, center=self.center, require_min_periods=1, name="std", **kwargs ) def var(self, ddof=1, *args, **kwargs): nv.validate_window_func("var", args, kwargs) + kwargs.pop("require_min_periods", None) window_func = partial(self._get_cython_func_type("roll_var"), ddof=ddof) return self._apply( - window_func, center=self.center, require_min_periods=1, **kwargs + window_func, center=self.center, require_min_periods=1, name="var", **kwargs ) _shared_docs[ @@ -1383,8 +1400,13 @@ def var(self, ddof=1, *args, **kwargs): def skew(self, **kwargs): window_func = self._get_cython_func_type("roll_skew") + kwargs.pop("require_min_periods", None) return self._apply( - window_func, center=self.center, require_min_periods=3, **kwargs + window_func, + center=self.center, + require_min_periods=3, + name="skew", + **kwargs ) _shared_docs["kurt"] = dedent( @@ -1421,8 +1443,13 @@ def skew(self, **kwargs): def kurt(self, **kwargs): window_func = self._get_cython_func_type("roll_kurt") + kwargs.pop("require_min_periods", None) return self._apply( - window_func, center=self.center, require_min_periods=4, **kwargs + window_func, + center=self.center, + require_min_periods=4, + name="kurt", + **kwargs ) _shared_docs["quantile"] = dedent( @@ -1489,11 +1516,14 @@ def quantile(self, quantile, interpolation="linear", **kwargs): else: window_func = partial( self._get_roll_func("roll_quantile"), + win=self._get_window(), quantile=quantile, interpolation=interpolation, ) - return self._apply(window_func, center=self.center, **kwargs) + # Pass through for groupby.rolling + kwargs["quantile"] = quantile + return self._apply(window_func, center=self.center, name="quantile", **kwargs) _shared_docs[ "cov" @@ -1849,7 +1879,7 @@ def count(self): # different impl for freq counting if self.is_freq_type: window_func = self._get_roll_func("roll_count") - return self._apply(window_func, center=self.center) + return self._apply(window_func, center=self.center, name="count") return super().count() From 3844a3a8fe11ebd7d0aaad9d133930f450806431 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Tue, 12 Nov 2019 23:53:29 -0800 Subject: [PATCH 17/28] Pass name=func for groupby.rolling compat --- pandas/core/window/common.py | 1 + pandas/core/window/rolling.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/window/common.py b/pandas/core/window/common.py index 5c76b86e855e0..eae0cdce47446 100644 --- a/pandas/core/window/common.py +++ b/pandas/core/window/common.py @@ -69,6 +69,7 @@ def _apply( Dispatch to apply; we are stripping all of the _apply kwargs and performing the original function call on the grouped object. """ + kwargs.pop('floor', None) # TODO: can we de-duplicate with _dispatch? def f(x, name=name, *args): diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index f2089d878637d..e6859e35ec090 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1252,7 +1252,8 @@ def apply_func(values, begin, end, min_periods, raw=raw): return window_func(values, begin, end, min_periods) # Why do we always pass center=False? - return self._apply(apply_func, center=False, floor=0) + # name=func for WindowGroupByMixin._apply + return self._apply(apply_func, center=False, floor=0, name=func) def sum(self, *args, **kwargs): nv.validate_window_func("sum", args, kwargs) From 019c58efc89fd49ef5633617404bfc9fb1168f41 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Wed, 13 Nov 2019 00:03:44 -0800 Subject: [PATCH 18/28] Fix 2 more tests --- pandas/core/window/common.py | 2 +- pandas/core/window/rolling.py | 16 ++++++++++++++-- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/pandas/core/window/common.py b/pandas/core/window/common.py index eae0cdce47446..50062b3499449 100644 --- a/pandas/core/window/common.py +++ b/pandas/core/window/common.py @@ -69,7 +69,7 @@ def _apply( Dispatch to apply; we are stripping all of the _apply kwargs and performing the original function call on the grouped object. """ - kwargs.pop('floor', None) + kwargs.pop("floor", None) # TODO: can we de-duplicate with _dispatch? def f(x, name=name, *args): diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index e6859e35ec090..acfa3bb7c6145 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1376,16 +1376,28 @@ def std(self, ddof=1, *args, **kwargs): def zsqrt_func(values, begin, end, min_periods): return _zsqrt(window_func(values, begin, end, min_periods, ddof=ddof)) + # ddof passed again for compat with groupby.rolling return self._apply( - zsqrt_func, center=self.center, require_min_periods=1, name="std", **kwargs + zsqrt_func, + center=self.center, + require_min_periods=1, + name="std", + ddof=ddof, + **kwargs ) def var(self, ddof=1, *args, **kwargs): nv.validate_window_func("var", args, kwargs) kwargs.pop("require_min_periods", None) window_func = partial(self._get_cython_func_type("roll_var"), ddof=ddof) + # ddof passed again for compat with groupby.rolling return self._apply( - window_func, center=self.center, require_min_periods=1, name="var", **kwargs + window_func, + center=self.center, + require_min_periods=1, + name="var", + ddof=ddof, + **kwargs ) _shared_docs[ From 8401061109925a67e9e2484b262220da24a8dbd1 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Wed, 13 Nov 2019 22:29:31 -0800 Subject: [PATCH 19/28] Fix more tests --- pandas/core/window/rolling.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index acfa3bb7c6145..2477bd041c708 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -478,7 +478,7 @@ def calc(x): window, self.min_periods, len(x), require_min_periods, floor ) start, end = window_indexer( - x, window, self.closed, index_as_array + x, min(window, len(x)), self.closed, index_as_array ).get_window_bounds() return func(x, start, end, min_periods) @@ -1092,10 +1092,9 @@ def mean(self, *args, **kwargs): @Appender(_shared_docs["var"]) def var(self, ddof=1, *args, **kwargs): nv.validate_window_func("var", args, kwargs) - window_func = self._get_roll_func("roll_weighted_var") - window_func = partial( - self._get_weighted_roll_func(window_func, _use_window), ddof=ddof - ) + window_func = partial(self._get_roll_func("roll_weighted_var"), ddof=ddof) + window_func = self._get_weighted_roll_func(window_func, _use_window) + kwargs.pop("name", None) return self._apply( window_func, center=self.center, is_weighted=True, name="var", **kwargs ) From e20682af129603883919cdae819d407b55f8d2f4 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 18 Nov 2019 22:49:20 -0800 Subject: [PATCH 20/28] Fix more bugs --- pandas/_libs/window_indexer.pyx | 4 ++-- pandas/core/window/rolling.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/window_indexer.pyx b/pandas/_libs/window_indexer.pyx index 891c4dda2d669..7a5a46ea309f0 100644 --- a/pandas/_libs/window_indexer.pyx +++ b/pandas/_libs/window_indexer.pyx @@ -60,11 +60,11 @@ class FixedWindowIndexer: start_s = np.zeros(win, dtype='int64') start_e = np.arange(win, N, dtype='int64') - win + 1 - self.start = np.concatenate([start_s, start_e]) + self.start = np.concatenate([start_s, start_e])[:N] end_s = np.arange(win, dtype='int64') + 1 end_e = start_e + win - self.end = np.concatenate([end_s, end_e]) + self.end = np.concatenate([end_s, end_e])[:N] def get_window_bounds(self): return self.start, self.end diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 286e7c9c9be9d..ed6b684048fd0 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -478,7 +478,7 @@ def calc(x): window, self.min_periods, len(x), require_min_periods, floor ) start, end = window_indexer( - x, min(window, len(x)), self.closed, index_as_array + x, window, self.closed, index_as_array ).get_window_bounds() return func(x, start, end, min_periods) @@ -1364,7 +1364,7 @@ def mean(self, *args, **kwargs): def median(self, **kwargs): window_func = self._get_roll_func("roll_median_c") - window_func = partial(window_func, win=self.window) + window_func = partial(window_func, win=self._get_window()) return self._apply(window_func, center=self.center, name="median", **kwargs) def std(self, ddof=1, *args, **kwargs): From 57c96217eb83c6d50abb16067c53ffc93f3ee0d1 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 18 Nov 2019 23:36:03 -0800 Subject: [PATCH 21/28] Fix all the tests --- pandas/_libs/window.pyx | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx index 4ea99b2ea9fa2..02ac53a9e91ed 100644 --- a/pandas/_libs/window.pyx +++ b/pandas/_libs/window.pyx @@ -485,7 +485,7 @@ def roll_var_fixed(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[in def roll_var_variable(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[int64_t] end, int64_t minp, - int64_t win, int ddof=1): + int ddof=1): """ Numerically stable implementation using Welford's method. """ @@ -498,9 +498,6 @@ def roll_var_variable(ndarray[float64_t] values, ndarray[int64_t] start, ndarray output = np.empty(N, dtype=float) - # Check for windows larger than array, addresses #7297 - win = min(win, N) - with nogil: for i in range(0, N): @@ -832,10 +829,10 @@ def roll_median_c(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[int # actual skiplist ops outweigh any window computation costs output = np.empty(N, dtype=float) - if win == 0: + if win == 0 or (end - start).max() == 0: output[:] = NaN return output - + win = (end - start).max() sl = skiplist_init(win) if sl == NULL: raise MemoryError("skiplist_init failed") @@ -1232,10 +1229,10 @@ def roll_quantile(ndarray[float64_t, cast=True] values, ndarray[int64_t] start, # actual skiplist ops outweigh any window computation costs output = np.empty(N, dtype=float) - if win == 0: + if win == 0 or (end - start).max() == 0: output[:] = NaN return output - + win = (end - start).max() skiplist = skiplist_init(win) if skiplist == NULL: raise MemoryError("skiplist_init failed") From 23cc995db0a52371ab826793369303d0dfef943a Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 18 Nov 2019 23:37:59 -0800 Subject: [PATCH 22/28] black --- pandas/core/window/rolling.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index ed6b684048fd0..bf36a671fc63d 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1382,7 +1382,7 @@ def zsqrt_func(values, begin, end, min_periods): require_min_periods=1, name="std", ddof=ddof, - **kwargs + **kwargs, ) def var(self, ddof=1, *args, **kwargs): @@ -1396,7 +1396,7 @@ def var(self, ddof=1, *args, **kwargs): require_min_periods=1, name="var", ddof=ddof, - **kwargs + **kwargs, ) _shared_docs[ @@ -1418,7 +1418,7 @@ def skew(self, **kwargs): center=self.center, require_min_periods=3, name="skew", - **kwargs + **kwargs, ) _shared_docs["kurt"] = dedent( @@ -1461,7 +1461,7 @@ def kurt(self, **kwargs): center=self.center, require_min_periods=4, name="kurt", - **kwargs + **kwargs, ) _shared_docs["quantile"] = dedent( From 5f8751a79827b8f59595fe1879aa41ca18fcfef8 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Tue, 19 Nov 2019 17:45:20 -0800 Subject: [PATCH 23/28] Lint pyx files --- pandas/_libs/window.pyx | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx index 02ac53a9e91ed..f4f780ac166f6 100644 --- a/pandas/_libs/window.pyx +++ b/pandas/_libs/window.pyx @@ -182,8 +182,8 @@ cdef inline void remove_sum(float64_t val, int64_t *nobs, float64_t *sum_x) nogi sum_x[0] = sum_x[0] - val -def roll_sum_variable(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[int64_t] end, - int64_t minp): +def roll_sum_variable(ndarray[float64_t] values, ndarray[int64_t] start, + ndarray[int64_t] end, int64_t minp): cdef: float64_t sum_x = 0 int64_t s, e @@ -221,8 +221,8 @@ def roll_sum_variable(ndarray[float64_t] values, ndarray[int64_t] start, ndarray return output -def roll_sum_fixed(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[int64_t] end, - int64_t minp, int64_t win): +def roll_sum_fixed(ndarray[float64_t] values, ndarray[int64_t] start, + ndarray[int64_t] end, int64_t minp, int64_t win): cdef: float64_t val, prev_x, sum_x = 0 int64_t range_endpoint @@ -298,8 +298,8 @@ cdef inline void remove_mean(float64_t val, Py_ssize_t *nobs, float64_t *sum_x, neg_ct[0] = neg_ct[0] - 1 -def roll_mean_fixed(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[int64_t] end, - int64_t minp, int64_t win): +def roll_mean_fixed(ndarray[float64_t] values, ndarray[int64_t] start, + ndarray[int64_t] end, int64_t minp, int64_t win): cdef: float64_t val, prev_x, sum_x = 0 Py_ssize_t nobs = 0, i, neg_ct = 0, N = len(values) @@ -325,8 +325,9 @@ def roll_mean_fixed(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[i return output -def roll_mean_variable(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[int64_t] end, - int64_t minp): + +def roll_mean_variable(ndarray[float64_t] values, ndarray[int64_t] start, + ndarray[int64_t] end, int64_t minp): cdef: float64_t val, sum_x = 0 int64_t s, e From 72703bda3b46a48ffac59aa13fc929cadddc261a Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Tue, 19 Nov 2019 23:20:16 -0800 Subject: [PATCH 24/28] Address Brock's comments --- pandas/_libs/window_indexer.pyx | 11 ++++------- pandas/core/window/rolling.py | 2 +- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/pandas/_libs/window_indexer.pyx b/pandas/_libs/window_indexer.pyx index 7a5a46ea309f0..93cb7dd4258c6 100644 --- a/pandas/_libs/window_indexer.pyx +++ b/pandas/_libs/window_indexer.pyx @@ -55,7 +55,7 @@ class FixedWindowIndexer: """ def __init__(self, ndarray values, int64_t win, object closed, object index=None): cdef: - ndarray start_s, start_e, end_s, end_e + ndarray[int64_t, ndim=1] start_s, start_e, end_s, end_e int64_t N = len(values) start_s = np.zeros(win, dtype='int64') @@ -105,11 +105,8 @@ class VariableWindowIndexer: self.start, self.end = self.build(index, win, left_closed, right_closed, N) - # TODO: Maybe will need to use this? - # max window size - #self.win = (self.end - self.start).max() - - def build(self, const int64_t[:] index, int64_t win, bint left_closed, + @staticmethod + def build(const int64_t[:] index, int64_t win, bint left_closed, bint right_closed, int64_t N): cdef: @@ -164,4 +161,4 @@ class VariableWindowIndexer: return start, end def get_window_bounds(self): - return self.start, self.end \ No newline at end of file + return self.start, self.end diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index f675356feea90..59d0f1015977c 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1250,7 +1250,7 @@ def apply_func(values, begin, end, min_periods, raw=raw): values = Series(values, index=self.obj.index) return window_func(values, begin, end, min_periods) - # Why do we always pass center=False? + # TODO: Why do we always pass center=False? # name=func for WindowGroupByMixin._apply return self._apply(apply_func, center=False, floor=0, name=func) From 43af04fc88e6789a1cc597cb22c7609c4abe407f Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Tue, 19 Nov 2019 23:36:57 -0800 Subject: [PATCH 25/28] Deprivatize calculate_min_periods --- pandas/core/window/common.py | 19 +++++++++++++++++-- pandas/core/window/rolling.py | 4 ++-- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/pandas/core/window/common.py b/pandas/core/window/common.py index 50062b3499449..683b41102dd27 100644 --- a/pandas/core/window/common.py +++ b/pandas/core/window/common.py @@ -269,13 +269,28 @@ def _use_window(minp, window): return minp -def _calculate_min_periods( +def calculate_min_periods( window: int, min_periods: Optional[int], num_values: int, required_min_periods: int, floor: int, -): +) -> int: + """ + Calculates final minimum periods value for rolling aggregations. + + Parameters + ---------- + window : passed window value + min_periods : passed min periods value + num_values : total number of values + required_min_periods : required min periods per aggregation function + floor : required min periods per aggregation function + + Returns + ------- + min_periods : int + """ if min_periods is None: min_periods = window else: diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 59d0f1015977c..fd2e8aa2ad02f 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -42,13 +42,13 @@ from pandas.core.index import Index, ensure_index from pandas.core.window.common import ( WindowGroupByMixin, - _calculate_min_periods, _doc_template, _flex_binary_moment, _offset, _shared_docs, _use_window, _zsqrt, + calculate_min_periods, ) @@ -474,7 +474,7 @@ def _apply( def calc(x): x = np.concatenate((x, additional_nans)) - min_periods = _calculate_min_periods( + min_periods = calculate_min_periods( window, self.min_periods, len(x), require_min_periods, floor ) start, end = window_indexer( From f1e7c7a1bdb9441bb44e943d81f9b88ae9a301c8 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Tue, 19 Nov 2019 23:49:59 -0800 Subject: [PATCH 26/28] More linting of pyx files --- pandas/_libs/window.pyx | 46 ++++++++++++++++++++------------- pandas/_libs/window_indexer.pyx | 1 + 2 files changed, 29 insertions(+), 18 deletions(-) diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx index f4f780ac166f6..a1bbd24d5760c 100644 --- a/pandas/_libs/window.pyx +++ b/pandas/_libs/window.pyx @@ -429,8 +429,8 @@ cdef inline void remove_var(float64_t val, float64_t *nobs, float64_t *mean_x, ssqdm_x[0] = 0 -def roll_var_fixed(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[int64_t] end, int64_t minp, - int64_t win, int ddof=1): +def roll_var_fixed(ndarray[float64_t] values, ndarray[int64_t] start, + ndarray[int64_t] end, int64_t minp, int64_t win, int ddof=1): """ Numerically stable implementation using Welford's method. """ @@ -485,8 +485,8 @@ def roll_var_fixed(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[in return output -def roll_var_variable(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[int64_t] end, int64_t minp, - int ddof=1): +def roll_var_variable(ndarray[float64_t] values, ndarray[int64_t] start, + ndarray[int64_t] end, int64_t minp, int ddof=1): """ Numerically stable implementation using Welford's method. """ @@ -533,6 +533,7 @@ def roll_var_variable(ndarray[float64_t] values, ndarray[int64_t] start, ndarray # ---------------------------------------------------------------------- # Rolling skewness + cdef inline float64_t calc_skew(int64_t minp, int64_t nobs, float64_t x, float64_t xx, float64_t xxx) nogil: @@ -596,8 +597,9 @@ cdef inline void remove_skew(float64_t val, int64_t *nobs, xx[0] = xx[0] - val * val xxx[0] = xxx[0] - val * val * val -def roll_skew_fixed(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[int64_t] end, - int64_t minp, int64_t win): + +def roll_skew_fixed(ndarray[float64_t] values, ndarray[int64_t] start, + ndarray[int64_t] end, int64_t minp, int64_t win): cdef: float64_t val, prev float64_t x = 0, xx = 0, xxx = 0 @@ -625,7 +627,9 @@ def roll_skew_fixed(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[i return output -def roll_skew_variable(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[int64_t] end, int64_t minp): + +def roll_skew_variable(ndarray[float64_t] values, ndarray[int64_t] start, + ndarray[int64_t] end, int64_t minp): cdef: float64_t val, prev float64_t x = 0, xx = 0, xxx = 0 @@ -741,8 +745,9 @@ cdef inline void remove_kurt(float64_t val, int64_t *nobs, xxx[0] = xxx[0] - val * val * val xxxx[0] = xxxx[0] - val * val * val * val -def roll_kurt_fixed(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[int64_t] end, - int64_t minp, int64_t win): + +def roll_kurt_fixed(ndarray[float64_t] values, ndarray[int64_t] start, + ndarray[int64_t] end, int64_t minp, int64_t win): cdef: float64_t val, prev float64_t x = 0, xx = 0, xxx = 0, xxxx = 0 @@ -769,7 +774,8 @@ def roll_kurt_fixed(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[i return output -def roll_kurt_variable(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[int64_t] end, int64_t minp): +def roll_kurt_variable(ndarray[float64_t] values, ndarray[int64_t] start, + ndarray[int64_t] end, int64_t minp): cdef: float64_t val, prev float64_t x = 0, xx = 0, xxx = 0, xxxx = 0 @@ -814,8 +820,8 @@ def roll_kurt_variable(ndarray[float64_t] values, ndarray[int64_t] start, ndarra # Rolling median, min, max -def roll_median_c(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[int64_t] end, - int64_t minp, int64_t win): +def roll_median_c(ndarray[float64_t] values, ndarray[int64_t] start, + ndarray[int64_t] end, int64_t minp, int64_t win): cdef: float64_t val, res, prev bint err = 0 @@ -942,8 +948,8 @@ cdef inline numeric calc_mm(int64_t minp, Py_ssize_t nobs, return result -def roll_max_fixed(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[int64_t] end, - int64_t minp, int64_t win): +def roll_max_fixed(ndarray[float64_t] values, ndarray[int64_t] start, + ndarray[int64_t] end, int64_t minp, int64_t win): """ Moving max of 1d array of any numeric type along axis=0 ignoring NaNs. @@ -962,7 +968,8 @@ def roll_max_fixed(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[in return _roll_min_max_fixed(values, start, end, minp, win, is_max=1) -def roll_max_variable(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[int64_t] end, int64_t minp): +def roll_max_variable(ndarray[float64_t] values, ndarray[int64_t] start, + ndarray[int64_t] end, int64_t minp): """ Moving max of 1d array of any numeric type along axis=0 ignoring NaNs. @@ -981,8 +988,8 @@ def roll_max_variable(ndarray[float64_t] values, ndarray[int64_t] start, ndarray return _roll_min_max_variable(values, start, end, minp, is_max=1) -def roll_min_fixed(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[int64_t] end, - int64_t minp, int64_t win): +def roll_min_fixed(ndarray[float64_t] values, ndarray[int64_t] start, + ndarray[int64_t] end, int64_t minp, int64_t win): """ Moving max of 1d array of any numeric type along axis=0 ignoring NaNs. @@ -998,7 +1005,8 @@ def roll_min_fixed(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[in return _roll_min_max_fixed(values, start, end, minp, win, is_max=0) -def roll_min_variable(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[int64_t] end, int64_t minp): +def roll_min_variable(ndarray[float64_t] values, ndarray[int64_t] start, + ndarray[int64_t] end, int64_t minp): """ Moving max of 1d array of any numeric type along axis=0 ignoring NaNs. @@ -1313,6 +1321,7 @@ def roll_quantile(ndarray[float64_t, cast=True] values, ndarray[int64_t] start, return output + def roll_generic_fixed(object obj, ndarray[int64_t] start, ndarray[int64_t] end, int64_t minp, int64_t win, @@ -1385,6 +1394,7 @@ def roll_generic_fixed(object obj, return output + def roll_generic_variable(object obj, ndarray[int64_t] start, ndarray[int64_t] end, int64_t minp, diff --git a/pandas/_libs/window_indexer.pyx b/pandas/_libs/window_indexer.pyx index 93cb7dd4258c6..8f49a8b9462d3 100644 --- a/pandas/_libs/window_indexer.pyx +++ b/pandas/_libs/window_indexer.pyx @@ -69,6 +69,7 @@ class FixedWindowIndexer: def get_window_bounds(self): return self.start, self.end + class VariableWindowIndexer: """ create a variable length window indexer object From 82947059e857cc2cdfadb4b9b68bbf1700439de0 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Wed, 20 Nov 2019 11:20:00 -0800 Subject: [PATCH 27/28] Fix typing and lint issue --- pandas/_libs/window.pyx | 1 + pandas/core/window/common.py | 11 +++++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx index 452775eecca6e..fa615bc24ea73 100644 --- a/pandas/_libs/window.pyx +++ b/pandas/_libs/window.pyx @@ -774,6 +774,7 @@ def roll_kurt_fixed(ndarray[float64_t] values, ndarray[int64_t] start, return output + def roll_kurt_variable(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[int64_t] end, int64_t minp): cdef: diff --git a/pandas/core/window/common.py b/pandas/core/window/common.py index 683b41102dd27..453fd12495543 100644 --- a/pandas/core/window/common.py +++ b/pandas/core/window/common.py @@ -1,6 +1,6 @@ """Common utility functions for rolling operations""" from collections import defaultdict -from typing import Optional +from typing import Callable, Optional import warnings import numpy as np @@ -63,7 +63,14 @@ def __init__(self, obj, *args, **kwargs): cov = _dispatch("cov", other=None, pairwise=None) def _apply( - self, func, name=None, window=None, center=None, check_minp=None, **kwargs + self, + func: Callable, + center: bool, + require_min_periods: int = 0, + floor: int = 1, + is_weighted: bool = False, + name: Optional[str] = None, + **kwargs, ): """ Dispatch to apply; we are stripping all of the _apply kwargs and From 09ae25806962eee488499d12ffa5f76393bc041c Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Wed, 20 Nov 2019 23:43:45 -0800 Subject: [PATCH 28/28] Docstring updates --- pandas/_libs/window.pyx | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx index fa615bc24ea73..303b4f6f24eac 100644 --- a/pandas/_libs/window.pyx +++ b/pandas/_libs/window.pyx @@ -956,13 +956,13 @@ def roll_max_fixed(ndarray[float64_t] values, ndarray[int64_t] start, Parameters ---------- - values: numpy array - window: int, size of rolling window - minp: if number of observations in window + values : np.ndarray[np.float64] + window : int, size of rolling window + minp : if number of observations in window is below this, output a NaN - index: ndarray, optional + index : ndarray, optional index for window computation - closed: 'right', 'left', 'both', 'neither' + closed : 'right', 'left', 'both', 'neither' make the interval closed on the right, left, both or neither endpoints """ @@ -976,13 +976,13 @@ def roll_max_variable(ndarray[float64_t] values, ndarray[int64_t] start, Parameters ---------- - values: numpy array - window: int, size of rolling window - minp: if number of observations in window + values : np.ndarray[np.float64] + window : int, size of rolling window + minp : if number of observations in window is below this, output a NaN - index: ndarray, optional + index : ndarray, optional index for window computation - closed: 'right', 'left', 'both', 'neither' + closed : 'right', 'left', 'both', 'neither' make the interval closed on the right, left, both or neither endpoints """ @@ -996,11 +996,11 @@ def roll_min_fixed(ndarray[float64_t] values, ndarray[int64_t] start, Parameters ---------- - values: numpy array - window: int, size of rolling window - minp: if number of observations in window + values : np.ndarray[np.float64] + window : int, size of rolling window + minp : if number of observations in window is below this, output a NaN - index: ndarray, optional + index : ndarray, optional index for window computation """ return _roll_min_max_fixed(values, start, end, minp, win, is_max=0) @@ -1013,11 +1013,11 @@ def roll_min_variable(ndarray[float64_t] values, ndarray[int64_t] start, Parameters ---------- - values: numpy array - window: int, size of rolling window - minp: if number of observations in window + values : np.ndarray[np.float64] + window : int, size of rolling window + minp : if number of observations in window is below this, output a NaN - index: ndarray, optional + index : ndarray, optional index for window computation """ return _roll_min_max_variable(values, start, end, minp, is_max=0)