pandas-dev · jreback · Dec 27, 2019 · Dec 8, 2019 · Dec 8, 2019 · Dec 9, 2019
diff --git a/ci/deps/azure-36-minimum_versions.yaml b/ci/deps/azure-36-minimum_versions.yaml
@@ -17,6 +17,7 @@ dependencies:
   - beautifulsoup4=4.6.0
   - bottleneck=1.2.1
   - jinja2=2.8
+  - numba=0.46.0
   - numexpr=2.6.2
   - numpy=1.13.3
   - openpyxl=2.5.7

diff --git a/ci/deps/azure-windows-36.yaml b/ci/deps/azure-windows-36.yaml
@@ -17,6 +17,7 @@ dependencies:
   - bottleneck
   - fastparquet>=0.3.2
   - matplotlib=3.0.2
+  - numba
   - numexpr
   - numpy=1.15.*
   - openpyxl

diff --git a/doc/source/getting_started/install.rst b/doc/source/getting_started/install.rst
@@ -255,6 +255,7 @@ gcsfs                     0.2.2              Google Cloud Storage access
 html5lib                                     HTML parser for read_html (see :ref:`note <optional_html>`)
 lxml                      3.8.0              HTML parser for read_html (see :ref:`note <optional_html>`)
 matplotlib                2.2.2              Visualization
+numba                     0.46.0             Alternative execution engine for rolling operations
 openpyxl                  2.5.7              Reading / writing for xlsx files
 pandas-gbq                0.8.0              Google Big Query access
 psycopg2                                     PostgreSQL engine for sqlalchemy

diff --git a/doc/source/user_guide/computation.rst b/doc/source/user_guide/computation.rst
@@ -321,6 +321,11 @@ We provide a number of common statistical functions:
     :meth:`~Rolling.cov`, Unbiased covariance (binary)
     :meth:`~Rolling.corr`, Correlation (binary)
 
+.. _stats.rolling_apply:
+
+Rolling Apply
+~~~~~~~~~~~~~
+
 The :meth:`~Rolling.apply` function takes an extra ``func`` argument and performs
 generic rolling computations. The ``func`` argument should be a single function
 that produces a single value from an ndarray input. Suppose we wanted to
@@ -334,6 +339,48 @@ compute the mean absolute deviation on a rolling basis:
    @savefig rolling_apply_ex.png
    s.rolling(window=60).apply(mad, raw=True).plot(style='k')
 
+.. versionadded:: 1.0
+
+Additionally, :meth:`~Rolling.apply` can leverage `Numba <https://numba.pydata.org/>`__
+if installed as an optional dependency. The apply aggregation can be executed using Numba by specifying
+``engine='numba'`` and ``engine_kwargs`` arguments (``raw`` must also be set to ``True``).
+Numba will be applied in potentially two routines:
+
+1. If ``func`` is a standard Python function, the engine will `JIT <http://numba.pydata.org/numba-doc/latest/user/overview.html>`__
+the passed function. ``func`` can also be a JITed function in which case the engine will not JIT the function again.
+2. The engine will JIT the for loop where the apply function is applied to each window.
+
+The ``engine_kwargs`` argument is a dictionary of keyword arguments that will be passed into the
+`numba.jit decorator <https://numba.pydata.org/numba-doc/latest/reference/jit-compilation.html#numba.jit>`__.
+These keyword arguments will be applied to *both* the passed function (if a standard Python function)
+and the apply for loop over each window. Currently only ``nogil``, ``nopython``, and ``parallel`` are supported,
+and their default values are set to ``False``, ``True`` and ``False`` respectively.
+
+.. note::
+
+   In terms of performance, **the first time a function is run using the Numba engine will be slow**
+   as Numba will have some function compilation overhead. However, ``rolling`` objects will cache
+   the function and subsequent calls will be fast. In general, the Numba engine is performant with
+   a larger amount of data points (e.g. 1+ million).
+
+.. code-block:: ipython
+
+   In [1]: data = pd.Series(range(1_000_000))
+
+   In [2]: roll = data.rolling(10)
+
+   In [3]: def f(x):
+      ...:     return np.sum(x) + 5
+   # Run the first time, compilation time will affect performance
+   In [4]: %timeit -r 1 -n 1 roll.apply(f, engine='numba', raw=True)  # noqa: E225
+   1.23 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
+   # Function is cached and performance will improve
+   In [5]: %timeit roll.apply(f, engine='numba', raw=True)
+   188 ms ± 1.93 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
+
+   In [6]: %timeit roll.apply(f, engine='cython', raw=True)
+   3.92 s ± 59 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
+
 .. _stats.rolling_window:
 
 Rolling windows

diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
@@ -169,6 +169,17 @@ You can use the alias ``"boolean"`` as well.
    s = pd.Series([True, False, None], dtype="boolean")
    s
 
+.. _whatsnew_1000.numba_rolling_apply:
+
+Using Numba in ``rolling.apply``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+We've added an ``engine`` keyword to :meth:`~Rolling.apply` that allows the user to execute the
+routine using `Numba <https://numba.pydata.org/>`__ instead of Cython. Using the Numba engine
+can yield significant performance gains if the apply function can operate on numpy arrays and
+the data set is larger. For more details, see :ref:`rolling apply documentation <stats.rolling_apply>`
+(:issue:`28987`)
+
 .. _whatsnew_1000.custom_window:
 
 Defining custom windows for rolling operations
@@ -428,6 +439,8 @@ Optional libraries below the lowest tested version may still work, but are not c
 +-----------------+-----------------+---------+
 | matplotlib      | 2.2.2           |         |
 +-----------------+-----------------+---------+
+| numba           | 0.46.0          |         |
++-----------------+-----------------+---------+
 | openpyxl        | 2.5.7           |    X    |
 +-----------------+-----------------+---------+
 | pyarrow         | 0.12.0          |    X    |

diff --git a/environment.yml b/environment.yml
@@ -72,6 +72,7 @@ dependencies:
   - matplotlib>=2.2.2  # pandas.plotting, Series.plot, DataFrame.plot
   - numexpr>=2.6.8
   - scipy>=1.1
+  - numba>=0.46.0
 
   # optional for io
   - beautifulsoup4>=4.6.0  # pandas.read_html

diff --git a/pandas/compat/_optional.py b/pandas/compat/_optional.py
@@ -27,6 +27,7 @@
     "xlrd": "1.1.0",
     "xlwt": "1.2.0",
     "xlsxwriter": "0.9.8",
+    "numba": "0.46.0",
 }
 
 

diff --git a/pandas/core/window/common.py b/pandas/core/window/common.py
@@ -70,6 +70,7 @@ def _apply(
         floor: int = 1,
         is_weighted: bool = False,
         name: Optional[str] = None,
+        use_numba_cache: Optional[bool] = False,
         **kwargs,
     ):
         """

diff --git a/pandas/core/window/numba_.py b/pandas/core/window/numba_.py
@@ -0,0 +1,100 @@
+import types
+from typing import Any, Callable, Dict, Optional, Tuple
+
+import numpy as np
+
+from pandas._typing import Scalar
+from pandas.compat._optional import import_optional_dependency
+
+
+def make_rolling_apply(func, args, nogil, parallel, nopython):
+    numba = import_optional_dependency("numba")
+
+    if parallel:
+        loop_range = numba.prange
+    else:
+        loop_range = range
+
+    if isinstance(func, numba.targets.registry.CPUDispatcher):
+        # Don't jit a user passed jitted function
+        numba_func = func
+    else:
+
+        @numba.generated_jit(nopython=nopython, nogil=nogil, parallel=parallel)
+        def numba_func(window, *_args):
+            if getattr(np, func.__name__, False) is func or isinstance(
+                func, types.BuiltinFunctionType
+            ):
+                jf = func
+            else:
+                jf = numba.jit(func, nopython=nopython)
+
+            def impl(window, *_args):
+                return jf(window, *_args)
+
+            return impl
+
+    @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel)
+    def roll_apply(
+        values: np.ndarray, begin: np.ndarray, end: np.ndarray, minimum_periods: int,
+    ) -> np.ndarray:
+        result = np.empty(len(begin))
+        for i in loop_range(len(result)):
+            start = begin[i]
+            stop = end[i]
+            window = values[start:stop]
+            count_nan = np.sum(np.isnan(window))
+            if len(window) - count_nan >= minimum_periods:
+                result[i] = numba_func(window, *args)
+            else:
+                result[i] = np.nan
+        return result
+
+    return roll_apply
+
+
+def generate_numba_apply_func(
+    args: Tuple,
+    kwargs: Dict[str, Any],
+    func: Callable[..., Scalar],
+    engine_kwargs: Optional[Dict[str, bool]],
+):
+    """
+    Generate a numba jitted apply function specified by values from engine_kwargs.
+
+    1. jit the user's function
+    2. Return a rolling apply function with the jitted function inline
+
+    Configurations specified in engine_kwargs apply to both the user's
+    function _AND_ the rolling apply function.
+
+    Parameters
+    ----------
+    args : tuple
+        *args to be passed into the function
+    kwargs : dict
+        **kwargs to be passed into the function
+    func : function
+        function to be applied to each window and will be JITed
+    engine_kwargs : dict
+        dictionary of arguments to be passed into numba.jit
+
+    Returns
+    -------
+    Numba function
+    """
+
+    if engine_kwargs is None:
+        engine_kwargs = {}
+
+    nopython = engine_kwargs.get("nopython", True)
+    nogil = engine_kwargs.get("nogil", False)
+    parallel = engine_kwargs.get("parallel", False)
+
+    if kwargs and nopython:
+        raise ValueError(
+            "numba does not support kwargs with nopython=True: "
+            "https://github.com/numba/numba/issues/2916"
+        )
+
+    return make_rolling_apply(func, args, nogil, parallel, nopython)