From 3b9bff8d90eab34880bb95e583cc66126613b7c8 Mon Sep 17 00:00:00 2001
From: Matt Roeschke <mroeschke@housecanary.com>
Date: Sun, 8 Dec 2019 11:54:05 -0800
Subject: [PATCH 01/44] Add numba to import_optional_dependencies

---
 doc/source/getting_started/install.rst | 1 +
 pandas/compat/_optional.py             | 1 +
 2 files changed, 2 insertions(+)

diff --git a/doc/source/getting_started/install.rst b/doc/source/getting_started/install.rst
index 9f3ab22496ae7..ae15ad3a5d2c3 100644
--- a/doc/source/getting_started/install.rst
+++ b/doc/source/getting_started/install.rst
@@ -255,6 +255,7 @@ gcsfs                     0.2.2              Google Cloud Storage access
 html5lib                                     HTML parser for read_html (see :ref:`note <optional_html>`)
 lxml                      3.8.0              HTML parser for read_html (see :ref:`note <optional_html>`)
 matplotlib                2.2.2              Visualization
+numba                     0.46.0             Alternative execution engine for rolling operations
 openpyxl                  2.4.8              Reading / writing for xlsx files
 pandas-gbq                0.8.0              Google Big Query access
 psycopg2                                     PostgreSQL engine for sqlalchemy
diff --git a/pandas/compat/_optional.py b/pandas/compat/_optional.py
index 0be201daea425..9650ba39bf46a 100644
--- a/pandas/compat/_optional.py
+++ b/pandas/compat/_optional.py
@@ -27,6 +27,7 @@
     "xlrd": "1.1.0",
     "xlwt": "1.2.0",
     "xlsxwriter": "0.9.8",
+    "numba": "0.46.0",
 }
 
 

From 9a302bff89e1ef00e9db6d994d24fb644cabe4f4 Mon Sep 17 00:00:00 2001
From: Matt Roeschke <mroeschke@housecanary.com>
Date: Sun, 8 Dec 2019 12:45:03 -0800
Subject: [PATCH 02/44] Start adding keywords

---
 pandas/core/window/rolling.py | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py
index 9f804584f532a..e8a6ac5cc9a93 100644
--- a/pandas/core/window/rolling.py
+++ b/pandas/core/window/rolling.py
@@ -1246,9 +1246,15 @@ def count(self):
           objects instead.
           If you are just applying a NumPy reduction function this will
           achieve much better performance.
-
-    *args, **kwargs
-        Arguments and keyword arguments to be passed into func.
+    args : tuple, default None
+        Positional arguments to be passed into func
+    kwargs : dict, default None
+        Keyword arguments to be passed into func
+    engine : str, default 'cython'
+        Execution engine for the applied function.
+        * ``'cython'`` : Runs rolling apply through C-extensions from cython.
+        * ``'numba'`` : Runn rolling apply through JIT compiled code from numba.
+          Only available when ``raw`` is set to ``True``.
 
     Returns
     -------
@@ -1262,15 +1268,23 @@ def count(self):
     """
     )
 
-    def apply(self, func, raw=False, args=(), kwargs={}):
+    def apply(self, func, raw=False, args=None, kwargs=None, engine='cython'):
         from pandas import Series
 
         kwargs.pop("_level", None)
         kwargs.pop("floor", None)
         window = self._get_window()
         offset = _offset(window, self.center)
+        if args is None:
+            args = ()
+        if kwargs is None:
+            kwargs = {}
+        if engine not in {'cython', 'numba'}:
+            raise ValueError("engine must be either 'numba' or 'cython'")
         if not is_bool(raw):
             raise ValueError("raw parameter must be `True` or `False`")
+        if raw is False and engine == 'numba':
+            raise ValueError("raw must be `True` when using the numba engine")
 
         window_func = partial(
             self._get_cython_func_type("roll_generic"),

From 0e9a600a6929fe58211ff57578d2777e19ef19a9 Mon Sep 17 00:00:00 2001
From: Matt Roeschke <mroeschke@housecanary.com>
Date: Sun, 8 Dec 2019 19:47:58 -0800
Subject: [PATCH 03/44] Modify apply for numba and cython

---
 pandas/core/window/rolling.py | 119 +++++++++++++++++++++++++++++++---
 1 file changed, 109 insertions(+), 10 deletions(-)

diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py
index e8a6ac5cc9a93..06b945e9136ce 100644
--- a/pandas/core/window/rolling.py
+++ b/pandas/core/window/rolling.py
@@ -1253,8 +1253,14 @@ def count(self):
     engine : str, default 'cython'
         Execution engine for the applied function.
         * ``'cython'`` : Runs rolling apply through C-extensions from cython.
-        * ``'numba'`` : Runn rolling apply through JIT compiled code from numba.
+        * ``'numba'`` : Runs rolling apply through JIT compiled code from numba.
           Only available when ``raw`` is set to ``True``.
+    engine_kwargs : dict, default None
+        Arguments to specify for the execution engine.
+        * For ``'cython'`` engine, there are no accepted ``engine_kwargs``
+        * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil``
+          and ``parallel``. The default ``engine_kwargs`` for the ``'numba'`` engine is
+          ``{'nopython': True, 'nogil': False, 'parallel': False}``
 
     Returns
     -------
@@ -1268,8 +1274,15 @@ def count(self):
     """
     )
 
-    def apply(self, func, raw=False, args=None, kwargs=None, engine='cython'):
-        from pandas import Series
+    def apply(
+        self,
+        func,
+        raw=False,
+        args=None,
+        kwargs=None,
+        engine="cython",
+        engine_kwargs=None,
+    ):
 
         kwargs.pop("_level", None)
         kwargs.pop("floor", None)
@@ -1279,12 +1292,30 @@ def apply(self, func, raw=False, args=None, kwargs=None, engine='cython'):
             args = ()
         if kwargs is None:
             kwargs = {}
-        if engine not in {'cython', 'numba'}:
-            raise ValueError("engine must be either 'numba' or 'cython'")
         if not is_bool(raw):
             raise ValueError("raw parameter must be `True` or `False`")
-        if raw is False and engine == 'numba':
-            raise ValueError("raw must be `True` when using the numba engine")
+
+        if engine == "cython":
+            if engine_kwargs is not None:
+                raise ValueError("cython engine does not accept engine_kwargs")
+            apply_func = self._generate_cython_apply_func(
+                args, kwargs, raw, offset, func
+            )
+        elif engine == "numba":
+            if raw is False:
+                raise ValueError("raw must be `True` when using the numba engine")
+            apply_func = self._generate_numba_apply_func(
+                args, kwargs, func, engine_kwargs
+            )
+        else:
+            raise ValueError("engine must be either 'numba' or 'cython'")
+
+        # TODO: Why do we always pass center=False?
+        # name=func for WindowGroupByMixin._apply
+        return self._apply(apply_func, center=False, floor=0, name=func)
+
+    def _generate_cython_apply_func(self, args, kwargs, raw, offset, func):
+        from pandas import Series
 
         window_func = partial(
             self._get_cython_func_type("roll_generic"),
@@ -1300,9 +1331,77 @@ def apply_func(values, begin, end, min_periods, raw=raw):
                 values = Series(values, index=self.obj.index)
             return window_func(values, begin, end, min_periods)
 
-        # TODO: Why do we always pass center=False?
-        # name=func for WindowGroupByMixin._apply
-        return self._apply(apply_func, center=False, floor=0, name=func)
+        return apply_func
+
+    def _generate_numba_apply_func(self, args, kwargs, func, engine_kwargs):
+        numba = import_optional_dependency("numba")
+
+        if engine_kwargs is None:
+            engine_kwargs = {"nopython": True, "nogil": False, "parallel": False}
+
+        nopython = engine_kwargs.get("nopython", True)
+        nogil = engine_kwargs.get("nogil", False)
+        # Maybe raise something here about 32 bit compat, if not compat.is_platform_32bit()
+        parallel = engine_kwargs.get("parallel", False)
+
+        if kwargs and nopython:
+            raise ValueError(
+                "numba does not support kwargs with nopython=True: "
+                "https://github.com/numba/numba/issues/2916"
+            )
+
+        if parallel:
+            loop_range = numba.prange
+        else:
+            loop_range = range
+
+        def make_rolling_apply(func):
+            """
+            1. jit the user's function
+            2. Return a rolling apply function with the jitted function inline
+
+            Configurations specified in engine_kwargs apply to both the user's
+            function _AND_ the rolling apply function.
+            """
+
+            @numba.generated_jit(nopython=nopython)
+            def numba_func(window, *_args):
+                if getattr(np, func.__name__, False) is func:
+
+                    def impl(window, *_args):
+                        return func(window, *_args)
+
+                    return impl
+                else:
+                    jf = numba.jit(func, nopython=nopython)
+
+                    def impl(window, *_args):
+                        return jf(window, *_args)
+
+                    return impl
+
+            @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel)
+            def roll_apply(
+                values: np.ndarray,
+                begin: np.ndarray,
+                end: np.ndarray,
+                minimum_periods: int,
+            ):
+                result = np.empty(len(begin))
+                for i in loop_range(len(result)):
+                    start = begin[i]
+                    stop = end[i]
+                    window = values[start:stop]
+                    count_nan = np.sum(np.isnan(window))
+                    if len(window) - count_nan >= minimum_periods:
+                        result[i] = numba_func(window, *args)
+                    else:
+                        result[i] = np.nan
+                return result
+
+            return roll_apply
+
+        return make_rolling_apply(func)
 
     def sum(self, *args, **kwargs):
         nv.validate_window_func("sum", args, kwargs)

From dbb2a9b360f71371ffc2ab475d04de03da6bca71 Mon Sep 17 00:00:00 2001
From: Matt Roeschke <mroeschke@housecanary.com>
Date: Sun, 8 Dec 2019 20:23:01 -0800
Subject: [PATCH 04/44] Add numba as optional dependency

---
 environment.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/environment.yml b/environment.yml
index 2b171d097a693..937f80c5cd2b3 100644
--- a/environment.yml
+++ b/environment.yml
@@ -72,6 +72,7 @@ dependencies:
   - matplotlib>=2.2.2  # pandas.plotting, Series.plot, DataFrame.plot
   - numexpr>=2.6.8
   - scipy>=1.1
+  - numba>=0.46.0
 
   # optional for io
   - beautifulsoup4>=4.6.0  # pandas.read_html

From f0e9a4dd342d1d146719aee64b128308df895311 Mon Sep 17 00:00:00 2001
From: Matt Roeschke <mroeschke@housecanary.com>
Date: Sun, 8 Dec 2019 22:05:26 -0800
Subject: [PATCH 05/44] Add prelim tests

---
 pandas/core/window/rolling.py   | 29 +++++++++++++++++++++--------
 pandas/tests/window/test_api.py | 28 ++++++++++++++++++++++++++++
 2 files changed, 49 insertions(+), 8 deletions(-)

diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py
index 06b945e9136ce..e27ca21e9bea7 100644
--- a/pandas/core/window/rolling.py
+++ b/pandas/core/window/rolling.py
@@ -1283,15 +1283,14 @@ def apply(
         engine="cython",
         engine_kwargs=None,
     ):
-
-        kwargs.pop("_level", None)
-        kwargs.pop("floor", None)
-        window = self._get_window()
-        offset = _offset(window, self.center)
         if args is None:
             args = ()
         if kwargs is None:
             kwargs = {}
+        kwargs.pop("_level", None)
+        kwargs.pop("floor", None)
+        window = self._get_window()
+        offset = _offset(window, self.center)
         if not is_bool(raw):
             raise ValueError("raw parameter must be `True` or `False`")
 
@@ -1341,7 +1340,6 @@ def _generate_numba_apply_func(self, args, kwargs, func, engine_kwargs):
 
         nopython = engine_kwargs.get("nopython", True)
         nogil = engine_kwargs.get("nogil", False)
-        # Maybe raise something here about 32 bit compat, if not compat.is_platform_32bit()
         parallel = engine_kwargs.get("parallel", False)
 
         if kwargs and nopython:
@@ -2047,8 +2045,23 @@ def count(self):
 
     @Substitution(name="rolling")
     @Appender(_shared_docs["apply"])
-    def apply(self, func, raw=False, args=(), kwargs={}):
-        return super().apply(func, raw=raw, args=args, kwargs=kwargs)
+    def apply(
+        self,
+        func,
+        raw=False,
+        args=None,
+        kwargs=None,
+        engine="cython",
+        engine_kwargs=None,
+    ):
+        return super().apply(
+            func,
+            raw=raw,
+            args=args,
+            kwargs=kwargs,
+            engine=engine,
+            engine_kwargs=engine_kwargs,
+        )
 
     @Substitution(name="rolling")
     @Appender(_shared_docs["sum"])
diff --git a/pandas/tests/window/test_api.py b/pandas/tests/window/test_api.py
index 5085576cc96f0..a099e6731e2dd 100644
--- a/pandas/tests/window/test_api.py
+++ b/pandas/tests/window/test_api.py
@@ -342,3 +342,31 @@ def test_multiple_agg_funcs(self, func, window_size, expected_vals):
         )
 
         tm.assert_frame_equal(result, expected)
+
+
+class TestEngine:
+    def test_invalid_engine(self):
+        with pytest.raises(
+            ValueError, match="engine must be either 'numba' or 'cython'"
+        ):
+            Series(range(1)).rolling(1).apply(lambda x: x, engine="foo")
+
+    def test_invalid_engine_kwargs_cython(self):
+        with pytest.raises(
+            ValueError, match="cython engine does not accept engine_kwargs"
+        ):
+            Series(range(1)).rolling(1).apply(
+                lambda x: x, engine="cython", engine_kwargs={"nopython": False}
+            )
+
+    def test_invalid_raw_numba(self):
+        with pytest.raises(
+            ValueError, match="raw must be `True` when using the numba engine"
+        ):
+            Series(range(1)).rolling(1).apply(lambda x: x, raw=False, engine="numba")
+
+    def test_invalid_kwargs_nopython(self):
+        with pytest.raises(ValueError, match="numba does not support kwargs with"):
+            Series(range(1)).rolling(1).apply(
+                lambda x: x, kwargs={"a": 1}, engine="numba", raw=True
+            )

From cb976cf9f8d3ac8cffae1b1f4b644baa7e3b059d Mon Sep 17 00:00:00 2001
From: Matt Roeschke <mroeschke@housecanary.com>
Date: Tue, 10 Dec 2019 23:15:32 -0800
Subject: [PATCH 06/44] Add numba to requirements-dev, type and reorder
 signature in apply

---
 pandas/core/window/rolling.py | 20 ++++++++++----------
 requirements-dev.txt          |  3 ++-
 2 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py
index ad9f194dddbe9..5b1f17ad1d3a6 100644
--- a/pandas/core/window/rolling.py
+++ b/pandas/core/window/rolling.py
@@ -1239,10 +1239,6 @@ def count(self):
           objects instead.
           If you are just applying a NumPy reduction function this will
           achieve much better performance.
-    args : tuple, default None
-        Positional arguments to be passed into func
-    kwargs : dict, default None
-        Keyword arguments to be passed into func
     engine : str, default 'cython'
         Execution engine for the applied function.
         * ``'cython'`` : Runs rolling apply through C-extensions from cython.
@@ -1254,6 +1250,10 @@ def count(self):
         * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil``
           and ``parallel``. The default ``engine_kwargs`` for the ``'numba'`` engine is
           ``{'nopython': True, 'nogil': False, 'parallel': False}``
+    args : tuple, default None
+        Positional arguments to be passed into func
+    kwargs : dict, default None
+        Keyword arguments to be passed into func
 
     Returns
     -------
@@ -1269,12 +1269,12 @@ def count(self):
 
     def apply(
         self,
-        func,
-        raw=False,
-        args=None,
-        kwargs=None,
-        engine="cython",
-        engine_kwargs=None,
+        func: Callable,
+        raw: bool = False,
+        engine: str = "cython",
+        engine_kwargs: Optional[Dict] = None,
+        args: Optional[Tuple] = None,
+        kwargs: Optional[Dict] = None,
     ):
         if args is None:
             args = ()
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 5f67726a3e476..a829b323a4e06 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -64,4 +64,5 @@ xlsxwriter
 xlwt
 odfpy
 pyreadstat
-git+https://github.com/pandas-dev/pandas-sphinx-theme.git@master
\ No newline at end of file
+git+https://github.com/pandas-dev/pandas-sphinx-theme.git@master
+numba>=0.46.0
\ No newline at end of file

From 45420bb59ccd6906f21737156b750f574aea8537 Mon Sep 17 00:00:00 2001
From: Matt Roeschke <mroeschke@housecanary.com>
Date: Tue, 10 Dec 2019 23:25:02 -0800
Subject: [PATCH 07/44] Move numba routines to its own file

---
 pandas/core/window/numba_.py  | 77 +++++++++++++++++++++++++++++++++++
 pandas/core/window/rolling.py | 74 +--------------------------------
 2 files changed, 79 insertions(+), 72 deletions(-)
 create mode 100644 pandas/core/window/numba_.py

diff --git a/pandas/core/window/numba_.py b/pandas/core/window/numba_.py
new file mode 100644
index 0000000000000..bacefd51724f3
--- /dev/null
+++ b/pandas/core/window/numba_.py
@@ -0,0 +1,77 @@
+from typing import Callable, Dict, Optional, Tuple
+
+import numpy as np
+
+from pandas.compat._optional import import_optional_dependency
+
+
+def _generate_numba_apply_func(
+    args: Tuple, kwargs: Dict, func: Callable, engine_kwargs: Optional[Dict]
+):
+    numba = import_optional_dependency("numba")
+
+    if engine_kwargs is None:
+        engine_kwargs = {"nopython": True, "nogil": False, "parallel": False}
+
+    nopython = engine_kwargs.get("nopython", True)
+    nogil = engine_kwargs.get("nogil", False)
+    parallel = engine_kwargs.get("parallel", False)
+
+    if kwargs and nopython:
+        raise ValueError(
+            "numba does not support kwargs with nopython=True: "
+            "https://github.com/numba/numba/issues/2916"
+        )
+
+    if parallel:
+        loop_range = numba.prange
+    else:
+        loop_range = range
+
+    def make_rolling_apply(func):
+        """
+        1. jit the user's function
+        2. Return a rolling apply function with the jitted function inline
+
+        Configurations specified in engine_kwargs apply to both the user's
+        function _AND_ the rolling apply function.
+        """
+
+        @numba.generated_jit(nopython=nopython)
+        def numba_func(window, *_args):
+            if getattr(np, func.__name__, False) is func:
+
+                def impl(window, *_args):
+                    return func(window, *_args)
+
+                return impl
+            else:
+                jf = numba.jit(func, nopython=nopython)
+
+                def impl(window, *_args):
+                    return jf(window, *_args)
+
+                return impl
+
+        @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel)
+        def roll_apply(
+            values: np.ndarray,
+            begin: np.ndarray,
+            end: np.ndarray,
+            minimum_periods: int,
+        ):
+            result = np.empty(len(begin))
+            for i in loop_range(len(result)):
+                start = begin[i]
+                stop = end[i]
+                window = values[start:stop]
+                count_nan = np.sum(np.isnan(window))
+                if len(window) - count_nan >= minimum_periods:
+                    result[i] = numba_func(window, *args)
+                else:
+                    result[i] = np.nan
+            return result
+
+        return roll_apply
+
+    return make_rolling_apply(func)
diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py
index 5b1f17ad1d3a6..30b9fa5792e90 100644
--- a/pandas/core/window/rolling.py
+++ b/pandas/core/window/rolling.py
@@ -54,6 +54,7 @@
     FixedWindowIndexer,
     VariableWindowIndexer,
 )
+from pandas.core.window.numba_ import _generate_numba_apply_func
 
 
 class _Window(PandasObject, ShallowMixin, SelectionMixin):
@@ -1296,9 +1297,7 @@ def apply(
         elif engine == "numba":
             if raw is False:
                 raise ValueError("raw must be `True` when using the numba engine")
-            apply_func = self._generate_numba_apply_func(
-                args, kwargs, func, engine_kwargs
-            )
+            apply_func = _generate_numba_apply_func(args, kwargs, func, engine_kwargs)
         else:
             raise ValueError("engine must be either 'numba' or 'cython'")
 
@@ -1325,75 +1324,6 @@ def apply_func(values, begin, end, min_periods, raw=raw):
 
         return apply_func
 
-    def _generate_numba_apply_func(self, args, kwargs, func, engine_kwargs):
-        numba = import_optional_dependency("numba")
-
-        if engine_kwargs is None:
-            engine_kwargs = {"nopython": True, "nogil": False, "parallel": False}
-
-        nopython = engine_kwargs.get("nopython", True)
-        nogil = engine_kwargs.get("nogil", False)
-        parallel = engine_kwargs.get("parallel", False)
-
-        if kwargs and nopython:
-            raise ValueError(
-                "numba does not support kwargs with nopython=True: "
-                "https://github.com/numba/numba/issues/2916"
-            )
-
-        if parallel:
-            loop_range = numba.prange
-        else:
-            loop_range = range
-
-        def make_rolling_apply(func):
-            """
-            1. jit the user's function
-            2. Return a rolling apply function with the jitted function inline
-
-            Configurations specified in engine_kwargs apply to both the user's
-            function _AND_ the rolling apply function.
-            """
-
-            @numba.generated_jit(nopython=nopython)
-            def numba_func(window, *_args):
-                if getattr(np, func.__name__, False) is func:
-
-                    def impl(window, *_args):
-                        return func(window, *_args)
-
-                    return impl
-                else:
-                    jf = numba.jit(func, nopython=nopython)
-
-                    def impl(window, *_args):
-                        return jf(window, *_args)
-
-                    return impl
-
-            @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel)
-            def roll_apply(
-                values: np.ndarray,
-                begin: np.ndarray,
-                end: np.ndarray,
-                minimum_periods: int,
-            ):
-                result = np.empty(len(begin))
-                for i in loop_range(len(result)):
-                    start = begin[i]
-                    stop = end[i]
-                    window = values[start:stop]
-                    count_nan = np.sum(np.isnan(window))
-                    if len(window) - count_nan >= minimum_periods:
-                        result[i] = numba_func(window, *args)
-                    else:
-                        result[i] = np.nan
-                return result
-
-            return roll_apply
-
-        return make_rolling_apply(func)
-
     def sum(self, *args, **kwargs):
         nv.validate_window_func("sum", args, kwargs)
         window_func = self._get_cython_func_type("roll_sum")

From 17851cf1a7c77a83b59dc629cdcc642341e0d5bd Mon Sep 17 00:00:00 2001
From: Matt Roeschke <mroeschke@housecanary.com>
Date: Tue, 10 Dec 2019 23:30:37 -0800
Subject: [PATCH 08/44] Adjust signature in top level function as well

---
 pandas/core/window/rolling.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py
index 30b9fa5792e90..9ff61721a2abb 100644
--- a/pandas/core/window/rolling.py
+++ b/pandas/core/window/rolling.py
@@ -1972,18 +1972,18 @@ def apply(
         self,
         func,
         raw=False,
-        args=None,
-        kwargs=None,
         engine="cython",
         engine_kwargs=None,
+        args=None,
+        kwargs=None,
     ):
         return super().apply(
             func,
             raw=raw,
-            args=args,
-            kwargs=kwargs,
             engine=engine,
             engine_kwargs=engine_kwargs,
+            args=args,
+            kwargs=kwargs,
         )
 
     @Substitution(name="rolling")

From 9619f8d675d2273a501a916176976fc97ea18166 Mon Sep 17 00:00:00 2001
From: Matt Roeschke <mroeschke@housecanary.com>
Date: Wed, 11 Dec 2019 11:56:57 -0800
Subject: [PATCH 09/44] Generate requirements-dev.txt using script

---
 requirements-dev.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/requirements-dev.txt b/requirements-dev.txt
index a829b323a4e06..93a5a6b32fad6 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -47,6 +47,7 @@ jinja2
 matplotlib>=2.2.2
 numexpr>=2.6.8
 scipy>=1.1
+numba>=0.46.0
 beautifulsoup4>=4.6.0
 fastparquet>=0.3.2
 html5lib
@@ -64,5 +65,4 @@ xlsxwriter
 xlwt
 odfpy
 pyreadstat
-git+https://github.com/pandas-dev/pandas-sphinx-theme.git@master
-numba>=0.46.0
\ No newline at end of file
+git+https://github.com/pandas-dev/pandas-sphinx-theme.git@master
\ No newline at end of file

From b8908eaf15e4deca584bc77e5e76d7bd0fcba5c3 Mon Sep 17 00:00:00 2001
From: Matt Roeschke <mroeschke@housecanary.com>
Date: Thu, 12 Dec 2019 22:10:56 -0800
Subject: [PATCH 10/44] Add skip test decorator, add numba to a few builds

---
 ci/deps/azure-36-minimum_versions.yaml | 1 +
 ci/deps/azure-windows-36.yaml          | 1 +
 pandas/tests/window/test_api.py        | 1 +
 3 files changed, 3 insertions(+)

diff --git a/ci/deps/azure-36-minimum_versions.yaml b/ci/deps/azure-36-minimum_versions.yaml
index 8bf4f70d18aec..de7e011d9c7ca 100644
--- a/ci/deps/azure-36-minimum_versions.yaml
+++ b/ci/deps/azure-36-minimum_versions.yaml
@@ -17,6 +17,7 @@ dependencies:
   - beautifulsoup4=4.6.0
   - bottleneck=1.2.1
   - jinja2=2.8
+  - numba=0.46.0
   - numexpr=2.6.2
   - numpy=1.13.3
   - openpyxl=2.5.7
diff --git a/ci/deps/azure-windows-36.yaml b/ci/deps/azure-windows-36.yaml
index 2bd11c9030325..7fa9dee7445a6 100644
--- a/ci/deps/azure-windows-36.yaml
+++ b/ci/deps/azure-windows-36.yaml
@@ -17,6 +17,7 @@ dependencies:
   - bottleneck
   - fastparquet>=0.3.2
   - matplotlib=3.0.2
+  - numba
   - numexpr
   - numpy=1.15.*
   - openpyxl
diff --git a/pandas/tests/window/test_api.py b/pandas/tests/window/test_api.py
index a099e6731e2dd..ca61b98d0b416 100644
--- a/pandas/tests/window/test_api.py
+++ b/pandas/tests/window/test_api.py
@@ -344,6 +344,7 @@ def test_multiple_agg_funcs(self, func, window_size, expected_vals):
         tm.assert_frame_equal(result, expected)
 
 
+@td.skip_if_no('numba', '0.46.0')
 class TestEngine:
     def test_invalid_engine(self):
         with pytest.raises(

From 135f2ad9d236a5d1fba0defd1a210d4e40f20c53 Mon Sep 17 00:00:00 2001
From: Matt Roeschke <mroeschke@housecanary.com>
Date: Thu, 12 Dec 2019 22:21:13 -0800
Subject: [PATCH 11/44] black

---
 pandas/core/window/numba_.py    | 17 +++++++++--------
 pandas/tests/window/test_api.py |  2 +-
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/pandas/core/window/numba_.py b/pandas/core/window/numba_.py
index bacefd51724f3..435e29b12a9ff 100644
--- a/pandas/core/window/numba_.py
+++ b/pandas/core/window/numba_.py
@@ -8,6 +8,15 @@
 def _generate_numba_apply_func(
     args: Tuple, kwargs: Dict, func: Callable, engine_kwargs: Optional[Dict]
 ):
+    """
+    Generate a numba jitted apply function specified by values from engine_kwargs.
+
+    1. jit the user's function
+    2. Return a rolling apply function with the jitted function inline
+
+    Configurations specified in engine_kwargs apply to both the user's
+    function _AND_ the rolling apply function.
+    """
     numba = import_optional_dependency("numba")
 
     if engine_kwargs is None:
@@ -29,14 +38,6 @@ def _generate_numba_apply_func(
         loop_range = range
 
     def make_rolling_apply(func):
-        """
-        1. jit the user's function
-        2. Return a rolling apply function with the jitted function inline
-
-        Configurations specified in engine_kwargs apply to both the user's
-        function _AND_ the rolling apply function.
-        """
-
         @numba.generated_jit(nopython=nopython)
         def numba_func(window, *_args):
             if getattr(np, func.__name__, False) is func:
diff --git a/pandas/tests/window/test_api.py b/pandas/tests/window/test_api.py
index ca61b98d0b416..518da688d72bf 100644
--- a/pandas/tests/window/test_api.py
+++ b/pandas/tests/window/test_api.py
@@ -344,7 +344,7 @@ def test_multiple_agg_funcs(self, func, window_size, expected_vals):
         tm.assert_frame_equal(result, expected)
 
 
-@td.skip_if_no('numba', '0.46.0')
+@td.skip_if_no("numba", "0.46.0")
 class TestEngine:
     def test_invalid_engine(self):
         with pytest.raises(

From 34a5687d4670f5d7d1b0288013d08718421b3f0f Mon Sep 17 00:00:00 2001
From: Matt Roeschke <mroeschke@housecanary.com>
Date: Thu, 12 Dec 2019 22:32:21 -0800
Subject: [PATCH 12/44] don't rejit a user's jitted function

---
 pandas/core/window/numba_.py | 28 +++++++++++++++++-----------
 1 file changed, 17 insertions(+), 11 deletions(-)

diff --git a/pandas/core/window/numba_.py b/pandas/core/window/numba_.py
index 435e29b12a9ff..e02597e25c145 100644
--- a/pandas/core/window/numba_.py
+++ b/pandas/core/window/numba_.py
@@ -38,21 +38,27 @@ def _generate_numba_apply_func(
         loop_range = range
 
     def make_rolling_apply(func):
-        @numba.generated_jit(nopython=nopython)
-        def numba_func(window, *_args):
-            if getattr(np, func.__name__, False) is func:
 
-                def impl(window, *_args):
-                    return func(window, *_args)
+        if isinstance(func, numba.targets.registry.CPUDispatcher):
+            # Don't jit a user passed jitted function
+            numba_func = func
+        else:
 
-                return impl
-            else:
-                jf = numba.jit(func, nopython=nopython)
+            @numba.generated_jit(nopython=nopython)
+            def numba_func(window, *_args):
+                if getattr(np, func.__name__, False) is func:
 
-                def impl(window, *_args):
-                    return jf(window, *_args)
+                    def impl(window, *_args):
+                        return func(window, *_args)
 
-                return impl
+                    return impl
+                else:
+                    jf = numba.jit(func, nopython=nopython)
+
+                    def impl(window, *_args):
+                        return jf(window, *_args)
+
+                    return impl
 
         @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel)
         def roll_apply(

From 6da8199f446ea0590c4596f746983e7a1da97277 Mon Sep 17 00:00:00 2001
From: Matt Roeschke <mroeschke@housecanary.com>
Date: Thu, 12 Dec 2019 22:47:39 -0800
Subject: [PATCH 13/44] Add numba/cython comparison test

---
 pandas/tests/window/test_numba.py | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)
 create mode 100644 pandas/tests/window/test_numba.py

diff --git a/pandas/tests/window/test_numba.py b/pandas/tests/window/test_numba.py
new file mode 100644
index 0000000000000..eeee83635e31a
--- /dev/null
+++ b/pandas/tests/window/test_numba.py
@@ -0,0 +1,27 @@
+import numpy as np
+import pytest
+
+import pandas.util._test_decorators as td
+
+from pandas import Series
+import pandas.util.testing as tm
+
+
+@td.skip_if_no("numba", "0.46.0")
+class TestApply:
+    @pytest.mark.parametrize("nogil", [True, False])
+    @pytest.mark.parametrize("parallel", [True, False])
+    @pytest.mark.parametrize("nopython", [True, False])
+    def test_numba_vs_cython(self, nogil, parallel, nopython):
+        def f(x, *args):
+            return np.sqrt(x) + np.sum(args) + 1
+
+        engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython}
+        args = (2,)
+
+        s = Series(range(10))
+        result = s.rolling(2).apply(
+            f, args=args, engine="numba", engine_kwargs=engine_kwargs, raw=True
+        )
+        expected = s.rolling(2).apply(f, engine="cython", args=args, raw=True)
+        tm.assert_series_equal(result, expected)

From 54e74d1094e47b3b8fd8b552d44b98c45381f63b Mon Sep 17 00:00:00 2001
From: Matt Roeschke <mroeschke@housecanary.com>
Date: Mon, 16 Dec 2019 21:31:17 -0800
Subject: [PATCH 14/44] Remove typing for now

---
 pandas/core/window/rolling.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py
index 86bb51f341731..be7c351dadf54 100644
--- a/pandas/core/window/rolling.py
+++ b/pandas/core/window/rolling.py
@@ -1270,7 +1270,7 @@ def count(self):
 
     def apply(
         self,
-        func: Callable,
+        func,
         raw: bool = False,
         engine: str = "cython",
         engine_kwargs: Optional[Dict] = None,

From 04d353098259896cac0c2eb24487db51548661e7 Mon Sep 17 00:00:00 2001
From: Matt Roeschke <mroeschke@housecanary.com>
Date: Mon, 16 Dec 2019 21:42:21 -0800
Subject: [PATCH 15/44] Remove sub description for doc failures?

---
 pandas/core/window/rolling.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py
index be7c351dadf54..4e20bbf34ac31 100644
--- a/pandas/core/window/rolling.py
+++ b/pandas/core/window/rolling.py
@@ -1241,20 +1241,18 @@ def count(self):
           If you are just applying a NumPy reduction function this will
           achieve much better performance.
     engine : str, default 'cython'
-        Execution engine for the applied function.
         * ``'cython'`` : Runs rolling apply through C-extensions from cython.
         * ``'numba'`` : Runs rolling apply through JIT compiled code from numba.
           Only available when ``raw`` is set to ``True``.
     engine_kwargs : dict, default None
-        Arguments to specify for the execution engine.
         * For ``'cython'`` engine, there are no accepted ``engine_kwargs``
         * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil``
           and ``parallel``. The default ``engine_kwargs`` for the ``'numba'`` engine is
-          ``{'nopython': True, 'nogil': False, 'parallel': False}``
+          ``{'nopython': True, 'nogil': False, 'parallel': False}``.
     args : tuple, default None
-        Positional arguments to be passed into func
+        Positional arguments to be passed into func.
     kwargs : dict, default None
-        Keyword arguments to be passed into func
+        Keyword arguments to be passed into func.
 
     Returns
     -------

From 4bbf5872365d4515bed285097c915d5e6db03d12 Mon Sep 17 00:00:00 2001
From: Matt Roeschke <mroeschke@housecanary.com>
Date: Mon, 16 Dec 2019 22:03:02 -0800
Subject: [PATCH 16/44] Fix test function

---
 pandas/tests/window/test_numba.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/window/test_numba.py b/pandas/tests/window/test_numba.py
index eeee83635e31a..b60ca94d9a799 100644
--- a/pandas/tests/window/test_numba.py
+++ b/pandas/tests/window/test_numba.py
@@ -14,7 +14,10 @@ class TestApply:
     @pytest.mark.parametrize("nopython", [True, False])
     def test_numba_vs_cython(self, nogil, parallel, nopython):
         def f(x, *args):
-            return np.sqrt(x) + np.sum(args) + 1
+            arg_sum = 0
+            for arg in args:
+                arg_sum += arg
+            return np.mean(x) + arg_sum
 
         engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython}
         args = (2,)

From f849bc7006be8d60f0739b173783f9d783c262ab Mon Sep 17 00:00:00 2001
From: Matt Roeschke <mroeschke@housecanary.com>
Date: Mon, 16 Dec 2019 22:16:23 -0800
Subject: [PATCH 17/44] test user predefined jit function, clarify docstring

---
 pandas/core/window/rolling.py     |  3 ++-
 pandas/tests/window/conftest.py   | 15 +++++++++++++++
 pandas/tests/window/test_numba.py | 11 +++++++----
 3 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py
index 4e20bbf34ac31..8482fa52e8921 100644
--- a/pandas/core/window/rolling.py
+++ b/pandas/core/window/rolling.py
@@ -1247,7 +1247,8 @@ def count(self):
     engine_kwargs : dict, default None
         * For ``'cython'`` engine, there are no accepted ``engine_kwargs``
         * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil``
-          and ``parallel``. The default ``engine_kwargs`` for the ``'numba'`` engine is
+          and ``parallel`` dictionary keys. The values must either be ``True`` or ``False``.
+          The default ``engine_kwargs`` for the ``'numba'`` engine is
           ``{'nopython': True, 'nogil': False, 'parallel': False}``.
     args : tuple, default None
         Positional arguments to be passed into func.
diff --git a/pandas/tests/window/conftest.py b/pandas/tests/window/conftest.py
index 7ea4be25ca2a6..8f40278d95d12 100644
--- a/pandas/tests/window/conftest.py
+++ b/pandas/tests/window/conftest.py
@@ -47,3 +47,18 @@ def center(request):
 @pytest.fixture(params=[None, 1])
 def min_periods(request):
     return request.param
+
+
+@pytest.fixture(params=[True, False])
+def parallel(request):
+    return request.param
+
+
+@pytest.fixture(params=[True, False])
+def nogil(request):
+    return request.param
+
+
+@pytest.fixture(params=[True, False])
+def nopython(request):
+    return request.param
diff --git a/pandas/tests/window/test_numba.py b/pandas/tests/window/test_numba.py
index b60ca94d9a799..3a85cff9a73eb 100644
--- a/pandas/tests/window/test_numba.py
+++ b/pandas/tests/window/test_numba.py
@@ -9,16 +9,19 @@
 
 @td.skip_if_no("numba", "0.46.0")
 class TestApply:
-    @pytest.mark.parametrize("nogil", [True, False])
-    @pytest.mark.parametrize("parallel", [True, False])
-    @pytest.mark.parametrize("nopython", [True, False])
-    def test_numba_vs_cython(self, nogil, parallel, nopython):
+    @pytest.mark.parametrize("jit", [True, False])
+    def test_numba_vs_cython(self, jit, nogil, parallel, nopython):
         def f(x, *args):
             arg_sum = 0
             for arg in args:
                 arg_sum += arg
             return np.mean(x) + arg_sum
 
+        if jit:
+            import numba
+
+            f = numba.jit(f)
+
         engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython}
         args = (2,)
 

From 0c30e48b2fb8f7864d46728c3204f49b2628f783 Mon Sep 17 00:00:00 2001
From: Matt Roeschke <mroeschke@housecanary.com>
Date: Mon, 16 Dec 2019 22:21:03 -0800
Subject: [PATCH 18/44] Apply engine kwargs to function as well

---
 pandas/core/window/numba_.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/window/numba_.py b/pandas/core/window/numba_.py
index e02597e25c145..aa0472f76068a 100644
--- a/pandas/core/window/numba_.py
+++ b/pandas/core/window/numba_.py
@@ -44,7 +44,7 @@ def make_rolling_apply(func):
             numba_func = func
         else:
 
-            @numba.generated_jit(nopython=nopython)
+            @numba.generated_jit(nopython=nopython, nogil=nogil, parallel=parallel)
             def numba_func(window, *_args):
                 if getattr(np, func.__name__, False) is func:
 

From c4c952ef4f096c5d19ae225c0d383dead63e8b78 Mon Sep 17 00:00:00 2001
From: Matt Roeschke <mroeschke@housecanary.com>
Date: Mon, 16 Dec 2019 22:22:41 -0800
Subject: [PATCH 19/44] Clarify documentation

---
 pandas/core/window/rolling.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py
index 8482fa52e8921..db832447f8830 100644
--- a/pandas/core/window/rolling.py
+++ b/pandas/core/window/rolling.py
@@ -1232,7 +1232,8 @@ def count(self):
     ----------
     func : function
         Must produce a single value from an ndarray input if ``raw=True``
-        or a single value from a Series if ``raw=False``.
+        or a single value from a Series if ``raw=False``. Can also accept a
+        Numba JIT function with ``engine='numba'`` specified.
     raw : bool, default None
         * ``False`` : passes each row or column as a Series to the
           function.

From 8645976e71a3dc34a3b488957cce4c82167651e7 Mon Sep 17 00:00:00 2001
From: Matt Roeschke <mroeschke@housecanary.com>
Date: Mon, 16 Dec 2019 22:25:05 -0800
Subject: [PATCH 20/44] Clarify what engine_kwargs applies to

---
 pandas/core/window/rolling.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py
index db832447f8830..37b1ad0e0f521 100644
--- a/pandas/core/window/rolling.py
+++ b/pandas/core/window/rolling.py
@@ -1250,7 +1250,8 @@ def count(self):
         * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil``
           and ``parallel`` dictionary keys. The values must either be ``True`` or ``False``.
           The default ``engine_kwargs`` for the ``'numba'`` engine is
-          ``{'nopython': True, 'nogil': False, 'parallel': False}``.
+          ``{'nopython': True, 'nogil': False, 'parallel': False}`` and will be applied
+          to both the ``func`` and the ``apply`` rolling aggregation.
     args : tuple, default None
         Positional arguments to be passed into func.
     kwargs : dict, default None

From 987c91697fc747fe7f8e951d53fbe0cf3476cbc8 Mon Sep 17 00:00:00 2001
From: Matt Roeschke <mroeschke@housecanary.com>
Date: Mon, 16 Dec 2019 23:20:35 -0800
Subject: [PATCH 21/44] Start section for numba rolling apply

---
 doc/source/user_guide/computation.rst | 19 +++++++++++++++++++
 pandas/core/window/rolling.py         |  4 ++++
 2 files changed, 23 insertions(+)

diff --git a/doc/source/user_guide/computation.rst b/doc/source/user_guide/computation.rst
index 627a83b7359bb..7e618e9363c08 100644
--- a/doc/source/user_guide/computation.rst
+++ b/doc/source/user_guide/computation.rst
@@ -321,6 +321,11 @@ We provide a number of common statistical functions:
     :meth:`~Rolling.cov`, Unbiased covariance (binary)
     :meth:`~Rolling.corr`, Correlation (binary)
 
+.. _stats.rolling_apply:
+
+Rolling Apply
+~~~~~~~~~~~~~
+
 The :meth:`~Rolling.apply` function takes an extra ``func`` argument and performs
 generic rolling computations. The ``func`` argument should be a single function
 that produces a single value from an ndarray input. Suppose we wanted to
@@ -334,6 +339,20 @@ compute the mean absolute deviation on a rolling basis:
    @savefig rolling_apply_ex.png
    s.rolling(window=60).apply(mad, raw=True).plot(style='k')
 
+Additionally, :meth:`~Rolling.apply` can leverage `Numba <https://numba.pydata.org/>`__
+if installed as an optional dependency. The apply aggregation can be executed using Numba by specifying
+the ``engine='numba'`` and ``engine_kwargs`` arguments (``raw`` must also be set to ``True``).
+Numba will be applied in potentially two routines:
+
+1. If ``func`` is a standard Python function, the engine will JIT the passed function. ``func``
+   can also be a pre-JIT function in which case the engine will not JIT the function again.
+2. The engine will JIT the for loop where the apply function is applied to each window.
+
+The ``engine_kwargs`` argument is a dictionary of keyword arguments that will be passed into the
+`numba.jit decorator <https://numba.pydata.org/numba-doc/latest/reference/jit-compilation.html#numba.jit>`__.
+These keyword arguments will be applied to *both* the passed function (if a standard Python function)
+and the apply for loop. Currently only ``nogil``, ``nopython``, and ``parallel`` are supported.
+
 .. _stats.rolling_window:
 
 Rolling windows
diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py
index 37b1ad0e0f521..c00b88956325e 100644
--- a/pandas/core/window/rolling.py
+++ b/pandas/core/window/rolling.py
@@ -1266,6 +1266,10 @@ def count(self):
     --------
     Series.%(name)s : Series %(name)s.
     DataFrame.%(name)s : DataFrame %(name)s.
+    
+    Notes
+    -----
+    See :ref:`stats.rolling_window` for extended documentation on the Numba engine.
     """
     )
 

From b775684c9be43ba203f2777c7ddd013a271718be Mon Sep 17 00:00:00 2001
From: Matt Roeschke <mroeschke@housecanary.com>
Date: Mon, 16 Dec 2019 23:28:40 -0800
Subject: [PATCH 22/44] Lint

---
 pandas/core/window/rolling.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py
index c00b88956325e..d3da2cbbbbf42 100644
--- a/pandas/core/window/rolling.py
+++ b/pandas/core/window/rolling.py
@@ -1248,10 +1248,10 @@ def count(self):
     engine_kwargs : dict, default None
         * For ``'cython'`` engine, there are no accepted ``engine_kwargs``
         * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil``
-          and ``parallel`` dictionary keys. The values must either be ``True`` or ``False``.
-          The default ``engine_kwargs`` for the ``'numba'`` engine is
-          ``{'nopython': True, 'nogil': False, 'parallel': False}`` and will be applied
-          to both the ``func`` and the ``apply`` rolling aggregation.
+          and ``parallel`` dictionary keys. The values must either be ``True`` or
+          ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is
+          ``{'nopython': True, 'nogil': False, 'parallel': False}`` and will be
+          applied to both the ``func`` and the ``apply`` rolling aggregation.
     args : tuple, default None
         Positional arguments to be passed into func.
     kwargs : dict, default None
@@ -1266,7 +1266,7 @@ def count(self):
     --------
     Series.%(name)s : Series %(name)s.
     DataFrame.%(name)s : DataFrame %(name)s.
-    
+
     Notes
     -----
     See :ref:`stats.rolling_window` for extended documentation on the Numba engine.

From 2e04e602ca860872b8409c80b49a68dec37e059c Mon Sep 17 00:00:00 2001
From: Matt Roeschke <mroeschke@housecanary.com>
Date: Tue, 17 Dec 2019 00:01:53 -0800
Subject: [PATCH 23/44] clarify note

---
 pandas/core/window/rolling.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py
index d3da2cbbbbf42..8e198e60933a0 100644
--- a/pandas/core/window/rolling.py
+++ b/pandas/core/window/rolling.py
@@ -1269,7 +1269,8 @@ def count(self):
 
     Notes
     -----
-    See :ref:`stats.rolling_window` for extended documentation on the Numba engine.
+    See :ref:`stats.rolling_apply` for extended documentation and performance
+    considerations for the Numba engine.
     """
     )
 

From 0c140330b36ad25ff86d1b4ce3d615d050517928 Mon Sep 17 00:00:00 2001
From: Matt Roeschke <mroeschke@housecanary.com>
Date: Wed, 18 Dec 2019 23:27:47 -0800
Subject: [PATCH 24/44] Add apply function cache to save compiled numba
 functions

---
 pandas/core/window/common.py  |  1 +
 pandas/core/window/numba_.py  | 10 +++++++++-
 pandas/core/window/rolling.py | 24 +++++++++++++++++++-----
 3 files changed, 29 insertions(+), 6 deletions(-)

diff --git a/pandas/core/window/common.py b/pandas/core/window/common.py
index c7d856e9a1e88..0e7a877cbc69b 100644
--- a/pandas/core/window/common.py
+++ b/pandas/core/window/common.py
@@ -70,6 +70,7 @@ def _apply(
         floor: int = 1,
         is_weighted: bool = False,
         name: Optional[str] = None,
+        use_numba_cache: Optional = False,
         **kwargs,
     ):
         """
diff --git a/pandas/core/window/numba_.py b/pandas/core/window/numba_.py
index aa0472f76068a..d4b693b7dd988 100644
--- a/pandas/core/window/numba_.py
+++ b/pandas/core/window/numba_.py
@@ -6,7 +6,11 @@
 
 
 def _generate_numba_apply_func(
-    args: Tuple, kwargs: Dict, func: Callable, engine_kwargs: Optional[Dict]
+    args: Tuple,
+    kwargs: Dict,
+    func: Callable,
+    engine_kwargs: Optional[Dict],
+    function_cache: Dict,
 ):
     """
     Generate a numba jitted apply function specified by values from engine_kwargs.
@@ -37,6 +41,10 @@ def _generate_numba_apply_func(
     else:
         loop_range = range
 
+    # Return an already compiled version of roll_apply if available
+    if func in function_cache:
+        return function_cache[func]
+
     def make_rolling_apply(func):
 
         if isinstance(func, numba.targets.registry.CPUDispatcher):
diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py
index 8e198e60933a0..62d9605d30bb1 100644
--- a/pandas/core/window/rolling.py
+++ b/pandas/core/window/rolling.py
@@ -93,6 +93,7 @@ def __init__(
         self.win_freq = None
         self.axis = obj._get_axis_number(axis) if axis is not None else None
         self.validate()
+        self._numba_func_cache = dict()
 
     @property
     def _constructor(self):
@@ -443,6 +444,7 @@ def _apply(
         floor: int = 1,
         is_weighted: bool = False,
         name: Optional[str] = None,
+        use_numba_cache: Optional[bool] = False,
         **kwargs,
     ):
         """
@@ -455,10 +457,11 @@ def _apply(
         func : callable function to apply
         center : bool
         require_min_periods : int
-        floor: int
-        is_weighted
-        name: str,
+        floor : int
+        is_weighted : bool
+        name : str,
             compatibility with groupby.rolling
+        use_numba_cache : bool
         **kwargs
             additional arguments for rolling function and window function
 
@@ -533,6 +536,9 @@ def calc(x):
                     result = calc(values)
                     result = np.asarray(result)
 
+            if use_numba_cache:
+                self._numba_func_cache[name] = func
+
             if center:
                 result = self._center_window(result, window)
 
@@ -1303,13 +1309,21 @@ def apply(
         elif engine == "numba":
             if raw is False:
                 raise ValueError("raw must be `True` when using the numba engine")
-            apply_func = _generate_numba_apply_func(args, kwargs, func, engine_kwargs)
+            apply_func = _generate_numba_apply_func(
+                args, kwargs, func, engine_kwargs, self._numba_func_cache
+            )
         else:
             raise ValueError("engine must be either 'numba' or 'cython'")
 
         # TODO: Why do we always pass center=False?
         # name=func for WindowGroupByMixin._apply
-        return self._apply(apply_func, center=False, floor=0, name=func)
+        return self._apply(
+            apply_func,
+            center=False,
+            floor=0,
+            name=func,
+            use_numba_cache=engine == "numba",
+        )
 
     def _generate_cython_apply_func(self, args, kwargs, raw, offset, func):
         from pandas import Series

From c7106dc47535d34b82ad388271320d762f596014 Mon Sep 17 00:00:00 2001
From: Matt Roeschke <mroeschke@housecanary.com>
Date: Wed, 18 Dec 2019 23:51:36 -0800
Subject: [PATCH 25/44] Add performance example

---
 doc/source/user_guide/computation.rst | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/doc/source/user_guide/computation.rst b/doc/source/user_guide/computation.rst
index 7e618e9363c08..643a4e9ca69b4 100644
--- a/doc/source/user_guide/computation.rst
+++ b/doc/source/user_guide/computation.rst
@@ -353,6 +353,30 @@ The ``engine_kwargs`` argument is a dictionary of keyword arguments that will be
 These keyword arguments will be applied to *both* the passed function (if a standard Python function)
 and the apply for loop. Currently only ``nogil``, ``nopython``, and ``parallel`` are supported.
 
+.. note::
+
+   In terms of performance, **the first time a function is run using the Numba engine will be slow**
+   as Numba will have some function compilation overhead. However, ``rolling`` objects will cache
+   the function and subsequent calls will be fast. In general, the Numba engine is performant with
+   a larger amount of data points (e.g. 1+ million).
+
+.. code-block:: ipython
+
+   In [1]: data = pd.Series(range(1000000))
+   
+   In [2]: roll = data.rolling(10)
+
+   In [3]: f = lambda x: np.sum(x) + 5
+   # Ran the first time, compilation time will affect performance
+   In [4]: %timeit -r 1 -n 1 roll.apply(f, engine='numba', raw=True)
+   1.23 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
+   # Function is cached and performance will improve
+   In [5]: %timeit roll.apply(f, engine='numba', raw=True)
+   188 ms ± 1.93 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
+
+   In [6]: %timeit roll.apply(f, engine='cython', raw=True)
+   3.92 s ± 59 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
+
 .. _stats.rolling_window:
 
 Rolling windows

From 2846faf0e85e5487e1d75d66950870e941dfb976 Mon Sep 17 00:00:00 2001
From: Matt Roeschke <mroeschke@housecanary.com>
Date: Fri, 20 Dec 2019 20:01:31 -0800
Subject: [PATCH 26/44] Remove whitespace

---
 doc/source/user_guide/computation.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/user_guide/computation.rst b/doc/source/user_guide/computation.rst
index 643a4e9ca69b4..67b0e5c0098b4 100644
--- a/doc/source/user_guide/computation.rst
+++ b/doc/source/user_guide/computation.rst
@@ -363,7 +363,7 @@ and the apply for loop. Currently only ``nogil``, ``nopython``, and ``parallel``
 .. code-block:: ipython
 
    In [1]: data = pd.Series(range(1000000))
-   
+
    In [2]: roll = data.rolling(10)
 
    In [3]: f = lambda x: np.sum(x) + 5

From 5a645c0a54ef6f32b4a6d59caabb7673547be90b Mon Sep 17 00:00:00 2001
From: Matt Roeschke <mroeschke@housecanary.com>
Date: Sat, 21 Dec 2019 20:31:45 -0800
Subject: [PATCH 27/44] Address lint errors and separate apply tests

---
 doc/source/user_guide/computation.rst         |   3 +-
 pandas/core/window/numba_.py                  |   5 +-
 pandas/tests/window/conftest.py               |   5 +
 pandas/tests/window/test_apply.py             | 114 ++++++++++++++++++
 pandas/tests/window/test_moments.py           |  51 --------
 pandas/tests/window/test_timeseries_window.py |  30 -----
 6 files changed, 125 insertions(+), 83 deletions(-)
 create mode 100644 pandas/tests/window/test_apply.py

diff --git a/doc/source/user_guide/computation.rst b/doc/source/user_guide/computation.rst
index 67b0e5c0098b4..259c274cfc369 100644
--- a/doc/source/user_guide/computation.rst
+++ b/doc/source/user_guide/computation.rst
@@ -366,7 +366,8 @@ and the apply for loop. Currently only ``nogil``, ``nopython``, and ``parallel``
 
    In [2]: roll = data.rolling(10)
 
-   In [3]: f = lambda x: np.sum(x) + 5
+   In [3]: def f(x):
+      ...:     return np.sum(x) + 5
    # Ran the first time, compilation time will affect performance
    In [4]: %timeit -r 1 -n 1 roll.apply(f, engine='numba', raw=True)
    1.23 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
diff --git a/pandas/core/window/numba_.py b/pandas/core/window/numba_.py
index d4b693b7dd988..284d566a14949 100644
--- a/pandas/core/window/numba_.py
+++ b/pandas/core/window/numba_.py
@@ -1,3 +1,4 @@
+import types
 from typing import Callable, Dict, Optional, Tuple
 
 import numpy as np
@@ -54,7 +55,9 @@ def make_rolling_apply(func):
 
             @numba.generated_jit(nopython=nopython, nogil=nogil, parallel=parallel)
             def numba_func(window, *_args):
-                if getattr(np, func.__name__, False) is func:
+                if getattr(np, func.__name__, False) is func or isinstance(
+                    func, types.BuiltinFunctionType
+                ):
 
                     def impl(window, *_args):
                         return func(window, *_args)
diff --git a/pandas/tests/window/conftest.py b/pandas/tests/window/conftest.py
index 8f40278d95d12..21b57d35d04a7 100644
--- a/pandas/tests/window/conftest.py
+++ b/pandas/tests/window/conftest.py
@@ -62,3 +62,8 @@ def nogil(request):
 @pytest.fixture(params=[True, False])
 def nopython(request):
     return request.param
+
+
+@pytest.fixture(params=["numba", "cython"])
+def engine(request):
+    return request.param
diff --git a/pandas/tests/window/test_apply.py b/pandas/tests/window/test_apply.py
new file mode 100644
index 0000000000000..965d8e3616f4c
--- /dev/null
+++ b/pandas/tests/window/test_apply.py
@@ -0,0 +1,114 @@
+import numpy as np
+import pytest
+
+from pandas import DataFrame, Series, Timestamp, date_range
+import pandas.util.testing as tm
+
+
+@pytest.mark.parametrize("bad_raw", [None, 1, 0])
+def test_rolling_apply_invalid_raw(bad_raw):
+    with pytest.raises(ValueError, match="raw parameter must be `True` or `False`"):
+        Series(range(3)).rolling(1).apply(len, raw=bad_raw)
+
+
+def test_rolling_apply_out_of_bounds(engine, raw):
+    # gh-1850
+    if engine == "numba":
+        raw = True
+
+    vals = Series([1, 2, 3, 4])
+
+    result = vals.rolling(10).apply(np.sum, engine=engine, raw=raw)
+    assert result.isna().all()
+
+    result = vals.rolling(10, min_periods=1).apply(np.sum, engine=engine, raw=raw)
+    expected = Series([1, 3, 6, 10], dtype=float)
+    tm.assert_almost_equal(result, expected)
+
+
+@pytest.mark.parametrize("window", [2, "2s"])
+def test_rolling_apply_with_pandas_objects(window):
+    # 5071
+    df = DataFrame(
+        {"A": np.random.randn(5), "B": np.random.randint(0, 10, size=5)},
+        index=date_range("20130101", periods=5, freq="s"),
+    )
+
+    # we have an equal spaced timeseries index
+    # so simulate removing the first period
+    def f(x):
+        if x.index[0] == df.index[0]:
+            return np.nan
+        return x.iloc[-1]
+
+    result = df.rolling(window).apply(f, raw=False)
+    expected = df.iloc[2:].reindex_like(df)
+    tm.assert_frame_equal(result, expected)
+
+    with pytest.raises(AttributeError):
+        df.rolling(window).apply(f, raw=True)
+
+
+def test_rolling_apply(engine, raw):
+    if engine == "numba":
+        raw = True
+    expected = Series([], dtype="float64")
+    result = expected.rolling(10).apply(lambda x: x.mean(), engine=engine, raw=raw)
+    tm.assert_series_equal(result, expected)
+
+    # gh-8080
+    s = Series([None, None, None])
+    result = s.rolling(2, min_periods=0).apply(lambda x: len(x), engine=engine, raw=raw)
+    expected = Series([1.0, 2.0, 2.0])
+    tm.assert_series_equal(result, expected)
+
+    result = s.rolling(2, min_periods=0).apply(len, engine=engine, raw=raw)
+    tm.assert_series_equal(result, expected)
+
+
+def test_all_apply(engine, raw):
+    if engine == "numba":
+        raw = True
+
+    df = (
+        DataFrame(
+            {"A": date_range("20130101", periods=5, freq="s"), "B": range(5)}
+        ).set_index("A")
+        * 2
+    )
+    er = df.rolling(window=1)
+    r = df.rolling(window="1s")
+
+    result = r.apply(lambda x: 1, engine=engine, raw=raw)
+    expected = er.apply(lambda x: 1, engine=engine, raw=raw)
+    tm.assert_frame_equal(result, expected)
+
+
+def test_ragged_apply(engine, raw):
+    if engine == "numba":
+        raw = True
+
+    df = DataFrame({"B": range(5)})
+    df.index = [
+        Timestamp("20130101 09:00:00"),
+        Timestamp("20130101 09:00:02"),
+        Timestamp("20130101 09:00:03"),
+        Timestamp("20130101 09:00:05"),
+        Timestamp("20130101 09:00:06"),
+    ]
+
+    f = lambda x: 1
+    result = df.rolling(window="1s", min_periods=1).apply(f, engine=engine, raw=raw)
+    expected = df.copy()
+    expected["B"] = 1.0
+    tm.assert_frame_equal(result, expected)
+
+    result = df.rolling(window="2s", min_periods=1).apply(f, engine=engine, raw=raw)
+    expected = df.copy()
+    expected["B"] = 1.0
+    tm.assert_frame_equal(result, expected)
+
+    result = df.rolling(window="5s", min_periods=1).apply(f, engine=engine, raw=raw)
+    expected = df.copy()
+    expected["B"] = 1.0
+    tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/window/test_moments.py b/pandas/tests/window/test_moments.py
index 2c65c9e2ac82c..756f1a37d9a9b 100644
--- a/pandas/tests/window/test_moments.py
+++ b/pandas/tests/window/test_moments.py
@@ -674,57 +674,6 @@ def f(x):
 
         self._check_moment_func(np.mean, name="apply", func=f, raw=raw)
 
-        expected = Series([], dtype="float64")
-        result = expected.rolling(10).apply(lambda x: x.mean(), raw=raw)
-        tm.assert_series_equal(result, expected)
-
-        # gh-8080
-        s = Series([None, None, None])
-        result = s.rolling(2, min_periods=0).apply(lambda x: len(x), raw=raw)
-        expected = Series([1.0, 2.0, 2.0])
-        tm.assert_series_equal(result, expected)
-
-        result = s.rolling(2, min_periods=0).apply(len, raw=raw)
-        tm.assert_series_equal(result, expected)
-
-    @pytest.mark.parametrize("bad_raw", [None, 1, 0])
-    def test_rolling_apply_invalid_raw(self, bad_raw):
-        with pytest.raises(ValueError, match="raw parameter must be `True` or `False`"):
-            Series(range(3)).rolling(1).apply(len, raw=bad_raw)
-
-    def test_rolling_apply_out_of_bounds(self, raw):
-        # gh-1850
-        vals = pd.Series([1, 2, 3, 4])
-
-        result = vals.rolling(10).apply(np.sum, raw=raw)
-        assert result.isna().all()
-
-        result = vals.rolling(10, min_periods=1).apply(np.sum, raw=raw)
-        expected = pd.Series([1, 3, 6, 10], dtype=float)
-        tm.assert_almost_equal(result, expected)
-
-    @pytest.mark.parametrize("window", [2, "2s"])
-    def test_rolling_apply_with_pandas_objects(self, window):
-        # 5071
-        df = pd.DataFrame(
-            {"A": np.random.randn(5), "B": np.random.randint(0, 10, size=5)},
-            index=pd.date_range("20130101", periods=5, freq="s"),
-        )
-
-        # we have an equal spaced timeseries index
-        # so simulate removing the first period
-        def f(x):
-            if x.index[0] == df.index[0]:
-                return np.nan
-            return x.iloc[-1]
-
-        result = df.rolling(window).apply(f, raw=False)
-        expected = df.iloc[2:].reindex_like(df)
-        tm.assert_frame_equal(result, expected)
-
-        with pytest.raises(AttributeError):
-            df.rolling(window).apply(f, raw=True)
-
     def test_rolling_std(self, raw):
         self._check_moment_func(lambda x: np.std(x, ddof=1), name="std", raw=raw)
         self._check_moment_func(
diff --git a/pandas/tests/window/test_timeseries_window.py b/pandas/tests/window/test_timeseries_window.py
index 46582b4b50c84..c0d47fc2ca624 100644
--- a/pandas/tests/window/test_timeseries_window.py
+++ b/pandas/tests/window/test_timeseries_window.py
@@ -566,26 +566,6 @@ def test_freqs_ops(self, freq, op, result_data):
 
         tm.assert_series_equal(result, expected)
 
-    def test_ragged_apply(self, raw):
-
-        df = self.ragged
-
-        f = lambda x: 1
-        result = df.rolling(window="1s", min_periods=1).apply(f, raw=raw)
-        expected = df.copy()
-        expected["B"] = 1.0
-        tm.assert_frame_equal(result, expected)
-
-        result = df.rolling(window="2s", min_periods=1).apply(f, raw=raw)
-        expected = df.copy()
-        expected["B"] = 1.0
-        tm.assert_frame_equal(result, expected)
-
-        result = df.rolling(window="5s", min_periods=1).apply(f, raw=raw)
-        expected = df.copy()
-        expected["B"] = 1.0
-        tm.assert_frame_equal(result, expected)
-
     def test_all(self):
 
         # simple comparison of integer vs time-based windowing
@@ -614,16 +594,6 @@ def test_all(self):
         expected = er.quantile(0.5)
         tm.assert_frame_equal(result, expected)
 
-    def test_all_apply(self, raw):
-
-        df = self.regular * 2
-        er = df.rolling(window=1)
-        r = df.rolling(window="1s")
-
-        result = r.apply(lambda x: 1, raw=raw)
-        expected = er.apply(lambda x: 1, raw=raw)
-        tm.assert_frame_equal(result, expected)
-
     def test_all2(self):
 
         # more sophisticated comparison of integer vs.

From 6bac000ba2961544e01ff2f42d98c47a7048d5e9 Mon Sep 17 00:00:00 2001
From: Matt Roeschke <mroeschke@housecanary.com>
Date: Sun, 22 Dec 2019 10:28:53 -0800
Subject: [PATCH 28/44] Add whatsnew note

---
 doc/source/whatsnew/v1.0.0.rst | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
index a15d5b319fc82..e829b559bb8b5 100644
--- a/doc/source/whatsnew/v1.0.0.rst
+++ b/doc/source/whatsnew/v1.0.0.rst
@@ -169,6 +169,16 @@ You can use the alias ``"boolean"`` as well.
    s = pd.Series([True, False, None], dtype="boolean")
    s
 
+.. _whatsnew_1000.numba_rolling_apply:
+
+Using Numba in ``rolling.apply``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+We've added an ``engine`` keyword to :meth:`~Rolling.apply` that allows the user to execute the
+routine using `Numba <https://numba.pydata.org/>`__ instead of Cython. Using the Numba engine
+can yield significant performance gains if the apply function can operate on numpy arrays and
+the data set is larger. For more details, see :ref:`rolling apply documentation <stats.rolling_apply>`
+
 .. _whatsnew_1000.custom_window:
 
 Defining custom windows for rolling operations
@@ -428,6 +438,8 @@ Optional libraries below the lowest tested version may still work, but are not c
 +-----------------+-----------------+---------+
 | matplotlib      | 2.2.2           |         |
 +-----------------+-----------------+---------+
+| numba           | 0.46.0          |         |
++-----------------+-----------------+---------+
 | openpyxl        | 2.5.7           |    X    |
 +-----------------+-----------------+---------+
 | pyarrow         | 0.12.0          |    X    |

From 6f1c73f955285d027355283017238237b11f013f Mon Sep 17 00:00:00 2001
From: Matt Roeschke <mroeschke@housecanary.com>
Date: Sun, 22 Dec 2019 10:41:38 -0800
Subject: [PATCH 29/44] Skip apply tests for numba not installed, lint

---
 doc/source/user_guide/computation.rst |  2 +-
 pandas/tests/window/conftest.py       | 17 ++++++++++++++++-
 2 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/doc/source/user_guide/computation.rst b/doc/source/user_guide/computation.rst
index 259c274cfc369..043e4d761d477 100644
--- a/doc/source/user_guide/computation.rst
+++ b/doc/source/user_guide/computation.rst
@@ -369,7 +369,7 @@ and the apply for loop. Currently only ``nogil``, ``nopython``, and ``parallel``
    In [3]: def f(x):
       ...:     return np.sum(x) + 5
    # Ran the first time, compilation time will affect performance
-   In [4]: %timeit -r 1 -n 1 roll.apply(f, engine='numba', raw=True)
+   In [4]: %timeit -r 1 -n 1 roll.apply(f, engine='numba', raw=True)  # noqa: E225
    1.23 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
    # Function is cached and performance will improve
    In [5]: %timeit roll.apply(f, engine='numba', raw=True)
diff --git a/pandas/tests/window/conftest.py b/pandas/tests/window/conftest.py
index 21b57d35d04a7..77846b0829fd3 100644
--- a/pandas/tests/window/conftest.py
+++ b/pandas/tests/window/conftest.py
@@ -1,3 +1,10 @@
+try:
+    import numba  # noqa
+
+    _HAVE_NUMBA = True
+except ImportError:
+    _HAVE_NUMBA = False
+
 import pytest
 
 
@@ -64,6 +71,14 @@ def nopython(request):
     return request.param
 
 
-@pytest.fixture(params=["numba", "cython"])
+@pytest.fixture(
+    params=[
+        pytest.param(
+            "numba",
+            marks=pytest.mark.skipif(not _HAVE_NUMBA, reason="numba is not installed"),
+        ),
+        "cython",
+    ]
+)
 def engine(request):
     return request.param

From a8903379cd56a60ae927c9897918964bd46cb759 Mon Sep 17 00:00:00 2001
From: Matt Roeschke <mroeschke@housecanary.com>
Date: Sun, 22 Dec 2019 11:59:28 -0800
Subject: [PATCH 30/44] Add typing

---
 pandas/core/window/rolling.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py
index 62d9605d30bb1..14137f8cd4ce9 100644
--- a/pandas/core/window/rolling.py
+++ b/pandas/core/window/rolling.py
@@ -93,7 +93,7 @@ def __init__(
         self.win_freq = None
         self.axis = obj._get_axis_number(axis) if axis is not None else None
         self.validate()
-        self._numba_func_cache = dict()
+        self._numba_func_cache: Dict = dict()
 
     @property
     def _constructor(self):

From 0a9071cd72d9a958aa6d02eedd65bc0b9329d5ae Mon Sep 17 00:00:00 2001
From: Matt Roeschke <mroeschke@housecanary.com>
Date: Sun, 22 Dec 2019 12:58:04 -0800
Subject: [PATCH 31/44] Add more typing

---
 pandas/core/window/common.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/window/common.py b/pandas/core/window/common.py
index 0e7a877cbc69b..80f6de1aecd74 100644
--- a/pandas/core/window/common.py
+++ b/pandas/core/window/common.py
@@ -70,7 +70,7 @@ def _apply(
         floor: int = 1,
         is_weighted: bool = False,
         name: Optional[str] = None,
-        use_numba_cache: Optional = False,
+        use_numba_cache: Optional[bool] = False,
         **kwargs,
     ):
         """

From 9d8d40b86b2b8ad2ff89a664f4463da4eedb0e81 Mon Sep 17 00:00:00 2001
From: Matt Roeschke <mroeschke@housecanary.com>
Date: Sun, 22 Dec 2019 18:18:03 -0800
Subject: [PATCH 32/44] Formatting cleanups

---
 doc/source/user_guide/computation.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/source/user_guide/computation.rst b/doc/source/user_guide/computation.rst
index 043e4d761d477..84c0e960a14f8 100644
--- a/doc/source/user_guide/computation.rst
+++ b/doc/source/user_guide/computation.rst
@@ -356,13 +356,13 @@ and the apply for loop. Currently only ``nogil``, ``nopython``, and ``parallel``
 .. note::
 
    In terms of performance, **the first time a function is run using the Numba engine will be slow**
-   as Numba will have some function compilation overhead. However, `rolling` objects will cache
+   as Numba will have some function compilation overhead. However, ``rolling`` objects will cache
    the function and subsequent calls will be fast. In general, the Numba engine is performant with
    a larger amount of data points (e.g. 1+ million).
 
 .. code-block:: ipython
 
-   In [1]: data = pd.Series(range(1000000))
+   In [1]: data = pd.Series(range(1_000_000))
 
    In [2]: roll = data.rolling(10)
 

From a429206771582227958c914a3b2756a85fb96260 Mon Sep 17 00:00:00 2001
From: Matt Roeschke <mroeschke@housecanary.com>
Date: Mon, 23 Dec 2019 17:30:53 -0800
Subject: [PATCH 33/44] Address Jeff's comments

---
 doc/source/user_guide/computation.rst | 13 +++++----
 doc/source/whatsnew/v1.0.0.rst        |  1 +
 pandas/core/window/numba_.py          | 21 +++++++++++++--
 pandas/core/window/rolling.py         | 13 +++++++--
 pandas/tests/window/conftest.py       | 13 ++++++---
 pandas/tests/window/test_api.py       |  2 +-
 pandas/tests/window/test_numba.py     | 39 +++++++++++++++++++++++++++
 7 files changed, 88 insertions(+), 14 deletions(-)

diff --git a/doc/source/user_guide/computation.rst b/doc/source/user_guide/computation.rst
index 84c0e960a14f8..31ab87e831d30 100644
--- a/doc/source/user_guide/computation.rst
+++ b/doc/source/user_guide/computation.rst
@@ -339,19 +339,22 @@ compute the mean absolute deviation on a rolling basis:
    @savefig rolling_apply_ex.png
    s.rolling(window=60).apply(mad, raw=True).plot(style='k')
 
+.. versionadded:: 1.0
+
 Additionally, :meth:`~Rolling.apply` can leverage `Numba <https://numba.pydata.org/>`__
-if installed as an optional dependency as the execution engine of the apply aggregation using the
+if installed as an optional dependency. The apply aggregation can be executed using Numba by specifying
 ``engine='numba'`` and ``engine_kwargs`` arguments (``raw`` must also be set to ``True``).
 Numba will be applied in potentially two routines:
 
-1. If ``func`` is a standard Python function, the engine will JIT the passed function. ``func``
-can also be a pre-JIT function in which case the engine will not JIT the function again.
+1. If ``func`` is a standard Python function, the engine will `JIT <http://numba.pydata.org/numba-doc/latest/user/overview.html>`__
+   the passed function. ``func`` can also be a JITed function in which case the engine will not JIT the function again.
 2. The engine will JIT the for loop where the apply function is applied to each window.
 
 The ``engine_kwargs`` argument is a dictionary of keyword arguments that will be passed into the
 `numba.jit decorator <https://numba.pydata.org/numba-doc/latest/reference/jit-compilation.html#numba.jit>`__.
 These keyword arguments will be applied to *both* the passed function (if a standard Python function)
-and the apply for loop. Currently only ``nogil``, ``nopython``, and ``parallel`` are supported.
+and the apply for loop. Currently only ``nogil``, ``nopython``, and ``parallel`` are supported,
+and their default values are set to ``False``, ``True`` and ``False`` respectively.
 
 .. note::
 
@@ -368,7 +371,7 @@ and the apply for loop. Currently only ``nogil``, ``nopython``, and ``parallel``
 
    In [3]: def f(x):
       ...:     return np.sum(x) + 5
-   # Ran the first time, compilation time will affect performance
+   # Run the first time, compilation time will affect performance
    In [4]: %timeit -r 1 -n 1 roll.apply(f, engine='numba', raw=True)  # noqa: E225
    1.23 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
    # Function is cached and performance will improve
diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
index f8d8c0a3b593a..0b18983ba10f1 100644
--- a/doc/source/whatsnew/v1.0.0.rst
+++ b/doc/source/whatsnew/v1.0.0.rst
@@ -178,6 +178,7 @@ We've added an ``engine`` keyword to :meth:`~Rolling.apply` that allows the user
 routine using `Numba <https://numba.pydata.org/>`__ instead of Cython. Using the Numba engine
 can yield significant performance gains if the apply function can operate on numpy arrays and
 the data set is larger. For more details, see :ref:`rolling apply documentation <stats.rolling_apply>`
+(:issue:`28987`)
 
 .. _whatsnew_1000.custom_window:
 
diff --git a/pandas/core/window/numba_.py b/pandas/core/window/numba_.py
index 284d566a14949..1fa4483e2be5e 100644
--- a/pandas/core/window/numba_.py
+++ b/pandas/core/window/numba_.py
@@ -6,7 +6,7 @@
 from pandas.compat._optional import import_optional_dependency
 
 
-def _generate_numba_apply_func(
+def generate_numba_apply_func(
     args: Tuple,
     kwargs: Dict,
     func: Callable,
@@ -21,11 +21,28 @@ def _generate_numba_apply_func(
 
     Configurations specified in engine_kwargs apply to both the user's
     function _AND_ the rolling apply function.
+
+    Parameters
+    ----------
+    args : tuple
+        *args to be passed into the function
+    kwargs : dict
+        **kwargs to be passed into the function
+    func : function
+        function to be applied to each window and will be JITed
+    engine_kwargs : dict
+        dictionary of arguments to be passed into numba.jit
+    function_cache : dict
+        dictionary of cached apply function to avoid re-compiling the apply loop
+
+    Returns
+    -------
+    Numba function
     """
     numba = import_optional_dependency("numba")
 
     if engine_kwargs is None:
-        engine_kwargs = {"nopython": True, "nogil": False, "parallel": False}
+        engine_kwargs = {}
 
     nopython = engine_kwargs.get("nopython", True)
     nogil = engine_kwargs.get("nogil", False)
diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py
index 98be912f8106e..43cf46d97c3e5 100644
--- a/pandas/core/window/rolling.py
+++ b/pandas/core/window/rolling.py
@@ -54,7 +54,7 @@
     FixedWindowIndexer,
     VariableWindowIndexer,
 )
-from pandas.core.window.numba_ import _generate_numba_apply_func
+from pandas.core.window.numba_ import generate_numba_apply_func
 
 
 class _Window(PandasObject, ShallowMixin, SelectionMixin):
@@ -1240,6 +1240,9 @@ def count(self):
         Must produce a single value from an ndarray input if ``raw=True``
         or a single value from a Series if ``raw=False``. Can also accept a
         Numba JIT function with ``engine='numba'`` specified.
+        
+        .. versionchanged:: 1.0.0
+        
     raw : bool, default None
         * ``False`` : passes each row or column as a Series to the
           function.
@@ -1251,6 +1254,9 @@ def count(self):
         * ``'cython'`` : Runs rolling apply through C-extensions from cython.
         * ``'numba'`` : Runs rolling apply through JIT compiled code from numba.
           Only available when ``raw`` is set to ``True``.
+          
+          .. versionadded:: 1.0.0
+
     engine_kwargs : dict, default None
         * For ``'cython'`` engine, there are no accepted ``engine_kwargs``
         * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil``
@@ -1258,6 +1264,9 @@ def count(self):
           ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is
           ``{'nopython': True, 'nogil': False, 'parallel': False}`` and will be
           applied to both the ``func`` and the ``apply`` rolling aggregation.
+          
+          .. versionadded:: 1.0.0
+          
     args : tuple, default None
         Positional arguments to be passed into func.
     kwargs : dict, default None
@@ -1309,7 +1318,7 @@ def apply(
         elif engine == "numba":
             if raw is False:
                 raise ValueError("raw must be `True` when using the numba engine")
-            apply_func = _generate_numba_apply_func(
+            apply_func = generate_numba_apply_func(
                 args, kwargs, func, engine_kwargs, self._numba_func_cache
             )
         else:
diff --git a/pandas/tests/window/conftest.py b/pandas/tests/window/conftest.py
index 77846b0829fd3..4baaa27f57658 100644
--- a/pandas/tests/window/conftest.py
+++ b/pandas/tests/window/conftest.py
@@ -1,12 +1,13 @@
-try:
-    import numba  # noqa
+import pytest
+
+from pandas.compat._optional import import_optional_dependency
 
+try:
+    import_optional_dependency('numba')  # noqa
     _HAVE_NUMBA = True
 except ImportError:
     _HAVE_NUMBA = False
 
-import pytest
-
 
 @pytest.fixture(params=[True, False])
 def raw(request):
@@ -58,16 +59,19 @@ def min_periods(request):
 
 @pytest.fixture(params=[True, False])
 def parallel(request):
+    """parallel keyword argument for numba.jit"""
     return request.param
 
 
 @pytest.fixture(params=[True, False])
 def nogil(request):
+    """nogil keyword argument for numba.jit"""
     return request.param
 
 
 @pytest.fixture(params=[True, False])
 def nopython(request):
+    """nopython keyword argument for numba.jit"""
     return request.param
 
 
@@ -81,4 +85,5 @@ def nopython(request):
     ]
 )
 def engine(request):
+    """engine keyword argument for rolling.apply"""
     return request.param
diff --git a/pandas/tests/window/test_api.py b/pandas/tests/window/test_api.py
index 518da688d72bf..b0e175acfb922 100644
--- a/pandas/tests/window/test_api.py
+++ b/pandas/tests/window/test_api.py
@@ -344,7 +344,6 @@ def test_multiple_agg_funcs(self, func, window_size, expected_vals):
         tm.assert_frame_equal(result, expected)
 
 
-@td.skip_if_no("numba", "0.46.0")
 class TestEngine:
     def test_invalid_engine(self):
         with pytest.raises(
@@ -366,6 +365,7 @@ def test_invalid_raw_numba(self):
         ):
             Series(range(1)).rolling(1).apply(lambda x: x, raw=False, engine="numba")
 
+    @td.skip_if_no("numba", "0.46.0")
     def test_invalid_kwargs_nopython(self):
         with pytest.raises(ValueError, match="numba does not support kwargs with"):
             Series(range(1)).rolling(1).apply(
diff --git a/pandas/tests/window/test_numba.py b/pandas/tests/window/test_numba.py
index 3a85cff9a73eb..66e4d4e2e7145 100644
--- a/pandas/tests/window/test_numba.py
+++ b/pandas/tests/window/test_numba.py
@@ -31,3 +31,42 @@ def f(x, *args):
         )
         expected = s.rolling(2).apply(f, engine="cython", args=args, raw=True)
         tm.assert_series_equal(result, expected)
+
+    @pytest.mark.parametrize("jit", [True, False])
+    def test_cache(self, jit, nogil, parallel, nopython):
+        # Test that the functions are cached correctly if we switch functions
+        def func_1(x):
+            return np.mean(x) + 4
+
+        def func_2(x):
+            return np.std(x) * 5
+
+        if jit:
+            import numba
+
+            func_1 = numba.jit(func_1)
+            func_2 = numba.jit(func_2)
+
+        engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython}
+
+        roll = Series(range(10)).rolling(2)
+        result = roll.apply(
+            func_1, engine="numba", engine_kwargs=engine_kwargs, raw=True
+        )
+        expected = roll.apply(func_1, engine="cython", raw=True)
+        tm.assert_series_equal(result, expected)
+
+        # func_1 should be in the cache now
+        assert func_1 in roll._numba_func_cache
+
+        result = roll.apply(
+            func_2, engine="numba", engine_kwargs=engine_kwargs, raw=True
+        )
+        expected = roll.apply(func_2, engine="cython", raw=True)
+        tm.assert_series_equal(result, expected)
+        # This run should use the cached func_1
+        result = roll.apply(
+            func_1, engine="numba", engine_kwargs=engine_kwargs, raw=True
+        )
+        expected = roll.apply(func_1, engine="cython", raw=True)
+        tm.assert_series_equal(result, expected)

From 5826ad9d235d316f3f84d64aa917328ace557932 Mon Sep 17 00:00:00 2001
From: Matt Roeschke <mroeschke@housecanary.com>
Date: Mon, 23 Dec 2019 17:31:14 -0800
Subject: [PATCH 34/44] Black

---
 pandas/tests/window/conftest.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/window/conftest.py b/pandas/tests/window/conftest.py
index 4baaa27f57658..a8188d484aae6 100644
--- a/pandas/tests/window/conftest.py
+++ b/pandas/tests/window/conftest.py
@@ -3,7 +3,7 @@
 from pandas.compat._optional import import_optional_dependency
 
 try:
-    import_optional_dependency('numba')  # noqa
+    import_optional_dependency("numba")  # noqa
     _HAVE_NUMBA = True
 except ImportError:
     _HAVE_NUMBA = False

From cf7571b8f83dd95a931e9de2646d892fab6db376 Mon Sep 17 00:00:00 2001
From: Matt Roeschke <mroeschke@housecanary.com>
Date: Mon, 23 Dec 2019 19:48:41 -0800
Subject: [PATCH 35/44] Add clarification

---
 doc/source/user_guide/computation.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/user_guide/computation.rst b/doc/source/user_guide/computation.rst
index 31ab87e831d30..a2150c207c0b0 100644
--- a/doc/source/user_guide/computation.rst
+++ b/doc/source/user_guide/computation.rst
@@ -353,7 +353,7 @@ the passed function. ``func`` can also be a JITed function in which case the eng
 The ``engine_kwargs`` argument is a dictionary of keyword arguments that will be passed into the
 `numba.jit decorator <https://numba.pydata.org/numba-doc/latest/reference/jit-compilation.html#numba.jit>`__.
 These keyword arguments will be applied to *both* the passed function (if a standard Python function)
-and the apply for loop. Currently only ``nogil``, ``nopython``, and ``parallel`` are supported,
+and the apply for loop over each window. Currently only ``nogil``, ``nopython``, and ``parallel`` are supported,
 and their default values are set to ``False``, ``True`` and ``False`` respectively.
 
 .. note::

From 18eed60802e1eac77ed38bc1770b93f50d0d2d72 Mon Sep 17 00:00:00 2001
From: Matt Roeschke <mroeschke@housecanary.com>
Date: Tue, 24 Dec 2019 10:51:54 -0800
Subject: [PATCH 36/44] Move function to module level

---
 pandas/core/window/numba_.py  | 104 +++++++++++++++++-----------------
 pandas/core/window/rolling.py |   2 +
 2 files changed, 53 insertions(+), 53 deletions(-)

diff --git a/pandas/core/window/numba_.py b/pandas/core/window/numba_.py
index 1fa4483e2be5e..1608536eecdf0 100644
--- a/pandas/core/window/numba_.py
+++ b/pandas/core/window/numba_.py
@@ -6,6 +6,56 @@
 from pandas.compat._optional import import_optional_dependency
 
 
+def make_rolling_apply(func, args, nogil, parallel, nopython):
+    numba = import_optional_dependency("numba")
+
+    if parallel:
+        loop_range = numba.prange
+    else:
+        loop_range = range
+
+    if isinstance(func, numba.targets.registry.CPUDispatcher):
+        # Don't jit a user passed jitted function
+        numba_func = func
+    else:
+
+        @numba.generated_jit(nopython=nopython, nogil=nogil, parallel=parallel)
+        def numba_func(window, *_args):
+            if getattr(np, func.__name__, False) is func or isinstance(
+                func, types.BuiltinFunctionType
+            ):
+
+                def impl(window, *_args):
+                    return func(window, *_args)
+
+                return impl
+            else:
+                jf = numba.jit(func, nopython=nopython)
+
+                def impl(window, *_args):
+                    return jf(window, *_args)
+
+                return impl
+
+    @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel)
+    def roll_apply(
+        values: np.ndarray, begin: np.ndarray, end: np.ndarray, minimum_periods: int,
+    ):
+        result = np.empty(len(begin))
+        for i in loop_range(len(result)):
+            start = begin[i]
+            stop = end[i]
+            window = values[start:stop]
+            count_nan = np.sum(np.isnan(window))
+            if len(window) - count_nan >= minimum_periods:
+                result[i] = numba_func(window, *args)
+            else:
+                result[i] = np.nan
+        return result
+
+    return roll_apply
+
+
 def generate_numba_apply_func(
     args: Tuple,
     kwargs: Dict,
@@ -39,7 +89,6 @@ def generate_numba_apply_func(
     -------
     Numba function
     """
-    numba = import_optional_dependency("numba")
 
     if engine_kwargs is None:
         engine_kwargs = {}
@@ -54,59 +103,8 @@ def generate_numba_apply_func(
             "https://github.com/numba/numba/issues/2916"
         )
 
-    if parallel:
-        loop_range = numba.prange
-    else:
-        loop_range = range
-
     # Return an already compiled version of roll_apply if available
     if func in function_cache:
         return function_cache[func]
 
-    def make_rolling_apply(func):
-
-        if isinstance(func, numba.targets.registry.CPUDispatcher):
-            # Don't jit a user passed jitted function
-            numba_func = func
-        else:
-
-            @numba.generated_jit(nopython=nopython, nogil=nogil, parallel=parallel)
-            def numba_func(window, *_args):
-                if getattr(np, func.__name__, False) is func or isinstance(
-                    func, types.BuiltinFunctionType
-                ):
-
-                    def impl(window, *_args):
-                        return func(window, *_args)
-
-                    return impl
-                else:
-                    jf = numba.jit(func, nopython=nopython)
-
-                    def impl(window, *_args):
-                        return jf(window, *_args)
-
-                    return impl
-
-        @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel)
-        def roll_apply(
-            values: np.ndarray,
-            begin: np.ndarray,
-            end: np.ndarray,
-            minimum_periods: int,
-        ):
-            result = np.empty(len(begin))
-            for i in loop_range(len(result)):
-                start = begin[i]
-                stop = end[i]
-                window = values[start:stop]
-                count_nan = np.sum(np.isnan(window))
-                if len(window) - count_nan >= minimum_periods:
-                    result[i] = numba_func(window, *args)
-                else:
-                    result[i] = np.nan
-            return result
-
-        return roll_apply
-
-    return make_rolling_apply(func)
+    return make_rolling_apply(func, args, nogil, parallel, nopython)
diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py
index 43cf46d97c3e5..5208cd5d11c94 100644
--- a/pandas/core/window/rolling.py
+++ b/pandas/core/window/rolling.py
@@ -462,6 +462,8 @@ def _apply(
         name : str,
             compatibility with groupby.rolling
         use_numba_cache : bool
+            whether to cache a numba compiled function. Only available for numba
+            enabled methods (so far only apply)
         **kwargs
             additional arguments for rolling function and window function
 

From f715b55043c7ae5a7e6981bf1694611951159272 Mon Sep 17 00:00:00 2001
From: Matt Roeschke <mroeschke@housecanary.com>
Date: Tue, 24 Dec 2019 11:30:01 -0800
Subject: [PATCH 37/44] move cache check higher up

---
 pandas/core/window/numba_.py  |  7 -------
 pandas/core/window/rolling.py | 10 +++++++---
 2 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/pandas/core/window/numba_.py b/pandas/core/window/numba_.py
index 1608536eecdf0..302afda6c402c 100644
--- a/pandas/core/window/numba_.py
+++ b/pandas/core/window/numba_.py
@@ -61,7 +61,6 @@ def generate_numba_apply_func(
     kwargs: Dict,
     func: Callable,
     engine_kwargs: Optional[Dict],
-    function_cache: Dict,
 ):
     """
     Generate a numba jitted apply function specified by values from engine_kwargs.
@@ -82,8 +81,6 @@ def generate_numba_apply_func(
         function to be applied to each window and will be JITed
     engine_kwargs : dict
         dictionary of arguments to be passed into numba.jit
-    function_cache : dict
-        dictionary of cached apply function to avoid re-compiling the apply loop
 
     Returns
     -------
@@ -103,8 +100,4 @@ def generate_numba_apply_func(
             "https://github.com/numba/numba/issues/2916"
         )
 
-    # Return an already compiled version of roll_apply if available
-    if func in function_cache:
-        return function_cache[func]
-
     return make_rolling_apply(func, args, nogil, parallel, nopython)
diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py
index 5208cd5d11c94..4d657b68b2ae0 100644
--- a/pandas/core/window/rolling.py
+++ b/pandas/core/window/rolling.py
@@ -1320,9 +1320,13 @@ def apply(
         elif engine == "numba":
             if raw is False:
                 raise ValueError("raw must be `True` when using the numba engine")
-            apply_func = generate_numba_apply_func(
-                args, kwargs, func, engine_kwargs, self._numba_func_cache
-            )
+            if func in self._numba_func_cache:
+                # Reuse an already compiled version of roll_apply if available
+                apply_func = self._numba_func_cache[func]
+            else:
+                apply_func = generate_numba_apply_func(
+                    args, kwargs, func, engine_kwargs
+                )
         else:
             raise ValueError("engine must be either 'numba' or 'cython'")
 

From 6a765bf1e734d29ceb899b3609f7a7aea0f139fa Mon Sep 17 00:00:00 2001
From: Matt Roeschke <mroeschke@housecanary.com>
Date: Tue, 24 Dec 2019 12:08:59 -0800
Subject: [PATCH 38/44] Address Will's comments

---
 pandas/core/window/numba_.py    | 20 ++++++++------------
 pandas/tests/window/conftest.py | 16 ++--------------
 2 files changed, 10 insertions(+), 26 deletions(-)

diff --git a/pandas/core/window/numba_.py b/pandas/core/window/numba_.py
index 302afda6c402c..27efec35089ec 100644
--- a/pandas/core/window/numba_.py
+++ b/pandas/core/window/numba_.py
@@ -1,5 +1,5 @@
 import types
-from typing import Callable, Dict, Optional, Tuple
+from typing import Any, Callable, Dict, Optional, Tuple
 
 import numpy as np
 
@@ -24,23 +24,19 @@ def numba_func(window, *_args):
             if getattr(np, func.__name__, False) is func or isinstance(
                 func, types.BuiltinFunctionType
             ):
-
-                def impl(window, *_args):
-                    return func(window, *_args)
-
-                return impl
+                jf = func
             else:
                 jf = numba.jit(func, nopython=nopython)
 
-                def impl(window, *_args):
-                    return jf(window, *_args)
+            def impl(window, *_args):
+                return jf(window, *_args)
 
-                return impl
+            return impl
 
     @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel)
     def roll_apply(
         values: np.ndarray, begin: np.ndarray, end: np.ndarray, minimum_periods: int,
-    ):
+    ) -> np.ndarray:
         result = np.empty(len(begin))
         for i in loop_range(len(result)):
             start = begin[i]
@@ -58,9 +54,9 @@ def roll_apply(
 
 def generate_numba_apply_func(
     args: Tuple,
-    kwargs: Dict,
+    kwargs: Dict[str, Any],
     func: Callable,
-    engine_kwargs: Optional[Dict],
+    engine_kwargs: Optional[Dict[str, bool]],
 ):
     """
     Generate a numba jitted apply function specified by values from engine_kwargs.
diff --git a/pandas/tests/window/conftest.py b/pandas/tests/window/conftest.py
index a8188d484aae6..2a2ff95f599ba 100644
--- a/pandas/tests/window/conftest.py
+++ b/pandas/tests/window/conftest.py
@@ -1,12 +1,6 @@
 import pytest
 
-from pandas.compat._optional import import_optional_dependency
-
-try:
-    import_optional_dependency("numba")  # noqa
-    _HAVE_NUMBA = True
-except ImportError:
-    _HAVE_NUMBA = False
+import pandas.util._test_decorators as td
 
 
 @pytest.fixture(params=[True, False])
@@ -76,13 +70,7 @@ def nopython(request):
 
 
 @pytest.fixture(
-    params=[
-        pytest.param(
-            "numba",
-            marks=pytest.mark.skipif(not _HAVE_NUMBA, reason="numba is not installed"),
-        ),
-        "cython",
-    ]
+    params=[pytest.param("numba", marks=td.skip_if_no("numba", "0.46.0")), "cython"]
 )
 def engine(request):
     """engine keyword argument for rolling.apply"""

From af3fe500e2f0155fdff884578a326475c3f9b6e7 Mon Sep 17 00:00:00 2001
From: Matt Roeschke <mroeschke@housecanary.com>
Date: Tue, 24 Dec 2019 12:12:05 -0800
Subject: [PATCH 39/44] Type Callable in generate_numba_apply_func

---
 pandas/core/window/numba_.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/core/window/numba_.py b/pandas/core/window/numba_.py
index 27efec35089ec..024f7c50f9b45 100644
--- a/pandas/core/window/numba_.py
+++ b/pandas/core/window/numba_.py
@@ -3,6 +3,7 @@
 
 import numpy as np
 
+from pandas._typing import Scalar
 from pandas.compat._optional import import_optional_dependency
 
 
@@ -55,7 +56,7 @@ def roll_apply(
 def generate_numba_apply_func(
     args: Tuple,
     kwargs: Dict[str, Any],
-    func: Callable,
+    func: Callable[[np.ndarray, ...], Scalar],
     engine_kwargs: Optional[Dict[str, bool]],
 ):
     """

From f7dfcf4e9f7c83b0e27c4179a89d4b35035fb45a Mon Sep 17 00:00:00 2001
From: Matt Roeschke <mroeschke@housecanary.com>
Date: Tue, 24 Dec 2019 14:28:13 -0800
Subject: [PATCH 40/44] use ellipsis, cannot specify np.ndarray as well

---
 pandas/core/window/numba_.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/window/numba_.py b/pandas/core/window/numba_.py
index 024f7c50f9b45..af6491e183e80 100644
--- a/pandas/core/window/numba_.py
+++ b/pandas/core/window/numba_.py
@@ -56,7 +56,7 @@ def roll_apply(
 def generate_numba_apply_func(
     args: Tuple,
     kwargs: Dict[str, Any],
-    func: Callable[[np.ndarray, ...], Scalar],
+    func: Callable[..., Scalar],
     engine_kwargs: Optional[Dict[str, bool]],
 ):
     """

From a42a9603a498dd54fd6266fe13419209e1eb2e12 Mon Sep 17 00:00:00 2001
From: Matt Roeschke <mroeschke@housecanary.com>
Date: Tue, 24 Dec 2019 14:31:45 -0800
Subject: [PATCH 41/44] Remove trailing whitespace in apply docstring

---
 pandas/core/window/rolling.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py
index 4d657b68b2ae0..033b6777e42ca 100644
--- a/pandas/core/window/rolling.py
+++ b/pandas/core/window/rolling.py
@@ -1242,9 +1242,9 @@ def count(self):
         Must produce a single value from an ndarray input if ``raw=True``
         or a single value from a Series if ``raw=False``. Can also accept a
         Numba JIT function with ``engine='numba'`` specified.
-        
+
         .. versionchanged:: 1.0.0
-        
+
     raw : bool, default None
         * ``False`` : passes each row or column as a Series to the
           function.
@@ -1256,7 +1256,7 @@ def count(self):
         * ``'cython'`` : Runs rolling apply through C-extensions from cython.
         * ``'numba'`` : Runs rolling apply through JIT compiled code from numba.
           Only available when ``raw`` is set to ``True``.
-          
+
           .. versionadded:: 1.0.0
 
     engine_kwargs : dict, default None
@@ -1266,9 +1266,9 @@ def count(self):
           ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is
           ``{'nopython': True, 'nogil': False, 'parallel': False}`` and will be
           applied to both the ``func`` and the ``apply`` rolling aggregation.
-          
+
           .. versionadded:: 1.0.0
-          
+
     args : tuple, default None
         Positional arguments to be passed into func.
     kwargs : dict, default None

From d01983029a936482513a6322946a1232f1610f0e Mon Sep 17 00:00:00 2001
From: Matt Roeschke <mroeschke@housecanary.com>
Date: Tue, 24 Dec 2019 20:03:52 -0800
Subject: [PATCH 42/44] Address Will's and Brock's comments

---
 doc/source/whatsnew/v1.0.0.rst    |  6 ++--
 pandas/core/window/numba_.py      |  8 ++++-
 pandas/core/window/rolling.py     |  4 +--
 pandas/tests/window/conftest.py   | 12 ++++++++
 pandas/tests/window/test_api.py   | 29 ------------------
 pandas/tests/window/test_apply.py | 50 +++++++++++++++++++++++--------
 6 files changed, 62 insertions(+), 47 deletions(-)

diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
index 0ab633b5f7c5e..dd0b332be9c64 100644
--- a/doc/source/whatsnew/v1.0.0.rst
+++ b/doc/source/whatsnew/v1.0.0.rst
@@ -177,8 +177,8 @@ Using Numba in ``rolling.apply``
 We've added an ``engine`` keyword to :meth:`~Rolling.apply` that allows the user to execute the
 routine using `Numba <https://numba.pydata.org/>`__ instead of Cython. Using the Numba engine
 can yield significant performance gains if the apply function can operate on numpy arrays and
-the data set is larger. For more details, see :ref:`rolling apply documentation <stats.rolling_apply>`
-(:issue:`28987`)
+the data set is large (1 million rows or greater). For more details, see
+:ref:`rolling apply documentation <stats.rolling_apply>` (:issue:`28987`)
 
 .. _whatsnew_1000.custom_window:
 
@@ -439,7 +439,7 @@ Optional libraries below the lowest tested version may still work, but are not c
 +-----------------+-----------------+---------+
 | matplotlib      | 2.2.2           |         |
 +-----------------+-----------------+---------+
-| numba           | 0.46.0          |         |
+| numba           | 0.46.0          |    X    |
 +-----------------+-----------------+---------+
 | openpyxl        | 2.5.7           |    X    |
 +-----------------+-----------------+---------+
diff --git a/pandas/core/window/numba_.py b/pandas/core/window/numba_.py
index af6491e183e80..fdc5c32c453b1 100644
--- a/pandas/core/window/numba_.py
+++ b/pandas/core/window/numba_.py
@@ -7,7 +7,13 @@
 from pandas.compat._optional import import_optional_dependency
 
 
-def make_rolling_apply(func, args, nogil, parallel, nopython):
+def make_rolling_apply(
+    func: Callable[..., Scalar],
+    args: Tuple,
+    nogil: bool,
+    parallel: bool,
+    nopython: bool,
+):
     numba = import_optional_dependency("numba")
 
     if parallel:
diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py
index 033b6777e42ca..d2618debf3c32 100644
--- a/pandas/core/window/rolling.py
+++ b/pandas/core/window/rolling.py
@@ -93,7 +93,7 @@ def __init__(
         self.win_freq = None
         self.axis = obj._get_axis_number(axis) if axis is not None else None
         self.validate()
-        self._numba_func_cache: Dict = dict()
+        self._numba_func_cache: Dict[Callable, Callable] = dict()
 
     @property
     def _constructor(self):
@@ -444,7 +444,7 @@ def _apply(
         floor: int = 1,
         is_weighted: bool = False,
         name: Optional[str] = None,
-        use_numba_cache: Optional[bool] = False,
+        use_numba_cache: bool = False,
         **kwargs,
     ):
         """
diff --git a/pandas/tests/window/conftest.py b/pandas/tests/window/conftest.py
index 2a2ff95f599ba..fb46ca51ace58 100644
--- a/pandas/tests/window/conftest.py
+++ b/pandas/tests/window/conftest.py
@@ -75,3 +75,15 @@ def nopython(request):
 def engine(request):
     """engine keyword argument for rolling.apply"""
     return request.param
+
+
+@pytest.fixture(
+    params=[
+        pytest.param(("numba", True), marks=td.skip_if_no("numba", "0.46.0")),
+        ("cython", True),
+        ("cython", False),
+    ]
+)
+def engine_and_raw(request):
+    """engine and raw keyword arguments for rolling.apply"""
+    return request.param
diff --git a/pandas/tests/window/test_api.py b/pandas/tests/window/test_api.py
index b0e175acfb922..5085576cc96f0 100644
--- a/pandas/tests/window/test_api.py
+++ b/pandas/tests/window/test_api.py
@@ -342,32 +342,3 @@ def test_multiple_agg_funcs(self, func, window_size, expected_vals):
         )
 
         tm.assert_frame_equal(result, expected)
-
-
-class TestEngine:
-    def test_invalid_engine(self):
-        with pytest.raises(
-            ValueError, match="engine must be either 'numba' or 'cython'"
-        ):
-            Series(range(1)).rolling(1).apply(lambda x: x, engine="foo")
-
-    def test_invalid_engine_kwargs_cython(self):
-        with pytest.raises(
-            ValueError, match="cython engine does not accept engine_kwargs"
-        ):
-            Series(range(1)).rolling(1).apply(
-                lambda x: x, engine="cython", engine_kwargs={"nopython": False}
-            )
-
-    def test_invalid_raw_numba(self):
-        with pytest.raises(
-            ValueError, match="raw must be `True` when using the numba engine"
-        ):
-            Series(range(1)).rolling(1).apply(lambda x: x, raw=False, engine="numba")
-
-    @td.skip_if_no("numba", "0.46.0")
-    def test_invalid_kwargs_nopython(self):
-        with pytest.raises(ValueError, match="numba does not support kwargs with"):
-            Series(range(1)).rolling(1).apply(
-                lambda x: x, kwargs={"a": 1}, engine="numba", raw=True
-            )
diff --git a/pandas/tests/window/test_apply.py b/pandas/tests/window/test_apply.py
index 965d8e3616f4c..4b56cbd48c388 100644
--- a/pandas/tests/window/test_apply.py
+++ b/pandas/tests/window/test_apply.py
@@ -1,6 +1,8 @@
 import numpy as np
 import pytest
 
+import pandas.util._test_decorators as td
+
 from pandas import DataFrame, Series, Timestamp, date_range
 import pandas.util.testing as tm
 
@@ -11,10 +13,9 @@ def test_rolling_apply_invalid_raw(bad_raw):
         Series(range(3)).rolling(1).apply(len, raw=bad_raw)
 
 
-def test_rolling_apply_out_of_bounds(engine, raw):
+def test_rolling_apply_out_of_bounds(engine_and_raw):
     # gh-1850
-    if engine == "numba":
-        raw = True
+    engine, raw = engine_and_raw
 
     vals = Series([1, 2, 3, 4])
 
@@ -49,9 +50,9 @@ def f(x):
         df.rolling(window).apply(f, raw=True)
 
 
-def test_rolling_apply(engine, raw):
-    if engine == "numba":
-        raw = True
+def test_rolling_apply(engine_and_raw):
+    engine, raw = engine_and_raw
+
     expected = Series([], dtype="float64")
     result = expected.rolling(10).apply(lambda x: x.mean(), engine=engine, raw=raw)
     tm.assert_series_equal(result, expected)
@@ -66,9 +67,8 @@ def test_rolling_apply(engine, raw):
     tm.assert_series_equal(result, expected)
 
 
-def test_all_apply(engine, raw):
-    if engine == "numba":
-        raw = True
+def test_all_apply(engine_and_raw):
+    engine, raw = engine_and_raw
 
     df = (
         DataFrame(
@@ -84,9 +84,8 @@ def test_all_apply(engine, raw):
     tm.assert_frame_equal(result, expected)
 
 
-def test_ragged_apply(engine, raw):
-    if engine == "numba":
-        raw = True
+def test_ragged_apply(engine_and_raw):
+    engine, raw = engine_and_raw
 
     df = DataFrame({"B": range(5)})
     df.index = [
@@ -112,3 +111,30 @@ def test_ragged_apply(engine, raw):
     expected = df.copy()
     expected["B"] = 1.0
     tm.assert_frame_equal(result, expected)
+
+
+def test_invalid_engine():
+    with pytest.raises(ValueError, match="engine must be either 'numba' or 'cython'"):
+        Series(range(1)).rolling(1).apply(lambda x: x, engine="foo")
+
+
+def test_invalid_engine_kwargs_cython():
+    with pytest.raises(ValueError, match="cython engine does not accept engine_kwargs"):
+        Series(range(1)).rolling(1).apply(
+            lambda x: x, engine="cython", engine_kwargs={"nopython": False}
+        )
+
+
+def test_invalid_raw_numba():
+    with pytest.raises(
+        ValueError, match="raw must be `True` when using the numba engine"
+    ):
+        Series(range(1)).rolling(1).apply(lambda x: x, raw=False, engine="numba")
+
+
+@td.skip_if_no("numba")
+def test_invalid_kwargs_nopython():
+    with pytest.raises(ValueError, match="numba does not support kwargs with"):
+        Series(range(1)).rolling(1).apply(
+            lambda x: x, kwargs={"a": 1}, engine="numba", raw=True
+        )

From 29d145fd74cdb07126b63e9599e32a99d0b975b4 Mon Sep 17 00:00:00 2001
From: Matt Roeschke <mroeschke@housecanary.com>
Date: Tue, 24 Dec 2019 21:02:16 -0800
Subject: [PATCH 43/44] Fix typing

---
 pandas/core/window/rolling.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py
index d2618debf3c32..e1cf5a6b09a15 100644
--- a/pandas/core/window/rolling.py
+++ b/pandas/core/window/rolling.py
@@ -93,7 +93,7 @@ def __init__(
         self.win_freq = None
         self.axis = obj._get_axis_number(axis) if axis is not None else None
         self.validate()
-        self._numba_func_cache: Dict[Callable, Callable] = dict()
+        self._numba_func_cache: Dict[Optional[str], Callable] = dict()
 
     @property
     def _constructor(self):

From a3da51eb3a301b8fdcf6991c5ea091d206ab21b8 Mon Sep 17 00:00:00 2001
From: Matt Roeschke <mroeschke@housecanary.com>
Date: Thu, 26 Dec 2019 10:15:31 -0800
Subject: [PATCH 44/44] Address followup comments

---
 pandas/core/window/common.py  |  2 +-
 pandas/core/window/numba_.py  | 23 ++++++++++++++++++++++-
 pandas/core/window/rolling.py |  2 +-
 3 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/pandas/core/window/common.py b/pandas/core/window/common.py
index 80f6de1aecd74..5b467b03c1fc2 100644
--- a/pandas/core/window/common.py
+++ b/pandas/core/window/common.py
@@ -70,7 +70,7 @@ def _apply(
         floor: int = 1,
         is_weighted: bool = False,
         name: Optional[str] = None,
-        use_numba_cache: Optional[bool] = False,
+        use_numba_cache: bool = False,
         **kwargs,
     ):
         """
diff --git a/pandas/core/window/numba_.py b/pandas/core/window/numba_.py
index fdc5c32c453b1..127957943d2ff 100644
--- a/pandas/core/window/numba_.py
+++ b/pandas/core/window/numba_.py
@@ -14,6 +14,27 @@ def make_rolling_apply(
     parallel: bool,
     nopython: bool,
 ):
+    """
+    Creates a JITted rolling apply function with a JITted version of
+    the user's function.
+
+    Parameters
+    ----------
+    func : function
+        function to be applied to each window; it will be JITted
+    args : tuple
+        *args to be passed into the function
+    nogil : bool
+        nogil parameter from engine_kwargs for numba.jit
+    parallel : bool
+        parallel parameter from engine_kwargs for numba.jit
+    nopython : bool
+        nopython parameter from engine_kwargs for numba.jit
+
+    Returns
+    -------
+    Numba function
+    """
     numba = import_optional_dependency("numba")
 
     if parallel:
@@ -33,7 +54,7 @@ def numba_func(window, *_args):
             ):
                 jf = func
             else:
-                jf = numba.jit(func, nopython=nopython)
+                jf = numba.jit(func, nopython=nopython, nogil=nogil)
 
             def impl(window, *_args):
                 return jf(window, *_args)
diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py
index e1cf5a6b09a15..5b0fbbb3518d2 100644
--- a/pandas/core/window/rolling.py
+++ b/pandas/core/window/rolling.py
@@ -1286,7 +1286,7 @@ def count(self):
 
     Notes
     -----
-    See :ref:`stats.rolling_window` for extended documentation and performance
+    See :ref:`stats.rolling_apply` for extended documentation and performance
     considerations for the Numba engine.
     """
     )