From 4710af408ce571d8dd6a1d879d8f502d5ff86bec Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Sat, 18 Nov 2023 16:15:32 -0800 Subject: [PATCH 01/14] Use `numbagg` for `ffill` --- xarray/backends/zarr.py | 4 ++-- xarray/core/dask_array_ops.py | 6 ++++-- xarray/core/duck_array_ops.py | 38 ++++++++++++++++++++++++++++++++--- xarray/core/missing.py | 10 --------- xarray/core/nputils.py | 13 ++++++++---- xarray/core/rolling_exp.py | 30 +++++++++++++-------------- xarray/tests/__init__.py | 7 ++++++- xarray/tests/test_missing.py | 27 ++++++++++++++++++------- 8 files changed, 91 insertions(+), 44 deletions(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 6632e40cf6f..f0eece3bb61 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -177,8 +177,8 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name, safe_chunks): # DESIGN CHOICE: do not allow multiple dask chunks on a single zarr chunk # this avoids the need to get involved in zarr synchronization / locking # From zarr docs: - # "If each worker in a parallel computation is writing to a separate - # region of the array, and if region boundaries are perfectly aligned + # "If each worker in a parallel computation is writing to a + # separate region of the array, and if region boundaries are perfectly aligned # with chunk boundaries, then no synchronization is required." # TODO: incorporate synchronizer to allow writes from multiple dask # threads diff --git a/xarray/core/dask_array_ops.py b/xarray/core/dask_array_ops.py index d2d3e4a6d1c..67c1effd745 100644 --- a/xarray/core/dask_array_ops.py +++ b/xarray/core/dask_array_ops.py @@ -59,10 +59,11 @@ def push(array, n, axis): """ Dask-aware bottleneck.push """ - import bottleneck import dask.array as da import numpy as np + from xarray.core.duck_array_ops import _push + def _fill_with_last_one(a, b): # cumreduction apply the push func over all the blocks first so, the only missing part is filling # the missing values using the last data of the previous chunk @@ -85,7 +86,8 @@ def _fill_with_last_one(a, b): # The method parameter makes that the tests for python 3.7 fails. return da.reductions.cumreduction( - func=bottleneck.push, + func=_push, + # func=bottleneck.push, binop=_fill_with_last_one, ident=np.nan, x=array, diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index b9f7db9737f..7cffc6b18a6 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -31,8 +31,10 @@ from numpy import concatenate as _concatenate from numpy.core.multiarray import normalize_axis_index # type: ignore[attr-defined] from numpy.lib.stride_tricks import sliding_window_view # noqa +from packaging.version import Version from xarray.core import dask_array_ops, dtypes, nputils +from xarray.core.options import OPTIONS from xarray.core.parallelcompat import get_chunked_array_type, is_chunked_array from xarray.core.pycompat import array_type, is_duck_dask_array from xarray.core.utils import is_duck_array, module_available @@ -688,13 +690,43 @@ def least_squares(lhs, rhs, rcond=None, skipna=False): return nputils.least_squares(lhs, rhs, rcond=rcond, skipna=skipna) -def push(array, n, axis): - from bottleneck import push +def _push(array, n: int | None = None, axis: int = -1): + """ + Use either bottleneck or numbagg depending on options & what's available + """ + from xarray.core.nputils import NUMBAGG_VERSION, numbagg + + if not OPTIONS["use_bottleneck"] and not OPTIONS["use_numbagg"]: + raise RuntimeError( + "ffill & bfill requires bottleneck or numbagg to be enabled." + " Call `xr.set_options(use_bottleneck=True)` or `xr.set_options(use_numbagg=True)` to enable one." + ) + if OPTIONS["use_numbagg"] and NUMBAGG_VERSION is not None: + if NUMBAGG_VERSION < Version("0.6.2"): + warnings.warn( + f"numbagg >= 0.6.2 is required for bfill & ffill; {NUMBAGG_VERSION} is installed. We'll attempt with bottleneck instead." + ) + else: + return numbagg.ffill(array, limit=n, axis=axis) + # work around for bottleneck 178 + limit = n if n is not None else array.shape[axis] + + import bottleneck as bn + + return bn.push(array, limit, axis) + + +def push(array, n, axis): + if not OPTIONS["use_bottleneck"] and not OPTIONS["use_numbagg"]: + raise RuntimeError( + "ffill & bfill requires bottleneck or numbagg to be enabled." + " Call `xr.set_options(use_bottleneck=True)` or `xr.set_options(use_numbagg=True)` to enable one." + ) if is_duck_dask_array(array): return dask_array_ops.push(array, n, axis) else: - return push(array, n, axis) + return _push(array, n, axis) def _first_last_wrapper(array, *, axis, op, keepdims): diff --git a/xarray/core/missing.py b/xarray/core/missing.py index 90a9dd2e76c..c617e016107 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -413,11 +413,6 @@ def _bfill(arr, n=None, axis=-1): def ffill(arr, dim=None, limit=None): """forward fill missing values""" - if not OPTIONS["use_bottleneck"]: - raise RuntimeError( - "ffill requires bottleneck to be enabled." - " Call `xr.set_options(use_bottleneck=True)` to enable it." - ) axis = arr.get_axis_num(dim) @@ -436,11 +431,6 @@ def ffill(arr, dim=None, limit=None): def bfill(arr, dim=None, limit=None): """backfill missing values""" - if not OPTIONS["use_bottleneck"]: - raise RuntimeError( - "bfill requires bottleneck to be enabled." - " Call `xr.set_options(use_bottleneck=True)` to enable it." - ) axis = arr.get_axis_num(dim) diff --git a/xarray/core/nputils.py b/xarray/core/nputils.py index 316a77ead6a..8c24ac05380 100644 --- a/xarray/core/nputils.py +++ b/xarray/core/nputils.py @@ -1,6 +1,7 @@ from __future__ import annotations import warnings +from typing import Callable import numpy as np import pandas as pd @@ -25,14 +26,17 @@ bn = np _BOTTLENECK_AVAILABLE = False +NUMBAGG_VERSION: Version | None + try: import numbagg - _HAS_NUMBAGG = Version(numbagg.__version__) >= Version("0.5.0") + v = getattr(numbagg, "__version__", "999") + NUMBAGG_VERSION = Version(v) except ImportError: # use numpy methods instead numbagg = np - _HAS_NUMBAGG = False + NUMBAGG_VERSION = None def _select_along_axis(values, idx, axis): @@ -171,14 +175,15 @@ def __setitem__(self, key, value): self._array[key] = np.moveaxis(value, vindex_positions, mixed_positions) -def _create_method(name, npmodule=np): +def _create_method(name, npmodule=np) -> Callable: def f(values, axis=None, **kwargs): dtype = kwargs.get("dtype", None) bn_func = getattr(bn, name, None) nba_func = getattr(numbagg, name, None) if ( - _HAS_NUMBAGG + NUMBAGG_VERSION is not None + and NUMBAGG_VERSION >= Version("0.5.0") and OPTIONS["use_numbagg"] and isinstance(values, np.ndarray) and nba_func is not None diff --git a/xarray/core/rolling_exp.py b/xarray/core/rolling_exp.py index c8160cefef3..43a2ada5f9b 100644 --- a/xarray/core/rolling_exp.py +++ b/xarray/core/rolling_exp.py @@ -15,9 +15,9 @@ import numbagg from numbagg import move_exp_nanmean, move_exp_nansum - _NUMBAGG_VERSION: Version | None = Version(numbagg.__version__) + NUMBAGG_VERSION: Version | None = Version(numbagg.__version__) except ImportError: - _NUMBAGG_VERSION = None + NUMBAGG_VERSION = None def _get_alpha( @@ -100,17 +100,17 @@ def __init__( window_type: str = "span", min_weight: float = 0.0, ): - if _NUMBAGG_VERSION is None: + if NUMBAGG_VERSION is None: raise ImportError( "numbagg >= 0.2.1 is required for rolling_exp but currently numbagg is not installed" ) - elif _NUMBAGG_VERSION < Version("0.2.1"): + elif NUMBAGG_VERSION < Version("0.2.1"): raise ImportError( - f"numbagg >= 0.2.1 is required for rolling_exp but currently version {_NUMBAGG_VERSION} is installed" + f"numbagg >= 0.2.1 is required for rolling_exp but currently version {NUMBAGG_VERSION} is installed" ) - elif _NUMBAGG_VERSION < Version("0.3.1") and min_weight > 0: + elif NUMBAGG_VERSION < Version("0.3.1") and min_weight > 0: raise ImportError( - f"numbagg >= 0.3.1 is required for `min_weight > 0` within `.rolling_exp` but currently version {_NUMBAGG_VERSION} is installed" + f"numbagg >= 0.3.1 is required for `min_weight > 0` within `.rolling_exp` but currently version {NUMBAGG_VERSION} is installed" ) self.obj: T_DataWithCoords = obj @@ -211,9 +211,9 @@ def std(self) -> T_DataWithCoords: Dimensions without coordinates: x """ - if _NUMBAGG_VERSION is None or _NUMBAGG_VERSION < Version("0.4.0"): + if NUMBAGG_VERSION is None or NUMBAGG_VERSION < Version("0.4.0"): raise ImportError( - f"numbagg >= 0.4.0 is required for rolling_exp().std(), currently {_NUMBAGG_VERSION} is installed" + f"numbagg >= 0.4.0 is required for rolling_exp().std(), currently {NUMBAGG_VERSION} is installed" ) dim_order = self.obj.dims @@ -243,9 +243,9 @@ def var(self) -> T_DataWithCoords: Dimensions without coordinates: x """ - if _NUMBAGG_VERSION is None or _NUMBAGG_VERSION < Version("0.4.0"): + if NUMBAGG_VERSION is None or NUMBAGG_VERSION < Version("0.4.0"): raise ImportError( - f"numbagg >= 0.4.0 is required for rolling_exp().var(), currently {_NUMBAGG_VERSION} is installed" + f"numbagg >= 0.4.0 is required for rolling_exp().var(), currently {NUMBAGG_VERSION} is installed" ) dim_order = self.obj.dims @@ -275,9 +275,9 @@ def cov(self, other: T_DataWithCoords) -> T_DataWithCoords: Dimensions without coordinates: x """ - if _NUMBAGG_VERSION is None or _NUMBAGG_VERSION < Version("0.4.0"): + if NUMBAGG_VERSION is None or NUMBAGG_VERSION < Version("0.4.0"): raise ImportError( - f"numbagg >= 0.4.0 is required for rolling_exp().cov(), currently {_NUMBAGG_VERSION} is installed" + f"numbagg >= 0.4.0 is required for rolling_exp().cov(), currently {NUMBAGG_VERSION} is installed" ) dim_order = self.obj.dims @@ -308,9 +308,9 @@ def corr(self, other: T_DataWithCoords) -> T_DataWithCoords: Dimensions without coordinates: x """ - if _NUMBAGG_VERSION is None or _NUMBAGG_VERSION < Version("0.4.0"): + if NUMBAGG_VERSION is None or NUMBAGG_VERSION < Version("0.4.0"): raise ImportError( - f"numbagg >= 0.4.0 is required for rolling_exp().cov(), currently {_NUMBAGG_VERSION} is installed" + f"numbagg >= 0.4.0 is required for rolling_exp().cov(), currently {NUMBAGG_VERSION} is installed" ) dim_order = self.obj.dims diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index fec695f83d7..7b8fd3b019b 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -53,7 +53,8 @@ def _importorskip( mod = importlib.import_module(modname) has = True if minversion is not None: - if Version(mod.__version__) < Version(minversion): + v = getattr(mod, "__version__", "999") + if Version(v) < Version(minversion): raise ImportError("Minimum version not satisfied") except ImportError: has = False @@ -89,6 +90,10 @@ def _importorskip( requires_scipy_or_netCDF4 = pytest.mark.skipif( not has_scipy_or_netCDF4, reason="requires scipy or netCDF4" ) +has_numbagg_or_bottleneck = has_numbagg or has_bottleneck +requires_numbagg_or_bottleneck = pytest.mark.skipif( + not has_scipy_or_netCDF4, reason="requires scipy or netCDF4" +) # _importorskip does not work for development versions has_pandas_version_two = Version(pd.__version__).major >= 2 requires_pandas_version_two = pytest.mark.skipif( diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index e318bf01a7e..b1be3d50ace 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -24,6 +24,8 @@ requires_bottleneck, requires_cftime, requires_dask, + requires_numbagg, + requires_numbagg_or_bottleneck, requires_scipy, ) @@ -407,7 +409,7 @@ def test_interpolate_dask_expected_dtype(dtype, method): assert da.dtype == da.compute().dtype -@requires_bottleneck +@requires_numbagg_or_bottleneck def test_ffill(): da = xr.DataArray(np.array([4, 5, np.nan], dtype=np.float64), dims="x") expected = xr.DataArray(np.array([4, 5, 5], dtype=np.float64), dims="x") @@ -415,25 +417,36 @@ def test_ffill(): assert_equal(actual, expected) -def test_ffill_use_bottleneck(): +def test_ffill_use_bottleneck_numbagg(): da = xr.DataArray(np.array([4, 5, np.nan], dtype=np.float64), dims="x") with xr.set_options(use_bottleneck=False): - with pytest.raises(RuntimeError): - da.ffill("x") + with xr.set_options(use_numbagg=False): + with pytest.raises(RuntimeError): + da.ffill("x") @requires_dask def test_ffill_use_bottleneck_dask(): da = xr.DataArray(np.array([4, 5, np.nan], dtype=np.float64), dims="x") da = da.chunk({"x": 1}) - with xr.set_options(use_bottleneck=False): + with xr.set_options(use_bottleneck=False, use_numbagg=False): with pytest.raises(RuntimeError): da.ffill("x") +@requires_numbagg +@requires_dask +def test_ffill_use_numbagg_dask(): + with xr.set_options(use_bottleneck=False): + da = xr.DataArray(np.array([4, 5, np.nan], dtype=np.float64), dims="x") + da = da.chunk(x=-1) + # Succeeds with a single chunk: + _ = da.ffill("x").compute() + + def test_bfill_use_bottleneck(): da = xr.DataArray(np.array([4, 5, np.nan], dtype=np.float64), dims="x") - with xr.set_options(use_bottleneck=False): + with xr.set_options(use_bottleneck=False, use_numbagg=False): with pytest.raises(RuntimeError): da.bfill("x") @@ -442,7 +455,7 @@ def test_bfill_use_bottleneck(): def test_bfill_use_bottleneck_dask(): da = xr.DataArray(np.array([4, 5, np.nan], dtype=np.float64), dims="x") da = da.chunk({"x": 1}) - with xr.set_options(use_bottleneck=False): + with xr.set_options(use_bottleneck=False, use_numbagg=False): with pytest.raises(RuntimeError): da.bfill("x") From 249f14c73e7a8043e52d10b1e175dd8432e226e3 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 19 Nov 2023 00:16:31 +0000 Subject: [PATCH 02/14] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/core/missing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/missing.py b/xarray/core/missing.py index c617e016107..b55fd6049a6 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -14,7 +14,7 @@ from xarray.core.common import _contains_datetime_like_objects, ones_like from xarray.core.computation import apply_ufunc from xarray.core.duck_array_ops import datetime_to_numeric, push, timedelta_to_numeric -from xarray.core.options import OPTIONS, _get_keep_attrs +from xarray.core.options import _get_keep_attrs from xarray.core.parallelcompat import get_chunked_array_type, is_chunked_array from xarray.core.types import Interp1dOptions, InterpOptions from xarray.core.utils import OrderedSet, is_scalar From 0ed1b3c2e92917b5ac7ef14b26447e1bbf5f4653 Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Sat, 18 Nov 2023 16:18:06 -0800 Subject: [PATCH 03/14] --- xarray/core/dask_array_ops.py | 1 - 1 file changed, 1 deletion(-) diff --git a/xarray/core/dask_array_ops.py b/xarray/core/dask_array_ops.py index 67c1effd745..98ff9002856 100644 --- a/xarray/core/dask_array_ops.py +++ b/xarray/core/dask_array_ops.py @@ -87,7 +87,6 @@ def _fill_with_last_one(a, b): # The method parameter makes that the tests for python 3.7 fails. return da.reductions.cumreduction( func=_push, - # func=bottleneck.push, binop=_fill_with_last_one, ident=np.nan, x=array, From 7f519ace313c7d53fe8a76fa3115f33a08a3877e Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Fri, 24 Nov 2023 10:44:46 -0800 Subject: [PATCH 04/14] Use duck_array_ops for numbagg version, test import is lazy --- xarray/core/duck_array_ops.py | 11 +++---- xarray/core/nputils.py | 35 +++++++++------------- xarray/core/pycompat.py | 5 +++- xarray/core/rolling_exp.py | 56 +++++++++++++++++++++-------------- xarray/tests/test_plugins.py | 29 +++++++++--------- 5 files changed, 72 insertions(+), 64 deletions(-) diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 7cffc6b18a6..cbc1543a473 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -33,7 +33,7 @@ from numpy.lib.stride_tricks import sliding_window_view # noqa from packaging.version import Version -from xarray.core import dask_array_ops, dtypes, nputils +from xarray.core import dask_array_ops, dtypes, nputils, pycompat from xarray.core.options import OPTIONS from xarray.core.parallelcompat import get_chunked_array_type, is_chunked_array from xarray.core.pycompat import array_type, is_duck_dask_array @@ -694,17 +694,18 @@ def _push(array, n: int | None = None, axis: int = -1): """ Use either bottleneck or numbagg depending on options & what's available """ - from xarray.core.nputils import NUMBAGG_VERSION, numbagg if not OPTIONS["use_bottleneck"] and not OPTIONS["use_numbagg"]: raise RuntimeError( "ffill & bfill requires bottleneck or numbagg to be enabled." " Call `xr.set_options(use_bottleneck=True)` or `xr.set_options(use_numbagg=True)` to enable one." ) - if OPTIONS["use_numbagg"] and NUMBAGG_VERSION is not None: - if NUMBAGG_VERSION < Version("0.6.2"): + if OPTIONS["use_numbagg"] and pycompat.mod_version("numbagg") is not None: + import numbagg + + if pycompat.mod_version("numbagg") < Version("0.6.2"): warnings.warn( - f"numbagg >= 0.6.2 is required for bfill & ffill; {NUMBAGG_VERSION} is installed. We'll attempt with bottleneck instead." + f"numbagg >= 0.6.2 is required for bfill & ffill; {pycompat.mod_version('numbagg')} is installed. We'll attempt with bottleneck instead." ) else: return numbagg.ffill(array, limit=n, axis=axis) diff --git a/xarray/core/nputils.py b/xarray/core/nputils.py index 12c7b84fe5d..4f717943d94 100644 --- a/xarray/core/nputils.py +++ b/xarray/core/nputils.py @@ -8,6 +8,8 @@ from numpy.core.multiarray import normalize_axis_index # type: ignore[attr-defined] from packaging.version import Version +from xarray.core import pycompat + # remove once numpy 2.0 is the oldest supported version try: from numpy.exceptions import RankWarning # type: ignore[attr-defined,unused-ignore] @@ -26,18 +28,6 @@ bn = np _BOTTLENECK_AVAILABLE = False -NUMBAGG_VERSION: Version | None - -try: - import numbagg - - v = getattr(numbagg, "__version__", "999") - NUMBAGG_VERSION = Version(v) -except ImportError: - # use numpy methods instead - numbagg = np # type: ignore - NUMBAGG_VERSION = None - def _select_along_axis(values, idx, axis): other_ind = np.ix_(*[np.arange(s) for s in idx.shape]) @@ -179,14 +169,13 @@ def _create_method(name, npmodule=np) -> Callable: def f(values, axis=None, **kwargs): dtype = kwargs.get("dtype", None) bn_func = getattr(bn, name, None) - nba_func = getattr(numbagg, name, None) if ( - NUMBAGG_VERSION is not None - and NUMBAGG_VERSION >= Version("0.5.0") + pycompat.mod_version("numbagg") is not None + and pycompat.mod_version("numbagg") >= Version("0.5.0") and OPTIONS["use_numbagg"] and isinstance(values, np.ndarray) - and nba_func is not None + # and nba_func is not None # numbagg uses ddof=1 only, but numpy uses ddof=0 by default and (("var" in name or "std" in name) and kwargs.get("ddof", 0) == 1) # TODO: bool? @@ -194,11 +183,15 @@ def f(values, axis=None, **kwargs): # and values.dtype.isnative and (dtype is None or np.dtype(dtype) == values.dtype) ): - # numbagg does not take care dtype, ddof - kwargs.pop("dtype", None) - kwargs.pop("ddof", None) - result = nba_func(values, axis=axis, **kwargs) - elif ( + import numbagg + + nba_func = getattr(numbagg, name, None) + if nba_func is not None: + # numbagg does not take care dtype, ddof + kwargs.pop("dtype", None) + kwargs.pop("ddof", None) + return nba_func(values, axis=axis, **kwargs) + if ( _BOTTLENECK_AVAILABLE and OPTIONS["use_bottleneck"] and isinstance(values, np.ndarray) diff --git a/xarray/core/pycompat.py b/xarray/core/pycompat.py index bc8b61164f1..32ef408f7cc 100644 --- a/xarray/core/pycompat.py +++ b/xarray/core/pycompat.py @@ -12,7 +12,7 @@ integer_types = (int, np.integer) if TYPE_CHECKING: - ModType = Literal["dask", "pint", "cupy", "sparse", "cubed"] + ModType = Literal["dask", "pint", "cupy", "sparse", "cubed", "numbagg"] DuckArrayTypes = tuple[type[Any], ...] # TODO: improve this? maybe Generic @@ -47,6 +47,9 @@ def __init__(self, mod: ModType) -> None: duck_array_type = (duck_array_module.SparseArray,) elif mod == "cubed": duck_array_type = (duck_array_module.Array,) + # Not a duck array module, but using this system regardless, to get lazy imports + elif mod == "numbagg": + duck_array_type = () else: raise NotImplementedError diff --git a/xarray/core/rolling_exp.py b/xarray/core/rolling_exp.py index fb1f6575798..629a709f0d1 100644 --- a/xarray/core/rolling_exp.py +++ b/xarray/core/rolling_exp.py @@ -6,19 +6,12 @@ import numpy as np from packaging.version import Version +from xarray.core import pycompat from xarray.core.computation import apply_ufunc from xarray.core.options import _get_keep_attrs from xarray.core.pdcompat import count_not_none from xarray.core.types import T_DataWithCoords -try: - import numbagg - from numbagg import move_exp_nanmean, move_exp_nansum - - NUMBAGG_VERSION: Version | None = Version(numbagg.__version__) -except ImportError: - NUMBAGG_VERSION = None - def _get_alpha( com: float | None = None, @@ -83,17 +76,17 @@ def __init__( window_type: str = "span", min_weight: float = 0.0, ): - if NUMBAGG_VERSION is None: + if pycompat.mod_version("numbagg") is None: raise ImportError( "numbagg >= 0.2.1 is required for rolling_exp but currently numbagg is not installed" ) - elif NUMBAGG_VERSION < Version("0.2.1"): + elif pycompat.mod_version("numbagg") < Version("0.2.1"): raise ImportError( - f"numbagg >= 0.2.1 is required for rolling_exp but currently version {NUMBAGG_VERSION} is installed" + f"numbagg >= 0.2.1 is required for rolling_exp but currently version {pycompat.mod_version('numbagg')} is installed" ) - elif NUMBAGG_VERSION < Version("0.3.1") and min_weight > 0: + elif pycompat.mod_version("numbagg") < Version("0.3.1") and min_weight > 0: raise ImportError( - f"numbagg >= 0.3.1 is required for `min_weight > 0` within `.rolling_exp` but currently version {NUMBAGG_VERSION} is installed" + f"numbagg >= 0.3.1 is required for `min_weight > 0` within `.rolling_exp` but currently version {pycompat.mod_version('numbagg')} is installed" ) self.obj: T_DataWithCoords = obj @@ -127,13 +120,15 @@ def mean(self, keep_attrs: bool | None = None) -> T_DataWithCoords: Dimensions without coordinates: x """ + import numbagg + if keep_attrs is None: keep_attrs = _get_keep_attrs(default=True) dim_order = self.obj.dims return apply_ufunc( - move_exp_nanmean, + numbagg.move_exp_nanmean, self.obj, input_core_dims=[[self.dim]], kwargs=self.kwargs, @@ -163,13 +158,15 @@ def sum(self, keep_attrs: bool | None = None) -> T_DataWithCoords: Dimensions without coordinates: x """ + import numbagg + if keep_attrs is None: keep_attrs = _get_keep_attrs(default=True) dim_order = self.obj.dims return apply_ufunc( - move_exp_nansum, + numbagg.move_exp_nansum, self.obj, input_core_dims=[[self.dim]], kwargs=self.kwargs, @@ -194,10 +191,14 @@ def std(self) -> T_DataWithCoords: Dimensions without coordinates: x """ - if NUMBAGG_VERSION is None or NUMBAGG_VERSION < Version("0.4.0"): + if pycompat.mod_version("numbagg") is None or pycompat.mod_version( + "numbagg" + ) < Version("0.4.0"): raise ImportError( - f"numbagg >= 0.4.0 is required for rolling_exp().std(), currently {NUMBAGG_VERSION} is installed" + f"numbagg >= 0.4.0 is required for rolling_exp().std(), currently {pycompat.mod_version('numbagg')} is installed" ) + import numbagg + dim_order = self.obj.dims return apply_ufunc( @@ -226,11 +227,14 @@ def var(self) -> T_DataWithCoords: Dimensions without coordinates: x """ - if NUMBAGG_VERSION is None or NUMBAGG_VERSION < Version("0.4.0"): + if pycompat.mod_version("numbagg") is None or pycompat.mod_version( + "numbagg" + ) < Version("0.4.0"): raise ImportError( - f"numbagg >= 0.4.0 is required for rolling_exp().var(), currently {NUMBAGG_VERSION} is installed" + f"numbagg >= 0.4.0 is required for rolling_exp().std(), currently {pycompat.mod_version('numbagg')} is installed" ) dim_order = self.obj.dims + import numbagg return apply_ufunc( numbagg.move_exp_nanvar, @@ -258,11 +262,14 @@ def cov(self, other: T_DataWithCoords) -> T_DataWithCoords: Dimensions without coordinates: x """ - if NUMBAGG_VERSION is None or NUMBAGG_VERSION < Version("0.4.0"): + if pycompat.mod_version("numbagg") is None or pycompat.mod_version( + "numbagg" + ) < Version("0.4.0"): raise ImportError( - f"numbagg >= 0.4.0 is required for rolling_exp().cov(), currently {NUMBAGG_VERSION} is installed" + f"numbagg >= 0.4.0 is required for rolling_exp().std(), currently {pycompat.mod_version('numbagg')} is installed" ) dim_order = self.obj.dims + import numbagg return apply_ufunc( numbagg.move_exp_nancov, @@ -291,11 +298,14 @@ def corr(self, other: T_DataWithCoords) -> T_DataWithCoords: Dimensions without coordinates: x """ - if NUMBAGG_VERSION is None or NUMBAGG_VERSION < Version("0.4.0"): + if pycompat.mod_version("numbagg") is None or pycompat.mod_version( + "numbagg" + ) < Version("0.4.0"): raise ImportError( - f"numbagg >= 0.4.0 is required for rolling_exp().cov(), currently {NUMBAGG_VERSION} is installed" + f"numbagg >= 0.4.0 is required for rolling_exp().std(), currently {pycompat.mod_version('numbagg')} is installed" ) dim_order = self.obj.dims + import numbagg return apply_ufunc( numbagg.move_exp_nancorr, diff --git a/xarray/tests/test_plugins.py b/xarray/tests/test_plugins.py index 1af255d30bb..b518c973d3a 100644 --- a/xarray/tests/test_plugins.py +++ b/xarray/tests/test_plugins.py @@ -218,28 +218,29 @@ def test_lazy_import() -> None: When importing xarray these should not be imported as well. Only when running code for the first time that requires them. """ - blacklisted = [ + deny_list = [ + "cubed", + "cupy", + # "dask", # TODO: backends.locks is not lazy yet :( + "dask.array", + "dask.distributed", + "flox", "h5netcdf", + "matplotlib", + "nc_time_axis", "netCDF4", - "pydap", "Nio", + "numbagg", + "pint", + "pydap", "scipy", - "zarr", - "matplotlib", - "nc_time_axis", - "flox", - # "dask", # TODO: backends.locks is not lazy yet :( - "dask.array", - "dask.distributed", "sparse", - "cupy", - "pint", - "cubed", + "zarr", ] # ensure that none of the above modules has been imported before modules_backup = {} for pkg in list(sys.modules.keys()): - for mod in blacklisted + ["xarray"]: + for mod in deny_list + ["xarray"]: if pkg.startswith(mod): modules_backup[pkg] = sys.modules[pkg] del sys.modules[pkg] @@ -255,7 +256,7 @@ def test_lazy_import() -> None: # lazy loaded are loaded when importing xarray is_imported = set() for pkg in sys.modules: - for mod in blacklisted: + for mod in deny_list: if pkg.startswith(mod): is_imported.add(mod) break From 650da40a15244228b19e936d883255ee0b8dce57 Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Fri, 24 Nov 2023 10:49:52 -0800 Subject: [PATCH 05/14] --- doc/whats-new.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 76548fe95c5..50000fe4657 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -50,6 +50,10 @@ Documentation Internal Changes ~~~~~~~~~~~~~~~~ +- :py:meth:`DataArray.bfill` & :py:meth:`DataArray.ffill` now use numbagg by + default, which is up to 5x faster on wide arrays on multi-core machines. (:pull:`8339`) + By `Maximilian Roos `_. + .. _whats-new.2023.11.0: v2023.11.0 (Nov 16, 2023) From f28da8feb9d0530d34547b5984a5f9be05f5e104 Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Fri, 24 Nov 2023 12:30:18 -0800 Subject: [PATCH 06/14] --- xarray/core/duck_array_ops.py | 2 +- xarray/core/rolling_exp.py | 19 +++++-------------- 2 files changed, 6 insertions(+), 15 deletions(-) diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index cbc1543a473..792ae723752 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -700,7 +700,7 @@ def _push(array, n: int | None = None, axis: int = -1): "ffill & bfill requires bottleneck or numbagg to be enabled." " Call `xr.set_options(use_bottleneck=True)` or `xr.set_options(use_numbagg=True)` to enable one." ) - if OPTIONS["use_numbagg"] and pycompat.mod_version("numbagg") is not None: + if OPTIONS["use_numbagg"] and pycompat.mod_version("numbagg") != Version("0.0.0"): import numbagg if pycompat.mod_version("numbagg") < Version("0.6.2"): diff --git a/xarray/core/rolling_exp.py b/xarray/core/rolling_exp.py index 629a709f0d1..bb3d78b5ee2 100644 --- a/xarray/core/rolling_exp.py +++ b/xarray/core/rolling_exp.py @@ -76,7 +76,7 @@ def __init__( window_type: str = "span", min_weight: float = 0.0, ): - if pycompat.mod_version("numbagg") is None: + if pycompat.mod_version("numbagg") == Version("0.0.0"): raise ImportError( "numbagg >= 0.2.1 is required for rolling_exp but currently numbagg is not installed" ) @@ -191,9 +191,7 @@ def std(self) -> T_DataWithCoords: Dimensions without coordinates: x """ - if pycompat.mod_version("numbagg") is None or pycompat.mod_version( - "numbagg" - ) < Version("0.4.0"): + if pycompat.mod_version("numbagg") < Version("0.4.0"): raise ImportError( f"numbagg >= 0.4.0 is required for rolling_exp().std(), currently {pycompat.mod_version('numbagg')} is installed" ) @@ -226,10 +224,7 @@ def var(self) -> T_DataWithCoords: array([ nan, 0. , 0.46153846, 0.18461538, 0.06446281]) Dimensions without coordinates: x """ - - if pycompat.mod_version("numbagg") is None or pycompat.mod_version( - "numbagg" - ) < Version("0.4.0"): + if pycompat.mod_version("numbagg") < Version("0.4.0"): raise ImportError( f"numbagg >= 0.4.0 is required for rolling_exp().std(), currently {pycompat.mod_version('numbagg')} is installed" ) @@ -262,9 +257,7 @@ def cov(self, other: T_DataWithCoords) -> T_DataWithCoords: Dimensions without coordinates: x """ - if pycompat.mod_version("numbagg") is None or pycompat.mod_version( - "numbagg" - ) < Version("0.4.0"): + if pycompat.mod_version("numbagg") < Version("0.4.0"): raise ImportError( f"numbagg >= 0.4.0 is required for rolling_exp().std(), currently {pycompat.mod_version('numbagg')} is installed" ) @@ -298,9 +291,7 @@ def corr(self, other: T_DataWithCoords) -> T_DataWithCoords: Dimensions without coordinates: x """ - if pycompat.mod_version("numbagg") is None or pycompat.mod_version( - "numbagg" - ) < Version("0.4.0"): + if pycompat.mod_version("numbagg") < Version("0.4.0"): raise ImportError( f"numbagg >= 0.4.0 is required for rolling_exp().std(), currently {pycompat.mod_version('numbagg')} is installed" ) From 6c7e2bc8db1f9c515f65577bc740f6b928ee78e1 Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Fri, 24 Nov 2023 15:12:35 -0800 Subject: [PATCH 07/14] Update xarray/core/duck_array_ops.py Co-authored-by: Deepak Cherian --- xarray/core/duck_array_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 792ae723752..a5efff96880 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -700,7 +700,7 @@ def _push(array, n: int | None = None, axis: int = -1): "ffill & bfill requires bottleneck or numbagg to be enabled." " Call `xr.set_options(use_bottleneck=True)` or `xr.set_options(use_numbagg=True)` to enable one." ) - if OPTIONS["use_numbagg"] and pycompat.mod_version("numbagg") != Version("0.0.0"): + if OPTIONS["use_numbagg"] and not module_available("numbagg"): import numbagg if pycompat.mod_version("numbagg") < Version("0.6.2"): From bd30ed789e33a05785eec54697502fbdae0477d1 Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Fri, 24 Nov 2023 15:12:44 -0800 Subject: [PATCH 08/14] Update xarray/core/nputils.py Co-authored-by: Deepak Cherian --- xarray/core/nputils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/xarray/core/nputils.py b/xarray/core/nputils.py index 4f717943d94..058674272b8 100644 --- a/xarray/core/nputils.py +++ b/xarray/core/nputils.py @@ -175,7 +175,6 @@ def f(values, axis=None, **kwargs): and pycompat.mod_version("numbagg") >= Version("0.5.0") and OPTIONS["use_numbagg"] and isinstance(values, np.ndarray) - # and nba_func is not None # numbagg uses ddof=1 only, but numpy uses ddof=0 by default and (("var" in name or "std" in name) and kwargs.get("ddof", 0) == 1) # TODO: bool? From 314c3842ef95bff657eba300535a1216cb8370e8 Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Fri, 24 Nov 2023 15:14:45 -0800 Subject: [PATCH 09/14] Update xarray/core/rolling_exp.py Co-authored-by: Deepak Cherian --- xarray/core/rolling_exp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/rolling_exp.py b/xarray/core/rolling_exp.py index bb3d78b5ee2..158e60ddefc 100644 --- a/xarray/core/rolling_exp.py +++ b/xarray/core/rolling_exp.py @@ -76,7 +76,7 @@ def __init__( window_type: str = "span", min_weight: float = 0.0, ): - if pycompat.mod_version("numbagg") == Version("0.0.0"): + if not module_available("numbagg"): raise ImportError( "numbagg >= 0.2.1 is required for rolling_exp but currently numbagg is not installed" ) From 3973818c98129bbf475da08ee6527c1ba3dcb08c Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Fri, 24 Nov 2023 15:14:56 -0800 Subject: [PATCH 10/14] Update xarray/core/nputils.py Co-authored-by: Deepak Cherian --- xarray/core/nputils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/nputils.py b/xarray/core/nputils.py index 058674272b8..bf78effc9ab 100644 --- a/xarray/core/nputils.py +++ b/xarray/core/nputils.py @@ -171,7 +171,7 @@ def f(values, axis=None, **kwargs): bn_func = getattr(bn, name, None) if ( - pycompat.mod_version("numbagg") is not None + module_available("numbagg") and pycompat.mod_version("numbagg") >= Version("0.5.0") and OPTIONS["use_numbagg"] and isinstance(values, np.ndarray) From b7edf31e5e3de26ed9d66fe71c8a2f86041c088a Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Fri, 24 Nov 2023 18:43:58 -0800 Subject: [PATCH 11/14] --- xarray/core/nputils.py | 1 + xarray/core/rolling_exp.py | 1 + 2 files changed, 2 insertions(+) diff --git a/xarray/core/nputils.py b/xarray/core/nputils.py index bf78effc9ab..96e5548b9b4 100644 --- a/xarray/core/nputils.py +++ b/xarray/core/nputils.py @@ -9,6 +9,7 @@ from packaging.version import Version from xarray.core import pycompat +from xarray.core.utils import module_available # remove once numpy 2.0 is the oldest supported version try: diff --git a/xarray/core/rolling_exp.py b/xarray/core/rolling_exp.py index 158e60ddefc..1e4b805208f 100644 --- a/xarray/core/rolling_exp.py +++ b/xarray/core/rolling_exp.py @@ -11,6 +11,7 @@ from xarray.core.options import _get_keep_attrs from xarray.core.pdcompat import count_not_none from xarray.core.types import T_DataWithCoords +from xarray.core.utils import module_available def _get_alpha( From 2293fb061cfbb307f55d1c5245b85595fefd2602 Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Fri, 24 Nov 2023 18:45:17 -0800 Subject: [PATCH 12/14] --- doc/whats-new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 23dc94237ec..b2efe650e28 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -56,7 +56,7 @@ Internal Changes ~~~~~~~~~~~~~~~~ - :py:meth:`DataArray.bfill` & :py:meth:`DataArray.ffill` now use numbagg by - default, which is up to 5x faster on wide arrays on multi-core machines. (:pull:`8339`) + default, which is up to 5x faster where parallelization is possible. (:pull:`8339`) By `Maximilian Roos `_. .. _whats-new.2023.11.0: From ddffb69fb5c1d665723a136d98f23f6e35f9ec9c Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Fri, 24 Nov 2023 18:46:07 -0800 Subject: [PATCH 13/14] --- xarray/tests/test_missing.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index b1be3d50ace..20a54c3ed53 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -419,10 +419,9 @@ def test_ffill(): def test_ffill_use_bottleneck_numbagg(): da = xr.DataArray(np.array([4, 5, np.nan], dtype=np.float64), dims="x") - with xr.set_options(use_bottleneck=False): - with xr.set_options(use_numbagg=False): - with pytest.raises(RuntimeError): - da.ffill("x") + with xr.set_options(use_bottleneck=False, use_numbagg=False): + with pytest.raises(RuntimeError): + da.ffill("x") @requires_dask From 7693b8520541fbe97134ea18b7ece9cdea5c95ad Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Sat, 25 Nov 2023 10:56:47 -0800 Subject: [PATCH 14/14] --- xarray/core/duck_array_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index a5efff96880..7f2b2ed85ee 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -700,7 +700,7 @@ def _push(array, n: int | None = None, axis: int = -1): "ffill & bfill requires bottleneck or numbagg to be enabled." " Call `xr.set_options(use_bottleneck=True)` or `xr.set_options(use_numbagg=True)` to enable one." ) - if OPTIONS["use_numbagg"] and not module_available("numbagg"): + if OPTIONS["use_numbagg"] and module_available("numbagg"): import numbagg if pycompat.mod_version("numbagg") < Version("0.6.2"):