From fab900cfe7661da4f9778f9ade181cac91bddb9a Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Thu, 5 Dec 2024 10:23:45 -0700 Subject: [PATCH 01/11] dask tests: Avoid check for non-copies, xfail pandas comparison (#9857) --- xarray/tests/test_dask.py | 1 + xarray/tests/test_variable.py | 25 +++++++++++-------------- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index e3e12599926..068f57ed42d 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -791,6 +791,7 @@ def test_tokenize_duck_dask_array(self): class TestToDaskDataFrame: + @pytest.mark.xfail(reason="https://github.com/dask/dask/issues/11584") def test_to_dask_dataframe(self): # Test conversion of Datasets to dask DataFrames x = np.random.randn(10) diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 7dc5ef0db94..f4f353eda7d 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -278,34 +278,30 @@ def test_0d_time_data(self): @pytest.mark.filterwarnings("ignore:Converting non-nanosecond") def test_datetime64_conversion(self): times = pd.date_range("2000-01-01", periods=3) - for values, preserve_source in [ - (times, True), - (times.values, True), - (times.values.astype("datetime64[s]"), False), - (times.to_pydatetime(), False), + for values in [ + times, + times.values, + times.values.astype("datetime64[s]"), + times.to_pydatetime(), ]: v = self.cls(["t"], values) assert v.dtype == np.dtype("datetime64[ns]") assert_array_equal(v.values, times.values) assert v.values.dtype == np.dtype("datetime64[ns]") - same_source = source_ndarray(v.values) is source_ndarray(values) - assert preserve_source == same_source @pytest.mark.filterwarnings("ignore:Converting non-nanosecond") def test_timedelta64_conversion(self): times = pd.timedelta_range(start=0, periods=3) - for values, preserve_source in [ - (times, True), - (times.values, True), - (times.values.astype("timedelta64[s]"), False), - (times.to_pytimedelta(), False), + for values in [ + times, + times.values, + times.values.astype("timedelta64[s]"), + times.to_pytimedelta(), ]: v = self.cls(["t"], values) assert v.dtype == np.dtype("timedelta64[ns]") assert_array_equal(v.values, times.values) assert v.values.dtype == np.dtype("timedelta64[ns]") - same_source = source_ndarray(v.values) is source_ndarray(values) - assert preserve_source == same_source def test_object_conversion(self): data = np.arange(5).astype(str).astype(object) @@ -2372,6 +2368,7 @@ def test_dask_rolling(self, dim, window, center): assert actual.shape == expected.shape assert_equal(actual, expected) + @pytest.mark.xfail(reason="https://github.com/dask/dask/issues/11585") def test_multiindex(self): super().test_multiindex() From eac5105470ec7fec767e5897fefdec9320689184 Mon Sep 17 00:00:00 2001 From: Florian Jetter Date: Thu, 5 Dec 2024 22:44:46 +0100 Subject: [PATCH 02/11] Avoid local functions in push (#9856) * Avoid local functions in push * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Deepak Cherian --- xarray/core/dask_array_ops.py | 71 +++++++++++++++++++++-------------- 1 file changed, 43 insertions(+), 28 deletions(-) diff --git a/xarray/core/dask_array_ops.py b/xarray/core/dask_array_ops.py index 8bf9c68b727..2dca38538e1 100644 --- a/xarray/core/dask_array_ops.py +++ b/xarray/core/dask_array_ops.py @@ -1,6 +1,7 @@ from __future__ import annotations import math +from functools import partial from xarray.core import dtypes, nputils @@ -75,6 +76,47 @@ def least_squares(lhs, rhs, rcond=None, skipna=False): return coeffs, residuals +def _fill_with_last_one(a, b): + import numpy as np + + # cumreduction apply the push func over all the blocks first so, + # the only missing part is filling the missing values using the + # last data of the previous chunk + return np.where(np.isnan(b), a, b) + + +def _dtype_push(a, axis, dtype=None): + from xarray.core.duck_array_ops import _push + + # Not sure why the blelloch algorithm force to receive a dtype + return _push(a, axis=axis) + + +def _reset_cumsum(a, axis, dtype=None): + import numpy as np + + cumsum = np.cumsum(a, axis=axis) + reset_points = np.maximum.accumulate(np.where(a == 0, cumsum, 0), axis=axis) + return cumsum - reset_points + + +def _last_reset_cumsum(a, axis, keepdims=None): + import numpy as np + + # Take the last cumulative sum taking into account the reset + # This is useful for blelloch method + return np.take(_reset_cumsum(a, axis=axis), axis=axis, indices=[-1]) + + +def _combine_reset_cumsum(a, b, axis): + import numpy as np + + # It is going to sum the previous result until the first + # non nan value + bitmask = np.cumprod(b != 0, axis=axis) + return np.where(bitmask, b + a, b) + + def push(array, n, axis, method="blelloch"): """ Dask-aware bottleneck.push @@ -91,16 +133,6 @@ def push(array, n, axis, method="blelloch"): # TODO: Replace all this function # once https://github.com/pydata/xarray/issues/9229 being implemented - def _fill_with_last_one(a, b): - # cumreduction apply the push func over all the blocks first so, - # the only missing part is filling the missing values using the - # last data of the previous chunk - return np.where(np.isnan(b), a, b) - - def _dtype_push(a, axis, dtype=None): - # Not sure why the blelloch algorithm force to receive a dtype - return _push(a, axis=axis) - pushed_array = da.reductions.cumreduction( func=_dtype_push, binop=_fill_with_last_one, @@ -113,26 +145,9 @@ def _dtype_push(a, axis, dtype=None): ) if n is not None and 0 < n < array.shape[axis] - 1: - - def _reset_cumsum(a, axis, dtype=None): - cumsum = np.cumsum(a, axis=axis) - reset_points = np.maximum.accumulate(np.where(a == 0, cumsum, 0), axis=axis) - return cumsum - reset_points - - def _last_reset_cumsum(a, axis, keepdims=None): - # Take the last cumulative sum taking into account the reset - # This is useful for blelloch method - return np.take(_reset_cumsum(a, axis=axis), axis=axis, indices=[-1]) - - def _combine_reset_cumsum(a, b): - # It is going to sum the previous result until the first - # non nan value - bitmask = np.cumprod(b != 0, axis=axis) - return np.where(bitmask, b + a, b) - valid_positions = da.reductions.cumreduction( func=_reset_cumsum, - binop=_combine_reset_cumsum, + binop=partial(_combine_reset_cumsum, axis=axis), ident=0, x=da.isnan(array, dtype=int), axis=axis, From f9ed7275ce5cd15e2fc92a88ec352a16ef24fa3c Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Sun, 8 Dec 2024 22:22:49 -0800 Subject: [PATCH 03/11] Add token to codecov (#9865) Without this, we can't upload on `main`, I think. I added a token into our Actions Secrets. ref https://github.com/pydata/xarray/issues/9860 --- .github/workflows/ci.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index b0996acf6fe..eb7d2c858af 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -160,6 +160,8 @@ jobs: - name: Upload code coverage to Codecov uses: codecov/codecov-action@v5.0.7 + env: + CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} with: file: ./coverage.xml flags: unittests From 2d628b6d31e782c48da11a84ce03d572b1dd9e11 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 9 Dec 2024 02:35:25 -0800 Subject: [PATCH 04/11] Bump codecov/codecov-action from 5.0.7 to 5.1.1 in the actions group (#9866) --- .github/workflows/ci-additional.yaml | 8 ++++---- .github/workflows/ci.yaml | 2 +- .github/workflows/upstream-dev-ci.yaml | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci-additional.yaml b/.github/workflows/ci-additional.yaml index 91c63528741..84114056312 100644 --- a/.github/workflows/ci-additional.yaml +++ b/.github/workflows/ci-additional.yaml @@ -123,7 +123,7 @@ jobs: python -m mypy --install-types --non-interactive --cobertura-xml-report mypy_report - name: Upload mypy coverage to Codecov - uses: codecov/codecov-action@v5.0.7 + uses: codecov/codecov-action@v5.1.1 with: file: mypy_report/cobertura.xml flags: mypy @@ -174,7 +174,7 @@ jobs: python -m mypy --install-types --non-interactive --cobertura-xml-report mypy_report - name: Upload mypy coverage to Codecov - uses: codecov/codecov-action@v5.0.7 + uses: codecov/codecov-action@v5.1.1 with: file: mypy_report/cobertura.xml flags: mypy-min @@ -230,7 +230,7 @@ jobs: python -m pyright xarray/ - name: Upload pyright coverage to Codecov - uses: codecov/codecov-action@v5.0.7 + uses: codecov/codecov-action@v5.1.1 with: file: pyright_report/cobertura.xml flags: pyright @@ -286,7 +286,7 @@ jobs: python -m pyright xarray/ - name: Upload pyright coverage to Codecov - uses: codecov/codecov-action@v5.0.7 + uses: codecov/codecov-action@v5.1.1 with: file: pyright_report/cobertura.xml flags: pyright39 diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index eb7d2c858af..ad710e36247 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -159,7 +159,7 @@ jobs: path: pytest.xml - name: Upload code coverage to Codecov - uses: codecov/codecov-action@v5.0.7 + uses: codecov/codecov-action@v5.1.1 env: CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} with: diff --git a/.github/workflows/upstream-dev-ci.yaml b/.github/workflows/upstream-dev-ci.yaml index 30047673187..6a8b8d777c4 100644 --- a/.github/workflows/upstream-dev-ci.yaml +++ b/.github/workflows/upstream-dev-ci.yaml @@ -140,7 +140,7 @@ jobs: run: | python -m mypy --install-types --non-interactive --cobertura-xml-report mypy_report - name: Upload mypy coverage to Codecov - uses: codecov/codecov-action@v5.0.7 + uses: codecov/codecov-action@v5.1.1 with: file: mypy_report/cobertura.xml flags: mypy From 96e0ff7d70c605a1505ff89a2d62b5c4138b0305 Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Mon, 9 Dec 2024 15:29:03 -0800 Subject: [PATCH 05/11] Remove deprecated behavior for non-dim positional args (#9864) * Remove deprecated behavior with for non-dim positional args * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- doc/whats-new.rst | 11 ++++++++--- xarray/core/dataarray.py | 15 --------------- xarray/core/dataset.py | 17 +++++------------ xarray/core/groupby.py | 2 -- xarray/core/weighted.py | 7 ------- 5 files changed, 13 insertions(+), 39 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 9a8154d3791..6a08246182c 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -14,9 +14,9 @@ What's New np.random.seed(123456) -.. _whats-new.2024.11.1: +.. _whats-new.2024.12.0: -v.2024.11.1 (unreleased) +v.2024.12.0 (unreleased) ------------------------ New Features @@ -28,7 +28,12 @@ New Features Breaking changes ~~~~~~~~~~~~~~~~ - +- Methods including ``dropna``, ``rank``, ``idxmax``, ``idxmin`` require + non-dimension arguments to be passed as keyword arguments. The previous + behavior, which allowed ``.idxmax('foo', 'all')`` was too easily confused with + ``'all'`` being a dimension. The updated equivalent is ``.idxmax('foo', + how='all')``. The previous behavior was deprecated in v2023.10.0. + By `Maximilian Roos `_. Deprecations ~~~~~~~~~~~~ diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 5150998aebb..d287564cfe5 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1026,7 +1026,6 @@ def reset_coords( drop: Literal[True], ) -> Self: ... - @_deprecate_positional_args("v2023.10.0") def reset_coords( self, names: Dims = None, @@ -1364,7 +1363,6 @@ def chunksizes(self) -> Mapping[Any, tuple[int, ...]]: all_variables = [self.variable] + [c.variable for c in self.coords.values()] return get_chunksizes(all_variables) - @_deprecate_positional_args("v2023.10.0") def chunk( self, chunks: T_ChunksFreq = {}, # noqa: B006 # {} even though it's technically unsafe, is being used intentionally here (#4667) @@ -1835,7 +1833,6 @@ def thin( ds = self._to_temp_dataset().thin(indexers, **indexers_kwargs) return self._from_temp_dataset(ds) - @_deprecate_positional_args("v2023.10.0") def broadcast_like( self, other: T_DataArrayOrSet, @@ -1948,7 +1945,6 @@ def _reindex_callback( return da - @_deprecate_positional_args("v2023.10.0") def reindex_like( self, other: T_DataArrayOrSet, @@ -2135,7 +2131,6 @@ def reindex_like( fill_value=fill_value, ) - @_deprecate_positional_args("v2023.10.0") def reindex( self, indexers: Mapping[Any, Any] | None = None, @@ -2960,7 +2955,6 @@ def stack( ) return self._from_temp_dataset(ds) - @_deprecate_positional_args("v2023.10.0") def unstack( self, dim: Dims = None, @@ -3385,7 +3379,6 @@ def drop_isel( dataset = dataset.drop_isel(indexers=indexers, **indexers_kwargs) return self._from_temp_dataset(dataset) - @_deprecate_positional_args("v2023.10.0") def dropna( self, dim: Hashable, @@ -4889,7 +4882,6 @@ def _title_for_slice(self, truncate: int = 50) -> str: return title - @_deprecate_positional_args("v2023.10.0") def diff( self, dim: Hashable, @@ -5198,7 +5190,6 @@ def sortby( ds = self._to_temp_dataset().sortby(variables, ascending=ascending) return self._from_temp_dataset(ds) - @_deprecate_positional_args("v2023.10.0") def quantile( self, q: ArrayLike, @@ -5318,7 +5309,6 @@ def quantile( ) return self._from_temp_dataset(ds) - @_deprecate_positional_args("v2023.10.0") def rank( self, dim: Hashable, @@ -5897,7 +5887,6 @@ def pad( ) return self._from_temp_dataset(ds) - @_deprecate_positional_args("v2023.10.0") def idxmin( self, dim: Hashable | None = None, @@ -5995,7 +5984,6 @@ def idxmin( keep_attrs=keep_attrs, ) - @_deprecate_positional_args("v2023.10.0") def idxmax( self, dim: Hashable = None, @@ -6093,7 +6081,6 @@ def idxmax( keep_attrs=keep_attrs, ) - @_deprecate_positional_args("v2023.10.0") def argmin( self, dim: Dims = None, @@ -6195,7 +6182,6 @@ def argmin( else: return self._replace_maybe_drop_dims(result) - @_deprecate_positional_args("v2023.10.0") def argmax( self, dim: Dims = None, @@ -6544,7 +6530,6 @@ def curvefit( kwargs=kwargs, ) - @_deprecate_positional_args("v2023.10.0") def drop_duplicates( self, dim: Hashable | Iterable[Hashable], diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 2c1f5cfd4ac..ea17a69f827 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -3276,9 +3276,11 @@ def _shuffle(self, dim, *, indices: GroupIndices, chunks: T_Chunks) -> Self: subset = self[[name for name in self._variables if name not in is_chunked]] no_slices: list[list[int]] = [ - list(range(*idx.indices(self.sizes[dim]))) - if isinstance(idx, slice) - else idx + ( + list(range(*idx.indices(self.sizes[dim]))) + if isinstance(idx, slice) + else idx + ) for idx in indices ] no_slices = [idx for idx in no_slices if idx] @@ -5102,7 +5104,6 @@ def set_index( variables, coord_names=coord_names, indexes=indexes_ ) - @_deprecate_positional_args("v2023.10.0") def reset_index( self, dims_or_levels: Hashable | Sequence[Hashable], @@ -5740,7 +5741,6 @@ def _unstack_full_reindex( variables, coord_names=coord_names, indexes=indexes ) - @_deprecate_positional_args("v2023.10.0") def unstack( self, dim: Dims = None, @@ -6502,7 +6502,6 @@ def transpose( ds._variables[name] = var.transpose(*var_dims) return ds - @_deprecate_positional_args("v2023.10.0") def dropna( self, dim: Hashable, @@ -7976,7 +7975,6 @@ def _copy_attrs_from(self, other): if v in self.variables: self.variables[v].attrs = other.variables[v].attrs - @_deprecate_positional_args("v2023.10.0") def diff( self, dim: Hashable, @@ -8324,7 +8322,6 @@ def sortby( indices[key] = order if ascending else order[::-1] return aligned_self.isel(indices) - @_deprecate_positional_args("v2023.10.0") def quantile( self, q: ArrayLike, @@ -8505,7 +8502,6 @@ def quantile( ) return new.assign_coords(quantile=q) - @_deprecate_positional_args("v2023.10.0") def rank( self, dim: Hashable, @@ -9476,7 +9472,6 @@ def pad( attrs = self._attrs if keep_attrs else None return self._replace_with_new_dims(variables, indexes=indexes, attrs=attrs) - @_deprecate_positional_args("v2023.10.0") def idxmin( self, dim: Hashable | None = None, @@ -9575,7 +9570,6 @@ def idxmin( ) ) - @_deprecate_positional_args("v2023.10.0") def idxmax( self, dim: Hashable | None = None, @@ -10258,7 +10252,6 @@ def _wrapper(Y, *args, **kwargs): return result - @_deprecate_positional_args("v2023.10.0") def drop_duplicates( self, dim: Hashable | Iterable[Hashable], diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 9596d19e735..ceae79031f8 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -50,7 +50,6 @@ ) from xarray.core.variable import IndexVariable, Variable from xarray.namedarray.pycompat import is_chunked_array -from xarray.util.deprecation_helpers import _deprecate_positional_args if TYPE_CHECKING: from numpy.typing import ArrayLike @@ -1183,7 +1182,6 @@ def fillna(self, value: Any) -> T_Xarray: """ return ops.fillna(self, value) - @_deprecate_positional_args("v2023.10.0") def quantile( self, q: ArrayLike, diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py index 2c6e7d4282a..269cb49a2c1 100644 --- a/xarray/core/weighted.py +++ b/xarray/core/weighted.py @@ -11,7 +11,6 @@ from xarray.core.computation import apply_ufunc, dot from xarray.core.types import Dims, T_DataArray, T_Xarray from xarray.namedarray.utils import is_duck_dask_array -from xarray.util.deprecation_helpers import _deprecate_positional_args # Weighted quantile methods are a subset of the numpy supported quantile methods. QUANTILE_METHODS = Literal[ @@ -454,7 +453,6 @@ def _weighted_quantile_1d( def _implementation(self, func, dim, **kwargs): raise NotImplementedError("Use `Dataset.weighted` or `DataArray.weighted`") - @_deprecate_positional_args("v2023.10.0") def sum_of_weights( self, dim: Dims = None, @@ -465,7 +463,6 @@ def sum_of_weights( self._sum_of_weights, dim=dim, keep_attrs=keep_attrs ) - @_deprecate_positional_args("v2023.10.0") def sum_of_squares( self, dim: Dims = None, @@ -477,7 +474,6 @@ def sum_of_squares( self._sum_of_squares, dim=dim, skipna=skipna, keep_attrs=keep_attrs ) - @_deprecate_positional_args("v2023.10.0") def sum( self, dim: Dims = None, @@ -489,7 +485,6 @@ def sum( self._weighted_sum, dim=dim, skipna=skipna, keep_attrs=keep_attrs ) - @_deprecate_positional_args("v2023.10.0") def mean( self, dim: Dims = None, @@ -501,7 +496,6 @@ def mean( self._weighted_mean, dim=dim, skipna=skipna, keep_attrs=keep_attrs ) - @_deprecate_positional_args("v2023.10.0") def var( self, dim: Dims = None, @@ -513,7 +507,6 @@ def var( self._weighted_var, dim=dim, skipna=skipna, keep_attrs=keep_attrs ) - @_deprecate_positional_args("v2023.10.0") def std( self, dim: Dims = None, From 49502fcde4db6ea3da1f60ead589580cfdad5c98 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Wed, 11 Dec 2024 16:51:30 -0700 Subject: [PATCH 06/11] Fix/silence upstream tests (#9879) --- xarray/tests/test_cftimeindex.py | 1 + xarray/tests/test_formatting.py | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py index 116487e2bcf..e34714a344a 100644 --- a/xarray/tests/test_cftimeindex.py +++ b/xarray/tests/test_cftimeindex.py @@ -1188,6 +1188,7 @@ def test_to_datetimeindex_feb_29(calendar): index.to_datetimeindex() +@pytest.mark.xfail(reason="fails on pandas main branch") @requires_cftime def test_multiindex(): index = xr.cftime_range("2001-01-01", periods=100, calendar="360_day") diff --git a/xarray/tests/test_formatting.py b/xarray/tests/test_formatting.py index 946d491bd61..9b658fa0d66 100644 --- a/xarray/tests/test_formatting.py +++ b/xarray/tests/test_formatting.py @@ -1020,9 +1020,10 @@ def test_display_nbytes() -> None: assert actual == expected actual = repr(xds["foo"]) - expected = """ + array_repr = repr(xds.foo.data).replace("\n ", "") + expected = f""" Size: 2kB -array([ 0, 1, 2, ..., 1197, 1198, 1199], dtype=int16) +{array_repr} Coordinates: * foo (foo) int16 2kB 0 1 2 3 4 5 6 ... 1194 1195 1196 1197 1198 1199 """.strip() From b7e6036555e54ffdaa685ba3cc6a94cd9664c4b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Fri, 13 Dec 2024 16:06:45 +0100 Subject: [PATCH 07/11] finalize deprecation of "closed"-parameter (#9882) * finalize deprecation of "closed" to "inclusive" in date_range and cftime_range * add whats-new.rst entry * fix tests * fix test * remove stale function --- doc/whats-new.rst | 4 +- xarray/coding/cftime_offsets.py | 73 ++--------------------------- xarray/tests/test_cftime_offsets.py | 48 ------------------- 3 files changed, 8 insertions(+), 117 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 6a08246182c..08e6218ca14 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -37,7 +37,9 @@ Breaking changes Deprecations ~~~~~~~~~~~~ - +- Finalize deprecation of ``closed`` parameters of :py:func:`cftime_range` and + :py:func:`date_range` (:pull:`9882`). + By `Kai Mühlbauer `_. Bug fixes ~~~~~~~~~ diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py index a994eb9661f..89c06e56ea7 100644 --- a/xarray/coding/cftime_offsets.py +++ b/xarray/coding/cftime_offsets.py @@ -62,15 +62,13 @@ ) from xarray.core.common import _contains_datetime_like_objects, is_np_datetime_like from xarray.core.pdcompat import ( - NoDefault, count_not_none, nanosecond_precision_timestamp, - no_default, ) from xarray.core.utils import attempt_import, emit_user_level_warning if TYPE_CHECKING: - from xarray.core.types import InclusiveOptions, Self, SideOptions, TypeAlias + from xarray.core.types import InclusiveOptions, Self, TypeAlias DayOption: TypeAlias = Literal["start", "end"] @@ -943,42 +941,6 @@ def _generate_range(start, end, periods, offset): current = next_date -def _translate_closed_to_inclusive(closed): - """Follows code added in pandas #43504.""" - emit_user_level_warning( - "Following pandas, the `closed` parameter is deprecated in " - "favor of the `inclusive` parameter, and will be removed in " - "a future version of xarray.", - FutureWarning, - ) - if closed is None: - inclusive = "both" - elif closed in ("left", "right"): - inclusive = closed - else: - raise ValueError( - f"Argument `closed` must be either 'left', 'right', or None. " - f"Got {closed!r}." - ) - return inclusive - - -def _infer_inclusive( - closed: NoDefault | SideOptions, inclusive: InclusiveOptions | None -) -> InclusiveOptions: - """Follows code added in pandas #43504.""" - if closed is not no_default and inclusive is not None: - raise ValueError( - "Following pandas, deprecated argument `closed` cannot be " - "passed if argument `inclusive` is not None." - ) - if closed is not no_default: - return _translate_closed_to_inclusive(closed) - if inclusive is None: - return "both" - return inclusive - - def cftime_range( start=None, end=None, @@ -986,8 +948,7 @@ def cftime_range( freq=None, normalize=False, name=None, - closed: NoDefault | SideOptions = no_default, - inclusive: None | InclusiveOptions = None, + inclusive: InclusiveOptions = "both", calendar="standard", ) -> CFTimeIndex: """Return a fixed frequency CFTimeIndex. @@ -1006,16 +967,7 @@ def cftime_range( Normalize start/end dates to midnight before generating date range. name : str, default: None Name of the resulting index - closed : {None, "left", "right"}, default: "NO_DEFAULT" - Make the interval closed with respect to the given frequency to the - "left", "right", or both sides (None). - - .. deprecated:: 2023.02.0 - Following pandas, the ``closed`` parameter is deprecated in favor - of the ``inclusive`` parameter, and will be removed in a future - version of xarray. - - inclusive : {None, "both", "neither", "left", "right"}, default None + inclusive : {"both", "neither", "left", "right"}, default "both" Include boundaries; whether to set each bound as closed or open. .. versionadded:: 2023.02.0 @@ -1193,8 +1145,6 @@ def cftime_range( offset = to_offset(freq) dates = np.array(list(_generate_range(start, end, periods, offset))) - inclusive = _infer_inclusive(closed, inclusive) - if inclusive == "neither": left_closed = False right_closed = False @@ -1229,8 +1179,7 @@ def date_range( tz=None, normalize=False, name=None, - closed: NoDefault | SideOptions = no_default, - inclusive: None | InclusiveOptions = None, + inclusive: InclusiveOptions = "both", calendar="standard", use_cftime=None, ): @@ -1257,20 +1206,10 @@ def date_range( Normalize start/end dates to midnight before generating date range. name : str, default: None Name of the resulting index - closed : {None, "left", "right"}, default: "NO_DEFAULT" - Make the interval closed with respect to the given frequency to the - "left", "right", or both sides (None). - - .. deprecated:: 2023.02.0 - Following pandas, the `closed` parameter is deprecated in favor - of the `inclusive` parameter, and will be removed in a future - version of xarray. - - inclusive : {None, "both", "neither", "left", "right"}, default: None + inclusive : {"both", "neither", "left", "right"}, default: "both" Include boundaries; whether to set each bound as closed or open. .. versionadded:: 2023.02.0 - calendar : str, default: "standard" Calendar type for the datetimes. use_cftime : boolean, optional @@ -1294,8 +1233,6 @@ def date_range( if tz is not None: use_cftime = False - inclusive = _infer_inclusive(closed, inclusive) - if _is_standard_calendar(calendar) and use_cftime is not True: try: return pd.date_range( diff --git a/xarray/tests/test_cftime_offsets.py b/xarray/tests/test_cftime_offsets.py index f6f97108c1d..1ab6c611aac 100644 --- a/xarray/tests/test_cftime_offsets.py +++ b/xarray/tests/test_cftime_offsets.py @@ -1057,15 +1057,6 @@ def test_rollback(calendar, offset, initial_date_args, partial_expected_date_arg False, [(1, 1, 2), (1, 1, 3)], ), - ( - "0001-01-01", - "0001-01-04", - None, - "D", - None, - False, - [(1, 1, 1), (1, 1, 2), (1, 1, 3), (1, 1, 4)], - ), ( "0001-01-01", "0001-01-04", @@ -1294,13 +1285,6 @@ def test_invalid_cftime_range_inputs( cftime_range(start, end, periods, freq, inclusive=inclusive) # type: ignore[arg-type] -def test_invalid_cftime_arg() -> None: - with pytest.warns( - FutureWarning, match="Following pandas, the `closed` parameter is deprecated" - ): - cftime_range("2000", "2001", None, "YE", closed="left") - - _CALENDAR_SPECIFIC_MONTH_END_TESTS = [ ("noleap", [(2, 28), (4, 30), (6, 30), (8, 31), (10, 31), (12, 31)]), ("all_leap", [(2, 29), (4, 30), (6, 30), (8, 31), (10, 31), (12, 31)]), @@ -1534,15 +1518,6 @@ def as_timedelta_not_implemented_error(): tick.as_timedelta() -@pytest.mark.parametrize("function", [cftime_range, date_range]) -def test_cftime_or_date_range_closed_and_inclusive_error(function: Callable) -> None: - if function == cftime_range and not has_cftime: - pytest.skip("requires cftime") - - with pytest.raises(ValueError, match="Following pandas, deprecated"): - function("2000", periods=3, closed=None, inclusive="right") - - @pytest.mark.parametrize("function", [cftime_range, date_range]) def test_cftime_or_date_range_invalid_inclusive_value(function: Callable) -> None: if function == cftime_range and not has_cftime: @@ -1552,29 +1527,6 @@ def test_cftime_or_date_range_invalid_inclusive_value(function: Callable) -> Non function("2000", periods=3, inclusive="foo") -@pytest.mark.parametrize( - "function", - [ - pytest.param(cftime_range, id="cftime", marks=requires_cftime), - pytest.param(date_range, id="date"), - ], -) -@pytest.mark.parametrize( - ("closed", "inclusive"), [(None, "both"), ("left", "left"), ("right", "right")] -) -def test_cftime_or_date_range_closed( - function: Callable, - closed: Literal["left", "right", None], - inclusive: Literal["left", "right", "both"], -) -> None: - with pytest.warns(FutureWarning, match="Following pandas"): - result_closed = function("2000-01-01", "2000-01-04", freq="D", closed=closed) - result_inclusive = function( - "2000-01-01", "2000-01-04", freq="D", inclusive=inclusive - ) - np.testing.assert_equal(result_closed.values, result_inclusive.values) - - @pytest.mark.parametrize("function", [cftime_range, date_range]) def test_cftime_or_date_range_inclusive_None(function) -> None: if function == cftime_range and not has_cftime: From f05c5ec799f144b8cc3b9355a702814be7285d8f Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 13 Dec 2024 08:59:54 -0700 Subject: [PATCH 08/11] Fix upstream Zarr compatibility (#9884) Closes #9880 --- xarray/backends/zarr.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index cb3ab375c31..d7f056a209a 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -1135,9 +1135,7 @@ def set_variables(self, variables, check_encoding_set, writer, unlimited_dims=No else: encoded_attrs[DIMENSION_KEY] = dims - encoding["exists_ok" if _zarr_v3() else "overwrite"] = ( - True if self._mode == "w" else False - ) + encoding["overwrite"] = True if self._mode == "w" else False zarr_array = self._create_new_array( name=name, From 755581c84dc2ad5435f0a9798e48115f80015f2d Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Sat, 14 Dec 2024 01:33:10 -0700 Subject: [PATCH 09/11] Fix interpolation when non-numeric coords are present. (#9887) * Fix interpolation when non-numeric coords are present. Closes #8099 Closes #9839 * fix * Add basic 1d test --------- Co-authored-by: Illviljan <14371165+Illviljan@users.noreply.github.com> --- doc/whats-new.rst | 3 +++ xarray/core/dataset.py | 11 ++++++----- xarray/tests/test_interp.py | 38 +++++++++++++++++++++++++++++++++++++ 3 files changed, 47 insertions(+), 5 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 08e6218ca14..cbc59a708aa 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -47,6 +47,9 @@ Bug fixes By `Bruce Merry `_. - Fix unintended load on datasets when calling :py:meth:`DataArray.plot.scatter` (:pull:`9818`). By `Jimmy Westling `_. +- Fix interpolation when non-numeric coordinate variables are present (:issue:`8099`, :issue:`9839`). + By `Deepak Cherian `_. + Documentation ~~~~~~~~~~~~~ diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index ea17a69f827..d4a23ac275a 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -4185,7 +4185,7 @@ def _validate_interp_indexer(x, new_x): } variables: dict[Hashable, Variable] = {} - reindex: bool = False + reindex_vars: list[Hashable] = [] for name, var in obj._variables.items(): if name in indexers: continue @@ -4207,19 +4207,20 @@ def _validate_interp_indexer(x, new_x): # booleans and objects and retains the dtype but inside # this loop there might be some duplicate code that slows it # down, therefore collect these signals and run it later: - reindex = True + reindex_vars.append(name) elif all(d not in indexers for d in var.dims): # For anything else we can only keep variables if they # are not dependent on any coords that are being # interpolated along: variables[name] = var - if reindex: - reindex_indexers = { + if reindex_vars and ( + reindex_indexers := { k: v for k, (_, v) in validated_indexers.items() if v.dims == (k,) } + ): reindexed = alignment.reindex( - obj, + obj[reindex_vars], indexers=reindex_indexers, method=method_non_numeric, exclude_vars=variables.keys(), diff --git a/xarray/tests/test_interp.py b/xarray/tests/test_interp.py index d602cb96a6a..86532a26f65 100644 --- a/xarray/tests/test_interp.py +++ b/xarray/tests/test_interp.py @@ -1055,3 +1055,41 @@ def test_interp1d_complex_out_of_bounds() -> None: expected = da.interp(time=3.5, kwargs=dict(fill_value=np.nan + np.nan * 1j)) actual = da.interp(time=3.5) assert_identical(actual, expected) + + +@requires_scipy +def test_interp_non_numeric_1d() -> None: + ds = xr.Dataset( + { + "numeric": ("time", 1 + np.arange(0, 4, 1)), + "non_numeric": ("time", np.array(["a", "b", "c", "d"])), + }, + coords={"time": (np.arange(0, 4, 1))}, + ) + actual = ds.interp(time=np.linspace(0, 3, 7)) + + expected = xr.Dataset( + { + "numeric": ("time", 1 + np.linspace(0, 3, 7)), + "non_numeric": ("time", np.array(["a", "b", "b", "c", "c", "d", "d"])), + }, + coords={"time": np.linspace(0, 3, 7)}, + ) + xr.testing.assert_identical(actual, expected) + + +@requires_scipy +def test_interp_non_numeric_nd() -> None: + # regression test for GH8099, GH9839 + ds = xr.Dataset({"x": ("a", np.arange(4))}, coords={"a": (np.arange(4) - 1.5)}) + t = xr.DataArray( + np.random.randn(6).reshape((2, 3)) * 0.5, + dims=["r", "s"], + coords={"r": np.arange(2) - 0.5, "s": np.arange(3) - 1}, + ) + ds["m"] = ds.x > 1 + + actual = ds.interp(a=t, method="linear") + # with numeric only + expected = ds[["x"]].interp(a=t, method="linear") + assert_identical(actual[["x"]], expected) From 3bc7a883059f8a82f59bae3a7a4bafda1dd6dfa3 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sat, 14 Dec 2024 11:57:40 +0100 Subject: [PATCH 10/11] Use integers instead of randint (#9889) --- asv_bench/benchmarks/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/asv_bench/benchmarks/__init__.py b/asv_bench/benchmarks/__init__.py index 4a9613ce026..b8a54f71a42 100644 --- a/asv_bench/benchmarks/__init__.py +++ b/asv_bench/benchmarks/__init__.py @@ -48,7 +48,7 @@ def randn(shape, frac_nan=None, chunks=None, seed=0): def randint(low, high=None, size=None, frac_minus=None, seed=0): rng = np.random.default_rng(seed) - x = rng.randint(low, high, size) + x = rng.integers(low, high, size) if frac_minus is not None: inds = rng.choice(range(x.size), int(x.size * frac_minus)) x.flat[inds] = -1 From 0945e0eaa6de01171be452f9b3e758cec9cec339 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 16 Dec 2024 08:20:02 +0100 Subject: [PATCH 11/11] Bump pypa/gh-action-pypi-publish in the actions group (#9894) Bumps the actions group with 1 update: [pypa/gh-action-pypi-publish](https://github.com/pypa/gh-action-pypi-publish). Updates `pypa/gh-action-pypi-publish` from 1.12.2 to 1.12.3 - [Release notes](https://github.com/pypa/gh-action-pypi-publish/releases) - [Commits](https://github.com/pypa/gh-action-pypi-publish/compare/v1.12.2...v1.12.3) --- updated-dependencies: - dependency-name: pypa/gh-action-pypi-publish dependency-type: direct:production update-type: version-update:semver-patch dependency-group: actions ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/pypi-release.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pypi-release.yaml b/.github/workflows/pypi-release.yaml index e6e984ce400..6377f59ac38 100644 --- a/.github/workflows/pypi-release.yaml +++ b/.github/workflows/pypi-release.yaml @@ -88,7 +88,7 @@ jobs: path: dist - name: Publish package to TestPyPI if: github.event_name == 'push' - uses: pypa/gh-action-pypi-publish@v1.12.2 + uses: pypa/gh-action-pypi-publish@v1.12.3 with: repository_url: https://test.pypi.org/legacy/ verbose: true @@ -110,6 +110,6 @@ jobs: name: releases path: dist - name: Publish package to PyPI - uses: pypa/gh-action-pypi-publish@v1.12.2 + uses: pypa/gh-action-pypi-publish@v1.12.3 with: verbose: true