From 6531b57f8c5cb7f3c564ff895c2e4b6573bb5521 Mon Sep 17 00:00:00 2001 From: Mick Date: Wed, 1 Mar 2023 00:23:45 +0100 Subject: [PATCH 1/9] use numpys SupportsDtype (#7521) --- xarray/core/types.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/xarray/core/types.py b/xarray/core/types.py index 7149443c0c7..0f11b16b003 100644 --- a/xarray/core/types.py +++ b/xarray/core/types.py @@ -7,7 +7,6 @@ Any, Callable, Literal, - Protocol, SupportsIndex, TypeVar, Union, @@ -18,6 +17,7 @@ from packaging.version import Version if TYPE_CHECKING: + from numpy._typing import _SupportsDType from numpy.typing import ArrayLike from xarray.backends.common import BackendEntrypoint @@ -50,19 +50,12 @@ _ShapeLike = Union[SupportsIndex, Sequence[SupportsIndex]] _DTypeLikeNested = Any # TODO: wait for support for recursive types - # once NumPy 1.21 is minimum version, use NumPys definition directly - # 1.20 uses a non-generic Protocol (like we define here for simplicity) - class _SupportsDType(Protocol): - @property - def dtype(self) -> np.dtype: - ... - # Xarray requires a Mapping[Hashable, dtype] in many places which # conflics with numpys own DTypeLike (with dtypes for fields). # https://numpy.org/devdocs/reference/typing.html#numpy.typing.DTypeLike # This is a copy of this DTypeLike that allows only non-Mapping dtypes. DTypeLikeSave = Union[ - np.dtype, + np.dtype[Any], # default data type (float64) None, # array-scalar types and generic types @@ -78,7 +71,7 @@ def dtype(self) -> np.dtype: # because numpy does the same? 
list[Any], # anything with a dtype attribute - _SupportsDType, + _SupportsDType[np.dtype[Any]], ] try: from cftime import datetime as CFTimeDatetime From 463bc281eff3cd630c3fd87792cfcddb98d90915 Mon Sep 17 00:00:00 2001 From: Mattia Almansi Date: Thu, 2 Mar 2023 17:49:22 +0100 Subject: [PATCH 2/9] fix nczarr when libnetcdf>4.8.1 (#7575) --- xarray/backends/zarr.py | 4 ++-- xarray/tests/test_backends.py | 19 +++++++++++-------- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 6686d67ed4d..3b0335aa5a6 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -207,7 +207,7 @@ def _get_zarr_dims_and_attrs(zarr_obj, dimension_key, try_nczarr): "which are required for xarray to determine variable dimensions." ) from e - nc_attrs = [attr for attr in zarr_obj.attrs if attr.startswith("_NC")] + nc_attrs = [attr for attr in zarr_obj.attrs if attr.lower().startswith("_nc")] attributes = HiddenKeyDict(zarr_obj.attrs, [dimension_key] + nc_attrs) return dimensions, attributes @@ -495,7 +495,7 @@ def get_attrs(self): return { k: v for k, v in self.zarr_group.attrs.asdict().items() - if not k.startswith("_NC") + if not k.lower().startswith("_nc") } def get_dimensions(self): diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index a156b864315..91daabd12d5 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -5663,12 +5663,14 @@ def test_write_file_from_np_str(str_type, tmpdir) -> None: @requires_zarr @requires_netCDF4 class TestNCZarr: - @staticmethod - def _create_nczarr(filename): - netcdfc_version = Version(nc4.getlibversion().split()[0]) - if netcdfc_version < Version("4.8.1"): + @property + def netcdfc_version(self): + return Version(nc4.getlibversion().split()[0]) + + def _create_nczarr(self, filename): + if self.netcdfc_version < Version("4.8.1"): pytest.skip("requires netcdf-c>=4.8.1") - if (platform.system() == "Windows") and 
(netcdfc_version == Version("4.8.1")): + if platform.system() == "Windows" and self.netcdfc_version == Version("4.8.1"): # Bug in netcdf-c==4.8.1 (typo: Nan instead of NaN) # https://github.com/Unidata/netcdf-c/issues/2265 pytest.skip("netcdf-c==4.8.1 has issues on Windows") @@ -5678,9 +5680,7 @@ def _create_nczarr(filename): # https://github.com/Unidata/netcdf-c/issues/2259 ds = ds.drop_vars("dim3") - # netcdf-c>4.8.1 will add _ARRAY_DIMENSIONS by default - mode = "nczarr" if netcdfc_version == Version("4.8.1") else "nczarr,noxarray" - ds.to_netcdf(f"file://{filename}#mode={mode}") + ds.to_netcdf(f"file://{filename}#mode=nczarr") return ds def test_open_nczarr(self) -> None: @@ -5700,6 +5700,9 @@ def test_overwriting_nczarr(self) -> None: @pytest.mark.parametrize("mode", ["a", "r+"]) @pytest.mark.filterwarnings("ignore:.*non-consolidated metadata.*") def test_raise_writing_to_nczarr(self, mode) -> None: + if self.netcdfc_version > Version("4.8.1"): + pytest.skip("netcdf-c>4.8.1 adds the _ARRAY_DIMENSIONS attribute") + with create_tmp_file(suffix=".zarr") as tmp: ds = self._create_nczarr(tmp) with pytest.raises( From e04109f6588a5356bcf422b3e0a7c0f7c2585fae Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 2 Mar 2023 19:56:12 +0100 Subject: [PATCH 3/9] [pre-commit.ci] pre-commit autoupdate (#7565) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/charliermarsh/ruff-pre-commit: v0.0.248 → v0.0.253](https://github.com/charliermarsh/ruff-pre-commit/compare/v0.0.248...v0.0.253) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Illviljan <14371165+Illviljan@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a94564c5472..55c9b638153 100644 --- 
a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -16,7 +16,7 @@ repos: files: ^xarray/ - repo: https://github.com/charliermarsh/ruff-pre-commit # Ruff version. - rev: 'v0.0.248' + rev: 'v0.0.253' hooks: - id: ruff args: ["--fix"] From 0b9c2240c221ee176bfa9825957f715f2ba753cc Mon Sep 17 00:00:00 2001 From: Tom Vo Date: Thu, 2 Mar 2023 23:51:25 -0800 Subject: [PATCH 4/9] Add xCDAT to list of Xarray related projects (#7579) * Add `xcdat` to `ecosystem.rst` * Capitalize xCDAT --- doc/ecosystem.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/ecosystem.rst b/doc/ecosystem.rst index 39d82a6f5d5..e6e970c6239 100644 --- a/doc/ecosystem.rst +++ b/doc/ecosystem.rst @@ -45,6 +45,7 @@ Geosciences - `xarray-spatial `_: Numba-accelerated raster-based spatial processing tools (NDVI, curvature, zonal-statistics, proximity, hillshading, viewshed, etc.) - `xarray-topo `_: xarray extension for topographic analysis and modelling. - `xbpch `_: xarray interface for bpch files. +- `xCDAT `_: An extension of xarray for climate data analysis on structured grids. - `xclim `_: A library for calculating climate science indices with unit handling built from xarray and dask. - `xESMF `_: Universal regridder for geospatial data. - `xgcm `_: Extends the xarray data model to understand finite volume grid cells (common in General Circulation Models) and provides interpolation and difference operations for such grids. 
From 43ba095712de12c957e0a4acf956df01d84b2046 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Fri, 3 Mar 2023 11:14:13 +0100 Subject: [PATCH 5/9] update the docs environment (#7442) * use python 3.10 in the docs environment * require a more recent `sphinx` version * [skip-ci] * remove the unused pydata-sphinx-theme [skip-ci] * pin `sphinx-book-theme` to a more recent version [skip-ci] * try working around the theme bug [skip-ci] * use string interpolation markers for the extlink text * try specifying the project of a intersphinx link * remove the empty `extra_navbar` and `navbar_footer_text` theme options * [skip-ci] --------- Co-authored-by: Joe Hamman Co-authored-by: Deepak Cherian Co-authored-by: Illviljan <14371165+Illviljan@users.noreply.github.com> --- ci/requirements/doc.yml | 7 +++---- doc/conf.py | 7 +++---- doc/user-guide/interpolation.rst | 2 +- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/ci/requirements/doc.yml b/ci/requirements/doc.yml index d8823e7cbbc..cc6f13e7086 100644 --- a/ci/requirements/doc.yml +++ b/ci/requirements/doc.yml @@ -4,7 +4,7 @@ channels: - conda-forge - nodefaults dependencies: - - python=3.9 + - python=3.10 - bottleneck - cartopy - cfgrib>=0.9 @@ -23,7 +23,6 @@ dependencies: - pandas>=1.4 - pooch - pip - - pydata-sphinx-theme>=0.4.3 - pyproj - rasterio>=1.1 - scipy!=1.10.0 @@ -31,10 +30,10 @@ dependencies: - setuptools - sparse - sphinx-autosummary-accessors - - sphinx-book-theme >= 0.0.38 + - sphinx-book-theme >= 0.3.0 - sphinx-copybutton - sphinx-design - - sphinx!=4.4.0 + - sphinx>=5.0 - zarr>=2.10 - pip: - sphinxext-rediraffe diff --git a/doc/conf.py b/doc/conf.py index c916fde5760..0b6c6766c3b 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -97,8 +97,8 @@ extlinks = { - "issue": ("https://github.com/pydata/xarray/issues/%s", "GH"), - "pull": ("https://github.com/pydata/xarray/pull/%s", "PR"), + "issue": ("https://github.com/pydata/xarray/issues/%s", "GH%s"), + "pull": 
("https://github.com/pydata/xarray/pull/%s", "PR%s"), } # sphinx-copybutton configurations @@ -244,12 +244,11 @@ use_repository_button=True, use_issues_button=True, home_page_in_toc=False, - extra_navbar="", - navbar_footer_text="", extra_footer="""

Xarray is a fiscally sponsored project of NumFOCUS, a nonprofit dedicated to supporting the open-source scientific computing community.
Theme by the Executable Book Project

""", twitter_url="https://twitter.com/xarray_devs", + icon_links=[], # workaround for pydata/pydata-sphinx-theme#1220 ) diff --git a/doc/user-guide/interpolation.rst b/doc/user-guide/interpolation.rst index 2dc47e9f591..7b40962e826 100644 --- a/doc/user-guide/interpolation.rst +++ b/doc/user-guide/interpolation.rst @@ -50,7 +50,7 @@ array-like, which gives the interpolated result as an array. # interpolation da.interp(time=[2.5, 3.5]) -To interpolate data with a :py:doc:`numpy.datetime64 ` coordinate you can pass a string. +To interpolate data with a :py:doc:`numpy.datetime64 ` coordinate you can pass a string. .. ipython:: python From 830ee6de0d545c997df84fe69b0ac2334bde1d1b Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 3 Mar 2023 16:13:51 -0700 Subject: [PATCH 6/9] Support first, last with dask arrays (#7562) * Support first, last with dask arrays Use dask.array.reduction. For this we need to add support for the `keepdims` kwarg to `nanfirst` and `nanlast`. Even though the final result is always keepdims=False, dask runs the intermediate steps with keepdims=True. * Don't provide meta. It would need to account for shape change. --- doc/whats-new.rst | 2 ++ xarray/core/dask_array_ops.py | 37 +++++++++++++++++++++++++++++ xarray/core/duck_array_ops.py | 19 +++++++-------- xarray/core/nputils.py | 20 ++++++++++++---- xarray/tests/test_dask.py | 15 ++++++++---- xarray/tests/test_duck_array_ops.py | 29 +++++++++++++++++++++- 6 files changed, 101 insertions(+), 21 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index a82d0a9fa2a..b7e632bdfb7 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -25,6 +25,8 @@ New Features - Fix :py:meth:`xr.cov` and :py:meth:`xr.corr` now support complex valued arrays (:issue:`7340`, :pull:`7392`). By `Michael Niklas `_. +- Support dask arrays in ``first`` and ``last`` reductions. + By `Deepak Cherian `_. 
Breaking changes ~~~~~~~~~~~~~~~~ diff --git a/xarray/core/dask_array_ops.py b/xarray/core/dask_array_ops.py index d2d3e4a6d1c..24c5f698a27 100644 --- a/xarray/core/dask_array_ops.py +++ b/xarray/core/dask_array_ops.py @@ -1,5 +1,9 @@ from __future__ import annotations +from functools import partial + +from numpy.core.multiarray import normalize_axis_index # type: ignore[attr-defined] + from xarray.core import dtypes, nputils @@ -92,3 +96,36 @@ def _fill_with_last_one(a, b): axis=axis, dtype=array.dtype, ) + + +def _first_last_wrapper(array, *, axis, op, keepdims): + return op(array, axis, keepdims=keepdims) + + +def _first_or_last(darray, axis, op): + import dask.array + + # This will raise the same error message seen for numpy + axis = normalize_axis_index(axis, darray.ndim) + + wrapped_op = partial(_first_last_wrapper, op=op) + return dask.array.reduction( + darray, + chunk=wrapped_op, + aggregate=wrapped_op, + axis=axis, + dtype=darray.dtype, + keepdims=False, # match numpy version + ) + + +def nanfirst(darray, axis): + from xarray.core.duck_array_ops import nanfirst + + return _first_or_last(darray, axis, op=nanfirst) + + +def nanlast(darray, axis): + from xarray.core.duck_array_ops import nanlast + + return _first_or_last(darray, axis, op=nanlast) diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 96baf7f96cd..84e66803fe8 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -9,7 +9,6 @@ import datetime import inspect import warnings -from functools import partial from importlib import import_module import numpy as np @@ -637,18 +636,14 @@ def cumsum(array, axis=None, **kwargs): return _nd_cum_func(cumsum_1d, array, axis, **kwargs) -_fail_on_dask_array_input_skipna = partial( - fail_on_dask_array_input, - msg="%r with skipna=True is not yet implemented on dask arrays", -) - - def first(values, axis, skipna=None): """Return the first non-NA elements in this array along the given axis""" if (skipna or 
skipna is None) and values.dtype.kind not in "iSU": # only bother for dtypes that can hold NaN - _fail_on_dask_array_input_skipna(values) - return nanfirst(values, axis) + if is_duck_dask_array(values): + return dask_array_ops.nanfirst(values, axis) + else: + return nanfirst(values, axis) return take(values, 0, axis=axis) @@ -656,8 +651,10 @@ def last(values, axis, skipna=None): """Return the last non-NA elements in this array along the given axis""" if (skipna or skipna is None) and values.dtype.kind not in "iSU": # only bother for dtypes that can hold NaN - _fail_on_dask_array_input_skipna(values) - return nanlast(values, axis) + if is_duck_dask_array(values): + return dask_array_ops.nanlast(values, axis) + else: + return nanlast(values, axis) return take(values, -1, axis=axis) diff --git a/xarray/core/nputils.py b/xarray/core/nputils.py index 80c988ebd4f..2bc413dc21f 100644 --- a/xarray/core/nputils.py +++ b/xarray/core/nputils.py @@ -24,17 +24,29 @@ def _select_along_axis(values, idx, axis): return values[sl] -def nanfirst(values, axis): +def nanfirst(values, axis, keepdims=False): + if isinstance(axis, tuple): + (axis,) = axis axis = normalize_axis_index(axis, values.ndim) idx_first = np.argmax(~pd.isnull(values), axis=axis) - return _select_along_axis(values, idx_first, axis) + result = _select_along_axis(values, idx_first, axis) + if keepdims: + return np.expand_dims(result, axis=axis) + else: + return result -def nanlast(values, axis): +def nanlast(values, axis, keepdims=False): + if isinstance(axis, tuple): + (axis,) = axis axis = normalize_axis_index(axis, values.ndim) rev = (slice(None),) * axis + (slice(None, None, -1),) idx_last = -1 - np.argmax(~pd.isnull(values)[rev], axis=axis) - return _select_along_axis(values, idx_last, axis) + result = _select_along_axis(values, idx_last, axis) + if keepdims: + return np.expand_dims(result, axis=axis) + else: + return result def inverse_permutation(indices): diff --git a/xarray/tests/test_dask.py 
b/xarray/tests/test_dask.py index 577debbce21..52a41035faf 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -549,17 +549,22 @@ def test_rolling(self): actual = v.rolling(x=2).mean() self.assertLazyAndAllClose(expected, actual) - def test_groupby_first(self): + @pytest.mark.parametrize("func", ["first", "last"]) + def test_groupby_first_last(self, func): + method = operator.methodcaller(func) u = self.eager_array v = self.lazy_array for coords in [u.coords, v.coords]: coords["ab"] = ("x", ["a", "a", "b", "b"]) - with pytest.raises(NotImplementedError, match=r"dask"): - v.groupby("ab").first() - expected = u.groupby("ab").first() + expected = method(u.groupby("ab")) + + with raise_if_dask_computes(): + actual = method(v.groupby("ab")) + self.assertLazyAndAllClose(expected, actual) + with raise_if_dask_computes(): - actual = v.groupby("ab").first(skipna=False) + actual = method(v.groupby("ab")) self.assertLazyAndAllClose(expected, actual) def test_reindex(self): diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index c873c7b76d3..0d6efa2a8d3 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -48,7 +48,11 @@ class TestOps: def setUp(self): self.x = array( [ - [[nan, nan, 2.0, nan], [nan, 5.0, 6.0, nan], [8.0, 9.0, 10.0, nan]], + [ + [nan, nan, 2.0, nan], + [nan, 5.0, 6.0, nan], + [8.0, 9.0, 10.0, nan], + ], [ [nan, 13.0, 14.0, 15.0], [nan, 17.0, 18.0, nan], @@ -128,6 +132,29 @@ def test_all_nan_arrays(self): assert np.isnan(mean([np.nan, np.nan])) +@requires_dask +class TestDaskOps(TestOps): + @pytest.fixture(autouse=True) + def setUp(self): + import dask.array + + self.x = dask.array.from_array( + [ + [ + [nan, nan, 2.0, nan], + [nan, 5.0, 6.0, nan], + [8.0, 9.0, 10.0, nan], + ], + [ + [nan, 13.0, 14.0, 15.0], + [nan, 17.0, 18.0, nan], + [nan, 21.0, nan, nan], + ], + ], + chunks=(2, 1, 2), + ) + + def test_cumsum_1d(): inputs = np.array([0, 1, 2, 3]) expected = 
np.array([0, 1, 3, 6]) From 798f4d492b7f87172a50a00cf7c4c68e4cbb8b4e Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Tue, 7 Mar 2023 10:10:29 -0600 Subject: [PATCH 7/9] Update contains_cftime_datetimes to avoid loading entire variable array (#7494) * Update contains_cftime_datetimes to avoid loading entire variable array * Update whats-new.rst * Convert arrays to variable instead for better control * fix mypy? * Update common.py * Update xarray/core/common.py Co-authored-by: Mathias Hauser * Update common.py remove _variable_contains_cftime_datetimes * Avoid creating variable. * Add test * minimize diff * Update tests. * address comment * Fix test * Fix whats-new * Fix more tests * More fixes * fix iris tests --------- Co-authored-by: Illviljan <14371165+Illviljan@users.noreply.github.com> Co-authored-by: Mathias Hauser Co-authored-by: dcherian Co-authored-by: Deepak Cherian --- doc/whats-new.rst | 2 ++ xarray/coding/calendar_ops.py | 6 +++--- xarray/coding/cftime_offsets.py | 2 +- xarray/coding/frequencies.py | 3 ++- xarray/core/accessor_dt.py | 2 +- xarray/core/common.py | 34 +++++++++++++++---------------- xarray/tests/__init__.py | 13 +++++++++++- xarray/tests/test_accessor_dt.py | 1 - xarray/tests/test_backends.py | 2 +- xarray/tests/test_coding.py | 4 ++-- xarray/tests/test_coding_times.py | 27 ++++++++++++++++++------ xarray/tests/test_dataarray.py | 4 ++-- 12 files changed, 63 insertions(+), 37 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index b7e632bdfb7..3cc2efde599 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -44,6 +44,8 @@ Bug fixes - Fix matplotlib raising a UserWarning when plotting a scatter plot with an unfilled marker (:issue:`7313`, :pull:`7318`). By `Jimmy Westling `_. +- Improved performance in ``open_dataset`` for datasets with large object arrays (:issue:`7484`, :pull:`7494`). + By `Alex Goodman `_ and `Deepak Cherian `_. 
Documentation ~~~~~~~~~~~~~ diff --git a/xarray/coding/calendar_ops.py b/xarray/coding/calendar_ops.py index 06f57757619..dc2f95b832e 100644 --- a/xarray/coding/calendar_ops.py +++ b/xarray/coding/calendar_ops.py @@ -147,7 +147,7 @@ def convert_calendar( from xarray.core.dataarray import DataArray time = obj[dim] - if not _contains_datetime_like_objects(time): + if not _contains_datetime_like_objects(time.variable): raise ValueError(f"Coordinate {dim} must contain datetime objects.") use_cftime = _should_cftime_be_used(time, calendar, use_cftime) @@ -319,8 +319,8 @@ def interp_calendar(source, target, dim="time"): target = DataArray(target, dims=(dim,), name=dim) if not _contains_datetime_like_objects( - source[dim] - ) or not _contains_datetime_like_objects(target): + source[dim].variable + ) or not _contains_datetime_like_objects(target.variable): raise ValueError( f"Both 'source.{dim}' and 'target' must contain datetime objects." ) diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py index bc3e3545892..792724ecc79 100644 --- a/xarray/coding/cftime_offsets.py +++ b/xarray/coding/cftime_offsets.py @@ -1267,7 +1267,7 @@ def date_range_like(source, calendar, use_cftime=None): if not isinstance(source, (pd.DatetimeIndex, CFTimeIndex)) and ( isinstance(source, DataArray) and (source.ndim != 1) - or not _contains_datetime_like_objects(source) + or not _contains_datetime_like_objects(source.variable) ): raise ValueError( "'source' must be a 1D array of datetime objects for inferring its range." diff --git a/xarray/coding/frequencies.py b/xarray/coding/frequencies.py index fef2f5a8319..4d24327aa2f 100644 --- a/xarray/coding/frequencies.py +++ b/xarray/coding/frequencies.py @@ -79,11 +79,12 @@ def infer_freq(index): If there are fewer than three values or the index is not 1D. 
""" from xarray.core.dataarray import DataArray + from xarray.core.variable import Variable if isinstance(index, (DataArray, pd.Series)): if index.ndim != 1: raise ValueError("'index' must be 1D") - elif not _contains_datetime_like_objects(DataArray(index)): + elif not _contains_datetime_like_objects(Variable("dim", index)): raise ValueError("'index' must contain datetime-like objects") dtype = np.asarray(index).dtype if dtype == "datetime64[ns]": diff --git a/xarray/core/accessor_dt.py b/xarray/core/accessor_dt.py index 118cbcb7ac5..b261bb26d23 100644 --- a/xarray/core/accessor_dt.py +++ b/xarray/core/accessor_dt.py @@ -574,7 +574,7 @@ def __new__(cls, obj: T_DataArray) -> CombinedDatetimelikeAccessor: # we need to choose which parent (datetime or timedelta) is # appropriate. Since we're checking the dtypes anyway, we'll just # do all the validation here. - if not _contains_datetime_like_objects(obj): + if not _contains_datetime_like_objects(obj.variable): raise TypeError( "'.dt' accessor only available for " "DataArray with datetime64 timedelta64 dtype or " diff --git a/xarray/core/common.py b/xarray/core/common.py index 3a73f463ea9..d980e622763 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -11,6 +11,7 @@ import pandas as pd from xarray.core import dtypes, duck_array_ops, formatting, formatting_html, ops +from xarray.core.indexing import BasicIndexer, ExplicitlyIndexed from xarray.core.options import OPTIONS, _get_keep_attrs from xarray.core.pycompat import is_duck_dask_array from xarray.core.utils import Frozen, either_dict_or_kwargs, is_scalar @@ -40,6 +41,7 @@ ScalarOrArray, SideOptions, T_DataWithCoords, + T_Variable, ) from xarray.core.variable import Variable @@ -1770,31 +1772,27 @@ def is_np_timedelta_like(dtype: DTypeLike) -> bool: return np.issubdtype(dtype, np.timedelta64) -def _contains_cftime_datetimes(array) -> bool: - """Check if an array contains cftime.datetime objects""" +def _contains_cftime_datetimes(array: Any) -> bool: + 
"""Check if a array inside a Variable contains cftime.datetime objects""" if cftime is None: return False - else: - if array.dtype == np.dtype("O") and array.size > 0: - sample = np.asarray(array).flat[0] - if is_duck_dask_array(sample): - sample = sample.compute() - if isinstance(sample, np.ndarray): - sample = sample.item() - return isinstance(sample, cftime.datetime) - else: - return False + if array.dtype == np.dtype("O") and array.size > 0: + first_idx = (0,) * array.ndim + if isinstance(array, ExplicitlyIndexed): + first_idx = BasicIndexer(first_idx) + sample = array[first_idx] + return isinstance(np.asarray(sample).item(), cftime.datetime) + + return False -def contains_cftime_datetimes(var) -> bool: + +def contains_cftime_datetimes(var: T_Variable) -> bool: """Check if an xarray.Variable contains cftime.datetime objects""" - if var.dtype == np.dtype("O") and var.size > 0: - return _contains_cftime_datetimes(var.data) - else: - return False + return _contains_cftime_datetimes(var._data) -def _contains_datetime_like_objects(var) -> bool: +def _contains_datetime_like_objects(var: T_Variable) -> bool: """Check if a variable contains datetime like objects (either np.datetime64, np.timedelta64, or cftime.datetime) """ diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index 1fe65a7e831..eda790694af 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -143,7 +143,18 @@ def __init__(self, array): self.array = array def __getitem__(self, key): - raise UnexpectedDataAccess("Tried accessing data") + raise UnexpectedDataAccess("Tried accessing data.") + + def __array__(self): + raise UnexpectedDataAccess("Tried accessing data.") + + +class FirstElementAccessibleArray(InaccessibleArray): + def __getitem__(self, key): + tuple_idxr = key.tuple + if len(tuple_idxr) > 1: + raise UnexpectedDataAccess("Tried accessing more than one element.") + return self.array[tuple_idxr] class ReturnItem: diff --git a/xarray/tests/test_accessor_dt.py 
b/xarray/tests/test_accessor_dt.py index aabdf6a5c75..ef91257c4d9 100644 --- a/xarray/tests/test_accessor_dt.py +++ b/xarray/tests/test_accessor_dt.py @@ -418,7 +418,6 @@ def test_calendar_cftime(data) -> None: assert data.time.dt.calendar == expected -@requires_cftime def test_calendar_datetime64_2d() -> None: data = xr.DataArray(np.zeros((4, 5), dtype="datetime64[ns]"), dims=("x", "y")) assert data.dt.calendar == "proleptic_gregorian" diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 91daabd12d5..bc6b095fc4e 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -2560,7 +2560,7 @@ def test_open_zarr_use_cftime(self) -> None: ds_a = xr.open_zarr(store_target, **self.version_kwargs) assert_identical(ds, ds_a) ds_b = xr.open_zarr(store_target, use_cftime=True, **self.version_kwargs) - assert xr.coding.times.contains_cftime_datetimes(ds_b.time) + assert xr.coding.times.contains_cftime_datetimes(ds_b.time.variable) def test_write_read_select_write(self) -> None: # Test for https://github.com/pydata/xarray/issues/4084 diff --git a/xarray/tests/test_coding.py b/xarray/tests/test_coding.py index 5bf23819d87..f7579c4b488 100644 --- a/xarray/tests/test_coding.py +++ b/xarray/tests/test_coding.py @@ -65,13 +65,13 @@ def test_CFMaskCoder_missing_value() -> None: expected.attrs["missing_value"] = -9999 decoded = xr.decode_cf(expected.to_dataset()) - encoded, _ = xr.conventions.cf_encoder(decoded, decoded.attrs) + encoded, _ = xr.conventions.cf_encoder(decoded.variables, decoded.attrs) assert_equal(encoded["tmpk"], expected.variable) decoded.tmpk.encoding["_FillValue"] = -9940 with pytest.raises(ValueError): - encoded, _ = xr.conventions.cf_encoder(decoded, decoded.attrs) + encoded, _ = xr.conventions.cf_encoder(decoded.variables, decoded.attrs) @requires_dask diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 0746a949cc8..580de878fe6 100644 --- a/xarray/tests/test_coding_times.py 
+++ b/xarray/tests/test_coding_times.py @@ -31,6 +31,7 @@ from xarray.core.common import contains_cftime_datetimes from xarray.testing import assert_equal, assert_identical from xarray.tests import ( + FirstElementAccessibleArray, arm_xfail, assert_array_equal, assert_no_warnings, @@ -787,35 +788,35 @@ def times_3d(times): @requires_cftime def test_contains_cftime_datetimes_1d(data) -> None: - assert contains_cftime_datetimes(data.time) + assert contains_cftime_datetimes(data.time.variable) @requires_cftime @requires_dask def test_contains_cftime_datetimes_dask_1d(data) -> None: - assert contains_cftime_datetimes(data.time.chunk()) + assert contains_cftime_datetimes(data.time.variable.chunk()) @requires_cftime def test_contains_cftime_datetimes_3d(times_3d) -> None: - assert contains_cftime_datetimes(times_3d) + assert contains_cftime_datetimes(times_3d.variable) @requires_cftime @requires_dask def test_contains_cftime_datetimes_dask_3d(times_3d) -> None: - assert contains_cftime_datetimes(times_3d.chunk()) + assert contains_cftime_datetimes(times_3d.variable.chunk()) @pytest.mark.parametrize("non_cftime_data", [DataArray([]), DataArray([1, 2])]) def test_contains_cftime_datetimes_non_cftimes(non_cftime_data) -> None: - assert not contains_cftime_datetimes(non_cftime_data) + assert not contains_cftime_datetimes(non_cftime_data.variable) @requires_dask @pytest.mark.parametrize("non_cftime_data", [DataArray([]), DataArray([1, 2])]) def test_contains_cftime_datetimes_non_cftimes_dask(non_cftime_data) -> None: - assert not contains_cftime_datetimes(non_cftime_data.chunk()) + assert not contains_cftime_datetimes(non_cftime_data.variable.chunk()) @requires_cftime @@ -1176,3 +1177,17 @@ def test_scalar_unit() -> None: variable = Variable(("x", "y"), np.array([[0, 1], [2, 3]]), {"units": np.nan}) result = coding.times.CFDatetimeCoder().decode(variable) assert np.isnan(result.attrs["units"]) + + +@requires_cftime +def test_contains_cftime_lazy() -> None: + import cftime + + 
from xarray.core.common import _contains_cftime_datetimes + + times = np.array( + [cftime.DatetimeGregorian(1, 1, 2, 0), cftime.DatetimeGregorian(1, 1, 2, 0)], + dtype=object, + ) + array = FirstElementAccessibleArray(times) + assert _contains_cftime_datetimes(array) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 836d30b60b8..ed1abea5fbe 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -6219,7 +6219,7 @@ def test_to_and_from_iris(self) -> None: original_coord = original.coords[orginal_key] assert coord.var_name == original_coord.name assert_array_equal( - coord.points, CFDatetimeCoder().encode(original_coord).values + coord.points, CFDatetimeCoder().encode(original_coord.variable).values ) assert actual.coord_dims(coord) == original.get_axis_num( original.coords[coord.var_name].dims @@ -6295,7 +6295,7 @@ def test_to_and_from_iris_dask(self) -> None: original_coord = original.coords[orginal_key] assert coord.var_name == original_coord.name assert_array_equal( - coord.points, CFDatetimeCoder().encode(original_coord).values + coord.points, CFDatetimeCoder().encode(original_coord.variable).values ) assert actual.coord_dims(coord) == original.get_axis_num( original.coords[coord.var_name].dims From 821dc24b5f3ed91b843a634bf8513a26046269ef Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Wed, 8 Mar 2023 14:41:53 +0100 Subject: [PATCH 8/9] ignore the `pkg_resources` deprecation warning (#7594) --- xarray/tests/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index eda790694af..be81bd18a58 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -38,6 +38,7 @@ # https://github.com/pydata/xarray/issues/7322 warnings.filterwarnings("ignore", "'urllib3.contrib.pyopenssl' module is deprecated") warnings.filterwarnings("ignore", "Deprecated call to `pkg_resources.declare_namespace") +warnings.filterwarnings("ignore", 
"pkg_resources is deprecated as an API") arm_xfail = pytest.mark.xfail( platform.machine() == "aarch64" or "arm" in platform.machine(), From 6d771fc82228bdaf8a4b77d0ceec1cc444ebd090 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Wed, 8 Mar 2023 11:55:21 -0500 Subject: [PATCH 9/9] Preserve `base` and `loffset` arguments in `resample` (#7444) * [test-upstream] Preserve base and loffset arguments in resample While pandas is getting set to remove these, we have not had a chance to emit a deprecation warning yet for them in xarray. This should hopefully give users some extra time to adapt. * Emit warning when base is not None Co-authored-by: Deepak Cherian * Modify warnings to refer loffset and base as parameters; add tests * Add type validation for loffset arguments * Add typing and support for pd.Timedelta as an loffset * pd.Timedelta is a subclass of datetime.timedelta * [test-upstream] Remove unneeded skipif * Fix failing tests * [test-upstream] Add return type to tests * [test-upstream] Update documentation * [test-upstream] Fix mypy errors in tests * Move _convert_base_to_offset to pdcompat and add a few more tests * Use offset instead of base in documentation --------- Co-authored-by: Deepak Cherian --- doc/user-guide/weather-climate.rst | 2 +- doc/whats-new.rst | 7 ++ xarray/core/common.py | 70 +++++++++++++------- xarray/core/groupby.py | 78 +++++++++++++++++++---- xarray/core/pdcompat.py | 23 +++++++ xarray/core/resample_cftime.py | 18 +++--- xarray/tests/test_cftimeindex_resample.py | 64 +++++++++++++++---- xarray/tests/test_groupby.py | 41 +++++++++--- 8 files changed, 239 insertions(+), 64 deletions(-) diff --git a/doc/user-guide/weather-climate.rst b/doc/user-guide/weather-climate.rst index 3c957978acf..793da9d1bdd 100644 --- a/doc/user-guide/weather-climate.rst +++ b/doc/user-guide/weather-climate.rst @@ -233,7 +233,7 @@ For data indexed by a :py:class:`~xarray.CFTimeIndex` xarray currently supports: .. 
ipython:: python - da.resample(time="81T", closed="right", label="right", base=3).mean() + da.resample(time="81T", closed="right", label="right", offset="3T").mean() .. _Timestamp-valid range: https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#timestamp-limitations .. _ISO 8601 standard: https://en.wikipedia.org/wiki/ISO_8601 diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 3cc2efde599..52e58782a5b 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -34,6 +34,13 @@ Breaking changes Deprecations ~~~~~~~~~~~~ +- Following pandas, the ``base`` and ``loffset`` parameters of + :py:meth:`xr.DataArray.resample` and :py:meth:`xr.Dataset.resample` have been + deprecated and will be removed in a future version of xarray. Using the + ``origin`` or ``offset`` parameters is recommended as a replacement for using + the ``base`` parameter and using time offset arithmetic is recommended as a + replacement for using the ``loffset`` parameter (:pull:`7444`). By `Spencer + Clark <https://github.com/spencerkclark>`_. Bug fixes diff --git a/xarray/core/common.py b/xarray/core/common.py index d980e622763..af935ae15d2 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -13,8 +13,14 @@ from xarray.core import dtypes, duck_array_ops, formatting, formatting_html, ops from xarray.core.indexing import BasicIndexer, ExplicitlyIndexed from xarray.core.options import OPTIONS, _get_keep_attrs +from xarray.core.pdcompat import _convert_base_to_offset from xarray.core.pycompat import is_duck_dask_array -from xarray.core.utils import Frozen, either_dict_or_kwargs, is_scalar +from xarray.core.utils import ( + Frozen, + either_dict_or_kwargs, + emit_user_level_warning, + is_scalar, +) try: import cftime @@ -845,6 +851,12 @@ def _resample( For frequencies that evenly subdivide 1 day, the "origin" of the aggregated intervals. For example, for "24H" frequency, base could range from 0 through 23. + + .. 
deprecated:: 2023.03.0 + Following pandas, the ``base`` parameter is deprecated in favor + of the ``origin`` and ``offset`` parameters, and will be removed + in a future version of xarray. + origin : {'epoch', 'start', 'start_day', 'end', 'end_day'}, pd.Timestamp, datetime.datetime, np.datetime64, or cftime.datetime, default 'start_day' The datetime on which to adjust the grouping. The timezone of origin must match the timezone of the index. @@ -860,6 +872,12 @@ def _resample( loffset : timedelta or str, optional Offset used to adjust the resampled time labels. Some pandas date offset strings are supported. + + .. deprecated:: 2023.03.0 + Following pandas, the ``loffset`` parameter is deprecated in favor + of using time offset arithmetic, and will be removed in a future + version of xarray. + restore_coord_dims : bool, optional If True, also restore the dimension order of multi-dimensional coordinates. @@ -930,8 +948,8 @@ def _resample( """ # TODO support non-string indexer after removing the old API. - from xarray.coding.cftimeindex import CFTimeIndex from xarray.core.dataarray import DataArray + from xarray.core.groupby import TimeResampleGrouper from xarray.core.resample import RESAMPLE_DIM if keep_attrs is not None: @@ -961,28 +979,36 @@ def _resample( dim_name: Hashable = dim dim_coord = self[dim] - if isinstance(self._indexes[dim_name].to_pandas_index(), CFTimeIndex): - from xarray.core.resample_cftime import CFTimeGrouper - - grouper = CFTimeGrouper( - freq=freq, - closed=closed, - label=label, - base=base, - loffset=loffset, - origin=origin, - offset=offset, + if loffset is not None: + emit_user_level_warning( + "Following pandas, the `loffset` parameter to resample will be deprecated " + "in a future version of xarray. 
Switch to using time offset arithmetic.", + FutureWarning, ) - else: - grouper = pd.Grouper( - freq=freq, - closed=closed, - label=label, - base=base, - offset=offset, - origin=origin, - loffset=loffset, + + if base is not None: + emit_user_level_warning( + "Following pandas, the `base` parameter to resample will be deprecated in " + "a future version of xarray. Switch to using `origin` or `offset` instead.", + FutureWarning, ) + + if base is not None and offset is not None: + raise ValueError("base and offset cannot be present at the same time") + + if base is not None: + index = self._indexes[dim_name].to_pandas_index() + offset = _convert_base_to_offset(base, freq, index) + + grouper = TimeResampleGrouper( + freq=freq, + closed=closed, + label=label, + origin=origin, + offset=offset, + loffset=loffset, + ) + group = DataArray( dim_coord, coords=dim_coord.coords, dims=dim_coord.dims, name=RESAMPLE_DIM ) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 5bfa0229af5..15694b41219 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -40,6 +40,7 @@ from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset + from xarray.core.types import DatetimeLike, SideOptions from xarray.core.utils import Frozen GroupKey = Any @@ -245,7 +246,10 @@ def _unique_and_monotonic(group: T_Group) -> bool: return index.is_unique and index.is_monotonic_increasing -def _apply_loffset(grouper, result): +def _apply_loffset( + loffset: str | pd.DateOffset | datetime.timedelta | pd.Timedelta, + result: pd.Series | pd.DataFrame, +): """ (copied from pandas) if loffset is set, offset the result index @@ -258,17 +262,25 @@ def _apply_loffset(grouper, result): result : Series or DataFrame the result of resample """ + # pd.Timedelta is a subclass of datetime.timedelta so we do not need to + # include it in instance checks. 
+ if not isinstance(loffset, (str, pd.DateOffset, datetime.timedelta)): + raise ValueError( + f"`loffset` must be a str, pd.DateOffset, datetime.timedelta, or pandas.Timedelta object. " + f"Got {loffset}." + ) + + if isinstance(loffset, str): + loffset = pd.tseries.frequencies.to_offset(loffset) needs_offset = ( - isinstance(grouper.loffset, (pd.DateOffset, datetime.timedelta)) + isinstance(loffset, (pd.DateOffset, datetime.timedelta)) and isinstance(result.index, pd.DatetimeIndex) and len(result.index) > 0 ) if needs_offset: - result.index = result.index + grouper.loffset - - grouper.loffset = None + result.index = result.index + loffset class GroupBy(Generic[T_Xarray]): @@ -530,14 +542,7 @@ def __repr__(self) -> str: ) def _get_index_and_items(self, index, grouper): - from xarray.core.resample_cftime import CFTimeGrouper - - s = pd.Series(np.arange(index.size), index) - if isinstance(grouper, CFTimeGrouper): - first_items = grouper.first_items(index) - else: - first_items = s.groupby(grouper).first() - _apply_loffset(grouper, first_items) + first_items = grouper.first_items(index) full_index = first_items.index if first_items.isnull().any(): first_items = first_items.dropna() @@ -1365,3 +1370,50 @@ class DatasetGroupBy( # type: ignore[misc] ImplementsDatasetReduce, ): __slots__ = () + + +class TimeResampleGrouper: + def __init__( + self, + freq: str, + closed: SideOptions | None, + label: SideOptions | None, + origin: str | DatetimeLike, + offset: pd.Timedelta | datetime.timedelta | str | None, + loffset: datetime.timedelta | str | None, + ): + self.freq = freq + self.closed = closed + self.label = label + self.origin = origin + self.offset = offset + self.loffset = loffset + + def first_items(self, index): + from xarray import CFTimeIndex + from xarray.core.resample_cftime import CFTimeGrouper + + if isinstance(index, CFTimeIndex): + grouper = CFTimeGrouper( + freq=self.freq, + closed=self.closed, + label=self.label, + origin=self.origin, + offset=self.offset, + 
loffset=self.loffset, + ) + return grouper.first_items(index) + else: + s = pd.Series(np.arange(index.size), index) + grouper = pd.Grouper( + freq=self.freq, + closed=self.closed, + label=self.label, + origin=self.origin, + offset=self.offset, + ) + + first_items = s.groupby(grouper).first() + if self.loffset is not None: + _apply_loffset(self.loffset, first_items) + return first_items diff --git a/xarray/core/pdcompat.py b/xarray/core/pdcompat.py index 018bb19b871..b20a96bb8d6 100644 --- a/xarray/core/pdcompat.py +++ b/xarray/core/pdcompat.py @@ -38,6 +38,10 @@ from enum import Enum from typing import Literal +import pandas as pd + +from xarray.coding import cftime_offsets + def count_not_none(*args) -> int: """Compute the number of non-None arguments. @@ -68,3 +72,22 @@ def __repr__(self) -> str: _NoDefault.no_default ) # Sentinel indicating the default value following pandas NoDefault = Literal[_NoDefault.no_default] # For typing following pandas + + +def _convert_base_to_offset(base, freq, index): + """Required until we officially deprecate the base argument to resample. This + translates a provided `base` argument to an `offset` argument, following logic + from pandas. 
+ """ + from xarray.coding.cftimeindex import CFTimeIndex + + if isinstance(index, pd.DatetimeIndex): + freq = pd.tseries.frequencies.to_offset(freq) + if isinstance(freq, pd.offsets.Tick): + return pd.Timedelta(base * freq.nanos // freq.n) + elif isinstance(index, CFTimeIndex): + freq = cftime_offsets.to_offset(freq) + if isinstance(freq, cftime_offsets.Tick): + return base * freq.as_timedelta() // freq.n + else: + raise ValueError("Can only resample using a DatetimeIndex or CFTimeIndex.") diff --git a/xarray/core/resample_cftime.py b/xarray/core/resample_cftime.py index 7fdd372ec74..920a6873814 100644 --- a/xarray/core/resample_cftime.py +++ b/xarray/core/resample_cftime.py @@ -71,7 +71,6 @@ def __init__( freq: str | BaseCFTimeOffset, closed: SideOptions | None = None, label: SideOptions | None = None, - base: int | None = None, loffset: str | datetime.timedelta | BaseCFTimeOffset | None = None, origin: str | CFTimeDatetime = "start_day", offset: str | datetime.timedelta | None = None, @@ -79,10 +78,6 @@ def __init__( self.offset: datetime.timedelta | None self.closed: SideOptions self.label: SideOptions - - if base is not None and offset is not None: - raise ValueError("base and offset cannot be provided at the same time") - self.freq = to_offset(freq) self.loffset = loffset self.origin = origin @@ -122,9 +117,6 @@ def __init__( else: self.label = label - if base is not None and isinstance(self.freq, Tick): - offset = type(self.freq)(n=base % self.freq.n).as_timedelta() - if offset is not None: try: self.offset = _convert_offset_to_timedelta(offset) @@ -150,6 +142,16 @@ def first_items(self, index: CFTimeIndex): index, self.freq, self.closed, self.label, self.origin, self.offset ) if self.loffset is not None: + if not isinstance( + self.loffset, (str, datetime.timedelta, BaseCFTimeOffset) + ): + # BaseCFTimeOffset is not public API so we do not include it in + # the error message for now. 
+ raise ValueError( + f"`loffset` must be a str or datetime.timedelta object. " + f"Got {self.loffset}." + ) + if isinstance(self.loffset, datetime.timedelta): labels = labels + self.loffset else: diff --git a/xarray/tests/test_cftimeindex_resample.py b/xarray/tests/test_cftimeindex_resample.py index 5f818b7663d..07bc14f8983 100644 --- a/xarray/tests/test_cftimeindex_resample.py +++ b/xarray/tests/test_cftimeindex_resample.py @@ -8,6 +8,7 @@ import pytest import xarray as xr +from xarray.core.pdcompat import _convert_base_to_offset from xarray.core.resample_cftime import CFTimeGrouper cftime = pytest.importorskip("cftime") @@ -130,17 +131,18 @@ def test_resample(freqs, closed, label, base, offset) -> None: da_datetimeindex = da(datetime_index) da_cftimeindex = da(cftime_index) - compare_against_pandas( - da_datetimeindex, - da_cftimeindex, - resample_freq, - closed=closed, - label=label, - base=base, - offset=offset, - origin=origin, - loffset=loffset, - ) + with pytest.warns(FutureWarning, match="`loffset` parameter"): + compare_against_pandas( + da_datetimeindex, + da_cftimeindex, + resample_freq, + closed=closed, + label=label, + base=base, + offset=offset, + origin=origin, + loffset=loffset, + ) @pytest.mark.parametrize( @@ -245,3 +247,43 @@ def test_timedelta_offset() -> None: timedelta_result = da_cftime.resample(time="2D", offset=timedelta).mean() string_result = da_cftime.resample(time="2D", offset=string).mean() xr.testing.assert_identical(timedelta_result, string_result) + + +@pytest.mark.parametrize("loffset", ["12H", datetime.timedelta(hours=-12)]) +def test_resample_loffset_cftimeindex(loffset) -> None: + datetimeindex = pd.date_range("2000-01-01", freq="6H", periods=10) + da_datetimeindex = xr.DataArray(np.arange(10), [("time", datetimeindex)]) + + cftimeindex = xr.cftime_range("2000-01-01", freq="6H", periods=10) + da_cftimeindex = xr.DataArray(np.arange(10), [("time", cftimeindex)]) + + with pytest.warns(FutureWarning, match="`loffset` parameter"): 
+ result = da_cftimeindex.resample(time="24H", loffset=loffset).mean() + expected = da_datetimeindex.resample(time="24H", loffset=loffset).mean() + + result["time"] = result.xindexes["time"].to_pandas_index().to_datetimeindex() + xr.testing.assert_identical(result, expected) + + +def test_resample_invalid_loffset_cftimeindex() -> None: + times = xr.cftime_range("2000-01-01", freq="6H", periods=10) + da = xr.DataArray(np.arange(10), [("time", times)]) + + with pytest.raises(ValueError): + da.resample(time="24H", loffset=1) # type: ignore + + +@pytest.mark.parametrize(("base", "freq"), [(1, "10S"), (17, "3H"), (15, "5U")]) +def test__convert_base_to_offset(base, freq): + # Verify that the cftime_offset adapted version of _convert_base_to_offset + # produces the same result as the pandas version. + datetimeindex = pd.date_range("2000", periods=2) + cftimeindex = xr.cftime_range("2000", periods=2) + pandas_result = _convert_base_to_offset(base, freq, datetimeindex) + cftime_result = _convert_base_to_offset(base, freq, cftimeindex) + assert pandas_result.to_pytimedelta() == cftime_result + + +def test__convert_base_to_offset_invalid_index(): + with pytest.raises(ValueError, match="Can only resample"): + _convert_base_to_offset(1, "12H", pd.Index([0])) diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index cec37560d8f..a7d98405017 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -1,5 +1,6 @@ from __future__ import annotations +import datetime import warnings import numpy as np @@ -16,6 +17,7 @@ assert_equal, assert_identical, create_test_data, + has_pandas_version_two, requires_dask, requires_flox, requires_scipy, @@ -1475,14 +1477,6 @@ def test_resample(self): actual = array.resample(time="24H").reduce(np.mean) assert_identical(expected, actual) - # Our use of `loffset` may change if we align our API with pandas' changes. 
- # ref https://github.com/pydata/xarray/pull/4537 - actual = array.resample(time="24H", loffset="-12H").mean() - expected_ = array.to_series().resample("24H").mean() - expected_.index += to_offset("-12H") - expected = DataArray.from_series(expected_) - assert_identical(actual, expected) - with pytest.raises(ValueError, match=r"index must be monotonic"): array[[2, 0, 1]].resample(time="1D") @@ -1802,12 +1796,15 @@ def test_upsample_interpolate_dask(self, chunked_time): # done here due to floating point arithmetic assert_allclose(expected, actual, rtol=1e-16) + @pytest.mark.skipif(has_pandas_version_two, reason="requires pandas < 2.0.0") def test_resample_base(self) -> None: times = pd.date_range("2000-01-01T02:03:01", freq="6H", periods=10) array = DataArray(np.arange(10), [("time", times)]) base = 11 - actual = array.resample(time="24H", base=base).mean() + + with pytest.warns(FutureWarning, match="the `base` parameter to resample"): + actual = array.resample(time="24H", base=base).mean() expected = DataArray(array.to_series().resample("24H", base=base).mean()) assert_identical(expected, actual) @@ -1829,6 +1826,32 @@ def test_resample_origin(self) -> None: expected = DataArray(array.to_series().resample("24H", origin=origin).mean()) assert_identical(expected, actual) + @pytest.mark.skipif(has_pandas_version_two, reason="requires pandas < 2.0.0") + @pytest.mark.parametrize( + "loffset", + [ + "-12H", + datetime.timedelta(hours=-12), + pd.Timedelta(hours=-12), + pd.DateOffset(hours=-12), + ], + ) + def test_resample_loffset(self, loffset) -> None: + times = pd.date_range("2000-01-01", freq="6H", periods=10) + array = DataArray(np.arange(10), [("time", times)]) + + with pytest.warns(FutureWarning, match="`loffset` parameter"): + actual = array.resample(time="24H", loffset=loffset).mean() + expected = DataArray(array.to_series().resample("24H", loffset=loffset).mean()) + assert_identical(actual, expected) + + def test_resample_invalid_loffset(self) -> None: + times = 
pd.date_range("2000-01-01", freq="6H", periods=10) + array = DataArray(np.arange(10), [("time", times)]) + + with pytest.raises(ValueError, match="`loffset` must be"): + array.resample(time="24H", loffset=1).mean() # type: ignore + class TestDatasetResample: def test_resample_and_first(self):