From a7ab5a211b7d8ed152ded99b18fe92b4b4f622f2 Mon Sep 17 00:00:00 2001 From: Gianfranco Costamagna Date: Fri, 8 Sep 2023 17:40:47 +0200 Subject: [PATCH 1/8] tests: Update US/Eastern timezone to America/New_York (#8153) * tests: Update US/Eastern timezone to America/New_York The reason is that the US/Eastern symlink moved from the tzdata package to the tzdata-legacy package, causing FTBFS (a failure to build from source). Since America/New_York is better supported by tzdata, the switch makes the package more robust against future failures. * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Deepak Cherian --- xarray/tests/test_dataset.py | 2 +- xarray/tests/test_variable.py | 22 +++++++++++++--------- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index d0e9f01bdae..01bdf2cef0c 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -4698,7 +4698,7 @@ def test_convert_dataframe_with_many_types_and_multiindex(self) -> None: "e": [True, False, True], "f": pd.Categorical(list("abc")), "g": pd.date_range("20130101", periods=3), - "h": pd.date_range("20130101", periods=3, tz="US/Eastern"), + "h": pd.date_range("20130101", periods=3, tz="America/New_York"), } ) df.index = pd.MultiIndex.from_product([["a"], range(3)], names=["one", "two"]) diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 118d78d2e04..2ef34201a8b 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -2604,7 +2604,7 @@ def test_datetime(self): @requires_pandas_version_two def test_tz_datetime(self) -> None: - tz = pytz.timezone("US/Eastern") + tz = pytz.timezone("America/New_York") times_ns = pd.date_range("2000", periods=1, tz=tz) times_s = times_ns.astype(pd.DatetimeTZDtype("s", tz)) @@ -2904,9 +2904,11 @@ def test_from_pint_wrapping_dask(self, Var): (pd.date_range("2000", periods=1), False), (datetime(2000, 1, 1), False), (np.array([datetime(2000, 1, 1)]), False), - (pd.date_range("2000", periods=1, tz=pytz.timezone("US/Eastern")), False), + (pd.date_range("2000", periods=1, tz=pytz.timezone("America/New_York")), False), ( - pd.Series(pd.date_range("2000", periods=1, tz=pytz.timezone("US/Eastern"))), + pd.Series( + pd.date_range("2000", periods=1, tz=pytz.timezone("America/New_York")) + ), False, ), ], @@ -2929,7 +2931,7 @@ def test_datetime_conversion_warning(values, warns_under_pandas_version_two) -> # the case that the variable is backed by a timezone-aware # DatetimeIndex, and thus is hidden within the PandasIndexingAdapter class. 
assert var._data.array.dtype == pd.DatetimeTZDtype( - "ns", pytz.timezone("US/Eastern") + "ns", pytz.timezone("America/New_York") ) @@ -2941,12 +2943,14 @@ def test_pandas_two_only_datetime_conversion_warnings() -> None: (pd.date_range("2000", periods=1), "datetime64[s]"), (pd.Series(pd.date_range("2000", periods=1)), "datetime64[s]"), ( - pd.date_range("2000", periods=1, tz=pytz.timezone("US/Eastern")), - pd.DatetimeTZDtype("s", pytz.timezone("US/Eastern")), + pd.date_range("2000", periods=1, tz=pytz.timezone("America/New_York")), + pd.DatetimeTZDtype("s", pytz.timezone("America/New_York")), ), ( - pd.Series(pd.date_range("2000", periods=1, tz=pytz.timezone("US/Eastern"))), - pd.DatetimeTZDtype("s", pytz.timezone("US/Eastern")), + pd.Series( + pd.date_range("2000", periods=1, tz=pytz.timezone("America/New_York")) + ), + pd.DatetimeTZDtype("s", pytz.timezone("America/New_York")), ), ] for data, dtype in cases: @@ -2960,7 +2964,7 @@ def test_pandas_two_only_datetime_conversion_warnings() -> None: # the case that the variable is backed by a timezone-aware # DatetimeIndex, and thus is hidden within the PandasIndexingAdapter class. assert var._data.array.dtype == pd.DatetimeTZDtype( - "ns", pytz.timezone("US/Eastern") + "ns", pytz.timezone("America/New_York") ) From 67268f1159fc5b4aee5c644b37d84036d8fc1528 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Fri, 8 Sep 2023 11:41:31 -0400 Subject: [PATCH 2/8] Docs page on internal design (#7991) * add page on internal design * add xarray-datatree to intersphinx mapping * typo * add subheadings to the accessors page * Revert "add page on internal design" This reverts commit 198f67b9a5a5f0b1b37ba3f0f844c2d706a2bfab. * rename page on variables * whatsnew * sel->isel * add section on lazy indexing * actually show lazy indexing example * size -> sizes Co-authored-by: Michael Niklas * link to UXarray * plugin -> backend Co-authored-by: Michael Niklas * Don't pretend .dims is a set --------- Co-authored-by: Michael Niklas Co-authored-by: Deepak Cherian Co-authored-by: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> --- doc/conf.py | 1 + doc/internals/duck-arrays-integration.rst | 2 +- doc/internals/extending-xarray.rst | 11 ++ doc/internals/index.rst | 9 +- doc/internals/internal-design.rst | 224 ++++++++++++++++++++++ doc/internals/variable-objects.rst | 31 --- doc/whats-new.rst | 2 + 7 files changed, 243 insertions(+), 37 deletions(-) create mode 100644 doc/internals/internal-design.rst delete mode 100644 doc/internals/variable-objects.rst diff --git a/doc/conf.py b/doc/conf.py index 6c6efb47f6b..d2f6cdf3aa1 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -324,6 +324,7 @@ "cftime": ("https://unidata.github.io/cftime", None), "sparse": ("https://sparse.pydata.org/en/latest/", None), "cubed": ("https://tom-e-white.com/cubed/", None), + "datatree": ("https://xarray-datatree.readthedocs.io/en/latest/", None), } diff --git a/doc/internals/duck-arrays-integration.rst b/doc/internals/duck-arrays-integration.rst index 1f1f57974df..a674acb04fe 100644 --- a/doc/internals/duck-arrays-integration.rst +++ b/doc/internals/duck-arrays-integration.rst @@ -35,7 +35,7 @@ Python Array API standard support ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ As an integration library xarray benefits greatly from the standardization of duck-array libraries' APIs, and so is a -big supporter of the `Python Array API Standard `_. . +big supporter of the `Python Array API Standard `_. We aim to support any array libraries that follow the Array API standard out-of-the-box. 
However, xarray does occasionally call some numpy functions which are not (yet) part of the standard (e.g. :py:meth:`xarray.DataArray.pad` calls :py:func:`numpy.pad`). diff --git a/doc/internals/extending-xarray.rst b/doc/internals/extending-xarray.rst index a180b85044f..cb1b23e78eb 100644 --- a/doc/internals/extending-xarray.rst +++ b/doc/internals/extending-xarray.rst @@ -14,6 +14,11 @@ Xarray is designed as a general purpose library and hence tries to avoid including overly domain specific functionality. But inevitably, the need for more domain specific logic arises. +.. _internals.accessors.composition: + +Composition over Inheritance +---------------------------- + One potential solution to this problem is to subclass Dataset and/or DataArray to add domain specific functionality. However, inheritance is not very robust. It's easy to inadvertently use internal APIs when subclassing, which means that your @@ -23,11 +28,17 @@ only return native xarray objects. The standard advice is to use :issue:`composition over inheritance <706>`, but reimplementing an API as large as xarray's on your own objects can be an onerous task, even if most methods are only forwarding to xarray implementations. +(For an example of a project which took this approach of subclassing see `UXarray `_). If you simply want the ability to call a function with the syntax of a method call, then the builtin :py:meth:`~xarray.DataArray.pipe` method (copied from pandas) may suffice. +.. _internals.accessors.writing accessors: + +Writing Custom Accessors +------------------------ + To resolve this issue for more complex cases, xarray has the :py:func:`~xarray.register_dataset_accessor` and :py:func:`~xarray.register_dataarray_accessor` decorators for adding custom diff --git a/doc/internals/index.rst b/doc/internals/index.rst index 7e13f0cfe95..46972ff69bd 100644 --- a/doc/internals/index.rst +++ b/doc/internals/index.rst @@ -1,6 +1,6 @@ .. _internals: -xarray Internals +Xarray Internals ================ Xarray builds upon two of the foundational libraries of the scientific Python @@ -11,15 +11,14 @@ compiled code to :ref:`optional dependencies`. The pages in this section are intended for: * Contributors to xarray who wish to better understand some of the internals, -* Developers who wish to extend xarray with domain-specific logic, perhaps to support a new scientific community of users, -* Developers who wish to interface xarray with their existing tooling, e.g. by creating a plugin for reading a new file format, or wrapping a custom array type. - +* Developers from other fields who wish to extend xarray with domain-specific logic, perhaps to support a new scientific community of users, +* Developers of other packages who wish to interface xarray with their existing tools, e.g. by creating a backend for reading a new file format, or wrapping a custom array type. .. toctree:: :maxdepth: 2 :hidden: - variable-objects + internal-design duck-arrays-integration chunked-arrays extending-xarray diff --git a/doc/internals/internal-design.rst b/doc/internals/internal-design.rst new file mode 100644 index 00000000000..11b4ee39da9 --- /dev/null +++ b/doc/internals/internal-design.rst @@ -0,0 +1,224 @@ +.. ipython:: python + :suppress: + + import numpy as np + import pandas as pd + import xarray as xr + + np.random.seed(123456) + np.set_printoptions(threshold=20) + +.. _internal design: + +Internal Design +=============== + +This page gives an overview of the internal design of xarray. 
+ +In total, the Xarray project defines four key data structures. +In order of increasing complexity, they are: + +- :py:class:`xarray.Variable`, +- :py:class:`xarray.DataArray`, +- :py:class:`xarray.Dataset`, +- :py:class:`datatree.DataTree`. + +The user guide lists only :py:class:`xarray.DataArray` and :py:class:`xarray.Dataset`, +but :py:class:`~xarray.Variable` is the fundamental object internally, +and :py:class:`~datatree.DataTree` is a natural generalisation of :py:class:`xarray.Dataset`. + +.. note:: + + Our :ref:`roadmap` includes plans both to document :py:class:`~xarray.Variable` as fully public API, + and to merge the `xarray-datatree `_ package into xarray's main repository. + +Internally private :ref:`lazy indexing classes ` are used to avoid loading more data than necessary, +and flexible index classes (derived from :py:class:`~xarray.indexes.Index`) provide performant label-based lookups. + + +.. _internal design.data structures: + +Data Structures +--------------- + +The :ref:`data structures` page in the user guide explains the basics and concentrates on user-facing behavior, +whereas this section explains how xarray's data structure classes actually work internally. + + +.. _internal design.data structures.variable: + +Variable Objects +~~~~~~~~~~~~~~~~ + +The core internal data structure in xarray is the :py:class:`~xarray.Variable`, +which is used as the basic building block behind xarray's +:py:class:`~xarray.Dataset` and :py:class:`~xarray.DataArray` types. A +:py:class:`~xarray.Variable` consists of: + +- ``dims``: A tuple of dimension names. +- ``data``: The N-dimensional array (typically a NumPy or Dask array) storing + the Variable's data. It must have the same number of dimensions as the length + of ``dims``. +- ``attrs``: An ordered dictionary of metadata associated with this array. By + convention, xarray's built-in operations never use this metadata. +- ``encoding``: Another ordered dictionary used to store information about how + this variable's data is represented on disk. See :ref:`io.encoding` for more + details. + +:py:class:`~xarray.Variable` has an interface similar to NumPy arrays, but extended to make use +of named dimensions. For example, it uses ``dim`` in preference to an ``axis`` +argument for methods like ``mean``, and supports :ref:`compute.broadcasting`. + +However, unlike ``Dataset`` and ``DataArray``, the basic ``Variable`` does not +include coordinate labels along each axis. + +:py:class:`~xarray.Variable` is public API, but because of its incomplete support for labeled +data, it is mostly intended for advanced uses, such as in xarray itself, for +writing new backends, or when creating custom indexes. +You can access the variable objects that correspond to xarray objects via the (readonly) +:py:attr:`Dataset.variables ` and +:py:attr:`DataArray.variable ` attributes.
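To make the ``Variable`` building block described above concrete, here is a minimal sketch using only the public API (an editorial illustration, not part of the original patch; the name ``temps`` and its values are made up):

.. ipython:: python

    # a bare Variable: dims + data + attrs (+ encoding), with no coordinate labels
    temps = xr.Variable(dims=("x",), data=np.arange(3.0), attrs={"units": "degC"})
    temps

    # named-dimension semantics: reduce over "x" rather than over axis=0
    temps.mean(dim="x")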
+ + +.. _internal design.dataarray: + +DataArray Objects +~~~~~~~~~~~~~~~~~ + +The simplest data structure used by most users is :py:class:`~xarray.DataArray`. +A :py:class:`~xarray.DataArray` is a composite object consisting of multiple +:py:class:`~xarray.core.variable.Variable` objects which store related data. + +A single :py:class:`~xarray.core.Variable` is referred to as the "data variable", and is stored under the :py:attr:`~xarray.DataArray.variable` attribute. +A :py:class:`~xarray.DataArray` inherits all of the properties of this data variable, i.e. ``dims``, ``data``, ``attrs`` and ``encoding``, +all of which are implemented by forwarding on to the underlying ``Variable`` object. + +In addition, a :py:class:`~xarray.DataArray` stores additional ``Variable`` objects in a dict under the private ``_coords`` attribute, +each of which is referred to as a "Coordinate Variable". These coordinate variable objects are only allowed to have ``dims`` that are a subset of the data variable's ``dims``, +and each dim has a specific length. This means that the full :py:attr:`~xarray.DataArray.sizes` of the dataarray can be represented by a dictionary mapping dimension names to integer sizes. +The underlying data variable has this exact same size, and the attached coordinate variables have sizes which are some subset of the size of the data variable. +Another way of saying this is that all coordinate variables must be "alignable" with the data variable. + +When a coordinate is accessed by the user (e.g. via the dict-like :py:class:`~xarray.DataArray.__getitem__` syntax), +then a new ``DataArray`` is constructed by finding all coordinate variables that have compatible dimensions and re-attaching them before the result is returned. +This is why most users never see the ``Variable`` class underlying each coordinate variable - it is always promoted to a ``DataArray`` before returning. + +Lookups are performed by special :py:class:`~xarray.indexes.Index` objects, which are stored in a dict under the private ``_indexes`` attribute. +Indexes must be associated with one or more coordinates, and essentially act by translating a query given in physical coordinate space +(typically via the :py:meth:`~xarray.DataArray.sel` method) into a set of integer indices in array index space that can be used to index the underlying n-dimensional array-like ``data``. +Indexing in array index space (typically performed via the :py:meth:`~xarray.DataArray.isel` method) does not require consulting an ``Index`` object. + +Finally, a :py:class:`~xarray.DataArray` defines a :py:attr:`~xarray.DataArray.name` attribute, which refers to its data +variable but is stored on the wrapping ``DataArray`` class. +The ``name`` attribute is primarily used when one or more :py:class:`~xarray.DataArray` objects are promoted into a :py:class:`~xarray.Dataset` +(e.g. via :py:meth:`~xarray.DataArray.to_dataset`). +Note that the underlying :py:class:`~xarray.core.Variable` objects are all unnamed, so they can always be referred to uniquely via a +dict-like mapping. + +.. _internal design.dataset: + +Dataset Objects +~~~~~~~~~~~~~~~ + +The :py:class:`~xarray.Dataset` class is a generalization of the :py:class:`~xarray.DataArray` class that can hold multiple data variables. +Internally all data variables and coordinate variables are stored under a single ``variables`` dict, and coordinates are +specified by storing their names in a private ``_coord_names`` dict. + +The dataset's dimensions are the set of all dims present across any variable, but (as with dataarrays) coordinate +variables cannot have a dimension that is not present on any data variable. + +When a data variable or coordinate variable is accessed, a new ``DataArray`` is again constructed from all compatible +coordinates before returning. + +.. _internal design.subclassing: + +.. note::
+ + The fact that selecting a variable from a ``DataArray`` or ``Dataset`` internally involves wrapping the + ``Variable`` object back up into a ``DataArray``/``Dataset`` is the primary reason :ref:`we recommend against subclassing ` + Xarray objects. The main problem it creates is that we currently cannot easily guarantee that, for example, selecting + a coordinate variable from your ``SubclassedDataArray`` would return an instance of ``SubclassedDataArray`` instead + of just an :py:class:`xarray.DataArray`. See `GH issue `_ for more details. + +.. _internal design.lazy indexing: + +Lazy Indexing Classes +--------------------- + +Lazy Loading +~~~~~~~~~~~~ + +If we open a ``Variable`` object from disk using :py:func:`~xarray.open_dataset`, we can see that the actual values of +the array wrapped by the data variable are not displayed. + +.. ipython:: python + + da = xr.tutorial.open_dataset("air_temperature")["air"] + var = da.variable + var + +We can see the size and dtype of the underlying array, but not the actual values. +This is because the values have not yet been loaded. + +If we look at the private attribute :py:attr:`~xarray.Variable._data` containing the underlying array object, we see +something interesting: + +.. ipython:: python + + var._data + +You're looking at one of xarray's internal `Lazy Indexing Classes`. These powerful classes are hidden from the user, +but provide important functionality. + +Calling the public :py:attr:`~xarray.Variable.data` property loads the underlying array into memory. + +.. ipython:: python + + var.data + +This array is now cached, which we can see by accessing the private attribute again: + +.. ipython:: python + + var._data + +Lazy Indexing +~~~~~~~~~~~~~ + +The purpose of these lazy indexing classes is to prevent more data being loaded into memory than is necessary for the +subsequent analysis, by deferring the loading of data until after indexing is performed. + +Let's open the data from disk again. + +.. ipython:: python + + da = xr.tutorial.open_dataset("air_temperature")["air"] + var = da.variable + +Now, notice how even after subsetting, the data still has not been loaded: + +.. ipython:: python + + var.isel(time=0) + +The shape has changed, but the values are still not shown. + +Looking at the private attribute again shows how this indexing information was propagated via the hidden lazy indexing classes: + +.. ipython:: python + + var.isel(time=0)._data + +.. note:: + + Currently only certain indexing operations are lazy, not all array operations. For discussion of making all array + operations lazy see `GH issue #5081 `_. + + +Lazy Dask Arrays +~~~~~~~~~~~~~~~~ + +Note that xarray's implementation of Lazy Indexing classes is completely separate from how :py:class:`dask.array.Array` +objects evaluate lazily. Dask-backed xarray objects delay almost all operations until :py:meth:`~xarray.DataArray.compute` +is called (either explicitly or implicitly via :py:meth:`~xarray.DataArray.plot` for example). The exceptions to this +laziness are operations whose output shape is data-dependent, such as when calling :py:meth:`~xarray.DataArray.where`. 
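As a quick illustration of the distinction drawn in this last paragraph (an editorial sketch, not part of the original patch; it assumes the ``air_temperature`` tutorial dataset and an installed dask), opening the same data with ``chunks`` puts a dask array where the lazy indexing wrapper would otherwise be:

.. ipython:: python

    da_dask = xr.tutorial.open_dataset("air_temperature", chunks={})["air"]
    da_dask.variable._data  # a dask.array.Array, not a lazy indexing class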
diff --git a/doc/internals/variable-objects.rst b/doc/internals/variable-objects.rst deleted file mode 100644 index 6ae3c2f7e6d..00000000000 --- a/doc/internals/variable-objects.rst +++ /dev/null @@ -1,31 +0,0 @@ -Variable objects -================ - -The core internal data structure in xarray is the :py:class:`~xarray.Variable`, -which is used as the basic building block behind xarray's -:py:class:`~xarray.Dataset` and :py:class:`~xarray.DataArray` types. A -``Variable`` consists of: - -- ``dims``: A tuple of dimension names. -- ``data``: The N-dimensional array (typically, a NumPy or Dask array) storing - the Variable's data. It must have the same number of dimensions as the length - of ``dims``. -- ``attrs``: An ordered dictionary of metadata associated with this array. By - convention, xarray's built-in operations never use this metadata. -- ``encoding``: Another ordered dictionary used to store information about how - these variable's data is represented on disk. See :ref:`io.encoding` for more - details. - -``Variable`` has an interface similar to NumPy arrays, but extended to make use -of named dimensions. For example, it uses ``dim`` in preference to an ``axis`` -argument for methods like ``mean``, and supports :ref:`compute.broadcasting`. - -However, unlike ``Dataset`` and ``DataArray``, the basic ``Variable`` does not -include coordinate labels along each axis. - -``Variable`` is public API, but because of its incomplete support for labeled -data, it is mostly intended for advanced uses, such as in xarray itself or for -writing new backends. You can access the variable objects that correspond to -xarray objects via the (readonly) :py:attr:`Dataset.variables -` and -:py:attr:`DataArray.variable ` attributes. diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 157795f08d1..b83697a3b20 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -145,6 +145,8 @@ Breaking changes Documentation ~~~~~~~~~~~~~ +- Added page on the internal design of xarray objects. + (:pull:`7991`) By `Tom Nicholas `_. - Added examples to docstrings of :py:meth:`Dataset.assign_attrs`, :py:meth:`Dataset.broadcast_equals`, :py:meth:`Dataset.equals`, :py:meth:`Dataset.identical`, :py:meth:`Dataset.expand_dims`,:py:meth:`Dataset.drop_vars` (:issue:`6793`, :pull:`7937`) By `Harshitha `_. From aea9af0b0943d13a8d33b392d7e2030411093179 Mon Sep 17 00:00:00 2001 From: Mattia Almansi Date: Sat, 9 Sep 2023 06:53:18 +0200 Subject: [PATCH 3/8] Fix tokenize with empty attrs (#8101) * Fix tokenize with empty attrs * docs * typing Co-authored-by: Illviljan <14371165+Illviljan@users.noreply.github.com> --------- Co-authored-by: Illviljan <14371165+Illviljan@users.noreply.github.com> Co-authored-by: Deepak Cherian --- doc/whats-new.rst | 2 ++ xarray/core/variable.py | 2 +- xarray/tests/test_dask.py | 11 +++++++++++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index b83697a3b20..7762edfcef2 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -50,6 +50,8 @@ Deprecations Bug fixes ~~~~~~~~~ +- Fix bug where empty attrs would generate inconsistent tokens (:issue:`6970`, :pull:`8101`). + By `Mattia Almansi `_. - Improved handling of multi-coordinate indexes when updating coordinates, including bug fixes (and improved warnings for deprecated features) for pandas multi-indexes (:pull:`8094`). By `Benoît Bovy `_. 
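For context, here is a minimal sketch of the inconsistency this patch fixes (an editorial illustration, not part of the original patch; it assumes only that dask is installed, and mirrors the new regression test below). A freshly created ``Variable`` has ``_attrs = None``, but merely reading ``.attrs`` normalizes that to ``{}``, so the old ``__dask_tokenize__`` (which hashed the raw ``_attrs``) returned different tokens before and after the access:

    import dask.base
    import xarray as xr

    v = xr.Variable(("x",), [1, 2, 3])  # freshly created: v._attrs is None
    token_before = dask.base.tokenize(v)
    v.attrs  # reading .attrs replaces the None with an empty dict {}
    token_after = dask.base.tokenize(v)
    assert token_before == token_after  # failed before this fix (issue #6970)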
diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 05f9930aacd..a1e11b86f7e 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -566,7 +566,7 @@ def __dask_tokenize__(self): # around NetCDF and the like from dask.base import normalize_token - return normalize_token((type(self), self._dims, self.data, self._attrs)) + return normalize_token((type(self), self._dims, self.data, self.attrs)) def __dask_graph__(self): if is_duck_dask_array(self._data): diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index 6e65d52fdb5..1c2511427ac 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -299,6 +299,17 @@ def test_persist(self): self.assertLazyAndAllClose(u + 1, v) self.assertLazyAndAllClose(u + 1, v2) + def test_tokenize_empty_attrs(self) -> None: + # Issue #6970 + assert self.eager_var._attrs is None + expected = dask.base.tokenize(self.eager_var) + assert self.eager_var.attrs == self.eager_var._attrs == {} + assert ( + expected + == dask.base.tokenize(self.eager_var) + == dask.base.tokenize(self.lazy_var.compute()) + ) + @requires_pint def test_tokenize_duck_dask_array(self): import pint From 0afbd45bdc52d34fae4cdb6d1d67b468dd7cd4fc Mon Sep 17 00:00:00 2001 From: mgunyho <20118130+mgunyho@users.noreply.github.com> Date: Sat, 9 Sep 2023 07:55:42 +0300 Subject: [PATCH 4/8] Consistently report all dimensions in error messages if invalid dimensions are given (#8079) * Show dims and coords in idxmin/idxmax error message if an invalid dim is given * Show data dims in error messages of Dataset and update tests Remove _assert_empty, not used anymore * Update test for dataarray * Show data dims in error messages of weighted and update test * Show dimensions in error message of group_indexers_by_index * List coordinates in concat error message, update test * List coordinates in coords __delitem__ error message, update tests * Show list of names in error message of PandasMultiIndex.sel, update test * Show list of dimensions in error messages of Rolling and Coarsen, update tests * Show dims in Variable.concat error message as tuple for consistency * Change 'dataset' to 'data' in error messages * Update whats-new --------- Co-authored-by: Deepak Cherian --- doc/whats-new.rst | 2 + xarray/core/computation.py | 8 +++- xarray/core/concat.py | 13 +++--- xarray/core/coordinates.py | 8 +++- xarray/core/dataset.py | 69 +++++++++++++++++++------------- xarray/core/indexes.py | 6 +-- xarray/core/indexing.py | 5 ++- xarray/core/rolling.py | 18 ++++++--- xarray/core/variable.py | 2 +- xarray/core/weighted.py | 5 ++- xarray/tests/test_coarsen.py | 5 ++- xarray/tests/test_concat.py | 5 ++- xarray/tests/test_coordinates.py | 5 +++ xarray/tests/test_dataarray.py | 20 ++++++--- xarray/tests/test_dataset.py | 46 +++++++++++++++++---- xarray/tests/test_indexes.py | 5 ++- xarray/tests/test_rolling.py | 11 +++++ xarray/tests/test_weighted.py | 13 +++--- 18 files changed, 177 insertions(+), 69 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 7762edfcef2..77d2249960f 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -67,6 +67,8 @@ Documentation Internal Changes ~~~~~~~~~~~~~~~~ +- Many error messages related to invalid dimensions or coordinates now always show the list of valid dims/coords (:pull:`8079`). + By `András Gunyhó `_. .. 
_whats-new.2023.08.0: diff --git a/xarray/core/computation.py b/xarray/core/computation.py index fe89672e392..235b52402f1 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -2046,9 +2046,13 @@ def _calc_idxminmax( raise ValueError("Must supply 'dim' argument for multidimensional arrays") if dim not in array.dims: - raise KeyError(f'Dimension "{dim}" not in dimension') + raise KeyError( + f"Dimension {dim!r} not found in array dimensions {array.dims!r}" + ) if dim not in array.coords: - raise KeyError(f'Dimension "{dim}" does not have coordinates') + raise KeyError( + f"Dimension {dim!r} is not one of the coordinates {tuple(array.coords.keys())}" + ) # These are dtypes with NaN values argmin and argmax can handle na_dtypes = "cfO" diff --git a/xarray/core/concat.py b/xarray/core/concat.py index d7aad8c7188..a76bb6b0033 100644 --- a/xarray/core/concat.py +++ b/xarray/core/concat.py @@ -391,17 +391,20 @@ def process_subset_opt(opt, subset): else: raise ValueError(f"unexpected value for {subset}: {opt}") else: - invalid_vars = [k for k in opt if k not in getattr(datasets[0], subset)] + valid_vars = tuple(getattr(datasets[0], subset)) + invalid_vars = [k for k in opt if k not in valid_vars] if invalid_vars: if subset == "coords": raise ValueError( - "some variables in coords are not coordinates on " - f"the first dataset: {invalid_vars}" + f"the variables {invalid_vars} in coords are not " + f"found in the coordinates of the first dataset {valid_vars}" ) else: + # note: data_vars are not listed in the error message here, + # because there may be lots of them raise ValueError( - "some variables in data_vars are not data variables " - f"on the first dataset: {invalid_vars}" + f"the variables {invalid_vars} in data_vars are not " + f"found in the data variables of the first dataset" ) concat_over.update(opt) diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index 489b6f0d04e..e20c022e637 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -769,7 +769,9 @@ def __delitem__(self, key: Hashable) -> None: if key in self: del self._data[key] else: - raise KeyError(f"{key!r} is not a coordinate variable.") + raise KeyError( + f"{key!r} is not in coordinate variables {tuple(self.keys())}" + ) def _ipython_key_completions_(self): """Provide method for the key-autocompletions in IPython.""" @@ -855,7 +857,9 @@ def to_dataset(self) -> Dataset: def __delitem__(self, key: Hashable) -> None: if key not in self: - raise KeyError(f"{key!r} is not a coordinate variable.") + raise KeyError( + f"{key!r} is not in coordinate variables {tuple(self.keys())}" + ) assert_no_index_corrupted(self._data.xindexes, {key}) del self._data._coords[key] diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 97f528aea7d..4e5ca3746f0 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -213,11 +213,6 @@ def _get_virtual_variable( return ref_name, var_name, virtual_var -def _assert_empty(args: tuple, msg: str = "%s") -> None: - if args: - raise ValueError(msg % args) - - def _get_chunk(var: Variable, chunks, chunkmanager: ChunkManagerEntrypoint): """ Return map from each dim to chunk sizes, accounting for backend's preferred chunks. 
@@ -2640,7 +2635,7 @@ def chunk( bad_dims = chunks.keys() - self.dims.keys() if bad_dims: raise ValueError( - f"some chunks keys are not dimensions on this object: {bad_dims}" + f"chunks keys {tuple(bad_dims)} not found in data dimensions {tuple(self.dims)}" ) chunkmanager = guess_chunkmanager(chunked_array_type) @@ -4243,8 +4238,8 @@ def rename_dims( for k, v in dims_dict.items(): if k not in self.dims: raise ValueError( - f"cannot rename {k!r} because it is not a " - "dimension in this dataset" + f"cannot rename {k!r} because it is not found " + f"in the dimensions of this dataset {tuple(self.dims)}" ) if v in self.dims or v in self: raise ValueError( @@ -4366,7 +4361,7 @@ def swap_dims( if k not in self.dims: raise ValueError( f"cannot swap from dimension {k!r} because it is " - "not an existing dimension" + f"not one of the dimensions of this dataset {tuple(self.dims)}" ) if v in self.variables and self.variables[v].dims != (k,): raise ValueError( @@ -5448,10 +5443,10 @@ def unstack( else: dims = list(dim) - missing_dims = [d for d in dims if d not in self.dims] + missing_dims = set(dims) - set(self.dims) if missing_dims: raise ValueError( - f"Dataset does not contain the dimensions: {missing_dims}" + f"Dimensions {tuple(missing_dims)} not found in data dimensions {tuple(self.dims)}" ) # each specified dimension must have exactly one multi-index @@ -5836,7 +5831,10 @@ def drop_indexes( if errors == "raise": invalid_coords = coord_names - self._coord_names if invalid_coords: - raise ValueError(f"those coordinates don't exist: {invalid_coords}") + raise ValueError( + f"The coordinates {tuple(invalid_coords)} are not found in the " + f"dataset coordinates {tuple(self.coords.keys())}" + ) unindexed_coords = set(coord_names) - set(self._indexes) if unindexed_coords: @@ -6084,7 +6082,7 @@ def drop_dims( missing_dims = drop_dims - set(self.dims) if missing_dims: raise ValueError( - f"Dataset does not contain the dimensions: {missing_dims}" + f"Dimensions {tuple(missing_dims)} not found in data dimensions {tuple(self.dims)}" ) drop_vars = {k for k, v in self._variables.items() if set(v.dims) & drop_dims} @@ -6244,7 +6242,9 @@ def dropna( # depending on the order of the supplied axes. 
if dim not in self.dims: - raise ValueError(f"{dim} must be a single dataset dimension") + raise ValueError( + f"Dimension {dim!r} not found in data dimensions {tuple(self.dims)}" + ) if subset is None: subset = iter(self.data_vars) @@ -6725,10 +6725,10 @@ def reduce( else: dims = set(dim) - missing_dimensions = [d for d in dims if d not in self.dims] + missing_dimensions = tuple(d for d in dims if d not in self.dims) if missing_dimensions: raise ValueError( - f"Dataset does not contain the dimensions: {missing_dimensions}" + f"Dimensions {missing_dimensions} not found in data dimensions {tuple(self.dims)}" ) if keep_attrs is None: @@ -7710,9 +7710,11 @@ def shift( foo (x) object nan nan 'a' 'b' 'c' """ shifts = either_dict_or_kwargs(shifts, shifts_kwargs, "shift") - invalid = [k for k in shifts if k not in self.dims] + invalid = tuple(k for k in shifts if k not in self.dims) if invalid: - raise ValueError(f"dimensions {invalid!r} do not exist") + raise ValueError( + f"Dimensions {invalid} not found in data dimensions {tuple(self.dims)}" + ) variables = {} for name, var in self.variables.items(): @@ -7789,7 +7791,9 @@ def roll( shifts = either_dict_or_kwargs(shifts, shifts_kwargs, "roll") invalid = [k for k in shifts if k not in self.dims] if invalid: - raise ValueError(f"dimensions {invalid!r} do not exist") + raise ValueError( + f"Dimensions {invalid} not found in data dimensions {tuple(self.dims)}" + ) unrolled_vars: tuple[Hashable, ...] @@ -8038,10 +8042,11 @@ def quantile( else: dims = set(dim) - _assert_empty( - tuple(d for d in dims if d not in self.dims), - "Dataset does not contain the dimensions: %s", - ) + invalid_dims = set(dims) - set(self.dims) + if invalid_dims: + raise ValueError( + f"Dimensions {tuple(invalid_dims)} not found in data dimensions {tuple(self.dims)}" + ) q = np.asarray(q, dtype=np.float64) @@ -8117,7 +8122,9 @@ def rank( ) if dim not in self.dims: - raise ValueError(f"Dataset does not contain the dimension: {dim}") + raise ValueError( + f"Dimension {dim!r} not found in data dimensions {tuple(self.dims)}" + ) variables = {} for name, var in self.variables.items(): @@ -8167,7 +8174,10 @@ def differentiate( from xarray.core.variable import Variable if coord not in self.variables and coord not in self.dims: - raise ValueError(f"Coordinate {coord} does not exist.") + variables_and_dims = tuple(set(self.variables.keys()).union(self.dims)) + raise ValueError( + f"Coordinate {coord!r} not found in variables or dimensions {variables_and_dims}." + ) coord_var = self[coord].variable if coord_var.ndim != 1: @@ -8269,7 +8279,10 @@ def _integrate_one(self, coord, datetime_unit=None, cumulative=False): from xarray.core.variable import Variable if coord not in self.variables and coord not in self.dims: - raise ValueError(f"Coordinate {coord} does not exist.") + variables_and_dims = tuple(set(self.variables.keys()).union(self.dims)) + raise ValueError( + f"Coordinate {coord!r} not found in variables or dimensions {variables_and_dims}." 
+ ) coord_var = self[coord].variable if coord_var.ndim != 1: @@ -9771,7 +9784,9 @@ def drop_duplicates( missing_dims = set(dims) - set(self.dims) if missing_dims: - raise ValueError(f"'{missing_dims}' not found in dimensions") + raise ValueError( + f"Dimensions {tuple(missing_dims)} not found in data dimensions {tuple(self.dims)}" + ) indexes = {dim: ~self.get_index(dim).duplicated(keep=keep) for dim in dims} return self.isel(indexes) diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py index b5e396963a1..dffc012c582 100644 --- a/xarray/core/indexes.py +++ b/xarray/core/indexes.py @@ -1203,12 +1203,12 @@ def sel(self, labels, method=None, tolerance=None) -> IndexSelResult: coord_name, label = next(iter(labels.items())) if is_dict_like(label): - invalid_levels = [ + invalid_levels = tuple( name for name in label if name not in self.index.names - ] + ) if invalid_levels: raise ValueError( - f"invalid multi-index level names {invalid_levels}" + f"multi-index level names {invalid_levels} not found in indexes {tuple(self.index.names)}" ) return self.sel(label) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 7969ded3102..6e6ce01a41f 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -143,7 +143,10 @@ def group_indexers_by_index( elif key in obj.coords: raise KeyError(f"no index found for coordinate {key!r}") elif key not in obj.dims: - raise KeyError(f"{key!r} is not a valid dimension or coordinate") + raise KeyError( + f"{key!r} is not a valid dimension or coordinate for " + f"{obj.__class__.__name__} with dimensions {obj.dims!r}" + ) elif len(options): raise ValueError( f"cannot supply selection options {options!r} for dimension {key!r}" diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index 916fabe42ac..dcd01a0e0f1 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -102,6 +102,14 @@ def __init__( self.center = self._mapping_to_list(center, default=False) self.obj: T_Xarray = obj + missing_dims = tuple(dim for dim in self.dim if dim not in self.obj.dims) + if missing_dims: + # NOTE: we raise KeyError here but ValueError in Coarsen. + raise KeyError( + f"Window dimensions {missing_dims} not found in {self.obj.__class__.__name__} " + f"dimensions {tuple(self.obj.dims)}" + ) + # attributes if min_periods is not None and min_periods <= 0: raise ValueError("min_periods must be greater than zero or None") @@ -624,8 +632,7 @@ def __init__( xarray.DataArray.groupby """ super().__init__(obj, windows, min_periods, center) - if any(d not in self.obj.dims for d in self.dim): - raise KeyError(self.dim) + # Keep each Rolling object as a dictionary self.rollings = {} for key, da in self.obj.data_vars.items(): @@ -839,10 +846,11 @@ def __init__( self.side = side self.boundary = boundary - absent_dims = [dim for dim in windows.keys() if dim not in self.obj.dims] - if absent_dims: + missing_dims = tuple(dim for dim in windows.keys() if dim not in self.obj.dims) + if missing_dims: raise ValueError( - f"Dimensions {absent_dims!r} not found in {self.obj.__class__.__name__}." 
+ f"Window dimensions {missing_dims} not found in {self.obj.__class__.__name__} " + f"dimensions {tuple(self.obj.dims)}" ) if not utils.is_dict_like(coord_func): coord_func = {d: coord_func for d in self.obj.dims} # type: ignore[misc] diff --git a/xarray/core/variable.py b/xarray/core/variable.py index a1e11b86f7e..965648091bf 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -2117,7 +2117,7 @@ def concat( for var in variables: if var.dims != first_var_dims: raise ValueError( - f"Variable has dimensions {list(var.dims)} but first Variable has dimensions {list(first_var_dims)}" + f"Variable has dimensions {tuple(var.dims)} but first Variable has dimensions {tuple(first_var_dims)}" ) return cls(dims, data, attrs, encoding, fastpath=True) diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py index e21091fad6b..82ffe684ec7 100644 --- a/xarray/core/weighted.py +++ b/xarray/core/weighted.py @@ -198,10 +198,11 @@ def _check_dim(self, dim: Dims): dims = [dim] if dim else [] else: dims = list(dim) - missing_dims = set(dims) - set(self.obj.dims) - set(self.weights.dims) + all_dims = set(self.obj.dims).union(set(self.weights.dims)) + missing_dims = set(dims) - all_dims if missing_dims: raise ValueError( - f"{self.__class__.__name__} does not contain the dimensions: {missing_dims}" + f"Dimensions {tuple(missing_dims)} not found in {self.__class__.__name__} dimensions {tuple(all_dims)}" ) @staticmethod diff --git a/xarray/tests/test_coarsen.py b/xarray/tests/test_coarsen.py index d58361afdd3..e345ae691ec 100644 --- a/xarray/tests/test_coarsen.py +++ b/xarray/tests/test_coarsen.py @@ -17,7 +17,10 @@ def test_coarsen_absent_dims_error(ds: Dataset) -> None: - with pytest.raises(ValueError, match=r"not found in Dataset."): + with pytest.raises( + ValueError, + match=r"Window dimensions \('foo',\) not found in Dataset dimensions", + ): ds.coarsen(foo=2) diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py index 030f653e031..543b6d33cb9 100644 --- a/xarray/tests/test_concat.py +++ b/xarray/tests/test_concat.py @@ -614,9 +614,12 @@ def test_concat_errors(self): with pytest.raises(ValueError, match=r"must supply at least one"): concat([], "dim1") - with pytest.raises(ValueError, match=r"are not coordinates"): + with pytest.raises(ValueError, match=r"are not found in the coordinates"): concat([data, data], "new_dim", coords=["not_found"]) + with pytest.raises(ValueError, match=r"are not found in the data variables"): + concat([data, data], "new_dim", data_vars=["not_found"]) + with pytest.raises(ValueError, match=r"global attributes not"): # call deepcopy seperately to get unique attrs data0 = deepcopy(split_data[0]) diff --git a/xarray/tests/test_coordinates.py b/xarray/tests/test_coordinates.py index 27abc6c0ae2..ef73371dfe4 100644 --- a/xarray/tests/test_coordinates.py +++ b/xarray/tests/test_coordinates.py @@ -103,6 +103,11 @@ def test_delitem(self) -> None: del coords["x"] assert "x" not in coords + with pytest.raises( + KeyError, match="'nonexistent' is not in coordinate variables" + ): + del coords["nonexistent"] + def test_update(self) -> None: coords = Coordinates(coords={"x": [0, 1, 2]}) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index b4efe4ab2a7..2a28939df41 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -1,6 +1,7 @@ from __future__ import annotations import pickle +import re import sys import warnings from collections.abc import Hashable @@ -4886,8 +4887,10 @@ def test_idxmin( else: 
ar0 = ar0_raw - # dim doesn't exist - with pytest.raises(KeyError): + with pytest.raises( + KeyError, + match=r"'spam' not found in array dimensions", + ): ar0.idxmin(dim="spam") # Scalar Dataarray @@ -4999,8 +5002,10 @@ def test_idxmax( else: ar0 = ar0_raw - # dim doesn't exist - with pytest.raises(KeyError): + with pytest.raises( + KeyError, + match=r"'spam' not found in array dimensions", + ): ar0.idxmax(dim="spam") # Scalar Dataarray @@ -6954,7 +6959,12 @@ def test_drop_duplicates_1d(self, keep) -> None: result = da.drop_duplicates("time", keep=keep) assert_equal(expected, result) - with pytest.raises(ValueError, match="['space'] not found"): + with pytest.raises( + ValueError, + match=re.escape( + "Dimensions ('space',) not found in data dimensions ('time',)" + ), + ): da.drop_duplicates("space", keep=keep) def test_drop_duplicates_2d(self) -> None: diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 01bdf2cef0c..226e2b6dc78 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -1158,7 +1158,12 @@ def get_dask_names(ds): for k, v in new_dask_names.items(): assert v == orig_dask_names[k] - with pytest.raises(ValueError, match=r"some chunks"): + with pytest.raises( + ValueError, + match=re.escape( + "chunks keys ('foo',) not found in data dimensions ('dim2', 'dim3', 'time', 'dim1')" + ), + ): data.chunk({"foo": 10}) @requires_dask @@ -2780,7 +2785,10 @@ def test_drop_indexes(self) -> None: assert type(actual.x.variable) is Variable assert type(actual.y.variable) is Variable - with pytest.raises(ValueError, match="those coordinates don't exist"): + with pytest.raises( + ValueError, + match=r"The coordinates \('not_a_coord',\) are not found in the dataset coordinates", + ): ds.drop_indexes("not_a_coord") with pytest.raises(ValueError, match="those coordinates do not have an index"): @@ -3672,7 +3680,10 @@ def test_unstack(self) -> None: def test_unstack_errors(self) -> None: ds = Dataset({"x": [1, 2, 3]}) - with pytest.raises(ValueError, match=r"does not contain the dimensions"): + with pytest.raises( + ValueError, + match=re.escape("Dimensions ('foo',) not found in data dimensions ('x',)"), + ): ds.unstack("foo") with pytest.raises(ValueError, match=r".*do not have exactly one multi-index"): ds.unstack("x") @@ -4962,7 +4973,10 @@ def test_dropna(self) -> None: expected = ds.isel(a=[1, 3]) assert_identical(actual, ds) - with pytest.raises(ValueError, match=r"a single dataset dimension"): + with pytest.raises( + ValueError, + match=r"'foo' not found in data dimensions \('a', 'b'\)", + ): ds.dropna("foo") with pytest.raises(ValueError, match=r"invalid how"): ds.dropna("a", how="somehow") # type: ignore @@ -5280,7 +5294,10 @@ def test_mean_uint_dtype(self) -> None: def test_reduce_bad_dim(self) -> None: data = create_test_data() - with pytest.raises(ValueError, match=r"Dataset does not contain"): + with pytest.raises( + ValueError, + match=r"Dimensions \('bad_dim',\) not found in data dimensions", + ): data.mean(dim="bad_dim") def test_reduce_cumsum(self) -> None: @@ -5306,7 +5323,10 @@ def test_reduce_cumsum(self) -> None: @pytest.mark.parametrize("func", ["cumsum", "cumprod"]) def test_reduce_cumsum_test_dims(self, reduct, expected, func) -> None: data = create_test_data() - with pytest.raises(ValueError, match=r"Dataset does not contain"): + with pytest.raises( + ValueError, + match=r"Dimensions \('bad_dim',\) not found in data dimensions", + ): getattr(data, func)(dim="bad_dim") # ensure dimensions are correct @@ -5554,7 +5574,12 @@ 
def test_rank(self) -> None: assert list(z.coords) == list(ds.coords) assert list(x.coords) == list(y.coords) # invalid dim - with pytest.raises(ValueError, match=r"does not contain"): + with pytest.raises( + ValueError, + match=re.escape( + "Dimension 'invalid_dim' not found in data dimensions ('dim3', 'dim1')" + ), + ): x.rank("invalid_dim") def test_rank_use_bottleneck(self) -> None: @@ -7087,7 +7112,12 @@ def test_drop_duplicates_1d(self, keep) -> None: result = ds.drop_duplicates("time", keep=keep) assert_equal(expected, result) - with pytest.raises(ValueError, match="['space'] not found"): + with pytest.raises( + ValueError, + match=re.escape( + "Dimensions ('space',) not found in data dimensions ('time',)" + ), + ): ds.drop_duplicates("space", keep=keep) diff --git a/xarray/tests/test_indexes.py b/xarray/tests/test_indexes.py index 05d748541ed..866c2ef7e85 100644 --- a/xarray/tests/test_indexes.py +++ b/xarray/tests/test_indexes.py @@ -487,7 +487,10 @@ def test_sel(self) -> None: index.sel({"x": 0}) with pytest.raises(ValueError, match=r"cannot provide labels for both.*"): index.sel({"one": 0, "x": "a"}) - with pytest.raises(ValueError, match=r"invalid multi-index level names"): + with pytest.raises( + ValueError, + match=r"multi-index level names \('three',\) not found in indexes", + ): index.sel({"x": {"three": 0}}) with pytest.raises(IndexError): index.sel({"x": (slice(None), 1, "no_level")}) diff --git a/xarray/tests/test_rolling.py b/xarray/tests/test_rolling.py index 73aebc1b1f0..0e3c0874a0a 100644 --- a/xarray/tests/test_rolling.py +++ b/xarray/tests/test_rolling.py @@ -77,6 +77,12 @@ def test_rolling_properties(self, da) -> None: with pytest.raises(ValueError, match="min_periods must be greater than zero"): da.rolling(time=2, min_periods=0) + with pytest.raises( + KeyError, + match=r"\('foo',\) not found in DataArray dimensions", + ): + da.rolling(foo=2) + @pytest.mark.parametrize("name", ("sum", "mean", "std", "min", "max", "median")) @pytest.mark.parametrize("center", (True, False, None)) @pytest.mark.parametrize("min_periods", (1, None)) @@ -540,6 +546,11 @@ def test_rolling_properties(self, ds) -> None: ds.rolling(time=2, min_periods=0) with pytest.raises(KeyError, match="time2"): ds.rolling(time2=2) + with pytest.raises( + KeyError, + match=r"\('foo',\) not found in Dataset dimensions", + ): + ds.rolling(foo=2) @pytest.mark.parametrize( "name", ("sum", "mean", "std", "var", "min", "max", "median") diff --git a/xarray/tests/test_weighted.py b/xarray/tests/test_weighted.py index e2530d41fbe..628d6310945 100644 --- a/xarray/tests/test_weighted.py +++ b/xarray/tests/test_weighted.py @@ -782,9 +782,12 @@ def test_weighted_bad_dim(operation, as_dataset): if operation == "quantile": kwargs["q"] = 0.5 - error_msg = ( - f"{data.__class__.__name__}Weighted" - " does not contain the dimensions: {'bad_dim'}" - ) - with pytest.raises(ValueError, match=error_msg): + with pytest.raises( + ValueError, + match=( + f"Dimensions \\('bad_dim',\\) not found in {data.__class__.__name__}Weighted " + # the order of (dim_0, dim_1) varies + "dimensions \\(('dim_0', 'dim_1'|'dim_1', 'dim_0')\\)" + ), + ): getattr(data.weighted(weights), operation)(**kwargs) From 336aec04d8518af66a67bfd1a0eadf72241f60e7 Mon Sep 17 00:00:00 2001 From: Kian-Meng Ang Date: Sat, 9 Sep 2023 20:04:30 +0800 Subject: [PATCH 5/8] Fix typos (#8163) Found via `codespell -S tests -L splitted,coo,fo,nd,ser,slowy,soler,tung,secondy,nin` --- asv_bench/benchmarks/dataset_io.py | 2 +- asv_bench/benchmarks/merge.py | 2 +- 
doc/user-guide/io.rst | 2 +- doc/whats-new.rst | 4 ++-- xarray/backends/netCDF4_.py | 2 +- xarray/core/dataarray.py | 2 +- xarray/core/indexes.py | 2 +- xarray/core/rolling.py | 2 +- xarray/plot/dataarray_plot.py | 2 +- xarray/plot/dataset_plot.py | 2 +- xarray/plot/facetgrid.py | 2 +- xarray/plot/utils.py | 8 ++++---- 12 files changed, 16 insertions(+), 16 deletions(-) diff --git a/asv_bench/benchmarks/dataset_io.py b/asv_bench/benchmarks/dataset_io.py index 0af8084dd21..fac4986f9d0 100644 --- a/asv_bench/benchmarks/dataset_io.py +++ b/asv_bench/benchmarks/dataset_io.py @@ -527,7 +527,7 @@ def time_read_dataset(self, engine, chunks): class IOReadCustomEngine: def setup(self, *args, **kwargs): """ - The custom backend does the bare mininum to be considered a lazy backend. But + The custom backend does the bare minimum to be considered a lazy backend. But the data in it is still in memory so slow file reading shouldn't affect the results. """ diff --git a/asv_bench/benchmarks/merge.py b/asv_bench/benchmarks/merge.py index 043de35bdf7..6c8c1e9da90 100644 --- a/asv_bench/benchmarks/merge.py +++ b/asv_bench/benchmarks/merge.py @@ -41,7 +41,7 @@ def setup(self, strategy, count): data = np.array(["0", "b"], dtype=str) self.dataset_coords = dict(time=np.array([0, 1])) self.dataset_attrs = dict(description="Test data") - attrs = dict(units="Celcius") + attrs = dict(units="Celsius") if strategy == "dict_of_DataArrays": def create_data_vars(): diff --git a/doc/user-guide/io.rst b/doc/user-guide/io.rst index c0e88634705..eeb7813ae15 100644 --- a/doc/user-guide/io.rst +++ b/doc/user-guide/io.rst @@ -565,7 +565,7 @@ HDF5 ---- `HDF5`_ is both a file format and a data model for storing information. HDF5 stores data hierarchically, using groups to create a nested structure. HDF5 is a more -general verion of the netCDF4 data model, so the nested structure is one of many +general version of the netCDF4 data model, so the nested structure is one of many similarities between the two data formats. Reading HDF5 files in xarray requires the ``h5netcdf`` engine, which can be installed diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 77d2249960f..a25f87468af 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -888,7 +888,7 @@ Bug fixes By `Michael Niklas `_. - Fix side effects on index coordinate metadata after aligning objects. (:issue:`6852`, :pull:`6857`) By `Benoît Bovy `_. -- Make FacetGrid.set_titles send kwargs correctly using `handle.udpate(kwargs)`. (:issue:`6839`, :pull:`6843`) +- Make FacetGrid.set_titles send kwargs correctly using `handle.update(kwargs)`. (:issue:`6839`, :pull:`6843`) By `Oliver Lopez `_. - Fix bug where index variables would be changed inplace. (:issue:`6931`, :pull:`6938`) By `Michael Niklas `_. @@ -4751,7 +4751,7 @@ Bug fixes - Corrected a bug with incorrect coordinates for non-georeferenced geotiff files (:issue:`1686`). Internally, we now use the rasterio coordinate transform tool instead of doing the computations ourselves. A - ``parse_coordinates`` kwarg has beed added to :py:func:`~open_rasterio` + ``parse_coordinates`` kwarg has been added to :py:func:`~open_rasterio` (set to ``True`` per default). By `Fabien Maussion `_. 
- The colors of discrete colormaps are now the same regardless if `seaborn` diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index b5c3413e7f8..e3f97d7bac2 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -538,7 +538,7 @@ class NetCDF4BackendEntrypoint(BackendEntrypoint): """ Backend for netCDF files based on the netCDF4 package. - It can open ".nc", ".nc4", ".cdf" files and will be choosen + It can open ".nc", ".nc4", ".cdf" files and will be chosen as default for these files. Additionally it can open valid HDF5 files, see diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index dc0b2032a37..57757179af0 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -4463,7 +4463,7 @@ def broadcast_equals(self: T_DataArray, other: T_DataArray) -> bool: [2, 2]]) Dimensions without coordinates: X, Y - .equals returns True if two DataArrays have the same values, dimensions, and coordinates. .broadcast_equals returns True if the results of broadcasting two DataArrays against eachother have the same values, dimensions, and coordinates. + .equals returns True if two DataArrays have the same values, dimensions, and coordinates. .broadcast_equals returns True if the results of broadcasting two DataArrays against each other have the same values, dimensions, and coordinates. >>> a.equals(b) False diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py index dffc012c582..9972896d6df 100644 --- a/xarray/core/indexes.py +++ b/xarray/core/indexes.py @@ -307,7 +307,7 @@ def reindex_like(self: T_Index, other: T_Index) -> dict[Hashable, Any]: def equals(self: T_Index, other: T_Index) -> bool: """Compare this index with another index of the same type. - Implemenation is optional but required in order to support alignment. + Implementation is optional but required in order to support alignment. Parameters ---------- diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index dcd01a0e0f1..67389527a98 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -572,7 +572,7 @@ def _numpy_or_bottleneck_reduce( and not is_duck_dask_array(self.obj.data) and self.ndim == 1 ): - # TODO: renable bottleneck with dask after the issues + # TODO: re-enable bottleneck with dask after the issues # underlying https://github.com/pydata/xarray/issues/2940 are # fixed. return self._bottleneck_reduce( diff --git a/xarray/plot/dataarray_plot.py b/xarray/plot/dataarray_plot.py index 3f7b1568e64..4df1938f89f 100644 --- a/xarray/plot/dataarray_plot.py +++ b/xarray/plot/dataarray_plot.py @@ -186,7 +186,7 @@ def _prepare_plot1d_data( # dimensions so the plotter can plot anything: if darray.ndim > 1: # When stacking dims the lines will continue connecting. 
For floats - # this can be solved by adding a nan element inbetween the flattening + # this can be solved by adding a nan element in between the flattening # points: dims_T = [] if np.issubdtype(darray.dtype, np.floating): diff --git a/xarray/plot/dataset_plot.py b/xarray/plot/dataset_plot.py index b0774c31b17..4f703045f17 100644 --- a/xarray/plot/dataset_plot.py +++ b/xarray/plot/dataset_plot.py @@ -730,7 +730,7 @@ def _temp_dataarray(ds: Dataset, y: Hashable, locals_: dict[str, Any]) -> DataAr coords = dict(ds.coords) # Add extra coords to the DataArray from valid kwargs, if using all - # kwargs there is a risk that we add unneccessary dataarrays as + # kwargs there is a risk that we add unnecessary dataarrays as # coords straining RAM further for example: # ds.both and extend="both" would add ds.both to the coords: valid_coord_kwargs = {"x", "z", "markersize", "hue", "row", "col", "u", "v"} diff --git a/xarray/plot/facetgrid.py b/xarray/plot/facetgrid.py index 93a328836d0..2b348d8bedd 100644 --- a/xarray/plot/facetgrid.py +++ b/xarray/plot/facetgrid.py @@ -76,7 +76,7 @@ class FacetGrid(Generic[T_Xarray]): The general approach to plotting here is called "small multiples", where the same kind of plot is repeated multiple times, and the specific use of small multiples to display the same relationship - conditioned on one ore more other variables is often called a "trellis + conditioned on one or more other variables is often called a "trellis plot". The basic workflow is to initialize the :class:`FacetGrid` object with diff --git a/xarray/plot/utils.py b/xarray/plot/utils.py index 70e8bd3fdb9..b8cc4ff7349 100644 --- a/xarray/plot/utils.py +++ b/xarray/plot/utils.py @@ -1461,7 +1461,7 @@ def _calc_widths(self, y: DataArray) -> DataArray: def _calc_widths(self, y: np.ndarray | DataArray) -> np.ndarray | DataArray: """ - Normalize the values so they're inbetween self._width. + Normalize the values so they're in between self._width. """ if self._width is None: return y @@ -1473,7 +1473,7 @@ def _calc_widths(self, y: np.ndarray | DataArray) -> np.ndarray | DataArray: # Use default with if y is constant: widths = xdefault + 0 * y else: - # Normalize inbetween xmin and xmax: + # Normalize in between xmin and xmax: k = (y - np.min(y)) / diff_maxy_miny widths = xmin + k * (xmax - xmin) return widths @@ -1821,8 +1821,8 @@ def _guess_coords_to_plot( ) # If dims_plot[k] isn't defined then fill with one of the available dims, unless - # one of related mpl kwargs has been used. This should have similiar behaviour as - # * plt.plot(x, y) -> Multple lines with different colors if y is 2d. + # one of related mpl kwargs has been used. This should have similar behaviour as + # * plt.plot(x, y) -> Multiple lines with different colors if y is 2d. # * plt.plot(x, y, color="red") -> Multiple red lines if y is 2d. 
for k, dim, ign_kws in zip(default_guess, available_coords, ignore_guess_kwargs): if coords_to_plot.get(k, None) is None and all( From 0b3b20a3f96e24ba0d2723a7ee074063a659f3c1 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Sun, 10 Sep 2023 17:33:36 +0200 Subject: [PATCH 6/8] to_stacked_array: better error msg & refactor (#8130) * to_stacked_array: better error msg & refactor * fix regex * Update xarray/core/dataset.py Co-authored-by: Deepak Cherian * Apply suggestions from code review --------- Co-authored-by: Deepak Cherian --- xarray/core/dataset.py | 38 +++++++++++++++--------------------- xarray/tests/test_dataset.py | 5 ++++- 2 files changed, 20 insertions(+), 23 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 4e5ca3746f0..912591ce22a 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -79,10 +79,7 @@ ) from xarray.core.missing import get_clean_interp_index from xarray.core.options import OPTIONS, _get_keep_attrs -from xarray.core.parallelcompat import ( - get_chunked_array_type, - guess_chunkmanager, -) +from xarray.core.parallelcompat import get_chunked_array_type, guess_chunkmanager from xarray.core.pycompat import ( array_type, is_chunked_array, @@ -5275,34 +5272,31 @@ def to_stacked_array( stacking_dims = tuple(dim for dim in self.dims if dim not in sample_dims) - for variable in self: - dims = self[variable].dims - dims_include_sample_dims = set(sample_dims) <= set(dims) - if not dims_include_sample_dims: + for key, da in self.data_vars.items(): + missing_sample_dims = set(sample_dims) - set(da.dims) + if missing_sample_dims: raise ValueError( - "All variables in the dataset must contain the " - f"dimensions {dims}." + "Variables in the dataset must contain all ``sample_dims`` " + f"({sample_dims!r}) but '{key}' misses {sorted(missing_sample_dims)}" ) - def ensure_stackable(val): - assign_coords = {variable_dim: val.name} - for dim in stacking_dims: - if dim not in val.dims: - assign_coords[dim] = None + def stack_dataarray(da): + # add missing dims/ coords and the name of the variable + + missing_stack_coords = {variable_dim: da.name} + for dim in set(stacking_dims) - set(da.dims): + missing_stack_coords[dim] = None - expand_dims = set(stacking_dims).difference(set(val.dims)) - expand_dims.add(variable_dim) - # must be list for .expand_dims - expand_dims = list(expand_dims) + missing_stack_dims = list(missing_stack_coords) return ( - val.assign_coords(**assign_coords) - .expand_dims(expand_dims) + da.assign_coords(**missing_stack_coords) + .expand_dims(missing_stack_dims) .stack({new_dim: (variable_dim,) + stacking_dims}) ) # concatenate the arrays - stackable_vars = [ensure_stackable(self[key]) for key in self.data_vars] + stackable_vars = [stack_dataarray(da) for da in self.data_vars.values()] data_array = concat(stackable_vars, dim=new_dim) if name is not None: diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 226e2b6dc78..73a1e74214c 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -3792,7 +3792,10 @@ def test_to_stacked_array_invalid_sample_dims(self) -> None: data_vars={"a": (("x", "y"), [[0, 1, 2], [3, 4, 5]]), "b": ("x", [6, 7])}, coords={"y": ["u", "v", "w"]}, ) - with pytest.raises(ValueError): + with pytest.raises( + ValueError, + match=r"Variables in the dataset must contain all ``sample_dims`` \(\['y'\]\) but 'b' misses \['y'\]", + ): data.to_stacked_array("features", sample_dims=["y"]) def test_to_stacked_array_name(self) -> None: From 
2951ce0215f14a8a79ecd0b5fc73a02a34b9b86b Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Mon, 11 Sep 2023 05:55:51 +0200 Subject: [PATCH 7/8] fix miscellaneous `numpy=2.0` errors (#8117) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * replace `np.unicode_` with `np.str_` * replace `np.NaN` with `np.nan` * replace more instances of `np.unicode_` note that with more modern versions of `numpy` the `.astype(np.str_)` don't actually change the dtype, so maybe we can remove those. * more instances of renamed / removed dtypes * more dtype replacements * use `str.encode(encoding)` instead of `bytes(str, encoding)` * explicitly import `RankWarning` * left-over `np.RankWarning` * use `float` instead of the removed `np.float_` * ignore missing stubs for `numpy.exceptions` --------- Co-authored-by: Kai Mühlbauer Co-authored-by: Mathias Hauser Co-authored-by: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Co-authored-by: Deepak Cherian --- pyproject.toml | 1 + xarray/backends/netCDF4_.py | 2 +- xarray/coding/strings.py | 6 +- xarray/coding/times.py | 4 +- xarray/core/accessor_str.py | 2 +- xarray/core/dataarray.py | 8 +-- xarray/core/dataset.py | 19 ++++-- xarray/core/dtypes.py | 2 +- xarray/core/missing.py | 2 +- xarray/core/nputils.py | 8 ++- xarray/tests/test_accessor_str.py | 82 +++++++++++++------------- xarray/tests/test_backends.py | 2 +- xarray/tests/test_dataarray.py | 96 ++++++++++++++++--------------- xarray/tests/test_dataset.py | 20 ++++--- xarray/tests/test_dtypes.py | 10 ++-- xarray/tests/test_groupby.py | 8 +-- xarray/tests/test_variable.py | 14 ++--- xarray/tests/test_weighted.py | 6 +- 18 files changed, 158 insertions(+), 134 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 4d63fd564ba..15c6bf194a5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -64,6 +64,7 @@ module = [ "sparse.*", "toolz.*", "zarr.*", + "numpy.exceptions.*", # remove once support for `numpy<2.0` has been dropped ] [[tool.mypy.overrides]] diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index e3f97d7bac2..7cbfa5b5e4e 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -207,7 +207,7 @@ def _ensure_fill_value_valid(data, attributes): # work around for netCDF4/scipy issue where _FillValue has the wrong type: # https://github.com/Unidata/netcdf4-python/issues/271 if data.dtype.kind == "S" and "_FillValue" in attributes: - attributes["_FillValue"] = np.string_(attributes["_FillValue"]) + attributes["_FillValue"] = np.bytes_(attributes["_FillValue"]) def _force_native_endianness(var): diff --git a/xarray/coding/strings.py b/xarray/coding/strings.py index d10af65c44a..89ceaddd93b 100644 --- a/xarray/coding/strings.py +++ b/xarray/coding/strings.py @@ -100,7 +100,7 @@ def ensure_fixed_length_bytes(var): dims, data, attrs, encoding = unpack_for_encoding(var) if check_vlen_dtype(data.dtype) == bytes: # TODO: figure out how to handle this with dask - data = np.asarray(data, dtype=np.string_) + data = np.asarray(data, dtype=np.bytes_) return Variable(dims, data, attrs, encoding) @@ -151,7 +151,7 @@ def bytes_to_char(arr): def _numpy_bytes_to_char(arr): """Like netCDF4.stringtochar, but faster and more flexible.""" # ensure the array is contiguous - arr = np.array(arr, copy=False, order="C", dtype=np.string_) + arr = np.array(arr, copy=False, order="C", dtype=np.bytes_) return arr.reshape(arr.shape + (1,)).view("S1") @@ -168,7 +168,7 @@ def char_to_bytes(arr): if not size: # can't make an S0 dtype 
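        # A brief sketch of why the rename below is behaviour-preserving
        # (based on numpy's documented alias semantics, not code from this patch):
        #     import numpy as np
        #     np.bytes_ is np.string_  # True on numpy<2.0, where np.string_ is
        #                              # a legacy alias; numpy>=2.0 removes it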
- return np.zeros(arr.shape[:-1], dtype=np.string_) + return np.zeros(arr.shape[:-1], dtype=np.bytes_) if is_chunked_array(arr): chunkmanager = get_chunked_array_type(arr) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 4291d95979c..b531dc97d0c 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -467,7 +467,7 @@ def convert_times(times, date_type, raise_on_invalid: bool = True) -> np.ndarray Useful to convert between calendars in numpy and cftime or between cftime calendars. If raise_on_valid is True (default), invalid dates trigger a ValueError. - Otherwise, the invalid element is replaced by np.NaN for cftime types and np.NaT for np.datetime64. + Otherwise, the invalid element is replaced by np.nan for cftime types and np.NaT for np.datetime64. """ if date_type in (pd.Timestamp, np.datetime64) and not is_np_datetime_like( times.dtype @@ -489,7 +489,7 @@ def convert_times(times, date_type, raise_on_invalid: bool = True) -> np.ndarray f"{date_type(2000, 1, 1).calendar} calendar. Reason: {e}." ) else: - dt = np.NaN + dt = np.nan new[i] = dt return new diff --git a/xarray/core/accessor_str.py b/xarray/core/accessor_str.py index 31028f10350..aa6dc2c7114 100644 --- a/xarray/core/accessor_str.py +++ b/xarray/core/accessor_str.py @@ -471,7 +471,7 @@ def cat(self, *others, sep: str | bytes | Any = "") -> T_DataArray: ... ) >>> values_2 = np.array(3.4) >>> values_3 = "" - >>> values_4 = np.array("test", dtype=np.unicode_) + >>> values_4 = np.array("test", dtype=np.str_) Determine the separator to use diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 57757179af0..fd3ff60cb6c 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -5772,8 +5772,8 @@ def idxmin( >>> array = xr.DataArray( ... [ ... [2.0, 1.0, 2.0, 0.0, -2.0], - ... [-4.0, np.NaN, 2.0, np.NaN, -2.0], - ... [np.NaN, np.NaN, 1.0, np.NaN, np.NaN], + ... [-4.0, np.nan, 2.0, np.nan, -2.0], + ... [np.nan, np.nan, 1.0, np.nan, np.nan], ... ], ... dims=["y", "x"], ... coords={"y": [-1, 0, 1], "x": np.arange(5.0) ** 2}, @@ -5868,8 +5868,8 @@ def idxmax( >>> array = xr.DataArray( ... [ ... [2.0, 1.0, 2.0, 0.0, -2.0], - ... [-4.0, np.NaN, 2.0, np.NaN, -2.0], - ... [np.NaN, np.NaN, 1.0, np.NaN, np.NaN], + ... [-4.0, np.nan, 2.0, np.nan, -2.0], + ... [np.nan, np.nan, 1.0, np.nan, np.nan], ... ], ... dims=["y", "x"], ... coords={"y": [-1, 0, 1], "x": np.arange(5.0) ** 2}, diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 912591ce22a..4e1fe621004 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -24,6 +24,13 @@ from typing import IO, TYPE_CHECKING, Any, Callable, Generic, Literal, cast, overload import numpy as np + +# remove once numpy 2.0 is the oldest supported version +try: + from numpy.exceptions import RankWarning +except ImportError: + from numpy import RankWarning + import pandas as pd from xarray.coding.calendar_ops import convert_calendar, interp_calendar @@ -8785,9 +8792,9 @@ def polyfit( with warnings.catch_warnings(): if full: # Copy np.polyfit behavior - warnings.simplefilter("ignore", np.RankWarning) + warnings.simplefilter("ignore", RankWarning) else: # Raise only once per variable - warnings.simplefilter("once", np.RankWarning) + warnings.simplefilter("once", RankWarning) coeffs, residuals = duck_array_ops.least_squares( lhs, rhs.data, rcond=rcond, skipna=skipna_da @@ -9077,8 +9084,8 @@ def idxmin( >>> array2 = xr.DataArray( ... [ ... [2.0, 1.0, 2.0, 0.0, -2.0], - ... [-4.0, np.NaN, 2.0, np.NaN, -2.0], - ... 
[np.NaN, np.NaN, 1.0, np.NaN, np.NaN], + ... [-4.0, np.nan, 2.0, np.nan, -2.0], + ... [np.nan, np.nan, 1.0, np.nan, np.nan], ... ], ... dims=["y", "x"], ... coords={"y": [-1, 0, 1], "x": ["a", "b", "c", "d", "e"]}, @@ -9174,8 +9181,8 @@ def idxmax( >>> array2 = xr.DataArray( ... [ ... [2.0, 1.0, 2.0, 0.0, -2.0], - ... [-4.0, np.NaN, 2.0, np.NaN, -2.0], - ... [np.NaN, np.NaN, 1.0, np.NaN, np.NaN], + ... [-4.0, np.nan, 2.0, np.nan, -2.0], + ... [np.nan, np.nan, 1.0, np.nan, np.nan], ... ], ... dims=["y", "x"], ... coords={"y": [-1, 0, 1], "x": ["a", "b", "c", "d", "e"]}, diff --git a/xarray/core/dtypes.py b/xarray/core/dtypes.py index 7ac342e3d52..0762fa03112 100644 --- a/xarray/core/dtypes.py +++ b/xarray/core/dtypes.py @@ -40,7 +40,7 @@ def __eq__(self, other): PROMOTE_TO_OBJECT: tuple[tuple[type[np.generic], type[np.generic]], ...] = ( (np.number, np.character), # numpy promotes to character (np.bool_, np.character), # numpy promotes to character - (np.bytes_, np.unicode_), # numpy promotes to unicode + (np.bytes_, np.str_), # numpy promotes to unicode ) diff --git a/xarray/core/missing.py b/xarray/core/missing.py index c6efaebc04c..137d689de3b 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -730,7 +730,7 @@ def interp_func(var, x, new_x, method: InterpOptions, kwargs): # scipy.interpolate.interp1d always forces to float. # Use the same check for blockwise as well: if not issubclass(var.dtype.type, np.inexact): - dtype = np.float_ + dtype = float else: dtype = var.dtype diff --git a/xarray/core/nputils.py b/xarray/core/nputils.py index 1c5b0d3d972..9efa5824954 100644 --- a/xarray/core/nputils.py +++ b/xarray/core/nputils.py @@ -6,6 +6,12 @@ import pandas as pd from numpy.core.multiarray import normalize_axis_index # type: ignore[attr-defined] +# remove once numpy 2.0 is the oldest supported version +try: + from numpy.exceptions import RankWarning +except ImportError: + from numpy import RankWarning + from xarray.core.options import OPTIONS from xarray.core.pycompat import is_duck_array @@ -194,7 +200,7 @@ def _nanpolyfit_1d(arr, x, rcond=None): def warn_on_deficient_rank(rank, order): if rank != order: - warnings.warn("Polyfit may be poorly conditioned", np.RankWarning, stacklevel=2) + warnings.warn("Polyfit may be poorly conditioned", RankWarning, stacklevel=2) def least_squares(lhs, rhs, rcond=None, skipna=False): diff --git a/xarray/tests/test_accessor_str.py b/xarray/tests/test_accessor_str.py index 168d3232f81..dc325a84748 100644 --- a/xarray/tests/test_accessor_str.py +++ b/xarray/tests/test_accessor_str.py @@ -279,22 +279,20 @@ def test_case_bytes() -> None: def test_case_str() -> None: # This string includes some unicode characters # that are common case management corner cases - value = xr.DataArray(["SOme wOrd DŽ ß ᾛ ΣΣ ffi⁵Å Ç Ⅰ"]).astype(np.unicode_) - - exp_capitalized = xr.DataArray(["Some word dž ß ᾓ σς ffi⁵å ç ⅰ"]).astype(np.unicode_) - exp_lowered = xr.DataArray(["some word dž ß ᾓ σς ffi⁵å ç ⅰ"]).astype(np.unicode_) - exp_swapped = xr.DataArray(["soME WoRD dž SS ᾛ σς FFI⁵å ç ⅰ"]).astype(np.unicode_) - exp_titled = xr.DataArray(["Some Word Dž Ss ᾛ Σς Ffi⁵Å Ç Ⅰ"]).astype(np.unicode_) - exp_uppered = xr.DataArray(["SOME WORD DŽ SS ἫΙ ΣΣ FFI⁵Å Ç Ⅰ"]).astype(np.unicode_) - exp_casefolded = xr.DataArray(["some word dž ss ἣι σσ ffi⁵å ç ⅰ"]).astype( - np.unicode_ - ) - - exp_norm_nfc = xr.DataArray(["SOme wOrd DŽ ß ᾛ ΣΣ ffi⁵Å Ç Ⅰ"]).astype(np.unicode_) - exp_norm_nfkc = xr.DataArray(["SOme wOrd DŽ ß ᾛ ΣΣ ffi5Å Ç I"]).astype(np.unicode_) - exp_norm_nfd = 
xr.DataArray(["SOme wOrd DŽ ß ᾛ ΣΣ ffi⁵Å Ç Ⅰ"]).astype(np.unicode_) + value = xr.DataArray(["SOme wOrd DŽ ß ᾛ ΣΣ ffi⁵Å Ç Ⅰ"]).astype(np.str_) + + exp_capitalized = xr.DataArray(["Some word dž ß ᾓ σς ffi⁵å ç ⅰ"]).astype(np.str_) + exp_lowered = xr.DataArray(["some word dž ß ᾓ σς ffi⁵å ç ⅰ"]).astype(np.str_) + exp_swapped = xr.DataArray(["soME WoRD dž SS ᾛ σς FFI⁵å ç ⅰ"]).astype(np.str_) + exp_titled = xr.DataArray(["Some Word Dž Ss ᾛ Σς Ffi⁵Å Ç Ⅰ"]).astype(np.str_) + exp_uppered = xr.DataArray(["SOME WORD DŽ SS ἫΙ ΣΣ FFI⁵Å Ç Ⅰ"]).astype(np.str_) + exp_casefolded = xr.DataArray(["some word dž ss ἣι σσ ffi⁵å ç ⅰ"]).astype(np.str_) + + exp_norm_nfc = xr.DataArray(["SOme wOrd DŽ ß ᾛ ΣΣ ffi⁵Å Ç Ⅰ"]).astype(np.str_) + exp_norm_nfkc = xr.DataArray(["SOme wOrd DŽ ß ᾛ ΣΣ ffi5Å Ç I"]).astype(np.str_) + exp_norm_nfd = xr.DataArray(["SOme wOrd DŽ ß ᾛ ΣΣ ffi⁵Å Ç Ⅰ"]).astype(np.str_) exp_norm_nfkd = xr.DataArray(["SOme wOrd DŽ ß ᾛ ΣΣ ffi5Å Ç I"]).astype( - np.unicode_ + np.str_ ) res_capitalized = value.str.capitalize() @@ -680,7 +678,7 @@ def test_extract_extractall_name_collision_raises(dtype) -> None: def test_extract_single_case(dtype) -> None: pat_str = r"(\w+)_Xy_\d*" pat_re: str | bytes = ( - pat_str if dtype == np.unicode_ else bytes(pat_str, encoding="UTF-8") + pat_str if dtype == np.str_ else bytes(pat_str, encoding="UTF-8") ) pat_compiled = re.compile(pat_re) @@ -728,7 +726,7 @@ def test_extract_single_case(dtype) -> None: def test_extract_single_nocase(dtype) -> None: pat_str = r"(\w+)?_Xy_\d*" pat_re: str | bytes = ( - pat_str if dtype == np.unicode_ else bytes(pat_str, encoding="UTF-8") + pat_str if dtype == np.str_ else bytes(pat_str, encoding="UTF-8") ) pat_compiled = re.compile(pat_re, flags=re.IGNORECASE) @@ -770,7 +768,7 @@ def test_extract_single_nocase(dtype) -> None: def test_extract_multi_case(dtype) -> None: pat_str = r"(\w+)_Xy_(\d*)" pat_re: str | bytes = ( - pat_str if dtype == np.unicode_ else bytes(pat_str, encoding="UTF-8") + pat_str if dtype == np.str_ else bytes(pat_str, encoding="UTF-8") ) pat_compiled = re.compile(pat_re) @@ -810,7 +808,7 @@ def test_extract_multi_case(dtype) -> None: def test_extract_multi_nocase(dtype) -> None: pat_str = r"(\w+)_Xy_(\d*)" pat_re: str | bytes = ( - pat_str if dtype == np.unicode_ else bytes(pat_str, encoding="UTF-8") + pat_str if dtype == np.str_ else bytes(pat_str, encoding="UTF-8") ) pat_compiled = re.compile(pat_re, flags=re.IGNORECASE) @@ -876,7 +874,7 @@ def test_extract_broadcast(dtype) -> None: def test_extractall_single_single_case(dtype) -> None: pat_str = r"(\w+)_Xy_\d*" pat_re: str | bytes = ( - pat_str if dtype == np.unicode_ else bytes(pat_str, encoding="UTF-8") + pat_str if dtype == np.str_ else bytes(pat_str, encoding="UTF-8") ) pat_compiled = re.compile(pat_re) @@ -908,7 +906,7 @@ def test_extractall_single_single_case(dtype) -> None: def test_extractall_single_single_nocase(dtype) -> None: pat_str = r"(\w+)_Xy_\d*" pat_re: str | bytes = ( - pat_str if dtype == np.unicode_ else bytes(pat_str, encoding="UTF-8") + pat_str if dtype == np.str_ else bytes(pat_str, encoding="UTF-8") ) pat_compiled = re.compile(pat_re, flags=re.I) @@ -937,7 +935,7 @@ def test_extractall_single_single_nocase(dtype) -> None: def test_extractall_single_multi_case(dtype) -> None: pat_str = r"(\w+)_Xy_\d*" pat_re: str | bytes = ( - pat_str if dtype == np.unicode_ else bytes(pat_str, encoding="UTF-8") + pat_str if dtype == np.str_ else bytes(pat_str, encoding="UTF-8") ) pat_compiled = re.compile(pat_re) @@ -983,7 +981,7 @@ def 
test_extractall_single_multi_case(dtype) -> None: def test_extractall_single_multi_nocase(dtype) -> None: pat_str = r"(\w+)_Xy_\d*" pat_re: str | bytes = ( - pat_str if dtype == np.unicode_ else bytes(pat_str, encoding="UTF-8") + pat_str if dtype == np.str_ else bytes(pat_str, encoding="UTF-8") ) pat_compiled = re.compile(pat_re, flags=re.I) @@ -1030,7 +1028,7 @@ def test_extractall_single_multi_nocase(dtype) -> None: def test_extractall_multi_single_case(dtype) -> None: pat_str = r"(\w+)_Xy_(\d*)" pat_re: str | bytes = ( - pat_str if dtype == np.unicode_ else bytes(pat_str, encoding="UTF-8") + pat_str if dtype == np.str_ else bytes(pat_str, encoding="UTF-8") ) pat_compiled = re.compile(pat_re) @@ -1065,7 +1063,7 @@ def test_extractall_multi_single_case(dtype) -> None: def test_extractall_multi_single_nocase(dtype) -> None: pat_str = r"(\w+)_Xy_(\d*)" pat_re: str | bytes = ( - pat_str if dtype == np.unicode_ else bytes(pat_str, encoding="UTF-8") + pat_str if dtype == np.str_ else bytes(pat_str, encoding="UTF-8") ) pat_compiled = re.compile(pat_re, flags=re.I) @@ -1097,7 +1095,7 @@ def test_extractall_multi_single_nocase(dtype) -> None: def test_extractall_multi_multi_case(dtype) -> None: pat_str = r"(\w+)_Xy_(\d*)" pat_re: str | bytes = ( - pat_str if dtype == np.unicode_ else bytes(pat_str, encoding="UTF-8") + pat_str if dtype == np.str_ else bytes(pat_str, encoding="UTF-8") ) pat_compiled = re.compile(pat_re) @@ -1147,7 +1145,7 @@ def test_extractall_multi_multi_case(dtype) -> None: def test_extractall_multi_multi_nocase(dtype) -> None: pat_str = r"(\w+)_Xy_(\d*)" pat_re: str | bytes = ( - pat_str if dtype == np.unicode_ else bytes(pat_str, encoding="UTF-8") + pat_str if dtype == np.str_ else bytes(pat_str, encoding="UTF-8") ) pat_compiled = re.compile(pat_re, flags=re.I) @@ -3419,12 +3417,12 @@ def test_cat_multi() -> None: values_4 = "" - values_5 = np.array("", dtype=np.unicode_) + values_5 = np.array("", dtype=np.str_) sep = xr.DataArray( [" ", ", "], dims=["ZZ"], - ).astype(np.unicode_) + ).astype(np.str_) expected = xr.DataArray( [ @@ -3440,7 +3438,7 @@ def test_cat_multi() -> None: ], ], dims=["X", "Y", "ZZ"], - ).astype(np.unicode_) + ).astype(np.str_) res = values_1.str.cat(values_2, values_3, values_4, values_5, sep=sep) @@ -3561,7 +3559,7 @@ def test_format_scalar() -> None: values = xr.DataArray( ["{}.{Y}.{ZZ}", "{},{},{X},{X}", "{X}-{Y}-{ZZ}"], dims=["X"], - ).astype(np.unicode_) + ).astype(np.str_) pos0 = 1 pos1 = 1.2 @@ -3574,7 +3572,7 @@ def test_format_scalar() -> None: expected = xr.DataArray( ["1.X.None", "1,1.2,'test','test'", "'test'-X-None"], dims=["X"], - ).astype(np.unicode_) + ).astype(np.str_) res = values.str.format(pos0, pos1, pos2, X=X, Y=Y, ZZ=ZZ, W=W) @@ -3586,7 +3584,7 @@ def test_format_broadcast() -> None: values = xr.DataArray( ["{}.{Y}.{ZZ}", "{},{},{X},{X}", "{X}-{Y}-{ZZ}"], dims=["X"], - ).astype(np.unicode_) + ).astype(np.str_) pos0 = 1 pos1 = 1.2 @@ -3608,7 +3606,7 @@ def test_format_broadcast() -> None: ["'test'-X-None", "'test'-X-None"], ], dims=["X", "YY"], - ).astype(np.unicode_) + ).astype(np.str_) res = values.str.format(pos0, pos1, pos2, X=X, Y=Y, ZZ=ZZ, W=W) @@ -3620,7 +3618,7 @@ def test_mod_scalar() -> None: values = xr.DataArray( ["%s.%s.%s", "%s,%s,%s", "%s-%s-%s"], dims=["X"], - ).astype(np.unicode_) + ).astype(np.str_) pos0 = 1 pos1 = 1.2 @@ -3629,7 +3627,7 @@ def test_mod_scalar() -> None: expected = xr.DataArray( ["1.1.2.2.3", "1,1.2,2.3", "1-1.2-2.3"], dims=["X"], - ).astype(np.unicode_) + ).astype(np.str_) res = values.str % 
(pos0, pos1, pos2) @@ -3641,7 +3639,7 @@ def test_mod_dict() -> None: values = xr.DataArray( ["%(a)s.%(a)s.%(b)s", "%(b)s,%(c)s,%(b)s", "%(c)s-%(b)s-%(a)s"], dims=["X"], - ).astype(np.unicode_) + ).astype(np.str_) a = 1 b = 1.2 @@ -3650,7 +3648,7 @@ def test_mod_dict() -> None: expected = xr.DataArray( ["1.1.1.2", "1.2,2.3,1.2", "2.3-1.2-1"], dims=["X"], - ).astype(np.unicode_) + ).astype(np.str_) res = values.str % {"a": a, "b": b, "c": c} @@ -3662,7 +3660,7 @@ def test_mod_broadcast_single() -> None: values = xr.DataArray( ["%s_1", "%s_2", "%s_3"], dims=["X"], - ).astype(np.unicode_) + ).astype(np.str_) pos = xr.DataArray( ["2.3", "3.44444"], @@ -3672,7 +3670,7 @@ def test_mod_broadcast_single() -> None: expected = xr.DataArray( [["2.3_1", "3.44444_1"], ["2.3_2", "3.44444_2"], ["2.3_3", "3.44444_3"]], dims=["X", "YY"], - ).astype(np.unicode_) + ).astype(np.str_) res = values.str % pos @@ -3684,7 +3682,7 @@ def test_mod_broadcast_multi() -> None: values = xr.DataArray( ["%s.%s.%s", "%s,%s,%s", "%s-%s-%s"], dims=["X"], - ).astype(np.unicode_) + ).astype(np.str_) pos0 = 1 pos1 = 1.2 @@ -3701,7 +3699,7 @@ def test_mod_broadcast_multi() -> None: ["1-1.2-2.3", "1-1.2-3.44444"], ], dims=["X", "YY"], - ).astype(np.unicode_) + ).astype(np.str_) res = values.str % (pos0, pos1, pos2) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index d54e1004f08..e2ae34f94f2 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -814,7 +814,7 @@ def test_array_type_after_indexing(self) -> None: def test_dropna(self) -> None: # regression test for GH:issue:1694 a = np.random.randn(4, 3) - a[1, 1] = np.NaN + a[1, 1] = np.nan in_memory = xr.Dataset( {"a": (("y", "x"), a)}, coords={"y": np.arange(4), "x": np.arange(3)} ) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 2a28939df41..1f4d259d320 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -14,6 +14,12 @@ import pytest from packaging.version import Version +# remove once numpy 2.0 is the oldest supported version +try: + from numpy.exceptions import RankWarning +except ImportError: + from numpy import RankWarning + import xarray as xr from xarray import ( DataArray, @@ -2846,7 +2852,7 @@ def test_reduce_out(self) -> None: ) def test_quantile(self, q, axis, dim, skipna) -> None: va = self.va.copy(deep=True) - va[0, 0] = np.NaN + va[0, 0] = np.nan actual = DataArray(va).quantile(q, dim=dim, keep_attrs=True, skipna=skipna) _percentile_func = np.nanpercentile if skipna in (True, None) else np.percentile @@ -3124,10 +3130,10 @@ def test_align_str_dtype(self) -> None: b = DataArray([1, 2], dims=["x"], coords={"x": ["b", "c"]}) expected_a = DataArray( - [0, 1, np.NaN], dims=["x"], coords={"x": ["a", "b", "c"]} + [0, 1, np.nan], dims=["x"], coords={"x": ["a", "b", "c"]} ) expected_b = DataArray( - [np.NaN, 1, 2], dims=["x"], coords={"x": ["a", "b", "c"]} + [np.nan, 1, 2], dims=["x"], coords={"x": ["a", "b", "c"]} ) actual_a, actual_b = xr.align(a, b, join="outer") @@ -4211,7 +4217,7 @@ def test_polyfit(self, use_dask, use_datetime) -> None: # Full output and deficient rank with warnings.catch_warnings(): - warnings.simplefilter("ignore", np.RankWarning) + warnings.simplefilter("ignore", RankWarning) out = da.polyfit("x", 12, full=True) assert out.polyfit_residuals.isnull().all() @@ -4232,7 +4238,7 @@ def test_polyfit(self, use_dask, use_datetime) -> None: np.testing.assert_almost_equal(out.polyfit_residuals, [0, 0]) with warnings.catch_warnings(): - 
warnings.simplefilter("ignore", np.RankWarning) + warnings.simplefilter("ignore", RankWarning) out = da.polyfit("x", 8, full=True) np.testing.assert_array_equal(out.polyfit_residuals.isnull(), [True, False]) @@ -4253,7 +4259,7 @@ def test_pad_constant(self) -> None: ar = xr.DataArray([9], dims="x") actual = ar.pad(x=1) - expected = xr.DataArray([np.NaN, 9, np.NaN], dims="x") + expected = xr.DataArray([np.nan, 9, np.nan], dims="x") assert_identical(actual, expected) actual = ar.pad(x=1, constant_values=1.23456) @@ -4261,7 +4267,7 @@ def test_pad_constant(self) -> None: assert_identical(actual, expected) with pytest.raises(ValueError, match="cannot convert float NaN to integer"): - ar.pad(x=1, constant_values=np.NaN) + ar.pad(x=1, constant_values=np.nan) def test_pad_coords(self) -> None: ar = DataArray( @@ -4699,10 +4705,10 @@ def setup(self): np.array([0.0, 1.0, 2.0, 0.0, -2.0, -4.0, 2.0]), 5, 2, None, id="float" ), pytest.param( - np.array([1.0, np.NaN, 2.0, np.NaN, -2.0, -4.0, 2.0]), 5, 2, 1, id="nan" + np.array([1.0, np.nan, 2.0, np.nan, -2.0, -4.0, 2.0]), 5, 2, 1, id="nan" ), pytest.param( - np.array([1.0, np.NaN, 2.0, np.NaN, -2.0, -4.0, 2.0]).astype("object"), + np.array([1.0, np.nan, 2.0, np.nan, -2.0, -4.0, 2.0]).astype("object"), 5, 2, 1, @@ -4711,7 +4717,7 @@ def setup(self): ), id="obj", ), - pytest.param(np.array([np.NaN, np.NaN]), np.NaN, np.NaN, 0, id="allnan"), + pytest.param(np.array([np.nan, np.nan]), np.nan, np.nan, 0, id="allnan"), pytest.param( np.array( ["2015-12-31", "2020-01-02", "2020-01-01", "2016-01-01"], @@ -4906,7 +4912,7 @@ def test_idxmin( if hasna: coordarr1[...] = 1 - fill_value_0 = np.NaN + fill_value_0 = np.nan else: fill_value_0 = 1 @@ -4920,7 +4926,7 @@ def test_idxmin( assert_identical(result0, expected0) # Manually specify NaN fill_value - result1 = ar0.idxmin(fill_value=np.NaN) + result1 = ar0.idxmin(fill_value=np.nan) assert_identical(result1, expected0) # keep_attrs @@ -5021,7 +5027,7 @@ def test_idxmax( if hasna: coordarr1[...] 
= 1 - fill_value_0 = np.NaN + fill_value_0 = np.nan else: fill_value_0 = 1 @@ -5035,7 +5041,7 @@ def test_idxmax( assert_identical(result0, expected0) # Manually specify NaN fill_value - result1 = ar0.idxmax(fill_value=np.NaN) + result1 = ar0.idxmax(fill_value=np.nan) assert_identical(result1, expected0) # keep_attrs @@ -5200,12 +5206,12 @@ def test_argmax_dim( np.array( [ [2.0, 1.0, 2.0, 0.0, -2.0, -4.0, 2.0], - [-4.0, np.NaN, 2.0, np.NaN, -2.0, -4.0, 2.0], - [np.NaN] * 7, + [-4.0, np.nan, 2.0, np.nan, -2.0, -4.0, 2.0], + [np.nan] * 7, ] ), - [5, 0, np.NaN], - [0, 2, np.NaN], + [5, 0, np.nan], + [0, 2, np.nan], [None, 1, 0], id="nan", ), @@ -5213,12 +5219,12 @@ def test_argmax_dim( np.array( [ [2.0, 1.0, 2.0, 0.0, -2.0, -4.0, 2.0], - [-4.0, np.NaN, 2.0, np.NaN, -2.0, -4.0, 2.0], - [np.NaN] * 7, + [-4.0, np.nan, 2.0, np.nan, -2.0, -4.0, 2.0], + [np.nan] * 7, ] ).astype("object"), - [5, 0, np.NaN], - [0, 2, np.NaN], + [5, 0, np.nan], + [0, 2, np.nan], [None, 1, 0], marks=pytest.mark.filterwarnings( "ignore:invalid value encountered in reduce:RuntimeWarning:" @@ -5493,7 +5499,7 @@ def test_idxmin( coordarr1[hasna, :] = 1 minindex0 = [x if not np.isnan(x) else 0 for x in minindex] - nan_mult_0 = np.array([np.NaN if x else 1 for x in hasna])[:, None] + nan_mult_0 = np.array([np.nan if x else 1 for x in hasna])[:, None] expected0list = [ (coordarr1 * nan_mult_0).isel(y=yi).isel(x=indi, drop=True) for yi, indi in enumerate(minindex0) @@ -5508,7 +5514,7 @@ def test_idxmin( # Manually specify NaN fill_value with raise_if_dask_computes(max_computes=max_computes): - result1 = ar0.idxmin(dim="x", fill_value=np.NaN) + result1 = ar0.idxmin(dim="x", fill_value=np.nan) assert_identical(result1, expected0) # keep_attrs @@ -5635,7 +5641,7 @@ def test_idxmax( coordarr1[hasna, :] = 1 maxindex0 = [x if not np.isnan(x) else 0 for x in maxindex] - nan_mult_0 = np.array([np.NaN if x else 1 for x in hasna])[:, None] + nan_mult_0 = np.array([np.nan if x else 1 for x in hasna])[:, None] expected0list = [ (coordarr1 * nan_mult_0).isel(y=yi).isel(x=indi, drop=True) for yi, indi in enumerate(maxindex0) @@ -5650,7 +5656,7 @@ def test_idxmax( # Manually specify NaN fill_value with raise_if_dask_computes(max_computes=max_computes): - result1 = ar0.idxmax(dim="x", fill_value=np.NaN) + result1 = ar0.idxmax(dim="x", fill_value=np.nan) assert_identical(result1, expected0) # keep_attrs @@ -5909,31 +5915,31 @@ def test_argmax_dim( np.array( [ [[2.0, 1.0, 2.0, 0.0], [-2.0, -4.0, 2.0, 0.0]], - [[-4.0, np.NaN, 2.0, np.NaN], [-2.0, -4.0, 2.0, 0.0]], - [[np.NaN] * 4, [np.NaN] * 4], + [[-4.0, np.nan, 2.0, np.nan], [-2.0, -4.0, 2.0, 0.0]], + [[np.nan] * 4, [np.nan] * 4], ] ), {"x": np.array([[1, 0, 0, 0], [0, 0, 0, 0]])}, { "y": np.array( - [[1, 1, 0, 0], [0, 1, 0, 1], [np.NaN, np.NaN, np.NaN, np.NaN]] + [[1, 1, 0, 0], [0, 1, 0, 1], [np.nan, np.nan, np.nan, np.nan]] ) }, - {"z": np.array([[3, 1], [0, 1], [np.NaN, np.NaN]])}, + {"z": np.array([[3, 1], [0, 1], [np.nan, np.nan]])}, {"x": np.array([1, 0, 0, 0]), "y": np.array([0, 1, 0, 0])}, {"x": np.array([1, 0]), "z": np.array([0, 1])}, - {"y": np.array([1, 0, np.NaN]), "z": np.array([1, 0, np.NaN])}, + {"y": np.array([1, 0, np.nan]), "z": np.array([1, 0, np.nan])}, {"x": np.array(0), "y": np.array(1), "z": np.array(1)}, {"x": np.array([[0, 0, 0, 0], [0, 0, 0, 0]])}, { "y": np.array( - [[0, 0, 0, 0], [1, 1, 0, 1], [np.NaN, np.NaN, np.NaN, np.NaN]] + [[0, 0, 0, 0], [1, 1, 0, 1], [np.nan, np.nan, np.nan, np.nan]] ) }, - {"z": np.array([[0, 2], [2, 2], [np.NaN, np.NaN]])}, + {"z": 
np.array([[0, 2], [2, 2], [np.nan, np.nan]])}, {"x": np.array([0, 0, 0, 0]), "y": np.array([0, 0, 0, 0])}, {"x": np.array([0, 0]), "z": np.array([2, 2])}, - {"y": np.array([0, 0, np.NaN]), "z": np.array([0, 2, np.NaN])}, + {"y": np.array([0, 0, np.nan]), "z": np.array([0, 2, np.nan])}, {"x": np.array(0), "y": np.array(0), "z": np.array(0)}, {"x": np.array([[2, 1, 2, 1], [2, 2, 2, 2]])}, { @@ -5952,31 +5958,31 @@ def test_argmax_dim( np.array( [ [[2.0, 1.0, 2.0, 0.0], [-2.0, -4.0, 2.0, 0.0]], - [[-4.0, np.NaN, 2.0, np.NaN], [-2.0, -4.0, 2.0, 0.0]], - [[np.NaN] * 4, [np.NaN] * 4], + [[-4.0, np.nan, 2.0, np.nan], [-2.0, -4.0, 2.0, 0.0]], + [[np.nan] * 4, [np.nan] * 4], ] ).astype("object"), {"x": np.array([[1, 0, 0, 0], [0, 0, 0, 0]])}, { "y": np.array( - [[1, 1, 0, 0], [0, 1, 0, 1], [np.NaN, np.NaN, np.NaN, np.NaN]] + [[1, 1, 0, 0], [0, 1, 0, 1], [np.nan, np.nan, np.nan, np.nan]] ) }, - {"z": np.array([[3, 1], [0, 1], [np.NaN, np.NaN]])}, + {"z": np.array([[3, 1], [0, 1], [np.nan, np.nan]])}, {"x": np.array([1, 0, 0, 0]), "y": np.array([0, 1, 0, 0])}, {"x": np.array([1, 0]), "z": np.array([0, 1])}, - {"y": np.array([1, 0, np.NaN]), "z": np.array([1, 0, np.NaN])}, + {"y": np.array([1, 0, np.nan]), "z": np.array([1, 0, np.nan])}, {"x": np.array(0), "y": np.array(1), "z": np.array(1)}, {"x": np.array([[0, 0, 0, 0], [0, 0, 0, 0]])}, { "y": np.array( - [[0, 0, 0, 0], [1, 1, 0, 1], [np.NaN, np.NaN, np.NaN, np.NaN]] + [[0, 0, 0, 0], [1, 1, 0, 1], [np.nan, np.nan, np.nan, np.nan]] ) }, - {"z": np.array([[0, 2], [2, 2], [np.NaN, np.NaN]])}, + {"z": np.array([[0, 2], [2, 2], [np.nan, np.nan]])}, {"x": np.array([0, 0, 0, 0]), "y": np.array([0, 0, 0, 0])}, {"x": np.array([0, 0]), "z": np.array([2, 2])}, - {"y": np.array([0, 0, np.NaN]), "z": np.array([0, 2, np.NaN])}, + {"y": np.array([0, 0, np.nan]), "z": np.array([0, 2, np.nan])}, {"x": np.array(0), "y": np.array(0), "z": np.array(0)}, {"x": np.array([[2, 1, 2, 1], [2, 2, 2, 2]])}, { @@ -6522,12 +6528,12 @@ def test_isin(da) -> None: def test_raise_no_warning_for_nan_in_binary_ops() -> None: with assert_no_warnings(): - xr.DataArray([1, 2, np.NaN]) > 0 + xr.DataArray([1, 2, np.nan]) > 0 @pytest.mark.filterwarnings("error") def test_no_warning_for_all_nan() -> None: - _ = xr.DataArray([np.NaN, np.NaN]).mean() + _ = xr.DataArray([np.nan, np.nan]).mean() def test_name_in_masking() -> None: @@ -6567,7 +6573,7 @@ def test_to_and_from_iris(self) -> None: ) # Set a bad value to test the masking logic - original.data[0, 2] = np.NaN + original.data[0, 2] = np.nan original.attrs["cell_methods"] = "height: mean (comment: A cell method)" actual = original.to_iris() diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 73a1e74214c..882285ac8ec 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -15,6 +15,12 @@ import pytest from pandas.core.indexes.datetimes import DatetimeIndex +# remove once numpy 2.0 is the oldest supported version +try: + from numpy.exceptions import RankWarning +except ImportError: + from numpy import RankWarning + import xarray as xr from xarray import ( DataArray, @@ -118,7 +124,7 @@ def create_append_test_data(seed=None) -> tuple[Dataset, Dataset, Dataset]: ), "unicode_var": xr.DataArray( unicode_var, coords=[time1], dims=["time"] - ).astype(np.unicode_), + ).astype(np.str_), "datetime_var": xr.DataArray( datetime_var, coords=[time1], dims=["time"] ), @@ -141,7 +147,7 @@ def create_append_test_data(seed=None) -> tuple[Dataset, Dataset, Dataset]: ), "unicode_var": xr.DataArray( 
unicode_var[:nt2], coords=[time2], dims=["time"] - ).astype(np.unicode_), + ).astype(np.str_), "datetime_var": xr.DataArray( datetime_var_to_append, coords=[time2], dims=["time"] ), @@ -2432,10 +2438,10 @@ def test_align_str_dtype(self) -> None: b = Dataset({"foo": ("x", [1, 2])}, coords={"x": ["b", "c"]}) expected_a = Dataset( - {"foo": ("x", [0, 1, np.NaN])}, coords={"x": ["a", "b", "c"]} + {"foo": ("x", [0, 1, np.nan])}, coords={"x": ["a", "b", "c"]} ) expected_b = Dataset( - {"foo": ("x", [np.NaN, 1, 2])}, coords={"x": ["a", "b", "c"]} + {"foo": ("x", [np.nan, 1, 2])}, coords={"x": ["a", "b", "c"]} ) actual_a, actual_b = xr.align(a, b, join="outer") @@ -5505,7 +5511,7 @@ def test_reduce_keepdims(self) -> None: @pytest.mark.parametrize("q", [0.25, [0.50], [0.25, 0.75]]) def test_quantile(self, q, skipna) -> None: ds = create_test_data(seed=123) - ds.var1.data[0, 0] = np.NaN + ds.var1.data[0, 0] = np.nan for dim in [None, "dim1", ["dim1"]]: ds_quantile = ds.quantile(q, dim=dim, skipna=skipna) @@ -6378,7 +6384,7 @@ def test_polyfit_warnings(self) -> None: with warnings.catch_warnings(record=True) as ws: ds.var1.polyfit("dim2", 10, full=False) assert len(ws) == 1 - assert ws[0].category == np.RankWarning + assert ws[0].category == RankWarning ds.var1.polyfit("dim2", 10, full=True) assert len(ws) == 1 @@ -6705,7 +6711,7 @@ def test_dir_unicode(ds) -> None: def test_raise_no_warning_for_nan_in_binary_ops() -> None: with assert_no_warnings(): - Dataset(data_vars={"x": ("y", [1, 2, np.NaN])}) > 0 + Dataset(data_vars={"x": ("y", [1, 2, np.nan])}) > 0 @pytest.mark.filterwarnings("error") diff --git a/xarray/tests/test_dtypes.py b/xarray/tests/test_dtypes.py index 490520c8f54..3c2ee5e8f6f 100644 --- a/xarray/tests/test_dtypes.py +++ b/xarray/tests/test_dtypes.py @@ -10,12 +10,12 @@ "args, expected", [ ([bool], bool), - ([bool, np.string_], np.object_), + ([bool, np.bytes_], np.object_), ([np.float32, np.float64], np.float64), - ([np.float32, np.string_], np.object_), - ([np.unicode_, np.int64], np.object_), - ([np.unicode_, np.unicode_], np.unicode_), - ([np.bytes_, np.unicode_], np.object_), + ([np.float32, np.bytes_], np.object_), + ([np.str_, np.int64], np.object_), + ([np.str_, np.str_], np.str_), + ([np.bytes_, np.str_], np.object_), ], ) def test_result_type(args, expected) -> None: diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 5d99eda1e88..b961abef5db 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -232,11 +232,11 @@ def test_da_groupby_quantile() -> None: assert_identical(expected, actual) array = xr.DataArray( - data=[np.NaN, 2, 3, 4, 5, 6], coords={"x": [1, 1, 1, 2, 2, 2]}, dims="x" + data=[np.nan, 2, 3, 4, 5, 6], coords={"x": [1, 1, 1, 2, 2, 2]}, dims="x" ) for skipna in (True, False, None): - e = [np.NaN, 5] if skipna is False else [2.5, 5] + e = [np.nan, 5] if skipna is False else [2.5, 5] expected = xr.DataArray(data=e, coords={"x": [1, 2], "quantile": 0.5}, dims="x") actual = array.groupby("x").quantile(0.5, skipna=skipna) @@ -346,12 +346,12 @@ def test_ds_groupby_quantile() -> None: assert_identical(expected, actual) ds = xr.Dataset( - data_vars={"a": ("x", [np.NaN, 2, 3, 4, 5, 6])}, + data_vars={"a": ("x", [np.nan, 2, 3, 4, 5, 6])}, coords={"x": [1, 1, 1, 2, 2, 2]}, ) for skipna in (True, False, None): - e = [np.NaN, 5] if skipna is False else [2.5, 5] + e = [np.nan, 5] if skipna is False else [2.5, 5] expected = xr.Dataset( data_vars={"a": ("x", e)}, coords={"quantile": 0.5, "x": [1, 2]} diff --git 
a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 2ef34201a8b..c4bac3b2c61 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -196,7 +196,7 @@ def test_index_0d_int(self): self._assertIndexedLikeNDArray(x, value, dtype) def test_index_0d_float(self): - for value, dtype in [(0.5, np.float_), (np.float32(0.5), np.float32)]: + for value, dtype in [(0.5, float), (np.float32(0.5), np.float32)]: x = self.cls(["x"], [value]) self._assertIndexedLikeNDArray(x, value, dtype) @@ -1127,9 +1127,9 @@ def test_0d_str(self): assert v.dtype == np.dtype("U3") assert v.values == "foo" - v = Variable([], np.string_("foo")) + v = Variable([], np.bytes_("foo")) assert v.dtype == np.dtype("S3") - assert v.values == bytes("foo", "ascii") + assert v.values == "foo".encode("ascii") def test_0d_datetime(self): v = Variable([], pd.Timestamp("2000-01-01")) @@ -1466,10 +1466,10 @@ def test_isel(self): def test_index_0d_numpy_string(self): # regression test to verify our work around for indexing 0d strings - v = Variable([], np.string_("asdf")) + v = Variable([], np.bytes_("asdf")) assert_identical(v[()], v) - v = Variable([], np.unicode_("asdf")) + v = Variable([], np.str_("asdf")) assert_identical(v[()], v) def test_indexing_0d_unicode(self): @@ -1810,7 +1810,7 @@ def raise_if_called(*args, **kwargs): ) def test_quantile(self, q, axis, dim, skipna): d = self.d.copy() - d[0, 0] = np.NaN + d[0, 0] = np.nan v = Variable(["x", "y"], d) actual = v.quantile(q, dim=dim, skipna=skipna) @@ -2719,7 +2719,7 @@ def __init__(self, array): def test_raise_no_warning_for_nan_in_binary_ops(): with assert_no_warnings(): - Variable("x", [1, 2, np.NaN]) > 0 + Variable("x", [1, 2, np.nan]) > 0 class TestBackendIndexing: diff --git a/xarray/tests/test_weighted.py b/xarray/tests/test_weighted.py index 628d6310945..95fda3fac62 100644 --- a/xarray/tests/test_weighted.py +++ b/xarray/tests/test_weighted.py @@ -608,7 +608,7 @@ def test_weighted_operations_3D(dim, add_nans, skipna): # add approximately 25 % NaNs (https://stackoverflow.com/a/32182680/3010700) if add_nans: c = int(data.size * 0.25) - data.ravel()[np.random.choice(data.size, c, replace=False)] = np.NaN + data.ravel()[np.random.choice(data.size, c, replace=False)] = np.nan data = DataArray(data, dims=dims, coords=coords) @@ -631,7 +631,7 @@ def test_weighted_quantile_3D(dim, q, add_nans, skipna): # add approximately 25 % NaNs (https://stackoverflow.com/a/32182680/3010700) if add_nans: c = int(data.size * 0.25) - data.ravel()[np.random.choice(data.size, c, replace=False)] = np.NaN + data.ravel()[np.random.choice(data.size, c, replace=False)] = np.nan da = DataArray(data, dims=dims, coords=coords) @@ -709,7 +709,7 @@ def test_weighted_operations_different_shapes( # add approximately 25 % NaNs if add_nans: c = int(data.size * 0.25) - data.ravel()[np.random.choice(data.size, c, replace=False)] = np.NaN + data.ravel()[np.random.choice(data.size, c, replace=False)] = np.nan data = DataArray(data) From cd6ba930a64a5129439606e7aae5f9c819f6c88c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 11 Sep 2023 14:32:03 +0200 Subject: [PATCH 8/8] Bump actions/checkout from 3 to 4 (#8169) Bumps [actions/checkout](https://github.com/actions/checkout) from 3 to 4. 
- [Release notes](https://github.com/actions/checkout/releases) - [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md) - [Commits](https://github.com/actions/checkout/compare/v3...v4) --- updated-dependencies: - dependency-name: actions/checkout dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/benchmarks-last-release.yml | 2 +- .github/workflows/benchmarks.yml | 2 +- .github/workflows/ci-additional.yaml | 10 +++++----- .github/workflows/ci.yaml | 4 ++-- .github/workflows/nightly-wheels.yml | 2 +- .github/workflows/pypi-release.yaml | 2 +- .github/workflows/upstream-dev-ci.yaml | 6 +++--- 7 files changed, 14 insertions(+), 14 deletions(-) diff --git a/.github/workflows/benchmarks-last-release.yml b/.github/workflows/benchmarks-last-release.yml index e1ae9b1b62e..40f06c82107 100644 --- a/.github/workflows/benchmarks-last-release.yml +++ b/.github/workflows/benchmarks-last-release.yml @@ -17,7 +17,7 @@ jobs: steps: # We need the full repo to avoid this issue # https://github.com/actions/checkout/issues/23 - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 0 diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index ade00b942e7..08f39e36762 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -17,7 +17,7 @@ jobs: steps: # We need the full repo to avoid this issue # https://github.com/actions/checkout/issues/23 - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 0 diff --git a/.github/workflows/ci-additional.yaml b/.github/workflows/ci-additional.yaml index e3c406a981b..ec1c192fd35 100644 --- a/.github/workflows/ci-additional.yaml +++ b/.github/workflows/ci-additional.yaml @@ -22,7 +22,7 @@ jobs: outputs: triggered: ${{ steps.detect-trigger.outputs.trigger-found }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 2 - uses: xarray-contrib/ci-trigger@v1 @@ -44,7 +44,7 @@ jobs: PYTHON_VERSION: "3.10" steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 0 # Fetch all history for all branches and tags. @@ -92,7 +92,7 @@ jobs: PYTHON_VERSION: "3.10" steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 0 # Fetch all history for all branches and tags. @@ -146,7 +146,7 @@ jobs: PYTHON_VERSION: "3.9" steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 0 # Fetch all history for all branches and tags. @@ -205,7 +205,7 @@ jobs: fail-fast: false steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 0 # Fetch all history for all branches and tags. diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 4d59fe0531f..7ee197aeda3 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -22,7 +22,7 @@ jobs: outputs: triggered: ${{ steps.detect-trigger.outputs.trigger-found }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 2 - uses: xarray-contrib/ci-trigger@v1 @@ -60,7 +60,7 @@ jobs: python-version: "3.10" os: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 0 # Fetch all history for all branches and tags. 
- name: Set environment variables diff --git a/.github/workflows/nightly-wheels.yml b/.github/workflows/nightly-wheels.yml index 562e442683e..ca3499386c9 100644 --- a/.github/workflows/nightly-wheels.yml +++ b/.github/workflows/nightly-wheels.yml @@ -8,7 +8,7 @@ jobs: runs-on: ubuntu-latest if: github.repository == 'pydata/xarray' steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 0 - uses: actions/setup-python@v4 diff --git a/.github/workflows/pypi-release.yaml b/.github/workflows/pypi-release.yaml index 5f4a2cd364c..916bd33528a 100644 --- a/.github/workflows/pypi-release.yaml +++ b/.github/workflows/pypi-release.yaml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-latest if: github.repository == 'pydata/xarray' steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 0 - uses: actions/setup-python@v4 diff --git a/.github/workflows/upstream-dev-ci.yaml b/.github/workflows/upstream-dev-ci.yaml index 7c60f20125e..d01fc5cdffc 100644 --- a/.github/workflows/upstream-dev-ci.yaml +++ b/.github/workflows/upstream-dev-ci.yaml @@ -25,7 +25,7 @@ jobs: outputs: triggered: ${{ steps.detect-trigger.outputs.trigger-found }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 2 - uses: xarray-contrib/ci-trigger@v1 @@ -52,7 +52,7 @@ jobs: matrix: python-version: ["3.10"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 0 # Fetch all history for all branches and tags. - name: Set up conda environment @@ -112,7 +112,7 @@ jobs: matrix: python-version: ["3.10"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 0 # Fetch all history for all branches and tags. - name: Set up conda environment