From 74901af7df26a1e795ee62ce98e3cab545347306 Mon Sep 17 00:00:00 2001
From: Kai Mühlbauer
Date: Fri, 17 Nov 2023 16:25:59 +0100
Subject: [PATCH 01/58] preserve vlen string dtypes, allow vlen string
fill_values (#7869)
* preserve vlen string dtypes, allow vlen string fill_values
* update min-deps h5py->3.7, h5netcdf -> 1.1
* add doc/whats-new.rst entry
* add suggestions from review
* remove stale entry
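
As a rough usage sketch (file name hypothetical; assumes the netCDF4 or h5netcdf engine is installed), writes like the following previously raised NotImplementedError and should now round-trip:

    import numpy as np
    import xarray as xr

    # object-dtype (variable-length) strings with a string fill value
    values = np.array(["ab", "cdef", np.nan], dtype=object)
    ds = xr.Dataset({"x": ("t", values)})
    ds["x"].encoding["_FillValue"] = "XXX"  # previously raised NotImplementedError
    ds.to_netcdf("strings.nc", engine="h5netcdf")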
---
ci/requirements/min-all-deps.yml | 4 +--
doc/whats-new.rst | 5 ++-
xarray/backends/h5netcdf_.py | 9 -----
xarray/backends/netCDF4_.py | 10 ------
xarray/coding/variables.py | 12 +++++++
xarray/conventions.py | 4 +++
xarray/tests/test_backends.py | 62 ++++++++++++++++++--------------
7 files changed, 57 insertions(+), 49 deletions(-)
diff --git a/ci/requirements/min-all-deps.yml b/ci/requirements/min-all-deps.yml
index 7d0f29c0960..22076f88854 100644
--- a/ci/requirements/min-all-deps.yml
+++ b/ci/requirements/min-all-deps.yml
@@ -16,11 +16,11 @@ dependencies:
- dask-core=2022.7
- distributed=2022.7
- flox=0.5
- - h5netcdf=1.0
+ - h5netcdf=1.1
# h5py and hdf5 tend to cause conflicts
# for e.g. hdf5 1.12 conflicts with h5py=3.1
# prioritize bumping other packages instead
- - h5py=3.6
+ - h5py=3.7
- hdf5=1.12
- hypothesis
- iris=3.2
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 2fb76cfe8c2..928a2c95cac 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -32,13 +32,14 @@ New Features
By `Sam Levang `_.
- Allow the usage of h5py drivers (eg: ros3) via h5netcdf (:pull:`8360`).
By `Ezequiel Cimadevilla `_.
+- Enable VLEN string fill_values, preserve VLEN string dtypes (:issue:`1647`, :issue:`7652`, :issue:`7868`, :pull:`7869`).
+ By `Kai Mühlbauer `_.
Breaking changes
~~~~~~~~~~~~~~~~
- drop support for `cdms2 `_. Please use
`xcdat `_ instead (:pull:`8441`).
By `Justus Magin `_.
-
- Following pandas, :py:meth:`infer_freq` will return ``"Y"``, ``"YS"``,
``"QE"``, ``"ME"``, ``"h"``, ``"min"``, ``"s"``, ``"ms"``, ``"us"``, or
``"ns"`` instead of ``"A"``, ``"AS"``, ``"Q"``, ``"M"``, ``"H"``, ``"T"``,
@@ -46,6 +47,8 @@ Breaking changes
deprecation of the latter frequency strings (:issue:`8394`, :pull:`8415`). By
`Spencer Clark `_.
- Bump minimum tested pint version to ``>=0.22``. By `Deepak Cherian `_.
+- Minimum supported versions for the following packages have changed: ``h5py>=3.7``, ``h5netcdf>=1.1``.
+ By `Kai Mühlbauer `_.
Deprecations
~~~~~~~~~~~~
diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py
index a68a44b5f6f..d9385fc68a9 100644
--- a/xarray/backends/h5netcdf_.py
+++ b/xarray/backends/h5netcdf_.py
@@ -271,15 +271,6 @@ def prepare_variable(
dtype = _get_datatype(variable, raise_on_invalid_encoding=check_encoding)
fillvalue = attrs.pop("_FillValue", None)
- if dtype is str and fillvalue is not None:
- raise NotImplementedError(
- "h5netcdf does not yet support setting a fill value for "
- "variable-length strings "
- "(https://github.com/h5netcdf/h5netcdf/issues/37). "
- f"Either remove '_FillValue' from encoding on variable {name!r} "
- "or set {'dtype': 'S1'} in encoding to use the fixed width "
- "NC_CHAR type."
- )
if dtype is str:
dtype = h5py.special_dtype(vlen=str)
diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py
index f21f15bf795..1aee4c1c726 100644
--- a/xarray/backends/netCDF4_.py
+++ b/xarray/backends/netCDF4_.py
@@ -490,16 +490,6 @@ def prepare_variable(
fill_value = attrs.pop("_FillValue", None)
- if datatype is str and fill_value is not None:
- raise NotImplementedError(
- "netCDF4 does not yet support setting a fill value for "
- "variable-length strings "
- "(https://github.com/Unidata/netcdf4-python/issues/730). "
- f"Either remove '_FillValue' from encoding on variable {name!r} "
- "or set {'dtype': 'S1'} in encoding to use the fixed width "
- "NC_CHAR type."
- )
-
encoding = _extract_nc4_variable_encoding(
variable, raise_on_invalid=check_encoding, unlimited_dims=unlimited_dims
)
diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py
index df660f90d9e..487197605e8 100644
--- a/xarray/coding/variables.py
+++ b/xarray/coding/variables.py
@@ -562,3 +562,15 @@ def encode(self, variable: Variable, name: T_Name = None) -> Variable:
def decode(self):
raise NotImplementedError()
+
+
+class ObjectVLenStringCoder(VariableCoder):
+ def encode(self):
+ raise NotImplementedError()
+
+ def decode(self, variable: Variable, name: T_Name = None) -> Variable:
+ if variable.dtype == object and variable.encoding.get("dtype", False) == str:
+ variable = variable.astype(variable.encoding["dtype"])
+ return variable
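
A minimal sketch of what the new coder does on read (values hypothetical): object-dtype data whose encoding requests ``str`` is cast to a fixed-width unicode dtype.

    import numpy as np
    import xarray as xr
    from xarray.coding.variables import ObjectVLenStringCoder

    var = xr.Variable(
        ("t",), np.array(["ab", "cdef"], dtype=object), encoding={"dtype": str}
    )
    decoded = ObjectVLenStringCoder().decode(var)
    print(decoded.dtype)  # <U4, fixed-width unicode instead of object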
diff --git a/xarray/conventions.py b/xarray/conventions.py
index cf207f0c37a..75f816e6cb4 100644
--- a/xarray/conventions.py
+++ b/xarray/conventions.py
@@ -265,6 +265,10 @@ def decode_cf_variable(
var = strings.CharacterArrayCoder().decode(var, name=name)
var = strings.EncodedStringCoder().decode(var)
+ if original_dtype == object:
+ var = variables.ObjectVLenStringCoder().decode(var)
+ original_dtype = var.dtype
+
if mask_and_scale:
for coder in [
variables.UnsignedIntegerCoder(),
diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
index 80b6951dbff..85248b5c40a 100644
--- a/xarray/tests/test_backends.py
+++ b/xarray/tests/test_backends.py
@@ -864,12 +864,13 @@ def test_roundtrip_empty_vlen_string_array(self) -> None:
assert check_vlen_dtype(original["a"].dtype) == str
with self.roundtrip(original) as actual:
assert_identical(original, actual)
- assert object == actual["a"].dtype
- assert actual["a"].dtype == original["a"].dtype
- # only check metadata for capable backends
- # eg. NETCDF3 based backends do not roundtrip metadata
- if actual["a"].dtype.metadata is not None:
- assert check_vlen_dtype(actual["a"].dtype) == str
+ if np.issubdtype(actual["a"].dtype, object):
+ # only check metadata for capable backends
+ # eg. NETCDF3 based backends do not roundtrip metadata
+ if actual["a"].dtype.metadata is not None:
+ assert check_vlen_dtype(actual["a"].dtype) == str
+ else:
+ assert actual["a"].dtype == np.dtype(" None:
with self.open(tmp_file, group="data/2") as actual2:
assert_identical(data2, actual2)
- def test_encoding_kwarg_vlen_string(self) -> None:
- for input_strings in [[b"foo", b"bar", b"baz"], ["foo", "bar", "baz"]]:
- original = Dataset({"x": input_strings})
- expected = Dataset({"x": ["foo", "bar", "baz"]})
- kwargs = dict(encoding={"x": {"dtype": str}})
- with self.roundtrip(original, save_kwargs=kwargs) as actual:
- assert actual["x"].encoding["dtype"] is str
- assert_identical(actual, expected)
-
- def test_roundtrip_string_with_fill_value_vlen(self) -> None:
+ @pytest.mark.parametrize(
+ "input_strings, is_bytes",
+ [
+ ([b"foo", b"bar", b"baz"], True),
+ (["foo", "bar", "baz"], False),
+ (["foó", "bár", "baź"], False),
+ ],
+ )
+ def test_encoding_kwarg_vlen_string(
+ self, input_strings: list[str], is_bytes: bool
+ ) -> None:
+ original = Dataset({"x": input_strings})
+
+ expected_string = ["foo", "bar", "baz"] if is_bytes else input_strings
+ expected = Dataset({"x": expected_string})
+ kwargs = dict(encoding={"x": {"dtype": str}})
+ with self.roundtrip(original, save_kwargs=kwargs) as actual:
+ assert actual["x"].encoding["dtype"] == " None:
values = np.array(["ab", "cdef", np.nan], dtype=object)
expected = Dataset({"x": ("t", values)})
- # netCDF4-based backends don't support an explicit fillvalue
- # for variable length strings yet.
- # https://github.com/Unidata/netcdf4-python/issues/730
- # https://github.com/h5netcdf/h5netcdf/issues/37
- original = Dataset({"x": ("t", values, {}, {"_FillValue": "XXX"})})
- with pytest.raises(NotImplementedError):
- with self.roundtrip(original) as actual:
- assert_identical(expected, actual)
+ original = Dataset({"x": ("t", values, {}, {"_FillValue": fill_value})})
+ with self.roundtrip(original) as actual:
+ assert_identical(expected, actual)
original = Dataset({"x": ("t", values, {}, {"_FillValue": ""})})
- with pytest.raises(NotImplementedError):
- with self.roundtrip(original) as actual:
- assert_identical(expected, actual)
+ with self.roundtrip(original) as actual:
+ assert_identical(expected, actual)
def test_roundtrip_character_array(self) -> None:
with create_tmp_file() as tmp_file:
From b6eaf436f7b120f5b6b3934892061af1e9ad89fe Mon Sep 17 00:00:00 2001
From: Justus Magin
Date: Fri, 17 Nov 2023 16:27:21 +0100
Subject: [PATCH 02/58] migrate the other CI to python 3.11 (#8416)
* migrate the additional CI to py311
* migrate the upstream-dev CI to python 3.11
* switch to the default environment file
* convert a left-over use of `provision-with-micromamba`
* silence the `cgi` deprecation warning from `pydap`
---------
Co-authored-by: Deepak Cherian
---
.github/workflows/ci-additional.yaml | 6 +++---
.github/workflows/upstream-dev-ci.yaml | 8 ++++----
xarray/tests/__init__.py | 9 ++++++++-
3 files changed, 15 insertions(+), 8 deletions(-)
diff --git a/.github/workflows/ci-additional.yaml b/.github/workflows/ci-additional.yaml
index 52f785c6a00..43f13f03133 100644
--- a/.github/workflows/ci-additional.yaml
+++ b/.github/workflows/ci-additional.yaml
@@ -41,7 +41,7 @@ jobs:
env:
CONDA_ENV_FILE: ci/requirements/environment.yml
- PYTHON_VERSION: "3.10"
+ PYTHON_VERSION: "3.11"
steps:
- uses: actions/checkout@v4
@@ -87,7 +87,7 @@ jobs:
shell: bash -l {0}
env:
CONDA_ENV_FILE: ci/requirements/environment.yml
- PYTHON_VERSION: "3.10"
+ PYTHON_VERSION: "3.11"
steps:
- uses: actions/checkout@v4
@@ -332,7 +332,7 @@ jobs:
with:
environment-name: xarray-tests
create-args: >-
- python=3.10
+ python=3.11
pyyaml
conda
python-dateutil
diff --git a/.github/workflows/upstream-dev-ci.yaml b/.github/workflows/upstream-dev-ci.yaml
index d01fc5cdffc..cb8319cc58f 100644
--- a/.github/workflows/upstream-dev-ci.yaml
+++ b/.github/workflows/upstream-dev-ci.yaml
@@ -50,7 +50,7 @@ jobs:
strategy:
fail-fast: false
matrix:
- python-version: ["3.10"]
+ python-version: ["3.11"]
steps:
- uses: actions/checkout@v4
with:
@@ -110,17 +110,17 @@ jobs:
strategy:
fail-fast: false
matrix:
- python-version: ["3.10"]
+ python-version: ["3.11"]
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0 # Fetch all history for all branches and tags.
- name: Set up conda environment
- uses: mamba-org/provision-with-micromamba@v15
+ uses: mamba-org/setup-micromamba@v1
with:
environment-file: ci/requirements/environment.yml
environment-name: xarray-tests
- extra-specs: |
+ create-args: >-
python=${{ matrix.python-version }}
pytest-reportlog
conda
diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py
index fec695f83d7..8b5cf456bcb 100644
--- a/xarray/tests/__init__.py
+++ b/xarray/tests/__init__.py
@@ -63,7 +63,14 @@ def _importorskip(
has_matplotlib, requires_matplotlib = _importorskip("matplotlib")
has_scipy, requires_scipy = _importorskip("scipy")
-has_pydap, requires_pydap = _importorskip("pydap.client")
+with warnings.catch_warnings():
+ warnings.filterwarnings(
+ "ignore",
+ message="'cgi' is deprecated and slated for removal in Python 3.13",
+ category=DeprecationWarning,
+ )
+
+ has_pydap, requires_pydap = _importorskip("pydap.client")
has_netCDF4, requires_netCDF4 = _importorskip("netCDF4")
has_h5netcdf, requires_h5netcdf = _importorskip("h5netcdf")
has_pynio, requires_pynio = _importorskip("Nio")
From 4473d6dae6b475cfb5253dba5268b35b6ffaa8fb Mon Sep 17 00:00:00 2001
From: Deepak Cherian
Date: Fri, 17 Nov 2023 14:02:20 -0700
Subject: [PATCH 03/58] 2023.11.0 Whats-new (#8461)
* 2023.11.0 Whats-new
* Update doc/whats-new.rst
* Add whats-new 10 year note
* Add banner
* [skip-ci] add
* Update doc/whats-new.rst
---
doc/_static/style.css | 5 +++++
doc/conf.py | 1 +
doc/whats-new.rst | 25 +++++++++++++++++--------
3 files changed, 23 insertions(+), 8 deletions(-)
diff --git a/doc/_static/style.css b/doc/_static/style.css
index a097398d1e9..ea42511c51e 100644
--- a/doc/_static/style.css
+++ b/doc/_static/style.css
@@ -7,6 +7,11 @@ table.docutils td {
word-wrap: break-word;
}
+div.bd-header-announcement {
+ background-color: unset;
+ color: #000;
+}
+
/* Reduce left and right margins */
.container, .container-lg, .container-md, .container-sm, .container-xl {
diff --git a/doc/conf.py b/doc/conf.py
index 9f3a70717f6..f305c2a0fa7 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -242,6 +242,7 @@
Theme by the Executable Book Project
""",
twitter_url="https://twitter.com/xarray_dev",
icon_links=[], # workaround for pydata/pydata-sphinx-theme#1220
+ announcement="🍾 Xarray is now 10 years old! 🎉",
)
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 928a2c95cac..20e269e6025 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -14,10 +14,24 @@ What's New
np.random.seed(123456)
-.. _whats-new.2023.10.2:
+.. _whats-new.2023.11.0:
-v2023.10.2 (unreleased)
------------------------
+v2023.11.0 (Nov 16, 2023)
+-------------------------
+
+
+.. tip::
+
+ `This is our 10th year anniversary release! `_ Thank you for your love and support.
+
+
+This release brings the ability to use ``opt_einsum`` for :py:func:`xarray.dot` by default,
+support for auto-detecting ``region`` when writing partial datasets to Zarr, and the use of h5py
+drivers with ``h5netcdf``.
+
+Thanks to the 19 contributors to this release:
+Aman Bagrecha, Anderson Banihirwe, Ben Mares, Deepak Cherian, Dimitri Papadopoulos Orfanos, Ezequiel Cimadevilla Alvarez,
+Illviljan, Justus Magin, Katelyn FitzGerald, Kai Muehlbauer, Martin Durant, Maximilian Roos, Metamess, Sam Levang, Spencer Clark, Tom Nicholas, mgunyho, templiert
New Features
~~~~~~~~~~~~
@@ -100,11 +114,6 @@ Documentation
- Small updates to documentation on distributed writes: See :ref:`io.zarr.appending` to Zarr.
By `Deepak Cherian `_.
-
-Internal Changes
-~~~~~~~~~~~~~~~~
-
-
.. _whats-new.2023.10.1:
v2023.10.1 (19 Oct, 2023)
From bb8511e0894020e180d95d2edb29ed4036ac6447 Mon Sep 17 00:00:00 2001
From: Deepak Cherian
Date: Sat, 18 Nov 2023 08:20:37 -0700
Subject: [PATCH 04/58] [skip-ci] dev whats-new (#8467)
---
doc/whats-new.rst | 29 +++++++++++++++++++++++++++++
1 file changed, 29 insertions(+)
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 20e269e6025..350cc2e0efa 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -14,6 +14,35 @@ What's New
np.random.seed(123456)
+
+.. _whats-new.2023.11.1:
+
+v2023.11.1 (unreleased)
+-----------------------
+
+New Features
+~~~~~~~~~~~~
+
+
+Breaking changes
+~~~~~~~~~~~~~~~~
+
+
+Deprecations
+~~~~~~~~~~~~
+
+
+Bug fixes
+~~~~~~~~~
+
+
+Documentation
+~~~~~~~~~~~~~
+
+
+Internal Changes
+~~~~~~~~~~~~~~~~
+
.. _whats-new.2023.11.0:
v2023.11.0 (Nov 16, 2023)
From d98baf454c348b5cd39c805d00998a766accea27 Mon Sep 17 00:00:00 2001
From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com>
Date: Tue, 21 Nov 2023 10:24:51 -0800
Subject: [PATCH 05/58] Consolidate `_get_alpha` func (#8465)
* Consolidate `_get_alpha` func
Am changing this a bit, so starting by consolidating it rather than converting twice
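
For reference, a quick sketch checking that the direct conversions below agree with the old two-step path through center of mass:

    import numpy as np

    # direct conversions to alpha, as in the consolidated function
    def alpha_from_com(com):
        return 1 / (1 + com)

    def alpha_from_span(span):
        return 2 / (span + 1)

    def alpha_from_halflife(halflife):
        return 1 - np.exp(np.log(0.5) / halflife)

    # the old path went span -> center of mass -> alpha; results match
    span = 5
    com = (span - 1) / 2.0
    assert alpha_from_com(com) == alpha_from_span(span)  # both 1/3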
---
xarray/core/rolling_exp.py | 41 +++++++++++---------------------------
1 file changed, 12 insertions(+), 29 deletions(-)
diff --git a/xarray/core/rolling_exp.py b/xarray/core/rolling_exp.py
index c8160cefef3..04d7dd41966 100644
--- a/xarray/core/rolling_exp.py
+++ b/xarray/core/rolling_exp.py
@@ -25,51 +25,34 @@ def _get_alpha(
span: float | None = None,
halflife: float | None = None,
alpha: float | None = None,
-) -> float:
- # pandas defines in terms of com (converting to alpha in the algo)
- # so use its function to get a com and then convert to alpha
-
- com = _get_center_of_mass(com, span, halflife, alpha)
- return 1 / (1 + com)
-
-
-def _get_center_of_mass(
- comass: float | None,
- span: float | None,
- halflife: float | None,
- alpha: float | None,
) -> float:
"""
- Vendored from pandas.core.window.common._get_center_of_mass
-
- See licenses/PANDAS_LICENSE for the function's license
+ Convert com, span, halflife to alpha.
"""
- valid_count = count_not_none(comass, span, halflife, alpha)
+ valid_count = count_not_none(com, span, halflife, alpha)
if valid_count > 1:
- raise ValueError("comass, span, halflife, and alpha are mutually exclusive")
+ raise ValueError("com, span, halflife, and alpha are mutually exclusive")
- # Convert to center of mass; domain checks ensure 0 < alpha <= 1
- if comass is not None:
- if comass < 0:
- raise ValueError("comass must satisfy: comass >= 0")
+ # Convert to alpha
+ if com is not None:
+ if com < 0:
+ raise ValueError("commust satisfy: com>= 0")
+ return 1 / (com + 1)
elif span is not None:
if span < 1:
raise ValueError("span must satisfy: span >= 1")
- comass = (span - 1) / 2.0
+ return 2 / (span + 1)
elif halflife is not None:
if halflife <= 0:
raise ValueError("halflife must satisfy: halflife > 0")
- decay = 1 - np.exp(np.log(0.5) / halflife)
- comass = 1 / decay - 1
+ return 1 - np.exp(np.log(0.5) / halflife)
elif alpha is not None:
- if alpha <= 0 or alpha > 1:
+ if not 0 < alpha <= 1:
raise ValueError("alpha must satisfy: 0 < alpha <= 1")
- comass = (1.0 - alpha) / alpha
+ return alpha
else:
raise ValueError("Must pass one of comass, span, halflife, or alpha")
- return float(comass)
-
class RollingExp(Generic[T_DataWithCoords]):
"""
From 7e6eba06d9397f9c70c9252bc9ba4efe6e7a846a Mon Sep 17 00:00:00 2001
From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com>
Date: Tue, 21 Nov 2023 10:25:14 -0800
Subject: [PATCH 06/58] Fix `map_blocks` docs' formatting (#8464)
---
xarray/core/parallel.py | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/xarray/core/parallel.py b/xarray/core/parallel.py
index dd5232023a2..f971556b3f7 100644
--- a/xarray/core/parallel.py
+++ b/xarray/core/parallel.py
@@ -186,8 +186,9 @@ def map_blocks(
Returns
-------
- A single DataArray or Dataset with dask backend, reassembled from the outputs of the
- function.
+ obj : same as obj
+ A single DataArray or Dataset with dask backend, reassembled from the outputs of the
+ function.
Notes
-----
From dcf5d743fc8ff66996ff73c08f6893b701ff6e02 Mon Sep 17 00:00:00 2001
From: Illviljan <14371165+Illviljan@users.noreply.github.com>
Date: Tue, 21 Nov 2023 20:26:24 +0100
Subject: [PATCH 07/58] Use concise date format when plotting (#8449)
* Add concise date format
* Update utils.py
* Update dataarray_plot.py
* Update dataarray_plot.py
* Update whats-new.rst
* Cleanup
* Clarify xfail reason
* Update whats-new.rst
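
In user terms, a sketch with hypothetical data (assumes matplotlib is installed): plotting against a datetime coordinate now installs matplotlib's AutoDateLocator and ConciseDateFormatter instead of rotating the tick labels.

    import numpy as np
    import pandas as pd
    import xarray as xr

    time = pd.date_range("2000-01-01", "2000-01-10")
    da = xr.DataArray(np.arange(len(time)), coords=[("t", time)])
    da.plot.line()  # x-axis tick labels now use the concise date format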
---
doc/whats-new.rst | 2 ++
xarray/plot/dataarray_plot.py | 35 +++++---------------
xarray/plot/utils.py | 26 ++++++++++++++-
xarray/tests/test_plot.py | 62 +++++++++++++++++++++++++++++++----
4 files changed, 90 insertions(+), 35 deletions(-)
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 350cc2e0efa..3698058cfe8 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -23,6 +23,8 @@ v2023.11.1 (unreleased)
New Features
~~~~~~~~~~~~
+- Use a concise format when plotting datetime arrays. (:pull:`8449`).
+ By `Jimmy Westling `_.
Breaking changes
~~~~~~~~~~~~~~~~
diff --git a/xarray/plot/dataarray_plot.py b/xarray/plot/dataarray_plot.py
index 61f2014fbc3..6da97a3faf0 100644
--- a/xarray/plot/dataarray_plot.py
+++ b/xarray/plot/dataarray_plot.py
@@ -27,6 +27,7 @@
_rescale_imshow_rgb,
_resolve_intervals_1dplot,
_resolve_intervals_2dplot,
+ _set_concise_date,
_update_axes,
get_axis,
label_from_attrs,
@@ -525,14 +526,8 @@ def line(
assert hueplt is not None
ax.legend(handles=primitive, labels=list(hueplt.to_numpy()), title=hue_label)
- # Rotate dates on xlabels
- # Do this without calling autofmt_xdate so that x-axes ticks
- # on other subplots (if any) are not deleted.
- # https://stackoverflow.com/questions/17430105/autofmt-xdate-deletes-x-axis-labels-of-all-subplots
if np.issubdtype(xplt.dtype, np.datetime64):
- for xlabels in ax.get_xticklabels():
- xlabels.set_rotation(30)
- xlabels.set_horizontalalignment("right")
+ _set_concise_date(ax, axis="x")
_update_axes(ax, xincrease, yincrease, xscale, yscale, xticks, yticks, xlim, ylim)
@@ -1087,14 +1082,12 @@ def _add_labels(
add_labels: bool | Iterable[bool],
darrays: Iterable[DataArray | None],
suffixes: Iterable[str],
- rotate_labels: Iterable[bool],
ax: Axes,
) -> None:
"""Set x, y, z labels."""
add_labels = [add_labels] * 3 if isinstance(add_labels, bool) else add_labels
- for axis, add_label, darray, suffix, rotate_label in zip(
- ("x", "y", "z"), add_labels, darrays, suffixes, rotate_labels
- ):
+ axes: tuple[Literal["x", "y", "z"], ...] = ("x", "y", "z")
+ for axis, add_label, darray, suffix in zip(axes, add_labels, darrays, suffixes):
if darray is None:
continue
@@ -1103,14 +1096,8 @@ def _add_labels(
if label is not None:
getattr(ax, f"set_{axis}label")(label)
- if rotate_label and np.issubdtype(darray.dtype, np.datetime64):
- # Rotate dates on xlabels
- # Do this without calling autofmt_xdate so that x-axes ticks
- # on other subplots (if any) are not deleted.
- # https://stackoverflow.com/questions/17430105/autofmt-xdate-deletes-x-axis-labels-of-all-subplots
- for labels in getattr(ax, f"get_{axis}ticklabels")():
- labels.set_rotation(30)
- labels.set_horizontalalignment("right")
+ if np.issubdtype(darray.dtype, np.datetime64):
+ _set_concise_date(ax, axis=axis)
@overload
@@ -1265,7 +1252,7 @@ def scatter(
kwargs.update(s=sizeplt.to_numpy().ravel())
plts_or_none = (xplt, yplt, zplt)
- _add_labels(add_labels, plts_or_none, ("", "", ""), (True, False, False), ax)
+ _add_labels(add_labels, plts_or_none, ("", "", ""), ax)
xplt_np = None if xplt is None else xplt.to_numpy().ravel()
yplt_np = None if yplt is None else yplt.to_numpy().ravel()
@@ -1653,14 +1640,8 @@ def newplotfunc(
ax, xincrease, yincrease, xscale, yscale, xticks, yticks, xlim, ylim
)
- # Rotate dates on xlabels
- # Do this without calling autofmt_xdate so that x-axes ticks
- # on other subplots (if any) are not deleted.
- # https://stackoverflow.com/questions/17430105/autofmt-xdate-deletes-x-axis-labels-of-all-subplots
if np.issubdtype(xplt.dtype, np.datetime64):
- for xlabels in ax.get_xticklabels():
- xlabels.set_rotation(30)
- xlabels.set_horizontalalignment("right")
+ _set_concise_date(ax, "x")
return primitive
diff --git a/xarray/plot/utils.py b/xarray/plot/utils.py
index 5694acc06e8..903780b1137 100644
--- a/xarray/plot/utils.py
+++ b/xarray/plot/utils.py
@@ -6,7 +6,7 @@
from collections.abc import Hashable, Iterable, Mapping, MutableMapping, Sequence
from datetime import datetime
from inspect import getfullargspec
-from typing import TYPE_CHECKING, Any, Callable, overload
+from typing import TYPE_CHECKING, Any, Callable, Literal, overload
import numpy as np
import pandas as pd
@@ -1827,3 +1827,27 @@ def _guess_coords_to_plot(
_assert_valid_xy(darray, dim, k)
return coords_to_plot
+
+
+def _set_concise_date(ax: Axes, axis: Literal["x", "y", "z"] = "x") -> None:
+ """
+ Use ConciseDateFormatter which is meant to improve the
+ strings chosen for the ticklabels, and to minimize the
+ strings used in those tick labels as much as possible.
+
+ https://matplotlib.org/stable/gallery/ticks/date_concise_formatter.html
+
+ Parameters
+ ----------
+ ax : Axes
+ Figure axes.
+ axis : Literal["x", "y", "z"], optional
+ Which axis to make concise. The default is "x".
+ """
+ import matplotlib.dates as mdates
+
+ locator = mdates.AutoDateLocator()
+ formatter = mdates.ConciseDateFormatter(locator)
+ _axis = getattr(ax, f"{axis}axis")
+ _axis.set_major_locator(locator)
+ _axis.set_major_formatter(formatter)
diff --git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py
index 31c23955b02..102d06b0289 100644
--- a/xarray/tests/test_plot.py
+++ b/xarray/tests/test_plot.py
@@ -787,12 +787,17 @@ def test_plot_nans(self) -> None:
self.darray[1] = np.nan
self.darray.plot.line()
- def test_x_ticks_are_rotated_for_time(self) -> None:
+ def test_dates_are_concise(self) -> None:
+ import matplotlib.dates as mdates
+
time = pd.date_range("2000-01-01", "2000-01-10")
a = DataArray(np.arange(len(time)), [("t", time)])
a.plot.line()
- rotation = plt.gca().get_xticklabels()[0].get_rotation()
- assert rotation != 0
+
+ ax = plt.gca()
+
+ assert isinstance(ax.xaxis.get_major_locator(), mdates.AutoDateLocator)
+ assert isinstance(ax.xaxis.get_major_formatter(), mdates.ConciseDateFormatter)
def test_xyincrease_false_changes_axes(self) -> None:
self.darray.plot.line(xincrease=False, yincrease=False)
@@ -1356,12 +1361,17 @@ def test_xyincrease_true_changes_axes(self) -> None:
diffs = xlim[0] - 0, xlim[1] - 14, ylim[0] - 0, ylim[1] - 9
assert all(abs(x) < 1 for x in diffs)
- def test_x_ticks_are_rotated_for_time(self) -> None:
+ def test_dates_are_concise(self) -> None:
+ import matplotlib.dates as mdates
+
time = pd.date_range("2000-01-01", "2000-01-10")
a = DataArray(np.random.randn(2, len(time)), [("xx", [1, 2]), ("t", time)])
- a.plot(x="t")
- rotation = plt.gca().get_xticklabels()[0].get_rotation()
- assert rotation != 0
+ self.plotfunc(a, x="t")
+
+ ax = plt.gca()
+
+ assert isinstance(ax.xaxis.get_major_locator(), mdates.AutoDateLocator)
+ assert isinstance(ax.xaxis.get_major_formatter(), mdates.ConciseDateFormatter)
def test_plot_nans(self) -> None:
x1 = self.darray[:5]
@@ -1888,6 +1898,25 @@ def test_interval_breaks_logspace(self) -> None:
class TestImshow(Common2dMixin, PlotTestCase):
plotfunc = staticmethod(xplt.imshow)
+ @pytest.mark.xfail(
+ reason=(
+ "Failing inside matplotlib. Should probably be fixed upstream because "
+ "other plot functions can handle it. "
+ "Remove this test when it works, already in Common2dMixin"
+ )
+ )
+ def test_dates_are_concise(self) -> None:
+ import matplotlib.dates as mdates
+
+ time = pd.date_range("2000-01-01", "2000-01-10")
+ a = DataArray(np.random.randn(2, len(time)), [("xx", [1, 2]), ("t", time)])
+ self.plotfunc(a, x="t")
+
+ ax = plt.gca()
+
+ assert isinstance(ax.xaxis.get_major_locator(), mdates.AutoDateLocator)
+ assert isinstance(ax.xaxis.get_major_formatter(), mdates.ConciseDateFormatter)
+
@pytest.mark.slow
def test_imshow_called(self) -> None:
# Having both statements ensures the test works properly
@@ -2032,6 +2061,25 @@ class TestSurface(Common2dMixin, PlotTestCase):
plotfunc = staticmethod(xplt.surface)
subplot_kws = {"projection": "3d"}
+ @pytest.mark.xfail(
+ reason=(
+ "Failing inside matplotlib. Should probably be fixed upstream because "
+ "other plot functions can handle it. "
+ "Remove this test when it works, already in Common2dMixin"
+ )
+ )
+ def test_dates_are_concise(self) -> None:
+ import matplotlib.dates as mdates
+
+ time = pd.date_range("2000-01-01", "2000-01-10")
+ a = DataArray(np.random.randn(2, len(time)), [("xx", [1, 2]), ("t", time)])
+ self.plotfunc(a, x="t")
+
+ ax = plt.gca()
+
+ assert isinstance(ax.xaxis.get_major_locator(), mdates.AutoDateLocator)
+ assert isinstance(ax.xaxis.get_major_formatter(), mdates.ConciseDateFormatter)
+
def test_primitive_artist_returned(self) -> None:
artist = self.plotmethod()
assert isinstance(artist, mpl_toolkits.mplot3d.art3d.Poly3DCollection)
From cb14f2fee49210a5d6b18731f9b9b10feee5f909 Mon Sep 17 00:00:00 2001
From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com>
Date: Wed, 22 Nov 2023 00:01:12 -0800
Subject: [PATCH 08/58] Fix mypy tests (#8476)
I was seeing an error in #8475
---
xarray/core/nputils.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/xarray/core/nputils.py b/xarray/core/nputils.py
index 316a77ead6a..bd33b7b6d8f 100644
--- a/xarray/core/nputils.py
+++ b/xarray/core/nputils.py
@@ -31,7 +31,7 @@
_HAS_NUMBAGG = Version(numbagg.__version__) >= Version("0.5.0")
except ImportError:
# use numpy methods instead
- numbagg = np
+ numbagg = np # type: ignore
_HAS_NUMBAGG = False
From 41b1b8cede2b151c797e5679a6260d02ed71fe26 Mon Sep 17 00:00:00 2001
From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com>
Date: Wed, 22 Nov 2023 08:45:02 -0800
Subject: [PATCH 09/58] Allow `rank` to run on dask arrays (#8475)
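As a usage sketch (mirrors the new test; assumes bottleneck and dask are installed): `rank` now goes through `apply_ufunc` with `dask="parallelized"`, so it works lazily as long as the ranked dimension is a single chunk.

    import numpy as np
    import xarray as xr

    v = xr.Variable(
        ["x", "y"], [[30.0, 1.0, np.nan, 20.0, 4.0], [30.0, 1.0, np.nan, 20.0, 4.0]]
    ).chunk(x=1)
    print(v.rank("y").compute())  # [[4., 1., nan, 3., 2.], [4., 1., nan, 3., 2.]]
    # v.rank("x") raises ValueError because "x" consists of multiple chunks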
---
xarray/core/variable.py | 27 ++++++++++++---------------
xarray/tests/test_variable.py | 20 ++++++++++++++++----
2 files changed, 28 insertions(+), 19 deletions(-)
diff --git a/xarray/core/variable.py b/xarray/core/variable.py
index db109a40454..c2133d55aeb 100644
--- a/xarray/core/variable.py
+++ b/xarray/core/variable.py
@@ -2063,6 +2063,7 @@ def rank(self, dim, pct=False):
--------
Dataset.rank, DataArray.rank
"""
+ # This could / should arguably be implemented at the DataArray & Dataset level
if not OPTIONS["use_bottleneck"]:
raise RuntimeError(
"rank requires bottleneck to be enabled."
@@ -2071,24 +2072,20 @@ def rank(self, dim, pct=False):
import bottleneck as bn
- data = self.data
-
- if is_duck_dask_array(data):
- raise TypeError(
- "rank does not work for arrays stored as dask "
- "arrays. Load the data via .compute() or .load() "
- "prior to calling this method."
- )
- elif not isinstance(data, np.ndarray):
- raise TypeError(f"rank is not implemented for {type(data)} objects.")
-
- axis = self.get_axis_num(dim)
func = bn.nanrankdata if self.dtype.kind == "f" else bn.rankdata
- ranked = func(data, axis=axis)
+ ranked = xr.apply_ufunc(
+ func,
+ self,
+ input_core_dims=[[dim]],
+ output_core_dims=[[dim]],
+ dask="parallelized",
+ kwargs=dict(axis=-1),
+ ).transpose(*self.dims)
+
if pct:
- count = np.sum(~np.isnan(data), axis=axis, keepdims=True)
+ count = self.notnull().sum(dim)
ranked /= count
- return Variable(self.dims, ranked)
+ return ranked
def rolling_window(
self, dim, window, window_dim, center=False, fill_value=dtypes.NA
diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py
index 8a73e435977..d91cf85e4eb 100644
--- a/xarray/tests/test_variable.py
+++ b/xarray/tests/test_variable.py
@@ -1878,9 +1878,20 @@ def test_quantile_out_of_bounds(self, q):
@requires_dask
@requires_bottleneck
- def test_rank_dask_raises(self):
- v = Variable(["x"], [3.0, 1.0, np.nan, 2.0, 4.0]).chunk(2)
- with pytest.raises(TypeError, match=r"arrays stored as dask"):
+ def test_rank_dask(self):
+ # Instead of a single test here, we could parameterize the other tests for both
+ # arrays. But this is sufficient.
+ v = Variable(
+ ["x", "y"], [[30.0, 1.0, np.nan, 20.0, 4.0], [30.0, 1.0, np.nan, 20.0, 4.0]]
+ ).chunk(x=1)
+ expected = Variable(
+ ["x", "y"], [[4.0, 1.0, np.nan, 3.0, 2.0], [4.0, 1.0, np.nan, 3.0, 2.0]]
+ )
+ assert_equal(v.rank("y").compute(), expected)
+
+ with pytest.raises(
+ ValueError, match=r" with dask='parallelized' consists of multiple chunks"
+ ):
v.rank("x")
def test_rank_use_bottleneck(self):
@@ -1912,7 +1923,8 @@ def test_rank(self):
v_expect = Variable(["x"], [0.75, 0.25, np.nan, 0.5, 1.0])
assert_equal(v.rank("x", pct=True), v_expect)
# invalid dim
- with pytest.raises(ValueError, match=r"not found"):
+ with pytest.raises(ValueError):
+ # apply_ufunc error message isn't great here — `ValueError: tuple.index(x): x not in tuple`
v.rank("y")
def test_big_endian_reduce(self):
From 398d8e6c393efe72978ed07ab2d37973771db7b3 Mon Sep 17 00:00:00 2001
From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com>
Date: Wed, 22 Nov 2023 10:45:22 -0800
Subject: [PATCH 10/58] Add whatsnew for #8475 (#8478)
Sorry, forgot in the original PR
---
doc/whats-new.rst | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 3698058cfe8..76548fe95c5 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -26,6 +26,11 @@ New Features
- Use a concise format when plotting datetime arrays. (:pull:`8449`).
By `Jimmy Westling `_.
+
+- :py:meth:`~xarray.DataArray.rank` now operates on dask-backed arrays, assuming
+ the core dim has exactly one chunk. (:pull:`8475`).
+ By `Maximilian Roos `_.
+
Breaking changes
~~~~~~~~~~~~~~~~
From 71c2f6199f0aa569409d791dff6ee5e06f8c8665 Mon Sep 17 00:00:00 2001
From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com>
Date: Fri, 24 Nov 2023 10:49:37 -0800
Subject: [PATCH 11/58] Improve "variable not found" error message (#8474)
* Improve missing variable error message
* [pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
---------
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
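
The effect, as a sketch (dataset contents hypothetical):

    import xarray as xr
    from xarray.core.formatting import shorten_list_repr

    ds = xr.Dataset({"z1": ("x", [1, 2]), "z2": ("x", [3, 4])})
    try:
        ds["foo"]
    except KeyError as e:
        print(e)  # No variable named 'foo'. Variables on the dataset include ['z1', 'z2']

    # long name lists are truncated by the new helper
    print(shorten_list_repr(list(range(20)), max_items=10))
    # [0, 1, 2, 3, 4, ..., 15, 16, 17, 18, 19]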
---
doc/whats-new.rst | 3 +++
xarray/core/dataset.py | 10 +++++++++-
xarray/core/formatting.py | 15 ++++++++++++++-
xarray/tests/test_error_messages.py | 17 +++++++++++++++++
4 files changed, 43 insertions(+), 2 deletions(-)
create mode 100644 xarray/tests/test_error_messages.py
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 76548fe95c5..a8302715317 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -46,6 +46,9 @@ Bug fixes
Documentation
~~~~~~~~~~~~~
+- Improved error message when attempting to get a variable which doesn't exist from a Dataset.
+ (:pull:`8474`)
+ By `Maximilian Roos `_.
Internal Changes
~~~~~~~~~~~~~~~~
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index c8e7564d3ca..5d2d24d6723 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -1539,10 +1539,18 @@ def __getitem__(
Indexing with a list of names will return a new ``Dataset`` object.
"""
+ from xarray.core.formatting import shorten_list_repr
+
if utils.is_dict_like(key):
return self.isel(**key)
if utils.hashable(key):
- return self._construct_dataarray(key)
+ try:
+ return self._construct_dataarray(key)
+ except KeyError as e:
+ raise KeyError(
+ f"No variable named {key!r}. Variables on the dataset include {shorten_list_repr(list(self.variables.keys()), max_items=10)}"
+ ) from e
+
if utils.iterable_of_hashable(key):
return self._copy_listed(key)
raise ValueError(f"Unsupported key-type {type(key)}")
diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py
index a915e9acbf3..ea0e6275fb6 100644
--- a/xarray/core/formatting.py
+++ b/xarray/core/formatting.py
@@ -6,7 +6,7 @@
import functools
import math
from collections import defaultdict
-from collections.abc import Collection, Hashable
+from collections.abc import Collection, Hashable, Sequence
from datetime import datetime, timedelta
from itertools import chain, zip_longest
from reprlib import recursive_repr
@@ -937,3 +937,16 @@ def diff_dataset_repr(a, b, compat):
summary.append(diff_attrs_repr(a.attrs, b.attrs, compat))
return "\n".join(summary)
+
+
+def shorten_list_repr(items: Sequence, max_items: int) -> str:
+ if len(items) <= max_items:
+ return repr(items)
+ else:
+ first_half = repr(items[: max_items // 2])[
+ 1:-1
+ ] # Convert to string and remove brackets
+ second_half = repr(items[-max_items // 2 :])[
+ 1:-1
+ ] # Convert to string and remove brackets
+ return f"[{first_half}, ..., {second_half}]"
diff --git a/xarray/tests/test_error_messages.py b/xarray/tests/test_error_messages.py
new file mode 100644
index 00000000000..b5840aafdfa
--- /dev/null
+++ b/xarray/tests/test_error_messages.py
@@ -0,0 +1,17 @@
+"""
+This new file is intended to test the quality & friendliness of error messages that are
+raised by xarray. It's currently separate from the standard tests, which are more
+focused on the functions working (though we could consider integrating them.).
+"""
+
+import pytest
+
+
+def test_no_var_in_dataset(ds):
+ with pytest.raises(
+ KeyError,
+ match=(
+ r"No variable named 'foo'. Variables on the dataset include \['z1', 'z2', 'x', 'time', 'c', 'y'\]"
+ ),
+ ):
+ ds["foo"]
From dc66f0d2b34754fb2a8d29d8eb635a5b143755ad Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Fri, 24 Nov 2023 23:56:18 +0100
Subject: [PATCH 12/58] Fix bug for categorical pandas index with categories
with EA dtype (#8481)
* Fix bug for categorical pandas index with categories with EA dtype
* Add whatsnew
* Update xarray/tests/test_dataset.py
Co-authored-by: Illviljan <14371165+Illviljan@users.noreply.github.com>
---------
Co-authored-by: Illviljan <14371165+Illviljan@users.noreply.github.com>
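
Minimal reproducer, taken from the new test:

    import numpy as np
    import pandas as pd

    cat = pd.CategoricalIndex(
        pd.Categorical.from_codes(
            np.array([1, 1, 0, 2]),
            categories=pd.Index(["foo", "bar", "baz"], dtype="string"),
        )
    )
    ser = pd.Series(1, index=cat)
    ds = ser.to_xarray()
    # the "string" extension dtype is not a valid numpy dtype, so fall back to object
    assert ds.coords.dtypes["index"] == np.dtype("O")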
---
doc/whats-new.rst | 2 ++
xarray/core/utils.py | 2 ++
xarray/tests/test_dataset.py | 11 +++++++++++
3 files changed, 15 insertions(+)
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index a8302715317..d92f3239f60 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -42,6 +42,8 @@ Deprecations
Bug fixes
~~~~~~~~~
+- Fix dtype inference for ``pd.CategoricalIndex`` when categories are backed by a ``pd.ExtensionDtype`` (:pull:`8481`)
+
Documentation
~~~~~~~~~~~~~
diff --git a/xarray/core/utils.py b/xarray/core/utils.py
index ad86b2c7fec..9ba4a43f6d9 100644
--- a/xarray/core/utils.py
+++ b/xarray/core/utils.py
@@ -114,6 +114,8 @@ def get_valid_numpy_dtype(array: np.ndarray | pd.Index):
elif hasattr(array, "categories"):
# category isn't a real numpy dtype
dtype = array.categories.dtype
+ if not is_valid_numpy_dtype(dtype):
+ dtype = np.dtype("O")
elif not is_valid_numpy_dtype(array.dtype):
dtype = np.dtype("O")
else:
diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py
index ff7703a1cf5..a53d81e36af 100644
--- a/xarray/tests/test_dataset.py
+++ b/xarray/tests/test_dataset.py
@@ -4697,6 +4697,17 @@ def test_from_dataframe_categorical(self) -> None:
assert len(ds["i1"]) == 2
assert len(ds["i2"]) == 2
+ def test_from_dataframe_categorical_string_categories(self) -> None:
+ cat = pd.CategoricalIndex(
+ pd.Categorical.from_codes(
+ np.array([1, 1, 0, 2]),
+ categories=pd.Index(["foo", "bar", "baz"], dtype="string"),
+ )
+ )
+ ser = pd.Series(1, index=cat)
+ ds = ser.to_xarray()
+ assert ds.coords.dtypes["index"] == np.dtype("O")
+
@requires_sparse
def test_from_dataframe_sparse(self) -> None:
import sparse
From a6837909ded8c3fe2d9ff8655b04d18f46d77ed5 Mon Sep 17 00:00:00 2001
From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com>
Date: Sat, 25 Nov 2023 13:06:08 -0800
Subject: [PATCH 13/58] Use numbagg for `ffill` by default (#8389)
* Use `numbagg` for `ffill`
* [pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
* Use duck_array_ops for numbagg version, test import is lazy
* Update xarray/core/duck_array_ops.py
Co-authored-by: Deepak Cherian
* Update xarray/core/nputils.py
Co-authored-by: Deepak Cherian
* Update xarray/core/rolling_exp.py
Co-authored-by: Deepak Cherian
* Update xarray/core/nputils.py
Co-authored-by: Deepak Cherian
---------
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Deepak Cherian
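
User-visible behaviour, as a sketch (assumes at least one of numbagg or bottleneck is installed): with default options, ffill/bfill prefer numbagg (>= 0.6.2) and fall back to bottleneck; disabling both now raises a RuntimeError.

    import numpy as np
    import xarray as xr

    da = xr.DataArray(np.array([4.0, 5.0, np.nan]), dims="x")
    da.ffill("x")  # dispatches to numbagg.ffill if available, else bottleneck.push

    with xr.set_options(use_bottleneck=False, use_numbagg=False):
        try:
            da.ffill("x")
        except RuntimeError as e:
            print(e)  # ffill & bfill requires bottleneck or numbagg to be enabled...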
---
doc/whats-new.rst | 4 +++
xarray/backends/zarr.py | 4 +--
xarray/core/dask_array_ops.py | 5 ++--
xarray/core/duck_array_ops.py | 41 +++++++++++++++++++++++++---
xarray/core/missing.py | 12 +--------
xarray/core/nputils.py | 34 +++++++++++-------------
xarray/core/pycompat.py | 5 +++-
xarray/core/rolling_exp.py | 50 ++++++++++++++++++-----------------
xarray/tests/__init__.py | 7 ++++-
xarray/tests/test_missing.py | 24 ++++++++++++-----
xarray/tests/test_plugins.py | 29 ++++++++++----------
11 files changed, 132 insertions(+), 83 deletions(-)
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index d92f3239f60..b2efe650e28 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -55,6 +55,10 @@ Documentation
Internal Changes
~~~~~~~~~~~~~~~~
+- :py:meth:`DataArray.bfill` & :py:meth:`DataArray.ffill` now use numbagg by
+ default, which is up to 5x faster where parallelization is possible. (:pull:`8389`)
+ By `Maximilian Roos `_.
+
.. _whats-new.2023.11.0:
v2023.11.0 (Nov 16, 2023)
diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py
index 6632e40cf6f..f0eece3bb61 100644
--- a/xarray/backends/zarr.py
+++ b/xarray/backends/zarr.py
@@ -177,8 +177,8 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name, safe_chunks):
# DESIGN CHOICE: do not allow multiple dask chunks on a single zarr chunk
# this avoids the need to get involved in zarr synchronization / locking
# From zarr docs:
- # "If each worker in a parallel computation is writing to a separate
- # region of the array, and if region boundaries are perfectly aligned
+ # "If each worker in a parallel computation is writing to a
+ # separate region of the array, and if region boundaries are perfectly aligned
# with chunk boundaries, then no synchronization is required."
# TODO: incorporate synchronizer to allow writes from multiple dask
# threads
diff --git a/xarray/core/dask_array_ops.py b/xarray/core/dask_array_ops.py
index d2d3e4a6d1c..98ff9002856 100644
--- a/xarray/core/dask_array_ops.py
+++ b/xarray/core/dask_array_ops.py
@@ -59,10 +59,11 @@ def push(array, n, axis):
"""
Dask-aware bottleneck.push
"""
- import bottleneck
import dask.array as da
import numpy as np
+ from xarray.core.duck_array_ops import _push
+
def _fill_with_last_one(a, b):
# cumreduction apply the push func over all the blocks first so, the only missing part is filling
# the missing values using the last data of the previous chunk
@@ -85,7 +86,7 @@ def _fill_with_last_one(a, b):
# The method parameter makes that the tests for python 3.7 fails.
return da.reductions.cumreduction(
- func=bottleneck.push,
+ func=_push,
binop=_fill_with_last_one,
ident=np.nan,
x=array,
diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py
index b9f7db9737f..7f2b2ed85ee 100644
--- a/xarray/core/duck_array_ops.py
+++ b/xarray/core/duck_array_ops.py
@@ -31,8 +31,10 @@
from numpy import concatenate as _concatenate
from numpy.core.multiarray import normalize_axis_index # type: ignore[attr-defined]
from numpy.lib.stride_tricks import sliding_window_view # noqa
+from packaging.version import Version
-from xarray.core import dask_array_ops, dtypes, nputils
+from xarray.core import dask_array_ops, dtypes, nputils, pycompat
+from xarray.core.options import OPTIONS
from xarray.core.parallelcompat import get_chunked_array_type, is_chunked_array
from xarray.core.pycompat import array_type, is_duck_dask_array
from xarray.core.utils import is_duck_array, module_available
@@ -688,13 +690,44 @@ def least_squares(lhs, rhs, rcond=None, skipna=False):
return nputils.least_squares(lhs, rhs, rcond=rcond, skipna=skipna)
-def push(array, n, axis):
- from bottleneck import push
+def _push(array, n: int | None = None, axis: int = -1):
+ """
+ Use either bottleneck or numbagg depending on options & what's available
+ """
+
+ if not OPTIONS["use_bottleneck"] and not OPTIONS["use_numbagg"]:
+ raise RuntimeError(
+ "ffill & bfill requires bottleneck or numbagg to be enabled."
+ " Call `xr.set_options(use_bottleneck=True)` or `xr.set_options(use_numbagg=True)` to enable one."
+ )
+ if OPTIONS["use_numbagg"] and module_available("numbagg"):
+ import numbagg
+
+ if pycompat.mod_version("numbagg") < Version("0.6.2"):
+ warnings.warn(
+ f"numbagg >= 0.6.2 is required for bfill & ffill; {pycompat.mod_version('numbagg')} is installed. We'll attempt with bottleneck instead."
+ )
+ else:
+ return numbagg.ffill(array, limit=n, axis=axis)
+
+ # work around for bottleneck 178
+ limit = n if n is not None else array.shape[axis]
+
+ import bottleneck as bn
+
+ return bn.push(array, limit, axis)
+
+def push(array, n, axis):
+ if not OPTIONS["use_bottleneck"] and not OPTIONS["use_numbagg"]:
+ raise RuntimeError(
+ "ffill & bfill requires bottleneck or numbagg to be enabled."
+ " Call `xr.set_options(use_bottleneck=True)` or `xr.set_options(use_numbagg=True)` to enable one."
+ )
if is_duck_dask_array(array):
return dask_array_ops.push(array, n, axis)
else:
- return push(array, n, axis)
+ return _push(array, n, axis)
def _first_last_wrapper(array, *, axis, op, keepdims):
diff --git a/xarray/core/missing.py b/xarray/core/missing.py
index 90a9dd2e76c..b55fd6049a6 100644
--- a/xarray/core/missing.py
+++ b/xarray/core/missing.py
@@ -14,7 +14,7 @@
from xarray.core.common import _contains_datetime_like_objects, ones_like
from xarray.core.computation import apply_ufunc
from xarray.core.duck_array_ops import datetime_to_numeric, push, timedelta_to_numeric
-from xarray.core.options import OPTIONS, _get_keep_attrs
+from xarray.core.options import _get_keep_attrs
from xarray.core.parallelcompat import get_chunked_array_type, is_chunked_array
from xarray.core.types import Interp1dOptions, InterpOptions
from xarray.core.utils import OrderedSet, is_scalar
@@ -413,11 +413,6 @@ def _bfill(arr, n=None, axis=-1):
def ffill(arr, dim=None, limit=None):
"""forward fill missing values"""
- if not OPTIONS["use_bottleneck"]:
- raise RuntimeError(
- "ffill requires bottleneck to be enabled."
- " Call `xr.set_options(use_bottleneck=True)` to enable it."
- )
axis = arr.get_axis_num(dim)
@@ -436,11 +431,6 @@ def ffill(arr, dim=None, limit=None):
def bfill(arr, dim=None, limit=None):
"""backfill missing values"""
- if not OPTIONS["use_bottleneck"]:
- raise RuntimeError(
- "bfill requires bottleneck to be enabled."
- " Call `xr.set_options(use_bottleneck=True)` to enable it."
- )
axis = arr.get_axis_num(dim)
diff --git a/xarray/core/nputils.py b/xarray/core/nputils.py
index bd33b7b6d8f..96e5548b9b4 100644
--- a/xarray/core/nputils.py
+++ b/xarray/core/nputils.py
@@ -1,12 +1,16 @@
from __future__ import annotations
import warnings
+from typing import Callable
import numpy as np
import pandas as pd
from numpy.core.multiarray import normalize_axis_index # type: ignore[attr-defined]
from packaging.version import Version
+from xarray.core import pycompat
+from xarray.core.utils import module_available
+
# remove once numpy 2.0 is the oldest supported version
try:
from numpy.exceptions import RankWarning # type: ignore[attr-defined,unused-ignore]
@@ -25,15 +29,6 @@
bn = np
_BOTTLENECK_AVAILABLE = False
-try:
- import numbagg
-
- _HAS_NUMBAGG = Version(numbagg.__version__) >= Version("0.5.0")
-except ImportError:
- # use numpy methods instead
- numbagg = np # type: ignore
- _HAS_NUMBAGG = False
-
def _select_along_axis(values, idx, axis):
other_ind = np.ix_(*[np.arange(s) for s in idx.shape])
@@ -171,17 +166,16 @@ def __setitem__(self, key, value):
self._array[key] = np.moveaxis(value, vindex_positions, mixed_positions)
-def _create_method(name, npmodule=np):
+def _create_method(name, npmodule=np) -> Callable:
def f(values, axis=None, **kwargs):
dtype = kwargs.get("dtype", None)
bn_func = getattr(bn, name, None)
- nba_func = getattr(numbagg, name, None)
if (
- _HAS_NUMBAGG
+ module_available("numbagg")
+ and pycompat.mod_version("numbagg") >= Version("0.5.0")
and OPTIONS["use_numbagg"]
and isinstance(values, np.ndarray)
- and nba_func is not None
# numbagg uses ddof=1 only, but numpy uses ddof=0 by default
and (("var" in name or "std" in name) and kwargs.get("ddof", 0) == 1)
# TODO: bool?
@@ -189,11 +183,15 @@ def f(values, axis=None, **kwargs):
# and values.dtype.isnative
and (dtype is None or np.dtype(dtype) == values.dtype)
):
- # numbagg does not take care dtype, ddof
- kwargs.pop("dtype", None)
- kwargs.pop("ddof", None)
- result = nba_func(values, axis=axis, **kwargs)
- elif (
+ import numbagg
+
+ nba_func = getattr(numbagg, name, None)
+ if nba_func is not None:
+ # numbagg does not take care dtype, ddof
+ kwargs.pop("dtype", None)
+ kwargs.pop("ddof", None)
+ return nba_func(values, axis=axis, **kwargs)
+ if (
_BOTTLENECK_AVAILABLE
and OPTIONS["use_bottleneck"]
and isinstance(values, np.ndarray)
diff --git a/xarray/core/pycompat.py b/xarray/core/pycompat.py
index bc8b61164f1..32ef408f7cc 100644
--- a/xarray/core/pycompat.py
+++ b/xarray/core/pycompat.py
@@ -12,7 +12,7 @@
integer_types = (int, np.integer)
if TYPE_CHECKING:
- ModType = Literal["dask", "pint", "cupy", "sparse", "cubed"]
+ ModType = Literal["dask", "pint", "cupy", "sparse", "cubed", "numbagg"]
DuckArrayTypes = tuple[type[Any], ...] # TODO: improve this? maybe Generic
@@ -47,6 +47,9 @@ def __init__(self, mod: ModType) -> None:
duck_array_type = (duck_array_module.SparseArray,)
elif mod == "cubed":
duck_array_type = (duck_array_module.Array,)
+ # Not a duck array module, but using this system regardless, to get lazy imports
+ elif mod == "numbagg":
+ duck_array_type = ()
else:
raise NotImplementedError
diff --git a/xarray/core/rolling_exp.py b/xarray/core/rolling_exp.py
index 04d7dd41966..1e4b805208f 100644
--- a/xarray/core/rolling_exp.py
+++ b/xarray/core/rolling_exp.py
@@ -6,18 +6,12 @@
import numpy as np
from packaging.version import Version
+from xarray.core import pycompat
from xarray.core.computation import apply_ufunc
from xarray.core.options import _get_keep_attrs
from xarray.core.pdcompat import count_not_none
from xarray.core.types import T_DataWithCoords
-
-try:
- import numbagg
- from numbagg import move_exp_nanmean, move_exp_nansum
-
- _NUMBAGG_VERSION: Version | None = Version(numbagg.__version__)
-except ImportError:
- _NUMBAGG_VERSION = None
+from xarray.core.utils import module_available
def _get_alpha(
@@ -83,17 +77,17 @@ def __init__(
window_type: str = "span",
min_weight: float = 0.0,
):
- if _NUMBAGG_VERSION is None:
+ if not module_available("numbagg"):
raise ImportError(
"numbagg >= 0.2.1 is required for rolling_exp but currently numbagg is not installed"
)
- elif _NUMBAGG_VERSION < Version("0.2.1"):
+ elif pycompat.mod_version("numbagg") < Version("0.2.1"):
raise ImportError(
- f"numbagg >= 0.2.1 is required for rolling_exp but currently version {_NUMBAGG_VERSION} is installed"
+ f"numbagg >= 0.2.1 is required for rolling_exp but currently version {pycompat.mod_version('numbagg')} is installed"
)
- elif _NUMBAGG_VERSION < Version("0.3.1") and min_weight > 0:
+ elif pycompat.mod_version("numbagg") < Version("0.3.1") and min_weight > 0:
raise ImportError(
- f"numbagg >= 0.3.1 is required for `min_weight > 0` within `.rolling_exp` but currently version {_NUMBAGG_VERSION} is installed"
+ f"numbagg >= 0.3.1 is required for `min_weight > 0` within `.rolling_exp` but currently version {pycompat.mod_version('numbagg')} is installed"
)
self.obj: T_DataWithCoords = obj
@@ -127,13 +121,15 @@ def mean(self, keep_attrs: bool | None = None) -> T_DataWithCoords:
Dimensions without coordinates: x
"""
+ import numbagg
+
if keep_attrs is None:
keep_attrs = _get_keep_attrs(default=True)
dim_order = self.obj.dims
return apply_ufunc(
- move_exp_nanmean,
+ numbagg.move_exp_nanmean,
self.obj,
input_core_dims=[[self.dim]],
kwargs=self.kwargs,
@@ -163,13 +159,15 @@ def sum(self, keep_attrs: bool | None = None) -> T_DataWithCoords:
Dimensions without coordinates: x
"""
+ import numbagg
+
if keep_attrs is None:
keep_attrs = _get_keep_attrs(default=True)
dim_order = self.obj.dims
return apply_ufunc(
- move_exp_nansum,
+ numbagg.move_exp_nansum,
self.obj,
input_core_dims=[[self.dim]],
kwargs=self.kwargs,
@@ -194,10 +192,12 @@ def std(self) -> T_DataWithCoords:
Dimensions without coordinates: x
"""
- if _NUMBAGG_VERSION is None or _NUMBAGG_VERSION < Version("0.4.0"):
+ if pycompat.mod_version("numbagg") < Version("0.4.0"):
raise ImportError(
- f"numbagg >= 0.4.0 is required for rolling_exp().std(), currently {_NUMBAGG_VERSION} is installed"
+ f"numbagg >= 0.4.0 is required for rolling_exp().std(), currently {pycompat.mod_version('numbagg')} is installed"
)
+ import numbagg
+
dim_order = self.obj.dims
return apply_ufunc(
@@ -225,12 +225,12 @@ def var(self) -> T_DataWithCoords:
array([ nan, 0. , 0.46153846, 0.18461538, 0.06446281])
Dimensions without coordinates: x
"""
-
- if _NUMBAGG_VERSION is None or _NUMBAGG_VERSION < Version("0.4.0"):
+ if pycompat.mod_version("numbagg") < Version("0.4.0"):
raise ImportError(
- f"numbagg >= 0.4.0 is required for rolling_exp().var(), currently {_NUMBAGG_VERSION} is installed"
+ f"numbagg >= 0.4.0 is required for rolling_exp().std(), currently {pycompat.mod_version('numbagg')} is installed"
)
dim_order = self.obj.dims
+ import numbagg
return apply_ufunc(
numbagg.move_exp_nanvar,
@@ -258,11 +258,12 @@ def cov(self, other: T_DataWithCoords) -> T_DataWithCoords:
Dimensions without coordinates: x
"""
- if _NUMBAGG_VERSION is None or _NUMBAGG_VERSION < Version("0.4.0"):
+ if pycompat.mod_version("numbagg") < Version("0.4.0"):
raise ImportError(
- f"numbagg >= 0.4.0 is required for rolling_exp().cov(), currently {_NUMBAGG_VERSION} is installed"
+ f"numbagg >= 0.4.0 is required for rolling_exp().std(), currently {pycompat.mod_version('numbagg')} is installed"
)
dim_order = self.obj.dims
+ import numbagg
return apply_ufunc(
numbagg.move_exp_nancov,
@@ -291,11 +292,12 @@ def corr(self, other: T_DataWithCoords) -> T_DataWithCoords:
Dimensions without coordinates: x
"""
- if _NUMBAGG_VERSION is None or _NUMBAGG_VERSION < Version("0.4.0"):
+ if pycompat.mod_version("numbagg") < Version("0.4.0"):
raise ImportError(
- f"numbagg >= 0.4.0 is required for rolling_exp().cov(), currently {_NUMBAGG_VERSION} is installed"
+ f"numbagg >= 0.4.0 is required for rolling_exp().std(), currently {pycompat.mod_version('numbagg')} is installed"
)
dim_order = self.obj.dims
+ import numbagg
return apply_ufunc(
numbagg.move_exp_nancorr,
diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py
index 8b5cf456bcb..f7f8f823d78 100644
--- a/xarray/tests/__init__.py
+++ b/xarray/tests/__init__.py
@@ -53,7 +53,8 @@ def _importorskip(
mod = importlib.import_module(modname)
has = True
if minversion is not None:
- if Version(mod.__version__) < Version(minversion):
+ v = getattr(mod, "__version__", "999")
+ if Version(v) < Version(minversion):
raise ImportError("Minimum version not satisfied")
except ImportError:
has = False
@@ -96,6 +97,10 @@ def _importorskip(
requires_scipy_or_netCDF4 = pytest.mark.skipif(
not has_scipy_or_netCDF4, reason="requires scipy or netCDF4"
)
+has_numbagg_or_bottleneck = has_numbagg or has_bottleneck
+requires_numbagg_or_bottleneck = pytest.mark.skipif(
+ not has_numbagg_or_bottleneck, reason="requires numbagg or bottleneck"
+)
# _importorskip does not work for development versions
has_pandas_version_two = Version(pd.__version__).major >= 2
requires_pandas_version_two = pytest.mark.skipif(
diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py
index e318bf01a7e..20a54c3ed53 100644
--- a/xarray/tests/test_missing.py
+++ b/xarray/tests/test_missing.py
@@ -24,6 +24,8 @@
requires_bottleneck,
requires_cftime,
requires_dask,
+ requires_numbagg,
+ requires_numbagg_or_bottleneck,
requires_scipy,
)
@@ -407,7 +409,7 @@ def test_interpolate_dask_expected_dtype(dtype, method):
assert da.dtype == da.compute().dtype
-@requires_bottleneck
+@requires_numbagg_or_bottleneck
def test_ffill():
da = xr.DataArray(np.array([4, 5, np.nan], dtype=np.float64), dims="x")
expected = xr.DataArray(np.array([4, 5, 5], dtype=np.float64), dims="x")
@@ -415,9 +417,9 @@ def test_ffill():
assert_equal(actual, expected)
-def test_ffill_use_bottleneck():
+def test_ffill_use_bottleneck_numbagg():
da = xr.DataArray(np.array([4, 5, np.nan], dtype=np.float64), dims="x")
- with xr.set_options(use_bottleneck=False):
+ with xr.set_options(use_bottleneck=False, use_numbagg=False):
with pytest.raises(RuntimeError):
da.ffill("x")
@@ -426,14 +428,24 @@ def test_ffill_use_bottleneck():
def test_ffill_use_bottleneck_dask():
da = xr.DataArray(np.array([4, 5, np.nan], dtype=np.float64), dims="x")
da = da.chunk({"x": 1})
- with xr.set_options(use_bottleneck=False):
+ with xr.set_options(use_bottleneck=False, use_numbagg=False):
with pytest.raises(RuntimeError):
da.ffill("x")
+@requires_numbagg
+@requires_dask
+def test_ffill_use_numbagg_dask():
+ with xr.set_options(use_bottleneck=False):
+ da = xr.DataArray(np.array([4, 5, np.nan], dtype=np.float64), dims="x")
+ da = da.chunk(x=-1)
+ # Succeeds with a single chunk:
+ _ = da.ffill("x").compute()
+
+
def test_bfill_use_bottleneck():
da = xr.DataArray(np.array([4, 5, np.nan], dtype=np.float64), dims="x")
- with xr.set_options(use_bottleneck=False):
+ with xr.set_options(use_bottleneck=False, use_numbagg=False):
with pytest.raises(RuntimeError):
da.bfill("x")
@@ -442,7 +454,7 @@ def test_bfill_use_bottleneck():
def test_bfill_use_bottleneck_dask():
da = xr.DataArray(np.array([4, 5, np.nan], dtype=np.float64), dims="x")
da = da.chunk({"x": 1})
- with xr.set_options(use_bottleneck=False):
+ with xr.set_options(use_bottleneck=False, use_numbagg=False):
with pytest.raises(RuntimeError):
da.bfill("x")
diff --git a/xarray/tests/test_plugins.py b/xarray/tests/test_plugins.py
index 1af255d30bb..b518c973d3a 100644
--- a/xarray/tests/test_plugins.py
+++ b/xarray/tests/test_plugins.py
@@ -218,28 +218,29 @@ def test_lazy_import() -> None:
When importing xarray these should not be imported as well.
Only when running code for the first time that requires them.
"""
- blacklisted = [
+ deny_list = [
+ "cubed",
+ "cupy",
+ # "dask", # TODO: backends.locks is not lazy yet :(
+ "dask.array",
+ "dask.distributed",
+ "flox",
"h5netcdf",
+ "matplotlib",
+ "nc_time_axis",
"netCDF4",
- "pydap",
"Nio",
+ "numbagg",
+ "pint",
+ "pydap",
"scipy",
- "zarr",
- "matplotlib",
- "nc_time_axis",
- "flox",
- # "dask", # TODO: backends.locks is not lazy yet :(
- "dask.array",
- "dask.distributed",
"sparse",
- "cupy",
- "pint",
- "cubed",
+ "zarr",
]
# ensure that none of the above modules has been imported before
modules_backup = {}
for pkg in list(sys.modules.keys()):
- for mod in blacklisted + ["xarray"]:
+ for mod in deny_list + ["xarray"]:
if pkg.startswith(mod):
modules_backup[pkg] = sys.modules[pkg]
del sys.modules[pkg]
@@ -255,7 +256,7 @@ def test_lazy_import() -> None:
# lazy loaded are loaded when importing xarray
is_imported = set()
for pkg in sys.modules:
- for mod in blacklisted:
+ for mod in deny_list:
if pkg.startswith(mod):
is_imported.add(mod)
break
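As a minimal sketch (not part of the patch), the contract this test enforces can be checked by hand:

    import sys

    import xarray  # noqa: F401

    # None of the optional heavy dependencies should be imported eagerly:
    assert not any(mod.startswith("matplotlib") for mod in sys.modules)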
From 633e66a64e364c42b59294bd5ce60c6627a18d25 Mon Sep 17 00:00:00 2001
From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com>
Date: Sat, 25 Nov 2023 13:55:19 -0800
Subject: [PATCH 14/58] Refine rolling_exp error messages (#8485)
(Sorry, copy & pasted too liberally!)
---
xarray/core/rolling_exp.py | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/xarray/core/rolling_exp.py b/xarray/core/rolling_exp.py
index 1e4b805208f..144e26a86b2 100644
--- a/xarray/core/rolling_exp.py
+++ b/xarray/core/rolling_exp.py
@@ -227,7 +227,7 @@ def var(self) -> T_DataWithCoords:
"""
if pycompat.mod_version("numbagg") < Version("0.4.0"):
raise ImportError(
- f"numbagg >= 0.4.0 is required for rolling_exp().std(), currently {pycompat.mod_version('numbagg')} is installed"
+ f"numbagg >= 0.4.0 is required for rolling_exp().var(), currently {pycompat.mod_version('numbagg')} is installed"
)
dim_order = self.obj.dims
import numbagg
@@ -260,7 +260,7 @@ def cov(self, other: T_DataWithCoords) -> T_DataWithCoords:
if pycompat.mod_version("numbagg") < Version("0.4.0"):
raise ImportError(
- f"numbagg >= 0.4.0 is required for rolling_exp().std(), currently {pycompat.mod_version('numbagg')} is installed"
+ f"numbagg >= 0.4.0 is required for rolling_exp().cov(), currently {pycompat.mod_version('numbagg')} is installed"
)
dim_order = self.obj.dims
import numbagg
@@ -294,7 +294,7 @@ def corr(self, other: T_DataWithCoords) -> T_DataWithCoords:
if pycompat.mod_version("numbagg") < Version("0.4.0"):
raise ImportError(
- f"numbagg >= 0.4.0 is required for rolling_exp().std(), currently {pycompat.mod_version('numbagg')} is installed"
+ f"numbagg >= 0.4.0 is required for rolling_exp().corr(), currently {pycompat.mod_version('numbagg')} is installed"
)
dim_order = self.obj.dims
import numbagg
From d54c461c0af9f7d0945862ebc9dec1a3b0eacca6 Mon Sep 17 00:00:00 2001
From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com>
Date: Mon, 27 Nov 2023 12:56:56 -0800
Subject: [PATCH 15/58] Fix Zarr region transpose (#8484)
* Fix Zarr region transpose
This wasn't working on an unregion-ed write; I think because `new_var` was being lost.
* [pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
---------
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
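A minimal sketch of the case this fixes (assumes zarr is installed; the path is illustrative) — overwriting with a transposed dataset, without a ``region``:

    import numpy as np
    import xarray as xr

    ds = xr.Dataset(
        {"v": (("x", "y"), np.ones((2, 3)))},
        coords={"x": [0, 1], "y": [0, 1, 2]},
    )
    ds.to_zarr("test.zarr")
    # Previously this could fail because the transposed variable was lost:
    ds.transpose("y", "x").to_zarr("test.zarr", mode="r+")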
---
doc/whats-new.rst | 2 ++
xarray/backends/zarr.py | 8 +++-----
xarray/tests/test_backends.py | 9 +++++++--
3 files changed, 12 insertions(+), 7 deletions(-)
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index b2efe650e28..71cbc1a08ee 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -43,6 +43,8 @@ Bug fixes
~~~~~~~~~
- Fix dtype inference for ``pd.CategoricalIndex`` when categories are backed by a ``pd.ExtensionDtype`` (:pull:`8481`)
+- Fix writing a variable that requires transposing when not writing to a region (:pull:`8484`)
+ By `Maximilian Roos `_.
Documentation
diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py
index f0eece3bb61..7f1af10b45a 100644
--- a/xarray/backends/zarr.py
+++ b/xarray/backends/zarr.py
@@ -624,12 +624,10 @@ def store(
variables_encoded.update(vars_with_encoding)
for var_name in existing_variable_names:
- new_var = variables_encoded[var_name]
- existing_var = existing_vars[var_name]
- new_var = _validate_and_transpose_existing_dims(
+ variables_encoded[var_name] = _validate_and_transpose_existing_dims(
var_name,
- new_var,
- existing_var,
+ variables_encoded[var_name],
+ existing_vars[var_name],
self._write_region,
self._append_dim,
)
diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
index 85248b5c40a..0704dd835c0 100644
--- a/xarray/tests/test_backends.py
+++ b/xarray/tests/test_backends.py
@@ -5423,7 +5423,7 @@ def test_zarr_region_append(self, tmp_path):
@requires_zarr
-def test_zarr_region_transpose(tmp_path):
+def test_zarr_region(tmp_path):
x = np.arange(0, 50, 10)
y = np.arange(0, 20, 2)
data = np.ones((5, 10))
@@ -5438,7 +5438,12 @@ def test_zarr_region_transpose(tmp_path):
)
ds.to_zarr(tmp_path / "test.zarr")
- ds_region = 1 + ds.isel(x=[0], y=[0]).transpose()
+ ds_transposed = ds.transpose("y", "x")
+
+ ds_region = 1 + ds_transposed.isel(x=[0], y=[0])
ds_region.to_zarr(
tmp_path / "test.zarr", region={"x": slice(0, 1), "y": slice(0, 1)}
)
+
+ # Write without region
+ ds_transposed.to_zarr(tmp_path / "test.zarr", mode="r+")
From d3a15274b41810efc656bc4aeec0e1955cf2be32 Mon Sep 17 00:00:00 2001
From: Max Jones <14077947+maxrjones@users.noreply.github.com>
Date: Tue, 28 Nov 2023 01:29:43 -0500
Subject: [PATCH 16/58] Reduce redundancy between namedarray and variable tests
(#8405)
Co-authored-by: Deepak Cherian
Co-authored-by: Maximilian Roos <5635139+max-sixty@users.noreply.github.com>
Co-authored-by: Anderson Banihirwe <13301940+andersy005@users.noreply.github.com>
---
xarray/tests/test_namedarray.py | 788 +++++++++++++++++---------------
xarray/tests/test_variable.py | 44 +-
2 files changed, 420 insertions(+), 412 deletions(-)
diff --git a/xarray/tests/test_namedarray.py b/xarray/tests/test_namedarray.py
index e0141e12755..fcdf063d106 100644
--- a/xarray/tests/test_namedarray.py
+++ b/xarray/tests/test_namedarray.py
@@ -2,6 +2,7 @@
import copy
import warnings
+from abc import abstractmethod
from collections.abc import Mapping
from typing import TYPE_CHECKING, Any, Generic, cast, overload
@@ -57,387 +58,420 @@ def __array_namespace__(self) -> ModuleType:
return np
-@pytest.fixture
-def random_inputs() -> np.ndarray[Any, np.dtype[np.float32]]:
- return np.arange(3 * 4 * 5, dtype=np.float32).reshape((3, 4, 5))
-
-
-def test_namedarray_init() -> None:
- dtype = np.dtype(np.int8)
- expected = np.array([1, 2], dtype=dtype)
- actual: NamedArray[Any, np.dtype[np.int8]]
- actual = NamedArray(("x",), expected)
- assert np.array_equal(np.asarray(actual.data), expected)
-
- with pytest.raises(AttributeError):
- expected2 = [1, 2]
- actual2: NamedArray[Any, Any]
- actual2 = NamedArray(("x",), expected2) # type: ignore[arg-type]
- assert np.array_equal(np.asarray(actual2.data), expected2)
-
-
-@pytest.mark.parametrize(
- "dims, data, expected, raise_error",
- [
- (("x",), [1, 2, 3], np.array([1, 2, 3]), False),
- ((1,), np.array([4, 5, 6]), np.array([4, 5, 6]), False),
- ((), 2, np.array(2), False),
- # Fail:
- (("x",), NamedArray("time", np.array([1, 2, 3])), np.array([1, 2, 3]), True),
- ],
-)
-def test_from_array(
- dims: _DimsLike,
- data: ArrayLike,
- expected: np.ndarray[Any, Any],
- raise_error: bool,
-) -> None:
- actual: NamedArray[Any, Any]
- if raise_error:
- with pytest.raises(TypeError, match="already a Named array"):
- actual = from_array(dims, data)
-
- # Named arrays are not allowed:
- from_array(actual) # type: ignore[call-overload]
- else:
- actual = from_array(dims, data)
-
+class NamedArraySubclassobjects:
+ @pytest.fixture
+ def target(self, data: np.ndarray[Any, Any]) -> Any:
+ """Fixture that needs to be overridden"""
+ raise NotImplementedError
+
+ @abstractmethod
+ def cls(self, *args: Any, **kwargs: Any) -> Any:
+ """Method that needs to be overridden"""
+ raise NotImplementedError
+
+ @pytest.fixture
+ def data(self) -> np.ndarray[Any, np.dtype[Any]]:
+ return 0.5 * np.arange(10).reshape(2, 5)
+
+ @pytest.fixture
+ def random_inputs(self) -> np.ndarray[Any, np.dtype[np.float32]]:
+ return np.arange(3 * 4 * 5, dtype=np.float32).reshape((3, 4, 5))
+
+ def test_properties(self, target: Any, data: Any) -> None:
+ assert target.dims == ("x", "y")
+ assert np.array_equal(target.data, data)
+ assert target.dtype == float
+ assert target.shape == (2, 5)
+ assert target.ndim == 2
+ assert target.sizes == {"x": 2, "y": 5}
+ assert target.size == 10
+ assert target.nbytes == 80
+ assert len(target) == 2
+
+ def test_attrs(self, target: Any) -> None:
+ assert target.attrs == {}
+ attrs = {"foo": "bar"}
+ target.attrs = attrs
+ assert target.attrs == attrs
+ assert isinstance(target.attrs, dict)
+ target.attrs["foo"] = "baz"
+ assert target.attrs["foo"] == "baz"
+
+ @pytest.mark.parametrize(
+ "expected", [np.array([1, 2], dtype=np.dtype(np.int8)), [1, 2]]
+ )
+ def test_init(self, expected: Any) -> None:
+ actual = self.cls(("x",), expected)
assert np.array_equal(np.asarray(actual.data), expected)
+ actual = self.cls(("x",), expected)
+ assert np.array_equal(np.asarray(actual.data), expected)
-def test_from_array_with_masked_array() -> None:
- masked_array: np.ndarray[Any, np.dtype[np.generic]]
- masked_array = np.ma.array([1, 2, 3], mask=[False, True, False]) # type: ignore[no-untyped-call]
- with pytest.raises(NotImplementedError):
- from_array(("x",), masked_array)
-
-
-def test_from_array_with_0d_object() -> None:
- data = np.empty((), dtype=object)
- data[()] = (10, 12, 12)
- narr = from_array((), data)
- np.array_equal(np.asarray(narr.data), data)
-
-
-# TODO: Make xr.core.indexing.ExplicitlyIndexed pass as a subclass of_arrayfunction_or_api
-# and remove this test.
-def test_from_array_with_explicitly_indexed(
- random_inputs: np.ndarray[Any, Any]
-) -> None:
- array: CustomArray[Any, Any]
- array = CustomArray(random_inputs)
- output: NamedArray[Any, Any]
- output = from_array(("x", "y", "z"), array)
- assert isinstance(output.data, np.ndarray)
-
- array2: CustomArrayIndexable[Any, Any]
- array2 = CustomArrayIndexable(random_inputs)
- output2: NamedArray[Any, Any]
- output2 = from_array(("x", "y", "z"), array2)
- assert isinstance(output2.data, CustomArrayIndexable)
-
-
-def test_properties() -> None:
- data = 0.5 * np.arange(10).reshape(2, 5)
- named_array: NamedArray[Any, Any]
- named_array = NamedArray(["x", "y"], data, {"key": "value"})
- assert named_array.dims == ("x", "y")
- assert np.array_equal(np.asarray(named_array.data), data)
- assert named_array.attrs == {"key": "value"}
- assert named_array.ndim == 2
- assert named_array.sizes == {"x": 2, "y": 5}
- assert named_array.size == 10
- assert named_array.nbytes == 80
- assert len(named_array) == 2
-
-
-def test_attrs() -> None:
- named_array: NamedArray[Any, Any]
- named_array = NamedArray(["x", "y"], np.arange(10).reshape(2, 5))
- assert named_array.attrs == {}
- named_array.attrs["key"] = "value"
- assert named_array.attrs == {"key": "value"}
- named_array.attrs = {"key": "value2"}
- assert named_array.attrs == {"key": "value2"}
-
-
-def test_data(random_inputs: np.ndarray[Any, Any]) -> None:
- named_array: NamedArray[Any, Any]
- named_array = NamedArray(["x", "y", "z"], random_inputs)
- assert np.array_equal(np.asarray(named_array.data), random_inputs)
- with pytest.raises(ValueError):
- named_array.data = np.random.random((3, 4)).astype(np.float64)
-
-
-def test_real_and_imag() -> None:
- expected_real: np.ndarray[Any, np.dtype[np.float64]]
- expected_real = np.arange(3, dtype=np.float64)
-
- expected_imag: np.ndarray[Any, np.dtype[np.float64]]
- expected_imag = -np.arange(3, dtype=np.float64)
-
- arr: np.ndarray[Any, np.dtype[np.complex128]]
- arr = expected_real + 1j * expected_imag
-
- named_array: NamedArray[Any, np.dtype[np.complex128]]
- named_array = NamedArray(["x"], arr)
-
- actual_real: duckarray[Any, np.dtype[np.float64]] = named_array.real.data
- assert np.array_equal(np.asarray(actual_real), expected_real)
- assert actual_real.dtype == expected_real.dtype
-
- actual_imag: duckarray[Any, np.dtype[np.float64]] = named_array.imag.data
- assert np.array_equal(np.asarray(actual_imag), expected_imag)
- assert actual_imag.dtype == expected_imag.dtype
-
-
-# Additional tests as per your original class-based code
-@pytest.mark.parametrize(
- "data, dtype",
- [
- ("foo", np.dtype("U3")),
- (b"foo", np.dtype("S3")),
- ],
-)
-def test_0d_string(data: Any, dtype: DTypeLike) -> None:
- named_array: NamedArray[Any, Any]
- named_array = from_array([], data)
- assert named_array.data == data
- assert named_array.dims == ()
- assert named_array.sizes == {}
- assert named_array.attrs == {}
- assert named_array.ndim == 0
- assert named_array.size == 1
- assert named_array.dtype == dtype
-
-
-def test_0d_object() -> None:
- named_array: NamedArray[Any, Any]
- named_array = from_array([], (10, 12, 12))
- expected_data = np.empty((), dtype=object)
- expected_data[()] = (10, 12, 12)
- assert np.array_equal(np.asarray(named_array.data), expected_data)
-
- assert named_array.dims == ()
- assert named_array.sizes == {}
- assert named_array.attrs == {}
- assert named_array.ndim == 0
- assert named_array.size == 1
- assert named_array.dtype == np.dtype("O")
-
-
-def test_0d_datetime() -> None:
- named_array: NamedArray[Any, Any]
- named_array = from_array([], np.datetime64("2000-01-01"))
- assert named_array.dtype == np.dtype("datetime64[D]")
-
-
-@pytest.mark.parametrize(
- "timedelta, expected_dtype",
- [
- (np.timedelta64(1, "D"), np.dtype("timedelta64[D]")),
- (np.timedelta64(1, "s"), np.dtype("timedelta64[s]")),
- (np.timedelta64(1, "m"), np.dtype("timedelta64[m]")),
- (np.timedelta64(1, "h"), np.dtype("timedelta64[h]")),
- (np.timedelta64(1, "us"), np.dtype("timedelta64[us]")),
- (np.timedelta64(1, "ns"), np.dtype("timedelta64[ns]")),
- (np.timedelta64(1, "ps"), np.dtype("timedelta64[ps]")),
- (np.timedelta64(1, "fs"), np.dtype("timedelta64[fs]")),
- (np.timedelta64(1, "as"), np.dtype("timedelta64[as]")),
- ],
-)
-def test_0d_timedelta(
- timedelta: np.timedelta64, expected_dtype: np.dtype[np.timedelta64]
-) -> None:
- named_array: NamedArray[Any, Any]
- named_array = from_array([], timedelta)
- assert named_array.dtype == expected_dtype
- assert named_array.data == timedelta
-
-
-@pytest.mark.parametrize(
- "dims, data_shape, new_dims, raises",
- [
- (["x", "y", "z"], (2, 3, 4), ["a", "b", "c"], False),
- (["x", "y", "z"], (2, 3, 4), ["a", "b"], True),
- (["x", "y", "z"], (2, 4, 5), ["a", "b", "c", "d"], True),
- ([], [], (), False),
- ([], [], ("x",), True),
- ],
-)
-def test_dims_setter(dims: Any, data_shape: Any, new_dims: Any, raises: bool) -> None:
- named_array: NamedArray[Any, Any]
- named_array = NamedArray(dims, np.asarray(np.random.random(data_shape)))
- assert named_array.dims == tuple(dims)
- if raises:
+ def test_data(self, random_inputs: Any) -> None:
+ expected = self.cls(["x", "y", "z"], random_inputs)
+ assert np.array_equal(np.asarray(expected.data), random_inputs)
with pytest.raises(ValueError):
- named_array.dims = new_dims
- else:
- named_array.dims = new_dims
- assert named_array.dims == tuple(new_dims)
-
-
-def test_duck_array_class() -> None:
- def test_duck_array_typevar(a: duckarray[Any, _DType]) -> duckarray[Any, _DType]:
- # Mypy checks a is valid:
- b: duckarray[Any, _DType] = a
-
- # Runtime check if valid:
- if isinstance(b, _arrayfunction_or_api):
- return b
+ expected.data = np.random.random((3, 4)).astype(np.float64)
+ d2 = np.arange(3 * 4 * 5, dtype=np.float32).reshape((3, 4, 5))
+ expected.data = d2
+ assert np.array_equal(np.asarray(expected.data), d2)
+
+
+class TestNamedArray(NamedArraySubclassobjects):
+ def cls(self, *args: Any, **kwargs: Any) -> NamedArray[Any, Any]:
+ return NamedArray(*args, **kwargs)
+
+ @pytest.fixture
+ def target(self, data: np.ndarray[Any, Any]) -> NamedArray[Any, Any]:
+ return NamedArray(["x", "y"], data)
+
+ @pytest.mark.parametrize(
+ "expected",
+ [
+ np.array([1, 2], dtype=np.dtype(np.int8)),
+ pytest.param(
+ [1, 2],
+ marks=pytest.mark.xfail(
+ reason="NamedArray only supports array-like objects"
+ ),
+ ),
+ ],
+ )
+ def test_init(self, expected: Any) -> None:
+ super().test_init(expected)
+
+ @pytest.mark.parametrize(
+ "dims, data, expected, raise_error",
+ [
+ (("x",), [1, 2, 3], np.array([1, 2, 3]), False),
+ ((1,), np.array([4, 5, 6]), np.array([4, 5, 6]), False),
+ ((), 2, np.array(2), False),
+ # Fail:
+ (
+ ("x",),
+ NamedArray("time", np.array([1, 2, 3])),
+ np.array([1, 2, 3]),
+ True,
+ ),
+ ],
+ )
+ def test_from_array(
+ self,
+ dims: _DimsLike,
+ data: ArrayLike,
+ expected: np.ndarray[Any, Any],
+ raise_error: bool,
+ ) -> None:
+ actual: NamedArray[Any, Any]
+ if raise_error:
+ with pytest.raises(TypeError, match="already a Named array"):
+ actual = from_array(dims, data)
+
+ # Named arrays are not allowed:
+ from_array(actual) # type: ignore[call-overload]
else:
- raise TypeError(f"a ({type(a)}) is not a valid _arrayfunction or _arrayapi")
-
- numpy_a: NDArray[np.int64]
- numpy_a = np.array([2.1, 4], dtype=np.dtype(np.int64))
- test_duck_array_typevar(numpy_a)
-
- masked_a: np.ma.MaskedArray[Any, np.dtype[np.int64]]
- masked_a = np.ma.asarray([2.1, 4], dtype=np.dtype(np.int64)) # type: ignore[no-untyped-call]
- test_duck_array_typevar(masked_a)
-
- custom_a: CustomArrayIndexable[Any, np.dtype[np.int64]]
- custom_a = CustomArrayIndexable(numpy_a)
- test_duck_array_typevar(custom_a)
-
- # Test numpy's array api:
- with warnings.catch_warnings():
- warnings.filterwarnings(
- "ignore",
- r"The numpy.array_api submodule is still experimental",
- category=UserWarning,
- )
- import numpy.array_api as nxp
-
- # TODO: nxp doesn't use dtype typevars, so can only use Any for the moment:
- arrayapi_a: duckarray[Any, Any] # duckarray[Any, np.dtype[np.int64]]
- arrayapi_a = nxp.asarray([2.1, 4], dtype=np.dtype(np.int64))
- test_duck_array_typevar(arrayapi_a)
-
-
-def test_new_namedarray() -> None:
- dtype_float = np.dtype(np.float32)
- narr_float: NamedArray[Any, np.dtype[np.float32]]
- narr_float = NamedArray(("x",), np.array([1.5, 3.2], dtype=dtype_float))
- assert narr_float.dtype == dtype_float
-
- dtype_int = np.dtype(np.int8)
- narr_int: NamedArray[Any, np.dtype[np.int8]]
- narr_int = narr_float._new(("x",), np.array([1, 3], dtype=dtype_int))
- assert narr_int.dtype == dtype_int
-
- # Test with a subclass:
- class Variable(
- NamedArray[_ShapeType_co, _DType_co], Generic[_ShapeType_co, _DType_co]
- ):
- @overload
- def _new(
- self,
- dims: _DimsLike | Default = ...,
- data: duckarray[Any, _DType] = ...,
- attrs: _AttrsLike | Default = ...,
- ) -> Variable[Any, _DType]:
- ...
-
- @overload
- def _new(
- self,
- dims: _DimsLike | Default = ...,
- data: Default = ...,
- attrs: _AttrsLike | Default = ...,
- ) -> Variable[_ShapeType_co, _DType_co]:
- ...
-
- def _new(
- self,
- dims: _DimsLike | Default = _default,
- data: duckarray[Any, _DType] | Default = _default,
- attrs: _AttrsLike | Default = _default,
- ) -> Variable[Any, _DType] | Variable[_ShapeType_co, _DType_co]:
- dims_ = copy.copy(self._dims) if dims is _default else dims
-
- attrs_: Mapping[Any, Any] | None
- if attrs is _default:
- attrs_ = None if self._attrs is None else self._attrs.copy()
- else:
- attrs_ = attrs
-
- if data is _default:
- return type(self)(dims_, copy.copy(self._data), attrs_)
- else:
- cls_ = cast("type[Variable[Any, _DType]]", type(self))
- return cls_(dims_, data, attrs_)
-
- var_float: Variable[Any, np.dtype[np.float32]]
- var_float = Variable(("x",), np.array([1.5, 3.2], dtype=dtype_float))
- assert var_float.dtype == dtype_float
-
- var_int: Variable[Any, np.dtype[np.int8]]
- var_int = var_float._new(("x",), np.array([1, 3], dtype=dtype_int))
- assert var_int.dtype == dtype_int
-
-
-def test_replace_namedarray() -> None:
- dtype_float = np.dtype(np.float32)
- np_val: np.ndarray[Any, np.dtype[np.float32]]
- np_val = np.array([1.5, 3.2], dtype=dtype_float)
- np_val2: np.ndarray[Any, np.dtype[np.float32]]
- np_val2 = 2 * np_val
-
- narr_float: NamedArray[Any, np.dtype[np.float32]]
- narr_float = NamedArray(("x",), np_val)
- assert narr_float.dtype == dtype_float
-
- narr_float2: NamedArray[Any, np.dtype[np.float32]]
- narr_float2 = NamedArray(("x",), np_val2)
- assert narr_float2.dtype == dtype_float
-
- # Test with a subclass:
- class Variable(
- NamedArray[_ShapeType_co, _DType_co], Generic[_ShapeType_co, _DType_co]
- ):
- @overload
- def _new(
- self,
- dims: _DimsLike | Default = ...,
- data: duckarray[Any, _DType] = ...,
- attrs: _AttrsLike | Default = ...,
- ) -> Variable[Any, _DType]:
- ...
-
- @overload
- def _new(
- self,
- dims: _DimsLike | Default = ...,
- data: Default = ...,
- attrs: _AttrsLike | Default = ...,
- ) -> Variable[_ShapeType_co, _DType_co]:
- ...
-
- def _new(
- self,
- dims: _DimsLike | Default = _default,
- data: duckarray[Any, _DType] | Default = _default,
- attrs: _AttrsLike | Default = _default,
- ) -> Variable[Any, _DType] | Variable[_ShapeType_co, _DType_co]:
- dims_ = copy.copy(self._dims) if dims is _default else dims
-
- attrs_: Mapping[Any, Any] | None
- if attrs is _default:
- attrs_ = None if self._attrs is None else self._attrs.copy()
- else:
- attrs_ = attrs
+ actual = from_array(dims, data)
- if data is _default:
- return type(self)(dims_, copy.copy(self._data), attrs_)
+ assert np.array_equal(np.asarray(actual.data), expected)
+
+ def test_from_array_with_masked_array(self) -> None:
+ masked_array: np.ndarray[Any, np.dtype[np.generic]]
+ masked_array = np.ma.array([1, 2, 3], mask=[False, True, False]) # type: ignore[no-untyped-call]
+ with pytest.raises(NotImplementedError):
+ from_array(("x",), masked_array)
+
+ def test_from_array_with_0d_object(self) -> None:
+ data = np.empty((), dtype=object)
+ data[()] = (10, 12, 12)
+ narr = from_array((), data)
+    assert np.array_equal(np.asarray(narr.data), data)
+
+    # TODO: Make xr.core.indexing.ExplicitlyIndexed pass as a subclass of _arrayfunction_or_api
+ # and remove this test.
+ def test_from_array_with_explicitly_indexed(
+ self, random_inputs: np.ndarray[Any, Any]
+ ) -> None:
+ array: CustomArray[Any, Any]
+ array = CustomArray(random_inputs)
+ output: NamedArray[Any, Any]
+ output = from_array(("x", "y", "z"), array)
+ assert isinstance(output.data, np.ndarray)
+
+ array2: CustomArrayIndexable[Any, Any]
+ array2 = CustomArrayIndexable(random_inputs)
+ output2: NamedArray[Any, Any]
+ output2 = from_array(("x", "y", "z"), array2)
+ assert isinstance(output2.data, CustomArrayIndexable)
+
+ def test_real_and_imag(self) -> None:
+ expected_real: np.ndarray[Any, np.dtype[np.float64]]
+ expected_real = np.arange(3, dtype=np.float64)
+
+ expected_imag: np.ndarray[Any, np.dtype[np.float64]]
+ expected_imag = -np.arange(3, dtype=np.float64)
+
+ arr: np.ndarray[Any, np.dtype[np.complex128]]
+ arr = expected_real + 1j * expected_imag
+
+ named_array: NamedArray[Any, np.dtype[np.complex128]]
+ named_array = NamedArray(["x"], arr)
+
+ actual_real: duckarray[Any, np.dtype[np.float64]] = named_array.real.data
+ assert np.array_equal(np.asarray(actual_real), expected_real)
+ assert actual_real.dtype == expected_real.dtype
+
+ actual_imag: duckarray[Any, np.dtype[np.float64]] = named_array.imag.data
+ assert np.array_equal(np.asarray(actual_imag), expected_imag)
+ assert actual_imag.dtype == expected_imag.dtype
+
+    # Additional tests as per the original class-based code
+ @pytest.mark.parametrize(
+ "data, dtype",
+ [
+ ("foo", np.dtype("U3")),
+ (b"foo", np.dtype("S3")),
+ ],
+ )
+ def test_from_array_0d_string(self, data: Any, dtype: DTypeLike) -> None:
+ named_array: NamedArray[Any, Any]
+ named_array = from_array([], data)
+ assert named_array.data == data
+ assert named_array.dims == ()
+ assert named_array.sizes == {}
+ assert named_array.attrs == {}
+ assert named_array.ndim == 0
+ assert named_array.size == 1
+ assert named_array.dtype == dtype
+
+ def test_from_array_0d_object(self) -> None:
+ named_array: NamedArray[Any, Any]
+ named_array = from_array([], (10, 12, 12))
+ expected_data = np.empty((), dtype=object)
+ expected_data[()] = (10, 12, 12)
+ assert np.array_equal(np.asarray(named_array.data), expected_data)
+
+ assert named_array.dims == ()
+ assert named_array.sizes == {}
+ assert named_array.attrs == {}
+ assert named_array.ndim == 0
+ assert named_array.size == 1
+ assert named_array.dtype == np.dtype("O")
+
+ def test_from_array_0d_datetime(self) -> None:
+ named_array: NamedArray[Any, Any]
+ named_array = from_array([], np.datetime64("2000-01-01"))
+ assert named_array.dtype == np.dtype("datetime64[D]")
+
+ @pytest.mark.parametrize(
+ "timedelta, expected_dtype",
+ [
+ (np.timedelta64(1, "D"), np.dtype("timedelta64[D]")),
+ (np.timedelta64(1, "s"), np.dtype("timedelta64[s]")),
+ (np.timedelta64(1, "m"), np.dtype("timedelta64[m]")),
+ (np.timedelta64(1, "h"), np.dtype("timedelta64[h]")),
+ (np.timedelta64(1, "us"), np.dtype("timedelta64[us]")),
+ (np.timedelta64(1, "ns"), np.dtype("timedelta64[ns]")),
+ (np.timedelta64(1, "ps"), np.dtype("timedelta64[ps]")),
+ (np.timedelta64(1, "fs"), np.dtype("timedelta64[fs]")),
+ (np.timedelta64(1, "as"), np.dtype("timedelta64[as]")),
+ ],
+ )
+ def test_from_array_0d_timedelta(
+ self, timedelta: np.timedelta64, expected_dtype: np.dtype[np.timedelta64]
+ ) -> None:
+ named_array: NamedArray[Any, Any]
+ named_array = from_array([], timedelta)
+ assert named_array.dtype == expected_dtype
+ assert named_array.data == timedelta
+
+ @pytest.mark.parametrize(
+ "dims, data_shape, new_dims, raises",
+ [
+ (["x", "y", "z"], (2, 3, 4), ["a", "b", "c"], False),
+ (["x", "y", "z"], (2, 3, 4), ["a", "b"], True),
+ (["x", "y", "z"], (2, 4, 5), ["a", "b", "c", "d"], True),
+ ([], [], (), False),
+ ([], [], ("x",), True),
+ ],
+ )
+ def test_dims_setter(
+ self, dims: Any, data_shape: Any, new_dims: Any, raises: bool
+ ) -> None:
+ named_array: NamedArray[Any, Any]
+ named_array = NamedArray(dims, np.asarray(np.random.random(data_shape)))
+ assert named_array.dims == tuple(dims)
+ if raises:
+ with pytest.raises(ValueError):
+ named_array.dims = new_dims
+ else:
+ named_array.dims = new_dims
+ assert named_array.dims == tuple(new_dims)
+
+ def test_duck_array_class(
+ self,
+ ) -> None:
+ def test_duck_array_typevar(
+ a: duckarray[Any, _DType]
+ ) -> duckarray[Any, _DType]:
+ # Mypy checks a is valid:
+ b: duckarray[Any, _DType] = a
+
+ # Runtime check if valid:
+ if isinstance(b, _arrayfunction_or_api):
+ return b
else:
- cls_ = cast("type[Variable[Any, _DType]]", type(self))
- return cls_(dims_, data, attrs_)
-
- var_float: Variable[Any, np.dtype[np.float32]]
- var_float = Variable(("x",), np_val)
- assert var_float.dtype == dtype_float
-
- var_float2: Variable[Any, np.dtype[np.float32]]
- var_float2 = var_float._replace(("x",), np_val2)
- assert var_float2.dtype == dtype_float
+ raise TypeError(
+ f"a ({type(a)}) is not a valid _arrayfunction or _arrayapi"
+ )
+
+ numpy_a: NDArray[np.int64]
+ numpy_a = np.array([2.1, 4], dtype=np.dtype(np.int64))
+ test_duck_array_typevar(numpy_a)
+
+ masked_a: np.ma.MaskedArray[Any, np.dtype[np.int64]]
+ masked_a = np.ma.asarray([2.1, 4], dtype=np.dtype(np.int64)) # type: ignore[no-untyped-call]
+ test_duck_array_typevar(masked_a)
+
+ custom_a: CustomArrayIndexable[Any, np.dtype[np.int64]]
+ custom_a = CustomArrayIndexable(numpy_a)
+ test_duck_array_typevar(custom_a)
+
+ # Test numpy's array api:
+ with warnings.catch_warnings():
+ warnings.filterwarnings(
+ "ignore",
+ r"The numpy.array_api submodule is still experimental",
+ category=UserWarning,
+ )
+ import numpy.array_api as nxp
+
+ # TODO: nxp doesn't use dtype typevars, so can only use Any for the moment:
+ arrayapi_a: duckarray[Any, Any] # duckarray[Any, np.dtype[np.int64]]
+ arrayapi_a = nxp.asarray([2.1, 4], dtype=np.dtype(np.int64))
+ test_duck_array_typevar(arrayapi_a)
+
+ def test_new_namedarray(self) -> None:
+ dtype_float = np.dtype(np.float32)
+ narr_float: NamedArray[Any, np.dtype[np.float32]]
+ narr_float = NamedArray(("x",), np.array([1.5, 3.2], dtype=dtype_float))
+ assert narr_float.dtype == dtype_float
+
+ dtype_int = np.dtype(np.int8)
+ narr_int: NamedArray[Any, np.dtype[np.int8]]
+ narr_int = narr_float._new(("x",), np.array([1, 3], dtype=dtype_int))
+ assert narr_int.dtype == dtype_int
+
+ # Test with a subclass:
+ class Variable(
+ NamedArray[_ShapeType_co, _DType_co], Generic[_ShapeType_co, _DType_co]
+ ):
+ @overload
+ def _new(
+ self,
+ dims: _DimsLike | Default = ...,
+ data: duckarray[Any, _DType] = ...,
+ attrs: _AttrsLike | Default = ...,
+ ) -> Variable[Any, _DType]:
+ ...
+
+ @overload
+ def _new(
+ self,
+ dims: _DimsLike | Default = ...,
+ data: Default = ...,
+ attrs: _AttrsLike | Default = ...,
+ ) -> Variable[_ShapeType_co, _DType_co]:
+ ...
+
+ def _new(
+ self,
+ dims: _DimsLike | Default = _default,
+ data: duckarray[Any, _DType] | Default = _default,
+ attrs: _AttrsLike | Default = _default,
+ ) -> Variable[Any, _DType] | Variable[_ShapeType_co, _DType_co]:
+ dims_ = copy.copy(self._dims) if dims is _default else dims
+
+ attrs_: Mapping[Any, Any] | None
+ if attrs is _default:
+ attrs_ = None if self._attrs is None else self._attrs.copy()
+ else:
+ attrs_ = attrs
+
+ if data is _default:
+ return type(self)(dims_, copy.copy(self._data), attrs_)
+ else:
+ cls_ = cast("type[Variable[Any, _DType]]", type(self))
+ return cls_(dims_, data, attrs_)
+
+ var_float: Variable[Any, np.dtype[np.float32]]
+ var_float = Variable(("x",), np.array([1.5, 3.2], dtype=dtype_float))
+ assert var_float.dtype == dtype_float
+
+ var_int: Variable[Any, np.dtype[np.int8]]
+ var_int = var_float._new(("x",), np.array([1, 3], dtype=dtype_int))
+ assert var_int.dtype == dtype_int
+
+ def test_replace_namedarray(self) -> None:
+ dtype_float = np.dtype(np.float32)
+ np_val: np.ndarray[Any, np.dtype[np.float32]]
+ np_val = np.array([1.5, 3.2], dtype=dtype_float)
+ np_val2: np.ndarray[Any, np.dtype[np.float32]]
+ np_val2 = 2 * np_val
+
+ narr_float: NamedArray[Any, np.dtype[np.float32]]
+ narr_float = NamedArray(("x",), np_val)
+ assert narr_float.dtype == dtype_float
+
+ narr_float2: NamedArray[Any, np.dtype[np.float32]]
+ narr_float2 = NamedArray(("x",), np_val2)
+ assert narr_float2.dtype == dtype_float
+
+ # Test with a subclass:
+ class Variable(
+ NamedArray[_ShapeType_co, _DType_co], Generic[_ShapeType_co, _DType_co]
+ ):
+ @overload
+ def _new(
+ self,
+ dims: _DimsLike | Default = ...,
+ data: duckarray[Any, _DType] = ...,
+ attrs: _AttrsLike | Default = ...,
+ ) -> Variable[Any, _DType]:
+ ...
+
+ @overload
+ def _new(
+ self,
+ dims: _DimsLike | Default = ...,
+ data: Default = ...,
+ attrs: _AttrsLike | Default = ...,
+ ) -> Variable[_ShapeType_co, _DType_co]:
+ ...
+
+ def _new(
+ self,
+ dims: _DimsLike | Default = _default,
+ data: duckarray[Any, _DType] | Default = _default,
+ attrs: _AttrsLike | Default = _default,
+ ) -> Variable[Any, _DType] | Variable[_ShapeType_co, _DType_co]:
+ dims_ = copy.copy(self._dims) if dims is _default else dims
+
+ attrs_: Mapping[Any, Any] | None
+ if attrs is _default:
+ attrs_ = None if self._attrs is None else self._attrs.copy()
+ else:
+ attrs_ = attrs
+
+ if data is _default:
+ return type(self)(dims_, copy.copy(self._data), attrs_)
+ else:
+ cls_ = cast("type[Variable[Any, _DType]]", type(self))
+ return cls_(dims_, data, attrs_)
+
+ var_float: Variable[Any, np.dtype[np.float32]]
+ var_float = Variable(("x",), np_val)
+ assert var_float.dtype == dtype_float
+
+ var_float2: Variable[Any, np.dtype[np.float32]]
+ var_float2 = var_float._replace(("x",), np_val2)
+ assert var_float2.dtype == dtype_float
diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py
index d91cf85e4eb..0bea3f63673 100644
--- a/xarray/tests/test_variable.py
+++ b/xarray/tests/test_variable.py
@@ -1,7 +1,7 @@
from __future__ import annotations
import warnings
-from abc import ABC, abstractmethod
+from abc import ABC
from copy import copy, deepcopy
from datetime import datetime, timedelta
from textwrap import dedent
@@ -46,6 +46,7 @@
requires_sparse,
source_ndarray,
)
+from xarray.tests.test_namedarray import NamedArraySubclassobjects
dask_array_type = array_type("dask")
@@ -63,34 +64,11 @@ def var():
return Variable(dims=list("xyz"), data=np.random.rand(3, 4, 5))
-class VariableSubclassobjects(ABC):
- @abstractmethod
- def cls(self, *args, **kwargs) -> Variable:
- raise NotImplementedError
-
- def test_properties(self):
- data = 0.5 * np.arange(10)
- v = self.cls(["time"], data, {"foo": "bar"})
- assert v.dims == ("time",)
- assert_array_equal(v.values, data)
- assert v.dtype == float
- assert v.shape == (10,)
- assert v.size == 10
- assert v.sizes == {"time": 10}
- assert v.nbytes == 80
- assert v.ndim == 1
- assert len(v) == 10
- assert v.attrs == {"foo": "bar"}
-
- def test_attrs(self):
- v = self.cls(["time"], 0.5 * np.arange(10))
- assert v.attrs == {}
- attrs = {"foo": "bar"}
- v.attrs = attrs
- assert v.attrs == attrs
- assert isinstance(v.attrs, dict)
- v.attrs["foo"] = "baz"
- assert v.attrs["foo"] == "baz"
+class VariableSubclassobjects(NamedArraySubclassobjects, ABC):
+ @pytest.fixture
+ def target(self, data):
+ data = 0.5 * np.arange(10).reshape(2, 5)
+ return Variable(["x", "y"], data)
def test_getitem_dict(self):
v = self.cls(["x"], np.random.randn(5))
@@ -368,7 +346,7 @@ def test_1d_math(self, dtype: np.typing.DTypeLike) -> None:
assert_array_equal(v >> 2, x >> 2)
# binary ops with numpy arrays
assert_array_equal((v * x).values, x**2)
- assert_array_equal((x * v).values, x**2) # type: ignore[attr-defined] # TODO: Fix mypy thinking numpy takes priority, GH7780
+ assert_array_equal((x * v).values, x**2)
assert_array_equal(v - y, v - 1)
assert_array_equal(y - v, 1 - v)
if dtype is int:
@@ -1065,9 +1043,8 @@ def cls(self, *args, **kwargs) -> Variable:
def setup(self):
self.d = np.random.random((10, 3)).astype(np.float64)
- def test_data_and_values(self):
+ def test_values(self):
v = Variable(["time", "x"], self.d)
- assert_array_equal(v.data, self.d)
assert_array_equal(v.values, self.d)
assert source_ndarray(v.values) is self.d
with pytest.raises(ValueError):
@@ -1076,9 +1053,6 @@ def test_data_and_values(self):
d2 = np.random.random((10, 3))
v.values = d2
assert source_ndarray(v.values) is d2
- d3 = np.random.random((10, 3))
- v.data = d3
- assert source_ndarray(v.data) is d3
def test_numpy_same_methods(self):
v = Variable([], np.float32(0.0))
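As a sketch of the pattern this refactor enables, a new array class can reuse the shared test battery by overriding two hooks (``MyArray`` is illustrative, not part of the patch):

    import pytest

    from xarray.namedarray.core import NamedArray
    from xarray.tests.test_namedarray import NamedArraySubclassobjects


    class MyArray(NamedArray):
        pass


    class TestMyArray(NamedArraySubclassobjects):
        def cls(self, *args, **kwargs):
            return MyArray(*args, **kwargs)

        @pytest.fixture
        def target(self, data):
            return MyArray(["x", "y"], data)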
From e7e8c38566c011b50a8b1980c2e563a1db3cbed5 Mon Sep 17 00:00:00 2001
From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com>
Date: Tue, 28 Nov 2023 13:04:47 -0800
Subject: [PATCH 17/58] Start renaming `dims` to `dim` (#8487)
* Start renaming `dims` to `dim`
Begins the process of #6646. I don't think it's feasible / enjoyable to do this for everything at once, so I would suggest we do it gradually, while keeping the warnings quite quiet, so by the time we convert to louder warnings, users can do a find/replace easily.
* No deprecation for internal methods
* Simplify typing
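A minimal sketch of what the transition looks like for users:

    import numpy as np
    import xarray as xr

    da_a = xr.DataArray(np.arange(6).reshape(2, 3), dims=["a", "b"])
    da_b = xr.DataArray(np.arange(6).reshape(2, 3), dims=["a", "b"])

    xr.dot(da_a, da_b, dim="a")   # new spelling
    xr.dot(da_a, da_b, dims="a")  # still accepted; emits PendingDeprecationWarning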
---
doc/whats-new.rst | 7 ++++++
xarray/core/alignment.py | 14 +++++------
xarray/core/computation.py | 28 +++++++++++----------
xarray/core/dataarray.py | 17 +++++++------
xarray/core/variable.py | 6 ++---
xarray/core/weighted.py | 2 +-
xarray/tests/test_computation.py | 40 +++++++++++++++---------------
xarray/tests/test_dataarray.py | 6 ++---
xarray/util/deprecation_helpers.py | 27 ++++++++++++++++++++
9 files changed, 92 insertions(+), 55 deletions(-)
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 71cbc1a08ee..92048e02837 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -38,6 +38,13 @@ Breaking changes
Deprecations
~~~~~~~~~~~~
+- As part of an effort to standardize the API, we're renaming the ``dims``
+ keyword arg to ``dim`` for the minority of functions which currently use
+ ``dims``. This started with :py:func:`xarray.dot` & :py:meth:`DataArray.dot`
+ and we'll gradually roll this out across all functions. The warnings are
+ currently ``PendingDeprecationWarning``, which are silenced by default. We'll
+ convert these to ``DeprecationWarning`` in a future release.
+ By `Maximilian Roos `_.
Bug fixes
~~~~~~~~~
diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py
index 732ec5d3ea6..041fe63a9f3 100644
--- a/xarray/core/alignment.py
+++ b/xarray/core/alignment.py
@@ -324,7 +324,7 @@ def assert_no_index_conflict(self) -> None:
"- they may be used to reindex data along common dimensions"
)
- def _need_reindex(self, dims, cmp_indexes) -> bool:
+ def _need_reindex(self, dim, cmp_indexes) -> bool:
"""Whether or not we need to reindex variables for a set of
matching indexes.
@@ -340,14 +340,14 @@ def _need_reindex(self, dims, cmp_indexes) -> bool:
return True
unindexed_dims_sizes = {}
- for dim in dims:
- if dim in self.unindexed_dim_sizes:
- sizes = self.unindexed_dim_sizes[dim]
+ for d in dim:
+ if d in self.unindexed_dim_sizes:
+ sizes = self.unindexed_dim_sizes[d]
if len(sizes) > 1:
# reindex if different sizes are found for unindexed dims
return True
else:
- unindexed_dims_sizes[dim] = next(iter(sizes))
+ unindexed_dims_sizes[d] = next(iter(sizes))
if unindexed_dims_sizes:
indexed_dims_sizes = {}
@@ -356,8 +356,8 @@ def _need_reindex(self, dims, cmp_indexes) -> bool:
for var in index_vars.values():
indexed_dims_sizes.update(var.sizes)
- for dim, size in unindexed_dims_sizes.items():
- if indexed_dims_sizes.get(dim, -1) != size:
+ for d, size in unindexed_dims_sizes.items():
+ if indexed_dims_sizes.get(d, -1) != size:
# reindex if unindexed dimension size doesn't match
return True
diff --git a/xarray/core/computation.py b/xarray/core/computation.py
index 0c5c9d6d5cb..ed2c733d4ca 100644
--- a/xarray/core/computation.py
+++ b/xarray/core/computation.py
@@ -26,6 +26,7 @@
from xarray.core.types import Dims, T_DataArray
from xarray.core.utils import is_dict_like, is_scalar
from xarray.core.variable import Variable
+from xarray.util.deprecation_helpers import deprecate_dims
if TYPE_CHECKING:
from xarray.core.coordinates import Coordinates
@@ -1691,9 +1692,10 @@ def cross(
return c
+@deprecate_dims
def dot(
*arrays,
- dims: Dims = None,
+ dim: Dims = None,
**kwargs: Any,
):
"""Generalized dot product for xarray objects. Like ``np.einsum``, but
@@ -1703,7 +1705,7 @@ def dot(
----------
*arrays : DataArray or Variable
Arrays to compute.
- dims : str, iterable of hashable, "..." or None, optional
+ dim : str, iterable of hashable, "..." or None, optional
Which dimensions to sum over. Ellipsis ('...') sums over all dimensions.
If not specified, then all the common dimensions are summed over.
**kwargs : dict
@@ -1756,18 +1758,18 @@ def dot(
[3, 4, 5]])
Dimensions without coordinates: c, d
- >>> xr.dot(da_a, da_b, dims=["a", "b"])
+ >>> xr.dot(da_a, da_b, dim=["a", "b"])
array([110, 125])
Dimensions without coordinates: c
- >>> xr.dot(da_a, da_b, dims=["a"])
+ >>> xr.dot(da_a, da_b, dim=["a"])
array([[40, 46],
[70, 79]])
Dimensions without coordinates: b, c
- >>> xr.dot(da_a, da_b, da_c, dims=["b", "c"])
+ >>> xr.dot(da_a, da_b, da_c, dim=["b", "c"])
array([[ 9, 14, 19],
[ 93, 150, 207],
@@ -1779,7 +1781,7 @@ def dot(
array([110, 125])
Dimensions without coordinates: c
- >>> xr.dot(da_a, da_b, dims=...)
+ >>> xr.dot(da_a, da_b, dim=...)
array(235)
"""
@@ -1803,18 +1805,18 @@ def dot(
einsum_axes = "abcdefghijklmnopqrstuvwxyz"
dim_map = {d: einsum_axes[i] for i, d in enumerate(all_dims)}
- if dims is ...:
- dims = all_dims
- elif isinstance(dims, str):
- dims = (dims,)
- elif dims is None:
+ if dim is ...:
+ dim = all_dims
+ elif isinstance(dim, str):
+ dim = (dim,)
+ elif dim is None:
# find dimensions that occur more than once
dim_counts: Counter = Counter()
for arr in arrays:
dim_counts.update(arr.dims)
- dims = tuple(d for d, c in dim_counts.items() if c > 1)
+ dim = tuple(d for d, c in dim_counts.items() if c > 1)
- dot_dims: set[Hashable] = set(dims)
+ dot_dims: set[Hashable] = set(dim)
# dimensions to be parallelized
broadcast_dims = common_dims - dot_dims
diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
index b417470fdc0..47708cfb581 100644
--- a/xarray/core/dataarray.py
+++ b/xarray/core/dataarray.py
@@ -65,7 +65,7 @@
)
from xarray.plot.accessor import DataArrayPlotAccessor
from xarray.plot.utils import _get_units_from_attrs
-from xarray.util.deprecation_helpers import _deprecate_positional_args
+from xarray.util.deprecation_helpers import _deprecate_positional_args, deprecate_dims
if TYPE_CHECKING:
from typing import TypeVar, Union
@@ -115,14 +115,14 @@
T_XarrayOther = TypeVar("T_XarrayOther", bound=Union["DataArray", Dataset])
-def _check_coords_dims(shape, coords, dims):
- sizes = dict(zip(dims, shape))
+def _check_coords_dims(shape, coords, dim):
+ sizes = dict(zip(dim, shape))
for k, v in coords.items():
- if any(d not in dims for d in v.dims):
+ if any(d not in dim for d in v.dims):
raise ValueError(
f"coordinate {k} has dimensions {v.dims}, but these "
"are not a subset of the DataArray "
- f"dimensions {dims}"
+ f"dimensions {dim}"
)
for d, s in v.sizes.items():
@@ -4895,10 +4895,11 @@ def imag(self) -> Self:
"""
return self._replace(self.variable.imag)
+ @deprecate_dims
def dot(
self,
other: T_Xarray,
- dims: Dims = None,
+ dim: Dims = None,
) -> T_Xarray:
"""Perform dot product of two DataArrays along their shared dims.
@@ -4908,7 +4909,7 @@ def dot(
----------
other : DataArray
The other array with which the dot product is performed.
- dims : ..., str, Iterable of Hashable or None, optional
+ dim : ..., str, Iterable of Hashable or None, optional
Which dimensions to sum over. Ellipsis (`...`) sums over all dimensions.
If not specified, then all the common dimensions are summed over.
@@ -4947,7 +4948,7 @@ def dot(
if not isinstance(other, DataArray):
raise TypeError("dot only operates on DataArrays.")
- return computation.dot(self, other, dims=dims)
+ return computation.dot(self, other, dim=dim)
def sortby(
self,
diff --git a/xarray/core/variable.py b/xarray/core/variable.py
index c2133d55aeb..39a947e6264 100644
--- a/xarray/core/variable.py
+++ b/xarray/core/variable.py
@@ -1541,15 +1541,15 @@ def stack(self, dimensions=None, **dimensions_kwargs):
result = result._stack_once(dims, new_dim)
return result
- def _unstack_once_full(self, dims: Mapping[Any, int], old_dim: Hashable) -> Self:
+ def _unstack_once_full(self, dim: Mapping[Any, int], old_dim: Hashable) -> Self:
"""
Unstacks the variable without needing an index.
Unlike `_unstack_once`, this function requires the existing dimension to
contain the full product of the new dimensions.
"""
- new_dim_names = tuple(dims.keys())
- new_dim_sizes = tuple(dims.values())
+ new_dim_names = tuple(dim.keys())
+ new_dim_sizes = tuple(dim.values())
if old_dim not in self.dims:
raise ValueError(f"invalid existing dimension: {old_dim}")
diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py
index 28740a99020..53ff6db5f28 100644
--- a/xarray/core/weighted.py
+++ b/xarray/core/weighted.py
@@ -228,7 +228,7 @@ def _reduce(
# `dot` does not broadcast arrays, so this avoids creating a large
# DataArray (if `weights` has additional dimensions)
- return dot(da, weights, dims=dim)
+ return dot(da, weights, dim=dim)
def _sum_of_weights(self, da: DataArray, dim: Dims = None) -> DataArray:
"""Calculate the sum of weights, accounting for missing values"""
diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py
index 425673dc40f..396507652c6 100644
--- a/xarray/tests/test_computation.py
+++ b/xarray/tests/test_computation.py
@@ -1936,7 +1936,7 @@ def test_dot(use_dask: bool) -> None:
da_a = da_a.chunk({"a": 3})
da_b = da_b.chunk({"a": 3})
da_c = da_c.chunk({"c": 3})
- actual = xr.dot(da_a, da_b, dims=["a", "b"])
+ actual = xr.dot(da_a, da_b, dim=["a", "b"])
assert actual.dims == ("c",)
assert (actual.data == np.einsum("ij,ijk->k", a, b)).all()
assert isinstance(actual.variable.data, type(da_a.variable.data))
@@ -1960,33 +1960,33 @@ def test_dot(use_dask: bool) -> None:
if use_dask:
da_a = da_a.chunk({"a": 3})
da_b = da_b.chunk({"a": 3})
- actual = xr.dot(da_a, da_b, dims=["b"])
+ actual = xr.dot(da_a, da_b, dim=["b"])
assert actual.dims == ("a", "c")
assert (actual.data == np.einsum("ij,ijk->ik", a, b)).all()
assert isinstance(actual.variable.data, type(da_a.variable.data))
- actual = xr.dot(da_a, da_b, dims=["b"])
+ actual = xr.dot(da_a, da_b, dim=["b"])
assert actual.dims == ("a", "c")
assert (actual.data == np.einsum("ij,ijk->ik", a, b)).all()
- actual = xr.dot(da_a, da_b, dims="b")
+ actual = xr.dot(da_a, da_b, dim="b")
assert actual.dims == ("a", "c")
assert (actual.data == np.einsum("ij,ijk->ik", a, b)).all()
- actual = xr.dot(da_a, da_b, dims="a")
+ actual = xr.dot(da_a, da_b, dim="a")
assert actual.dims == ("b", "c")
assert (actual.data == np.einsum("ij,ijk->jk", a, b)).all()
- actual = xr.dot(da_a, da_b, dims="c")
+ actual = xr.dot(da_a, da_b, dim="c")
assert actual.dims == ("a", "b")
assert (actual.data == np.einsum("ij,ijk->ij", a, b)).all()
- actual = xr.dot(da_a, da_b, da_c, dims=["a", "b"])
+ actual = xr.dot(da_a, da_b, da_c, dim=["a", "b"])
assert actual.dims == ("c", "e")
assert (actual.data == np.einsum("ij,ijk,kl->kl ", a, b, c)).all()
# should work with tuple
- actual = xr.dot(da_a, da_b, dims=("c",))
+ actual = xr.dot(da_a, da_b, dim=("c",))
assert actual.dims == ("a", "b")
assert (actual.data == np.einsum("ij,ijk->ij", a, b)).all()
@@ -1996,47 +1996,47 @@ def test_dot(use_dask: bool) -> None:
assert (actual.data == np.einsum("ij,ijk,kl->l ", a, b, c)).all()
# 1 array summation
- actual = xr.dot(da_a, dims="a")
+ actual = xr.dot(da_a, dim="a")
assert actual.dims == ("b",)
assert (actual.data == np.einsum("ij->j ", a)).all()
# empty dim
- actual = xr.dot(da_a.sel(a=[]), da_a.sel(a=[]), dims="a")
+ actual = xr.dot(da_a.sel(a=[]), da_a.sel(a=[]), dim="a")
assert actual.dims == ("b",)
assert (actual.data == np.zeros(actual.shape)).all()
# Ellipsis (...) sums over all dimensions
- actual = xr.dot(da_a, da_b, dims=...)
+ actual = xr.dot(da_a, da_b, dim=...)
assert actual.dims == ()
assert (actual.data == np.einsum("ij,ijk->", a, b)).all()
- actual = xr.dot(da_a, da_b, da_c, dims=...)
+ actual = xr.dot(da_a, da_b, da_c, dim=...)
assert actual.dims == ()
assert (actual.data == np.einsum("ij,ijk,kl-> ", a, b, c)).all()
- actual = xr.dot(da_a, dims=...)
+ actual = xr.dot(da_a, dim=...)
assert actual.dims == ()
assert (actual.data == np.einsum("ij-> ", a)).all()
- actual = xr.dot(da_a.sel(a=[]), da_a.sel(a=[]), dims=...)
+ actual = xr.dot(da_a.sel(a=[]), da_a.sel(a=[]), dim=...)
assert actual.dims == ()
assert (actual.data == np.zeros(actual.shape)).all()
# Invalid cases
if not use_dask:
with pytest.raises(TypeError):
- xr.dot(da_a, dims="a", invalid=None)
+ xr.dot(da_a, dim="a", invalid=None)
with pytest.raises(TypeError):
- xr.dot(da_a.to_dataset(name="da"), dims="a")
+ xr.dot(da_a.to_dataset(name="da"), dim="a")
with pytest.raises(TypeError):
- xr.dot(dims="a")
+ xr.dot(dim="a")
# einsum parameters
- actual = xr.dot(da_a, da_b, dims=["b"], order="C")
+ actual = xr.dot(da_a, da_b, dim=["b"], order="C")
assert (actual.data == np.einsum("ij,ijk->ik", a, b)).all()
assert actual.values.flags["C_CONTIGUOUS"]
assert not actual.values.flags["F_CONTIGUOUS"]
- actual = xr.dot(da_a, da_b, dims=["b"], order="F")
+ actual = xr.dot(da_a, da_b, dim=["b"], order="F")
assert (actual.data == np.einsum("ij,ijk->ik", a, b)).all()
# dask converts Fortran arrays to C order when merging the final array
if not use_dask:
@@ -2078,7 +2078,7 @@ def test_dot_align_coords(use_dask: bool) -> None:
expected = (da_a * da_b).sum(["a", "b"])
xr.testing.assert_allclose(expected, actual)
- actual = xr.dot(da_a, da_b, dims=...)
+ actual = xr.dot(da_a, da_b, dim=...)
expected = (da_a * da_b).sum()
xr.testing.assert_allclose(expected, actual)
diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py
index 44b9790f0b7..f9547f3afa2 100644
--- a/xarray/tests/test_dataarray.py
+++ b/xarray/tests/test_dataarray.py
@@ -3964,13 +3964,13 @@ def test_dot(self) -> None:
assert_equal(expected3, actual3)
# Ellipsis: all dims are shared
- actual4 = da.dot(da, dims=...)
+ actual4 = da.dot(da, dim=...)
expected4 = da.dot(da)
assert_equal(expected4, actual4)
# Ellipsis: not all dims are shared
- actual5 = da.dot(dm3, dims=...)
- expected5 = da.dot(dm3, dims=("j", "x", "y", "z"))
+ actual5 = da.dot(dm3, dim=...)
+ expected5 = da.dot(dm3, dim=("j", "x", "y", "z"))
assert_equal(expected5, actual5)
with pytest.raises(NotImplementedError):
diff --git a/xarray/util/deprecation_helpers.py b/xarray/util/deprecation_helpers.py
index 7b4cf901aa1..c620e45574e 100644
--- a/xarray/util/deprecation_helpers.py
+++ b/xarray/util/deprecation_helpers.py
@@ -36,6 +36,8 @@
from functools import wraps
from typing import Callable, TypeVar
+from xarray.core.utils import emit_user_level_warning
+
T = TypeVar("T", bound=Callable)
POSITIONAL_OR_KEYWORD = inspect.Parameter.POSITIONAL_OR_KEYWORD
@@ -115,3 +117,28 @@ def inner(*args, **kwargs):
return inner
return _decorator
+
+
+def deprecate_dims(func: T) -> T:
+ """
+ For functions that previously took `dims` as a kwarg, and have now transitioned to
+ `dim`. This decorator will issue a warning if `dims` is passed while forwarding it
+ to `dim`.
+ """
+
+ @wraps(func)
+ def wrapper(*args, **kwargs):
+ if "dims" in kwargs:
+ emit_user_level_warning(
+ "The `dims` argument has been renamed to `dim`, and will be removed "
+ "in the future. This renaming is taking place throughout xarray over the "
+ "next few releases.",
+ # Upgrade to `DeprecationWarning` in the future, when the renaming is complete.
+ PendingDeprecationWarning,
+ )
+ kwargs["dim"] = kwargs.pop("dims")
+ return func(*args, **kwargs)
+
+ # We're quite confident we're just returning `T` from this function, so it's fine to ignore typing
+ # within the function.
+ return wrapper # type: ignore
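A hypothetical usage sketch of the decorator (``total`` is illustrative, not from the patch):

    import numpy as np
    import xarray as xr

    from xarray.util.deprecation_helpers import deprecate_dims


    @deprecate_dims
    def total(da, *, dim=None):
        return da.sum(dim)


    da = xr.DataArray(np.ones((2, 3)), dims=["x", "y"])
    total(da, dims="x")  # warns, then runs as total(da, dim="x")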
From dc0931ad05f631135baa9889bdceeb15e2fa727c Mon Sep 17 00:00:00 2001
From: Anderson Banihirwe <13301940+andersy005@users.noreply.github.com>
Date: Tue, 28 Nov 2023 14:19:00 -0800
Subject: [PATCH 18/58] Raise an informative error message when object array
has mixed types (#4700)
Co-authored-by: Mathias Hauser
Co-authored-by: Maximilian Roos <5635139+max-sixty@users.noreply.github.com>
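A minimal sketch of the new failure mode, mirroring the test added below:

    import numpy as np

    from xarray import conventions

    data = np.array([["x", 1], ["y", 2]], dtype=object)
    # Raises ValueError: unable to infer dtype on variable 'test';
    # object array contains mixed native types: str, int
    conventions._infer_dtype(data, "test")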
---
xarray/conventions.py | 24 ++++++++++++++++++++----
xarray/tests/test_conventions.py | 12 ++++++++++++
2 files changed, 32 insertions(+), 4 deletions(-)
diff --git a/xarray/conventions.py b/xarray/conventions.py
index 75f816e6cb4..8c7d6be2309 100644
--- a/xarray/conventions.py
+++ b/xarray/conventions.py
@@ -52,16 +52,23 @@ def _var_as_tuple(var: Variable) -> T_VarTuple:
return var.dims, var.data, var.attrs.copy(), var.encoding.copy()
-def _infer_dtype(array, name: T_Name = None) -> np.dtype:
- """Given an object array with no missing values, infer its dtype from its
- first element
- """
+def _infer_dtype(array, name: T_Name = None) -> np.dtype:
+ """Given an object array with no missing values, infer its dtype from all elements."""
if array.dtype.kind != "O":
raise TypeError("infer_type must be called on a dtype=object array")
if array.size == 0:
return np.dtype(float)
+ native_dtypes = set(np.vectorize(type, otypes=[object])(array.ravel()))
+ if len(native_dtypes) > 1 and native_dtypes != {bytes, str}:
+ raise ValueError(
+ "unable to infer dtype on variable {!r}; object array "
+ "contains mixed native types: {}".format(
+ name, ", ".join(x.__name__ for x in native_dtypes)
+ )
+ )
+
element = array[(0,) * array.ndim]
# We use the base types to avoid subclasses of bytes and str (which might
# not play nice with e.g. hdf5 datatypes), such as those from numpy
diff --git a/xarray/tests/test_conventions.py b/xarray/tests/test_conventions.py
index d6d1303a696..be6e949edf8 100644
--- a/xarray/tests/test_conventions.py
+++ b/xarray/tests/test_conventions.py
@@ -495,6 +495,18 @@ def test_encoding_kwarg_fixed_width_string(self) -> None:
pass
+@pytest.mark.parametrize(
+ "data",
+ [
+ np.array([["ab", "cdef", b"X"], [1, 2, "c"]], dtype=object),
+ np.array([["x", 1], ["y", 2]], dtype="object"),
+ ],
+)
+def test_infer_dtype_error_on_mixed_types(data):
+ with pytest.raises(ValueError, match="unable to infer dtype on variable"):
+ conventions._infer_dtype(data, "test")
+
+
class TestDecodeCFVariableWithArrayUnits:
def test_decode_cf_variable_with_array_units(self) -> None:
v = Variable(["t"], [1, 2, 3], {"units": np.array(["foobar"], dtype=object)})
From 8ee12f66269c1c875d245a118679356dd2624a5a Mon Sep 17 00:00:00 2001
From: Doug Latornell
Date: Tue, 28 Nov 2023 17:31:43 -0800
Subject: [PATCH 19/58] Update resample time offset FutureWarning and docs
(#8479)
* Improve FutureWarning re: resample() loffset parameter
As discussed in https://github.com/pydata/xarray/discussions/8175
* Add docs re: resample time offset arithmetic
Illustrate updating the time coordinate values of a resampled dataset
using time offset arithmetic. This is the recommended technique to
replace the use of the deprecated `loffset` parameter in `resample()`
re: issue #7596 and discussion
https://github.com/pydata/xarray/discussions/8175
* Add loffset deprecation warning to resample docstrings
* Add docs change to whats-new.rst
* Drop redundant FutureWarning in warning text
* Change deprecation warning to present tense
* Add code example for FutureWarning message
---------
Co-authored-by: Deepak Cherian
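A minimal before/after sketch (``ds`` is a placeholder dataset with a ``time`` coordinate):

    import pandas as pd

    # deprecated:
    ds.resample(time="6h", loffset="3h").mean()

    # recommended replacement, using time offset arithmetic:
    resampled = ds.resample(time="6h").mean()
    offset = pd.tseries.frequencies.to_offset("6h") / 2
    resampled["time"] = resampled.get_index("time") + offset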
---
doc/user-guide/time-series.rst | 12 ++++++++++++
doc/whats-new.rst | 6 ++++++
xarray/core/common.py | 7 +++++--
xarray/core/dataarray.py | 6 ++++++
xarray/core/dataset.py | 6 ++++++
5 files changed, 35 insertions(+), 2 deletions(-)
diff --git a/doc/user-guide/time-series.rst b/doc/user-guide/time-series.rst
index cbb831cac3a..82172aa8998 100644
--- a/doc/user-guide/time-series.rst
+++ b/doc/user-guide/time-series.rst
@@ -245,6 +245,18 @@ Data that has indices outside of the given ``tolerance`` are set to ``NaN``.
ds.resample(time="1h").nearest(tolerance="1h")
+It is often desirable to center the time values after a resampling operation.
+That can be accomplished by updating the resampled dataset time coordinate values
+using time offset arithmetic via the `pandas.tseries.frequencies.to_offset`_ function.
+
+.. _pandas.tseries.frequencies.to_offset: https://pandas.pydata.org/docs/reference/api/pandas.tseries.frequencies.to_offset.html
+
+.. ipython:: python
+
+ resampled_ds = ds.resample(time="6h").mean()
+ offset = pd.tseries.frequencies.to_offset("6h") / 2
+ resampled_ds["time"] = resampled_ds.get_index("time") + offset
+ resampled_ds
For more examples of using grouped operations on a time dimension, see
:doc:`../examples/weather-data`.
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 92048e02837..82842430b53 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -57,6 +57,12 @@ Bug fixes
Documentation
~~~~~~~~~~~~~
+- Added illustration of updating the time coordinate values of a resampled dataset using
+ time offset arithmetic.
+ This is the recommended technique to replace the use of the deprecated ``loffset`` parameter
+ in ``resample`` (:pull:`8479`).
+ By `Doug Latornell `_.
+
- Improved error message when attempting to get a variable which doesn't exist from a Dataset.
(:pull:`8474`)
By `Maximilian Roos `_.
diff --git a/xarray/core/common.py b/xarray/core/common.py
index fa0fa9aec0f..cb5b79defc0 100644
--- a/xarray/core/common.py
+++ b/xarray/core/common.py
@@ -1010,8 +1010,11 @@ def _resample(
if loffset is not None:
emit_user_level_warning(
- "Following pandas, the `loffset` parameter to resample will be deprecated "
- "in a future version of xarray. Switch to using time offset arithmetic.",
+ "Following pandas, the `loffset` parameter to resample is deprecated. "
+ "Switch to updating the resampled dataset time coordinate using "
+ "time offset arithmetic. For example:\n"
+ " >>> offset = pd.tseries.frequencies.to_offset(freq) / 2\n"
+ ' >>> resampled_ds["time"] = resampled_ds.get_index("time") + offset',
FutureWarning,
)
diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
index 47708cfb581..bac4ad36adb 100644
--- a/xarray/core/dataarray.py
+++ b/xarray/core/dataarray.py
@@ -7032,6 +7032,12 @@ def resample(
loffset : timedelta or str, optional
Offset used to adjust the resampled time labels. Some pandas date
offset strings are supported.
+
+ .. deprecated:: 2023.03.0
+ Following pandas, the ``loffset`` parameter is deprecated in favor
+ of using time offset arithmetic, and will be removed in a future
+ version of xarray.
+
restore_coord_dims : bool, optional
If True, also restore the dimension order of multi-dimensional
coordinates.
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index 5d2d24d6723..66c83e95b77 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -10382,6 +10382,12 @@ def resample(
loffset : timedelta or str, optional
Offset used to adjust the resampled time labels. Some pandas date
offset strings are supported.
+
+ .. deprecated:: 2023.03.0
+ Following pandas, the ``loffset`` parameter is deprecated in favor
+ of using time offset arithmetic, and will be removed in a future
+ version of xarray.
+
restore_coord_dims : bool, optional
If True, also restore the dimension order of multi-dimensional
coordinates.
From 8ea565deae1e7be3a1f48242f8394cb23d2ebe91 Mon Sep 17 00:00:00 2001
From: Ben Mares
Date: Wed, 29 Nov 2023 22:19:10 +0100
Subject: [PATCH 20/58] Fix minor typo in io.rst (#8492)
---
doc/user-guide/io.rst | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/doc/user-guide/io.rst b/doc/user-guide/io.rst
index 2155ecfd88b..48751c5f299 100644
--- a/doc/user-guide/io.rst
+++ b/doc/user-guide/io.rst
@@ -804,7 +804,7 @@ store. These options are useful for scenarios when it is infeasible or
undesirable to write your entire dataset at once.
1. Use ``mode='a'`` to add or overwrite entire variables,
-2. Use ``append_dim`` to resize and append to exiting variables, and
+2. Use ``append_dim`` to resize and append to existing variables, and
3. Use ``region`` to write to limited regions of existing arrays.
.. tip::
From d46c5b66463dfb0fed7e105514dda07e7ef4b5ef Mon Sep 17 00:00:00 2001
From: Tom Nicholas
Date: Thu, 30 Nov 2023 19:40:18 -0500
Subject: [PATCH 21/58] Warn on repeated dimension names during construction
(#8491)
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
doc/whats-new.rst | 6 ++++++
xarray/core/common.py | 2 ++
xarray/core/variable.py | 9 +++------
xarray/namedarray/core.py | 20 ++++++++++++++++++++
xarray/tests/test_backends.py | 2 ++
xarray/tests/test_namedarray.py | 4 ++++
6 files changed, 37 insertions(+), 6 deletions(-)
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 82842430b53..9fc1b0ba80a 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -34,6 +34,12 @@ New Features
Breaking changes
~~~~~~~~~~~~~~~~
+- Explicitly warn when creating xarray objects with repeated dimension names.
+ Such objects will also now raise when :py:meth:`DataArray.get_axis_num` is called,
+ which means many functions will raise.
+ This latter change is technically breaking, but although construction was
+ allowed, this behaviour was never actually supported! (:issue:`3731`, :pull:`8491`)
+ By `Tom Nicholas `_.
Deprecations
~~~~~~~~~~~~
diff --git a/xarray/core/common.py b/xarray/core/common.py
index cb5b79defc0..cebd8f2a95b 100644
--- a/xarray/core/common.py
+++ b/xarray/core/common.py
@@ -21,6 +21,7 @@
emit_user_level_warning,
is_scalar,
)
+from xarray.namedarray.core import _raise_if_any_duplicate_dimensions
try:
import cftime
@@ -217,6 +218,7 @@ def get_axis_num(self, dim: Hashable | Iterable[Hashable]) -> int | tuple[int, .
return self._get_axis_num(dim)
def _get_axis_num(self: Any, dim: Hashable) -> int:
+ _raise_if_any_duplicate_dimensions(self.dims)
try:
return self.dims.index(dim)
except ValueError:
diff --git a/xarray/core/variable.py b/xarray/core/variable.py
index 39a947e6264..d9102dc9e0a 100644
--- a/xarray/core/variable.py
+++ b/xarray/core/variable.py
@@ -46,7 +46,7 @@
is_duck_array,
maybe_coerce_to_str,
)
-from xarray.namedarray.core import NamedArray
+from xarray.namedarray.core import NamedArray, _raise_if_any_duplicate_dimensions
NON_NUMPY_SUPPORTED_ARRAY_TYPES = (
indexing.ExplicitlyIndexed,
@@ -2876,11 +2876,8 @@ def _unified_dims(variables):
all_dims = {}
for var in variables:
var_dims = var.dims
- if len(set(var_dims)) < len(var_dims):
- raise ValueError(
- "broadcasting cannot handle duplicate "
- f"dimensions: {list(var_dims)!r}"
- )
+ _raise_if_any_duplicate_dimensions(var_dims, err_context="Broadcasting")
+
for d, s in zip(var_dims, var.shape):
if d not in all_dims:
all_dims[d] = s
diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py
index d3fcffcfd9e..002afe96358 100644
--- a/xarray/namedarray/core.py
+++ b/xarray/namedarray/core.py
@@ -481,6 +481,15 @@ def _parse_dimensions(self, dims: _DimsLike) -> _Dims:
f"dimensions {dims} must have the same length as the "
f"number of data dimensions, ndim={self.ndim}"
)
+ if len(set(dims)) < len(dims):
+ repeated_dims = {d for d in dims if dims.count(d) > 1}
+ warnings.warn(
+ f"Duplicate dimension names present: dimensions {repeated_dims} appear more than once in dims={dims}. "
+ "We do not yet support duplicate dimension names, but we do allow initial construction of the object. "
+ "We recommend you rename the dims immediately to become distinct, as most xarray functionality is likely to fail silently if you do not. "
+ "To rename the dimensions you will need to set the ``.dims`` attribute of each variable, ``e.g. var.dims=('x0', 'x1')``.",
+ UserWarning,
+ )
return dims
@property
@@ -651,6 +660,7 @@ def get_axis_num(self, dim: Hashable | Iterable[Hashable]) -> int | tuple[int, .
return self._get_axis_num(dim)
def _get_axis_num(self: Any, dim: Hashable) -> int:
+ _raise_if_any_duplicate_dimensions(self.dims)
try:
return self.dims.index(dim) # type: ignore[no-any-return]
except ValueError:
@@ -846,3 +856,13 @@ def _to_dense(self) -> NamedArray[Any, _DType_co]:
_NamedArray = NamedArray[Any, np.dtype[_ScalarType_co]]
+
+
+def _raise_if_any_duplicate_dimensions(
+ dims: _Dims, err_context: str = "This function"
+) -> None:
+ if len(set(dims)) < len(dims):
+ repeated_dims = {d for d in dims if dims.count(d) > 1}
+ raise ValueError(
+ f"{err_context} cannot handle duplicate dimensions, but dimensions {repeated_dims} appear more than once on this object's dims: {dims}"
+ )
diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
index 0704dd835c0..8c5f2f8b98a 100644
--- a/xarray/tests/test_backends.py
+++ b/xarray/tests/test_backends.py
@@ -3464,6 +3464,7 @@ class TestH5NetCDFDataRos3Driver(TestCommon):
"https://www.unidata.ucar.edu/software/netcdf/examples/OMI-Aura_L2-example.nc"
)
+ @pytest.mark.filterwarnings("ignore:Duplicate dimension names")
def test_get_variable_list(self) -> None:
with open_dataset(
self.test_remote_dataset,
@@ -3472,6 +3473,7 @@ def test_get_variable_list(self) -> None:
) as actual:
assert "Temperature" in list(actual)
+ @pytest.mark.filterwarnings("ignore:Duplicate dimension names")
def test_get_variable_list_empty_driver_kwds(self) -> None:
driver_kwds = {
"secret_id": b"",
diff --git a/xarray/tests/test_namedarray.py b/xarray/tests/test_namedarray.py
index fcdf063d106..deeb5ce753a 100644
--- a/xarray/tests/test_namedarray.py
+++ b/xarray/tests/test_namedarray.py
@@ -475,3 +475,7 @@ def _new(
var_float2: Variable[Any, np.dtype[np.float32]]
var_float2 = var_float._replace(("x",), np_val2)
assert var_float2.dtype == dtype_float
+
+ def test_warn_on_repeated_dimension_names(self) -> None:
+ with pytest.warns(UserWarning, match="Duplicate dimension names"):
+ NamedArray(("x", "x"), np.arange(4).reshape(2, 2))
From b703102ddfb91ff441d23025286c6a1c67ae1517 Mon Sep 17 00:00:00 2001
From: Deepak Cherian
Date: Thu, 30 Nov 2023 19:18:18 -0700
Subject: [PATCH 22/58] Minor to_zarr optimizations (#8489)
Co-authored-by: Anderson Banihirwe <13301940+andersy005@users.noreply.github.com>
---
xarray/backends/zarr.py | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py
index 7f1af10b45a..30b306ceb34 100644
--- a/xarray/backends/zarr.py
+++ b/xarray/backends/zarr.py
@@ -599,8 +599,9 @@ def store(
"""
import zarr
+ existing_keys = tuple(self.zarr_group.array_keys())
existing_variable_names = {
- vn for vn in variables if _encode_variable_name(vn) in self.zarr_group
+ vn for vn in variables if _encode_variable_name(vn) in existing_keys
}
new_variables = set(variables) - existing_variable_names
variables_without_encoding = {vn: variables[vn] for vn in new_variables}
@@ -665,6 +666,8 @@ def set_variables(self, variables, check_encoding_set, writer, unlimited_dims=No
import zarr
+ existing_keys = tuple(self.zarr_group.array_keys())
+
for vn, v in variables.items():
name = _encode_variable_name(vn)
check = vn in check_encoding_set
@@ -677,7 +680,7 @@ def set_variables(self, variables, check_encoding_set, writer, unlimited_dims=No
if v.encoding == {"_FillValue": None} and fill_value is None:
v.encoding = {}
- if name in self.zarr_group:
+ if name in existing_keys:
# existing variable
# TODO: if mode="a", consider overriding the existing variable
# metadata. This would need some case work properly with region
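The motivation for snapshotting the keys, sketched against plain zarr-python (the store path is hypothetical): membership tests against the group can touch store metadata on every lookup, whereas a tuple built once is checked in memory:

    import zarr

    group = zarr.open_group("example.zarr", mode="a")  # hypothetical store
    existing_keys = tuple(group.array_keys())          # one metadata pass
    print("temperature" in existing_keys)              # cheap tuple membership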
From b313ffc2bd6ffc1c221f8950fff3cceb24cb775f Mon Sep 17 00:00:00 2001
From: Carl Andersson
Date: Fri, 1 Dec 2023 03:20:14 +0100
Subject: [PATCH 23/58] Properly closes zarr groups in zarr store (#8425)
Co-authored-by: Maximilian Roos <5635139+max-sixty@users.noreply.github.com>
Co-authored-by: Anderson Banihirwe <13301940+andersy005@users.noreply.github.com>
---
doc/whats-new.rst | 3 +++
xarray/backends/zarr.py | 8 +++++++-
xarray/tests/test_backends.py | 10 ++++++++++
3 files changed, 20 insertions(+), 1 deletion(-)
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 9fc1b0ba80a..abc3760df6e 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -174,6 +174,9 @@ Bug fixes
- Fix a bug where :py:meth:`DataArray.to_dataset` silently drops a variable
if a coordinate with the same name already exists (:pull:`8433`, :issue:`7823`).
By `András Gunyhó `_.
+- Fix for :py:meth:`DataArray.to_zarr` & :py:meth:`Dataset.to_zarr` to close
+ the created zarr store when passing a path with `.zip` extension (:pull:`8425`).
+ By `Carl Andersson `_.
Documentation
~~~~~~~~~~~~~
diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py
index 30b306ceb34..7ff59e0e7bf 100644
--- a/xarray/backends/zarr.py
+++ b/xarray/backends/zarr.py
@@ -381,6 +381,7 @@ class ZarrStore(AbstractWritableDataStore):
"_write_region",
"_safe_chunks",
"_write_empty",
+ "_close_store_on_close",
)
@classmethod
@@ -464,6 +465,7 @@ def open_group(
zarr_group = zarr.open_consolidated(store, **open_kwargs)
else:
zarr_group = zarr.open_group(store, **open_kwargs)
+ close_store_on_close = zarr_group.store is not store
return cls(
zarr_group,
mode,
@@ -472,6 +474,7 @@ def open_group(
write_region,
safe_chunks,
write_empty,
+ close_store_on_close,
)
def __init__(
@@ -483,6 +486,7 @@ def __init__(
write_region=None,
safe_chunks=True,
write_empty: bool | None = None,
+ close_store_on_close: bool = False,
):
self.zarr_group = zarr_group
self._read_only = self.zarr_group.read_only
@@ -494,6 +498,7 @@ def __init__(
self._write_region = write_region
self._safe_chunks = safe_chunks
self._write_empty = write_empty
+ self._close_store_on_close = close_store_on_close
@property
def ds(self):
@@ -762,7 +767,8 @@ def set_variables(self, variables, check_encoding_set, writer, unlimited_dims=No
writer.add(v.data, zarr_array, region)
def close(self):
- pass
+ if self._close_store_on_close:
+ self.zarr_group.store.close()
def open_zarr(
diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
index 8c5f2f8b98a..d6f76df067c 100644
--- a/xarray/tests/test_backends.py
+++ b/xarray/tests/test_backends.py
@@ -5251,6 +5251,16 @@ def test_pickle_open_mfdataset_dataset():
assert_identical(ds, pickle.loads(pickle.dumps(ds)))
+@requires_zarr
+def test_zarr_closing_internal_zip_store():
+ store_name = "tmp.zarr.zip"
+ original_da = DataArray(np.arange(12).reshape((3, 4)))
+ original_da.to_zarr(store_name, mode="w")
+
+ with open_dataarray(store_name, engine="zarr") as loaded_da:
+ assert_identical(original_da, loaded_da)
+
+
@requires_zarr
class TestZarrRegionAuto:
def test_zarr_region_auto_all(self, tmp_path):
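The user-visible behaviour the fix enables, mirroring the new test (the store name is illustrative):

    import numpy as np
    import xarray as xr

    da = xr.DataArray(np.arange(12).reshape(3, 4))
    da.to_zarr("tmp.zarr.zip", mode="w")  # the ZipStore is now closed on completion

    with xr.open_dataarray("tmp.zarr.zip", engine="zarr") as loaded:
        print(loaded.shape)

Without the close, the zip archive's central directory may never be flushed, leaving a file that cannot be read back.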
From 1715ed3422c04853fda1827de7e3580c07de85cf Mon Sep 17 00:00:00 2001
From: Deepak Cherian
Date: Thu, 30 Nov 2023 19:47:02 -0700
Subject: [PATCH 24/58] Avoid duplicate Zarr array read (#8472)
Co-authored-by: Anderson Banihirwe <13301940+andersy005@users.noreply.github.com>
---
xarray/backends/zarr.py | 15 +++----
xarray/tests/test_backends.py | 78 +++++++++++++++++------------------
2 files changed, 43 insertions(+), 50 deletions(-)
diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py
index 7ff59e0e7bf..c437c42183a 100644
--- a/xarray/backends/zarr.py
+++ b/xarray/backends/zarr.py
@@ -61,15 +61,12 @@ def encode_zarr_attr_value(value):
class ZarrArrayWrapper(BackendArray):
- __slots__ = ("datastore", "dtype", "shape", "variable_name", "_array")
-
- def __init__(self, variable_name, datastore):
- self.datastore = datastore
- self.variable_name = variable_name
+ __slots__ = ("dtype", "shape", "_array")
+ def __init__(self, zarr_array):
# some callers attempt to evaluate an array if an `array` property exists on the object.
# we prefix with _ to avoid this inference.
- self._array = self.datastore.zarr_group[self.variable_name]
+ self._array = zarr_array
self.shape = self._array.shape
# preserve vlen string object dtype (GH 7328)
@@ -86,10 +83,10 @@ def get_array(self):
return self._array
def _oindex(self, key):
- return self.get_array().oindex[key]
+ return self._array.oindex[key]
def __getitem__(self, key):
- array = self.get_array()
+ array = self._array
if isinstance(key, indexing.BasicIndexer):
return array[key.tuple]
elif isinstance(key, indexing.VectorizedIndexer):
@@ -506,7 +503,7 @@ def ds(self):
return self.zarr_group
def open_store_variable(self, name, zarr_array):
- data = indexing.LazilyIndexedArray(ZarrArrayWrapper(name, self))
+ data = indexing.LazilyIndexedArray(ZarrArrayWrapper(zarr_array))
try_nczarr = self._mode == "r"
dimensions, attributes = _get_zarr_dims_and_attrs(
zarr_array, DIMENSION_KEY, try_nczarr
diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
index d6f76df067c..d60daefc728 100644
--- a/xarray/tests/test_backends.py
+++ b/xarray/tests/test_backends.py
@@ -2836,6 +2836,43 @@ def test_write_empty(
ls = listdir(os.path.join(store, "test"))
assert set(expected) == set([file for file in ls if file[0] != "."])
+ def test_avoid_excess_metadata_calls(self) -> None:
+ """Test that chunk requests do not trigger redundant metadata requests.
+
+ This test targets logic in backends.zarr.ZarrArrayWrapper, asserting that calls
+ to retrieve chunk data after initialization do not trigger additional
+ metadata requests.
+
+ https://github.com/pydata/xarray/issues/8290
+ """
+
+ import zarr
+
+ ds = xr.Dataset(data_vars={"test": (("Z",), np.array([123]).reshape(1))})
+
+ # The call to retrieve metadata performs a group lookup. We patch Group.__getitem__
+ # so that we can inspect calls to this method - specifically count of calls.
+ # Use of side_effect means that calls are passed through to the original method
+ # rather than a mocked method.
+ Group = zarr.hierarchy.Group
+ with (
+ self.create_zarr_target() as store,
+ patch.object(
+ Group, "__getitem__", side_effect=Group.__getitem__, autospec=True
+ ) as mock,
+ ):
+ ds.to_zarr(store, mode="w")
+
+ # We expect this to request array metadata information, so call_count should be == 1.
+ xrds = xr.open_zarr(store)
+ call_count = mock.call_count
+ assert call_count == 1
+
+ # compute() requests array data, which should not trigger additional metadata requests
+ # we assert that the number of calls has not increased after fetching the array
+ xrds.test.compute(scheduler="sync")
+ assert mock.call_count == call_count
+
class ZarrBaseV3(ZarrBase):
zarr_version = 3
@@ -2876,47 +2913,6 @@ def create_zarr_target(self):
yield tmp
-@requires_zarr
-class TestZarrArrayWrapperCalls(TestZarrKVStoreV3):
- def test_avoid_excess_metadata_calls(self) -> None:
- """Test that chunk requests do not trigger redundant metadata requests.
-
- This test targets logic in backends.zarr.ZarrArrayWrapper, asserting that calls
- to retrieve chunk data after initialization do not trigger additional
- metadata requests.
-
- https://github.com/pydata/xarray/issues/8290
- """
-
- import zarr
-
- ds = xr.Dataset(data_vars={"test": (("Z",), np.array([123]).reshape(1))})
-
- # The call to retrieve metadata performs a group lookup. We patch Group.__getitem__
- # so that we can inspect calls to this method - specifically count of calls.
- # Use of side_effect means that calls are passed through to the original method
- # rather than a mocked method.
- Group = zarr.hierarchy.Group
- with (
- self.create_zarr_target() as store,
- patch.object(
- Group, "__getitem__", side_effect=Group.__getitem__, autospec=True
- ) as mock,
- ):
- ds.to_zarr(store, mode="w")
-
- # We expect this to request array metadata information, so call_count should be >= 1,
- # At time of writing, 2 calls are made
- xrds = xr.open_zarr(store)
- call_count = mock.call_count
- assert call_count > 0
-
- # compute() requests array data, which should not trigger additional metadata requests
- # we assert that the number of calls has not increased after fetchhing the array
- xrds.test.compute(scheduler="sync")
- assert mock.call_count == call_count
-
-
@requires_zarr
@requires_fsspec
def test_zarr_storage_options() -> None:
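In usage terms (store and variable names are hypothetical), the wrapper now holds the zarr array it was built from, so only the initial open touches group metadata:

    import xarray as xr

    ds = xr.open_zarr("example.zarr")  # one metadata lookup per array
    ds["test"].compute()               # chunk reads only, no extra group access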
From c93b31a9175eed9e506eb1950bf843d7de715bb9 Mon Sep 17 00:00:00 2001
From: Ryan Abernathey
Date: Thu, 30 Nov 2023 22:58:54 -0500
Subject: [PATCH 25/58] Add mode='a-': Do not overwrite coordinates when
appending to Zarr with `append_dim` (#8428)
Co-authored-by: Deepak Cherian
Co-authored-by: Anderson Banihirwe <13301940+andersy005@users.noreply.github.com>
---
doc/whats-new.rst | 4 ++--
xarray/backends/api.py | 22 ++++++++++++----------
xarray/backends/zarr.py | 21 ++++++++++++++++++---
xarray/core/dataarray.py | 18 ++++++++++++------
xarray/core/dataset.py | 12 +++++++-----
xarray/core/types.py | 3 +++
xarray/tests/test_backends.py | 25 ++++++++++++++++++++++++-
7 files changed, 78 insertions(+), 27 deletions(-)
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index abc3760df6e..817ea2c8235 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -25,8 +25,8 @@ New Features
- Use a concise format when plotting datetime arrays. (:pull:`8449`).
By `Jimmy Westling `_.
-
-
+- Avoid overwriting unchanged existing coordinate variables when appending by setting ``mode='a-'``.
+ By `Ryan Abernathey `_ and `Deepak Cherian `_.
- :py:meth:`~xarray.DataArray.rank` now operates on dask-backed arrays, assuming
the core dim has exactly one chunk. (:pull:`8475`).
By `Maximilian Roos `_.
diff --git a/xarray/backends/api.py b/xarray/backends/api.py
index 3e6d00a8059..c59f2f8d81b 100644
--- a/xarray/backends/api.py
+++ b/xarray/backends/api.py
@@ -39,6 +39,7 @@
from xarray.core.dataset import Dataset, _get_chunk, _maybe_chunk
from xarray.core.indexes import Index
from xarray.core.parallelcompat import guess_chunkmanager
+from xarray.core.types import ZarrWriteModes
from xarray.core.utils import is_remote_uri
if TYPE_CHECKING:
@@ -69,7 +70,6 @@
"NETCDF4", "NETCDF4_CLASSIC", "NETCDF3_64BIT", "NETCDF3_CLASSIC"
]
-
DATAARRAY_NAME = "__xarray_dataarray_name__"
DATAARRAY_VARIABLE = "__xarray_dataarray_variable__"
@@ -1577,7 +1577,7 @@ def to_zarr(
dataset: Dataset,
store: MutableMapping | str | os.PathLike[str] | None = None,
chunk_store: MutableMapping | str | os.PathLike | None = None,
- mode: Literal["w", "w-", "a", "r+", None] = None,
+ mode: ZarrWriteModes | None = None,
synchronizer=None,
group: str | None = None,
encoding: Mapping | None = None,
@@ -1601,7 +1601,7 @@ def to_zarr(
dataset: Dataset,
store: MutableMapping | str | os.PathLike[str] | None = None,
chunk_store: MutableMapping | str | os.PathLike | None = None,
- mode: Literal["w", "w-", "a", "r+", None] = None,
+ mode: ZarrWriteModes | None = None,
synchronizer=None,
group: str | None = None,
encoding: Mapping | None = None,
@@ -1623,7 +1623,7 @@ def to_zarr(
dataset: Dataset,
store: MutableMapping | str | os.PathLike[str] | None = None,
chunk_store: MutableMapping | str | os.PathLike | None = None,
- mode: Literal["w", "w-", "a", "r+", None] = None,
+ mode: ZarrWriteModes | None = None,
synchronizer=None,
group: str | None = None,
encoding: Mapping | None = None,
@@ -1680,16 +1680,18 @@ def to_zarr(
else:
mode = "w-"
- if mode != "a" and append_dim is not None:
+ if mode not in ["a", "a-"] and append_dim is not None:
raise ValueError("cannot set append_dim unless mode='a' or mode=None")
- if mode not in ["a", "r+"] and region is not None:
- raise ValueError("cannot set region unless mode='a', mode='r+' or mode=None")
+ if mode not in ["a", "a-", "r+"] and region is not None:
+ raise ValueError(
+ "cannot set region unless mode='a', mode='a-', mode='r+' or mode=None"
+ )
- if mode not in ["w", "w-", "a", "r+"]:
+ if mode not in ["w", "w-", "a", "a-", "r+"]:
raise ValueError(
"The only supported options for mode are 'w', "
- f"'w-', 'a' and 'r+', but mode={mode!r}"
+ f"'w-', 'a', 'a-', and 'r+', but mode={mode!r}"
)
# validate Dataset keys, DataArray names
@@ -1745,7 +1747,7 @@ def to_zarr(
write_empty=write_empty_chunks,
)
- if mode in ["a", "r+"]:
+ if mode in ["a", "a-", "r+"]:
_validate_datatypes_for_zarr_append(zstore, dataset)
if append_dim is not None:
existing_dims = zstore.get_dimensions()
diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py
index c437c42183a..469bbf4c339 100644
--- a/xarray/backends/zarr.py
+++ b/xarray/backends/zarr.py
@@ -21,6 +21,7 @@
from xarray.core import indexing
from xarray.core.parallelcompat import guess_chunkmanager
from xarray.core.pycompat import integer_types
+from xarray.core.types import ZarrWriteModes
from xarray.core.utils import (
FrozenDict,
HiddenKeyDict,
@@ -385,7 +386,7 @@ class ZarrStore(AbstractWritableDataStore):
def open_group(
cls,
store,
- mode="r",
+ mode: ZarrWriteModes = "r",
synchronizer=None,
group=None,
consolidated=False,
@@ -410,7 +411,8 @@ def open_group(
zarr_version = getattr(store, "_store_version", 2)
open_kwargs = dict(
- mode=mode,
+ # mode='a-' is a handcrafted xarray specialty
+ mode="a" if mode == "a-" else mode,
synchronizer=synchronizer,
path=group,
)
@@ -639,8 +641,21 @@ def store(
self.set_attributes(attributes)
self.set_dimensions(variables_encoded, unlimited_dims=unlimited_dims)
+ # if we are appending to an append_dim, only write either
+ # - new variables not already present, OR
+ # - variables with the append_dim in their dimensions
+ # We do NOT overwrite other variables.
+ if self._mode == "a-" and self._append_dim is not None:
+ variables_to_set = {
+ k: v
+ for k, v in variables_encoded.items()
+ if (k not in existing_variable_names) or (self._append_dim in v.dims)
+ }
+ else:
+ variables_to_set = variables_encoded
+
self.set_variables(
- variables_encoded, check_encoding_set, writer, unlimited_dims=unlimited_dims
+ variables_to_set, check_encoding_set, writer, unlimited_dims=unlimited_dims
)
if self._consolidate_on_close:
zarr.consolidate_metadata(self.zarr_group.store)
diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
index bac4ad36adb..935eff9fb18 100644
--- a/xarray/core/dataarray.py
+++ b/xarray/core/dataarray.py
@@ -49,7 +49,12 @@
from xarray.core.indexing import is_fancy_indexer, map_index_queries
from xarray.core.merge import PANDAS_TYPES, MergeError
from xarray.core.options import OPTIONS, _get_keep_attrs
-from xarray.core.types import DaCompatible, T_DataArray, T_DataArrayOrSet
+from xarray.core.types import (
+ DaCompatible,
+ T_DataArray,
+ T_DataArrayOrSet,
+ ZarrWriteModes,
+)
from xarray.core.utils import (
Default,
HybridMappingProxy,
@@ -4074,7 +4079,7 @@ def to_zarr(
self,
store: MutableMapping | str | PathLike[str] | None = None,
chunk_store: MutableMapping | str | PathLike | None = None,
- mode: Literal["w", "w-", "a", "r+", None] = None,
+ mode: ZarrWriteModes | None = None,
synchronizer=None,
group: str | None = None,
*,
@@ -4095,7 +4100,7 @@ def to_zarr(
self,
store: MutableMapping | str | PathLike[str] | None = None,
chunk_store: MutableMapping | str | PathLike | None = None,
- mode: Literal["w", "w-", "a", "r+", None] = None,
+ mode: ZarrWriteModes | None = None,
synchronizer=None,
group: str | None = None,
encoding: Mapping | None = None,
@@ -4114,7 +4119,7 @@ def to_zarr(
self,
store: MutableMapping | str | PathLike[str] | None = None,
chunk_store: MutableMapping | str | PathLike | None = None,
- mode: Literal["w", "w-", "a", "r+", None] = None,
+ mode: ZarrWriteModes | None = None,
synchronizer=None,
group: str | None = None,
encoding: Mapping | None = None,
@@ -4150,10 +4155,11 @@ def to_zarr(
chunk_store : MutableMapping, str or path-like, optional
Store or path to directory in local or remote file system only for Zarr
array chunks. Requires zarr-python v2.4.0 or later.
- mode : {"w", "w-", "a", "r+", None}, optional
+ mode : {"w", "w-", "a", "a-", r+", None}, optional
Persistence mode: "w" means create (overwrite if exists);
"w-" means create (fail if exists);
- "a" means override existing variables (create if does not exist);
+ "a" means override all existing variables including dimension coordinates (create if does not exist);
+ "a-" means only append those variables that have ``append_dim``.
"r+" means modify existing array *values* only (raise an error if
any metadata or shapes would change).
The default mode is "a" if ``append_dim`` is set. Otherwise, it is
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index 66c83e95b77..c65bbd6b849 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -100,6 +100,7 @@
T_Chunks,
T_DataArrayOrSet,
T_Dataset,
+ ZarrWriteModes,
)
from xarray.core.utils import (
Default,
@@ -2305,7 +2306,7 @@ def to_zarr(
self,
store: MutableMapping | str | PathLike[str] | None = None,
chunk_store: MutableMapping | str | PathLike | None = None,
- mode: Literal["w", "w-", "a", "r+", None] = None,
+ mode: ZarrWriteModes | None = None,
synchronizer=None,
group: str | None = None,
encoding: Mapping | None = None,
@@ -2328,7 +2329,7 @@ def to_zarr(
self,
store: MutableMapping | str | PathLike[str] | None = None,
chunk_store: MutableMapping | str | PathLike | None = None,
- mode: Literal["w", "w-", "a", "r+", None] = None,
+ mode: ZarrWriteModes | None = None,
synchronizer=None,
group: str | None = None,
encoding: Mapping | None = None,
@@ -2349,7 +2350,7 @@ def to_zarr(
self,
store: MutableMapping | str | PathLike[str] | None = None,
chunk_store: MutableMapping | str | PathLike | None = None,
- mode: Literal["w", "w-", "a", "r+", None] = None,
+ mode: ZarrWriteModes | None = None,
synchronizer=None,
group: str | None = None,
encoding: Mapping | None = None,
@@ -2387,10 +2388,11 @@ def to_zarr(
chunk_store : MutableMapping, str or path-like, optional
Store or path to directory in local or remote file system only for Zarr
array chunks. Requires zarr-python v2.4.0 or later.
- mode : {"w", "w-", "a", "r+", None}, optional
+ mode : {"w", "w-", "a", "a-", r+", None}, optional
Persistence mode: "w" means create (overwrite if exists);
"w-" means create (fail if exists);
- "a" means override existing variables (create if does not exist);
+ "a" means override all existing variables including dimension coordinates (create if does not exist);
+ "a-" means only append those variables that have ``append_dim``.
"r+" means modify existing array *values* only (raise an error if
any metadata or shapes would change).
The default mode is "a" if ``append_dim`` is set. Otherwise, it is
diff --git a/xarray/core/types.py b/xarray/core/types.py
index 1be5b00c43f..90f0f94e679 100644
--- a/xarray/core/types.py
+++ b/xarray/core/types.py
@@ -282,3 +282,6 @@ def copy(
"midpoint",
"nearest",
]
+
+
+ZarrWriteModes = Literal["w", "w-", "a", "a-", "r+", "r"]
diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
index d60daefc728..062f5de7d20 100644
--- a/xarray/tests/test_backends.py
+++ b/xarray/tests/test_backends.py
@@ -2390,6 +2390,29 @@ def test_append_with_new_variable(self) -> None:
xr.open_dataset(store_target, engine="zarr", **self.version_kwargs),
)
+ def test_append_with_append_dim_no_overwrite(self) -> None:
+ ds, ds_to_append, _ = create_append_test_data()
+ with self.create_zarr_target() as store_target:
+ ds.to_zarr(store_target, mode="w", **self.version_kwargs)
+ original = xr.concat([ds, ds_to_append], dim="time")
+ original2 = xr.concat([original, ds_to_append], dim="time")
+
+ # overwrite a coordinate;
+ # for mode='a-', this will not get written to the store
+ # because it does not have the append_dim as a dim
+ ds_to_append.lon.data[:] = -999
+ ds_to_append.to_zarr(
+ store_target, mode="a-", append_dim="time", **self.version_kwargs
+ )
+ actual = xr.open_dataset(store_target, engine="zarr", **self.version_kwargs)
+ assert_identical(original, actual)
+
+ # by default, mode="a" will overwrite all coordinates.
+ ds_to_append.to_zarr(store_target, append_dim="time", **self.version_kwargs)
+ actual = xr.open_dataset(store_target, engine="zarr", **self.version_kwargs)
+ original2.lon.data[:] = -999
+ assert_identical(original2, actual)
+
@requires_dask
def test_to_zarr_compute_false_roundtrip(self) -> None:
from dask.delayed import Delayed
@@ -2586,7 +2609,7 @@ def setup_and_verify_store(expected=data):
with pytest.raises(
ValueError,
match=re.escape(
- "cannot set region unless mode='a', mode='r+' or mode=None"
+ "cannot set region unless mode='a', mode='a-', mode='r+' or mode=None"
),
):
data.to_zarr(
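A minimal sketch of the new mode (paths and names are illustrative): variables carrying the ``append_dim`` are appended, while unchanged coordinates such as ``lon`` are left untouched in the store:

    import numpy as np
    import xarray as xr

    ds = xr.Dataset(
        {"temp": (("time", "lon"), np.zeros((2, 3)))},
        coords={"time": [0, 1], "lon": [10.0, 20.0, 30.0]},
    )
    ds.to_zarr("store.zarr", mode="w")

    more = ds.assign_coords(time=[2, 3])
    more.to_zarr("store.zarr", mode="a-", append_dim="time")  # "lon" is not rewritten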
From 4550a01c9dca27dd043d734bab1a78ef972be68b Mon Sep 17 00:00:00 2001
From: Illviljan <14371165+Illviljan@users.noreply.github.com>
Date: Fri, 1 Dec 2023 19:52:10 +0100
Subject: [PATCH 26/58] Add expand_dims (#8407)
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Anderson Banihirwe <13301940+andersy005@users.noreply.github.com>
---
xarray/core/variable.py | 2 +-
xarray/namedarray/_array_api.py | 52 +++++++++++++++++++++++++++++++++
xarray/namedarray/_typing.py | 14 +++++++++
xarray/namedarray/core.py | 5 ++--
xarray/namedarray/utils.py | 15 +---------
xarray/tests/test_namedarray.py | 10 +++++--
6 files changed, 78 insertions(+), 20 deletions(-)
diff --git a/xarray/core/variable.py b/xarray/core/variable.py
index d9102dc9e0a..3add7a1441e 100644
--- a/xarray/core/variable.py
+++ b/xarray/core/variable.py
@@ -2596,7 +2596,7 @@ def _as_sparse(self, sparse_format=_default, fill_value=_default) -> Variable:
"""
Use sparse-array as backend.
"""
- from xarray.namedarray.utils import _default as _default_named
+ from xarray.namedarray._typing import _default as _default_named
if sparse_format is _default:
sparse_format = _default_named
diff --git a/xarray/namedarray/_array_api.py b/xarray/namedarray/_array_api.py
index e205c4d4efe..b5c320e0b96 100644
--- a/xarray/namedarray/_array_api.py
+++ b/xarray/namedarray/_array_api.py
@@ -7,7 +7,11 @@
import numpy as np
from xarray.namedarray._typing import (
+ Default,
_arrayapi,
+ _Axis,
+ _default,
+ _Dim,
_DType,
_ScalarType,
_ShapeType,
@@ -144,3 +148,51 @@ def real(
xp = _get_data_namespace(x)
out = x._new(data=xp.real(x._data))
return out
+
+
+# %% Manipulation functions
+def expand_dims(
+ x: NamedArray[Any, _DType],
+ /,
+ *,
+ dim: _Dim | Default = _default,
+ axis: _Axis = 0,
+) -> NamedArray[Any, _DType]:
+ """
+ Expands the shape of an array by inserting a new dimension of size one at the
+ position specified by dim.
+
+ Parameters
+ ----------
+ x :
+ Array to expand.
+ dim :
+ Dimension name. New dimension will be stored in the axis position.
+ axis :
+ (Not recommended) Axis position (zero-based). Default is 0.
+
+ Returns
+ -------
+ out :
+ An expanded output array having the same data type as x.
+
+ Examples
+ --------
+ >>> x = NamedArray(("x", "y"), nxp.asarray([[1.0, 2.0], [3.0, 4.0]]))
+ >>> expand_dims(x)
+ <xarray.NamedArray (dim_2: 1, x: 2, y: 2)>
+ Array([[[1., 2.],
+ [3., 4.]]], dtype=float64)
+ >>> expand_dims(x, dim="z")
+ <xarray.NamedArray (z: 1, x: 2, y: 2)>
+ Array([[[1., 2.],
+ [3., 4.]]], dtype=float64)
+ """
+ xp = _get_data_namespace(x)
+ dims = x.dims
+ if dim is _default:
+ dim = f"dim_{len(dims)}"
+ d = list(dims)
+ d.insert(axis, dim)
+ out = x._new(dims=tuple(d), data=xp.expand_dims(x._data, axis=axis))
+ return out
diff --git a/xarray/namedarray/_typing.py b/xarray/namedarray/_typing.py
index 0b972e19539..670a2076eb1 100644
--- a/xarray/namedarray/_typing.py
+++ b/xarray/namedarray/_typing.py
@@ -1,10 +1,12 @@
from __future__ import annotations
from collections.abc import Hashable, Iterable, Mapping, Sequence
+from enum import Enum
from types import ModuleType
from typing import (
Any,
Callable,
+ Final,
Protocol,
SupportsIndex,
TypeVar,
@@ -15,6 +17,14 @@
import numpy as np
+
+# Singleton type, as per https://github.com/python/typing/pull/240
+class Default(Enum):
+ token: Final = 0
+
+
+_default = Default.token
+
# https://stackoverflow.com/questions/74633074/how-to-type-hint-a-generic-numpy-array
_T = TypeVar("_T")
_T_co = TypeVar("_T_co", covariant=True)
@@ -49,6 +59,10 @@ def dtype(self) -> _DType_co:
_ShapeType = TypeVar("_ShapeType", bound=Any)
_ShapeType_co = TypeVar("_ShapeType_co", bound=Any, covariant=True)
+_Axis = int
+_Axes = tuple[_Axis, ...]
+_AxisLike = Union[_Axis, _Axes]
+
_Chunks = tuple[_Shape, ...]
_Dim = Hashable
diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py
index 002afe96358..b9ad27b6679 100644
--- a/xarray/namedarray/core.py
+++ b/xarray/namedarray/core.py
@@ -25,6 +25,7 @@
_arrayapi,
_arrayfunction_or_api,
_chunkedarray,
+ _default,
_dtype,
_DType_co,
_ScalarType_co,
@@ -33,13 +34,14 @@
_SupportsImag,
_SupportsReal,
)
-from xarray.namedarray.utils import _default, is_duck_dask_array, to_0d_object_array
+from xarray.namedarray.utils import is_duck_dask_array, to_0d_object_array
if TYPE_CHECKING:
from numpy.typing import ArrayLike, NDArray
from xarray.core.types import Dims
from xarray.namedarray._typing import (
+ Default,
_AttrsLike,
_Chunks,
_Dim,
@@ -52,7 +54,6 @@
_ShapeType,
duckarray,
)
- from xarray.namedarray.utils import Default
try:
from dask.typing import (
diff --git a/xarray/namedarray/utils.py b/xarray/namedarray/utils.py
index 03eb0134231..4bd20931189 100644
--- a/xarray/namedarray/utils.py
+++ b/xarray/namedarray/utils.py
@@ -2,12 +2,7 @@
import sys
from collections.abc import Hashable
-from enum import Enum
-from typing import (
- TYPE_CHECKING,
- Any,
- Final,
-)
+from typing import TYPE_CHECKING, Any
import numpy as np
@@ -31,14 +26,6 @@
DaskCollection: Any = NDArray # type: ignore
-# Singleton type, as per https://github.com/python/typing/pull/240
-class Default(Enum):
- token: Final = 0
-
-
-_default = Default.token
-
-
def module_available(module: str) -> bool:
"""Checks whether a module is installed without importing it.
diff --git a/xarray/tests/test_namedarray.py b/xarray/tests/test_namedarray.py
index deeb5ce753a..c75b01e9e50 100644
--- a/xarray/tests/test_namedarray.py
+++ b/xarray/tests/test_namedarray.py
@@ -10,9 +10,13 @@
import pytest
from xarray.core.indexing import ExplicitlyIndexed
-from xarray.namedarray._typing import _arrayfunction_or_api, _DType_co, _ShapeType_co
+from xarray.namedarray._typing import (
+ _arrayfunction_or_api,
+ _default,
+ _DType_co,
+ _ShapeType_co,
+)
from xarray.namedarray.core import NamedArray, from_array
-from xarray.namedarray.utils import _default
if TYPE_CHECKING:
from types import ModuleType
@@ -20,13 +24,13 @@
from numpy.typing import ArrayLike, DTypeLike, NDArray
from xarray.namedarray._typing import (
+ Default,
_AttrsLike,
_DimsLike,
_DType,
_Shape,
duckarray,
)
- from xarray.namedarray.utils import Default
class CustomArrayBase(Generic[_ShapeType_co, _DType_co]):
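The sentinel pattern that moved into ``_typing.py``, reduced to its essentials (the ``expand`` function is illustrative): a single-member Enum gives a default that type checkers can tell apart from every real value, including None:

    from enum import Enum
    from typing import Final

    class Default(Enum):
        token: Final = 0

    _default = Default.token

    def expand(dim: str | Default = _default) -> str:
        # an identity check distinguishes "argument omitted" from any user value
        return "dim_0" if dim is _default else dim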
From abd2068bca8da1e1069790bb47d97b8843260d60 Mon Sep 17 00:00:00 2001
From: Michael Niklas
Date: Fri, 1 Dec 2023 23:02:20 +0100
Subject: [PATCH 27/58] Update to mypy1.7 (#8501)
* fix mypy1.7 errors
* pin mypy to <1.8
* add entry to whats-new
---
.github/workflows/ci-additional.yaml | 4 ++--
doc/whats-new.rst | 2 ++
xarray/core/alignment.py | 4 ++--
xarray/core/dataset.py | 4 ++--
4 files changed, 8 insertions(+), 6 deletions(-)
diff --git a/.github/workflows/ci-additional.yaml b/.github/workflows/ci-additional.yaml
index 43f13f03133..cd6edcf7b3a 100644
--- a/.github/workflows/ci-additional.yaml
+++ b/.github/workflows/ci-additional.yaml
@@ -117,7 +117,7 @@ jobs:
python xarray/util/print_versions.py
- name: Install mypy
run: |
- python -m pip install "mypy<1.7" --force-reinstall
+ python -m pip install "mypy<1.8" --force-reinstall
- name: Run mypy
run: |
@@ -171,7 +171,7 @@ jobs:
python xarray/util/print_versions.py
- name: Install mypy
run: |
- python -m pip install "mypy<1.7" --force-reinstall
+ python -m pip install "mypy<1.8" --force-reinstall
- name: Run mypy
run: |
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 817ea2c8235..14869b3a1ea 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -79,6 +79,8 @@ Internal Changes
- :py:meth:`DataArray.bfill` & :py:meth:`DataArray.ffill` now use numbagg by
default, which is up to 5x faster where parallelization is possible. (:pull:`8339`)
By `Maximilian Roos `_.
+- Update mypy version to 1.7 (:issue:`8448`, :pull:`8501`).
+ By `Michael Niklas `_.
.. _whats-new.2023.11.0:
diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py
index 041fe63a9f3..28857c2d26e 100644
--- a/xarray/core/alignment.py
+++ b/xarray/core/alignment.py
@@ -681,7 +681,7 @@ def align(
...
-def align( # type: ignore[misc]
+def align(
*objects: T_Alignable,
join: JoinOptions = "inner",
copy: bool = True,
@@ -1153,7 +1153,7 @@ def broadcast(
...
-def broadcast( # type: ignore[misc]
+def broadcast(
*args: T_Alignable, exclude: str | Iterable[Hashable] | None = None
) -> tuple[T_Alignable, ...]:
"""Explicitly broadcast any number of DataArray or Dataset objects against
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index c65bbd6b849..5d19265e56d 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -7979,8 +7979,8 @@ def sortby(
variables = variables
arrays = [v if isinstance(v, DataArray) else self[v] for v in variables]
aligned_vars = align(self, *arrays, join="left")
- aligned_self = aligned_vars[0]
- aligned_other_vars: tuple[DataArray, ...] = aligned_vars[1:]
+ aligned_self = cast("Self", aligned_vars[0])
+ aligned_other_vars = cast(tuple[DataArray, ...], aligned_vars[1:])
vars_by_dim = defaultdict(list)
for data_array in aligned_other_vars:
if data_array.ndim != 1:
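The ``cast`` calls introduced in ``sortby`` exist purely for the type checker; reduced to essentials (names are illustrative), ``cast`` asserts a type for mypy and is erased at runtime:

    from typing import cast

    values: list[object] = [1, 2, 3]
    first = cast(int, values[0])  # narrows the type for mypy; no runtime check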
From 5213f0d63465eac228822fb7299046e0c6701acc Mon Sep 17 00:00:00 2001
From: Michael Niklas
Date: Fri, 1 Dec 2023 23:02:38 +0100
Subject: [PATCH 28/58] change type of curvefit's p0 and bounds to mapping
(#8502)
* change type of curvefit's p0 and bounds to mapping
* add entry to whats-new
---
doc/whats-new.rst | 4 +++-
xarray/core/dataarray.py | 4 ++--
xarray/core/dataset.py | 4 ++--
3 files changed, 7 insertions(+), 5 deletions(-)
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 14869b3a1ea..102a64af433 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -58,7 +58,9 @@ Bug fixes
- Fix dtype inference for ``pd.CategoricalIndex`` when categories are backed by a ``pd.ExtensionDtype`` (:pull:`8481`)
- Fix writing a variable that requires transposing when not writing to a region (:pull:`8484`)
By `Maximilian Roos `_.
-
+- Static typing of ``p0`` and ``bounds`` arguments of :py:func:`xarray.DataArray.curvefit` and :py:func:`xarray.Dataset.curvefit`
+ was changed to ``Mapping`` (:pull:`8502`).
+ By `Michael Niklas `_.
Documentation
~~~~~~~~~~~~~
diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
index 935eff9fb18..1d7e82d3044 100644
--- a/xarray/core/dataarray.py
+++ b/xarray/core/dataarray.py
@@ -6220,8 +6220,8 @@ def curvefit(
func: Callable[..., Any],
reduce_dims: Dims = None,
skipna: bool = True,
- p0: dict[str, float | DataArray] | None = None,
- bounds: dict[str, tuple[float | DataArray, float | DataArray]] | None = None,
+ p0: Mapping[str, float | DataArray] | None = None,
+ bounds: Mapping[str, tuple[float | DataArray, float | DataArray]] | None = None,
param_names: Sequence[str] | None = None,
errors: ErrorOptions = "raise",
kwargs: dict[str, Any] | None = None,
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index 5d19265e56d..d010bfbade0 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -9622,8 +9622,8 @@ def curvefit(
func: Callable[..., Any],
reduce_dims: Dims = None,
skipna: bool = True,
- p0: dict[str, float | DataArray] | None = None,
- bounds: dict[str, tuple[float | DataArray, float | DataArray]] | None = None,
+ p0: Mapping[str, float | DataArray] | None = None,
+ bounds: Mapping[str, tuple[float | DataArray, float | DataArray]] | None = None,
param_names: Sequence[str] | None = None,
errors: ErrorOptions = "raise",
kwargs: dict[str, Any] | None = None,
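Why widening to ``Mapping`` helps, as a sketch (requires scipy at runtime; names are illustrative): read-only mappings such as ``MappingProxyType`` now type-check for ``p0`` and ``bounds``:

    from types import MappingProxyType
    import numpy as np
    import xarray as xr

    def line(x, m, b):
        return m * x + b

    x = np.linspace(0, 1, 50)
    da = xr.DataArray(2.0 * x + 1.0, dims="x", coords={"x": x})

    fit = da.curvefit("x", line, p0=MappingProxyType({"m": 1.0, "b": 0.0}))
    print(fit.curvefit_coefficients.values)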
From 2f0091393d187e26515ce1274a32caf1074415db Mon Sep 17 00:00:00 2001
From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com>
Date: Fri, 1 Dec 2023 21:52:49 -0800
Subject: [PATCH 29/58] Fully deprecate `.drop` (#8497)
* Fully deprecate `.drop`
I think it's time...
---
doc/whats-new.rst | 4 ++++
xarray/core/dataset.py | 22 ++++++++++------------
xarray/tests/test_dataset.py | 12 ++++++------
3 files changed, 20 insertions(+), 18 deletions(-)
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 102a64af433..2722ca0b38a 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -51,6 +51,10 @@ Deprecations
currently ``PendingDeprecationWarning``, which are silenced by default. We'll
convert these to ``DeprecationWarning`` in a future release.
By `Maximilian Roos `_.
+- :py:meth:`Dataset.drop` &
+ :py:meth:`DataArray.drop` are now deprecated, having been pending deprecation
+ for several years. :py:meth:`DataArray.drop_sel` & :py:meth:`DataArray.drop_vars`
+ replace them for labels & variables respectively.
Bug fixes
~~~~~~~~~
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index d010bfbade0..b8093d3dd78 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -111,6 +111,7 @@
decode_numpy_dict_values,
drop_dims_from_indexers,
either_dict_or_kwargs,
+ emit_user_level_warning,
infix_dims,
is_dict_like,
is_scalar,
@@ -5944,10 +5945,9 @@ def drop(
raise ValueError('errors must be either "raise" or "ignore"')
if is_dict_like(labels) and not isinstance(labels, dict):
- warnings.warn(
- "dropping coordinates using `drop` is be deprecated; use drop_vars.",
- FutureWarning,
- stacklevel=2,
+ emit_user_level_warning(
+ "dropping coordinates using `drop` is deprecated; use drop_vars.",
+ DeprecationWarning,
)
return self.drop_vars(labels, errors=errors)
@@ -5957,10 +5957,9 @@ def drop(
labels = either_dict_or_kwargs(labels, labels_kwargs, "drop")
if dim is None and (is_scalar(labels) or isinstance(labels, Iterable)):
- warnings.warn(
- "dropping variables using `drop` will be deprecated; using drop_vars is encouraged.",
- PendingDeprecationWarning,
- stacklevel=2,
+ emit_user_level_warning(
+ "dropping variables using `drop` is deprecated; use drop_vars.",
+ DeprecationWarning,
)
return self.drop_vars(labels, errors=errors)
if dim is not None:
@@ -5972,10 +5971,9 @@ def drop(
)
return self.drop_sel({dim: labels}, errors=errors, **labels_kwargs)
- warnings.warn(
- "dropping labels using `drop` will be deprecated; using drop_sel is encouraged.",
- PendingDeprecationWarning,
- stacklevel=2,
+ emit_user_level_warning(
+ "dropping labels using `drop` is deprecated; use `drop_sel` instead.",
+ DeprecationWarning,
)
return self.drop_sel(labels, errors=errors)
diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py
index a53d81e36af..37ddcf2786a 100644
--- a/xarray/tests/test_dataset.py
+++ b/xarray/tests/test_dataset.py
@@ -2651,19 +2651,19 @@ def test_drop_variables(self) -> None:
# deprecated approach with `drop` works (straight copy paste from above)
- with pytest.warns(PendingDeprecationWarning):
+ with pytest.warns(DeprecationWarning):
actual = data.drop("not_found_here", errors="ignore")
assert_identical(data, actual)
- with pytest.warns(PendingDeprecationWarning):
+ with pytest.warns(DeprecationWarning):
actual = data.drop(["not_found_here"], errors="ignore")
assert_identical(data, actual)
- with pytest.warns(PendingDeprecationWarning):
+ with pytest.warns(DeprecationWarning):
actual = data.drop(["time", "not_found_here"], errors="ignore")
assert_identical(expected, actual)
- with pytest.warns(PendingDeprecationWarning):
+ with pytest.warns(DeprecationWarning):
actual = data.drop({"time", "not_found_here"}, errors="ignore")
assert_identical(expected, actual)
@@ -2736,9 +2736,9 @@ def test_drop_labels_by_keyword(self) -> None:
ds5 = data.drop_sel(x=["a", "b"], y=range(0, 6, 2))
arr = DataArray(range(3), dims=["c"])
- with pytest.warns(FutureWarning):
+ with pytest.warns(DeprecationWarning):
data.drop(arr.coords)
- with pytest.warns(FutureWarning):
+ with pytest.warns(DeprecationWarning):
data.drop(arr.xindexes)
assert_array_equal(ds1.coords["x"], ["b"])
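The replacement calls named in the deprecation messages, side by side (the dataset is illustrative):

    import xarray as xr

    ds = xr.Dataset({"a": ("x", [1, 2, 3])}, coords={"x": [10, 20, 30]})

    ds.drop_vars("a")        # instead of ds.drop("a")
    ds.drop_sel(x=[10, 20])  # instead of ds.drop([10, 20], dim="x")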
From 1acdb5e6128c82a4d537ea704ca400cb94d0780c Mon Sep 17 00:00:00 2001
From: "Gregorio L. Trevisan"
Date: Sat, 2 Dec 2023 05:53:10 -0800
Subject: [PATCH 30/58] Fix docstrings for `combine_by_coords` (#8471)
* Fix docstrings for combine_by_coords
Update default for `combine_attrs` parameter.
* add whats-new
---------
Co-authored-by: Michael Niklas
---
doc/whats-new.rst | 3 ++-
xarray/core/combine.py | 2 +-
2 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 2722ca0b38a..bfe0a1b9be5 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -74,10 +74,11 @@ Documentation
This is the recommended technique to replace the use of the deprecated ``loffset`` parameter
in ``resample`` (:pull:`8479`).
By `Doug Latornell `_.
-
- Improved error message when attempting to get a variable which doesn't exist from a Dataset.
(:pull:`8474`)
By `Maximilian Roos `_.
+- Fix default value of ``combine_attrs`` in :py:func:`xarray.combine_by_coords` (:pull:`8471`)
+ By `Gregorio L. Trevisan `_.
Internal Changes
~~~~~~~~~~~~~~~~
diff --git a/xarray/core/combine.py b/xarray/core/combine.py
index eecd01d011e..1939e2c7d0f 100644
--- a/xarray/core/combine.py
+++ b/xarray/core/combine.py
@@ -739,7 +739,7 @@ def combine_by_coords(
dimension must have the same size in all objects.
combine_attrs : {"drop", "identical", "no_conflicts", "drop_conflicts", \
- "override"} or callable, default: "drop"
+ "override"} or callable, default: "no_conflicts"
A callable or a string indicating how to combine attrs of the objects being
merged:
From d44bfd7aecf155710402a8ea277ca50f9db64e3e Mon Sep 17 00:00:00 2001
From: Anderson Banihirwe <13301940+andersy005@users.noreply.github.com>
Date: Sat, 2 Dec 2023 14:42:17 -0800
Subject: [PATCH 31/58] roll out the new/refreshed Xarray logo (#8505)
---
doc/_static/dataset-diagram-build.sh | 2 -
doc/_static/dataset-diagram-logo.pdf | Bin 13358 -> 0 bytes
doc/_static/dataset-diagram-logo.png | Bin 117124 -> 0 bytes
doc/_static/dataset-diagram-logo.svg | 484 ------------------
doc/_static/dataset-diagram-logo.tex | 283 ----------
doc/_static/dataset-diagram-square-logo.png | Bin 183796 -> 0 bytes
doc/_static/dataset-diagram-square-logo.tex | 277 ----------
doc/_static/dataset-diagram.tex | 270 ----------
doc/_static/favicon.ico | Bin 4286 -> 0 bytes
doc/_static/logos/Xarray_Icon_Final.png | Bin 0 -> 42939 bytes
doc/_static/logos/Xarray_Icon_Final.svg | 29 ++
...Xarray_Logo_FullColor_InverseRGB_Final.png | Bin 0 -> 88561 bytes
...Xarray_Logo_FullColor_InverseRGB_Final.svg | 54 ++
doc/_static/logos/Xarray_Logo_RGB_Final.png | Bin 0 -> 88862 bytes
doc/_static/logos/Xarray_Logo_RGB_Final.svg | 54 ++
doc/conf.py | 8 +-
doc/gallery.yml | 6 +-
17 files changed, 144 insertions(+), 1323 deletions(-)
delete mode 100755 doc/_static/dataset-diagram-build.sh
delete mode 100644 doc/_static/dataset-diagram-logo.pdf
delete mode 100644 doc/_static/dataset-diagram-logo.png
delete mode 100644 doc/_static/dataset-diagram-logo.svg
delete mode 100644 doc/_static/dataset-diagram-logo.tex
delete mode 100644 doc/_static/dataset-diagram-square-logo.png
delete mode 100644 doc/_static/dataset-diagram-square-logo.tex
delete mode 100644 doc/_static/dataset-diagram.tex
delete mode 100644 doc/_static/favicon.ico
create mode 100644 doc/_static/logos/Xarray_Icon_Final.png
create mode 100644 doc/_static/logos/Xarray_Icon_Final.svg
create mode 100644 doc/_static/logos/Xarray_Logo_FullColor_InverseRGB_Final.png
create mode 100644 doc/_static/logos/Xarray_Logo_FullColor_InverseRGB_Final.svg
create mode 100644 doc/_static/logos/Xarray_Logo_RGB_Final.png
create mode 100644 doc/_static/logos/Xarray_Logo_RGB_Final.svg
diff --git a/doc/_static/dataset-diagram-build.sh b/doc/_static/dataset-diagram-build.sh
deleted file mode 100755
index 1e69d454ff6..00000000000
--- a/doc/_static/dataset-diagram-build.sh
+++ /dev/null
@@ -1,2 +0,0 @@
-#!/usr/bin/env bash
-pdflatex -shell-escape dataset-diagram.tex
diff --git a/doc/_static/dataset-diagram-logo.pdf b/doc/_static/dataset-diagram-logo.pdf
deleted file mode 100644
index 0ef2b1247ebb5a158ac50698ff3b60bdcc0bd21d..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001
literal 13358
(base85-encoded binary delta omitted)
diff --git a/doc/_static/dataset-diagram-logo.png b/doc/_static/dataset-diagram-logo.png
deleted file mode 100644
index 23c413d3414f6744e88e5f2d6627426b745c9fa7..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001
literal 117124
[base85-encoded binary data for the deleted PNG omitted]