From 47eec7fb1c948f385e8b6f9ad71d4d49e2148bcc Mon Sep 17 00:00:00 2001
From: Illviljan <14371165+Illviljan@users.noreply.github.com>
Date: Fri, 13 Oct 2023 17:58:06 +0200
Subject: [PATCH 1/2] Remove real, imag, astype methods from NamedArray (#8295)

* remove imag / real / astype

* Remove real, imag and astype

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* add imag, real functions

* Update variable.py

* Update utils.py

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Update utils.py

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 xarray/core/variable.py    | 22 +++++++++++++++++++
 xarray/namedarray/core.py  | 25 ++-------------------
 xarray/namedarray/utils.py | 45 ++++++++++++++++++++++++++------------
 3 files changed, 55 insertions(+), 37 deletions(-)

diff --git a/xarray/core/variable.py b/xarray/core/variable.py
index fa5523b1340..ec7957fbb0f 100644
--- a/xarray/core/variable.py
+++ b/xarray/core/variable.py
@@ -2400,6 +2400,28 @@ def notnull(self, keep_attrs: bool | None = None):
             keep_attrs=keep_attrs,
         )
 
+    @property
+    def real(self):
+        """
+        The real part of the variable.
+
+        See Also
+        --------
+        numpy.ndarray.real
+        """
+        return self._replace(data=self.data.real)
+
+    @property
+    def imag(self):
+        """
+        The imaginary part of the variable.
+
+        See Also
+        --------
+        numpy.ndarray.imag
+        """
+        return self._replace(data=self.data.imag)
+
     def __array_wrap__(self, obj, context=None):
         return Variable(self.dims, obj)
 
diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py
index ec3d8fa171b..92cc742e131 100644
--- a/xarray/namedarray/core.py
+++ b/xarray/namedarray/core.py
@@ -14,6 +14,7 @@
     Default,
     T_DuckArray,
     _default,
+    astype,
     is_chunked_duck_array,
     is_duck_array,
     is_duck_dask_array,
@@ -245,28 +246,6 @@ def data(self, data: T_DuckArray | np.typing.ArrayLike) -> None:
         self._check_shape(data)
         self._data = data
 
-    @property
-    def real(self) -> Self:
-        """
-        The real part of the NamedArray.
-
-        See Also
-        --------
-        numpy.ndarray.real
-        """
-        return self._replace(data=self.data.real)
-
-    @property
-    def imag(self) -> Self:
-        """
-        The imaginary part of the NamedArray.
-
-        See Also
-        --------
-        numpy.ndarray.imag
-        """
-        return self._replace(data=self.data.imag)
-
     def __dask_tokenize__(self) -> Hashable:
         # Use v.data, instead of v._data, in order to cope with the wrappers
         # around NetCDF and the like
@@ -497,7 +476,7 @@ def _as_sparse(
         except AttributeError as exc:
             raise ValueError(f"{sparse_format} is not a valid sparse format") from exc
 
-        data = as_sparse(self.data.astype(dtype), fill_value=fill_value)
+        data = as_sparse(astype(self.data, dtype), fill_value=fill_value)
         return self._replace(data=data)
 
     def _to_dense(self) -> Self:
diff --git a/xarray/namedarray/utils.py b/xarray/namedarray/utils.py
index 6f7658ea00b..8c598a9a3b3 100644
--- a/xarray/namedarray/utils.py
+++ b/xarray/namedarray/utils.py
@@ -4,6 +4,7 @@
 import sys
 from collections.abc import Hashable
 from enum import Enum
+from types import ModuleType
 from typing import TYPE_CHECKING, Any, Final, Protocol, TypeVar
 
 import numpy as np
@@ -15,9 +16,9 @@
         from typing_extensions import TypeGuard
 
     if sys.version_info >= (3, 11):
-        from typing import Self
+        pass
     else:
-        from typing_extensions import Self
+        pass
 
     try:
         from dask.array import Array as DaskArray
@@ -29,7 +30,7 @@
 
 # https://stackoverflow.com/questions/74633074/how-to-type-hint-a-generic-numpy-array
 T_DType_co = TypeVar("T_DType_co", bound=np.dtype[np.generic], covariant=True)
-# T_DType = TypeVar("T_DType", bound=np.dtype[np.generic])
+T_DType = TypeVar("T_DType", bound=np.dtype[np.generic])
 
 
 class _Array(Protocol[T_DType_co]):
@@ -41,17 +42,6 @@ def dtype(self) -> T_DType_co:
     def shape(self) -> tuple[int, ...]:
         ...
 
-    @property
-    def real(self) -> Self:
-        ...
-
-    @property
-    def imag(self) -> Self:
-        ...
-
-    def astype(self, dtype: np.typing.DTypeLike) -> Self:
-        ...
-
     # TODO: numpy doesn't use any inputs:
     # https://github.com/numpy/numpy/blob/v1.24.3/numpy/_typing/_array_like.py#L38
     def __array__(self) -> np.ndarray[Any, T_DType_co]:
@@ -161,3 +151,30 @@ def __dask_tokenize__(self) -> Hashable:
         from dask.base import normalize_token
 
         return normalize_token((type(self), self._value))  # type: ignore[no-any-return]
+
+
+# %% Array API functions
+def get_array_namespace(x: _Array[Any]) -> ModuleType:
+    if hasattr(x, "__array_namespace__"):
+        return x.__array_namespace__()  # type: ignore[no-any-return]
+    else:
+        return np
+
+
+def astype(x: _Array[Any], dtype: T_DType, /, *, copy: bool = True) -> _Array[T_DType]:
+    if hasattr(x, "__array_namespace__"):
+        xp = x.__array_namespace__()
+        return xp.astype(x, dtype, copy=copy)  # type: ignore[no-any-return]
+
+    # np.astype doesn't exist yet:
+    return x.astype(dtype, copy=copy)  # type: ignore[no-any-return, attr-defined]
+
+
+def imag(x: _Array[Any], /) -> _Array[Any]:
+    xp = get_array_namespace(x)
+    return xp.imag(x)  # type: ignore[no-any-return]
+
+
+def real(x: _Array[Any], /) -> _Array[Any]:
+    xp = get_array_namespace(x)
+    return xp.real(x)  # type: ignore[no-any-return]

From 58763657dfed5fd566f3391b1a693c8427efe6d4 Mon Sep 17 00:00:00 2001
From: olimcc <dev@olimcc.com>
Date: Fri, 13 Oct 2023 09:38:56 -0700
Subject: [PATCH 2/2] Avoid redundant metadata reads in `ZarrArrayWrapper`
 (#8297)

* Avoid redundant metadata reads in `ZarrArrayWrapper`

Modify ZarrArrayWrapper to avoid re-reading metadata each time data
is read from the array.

* Improve test documentation

* Update xarray/tests/test_backends.py

Co-authored-by: Joe Hamman <jhamman1@gmail.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Add what's new entry

---------

Co-authored-by: Maximilian Roos <5635139+max-sixty@users.noreply.github.com>
Co-authored-by: Joe Hamman <jhamman1@gmail.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 doc/whats-new.rst             |  7 ++++++
 xarray/backends/zarr.py       | 16 +++++++------
 xarray/tests/test_backends.py | 42 +++++++++++++++++++++++++++++++++++
 3 files changed, 58 insertions(+), 7 deletions(-)

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 40c50e158ad..4a57faaa82e 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -54,6 +54,7 @@ Deprecations
 
 Bug fixes
 ~~~~~~~~~
+
 - :py:meth:`DataArray.rename` & :py:meth:`Dataset.rename` would emit a warning
   when the operation was a no-op. (:issue:`8266`)
   By `Simon Hansen <https://github.com/hoxbro>`_.
@@ -64,6 +65,12 @@ Bug fixes
   (:issue:`8271`, :pull:`8272`). By `Spencer Clark
   <https://github.com/spencerkclark>`_.
 
+- Fix excess metadata requests when using a Zarr store. Prior to this, metadata
+  was re-read every time data was retrieved from the array, now metadata is retrieved only once
+  when they array is initialized.
+  (:issue:`8290`, :pull:`8297`).
+  By `Oliver McCormack <https://github.com/olimcc>`_.
+
 
 Documentation
 ~~~~~~~~~~~~~
diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py
index f88523422bb..d6ad15f4f87 100644
--- a/xarray/backends/zarr.py
+++ b/xarray/backends/zarr.py
@@ -61,27 +61,29 @@ def encode_zarr_attr_value(value):
 
 
 class ZarrArrayWrapper(BackendArray):
-    __slots__ = ("datastore", "dtype", "shape", "variable_name")
+    __slots__ = ("datastore", "dtype", "shape", "variable_name", "_array")
 
     def __init__(self, variable_name, datastore):
         self.datastore = datastore
         self.variable_name = variable_name
 
-        array = self.get_array()
-        self.shape = array.shape
+        # some callers attempt to evaluate an array if an `array` property exists on the object.
+        # we prefix with _ to avoid this inference.
+        self._array = self.datastore.zarr_group[self.variable_name]
+        self.shape = self._array.shape
 
         # preserve vlen string object dtype (GH 7328)
-        if array.filters is not None and any(
-            [filt.codec_id == "vlen-utf8" for filt in array.filters]
+        if self._array.filters is not None and any(
+            [filt.codec_id == "vlen-utf8" for filt in self._array.filters]
         ):
             dtype = coding.strings.create_vlen_dtype(str)
         else:
-            dtype = array.dtype
+            dtype = self._array.dtype
 
         self.dtype = dtype
 
     def get_array(self):
-        return self.datastore.zarr_group[self.variable_name]
+        return self._array
 
     def _oindex(self, key):
         return self.get_array().oindex[key]
diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
index 9ec67bf47dc..4c04841db8d 100644
--- a/xarray/tests/test_backends.py
+++ b/xarray/tests/test_backends.py
@@ -19,6 +19,7 @@
 from os import listdir
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, Final, cast
+from unittest.mock import patch
 
 import numpy as np
 import pandas as pd
@@ -2862,6 +2863,47 @@ def create_zarr_target(self):
             yield tmp
 
 
+@requires_zarr
+class TestZarrArrayWrapperCalls(TestZarrKVStoreV3):
+    def test_avoid_excess_metadata_calls(self) -> None:
+        """Test that chunk requests do not trigger redundant metadata requests.
+
+        This test targets logic in backends.zarr.ZarrArrayWrapper, asserting that calls
+        to retrieve chunk data after initialization do not trigger additional
+        metadata requests.
+
+        https://github.com/pydata/xarray/issues/8290
+        """
+
+        import zarr
+
+        ds = xr.Dataset(data_vars={"test": (("Z",), np.array([123]).reshape(1))})
+
+        # The call to retrieve metadata performs a group lookup. We patch Group.__getitem__
+        # so that we can inspect calls to this method - specifically count of calls.
+        # Use of side_effect means that calls are passed through to the original method
+        # rather than a mocked method.
+        Group = zarr.hierarchy.Group
+        with (
+            self.create_zarr_target() as store,
+            patch.object(
+                Group, "__getitem__", side_effect=Group.__getitem__, autospec=True
+            ) as mock,
+        ):
+            ds.to_zarr(store, mode="w")
+
+            # We expect this to request array metadata information, so call_count should be >= 1,
+            # At time of writing, 2 calls are made
+            xrds = xr.open_zarr(store)
+            call_count = mock.call_count
+            assert call_count > 0
+
+            # compute() requests array data, which should not trigger additional metadata requests
+            # we assert that the number of calls has not increased after fetchhing the array
+            xrds.test.compute(scheduler="sync")
+            assert mock.call_count == call_count
+
+
 @requires_zarr
 @requires_fsspec
 def test_zarr_storage_options() -> None: