From 3fdb52f0af8a1098c31e9c85f0d4aa6699bee52a Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Wed, 10 Jul 2024 12:04:12 +0200 Subject: [PATCH 01/46] Type parallelcompat --- xarray/namedarray/parallelcompat.py | 99 +++++++++++++---------------- 1 file changed, 43 insertions(+), 56 deletions(-) diff --git a/xarray/namedarray/parallelcompat.py b/xarray/namedarray/parallelcompat.py index dd555fe200a..252ea5778ba 100644 --- a/xarray/namedarray/parallelcompat.py +++ b/xarray/namedarray/parallelcompat.py @@ -11,7 +11,8 @@ from abc import ABC, abstractmethod from collections.abc import Iterable, Sequence from importlib.metadata import EntryPoint, entry_points -from typing import TYPE_CHECKING, Any, Callable, Generic, Protocol, TypeVar +from types import ModuleType +from typing import TYPE_CHECKING, Any, Callable import numpy as np @@ -20,34 +21,18 @@ if TYPE_CHECKING: from xarray.namedarray._typing import ( + _chunkedarrayfunction_or_api, _Chunks, + _ChunksLike, _DType, - _DType_co, - _NormalizedChunks, - _ShapeType, + _Shape, + chunkedduckarray, duckarray, ) -class ChunkedArrayMixinProtocol(Protocol): - def rechunk(self, chunks: Any, **kwargs: Any) -> Any: ... - - @property - def dtype(self) -> np.dtype[Any]: ... - - @property - def chunks(self) -> _NormalizedChunks: ... - - def compute( - self, *data: Any, **kwargs: Any - ) -> tuple[np.ndarray[Any, _DType_co], ...]: ... - - -T_ChunkedArray = TypeVar("T_ChunkedArray", bound=ChunkedArrayMixinProtocol) - - @functools.lru_cache(maxsize=1) -def list_chunkmanagers() -> dict[str, ChunkManagerEntrypoint[Any]]: +def list_chunkmanagers() -> dict[str, ChunkManagerEntrypoint]: """ Return a dictionary of available chunk managers and their ChunkManagerEntrypoint subclass objects. @@ -71,7 +56,7 @@ def list_chunkmanagers() -> dict[str, ChunkManagerEntrypoint[Any]]: def load_chunkmanagers( entrypoints: Sequence[EntryPoint], -) -> dict[str, ChunkManagerEntrypoint[Any]]: +) -> dict[str, ChunkManagerEntrypoint]: """Load entrypoints and instantiate chunkmanagers only once.""" loaded_entrypoints = {} @@ -93,8 +78,8 @@ def load_chunkmanagers( def guess_chunkmanager( - manager: str | ChunkManagerEntrypoint[Any] | None, -) -> ChunkManagerEntrypoint[Any]: + manager: str | ChunkManagerEntrypoint | None, +) -> ChunkManagerEntrypoint: """ Get namespace of chunk-handling methods, guessing from what's available. @@ -128,7 +113,7 @@ def guess_chunkmanager( ) -def get_chunked_array_type(*args: Any) -> ChunkManagerEntrypoint[Any]: +def get_chunked_array_type(*args: Any) -> ChunkManagerEntrypoint: """ Detects which parallel backend should be used for given set of arrays. @@ -171,7 +156,7 @@ def get_chunked_array_type(*args: Any) -> ChunkManagerEntrypoint[Any]: return selected[0] -class ChunkManagerEntrypoint(ABC, Generic[T_ChunkedArray]): +class ChunkManagerEntrypoint(ABC): """ Interface between a particular parallel computing framework and xarray. @@ -190,7 +175,7 @@ class ChunkManagerEntrypoint(ABC, Generic[T_ChunkedArray]): This attribute is used for array instance type checking at runtime. """ - array_cls: type[T_ChunkedArray] + array_cls: type[chunkedduckarray[Any, Any]] available: bool = True @abstractmethod @@ -216,10 +201,10 @@ def is_chunked_array(self, data: duckarray[Any, Any]) -> bool: -------- dask.is_dask_collection """ - return isinstance(data, self.array_cls) + return isinstance(data, _chunkedarrayfunction_or_api) @abstractmethod - def chunks(self, data: T_ChunkedArray) -> _NormalizedChunks: + def chunks(self, data: chunkedduckarray[Any, Any]) -> _Chunks: """ Return the current chunks of the given array. @@ -245,12 +230,12 @@ def chunks(self, data: T_ChunkedArray) -> _NormalizedChunks: @abstractmethod def normalize_chunks( self, - chunks: _Chunks | _NormalizedChunks, - shape: _ShapeType | None = None, + chunks: _ChunksLike, + shape: _Shape | None = None, limit: int | None = None, dtype: _DType | None = None, - previous_chunks: _NormalizedChunks | None = None, - ) -> _NormalizedChunks: + previous_chunks: _Chunks | None = None, + ) -> _Chunks: """ Normalize given chunking pattern into an explicit tuple of tuples representation. @@ -281,8 +266,8 @@ def normalize_chunks( @abstractmethod def from_array( - self, data: duckarray[Any, Any], chunks: _Chunks, **kwargs: Any - ) -> T_ChunkedArray: + self, data: duckarray[Any, _DType], chunks: _ChunksLike, **kwargs: Any + ) -> chunkedduckarray[Any, _DType]: """ Create a chunked array from a non-chunked numpy-like array. @@ -307,10 +292,10 @@ def from_array( def rechunk( self, - data: T_ChunkedArray, - chunks: _NormalizedChunks | tuple[int, ...] | _Chunks, + data: chunkedduckarray[Any, _DType], + chunks: _ChunksLike, **kwargs: Any, - ) -> Any: + ) -> chunkedduckarray[Any, _DType]: """ Changes the chunking pattern of the given array. @@ -338,8 +323,8 @@ def rechunk( @abstractmethod def compute( - self, *data: T_ChunkedArray | Any, **kwargs: Any - ) -> tuple[np.ndarray[Any, _DType_co], ...]: + self, *data: chunkedduckarray[Any, _DType] | Any, **kwargs: Any + ) -> tuple[duckarray[Any, _DType], ...]: """ Computes one or more chunked arrays, returning them as eager numpy arrays. @@ -365,7 +350,7 @@ def compute( raise NotImplementedError() @property - def array_api(self) -> Any: + def array_api(self) -> ModuleType: """ Return the array_api namespace following the python array API standard. @@ -382,14 +367,14 @@ def array_api(self) -> Any: def reduction( self, - arr: T_ChunkedArray, + arr: chunkedduckarray[Any, _DType], func: Callable[..., Any], combine_func: Callable[..., Any] | None = None, aggregate_func: Callable[..., Any] | None = None, axis: int | Sequence[int] | None = None, - dtype: _DType_co | None = None, + dtype: _DType | None = None, keepdims: bool = False, - ) -> T_ChunkedArray: + ) -> chunkedduckarray[Any, _DType]: """ A general version of array reductions along one or more axes. @@ -434,11 +419,11 @@ def scan( func: Callable[..., Any], binop: Callable[..., Any], ident: float, - arr: T_ChunkedArray, + arr: chunkedduckarray[Any, _DType], axis: int | None = None, - dtype: _DType_co | None = None, + dtype: _DType | None = None, **kwargs: Any, - ) -> T_ChunkedArray: + ) -> chunkedduckarray[Any, _DType]: """ General version of a 1D scan, also known as a cumulative array reduction. @@ -474,10 +459,10 @@ def apply_gufunc( *args: Any, axes: Sequence[tuple[int, ...]] | None = None, keepdims: bool = False, - output_dtypes: Sequence[_DType_co] | None = None, + output_dtypes: Sequence[_DType] | None = None, vectorize: bool | None = None, **kwargs: Any, - ) -> Any: + ) -> chunkedduckarray[Any, _DType] | tuple[chunkedduckarray[Any, _DType], ...]: """ Apply a generalized ufunc or similar python function to arrays. @@ -557,12 +542,12 @@ def map_blocks( self, func: Callable[..., Any], *args: Any, - dtype: _DType_co | None = None, - chunks: tuple[int, ...] | None = None, + dtype: _DType | None = None, + chunks: _Chunks | None = None, drop_axis: int | Sequence[int] | None = None, new_axis: int | Sequence[int] | None = None, **kwargs: Any, - ) -> Any: + ) -> chunkedduckarray[Any, _DType]: """ Map a function across all blocks of a chunked array. @@ -610,7 +595,7 @@ def blockwise( new_axes: dict[Any, int] | None = None, align_arrays: bool = True, **kwargs: Any, - ) -> Any: + ) -> chunkedduckarray[Any, _DType]: """ Tensor operation: Generalized inner and outer products. @@ -656,7 +641,7 @@ def unify_chunks( self, *args: Any, # can't type this as mypy assumes args are all same type, but dask unify_chunks args alternate types **kwargs: Any, - ) -> tuple[dict[str, _NormalizedChunks], list[T_ChunkedArray]]: + ) -> tuple[dict[str, _Chunks], list[chunkedduckarray[Any, Any]]]: """ Unify chunks across a sequence of arrays. @@ -676,7 +661,9 @@ def unify_chunks( def store( self, - sources: T_ChunkedArray | Sequence[T_ChunkedArray], + sources: ( + chunkedduckarray[Any, _DType] | Sequence[chunkedduckarray[Any, _DType]] + ), targets: Any, **kwargs: dict[str, Any], ) -> Any: From c24b7aeb59a8b583b7e380031b17899edb4be12f Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Wed, 10 Jul 2024 12:06:57 +0200 Subject: [PATCH 02/46] Type daskmanager --- xarray/namedarray/daskmanager.py | 140 ++++++++++++++++++------------- 1 file changed, 81 insertions(+), 59 deletions(-) diff --git a/xarray/namedarray/daskmanager.py b/xarray/namedarray/daskmanager.py index 14744d2de6b..cfd788793e1 100644 --- a/xarray/namedarray/daskmanager.py +++ b/xarray/namedarray/daskmanager.py @@ -1,25 +1,29 @@ from __future__ import annotations from collections.abc import Iterable, Sequence +from types import ModuleType from typing import TYPE_CHECKING, Any, Callable import numpy as np from packaging.version import Version from xarray.core.indexing import ImplicitToExplicitIndexingAdapter -from xarray.namedarray.parallelcompat import ChunkManagerEntrypoint, T_ChunkedArray +from xarray.namedarray.parallelcompat import ChunkManagerEntrypoint from xarray.namedarray.utils import is_duck_dask_array, module_available if TYPE_CHECKING: from xarray.namedarray._typing import ( - T_Chunks, - _DType_co, - _NormalizedChunks, + _Chunks, + _ChunksLike, + _DType, + _dtype, + _Shape, + chunkedduckarray, duckarray, ) try: - from dask.array import Array as DaskArray + from dask.array.core import Array as DaskArray except ImportError: DaskArray = np.ndarray[Any, Any] # type: ignore[assignment, misc] @@ -27,83 +31,90 @@ dask_available = module_available("dask") -class DaskManager(ChunkManagerEntrypoint["DaskArray"]): # type: ignore[type-var] - array_cls: type[DaskArray] +class DaskManager(ChunkManagerEntrypoint): available: bool = dask_available def __init__(self) -> None: # TODO can we replace this with a class attribute instead? - from dask.array import Array + from dask.array.core import Array - self.array_cls = Array + # TODO: error: Incompatible types in assignment (expression has type "type[Array]", variable has type "type[_chunkedarrayfunction[Any, Any]] | type[_chunkedarrayapi[Any, Any]]") [assignment] + self.array_cls = Array # type: ignore[assignment] def is_chunked_array(self, data: duckarray[Any, Any]) -> bool: return is_duck_dask_array(data) - def chunks(self, data: Any) -> _NormalizedChunks: - return data.chunks # type: ignore[no-any-return] + def chunks(self, data: chunkedduckarray[Any, _dtype[Any]]) -> _Chunks: + return data.chunks def normalize_chunks( self, - chunks: T_Chunks | _NormalizedChunks, - shape: tuple[int, ...] | None = None, + chunks: _ChunksLike, + shape: _Shape | None = None, limit: int | None = None, - dtype: _DType_co | None = None, - previous_chunks: _NormalizedChunks | None = None, - ) -> Any: + dtype: _dtype[Any] | None = None, + previous_chunks: _Chunks | None = None, + ) -> _Chunks: """Called by open_dataset""" from dask.array.core import normalize_chunks - return normalize_chunks( + out: _Chunks + out = normalize_chunks( chunks, shape=shape, limit=limit, dtype=dtype, previous_chunks=previous_chunks, ) # type: ignore[no-untyped-call] + return out def from_array( - self, data: Any, chunks: T_Chunks | _NormalizedChunks, **kwargs: Any - ) -> DaskArray | Any: - import dask.array as da + self, data: duckarray[Any, _DType], chunks: _ChunksLike, **kwargs: Any + ) -> chunkedduckarray[Any, _DType]: + from dask.array.core import from_array if isinstance(data, ImplicitToExplicitIndexingAdapter): # lazily loaded backend array classes should use NumPy array operations. kwargs["meta"] = np.ndarray - return da.from_array( + out: chunkedduckarray[Any, _DType] + out = from_array( data, chunks, **kwargs, ) # type: ignore[no-untyped-call] + return out def compute( - self, *data: Any, **kwargs: Any - ) -> tuple[np.ndarray[Any, _DType_co], ...]: + self, *data: chunkedduckarray[Any, _DType] | Any, **kwargs: Any + ) -> tuple[duckarray[Any, _DType], ...]: from dask.array import compute - return compute(*data, **kwargs) # type: ignore[no-untyped-call, no-any-return] + out: tuple[duckarray[Any, _DType], ...] + out = compute(*data, **kwargs) # type: ignore[no-untyped-call] + return out @property - def array_api(self) -> Any: + def array_api(self) -> ModuleType: from dask import array as da return da - def reduction( # type: ignore[override] + def reduction( self, - arr: T_ChunkedArray, + arr: chunkedduckarray[Any, _DType], func: Callable[..., Any], combine_func: Callable[..., Any] | None = None, aggregate_func: Callable[..., Any] | None = None, axis: int | Sequence[int] | None = None, - dtype: _DType_co | None = None, + dtype: _DType | None = None, keepdims: bool = False, - ) -> DaskArray | Any: - from dask.array import reduction + ) -> chunkedduckarray[Any, _DType]: + from dask.array.reductions import reduction - return reduction( + out: chunkedduckarray[Any, _DType] + out = reduction( arr, chunk=func, combine=combine_func, @@ -112,20 +123,22 @@ def reduction( # type: ignore[override] dtype=dtype, keepdims=keepdims, ) # type: ignore[no-untyped-call] + return out - def scan( # type: ignore[override] + def scan( self, func: Callable[..., Any], binop: Callable[..., Any], ident: float, - arr: T_ChunkedArray, + arr: chunkedduckarray[Any, _DType], axis: int | None = None, - dtype: _DType_co | None = None, + dtype: _DType | None = None, **kwargs: Any, - ) -> DaskArray | Any: + ) -> chunkedduckarray[Any, _DType]: from dask.array.reductions import cumreduction - return cumreduction( + out: chunkedduckarray[Any, _DType] + out = cumreduction( func, binop, ident, @@ -134,6 +147,7 @@ def scan( # type: ignore[override] dtype=dtype, **kwargs, ) # type: ignore[no-untyped-call] + return out def apply_gufunc( self, @@ -141,18 +155,19 @@ def apply_gufunc( signature: str, *args: Any, axes: Sequence[tuple[int, ...]] | None = None, - axis: int | None = None, keepdims: bool = False, - output_dtypes: Sequence[_DType_co] | None = None, - output_sizes: dict[str, int] | None = None, + output_dtypes: Sequence[_DType] | None = None, vectorize: bool | None = None, + axis: int | None = None, + output_sizes: dict[str, int] | None = None, allow_rechunk: bool = False, - meta: tuple[np.ndarray[Any, _DType_co], ...] | None = None, + meta: tuple[np.ndarray[Any, np.dtype[np.generic]], ...] | None = None, **kwargs: Any, - ) -> Any: + ) -> chunkedduckarray[Any, _DType] | tuple[chunkedduckarray[Any, _DType], ...]: from dask.array.gufunc import apply_gufunc - return apply_gufunc( + out: chunkedduckarray[Any, _DType] | tuple[chunkedduckarray[Any, _DType], ...] + out = apply_gufunc( func, signature, *args, @@ -167,18 +182,20 @@ def apply_gufunc( **kwargs, ) # type: ignore[no-untyped-call] + return out + def map_blocks( self, func: Callable[..., Any], *args: Any, - dtype: _DType_co | None = None, - chunks: tuple[int, ...] | None = None, + dtype: _DType | None = None, + chunks: _Chunks | None = None, drop_axis: int | Sequence[int] | None = None, new_axis: int | Sequence[int] | None = None, **kwargs: Any, - ) -> Any: + ) -> chunkedduckarray[Any, _DType]: import dask - from dask.array import map_blocks + from dask.array.core import map_blocks if drop_axis is None and Version(dask.__version__) < Version("2022.9.1"): # See https://github.com/pydata/xarray/pull/7019#discussion_r1196729489 @@ -186,7 +203,8 @@ def map_blocks( drop_axis = [] # pass through name, meta, token as kwargs - return map_blocks( + out: chunkedduckarray[Any, _DType] + out = map_blocks( func, *args, dtype=dtype, @@ -195,26 +213,27 @@ def map_blocks( new_axis=new_axis, **kwargs, ) # type: ignore[no-untyped-call] + return out def blockwise( self, func: Callable[..., Any], out_ind: Iterable[Any], - *args: Any, - # can't type this as mypy assumes args are all same type, but dask blockwise args alternate types - name: str | None = None, - token: Any | None = None, - dtype: _DType_co | None = None, + *args: Any, # can't type this as mypy assumes args are all same type, but dask blockwise args alternate types adjust_chunks: dict[Any, Callable[..., Any]] | None = None, new_axes: dict[Any, int] | None = None, align_arrays: bool = True, + name: str | None = None, + token: Any | None = None, + dtype: _DType | None = None, concatenate: bool | None = None, - meta: tuple[np.ndarray[Any, _DType_co], ...] | None = None, + meta: tuple[np.ndarray[Any, np.dtype[np.generic]], ...] | None = None, **kwargs: Any, - ) -> DaskArray | Any: - from dask.array import blockwise + ) -> chunkedduckarray[Any, _DType]: + from dask.array.blockwise import blockwise - return blockwise( + out: chunkedduckarray[Any, _DType] + out = blockwise( func, out_ind, *args, @@ -228,15 +247,18 @@ def blockwise( meta=meta, **kwargs, ) # type: ignore[no-untyped-call] + return out def unify_chunks( self, *args: Any, # can't type this as mypy assumes args are all same type, but dask unify_chunks args alternate types **kwargs: Any, - ) -> tuple[dict[str, _NormalizedChunks], list[DaskArray]]: + ) -> tuple[dict[str, _Chunks], list[chunkedduckarray[Any, Any]]]: from dask.array.core import unify_chunks - return unify_chunks(*args, **kwargs) # type: ignore[no-any-return, no-untyped-call] + out: tuple[dict[str, _Chunks], list[chunkedduckarray[Any, Any]]] + out = unify_chunks(*args, **kwargs) # type: ignore[no-untyped-call] + return out def store( self, @@ -244,7 +266,7 @@ def store( targets: Any, **kwargs: Any, ) -> Any: - from dask.array import store + from dask.array.core import store return store( sources=sources, From a932c71aa186b1eb75bee7b93bdc2a512cdd7a09 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Wed, 10 Jul 2024 12:09:32 +0200 Subject: [PATCH 03/46] Add chunks typing --- xarray/namedarray/_typing.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/xarray/namedarray/_typing.py b/xarray/namedarray/_typing.py index b715973814f..66519ffd341 100644 --- a/xarray/namedarray/_typing.py +++ b/xarray/namedarray/_typing.py @@ -76,8 +76,14 @@ def dtype(self) -> _DType_co: ... _Axes = tuple[_Axis, ...] _AxisLike = Union[_Axis, _Axes] -_Chunks = tuple[_Shape, ...] -_NormalizedChunks = tuple[tuple[int, ...], ...] +_Chunk = tuple[int, ...] +_Chunks = tuple[_Chunk, ...] +_NormalizedChunks = tuple[tuple[int, ...], ...] # TODO: Same as Chunks. +_ChunksLike = Union[ + int, Literal["auto"], None, _Chunk, _Chunks +] # TODO: Literal["auto"] +_ChunksType = TypeVar("_ChunksType", bound=_Chunks) + # FYI in some cases we don't allow `None`, which this doesn't take account of. T_ChunkDim: TypeAlias = Union[int, Literal["auto"], None, tuple[int, ...]] # We allow the tuple form of this (though arguably we could transition to named dims only) @@ -235,7 +241,7 @@ def chunks(self) -> _Chunks: ... @runtime_checkable class _chunkedarrayfunction( - _arrayfunction[_ShapeType_co, _DType_co], Protocol[_ShapeType_co, _DType_co] + _arrayfunction[_ShapeType, _DType_co], Protocol[_ShapeType, _DType_co] ): """ Chunked duck array supporting NEP 18. @@ -246,6 +252,11 @@ class _chunkedarrayfunction( @property def chunks(self) -> _Chunks: ... + def rechunk( + self, + chunks: _ChunksLike, + ) -> _chunkedarrayfunction[_ShapeType, _DType_co]: ... + @runtime_checkable class _chunkedarrayapi( @@ -260,6 +271,11 @@ class _chunkedarrayapi( @property def chunks(self) -> _Chunks: ... + def rechunk( + self, + chunks: _ChunksLike, + ) -> _chunkedarrayapi[_ShapeType_co, _DType_co]: ... + # NamedArray can most likely use both __array_function__ and __array_namespace__: _chunkedarrayfunction_or_api = (_chunkedarrayfunction, _chunkedarrayapi) From 676f045af679cc82068f1ac01a9209280bb76bcb Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Wed, 10 Jul 2024 12:22:57 +0200 Subject: [PATCH 04/46] Update times.py --- xarray/coding/times.py | 94 ++++++++++++++++++++++++++++-------------- 1 file changed, 64 insertions(+), 30 deletions(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 34d4f9a23ad..a30affcbe93 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -5,7 +5,7 @@ from collections.abc import Hashable from datetime import datetime, timedelta from functools import partial -from typing import TYPE_CHECKING, Callable, Union +from typing import TYPE_CHECKING, Callable, Union, overload import numpy as np import pandas as pd @@ -22,13 +22,19 @@ ) from xarray.core import indexing from xarray.core.common import contains_cftime_datetimes, is_np_datetime_like -from xarray.core.duck_array_ops import asarray +from xarray.core.duck_array_ops import asarray, ravel from xarray.core.formatting import first_n_items, format_timestamp, last_item from xarray.core.pdcompat import nanosecond_precision_timestamp from xarray.core.utils import emit_user_level_warning from xarray.core.variable import Variable -from xarray.namedarray.parallelcompat import T_ChunkedArray, get_chunked_array_type -from xarray.namedarray.pycompat import is_chunked_array +from xarray.namedarray._typing import ( + _chunkedarrayfunction_or_api, + chunkedduckarray, + duckarray, +) +from xarray.namedarray.parallelcompat import get_chunked_array_type + +# from xarray.namedarray.pycompat import is_chunked_array from xarray.namedarray.utils import is_duck_dask_array try: @@ -37,7 +43,7 @@ cftime = None if TYPE_CHECKING: - from xarray.core.types import CFCalendar, T_DuckArray + from xarray.core.types import CFCalendar T_Name = Union[Hashable, None] @@ -315,7 +321,7 @@ def decode_cf_datetime( cftime.num2date """ num_dates = np.asarray(num_dates) - flat_num_dates = num_dates.ravel() + flat_num_dates = ravel(num_dates) if calendar is None: calendar = "standard" @@ -369,7 +375,7 @@ def decode_cf_timedelta(num_timedeltas, units: str) -> np.ndarray: """ num_timedeltas = np.asarray(num_timedeltas) units = _netcdf_to_numpy_timeunit(units) - result = to_timedelta_unboxed(num_timedeltas.ravel(), unit=units) + result = to_timedelta_unboxed(ravel(num_timedeltas), unit=units) return result.reshape(num_timedeltas.shape) @@ -428,7 +434,7 @@ def infer_datetime_units(dates) -> str: 'hours', 'minutes' or 'seconds' (the first one that can evenly divide all unique time deltas in `dates`) """ - dates = np.asarray(dates).ravel() + dates = ravel(np.asarray(dates)) if np.asarray(dates).dtype == "datetime64[ns]": dates = to_datetime_unboxed(dates) dates = dates[pd.notnull(dates)] @@ -456,7 +462,7 @@ def infer_timedelta_units(deltas) -> str: {'days', 'hours', 'minutes' 'seconds'} (the first one that can evenly divide all unique time deltas in `deltas`) """ - deltas = to_timedelta_unboxed(np.asarray(deltas).ravel()) + deltas = to_timedelta_unboxed(ravel(np.asarray(deltas))) unique_timedeltas = np.unique(deltas[pd.notnull(deltas)]) return _infer_time_units_from_diff(unique_timedeltas) @@ -643,7 +649,7 @@ def encode_datetime(d): except TypeError: return np.nan if d is None else cftime.date2num(d, units, calendar) - return np.array([encode_datetime(d) for d in dates.ravel()]).reshape(dates.shape) + return np.array([encode_datetime(d) for d in ravel(dates)]).reshape(dates.shape) def cast_to_int_if_safe(num) -> np.ndarray: @@ -700,12 +706,26 @@ def _cast_to_dtype_if_safe(num: np.ndarray, dtype: np.dtype) -> np.ndarray: return cast_num +@overload +def encode_cf_datetime( + dates: chunkedduckarray, + units: str | None = None, + calendar: str | None = None, + dtype: np.dtype | None = None, +) -> tuple[chunkedduckarray, str, str]: ... +@overload +def encode_cf_datetime( + dates: duckarray, + units: str | None = None, + calendar: str | None = None, + dtype: np.dtype | None = None, +) -> tuple[duckarray, str, str]: ... def encode_cf_datetime( - dates: T_DuckArray, # type: ignore + dates: duckarray | chunkedduckarray, units: str | None = None, calendar: str | None = None, dtype: np.dtype | None = None, -) -> tuple[T_DuckArray, str, str]: +) -> tuple[duckarray | chunkedduckarray, str, str]: """Given an array of datetime objects, returns the tuple `(num, units, calendar)` suitable for a CF compliant time variable. @@ -716,19 +736,19 @@ def encode_cf_datetime( cftime.date2num """ dates = asarray(dates) - if is_chunked_array(dates): + if isinstance(dates, _chunkedarrayfunction_or_api): return _lazily_encode_cf_datetime(dates, units, calendar, dtype) else: return _eagerly_encode_cf_datetime(dates, units, calendar, dtype) def _eagerly_encode_cf_datetime( - dates: T_DuckArray, # type: ignore + dates: duckarray, units: str | None = None, calendar: str | None = None, dtype: np.dtype | None = None, allow_units_modification: bool = True, -) -> tuple[T_DuckArray, str, str]: +) -> tuple[duckarray, str, str]: dates = asarray(dates) data_units = infer_datetime_units(dates) @@ -753,7 +773,7 @@ def _eagerly_encode_cf_datetime( # Wrap the dates in a DatetimeIndex to do the subtraction to ensure # an OverflowError is raised if the ref_date is too far away from # dates to be encoded (GH 2272). - dates_as_index = pd.DatetimeIndex(dates.ravel()) + dates_as_index = pd.DatetimeIndex(ravel(dates)) time_deltas = dates_as_index - ref_date # retrieve needed units to faithfully encode to int64 @@ -806,11 +826,11 @@ def _eagerly_encode_cf_datetime( def _encode_cf_datetime_within_map_blocks( - dates: T_DuckArray, # type: ignore + dates: duckarray, units: str, calendar: str, dtype: np.dtype, -) -> T_DuckArray: +) -> duckarray: num, *_ = _eagerly_encode_cf_datetime( dates, units, calendar, dtype, allow_units_modification=False ) @@ -818,11 +838,11 @@ def _encode_cf_datetime_within_map_blocks( def _lazily_encode_cf_datetime( - dates: T_ChunkedArray, + dates: chunkedduckarray, units: str | None = None, calendar: str | None = None, dtype: np.dtype | None = None, -) -> tuple[T_ChunkedArray, str, str]: +) -> tuple[chunkedduckarray, str, str]: if calendar is None: # This will only trigger minor compute if dates is an object dtype array. calendar = infer_calendar_name(dates) @@ -855,31 +875,43 @@ def _lazily_encode_cf_datetime( return num, units, calendar +@overload def encode_cf_timedelta( - timedeltas: T_DuckArray, # type: ignore + timedeltas: chunkedduckarray, units: str | None = None, dtype: np.dtype | None = None, -) -> tuple[T_DuckArray, str]: +) -> tuple[chunkedduckarray, str]: ... +@overload +def encode_cf_timedelta( + timedeltas: duckarray, + units: str | None = None, + dtype: np.dtype | None = None, +) -> tuple[duckarray, str]: ... +def encode_cf_timedelta( + timedeltas: chunkedduckarray | duckarray, + units: str | None = None, + dtype: np.dtype | None = None, +) -> tuple[chunkedduckarray | duckarray, str]: timedeltas = asarray(timedeltas) - if is_chunked_array(timedeltas): + if isinstance(timedeltas, _chunkedarrayfunction_or_api): return _lazily_encode_cf_timedelta(timedeltas, units, dtype) else: return _eagerly_encode_cf_timedelta(timedeltas, units, dtype) def _eagerly_encode_cf_timedelta( - timedeltas: T_DuckArray, # type: ignore + timedeltas: duckarray, units: str | None = None, dtype: np.dtype | None = None, allow_units_modification: bool = True, -) -> tuple[T_DuckArray, str]: +) -> tuple[duckarray, str]: data_units = infer_timedelta_units(timedeltas) if units is None: units = data_units time_delta = _time_units_to_timedelta64(units) - time_deltas = pd.TimedeltaIndex(timedeltas.ravel()) + time_deltas = pd.TimedeltaIndex(ravel(timedeltas)) # retrieve needed units to faithfully encode to int64 needed_units = data_units @@ -920,10 +952,10 @@ def _eagerly_encode_cf_timedelta( def _encode_cf_timedelta_within_map_blocks( - timedeltas: T_DuckArray, # type:ignore + timedeltas: duckarray, units: str, dtype: np.dtype, -) -> T_DuckArray: +) -> duckarray: num, _ = _eagerly_encode_cf_timedelta( timedeltas, units, dtype, allow_units_modification=False ) @@ -931,8 +963,10 @@ def _encode_cf_timedelta_within_map_blocks( def _lazily_encode_cf_timedelta( - timedeltas: T_ChunkedArray, units: str | None = None, dtype: np.dtype | None = None -) -> tuple[T_ChunkedArray, str]: + timedeltas: chunkedduckarray, + units: str | None = None, + dtype: np.dtype | None = None, +) -> tuple[chunkedduckarray, str]: if units is None and dtype is None: units = "nanoseconds" dtype = np.dtype("int64") From 6f79bdc9a89fe11b1c2e3869043509fff1d7bf15 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Wed, 10 Jul 2024 12:24:43 +0200 Subject: [PATCH 05/46] Update times.py --- xarray/coding/times.py | 1 - 1 file changed, 1 deletion(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index a30affcbe93..0d6dd22fb96 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -34,7 +34,6 @@ ) from xarray.namedarray.parallelcompat import get_chunked_array_type -# from xarray.namedarray.pycompat import is_chunked_array from xarray.namedarray.utils import is_duck_dask_array try: From 3d48d440cfa5806e0de31a96042c2db85603a9b0 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Wed, 10 Jul 2024 12:28:04 +0200 Subject: [PATCH 06/46] Update _typing.py --- xarray/namedarray/_typing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/namedarray/_typing.py b/xarray/namedarray/_typing.py index 66519ffd341..a24398a693c 100644 --- a/xarray/namedarray/_typing.py +++ b/xarray/namedarray/_typing.py @@ -78,7 +78,7 @@ def dtype(self) -> _DType_co: ... _Chunk = tuple[int, ...] _Chunks = tuple[_Chunk, ...] -_NormalizedChunks = tuple[tuple[int, ...], ...] # TODO: Same as Chunks. +# _NormalizedChunks = tuple[tuple[int, ...], ...] # TODO: Same as Chunks. _ChunksLike = Union[ int, Literal["auto"], None, _Chunk, _Chunks ] # TODO: Literal["auto"] From e7041e05e386af44410435453ceef3c1ef9b97e3 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Wed, 10 Jul 2024 12:28:09 +0200 Subject: [PATCH 07/46] Update test_parallelcompat.py --- xarray/tests/test_parallelcompat.py | 36 ++++++++++++++++++----------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/xarray/tests/test_parallelcompat.py b/xarray/tests/test_parallelcompat.py index dbe40be710c..1ff1414783b 100644 --- a/xarray/tests/test_parallelcompat.py +++ b/xarray/tests/test_parallelcompat.py @@ -6,8 +6,14 @@ import numpy as np import pytest -from xarray.core.types import T_Chunks, T_DuckArray, T_NormalizedChunks -from xarray.namedarray._typing import _Chunks +from xarray.namedarray._typing import ( + _Chunks, + _ChunksLike, + _DType, + _Shape, + chunkedduckarray, + duckarray, +) from xarray.namedarray.daskmanager import DaskManager from xarray.namedarray.parallelcompat import ( ChunkManagerEntrypoint, @@ -27,7 +33,7 @@ class DummyChunkedArray(np.ndarray): https://numpy.org/doc/stable/user/basics.subclassing.html#simple-example-adding-an-extra-attribute-to-ndarray """ - chunks: T_NormalizedChunks + chunks: _Chunks def __new__( cls, @@ -63,32 +69,36 @@ def __init__(self): def is_chunked_array(self, data: Any) -> bool: return isinstance(data, DummyChunkedArray) - def chunks(self, data: DummyChunkedArray) -> T_NormalizedChunks: + def chunks(self, data: chunkedduckarray[Any, Any]) -> _Chunks: return data.chunks def normalize_chunks( self, - chunks: T_Chunks | T_NormalizedChunks, - shape: tuple[int, ...] | None = None, + chunks: _ChunksLike, + shape: _Shape | None = None, limit: int | None = None, - dtype: np.dtype | None = None, - previous_chunks: T_NormalizedChunks | None = None, - ) -> T_NormalizedChunks: + dtype: _DType | None = None, + previous_chunks: _Chunks | None = None, + ) -> _Chunks: from dask.array.core import normalize_chunks return normalize_chunks(chunks, shape, limit, dtype, previous_chunks) def from_array( - self, data: T_DuckArray | np.typing.ArrayLike, chunks: _Chunks, **kwargs - ) -> DummyChunkedArray: + self, data: duckarray[Any, _DType], chunks: _ChunksLike, **kwargs + ) -> chunkedduckarray[Any, _DType]: from dask import array as da return da.from_array(data, chunks, **kwargs) - def rechunk(self, data: DummyChunkedArray, chunks, **kwargs) -> DummyChunkedArray: + def rechunk( + self, data: chunkedduckarray[Any, _DType], chunks: _ChunksLike, **kwargs + ) -> chunkedduckarray[Any, _DType]: return data.rechunk(chunks, **kwargs) - def compute(self, *data: DummyChunkedArray, **kwargs) -> tuple[np.ndarray, ...]: + def compute( + self, *data: chunkedduckarray[Any, _DType], **kwargs + ) -> tuple[duckarray[Any, _DType], ...]: from dask.array import compute return compute(*data, **kwargs) From 6f56dd8fdfb88c1b47b4115e593b545fcadf0e1d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 10 Jul 2024 10:28:45 +0000 Subject: [PATCH 08/46] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/coding/times.py | 1 - 1 file changed, 1 deletion(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 0d6dd22fb96..cb64ca298d7 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -33,7 +33,6 @@ duckarray, ) from xarray.namedarray.parallelcompat import get_chunked_array_type - from xarray.namedarray.utils import is_duck_dask_array try: From 19b26748b536c49b2de9ecb01ef79ac118f3e728 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Wed, 10 Jul 2024 12:33:07 +0200 Subject: [PATCH 09/46] Update indexing.py --- xarray/core/indexing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 06e7efdbb48..55d3c41acec 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -1325,7 +1325,7 @@ def _arrayize_vectorized_indexer( def _chunked_array_with_chunks_hint( - array, chunks, chunkmanager: ChunkManagerEntrypoint[Any] + array, chunks, chunkmanager: ChunkManagerEntrypoint ): """Create a chunked array using the chunks hint for dimensions of size > 1.""" From 11aa840128faae788073c674cecd5dc1c947c55d Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Wed, 10 Jul 2024 12:34:34 +0200 Subject: [PATCH 10/46] Update core.py --- xarray/namedarray/core.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index fe47bf50533..6852a0f9768 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -749,7 +749,7 @@ def sizes(self) -> dict[_Dim, _IntOrUnknown]: def chunk( self, chunks: int | Literal["auto"] | Mapping[Any, None | int | tuple[int, ...]] = {}, - chunked_array_type: str | ChunkManagerEntrypoint[Any] | None = None, + chunked_array_type: str | ChunkManagerEntrypoint | None = None, from_array_kwargs: Any = None, **chunks_kwargs: Any, ) -> Self: @@ -822,6 +822,7 @@ def chunk( chunkmanager = guess_chunkmanager(chunked_array_type) data_old = self._data + data_chunked: _chunkedarray[Any, _DType_co] if chunkmanager.is_chunked_array(data_old): data_chunked = chunkmanager.rechunk(data_old, chunks) # type: ignore[arg-type] else: From 62635eb3e1af1766eed983d2ed7aad27cb655fd6 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Wed, 10 Jul 2024 12:35:21 +0200 Subject: [PATCH 11/46] Update variable.py --- xarray/core/variable.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index f0685882595..594c4287d4d 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -2526,7 +2526,7 @@ def chunk( # type: ignore[override] name: str | None = None, lock: bool | None = None, inline_array: bool | None = None, - chunked_array_type: str | ChunkManagerEntrypoint[Any] | None = None, + chunked_array_type: str | ChunkManagerEntrypoint | None = None, from_array_kwargs: Any = None, **chunks_kwargs: Any, ) -> Self: From 9c996a8b25acdfded88cdd9cfc9e4ee8a0ecd578 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Wed, 10 Jul 2024 12:36:14 +0200 Subject: [PATCH 12/46] Update computation.py --- xarray/core/computation.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index f418d3821c2..a5d5ac09405 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -25,6 +25,7 @@ from xarray.core.types import Dims, T_DataArray from xarray.core.utils import is_dict_like, is_duck_dask_array, is_scalar, parse_dims from xarray.core.variable import Variable +from xarray.namedarray._typing import chunkedduckarray from xarray.namedarray.parallelcompat import get_chunked_array_type from xarray.namedarray.pycompat import is_chunked_array from xarray.util.deprecation_helpers import deprecate_dims @@ -795,6 +796,7 @@ def apply_variable_ufunc( ) def func(*arrays): + res: chunkedduckarray | tuple[chunkedduckarray, ...] res = chunkmanager.apply_gufunc( numpy_func, signature.to_gufunc_string(exclude_dims), From 81fcf8530f046242efc04620295eb9ee5b435b9a Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Wed, 10 Jul 2024 12:40:04 +0200 Subject: [PATCH 13/46] Update variables.py --- xarray/coding/variables.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index d31cb6e626a..3e1104dc202 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -179,7 +179,7 @@ def lazy_elemwise_func(array, func: Callable, dtype: np.typing.DTypeLike): if is_chunked_array(array): chunkmanager = get_chunked_array_type(array) - return chunkmanager.map_blocks(func, array, dtype=dtype) # type: ignore[arg-type] + return chunkmanager.map_blocks(func, array, dtype=dtype) else: return _ElementwiseFunctionArray(array, func, dtype) From c894c7be862f113235f8fffb0145f9d057ee8de9 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Wed, 10 Jul 2024 12:50:30 +0200 Subject: [PATCH 14/46] Update dataset.py --- xarray/core/dataset.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 50cfc7b0c29..7e0d0151d95 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -163,6 +163,7 @@ T_Xarray, ) from xarray.core.weighted import DatasetWeighted + from xarray.namedarray._typing import duckarray from xarray.namedarray.parallelcompat import ChunkManagerEntrypoint @@ -860,7 +861,7 @@ def load(self, **kwargs) -> Self: chunkmanager = get_chunked_array_type(*lazy_data.values()) # evaluate all the chunked arrays simultaneously - evaluated_data: tuple[np.ndarray[Any, Any], ...] = chunkmanager.compute( + evaluated_data: tuple[duckarray[Any, Any], ...] = chunkmanager.compute( *lazy_data.values(), **kwargs ) From fe35554f8235527e0e15a2c666821ce20ea7a87f Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Wed, 10 Jul 2024 12:51:18 +0200 Subject: [PATCH 15/46] Update _typing.py --- xarray/namedarray/_typing.py | 1 - 1 file changed, 1 deletion(-) diff --git a/xarray/namedarray/_typing.py b/xarray/namedarray/_typing.py index a24398a693c..27c0bec59d7 100644 --- a/xarray/namedarray/_typing.py +++ b/xarray/namedarray/_typing.py @@ -78,7 +78,6 @@ def dtype(self) -> _DType_co: ... _Chunk = tuple[int, ...] _Chunks = tuple[_Chunk, ...] -# _NormalizedChunks = tuple[tuple[int, ...], ...] # TODO: Same as Chunks. _ChunksLike = Union[ int, Literal["auto"], None, _Chunk, _Chunks ] # TODO: Literal["auto"] From bc05489a3e6fb9006b10b750b9bae6306c9c6c94 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Wed, 10 Jul 2024 13:00:23 +0200 Subject: [PATCH 16/46] Update parallelcompat.py --- xarray/namedarray/parallelcompat.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/namedarray/parallelcompat.py b/xarray/namedarray/parallelcompat.py index 252ea5778ba..b26aab920c7 100644 --- a/xarray/namedarray/parallelcompat.py +++ b/xarray/namedarray/parallelcompat.py @@ -17,7 +17,6 @@ import numpy as np from xarray.core.utils import emit_user_level_warning -from xarray.namedarray.pycompat import is_chunked_array if TYPE_CHECKING: from xarray.namedarray._typing import ( @@ -126,7 +125,8 @@ def get_chunked_array_type(*args: Any) -> ChunkManagerEntrypoint: chunked_arrays = [ a for a in args - if is_chunked_array(a) and type(a) not in ALLOWED_NON_CHUNKED_TYPES + if isinstance(a, _chunkedarrayfunction_or_api) + and type(a) not in ALLOWED_NON_CHUNKED_TYPES ] # Asserts all arrays are the same type (or numpy etc.) From cedff32973ad5f72e46114b7f4920a5411f38a1f Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Wed, 10 Jul 2024 13:07:34 +0200 Subject: [PATCH 17/46] Update parallelcompat.py --- xarray/namedarray/parallelcompat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/namedarray/parallelcompat.py b/xarray/namedarray/parallelcompat.py index b26aab920c7..db1dfb912e6 100644 --- a/xarray/namedarray/parallelcompat.py +++ b/xarray/namedarray/parallelcompat.py @@ -17,10 +17,10 @@ import numpy as np from xarray.core.utils import emit_user_level_warning +from xarray.namedarray._typing import _chunkedarrayfunction_or_api if TYPE_CHECKING: from xarray.namedarray._typing import ( - _chunkedarrayfunction_or_api, _Chunks, _ChunksLike, _DType, From d43c0c142154a4ed4e042e5b052319caede833e1 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Wed, 10 Jul 2024 13:13:19 +0200 Subject: [PATCH 18/46] Update test_coding_times.py --- xarray/tests/test_coding_times.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index d568bdc3268..623e4e9f970 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -44,6 +44,8 @@ FirstElementAccessibleArray, arm_xfail, assert_array_equal, + assert_duckarray_allclose, + assert_duckarray_equal, assert_no_warnings, has_cftime, requires_cftime, @@ -144,13 +146,13 @@ def test_cf_datetime(num_dates, units, calendar) -> None: assert (abs_diff <= np.timedelta64(1, "s")).all() encoded, _, _ = encode_cf_datetime(actual, units, calendar) - assert_array_equal(num_dates, np.round(encoded, 1)) + assert_duckarray_allclose(num_dates, encoded) if hasattr(num_dates, "ndim") and num_dates.ndim == 1 and "1000" not in units: # verify that wrapping with a pandas.Index works # note that it *does not* currently work to put # non-datetime64 compatible dates into a pandas.Index encoded, _, _ = encode_cf_datetime(pd.Index(actual), units, calendar) - assert_array_equal(num_dates, np.round(encoded, 1)) + assert_duckarray_allclose(num_dates, encoded) @requires_cftime @@ -893,10 +895,10 @@ def test_time_units_with_timezone_roundtrip(calendar) -> None: ) if calendar in _STANDARD_CALENDARS: - np.testing.assert_array_equal(result_num_dates, expected_num_dates) + assert_duckarray_equal(result_num_dates, expected_num_dates) else: # cftime datetime arithmetic is not quite exact. - np.testing.assert_allclose(result_num_dates, expected_num_dates) + assert_duckarray_allclose(result_num_dates, expected_num_dates) assert result_units == expected_units assert result_calendar == calendar From 6399dabd81df74616c096ea83840cb0c51183520 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sat, 3 Aug 2024 04:42:04 +0200 Subject: [PATCH 19/46] Update _typing.py --- xarray/namedarray/_typing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/namedarray/_typing.py b/xarray/namedarray/_typing.py index 99a20ac891a..95260f30c20 100644 --- a/xarray/namedarray/_typing.py +++ b/xarray/namedarray/_typing.py @@ -84,7 +84,7 @@ def dtype(self) -> _DType_co: ... _ChunksType = TypeVar("_ChunksType", bound=_Chunks) # FYI in some cases we don't allow `None`, which this doesn't take account of. -T_ChunkDim: TypeAlias = Union[int, Literal["auto"], None, tuple[int, ...]] +T_ChunkDim: TypeAlias = Union[int, Literal["auto"], None, _Chunk] # We allow the tuple form of this (though arguably we could transition to named dims only) T_Chunks: TypeAlias = Union[T_ChunkDim, Mapping[Any, T_ChunkDim]] From 156a95355031ea97a8166e08f5979a15d9be7e6e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 3 Aug 2024 02:42:44 +0000 Subject: [PATCH 20/46] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/coding/times.py | 2 +- xarray/core/dataset.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index d801372123f..d6f4985f0dc 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -40,7 +40,7 @@ except ImportError: cftime = None -from xarray.core.types import CFCalendar, NPDatetimeUnitOptions, T_DuckArray +from xarray.core.types import CFCalendar, NPDatetimeUnitOptions T_Name = Union[Hashable, None] diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index b58ab85edfb..bad1afa37b1 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -165,8 +165,8 @@ T_Xarray, ) from xarray.core.weighted import DatasetWeighted - from xarray.namedarray._typing import duckarray from xarray.groupers import Grouper, Resampler + from xarray.namedarray._typing import duckarray from xarray.namedarray.parallelcompat import ChunkManagerEntrypoint From 6a087407f0e8e0120f6f1feaacce2e35f127d722 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sat, 3 Aug 2024 04:44:23 +0200 Subject: [PATCH 21/46] Update times.py --- xarray/coding/times.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index d801372123f..3f662363796 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -5,7 +5,7 @@ from collections.abc import Hashable from datetime import datetime, timedelta from functools import partial -from typing import Callable, Literal, Union, cast +from typing import Callable, Literal, Union, cast, overload import numpy as np import pandas as pd From eef8fc8316298ce1f2087c47e8b0d284081d1bed Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Mon, 5 Aug 2024 00:08:46 +0200 Subject: [PATCH 22/46] Update core.py --- xarray/namedarray/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 5a52f9aa34f..70f23e76e9b 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -749,7 +749,7 @@ def sizes(self) -> dict[_Dim, _IntOrUnknown]: def chunk( self, chunks: T_Chunks = {}, - chunked_array_type: str | ChunkManagerEntrypoint[Any] | None = None, + chunked_array_type: str | ChunkManagerEntrypoint | None = None, from_array_kwargs: Any = None, **chunks_kwargs: Any, ) -> Self: From db9898412ccc96fe38ec8db77c1740d8c7b2132e Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Mon, 5 Aug 2024 00:08:58 +0200 Subject: [PATCH 23/46] Update daskmanager.py --- xarray/namedarray/daskmanager.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/xarray/namedarray/daskmanager.py b/xarray/namedarray/daskmanager.py index dbfca0f0c62..7fa115d77a6 100644 --- a/xarray/namedarray/daskmanager.py +++ b/xarray/namedarray/daskmanager.py @@ -22,11 +22,6 @@ duckarray, ) - try: - from dask.array.core import Array as DaskArray - except ImportError: - DaskArray = np.ndarray[Any, Any] - dask_available = module_available("dask") From 96e1a1dbc12bad7bf2bcab88e2e5bc0d85ceb56b Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Mon, 5 Aug 2024 00:40:04 +0200 Subject: [PATCH 24/46] Update test_coding_times.py --- xarray/tests/test_coding_times.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index ef478af8786..9d048a2230c 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -146,14 +146,14 @@ def test_cf_datetime(num_dates, units, calendar) -> None: # https://github.com/Unidata/netcdf4-python/issues/355 assert (abs_diff <= np.timedelta64(1, "s")).all() encoded1, _, _ = encode_cf_datetime(actual, units, calendar) - assert_array_equal(num_dates, np.around(encoded1, 1)) + assert_duckarray_allclose(num_dates, encoded1) if hasattr(num_dates, "ndim") and num_dates.ndim == 1 and "1000" not in units: # verify that wrapping with a pandas.Index works # note that it *does not* currently work to put # non-datetime64 compatible dates into a pandas.Index encoded2, _, _ = encode_cf_datetime(pd.Index(actual), units, calendar) - assert_array_equal(num_dates, np.around(encoded2, 1)) + assert_duckarray_allclose(num_dates, encoded2) @requires_cftime From bf8327517ca6f8e8da9e7e54500aa403c959df2a Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Mon, 5 Aug 2024 01:08:11 +0200 Subject: [PATCH 25/46] Update test_coding_times.py --- xarray/tests/test_coding_times.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 9d048a2230c..3496edab42e 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -123,7 +123,7 @@ def _all_cftime_date_types(): @pytest.mark.filterwarnings("ignore:Ambiguous reference date string") @pytest.mark.filterwarnings("ignore:Times can't be serialized faithfully") @pytest.mark.parametrize(["num_dates", "units", "calendar"], _CF_DATETIME_TESTS) -def test_cf_datetime(num_dates, units, calendar) -> None: +def test_cf_datetime(num_dates, units: str, calendar: str) -> None: import cftime expected = cftime.num2date( From 31e1895e842dbe45aa0af258a0e2886930addc12 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Mon, 5 Aug 2024 14:01:25 +0200 Subject: [PATCH 26/46] test pd.index --- xarray/tests/test_namedarray.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/xarray/tests/test_namedarray.py b/xarray/tests/test_namedarray.py index 7687765e659..c9b63d0d518 100644 --- a/xarray/tests/test_namedarray.py +++ b/xarray/tests/test_namedarray.py @@ -561,3 +561,6 @@ def test_broadcast_to_errors( def test_warn_on_repeated_dimension_names(self) -> None: with pytest.warns(UserWarning, match="Duplicate dimension names"): NamedArray(("x", "x"), np.arange(4).reshape(2, 2)) + + def test_pd_index_duckarray() -> None: + a: duckarray = pd.Index([]) From 30f8cddcb8ebe03b7bd271212aaad4f469c32faa Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Mon, 5 Aug 2024 14:08:25 +0200 Subject: [PATCH 27/46] Update test_namedarray.py --- xarray/tests/test_namedarray.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/xarray/tests/test_namedarray.py b/xarray/tests/test_namedarray.py index c9b63d0d518..0d813cc7a8f 100644 --- a/xarray/tests/test_namedarray.py +++ b/xarray/tests/test_namedarray.py @@ -360,6 +360,12 @@ def test_duck_array_class_array_api(self) -> None: arrayapi_a = nxp.asarray([2.1, 4], dtype=nxp.int64) check_duck_array_typevar(arrayapi_a) + def test_pd_index_duckarray() -> None: + import pandas as pd + + a: duckarray = pd.Index([]) + check_duck_array_typevar(a) + def test_new_namedarray(self) -> None: dtype_float = np.dtype(np.float32) narr_float: NamedArray[Any, np.dtype[np.float32]] @@ -561,6 +567,3 @@ def test_broadcast_to_errors( def test_warn_on_repeated_dimension_names(self) -> None: with pytest.warns(UserWarning, match="Duplicate dimension names"): NamedArray(("x", "x"), np.arange(4).reshape(2, 2)) - - def test_pd_index_duckarray() -> None: - a: duckarray = pd.Index([]) From 6ed1ae697fada8e47de5ba50e1de570f1d55381b Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Mon, 5 Aug 2024 14:13:08 +0200 Subject: [PATCH 28/46] Update test_namedarray.py --- xarray/tests/test_namedarray.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/tests/test_namedarray.py b/xarray/tests/test_namedarray.py index 0d813cc7a8f..40dc4d87837 100644 --- a/xarray/tests/test_namedarray.py +++ b/xarray/tests/test_namedarray.py @@ -360,10 +360,10 @@ def test_duck_array_class_array_api(self) -> None: arrayapi_a = nxp.asarray([2.1, 4], dtype=nxp.int64) check_duck_array_typevar(arrayapi_a) - def test_pd_index_duckarray() -> None: + def test_pd_index_duckarray(self) -> None: import pandas as pd - a: duckarray = pd.Index([]) + a: duckarray[Any, Any] = pd.Index([]) check_duck_array_typevar(a) def test_new_namedarray(self) -> None: From cd05c6756fae33b93696415f7f10692e7e085dc0 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Mon, 5 Aug 2024 15:07:36 +0200 Subject: [PATCH 29/46] Add more helpful error --- xarray/tests/test_namedarray.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/xarray/tests/test_namedarray.py b/xarray/tests/test_namedarray.py index 40dc4d87837..06424194418 100644 --- a/xarray/tests/test_namedarray.py +++ b/xarray/tests/test_namedarray.py @@ -86,7 +86,21 @@ def check_duck_array_typevar(a: duckarray[Any, _DType]) -> duckarray[Any, _DType if isinstance(b, _arrayfunction_or_api): return b else: - raise TypeError(f"a ({type(a)}) is not a valid _arrayfunction or _arrayapi") + + missing_attrs = "" + actual_attrs = set(dir(b)) + for t in _arrayfunction_or_api: + expected_attrs = t.__protocol_attrs__ + missing_attrs_ = expected_attrs - actual_attrs + if missing_attrs_: + missing_attrs += f"{t.__name__} - {missing_attrs_}\n" + raise TypeError( + ( + f"a ({type(a)}) is not a valid _arrayfunction or _arrayapi. " + "Missing following attrs:\n" + f"{missing_attrs}" + ) + ) class NamedArraySubclassobjects: From 459378f4dbc41c780ee63de049257b0673e77a4a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 5 Aug 2024 13:08:16 +0000 Subject: [PATCH 30/46] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/tests/test_namedarray.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/xarray/tests/test_namedarray.py b/xarray/tests/test_namedarray.py index 06424194418..9f7549422ce 100644 --- a/xarray/tests/test_namedarray.py +++ b/xarray/tests/test_namedarray.py @@ -95,11 +95,9 @@ def check_duck_array_typevar(a: duckarray[Any, _DType]) -> duckarray[Any, _DType if missing_attrs_: missing_attrs += f"{t.__name__} - {missing_attrs_}\n" raise TypeError( - ( - f"a ({type(a)}) is not a valid _arrayfunction or _arrayapi. " - "Missing following attrs:\n" - f"{missing_attrs}" - ) + f"a ({type(a)}) is not a valid _arrayfunction or _arrayapi. " + "Missing following attrs:\n" + f"{missing_attrs}" ) From 85f49eb919f6ffa86cf6e0fb3b7a6c857b12595d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 14 Aug 2024 21:27:58 +0000 Subject: [PATCH 31/46] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/coding/times.py | 2 +- xarray/namedarray/daskmanager.py | 4 ++-- xarray/namedarray/parallelcompat.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index b07a8a9a17f..19c9a25bfe3 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -5,7 +5,7 @@ from collections.abc import Callable, Hashable from datetime import datetime, timedelta from functools import partial -from typing import Callable, Literal, Union, cast, overload +from typing import Literal, Union, cast, overload import numpy as np import pandas as pd diff --git a/xarray/namedarray/daskmanager.py b/xarray/namedarray/daskmanager.py index 7fa115d77a6..03d97bd5f75 100644 --- a/xarray/namedarray/daskmanager.py +++ b/xarray/namedarray/daskmanager.py @@ -1,8 +1,8 @@ from __future__ import annotations -from collections.abc import Iterable, Sequence +from collections.abc import Callable, Iterable, Sequence from types import ModuleType -from typing import TYPE_CHECKING, Any, Callable +from typing import TYPE_CHECKING, Any import numpy as np from packaging.version import Version diff --git a/xarray/namedarray/parallelcompat.py b/xarray/namedarray/parallelcompat.py index 353cfece7c2..df8b82295f9 100644 --- a/xarray/namedarray/parallelcompat.py +++ b/xarray/namedarray/parallelcompat.py @@ -11,7 +11,7 @@ from collections.abc import Callable, Iterable, Sequence from importlib.metadata import EntryPoint, entry_points from types import ModuleType -from typing import TYPE_CHECKING, Any, Callable, Generic, Protocol, TypeVar +from typing import TYPE_CHECKING, Any import numpy as np From 1b791b72b7871880da9036b7a53ea106153a65c2 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 17 Nov 2024 13:42:56 +0000 Subject: [PATCH 32/46] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/tests/test_parallelcompat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_parallelcompat.py b/xarray/tests/test_parallelcompat.py index 1fe73e85366..b6e4dd6b886 100644 --- a/xarray/tests/test_parallelcompat.py +++ b/xarray/tests/test_parallelcompat.py @@ -6,6 +6,7 @@ import numpy as np import pytest +from xarray import set_options from xarray.namedarray._typing import ( _Chunks, _ChunksLike, @@ -14,7 +15,6 @@ chunkedduckarray, duckarray, ) -from xarray import set_options from xarray.namedarray.daskmanager import DaskManager from xarray.namedarray.parallelcompat import ( ChunkManagerEntrypoint, From 7632f3ff9c0dcd937e9155601b1a578f3c7e2ef9 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sun, 17 Nov 2024 14:47:17 +0100 Subject: [PATCH 33/46] Update core.py --- xarray/namedarray/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 072f6ebc9e5..54a7eb9f6de 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -753,7 +753,7 @@ def sizes(self) -> dict[_Dim, _IntOrUnknown]: def chunk( self, - chunks: T_Chunks = {}, + chunks: T_Chunks = {}, # noqa: B006 # even though it's unsafe, it is being used intentionally here (#4667) chunked_array_type: str | ChunkManagerEntrypoint | None = None, from_array_kwargs: Any = None, **chunks_kwargs: Any, From 340b70ff370d58f93a554bd8a9f033675242a474 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sun, 17 Nov 2024 14:50:51 +0100 Subject: [PATCH 34/46] Update daskmanager.py --- xarray/namedarray/daskmanager.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/xarray/namedarray/daskmanager.py b/xarray/namedarray/daskmanager.py index c712bcb4510..dc71e9452dd 100644 --- a/xarray/namedarray/daskmanager.py +++ b/xarray/namedarray/daskmanager.py @@ -90,7 +90,9 @@ def compute( out = compute(*data, **kwargs) # type: ignore[no-untyped-call] return out - def persist(self, *data: Any, **kwargs: Any) -> tuple[DaskArray | Any, ...]: + def persist( + self, *data: chunkedduckarray[Any, _DType] | Any, **kwargs: Any + ) -> tuple[chunkedduckarray[Any, _DType] | Any, ...]: from dask import persist return persist(*data, **kwargs) # type: ignore[no-untyped-call, no-any-return] From 52aacfefc79d0d2c9b5f0b5db2426b19bdae4f12 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sun, 17 Nov 2024 14:53:44 +0100 Subject: [PATCH 35/46] Update parallelcompat.py --- xarray/namedarray/parallelcompat.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/namedarray/parallelcompat.py b/xarray/namedarray/parallelcompat.py index 13256f9b8aa..dc832bd1ffe 100644 --- a/xarray/namedarray/parallelcompat.py +++ b/xarray/namedarray/parallelcompat.py @@ -342,8 +342,8 @@ def compute( raise NotImplementedError() def persist( - self, *data: T_ChunkedArray | Any, **kwargs: Any - ) -> tuple[T_ChunkedArray | Any, ...]: + self, *data: chunkedduckarray[Any, _DType] | Any, **kwargs: Any + ) -> tuple[chunkedduckarray[Any, _DType] | Any, ...]: """ Persist one or more chunked arrays in memory. From ebf415d14d99d2dd93fd3f4c8d8bb409469ad84b Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sun, 17 Nov 2024 15:09:00 +0100 Subject: [PATCH 36/46] chunkedduckarray --- xarray/core/dataset.py | 3 ++- xarray/namedarray/parallelcompat.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 191b178d0db..95af15cf66b 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -171,7 +171,7 @@ ) from xarray.core.weighted import DatasetWeighted from xarray.groupers import Grouper, Resampler - from xarray.namedarray._typing import duckarray + from xarray.namedarray._typing import duckarray, chunkedduckarray from xarray.namedarray.parallelcompat import ChunkManagerEntrypoint @@ -1056,6 +1056,7 @@ def _persist_inplace(self, **kwargs) -> Self: chunkmanager = get_chunked_array_type(*lazy_data.values()) # evaluate all the dask arrays simultaneously + evaluated_data: tuple[chunkedduckarray, ...] evaluated_data = chunkmanager.persist(*lazy_data.values(), **kwargs) for k, data in zip(lazy_data, evaluated_data, strict=False): diff --git a/xarray/namedarray/parallelcompat.py b/xarray/namedarray/parallelcompat.py index dc832bd1ffe..828f90c2e60 100644 --- a/xarray/namedarray/parallelcompat.py +++ b/xarray/namedarray/parallelcompat.py @@ -343,7 +343,7 @@ def compute( def persist( self, *data: chunkedduckarray[Any, _DType] | Any, **kwargs: Any - ) -> tuple[chunkedduckarray[Any, _DType] | Any, ...]: + ) -> tuple[chunkedduckarray[Any, _DType], ...]: """ Persist one or more chunked arrays in memory. From 02e7b28b6fc729891d73ef64f2413824dd96786a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 17 Nov 2024 14:09:37 +0000 Subject: [PATCH 37/46] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/core/dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 95af15cf66b..044c29e3bf9 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -171,7 +171,7 @@ ) from xarray.core.weighted import DatasetWeighted from xarray.groupers import Grouper, Resampler - from xarray.namedarray._typing import duckarray, chunkedduckarray + from xarray.namedarray._typing import chunkedduckarray, duckarray from xarray.namedarray.parallelcompat import ChunkManagerEntrypoint From e71a36afc617496fe84176b114b82831bdb8dcce Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sun, 17 Nov 2024 15:17:09 +0100 Subject: [PATCH 38/46] Update datatree.py --- xarray/core/datatree.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/xarray/core/datatree.py b/xarray/core/datatree.py index ee90cf7477c..aaaf2159c66 100644 --- a/xarray/core/datatree.py +++ b/xarray/core/datatree.py @@ -74,6 +74,7 @@ T_ChunksFreq, ZarrWriteModes, ) + from xarray.namedarray._typing import chunkedduckarray, duckarray from xarray.namedarray.parallelcompat import ChunkManagerEntrypoint # """ @@ -1954,9 +1955,8 @@ def load(self, **kwargs) -> Self: chunkmanager = get_chunked_array_type(*flat_lazy_data.values()) # evaluate all the chunked arrays simultaneously - evaluated_data: tuple[np.ndarray[Any, Any], ...] = chunkmanager.compute( - *flat_lazy_data.values(), **kwargs - ) + evaluated_data: tuple[duckarray[Any, Any], ...] + evaluated_data = chunkmanager.compute(*flat_lazy_data.values(), **kwargs) for (path, var_name), data in zip( flat_lazy_data, evaluated_data, strict=False @@ -2018,6 +2018,7 @@ def _persist_inplace(self, **kwargs) -> Self: chunkmanager = get_chunked_array_type(*flat_lazy_data.values()) # evaluate all the dask arrays simultaneously + evaluated_data: tuple[chunkedduckarray[Any, Any], ...] evaluated_data = chunkmanager.persist(*flat_lazy_data.values(), **kwargs) for (path, var_name), data in zip( From d7da45052e9f7c104dceb50f67e8be47c3f6393f Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sun, 17 Nov 2024 15:33:50 +0100 Subject: [PATCH 39/46] Update times.py --- xarray/coding/times.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 72c1f878587..8d392f5207a 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -725,17 +725,17 @@ def encode_cf_datetime( ) -> tuple[chunkedduckarray, str, str]: ... @overload def encode_cf_datetime( - dates: duckarray, + dates: duckarray | pd.Index | pd.DatetimeIndex, units: str | None = None, calendar: str | None = None, dtype: np.dtype | None = None, ) -> tuple[duckarray, str, str]: ... def encode_cf_datetime( - dates: duckarray | chunkedduckarray, + dates: duckarray | pd.Index | pd.DatetimeIndex | chunkedduckarray, units: str | None = None, calendar: str | None = None, dtype: np.dtype | None = None, -) -> tuple[duckarray | chunkedduckarray, str, str]: +) -> tuple[duckarray | pd.Index | pd.DatetimeIndex | chunkedduckarray, str, str]: """Given an array of datetime objects, returns the tuple `(num, units, calendar)` suitable for a CF compliant time variable. @@ -753,12 +753,12 @@ def encode_cf_datetime( def _eagerly_encode_cf_datetime( - dates: duckarray, + dates: duckarray | pd.Index | pd.DatetimeIndex, units: str | None = None, calendar: str | None = None, dtype: np.dtype | None = None, allow_units_modification: bool = True, -) -> tuple[duckarray, str, str]: +) -> tuple[duckarray | pd.Index | pd.DatetimeIndex, str, str]: dates = asarray(dates) data_units = infer_datetime_units(dates) @@ -840,7 +840,7 @@ def _encode_cf_datetime_within_map_blocks( units: str, calendar: str, dtype: np.dtype, -) -> duckarray: +) -> duckarray | pd.Index | pd.DatetimeIndex: num, *_ = _eagerly_encode_cf_datetime( dates, units, calendar, dtype, allow_units_modification=False ) From df29319ddd151835119dbb622fc161acff0b7bc2 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sun, 17 Nov 2024 17:12:11 +0100 Subject: [PATCH 40/46] Update times.py --- xarray/coding/times.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 8d392f5207a..e1770df7bb8 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -43,6 +43,8 @@ from xarray.core.types import CFCalendar, NPDatetimeUnitOptions T_Name = Union[Hashable, None] +PandasTypes = Union[pd.Index, pd.DatetimeIndex, pd.timedelta64] + # standard calendars recognized by cftime _STANDARD_CALENDARS = {"standard", "gregorian", "proleptic_gregorian"} @@ -725,17 +727,17 @@ def encode_cf_datetime( ) -> tuple[chunkedduckarray, str, str]: ... @overload def encode_cf_datetime( - dates: duckarray | pd.Index | pd.DatetimeIndex, + dates: duckarray | PandasTypes, units: str | None = None, calendar: str | None = None, dtype: np.dtype | None = None, ) -> tuple[duckarray, str, str]: ... def encode_cf_datetime( - dates: duckarray | pd.Index | pd.DatetimeIndex | chunkedduckarray, + dates: duckarray | PandasTypes | chunkedduckarray, units: str | None = None, calendar: str | None = None, dtype: np.dtype | None = None, -) -> tuple[duckarray | pd.Index | pd.DatetimeIndex | chunkedduckarray, str, str]: +) -> tuple[duckarray | PandasTypes | chunkedduckarray, str, str]: """Given an array of datetime objects, returns the tuple `(num, units, calendar)` suitable for a CF compliant time variable. @@ -753,12 +755,12 @@ def encode_cf_datetime( def _eagerly_encode_cf_datetime( - dates: duckarray | pd.Index | pd.DatetimeIndex, + dates: duckarray | PandasTypes, units: str | None = None, calendar: str | None = None, dtype: np.dtype | None = None, allow_units_modification: bool = True, -) -> tuple[duckarray | pd.Index | pd.DatetimeIndex, str, str]: +) -> tuple[duckarray | PandasTypes, str, str]: dates = asarray(dates) data_units = infer_datetime_units(dates) @@ -840,7 +842,7 @@ def _encode_cf_datetime_within_map_blocks( units: str, calendar: str, dtype: np.dtype, -) -> duckarray | pd.Index | pd.DatetimeIndex: +) -> duckarray | PandasTypes: num, *_ = _eagerly_encode_cf_datetime( dates, units, calendar, dtype, allow_units_modification=False ) @@ -893,12 +895,12 @@ def encode_cf_timedelta( ) -> tuple[chunkedduckarray, str]: ... @overload def encode_cf_timedelta( - timedeltas: duckarray, + timedeltas: duckarray | PandasTypes, units: str | None = None, dtype: np.dtype | None = None, ) -> tuple[duckarray, str]: ... def encode_cf_timedelta( - timedeltas: chunkedduckarray | duckarray, + timedeltas: chunkedduckarray | duckarray | PandasTypes, units: str | None = None, dtype: np.dtype | None = None, ) -> tuple[chunkedduckarray | duckarray, str]: From 6735cf0818e68d596b18741c159b22e8d28982e8 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sun, 17 Nov 2024 17:19:13 +0100 Subject: [PATCH 41/46] Update times.py --- xarray/coding/times.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index e1770df7bb8..bb6f2f956c7 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -40,10 +40,9 @@ except ImportError: cftime = None -from xarray.core.types import CFCalendar, NPDatetimeUnitOptions +from xarray.core.types import CFCalendar, NPDatetimeUnitOptions, DatetimeLike T_Name = Union[Hashable, None] -PandasTypes = Union[pd.Index, pd.DatetimeIndex, pd.timedelta64] # standard calendars recognized by cftime @@ -727,17 +726,17 @@ def encode_cf_datetime( ) -> tuple[chunkedduckarray, str, str]: ... @overload def encode_cf_datetime( - dates: duckarray | PandasTypes, + dates: duckarray | DatetimeLike, units: str | None = None, calendar: str | None = None, dtype: np.dtype | None = None, ) -> tuple[duckarray, str, str]: ... def encode_cf_datetime( - dates: duckarray | PandasTypes | chunkedduckarray, + dates: duckarray | DatetimeLike | chunkedduckarray, units: str | None = None, calendar: str | None = None, dtype: np.dtype | None = None, -) -> tuple[duckarray | PandasTypes | chunkedduckarray, str, str]: +) -> tuple[duckarray | DatetimeLike | chunkedduckarray, str, str]: """Given an array of datetime objects, returns the tuple `(num, units, calendar)` suitable for a CF compliant time variable. @@ -755,12 +754,12 @@ def encode_cf_datetime( def _eagerly_encode_cf_datetime( - dates: duckarray | PandasTypes, + dates: duckarray | DatetimeLike, units: str | None = None, calendar: str | None = None, dtype: np.dtype | None = None, allow_units_modification: bool = True, -) -> tuple[duckarray | PandasTypes, str, str]: +) -> tuple[duckarray | DatetimeLike, str, str]: dates = asarray(dates) data_units = infer_datetime_units(dates) @@ -842,7 +841,7 @@ def _encode_cf_datetime_within_map_blocks( units: str, calendar: str, dtype: np.dtype, -) -> duckarray | PandasTypes: +) -> duckarray | DatetimeLike: num, *_ = _eagerly_encode_cf_datetime( dates, units, calendar, dtype, allow_units_modification=False ) @@ -895,12 +894,12 @@ def encode_cf_timedelta( ) -> tuple[chunkedduckarray, str]: ... @overload def encode_cf_timedelta( - timedeltas: duckarray | PandasTypes, + timedeltas: duckarray | DatetimeLike, units: str | None = None, dtype: np.dtype | None = None, ) -> tuple[duckarray, str]: ... def encode_cf_timedelta( - timedeltas: chunkedduckarray | duckarray | PandasTypes, + timedeltas: chunkedduckarray | duckarray | DatetimeLike, units: str | None = None, dtype: np.dtype | None = None, ) -> tuple[chunkedduckarray | duckarray, str]: From feaed2cf0fd71d8a8d1e9d39c02f927fed9567d7 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 17 Nov 2024 16:19:30 +0000 Subject: [PATCH 42/46] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/coding/times.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index bb6f2f956c7..80eed184486 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -40,7 +40,7 @@ except ImportError: cftime = None -from xarray.core.types import CFCalendar, NPDatetimeUnitOptions, DatetimeLike +from xarray.core.types import CFCalendar, DatetimeLike, NPDatetimeUnitOptions T_Name = Union[Hashable, None] From 9d0c48070ca1ff876101c73ed189b3ab992ec58a Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sun, 17 Nov 2024 17:41:46 +0100 Subject: [PATCH 43/46] Update times.py --- xarray/coding/times.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index bb6f2f956c7..cd59733c8a7 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -40,9 +40,10 @@ except ImportError: cftime = None -from xarray.core.types import CFCalendar, NPDatetimeUnitOptions, DatetimeLike +from xarray.core.types import CFCalendar, NPDatetimeUnitOptions T_Name = Union[Hashable, None] +PandasTypes = Union[pd.Index, pd.DatetimeIndex] # standard calendars recognized by cftime @@ -726,17 +727,17 @@ def encode_cf_datetime( ) -> tuple[chunkedduckarray, str, str]: ... @overload def encode_cf_datetime( - dates: duckarray | DatetimeLike, + dates: duckarray | PandasTypes, units: str | None = None, calendar: str | None = None, dtype: np.dtype | None = None, ) -> tuple[duckarray, str, str]: ... def encode_cf_datetime( - dates: duckarray | DatetimeLike | chunkedduckarray, + dates: duckarray | PandasTypes | chunkedduckarray, units: str | None = None, calendar: str | None = None, dtype: np.dtype | None = None, -) -> tuple[duckarray | DatetimeLike | chunkedduckarray, str, str]: +) -> tuple[duckarray | PandasTypes | chunkedduckarray, str, str]: """Given an array of datetime objects, returns the tuple `(num, units, calendar)` suitable for a CF compliant time variable. @@ -754,12 +755,12 @@ def encode_cf_datetime( def _eagerly_encode_cf_datetime( - dates: duckarray | DatetimeLike, + dates: duckarray | PandasTypes, units: str | None = None, calendar: str | None = None, dtype: np.dtype | None = None, allow_units_modification: bool = True, -) -> tuple[duckarray | DatetimeLike, str, str]: +) -> tuple[duckarray | PandasTypes, str, str]: dates = asarray(dates) data_units = infer_datetime_units(dates) @@ -841,7 +842,7 @@ def _encode_cf_datetime_within_map_blocks( units: str, calendar: str, dtype: np.dtype, -) -> duckarray | DatetimeLike: +) -> duckarray | PandasTypes: num, *_ = _eagerly_encode_cf_datetime( dates, units, calendar, dtype, allow_units_modification=False ) @@ -894,12 +895,12 @@ def encode_cf_timedelta( ) -> tuple[chunkedduckarray, str]: ... @overload def encode_cf_timedelta( - timedeltas: duckarray | DatetimeLike, + timedeltas: duckarray | PandasTypes, units: str | None = None, dtype: np.dtype | None = None, ) -> tuple[duckarray, str]: ... def encode_cf_timedelta( - timedeltas: chunkedduckarray | duckarray | DatetimeLike, + timedeltas: chunkedduckarray | duckarray | PandasTypes, units: str | None = None, dtype: np.dtype | None = None, ) -> tuple[chunkedduckarray | duckarray, str]: From 3e6b5dda8dc0926887d5f7db0350e9fb42a43aa7 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sun, 17 Nov 2024 17:42:00 +0100 Subject: [PATCH 44/46] add __array___ test --- xarray/tests/test_namedarray.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/xarray/tests/test_namedarray.py b/xarray/tests/test_namedarray.py index 69b9dc13c15..6e23f80e7fc 100644 --- a/xarray/tests/test_namedarray.py +++ b/xarray/tests/test_namedarray.py @@ -118,6 +118,12 @@ def check_duck_array_typevar(a: duckarray[Any, _DType]) -> duckarray[Any, _DType ) +def test_duckarray___array__() -> None: + x: duckarray[Any, Any] = np.array([1, 2, 3], dtype=np.int64) + y = np.array(x) + np.testing.assert_array_equal(y, x) + + class NamedArraySubclassobjects: @pytest.fixture def target(self, data: np.ndarray[Any, Any]) -> Any: From fd891e9657a0fefb005b3cfcd03d7dc3bbfef0ef Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sun, 17 Nov 2024 17:42:04 +0100 Subject: [PATCH 45/46] Update test_namedarray.py --- xarray/tests/test_namedarray.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xarray/tests/test_namedarray.py b/xarray/tests/test_namedarray.py index 6e23f80e7fc..8c157bcb035 100644 --- a/xarray/tests/test_namedarray.py +++ b/xarray/tests/test_namedarray.py @@ -395,6 +395,7 @@ def test_duck_array_class_array_api(self) -> None: arrayapi_a = nxp.asarray([2.1, 4], dtype=nxp.int64) check_duck_array_typevar(arrayapi_a) + @pytest.mark.xfail(reason="pd.Index does not include an __array_function__") def test_pd_index_duckarray(self) -> None: import pandas as pd From 82e5f58956978ec236885c8530e4a43dc2a34fcc Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 26 Nov 2024 06:52:06 +0000 Subject: [PATCH 46/46] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/coding/times.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 06b7610cf4f..cc360a40930 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -5,7 +5,7 @@ from collections.abc import Callable, Hashable from datetime import datetime, timedelta from functools import partial -from typing import TYPE_CHECKING, Literal, Union, cast,overload +from typing import TYPE_CHECKING, Literal, Union, cast, overload import numpy as np import pandas as pd