diff --git a/.github/ISSUE_TEMPLATE/bugreport.yml b/.github/ISSUE_TEMPLATE/bugreport.yml index 59e5889f5ec..cc1a2e12be3 100644 --- a/.github/ISSUE_TEMPLATE/bugreport.yml +++ b/.github/ISSUE_TEMPLATE/bugreport.yml @@ -44,6 +44,7 @@ body: - label: Complete example — the example is self-contained, including all data and the text of any traceback. - label: Verifiable example — the example copy & pastes into an IPython prompt or [Binder notebook](https://mybinder.org/v2/gh/pydata/xarray/main?urlpath=lab/tree/doc/examples/blank_template.ipynb), returning the result. - label: New issue — a search of GitHub Issues suggests this is not a duplicate. + - label: Recent environment — the issue occurs with the latest version of xarray and its dependencies. - type: textarea id: log-output diff --git a/.github/workflows/ci-additional.yaml b/.github/workflows/ci-additional.yaml index ec1c192fd35..dc9cc2cd2fe 100644 --- a/.github/workflows/ci-additional.yaml +++ b/.github/workflows/ci-additional.yaml @@ -82,8 +82,6 @@ jobs: name: Mypy runs-on: "ubuntu-latest" needs: detect-ci-trigger - # temporarily skipping due to https://github.com/pydata/xarray/issues/6551 - if: needs.detect-ci-trigger.outputs.triggered == 'false' defaults: run: shell: bash -l {0} @@ -190,6 +188,126 @@ jobs: + pyright: + name: Pyright + runs-on: "ubuntu-latest" + needs: detect-ci-trigger + if: | + always() + && ( + contains( github.event.pull_request.labels.*.name, 'run-pyright') + ) + defaults: + run: + shell: bash -l {0} + env: + CONDA_ENV_FILE: ci/requirements/environment.yml + PYTHON_VERSION: "3.10" + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 # Fetch all history for all branches and tags. + + - name: set environment variables + run: | + echo "TODAY=$(date +'%Y-%m-%d')" >> $GITHUB_ENV + - name: Setup micromamba + uses: mamba-org/setup-micromamba@v1 + with: + environment-file: ${{env.CONDA_ENV_FILE}} + environment-name: xarray-tests + create-args: >- + python=${{env.PYTHON_VERSION}} + conda + cache-environment: true + cache-environment-key: "${{runner.os}}-${{runner.arch}}-py${{env.PYTHON_VERSION}}-${{env.TODAY}}-${{hashFiles(env.CONDA_ENV_FILE)}}" + - name: Install xarray + run: | + python -m pip install --no-deps -e . + - name: Version info + run: | + conda info -a + conda list + python xarray/util/print_versions.py + - name: Install pyright + run: | + python -m pip install pyright --force-reinstall + + - name: Run pyright + run: | + python -m pyright xarray/ + + - name: Upload pyright coverage to Codecov + uses: codecov/codecov-action@v3.1.4 + with: + file: pyright_report/cobertura.xml + flags: pyright + env_vars: PYTHON_VERSION + name: codecov-umbrella + fail_ci_if_error: false + + pyright39: + name: Pyright 3.9 + runs-on: "ubuntu-latest" + needs: detect-ci-trigger + if: | + always() + && ( + contains( github.event.pull_request.labels.*.name, 'run-pyright') + ) + defaults: + run: + shell: bash -l {0} + env: + CONDA_ENV_FILE: ci/requirements/environment.yml + PYTHON_VERSION: "3.9" + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 # Fetch all history for all branches and tags. + + - name: set environment variables + run: | + echo "TODAY=$(date +'%Y-%m-%d')" >> $GITHUB_ENV + - name: Setup micromamba + uses: mamba-org/setup-micromamba@v1 + with: + environment-file: ${{env.CONDA_ENV_FILE}} + environment-name: xarray-tests + create-args: >- + python=${{env.PYTHON_VERSION}} + conda + cache-environment: true + cache-environment-key: "${{runner.os}}-${{runner.arch}}-py${{env.PYTHON_VERSION}}-${{env.TODAY}}-${{hashFiles(env.CONDA_ENV_FILE)}}" + - name: Install xarray + run: | + python -m pip install --no-deps -e . + - name: Version info + run: | + conda info -a + conda list + python xarray/util/print_versions.py + - name: Install pyright + run: | + python -m pip install pyright --force-reinstall + + - name: Run pyright + run: | + python -m pyright xarray/ + + - name: Upload pyright coverage to Codecov + uses: codecov/codecov-action@v3.1.4 + with: + file: pyright_report/cobertura.xml + flags: pyright39 + env_vars: PYTHON_VERSION + name: codecov-umbrella + fail_ci_if_error: false + + + min-version-policy: name: Minimum Version Policy runs-on: "ubuntu-latest" diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 92acc3f90c0..8f576f486dc 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -29,6 +29,9 @@ New Features - :py:meth:`DataArray.sortby` & :py:meth:`Dataset.sortby` accept a callable for the ``variables`` parameter, passing the object as the only argument. By `Maximilian Roos `_. +- ``.rolling_exp`` functions can now operate on dask-backed arrays, assuming the + core dim has exactly one chunk. (:pull:`8284`). + By `Maximilian Roos `_. Breaking changes ~~~~~~~~~~~~~~~~ diff --git a/pyproject.toml b/pyproject.toml index e24f88d9679..1a24a4b4eda 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -198,6 +198,31 @@ warn_return_any = true module = ["xarray.namedarray.*", "xarray.tests.test_namedarray"] +[tool.pyright] +# include = ["src"] +# exclude = ["**/node_modules", + # "**/__pycache__", + # "src/experimental", + # "src/typestubs" +# ] +# ignore = ["src/oldstuff"] +defineConstant = { DEBUG = true } +# stubPath = "src/stubs" +# venv = "env367" + +reportMissingImports = true +reportMissingTypeStubs = false + +# pythonVersion = "3.6" +# pythonPlatform = "Linux" + +# executionEnvironments = [ + # { root = "src/web", pythonVersion = "3.5", pythonPlatform = "Windows", extraPaths = [ "src/service_libs" ] }, + # { root = "src/sdk", pythonVersion = "3.0", extraPaths = [ "src/backend" ] }, + # { root = "src/tests", extraPaths = ["src/tests/e2e", "src/sdk" ]}, + # { root = "src" } +# ] + [tool.ruff] builtins = ["ellipsis"] exclude = [ diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py index ff2ecbc74a1..7d9ba4f4b94 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -5,7 +5,7 @@ from collections import defaultdict from collections.abc import Hashable, Iterable, Mapping from contextlib import suppress -from typing import TYPE_CHECKING, Any, Callable, Generic, cast +from typing import TYPE_CHECKING, Any, Callable, Final, Generic, TypeVar, cast, overload import numpy as np import pandas as pd @@ -26,7 +26,13 @@ if TYPE_CHECKING: from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset - from xarray.core.types import JoinOptions, T_DataArray, T_Dataset, T_DuckArray + from xarray.core.types import ( + Alignable, + JoinOptions, + T_DataArray, + T_Dataset, + T_DuckArray, + ) def reindex_variables( @@ -128,7 +134,7 @@ def __init__( objects: Iterable[T_Alignable], join: str = "inner", indexes: Mapping[Any, Any] | None = None, - exclude_dims: Iterable = frozenset(), + exclude_dims: str | Iterable[Hashable] = frozenset(), exclude_vars: Iterable[Hashable] = frozenset(), method: str | None = None, tolerance: int | float | Iterable[int | float] | None = None, @@ -576,12 +582,111 @@ def align(self) -> None: self.reindex_all() +T_Obj1 = TypeVar("T_Obj1", bound="Alignable") +T_Obj2 = TypeVar("T_Obj2", bound="Alignable") +T_Obj3 = TypeVar("T_Obj3", bound="Alignable") +T_Obj4 = TypeVar("T_Obj4", bound="Alignable") +T_Obj5 = TypeVar("T_Obj5", bound="Alignable") + + +@overload +def align( + obj1: T_Obj1, + /, + *, + join: JoinOptions = "inner", + copy: bool = True, + indexes=None, + exclude: str | Iterable[Hashable] = frozenset(), + fill_value=dtypes.NA, +) -> tuple[T_Obj1]: + ... + + +@overload +def align( # type: ignore[misc] + obj1: T_Obj1, + obj2: T_Obj2, + /, + *, + join: JoinOptions = "inner", + copy: bool = True, + indexes=None, + exclude: str | Iterable[Hashable] = frozenset(), + fill_value=dtypes.NA, +) -> tuple[T_Obj1, T_Obj2]: + ... + + +@overload +def align( # type: ignore[misc] + obj1: T_Obj1, + obj2: T_Obj2, + obj3: T_Obj3, + /, + *, + join: JoinOptions = "inner", + copy: bool = True, + indexes=None, + exclude: str | Iterable[Hashable] = frozenset(), + fill_value=dtypes.NA, +) -> tuple[T_Obj1, T_Obj2, T_Obj3]: + ... + + +@overload +def align( # type: ignore[misc] + obj1: T_Obj1, + obj2: T_Obj2, + obj3: T_Obj3, + obj4: T_Obj4, + /, + *, + join: JoinOptions = "inner", + copy: bool = True, + indexes=None, + exclude: str | Iterable[Hashable] = frozenset(), + fill_value=dtypes.NA, +) -> tuple[T_Obj1, T_Obj2, T_Obj3, T_Obj4]: + ... + + +@overload +def align( # type: ignore[misc] + obj1: T_Obj1, + obj2: T_Obj2, + obj3: T_Obj3, + obj4: T_Obj4, + obj5: T_Obj5, + /, + *, + join: JoinOptions = "inner", + copy: bool = True, + indexes=None, + exclude: str | Iterable[Hashable] = frozenset(), + fill_value=dtypes.NA, +) -> tuple[T_Obj1, T_Obj2, T_Obj3, T_Obj4, T_Obj5]: + ... + + +@overload def align( *objects: T_Alignable, join: JoinOptions = "inner", copy: bool = True, indexes=None, - exclude=frozenset(), + exclude: str | Iterable[Hashable] = frozenset(), + fill_value=dtypes.NA, +) -> tuple[T_Alignable, ...]: + ... + + +def align( # type: ignore[misc] + *objects: T_Alignable, + join: JoinOptions = "inner", + copy: bool = True, + indexes=None, + exclude: str | Iterable[Hashable] = frozenset(), fill_value=dtypes.NA, ) -> tuple[T_Alignable, ...]: """ @@ -620,7 +725,7 @@ def align( indexes : dict-like, optional Any indexes explicitly provided with the `indexes` argument should be used in preference to the aligned indexes. - exclude : sequence of str, optional + exclude : str, iterable of hashable or None, optional Dimensions that must be excluded from alignment fill_value : scalar or dict-like, optional Value to use for newly missing values. If a dict-like, maps @@ -787,12 +892,12 @@ def align( def deep_align( objects: Iterable[Any], join: JoinOptions = "inner", - copy=True, + copy: bool = True, indexes=None, - exclude=frozenset(), - raise_on_invalid=True, + exclude: str | Iterable[Hashable] = frozenset(), + raise_on_invalid: bool = True, fill_value=dtypes.NA, -): +) -> list[Any]: """Align objects for merging, recursing into dictionary values. This function is not public API. @@ -807,12 +912,12 @@ def deep_align( def is_alignable(obj): return isinstance(obj, (Coordinates, DataArray, Dataset)) - positions = [] - keys = [] - out = [] - targets = [] - no_key = object() - not_replaced = object() + positions: list[int] = [] + keys: list[type[object] | Hashable] = [] + out: list[Any] = [] + targets: list[Alignable] = [] + no_key: Final = object() + not_replaced: Final = object() for position, variables in enumerate(objects): if is_alignable(variables): positions.append(position) @@ -857,7 +962,7 @@ def is_alignable(obj): if key is no_key: out[position] = aligned_obj else: - out[position][key] = aligned_obj # type: ignore[index] # maybe someone can fix this? + out[position][key] = aligned_obj return out @@ -988,9 +1093,69 @@ def _broadcast_dataset(ds: T_Dataset) -> T_Dataset: raise ValueError("all input must be Dataset or DataArray objects") -# TODO: this typing is too restrictive since it cannot deal with mixed -# DataArray and Dataset types...? Is this a problem? -def broadcast(*args: T_Alignable, exclude=None) -> tuple[T_Alignable, ...]: +@overload +def broadcast( + obj1: T_Obj1, /, *, exclude: str | Iterable[Hashable] | None = None +) -> tuple[T_Obj1]: + ... + + +@overload +def broadcast( # type: ignore[misc] + obj1: T_Obj1, obj2: T_Obj2, /, *, exclude: str | Iterable[Hashable] | None = None +) -> tuple[T_Obj1, T_Obj2]: + ... + + +@overload +def broadcast( # type: ignore[misc] + obj1: T_Obj1, + obj2: T_Obj2, + obj3: T_Obj3, + /, + *, + exclude: str | Iterable[Hashable] | None = None, +) -> tuple[T_Obj1, T_Obj2, T_Obj3]: + ... + + +@overload +def broadcast( # type: ignore[misc] + obj1: T_Obj1, + obj2: T_Obj2, + obj3: T_Obj3, + obj4: T_Obj4, + /, + *, + exclude: str | Iterable[Hashable] | None = None, +) -> tuple[T_Obj1, T_Obj2, T_Obj3, T_Obj4]: + ... + + +@overload +def broadcast( # type: ignore[misc] + obj1: T_Obj1, + obj2: T_Obj2, + obj3: T_Obj3, + obj4: T_Obj4, + obj5: T_Obj5, + /, + *, + exclude: str | Iterable[Hashable] | None = None, +) -> tuple[T_Obj1, T_Obj2, T_Obj3, T_Obj4, T_Obj5]: + ... + + +@overload +def broadcast( + *args: T_Alignable, exclude: str | Iterable[Hashable] | None = None +) -> tuple[T_Alignable, ...]: + ... + + +def broadcast( # type: ignore[misc] + *args: T_Alignable, exclude: str | Iterable[Hashable] | None = None +) -> tuple[T_Alignable, ...]: """Explicitly broadcast any number of DataArray or Dataset objects against one another. @@ -1004,7 +1169,7 @@ def broadcast(*args: T_Alignable, exclude=None) -> tuple[T_Alignable, ...]: ---------- *args : DataArray or Dataset Arrays to broadcast against each other. - exclude : sequence of str, optional + exclude : str, iterable of hashable or None, optional Dimensions that must not be broadcasted Returns diff --git a/xarray/core/common.py b/xarray/core/common.py index f571576850c..ab8a4d84261 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -1163,7 +1163,7 @@ def where(self, cond: Any, other: Any = dtypes.NA, drop: bool = False) -> Self: f"cond argument is {cond!r} but must be a {Dataset!r} or {DataArray!r} (or a callable than returns one)." ) - self, cond = align(self, cond) # type: ignore[assignment] + self, cond = align(self, cond) def _dataarray_indexer(dim: Hashable) -> DataArray: return cond.any(dim=(d for d in cond.dims if d != dim)) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index db786910f22..9cb60e0c424 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -289,8 +289,14 @@ def apply_dataarray_vfunc( from xarray.core.dataarray import DataArray if len(args) > 1: - args = deep_align( - args, join=join, copy=False, exclude=exclude_dims, raise_on_invalid=False + args = tuple( + deep_align( + args, + join=join, + copy=False, + exclude=exclude_dims, + raise_on_invalid=False, + ) ) objs = _all_of_type(args, DataArray) @@ -506,8 +512,14 @@ def apply_dataset_vfunc( objs = _all_of_type(args, Dataset) if len(args) > 1: - args = deep_align( - args, join=join, copy=False, exclude=exclude_dims, raise_on_invalid=False + args = tuple( + deep_align( + args, + join=join, + copy=False, + exclude=exclude_dims, + raise_on_invalid=False, + ) ) list_of_coords, list_of_indexes = build_output_coords_and_indexes( diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 749cb585234..3142c518c0f 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -4639,7 +4639,7 @@ def _binary_op( return NotImplemented if isinstance(other, DataArray): align_type = OPTIONS["arithmetic_join"] - self, other = align(self, other, join=align_type, copy=False) # type: ignore[type-var,assignment] + self, other = align(self, other, join=align_type, copy=False) other_variable_or_arraylike: DaCompatible = getattr(other, "variable", other) other_coords = getattr(other, "coords", None) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 952e10c6127..b55efadcdb2 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -7508,7 +7508,7 @@ def _binary_op(self, other, f, reflexive=False, join=None) -> Dataset: return NotImplemented align_type = OPTIONS["arithmetic_join"] if join is None else join if isinstance(other, (DataArray, Dataset)): - self, other = align(self, other, join=align_type, copy=False) # type: ignore[assignment] + self, other = align(self, other, join=align_type, copy=False) g = f if not reflexive else lambda x, y: f(y, x) ds = self._calculate_binary_op(g, other, join=align_type) keep_attrs = _get_keep_attrs(default=False) @@ -7920,9 +7920,9 @@ def sortby( else: variables = variables arrays = [v if isinstance(v, DataArray) else self[v] for v in variables] - aligned_vars = align(self, *arrays, join="left") # type: ignore[type-var] - aligned_self = cast(Self, aligned_vars[0]) - aligned_other_vars: tuple[DataArray, ...] = aligned_vars[1:] # type: ignore[assignment] + aligned_vars = align(self, *arrays, join="left") + aligned_self = aligned_vars[0] + aligned_other_vars: tuple[DataArray, ...] = aligned_vars[1:] vars_by_dim = defaultdict(list) for data_array in aligned_other_vars: if data_array.ndim != 1: diff --git a/xarray/core/merge.py b/xarray/core/merge.py index 3475db4a010..a8e54ad1231 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -474,10 +474,11 @@ def coerce_pandas_values(objects: Iterable[CoercibleMapping]) -> list[DatasetLik from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset - out = [] + out: list[DatasetLike] = [] for obj in objects: + variables: DatasetLike if isinstance(obj, (Dataset, Coordinates)): - variables: DatasetLike = obj + variables = obj else: variables = {} if isinstance(obj, PANDAS_TYPES): @@ -491,7 +492,7 @@ def coerce_pandas_values(objects: Iterable[CoercibleMapping]) -> list[DatasetLik def _get_priority_vars_and_indexes( - objects: list[DatasetLike], + objects: Sequence[DatasetLike], priority_arg: int | None, compat: CompatOptions = "equals", ) -> dict[Hashable, MergeElement]: @@ -503,7 +504,7 @@ def _get_priority_vars_and_indexes( Parameters ---------- - objects : list of dict-like of Variable + objects : sequence of dict-like of Variable Dictionaries in which to find the priority variables. priority_arg : int or None Integer object whose variable should take priority. diff --git a/xarray/core/rolling_exp.py b/xarray/core/rolling_exp.py index c56bf6a384e..cb77358869c 100644 --- a/xarray/core/rolling_exp.py +++ b/xarray/core/rolling_exp.py @@ -147,9 +147,9 @@ def mean(self, keep_attrs: bool | None = None) -> T_DataWithCoords: input_core_dims=[[self.dim]], kwargs=dict(alpha=self.alpha, axis=-1), output_core_dims=[[self.dim]], - exclude_dims={self.dim}, keep_attrs=keep_attrs, on_missing_core_dim="copy", + dask="parallelized", ).transpose(*dim_order) def sum(self, keep_attrs: bool | None = None) -> T_DataWithCoords: @@ -183,7 +183,7 @@ def sum(self, keep_attrs: bool | None = None) -> T_DataWithCoords: input_core_dims=[[self.dim]], kwargs=dict(alpha=self.alpha, axis=-1), output_core_dims=[[self.dim]], - exclude_dims={self.dim}, keep_attrs=keep_attrs, on_missing_core_dim="copy", + dask="parallelized", ).transpose(*dim_order) diff --git a/xarray/core/types.py b/xarray/core/types.py index 795283fa88b..2af9591d22a 100644 --- a/xarray/core/types.py +++ b/xarray/core/types.py @@ -145,6 +145,8 @@ def copy( ... +T_Alignable = TypeVar("T_Alignable", bound="Alignable") + T_Backend = TypeVar("T_Backend", bound="BackendEntrypoint") T_Dataset = TypeVar("T_Dataset", bound="Dataset") T_DataArray = TypeVar("T_DataArray", bound="DataArray") @@ -168,7 +170,6 @@ def copy( # on `DataWithCoords`. T_DataWithCoords = TypeVar("T_DataWithCoords", bound="DataWithCoords") -T_Alignable = TypeVar("T_Alignable", bound="Alignable") # Temporary placeholder for indicating an array api compliant type. # hopefully in the future we can narrow this down more: diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 5d18d5e12d5..67de33ba615 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -2332,9 +2332,9 @@ def test_align(self) -> None: assert np.isnan(left2["var3"][-2:]).all() with pytest.raises(ValueError, match=r"invalid value for join"): - align(left, right, join="foobar") # type: ignore[arg-type] + align(left, right, join="foobar") # type: ignore[call-overload] with pytest.raises(TypeError): - align(left, right, foo="bar") # type: ignore[call-arg] + align(left, right, foo="bar") # type: ignore[call-overload] def test_align_exact(self) -> None: left = xr.Dataset(coords={"x": [0, 1]}) diff --git a/xarray/tests/test_rolling.py b/xarray/tests/test_rolling.py index 2dc8ae24438..da834b76124 100644 --- a/xarray/tests/test_rolling.py +++ b/xarray/tests/test_rolling.py @@ -788,7 +788,9 @@ def test_raise_no_warning_dask_rolling_assert_close(self, ds, name) -> None: @requires_numbagg class TestDatasetRollingExp: - @pytest.mark.parametrize("backend", ["numpy"], indirect=True) + @pytest.mark.parametrize( + "backend", ["numpy", pytest.param("dask", marks=requires_dask)], indirect=True + ) def test_rolling_exp(self, ds) -> None: result = ds.rolling_exp(time=10, window_type="span").mean() assert isinstance(result, Dataset) diff --git a/xarray/tests/test_units.py b/xarray/tests/test_units.py index d89a74e4fba..7e1105e2e5d 100644 --- a/xarray/tests/test_units.py +++ b/xarray/tests/test_units.py @@ -305,11 +305,13 @@ def __call__(self, obj, *args, **kwargs): all_args = merge_args(self.args, args) all_kwargs = {**self.kwargs, **kwargs} + from xarray.core.groupby import GroupBy + xarray_classes = ( xr.Variable, xr.DataArray, xr.Dataset, - xr.core.groupby.GroupBy, + GroupBy, ) if not isinstance(obj, xarray_classes):