From 3b3f65399e9d91be8896c781e555fd63c04eccfd Mon Sep 17 00:00:00 2001 From: Michael Niklas Date: Sun, 18 Sep 2022 16:26:13 +0200 Subject: [PATCH 01/13] add parse_dims func --- xarray/core/utils.py | 67 ++++++++++++++++++++++++++++++++++++++ xarray/tests/test_utils.py | 41 ++++++++++++++++++++++- 2 files changed, 107 insertions(+), 1 deletion(-) diff --git a/xarray/core/utils.py b/xarray/core/utils.py index 51bf1346506..865c29111db 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -21,6 +21,7 @@ Hashable, Iterable, Iterator, + Literal, Mapping, MutableMapping, MutableSet, @@ -919,6 +920,72 @@ def drop_missing_dims( ) +@overload +def parse_dims( + dim: str | Iterable[Hashable] | None, + all_dims: tuple[Hashable, ...], + *, + check: bool = True, + replace_none: Literal[True] = True, +) -> tuple[Hashable, ...]: + ... + + +@overload +def parse_dims( + dim: str | Iterable[Hashable] | None, + all_dims: tuple[Hashable, ...], + *, + check: bool = True, + replace_none: Literal[False], +) -> tuple[Hashable, ...] | None: + ... + + +def parse_dims( + dim: str | Iterable[Hashable] | None, + all_dims: tuple[Hashable, ...], + *, + check: bool = True, + replace_none: bool = True, +) -> tuple[Hashable, ...] | None: + """Parse one or more dimensions. + + A single dimension must be always a str, multiple dimensions + can be Hashables. This supports e.g. using a tuple as a dimension. + + Parameters + ---------- + dim : str, Iterable of Hashable or None + Dimension(s) to parse. + all_dims : tuple of Hashable + All possible dimensions. + check: bool, default: True + if True, check if dim is a subset of all_dims. + replace_none : bool, default: True + If True, return all_dims if dim is None. + + Returns + ------- + parsed_dims : tuple of Hashable + Input dimensions as a tuple. 
+ """ + if dim is None: + if replace_none: + return all_dims + return None + if isinstance(dim, str): + dim = (dim,) + if check: + wrong_dims = set(dim) - set(all_dims) + if wrong_dims: + wrong_dims_str = ", ".join(f"'{d!s}'" for d in wrong_dims) + raise ValueError( + f"Dimension(s) {wrong_dims_str} do not exist. Expected one or more of {all_dims}" + ) + return tuple(dim) + + _Accessor = TypeVar("_Accessor") diff --git a/xarray/tests/test_utils.py b/xarray/tests/test_utils.py index 6a39c028d2f..f2a7c58b6fd 100644 --- a/xarray/tests/test_utils.py +++ b/xarray/tests/test_utils.py @@ -1,7 +1,7 @@ from __future__ import annotations from datetime import datetime -from typing import Hashable +from typing import Hashable, Iterable import numpy as np import pandas as pd @@ -294,6 +294,45 @@ def test_infix_dims_errors(supplied, all_): list(utils.infix_dims(supplied, all_)) +@pytest.mark.parametrize( + ["dim", "expected"], + [ + pytest.param("a", ("a",), id="str"), + pytest.param(["a", "b"], ("a", "b"), id="list_of_str"), + pytest.param(["a", 1], ("a", 1), id="list_mixed"), + pytest.param(["a", ("b", "c")], ("a", ("b", "c")), id="list_with_tuple"), + pytest.param(None, None, id="None"), + ], +) +def test_parse_dims( + dim: str | Iterable[Hashable] | None, + expected: tuple[Hashable, ...], +) -> None: + all_dims = ("a", "b", 1, ("b", "c")) # different Hashables + actual = utils.parse_dims(dim, all_dims, replace_none=False) + assert actual == expected + + +def test_parse_dims_replace_none() -> None: + all_dims = ("a", "b", 1, ("b", "c")) # different Hashables + actual = utils.parse_dims(None, all_dims, replace_none=True) + assert actual == all_dims + + +@pytest.mark.parametrize( + "dim", + [ + pytest.param("x", id="str_missing"), + pytest.param(["a", "x"], id="list_missing_one"), + pytest.param(["x", 2], id="list_missing_all"), + ], +) +def test_parse_dims_raises(dim: str | Iterable[Hashable] | None) -> None: + all_dims = ("a", "b", 1, ("b", "c")) # different Hashables + with 
pytest.raises(ValueError, match="'x'"): + utils.parse_dims(dim, all_dims, check=True) + + @pytest.mark.parametrize( "nested_list, expected", [ From 1b3239c2f63125ad999bc7816b53e9389d210931 Mon Sep 17 00:00:00 2001 From: Michael Niklas Date: Sun, 18 Sep 2022 18:34:47 +0200 Subject: [PATCH 02/13] add some more tests for tuples --- xarray/tests/test_utils.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/xarray/tests/test_utils.py b/xarray/tests/test_utils.py index f2a7c58b6fd..f70eb5d8b15 100644 --- a/xarray/tests/test_utils.py +++ b/xarray/tests/test_utils.py @@ -300,7 +300,9 @@ def test_infix_dims_errors(supplied, all_): pytest.param("a", ("a",), id="str"), pytest.param(["a", "b"], ("a", "b"), id="list_of_str"), pytest.param(["a", 1], ("a", 1), id="list_mixed"), + pytest.param(("a", "b"), ("a", "b"), id="tuple_of_str"), pytest.param(["a", ("b", "c")], ("a", ("b", "c")), id="list_with_tuple"), + pytest.param((("b", "c"),), (("b", "c"),), id="tuple_of_tuple"), pytest.param(None, None, id="None"), ], ) @@ -308,13 +310,13 @@ def test_parse_dims( dim: str | Iterable[Hashable] | None, expected: tuple[Hashable, ...], ) -> None: - all_dims = ("a", "b", 1, ("b", "c")) # different Hashables + all_dims = ("a", "b", 1, ("b", "c")) # selection of different Hashables actual = utils.parse_dims(dim, all_dims, replace_none=False) assert actual == expected def test_parse_dims_replace_none() -> None: - all_dims = ("a", "b", 1, ("b", "c")) # different Hashables + all_dims = ("a", "b", 1, ("b", "c")) # selection of different Hashables actual = utils.parse_dims(None, all_dims, replace_none=True) assert actual == all_dims @@ -327,8 +329,8 @@ def test_parse_dims_replace_none() -> None: pytest.param(["x", 2], id="list_missing_all"), ], ) -def test_parse_dims_raises(dim: str | Iterable[Hashable] | None) -> None: - all_dims = ("a", "b", 1, ("b", "c")) # different Hashables +def test_parse_dims_raises(dim: str | Iterable[Hashable]) -> None: + all_dims = ("a", 
"b", 1, ("b", "c")) # selection of different Hashables with pytest.raises(ValueError, match="'x'"): utils.parse_dims(dim, all_dims, check=True) From 752a0ecf8d0a4d1bc9d9bd611a64990a7144e3a8 Mon Sep 17 00:00:00 2001 From: Michael Niklas Date: Tue, 4 Oct 2022 22:29:33 +0200 Subject: [PATCH 03/13] add parse_ordered_dims --- xarray/core/types.py | 3 +- xarray/core/utils.py | 138 ++++++++++++++++++++++++++++++------- xarray/tests/test_utils.py | 51 ++++++++++++-- 3 files changed, 162 insertions(+), 30 deletions(-) diff --git a/xarray/core/types.py b/xarray/core/types.py index d47379cbe5c..2fa40261055 100644 --- a/xarray/core/types.py +++ b/xarray/core/types.py @@ -64,7 +64,8 @@ VarCompatible = Union["Variable", "ScalarOrArray"] GroupByIncompatible = Union["Variable", "GroupBy"] -Dims = Union[str, Iterable[Hashable], None] +Dims = Union[str, Iterable[Hashable], "ellipsis", None] +OrderedDims = Union[str, Sequence[Union[Hashable, "ellipsis"]], "ellipsis", None] ErrorOptions = Literal["raise", "ignore"] ErrorOptionsWithWarn = Literal["raise", "warn", "ignore"] diff --git a/xarray/core/utils.py b/xarray/core/utils.py index cb98c40d91e..7498a2f1bfb 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -25,6 +25,7 @@ Mapping, MutableMapping, MutableSet, + Sequence, TypeVar, cast, overload, @@ -34,7 +35,7 @@ import pandas as pd if TYPE_CHECKING: - from .types import ErrorOptionsWithWarn + from .types import Dims, ErrorOptionsWithWarn, OrderedDims K = TypeVar("K") V = TypeVar("V") @@ -895,15 +896,17 @@ def drop_dims_from_indexers( def drop_missing_dims( - supplied_dims: Collection, dims: Collection, missing_dims: ErrorOptionsWithWarn -) -> Collection: + supplied_dims: Iterable[Hashable], + dims: Iterable[Hashable], + missing_dims: ErrorOptionsWithWarn, +) -> Iterable[Hashable]: """Depending on the setting of missing_dims, drop any dimensions from supplied_dims that are not present in dims. 
Parameters ---------- - supplied_dims : dict - dims : sequence + supplied_dims : Iterable of Hashable + dims : Iterable of Hashable missing_dims : {"raise", "warn", "ignore"} """ @@ -936,12 +939,15 @@ def drop_missing_dims( ) +T_None = TypeVar("T_None", None, ellipsis) + + @overload def parse_dims( - dim: str | Iterable[Hashable] | None, + dim: str | Iterable[Hashable] | T_None, all_dims: tuple[Hashable, ...], *, - check: bool = True, + check_exists: bool = True, replace_none: Literal[True] = True, ) -> tuple[Hashable, ...]: ... @@ -949,59 +955,141 @@ def parse_dims( @overload def parse_dims( - dim: str | Iterable[Hashable] | None, + dim: str | Iterable[Hashable] | T_None, all_dims: tuple[Hashable, ...], *, - check: bool = True, + check_exists: bool = True, replace_none: Literal[False], -) -> tuple[Hashable, ...] | None: +) -> tuple[Hashable, ...] | T_None: ... def parse_dims( - dim: str | Iterable[Hashable] | None, + dim: Dims, all_dims: tuple[Hashable, ...], *, - check: bool = True, + check_exists: bool = True, replace_none: bool = True, -) -> tuple[Hashable, ...] | None: +) -> tuple[Hashable, ...] | None | ellipsis: """Parse one or more dimensions. A single dimension must be always a str, multiple dimensions can be Hashables. This supports e.g. using a tuple as a dimension. + If you supply e.g. a set of dimensions the order cannot be + conserved, but for sequences it will be. Parameters ---------- - dim : str, Iterable of Hashable or None + dim : str, Iterable of Hashable, "..." or None Dimension(s) to parse. all_dims : tuple of Hashable All possible dimensions. - check: bool, default: True + check_exists: bool, default: True if True, check if dim is a subset of all_dims. replace_none : bool, default: True - If True, return all_dims if dim is None. + If True, return all_dims if dim is None or "...". Returns ------- parsed_dims : tuple of Hashable Input dimensions as a tuple. 
""" - if dim is None: + if dim is None or dim is ...: if replace_none: return all_dims - return None + return dim if isinstance(dim, str): dim = (dim,) - if check: - wrong_dims = set(dim) - set(all_dims) - if wrong_dims: - wrong_dims_str = ", ".join(f"'{d!s}'" for d in wrong_dims) - raise ValueError( - f"Dimension(s) {wrong_dims_str} do not exist. Expected one or more of {all_dims}" - ) + if check_exists: + _check_dims(set(dim), set(all_dims)) return tuple(dim) +@overload +def parse_ordered_dims( + dim: str | Sequence[Hashable | ellipsis] | T_None, + all_dims: tuple[Hashable, ...], + *, + check_exists: bool = True, + replace_none: Literal[True] = True, +) -> tuple[Hashable, ...]: + ... + + +@overload +def parse_ordered_dims( + dim: str | Sequence[Hashable | ellipsis] | T_None, + all_dims: tuple[Hashable, ...], + *, + check_exists: bool = True, + replace_none: Literal[False], +) -> tuple[Hashable, ...] | T_None: + ... + + +def parse_ordered_dims( + dim: OrderedDims, + all_dims: tuple[Hashable, ...], + *, + check_exists: bool = True, + replace_none: bool = True, +) -> tuple[Hashable, ...] | None | ellipsis: + """Parse one or more dimensions. + + A single dimension must be always a str, multiple dimensions + can be Hashables. This supports e.g. using a tuple as a dimension. + An ellipsis ("...") in a sequence of dimensions will be + replaced with all remaining dimensions. This only makes sense when + the input is a sequence and not e.g. a set. + + Parameters + ---------- + dim : str, Sequence of Hashable or "...", "..." or None + Dimension(s) to parse. If "..." appears in a Sequence + it always gets replaced with all remaining dims + all_dims : tuple of Hashable + All possible dimensions. + check_exists: bool, default: True + if True, check if dim is a subset of all_dims. + replace_none : bool, default: True + If True, return all_dims if dim is None. + + Returns + ------- + parsed_dims : tuple of Hashable + Input dimensions as a tuple. 
+ """ + if dim is not None and dim is not ... and not isinstance(dim, str) and ... in dim: + dims_set: set[Hashable | ellipsis] = set(dim) + all_dims_set = set(all_dims) + if check_exists: + _check_dims(dims_set, all_dims_set) + if len(all_dims_set) != len(all_dims): + raise ValueError("Cannot use ellipsis with repeated dims") + dims = tuple(dim) + if dims.count(...) > 1: + raise ValueError("More than one ellipsis supplied") + other_dims = tuple(d for d in all_dims if d not in dims_set) + idx = dims.index(...) + return dims[:idx] + other_dims + dims[idx + 1 :] + else: + return parse_dims( # type: ignore[call-overload] + dim=dim, + all_dims=all_dims, + check_exists=check_exists, + replace_none=replace_none, + ) + + +def _check_dims(dim: set[Hashable | ellipsis], all_dims: set[Hashable]) -> None: + wrong_dims = dim - all_dims + if wrong_dims and wrong_dims != {...}: + wrong_dims_str = ", ".join(f"'{d!s}'" for d in wrong_dims) + raise ValueError( + f"Dimension(s) {wrong_dims_str} do not exist. 
Expected one or more of {all_dims}" + ) + + _Accessor = TypeVar("_Accessor") diff --git a/xarray/tests/test_utils.py b/xarray/tests/test_utils.py index f70eb5d8b15..b981afd47da 100644 --- a/xarray/tests/test_utils.py +++ b/xarray/tests/test_utils.py @@ -1,7 +1,7 @@ from __future__ import annotations from datetime import datetime -from typing import Hashable, Iterable +from typing import Hashable, Iterable, Sequence import numpy as np import pandas as pd @@ -304,6 +304,7 @@ def test_infix_dims_errors(supplied, all_): pytest.param(["a", ("b", "c")], ("a", ("b", "c")), id="list_with_tuple"), pytest.param((("b", "c"),), (("b", "c"),), id="tuple_of_tuple"), pytest.param(None, None, id="None"), + pytest.param(..., ..., id="ellipsis"), ], ) def test_parse_dims( @@ -315,9 +316,19 @@ def test_parse_dims( assert actual == expected -def test_parse_dims_replace_none() -> None: +def test_parse_dims_set() -> None: all_dims = ("a", "b", 1, ("b", "c")) # selection of different Hashables - actual = utils.parse_dims(None, all_dims, replace_none=True) + dim = {"a", 1} + actual = utils.parse_dims(dim, all_dims) + assert set(actual) == dim + + +@pytest.mark.parametrize( + "dim", [pytest.param(None, id="None"), pytest.param(..., id="ellipsis")] +) +def test_parse_dims_replace_none(dim: None | ellipsis) -> None: + all_dims = ("a", "b", 1, ("b", "c")) # selection of different Hashables + actual = utils.parse_dims(dim, all_dims, replace_none=True) assert actual == all_dims @@ -332,7 +343,39 @@ def test_parse_dims_replace_none() -> None: def test_parse_dims_raises(dim: str | Iterable[Hashable]) -> None: all_dims = ("a", "b", 1, ("b", "c")) # selection of different Hashables with pytest.raises(ValueError, match="'x'"): - utils.parse_dims(dim, all_dims, check=True) + utils.parse_dims(dim, all_dims, check_exists=True) + + +@pytest.mark.parametrize( + ["dim", "expected"], + [ + pytest.param("a", ("a",), id="str"), + pytest.param(["a", "b"], ("a", "b"), id="list"), + pytest.param([...], ("a", 
"b", "c"), id="list_only_ellipsis"), + pytest.param(["a", ...], ("a", "b", "c"), id="list_with_ellipsis"), + pytest.param(["a", ..., "b"], ("a", "c", "b"), id="list_with_middle_ellipsis"), + ], +) +def test_parse_ordered_dims( + dim: str | Sequence[Hashable | ellipsis], + expected: tuple[Hashable, ...], +) -> None: + all_dims = ("a", "b", "c") + actual = utils.parse_ordered_dims(dim, all_dims) + assert actual == expected + + +def test_parse_ordered_dims_raises() -> None: + all_dims = ("a", "b", "c") + + with pytest.raises(ValueError, match="'x' do not exist"): + utils.parse_ordered_dims("x", all_dims, check_exists=True) + + with pytest.raises(ValueError, match="repeated dims"): + utils.parse_ordered_dims(["a", ...], all_dims + ("a",)) + + with pytest.raises(ValueError, match="More than one ellipsis"): + utils.parse_ordered_dims(["a", ..., "b", ...], all_dims) @pytest.mark.parametrize( From 94d9fdcc758266e6598ffffad060163eef10206a Mon Sep 17 00:00:00 2001 From: Michael Niklas Date: Tue, 4 Oct 2022 22:32:46 +0200 Subject: [PATCH 04/13] fix typing issue --- xarray/core/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/utils.py b/xarray/core/utils.py index 7498a2f1bfb..3a4991eb576 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -939,7 +939,7 @@ def drop_missing_dims( ) -T_None = TypeVar("T_None", None, ellipsis) +T_None = TypeVar("T_None", None, "ellipsis") @overload From 3d42e8b7a21a0079a9aefa904ed3fe418e5a5244 Mon Sep 17 00:00:00 2001 From: Michael Niklas Date: Thu, 24 Nov 2022 21:28:47 +0100 Subject: [PATCH 05/13] remove double ellipsis typehints --- xarray/core/_aggregations.py | 230 +++++++++++++++++------------------ 1 file changed, 115 insertions(+), 115 deletions(-) diff --git a/xarray/core/_aggregations.py b/xarray/core/_aggregations.py index d7d928fc843..561ac1dc617 100644 --- a/xarray/core/_aggregations.py +++ b/xarray/core/_aggregations.py @@ -44,9 +44,9 @@ def count( Parameters ---------- - dim : str, 
Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. keep_attrs : bool or None, optional If True, ``attrs`` will be copied from the original object to the new one. If False, the new object will be @@ -94,7 +94,7 @@ def count( Dimensions: () Data variables: - da int64 5 + da int32 5 """ return self.reduce( duck_array_ops.count, @@ -116,9 +116,9 @@ def all( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. keep_attrs : bool or None, optional If True, ``attrs`` will be copied from the original object to the new one. If False, the new object will be @@ -188,9 +188,9 @@ def any( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. keep_attrs : bool or None, optional If True, ``attrs`` will be copied from the original object to the new one. If False, the new object will be @@ -261,9 +261,9 @@ def max( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. 
+ or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -348,9 +348,9 @@ def min( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -435,9 +435,9 @@ def mean( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -527,9 +527,9 @@ def prod( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). 
By default, only skips missing values for float dtypes; other dtypes either do not @@ -634,9 +634,9 @@ def sum( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -741,9 +741,9 @@ def std( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -845,9 +845,9 @@ def var( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -948,9 +948,9 @@ def median( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``median``. For e.g. 
``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -1039,9 +1039,9 @@ def cumsum( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``cumsum``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -1132,9 +1132,9 @@ def cumprod( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``cumprod``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -1240,9 +1240,9 @@ def count( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. keep_attrs : bool or None, optional If True, ``attrs`` will be copied from the original object to the new one. 
If False, the new object will be @@ -1306,9 +1306,9 @@ def all( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. keep_attrs : bool or None, optional If True, ``attrs`` will be copied from the original object to the new one. If False, the new object will be @@ -1372,9 +1372,9 @@ def any( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. keep_attrs : bool or None, optional If True, ``attrs`` will be copied from the original object to the new one. If False, the new object will be @@ -1439,9 +1439,9 @@ def max( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -1518,9 +1518,9 @@ def min( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. 
If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -1597,9 +1597,9 @@ def mean( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -1681,9 +1681,9 @@ def prod( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -1778,9 +1778,9 @@ def sum( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). 
By default, only skips missing values for float dtypes; other dtypes either do not @@ -1875,9 +1875,9 @@ def std( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -1969,9 +1969,9 @@ def var( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -2062,9 +2062,9 @@ def median( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -2145,9 +2145,9 @@ def cumsum( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``cumsum``. For e.g. 
``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -2234,9 +2234,9 @@ def cumprod( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``cumprod``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -2317,7 +2317,7 @@ class DatasetGroupByAggregations: def reduce( self, func: Callable[..., Any], - dim: Dims | ellipsis = None, + dim: Dims = None, *, axis: int | Sequence[int] | None = None, keep_attrs: bool | None = None, @@ -2328,14 +2328,14 @@ def reduce( def _flox_reduce( self, - dim: Dims | ellipsis, + dim: Dims, **kwargs: Any, ) -> Dataset: raise NotImplementedError() def count( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, keep_attrs: bool | None = None, **kwargs: Any, @@ -2424,7 +2424,7 @@ def count( def all( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, keep_attrs: bool | None = None, **kwargs: Any, @@ -2513,7 +2513,7 @@ def all( def any( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, keep_attrs: bool | None = None, **kwargs: Any, @@ -2602,7 +2602,7 @@ def any( def max( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ -2709,7 +2709,7 @@ def max( def min( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ 
-2816,7 +2816,7 @@ def min( def mean( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ -2927,7 +2927,7 @@ def mean( def prod( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, min_count: int | None = None, @@ -3057,7 +3057,7 @@ def prod( def sum( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, min_count: int | None = None, @@ -3187,7 +3187,7 @@ def sum( def std( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, ddof: int = 0, @@ -3314,7 +3314,7 @@ def std( def var( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, ddof: int = 0, @@ -3441,7 +3441,7 @@ def var( def median( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ -3537,7 +3537,7 @@ def median( def cumsum( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ -3631,7 +3631,7 @@ def cumsum( def cumprod( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ -3730,7 +3730,7 @@ class DatasetResampleAggregations: def reduce( self, func: Callable[..., Any], - dim: Dims | ellipsis = None, + dim: Dims = None, *, axis: int | Sequence[int] | None = None, keep_attrs: bool | None = None, @@ -3741,14 +3741,14 @@ def reduce( def _flox_reduce( self, - dim: Dims | ellipsis, + dim: Dims, **kwargs: Any, ) -> Dataset: raise NotImplementedError() def count( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, keep_attrs: bool | None = None, **kwargs: Any, @@ -3837,7 +3837,7 @@ def count( def all( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, keep_attrs: bool | None = None, **kwargs: Any, @@ -3926,7 +3926,7 @@ def all( def any( self, - dim: Dims | ellipsis = None, + dim: Dims = 
None, *, keep_attrs: bool | None = None, **kwargs: Any, @@ -4015,7 +4015,7 @@ def any( def max( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ -4122,7 +4122,7 @@ def max( def min( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ -4229,7 +4229,7 @@ def min( def mean( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ -4340,7 +4340,7 @@ def mean( def prod( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, min_count: int | None = None, @@ -4470,7 +4470,7 @@ def prod( def sum( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, min_count: int | None = None, @@ -4600,7 +4600,7 @@ def sum( def std( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, ddof: int = 0, @@ -4727,7 +4727,7 @@ def std( def var( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, ddof: int = 0, @@ -4854,7 +4854,7 @@ def var( def median( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ -4950,7 +4950,7 @@ def median( def cumsum( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ -5044,7 +5044,7 @@ def cumsum( def cumprod( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ -5143,7 +5143,7 @@ class DataArrayGroupByAggregations: def reduce( self, func: Callable[..., Any], - dim: Dims | ellipsis = None, + dim: Dims = None, *, axis: int | Sequence[int] | None = None, keep_attrs: bool | None = None, @@ -5154,14 +5154,14 @@ def reduce( def _flox_reduce( self, - dim: Dims | ellipsis, + dim: Dims, **kwargs: Any, ) -> DataArray: 
raise NotImplementedError() def count( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, keep_attrs: bool | None = None, **kwargs: Any, @@ -5217,7 +5217,7 @@ def count( >>> da.groupby("labels").count() - array([1, 2, 2]) + array([1, 2, 2], dtype=int64) Coordinates: * labels (labels) object 'a' 'b' 'c' """ @@ -5243,7 +5243,7 @@ def count( def all( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, keep_attrs: bool | None = None, **kwargs: Any, @@ -5325,7 +5325,7 @@ def all( def any( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, keep_attrs: bool | None = None, **kwargs: Any, @@ -5407,7 +5407,7 @@ def any( def max( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ -5505,7 +5505,7 @@ def max( def min( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ -5603,7 +5603,7 @@ def min( def mean( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ -5705,7 +5705,7 @@ def mean( def prod( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, min_count: int | None = None, @@ -5824,7 +5824,7 @@ def prod( def sum( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, min_count: int | None = None, @@ -5943,7 +5943,7 @@ def sum( def std( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, ddof: int = 0, @@ -6059,7 +6059,7 @@ def std( def var( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, ddof: int = 0, @@ -6175,7 +6175,7 @@ def var( def median( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ -6263,7 +6263,7 @@ def median( def cumsum( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, 
keep_attrs: bool | None = None, @@ -6353,7 +6353,7 @@ def cumsum( def cumprod( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ -6448,7 +6448,7 @@ class DataArrayResampleAggregations: def reduce( self, func: Callable[..., Any], - dim: Dims | ellipsis = None, + dim: Dims = None, *, axis: int | Sequence[int] | None = None, keep_attrs: bool | None = None, @@ -6459,14 +6459,14 @@ def reduce( def _flox_reduce( self, - dim: Dims | ellipsis, + dim: Dims, **kwargs: Any, ) -> DataArray: raise NotImplementedError() def count( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, keep_attrs: bool | None = None, **kwargs: Any, @@ -6522,7 +6522,7 @@ def count( >>> da.resample(time="3M").count() - array([1, 3, 1]) + array([1, 3, 1], dtype=int64) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ @@ -6548,7 +6548,7 @@ def count( def all( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, keep_attrs: bool | None = None, **kwargs: Any, @@ -6630,7 +6630,7 @@ def all( def any( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, keep_attrs: bool | None = None, **kwargs: Any, @@ -6712,7 +6712,7 @@ def any( def max( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ -6810,7 +6810,7 @@ def max( def min( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ -6908,7 +6908,7 @@ def min( def mean( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ -7010,7 +7010,7 @@ def mean( def prod( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, min_count: int | None = None, @@ -7129,7 +7129,7 @@ def prod( def sum( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, min_count: int | None = None, @@ 
-7248,7 +7248,7 @@ def sum( def std( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, ddof: int = 0, @@ -7364,7 +7364,7 @@ def std( def var( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, ddof: int = 0, @@ -7480,7 +7480,7 @@ def var( def median( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ -7568,7 +7568,7 @@ def median( def cumsum( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ -7658,7 +7658,7 @@ def cumsum( def cumprod( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, From 44d82c1dcec84a569186dfccba7912d8fa67c960 Mon Sep 17 00:00:00 2001 From: Michael Niklas Date: Thu, 24 Nov 2022 21:31:08 +0100 Subject: [PATCH 06/13] fix gen aggrs --- xarray/util/generate_aggregations.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/xarray/util/generate_aggregations.py b/xarray/util/generate_aggregations.py index 01576f4c316..bc4893543fb 100644 --- a/xarray/util/generate_aggregations.py +++ b/xarray/util/generate_aggregations.py @@ -60,7 +60,7 @@ class {obj}{cls}Aggregations: def reduce( self, func: Callable[..., Any], - dim: Dims | ellipsis = None, + dim: Dims = None, *, axis: int | Sequence[int] | None = None, keep_attrs: bool | None = None, @@ -71,7 +71,7 @@ def reduce( def _flox_reduce( self, - dim: Dims | ellipsis, + dim: Dims, **kwargs: Any, ) -> {obj}: raise NotImplementedError()""" @@ -84,7 +84,7 @@ class {obj}{cls}Aggregations: def reduce( self, func: Callable[..., Any], - dim: Dims | ellipsis = None, + dim: Dims = None, *, axis: int | Sequence[int] | None = None, keep_attrs: bool | None = None, @@ -95,7 +95,7 @@ def reduce( def _flox_reduce( self, - dim: Dims | ellipsis, + dim: Dims, **kwargs: Any, ) -> {obj}: raise 
NotImplementedError()""" @@ -117,7 +117,7 @@ def {method}( TEMPLATE_REDUCTION_SIGNATURE_GROUPBY = ''' def {method}( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *,{extra_kwargs} keep_attrs: bool | None = None, **kwargs: Any, @@ -149,9 +149,9 @@ def {method}( ----- {notes}""" -_DIM_DOCSTRING = """dim : str, Iterable of Hashable, or None, default: None +_DIM_DOCSTRING = """dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``{method}``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions.""" + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions.""" _DIM_DOCSTRING_GROUPBY = """dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``{method}``. For e.g. ``dim="x"`` From ccf9561347cad6222e6f1e13469088aa48917be0 Mon Sep 17 00:00:00 2001 From: Michael Niklas Date: Thu, 24 Nov 2022 21:31:41 +0100 Subject: [PATCH 07/13] remove more double ellipsis typehints --- xarray/core/dataarray.py | 8 ++++---- xarray/core/dataset.py | 2 +- xarray/core/groupby.py | 8 ++++---- xarray/core/resample.py | 4 ++-- xarray/core/variable.py | 8 ++++---- xarray/core/weighted.py | 2 +- 6 files changed, 16 insertions(+), 16 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index ff55028ff82..caa68bfae5c 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -3613,7 +3613,7 @@ def combine_first(self: T_DataArray, other: T_DataArray) -> T_DataArray: def reduce( self: T_DataArray, func: Callable[..., Any], - dim: Dims | ellipsis = None, + dim: Dims = None, *, axis: int | Sequence[int] | None = None, keep_attrs: bool | None = None, @@ -4600,7 +4600,7 @@ def imag(self: T_DataArray) -> T_DataArray: def dot( self: T_DataArray, other: T_DataArray, - dims: Dims | ellipsis = None, + dims: Dims = None, ) -> T_DataArray: """Perform dot product of two DataArrays along their shared dims. 
@@ -5604,7 +5604,7 @@ def idxmax( # https://github.com/python/mypy/issues/12846 is resolved def argmin( self, - dim: Dims | ellipsis = None, + dim: Dims = None, axis: int | None = None, keep_attrs: bool | None = None, skipna: bool | None = None, @@ -5706,7 +5706,7 @@ def argmin( # https://github.com/python/mypy/issues/12846 is resolved def argmax( self, - dim: Dims | ellipsis = None, + dim: Dims = None, axis: int | None = None, keep_attrs: bool | None = None, skipna: bool | None = None, diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 6c432545669..67393ef222c 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -5787,7 +5787,7 @@ def combine_first(self: T_Dataset, other: T_Dataset) -> T_Dataset: def reduce( self: T_Dataset, func: Callable, - dim: Dims | ellipsis = None, + dim: Dims = None, *, keep_attrs: bool | None = None, keepdims: bool = False, diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index e42330d4973..2cd4a2d3805 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -493,7 +493,7 @@ def map( def reduce( self, func: Callable[..., Any], - dim: Dims | ellipsis = None, + dim: Dims = None, *, axis: int | Sequence[int] | None = None, keep_attrs: bool | None = None, @@ -652,7 +652,7 @@ def _maybe_unstack(self, obj): def _flox_reduce( self, - dim: Dims | ellipsis, + dim: Dims, keep_attrs: bool | None = None, **kwargs: Any, ): @@ -1143,7 +1143,7 @@ def _combine(self, applied, shortcut=False): def reduce( self, func: Callable[..., Any], - dim: Dims | ellipsis = None, + dim: Dims = None, *, axis: int | Sequence[int] | None = None, keep_attrs: bool | None = None, @@ -1296,7 +1296,7 @@ def _combine(self, applied): def reduce( self, func: Callable[..., Any], - dim: Dims | ellipsis = None, + dim: Dims = None, *, axis: int | Sequence[int] | None = None, keep_attrs: bool | None = None, diff --git a/xarray/core/resample.py b/xarray/core/resample.py index 3760030164f..61a12f1b446 100644 --- 
a/xarray/core/resample.py +++ b/xarray/core/resample.py @@ -49,7 +49,7 @@ def __init__( def _flox_reduce( self, - dim: Dims | ellipsis, + dim: Dims, keep_attrs: bool | None = None, **kwargs, ) -> T_Xarray: @@ -368,7 +368,7 @@ def apply(self, func, args=(), shortcut=None, **kwargs): def reduce( self, func: Callable[..., Any], - dim: Dims | ellipsis = None, + dim: Dims = None, *, axis: int | Sequence[int] | None = None, keep_attrs: bool | None = None, diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 2a287e5f3b3..756e076783e 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1889,7 +1889,7 @@ def clip(self, min=None, max=None): def reduce( self, func: Callable[..., Any], - dim: Dims | ellipsis = None, + dim: Dims = None, axis: int | Sequence[int] | None = None, keep_attrs: bool | None = None, keepdims: bool = False, @@ -2663,7 +2663,7 @@ def _to_numeric(self, offset=None, datetime_unit=None, dtype=float): def _unravel_argminmax( self, argminmax: str, - dim: Dims | ellipsis, + dim: Dims, axis: int | None, keep_attrs: bool | None, skipna: bool | None, @@ -2732,7 +2732,7 @@ def _unravel_argminmax( def argmin( self, - dim: Dims | ellipsis = None, + dim: Dims = None, axis: int | None = None, keep_attrs: bool | None = None, skipna: bool | None = None, @@ -2777,7 +2777,7 @@ def argmin( def argmax( self, - dim: Dims | ellipsis = None, + dim: Dims = None, axis: int | None = None, keep_attrs: bool | None = None, skipna: bool | None = None, diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py index aacbbe69e3c..0f3a9aa3432 100644 --- a/xarray/core/weighted.py +++ b/xarray/core/weighted.py @@ -207,7 +207,7 @@ def _check_dim(self, dim: Dims): def _reduce( da: DataArray, weights: DataArray, - dim: Dims | ellipsis = None, + dim: Dims = None, skipna: bool | None = None, ) -> DataArray: """reduce using dot; equivalent to (da * weights).sum(dim, skipna) From 29673d5bc19e820b0127cab85aeb4b2cb66c276d Mon Sep 17 00:00:00 2001 From: Mathias 
Hauser Date: Mon, 28 Nov 2022 20:31:03 +0100 Subject: [PATCH 08/13] fix doctests: supress urllib3 warning (#7326) --- xarray/tests/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index 5b2359a2d05..6970a34b63d 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -35,6 +35,8 @@ except ImportError: pass +# https://github.com/pydata/xarray/issues/7322 +warnings.filterwarnings("ignore", "'urllib3.contrib.pyopenssl' module is deprecated") arm_xfail = pytest.mark.xfail( platform.machine() == "aarch64" or "arm" in platform.machine(), From 91fda3799a3c6d0efb30335cb6f854c36d0970b2 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Mon, 28 Nov 2022 18:38:52 -0500 Subject: [PATCH 09/13] Enable `origin` and `offset` arguments in `resample` (#7284) * Initial work toward enabling origin and offset arguments in resample * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix _convert_offset_to_timedelta * Reduce number of tests * Address initial review comments * Add more typing information * Make cftime import lazy * Fix module_available import and test * Remove old origin argument * Add type annotations for resample_cftime.py * Add None as a possibility for closed and label * Add what's new entry * Add missing type annotation * Delete added line * Fix typing errors * Add comment and test for as_timedelta stub * Remove old code * [test-upstream] Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Deepak Cherian --- doc/whats-new.rst | 4 +- xarray/coding/cftime_offsets.py | 4 + xarray/core/common.py | 42 +++- xarray/core/dataarray.py | 21 +- xarray/core/dataset.py | 21 +- xarray/core/resample_cftime.py | 259 +++++++++++++++++----- xarray/core/types.py | 8 +- xarray/tests/test_cftime_offsets.py | 6 + xarray/tests/test_cftimeindex_resample.py | 171 ++++++++++---- 
xarray/tests/test_groupby.py | 27 +++ 10 files changed, 456 insertions(+), 107 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index b8a2f47bcf8..48113862c67 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -21,7 +21,9 @@ v2022.11.1 (unreleased) New Features ~~~~~~~~~~~~ - +- Enable using `offset` and `origin` arguments in :py:meth:`DataArray.resample` + and :py:meth:`Dataset.resample` (:issue:`7266`, :pull:`6538`). By `Spencer + Clark `_. - Add experimental support for Zarr's in-progress V3 specification. (:pull:`6475`). By `Gregory Lee `_ and `Joe Hamman `_. diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py index a029f39c7b8..04b2d773e2e 100644 --- a/xarray/coding/cftime_offsets.py +++ b/xarray/coding/cftime_offsets.py @@ -207,6 +207,10 @@ def __mul__(self, other): return new_self * other return type(self)(n=other * self.n) + def as_timedelta(self): + """All Tick subclasses must implement an as_timedelta method.""" + raise NotImplementedError + def _get_day_of_month(other, day_option): """Find the day in `other`'s month that satisfies a BaseCFTimeOffset's diff --git a/xarray/core/common.py b/xarray/core/common.py index b613db9926d..d1387d62e99 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -44,7 +44,13 @@ from .indexes import Index from .resample import Resample from .rolling_exp import RollingExp - from .types import DTypeLikeSave, ScalarOrArray, SideOptions, T_DataWithCoords + from .types import ( + DatetimeLike, + DTypeLikeSave, + ScalarOrArray, + SideOptions, + T_DataWithCoords, + ) from .variable import Variable DTypeMaybeMapping = Union[DTypeLikeSave, Mapping[Any, DTypeLikeSave]] @@ -817,7 +823,9 @@ def _resample( skipna: bool | None, closed: SideOptions | None, label: SideOptions | None, - base: int, + base: int | None, + offset: pd.Timedelta | datetime.timedelta | str | None, + origin: str | DatetimeLike, keep_attrs: bool | None, loffset: datetime.timedelta | str | None, 
restore_coord_dims: bool | None, @@ -845,6 +853,18 @@ def _resample( For frequencies that evenly subdivide 1 day, the "origin" of the aggregated intervals. For example, for "24H" frequency, base could range from 0 through 23. + origin : {'epoch', 'start', 'start_day', 'end', 'end_day'}, pd.Timestamp, datetime.datetime, np.datetime64, or cftime.datetime, default 'start_day' + The datetime on which to adjust the grouping. The timezone of origin + must match the timezone of the index. + + If a datetime is not used, these values are also supported: + - 'epoch': `origin` is 1970-01-01 + - 'start': `origin` is the first value of the timeseries + - 'start_day': `origin` is the first day at midnight of the timeseries + - 'end': `origin` is the last value of the timeseries + - 'end_day': `origin` is the ceiling midnight of the last day + offset : pd.Timedelta, datetime.timedelta, or str, default is None + An offset timedelta added to the origin. loffset : timedelta or str, optional Offset used to adjust the resampled time labels. Some pandas date offset strings are supported. 
@@ -960,10 +980,24 @@ def _resample( if isinstance(self._indexes[dim_name].to_pandas_index(), CFTimeIndex): from .resample_cftime import CFTimeGrouper - grouper = CFTimeGrouper(freq, closed, label, base, loffset) + grouper = CFTimeGrouper( + freq=freq, + closed=closed, + label=label, + base=base, + loffset=loffset, + origin=origin, + offset=offset, + ) else: grouper = pd.Grouper( - freq=freq, closed=closed, label=label, base=base, loffset=loffset + freq=freq, + closed=closed, + label=label, + base=base, + offset=offset, + origin=origin, + loffset=loffset, ) group = DataArray( dim_coord, coords=dim_coord.coords, dims=dim_coord.dims, name=RESAMPLE_DIM diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index caa68bfae5c..6eac634bfff 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -78,6 +78,7 @@ from .rolling import DataArrayCoarsen, DataArrayRolling from .types import ( CoarsenBoundaryOptions, + DatetimeLike, DatetimeUnitOptions, Dims, ErrorOptions, @@ -6531,7 +6532,9 @@ def resample( skipna: bool | None = None, closed: SideOptions | None = None, label: SideOptions | None = None, - base: int = 0, + base: int | None = None, + offset: pd.Timedelta | datetime.timedelta | str | None = None, + origin: str | DatetimeLike = "start_day", keep_attrs: bool | None = None, loffset: datetime.timedelta | str | None = None, restore_coord_dims: bool | None = None, @@ -6555,10 +6558,22 @@ def resample( Side of each interval to treat as closed. label : {"left", "right"}, optional Side of each interval to use for labeling. - base : int, default = 0 + base : int, optional For frequencies that evenly subdivide 1 day, the "origin" of the aggregated intervals. For example, for "24H" frequency, base could range from 0 through 23. + origin : {'epoch', 'start', 'start_day', 'end', 'end_day'}, pd.Timestamp, datetime.datetime, np.datetime64, or cftime.datetime, default 'start_day' + The datetime on which to adjust the grouping. 
The timezone of origin + must match the timezone of the index. + + If a datetime is not used, these values are also supported: + - 'epoch': `origin` is 1970-01-01 + - 'start': `origin` is the first value of the timeseries + - 'start_day': `origin` is the first day at midnight of the timeseries + - 'end': `origin` is the last value of the timeseries + - 'end_day': `origin` is the ceiling midnight of the last day + offset : pd.Timedelta, datetime.timedelta, or str, default is None + An offset timedelta added to the origin. loffset : timedelta or str, optional Offset used to adjust the resampled time labels. Some pandas date offset strings are supported. @@ -6640,6 +6655,8 @@ def resample( closed=closed, label=label, base=base, + offset=offset, + origin=origin, keep_attrs=keep_attrs, loffset=loffset, restore_coord_dims=restore_coord_dims, diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 4e7a2b5603b..4f376bdf811 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -107,6 +107,7 @@ CoarsenBoundaryOptions, CombineAttrsOptions, CompatOptions, + DatetimeLike, DatetimeUnitOptions, Dims, ErrorOptions, @@ -9128,7 +9129,9 @@ def resample( skipna: bool | None = None, closed: SideOptions | None = None, label: SideOptions | None = None, - base: int = 0, + base: int | None = None, + offset: pd.Timedelta | datetime.timedelta | str | None = None, + origin: str | DatetimeLike = "start_day", keep_attrs: bool | None = None, loffset: datetime.timedelta | str | None = None, restore_coord_dims: bool | None = None, @@ -9152,10 +9155,22 @@ def resample( Side of each interval to treat as closed. label : {"left", "right"}, optional Side of each interval to use for labeling. - base : int, default = 0 + base : int, optional For frequencies that evenly subdivide 1 day, the "origin" of the aggregated intervals. For example, for "24H" frequency, base could range from 0 through 23. 
+ origin : {'epoch', 'start', 'start_day', 'end', 'end_day'}, pd.Timestamp, datetime.datetime, np.datetime64, or cftime.datetime, default 'start_day' + The datetime on which to adjust the grouping. The timezone of origin + must match the timezone of the index. + + If a datetime is not used, these values are also supported: + - 'epoch': `origin` is 1970-01-01 + - 'start': `origin` is the first value of the timeseries + - 'start_day': `origin` is the first day at midnight of the timeseries + - 'end': `origin` is the last value of the timeseries + - 'end_day': `origin` is the ceiling midnight of the last day + offset : pd.Timedelta, datetime.timedelta, or str, default is None + An offset timedelta added to the origin. loffset : timedelta or str, optional Offset used to adjust the resampled time labels. Some pandas date offset strings are supported. @@ -9190,6 +9205,8 @@ def resample( closed=closed, label=label, base=base, + offset=offset, + origin=origin, keep_attrs=keep_attrs, loffset=loffset, restore_coord_dims=restore_coord_dims, diff --git a/xarray/core/resample_cftime.py b/xarray/core/resample_cftime.py index 11eceda77ee..da21fdd17cf 100644 --- a/xarray/core/resample_cftime.py +++ b/xarray/core/resample_cftime.py @@ -38,21 +38,27 @@ from __future__ import annotations import datetime +import typing import numpy as np import pandas as pd from ..coding.cftime_offsets import ( - CFTIME_TICKS, + BaseCFTimeOffset, Day, MonthEnd, QuarterEnd, + Tick, YearEnd, cftime_range, normalize_date, to_offset, ) from ..coding.cftimeindex import CFTimeIndex +from .types import SideOptions + +if typing.TYPE_CHECKING: + from .types import CFTimeDatetime class CFTimeGrouper: @@ -60,25 +66,77 @@ class CFTimeGrouper: single method, the only one required for resampling in xarray. 
It cannot be used in a call to groupby like a pandas.Grouper object can.""" - def __init__(self, freq, closed=None, label=None, base=0, loffset=None): + def __init__( + self, + freq: str | BaseCFTimeOffset, + closed: SideOptions | None = None, + label: SideOptions | None = None, + base: int | None = None, + loffset: str | datetime.timedelta | BaseCFTimeOffset | None = None, + origin: str | CFTimeDatetime = "start_day", + offset: str | datetime.timedelta | None = None, + ): + self.offset: datetime.timedelta | None + self.closed: SideOptions + self.label: SideOptions + + if base is not None and offset is not None: + raise ValueError("base and offset cannot be provided at the same time") + self.freq = to_offset(freq) - self.closed = closed - self.label = label - self.base = base self.loffset = loffset + self.origin = origin if isinstance(self.freq, (MonthEnd, QuarterEnd, YearEnd)): - if self.closed is None: + if closed is None: self.closed = "right" - if self.label is None: + else: + self.closed = closed + if label is None: self.label = "right" + else: + self.label = label + else: + # The backward resample sets ``closed`` to ``'right'`` by default + # since the last value should be considered as the edge point for + # the last bin. When origin in "end" or "end_day", the value for a + # specific ``cftime.datetime`` index stands for the resample result + # from the current ``cftime.datetime`` minus ``freq`` to the current + # ``cftime.datetime`` with a right close. 
+ if self.origin in ["end", "end_day"]: + if closed is None: + self.closed = "right" + else: + self.closed = closed + if label is None: + self.label = "right" + else: + self.label = label + else: + if closed is None: + self.closed = "left" + else: + self.closed = closed + if label is None: + self.label = "left" + else: + self.label = label + + if base is not None and isinstance(self.freq, Tick): + offset = type(self.freq)(n=base % self.freq.n).as_timedelta() + + if offset is not None: + try: + self.offset = _convert_offset_to_timedelta(offset) + except (ValueError, AttributeError) as error: + raise ValueError( + f"offset must be a datetime.timedelta object or an offset string " + f"that can be converted to a timedelta. Got {offset} instead." + ) from error else: - if self.closed is None: - self.closed = "left" - if self.label is None: - self.label = "left" + self.offset = None - def first_items(self, index): + def first_items(self, index: CFTimeIndex): """Meant to reproduce the results of the following grouper = pandas.Grouper(...) @@ -89,7 +147,7 @@ def first_items(self, index): """ datetime_bins, labels = _get_time_bins( - index, self.freq, self.closed, self.label, self.base + index, self.freq, self.closed, self.label, self.origin, self.offset ) if self.loffset is not None: if isinstance(self.loffset, datetime.timedelta): @@ -111,7 +169,14 @@ def first_items(self, index): return first_items.where(non_duplicate) -def _get_time_bins(index, freq, closed, label, base): +def _get_time_bins( + index: CFTimeIndex, + freq: BaseCFTimeOffset, + closed: SideOptions, + label: SideOptions, + origin: str | CFTimeDatetime, + offset: datetime.timedelta | None, +): """Obtain the bins and their respective labels for resampling operations. Parameters @@ -122,18 +187,26 @@ def _get_time_bins(index, freq, closed, label, base): The offset object representing target conversion a.k.a. 
resampling frequency (e.g., 'MS', '2D', 'H', or '3T' with coding.cftime_offsets.to_offset() applied to it). - closed : 'left' or 'right', optional + closed : 'left' or 'right' Which side of bin interval is closed. The default is 'left' for all frequency offsets except for 'M' and 'A', which have a default of 'right'. - label : 'left' or 'right', optional + label : 'left' or 'right' Which bin edge label to label bucket with. The default is 'left' for all frequency offsets except for 'M' and 'A', which have a default of 'right'. - base : int, optional - For frequencies that evenly subdivide 1 day, the "origin" of the - aggregated intervals. For example, for '5min' frequency, base could - range from 0 through 4. Defaults to 0. + origin : {'epoch', 'start', 'start_day', 'end', 'end_day'} or cftime.datetime, default 'start_day' + The datetime on which to adjust the grouping. The timezone of origin + must match the timezone of the index. + + If a datetime is not used, these values are also supported: + - 'epoch': `origin` is 1970-01-01 + - 'start': `origin` is the first value of the timeseries + - 'start_day': `origin` is the first day at midnight of the timeseries + - 'end': `origin` is the last value of the timeseries + - 'end_day': `origin` is the ceiling midnight of the last day + offset : datetime.timedelta, default is None + An offset timedelta added to the origin. 
Returns ------- @@ -154,7 +227,7 @@ def _get_time_bins(index, freq, closed, label, base): return datetime_bins, labels first, last = _get_range_edges( - index.min(), index.max(), freq, closed=closed, base=base + index.min(), index.max(), freq, closed=closed, origin=origin, offset=offset ) datetime_bins = labels = cftime_range( freq=freq, start=first, end=last, name=index.name @@ -172,7 +245,13 @@ def _get_time_bins(index, freq, closed, label, base): return datetime_bins, labels -def _adjust_bin_edges(datetime_bins, offset, closed, index, labels): +def _adjust_bin_edges( + datetime_bins: np.ndarray, + freq: BaseCFTimeOffset, + closed: SideOptions, + index: CFTimeIndex, + labels: np.ndarray, +): """This is required for determining the bin edges resampling with daily frequencies greater than one day, month end, and year end frequencies. @@ -207,8 +286,8 @@ def _adjust_bin_edges(datetime_bins, offset, closed, index, labels): This is also required for daily frequencies longer than one day and year-end frequencies. """ - is_super_daily = isinstance(offset, (MonthEnd, QuarterEnd, YearEnd)) or ( - isinstance(offset, Day) and offset.n > 1 + is_super_daily = isinstance(freq, (MonthEnd, QuarterEnd, YearEnd)) or ( + isinstance(freq, Day) and freq.n > 1 ) if is_super_daily: if closed == "right": @@ -220,7 +299,14 @@ def _adjust_bin_edges(datetime_bins, offset, closed, index, labels): return datetime_bins, labels -def _get_range_edges(first, last, offset, closed="left", base=0): +def _get_range_edges( + first: CFTimeDatetime, + last: CFTimeDatetime, + freq: BaseCFTimeOffset, + closed: SideOptions = "left", + origin: str | CFTimeDatetime = "start_day", + offset: datetime.timedelta | None = None, +): """Get the correct starting and ending datetimes for the resampled CFTimeIndex range. @@ -232,16 +318,24 @@ def _get_range_edges(first, last, offset, closed="left", base=0): last : cftime.datetime Uncorrected ending datetime object for resampled CFTimeIndex range. 
Usually the max of the original CFTimeIndex. - offset : xarray.coding.cftime_offsets.BaseCFTimeOffset + freq : xarray.coding.cftime_offsets.BaseCFTimeOffset The offset object representing target conversion a.k.a. resampling frequency. Contains information on offset type (e.g. Day or 'D') and offset magnitude (e.g., n = 3). - closed : 'left' or 'right', optional + closed : 'left' or 'right' Which side of bin interval is closed. Defaults to 'left'. - base : int, optional - For frequencies that evenly subdivide 1 day, the "origin" of the - aggregated intervals. For example, for '5min' frequency, base could - range from 0 through 4. Defaults to 0. + origin : {'epoch', 'start', 'start_day', 'end', 'end_day'} or cftime.datetime, default 'start_day' + The datetime on which to adjust the grouping. The timezone of origin + must match the timezone of the index. + + If a datetime is not used, these values are also supported: + - 'epoch': `origin` is 1970-01-01 + - 'start': `origin` is the first value of the timeseries + - 'start_day': `origin` is the first day at midnight of the timeseries + - 'end': `origin` is the last value of the timeseries + - 'end_day': `origin` is the ceiling midnight of the last day + offset : datetime.timedelta, default is None + An offset timedelta added to the origin. Returns ------- @@ -250,21 +344,28 @@ def _get_range_edges(first, last, offset, closed="left", base=0): last : cftime.datetime Corrected ending datetime object for resampled CFTimeIndex range. 
""" - if isinstance(offset, CFTIME_TICKS): + if isinstance(freq, Tick): first, last = _adjust_dates_anchored( - first, last, offset, closed=closed, base=base + first, last, freq, closed=closed, origin=origin, offset=offset ) return first, last else: first = normalize_date(first) last = normalize_date(last) - first = offset.rollback(first) if closed == "left" else first - offset - last = last + offset + first = freq.rollback(first) if closed == "left" else first - freq + last = last + freq return first, last -def _adjust_dates_anchored(first, last, offset, closed="right", base=0): +def _adjust_dates_anchored( + first: CFTimeDatetime, + last: CFTimeDatetime, + freq: Tick, + closed: SideOptions = "right", + origin: str | CFTimeDatetime = "start_day", + offset: datetime.timedelta | None = None, +): """First and last offsets should be calculated from the start day to fix an error cause by resampling across multiple days when a one day period is not a multiple of the frequency. @@ -276,16 +377,24 @@ def _adjust_dates_anchored(first, last, offset, closed="right", base=0): A datetime object representing the start of a CFTimeIndex range. last : cftime.datetime A datetime object representing the end of a CFTimeIndex range. - offset : xarray.coding.cftime_offsets.BaseCFTimeOffset + freq : xarray.coding.cftime_offsets.BaseCFTimeOffset The offset object representing target conversion a.k.a. resampling frequency. Contains information on offset type (e.g. Day or 'D') and offset magnitude (e.g., n = 3). - closed : 'left' or 'right', optional + closed : 'left' or 'right' Which side of bin interval is closed. Defaults to 'right'. - base : int, optional - For frequencies that evenly subdivide 1 day, the "origin" of the - aggregated intervals. For example, for '5min' frequency, base could - range from 0 through 4. Defaults to 0. + origin : {'epoch', 'start', 'start_day', 'end', 'end_day'} or cftime.datetime, default 'start_day' + The datetime on which to adjust the grouping. 
The timezone of origin + must match the timezone of the index. + + If a datetime is not used, these values are also supported: + - 'epoch': `origin` is 1970-01-01 + - 'start': `origin` is the first value of the timeseries + - 'start_day': `origin` is the first day at midnight of the timeseries + - 'end': `origin` is the last value of the timeseries + - 'end_day': `origin` is the ceiling midnight of the last day + offset : datetime.timedelta, default is None + An offset timedelta added to the origin. Returns ------- @@ -296,33 +405,59 @@ def _adjust_dates_anchored(first, last, offset, closed="right", base=0): A datetime object representing the end of a date range that has been adjusted to fix resampling errors. """ + import cftime + + if origin == "start_day": + origin_date = normalize_date(first) + elif origin == "start": + origin_date = first + elif origin == "epoch": + origin_date = type(first)(1970, 1, 1) + elif origin in ["end", "end_day"]: + origin_last = last if origin == "end" else _ceil_via_cftimeindex(last, "D") + sub_freq_times = (origin_last - first) // freq.as_timedelta() + if closed == "left": + sub_freq_times += 1 + first = origin_last - sub_freq_times * freq + origin_date = first + elif isinstance(origin, cftime.datetime): + origin_date = origin + else: + raise ValueError( + f"origin must be one of {{'epoch', 'start_day', 'start', 'end', 'end_day'}} " + f"or a cftime.datetime object. Got {origin}." 
+ ) + + if offset is not None: + origin_date = origin_date + offset + + foffset = (first - origin_date) % freq.as_timedelta() + loffset = (last - origin_date) % freq.as_timedelta() - base = base % offset.n - start_day = normalize_date(first) - base_td = type(offset)(n=base).as_timedelta() - start_day += base_td - foffset = exact_cftime_datetime_difference(start_day, first) % offset.as_timedelta() - loffset = exact_cftime_datetime_difference(start_day, last) % offset.as_timedelta() if closed == "right": if foffset.total_seconds() > 0: fresult = first - foffset else: - fresult = first - offset.as_timedelta() + fresult = first - freq.as_timedelta() if loffset.total_seconds() > 0: - lresult = last + (offset.as_timedelta() - loffset) + lresult = last + (freq.as_timedelta() - loffset) else: lresult = last else: - fresult = first - foffset if foffset.total_seconds() > 0 else first + if foffset.total_seconds() > 0: + fresult = first - foffset + else: + fresult = first + if loffset.total_seconds() > 0: - lresult = last + (offset.as_timedelta() - loffset) + lresult = last + (freq.as_timedelta() - loffset) else: - lresult = last + offset.as_timedelta() + lresult = last + freq return fresult, lresult -def exact_cftime_datetime_difference(a, b): +def exact_cftime_datetime_difference(a: CFTimeDatetime, b: CFTimeDatetime): """Exact computation of b - a Assumes: @@ -360,3 +495,19 @@ def exact_cftime_datetime_difference(a, b): seconds = int(round(seconds.total_seconds())) microseconds = b.microsecond - a.microsecond return datetime.timedelta(seconds=seconds, microseconds=microseconds) + + +def _convert_offset_to_timedelta( + offset: datetime.timedelta | str | BaseCFTimeOffset, +) -> datetime.timedelta: + if isinstance(offset, datetime.timedelta): + return offset + elif isinstance(offset, (str, Tick)): + return to_offset(offset).as_timedelta() + else: + raise ValueError + + +def _ceil_via_cftimeindex(date: CFTimeDatetime, freq: str | BaseCFTimeOffset): + index = CFTimeIndex([date]) 
+    return index.ceil(freq).item()
diff --git a/xarray/core/types.py b/xarray/core/types.py
index 7579148e4c2..adf046dabb2 100644
--- a/xarray/core/types.py
+++ b/xarray/core/types.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import datetime
 from typing import (
     TYPE_CHECKING,
     Any,
@@ -17,6 +18,7 @@
 )
 
 import numpy as np
+import pandas as pd
 from packaging.version import Version
 
 if TYPE_CHECKING:
@@ -82,7 +84,11 @@ def dtype(self) -> np.dtype:
         # anything with a dtype attribute
         _SupportsDType,
     ]
-
+    try:
+        from cftime import datetime as CFTimeDatetime
+    except ImportError:
+        CFTimeDatetime = Any
+    DatetimeLike = Union[pd.Timestamp, datetime.datetime, np.datetime64, CFTimeDatetime]
 else:
     Self: Any = None
     DTypeLikeSave: Any = None
diff --git a/xarray/tests/test_cftime_offsets.py b/xarray/tests/test_cftime_offsets.py
index 075393e84e7..d28f4594559 100644
--- a/xarray/tests/test_cftime_offsets.py
+++ b/xarray/tests/test_cftime_offsets.py
@@ -1385,3 +1385,9 @@ def test_date_range_like_errors():
         match="'source' must be a 1D array of datetime objects for inferring its range.",
     ):
         date_range_like(da, "noleap")
+
+
+def test_as_timedelta_not_implemented_error():
+    tick = Tick()
+    with pytest.raises(NotImplementedError):
+        tick.as_timedelta()
diff --git a/xarray/tests/test_cftimeindex_resample.py b/xarray/tests/test_cftimeindex_resample.py
index 35447a39f3c..e780421e09e 100644
--- a/xarray/tests/test_cftimeindex_resample.py
+++ b/xarray/tests/test_cftimeindex_resample.py
@@ -9,7 +9,7 @@
 import xarray as xr
 from xarray.core.resample_cftime import CFTimeGrouper
 
-pytest.importorskip("cftime")
+cftime = pytest.importorskip("cftime")
 
 
 # Create a list of pairs of similar-length initial and resample frequencies
@@ -50,7 +50,63 @@
 ]
 
 
-def da(index):
+def compare_against_pandas(
+    da_datetimeindex,
+    da_cftimeindex,
+    freq,
+    closed=None,
+    label=None,
+    base=None,
+    offset=None,
+    origin=None,
+    loffset=None,
+) -> None:
+    if isinstance(origin, tuple):
+        origin_pandas = 
pd.Timestamp(datetime.datetime(*origin)) + origin_cftime = cftime.DatetimeGregorian(*origin) + else: + origin_pandas = origin + origin_cftime = origin + + try: + result_datetimeindex = da_datetimeindex.resample( + time=freq, + closed=closed, + label=label, + base=base, + loffset=loffset, + offset=offset, + origin=origin_pandas, + ).mean() + except ValueError: + with pytest.raises(ValueError): + da_cftimeindex.resample( + time=freq, + closed=closed, + label=label, + base=base, + loffset=loffset, + origin=origin_cftime, + offset=offset, + ).mean() + else: + result_cftimeindex = da_cftimeindex.resample( + time=freq, + closed=closed, + label=label, + base=base, + loffset=loffset, + origin=origin_cftime, + offset=offset, + ).mean() + # TODO (benbovy - flexible indexes): update when CFTimeIndex is a xarray Index subclass + result_cftimeindex["time"] = ( + result_cftimeindex.xindexes["time"].to_pandas_index().to_datetimeindex() + ) + xr.testing.assert_identical(result_cftimeindex, result_datetimeindex) + + +def da(index) -> xr.DataArray: return xr.DataArray( np.arange(100.0, 100.0 + index.size), coords=[index], dims=["time"] ) @@ -59,53 +115,31 @@ def da(index): @pytest.mark.parametrize("freqs", FREQS, ids=lambda x: "{}->{}".format(*x)) @pytest.mark.parametrize("closed", [None, "left", "right"]) @pytest.mark.parametrize("label", [None, "left", "right"]) -@pytest.mark.parametrize("base", [24, 31]) -def test_resample(freqs, closed, label, base) -> None: +@pytest.mark.parametrize( + ("base", "offset"), [(24, None), (31, None), (None, "5S")], ids=lambda x: f"{x}" +) +def test_resample(freqs, closed, label, base, offset) -> None: initial_freq, resample_freq = freqs start = "2000-01-01T12:07:01" + loffset = "12H" + origin = "start" index_kwargs = dict(start=start, periods=5, freq=initial_freq) datetime_index = pd.date_range(**index_kwargs) cftime_index = xr.cftime_range(**index_kwargs) + da_datetimeindex = da(datetime_index) + da_cftimeindex = da(cftime_index) - loffset = "12H" 
- try: - da_datetime = ( - da(datetime_index) - .resample( - time=resample_freq, - closed=closed, - label=label, - base=base, - loffset=loffset, - ) - .mean() - ) - except ValueError: - with pytest.raises(ValueError): - da(cftime_index).resample( - time=resample_freq, - closed=closed, - label=label, - base=base, - loffset=loffset, - ).mean() - else: - da_cftime = ( - da(cftime_index) - .resample( - time=resample_freq, - closed=closed, - label=label, - base=base, - loffset=loffset, - ) - .mean() - ) - # TODO (benbovy - flexible indexes): update when CFTimeIndex is a xarray Index subclass - da_cftime["time"] = ( - da_cftime.xindexes["time"].to_pandas_index().to_datetimeindex() - ) - xr.testing.assert_identical(da_cftime, da_datetime) + compare_against_pandas( + da_datetimeindex, + da_cftimeindex, + resample_freq, + closed=closed, + label=label, + base=base, + offset=offset, + origin=origin, + loffset=loffset, + ) @pytest.mark.parametrize( @@ -153,3 +187,54 @@ def test_calendars(calendar) -> None: # TODO (benbovy - flexible indexes): update when CFTimeIndex is a xarray Index subclass da_cftime["time"] = da_cftime.xindexes["time"].to_pandas_index().to_datetimeindex() xr.testing.assert_identical(da_cftime, da_datetime) + + +@pytest.mark.parametrize("closed", ["left", "right"]) +@pytest.mark.parametrize( + "origin", + ["start_day", "start", "end", "end_day", "epoch", (1970, 1, 1, 3, 2)], + ids=lambda x: f"{x}", +) +def test_origin(closed, origin) -> None: + initial_freq, resample_freq = ("3H", "9H") + start = "1969-12-31T12:07:01" + index_kwargs = dict(start=start, periods=12, freq=initial_freq) + datetime_index = pd.date_range(**index_kwargs) + cftime_index = xr.cftime_range(**index_kwargs) + da_datetimeindex = da(datetime_index) + da_cftimeindex = da(cftime_index) + + compare_against_pandas( + da_datetimeindex, + da_cftimeindex, + resample_freq, + closed=closed, + origin=origin, + ) + + +def test_base_and_offset_error(): + cftime_index = xr.cftime_range("2000", 
periods=5) + da_cftime = da(cftime_index) + with pytest.raises(ValueError, match="base and offset cannot"): + da_cftime.resample(time="2D", base=3, offset="5S") + + +@pytest.mark.parametrize("offset", ["foo", "5MS", 10]) +def test_invalid_offset_error(offset) -> None: + cftime_index = xr.cftime_range("2000", periods=5) + da_cftime = da(cftime_index) + with pytest.raises(ValueError, match="offset must be"): + da_cftime.resample(time="2D", offset=offset) + + +def test_timedelta_offset() -> None: + timedelta = datetime.timedelta(seconds=5) + string = "5S" + + cftime_index = xr.cftime_range("2000", periods=5) + da_cftime = da(cftime_index) + + timedelta_result = da_cftime.resample(time="2D", offset=timedelta).mean() + string_result = da_cftime.resample(time="2D", offset=string).mean() + xr.testing.assert_identical(timedelta_result, string_result) diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index d647c82a76b..063dc22e633 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -1810,6 +1810,33 @@ def test_upsample_interpolate_dask(self, chunked_time): # done here due to floating point arithmetic assert_allclose(expected, actual, rtol=1e-16) + def test_resample_base(self) -> None: + times = pd.date_range("2000-01-01T02:03:01", freq="6H", periods=10) + array = DataArray(np.arange(10), [("time", times)]) + + base = 11 + actual = array.resample(time="24H", base=base).mean() + expected = DataArray(array.to_series().resample("24H", base=base).mean()) + assert_identical(expected, actual) + + def test_resample_offset(self) -> None: + times = pd.date_range("2000-01-01T02:03:01", freq="6H", periods=10) + array = DataArray(np.arange(10), [("time", times)]) + + offset = pd.Timedelta("11H") + actual = array.resample(time="24H", offset=offset).mean() + expected = DataArray(array.to_series().resample("24H", offset=offset).mean()) + assert_identical(expected, actual) + + def test_resample_origin(self) -> None: + times = 
pd.date_range("2000-01-01T02:03:01", freq="6H", periods=10) + array = DataArray(np.arange(10), [("time", times)]) + + origin = "start" + actual = array.resample(time="24H", origin=origin).mean() + expected = DataArray(array.to_series().resample("24H", origin=origin).mean()) + assert_identical(expected, actual) + class TestDatasetResample: def test_resample_and_first(self): From 23bec8aeeb261a0429fbe4d6966768814343e1a4 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Tue, 29 Nov 2022 02:37:05 -0500 Subject: [PATCH 10/13] =?UTF-8?q?Fix=20PR=20number=20in=20what=E2=80=99s?= =?UTF-8?q?=20new=20(#7331)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- doc/whats-new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 48113862c67..0c9ff5bd1f8 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -22,7 +22,7 @@ v2022.11.1 (unreleased) New Features ~~~~~~~~~~~~ - Enable using `offset` and `origin` arguments in :py:meth:`DataArray.resample` - and :py:meth:`Dataset.resample` (:issue:`7266`, :pull:`6538`). By `Spencer + and :py:meth:`Dataset.resample` (:issue:`7266`, :pull:`7284`). By `Spencer Clark `_. - Add experimental support for Zarr's in-progress V3 specification. (:pull:`6475`). By `Gregory Lee `_ and `Joe Hamman `_. 
From 48f81cc040854119380d73b5af0d4110dca4ad56 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 28 Nov 2022 23:37:19 -0800 Subject: [PATCH 11/13] [pre-commit.ci] pre-commit autoupdate (#7330) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/pre-commit/pre-commit-hooks: v4.3.0 → v4.4.0](https://github.com/pre-commit/pre-commit-hooks/compare/v4.3.0...v4.4.0) - [github.com/PyCQA/autoflake: v1.7.7 → v2.0.0](https://github.com/PyCQA/autoflake/compare/v1.7.7...v2.0.0) - [github.com/PyCQA/flake8: 5.0.4 → 6.0.0](https://github.com/PyCQA/flake8/compare/5.0.4...6.0.0) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Mathias Hauser --- .pre-commit-config.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f940ef09bc8..67dd54faf3a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,7 +1,7 @@ # https://pre-commit.com/ repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.3.0 + rev: v4.4.0 hooks: - id: trailing-whitespace - id: end-of-file-fixer @@ -10,7 +10,7 @@ repos: - id: mixed-line-ending # This wants to go before isort & flake8 - repo: https://github.com/PyCQA/autoflake - rev: "v1.7.7" + rev: "v2.0.0" hooks: - id: autoflake # isort should run before black as black sometimes tweaks the isort output args: ["--in-place", "--ignore-init-module-imports"] @@ -38,7 +38,7 @@ repos: additional_dependencies: ["black==22.10.0"] - id: blackdoc-autoupdate-black - repo: https://github.com/PyCQA/flake8 - rev: 5.0.4 + rev: 6.0.0 hooks: - id: flake8 # - repo: https://github.com/Carreau/velin From d3ac8b2f8396d23e5920dffd1f24c88cd257ff38 Mon Sep 17 00:00:00 2001 From: Michael Niklas Date: Tue, 29 Nov 2022 
21:06:03 +0100 Subject: [PATCH 12/13] add comment explaining type: ignore --- xarray/core/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xarray/core/utils.py b/xarray/core/utils.py index 45c4edaa94b..5658467d1a8 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -1066,6 +1066,7 @@ def parse_ordered_dims( idx = dims.index(...) return dims[:idx] + other_dims + dims[idx + 1 :] else: + # mypy cannot resolve that the sequence cannot contain "..." return parse_dims( # type: ignore[call-overload] dim=dim, all_dims=all_dims, From d7a7a370075f7a4c41ea88123a9468363fe8e8ee Mon Sep 17 00:00:00 2001 From: Michael Niklas Date: Tue, 29 Nov 2022 21:46:03 +0100 Subject: [PATCH 13/13] fix doctest win/linux issue once again --- xarray/core/_aggregations.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/xarray/core/_aggregations.py b/xarray/core/_aggregations.py index 561ac1dc617..1db330fb76f 100644 --- a/xarray/core/_aggregations.py +++ b/xarray/core/_aggregations.py @@ -94,7 +94,7 @@ def count( Dimensions: () Data variables: - da int32 5 + da int64 5 """ return self.reduce( duck_array_ops.count, @@ -5217,7 +5217,7 @@ def count( >>> da.groupby("labels").count() - array([1, 2, 2], dtype=int64) + array([1, 2, 2]) Coordinates: * labels (labels) object 'a' 'b' 'c' """ @@ -6522,7 +6522,7 @@ def count( >>> da.resample(time="3M").count() - array([1, 3, 1], dtype=int64) + array([1, 3, 1]) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """