Skip to content

Commit

Permalink
Deprecate tuples of chunks? (#8341)
Browse files Browse the repository at this point in the history
* Deprecate tuples of chunks?

(I was planning on putting an issue in, but then thought it wasn't much more difficult to make the PR. But it's totally fine if we don't think this is a good idea...)

Allowing a tuple of dims means we're reliant on dimension order, which we really try not to be. It also makes the type signature even more complicated.

So are we OK to encourage a dict of `dim: chunksizes`, rather than a tuple of chunksizes?

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Update xarray/core/dataarray.py

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
max-sixty and pre-commit-ci[bot] authored Oct 21, 2023
1 parent 86b4167 commit 126b92a
Show file tree
Hide file tree
Showing 8 changed files with 32 additions and 20 deletions.
7 changes: 7 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,13 @@ Breaking changes
Deprecations
~~~~~~~~~~~~

- Supplying dimension-ordered sequences to :py:meth:`DataArray.chunk` &
:py:meth:`Dataset.chunk` is deprecated in favor of supplying a dictionary of
dimensions, or a single ``int`` or ``"auto"`` argument covering all
dimensions. Xarray favors using dimension names rather than positions, and
this was one place in the API where dimension positions were used.
(:pull:`8341`)
By `Maximilian Roos <https://github.com/max-sixty>`_.

Bug fixes
~~~~~~~~~
Expand Down
5 changes: 5 additions & 0 deletions xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1371,6 +1371,11 @@ def chunk(
# ignoring type; unclear why it won't accept a Literal into the value.
chunks = dict.fromkeys(self.dims, chunks)
elif isinstance(chunks, (tuple, list)):
utils.emit_user_level_warning(
"Supplying chunks as dimension-order tuples is deprecated. "
"It will raise an error in the future. Instead use a dict with dimension names as keys.",
category=DeprecationWarning,
)
chunks = dict(zip(self.dims, chunks))
else:
chunks = either_dict_or_kwargs(chunks, chunks_kwargs, "chunk")
Expand Down
8 changes: 7 additions & 1 deletion xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2648,11 +2648,17 @@ def chunk(
warnings.warn(
"None value for 'chunks' is deprecated. "
"It will raise an error in the future. Use instead '{}'",
category=FutureWarning,
category=DeprecationWarning,
)
chunks = {}
chunks_mapping: Mapping[Any, Any]
if not isinstance(chunks, Mapping) and chunks is not None:
if isinstance(chunks, (tuple, list)):
utils.emit_user_level_warning(
"Supplying chunks as dimension-order tuples is deprecated. "
"It will raise an error in the future. Instead use a dict with dimensions as keys.",
category=DeprecationWarning,
)
chunks_mapping = dict.fromkeys(self.dims, chunks)
else:
chunks_mapping = either_dict_or_kwargs(chunks, chunks_kwargs, "chunk")
Expand Down
4 changes: 1 addition & 3 deletions xarray/core/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,9 +187,7 @@ def copy(
# FYI in some cases we don't allow `None`, which this doesn't take account of.
T_ChunkDim: TypeAlias = Union[int, Literal["auto"], None, tuple[int, ...]]
# We allow the tuple form of this (though arguably we could transition to named dims only)
T_Chunks: TypeAlias = Union[
T_ChunkDim, Mapping[Any, T_ChunkDim], tuple[T_ChunkDim, ...]
]
T_Chunks: TypeAlias = Union[T_ChunkDim, Mapping[Any, T_ChunkDim]]
T_NormalizedChunks = tuple[tuple[int, ...], ...]

DataVars = Mapping[Any, Any]
Expand Down
11 changes: 3 additions & 8 deletions xarray/core/variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from collections.abc import Hashable, Mapping, Sequence
from datetime import timedelta
from functools import partial
from typing import TYPE_CHECKING, Any, Callable, Literal, NoReturn, cast
from typing import TYPE_CHECKING, Any, Callable, NoReturn, cast

import numpy as np
import pandas as pd
Expand All @@ -34,6 +34,7 @@
is_duck_dask_array,
to_numpy,
)
from xarray.core.types import T_Chunks
from xarray.core.utils import (
OrderedSet,
_default,
Expand Down Expand Up @@ -965,13 +966,7 @@ def _replace(

def chunk(
self,
chunks: (
int
| Literal["auto"]
| tuple[int, ...]
| tuple[tuple[int, ...], ...]
| Mapping[Any, None | int | tuple[int, ...]]
) = {},
chunks: T_Chunks = {},
name: str | None = None,
lock: bool | None = None,
inline_array: bool | None = None,
Expand Down
2 changes: 1 addition & 1 deletion xarray/tests/test_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -2798,7 +2798,7 @@ def test_write_empty(self, write_empty: bool) -> None:
)

if has_dask:
ds["test"] = ds["test"].chunk((1, 1, 1))
ds["test"] = ds["test"].chunk(1)
encoding = None
else:
encoding = {"test": {"chunks": (1, 1, 1)}}
Expand Down
13 changes: 7 additions & 6 deletions xarray/tests/test_dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -879,13 +879,14 @@ def test_chunk(self) -> None:
assert blocked.chunks == ((3,), (4,))
first_dask_name = blocked.data.name

blocked = unblocked.chunk(chunks=((2, 1), (2, 2)))
assert blocked.chunks == ((2, 1), (2, 2))
assert blocked.data.name != first_dask_name
with pytest.warns(DeprecationWarning):
blocked = unblocked.chunk(chunks=((2, 1), (2, 2))) # type: ignore
assert blocked.chunks == ((2, 1), (2, 2))
assert blocked.data.name != first_dask_name

blocked = unblocked.chunk(chunks=(3, 3))
assert blocked.chunks == ((3,), (3, 1))
assert blocked.data.name != first_dask_name
blocked = unblocked.chunk(chunks=(3, 3))
assert blocked.chunks == ((3,), (3, 1))
assert blocked.data.name != first_dask_name

# name doesn't change when rechunking by same amount
# this fails if ReprObject doesn't have __dask_tokenize__ defined
Expand Down
2 changes: 1 addition & 1 deletion xarray/tests/test_variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -2663,7 +2663,7 @@ def test_full_like(self) -> None:
def test_full_like_dask(self) -> None:
orig = Variable(
dims=("x", "y"), data=[[1.5, 2.0], [3.1, 4.3]], attrs={"foo": "bar"}
).chunk(((1, 1), (2,)))
).chunk(dict(x=(1, 1), y=(2,)))

def check(actual, expect_dtype, expect_values):
assert actual.dtype == expect_dtype
Expand Down

0 comments on commit 126b92a

Please sign in to comment.