Skip to content
forked from pydata/xarray

Commit

Permalink
Handle empty containers in zarr chunk checks (pydata#5526)
Browse files Browse the repository at this point in the history
  • Loading branch information
chrisroat authored Jan 27, 2022
1 parent 4692c59 commit 9235548
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 5 deletions.
2 changes: 1 addition & 1 deletion doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ Bug fixes
By `Michael Delgado <https://github.com/delgadom>`_.
- `dt.season <https://xarray.pydata.org/en/stable/generated/xarray.DataArray.dt.season.html>`_ can now handle NaN and NaT. (:pull:`5876`).
By `Pierre Loicq <https://github.com/pierreloicq>`_.

- Determination of zarr chunks handles empty lists for encoding chunks or variable chunks that occurs in certain cirumstances (:pull:`5526`). By `Chris Roat <https://github.com/chrisroat>`_.

Documentation
~~~~~~~~~~~~~
Expand Down
9 changes: 5 additions & 4 deletions xarray/backends/zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,8 @@ def __getitem__(self, key):

def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name, safe_chunks):
"""
Given encoding chunks (possibly None) and variable chunks (possibly None)
Given encoding chunks (possibly None or []) and variable chunks
(possibly None or []).
"""

# zarr chunk spec:
Expand All @@ -93,7 +94,7 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name, safe_chunks):

# if there are no chunks in encoding and the variable data is a numpy
# array, then we let zarr use its own heuristics to pick the chunks
if var_chunks is None and enc_chunks is None:
if not var_chunks and not enc_chunks:
return None

# if there are no chunks in encoding but there are dask chunks, we try to
Expand All @@ -102,7 +103,7 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name, safe_chunks):
# http://zarr.readthedocs.io/en/latest/spec/v1.html#chunks
# while dask chunks can be variable sized
# http://dask.pydata.org/en/latest/array-design.html#chunks
if var_chunks and enc_chunks is None:
if var_chunks and not enc_chunks:
if any(len(set(chunks[:-1])) > 1 for chunks in var_chunks):
raise ValueError(
"Zarr requires uniform chunk sizes except for final chunk. "
Expand Down Expand Up @@ -145,7 +146,7 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name, safe_chunks):

# if there are chunks in encoding and the variable data is a numpy array,
# we use the specified chunks
if var_chunks is None:
if not var_chunks:
return enc_chunks_tuple

# the hard case
Expand Down
14 changes: 14 additions & 0 deletions xarray/tests/test_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -2383,6 +2383,20 @@ def test_open_zarr_use_cftime(self):
ds_b = xr.open_zarr(store_target, use_cftime=True)
assert xr.coding.times.contains_cftime_datetimes(ds_b.time)

def test_write_read_select_write(self):
# Test for https://github.com/pydata/xarray/issues/4084
ds = create_test_data()

# NOTE: using self.roundtrip, which uses open_dataset, will not trigger the bug.
with self.create_zarr_target() as initial_store:
ds.to_zarr(initial_store, mode="w")
ds1 = xr.open_zarr(initial_store)

# Combination of where+squeeze triggers error on write.
ds_sel = ds1.where(ds1.coords["dim3"] == "a", drop=True).squeeze("dim3")
with self.create_zarr_target() as final_store:
ds_sel.to_zarr(final_store, mode="w")


@requires_zarr
class TestZarrDictStore(ZarrBase):
Expand Down

0 comments on commit 9235548

Please sign in to comment.