From 9235548df71c1b03e76da40641dc790becf443bc Mon Sep 17 00:00:00 2001 From: Chris Roat <1053153+chrisroat@users.noreply.github.com> Date: Thu, 27 Jan 2022 13:46:58 -0800 Subject: [PATCH] Handle empty containers in zarr chunk checks (#5526) --- doc/whats-new.rst | 2 +- xarray/backends/zarr.py | 9 +++++---- xarray/tests/test_backends.py | 14 ++++++++++++++ 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 8896dd62379..50500e3d75f 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -63,7 +63,7 @@ Bug fixes By `Michael Delgado `_. - `dt.season `_ can now handle NaN and NaT. (:pull:`5876`). By `Pierre Loicq `_. - +- Determination of zarr chunks handles empty lists for encoding chunks or variable chunks that occurs in certain cirumstances (:pull:`5526`). By `Chris Roat `_. Documentation ~~~~~~~~~~~~~ diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 8bd343869ff..efb22bef1d4 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -84,7 +84,8 @@ def __getitem__(self, key): def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name, safe_chunks): """ - Given encoding chunks (possibly None) and variable chunks (possibly None) + Given encoding chunks (possibly None or []) and variable chunks + (possibly None or []). """ # zarr chunk spec: @@ -93,7 +94,7 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name, safe_chunks): # if there are no chunks in encoding and the variable data is a numpy # array, then we let zarr use its own heuristics to pick the chunks - if var_chunks is None and enc_chunks is None: + if not var_chunks and not enc_chunks: return None # if there are no chunks in encoding but there are dask chunks, we try to @@ -102,7 +103,7 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name, safe_chunks): # http://zarr.readthedocs.io/en/latest/spec/v1.html#chunks # while dask chunks can be variable sized # http://dask.pydata.org/en/latest/array-design.html#chunks - if var_chunks and enc_chunks is None: + if var_chunks and not enc_chunks: if any(len(set(chunks[:-1])) > 1 for chunks in var_chunks): raise ValueError( "Zarr requires uniform chunk sizes except for final chunk. " @@ -145,7 +146,7 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name, safe_chunks): # if there are chunks in encoding and the variable data is a numpy array, # we use the specified chunks - if var_chunks is None: + if not var_chunks: return enc_chunks_tuple # the hard case diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 356335f47e6..bea90e1fca9 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -2383,6 +2383,20 @@ def test_open_zarr_use_cftime(self): ds_b = xr.open_zarr(store_target, use_cftime=True) assert xr.coding.times.contains_cftime_datetimes(ds_b.time) + def test_write_read_select_write(self): + # Test for https://github.com/pydata/xarray/issues/4084 + ds = create_test_data() + + # NOTE: using self.roundtrip, which uses open_dataset, will not trigger the bug. + with self.create_zarr_target() as initial_store: + ds.to_zarr(initial_store, mode="w") + ds1 = xr.open_zarr(initial_store) + + # Combination of where+squeeze triggers error on write. + ds_sel = ds1.where(ds1.coords["dim3"] == "a", drop=True).squeeze("dim3") + with self.create_zarr_target() as final_store: + ds_sel.to_zarr(final_store, mode="w") + @requires_zarr class TestZarrDictStore(ZarrBase):