From 3bfc9618955e671a8faa19d6ca9752ee94af8ef5 Mon Sep 17 00:00:00 2001 From: Chris Roat <1053153+chrisroat@users.noreply.github.com> Date: Thu, 24 Jun 2021 11:41:25 -0700 Subject: [PATCH 1/2] Handle empty containers in zarr chunk checks --- xarray/backends/zarr.py | 6 +++--- xarray/tests/test_backends.py | 14 ++++++++++++++ 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index d492e3dfb92..c00d20fcc71 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -95,7 +95,7 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name, safe_chunks): # if there are no chunks in encoding and the variable data is a numpy # array, then we let zarr use its own heuristics to pick the chunks - if var_chunks is None and enc_chunks is None: + if not var_chunks and not enc_chunks: return None # if there are no chunks in encoding but there are dask chunks, we try to @@ -104,7 +104,7 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name, safe_chunks): # http://zarr.readthedocs.io/en/latest/spec/v1.html#chunks # while dask chunks can be variable sized # http://dask.pydata.org/en/latest/array-design.html#chunks - if var_chunks and enc_chunks is None: + if var_chunks and not enc_chunks: if any(len(set(chunks[:-1])) > 1 for chunks in var_chunks): raise ValueError( "Zarr requires uniform chunk sizes except for final chunk. 
" @@ -147,7 +147,7 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name, safe_chunks): # if there are chunks in encoding and the variable data is a numpy array, # we use the specified chunks - if var_chunks is None: + if not var_chunks: return enc_chunks_tuple # the hard case diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 5079cd390f1..68f1f81ebc2 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -2379,6 +2379,20 @@ def test_open_zarr_use_cftime(self): ds_b = xr.open_zarr(store_target, use_cftime=True) assert xr.coding.times.contains_cftime_datetimes(ds_b.time) + def test_write_read_select_write(self): + # Test for https://github.com/pydata/xarray/issues/4084 + ds = create_test_data() + + # NOTE: using self.roundtrip, which uses open_dataset, will not trigger the bug. + with self.create_zarr_target() as initial_store: + ds.to_zarr(initial_store, mode="w") + ds1 = xr.open_zarr(initial_store) + + # Combination of where+squeeze triggers error on write. + ds_sel = ds1.where(ds1.coords["dim3"] == "a", drop=True).squeeze("dim3") + with self.create_zarr_target() as final_store: + ds_sel.to_zarr(final_store, mode="w") + @requires_zarr class TestZarrDictStore(ZarrBase): From 85a993822b76454227e32598778e82193e9b4a6f Mon Sep 17 00:00:00 2001 From: Chris Roat <1053153+chrisroat@users.noreply.github.com> Date: Sat, 22 Jan 2022 19:17:57 -0800 Subject: [PATCH 2/2] Doc update --- doc/whats-new.rst | 2 +- xarray/backends/zarr.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 8896dd62379..50500e3d75f 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -63,7 +63,7 @@ Bug fixes By `Michael Delgado `_. - `dt.season `_ can now handle NaN and NaT. (:pull:`5876`). By `Pierre Loicq `_. - +- Determination of zarr chunks handles empty lists for encoding chunks or variable chunks that occur in certain circumstances (:pull:`5526`). 
By `Chris Roat `_. Documentation ~~~~~~~~~~~~~ diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 699307f746c..efb22bef1d4 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -84,7 +84,8 @@ def __getitem__(self, key): def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name, safe_chunks): """ - Given encoding chunks (possibly None) and variable chunks (possibly None) + Given encoding chunks (possibly None or []) and variable chunks + (possibly None or []). """ # zarr chunk spec: