From 88eb1ec4d75b8fb8030b302614c1a60db06d1dbe Mon Sep 17 00:00:00 2001 From: Ryan Abernathey Date: Wed, 8 Nov 2023 16:14:35 +0100 Subject: [PATCH] tweak variable writing behavior for zarr with append_dim --- xarray/backends/zarr.py | 15 ++++++++++++++- xarray/tests/test_backends.py | 13 +++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 2b41fa5224e..97692349cf4 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -628,8 +628,21 @@ def store( self.set_attributes(attributes) self.set_dimensions(variables_encoded, unlimited_dims=unlimited_dims) + # if we are appending to an append_dim, only write either + # - new variables not already present, OR + # - variables with the append_dim in their dimensions + # We do NOT overwrite other variables. + if self._append_dim is not None: + variables_to_set = { + k: v + for k, v in variables_encoded.items() + if (k not in existing_variable_names) or (self._append_dim in v.dims) + } + else: + variables_to_set = variables_encoded + self.set_variables( - variables_encoded, check_encoding_set, writer, unlimited_dims=unlimited_dims + variables_to_set, check_encoding_set, writer, unlimited_dims=unlimited_dims ) if self._consolidate_on_close: zarr.consolidate_metadata(self.zarr_group.store) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 73352c3f7e1..24679bdf79c 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -2382,6 +2382,19 @@ def test_append_with_new_variable(self) -> None: xr.open_dataset(store_target, engine="zarr", **self.version_kwargs), ) + def test_append_with_append_dim_no_overwrite(self) -> None: + ds, ds_to_append, _ = create_append_test_data() + with self.create_zarr_target() as store_target: + ds.to_zarr(store_target, mode="w", **self.version_kwargs) + original = xr.concat([ds, ds_to_append], dim="time") + + # overwrite a coordinate; this will not get written to the store + # because it does not have the append_dim as a dim + ds_to_append.lon.data[:] = -999 + ds_to_append.to_zarr(store_target, append_dim="time", **self.version_kwargs) + actual = xr.open_dataset(store_target, engine="zarr", **self.version_kwargs) + assert_identical(original, actual) + @requires_dask def test_to_zarr_compute_false_roundtrip(self) -> None: from dask.delayed import Delayed