From f1e780bee89db4d340c2fce781c9dc4065a368a6 Mon Sep 17 00:00:00 2001 From: David Brochart Date: Sun, 14 Jul 2019 09:21:43 +0200 Subject: [PATCH 1/6] to_zarr(append_dim='dim0') doesn't need mode='a' --- doc/io.rst | 7 ++++--- xarray/backends/api.py | 3 ++- xarray/core/dataset.py | 4 +++- xarray/tests/test_backends.py | 14 ++++++-------- 4 files changed, 15 insertions(+), 13 deletions(-) diff --git a/doc/io.rst b/doc/io.rst index c85f566392d..1039e8bc352 100644 --- a/doc/io.rst +++ b/doc/io.rst @@ -609,8 +609,9 @@ store is already present at that path, an error will be raised, preventing it from being overwritten. To override this behavior and overwrite an existing store, add ``mode='w'`` when invoking ``to_zarr``. -It is also possible to append to an existing store. For that, add ``mode='a'`` -and set ``append_dim`` to the name of the dimension along which to append. +It is also possible to append to an existing store. For that, set +``append_dim`` to the name of the dimension along which to append. ``mode`` +can be omitted as it will internally be set to ``'a'``. .. ipython:: python :suppress: @@ -628,7 +629,7 @@ and set ``append_dim`` to the name of the dimension along which to append. coords={'x': [10, 20, 30, 40], 'y': [1,2,3,4,5], 't': pd.date_range('2001-01-03', periods=2)}) - ds2.to_zarr('path/to/directory.zarr', mode='a', append_dim='t') + ds2.to_zarr('path/to/directory.zarr', append_dim='t') To store variable length strings use ``dtype=object``. diff --git a/xarray/backends/api.py b/xarray/backends/api.py index e0f269eb51f..0f0ba6b64a6 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -1118,7 +1118,8 @@ def to_zarr(dataset, store=None, mode='w-', synchronizer=None, group=None, _validate_dataset_names(dataset) _validate_attrs(dataset) - if mode == "a": + if (mode == 'a') or (append_dim is not None): + mode = 'a' _validate_datatypes_for_zarr_append(dataset) _validate_append_dim_and_encoding(dataset, store, append_dim, group=group, diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index ebc23773738..512ec3a63c9 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1411,6 +1411,8 @@ def to_zarr( Persistence mode: 'w' means create (overwrite if exists); 'w-' means create (fail if exists); 'a' means append (create if does not exist). + When ``append_dim`` is set, ``mode`` can be omitted as it is + internally set to ``'a'``. synchronizer : object, optional Array synchronizer group : str, optional @@ -1426,7 +1428,7 @@ def to_zarr( If True, apply zarr's `consolidate_metadata` function to the store after writing. append_dim: hashable, optional - If mode='a', the dimension on which the data will be appended. + If set, the dimension on which the data will be appended. References ---------- diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index f5c27f2fb92..4ab9d6b263a 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -1654,7 +1654,7 @@ def test_write_persistence_modes(self): # check append mode for append write with self.create_zarr_target() as store_target: ds.to_zarr(store_target, mode='w') - ds_to_append.to_zarr(store_target, mode='a', append_dim='time') + ds_to_append.to_zarr(store_target, append_dim='time') original = xr.concat([ds, ds_to_append], dim='time') assert_identical(original, xr.open_zarr(store_target)) @@ -1690,7 +1690,7 @@ def test_append_write(self): ds, ds_to_append, _ = create_append_test_data() with self.create_zarr_target() as store_target: ds.to_zarr(store_target, mode='w') - ds_to_append.to_zarr(store_target, mode='a', append_dim='time') + ds_to_append.to_zarr(store_target, append_dim='time') original = xr.concat([ds, ds_to_append], dim='time') assert_identical(original, xr.open_zarr(store_target)) @@ -1706,8 +1706,7 @@ def test_append_with_invalid_dim_raises(self): with pytest.raises(ValueError): with self.create_zarr_target() as store_target: ds.to_zarr(store_target, mode='w') - ds_to_append.to_zarr(store_target, mode='a', - append_dim='notvalid') + ds_to_append.to_zarr(store_target, append_dim='notvalid') def test_append_with_append_dim_not_set_raises(self): @@ -1727,8 +1726,7 @@ def test_append_with_existing_encoding_raises(self): with pytest.raises(ValueError): with self.create_zarr_target() as store_target: ds.to_zarr(store_target, mode='w') - ds_to_append.to_zarr(store_target, mode='a', - append_dim='time', + ds_to_append.to_zarr(store_target, append_dim='time', encoding={'da': {'compressor': None}}) def test_check_encoding_is_consistent_after_append(self): @@ -1741,7 +1739,7 @@ def test_check_encoding_is_consistent_after_append(self): compressor = zarr.Blosc() encoding = {'da': {'compressor': compressor}} ds.to_zarr(store_target, mode='w', encoding=encoding) - ds_to_append.to_zarr(store_target, mode='a', append_dim='time') + ds_to_append.to_zarr(store_target, append_dim='time') actual_ds = xr.open_zarr(store_target) actual_encoding = actual_ds['da'].encoding['compressor'] assert actual_encoding.get_config() == compressor.get_config() @@ -1802,7 +1800,7 @@ def test_to_zarr_append_compute_false_roundtrip(self): assert_identical(ds, actual) delayed_obj = self.save(ds_to_append, store, compute=False, - mode='a', append_dim='time') + append_dim='time') assert isinstance(delayed_obj, Delayed) with pytest.raises(AssertionError): From c3895657e294dd499c5578c109eb868f8cd212cf Mon Sep 17 00:00:00 2001 From: David Brochart Date: Sun, 14 Jul 2019 09:34:23 +0200 Subject: [PATCH 2/6] Updated whats-new.rst --- doc/whats-new.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index f41a9d78048..76d6f94f54e 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -27,6 +27,10 @@ New functions/methods Enhancements ~~~~~~~~~~~~ +- In :py:meth:`~xarray.Dataset.to_zarr`, passing ``mode`` is not mandatory if + ``append_dim`` is set, as it will automatically be set to ``'a'`` internally. + By `David Brochart `_. + Bug fixes ~~~~~~~~~ From ef1aac152f6322d03065cf96320f6d5220176df0 Mon Sep 17 00:00:00 2001 From: David Brochart Date: Tue, 16 Jul 2019 09:36:50 +0200 Subject: [PATCH 3/6] Default mode=None allows checking consistency with append_dim --- xarray/backends/api.py | 12 ++++++++++-- xarray/core/dataset.py | 4 ++-- xarray/tests/test_backends.py | 11 +++++++++++ 3 files changed, 23 insertions(+), 4 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 0f0ba6b64a6..328173a3ffd 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -1102,7 +1102,7 @@ def _validate_append_dim_and_encoding(ds_to_append, store, append_dim, ) -def to_zarr(dataset, store=None, mode='w-', synchronizer=None, group=None, +def to_zarr(dataset, store=None, mode=None, synchronizer=None, group=None, encoding=None, compute=True, consolidated=False, append_dim=None): """This function creates an appropriate datastore for writing a dataset to a zarr ztore @@ -1119,12 +1119,20 @@ def to_zarr(dataset, store=None, mode='w-', synchronizer=None, group=None, _validate_attrs(dataset) if (mode == 'a') or (append_dim is not None): - mode = 'a' + if mode is None: + mode = 'a' + elif mode != 'a': + raise ValueError( + "append_dim was set along with mode='{}', either set " + "mode='a' or don't set it.".format(mode) + ) _validate_datatypes_for_zarr_append(dataset) _validate_append_dim_and_encoding(dataset, store, append_dim, group=group, consolidated=consolidated, encoding=encoding) + elif mode is None: + mode = 'w-' zstore = backends.ZarrStore.open_group(store=store, mode=mode, synchronizer=synchronizer, diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 512ec3a63c9..d35e7f1e41a 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1389,7 +1389,7 @@ def to_netcdf( def to_zarr( self, store: Union[MutableMapping, str, Path] = None, - mode: str = 'w-', + mode: str = None, synchronizer=None, group: str = None, encoding: Mapping = None, @@ -1436,7 +1436,7 @@ def to_zarr( """ if encoding is None: encoding = {} - if mode not in ['w', 'w-', 'a']: + if mode not in ['w', 'w-', 'a', None]: # TODO: figure out how to handle 'r+' raise ValueError("The only supported options for mode are 'w'," "'w-' and 'a'.") diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 4ab9d6b263a..572c18272df 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -1718,6 +1718,17 @@ def test_append_with_append_dim_not_set_raises(self): ds.to_zarr(store_target, mode='w') ds_to_append.to_zarr(store_target, mode='a') + def test_append_with_mode_not_a_raises(self): + + ds, ds_to_append, _ = create_append_test_data() + + # check failure when append_dim is set and mode != 'a' + with pytest.raises(ValueError): + with self.create_zarr_target() as store_target: + ds.to_zarr(store_target, mode='w') + ds_to_append.to_zarr(store_target, mode='w', + append_dim='time') + def test_append_with_existing_encoding_raises(self): ds, ds_to_append, _ = create_append_test_data() From 69c3aa0307fcc5fd71ab2720874e114b4a6c14e4 Mon Sep 17 00:00:00 2001 From: David Brochart Date: Tue, 16 Jul 2019 09:38:50 +0200 Subject: [PATCH 4/6] Fix PEP8 issue --- xarray/backends/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 328173a3ffd..35cd494b481 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -1125,7 +1125,7 @@ def to_zarr(dataset, store=None, mode=None, synchronizer=None, group=None, raise ValueError( "append_dim was set along with mode='{}', either set " "mode='a' or don't set it.".format(mode) - ) + ) _validate_datatypes_for_zarr_append(dataset) _validate_append_dim_and_encoding(dataset, store, append_dim, group=group, From 597e37a3f31dc0c70166b410a405c7d0dc28a927 Mon Sep 17 00:00:00 2001 From: David Brochart Date: Tue, 16 Jul 2019 10:04:11 +0200 Subject: [PATCH 5/6] Moved mode handling from api.py to dataset.py --- xarray/backends/api.py | 11 +---------- xarray/core/dataset.py | 19 +++++++++++++++---- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 35cd494b481..21d91a886af 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -1118,21 +1118,12 @@ def to_zarr(dataset, store=None, mode=None, synchronizer=None, group=None, _validate_dataset_names(dataset) _validate_attrs(dataset) - if (mode == 'a') or (append_dim is not None): - if mode is None: - mode = 'a' - elif mode != 'a': - raise ValueError( - "append_dim was set along with mode='{}', either set " - "mode='a' or don't set it.".format(mode) - ) + if mode == 'a': _validate_datatypes_for_zarr_append(dataset) _validate_append_dim_and_encoding(dataset, store, append_dim, group=group, consolidated=consolidated, encoding=encoding) - elif mode is None: - mode = 'w-' zstore = backends.ZarrStore.open_group(store=store, mode=mode, synchronizer=synchronizer, diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index d35e7f1e41a..64780f2a7d9 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1407,12 +1407,13 @@ def to_zarr( ---------- store : MutableMapping, str or Path, optional Store or path to directory in file system. - mode : {'w', 'w-', 'a'} + mode : {'w', 'w-', 'a', None} Persistence mode: 'w' means create (overwrite if exists); 'w-' means create (fail if exists); 'a' means append (create if does not exist). - When ``append_dim`` is set, ``mode`` can be omitted as it is - internally set to ``'a'``. + If ``append_dim`` is set, ``mode`` can be omitted as it is + internally set to ``'a'``. Otherwise, ``mode`` will default to + `w-` if not set. synchronizer : object, optional Array synchronizer group : str, optional @@ -1436,7 +1437,17 @@ def to_zarr( """ if encoding is None: encoding = {} - if mode not in ['w', 'w-', 'a', None]: + if (mode == 'a') or (append_dim is not None): + if mode is None: + mode = 'a' + elif mode != 'a': + raise ValueError( + "append_dim was set along with mode='{}', either set " + "mode='a' or don't set it.".format(mode) + ) + elif mode is None: + mode = 'w-' + if mode not in ['w', 'w-', 'a']: # TODO: figure out how to handle 'r+' raise ValueError("The only supported options for mode are 'w'," "'w-' and 'a'.") From 9c664e09f538c98f16010bc277e5d868973e8c53 Mon Sep 17 00:00:00 2001 From: David Brochart Date: Tue, 16 Jul 2019 10:12:47 +0200 Subject: [PATCH 6/6] Fix closing bracket indentation --- xarray/core/dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 64780f2a7d9..7cdbdce1ec7 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1444,7 +1444,7 @@ def to_zarr( raise ValueError( "append_dim was set along with mode='{}', either set " "mode='a' or don't set it.".format(mode) - ) + ) elif mode is None: mode = 'w-' if mode not in ['w', 'w-', 'a']: