Skip to content

Commit

Permalink
Implement dimension_separator for Python storage classes (See #715) (#716)
Browse files Browse the repository at this point in the history

* Implement `dimension_separator` for Python storage classes (See #715)

* All top-level storage classes now take an optional `dimension_separator`
  parameter which defaults to `None`, but can also be `.` or `/`.

* A ValueError is raised at normalization time if this is not the case.

* `None`s are normalized to the default of `.` in all except the
  NestedDirectoryStore case.

* The value is stored as `self._dimension_separator` on participating classes
  so that array creation can lookup the value.

* This value deprecates the `key_separator` value from FSStore.

* Wrapper classes like LRUCacheStore and ConsolidatedMetadataStore *do not*
  follow this pattern and instead rely on the value in the underlying store.

* Only store `dimension_separator` if not None

All hexdigest tests were failing due to updated array metadata.
In the case of NestedDirectoryStore and N5Store, this is necessary.
If the dimension_separator key is excluded from the .zarray JSON
when None, then most standard tests continue to pass.

* Fix doctests with optional key

* Add separator to missed LDBMStore

* Fix linting issue

* De-deprecate key_separator as public, non-null API

* Add test for normalize_dim_sep to appease codecov

* More tests for codecov

* Remove key from n5 array metadata

* Fix minor typo

* Cleanup DIGESTS in test_core.py

* Fix cut-n-paste error in test_utils.py

* And hopefully one last codecov fix

* Apply review changes

* Add 2.8.0 release notes
  • Loading branch information
joshmoore authored Apr 24, 2021
1 parent 4d4d833 commit 2d0acfb
Show file tree
Hide file tree
Showing 11 changed files with 341 additions and 92 deletions.
14 changes: 14 additions & 0 deletions docs/release.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,20 @@
Release notes
=============

.. _release_2.8.0:

2.8.0
-----

V2 Specification Update
~~~~~~~~~~~~~~~~~~~~~~~

* Introduce optional dimension_separator .zarray key for nested chunks.
  By :user:`Josh Moore <joshmoore>`; :issue:`715`, :issue:`716`.

.. _release_2.7.0:


.. _release_2.7.1:

2.7.1
Expand Down
2 changes: 2 additions & 0 deletions zarr/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ class Array:
dtype
compression
compression_opts
dimension_separator
fill_value
order
synchronizer
Expand Down Expand Up @@ -194,6 +195,7 @@ def _load_metadata_nosync(self):
self._dtype = meta['dtype']
self._fill_value = meta['fill_value']
self._order = meta['order']
self._dimension_separator = meta.get('dimension_separator', '.')

# setup compressor
config = meta['compressor']
Expand Down
14 changes: 12 additions & 2 deletions zarr/creation.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,14 @@
from zarr.storage import (DirectoryStore, ZipStore, contains_array,
contains_group, default_compressor, init_array,
normalize_storage_path, FSStore)
from zarr.util import normalize_dimension_separator


def create(shape, chunks=True, dtype=None, compressor='default',
fill_value=0, order='C', store=None, synchronizer=None,
overwrite=False, path=None, chunk_store=None, filters=None,
cache_metadata=True, cache_attrs=True, read_only=False,
object_codec=None, **kwargs):
object_codec=None, dimension_separator=None, **kwargs):
"""Create an array.
Parameters
Expand Down Expand Up @@ -66,6 +67,9 @@ def create(shape, chunks=True, dtype=None, compressor='default',
True if array should be protected against modification.
object_codec : Codec, optional
A codec to encode object arrays, only needed if dtype=object.
dimension_separator : {'.', '/'}, optional
Separator placed between the dimensions of a chunk.
.. versionadded:: 2.8
Returns
-------
Expand Down Expand Up @@ -117,10 +121,16 @@ def create(shape, chunks=True, dtype=None, compressor='default',
# API compatibility with h5py
compressor, fill_value = _kwargs_compat(compressor, fill_value, kwargs)

# optional array metadata
if dimension_separator is None:
dimension_separator = getattr(store, "_dimension_separator", None)
dimension_separator = normalize_dimension_separator(dimension_separator)

# initialize array metadata
init_array(store, shape=shape, chunks=chunks, dtype=dtype, compressor=compressor,
fill_value=fill_value, order=order, overwrite=overwrite, path=path,
chunk_store=chunk_store, filters=filters, object_codec=object_codec)
chunk_store=chunk_store, filters=filters, object_codec=object_codec,
dimension_separator=dimension_separator)

# instantiate array
z = Array(store, path=path, chunk_store=chunk_store, synchronizer=synchronizer,
Expand Down
2 changes: 2 additions & 0 deletions zarr/hierarchy.py
Original file line number Diff line number Diff line change
Expand Up @@ -783,6 +783,8 @@ def create_dataset(self, name, **kwargs):
lifetime of the object. If False, array metadata will be reloaded
prior to all data access and modification operations (may incur
overhead depending on storage and data access pattern).
dimension_separator : {'.', '/'}, optional
Separator placed between the dimensions of a chunk.
Returns
-------
Expand Down
9 changes: 9 additions & 0 deletions zarr/meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,9 @@ def decode_array_metadata(s: Union[MappingType, str]) -> MappingType[str, Any]:
fill_value=fill_value,
order=meta['order'],
filters=meta['filters'],
dimension_separator=meta.get('dimension_separator', '.'),
)

except Exception as e:
raise MetadataError('error decoding metadata: %s' % e)
else:
Expand All @@ -62,6 +64,9 @@ def encode_array_metadata(meta: MappingType[str, Any]) -> bytes:
sdshape = ()
if dtype.subdtype is not None:
dtype, sdshape = dtype.subdtype

dimension_separator = meta.get('dimension_separator')

meta = dict(
zarr_format=ZARR_FORMAT,
shape=meta['shape'] + sdshape,
Expand All @@ -72,6 +77,10 @@ def encode_array_metadata(meta: MappingType[str, Any]) -> bytes:
order=meta['order'],
filters=meta['filters'],
)

if dimension_separator:
meta['dimension_separator'] = dimension_separator

return json_dumps(meta)


Expand Down
3 changes: 3 additions & 0 deletions zarr/n5.py
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,9 @@ def array_metadata_to_n5(array_metadata):
compressor_config = compressor_config_to_n5(compressor_config)
array_metadata['compression'] = compressor_config

if 'dimension_separator' in array_metadata:
del array_metadata['dimension_separator']

return array_metadata


Expand Down
Loading

0 comments on commit 2d0acfb

Please sign in to comment.