From e4c4ebf5c00f49dea3b8fdad0062c273d278536a Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Thu, 16 May 2024 13:46:15 -0700 Subject: [PATCH 01/11] feature(api): add top level synchronous and asynchronous api --- src/zarr/api/asynchronous.py | 532 +++++++++++++++++++++++++++++++++++ src/zarr/api/synchronous.py | 359 +++++++++++++++++++++++ src/zarr/common.py | 5 +- src/zarr/store/core.py | 7 +- 4 files changed, 899 insertions(+), 4 deletions(-) create mode 100644 src/zarr/api/asynchronous.py create mode 100644 src/zarr/api/synchronous.py diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py new file mode 100644 index 0000000000..122d8528ac --- /dev/null +++ b/src/zarr/api/asynchronous.py @@ -0,0 +1,532 @@ +from __future__ import annotations + +import asyncio +import warnings +from typing import Union, Any, Literal, Iterable + +import numpy as np +import numpy.typing as npt + +from zarr.abc.codec import Codec +from zarr.array import AsyncArray, Array +from zarr.common import ZarrFormat, MEMORY_ORDER, JSON, ChunkCoords +from zarr.group import AsyncGroup +from zarr.metadata import ChunkKeyEncoding +from zarr.store import ( + StoreLike, + make_store_path, +) + +ShapeLike = Union[int, tuple[int, ...]] +ArrayLike = Union[AsyncArray, Array, npt.NDArray[Any]] + + +def _get_shape_chunks(a: ArrayLike | Any) -> tuple[ShapeLike | None, ChunkCoords | None]: + shape = None + chunks = None + + if hasattr(a, "shape") and isinstance(a.shape, tuple): + shape = a.shape + + if hasattr(a, "chunks") and isinstance(a.chunks, tuple) and (len(a.chunks) == len(a.shape)): + chunks = a.chunks + + elif hasattr(a, "chunklen"): + # bcolz carray + chunks = (a.chunklen,) + a.shape[1:] + + return shape, chunks + + +def _like_args(a: ArrayLike, kwargs: dict[str, Any]) -> None: + shape, chunks = _get_shape_chunks(a) + if shape is not None: + kwargs.setdefault("shape", shape) + if chunks is not None: + kwargs.setdefault("chunks", chunks) + + if hasattr(a, "dtype"): + 
kwargs.setdefault("dtype", a.dtype) + + if isinstance(a, AsyncArray): + kwargs.setdefault("order", a.order) + if a.metadata.zarr_format == 2: + # TODO: make this v2/v3 aware + kwargs.setdefault("compressor", a.metadata.compressor) + kwargs.setdefault("filters", a.metadata.filters) + + elif a.metadata.zarr_format == 3: + kwargs.setdefault("codecs", a.codecs) + else: + raise ValueError(f"Unsupported zarr format: {a.metadata.zarr_format}") + else: + # TODO: set default values compressor/codecs + # to do this, we may need to evaluate if this is a v2 or v3 array + # kwargs.setdefault("compressor", "default") + pass + + +async def consolidate_metadata(*args: Any, **kwargs: Any) -> AsyncGroup: + raise NotImplementedError + + +async def copy(*args: Any, **kwargs: Any) -> tuple[int, int, int]: + raise NotImplementedError + + +async def copy_all(*args: Any, **kwargs: Any) -> tuple[int, int, int]: + raise NotImplementedError + + +async def copy_store(*args: Any, **kwargs: Any) -> tuple[int, int, int]: + raise NotImplementedError + + +async def load( + store: StoreLike, zarr_version: ZarrFormat | None = None, path: str | None = None +) -> Union[AsyncArray, AsyncGroup]: + """Load data from an array or group into memory. + + Parameters + ---------- + store : MutableMapping or string + Store or path to directory in file system or name of zip file. + path : str or None, optional + The path within the store from which to load. + + Returns + ------- + out + If the path contains an array, out will be a numpy array. If the path contains + a group, out will be a dict-like object where keys are array names and values + are numpy arrays. + + See Also + -------- + save, savez + + Notes + ----- + If loading data from a group of arrays, data will not be immediately loaded into + memory. Rather, arrays will be loaded into memory as they are requested. 
+ """ + if zarr_version is not None: + warnings.warn( + "zarr_version is deprecated and no longer required in load", DeprecationWarning + ) + obj = await open(store, path=path) + if isinstance(obj, AsyncArray): + return await obj.getitem(slice(None)) + else: + raise NotImplementedError("loading groups not yet supported") + + +async def open( + store: StoreLike | None = None, + mode: str = "a", + *, + zarr_version: ZarrFormat | None = None, + zarr_format: ZarrFormat | None = None, + path: str | None = None, + **kwargs: Any, # TODO: type kwargs as valid args to open_array +) -> Union[AsyncArray, AsyncGroup]: + """Convenience function to open a group or array using file-mode-like semantics. + + Parameters + ---------- + store : Store or string, optional + Store or path to directory in file system or name of zip file. + mode : {'r', 'r+', 'a', 'w', 'w-'}, optional + Persistence mode: 'r' means read only (must exist); 'r+' means + read/write (must exist); 'a' means read/write (create if doesn't + exist); 'w' means create (overwrite if exists); 'w-' means create + (fail if exists). + zarr_format : {2, 3, None}, optional + The zarr format to use when saving. + path : str or None, optional + The path within the store to open. + **kwargs + Additional parameters are passed through to :func:`zarr.creation.open_array` or + :func:`zarr.hierarchy.open_group`. + + Returns + ------- + z : AsyncArray or AsyncGroup + Array or group, depending on what exists in the given store. 
+ """ + if zarr_version is not None: + warnings.warn("zarr_version is deprecated, use zarr_format", DeprecationWarning) + zarr_format = zarr_version + + store_path = make_store_path(store) + + if path is not None: + store_path = store_path / path + + warnings.warn("TODO: mode is ignored", RuntimeWarning) + + try: + return await AsyncArray.open(store_path, zarr_format=zarr_format, **kwargs) + except KeyError: + return await AsyncGroup.open(store_path, zarr_format=zarr_format, **kwargs) + + +async def open_consolidated(*args: Any, **kwargs: Any) -> AsyncGroup: + raise NotImplementedError + + +async def save( + store: StoreLike, + *args: npt.ArrayLike, + zarr_version: ZarrFormat | None = None, + zarr_format: ZarrFormat | None = None, + path: str | None = None, + **kwargs: Any, # TODO: type kwargs as valid args to save +) -> None: + """Convenience function to save an array or group of arrays to the local file system. + + Parameters + ---------- + store : Store or string + Store or path to directory in file system or name of zip file. + args : ndarray + NumPy arrays with data to save. + zarr_format : {2, 3, None}, optional + The zarr format to use when saving. + path : str or None, optional + The path within the group where the arrays will be saved. + kwargs + NumPy arrays with data to save. 
+ """ + if zarr_version is not None: + warnings.warn("zarr_version is deprecated, use zarr_format", DeprecationWarning) + zarr_format = zarr_version + if len(args) == 0 and len(kwargs) == 0: + raise ValueError("at least one array must be provided") + if len(args) == 1 and len(kwargs) == 0: + await save_array(store, args[0], zarr_format=zarr_format, path=path) + else: + await save_group(store, *args, zarr_format=zarr_format, path=path, **kwargs) + + +async def save_array( + store: StoreLike, + arr: npt.ArrayLike, + *, + zarr_version: ZarrFormat | None = None, + zarr_format: ZarrFormat | None = None, + path: str | None = None, + **kwargs: Any, # TODO: type kwargs as valid args to create +) -> None: + """Convenience function to save a NumPy array to the local file system, following a + similar API to the NumPy save() function. + + Parameters + ---------- + store : Store or string + Store or path to directory in file system or name of zip file. + arr : ndarray + NumPy array with data to save. + zarr_format : {2, 3, None}, optional + The zarr format to use when saving. + path : str or None, optional + The path within the store where the array will be saved. + kwargs + Passed through to :func:`create`, e.g., compressor. + """ + if zarr_version is not None: + warnings.warn("zarr_version is deprecated, use zarr_format", DeprecationWarning) + zarr_format = zarr_version + + if zarr_format is None: + zarr_format = 3 # TODO: perhaps this default should be set via config? 
+ + store_path = make_store_path(store) + if path is not None: + store_path = store_path / path + new = await AsyncArray.create(store_path, zarr_format=zarr_format, **kwargs) + await new.setitem(slice(None), arr) + + +async def save_group( + store: StoreLike, + *args: npt.ArrayLike, + zarr_version: ZarrFormat | None = None, + zarr_format: ZarrFormat | None = None, + path: str | None = None, + **kwargs: npt.ArrayLike, +) -> None: + """Convenience function to save several NumPy arrays to the local file system, following a + similar API to the NumPy savez()/savez_compressed() functions. + + Parameters + ---------- + store : Store or string + Store or path to directory in file system or name of zip file. + args : ndarray + NumPy arrays with data to save. + zarr_format : {2, 3, None}, optional + The zarr format to use when saving. + path : str or None, optional + Path within the store where the group will be saved. + kwargs + NumPy arrays with data to save. + """ + if zarr_version is not None: + warnings.warn("zarr_version is deprecated, use zarr_format", DeprecationWarning) + zarr_format = zarr_version + + if len(args) == 0 and len(kwargs) == 0: + raise ValueError("at least one array must be provided") + aws = [] + for i, arr in enumerate(args): + aws.append(save_array(store, arr, zarr_format=zarr_format, path=f"{path}/arr_{i}")) + for k, arr in kwargs.items(): + aws.append(save_array(store, arr, zarr_format=zarr_format, path=f"{path}/{k}")) + await asyncio.gather(*aws) + + +# async def tree(*args: Any, **kwargs: Any) -> "TreeViewer": +# raise NotImplementedError + + +async def array(data: npt.ArrayLike, **kwargs: Any) -> AsyncArray: + """Create an array filled with `data`. + + The `data` argument should be a array-like object. For + other parameter definitions see :func:`zarr.api.asynchronous.create`. 
+ """ + + # ensure data is array-like + if not hasattr(data, "shape") or not hasattr(data, "dtype"): + data = np.asanyarray(data) + + # setup dtype + kw_dtype = kwargs.get("dtype") + if kw_dtype is None: + kwargs["dtype"] = data.dtype + else: + kwargs["dtype"] = kw_dtype + + # setup shape and chunks + data_shape, data_chunks = _get_shape_chunks(data) + kwargs["shape"] = data_shape + kw_chunks = kwargs.get("chunks") + if kw_chunks is None: + kwargs["chunks"] = data_chunks + else: + kwargs["chunks"] = kw_chunks + + # pop read-only to apply after storing the data + # read_only = kwargs.pop("read_only", False) + + # instantiate array + z = await create(**kwargs) + + # fill with data + await z.setitem(slice(None), data) + + # set read_only property afterwards + # z.read_only = read_only + + return z + + +# TODO: require kwargs +async def create( + shape: ShapeLike, + chunks: Union[int, tuple[int, ...], bool] = True, + dtype: npt.DTypeLike | None = None, + compressor: str = "default", + fill_value: int | None = 0, + order: MEMORY_ORDER = "C", + store: StoreLike | None = None, + # synchronizer: Synchronizer | None = None, + # overwrite: bool = False, + path: str | None = None, + # chunk_store: StoreLike | None = None, + # filters: Sequence[Codec] | None = None, + # cache_metadata: bool = True, + # cache_attrs: bool = True, + # read_only: bool = False, + # object_codec: Codec | None = None, + # dimension_separator: DIMENSION_SEPARATOR | None = None, + # write_empty_chunks: bool = True, + *, + zarr_version: ZarrFormat | None = None, + # meta_array: MetaArray | None = None, + # storage_transformers: Sequence[StorageTransformer] = (), + **kwargs: Any, # TODO: type kwargs as valid args to AsyncArray.Create +) -> AsyncArray: + store_path = make_store_path(store) + if path is not None: + store_path = store_path / path + + raise NotImplementedError + # TODO: finish when Norman's PR goes in + # return await AsyncArray.create(store_path, chunks=chunks, dtype=dtype, 
zarr_version=zarr_version, **kwargs) + + +async def empty(shape: ShapeLike, **kwargs: Any) -> AsyncArray: + """Create an empty array. + + For parameter definitions see :func:`zarr.api.asynchronous.create`. + + Notes + ----- + The contents of an empty Zarr array are not defined. On attempting to + retrieve data from an empty Zarr array, any values may be returned, + and these are not guaranteed to be stable from one access to the next. + """ + return await create(shape=shape, fill_value=None, **kwargs) + + +async def empty_like(a: ArrayLike, **kwargs: Any) -> AsyncArray: + """Create an empty array like `a`.""" + _like_args(a, kwargs) + return await empty(**kwargs) + + +# TODO: add type annotations for fill_value and kwargs +async def full(shape: ShapeLike, fill_value: Any, **kwargs: Any) -> AsyncArray: + """Create an array, with `fill_value` being used as the default value for + uninitialized portions of the array. + + For parameter definitions see :func:`zarr.api.asynchronous.create`. + """ + return await create(shape=shape, fill_value=fill_value, **kwargs) + + +# TODO: add type annotations for kwargs +async def full_like(a: ArrayLike, **kwargs: Any) -> AsyncArray: + """Create a filled array like `a`.""" + _like_args(a, kwargs) + if isinstance(a, AsyncArray): + kwargs.setdefault("fill_value", a.metadata.fill_value) + return await full(**kwargs) + + +async def ones(shape: ShapeLike, **kwargs: Any) -> AsyncArray: + """Create an array, with one being used as the default value for + uninitialized portions of the array. + + For parameter definitions see :func:`zarr.creation.create`. + + Returns + ------- + Array + The new array. 
+ """ + return await create(shape=shape, fill_value=1, **kwargs) + + +async def ones_like(a: ArrayLike, **kwargs: Any) -> AsyncArray: + """Create an array of ones like `a`.""" + _like_args(a, kwargs) + return await ones(**kwargs) + + +async def open_array( + store: StoreLike | None = None, + mode: str = "a", + shape: ShapeLike | None = None, + chunks: Union[int, tuple[int, ...], bool] = True, # v2 only + dtype: npt.DTypeLike | None = None, + compressor: dict[str, JSON] | None = None, # v2 only + fill_value: Any | None = 0, # note: default is 0 here and None on Array.create + order: Literal["C", "F"] | None = "C", # deprecate in favor of runtime config? + synchronizer: Any = None, # deprecate and catch + filters: list[dict[str, JSON]] | None = None, # v2 only + cache_metadata: bool = True, # not implemented + cache_attrs: bool = True, # not implemented + path: str | None = None, + object_codec: Any = None, # not implemented + chunk_store: StoreLike | None = None, # not implemented + storage_options: dict[str, Any] | None = None, # not implemented + partial_decompress: bool = False, # not implemented + write_empty_chunks: bool = True, # not implemented + *, + zarr_version: ZarrFormat | None = None, # deprecate in favor of zarr_format + zarr_format: ZarrFormat | None = None, + dimension_separator: Literal[".", "/"] | None = None, # v2 only + meta_array: Any | None = None, # not implemented + attributes: dict[str, JSON] | None = None, + # v3 only + chunk_shape: ChunkCoords | None = None, + chunk_key_encoding: ( + ChunkKeyEncoding + | tuple[Literal["default"], Literal[".", "/"]] + | tuple[Literal["v2"], Literal[".", "/"]] + | None + ) = None, + codecs: Iterable[Codec | dict[str, JSON]] | None = None, + dimension_names: Iterable[str] | None = None, + **kwargs: Any, # TODO: type kwargs as valid args to save +) -> AsyncArray: + """Open an array using file-mode-like semantics. + + Parameters + ---------- + TODO + + Returns + ------- + AsyncArray + The opened array. 
+ """ + + store_path = make_store_path(store) + if path is not None: + store_path = store_path / path + + try: + return await AsyncArray.open(store_path) + except KeyError: + pass + + warnings.warn("mode is ignored", RuntimeWarning) + + if zarr_version is not None: + zarr_format = zarr_version + warnings.warn("zarr_format is deprecated, use zarr_format instead", DeprecationWarning) + if zarr_format is None: + zarr_format = 3 # TODO: perhaps this default should be set via config? + + # TODO: finish when Norman's PR goes in + return await AsyncArray.create(store_path, zarr_format=zarr_format, **kwargs) + + +async def open_like(a: ArrayLike, path: str, **kwargs: Any) -> AsyncArray: + """Open a persistent array like `a`. + + Parameters + ---------- + a : Array + The shape and data-type of a define these same attributes of the returned array. + path : str + The path to the new array. + **kwargs + Any keyword arguments to pass to the array constructor. + + Returns + ------- + AsyncArray + The opened array. + """ + _like_args(a, kwargs) + if isinstance(a, (AsyncArray, Array)): + kwargs.setdefault("fill_value", a.metadata.fill_value) + return await open_array(path, **kwargs) + + +async def zeros(shape: ShapeLike, **kwargs: Any) -> AsyncArray: + """Create an array, with zero being used as the default value for + uninitialized portions of the array. 
+ """ + return await create(shape=shape, fill_value=0, **kwargs) + + +async def zeros_like(a: ArrayLike, **kwargs: Any) -> AsyncArray: + """Create an array of zeros like `a`.""" + _like_args(a, kwargs) + return await zeros(**kwargs) diff --git a/src/zarr/api/synchronous.py b/src/zarr/api/synchronous.py new file mode 100644 index 0000000000..a48e276a85 --- /dev/null +++ b/src/zarr/api/synchronous.py @@ -0,0 +1,359 @@ +from __future__ import annotations + +from typing import Union, Any +import numpy.typing as npt + +from zarr.store import StoreLike +from zarr.array import Array +from zarr.group import Group +import zarr.api.asynchronous as async_api +from zarr.sync import sync +from zarr.common import ZarrFormat + + +def consolidate_metadata(*args: Any, **kwargs: Any) -> Group: + # TODO + return Group(sync(async_api.consolidate_metadata(*args, **kwargs))) + + +def copy(*args: Any, **kwargs: Any) -> tuple[int, int, int]: + # TODO + return sync(async_api.copy(*args, **kwargs)) + + +def copy_all(*args: Any, **kwargs: Any) -> tuple[int, int, int]: + # TODO + return sync(async_api.copy_all(*args, **kwargs)) + + +def copy_store(*args: Any, **kwargs: Any) -> tuple[int, int, int]: + # TODO + return sync(async_api.copy_store(*args, **kwargs)) + + +def load( + store: StoreLike, zarr_version: ZarrFormat | None = None, path: str | None = None +) -> Union[npt.ArrayLike, dict[str, npt.ArrayLike]]: + """ + Load data from an array or group into memory. + + Parameters + ---------- + store : MutableMapping or string + Store or path to directory in file system or name of zip file. + path : str or None, optional + The path within the store from which to load. + + Returns + ------- + out + If the path contains an array, out will be a numpy array. If the path contains + a group, out will be a dict-like object where keys are array names and values + are numpy arrays. 
+ + See Also + -------- + save, savez + + Notes + ----- + If loading data from a group of arrays, data will not be immediately loaded into + memory. Rather, arrays will be loaded into memory as they are requested. + """ + return sync(async_api.load(store=store, zarr_version=zarr_version, path=path)) + + +def open( + store: StoreLike | None = None, + mode: str = "a", + *, + zarr_version: ZarrFormat | None = None, + zarr_format: ZarrFormat | None = None, + path: str | None = None, + **kwargs: Any, # TODO: type kwargs as valid args to async_api.open +) -> Union[Array, Group]: + """Convenience function to open a group or array using file-mode-like semantics. + + Parameters + ---------- + store : Store or string, optional + Store or path to directory in file system or name of zip file. + mode : {'r', 'r+', 'a', 'w', 'w-'}, optional + Persistence mode: 'r' means read only (must exist); 'r+' means + read/write (must exist); 'a' means read/write (create if doesn't + exist); 'w' means create (overwrite if exists); 'w-' means create + (fail if exists). + zarr_format : {2, 3, None}, optional + The zarr format to use when saving. + path : str or None, optional + The path within the store to open. + **kwargs + Additional parameters are passed through to :func:`zarr.creation.open_array` or + :func:`zarr.hierarchy.open_group`. + + Returns + ------- + z : AsyncArray or AsyncGroup + Array or group, depending on what exists in the given store. 
+ """ + obj = sync( + async_api.open( + store=store, + mode=mode, + zarr_version=zarr_version, + zarr_format=zarr_format, + path=path, + **kwargs, + ) + ) + if isinstance(obj, async_api.AsyncArray): + return Array(obj) + else: + return Group(obj) + + +def open_consolidated(*args: Any, **kwargs: Any) -> Group: + return Group(sync(async_api.open_consolidated(*args, **kwargs))) + + +def save( + store: StoreLike, + *args: npt.ArrayLike, + zarr_version: ZarrFormat | None = None, + zarr_format: ZarrFormat | None = None, + path: str | None = None, + **kwargs: Any, # TODO: type kwargs as valid args to async_api.save +) -> None: + """Convenience function to save an array or group of arrays to the local file system. + + Parameters + ---------- + store : Store or string + Store or path to directory in file system or name of zip file. + args : ndarray + NumPy arrays with data to save. + zarr_format : {2, 3, None}, optional + The zarr format to use when saving. + path : str or None, optional + The path within the group where the arrays will be saved. + kwargs + NumPy arrays with data to save. + """ + return sync( + async_api.save( + store, *args, zarr_version=zarr_version, zarr_format=zarr_format, path=path, **kwargs + ) + ) + + +def save_array( + store: StoreLike, + arr: npt.ArrayLike, + *, + zarr_version: ZarrFormat | None = None, + zarr_format: ZarrFormat | None = None, + path: str | None = None, + **kwargs: Any, # TODO: type kwargs as valid args to async_api.save_array +) -> None: + """Convenience function to save a NumPy array to the local file system, following a + similar API to the NumPy save() function. + + Parameters + ---------- + store : Store or string + Store or path to directory in file system or name of zip file. + arr : ndarray + NumPy array with data to save. + zarr_format : {2, 3, None}, optional + The zarr format to use when saving. + path : str or None, optional + The path within the store where the array will be saved. 
+ kwargs + Passed through to :func:`create`, e.g., compressor. + """ + return sync( + async_api.save_array( + store=store, + arr=arr, + zarr_version=zarr_version, + zarr_format=zarr_format, + path=path, + **kwargs, + ) + ) + + +def save_group( + store: StoreLike, + *args: npt.ArrayLike, + zarr_version: ZarrFormat | None = None, + zarr_format: ZarrFormat | None = None, + path: str | None = None, + **kwargs: npt.ArrayLike, +) -> None: + """Convenience function to save several NumPy arrays to the local file system, following a + similar API to the NumPy savez()/savez_compressed() functions. + + Parameters + ---------- + store : Store or string + Store or path to directory in file system or name of zip file. + args : ndarray + NumPy arrays with data to save. + zarr_format : {2, 3, None}, optional + The zarr format to use when saving. + path : str or None, optional + Path within the store where the group will be saved. + kwargs + NumPy arrays with data to save. + """ + return sync( + async_api.save_group( + store=store, + *args, + zarr_version=zarr_version, + zarr_format=zarr_format, + path=path, + **kwargs, + ) + ) + + +# TODO: implement or deprecate +# def tree(*args: Any, **kwargs: Any) -> "TreeViewer": +# return sync(async_api.tree(*args, **kwargs)) + + +# TODO: add type annotations for kwargs +def array(data: npt.ArrayLike, **kwargs: Any) -> Array: + """Create an array filled with `data`. + + The `data` argument should be a array-like object. For + other parameter definitions see :func:`zarr.api.synchronous.create`. + """ + return Array(sync(async_api.array(data=data, **kwargs))) + + +# TODO: add type annotations for kwargs +def create(*args: Any, **kwargs: Any) -> Array: + return Array(sync(async_api.create(*args, **kwargs))) + + +# TODO: move shapelike to common module +# TODO: add type annotations for kwargs +def empty(shape: async_api.ShapeLike, **kwargs: Any) -> Array: + """Create an empty array. 
+ + For parameter definitions see :func:`zarr.api.asynchronous.create`. + + Notes + ----- + The contents of an empty Zarr array are not defined. On attempting to + retrieve data from an empty Zarr array, any values may be returned, + and these are not guaranteed to be stable from one access to the next. + """ + return Array(sync(async_api.empty(shape, **kwargs))) + + +# TODO: move ArrayLike to common module +# TODO: add type annotations for kwargs +def empty_like(a: async_api.ArrayLike, **kwargs: Any) -> Array: + """Create an empty array like `a`.""" + return Array(sync(async_api.empty_like(a, **kwargs))) + + +# TODO: add type annotations for kwargs and fill_value +def full(shape: async_api.ShapeLike, fill_value: Any, **kwargs: Any) -> Array: + """Create an array, with `fill_value` being used as the default value for + uninitialized portions of the array. + + For parameter definitions see :func:`zarr.api.asynchronous.create`. + """ + return Array(sync(async_api.full(shape=shape, fill_value=fill_value, **kwargs))) + + +# TODO: move ArrayLike to common module +# TODO: add type annotations for kwargs +async def full_like(a: async_api.ArrayLike, **kwargs: Any) -> Array: + """Create a filled array like `a`.""" + return Array(sync(async_api.full_like(a, **kwargs))) + + +# TODO: add type annotations for kwargs +# TODO: move ShapeLike to common module +def ones(shape: async_api.ShapeLike, **kwargs: Any) -> Array: + """Create an array, with one being used as the default value for + uninitialized portions of the array. + + For parameter definitions see :func:`zarr.api.asynchronous.create`. + + Returns + ------- + Array + The new array. 
+ """ + return Array(sync(async_api.ones(shape, **kwargs))) + + +# TODO: add type annotations for kwargs +def ones_like(a: async_api.ArrayLike, **kwargs: Any) -> Array: + """Create an array of ones like `a`.""" + return Array(sync(async_api.ones_like(a, **kwargs))) + + +# TODO: update this once async_api.open_array is fully implemented +def open_array(*args: Any, **kwargs: Any) -> Array: + """Open an array using file-mode-like semantics. + + Parameters + ---------- + TODO + + Returns + ------- + AsyncArray + The opened array. + """ + return Array(sync(async_api.open_array(*args, **kwargs))) + + +# TODO: add type annotations for kwargs +def open_like(a: async_api.ArrayLike, **kwargs: Any) -> Array: + """Open a persistent array like `a`. + + Parameters + ---------- + a : Array + The shape and data-type of a define these same attributes of the returned array. + path : str + The path to the new array. + **kwargs + Any keyword arguments to pass to the array constructor. + + Returns + ------- + Array + The opened array. + """ + return Array(sync(async_api.open_like(a, **kwargs))) + + +# TODO: add type annotations for kwargs +def zeros(*args: Any, **kwargs: Any) -> Array: + """ + Create an array, with zero being used as the default value for + uninitialized portions of the array. + + For parameter definitions see :func:`zarr.creation.create`. + + Returns: + Array + The new array. 
+ """ + return Array(sync(async_api.zeros(*args, **kwargs))) + + +# TODO: add type annotations for kwargs +def zeros_like(a: async_api.ArrayLike, **kwargs: Any) -> Array: + """Create an array of zeros like `a`.""" + return Array(sync(async_api.zeros_like(a, **kwargs))) diff --git a/src/zarr/common.py b/src/zarr/common.py index 95cb8f4a3e..680e9c2f16 100644 --- a/src/zarr/common.py +++ b/src/zarr/common.py @@ -32,6 +32,7 @@ Selection = slice | SliceSelection ZarrFormat = Literal[2, 3] JSON = Union[str, None, int, float, Enum, dict[str, "JSON"], list["JSON"], tuple["JSON", ...]] +MEMORY_ORDER = Literal["C", "F"] def product(tup: ChunkCoords) -> int: @@ -88,10 +89,10 @@ class ArraySpec: shape: ChunkCoords dtype: np.dtype[Any] fill_value: Any - order: Literal["C", "F"] + order: MEMORY_ORDER def __init__( - self, shape: ChunkCoords, dtype: np.dtype[Any], fill_value: Any, order: Literal["C", "F"] + self, shape: ChunkCoords, dtype: np.dtype[Any], fill_value: Any, order: MEMORY_ORDER ) -> None: shape_parsed = parse_shapelike(shape) dtype_parsed = parse_dtype(dtype) diff --git a/src/zarr/store/core.py b/src/zarr/store/core.py index cc017ec982..5861601b94 100644 --- a/src/zarr/store/core.py +++ b/src/zarr/store/core.py @@ -6,6 +6,7 @@ from zarr.common import BytesLike from zarr.abc.store import Store from zarr.store.local import LocalStore +from zarr.store.memory import MemoryStore def _dereference_path(root: str, path: str) -> str: @@ -62,8 +63,10 @@ def __eq__(self, other: Any) -> bool: StoreLike = Union[Store, StorePath, Path, str] -def make_store_path(store_like: StoreLike) -> StorePath: - if isinstance(store_like, StorePath): +def make_store_path(store_like: StoreLike | None) -> StorePath: + if store_like is None: + return StorePath(MemoryStore()) + elif isinstance(store_like, StorePath): return store_like elif isinstance(store_like, Store): return StorePath(store_like) From 736b54af94bc6ad7063f8f4fa93369bb7311c2b1 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Thu, 
16 May 2024 14:18:28 -0700 Subject: [PATCH 02/11] add group/open_group --- src/zarr/api/asynchronous.py | 138 ++++++++++++++++++++++++++++++++++- src/zarr/api/synchronous.py | 122 +++++++++++++++++++++++++++++++ 2 files changed, 257 insertions(+), 3 deletions(-) diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index 122d8528ac..c818ced9e5 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -89,7 +89,7 @@ async def load( Parameters ---------- - store : MutableMapping or string + store : Store or string Store or path to directory in file system or name of zip file. path : str or None, optional The path within the store from which to load. @@ -332,6 +332,138 @@ async def array(data: npt.ArrayLike, **kwargs: Any) -> AsyncArray: return z +async def group( + store: StoreLike | None = None, + overwrite: bool = False, + chunk_store: StoreLike | None = None, # not used + cache_attrs: bool = True, # not used + synchronizer: Any | None = None, # not used + path: str | None = None, + *, + zarr_version: ZarrFormat | None = None, + zarr_format: ZarrFormat | None = None, + meta_array: Any | None = None, # not used +) -> AsyncGroup: + """Create a group. + + Parameters + ---------- + store : Store or string, optional + Store or path to directory in file system. + overwrite : bool, optional + If True, delete any pre-existing data in `store` at `path` before + creating the group. + chunk_store : Store, optional + Separate storage for chunks. If not provided, `store` will be used + for storage of both chunks and metadata. + cache_attrs : bool, optional + If True (default), user attributes will be cached for attribute read + operations. If False, user attributes are reloaded from the store prior + to all attribute read operations. + synchronizer : object, optional + Array synchronizer. + path : string, optional + Group path within store. 
+ meta_array : array-like, optional + An array instance to use for determining arrays to create and return + to users. Use `numpy.empty(())` by default. + zarr_format : {2, 3, None}, optional + The zarr format to use when saving. + + Returns + ------- + g : AsyncGroup + """ + + if zarr_version is not None: + zarr_format = zarr_version + warnings.warn("zarr_format is deprecated, use zarr_format instead", DeprecationWarning) + + if zarr_format is None: + zarr_format = 3 # TODO: perhaps this default should be set via config? + + store_path = make_store_path(store) + if path is not None: + store_path = store_path / path + + # requires_init = None + # if zarr_version == 2: + # requires_init = overwrite or not contains_group(store) + # elif zarr_version == 3: + # requires_init = overwrite or not contains_group(store, path) + + # if requires_init: + # init_group(store, overwrite=overwrite, chunk_store=chunk_store, path=path) + + try: + return await AsyncGroup.open(store_path, zarr_format=zarr_format) + except KeyError: + # TODO: pass attributes here + attributes: dict[str, Any] = {} + return await AsyncGroup.create( + store_path, zarr_format=zarr_format, exists_ok=overwrite, attributes=attributes + ) + + +async def open_group( + store: StoreLike | None = None, + mode: str = "a", # not used + cache_attrs: bool = True, # not used + synchronizer: Any = None, # not used + path: str | None = None, + chunk_store: StoreLike | None = None, # not used + storage_options: dict[str, Any] | None = None, # not used + *, + zarr_version: ZarrFormat | None = None, + zarr_format: ZarrFormat | None = None, + meta_array: Any | None = None, # not used +) -> AsyncGroup: + """Open a group using file-mode-like semantics. + + Parameters + ---------- + store : Store or string, optional + Store or path to directory in file system or name of zip file. 
+ mode : {'r', 'r+', 'a', 'w', 'w-'}, optional + Persistence mode: 'r' means read only (must exist); 'r+' means + read/write (must exist); 'a' means read/write (create if doesn't + exist); 'w' means create (overwrite if exists); 'w-' means create + (fail if exists). + cache_attrs : bool, optional + If True (default), user attributes will be cached for attribute read + operations. If False, user attributes are reloaded from the store prior + to all attribute read operations. + synchronizer : object, optional + Array synchronizer. + path : string, optional + Group path within store. + chunk_store : Store or string, optional + Store or path to directory in file system or name of zip file. + storage_options : dict + If using an fsspec URL to create the store, these will be passed to + the backend implementation. Ignored otherwise. + meta_array : array-like, optional + An array instance to use for determining arrays to create and return + to users. Use `numpy.empty(())` by default. + + Returns + ------- + g : AsyncGroup + """ + + if zarr_version is not None: + zarr_format = zarr_version + warnings.warn("zarr_format is deprecated, use zarr_format instead", DeprecationWarning) + if zarr_format is None: + zarr_format = 3 # TODO: perhaps this default should be set via config? 
+ + store_path = make_store_path(store) + if path is not None: + store_path = store_path / path + + return await AsyncGroup.open(store_path, zarr_format=zarr_format) + + # TODO: require kwargs async def create( shape: ShapeLike, @@ -341,8 +473,8 @@ async def create( fill_value: int | None = 0, order: MEMORY_ORDER = "C", store: StoreLike | None = None, - # synchronizer: Synchronizer | None = None, - # overwrite: bool = False, + synchronizer: Any | None = None, + overwrite: bool = False, path: str | None = None, # chunk_store: StoreLike | None = None, # filters: Sequence[Codec] | None = None, diff --git a/src/zarr/api/synchronous.py b/src/zarr/api/synchronous.py index a48e276a85..04e96b3e3f 100644 --- a/src/zarr/api/synchronous.py +++ b/src/zarr/api/synchronous.py @@ -234,6 +234,128 @@ def array(data: npt.ArrayLike, **kwargs: Any) -> Array: return Array(sync(async_api.array(data=data, **kwargs))) +async def group( + store: StoreLike | None = None, + overwrite: bool = False, + chunk_store: StoreLike | None = None, # not used in async_api + cache_attrs: bool = True, # not used in async_api + synchronizer: Any | None = None, # not used in async_api + path: str | None = None, + *, + zarr_version: ZarrFormat | None = None, + zarr_format: ZarrFormat | None = None, + meta_array: Any | None = None, # not used in async_api +) -> Group: + """Create a group. + + Parameters + ---------- + store : Store or string, optional + Store or path to directory in file system. + overwrite : bool, optional + If True, delete any pre-existing data in `store` at `path` before + creating the group. + chunk_store : Store, optional + Separate storage for chunks. If not provided, `store` will be used + for storage of both chunks and metadata. + cache_attrs : bool, optional + If True (default), user attributes will be cached for attribute read + operations. If False, user attributes are reloaded from the store prior + to all attribute read operations. 
+ synchronizer : object, optional + Array synchronizer. + path : string, optional + Group path within store. + meta_array : array-like, optional + An array instance to use for determining arrays to create and return + to users. Use `numpy.empty(())` by default. + zarr_format : {2, 3, None}, optional + The zarr format to use when saving. + + Returns + ------- + g : Group + """ + return Group( + sync( + async_api.group( + store=store, + overwrite=overwrite, + chunk_store=chunk_store, + cache_attrs=cache_attrs, + synchronizer=synchronizer, + path=path, + zarr_version=zarr_version, + zarr_format=zarr_format, + meta_array=meta_array, + ) + ) + ) + + +def open_group( + store: StoreLike | None = None, + mode: str = "a", # not used in async api + cache_attrs: bool = True, # not used in async api + synchronizer: Any = None, # not used in async api + path: str | None = None, + chunk_store: StoreLike | None = None, # not used in async api + storage_options: dict[str, Any] | None = None, # not used in async api + *, + zarr_version: ZarrFormat | None = None, + zarr_format: ZarrFormat | None = None, + meta_array: Any | None = None, # not used in async api +) -> Group: + """Open a group using file-mode-like semantics. + + Parameters + ---------- + store : Store or string, optional + Store or path to directory in file system or name of zip file. + mode : {'r', 'r+', 'a', 'w', 'w-'}, optional + Persistence mode: 'r' means read only (must exist); 'r+' means + read/write (must exist); 'a' means read/write (create if doesn't + exist); 'w' means create (overwrite if exists); 'w-' means create + (fail if exists). + cache_attrs : bool, optional + If True (default), user attributes will be cached for attribute read + operations. If False, user attributes are reloaded from the store prior + to all attribute read operations. + synchronizer : object, optional + Array synchronizer. + path : string, optional + Group path within store. 
+ chunk_store : Store or string, optional + Store or path to directory in file system or name of zip file. + storage_options : dict + If using an fsspec URL to create the store, these will be passed to + the backend implementation. Ignored otherwise. + meta_array : array-like, optional + An array instance to use for determining arrays to create and return + to users. Use `numpy.empty(())` by default. + + Returns + ------- + g : AsyncGroup + """ + return Group( + sync( + async_api.open_group( + store=store, + mode=mode, + cache_attrs=cache_attrs, + synchronizer=synchronizer, + path=path, + chunk_store=chunk_store, + storage_options=storage_options, + zarr_version=zarr_version, + zarr_format=zarr_format, + meta_array=meta_array, + ) + ) + ) + + # TODO: add type annotations for kwargs def create(*args: Any, **kwargs: Any) -> Array: return Array(sync(async_api.create(*args, **kwargs))) From c8ba4cab384939c8d005e0710962f3d67a264925 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Thu, 16 May 2024 15:57:45 -0700 Subject: [PATCH 03/11] minor doc improvement --- src/zarr/api/synchronous.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zarr/api/synchronous.py b/src/zarr/api/synchronous.py index 04e96b3e3f..fe9bb59da1 100644 --- a/src/zarr/api/synchronous.py +++ b/src/zarr/api/synchronous.py @@ -39,7 +39,7 @@ def load( Parameters ---------- - store : MutableMapping or string + store : Store or string Store or path to directory in file system or name of zip file. path : str or None, optional The path within the store from which to load. 
From fec7adf78b4e33559c669e5db33ff50e3b4e39c1 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Fri, 17 May 2024 15:15:21 -0700 Subject: [PATCH 04/11] sync with v3 branch --- src/zarr/api/asynchronous.py | 387 +++++++++++++++++++++++++---------- src/zarr/api/synchronous.py | 37 ++-- src/zarr/metadata.py | 3 + 3 files changed, 297 insertions(+), 130 deletions(-) diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index c818ced9e5..9f95bea3d1 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -2,23 +2,26 @@ import asyncio import warnings -from typing import Union, Any, Literal, Iterable +from collections.abc import Iterable +from typing import Any, Literal, Union import numpy as np import numpy.typing as npt from zarr.abc.codec import Codec -from zarr.array import AsyncArray, Array -from zarr.common import ZarrFormat, MEMORY_ORDER, JSON, ChunkCoords +from zarr.array import Array, AsyncArray +from zarr.common import JSON, MEMORY_ORDER, ChunkCoords, ZarrFormat from zarr.group import AsyncGroup -from zarr.metadata import ChunkKeyEncoding +from zarr.metadata import ArrayV2Metadata, ArrayV3Metadata, ChunkKeyEncoding from zarr.store import ( StoreLike, make_store_path, ) -ShapeLike = Union[int, tuple[int, ...]] -ArrayLike = Union[AsyncArray, Array, npt.NDArray[Any]] +ShapeLike = tuple[int, ...] 
# TODO: support int for shape +# TODO: this type could use some more thought, noqa to avoid "Variable "asynchronous.ArrayLike" is not valid as a type" +ArrayLike = Union[AsyncArray | Array | npt.NDArray[Any]] # noqa +PathLike = str def _get_shape_chunks(a: ArrayLike | Any) -> tuple[ShapeLike | None, ChunkCoords | None]: @@ -50,13 +53,12 @@ def _like_args(a: ArrayLike, kwargs: dict[str, Any]) -> None: if isinstance(a, AsyncArray): kwargs.setdefault("order", a.order) - if a.metadata.zarr_format == 2: - # TODO: make this v2/v3 aware + if isinstance(a.metadata, ArrayV2Metadata): kwargs.setdefault("compressor", a.metadata.compressor) kwargs.setdefault("filters", a.metadata.filters) - elif a.metadata.zarr_format == 3: - kwargs.setdefault("codecs", a.codecs) + if isinstance(a.metadata, ArrayV3Metadata): + kwargs.setdefault("codecs", a.metadata.codecs) else: raise ValueError(f"Unsupported zarr format: {a.metadata.zarr_format}") else: @@ -84,7 +86,7 @@ async def copy_store(*args: Any, **kwargs: Any) -> tuple[int, int, int]: async def load( store: StoreLike, zarr_version: ZarrFormat | None = None, path: str | None = None -) -> Union[AsyncArray, AsyncGroup]: +) -> AsyncArray | AsyncGroup: """Load data from an array or group into memory. 
Parameters @@ -112,9 +114,11 @@ async def load( """ if zarr_version is not None: warnings.warn( - "zarr_version is deprecated and no longer required in load", DeprecationWarning + "zarr_version is deprecated and no longer required in load", + DeprecationWarning, + stacklevel=2, ) - obj = await open(store, path=path) + obj = await open(store=store, path=path) if isinstance(obj, AsyncArray): return await obj.getitem(slice(None)) else: @@ -122,14 +126,14 @@ async def load( async def open( - store: StoreLike | None = None, - mode: str = "a", *, - zarr_version: ZarrFormat | None = None, + store: StoreLike | None = None, + mode: str | None = None, # type and value changed + zarr_version: ZarrFormat | None = None, # deprecated zarr_format: ZarrFormat | None = None, path: str | None = None, **kwargs: Any, # TODO: type kwargs as valid args to open_array -) -> Union[AsyncArray, AsyncGroup]: +) -> AsyncArray | AsyncGroup: """Convenience function to open a group or array using file-mode-like semantics. Parameters @@ -155,16 +159,18 @@ async def open( Array or group, depending on what exists in the given store. 
""" if zarr_version is not None: - warnings.warn("zarr_version is deprecated, use zarr_format", DeprecationWarning) + warnings.warn( + "zarr_version is deprecated, use zarr_format", DeprecationWarning, stacklevel=2 + ) zarr_format = zarr_version + if mode is not None: + warnings.warn("mode is ignored", RuntimeWarning, stacklevel=2) store_path = make_store_path(store) if path is not None: store_path = store_path / path - warnings.warn("TODO: mode is ignored", RuntimeWarning) - try: return await AsyncArray.open(store_path, zarr_format=zarr_format, **kwargs) except KeyError: @@ -178,7 +184,7 @@ async def open_consolidated(*args: Any, **kwargs: Any) -> AsyncGroup: async def save( store: StoreLike, *args: npt.ArrayLike, - zarr_version: ZarrFormat | None = None, + zarr_version: ZarrFormat | None = None, # deprecated zarr_format: ZarrFormat | None = None, path: str | None = None, **kwargs: Any, # TODO: type kwargs as valid args to save @@ -199,7 +205,9 @@ async def save( NumPy arrays with data to save. """ if zarr_version is not None: - warnings.warn("zarr_version is deprecated, use zarr_format", DeprecationWarning) + warnings.warn( + "zarr_version is deprecated, use zarr_format", DeprecationWarning, stacklevel=2 + ) zarr_format = zarr_version if len(args) == 0 and len(kwargs) == 0: raise ValueError("at least one array must be provided") @@ -213,7 +221,7 @@ async def save_array( store: StoreLike, arr: npt.ArrayLike, *, - zarr_version: ZarrFormat | None = None, + zarr_version: ZarrFormat | None = None, # deprecated zarr_format: ZarrFormat | None = None, path: str | None = None, **kwargs: Any, # TODO: type kwargs as valid args to create @@ -235,11 +243,13 @@ async def save_array( Passed through to :func:`create`, e.g., compressor. 
""" if zarr_version is not None: - warnings.warn("zarr_version is deprecated, use zarr_format", DeprecationWarning) + warnings.warn( + "zarr_version is deprecated, use zarr_format", DeprecationWarning, stacklevel=2 + ) zarr_format = zarr_version if zarr_format is None: - zarr_format = 3 # TODO: perhaps this default should be set via config? + zarr_format = 3 # default via config? store_path = make_store_path(store) if path is not None: @@ -251,7 +261,7 @@ async def save_array( async def save_group( store: StoreLike, *args: npt.ArrayLike, - zarr_version: ZarrFormat | None = None, + zarr_version: ZarrFormat | None = None, # deprecated zarr_format: ZarrFormat | None = None, path: str | None = None, **kwargs: npt.ArrayLike, @@ -273,7 +283,9 @@ async def save_group( NumPy arrays with data to save. """ if zarr_version is not None: - warnings.warn("zarr_version is deprecated, use zarr_format", DeprecationWarning) + warnings.warn( + "zarr_version is deprecated, use zarr_format", DeprecationWarning, stacklevel=2 + ) zarr_format = zarr_version if len(args) == 0 and len(kwargs) == 0: @@ -333,14 +345,14 @@ async def array(data: npt.ArrayLike, **kwargs: Any) -> AsyncArray: async def group( + *, # Note: this is a change from v2 store: StoreLike | None = None, overwrite: bool = False, chunk_store: StoreLike | None = None, # not used cache_attrs: bool = True, # not used synchronizer: Any | None = None, # not used path: str | None = None, - *, - zarr_version: ZarrFormat | None = None, + zarr_version: ZarrFormat | None = None, # deprecated zarr_format: ZarrFormat | None = None, meta_array: Any | None = None, # not used ) -> AsyncGroup: @@ -377,23 +389,25 @@ async def group( if zarr_version is not None: zarr_format = zarr_version - warnings.warn("zarr_format is deprecated, use zarr_format instead", DeprecationWarning) + warnings.warn( + "zarr_format is deprecated, use zarr_format instead", DeprecationWarning, stacklevel=2 + ) if zarr_format is None: - zarr_format = 3 # TODO: perhaps 
this default should be set via config? + zarr_format = 3 # default via config? store_path = make_store_path(store) if path is not None: store_path = store_path / path - # requires_init = None - # if zarr_version == 2: - # requires_init = overwrite or not contains_group(store) - # elif zarr_version == 3: - # requires_init = overwrite or not contains_group(store, path) - - # if requires_init: - # init_group(store, overwrite=overwrite, chunk_store=chunk_store, path=path) + if chunk_store is not None: + warnings.warn("chunk_store is not yet implemented", RuntimeWarning, stacklevel=2) + if cache_attrs is not None: + warnings.warn("cache_attrs is not yet implemented", RuntimeWarning, stacklevel=2) + if synchronizer is not None: + warnings.warn("synchronizer is not yet implemented", RuntimeWarning, stacklevel=2) + if meta_array is not None: + warnings.warn("meta_array is not yet implemented", RuntimeWarning, stacklevel=2) try: return await AsyncGroup.open(store_path, zarr_format=zarr_format) @@ -406,15 +420,15 @@ async def group( async def open_group( + *, # Note: this is a change from v2 store: StoreLike | None = None, - mode: str = "a", # not used + mode: str | None = None, # not used cache_attrs: bool = True, # not used synchronizer: Any = None, # not used path: str | None = None, chunk_store: StoreLike | None = None, # not used storage_options: dict[str, Any] | None = None, # not used - *, - zarr_version: ZarrFormat | None = None, + zarr_version: ZarrFormat | None = None, # deprecated zarr_format: ZarrFormat | None = None, meta_array: Any | None = None, # not used ) -> AsyncGroup: @@ -453,9 +467,26 @@ async def open_group( if zarr_version is not None: zarr_format = zarr_version - warnings.warn("zarr_format is deprecated, use zarr_format instead", DeprecationWarning) + warnings.warn( + "zarr_format is deprecated, use zarr_format instead", DeprecationWarning, stacklevel=2 + ) if zarr_format is None: - zarr_format = 3 # TODO: perhaps this default should be set via 
config? + zarr_format = 3 # default from config? + + if mode is not None: + warnings.warn("mode is not yet implemented", RuntimeWarning, stacklevel=2) + if cache_attrs is not None: + warnings.warn("cache_attrs is not yet implemented", RuntimeWarning, stacklevel=2) + if synchronizer is not None: + warnings.warn("synchronizer is not yet implemented", RuntimeWarning, stacklevel=2) + if meta_array is not None: + warnings.warn("meta_array is not yet implemented", RuntimeWarning, stacklevel=2) + + if chunk_store is not None: + warnings.warn("chunk_store is not yet implemented", RuntimeWarning, stacklevel=2) + + if storage_options is not None: + warnings.warn("storage_options is not yet implemented", RuntimeWarning, stacklevel=2) store_path = make_store_path(store) if path is not None: @@ -467,36 +498,192 @@ async def open_group( # TODO: require kwargs async def create( shape: ShapeLike, - chunks: Union[int, tuple[int, ...], bool] = True, + *, # Note: this is a change from v2 + chunks: ShapeLike | None = None, # TODO: v2 allowed chunks=True dtype: npt.DTypeLike | None = None, - compressor: str = "default", - fill_value: int | None = 0, - order: MEMORY_ORDER = "C", - store: StoreLike | None = None, + compressor: dict[str, JSON] | None = None, # TODO: default and type change + fill_value: Any = 0, # TODO: need type + order: MEMORY_ORDER | None = None, # TODO: default change + store: str | StoreLike | None = None, synchronizer: Any | None = None, overwrite: bool = False, - path: str | None = None, - # chunk_store: StoreLike | None = None, - # filters: Sequence[Codec] | None = None, - # cache_metadata: bool = True, - # cache_attrs: bool = True, - # read_only: bool = False, - # object_codec: Codec | None = None, - # dimension_separator: DIMENSION_SEPARATOR | None = None, - # write_empty_chunks: bool = True, - *, - zarr_version: ZarrFormat | None = None, - # meta_array: MetaArray | None = None, - # storage_transformers: Sequence[StorageTransformer] = (), - **kwargs: Any, # 
TODO: type kwargs as valid args to AsyncArray.Create + path: PathLike | None = None, + chunk_store: StoreLike | None = None, + filters: list[dict[str, JSON]] | None = None, # TODO: type has changed + cache_metadata: bool | None = None, + cache_attrs: bool | None = None, + read_only: bool | None = None, + object_codec: Codec | None = None, # TODO: type has changed + dimension_separator: Literal[".", "/"] | None = None, + write_empty_chunks: bool = True, + zarr_version: ZarrFormat | None = None, # deprecated + zarr_format: ZarrFormat | None = None, + meta_array: Any | None = None, # TODO: need type + storage_transformers: Any | None = (), # TODO: need type + attributes: dict[str, JSON] | None = None, + # v3 only + chunk_shape: ChunkCoords | None = None, + chunk_key_encoding: ( + ChunkKeyEncoding + | tuple[Literal["default"], Literal[".", "/"]] + | tuple[Literal["v2"], Literal[".", "/"]] + | None + ) = None, + codecs: Iterable[Codec | dict[str, JSON]] | None = None, + dimension_names: Iterable[str] | None = None, + **kwargs: Any, ) -> AsyncArray: + """Create an array. + + Parameters + ---------- + shape : int or tuple of ints + Array shape. + chunks : int or tuple of ints, optional + Chunk shape. If True, will be guessed from `shape` and `dtype`. If + False, will be set to `shape`, i.e., single chunk for the whole array. + If an int, the chunk size in each dimension will be given by the value + of `chunks`. Default is True. + dtype : string or dtype, optional + NumPy dtype. + compressor : Codec, optional + Primary compressor. + fill_value : object + Default value to use for uninitialized portions of the array. + order : {'C', 'F'}, optional + Memory layout to be used within each chunk. + store : Store or string + Store or path to directory in file system or name of zip file. + synchronizer : object, optional + Array synchronizer. + overwrite : bool, optional + If True, delete all pre-existing data in `store` at `path` before + creating the array. 
+ path : string, optional + Path under which array is stored. + chunk_store : MutableMapping, optional + Separate storage for chunks. If not provided, `store` will be used + for storage of both chunks and metadata. + filters : sequence of Codecs, optional + Sequence of filters to use to encode chunk data prior to compression. + cache_metadata : bool, optional + If True, array configuration metadata will be cached for the + lifetime of the object. If False, array metadata will be reloaded + prior to all data access and modification operations (may incur + overhead depending on storage and data access pattern). + cache_attrs : bool, optional + If True (default), user attributes will be cached for attribute read + operations. If False, user attributes are reloaded from the store prior + to all attribute read operations. + read_only : bool, optional + True if array should be protected against modification. + object_codec : Codec, optional + A codec to encode object arrays, only needed if dtype=object. + dimension_separator : {'.', '/'}, optional + Separator placed between the dimensions of a chunk. + + .. versionadded:: 2.8 + + write_empty_chunks : bool, optional + If True (default), all chunks will be stored regardless of their + contents. If False, each chunk is compared to the array's fill value + prior to storing. If a chunk is uniformly equal to the fill value, then + that chunk is not be stored, and the store entry for that chunk's key + is deleted. This setting enables sparser storage, as only chunks with + non-fill-value data are stored, at the expense of overhead associated + with checking the data of each chunk. + + .. versionadded:: 2.11 + + storage_transformers : sequence of StorageTransformers, optional + Setting storage transformers, changes the storage structure and behaviour + of data coming from the underlying store. The transformers are applied in the + order of the given sequence. 
Supplying an empty sequence is the same as omitting + the argument or setting it to None. May only be set when using zarr_version 3. + + .. versionadded:: 2.13 + + zarr_format : {2, 3, None}, optional + The zarr format to use when saving. + + meta_array : array-like, optional + An array instance to use for determining arrays to create and return + to users. Use `numpy.empty(())` by default. + + .. versionadded:: 2.13 + + Returns + ------- + z : zarr.core.Array + """ + # TODOs: + # order=order, # TODO: set via config + # synchronizer=synchronizer, # TODO: warn if set + # chunk_store=chunk_store, # TODO: this should be a store parameter + # cache_metadata=cache_metadata, # TODO: not yet implemented + # cache_attrs=cache_attrs, # TODO: not yet implemented + # read_only=read_only, # TODO: this should be a store parameter + # object_codec=object_codec, # TODO: not yet implemented + # write_empty_chunks=write_empty_chunks, # TODO: not yet implemented + # meta_array=meta_array, # TODO: not yet implemented + # storage_transformers=storage_transformers, # TODO: not yet implemented + + if zarr_version is not None: + zarr_format = zarr_version + warnings.warn( + "zarr_format is deprecated, use zarr_format instead", DeprecationWarning, stacklevel=2 + ) + + if zarr_format is None: + zarr_format = 3 # default from config? 
+ + if order is not None: + warnings.warn( + "order is deprecated, use zarr config instead", DeprecationWarning, stacklevel=2 + ) + if synchronizer is not None: + warnings.warn("synchronizer is not yet implemented", RuntimeWarning, stacklevel=2) + if chunk_store is not None: + warnings.warn("chunk_store is not yet implemented", RuntimeWarning, stacklevel=2) + if cache_metadata is not None: + warnings.warn("cache_metadata is not yet implemented", RuntimeWarning, stacklevel=2) + if cache_attrs is not None: + warnings.warn("cache_attrs is not yet implemented", RuntimeWarning, stacklevel=2) + if read_only is not None: + warnings.warn("read_only is not yet implemented", RuntimeWarning, stacklevel=2) + if object_codec is not None: + warnings.warn("object_codec is not yet implemented", RuntimeWarning, stacklevel=2) + if dimension_separator is not None: + warnings.warn("dimension_separator is not yet implemented", RuntimeWarning, stacklevel=2) + if write_empty_chunks is not None: + warnings.warn("write_empty_chunks is not yet implemented", RuntimeWarning, stacklevel=2) + if storage_transformers: + warnings.warn("storage_transformers is not yet implemented", RuntimeWarning, stacklevel=2) + if meta_array is not None: + warnings.warn("meta_array is not yet implemented", RuntimeWarning, stacklevel=2) + store_path = make_store_path(store) if path is not None: store_path = store_path / path - raise NotImplementedError - # TODO: finish when Norman's PR goes in - # return await AsyncArray.create(store_path, chunks=chunks, dtype=dtype, zarr_version=zarr_version, **kwargs) + return await AsyncArray.create( + store_path, + shape=shape, + chunks=chunks, + dtype=dtype, + compressor=compressor, + fill_value=fill_value, + exists_ok=overwrite, # TODO: name change + filters=filters, + dimension_separator=dimension_separator, + zarr_format=zarr_format, + chunk_shape=chunk_shape, + chunk_key_encoding=chunk_key_encoding, + codecs=codecs, + dimension_names=dimension_names, + 
attributes=attributes, + **kwargs, + ) async def empty(shape: ShapeLike, **kwargs: Any) -> AsyncArray: @@ -559,47 +746,25 @@ async def ones_like(a: ArrayLike, **kwargs: Any) -> AsyncArray: async def open_array( + *, # note: this is a change from v2 store: StoreLike | None = None, - mode: str = "a", - shape: ShapeLike | None = None, - chunks: Union[int, tuple[int, ...], bool] = True, # v2 only - dtype: npt.DTypeLike | None = None, - compressor: dict[str, JSON] | None = None, # v2 only - fill_value: Any | None = 0, # note: default is 0 here and None on Array.create - order: Literal["C", "F"] | None = "C", # deprecate in favor of runtime config? - synchronizer: Any = None, # deprecate and catch - filters: list[dict[str, JSON]] | None = None, # v2 only - cache_metadata: bool = True, # not implemented - cache_attrs: bool = True, # not implemented - path: str | None = None, - object_codec: Any = None, # not implemented - chunk_store: StoreLike | None = None, # not implemented - storage_options: dict[str, Any] | None = None, # not implemented - partial_decompress: bool = False, # not implemented - write_empty_chunks: bool = True, # not implemented - *, - zarr_version: ZarrFormat | None = None, # deprecate in favor of zarr_format + zarr_version: ZarrFormat | None = None, # deprecated zarr_format: ZarrFormat | None = None, - dimension_separator: Literal[".", "/"] | None = None, # v2 only - meta_array: Any | None = None, # not implemented - attributes: dict[str, JSON] | None = None, - # v3 only - chunk_shape: ChunkCoords | None = None, - chunk_key_encoding: ( - ChunkKeyEncoding - | tuple[Literal["default"], Literal[".", "/"]] - | tuple[Literal["v2"], Literal[".", "/"]] - | None - ) = None, - codecs: Iterable[Codec | dict[str, JSON]] | None = None, - dimension_names: Iterable[str] | None = None, + path: PathLike | None = None, **kwargs: Any, # TODO: type kwargs as valid args to save ) -> AsyncArray: """Open an array using file-mode-like semantics. 
Parameters ---------- - TODO + store : Store or string + Store or path to directory in file system or name of zip file. + zarr_format : {2, 3, None}, optional + The zarr format to use when saving. + path : string, optional + Path in store to array. + **kwargs + Any keyword arguments to pass to the array constructor. Returns ------- @@ -611,21 +776,19 @@ async def open_array( if path is not None: store_path = store_path / path + if zarr_version is not None: + zarr_format = zarr_version + warnings.warn( + "zarr_format is deprecated, use zarr_format instead", DeprecationWarning, stacklevel=2 + ) + try: - return await AsyncArray.open(store_path) + return await AsyncArray.open(store_path, zarr_format=zarr_format) except KeyError: pass - warnings.warn("mode is ignored", RuntimeWarning) - - if zarr_version is not None: - zarr_format = zarr_version - warnings.warn("zarr_format is deprecated, use zarr_format instead", DeprecationWarning) - if zarr_format is None: - zarr_format = 3 # TODO: perhaps this default should be set via config? - - # TODO: finish when Norman's PR goes in - return await AsyncArray.create(store_path, zarr_format=zarr_format, **kwargs) + # if array was not found, create it + return await create(store=store, path=path, zarr_format=zarr_format, **kwargs) async def open_like(a: ArrayLike, path: str, **kwargs: Any) -> AsyncArray: @@ -646,9 +809,9 @@ async def open_like(a: ArrayLike, path: str, **kwargs: Any) -> AsyncArray: The opened array. 
""" _like_args(a, kwargs) - if isinstance(a, (AsyncArray, Array)): + if isinstance(a, (AsyncArray | Array)): kwargs.setdefault("fill_value", a.metadata.fill_value) - return await open_array(path, **kwargs) + return await open_array(path=path, **kwargs) async def zeros(shape: ShapeLike, **kwargs: Any) -> AsyncArray: diff --git a/src/zarr/api/synchronous.py b/src/zarr/api/synchronous.py index fe9bb59da1..82894c3ba0 100644 --- a/src/zarr/api/synchronous.py +++ b/src/zarr/api/synchronous.py @@ -1,14 +1,15 @@ from __future__ import annotations -from typing import Union, Any +from typing import Any + import numpy.typing as npt -from zarr.store import StoreLike +import zarr.api.asynchronous as async_api from zarr.array import Array +from zarr.common import ZarrFormat from zarr.group import Group -import zarr.api.asynchronous as async_api +from zarr.store import StoreLike from zarr.sync import sync -from zarr.common import ZarrFormat def consolidate_metadata(*args: Any, **kwargs: Any) -> Group: @@ -33,7 +34,7 @@ def copy_store(*args: Any, **kwargs: Any) -> tuple[int, int, int]: def load( store: StoreLike, zarr_version: ZarrFormat | None = None, path: str | None = None -) -> Union[npt.ArrayLike, dict[str, npt.ArrayLike]]: +) -> npt.ArrayLike | dict[str, npt.ArrayLike]: """ Load data from an array or group into memory. @@ -64,14 +65,14 @@ def load( def open( - store: StoreLike | None = None, - mode: str = "a", *, - zarr_version: ZarrFormat | None = None, + store: StoreLike | None = None, + mode: str | None = None, # type and value changed + zarr_version: ZarrFormat | None = None, # deprecated zarr_format: ZarrFormat | None = None, path: str | None = None, **kwargs: Any, # TODO: type kwargs as valid args to async_api.open -) -> Union[Array, Group]: +) -> Array | Group: """Convenience function to open a group or array using file-mode-like semantics. 
Parameters @@ -119,7 +120,7 @@ def open_consolidated(*args: Any, **kwargs: Any) -> Group: def save( store: StoreLike, *args: npt.ArrayLike, - zarr_version: ZarrFormat | None = None, + zarr_version: ZarrFormat | None = None, # deprecated zarr_format: ZarrFormat | None = None, path: str | None = None, **kwargs: Any, # TODO: type kwargs as valid args to async_api.save @@ -150,7 +151,7 @@ def save_array( store: StoreLike, arr: npt.ArrayLike, *, - zarr_version: ZarrFormat | None = None, + zarr_version: ZarrFormat | None = None, # deprecated zarr_format: ZarrFormat | None = None, path: str | None = None, **kwargs: Any, # TODO: type kwargs as valid args to async_api.save_array @@ -186,7 +187,7 @@ def save_array( def save_group( store: StoreLike, *args: npt.ArrayLike, - zarr_version: ZarrFormat | None = None, + zarr_version: ZarrFormat | None = None, # deprecated zarr_format: ZarrFormat | None = None, path: str | None = None, **kwargs: npt.ArrayLike, @@ -210,7 +211,7 @@ def save_group( return sync( async_api.save_group( store=store, - *args, + *args, # noqa: B026 zarr_version=zarr_version, zarr_format=zarr_format, path=path, @@ -235,14 +236,14 @@ def array(data: npt.ArrayLike, **kwargs: Any) -> Array: async def group( + *, # Note: this is a change from v2 store: StoreLike | None = None, overwrite: bool = False, chunk_store: StoreLike | None = None, # not used in async_api cache_attrs: bool = True, # not used in async_api synchronizer: Any | None = None, # not used in async_api path: str | None = None, - *, - zarr_version: ZarrFormat | None = None, + zarr_version: ZarrFormat | None = None, # deprecated zarr_format: ZarrFormat | None = None, meta_array: Any | None = None, # not used in async_api ) -> Group: @@ -294,15 +295,15 @@ async def group( def open_group( + *, # Note: this is a change from v2 store: StoreLike | None = None, - mode: str = "a", # not used in async api + mode: str | None = None, # not used in async api cache_attrs: bool = True, # not used in async api 
synchronizer: Any = None, # not used in async api path: str | None = None, chunk_store: StoreLike | None = None, # not used in async api storage_options: dict[str, Any] | None = None, # not used in async api - *, - zarr_version: ZarrFormat | None = None, + zarr_version: ZarrFormat | None = None, # deprecated zarr_format: ZarrFormat | None = None, meta_array: Any | None = None, # not used in async api ) -> Group: diff --git a/src/zarr/metadata.py b/src/zarr/metadata.py index 8db8c8033e..5aa24ad78a 100644 --- a/src/zarr/metadata.py +++ b/src/zarr/metadata.py @@ -30,6 +30,7 @@ ZATTRS_JSON, ArraySpec, ChunkCoords, + ZarrFormat, parse_dtype, parse_fill_value, parse_shapelike, @@ -113,8 +114,10 @@ def from_dtype(cls, dtype: np.dtype[Any]) -> DataType: @dataclass(frozen=True, kw_only=True) class ArrayMetadata(Metadata, ABC): shape: ChunkCoords + fill_value: Any chunk_grid: ChunkGrid attributes: dict[str, JSON] + zarr_format: ZarrFormat @property @abstractmethod From 68b7ac1893600dd99f869bf32a619545dc232589 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Wed, 22 May 2024 20:49:29 -0700 Subject: [PATCH 05/11] fix mypy errors --- src/zarr/__init__.py | 88 +++++++++++++++++++++++++----------- src/zarr/api/__init__.py | 0 src/zarr/api/asynchronous.py | 49 +++++++++++++------- src/zarr/api/synchronous.py | 21 +++++---- src/zarr/common.py | 2 + src/zarr/convenience.py | 35 ++++++++++++++ src/zarr/creation.py | 37 +++++++++++++++ src/zarr/group.py | 2 +- src/zarr/store/__init__.py | 7 +-- 9 files changed, 184 insertions(+), 57 deletions(-) create mode 100644 src/zarr/api/__init__.py create mode 100644 src/zarr/convenience.py create mode 100644 src/zarr/creation.py diff --git a/src/zarr/__init__.py b/src/zarr/__init__.py index fdab564c64..227b0cf63e 100644 --- a/src/zarr/__init__.py +++ b/src/zarr/__init__.py @@ -1,34 +1,68 @@ -from __future__ import annotations - -import zarr.codecs # noqa: F401 from zarr._version import version as __version__ +from zarr.api.synchronous import 
( + array, + consolidate_metadata, + copy, + copy_all, + copy_store, + create, + empty, + empty_like, + full, + full_like, + group, + load, + ones, + ones_like, + open, + open_array, + open_consolidated, + open_group, + open_like, + save, + save_array, + save_group, + tree, + zeros, + zeros_like, +) from zarr.array import Array, AsyncArray -from zarr.config import config # noqa: F401 +from zarr.config import config from zarr.group import AsyncGroup, Group -from zarr.store import ( - StoreLike, - make_store_path, -) -from zarr.sync import sync as _sync # in case setuptools scm screw up and find version to be 0.0.0 assert not __version__.startswith("0.0.0") - -async def open_auto_async(store: StoreLike) -> AsyncArray | AsyncGroup: - store_path = make_store_path(store) - try: - return await AsyncArray.open(store_path) - except KeyError: - return await AsyncGroup.open(store_path) - - -def open_auto(store: StoreLike) -> Array | Group: - object = _sync( - open_auto_async(store), - ) - if isinstance(object, AsyncArray): - return Array(object) - if isinstance(object, AsyncGroup): - return Group(object) - raise TypeError(f"Unexpected object type. 
Got {type(object)}.") +__all__ = [ + "__version__", + "config", + "Array", + "AsyncArray", + "Group", + "AsyncGroup", + "tree", + "array", + "consolidate_metadata", + "copy", + "copy_all", + "copy_store", + "create", + "empty", + "empty_like", + "full", + "full_like", + "group", + "load", + "ones", + "ones_like", + "open", + "open_array", + "open_consolidated", + "open_group", + "open_like", + "save", + "save_array", + "save_group", + "zeros", + "zeros_like", +] diff --git a/src/zarr/api/__init__.py b/src/zarr/api/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index 9f95bea3d1..5489265694 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -172,9 +172,9 @@ async def open( store_path = store_path / path try: - return await AsyncArray.open(store_path, zarr_format=zarr_format, **kwargs) + return await open_array(store=store_path, zarr_format=zarr_format, **kwargs) except KeyError: - return await AsyncGroup.open(store_path, zarr_format=zarr_format, **kwargs) + return await open_group(store=store_path, zarr_format=zarr_format, **kwargs) async def open_consolidated(*args: Any, **kwargs: Any) -> AsyncGroup: @@ -298,8 +298,8 @@ async def save_group( await asyncio.gather(*aws) -# async def tree(*args: Any, **kwargs: Any) -> "TreeViewer": -# raise NotImplementedError +async def tree(*args: Any, **kwargs: Any) -> None: + raise NotImplementedError async def array(data: npt.ArrayLike, **kwargs: Any) -> AsyncArray: @@ -349,12 +349,13 @@ async def group( store: StoreLike | None = None, overwrite: bool = False, chunk_store: StoreLike | None = None, # not used - cache_attrs: bool = True, # not used + cache_attrs: bool | None = None, # not used, default changed synchronizer: Any | None = None, # not used path: str | None = None, zarr_version: ZarrFormat | None = None, # deprecated zarr_format: ZarrFormat | None = None, meta_array: Any | None = None, # not used + 
attributes: dict[str, JSON] | None = None, ) -> AsyncGroup: """Create a group. @@ -409,13 +410,17 @@ async def group( if meta_array is not None: warnings.warn("meta_array is not yet implemented", RuntimeWarning, stacklevel=2) + if attributes is None: + attributes = {} + try: - return await AsyncGroup.open(store_path, zarr_format=zarr_format) - except KeyError: - # TODO: pass attributes here - attributes: dict[str, Any] = {} + return await AsyncGroup.open(store=store_path, zarr_format=zarr_format) + except (KeyError, FileNotFoundError): return await AsyncGroup.create( - store_path, zarr_format=zarr_format, exists_ok=overwrite, attributes=attributes + store=store_path, + zarr_format=zarr_format, + exists_ok=overwrite, + attributes=attributes, ) @@ -423,7 +428,7 @@ async def open_group( *, # Note: this is a change from v2 store: StoreLike | None = None, mode: str | None = None, # not used - cache_attrs: bool = True, # not used + cache_attrs: bool | None = None, # not used, default changed synchronizer: Any = None, # not used path: str | None = None, chunk_store: StoreLike | None = None, # not used @@ -431,6 +436,7 @@ async def open_group( zarr_version: ZarrFormat | None = None, # deprecated zarr_format: ZarrFormat | None = None, meta_array: Any | None = None, # not used + attributes: dict[str, JSON] | None = None, ) -> AsyncGroup: """Open a group using file-mode-like semantics. 
@@ -481,10 +487,8 @@ async def open_group( warnings.warn("synchronizer is not yet implemented", RuntimeWarning, stacklevel=2) if meta_array is not None: warnings.warn("meta_array is not yet implemented", RuntimeWarning, stacklevel=2) - if chunk_store is not None: warnings.warn("chunk_store is not yet implemented", RuntimeWarning, stacklevel=2) - if storage_options is not None: warnings.warn("storage_options is not yet implemented", RuntimeWarning, stacklevel=2) @@ -492,7 +496,15 @@ async def open_group( if path is not None: store_path = store_path / path - return await AsyncGroup.open(store_path, zarr_format=zarr_format) + if attributes is None: + attributes = {} + + try: + return await AsyncGroup.open(store_path, zarr_format=zarr_format) + except (KeyError, FileNotFoundError): + return await AsyncGroup.create( + store_path, zarr_format=zarr_format, exists_ok=True, attributes=attributes + ) # TODO: require kwargs @@ -515,7 +527,7 @@ async def create( read_only: bool | None = None, object_codec: Codec | None = None, # TODO: type has changed dimension_separator: Literal[".", "/"] | None = None, - write_empty_chunks: bool = True, + write_empty_chunks: bool = False, # TODO: default has changed zarr_version: ZarrFormat | None = None, # deprecated zarr_format: ZarrFormat | None = None, meta_array: Any | None = None, # TODO: need type @@ -637,6 +649,11 @@ async def create( if zarr_format is None: zarr_format = 3 # default from config? 
+ if zarr_format == 2 and chunks is None: + chunks = shape + if zarr_format == 3 and chunk_shape is None: + chunk_shape = shape + if order is not None: warnings.warn( "order is deprecated, use zarr config instead", DeprecationWarning, stacklevel=2 @@ -655,7 +672,7 @@ async def create( warnings.warn("object_codec is not yet implemented", RuntimeWarning, stacklevel=2) if dimension_separator is not None: warnings.warn("dimension_separator is not yet implemented", RuntimeWarning, stacklevel=2) - if write_empty_chunks is not None: + if write_empty_chunks: warnings.warn("write_empty_chunks is not yet implemented", RuntimeWarning, stacklevel=2) if storage_transformers: warnings.warn("storage_transformers is not yet implemented", RuntimeWarning, stacklevel=2) diff --git a/src/zarr/api/synchronous.py b/src/zarr/api/synchronous.py index 82894c3ba0..27bea394a5 100644 --- a/src/zarr/api/synchronous.py +++ b/src/zarr/api/synchronous.py @@ -6,7 +6,7 @@ import zarr.api.asynchronous as async_api from zarr.array import Array -from zarr.common import ZarrFormat +from zarr.common import JSON, ZarrFormat from zarr.group import Group from zarr.store import StoreLike from zarr.sync import sync @@ -210,8 +210,8 @@ def save_group( """ return sync( async_api.save_group( - store=store, - *args, # noqa: B026 + store, + *args, zarr_version=zarr_version, zarr_format=zarr_format, path=path, @@ -220,9 +220,8 @@ def save_group( ) -# TODO: implement or deprecate -# def tree(*args: Any, **kwargs: Any) -> "TreeViewer": -# return sync(async_api.tree(*args, **kwargs)) +def tree(*args: Any, **kwargs: Any) -> None: + return sync(async_api.tree(*args, **kwargs)) # TODO: add type annotations for kwargs @@ -235,17 +234,18 @@ def array(data: npt.ArrayLike, **kwargs: Any) -> Array: return Array(sync(async_api.array(data=data, **kwargs))) -async def group( +def group( *, # Note: this is a change from v2 store: StoreLike | None = None, overwrite: bool = False, chunk_store: StoreLike | None = None, # not used 
in async_api - cache_attrs: bool = True, # not used in async_api + cache_attrs: bool | None = None, # default changed, not used in async_api synchronizer: Any | None = None, # not used in async_api path: str | None = None, zarr_version: ZarrFormat | None = None, # deprecated zarr_format: ZarrFormat | None = None, meta_array: Any | None = None, # not used in async_api + attributes: dict[str, JSON] | None = None, ) -> Group: """Create a group. @@ -289,6 +289,7 @@ async def group( zarr_version=zarr_version, zarr_format=zarr_format, meta_array=meta_array, + attributes=attributes, ) ) ) @@ -298,7 +299,7 @@ def open_group( *, # Note: this is a change from v2 store: StoreLike | None = None, mode: str | None = None, # not used in async api - cache_attrs: bool = True, # not used in async api + cache_attrs: bool | None = None, # default changed, not used in async api synchronizer: Any = None, # not used in async api path: str | None = None, chunk_store: StoreLike | None = None, # not used in async api @@ -397,7 +398,7 @@ def full(shape: async_api.ShapeLike, fill_value: Any, **kwargs: Any) -> Array: # TODO: move ArrayLike to common module # TODO: add type annotations for kwargs -async def full_like(a: async_api.ArrayLike, **kwargs: Any) -> Array: +def full_like(a: async_api.ArrayLike, **kwargs: Any) -> Array: """Create a filled array like `a`.""" return Array(sync(async_api.full_like(a, **kwargs))) diff --git a/src/zarr/common.py b/src/zarr/common.py index b11a2cff43..e113b051ca 100644 --- a/src/zarr/common.py +++ b/src/zarr/common.py @@ -152,6 +152,8 @@ def parse_named_configuration( def parse_shapelike(data: Any) -> tuple[int, ...]: + if isinstance(data, int): + return (data,) if not isinstance(data, Iterable): raise TypeError(f"Expected an iterable. 
Got {data} instead.") data_tuple = tuple(data) diff --git a/src/zarr/convenience.py b/src/zarr/convenience.py new file mode 100644 index 0000000000..be0a6b2813 --- /dev/null +++ b/src/zarr/convenience.py @@ -0,0 +1,35 @@ +import warnings + +from zarr.api.synchronous import ( + consolidate_metadata, + copy, + copy_all, + copy_store, + load, + open, + open_consolidated, + save, + save_array, + save_group, + tree, +) + +warnings.warn( + "zarr.convenience is deprecated, use zarr.api.synchronous", + DeprecationWarning, + stacklevel=2, +) + +__all__ = [ + "open", + "save_array", + "save_group", + "save", + "load", + "tree", + "copy_store", + "copy", + "copy_all", + "consolidate_metadata", + "open_consolidated", +] diff --git a/src/zarr/creation.py b/src/zarr/creation.py new file mode 100644 index 0000000000..df3f764610 --- /dev/null +++ b/src/zarr/creation.py @@ -0,0 +1,37 @@ +import warnings + +from zarr.api.synchronous import ( + array, + create, + empty, + empty_like, + full, + full_like, + ones, + ones_like, + open_array, + open_like, + zeros, + zeros_like, +) + +warnings.warn( + "zarr.creation is deprecated, use zarr.api.synchronous", + DeprecationWarning, + stacklevel=2, +) + +__all__ = [ + "create", + "empty", + "zeros", + "ones", + "full", + "array", + "open_array", + "empty_like", + "zeros_like", + "ones_like", + "full_like", + "open_like", +] diff --git a/src/zarr/group.py b/src/zarr/group.py index 4ff2176fd9..e0061846aa 100644 --- a/src/zarr/group.py +++ b/src/zarr/group.py @@ -200,7 +200,7 @@ async def getitem( key: str, ) -> AsyncArray | AsyncGroup: store_path = self.store_path / key - logger.warning("key=%s, store_path=%s", key, store_path) + logger.debug("key=%s, store_path=%s", key, store_path) # Note: # in zarr-python v2, we first check if `key` references an Array, else if `key` references diff --git a/src/zarr/store/__init__.py b/src/zarr/store/__init__.py index b1c3a5f720..fbdcdb9255 100644 --- a/src/zarr/store/__init__.py +++ 
b/src/zarr/store/__init__.py @@ -1,5 +1,6 @@ -# flake8: noqa -from zarr.store.core import StorePath, StoreLike, make_store_path -from zarr.store.remote import RemoteStore +from zarr.store.core import StoreLike, StorePath, make_store_path from zarr.store.local import LocalStore from zarr.store.memory import MemoryStore +from zarr.store.remote import RemoteStore + +__all__ = ["StorePath", "StoreLike", "make_store_path", "RemoteStore", "LocalStore", "MemoryStore"] From 81405b061c5d90875158c679e673401ae481b85d Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Thu, 30 May 2024 21:47:31 -0700 Subject: [PATCH 06/11] progress integrating store mode --- src/zarr/api/asynchronous.py | 40 +++++++++++++++++++++--------------- src/zarr/api/synchronous.py | 6 +++--- src/zarr/array.py | 13 ++++++++++++ src/zarr/codecs/sharding.py | 21 ++++++++++++++++++- src/zarr/store/core.py | 32 ++++------------------------- src/zarr/store/memory.py | 2 +- src/zarr/testing/store.py | 2 +- 7 files changed, 66 insertions(+), 50 deletions(-) diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index 36e2448693..d555070b09 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -11,7 +11,7 @@ from zarr.abc.codec import Codec from zarr.array import Array, AsyncArray from zarr.buffer import NDArrayLike -from zarr.common import JSON, MEMORY_ORDER, ChunkCoords, ZarrFormat +from zarr.common import JSON, MEMORY_ORDER, ChunkCoords, OpenMode, ZarrFormat from zarr.group import AsyncGroup from zarr.metadata import ArrayV2Metadata, ArrayV3Metadata, ChunkKeyEncoding from zarr.store import ( @@ -129,7 +129,7 @@ async def load( async def open( *, store: StoreLike | None = None, - mode: str | None = None, # type and value changed + mode: OpenMode | None = None, # type and value changed zarr_version: ZarrFormat | None = None, # deprecated zarr_format: ZarrFormat | None = None, path: str | None = None, @@ -164,10 +164,8 @@ async def open( "zarr_version is deprecated, 
use zarr_format", DeprecationWarning, stacklevel=2 ) zarr_format = zarr_version - if mode is not None: - warnings.warn("mode is ignored", RuntimeWarning, stacklevel=2) - store_path = make_store_path(store) + store_path = make_store_path(store, mode=mode) if path is not None: store_path = store_path / path @@ -252,10 +250,17 @@ async def save_array( if zarr_format is None: zarr_format = 3 # default via config? - store_path = make_store_path(store) + store_path = make_store_path(store, mode="w") if path is not None: store_path = store_path / path - new = await AsyncArray.create(store_path, zarr_format=zarr_format, **kwargs) + new = await AsyncArray.create( + store_path, + zarr_format=zarr_format, + shape=arr.shape, + dtype=arr.dtype, + chunks=arr.shape, + **kwargs, + ) await new.setitem(slice(None), arr) @@ -295,7 +300,8 @@ async def save_group( for i, arr in enumerate(args): aws.append(save_array(store, arr, zarr_format=zarr_format, path=f"{path}/arr_{i}")) for k, arr in kwargs.items(): - aws.append(save_array(store, arr, zarr_format=zarr_format, path=f"{path}/{k}")) + path = f"{path}/{k}" if path is not None else k + aws.append(save_array(store, arr, zarr_format=zarr_format, path=path)) await asyncio.gather(*aws) @@ -428,7 +434,7 @@ async def group( async def open_group( *, # Note: this is a change from v2 store: StoreLike | None = None, - mode: str | None = None, # not used + mode: OpenMode | None = None, # not used cache_attrs: bool | None = None, # not used, default changed synchronizer: Any = None, # not used path: str | None = None, @@ -480,8 +486,6 @@ async def open_group( if zarr_format is None: zarr_format = 3 # default from config? 
- if mode is not None: - warnings.warn("mode is not yet implemented", RuntimeWarning, stacklevel=2) if cache_attrs is not None: warnings.warn("cache_attrs is not yet implemented", RuntimeWarning, stacklevel=2) if synchronizer is not None: @@ -493,7 +497,7 @@ async def open_group( if storage_options is not None: warnings.warn("storage_options is not yet implemented", RuntimeWarning, stacklevel=2) - store_path = make_store_path(store) + store_path = make_store_path(store, mode=mode) if path is not None: store_path = store_path / path @@ -508,7 +512,6 @@ async def open_group( ) -# TODO: require kwargs async def create( shape: ShapeLike, *, # Note: this is a change from v2 @@ -680,7 +683,7 @@ async def create( if meta_array is not None: warnings.warn("meta_array is not yet implemented", RuntimeWarning, stacklevel=2) - store_path = make_store_path(store) + store_path = make_store_path(store, mode="w") if path is not None: store_path = store_path / path @@ -801,9 +804,14 @@ async def open_array( ) try: + print(store_path) return await AsyncArray.open(store_path, zarr_format=zarr_format) - except KeyError: - pass + except KeyError as e: + print(e, type(e)) + if store_path.store.writeable: + pass + else: + raise e # if array was not found, create it return await create(store=store, path=path, zarr_format=zarr_format, **kwargs) diff --git a/src/zarr/api/synchronous.py b/src/zarr/api/synchronous.py index 764b9d8142..470c1d0280 100644 --- a/src/zarr/api/synchronous.py +++ b/src/zarr/api/synchronous.py @@ -5,7 +5,7 @@ import zarr.api.asynchronous as async_api from zarr.array import Array from zarr.buffer import NDArrayLike -from zarr.common import JSON, ZarrFormat +from zarr.common import JSON, OpenMode, ZarrFormat from zarr.group import Group from zarr.store import StoreLike from zarr.sync import sync @@ -66,7 +66,7 @@ def load( def open( *, store: StoreLike | None = None, - mode: str | None = None, # type and value changed + mode: OpenMode | None = None, # type and value 
changed zarr_version: ZarrFormat | None = None, # deprecated zarr_format: ZarrFormat | None = None, path: str | None = None, @@ -297,7 +297,7 @@ def group( def open_group( *, # Note: this is a change from v2 store: StoreLike | None = None, - mode: str | None = None, # not used in async api + mode: OpenMode | None = None, # not used in async api cache_attrs: bool | None = None, # default changed, not used in async api synchronizer: Any = None, # not used in async api path: str | None = None, diff --git a/src/zarr/array.py b/src/zarr/array.py index 7da39c285e..215251fef2 100644 --- a/src/zarr/array.py +++ b/src/zarr/array.py @@ -288,19 +288,24 @@ async def open( store: StoreLike, zarr_format: ZarrFormat | None = 3, ) -> AsyncArray: + print(f"store: {store}") store_path = make_store_path(store) + print(f"store_path: {store_path}") if zarr_format == 2: + print("^^^^^^", (store_path / ZARR_JSON)) zarray_bytes, zattrs_bytes = await gather( (store_path / ZARRAY_JSON).get(), (store_path / ZATTRS_JSON).get() ) if zarray_bytes is None: raise KeyError(store_path) # filenotfounderror? elif zarr_format == 3: + print("*******", (store_path / ZARR_JSON)) zarr_json_bytes = await (store_path / ZARR_JSON).get() if zarr_json_bytes is None: raise KeyError(store_path) # filenotfounderror? 
elif zarr_format is None: + print("$$$$$$", (store_path / ZARR_JSON)) zarr_json_bytes, zarray_bytes, zattrs_bytes = await gather( (store_path / ZARR_JSON).get(), (store_path / ZARRAY_JSON).get(), @@ -355,6 +360,10 @@ def dtype(self) -> np.dtype[Any]: def attrs(self) -> dict[str, JSON]: return self.metadata.attributes + @property + def read_only(self) -> bool: + return bool(~self.store_path.store.writeable) + async def getitem( self, selection: Selection, *, factory: Factory.Create = NDBuffer.create ) -> NDArrayLike: @@ -582,6 +591,10 @@ def store_path(self) -> StorePath: def order(self) -> Literal["C", "F"]: return self._async_array.order + @property + def read_only(self) -> bool: + return self._async_array.read_only + def __getitem__(self, selection: Selection) -> NDArrayLike: return sync( self._async_array.getitem(selection), diff --git a/src/zarr/codecs/sharding.py b/src/zarr/codecs/sharding.py index a68577be68..3d7ab4a236 100644 --- a/src/zarr/codecs/sharding.py +++ b/src/zarr/codecs/sharding.py @@ -215,6 +215,24 @@ def merge_with_morton_order( break return obj + @classmethod + def merge_with_c_order( + cls, + chunks_per_shard: ChunkCoords, + tombstones: set[ChunkCoords], + *shard_dicts: ShardMapping, + ) -> _ShardBuilder: + obj = cls.create_empty(chunks_per_shard) + for chunk_coords in c_order_iter(chunks_per_shard): + if tombstones is not None and chunk_coords in tombstones: + continue + for shard_dict in shard_dicts: + maybe_value = shard_dict.get(chunk_coords, None) + if maybe_value is not None: + obj[chunk_coords] = maybe_value + break + return obj + @classmethod def create_empty(cls, chunks_per_shard: ChunkCoords) -> _ShardBuilder: obj = cls() @@ -284,7 +302,8 @@ async def finalize( index_location: ShardingCodecIndexLocation, index_encoder: Callable[[_ShardIndex], Awaitable[Buffer]], ) -> Buffer: - shard_builder = _ShardBuilder.merge_with_morton_order( + print("merging shards with c order") + shard_builder = _ShardBuilder.merge_with_c_order( 
self.new_dict.index.chunks_per_shard, self.tombstones, self.new_dict, diff --git a/src/zarr/store/core.py b/src/zarr/store/core.py index b1768a064f..17dfa79b36 100644 --- a/src/zarr/store/core.py +++ b/src/zarr/store/core.py @@ -67,39 +67,15 @@ def make_store_path(store_like: StoreLike | None, *, mode: OpenMode | None = Non if mode is not None: assert mode == store_like.store.mode return store_like - elif store_like is None: - if mode is None: - mode = "r" - return StorePath(MemoryStore(mode=mode)) elif isinstance(store_like, Store): if mode is not None: assert mode == store_like.mode return StorePath(store_like) + elif store_like is None: + if mode is None: + mode = "r" + return StorePath(MemoryStore(mode=mode)) elif isinstance(store_like, str): assert mode is not None return StorePath(LocalStore(Path(store_like), mode=mode)) raise TypeError - - -def _normalize_interval_index( - data: Buffer, interval: None | tuple[int | None, int | None] -) -> tuple[int, int]: - """ - Convert an implicit interval into an explicit start and length - """ - if interval is None: - start = 0 - length = len(data) - else: - maybe_start, maybe_len = interval - if maybe_start is None: - start = 0 - else: - start = maybe_start - - if maybe_len is None: - length = len(data) - start - else: - length = maybe_len - - return (start, length) diff --git a/src/zarr/store/memory.py b/src/zarr/store/memory.py index 74bb5454fe..bccfe9c059 100644 --- a/src/zarr/store/memory.py +++ b/src/zarr/store/memory.py @@ -5,7 +5,7 @@ from zarr.abc.store import Store from zarr.buffer import Buffer from zarr.common import OpenMode, concurrent_map -from zarr.store.core import _normalize_interval_index +from zarr.store.utils import _normalize_interval_index # TODO: this store could easily be extended to wrap any MutableMapping store from v2 diff --git a/src/zarr/testing/store.py b/src/zarr/testing/store.py index b317f383f6..533c7d3ad3 100644 --- a/src/zarr/testing/store.py +++ b/src/zarr/testing/store.py @@ -4,7 
+4,7 @@ from zarr.abc.store import Store from zarr.buffer import Buffer -from zarr.store.core import _normalize_interval_index +from zarr.store.utils import _normalize_interval_index from zarr.testing.utils import assert_bytes_equal S = TypeVar("S", bound=Store) From e7ca38a020476d8475e4b24fb7fb40df30d0bc77 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Fri, 31 May 2024 22:17:11 -0700 Subject: [PATCH 07/11] basic tests are passing --- src/zarr/api/asynchronous.py | 167 +++----- src/zarr/api/synchronous.py | 261 ++---------- src/zarr/array.py | 5 - src/zarr/group.py | 2 +- src/zarr/store/core.py | 5 +- tests/v3/test_api.py | 778 +++++++++++++++++++++++++++++++++++ 6 files changed, 876 insertions(+), 342 deletions(-) create mode 100644 tests/v3/test_api.py diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index 3f65e628f4..3afa2acdba 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -3,7 +3,7 @@ import asyncio import warnings from collections.abc import Iterable -from typing import Any, Literal, Union +from typing import Any, Literal, Union, cast import numpy as np import numpy.typing as npt @@ -70,6 +70,26 @@ def _like_args(a: ArrayLike, kwargs: dict[str, Any]) -> None: pass +def _handle_zarr_version_or_format( + *, zarr_version: ZarrFormat | None, zarr_format: ZarrFormat | None +) -> ZarrFormat | None: + if zarr_format is not None and zarr_version is not None and zarr_format != zarr_version: + raise ValueError( + f"zarr_format {zarr_format} does not match zarr_version {zarr_version}, please only set one" + ) + if zarr_version is not None: + warnings.warn( + "zarr_version is deprecated, use zarr_format", DeprecationWarning, stacklevel=2 + ) + return zarr_version + return zarr_format + + +def _default_zarr_version() -> ZarrFormat: + # TODO: set default value from config + return 3 + + async def consolidate_metadata(*args: Any, **kwargs: Any) -> AsyncGroup: raise NotImplementedError @@ -87,7 +107,11 @@ async def 
copy_store(*args: Any, **kwargs: Any) -> tuple[int, int, int]: async def load( - store: StoreLike, zarr_version: ZarrFormat | None = None, path: str | None = None + *, + store: StoreLike, + path: str | None = None, + zarr_format: ZarrFormat | None = None, + zarr_version: ZarrFormat | None = None, ) -> NDArrayLike | dict[str, NDArrayLike]: """Load data from an array or group into memory. @@ -114,13 +138,9 @@ async def load( If loading data from a group of arrays, data will not be immediately loaded into memory. Rather, arrays will be loaded into memory as they are requested. """ - if zarr_version is not None: - warnings.warn( - "zarr_version is deprecated and no longer required in load", - DeprecationWarning, - stacklevel=2, - ) - obj = await open(store=store, path=path) + zarr_format = _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format) + + obj = await open(store=store, path=path, zarr_format=zarr_format) if isinstance(obj, AsyncArray): return await obj.getitem(slice(None)) else: @@ -160,12 +180,7 @@ async def open( z : AsyncArray or AsyncGroup Array or group, depending on what exists in the given store. """ - if zarr_version is not None: - warnings.warn( - "zarr_version is deprecated, use zarr_format", DeprecationWarning, stacklevel=2 - ) - zarr_format = zarr_version - + zarr_format = _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format) store_path = make_store_path(store, mode=mode) if path is not None: @@ -204,11 +219,8 @@ async def save( kwargs NumPy arrays with data to save. 
""" - if zarr_version is not None: - warnings.warn( - "zarr_version is deprecated, use zarr_format", DeprecationWarning, stacklevel=2 - ) - zarr_format = zarr_version + zarr_format = _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format) + if len(args) == 0 and len(kwargs) == 0: raise ValueError("at least one array must be provided") if len(args) == 1 and len(kwargs) == 0: @@ -242,14 +254,10 @@ async def save_array( kwargs Passed through to :func:`create`, e.g., compressor. """ - if zarr_version is not None: - warnings.warn( - "zarr_version is deprecated, use zarr_format", DeprecationWarning, stacklevel=2 - ) - zarr_format = zarr_version - - if zarr_format is None: - zarr_format = 3 # default via config? + zarr_format = ( + _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format) + or _default_zarr_version() + ) store_path = make_store_path(store, mode="w") if path is not None: @@ -289,11 +297,7 @@ async def save_group( kwargs NumPy arrays with data to save. 
""" - if zarr_version is not None: - warnings.warn( - "zarr_version is deprecated, use zarr_format", DeprecationWarning, stacklevel=2 - ) - zarr_format = zarr_version + zarr_format = _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format) if len(args) == 0 and len(kwargs) == 0: raise ValueError("at least one array must be provided") @@ -301,8 +305,8 @@ async def save_group( for i, arr in enumerate(args): aws.append(save_array(store, arr, zarr_format=zarr_format, path=f"{path}/arr_{i}")) for k, arr in kwargs.items(): - path = f"{path}/{k}" if path is not None else k - aws.append(save_array(store, arr, zarr_format=zarr_format, path=path)) + _path = f"{path}/{k}" if path is not None else k + aws.append(save_array(store, arr, zarr_format=zarr_format, path=_path)) await asyncio.gather(*aws) @@ -337,8 +341,9 @@ async def array(data: NDArrayLike, **kwargs: Any) -> AsyncArray: else: kwargs["chunks"] = kw_chunks - # pop read-only to apply after storing the data - # read_only = kwargs.pop("read_only", False) + read_only = kwargs.pop("read_only", False) + if read_only: + raise ValueError("read_only=True is no longer supported when creating new arrays") # instantiate array z = await create(**kwargs) @@ -346,9 +351,6 @@ async def array(data: NDArrayLike, **kwargs: Any) -> AsyncArray: # fill with data await z.setitem(slice(None), data) - # set read_only property afterwards - # z.read_only = read_only - return z @@ -396,14 +398,10 @@ async def group( g : AsyncGroup """ - if zarr_version is not None: - zarr_format = zarr_version - warnings.warn( - "zarr_format is deprecated, use zarr_format instead", DeprecationWarning, stacklevel=2 - ) - - if zarr_format is None: - zarr_format = 3 # default via config? 
+ zarr_format = ( + _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format) + or _default_zarr_version() + ) store_path = make_store_path(store) if path is not None: @@ -479,13 +477,10 @@ async def open_group( g : AsyncGroup """ - if zarr_version is not None: - zarr_format = zarr_version - warnings.warn( - "zarr_format is deprecated, use zarr_format instead", DeprecationWarning, stacklevel=2 - ) - if zarr_format is None: - zarr_format = 3 # default from config? + zarr_format = ( + _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format) + or _default_zarr_version() + ) if cache_attrs is not None: warnings.warn("cache_attrs is not yet implemented", RuntimeWarning, stacklevel=2) @@ -536,7 +531,6 @@ async def create( zarr_version: ZarrFormat | None = None, # deprecated zarr_format: ZarrFormat | None = None, meta_array: Any | None = None, # TODO: need type - storage_transformers: Any | None = (), # TODO: need type attributes: dict[str, JSON] | None = None, # v3 only chunk_shape: ChunkCoords | None = None, @@ -612,17 +606,8 @@ async def create( .. versionadded:: 2.11 - storage_transformers : sequence of StorageTransformers, optional - Setting storage transformers, changes the storage structure and behaviour - of data coming from the underlying store. The transformers are applied in the - order of the given sequence. Supplying an empty sequence is the same as omitting - the argument or setting it to None. May only be set when using zarr_version 3. - - .. versionadded:: 2.13 - zarr_format : {2, 3, None}, optional The zarr format to use when saving. - meta_array : array-like, optional An array instance to use for determining arrays to create and return to users. Use `numpy.empty(())` by default. 
@@ -633,26 +618,10 @@ async def create( ------- z : zarr.core.Array """ - # TODOs: - # order=order, # TODO: set via config - # synchronizer=synchronizer, # TODO: warn if set - # chunk_store=chunk_store, # TODO: this should be a store parameter - # cache_metadata=cache_metadata, # TODO: not yet implemented - # cache_attrs=cache_attrs, # TODO: not yet implemented - # read_only=read_only, # TODO: this should be a store parameter - # object_codec=object_codec, # TODO: not yet implemented - # write_empty_chunks=write_empty_chunks, # TODO: not yet implemented - # meta_array=meta_array, # TODO: not yet implemented - # storage_transformers=storage_transformers, # TODO: not yet implemented - - if zarr_version is not None: - zarr_format = zarr_version - warnings.warn( - "zarr_format is deprecated, use zarr_format instead", DeprecationWarning, stacklevel=2 - ) - - if zarr_format is None: - zarr_format = 3 # default from config? + zarr_format = ( + _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format) + or _default_zarr_version() + ) if zarr_format == 2 and chunks is None: chunks = shape @@ -661,7 +630,9 @@ async def create( if order is not None: warnings.warn( - "order is deprecated, use zarr config instead", DeprecationWarning, stacklevel=2 + "order is deprecated, use config `array.order` instead", + DeprecationWarning, + stacklevel=2, ) if synchronizer is not None: warnings.warn("synchronizer is not yet implemented", RuntimeWarning, stacklevel=2) @@ -671,20 +642,24 @@ async def create( warnings.warn("cache_metadata is not yet implemented", RuntimeWarning, stacklevel=2) if cache_attrs is not None: warnings.warn("cache_attrs is not yet implemented", RuntimeWarning, stacklevel=2) - if read_only is not None: - warnings.warn("read_only is not yet implemented", RuntimeWarning, stacklevel=2) if object_codec is not None: warnings.warn("object_codec is not yet implemented", RuntimeWarning, stacklevel=2) if dimension_separator is not None: - 
warnings.warn("dimension_separator is not yet implemented", RuntimeWarning, stacklevel=2) + if zarr_format == 3: + raise ValueError( + "dimension_separator is not supported for zarr format 3, use chunk_key_encoding instead" + ) + else: + warnings.warn( + "dimension_separator is not yet implemented", RuntimeWarning, stacklevel=2 + ) if write_empty_chunks: warnings.warn("write_empty_chunks is not yet implemented", RuntimeWarning, stacklevel=2) - if storage_transformers: - warnings.warn("storage_transformers is not yet implemented", RuntimeWarning, stacklevel=2) if meta_array is not None: warnings.warn("meta_array is not yet implemented", RuntimeWarning, stacklevel=2) - store_path = make_store_path(store, mode="w") + mode = cast(OpenMode, "r" if read_only else "w") + store_path = make_store_path(store, mode=mode) if path is not None: store_path = store_path / path @@ -798,17 +773,11 @@ async def open_array( if path is not None: store_path = store_path / path - if zarr_version is not None: - zarr_format = zarr_version - warnings.warn( - "zarr_format is deprecated, use zarr_format instead", DeprecationWarning, stacklevel=2 - ) + zarr_format = _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format) try: - print(store_path) return await AsyncArray.open(store_path, zarr_format=zarr_format) except KeyError as e: - print(e, type(e)) if store_path.store.writeable: pass else: diff --git a/src/zarr/api/synchronous.py b/src/zarr/api/synchronous.py index 7f3dd8dbae..53a7a2f64f 100644 --- a/src/zarr/api/synchronous.py +++ b/src/zarr/api/synchronous.py @@ -12,54 +12,24 @@ def consolidate_metadata(*args: Any, **kwargs: Any) -> Group: - # TODO return Group(sync(async_api.consolidate_metadata(*args, **kwargs))) def copy(*args: Any, **kwargs: Any) -> tuple[int, int, int]: - # TODO return sync(async_api.copy(*args, **kwargs)) def copy_all(*args: Any, **kwargs: Any) -> tuple[int, int, int]: - # TODO return sync(async_api.copy_all(*args, **kwargs)) def 
copy_store(*args: Any, **kwargs: Any) -> tuple[int, int, int]: - # TODO return sync(async_api.copy_store(*args, **kwargs)) def load( store: StoreLike, zarr_version: ZarrFormat | None = None, path: str | None = None ) -> NDArrayLike | dict[str, NDArrayLike]: - """ - Load data from an array or group into memory. - - Parameters - ---------- - store : Store or string - Store or path to directory in file system or name of zip file. - path : str or None, optional - The path within the store from which to load. - - Returns - ------- - out - If the path contains an array, out will be a numpy array. If the path contains - a group, out will be a dict-like object where keys are array names and values - are numpy arrays. - - See Also - -------- - save, savez - - Notes - ----- - If loading data from a group of arrays, data will not be immediately loaded into - memory. Rather, arrays will be loaded into memory as they are requested. - """ return sync(async_api.load(store=store, zarr_version=zarr_version, path=path)) @@ -72,30 +42,6 @@ def open( path: str | None = None, **kwargs: Any, # TODO: type kwargs as valid args to async_api.open ) -> Array | Group: - """Convenience function to open a group or array using file-mode-like semantics. - - Parameters - ---------- - store : Store or string, optional - Store or path to directory in file system or name of zip file. - mode : {'r', 'r+', 'a', 'w', 'w-'}, optional - Persistence mode: 'r' means read only (must exist); 'r+' means - read/write (must exist); 'a' means read/write (create if doesn't - exist); 'w' means create (overwrite if exists); 'w-' means create - (fail if exists). - zarr_format : {2, 3, None}, optional - The zarr format to use when saving. - path : str or None, optional - The path within the store to open. - **kwargs - Additional parameters are passed through to :func:`zarr.creation.open_array` or - :func:`zarr.hierarchy.open_group`. 
- - Returns - ------- - z : AsyncArray or AsyncGroup - Array or group, depending on what exists in the given store. - """ obj = sync( async_api.open( store=store, @@ -124,21 +70,6 @@ def save( path: str | None = None, **kwargs: Any, # TODO: type kwargs as valid args to async_api.save ) -> None: - """Convenience function to save an array or group of arrays to the local file system. - - Parameters - ---------- - store : Store or string - Store or path to directory in file system or name of zip file. - args : ndarray - NumPy arrays with data to save. - zarr_format : {2, 3, None}, optional - The zarr format to use when saving. - path : str or None, optional - The path within the group where the arrays will be saved. - kwargs - NumPy arrays with data to save. - """ return sync( async_api.save( store, *args, zarr_version=zarr_version, zarr_format=zarr_format, path=path, **kwargs @@ -155,22 +86,6 @@ def save_array( path: str | None = None, **kwargs: Any, # TODO: type kwargs as valid args to async_api.save_array ) -> None: - """Convenience function to save a NumPy array to the local file system, following a - similar API to the NumPy save() function. - - Parameters - ---------- - store : Store or string - Store or path to directory in file system or name of zip file. - arr : ndarray - NumPy array with data to save. - zarr_format : {2, 3, None}, optional - The zarr format to use when saving. - path : str or None, optional - The path within the store where the array will be saved. - kwargs - Passed through to :func:`create`, e.g., compressor. - """ return sync( async_api.save_array( store=store, @@ -191,22 +106,6 @@ def save_group( path: str | None = None, **kwargs: NDArrayLike, ) -> None: - """Convenience function to save several NumPy arrays to the local file system, following a - similar API to the NumPy savez()/savez_compressed() functions. - - Parameters - ---------- - store : Store or string - Store or path to directory in file system or name of zip file. 
- args : ndarray - NumPy arrays with data to save. - zarr_format : {2, 3, None}, optional - The zarr format to use when saving. - path : str or None, optional - Path within the store where the group will be saved. - kwargs - NumPy arrays with data to save. - """ return sync( async_api.save_group( store, @@ -225,11 +124,6 @@ def tree(*args: Any, **kwargs: Any) -> None: # TODO: add type annotations for kwargs def array(data: NDArrayLike, **kwargs: Any) -> Array: - """Create an array filled with `data`. - - The `data` argument should be a array-like object. For - other parameter definitions see :func:`zarr.api.synchronous.create`. - """ return Array(sync(async_api.array(data=data, **kwargs))) @@ -246,36 +140,6 @@ def group( meta_array: Any | None = None, # not used in async_api attributes: dict[str, JSON] | None = None, ) -> Group: - """Create a group. - - Parameters - ---------- - store : Store or string, optional - Store or path to directory in file system. - overwrite : bool, optional - If True, delete any pre-existing data in `store` at `path` before - creating the group. - chunk_store : Store, optional - Separate storage for chunks. If not provided, `store` will be used - for storage of both chunks and metadata. - cache_attrs : bool, optional - If True (default), user attributes will be cached for attribute read - operations. If False, user attributes are reloaded from the store prior - to all attribute read operations. - synchronizer : object, optional - Array synchronizer. - path : string, optional - Group path within store. - meta_array : array-like, optional - An array instance to use for determining arrays to create and return - to users. Use `numpy.empty(())` by default. - zarr_format : {2, 3, None}, optional - The zarr format to use when saving. 
- - Returns - ------- - g : Group - """ return Group( sync( async_api.group( @@ -307,38 +171,6 @@ def open_group( zarr_format: ZarrFormat | None = None, meta_array: Any | None = None, # not used in async api ) -> Group: - """Open a group using file-mode-like semantics. - - Parameters - ---------- - store : Store or string, optional - Store or path to directory in file system or name of zip file. - mode : {'r', 'r+', 'a', 'w', 'w-'}, optional - Persistence mode: 'r' means read only (must exist); 'r+' means - read/write (must exist); 'a' means read/write (create if doesn't - exist); 'w' means create (overwrite if exists); 'w-' means create - (fail if exists). - cache_attrs : bool, optional - If True (default), user attributes will be cached for attribute read - operations. If False, user attributes are reloaded from the store prior - to all attribute read operations. - synchronizer : object, optional - Array synchronizer. - path : string, optional - Group path within store. - chunk_store : Store or string, optional - Store or path to directory in file system or name of zip file. - storage_options : dict - If using an fsspec URL to create the store, these will be passed to - the backend implementation. Ignored otherwise. - meta_array : array-like, optional - An array instance to use for determining arrays to create and return - to users. Use `numpy.empty(())` by default. - - Returns - ------- - g : AsyncGroup - """ return Group( sync( async_api.open_group( @@ -365,118 +197,79 @@ def create(*args: Any, **kwargs: Any) -> Array: # TODO: move shapelike to common module # TODO: add type annotations for kwargs def empty(shape: async_api.ShapeLike, **kwargs: Any) -> Array: - """Create an empty array. - - For parameter definitions see :func:`zarr.api.asynchronous.create`. - - Notes - ----- - The contents of an empty Zarr array are not defined. 
On attempting to - retrieve data from an empty Zarr array, any values may be returned, - and these are not guaranteed to be stable from one access to the next. - """ return Array(sync(async_api.empty(shape, **kwargs))) # TODO: move ArrayLike to common module # TODO: add type annotations for kwargs def empty_like(a: async_api.ArrayLike, **kwargs: Any) -> Array: - """Create an empty array like `a`.""" return Array(sync(async_api.empty_like(a, **kwargs))) # TODO: add type annotations for kwargs and fill_value def full(shape: async_api.ShapeLike, fill_value: Any, **kwargs: Any) -> Array: - """Create an array, with `fill_value` being used as the default value for - uninitialized portions of the array. - - For parameter definitions see :func:`zarr.api.asynchronous.create`. - """ return Array(sync(async_api.full(shape=shape, fill_value=fill_value, **kwargs))) # TODO: move ArrayLike to common module # TODO: add type annotations for kwargs def full_like(a: async_api.ArrayLike, **kwargs: Any) -> Array: - """Create a filled array like `a`.""" return Array(sync(async_api.full_like(a, **kwargs))) # TODO: add type annotations for kwargs # TODO: move ShapeLike to common module def ones(shape: async_api.ShapeLike, **kwargs: Any) -> Array: - """Create an array, with one being used as the default value for - uninitialized portions of the array. - - For parameter definitions see :func:`zarr.api.asynchronous.create`. - - Returns - ------- - Array - The new array. - """ return Array(sync(async_api.ones(shape, **kwargs))) # TODO: add type annotations for kwargs def ones_like(a: async_api.ArrayLike, **kwargs: Any) -> Array: - """Create an array of ones like `a`.""" return Array(sync(async_api.ones_like(a, **kwargs))) # TODO: update this once async_api.open_array is fully implemented def open_array(*args: Any, **kwargs: Any) -> Array: - """Open an array using file-mode-like semantics. - - Parameters - ---------- - TODO - - Returns - ------- - AsyncArray - The opened array. 
- """ return Array(sync(async_api.open_array(*args, **kwargs))) # TODO: add type annotations for kwargs def open_like(a: async_api.ArrayLike, **kwargs: Any) -> Array: - """Open a persistent array like `a`. - - Parameters - ---------- - a : Array - The shape and data-type of a define these same attributes of the returned array. - path : str - The path to the new array. - **kwargs - Any keyword arguments to pass to the array constructor. - - Returns - ------- - Array - The opened array. - """ return Array(sync(async_api.open_like(a, **kwargs))) # TODO: add type annotations for kwargs def zeros(*args: Any, **kwargs: Any) -> Array: - """ - Create an array, with zero being used as the default value for - uninitialized portions of the array. - - For parameter definitions see :func:`zarr.creation.create`. - - Returns: - Array - The new array. - """ return Array(sync(async_api.zeros(*args, **kwargs))) # TODO: add type annotations for kwargs def zeros_like(a: async_api.ArrayLike, **kwargs: Any) -> Array: - """Create an array of zeros like `a`.""" return Array(sync(async_api.zeros_like(a, **kwargs))) + + +consolidate_metadata.__doc__ = async_api.consolidate_metadata.__doc__ +copy.__doc__ = async_api.copy.__doc__ +copy_all.__doc__ = async_api.copy_all.__doc__ +copy_store.__doc__ = async_api.copy_store.__doc__ +load.__doc__ = async_api.load.__doc__ +open.__doc__ = async_api.open.__doc__ +open_consolidated.__doc__ = async_api.open_consolidated.__doc__ +save.__doc__ = async_api.save.__doc__ +save_array.__doc__ = async_api.save_array.__doc__ +save_group.__doc__ = async_api.save_group.__doc__ +tree.__doc__ = async_api.tree.__doc__ +array.__doc__ = async_api.array.__doc__ +group.__doc__ = async_api.group.__doc__ +open_group.__doc__ = async_api.open_group.__doc__ +create.__doc__ = async_api.create.__doc__ +empty.__doc__ = async_api.empty.__doc__ +empty_like.__doc__ = async_api.empty_like.__doc__ +full.__doc__ = async_api.full.__doc__ +full_like.__doc__ = async_api.full_like.__doc__ +ones.__doc__ = 
async_api.ones.__doc__ +ones_like.__doc__ = async_api.ones_like.__doc__ +open_array.__doc__ = async_api.open_array.__doc__ +open_like.__doc__ = async_api.open_like.__doc__ +zeros.__doc__ = async_api.zeros.__doc__ +zeros_like.__doc__ = async_api.zeros_like.__doc__ diff --git a/src/zarr/array.py b/src/zarr/array.py index dea3984144..dbdd749eca 100644 --- a/src/zarr/array.py +++ b/src/zarr/array.py @@ -288,24 +288,19 @@ async def open( store: StoreLike, zarr_format: ZarrFormat | None = 3, ) -> AsyncArray: - print(f"store: {store}") store_path = make_store_path(store) - print(f"store_path: {store_path}") if zarr_format == 2: - print("^^^^^^", (store_path / ZARR_JSON)) zarray_bytes, zattrs_bytes = await gather( (store_path / ZARRAY_JSON).get(), (store_path / ZATTRS_JSON).get() ) if zarray_bytes is None: raise KeyError(store_path) # filenotfounderror? elif zarr_format == 3: - print("*******", (store_path / ZARR_JSON)) zarr_json_bytes = await (store_path / ZARR_JSON).get() if zarr_json_bytes is None: raise KeyError(store_path) # filenotfounderror? 
elif zarr_format is None: - print("$$$$$$", (store_path / ZARR_JSON)) zarr_json_bytes, zarray_bytes, zattrs_bytes = await gather( (store_path / ZARR_JSON).get(), (store_path / ZARRAY_JSON).get(), diff --git a/src/zarr/group.py b/src/zarr/group.py index 55390bac9e..4dff2dc302 100644 --- a/src/zarr/group.py +++ b/src/zarr/group.py @@ -295,7 +295,7 @@ async def create_array( self, path: str, shape: ChunkCoords, - dtype: npt.DTypeLike, + dtype: npt.DTypeLike = "float64", fill_value: Any | None = None, attributes: dict[str, JSON] | None = None, # v3 only diff --git a/src/zarr/store/core.py b/src/zarr/store/core.py index 17dfa79b36..60a9bdf238 100644 --- a/src/zarr/store/core.py +++ b/src/zarr/store/core.py @@ -73,9 +73,8 @@ def make_store_path(store_like: StoreLike | None, *, mode: OpenMode | None = Non return StorePath(store_like) elif store_like is None: if mode is None: - mode = "r" + mode = "w" # exception to the default mode = 'r' return StorePath(MemoryStore(mode=mode)) elif isinstance(store_like, str): - assert mode is not None - return StorePath(LocalStore(Path(store_like), mode=mode)) + return StorePath(LocalStore(Path(store_like), mode=mode or "r")) raise TypeError diff --git a/tests/v3/test_api.py b/tests/v3/test_api.py new file mode 100644 index 0000000000..31e6fbfcd9 --- /dev/null +++ b/tests/v3/test_api.py @@ -0,0 +1,778 @@ +import numpy as np +import pytest +from numpy.testing import assert_array_equal + +import zarr +from zarr import Array, Group +from zarr.abc.store import Store +from zarr.api.synchronous import load, open, open_group, save, save_array, save_group + + +def test_open_array(memory_store: Store) -> None: + store = memory_store + + # open array, create if doesn't exist + z = open(store=store, shape=100) + assert isinstance(z, Array) + assert z.shape == (100,) + + # open array, overwrite + store._store_dict = {} + z = open(store=store, shape=200, mode="w") # mode="w" + assert isinstance(z, Array) + assert z.shape == (200,) + + # open array, 
read-only + ro_store = type(store)(store_dict=store._store_dict, mode="r") + z = open(store=ro_store) + assert isinstance(z, Array) + assert z.shape == (200,) + assert z.read_only + + # path not found + with pytest.raises(ValueError): + open(store="doesnotexist", mode="r") + + +def test_open_group(memory_store: Store) -> None: + store = memory_store + + # open group, create if doesn't exist + g = open_group(store=store) + g.create_group("foo") + assert isinstance(g, Group) + assert "foo" in g + + # open group, overwrite + # g = open_group(store=store) + # assert isinstance(g, Group) + # assert "foo" not in g + + # open group, read-only + ro_store = type(store)(store_dict=store._store_dict, mode="r") + g = open_group(store=ro_store) + assert isinstance(g, Group) + # assert g.read_only + + +def test_save_errors() -> None: + with pytest.raises(ValueError): + # no arrays provided + save_group("data/group.zarr") + with pytest.raises(TypeError): + # no array provided + save_array("data/group.zarr") + with pytest.raises(ValueError): + # no arrays provided + save("data/group.zarr") + + +# def test_lazy_loader(): +# foo = np.arange(100) +# bar = np.arange(100, 0, -1) +# store = "data/group.zarr" +# save(store, foo=foo, bar=bar) +# loader = load(store) +# assert "foo" in loader +# assert "bar" in loader +# assert "baz" not in loader +# assert len(loader) == 2 +# assert sorted(loader) == ["bar", "foo"] +# assert_array_equal(foo, loader["foo"]) +# assert_array_equal(bar, loader["bar"]) +# assert "LazyLoader: " in repr(loader) + + +def test_load_array(memory_store: Store) -> None: + store = memory_store + foo = np.arange(100) + bar = np.arange(100, 0, -1) + save(store, foo=foo, bar=bar) + + # can also load arrays directly into a numpy array + for array_name in ["foo", "bar"]: + array = load(store, path=array_name) + assert isinstance(array, np.ndarray) + if array_name == "foo": + assert_array_equal(foo, array) + else: + assert_array_equal(bar, array) + + +def test_tree() -> 
None: + g1 = zarr.group() + g1.create_group("foo") + g3 = g1.create_group("bar") + g3.create_group("baz") + g5 = g3.create_group("qux") + g5.create_array("baz", shape=100, chunks=10) + # TODO: complete after tree has been reimplemented + # assert repr(zarr.tree(g1)) == repr(g1.tree()) + # assert str(zarr.tree(g1)) == str(g1.tree()) + + +# @pytest.mark.parametrize("stores_from_path", [False, True]) +# @pytest.mark.parametrize( +# "with_chunk_store,listable", +# [(False, True), (True, True), (False, False)], +# ids=["default-listable", "with_chunk_store-listable", "default-unlistable"], +# ) +# def test_consolidate_metadata(with_chunk_store, listable, monkeypatch, stores_from_path): +# # setup initial data +# if stores_from_path: +# store = tempfile.mkdtemp() +# atexit.register(atexit_rmtree, store) +# if with_chunk_store: +# chunk_store = tempfile.mkdtemp() +# atexit.register(atexit_rmtree, chunk_store) +# else: +# chunk_store = None +# else: +# store = MemoryStore() +# chunk_store = MemoryStore() if with_chunk_store else None +# path = None +# z = group(store, chunk_store=chunk_store, path=path) + +# # Reload the actual store implementation in case str +# store_to_copy = z.store + +# z.create_group("g1") +# g2 = z.create_group("g2") +# g2.attrs["hello"] = "world" +# arr = g2.create_array("arr", shape=(20, 20), chunks=(5, 5), dtype="f8") +# assert 16 == arr.nchunks +# assert 0 == arr.nchunks_initialized +# arr.attrs["data"] = 1 +# arr[:] = 1.0 +# assert 16 == arr.nchunks_initialized + +# if stores_from_path: +# # get the actual store class for use with consolidate_metadata +# store_class = z._store +# else: +# store_class = store + +# # perform consolidation +# out = consolidate_metadata(store_class, path=path) +# assert isinstance(out, Group) +# assert ["g1", "g2"] == list(out) +# if not stores_from_path: +# assert isinstance(out._store, ConsolidatedMetadataStore) +# assert ".zmetadata" in store +# meta_keys = [ +# ".zgroup", +# "g1/.zgroup", +# "g2/.zgroup", +# 
"g2/.zattrs", +# "g2/arr/.zarray", +# "g2/arr/.zattrs", +# ] + +# for key in meta_keys: +# del store[key] + +# # https://github.com/zarr-developers/zarr-python/issues/993 +# # Make sure we can still open consolidated on an unlistable store: +# if not listable: +# fs_memory = pytest.importorskip("fsspec.implementations.memory") +# monkeypatch.setattr(fs_memory.MemoryFileSystem, "isdir", lambda x, y: False) +# monkeypatch.delattr(fs_memory.MemoryFileSystem, "ls") +# fs = fs_memory.MemoryFileSystem() +# store_to_open = FSStore("", fs=fs) +# # copy original store to new unlistable store +# store_to_open.update(store_to_copy) + +# else: +# store_to_open = store + +# # open consolidated +# z2 = open_consolidated(store_to_open, chunk_store=chunk_store, path=path) +# assert ["g1", "g2"] == list(z2) +# assert "world" == z2.g2.attrs["hello"] +# assert 1 == z2.g2.arr.attrs["data"] +# assert (z2.g2.arr[:] == 1.0).all() +# assert 16 == z2.g2.arr.nchunks +# if listable: +# assert 16 == z2.g2.arr.nchunks_initialized +# else: +# with pytest.raises(NotImplementedError): +# _ = z2.g2.arr.nchunks_initialized + +# if stores_from_path: +# # path string is note a BaseStore subclass so cannot be used to +# # initialize a ConsolidatedMetadataStore. 
+ +# with pytest.raises(ValueError): +# cmd = ConsolidatedMetadataStore(store) +# else: +# # tests del/write on the store + +# cmd = ConsolidatedMetadataStore(store) +# with pytest.raises(PermissionError): +# del cmd[".zgroup"] +# with pytest.raises(PermissionError): +# cmd[".zgroup"] = None + +# # test getsize on the store +# assert isinstance(getsize(cmd), Integral) + +# # test new metadata are not writeable +# with pytest.raises(PermissionError): +# z2.create_group("g3") +# with pytest.raises(PermissionError): +# z2.create_dataset("spam", shape=42, chunks=7, dtype="i4") +# with pytest.raises(PermissionError): +# del z2["g2"] + +# # test consolidated metadata are not writeable +# with pytest.raises(PermissionError): +# z2.g2.attrs["hello"] = "universe" +# with pytest.raises(PermissionError): +# z2.g2.arr.attrs["foo"] = "bar" + +# # test the data are writeable +# z2.g2.arr[:] = 2 +# assert (z2.g2.arr[:] == 2).all() + +# # test invalid modes +# with pytest.raises(ValueError): +# open_consolidated(store, chunk_store=chunk_store, mode="a", path=path) +# with pytest.raises(ValueError): +# open_consolidated(store, chunk_store=chunk_store, mode="w", path=path) +# with pytest.raises(ValueError): +# open_consolidated(store, chunk_store=chunk_store, mode="w-", path=path) + +# # make sure keyword arguments are passed through without error +# open_consolidated( +# store, +# chunk_store=chunk_store, +# path=path, +# cache_attrs=True, +# synchronizer=None, +# ) + + +# @pytest.mark.parametrize( +# "options", +# ( +# {"dimension_separator": "/"}, +# {"dimension_separator": "."}, +# {"dimension_separator": None}, +# ), +# ) +# def test_save_array_separator(tmpdir, options): +# data = np.arange(6).reshape((3, 2)) +# url = tmpdir.join("test.zarr") +# save_array(url, data, **options) + + +# class TestCopyStore(unittest.TestCase): +# _version = 2 + +# def setUp(self): +# source = dict() +# source["foo"] = b"xxx" +# source["bar/baz"] = b"yyy" +# source["bar/qux"] = b"zzz" +# 
self.source = source + +# def _get_dest_store(self): +# return dict() + +# def test_no_paths(self): +# source = self.source +# dest = self._get_dest_store() +# copy_store(source, dest) +# assert len(source) == len(dest) +# for key in source: +# assert source[key] == dest[key] + +# def test_source_path(self): +# source = self.source +# # paths should be normalized +# for source_path in "bar", "bar/", "/bar", "/bar/": +# dest = self._get_dest_store() +# copy_store(source, dest, source_path=source_path) +# assert 2 == len(dest) +# for key in source: +# if key.startswith("bar/"): +# dest_key = key.split("bar/")[1] +# assert source[key] == dest[dest_key] +# else: +# assert key not in dest + +# def test_dest_path(self): +# source = self.source +# # paths should be normalized +# for dest_path in "new", "new/", "/new", "/new/": +# dest = self._get_dest_store() +# copy_store(source, dest, dest_path=dest_path) +# assert len(source) == len(dest) +# for key in source: +# if self._version == 3: +# dest_key = key[:10] + "new/" + key[10:] +# else: +# dest_key = "new/" + key +# assert source[key] == dest[dest_key] + +# def test_source_dest_path(self): +# source = self.source +# # paths should be normalized +# for source_path in "bar", "bar/", "/bar", "/bar/": +# for dest_path in "new", "new/", "/new", "/new/": +# dest = self._get_dest_store() +# copy_store(source, dest, source_path=source_path, dest_path=dest_path) +# assert 2 == len(dest) +# for key in source: +# if key.startswith("bar/"): +# dest_key = "new/" + key.split("bar/")[1] +# assert source[key] == dest[dest_key] +# else: +# assert key not in dest +# assert ("new/" + key) not in dest + +# def test_excludes_includes(self): +# source = self.source + +# # single excludes +# dest = self._get_dest_store() +# excludes = "f.*" +# copy_store(source, dest, excludes=excludes) +# assert len(dest) == 2 + +# root = "" +# assert root + "foo" not in dest + +# # multiple excludes +# dest = self._get_dest_store() +# excludes = "b.z", 
".*x" +# copy_store(source, dest, excludes=excludes) +# assert len(dest) == 1 +# assert root + "foo" in dest +# assert root + "bar/baz" not in dest +# assert root + "bar/qux" not in dest + +# # excludes and includes +# dest = self._get_dest_store() +# excludes = "b.*" +# includes = ".*x" +# copy_store(source, dest, excludes=excludes, includes=includes) +# assert len(dest) == 2 +# assert root + "foo" in dest +# assert root + "bar/baz" not in dest +# assert root + "bar/qux" in dest + +# def test_dry_run(self): +# source = self.source +# dest = self._get_dest_store() +# copy_store(source, dest, dry_run=True) +# assert 0 == len(dest) + +# def test_if_exists(self): +# source = self.source +# dest = self._get_dest_store() +# root = "" +# dest[root + "bar/baz"] = b"mmm" + +# # default ('raise') +# with pytest.raises(CopyError): +# copy_store(source, dest) + +# # explicit 'raise' +# with pytest.raises(CopyError): +# copy_store(source, dest, if_exists="raise") + +# # skip +# copy_store(source, dest, if_exists="skip") +# assert 3 == len(dest) +# assert dest[root + "foo"] == b"xxx" +# assert dest[root + "bar/baz"] == b"mmm" +# assert dest[root + "bar/qux"] == b"zzz" + +# # replace +# copy_store(source, dest, if_exists="replace") +# assert 3 == len(dest) +# assert dest[root + "foo"] == b"xxx" +# assert dest[root + "bar/baz"] == b"yyy" +# assert dest[root + "bar/qux"] == b"zzz" + +# # invalid option +# with pytest.raises(ValueError): +# copy_store(source, dest, if_exists="foobar") + + +# def check_copied_array(original, copied, without_attrs=False, expect_props=None): +# # setup +# source_h5py = original.__module__.startswith("h5py.") +# dest_h5py = copied.__module__.startswith("h5py.") +# zarr_to_zarr = not (source_h5py or dest_h5py) +# h5py_to_h5py = source_h5py and dest_h5py +# zarr_to_h5py = not source_h5py and dest_h5py +# h5py_to_zarr = source_h5py and not dest_h5py +# if expect_props is None: +# expect_props = dict() +# else: +# expect_props = expect_props.copy() + +# # 
common properties in zarr and h5py +# for p in "dtype", "shape", "chunks": +# expect_props.setdefault(p, getattr(original, p)) + +# # zarr-specific properties +# if zarr_to_zarr: +# for p in "compressor", "filters", "order", "fill_value": +# expect_props.setdefault(p, getattr(original, p)) + +# # h5py-specific properties +# if h5py_to_h5py: +# for p in ( +# "maxshape", +# "compression", +# "compression_opts", +# "shuffle", +# "scaleoffset", +# "fletcher32", +# "fillvalue", +# ): +# expect_props.setdefault(p, getattr(original, p)) + +# # common properties with some name differences +# if h5py_to_zarr: +# expect_props.setdefault("fill_value", original.fillvalue) +# if zarr_to_h5py: +# expect_props.setdefault("fillvalue", original.fill_value) + +# # compare properties +# for k, v in expect_props.items(): +# assert v == getattr(copied, k) + +# # compare data +# assert_array_equal(original[:], copied[:]) + +# # compare attrs +# if without_attrs: +# for k in original.attrs.keys(): +# assert k not in copied.attrs +# else: +# if dest_h5py and "filters" in original.attrs: +# # special case in v3 (storing filters metadata under attributes) +# # we explicitly do not copy this info over to HDF5 +# original_attrs = original.attrs.asdict().copy() +# original_attrs.pop("filters") +# else: +# original_attrs = original.attrs +# assert sorted(original_attrs.items()) == sorted(copied.attrs.items()) + + +# def check_copied_group(original, copied, without_attrs=False, expect_props=None, shallow=False): +# # setup +# if expect_props is None: +# expect_props = dict() +# else: +# expect_props = expect_props.copy() + +# # compare children +# for k, v in original.items(): +# if hasattr(v, "shape"): +# assert k in copied +# check_copied_array(v, copied[k], without_attrs=without_attrs, expect_props=expect_props) +# elif shallow: +# assert k not in copied +# else: +# assert k in copied +# check_copied_group( +# v, +# copied[k], +# without_attrs=without_attrs, +# shallow=shallow, +# 
expect_props=expect_props, +# ) + +# # compare attrs +# if without_attrs: +# for k in original.attrs.keys(): +# assert k not in copied.attrs +# else: +# assert sorted(original.attrs.items()) == sorted(copied.attrs.items()) + + +# def test_copy_all(): +# """ +# https://github.com/zarr-developers/zarr-python/issues/269 + +# copy_all used to not copy attributes as `.keys()` does not return hidden `.zattrs`. + +# """ +# original_group = zarr.group(store=MemoryStore(), overwrite=True) +# original_group.attrs["info"] = "group attrs" +# original_subgroup = original_group.create_group("subgroup") +# original_subgroup.attrs["info"] = "sub attrs" + +# destination_group = zarr.group(store=MemoryStore(), overwrite=True) + +# # copy from memory to directory store +# copy_all( +# original_group, +# destination_group, +# dry_run=False, +# ) + +# assert "subgroup" in destination_group +# assert destination_group.attrs["info"] == "group attrs" +# assert destination_group.subgroup.attrs["info"] == "sub attrs" + + +# class TestCopy: +# @pytest.fixture(params=[False, True], ids=["zarr", "hdf5"]) +# def source(self, request, tmpdir): +# def prep_source(source): +# foo = source.create_group("foo") +# foo.attrs["experiment"] = "weird science" +# baz = foo.create_dataset("bar/baz", data=np.arange(100), chunks=(50,)) +# baz.attrs["units"] = "metres" +# if request.param: +# extra_kws = dict( +# compression="gzip", +# compression_opts=3, +# fillvalue=84, +# shuffle=True, +# fletcher32=True, +# ) +# else: +# extra_kws = dict(compressor=Zlib(3), order="F", fill_value=42, filters=[Adler32()]) +# source.create_dataset( +# "spam", +# data=np.arange(100, 200).reshape(20, 5), +# chunks=(10, 2), +# dtype="i2", +# **extra_kws, +# ) +# return source + +# if request.param: +# h5py = pytest.importorskip("h5py") +# fn = tmpdir.join("source.h5") +# with h5py.File(str(fn), mode="w") as h5f: +# yield prep_source(h5f) +# else: +# yield prep_source(group()) + +# @pytest.fixture(params=[False, True], 
ids=["zarr", "hdf5"]) +# def dest(self, request, tmpdir): +# if request.param: +# h5py = pytest.importorskip("h5py") +# fn = tmpdir.join("dest.h5") +# with h5py.File(str(fn), mode="w") as h5f: +# yield h5f +# else: +# yield group() + +# def test_copy_array(self, source, dest): +# # copy array with default options +# copy(source["foo/bar/baz"], dest) +# check_copied_array(source["foo/bar/baz"], dest["baz"]) +# copy(source["spam"], dest) +# check_copied_array(source["spam"], dest["spam"]) + +# def test_copy_bad_dest(self, source, dest): +# # try to copy to an array, dest must be a group +# dest = dest.create_dataset("eggs", shape=(100,)) +# with pytest.raises(ValueError): +# copy(source["foo/bar/baz"], dest) + +# def test_copy_array_name(self, source, dest): +# # copy array with name +# copy(source["foo/bar/baz"], dest, name="qux") +# assert "baz" not in dest +# check_copied_array(source["foo/bar/baz"], dest["qux"]) + +# def test_copy_array_create_options(self, source, dest): +# dest_h5py = dest.__module__.startswith("h5py.") + +# # copy array, provide creation options +# compressor = Zlib(9) +# create_kws = dict(chunks=(10,)) +# if dest_h5py: +# create_kws.update( +# compression="gzip", compression_opts=9, shuffle=True, fletcher32=True, fillvalue=42 +# ) +# else: +# create_kws.update(compressor=compressor, fill_value=42, order="F", filters=[Adler32()]) +# copy(source["foo/bar/baz"], dest, without_attrs=True, **create_kws) +# check_copied_array( +# source["foo/bar/baz"], dest["baz"], without_attrs=True, expect_props=create_kws +# ) + +# def test_copy_array_exists_array(self, source, dest): +# # copy array, dest array in the way +# dest.create_dataset("baz", shape=(10,)) + +# # raise +# with pytest.raises(CopyError): +# # should raise by default +# copy(source["foo/bar/baz"], dest) +# assert (10,) == dest["baz"].shape +# with pytest.raises(CopyError): +# copy(source["foo/bar/baz"], dest, if_exists="raise") +# assert (10,) == dest["baz"].shape + +# # skip +# 
copy(source["foo/bar/baz"], dest, if_exists="skip") +# assert (10,) == dest["baz"].shape + +# # replace +# copy(source["foo/bar/baz"], dest, if_exists="replace") +# check_copied_array(source["foo/bar/baz"], dest["baz"]) + +# # invalid option +# with pytest.raises(ValueError): +# copy(source["foo/bar/baz"], dest, if_exists="foobar") + +# def test_copy_array_exists_group(self, source, dest): +# # copy array, dest group in the way +# dest.create_group("baz") + +# # raise +# with pytest.raises(CopyError): +# copy(source["foo/bar/baz"], dest) +# assert not hasattr(dest["baz"], "shape") +# with pytest.raises(CopyError): +# copy(source["foo/bar/baz"], dest, if_exists="raise") +# assert not hasattr(dest["baz"], "shape") + +# # skip +# copy(source["foo/bar/baz"], dest, if_exists="skip") +# assert not hasattr(dest["baz"], "shape") + +# # replace +# copy(source["foo/bar/baz"], dest, if_exists="replace") +# check_copied_array(source["foo/bar/baz"], dest["baz"]) + +# def test_copy_array_skip_initialized(self, source, dest): +# dest_h5py = dest.__module__.startswith("h5py.") + +# dest.create_dataset("baz", shape=(100,), chunks=(10,), dtype="i8") +# assert not np.all(source["foo/bar/baz"][:] == dest["baz"][:]) + +# if dest_h5py: +# with pytest.raises(ValueError): +# # not available with copy to h5py +# copy(source["foo/bar/baz"], dest, if_exists="skip_initialized") + +# else: +# # copy array, dest array exists but not yet initialized +# copy(source["foo/bar/baz"], dest, if_exists="skip_initialized") +# check_copied_array(source["foo/bar/baz"], dest["baz"]) + +# # copy array, dest array exists and initialized, will be skipped +# dest["baz"][:] = np.arange(100, 200) +# copy(source["foo/bar/baz"], dest, if_exists="skip_initialized") +# assert_array_equal(np.arange(100, 200), dest["baz"][:]) +# assert not np.all(source["foo/bar/baz"][:] == dest["baz"][:]) + +# def test_copy_group(self, source, dest): +# # copy group, default options +# copy(source["foo"], dest) +# 
check_copied_group(source["foo"], dest["foo"]) + +# def test_copy_group_no_name(self, source, dest): +# with pytest.raises(TypeError): +# # need a name if copy root +# copy(source, dest) + +# copy(source, dest, name="root") +# check_copied_group(source, dest["root"]) + +# def test_copy_group_options(self, source, dest): +# # copy group, non-default options +# copy(source["foo"], dest, name="qux", without_attrs=True) +# assert "foo" not in dest +# check_copied_group(source["foo"], dest["qux"], without_attrs=True) + +# def test_copy_group_shallow(self, source, dest): +# # copy group, shallow +# copy(source, dest, name="eggs", shallow=True) +# check_copied_group(source, dest["eggs"], shallow=True) + +# def test_copy_group_exists_group(self, source, dest): +# # copy group, dest groups exist +# dest.create_group("foo/bar") +# copy(source["foo"], dest) +# check_copied_group(source["foo"], dest["foo"]) + +# def test_copy_group_exists_array(self, source, dest): +# # copy group, dest array in the way +# dest.create_dataset("foo/bar", shape=(10,)) + +# # raise +# with pytest.raises(CopyError): +# copy(source["foo"], dest) +# assert dest["foo/bar"].shape == (10,) +# with pytest.raises(CopyError): +# copy(source["foo"], dest, if_exists="raise") +# assert dest["foo/bar"].shape == (10,) + +# # skip +# copy(source["foo"], dest, if_exists="skip") +# assert dest["foo/bar"].shape == (10,) + +# # replace +# copy(source["foo"], dest, if_exists="replace") +# check_copied_group(source["foo"], dest["foo"]) + +# def test_copy_group_dry_run(self, source, dest): +# # dry run, empty destination +# n_copied, n_skipped, n_bytes_copied = copy( +# source["foo"], dest, dry_run=True, return_stats=True +# ) +# assert 0 == len(dest) +# assert 3 == n_copied +# assert 0 == n_skipped +# assert 0 == n_bytes_copied + +# # dry run, array exists in destination +# baz = np.arange(100, 200) +# dest.create_dataset("foo/bar/baz", data=baz) +# assert not np.all(source["foo/bar/baz"][:] == 
dest["foo/bar/baz"][:]) +# assert 1 == len(dest) + +# # raise +# with pytest.raises(CopyError): +# copy(source["foo"], dest, dry_run=True) +# assert 1 == len(dest) + +# # skip +# n_copied, n_skipped, n_bytes_copied = copy( +# source["foo"], dest, dry_run=True, if_exists="skip", return_stats=True +# ) +# assert 1 == len(dest) +# assert 2 == n_copied +# assert 1 == n_skipped +# assert 0 == n_bytes_copied +# assert_array_equal(baz, dest["foo/bar/baz"]) + +# # replace +# n_copied, n_skipped, n_bytes_copied = copy( +# source["foo"], dest, dry_run=True, if_exists="replace", return_stats=True +# ) +# assert 1 == len(dest) +# assert 3 == n_copied +# assert 0 == n_skipped +# assert 0 == n_bytes_copied +# assert_array_equal(baz, dest["foo/bar/baz"]) + +# def test_logging(self, source, dest, tmpdir): +# # callable log +# copy(source["foo"], dest, dry_run=True, log=print) + +# # file name +# fn = str(tmpdir.join("log_name")) +# copy(source["foo"], dest, dry_run=True, log=fn) + +# # file +# with tmpdir.join("log_file").open(mode="w") as f: +# copy(source["foo"], dest, dry_run=True, log=f) + +# # bad option +# with pytest.raises(TypeError): +# copy(source["foo"], dest, dry_run=True, log=True) From a4282f775e186900f74b6d887a29e21871cedb6c Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Fri, 31 May 2024 22:31:43 -0700 Subject: [PATCH 08/11] docs and missing store utils file --- src/zarr/api/asynchronous.py | 130 +++++++++++++++++++++++++++++++---- src/zarr/store/utils.py | 25 +++++++ 2 files changed, 141 insertions(+), 14 deletions(-) create mode 100644 src/zarr/store/utils.py diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index 3afa2acdba..110ac0f848 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -27,6 +27,7 @@ def _get_shape_chunks(a: ArrayLike | Any) -> tuple[ShapeLike | None, ChunkCoords | None]: + """helper function to get the shape and chunks from an array-like object""" shape = None chunks = None @@ -44,6 +45,7 
@@ def _get_shape_chunks(a: ArrayLike | Any) -> tuple[ShapeLike | None, ChunkCoords def _like_args(a: ArrayLike, kwargs: dict[str, Any]) -> None: + """set default values for shape and chunks if they are not present in the array-like object""" shape, chunks = _get_shape_chunks(a) if shape is not None: kwargs.setdefault("shape", shape) @@ -73,6 +75,7 @@ def _like_args(a: ArrayLike, kwargs: dict[str, Any]) -> None: def _handle_zarr_version_or_format( *, zarr_version: ZarrFormat | None, zarr_format: ZarrFormat | None ) -> ZarrFormat | None: + """handle the deprecated zarr_version kwarg and return zarr_format""" if zarr_format is not None and zarr_version is not None and zarr_format != zarr_version: raise ValueError( f"zarr_format {zarr_format} does not match zarr_version {zarr_version}, please only set one" @@ -86,6 +89,7 @@ def _handle_zarr_version_or_format( def _default_zarr_version() -> ZarrFormat: + """return the default zarr_version""" # TODO: set default value from config return 3 @@ -177,8 +181,8 @@ async def open( Returns ------- - z : AsyncArray or AsyncGroup - Array or group, depending on what exists in the given store. + z : array or group + Return type depends on what exists in the given store. """ zarr_format = _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format) store_path = make_store_path(store, mode=mode) @@ -317,8 +321,17 @@ async def tree(*args: Any, **kwargs: Any) -> None: async def array(data: NDArrayLike, **kwargs: Any) -> AsyncArray: """Create an array filled with `data`. - The `data` argument should be a array-like object. For - other parameter definitions see :func:`zarr.api.asynchronous.create`. + Parameters + ---------- + data : array_like + The data to fill the array with. + kwargs + Passed through to :func:`create`. + + Returns + ------- + array : array + The new array. """ # ensure data is array-like @@ -395,7 +408,8 @@ async def group( Returns ------- - g : AsyncGroup + g : group + The new group. 
""" zarr_format = ( @@ -474,7 +488,8 @@ async def open_group( Returns ------- - g : AsyncGroup + g : group + The new group. """ zarr_format = ( @@ -616,7 +631,8 @@ async def create( Returns ------- - z : zarr.core.Array + z : array + The array. """ zarr_format = ( _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format) @@ -686,7 +702,12 @@ async def create( async def empty(shape: ShapeLike, **kwargs: Any) -> AsyncArray: """Create an empty array. - For parameter definitions see :func:`zarr.api.asynchronous.create`. + Parameters + ---------- + shape : int or tuple of int + Shape of the empty array. + **kwargs + Keyword arguments passed to :func:`zarr.api.asynchronous.create`. Notes ----- @@ -698,7 +719,20 @@ async def empty(shape: ShapeLike, **kwargs: Any) -> AsyncArray: async def empty_like(a: ArrayLike, **kwargs: Any) -> AsyncArray: - """Create an empty array like `a`.""" + """Create an empty array like `a`. + + Parameters + ---------- + a : array-like + The array to create an empty array like. + **kwargs + Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + + Returns + ------- + Array + The new array. + """ _like_args(a, kwargs) return await empty(**kwargs) @@ -708,14 +742,39 @@ async def full(shape: ShapeLike, fill_value: Any, **kwargs: Any) -> AsyncArray: """Create an array, with `fill_value` being used as the default value for uninitialized portions of the array. - For parameter definitions see :func:`zarr.api.asynchronous.create`. + Parameters + ---------- + shape : int or tuple of int + Shape of the empty array. + fill_value : scalar + Fill value. + **kwargs + Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + + Returns + ------- + Array + The new array. """ return await create(shape=shape, fill_value=fill_value, **kwargs) # TODO: add type annotations for kwargs async def full_like(a: ArrayLike, **kwargs: Any) -> AsyncArray: - """Create a filled array like `a`.""" + """Create a filled array like `a`. 
+ + Parameters + ---------- + a : array-like + The array to create an empty array like. + **kwargs + Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + + Returns + ------- + Array + The new array. + """ _like_args(a, kwargs) if isinstance(a, AsyncArray): kwargs.setdefault("fill_value", a.metadata.fill_value) @@ -726,7 +785,12 @@ async def ones(shape: ShapeLike, **kwargs: Any) -> AsyncArray: """Create an array, with one being used as the default value for uninitialized portions of the array. - For parameter definitions see :func:`zarr.creation.create`. + Parameters + ---------- + shape : int or tuple of int + Shape of the empty array. + **kwargs + Keyword arguments passed to :func:`zarr.api.asynchronous.create`. Returns ------- @@ -737,7 +801,20 @@ async def ones(shape: ShapeLike, **kwargs: Any) -> AsyncArray: async def ones_like(a: ArrayLike, **kwargs: Any) -> AsyncArray: - """Create an array of ones like `a`.""" + """Create an array of ones like `a`. + + Parameters + ---------- + a : array-like + The array to create an empty array like. + **kwargs + Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + + Returns + ------- + Array + The new array. + """ _like_args(a, kwargs) return await ones(**kwargs) @@ -813,11 +890,36 @@ async def open_like(a: ArrayLike, path: str, **kwargs: Any) -> AsyncArray: async def zeros(shape: ShapeLike, **kwargs: Any) -> AsyncArray: """Create an array, with zero being used as the default value for uninitialized portions of the array. + + Parameters + ---------- + shape : int or tuple of int + Shape of the empty array. + **kwargs + Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + + Returns + ------- + Array + The new array. """ return await create(shape=shape, fill_value=0, **kwargs) async def zeros_like(a: ArrayLike, **kwargs: Any) -> AsyncArray: - """Create an array of zeros like `a`.""" + """Create an array of zeros like `a`. 
+ + Parameters + ---------- + a : array-like + The array to create an empty array like. + **kwargs + Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + + Returns + ------- + Array + The new array. + """ _like_args(a, kwargs) return await zeros(**kwargs) diff --git a/src/zarr/store/utils.py b/src/zarr/store/utils.py new file mode 100644 index 0000000000..17c9234221 --- /dev/null +++ b/src/zarr/store/utils.py @@ -0,0 +1,25 @@ +from zarr.buffer import Buffer + + +def _normalize_interval_index( + data: Buffer, interval: None | tuple[int | None, int | None] +) -> tuple[int, int]: + """ + Convert an implicit interval into an explicit start and length + """ + if interval is None: + start = 0 + length = len(data) + else: + maybe_start, maybe_len = interval + if maybe_start is None: + start = 0 + else: + start = maybe_start + + if maybe_len is None: + length = len(data) - start + else: + length = maybe_len + + return (start, length) From cdf2cbd1b0b5012a4a1f3bfee53a74d230478fe4 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Fri, 31 May 2024 22:38:56 -0700 Subject: [PATCH 09/11] fix parse shapelike test --- tests/v3/test_common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/v3/test_common.py b/tests/v3/test_common.py index 3bdbd2bffe..cc33aa75cf 100644 --- a/tests/v3/test_common.py +++ b/tests/v3/test_common.py @@ -64,7 +64,7 @@ def parse_indexing_order_valid(data: Literal["C", "F"]): assert parse_indexing_order(data) == data -@pytest.mark.parametrize("data", [10, ("0", 1, 2, 3), {"0": "0"}, []]) +@pytest.mark.parametrize("data", [("0", 1, 2, 3), {"0": "0"}, []]) def test_parse_shapelike_invalid(data: Any): if isinstance(data, Iterable): if len(data) == 0: From 4fd06cd40d4ef56993b790dceba832da6941f536 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Mon, 3 Jun 2024 09:53:56 -0700 Subject: [PATCH 10/11] fix bad merge --- src/zarr/metadata.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/zarr/metadata.py 
b/src/zarr/metadata.py index 6fced083d5..ab4952b99b 100644 --- a/src/zarr/metadata.py +++ b/src/zarr/metadata.py @@ -120,7 +120,6 @@ class ArrayMetadata(Metadata, ABC): shape: ChunkCoords fill_value: Any chunk_grid: ChunkGrid - fill_value: Any attributes: dict[str, JSON] zarr_format: ZarrFormat From 8bb00bf12bfd359a697521ceaa9de1579dd71405 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Tue, 4 Jun 2024 23:01:02 -0700 Subject: [PATCH 11/11] respond to reviews --- src/zarr/api/asynchronous.py | 62 +++++++++++++++++++----------------- src/zarr/api/synchronous.py | 10 +++--- src/zarr/common.py | 2 +- 3 files changed, 38 insertions(+), 36 deletions(-) diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index 110ac0f848..52d07fb6fe 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -12,7 +12,7 @@ from zarr.array import Array, AsyncArray from zarr.buffer import NDArrayLike from zarr.chunk_key_encodings import ChunkKeyEncoding -from zarr.common import JSON, MEMORY_ORDER, ChunkCoords, OpenMode, ZarrFormat +from zarr.common import JSON, ChunkCoords, MemoryOrder, OpenMode, ZarrFormat from zarr.group import AsyncGroup from zarr.metadata import ArrayV2Metadata, ArrayV3Metadata from zarr.store import ( @@ -20,13 +20,12 @@ make_store_path, ) -ShapeLike = tuple[int, ...] 
# TODO: support int for shape # TODO: this type could use some more thought, noqa to avoid "Variable "asynchronous.ArrayLike" is not valid as a type" ArrayLike = Union[AsyncArray | Array | npt.NDArray[Any]] # noqa PathLike = str -def _get_shape_chunks(a: ArrayLike | Any) -> tuple[ShapeLike | None, ChunkCoords | None]: +def _get_shape_chunks(a: ArrayLike | Any) -> tuple[ChunkCoords | None, ChunkCoords | None]: """helper function to get the shape and chunks from an array-like object""" shape = None chunks = None @@ -44,33 +43,38 @@ def _get_shape_chunks(a: ArrayLike | Any) -> tuple[ShapeLike | None, ChunkCoords return shape, chunks -def _like_args(a: ArrayLike, kwargs: dict[str, Any]) -> None: +def _like_args(a: ArrayLike, kwargs: dict[str, Any]) -> dict[str, Any]: """set default values for shape and chunks if they are not present in the array-like object""" + + new = kwargs.copy() + shape, chunks = _get_shape_chunks(a) if shape is not None: - kwargs.setdefault("shape", shape) + new["shape"] = shape if chunks is not None: - kwargs.setdefault("chunks", chunks) + new["chunks"] = chunks if hasattr(a, "dtype"): - kwargs.setdefault("dtype", a.dtype) + new["dtype"] = a.dtype if isinstance(a, AsyncArray): - kwargs.setdefault("order", a.order) + new["order"] = a.order if isinstance(a.metadata, ArrayV2Metadata): - kwargs.setdefault("compressor", a.metadata.compressor) - kwargs.setdefault("filters", a.metadata.filters) + new["compressor"] = a.metadata.compressor + new["filters"] = a.metadata.filters if isinstance(a.metadata, ArrayV3Metadata): - kwargs.setdefault("codecs", a.metadata.codecs) + new["codecs"] = a.metadata.codecs else: raise ValueError(f"Unsupported zarr format: {a.metadata.zarr_format}") else: # TODO: set default values compressor/codecs # to do this, we may need to evaluate if this is a v2 or v3 array - # kwargs.setdefault("compressor", "default") + # new["compressor"] = "default" pass + return new + def _handle_zarr_version_or_format( *, zarr_version: 
ZarrFormat | None, zarr_format: ZarrFormat | None @@ -524,13 +528,13 @@ async def open_group( async def create( - shape: ShapeLike, + shape: ChunkCoords, *, # Note: this is a change from v2 - chunks: ShapeLike | None = None, # TODO: v2 allowed chunks=True + chunks: ChunkCoords | None = None, # TODO: v2 allowed chunks=True dtype: npt.DTypeLike | None = None, compressor: dict[str, JSON] | None = None, # TODO: default and type change fill_value: Any = 0, # TODO: need type - order: MEMORY_ORDER | None = None, # TODO: default change + order: MemoryOrder | None = None, # TODO: default change store: str | StoreLike | None = None, synchronizer: Any | None = None, overwrite: bool = False, @@ -699,7 +703,7 @@ async def create( ) -async def empty(shape: ShapeLike, **kwargs: Any) -> AsyncArray: +async def empty(shape: ChunkCoords, **kwargs: Any) -> AsyncArray: """Create an empty array. Parameters @@ -733,12 +737,12 @@ async def empty_like(a: ArrayLike, **kwargs: Any) -> AsyncArray: Array The new array. """ - _like_args(a, kwargs) - return await empty(**kwargs) + like_kwargs = _like_args(a, kwargs) + return await empty(**like_kwargs) # TODO: add type annotations for fill_value and kwargs -async def full(shape: ShapeLike, fill_value: Any, **kwargs: Any) -> AsyncArray: +async def full(shape: ChunkCoords, fill_value: Any, **kwargs: Any) -> AsyncArray: """Create an array, with `fill_value` being used as the default value for uninitialized portions of the array. @@ -775,13 +779,13 @@ async def full_like(a: ArrayLike, **kwargs: Any) -> AsyncArray: Array The new array. 
""" - _like_args(a, kwargs) + like_kwargs = _like_args(a, kwargs) if isinstance(a, AsyncArray): kwargs.setdefault("fill_value", a.metadata.fill_value) - return await full(**kwargs) + return await full(**like_kwargs) -async def ones(shape: ShapeLike, **kwargs: Any) -> AsyncArray: +async def ones(shape: ChunkCoords, **kwargs: Any) -> AsyncArray: """Create an array, with one being used as the default value for uninitialized portions of the array. @@ -815,8 +819,8 @@ async def ones_like(a: ArrayLike, **kwargs: Any) -> AsyncArray: Array The new array. """ - _like_args(a, kwargs) - return await ones(**kwargs) + like_kwargs = _like_args(a, kwargs) + return await ones(**like_kwargs) async def open_array( @@ -881,13 +885,13 @@ async def open_like(a: ArrayLike, path: str, **kwargs: Any) -> AsyncArray: AsyncArray The opened array. """ - _like_args(a, kwargs) + like_kwargs = _like_args(a, kwargs) if isinstance(a, (AsyncArray | Array)): kwargs.setdefault("fill_value", a.metadata.fill_value) - return await open_array(path=path, **kwargs) + return await open_array(path=path, **like_kwargs) -async def zeros(shape: ShapeLike, **kwargs: Any) -> AsyncArray: +async def zeros(shape: ChunkCoords, **kwargs: Any) -> AsyncArray: """Create an array, with zero being used as the default value for uninitialized portions of the array. @@ -921,5 +925,5 @@ async def zeros_like(a: ArrayLike, **kwargs: Any) -> AsyncArray: Array The new array. 
""" - _like_args(a, kwargs) - return await zeros(**kwargs) + like_kwargs = _like_args(a, kwargs) + return await zeros(**like_kwargs) diff --git a/src/zarr/api/synchronous.py b/src/zarr/api/synchronous.py index 53a7a2f64f..57b9d5630f 100644 --- a/src/zarr/api/synchronous.py +++ b/src/zarr/api/synchronous.py @@ -5,7 +5,7 @@ import zarr.api.asynchronous as async_api from zarr.array import Array, AsyncArray from zarr.buffer import NDArrayLike -from zarr.common import JSON, OpenMode, ZarrFormat +from zarr.common import JSON, ChunkCoords, OpenMode, ZarrFormat from zarr.group import Group from zarr.store import StoreLike from zarr.sync import sync @@ -194,9 +194,8 @@ def create(*args: Any, **kwargs: Any) -> Array: return Array(sync(async_api.create(*args, **kwargs))) -# TODO: move shapelike to common module # TODO: add type annotations for kwargs -def empty(shape: async_api.ShapeLike, **kwargs: Any) -> Array: +def empty(shape: ChunkCoords, **kwargs: Any) -> Array: return Array(sync(async_api.empty(shape, **kwargs))) @@ -207,7 +206,7 @@ def empty_like(a: async_api.ArrayLike, **kwargs: Any) -> Array: # TODO: add type annotations for kwargs and fill_value -def full(shape: async_api.ShapeLike, fill_value: Any, **kwargs: Any) -> Array: +def full(shape: ChunkCoords, fill_value: Any, **kwargs: Any) -> Array: return Array(sync(async_api.full(shape=shape, fill_value=fill_value, **kwargs))) @@ -218,8 +217,7 @@ def full_like(a: async_api.ArrayLike, **kwargs: Any) -> Array: # TODO: add type annotations for kwargs -# TODO: move ShapeLike to common module -def ones(shape: async_api.ShapeLike, **kwargs: Any) -> Array: +def ones(shape: ChunkCoords, **kwargs: Any) -> Array: return Array(sync(async_api.ones(shape, **kwargs))) diff --git a/src/zarr/common.py b/src/zarr/common.py index 787c3cd8e6..9349f9f018 100644 --- a/src/zarr/common.py +++ b/src/zarr/common.py @@ -34,7 +34,7 @@ Selection = slice | SliceSelection ZarrFormat = Literal[2, 3] JSON = None | str | int | float | Enum | 
dict[str, "JSON"] | list["JSON"] | tuple["JSON", ...] -MEMORY_ORDER = Literal["C", "F"] +MemoryOrder = Literal["C", "F"] OpenMode = Literal["r", "r+", "a", "w", "w-"]