From 5e57f75e30a01370b6d5c2b764cd4f1adcf5f47e Mon Sep 17 00:00:00 2001
From: Joe Hamman
Date: Tue, 24 Sep 2024 08:28:59 -0700
Subject: [PATCH 01/21] fix: selection with zarr arrays (#2137)

* fix: selection with zarr arrays

* fixup

* Update src/zarr/core/indexing.py

Co-authored-by: Davis Bennett

* Apply suggestions from code review

* lint

---------

Co-authored-by: Davis Bennett
---
 src/zarr/core/indexing.py | 27 ++++++++++++++++++++++++---
 tests/v3/test_indexing.py | 17 +++++++++++++++++
 2 files changed, 41 insertions(+), 3 deletions(-)

diff --git a/src/zarr/core/indexing.py b/src/zarr/core/indexing.py
index caf57e212a..0ead269f44 100644
--- a/src/zarr/core/indexing.py
+++ b/src/zarr/core/indexing.py
@@ -35,7 +35,6 @@
 ArrayOfIntOrBool = npt.NDArray[np.intp] | npt.NDArray[np.bool_]
 BasicSelector = int | slice | EllipsisType
 Selector = BasicSelector | ArrayOfIntOrBool
-
 BasicSelection = BasicSelector | tuple[BasicSelector, ...]  # also used for BlockIndex
 CoordinateSelection = IntSequence | tuple[IntSequence, ...]
 MaskSelection = npt.NDArray[np.bool_]
@@ -75,6 +74,15 @@ def err_too_many_indices(selection: Any, shape: ChunkCoords) -> None:
     raise IndexError(f"too many indices for array; expected {len(shape)}, got {len(selection)}")
 
 
+def _zarr_array_to_int_or_bool_array(arr: Array) -> npt.NDArray[np.intp] | npt.NDArray[np.bool_]:
+    if arr.dtype.kind in ("i", "b"):
+        return np.asarray(arr)
+    else:
+        raise IndexError(
+            f"Invalid array dtype: {arr.dtype}. Arrays used as indices must be of integer or boolean type"
+        )
+
+
 @runtime_checkable
 class Indexer(Protocol):
     shape: ChunkCoords
@@ -842,7 +850,14 @@ def __iter__(self) -> Iterator[ChunkProjection]:
 class OIndex:
     array: Array
 
-    def __getitem__(self, selection: OrthogonalSelection) -> NDArrayLike:
+    # TODO: develop Array generic and move zarr.Array[np.intp] | zarr.Array[np.bool_] to ArrayOfIntOrBool
+    def __getitem__(self, selection: OrthogonalSelection | Array) -> NDArrayLike:
+        from zarr.core.array import Array
+
+        # if input is a Zarr array, we materialize it now.
+        if isinstance(selection, Array):
+            selection = _zarr_array_to_int_or_bool_array(selection)
+
         fields, new_selection = pop_fields(selection)
         new_selection = ensure_tuple(new_selection)
         new_selection = replace_lists(new_selection)
@@ -1130,7 +1145,13 @@ def __init__(self, selection: MaskSelection, shape: ChunkCoords, chunk_grid: Chu
 class VIndex:
     array: Array
 
-    def __getitem__(self, selection: CoordinateSelection | MaskSelection) -> NDArrayLike:
+    # TODO: develop Array generic and move zarr.Array[np.intp] | zarr.Array[np.bool_] to ArrayOfIntOrBool
+    def __getitem__(self, selection: CoordinateSelection | MaskSelection | Array) -> NDArrayLike:
+        from zarr.core.array import Array
+
+        # if input is a Zarr array, we materialize it now.
+        if isinstance(selection, Array):
+            selection = _zarr_array_to_int_or_bool_array(selection)
         fields, new_selection = pop_fields(selection)
         new_selection = ensure_tuple(new_selection)
         new_selection = replace_lists(new_selection)
diff --git a/tests/v3/test_indexing.py b/tests/v3/test_indexing.py
index 8b509f93d1..81dc67f388 100644
--- a/tests/v3/test_indexing.py
+++ b/tests/v3/test_indexing.py
@@ -1861,3 +1861,20 @@ def test_orthogonal_bool_indexing_like_numpy_ix(
     # note: in python 3.10 z[*selection] is not valid unpacking syntax
     actual = z[(*selection,)]
     assert_array_equal(expected, actual, err_msg=f"{selection=}")
+
+
+def test_indexing_with_zarr_array(store: StorePath) -> None:
+    # regression test for https://github.com/zarr-developers/zarr-python/issues/2133
+    a = np.arange(10)
+    za = zarr.array(a, chunks=2, store=store, path="a")
+    ix = [False, True, False, True, False, True, False, True, False, True]
+    ii = [0, 2, 4, 5]
+
+    zix = zarr.array(ix, chunks=2, store=store, dtype="bool", path="ix")
+    zii = zarr.array(ii, chunks=2, store=store, dtype="i4", path="ii")
+    assert_array_equal(a[ix], za[zix])
+    assert_array_equal(a[ix], za.oindex[zix])
+    assert_array_equal(a[ix], za.vindex[zix])
+
+    assert_array_equal(a[ii], za[zii])
+    assert_array_equal(a[ii], za.oindex[zii])

From 4cbb17eb1c0e8aaca0fd32fa02de08dcdd1a01ab Mon Sep 17 00:00:00 2001
From: Dimitri Papadopoulos Orfanos <3234522+DimitriPapadopoulos@users.noreply.github.com>
Date: Wed, 25 Sep 2024 00:09:47 +0200
Subject: [PATCH 02/21] Apply and enforce more ruff rules (#2053)

* Apply ruff/Perflint rule PERF102

PERF102 When using only the keys of a dict use the `keys()` method

* Apply ruff/Perflint rule PERF401

PERF401 Use an async list comprehension to create a transformed list

* Apply ruff/flake8-pytest-style rule PT022

* Fix pre-commit warning

Ignore lint rules conflicting with the ruff formatter

* Apply ruff/pygrep-hooks rule PGH003

PGH003 Use specific rule codes when ignoring type issues

* Apply ruff/pygrep-hooks rule PGH004

PGH004 Use specific rule codes when using `noqa`

* Enforce ruff/pygrep-hooks rules (PGH)

* Apply ruff/flake8-comprehensions rule C417

C417 Unnecessary `map` usage (rewrite using a generator expression)

* Apply ruff/flake8-pyi rule PYI032

PYI032 Prefer `object` to `Any` for the second parameter to `__eq__`

* Apply ruff/flake8-pyi rule PYI036

PYI036 Returning Any from function

* Apply ruff/flake8-pyi rule PYI038

* Apply ruff/flake8-pyi rule PYI041

PYI041 Use `complex` instead of `float | complex`
PYI041 Use `float` instead of `int | float`

* Apply ruff/flake8-pyi rule PYI055

PYI055 Multiple `type` members in a union. Combine them into one.
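[Editor's note] For readers unfamiliar with the last two rules, a minimal
before/after sketch of the PYI041 and PYI055 rewrites (illustrative only;
the names here are hypothetical and not taken from this diff — the
`expected_exception` changes in test_array.py and test_group.py below are
real instances of the PYI055 pattern):

    # Before: PYI041 flags the redundant numeric union,
    # PYI055 flags the union of type[...] members
    def scale(factor: int | float) -> float: ...
    expected_exception: type[ValueError] | type[KeyError]

    # After: int is implied by float (PEP 484 numeric tower),
    # and multiple type[...] members combine into one
    def scale(factor: float) -> float: ...
    expected_exception: type[ValueError | KeyError]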
--- pyproject.toml | 28 ++++++++++++++++++---- src/zarr/abc/store.py | 10 ++++++-- src/zarr/api/asynchronous.py | 10 ++++---- src/zarr/codecs/transpose.py | 6 ++--- src/zarr/core/array.py | 7 +++--- src/zarr/core/attributes.py | 2 +- src/zarr/core/buffer/core.py | 6 ++--- src/zarr/core/buffer/cpu.py | 4 ++-- src/zarr/core/buffer/gpu.py | 4 ++-- src/zarr/core/group.py | 12 +++++----- src/zarr/core/indexing.py | 24 +++++++++++-------- src/zarr/core/metadata/v3.py | 12 +++++----- src/zarr/core/sync.py | 4 +--- src/zarr/registry.py | 8 +++---- src/zarr/store/common.py | 13 ++++------ src/zarr/store/local.py | 2 +- src/zarr/store/memory.py | 9 ++++--- src/zarr/store/remote.py | 3 ++- src/zarr/testing/strategies.py | 5 ++-- tests/v3/conftest.py | 7 +++--- tests/v3/test_array.py | 2 +- tests/v3/test_group.py | 12 +++++----- tests/v3/test_indexing.py | 8 +++---- tests/v3/test_metadata/test_v3.py | 2 +- tests/v3/test_properties.py | 9 +++---- tests/v3/test_store/test_remote.py | 5 +--- tests/v3/test_store/test_stateful_store.py | 6 ++--- tests/v3/test_v2.py | 2 +- 28 files changed, 119 insertions(+), 103 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 63a58ac795..ed6c25893c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -207,18 +207,36 @@ extend-exclude = [ [tool.ruff.lint] extend-select = [ - "B", # flake8-bugbear - "I", # isort - "ISC", - "UP", # pyupgrade - "RSE", + "B", # flake8-bugbear + "I", # isort + "ISC", # flake8-implicit-str-concat + "PGH", # pygrep-hooks + "PYI", # flake8-pyi + "RSE", # flake8-raise "RUF", "TCH", # flake8-type-checking "TRY", # tryceratops + "UP", # pyupgrade ] ignore = [ + "PYI013", "RUF005", "TRY003", + # https://docs.astral.sh/ruff/formatter/#conflicting-lint-rules + "W191", + "E111", + "E114", + "E117", + "D206", + "D300", + "Q000", + "Q001", + "Q002", + "Q003", + "COM812", + "COM819", + "ISC001", + "ISC002", ] [tool.mypy] diff --git a/src/zarr/abc/store.py b/src/zarr/abc/store.py index f95ba34efd..2f02ac36ad 100644 --- a/src/zarr/abc/store.py +++ b/src/zarr/abc/store.py @@ -1,6 +1,7 @@ from abc import ABC, abstractmethod from asyncio import gather from collections.abc import AsyncGenerator, Iterable +from types import TracebackType from typing import Any, NamedTuple, Protocol, runtime_checkable from typing_extensions import Self @@ -35,7 +36,7 @@ class Store(ABC): _mode: AccessMode _is_open: bool - def __init__(self, mode: AccessModeLiteral = "r", *args: Any, **kwargs: Any): + def __init__(self, mode: AccessModeLiteral = "r", *args: Any, **kwargs: Any) -> None: self._is_open = False self._mode = AccessMode.from_literal(mode) @@ -49,7 +50,12 @@ def __enter__(self) -> Self: """Enter a context manager that will close the store upon exiting.""" return self - def __exit__(self, *args: Any) -> None: + def __exit__( + self, + exc_type: type[BaseException] | None, + exc_value: BaseException | None, + traceback: TracebackType | None, + ) -> None: """Close the store.""" self.close() diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index 5fbb38c5e7..95adcf2936 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -2,7 +2,7 @@ import asyncio import warnings -from typing import TYPE_CHECKING, Any, Literal, Union, cast +from typing import TYPE_CHECKING, Any, Literal, cast import numpy as np import numpy.typing as npt @@ -25,6 +25,10 @@ from zarr.core.buffer import NDArrayLike from zarr.core.chunk_key_encodings import ChunkKeyEncoding + # TODO: this type could use some more thought + ArrayLike = AsyncArray | 
Array | npt.NDArray[Any] + PathLike = str + __all__ = [ "consolidate_metadata", "copy", @@ -53,10 +57,6 @@ "zeros_like", ] -# TODO: this type could use some more thought, noqa to avoid "Variable "asynchronous.ArrayLike" is not valid as a type" -ArrayLike = Union[AsyncArray | Array | npt.NDArray[Any]] # noqa -PathLike = str - def _get_shape_chunks(a: ArrayLike | Any) -> tuple[ChunkCoords | None, ChunkCoords | None]: """helper function to get the shape and chunks from an array-like object""" diff --git a/src/zarr/codecs/transpose.py b/src/zarr/codecs/transpose.py index 45eb5bbe5f..40a4cdbf37 100644 --- a/src/zarr/codecs/transpose.py +++ b/src/zarr/codecs/transpose.py @@ -96,16 +96,14 @@ async def _decode_single( chunk_spec: ArraySpec, ) -> NDBuffer: inverse_order = np.argsort(self.order) - chunk_array = chunk_array.transpose(inverse_order) - return chunk_array + return chunk_array.transpose(inverse_order) async def _encode_single( self, chunk_array: NDBuffer, _chunk_spec: ArraySpec, ) -> NDBuffer | None: - chunk_array = chunk_array.transpose(self.order) - return chunk_array + return chunk_array.transpose(self.order) def compute_encoded_size(self, input_byte_length: int, _chunk_spec: ArraySpec) -> int: return input_byte_length diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index b825ca4ca1..7d1aa33087 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -110,7 +110,7 @@ def __init__( metadata: ArrayMetadata, store_path: StorePath, order: Literal["C", "F"] | None = None, - ): + ) -> None: metadata_parsed = parse_array_metadata(metadata) order_parsed = parse_indexing_order(order or config.get("array.order")) @@ -294,7 +294,7 @@ async def _create_v2( dtype: npt.DTypeLike, chunks: ChunkCoords, dimension_separator: Literal[".", "/"] | None = None, - fill_value: None | int | float = None, + fill_value: None | float = None, order: Literal["C", "F"] | None = None, filters: list[dict[str, JSON]] | None = None, compressor: dict[str, JSON] | None = None, @@ -331,8 +331,7 @@ def from_dict( data: dict[str, JSON], ) -> AsyncArray: metadata = parse_array_metadata(data) - async_array = cls(metadata=metadata, store_path=store_path) - return async_array + return cls(metadata=metadata, store_path=store_path) @classmethod async def open( diff --git a/src/zarr/core/attributes.py b/src/zarr/core/attributes.py index 913a4b74ed..7f9864d1b5 100644 --- a/src/zarr/core/attributes.py +++ b/src/zarr/core/attributes.py @@ -13,7 +13,7 @@ class Attributes(MutableMapping[str, JSON]): - def __init__(self, obj: Array | Group): + def __init__(self, obj: Array | Group) -> None: # key=".zattrs", read_only=False, cache=True, synchronizer=None self._obj = obj diff --git a/src/zarr/core/buffer/core.py b/src/zarr/core/buffer/core.py index ba629befa1..95c4e00e99 100644 --- a/src/zarr/core/buffer/core.py +++ b/src/zarr/core/buffer/core.py @@ -93,7 +93,7 @@ def ravel(self, order: Literal["K", "A", "C", "F"] = ...) -> Self: ... def all(self) -> bool: ... - def __eq__(self, other: Any) -> Self: # type: ignore[explicit-override, override] + def __eq__(self, other: object) -> Self: # type: ignore[explicit-override, override] """Element-wise equal Notes @@ -136,7 +136,7 @@ class Buffer(ABC): array-like object that must be 1-dim, contiguous, and byte dtype. 
""" - def __init__(self, array_like: ArrayLike): + def __init__(self, array_like: ArrayLike) -> None: if array_like.ndim != 1: raise ValueError("array_like: only 1-dim allowed") if array_like.dtype != np.dtype("b"): @@ -313,7 +313,7 @@ class NDBuffer: ndarray-like object that is convertible to a regular Numpy array. """ - def __init__(self, array: NDArrayLike): + def __init__(self, array: NDArrayLike) -> None: # assert array.ndim > 0 assert array.dtype != object self._data = array diff --git a/src/zarr/core/buffer/cpu.py b/src/zarr/core/buffer/cpu.py index cef16209ec..a82584a477 100644 --- a/src/zarr/core/buffer/cpu.py +++ b/src/zarr/core/buffer/cpu.py @@ -45,7 +45,7 @@ class Buffer(core.Buffer): array-like object that must be 1-dim, contiguous, and byte dtype. """ - def __init__(self, array_like: ArrayLike): + def __init__(self, array_like: ArrayLike) -> None: super().__init__(array_like) @classmethod @@ -143,7 +143,7 @@ class NDBuffer(core.NDBuffer): ndarray-like object that is convertible to a regular Numpy array. """ - def __init__(self, array: NDArrayLike): + def __init__(self, array: NDArrayLike) -> None: super().__init__(array) @classmethod diff --git a/src/zarr/core/buffer/gpu.py b/src/zarr/core/buffer/gpu.py index c817431d3d..1227175146 100644 --- a/src/zarr/core/buffer/gpu.py +++ b/src/zarr/core/buffer/gpu.py @@ -48,7 +48,7 @@ class Buffer(core.Buffer): array-like object that must be 1-dim, contiguous, and byte dtype. """ - def __init__(self, array_like: ArrayLike): + def __init__(self, array_like: ArrayLike) -> None: if cp is None: raise ImportError( "Cannot use zarr.buffer.gpu.Buffer without cupy. Please install cupy." @@ -137,7 +137,7 @@ class NDBuffer(core.NDBuffer): ndarray-like object that is convertible to a regular Numpy array. """ - def __init__(self, array: NDArrayLike): + def __init__(self, array: NDArrayLike) -> None: if cp is None: raise ImportError( "Cannot use zarr.buffer.gpu.NDBuffer without cupy. Please install cupy." diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index 7c56707a4f..b09968b62a 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -54,7 +54,7 @@ def parse_zarr_format(data: Any) -> ZarrFormat: def parse_attributes(data: Any) -> dict[str, Any]: if data is None: return {} - elif isinstance(data, dict) and all(map(lambda v: isinstance(v, str), data.keys())): + elif isinstance(data, dict) and all(isinstance(k, str) for k in data): return data msg = f"Expected dict with string keys. Got {type(data)} instead." 
raise TypeError(msg) @@ -104,7 +104,9 @@ def to_buffer_dict(self, prototype: BufferPrototype) -> dict[str, Buffer]: ), } - def __init__(self, attributes: dict[str, Any] | None = None, zarr_format: ZarrFormat = 3): + def __init__( + self, attributes: dict[str, Any] | None = None, zarr_format: ZarrFormat = 3 + ) -> None: attributes_parsed = parse_attributes(attributes) zarr_format_parsed = parse_zarr_format(zarr_format) @@ -202,11 +204,10 @@ def from_dict( store_path: StorePath, data: dict[str, Any], ) -> AsyncGroup: - group = cls( + return cls( metadata=GroupMetadata.from_dict(data), store_path=store_path, ) - return group async def getitem( self, @@ -888,8 +889,7 @@ def members(self, max_depth: int | None = 0) -> tuple[tuple[str, Array | Group], """ _members = self._sync_iter(self._async_group.members(max_depth=max_depth)) - result = tuple(map(lambda kv: (kv[0], _parse_async_node(kv[1])), _members)) - return result + return tuple((kv[0], _parse_async_node(kv[1])) for kv in _members) def __contains__(self, member: str) -> bool: return self._sync(self._async_group.contains(member)) diff --git a/src/zarr/core/indexing.py b/src/zarr/core/indexing.py index 0ead269f44..3968a057f8 100644 --- a/src/zarr/core/indexing.py +++ b/src/zarr/core/indexing.py @@ -54,7 +54,7 @@ class ArrayIndexError(IndexError): class BoundsCheckError(IndexError): _msg = "" - def __init__(self, dim_len: int): + def __init__(self, dim_len: int) -> None: self._msg = f"index out of bounds for dimension with length {dim_len}" @@ -255,7 +255,7 @@ class IntDimIndexer: dim_chunk_len: int nitems: int = 1 - def __init__(self, dim_sel: int, dim_len: int, dim_chunk_len: int): + def __init__(self, dim_sel: int, dim_len: int, dim_chunk_len: int) -> None: object.__setattr__(self, "dim_sel", normalize_integer_selection(dim_sel, dim_len)) object.__setattr__(self, "dim_len", dim_len) object.__setattr__(self, "dim_chunk_len", dim_chunk_len) @@ -279,7 +279,7 @@ class SliceDimIndexer: stop: int step: int - def __init__(self, dim_sel: slice, dim_len: int, dim_chunk_len: int): + def __init__(self, dim_sel: slice, dim_len: int, dim_chunk_len: int) -> None: # normalize start, stop, step = dim_sel.indices(dim_len) if step < 1: @@ -453,7 +453,7 @@ def __init__( selection: BasicSelection, shape: ChunkCoords, chunk_grid: ChunkGrid, - ): + ) -> None: chunk_shape = get_chunk_shape(chunk_grid) # handle ellipsis selection_normalized = replace_ellipsis(selection, shape) @@ -509,7 +509,7 @@ class BoolArrayDimIndexer: nitems: int dim_chunk_ixs: npt.NDArray[np.intp] - def __init__(self, dim_sel: npt.NDArray[np.bool_], dim_len: int, dim_chunk_len: int): + def __init__(self, dim_sel: npt.NDArray[np.bool_], dim_len: int, dim_chunk_len: int) -> None: # check number of dimensions if not is_bool_array(dim_sel, 1): raise IndexError("Boolean arrays in an orthogonal selection must be 1-dimensional only") @@ -626,7 +626,7 @@ def __init__( wraparound: bool = True, boundscheck: bool = True, order: Order = Order.UNKNOWN, - ): + ) -> None: # ensure 1d array dim_sel = np.asanyarray(dim_sel) if not is_integer_array(dim_sel, 1): @@ -766,7 +766,7 @@ class OrthogonalIndexer(Indexer): is_advanced: bool drop_axes: tuple[int, ...] 
- def __init__(self, selection: Selection, shape: ChunkCoords, chunk_grid: ChunkGrid): + def __init__(self, selection: Selection, shape: ChunkCoords, chunk_grid: ChunkGrid) -> None: chunk_shape = get_chunk_shape(chunk_grid) # handle ellipsis @@ -880,7 +880,9 @@ class BlockIndexer(Indexer): shape: ChunkCoords drop_axes: ChunkCoords - def __init__(self, selection: BasicSelection, shape: ChunkCoords, chunk_grid: ChunkGrid): + def __init__( + self, selection: BasicSelection, shape: ChunkCoords, chunk_grid: ChunkGrid + ) -> None: chunk_shape = get_chunk_shape(chunk_grid) # handle ellipsis @@ -1005,7 +1007,9 @@ class CoordinateIndexer(Indexer): chunk_shape: ChunkCoords drop_axes: ChunkCoords - def __init__(self, selection: CoordinateSelection, shape: ChunkCoords, chunk_grid: ChunkGrid): + def __init__( + self, selection: CoordinateSelection, shape: ChunkCoords, chunk_grid: ChunkGrid + ) -> None: chunk_shape = get_chunk_shape(chunk_grid) cdata_shape: ChunkCoords @@ -1122,7 +1126,7 @@ def __iter__(self) -> Iterator[ChunkProjection]: @dataclass(frozen=True) class MaskIndexer(CoordinateIndexer): - def __init__(self, selection: MaskSelection, shape: ChunkCoords, chunk_grid: ChunkGrid): + def __init__(self, selection: MaskSelection, shape: ChunkCoords, chunk_grid: ChunkGrid) -> None: # some initial normalization selection_normalized = cast(tuple[MaskSelection], ensure_tuple(selection)) selection_normalized = cast(tuple[MaskSelection], replace_lists(selection_normalized)) diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py index 603cd343af..345655cc0b 100644 --- a/src/zarr/core/metadata/v3.py +++ b/src/zarr/core/metadata/v3.py @@ -292,35 +292,35 @@ def update_attributes(self, attributes: dict[str, JSON]) -> Self: @overload def parse_fill_value( - fill_value: int | float | complex | str | bytes | np.generic | Sequence[Any] | bool | None, + fill_value: complex | str | bytes | np.generic | Sequence[Any] | bool | None, dtype: BOOL_DTYPE, ) -> BOOL: ... @overload def parse_fill_value( - fill_value: int | float | complex | str | bytes | np.generic | Sequence[Any] | bool | None, + fill_value: complex | str | bytes | np.generic | Sequence[Any] | bool | None, dtype: INTEGER_DTYPE, ) -> INTEGER: ... @overload def parse_fill_value( - fill_value: int | float | complex | str | bytes | np.generic | Sequence[Any] | bool | None, + fill_value: complex | str | bytes | np.generic | Sequence[Any] | bool | None, dtype: FLOAT_DTYPE, ) -> FLOAT: ... @overload def parse_fill_value( - fill_value: int | float | complex | str | bytes | np.generic | Sequence[Any] | bool | None, + fill_value: complex | str | bytes | np.generic | Sequence[Any] | bool | None, dtype: COMPLEX_DTYPE, ) -> COMPLEX: ... @overload def parse_fill_value( - fill_value: int | float | complex | str | bytes | np.generic | Sequence[Any] | bool | None, + fill_value: complex | str | bytes | np.generic | Sequence[Any] | bool | None, dtype: np.dtype[Any], ) -> Any: # This dtype[Any] is unfortunately necessary right now. 
@@ -334,7 +334,7 @@ def parse_fill_value( def parse_fill_value( - fill_value: int | float | complex | str | bytes | np.generic | Sequence[Any] | bool | None, + fill_value: complex | str | bytes | np.generic | Sequence[Any] | bool | None, dtype: BOOL_DTYPE | INTEGER_DTYPE | FLOAT_DTYPE | COMPLEX_DTYPE | np.dtype[Any], ) -> BOOL | INTEGER | FLOAT | COMPLEX | Any: """ diff --git a/src/zarr/core/sync.py b/src/zarr/core/sync.py index db3dce79b2..755020ef3c 100644 --- a/src/zarr/core/sync.py +++ b/src/zarr/core/sync.py @@ -117,9 +117,7 @@ async def _collect_aiterator(data: AsyncIterator[T]) -> tuple[T, ...]: """ Collect an entire async iterator into a tuple """ - result = [] - async for x in data: - result.append(x) + result = [x async for x in data] return tuple(result) diff --git a/src/zarr/registry.py b/src/zarr/registry.py index cde3b7d848..c6566d12b5 100644 --- a/src/zarr/registry.py +++ b/src/zarr/registry.py @@ -107,7 +107,7 @@ def fully_qualified_name(cls: type) -> str: def register_codec(key: str, codec_cls: type[Codec]) -> None: - if key not in __codec_registries.keys(): + if key not in __codec_registries: __codec_registries[key] = Registry() __codec_registries[key].register(codec_cls) @@ -158,7 +158,7 @@ def get_pipeline_class(reload_config: bool = False) -> type[CodecPipeline]: if pipeline_class: return pipeline_class raise BadConfigError( - f"Pipeline class '{path}' not found in registered pipelines: {list(__pipeline_registry.keys())}." + f"Pipeline class '{path}' not found in registered pipelines: {list(__pipeline_registry)}." ) @@ -172,7 +172,7 @@ def get_buffer_class(reload_config: bool = False) -> type[Buffer]: if buffer_class: return buffer_class raise BadConfigError( - f"Buffer class '{path}' not found in registered buffers: {list(__buffer_registry.keys())}." + f"Buffer class '{path}' not found in registered buffers: {list(__buffer_registry)}." ) @@ -185,7 +185,7 @@ def get_ndbuffer_class(reload_config: bool = False) -> type[NDBuffer]: if ndbuffer_class: return ndbuffer_class raise BadConfigError( - f"NDBuffer class '{path}' not found in registered buffers: {list(__ndbuffer_registry.keys())}." + f"NDBuffer class '{path}' not found in registered buffers: {list(__ndbuffer_registry)}." 
) diff --git a/src/zarr/store/common.py b/src/zarr/store/common.py index 196479dd67..0c126c63da 100644 --- a/src/zarr/store/common.py +++ b/src/zarr/store/common.py @@ -23,15 +23,14 @@ def _dereference_path(root: str, path: str) -> str: assert isinstance(path, str) root = root.rstrip("/") path = f"{root}/{path}" if root else path - path = path.rstrip("/") - return path + return path.rstrip("/") class StorePath: store: Store path: str - def __init__(self, store: Store, path: str | None = None): + def __init__(self, store: Store, path: str | None = None) -> None: self.store = store self.path = path or "" @@ -64,10 +63,9 @@ def __str__(self) -> str: def __repr__(self) -> str: return f"StorePath({self.store.__class__.__name__}, {str(self)!r})" - def __eq__(self, other: Any) -> bool: + def __eq__(self, other: object) -> bool: try: - if self.store == other.store and self.path == other.path: - return True + return self.store == other.store and self.path == other.path # type: ignore[attr-defined, no-any-return] except Exception: pass return False @@ -266,8 +264,7 @@ async def contains_array(store_path: StorePath, zarr_format: ZarrFormat) -> bool except (ValueError, KeyError): return False elif zarr_format == 2: - result = await (store_path / ZARRAY_JSON).exists() - return result + return await (store_path / ZARRAY_JSON).exists() msg = f"Invalid zarr_format provided. Got {zarr_format}, expected 2 or 3" raise ValueError(msg) diff --git a/src/zarr/store/local.py b/src/zarr/store/local.py index fd209cd7c3..39a94969eb 100644 --- a/src/zarr/store/local.py +++ b/src/zarr/store/local.py @@ -79,7 +79,7 @@ class LocalStore(Store): root: Path - def __init__(self, root: Path | str, *, mode: AccessModeLiteral = "r"): + def __init__(self, root: Path | str, *, mode: AccessModeLiteral = "r") -> None: super().__init__(mode=mode) if isinstance(root, str): root = Path(root) diff --git a/src/zarr/store/memory.py b/src/zarr/store/memory.py index 7baa6aee26..83734e8942 100644 --- a/src/zarr/store/memory.py +++ b/src/zarr/store/memory.py @@ -30,7 +30,7 @@ def __init__( store_dict: MutableMapping[str, Buffer] | None = None, *, mode: AccessModeLiteral = "r", - ): + ) -> None: super().__init__(mode=mode) if store_dict is None: store_dict = {} @@ -80,8 +80,7 @@ async def get_partial_values( async def _get(key: str, byte_range: tuple[int, int | None]) -> Buffer | None: return await self.get(key, prototype=prototype, byte_range=byte_range) - vals = await concurrent_map(key_ranges, _get, limit=None) - return vals + return await concurrent_map(key_ranges, _get, limit=None) async def exists(self, key: str) -> bool: return key in self._store_dict @@ -137,7 +136,7 @@ async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: prefix = prefix[:-1] if prefix == "": - keys_unique = set(k.split("/")[0] for k in self._store_dict.keys()) + keys_unique = set(k.split("/")[0] for k in self._store_dict) else: # Our dictionary doesn't contain directory markers, but we want to include # a pseudo directory when there's a nested item and we're listing an @@ -166,7 +165,7 @@ def __init__( store_dict: MutableMapping[str, Buffer] | None = None, *, mode: AccessModeLiteral = "r", - ): + ) -> None: super().__init__(mode=mode) if store_dict: self._store_dict = {k: gpu.Buffer.from_buffer(store_dict[k]) for k in iter(store_dict)} diff --git a/src/zarr/store/remote.py b/src/zarr/store/remote.py index ecb46a31d3..02bda6b1dd 100644 --- a/src/zarr/store/remote.py +++ b/src/zarr/store/remote.py @@ -39,7 +39,7 @@ def __init__( mode: AccessModeLiteral 
= "r", path: str = "/", allowed_exceptions: tuple[type[Exception], ...] = ALLOWED_EXCEPTIONS, - ): + ) -> None: """ Parameters ---------- @@ -49,6 +49,7 @@ def __init__( keys, rather than some other IO failure storage_options: passed on to fsspec to make the filesystem instance. If url is a UPath, this must not be used. + """ super().__init__(mode=mode) self.fs = fs diff --git a/src/zarr/testing/strategies.py b/src/zarr/testing/strategies.py index 3f9d1264d9..8a83f82b06 100644 --- a/src/zarr/testing/strategies.py +++ b/src/zarr/testing/strategies.py @@ -4,7 +4,7 @@ import hypothesis.extra.numpy as npst import hypothesis.strategies as st import numpy as np -from hypothesis import given, settings # noqa +from hypothesis import given, settings # noqa: F401 from hypothesis.strategies import SearchStrategy from zarr.core.array import Array @@ -171,5 +171,4 @@ def key_ranges(keys: SearchStrategy = node_names) -> SearchStrategy[list]: st.none() | st.integers(min_value=0), st.none() | st.integers(min_value=0) ) key_tuple = st.tuples(keys, byte_ranges) - key_range_st = st.lists(key_tuple, min_size=1, max_size=10) - return key_range_st + return st.lists(key_tuple, min_size=1, max_size=10) diff --git a/tests/v3/conftest.py b/tests/v3/conftest.py index d1ac410753..c3516f676c 100644 --- a/tests/v3/conftest.py +++ b/tests/v3/conftest.py @@ -14,8 +14,7 @@ from zarr.store.remote import RemoteStore if TYPE_CHECKING: - from collections.abc import Generator, Iterator - from types import ModuleType + from collections.abc import Generator from typing import Any, Literal from _pytest.compat import LEGACY_PATH @@ -99,13 +98,13 @@ async def async_group(request: pytest.FixtureRequest, tmpdir: LEGACY_PATH) -> As @pytest.fixture(params=["numpy", "cupy"]) -def xp(request: pytest.FixtureRequest) -> Iterator[ModuleType]: +def xp(request: pytest.FixtureRequest) -> Any: """Fixture to parametrize over numpy-like libraries""" if request.param == "cupy": request.node.add_marker(pytest.mark.gpu) - yield pytest.importorskip(request.param) + return pytest.importorskip(request.param) @pytest.fixture(autouse=True) diff --git a/tests/v3/test_array.py b/tests/v3/test_array.py index b3362c52b0..06fd791e6e 100644 --- a/tests/v3/test_array.py +++ b/tests/v3/test_array.py @@ -27,7 +27,7 @@ def test_array_creation_existing_node( """ spath = StorePath(store) group = Group.from_store(spath, zarr_format=zarr_format) - expected_exception: type[ContainsArrayError] | type[ContainsGroupError] + expected_exception: type[ContainsArrayError | ContainsGroupError] if extant_node == "array": expected_exception = ContainsArrayError _ = group.create_array("extant", shape=(10,), dtype="uint8") diff --git a/tests/v3/test_group.py b/tests/v3/test_group.py index c8310f33e5..8beb344b47 100644 --- a/tests/v3/test_group.py +++ b/tests/v3/test_group.py @@ -93,8 +93,8 @@ def test_group_members(store: Store, zarr_format: ZarrFormat) -> None: members_expected["subgroup"] = group.create_group("subgroup") # make a sub-sub-subgroup, to ensure that the children calculation doesn't go # too deep in the hierarchy - subsubgroup = members_expected["subgroup"].create_group("subsubgroup") # type: ignore - subsubsubgroup = subsubgroup.create_group("subsubsubgroup") # type: ignore + subsubgroup = members_expected["subgroup"].create_group("subsubgroup") + subsubsubgroup = subsubgroup.create_group("subsubsubgroup") members_expected["subarray"] = group.create_array( "subarray", shape=(100,), dtype="uint8", chunk_shape=(10,), exists_ok=True @@ -271,7 +271,7 @@ def 
test_group_iter(store: Store, zarr_format: ZarrFormat) -> None: group = Group.from_store(store, zarr_format=zarr_format) with pytest.raises(NotImplementedError): - [x for x in group] # type: ignore + [x for x in group] def test_group_len(store: Store, zarr_format: ZarrFormat) -> None: @@ -281,7 +281,7 @@ def test_group_len(store: Store, zarr_format: ZarrFormat) -> None: group = Group.from_store(store, zarr_format=zarr_format) with pytest.raises(NotImplementedError): - len(group) # type: ignore + len(group) def test_group_setitem(store: Store, zarr_format: ZarrFormat) -> None: @@ -468,7 +468,7 @@ def test_group_creation_existing_node( """ spath = StorePath(store) group = Group.from_store(spath, zarr_format=zarr_format) - expected_exception: type[ContainsArrayError] | type[ContainsGroupError] + expected_exception: type[ContainsArrayError | ContainsGroupError] attributes: dict[str, JSON] = {"old": True} if extant_node == "array": @@ -550,7 +550,7 @@ async def test_asyncgroup_attrs(store: Store, zarr_format: ZarrFormat) -> None: async def test_asyncgroup_info(store: Store, zarr_format: ZarrFormat) -> None: - agroup = await AsyncGroup.from_store( # noqa + agroup = await AsyncGroup.from_store( # noqa: F841 store, zarr_format=zarr_format, ) diff --git a/tests/v3/test_indexing.py b/tests/v3/test_indexing.py index 81dc67f388..d2cf455e07 100644 --- a/tests/v3/test_indexing.py +++ b/tests/v3/test_indexing.py @@ -36,7 +36,7 @@ @pytest.fixture async def store() -> AsyncGenerator[StorePath]: - yield StorePath(await MemoryStore.open(mode="w")) + return StorePath(await MemoryStore.open(mode="w")) def zarr_array_from_numpy_array( @@ -1782,9 +1782,9 @@ async def test_accessed_chunks( # Combine and generate the cartesian product to determine the chunks keys that # will be accessed - chunks_accessed = [] - for comb in itertools.product(*chunks_per_dim): - chunks_accessed.append(".".join([str(ci) for ci in comb])) + chunks_accessed = [ + ".".join([str(ci) for ci in comb]) for comb in itertools.product(*chunks_per_dim) + ] counts_before = store.counter.copy() diff --git a/tests/v3/test_metadata/test_v3.py b/tests/v3/test_metadata/test_v3.py index d4cf0c73e3..f8e2ebd7b3 100644 --- a/tests/v3/test_metadata/test_v3.py +++ b/tests/v3/test_metadata/test_v3.py @@ -303,7 +303,7 @@ def test_parse_invalid_dtype_raises(data): @pytest.mark.parametrize( "data_type,fill_value", [("uint8", -1), ("int32", 22.5), ("float32", "foo")] ) -async def test_invalid_fill_value_raises(data_type: str, fill_value: int | float) -> None: +async def test_invalid_fill_value_raises(data_type: str, fill_value: float) -> None: metadata_dict = { "zarr_format": 3, "node_type": "array", diff --git a/tests/v3/test_properties.py b/tests/v3/test_properties.py index a78e9207bd..250ede67b5 100644 --- a/tests/v3/test_properties.py +++ b/tests/v3/test_properties.py @@ -4,10 +4,11 @@ pytest.importorskip("hypothesis") -import hypothesis.extra.numpy as npst # noqa -import hypothesis.strategies as st # noqa -from hypothesis import given, settings # noqa -from zarr.testing.strategies import arrays, np_arrays, basic_indices # noqa +import hypothesis.extra.numpy as npst # noqa: E402 +import hypothesis.strategies as st # noqa: E402 +from hypothesis import given # noqa: E402 + +from zarr.testing.strategies import arrays, basic_indices, np_arrays # noqa: E402 @given(st.data()) diff --git a/tests/v3/test_store/test_remote.py b/tests/v3/test_store/test_remote.py index 495a5e5c4f..a6457cfebc 100644 --- a/tests/v3/test_store/test_remote.py +++ 
b/tests/v3/test_store/test_remote.py
@@ -86,10 +86,7 @@ def s3(s3_base: None) -> Generator[s3fs.S3FileSystem, None, None]:
 
 
 async def alist(it):
-    out = []
-    async for a in it:
-        out.append(a)
-    return out
+    return [a async for a in it]
 
 
 async def test_basic() -> None:
diff --git a/tests/v3/test_store/test_stateful_store.py b/tests/v3/test_store/test_stateful_store.py
index 1ecbd87cc1..85a31f9eb3 100644
--- a/tests/v3/test_store/test_stateful_store.py
+++ b/tests/v3/test_store/test_stateful_store.py
@@ -131,7 +131,7 @@ def get(self, key: str, data: DataObject) -> None:
     @rule(key=paths, data=st.data())
     def get_invalid_keys(self, key: str, data: DataObject) -> None:
         note("(get_invalid)")
-        assume(key not in self.model.keys())
+        assume(key not in self.model)
         assert self.store.get(key, self.prototype) is None
 
     @precondition(lambda self: len(self.model.keys()) > 0)
@@ -202,9 +202,9 @@ def check_paths_equal(self) -> None:
     @invariant()
     def check_vals_equal(self) -> None:
         note("Checking values equal")
-        for key, _val in self.model.items():
+        for key, val in self.model.items():
             store_item = self.store.get(key, self.prototype).to_bytes()
-            assert self.model[key].to_bytes() == store_item
+            assert val.to_bytes() == store_item
 
     @invariant()
     def check_num_keys_equal(self) -> None:
diff --git a/tests/v3/test_v2.py b/tests/v3/test_v2.py
index 9ddde68e23..c67f991a0d 100644
--- a/tests/v3/test_v2.py
+++ b/tests/v3/test_v2.py
@@ -9,7 +9,7 @@
 
 @pytest.fixture
 async def store() -> Iterator[StorePath]:
-    yield StorePath(await MemoryStore.open(mode="w"))
+    return StorePath(await MemoryStore.open(mode="w"))
 
 
 def test_simple(store: StorePath) -> None:

From fafd0bf7528599187ec601325f57113349f01d66 Mon Sep 17 00:00:00 2001
From: Tom Augspurger
Date: Wed, 25 Sep 2024 11:22:11 -0500
Subject: [PATCH 03/21] Update V2 codec pipeline to use concrete classes (#2244)

The previous implementation used the codec config, rather than the codec
itself.
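[Editor's note] A short sketch of the usage this patch fixes, mirroring the
regression test added below (assumes numcodecs is installed). Zarr v2 arrays
accept plain numcodecs codec configs; with this change the pipeline resolves
them into concrete codec objects once, instead of calling
numcodecs.get_codec() inside every encode/decode call:

    import numpy as np
    import zarr
    from numcodecs import Blosc, Delta
    from zarr.store import MemoryStore

    store = MemoryStore(mode="w")
    # filters/compressor are plain numcodecs config dicts
    array = zarr.create(
        store=store,
        shape=(1,),
        dtype="i4",
        zarr_format=2,
        filters=[Delta(dtype="i4").get_config()],
        compressor=Blosc().get_config(),
    )
    array[:] = 1
    np.testing.assert_array_equal(array[:], np.ones(1))

This is the round-trip reported broken in issue #2243.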
---
 src/zarr/codecs/_v2.py       | 12 ++++++------
 src/zarr/core/metadata/v2.py |  2 ++
 tests/v3/test_v2.py          | 20 ++++++++++++++++++++
 3 files changed, 28 insertions(+), 6 deletions(-)

diff --git a/src/zarr/codecs/_v2.py b/src/zarr/codecs/_v2.py
index c8bc558349..cc6129e604 100644
--- a/src/zarr/codecs/_v2.py
+++ b/src/zarr/codecs/_v2.py
@@ -8,16 +8,18 @@
 
 from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec
 from zarr.core.buffer import Buffer, NDBuffer, default_buffer_prototype
-from zarr.core.common import JSON, to_thread
+from zarr.core.common import to_thread
 from zarr.registry import get_ndbuffer_class
 
 if TYPE_CHECKING:
+    import numcodecs.abc
+
     from zarr.core.array_spec import ArraySpec
 
 
 @dataclass(frozen=True)
 class V2Compressor(ArrayBytesCodec):
-    compressor: dict[str, JSON] | None
+    compressor: numcodecs.abc.Codec | None
 
     is_fixed_size = False
 
@@ -27,9 +29,8 @@ async def _decode_single(
         chunk_spec: ArraySpec,
     ) -> NDBuffer:
         if self.compressor is not None:
-            compressor = numcodecs.get_codec(self.compressor)
             chunk_numpy_array = ensure_ndarray(
-                await to_thread(compressor.decode, chunk_bytes.as_array_like())
+                await to_thread(self.compressor.decode, chunk_bytes.as_array_like())
             )
         else:
             chunk_numpy_array = ensure_ndarray(chunk_bytes.as_array_like())
@@ -47,14 +48,13 @@ async def _encode_single(
     ) -> Buffer | None:
         chunk_numpy_array = chunk_array.as_numpy_array()
         if self.compressor is not None:
-            compressor = numcodecs.get_codec(self.compressor)
             if (
                 not chunk_numpy_array.flags.c_contiguous
                 and not chunk_numpy_array.flags.f_contiguous
             ):
                 chunk_numpy_array = chunk_numpy_array.copy(order="A")
             encoded_chunk_bytes = ensure_bytes(
-                await to_thread(compressor.encode, chunk_numpy_array)
+                await to_thread(self.compressor.encode, chunk_numpy_array)
             )
         else:
             encoded_chunk_bytes = ensure_bytes(chunk_numpy_array)
diff --git a/src/zarr/core/metadata/v2.py b/src/zarr/core/metadata/v2.py
index 34bdbb537f..27c1badafd 100644
--- a/src/zarr/core/metadata/v2.py
+++ b/src/zarr/core/metadata/v2.py
@@ -100,6 +100,8 @@ def _json_convert(
                 return o.str
             else:
                 return o.descr
+        if isinstance(o, numcodecs.abc.Codec):
+            return o.get_config()
         if np.isscalar(o):
             out: Any
             if hasattr(o, "dtype") and o.dtype.kind == "M" and hasattr(o, "view"):
diff --git a/tests/v3/test_v2.py b/tests/v3/test_v2.py
index c67f991a0d..943c425f54 100644
--- a/tests/v3/test_v2.py
+++ b/tests/v3/test_v2.py
@@ -2,7 +2,10 @@
 
 import numpy as np
 import pytest
+from numcodecs import Delta
+from numcodecs.blosc import Blosc
 
+import zarr
 from zarr import Array
 from zarr.store import MemoryStore, StorePath
 
@@ -26,3 +29,20 @@ def test_simple(store: StorePath) -> None:
 
     a[:, :] = data
     assert np.array_equal(data, a[:, :])
+
+
+def test_codec_pipeline() -> None:
+    # https://github.com/zarr-developers/zarr-python/issues/2243
+    store = MemoryStore(mode="w")
+    array = zarr.create(
+        store=store,
+        shape=(1,),
+        dtype="i4",
+        zarr_format=2,
+        filters=[Delta(dtype="i4").get_config()],
+        compressor=Blosc().get_config(),
+    )
+    array[:] = 1
+    result = array[:]
+    expected = np.ones(1)
+    np.testing.assert_array_equal(result, expected)

From 692593bb32bcd8f526f1b611c8c6ff0bf758ceff Mon Sep 17 00:00:00 2001
From: Deepak Cherian
Date: Wed, 25 Sep 2024 10:47:25 -0600
Subject: [PATCH 04/21] Fix fill_value handling for complex dtypes (#2200)

* Fix fill_value handling for complex & datetime dtypes

* cleanup

* more cleanup

* more cleanup

* Fix default fill_value

* Fixes

* Add booleans

* Add v2, v3 specific dtypes

* Add version.py to gitignore

* cleanup

* style: 
pre-commit fixes --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .gitignore | 2 + src/zarr/core/array.py | 7 --- src/zarr/core/buffer/core.py | 7 ++- src/zarr/core/metadata/v3.py | 4 +- src/zarr/testing/strategies.py | 112 ++++++++++++++++++--------------- tests/v3/test_properties.py | 10 +-- 6 files changed, 77 insertions(+), 65 deletions(-) diff --git a/.gitignore b/.gitignore index a09fb54d5c..199ab10578 100644 --- a/.gitignore +++ b/.gitignore @@ -84,3 +84,5 @@ fixture/ .DS_Store tests/.hypothesis .hypothesis/ + +zarr/version.py diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 7d1aa33087..bc95252ee0 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -252,12 +252,6 @@ async def _create_v3( shape = parse_shapelike(shape) codecs = list(codecs) if codecs is not None else [BytesCodec()] - if fill_value is None: - if dtype == np.dtype("bool"): - fill_value = False - else: - fill_value = 0 - if chunk_key_encoding is None: chunk_key_encoding = ("default", "/") assert chunk_key_encoding is not None @@ -281,7 +275,6 @@ async def _create_v3( ) array = cls(metadata=metadata, store_path=store_path) - await array._save_metadata(metadata) return array diff --git a/src/zarr/core/buffer/core.py b/src/zarr/core/buffer/core.py index 95c4e00e99..0a56db5e74 100644 --- a/src/zarr/core/buffer/core.py +++ b/src/zarr/core/buffer/core.py @@ -464,9 +464,14 @@ def __repr__(self) -> str: def all_equal(self, other: Any, equal_nan: bool = True) -> bool: """Compare to `other` using np.array_equal.""" + if other is None: + # Handle None fill_value for Zarr V2 + return False # use array_equal to obtain equal_nan=True functionality data, other = np.broadcast_arrays(self._data, other) - result = np.array_equal(self._data, other, equal_nan=equal_nan) + result = np.array_equal( + self._data, other, equal_nan=equal_nan if self._data.dtype.kind not in "US" else False + ) return result def fill(self, value: Any) -> None: diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py index 345655cc0b..f1cb07fd4c 100644 --- a/src/zarr/core/metadata/v3.py +++ b/src/zarr/core/metadata/v3.py @@ -360,7 +360,7 @@ def parse_fill_value( if fill_value is None: return dtype.type(0) if isinstance(fill_value, Sequence) and not isinstance(fill_value, str): - if dtype in (np.complex64, np.complex128): + if dtype.type in (np.complex64, np.complex128): dtype = cast(COMPLEX_DTYPE, dtype) if len(fill_value) == 2: # complex datatypes serialize to JSON arrays with two elements @@ -391,7 +391,7 @@ def parse_fill_value( pass elif fill_value in ["Infinity", "-Infinity"] and not np.isfinite(casted_value): pass - elif dtype.kind == "f": + elif dtype.kind in "cf": # float comparison is not exact, especially when dtype st.SearchStrategy[np.dtype]: + return ( + npst.boolean_dtypes() + | npst.integer_dtypes(endianness="=") + | npst.unsigned_integer_dtypes(endianness="=") + | npst.floating_dtypes(endianness="=") + | npst.complex_number_dtypes(endianness="=") + # | npst.byte_string_dtypes(endianness="=") + # | npst.unicode_string_dtypes() + # | npst.datetime64_dtypes() + # | npst.timedelta64_dtypes() + ) + + +def v2_dtypes() -> st.SearchStrategy[np.dtype]: + return ( + npst.boolean_dtypes() + | npst.integer_dtypes(endianness="=") + | npst.unsigned_integer_dtypes(endianness="=") + | npst.floating_dtypes(endianness="=") + | npst.complex_number_dtypes(endianness="=") + | npst.byte_string_dtypes(endianness="=") + | npst.unicode_string_dtypes(endianness="=") + | 
npst.datetime64_dtypes() + # | npst.timedelta64_dtypes() + ) + + # From https://zarr-specs.readthedocs.io/en/latest/v3/core/v3.0.html#node-names # 1. must not be the empty string ("") # 2. must not include the character "/" @@ -33,21 +61,29 @@ array_names = node_names attrs = st.none() | st.dictionaries(_attr_keys, _attr_values) paths = st.lists(node_names, min_size=1).map(lambda x: "/".join(x)) | st.just("/") -np_arrays = npst.arrays( - # TODO: re-enable timedeltas once they are supported - dtype=npst.scalar_dtypes().filter( - lambda x: (x.kind not in ["m", "M"]) and (x.byteorder not in [">"]) - ), - shape=npst.array_shapes(max_dims=4), -) stores = st.builds(MemoryStore, st.just({}), mode=st.just("w")) compressors = st.sampled_from([None, "default"]) -format = st.sampled_from([2, 3]) +zarr_formats: st.SearchStrategy[Literal[2, 3]] = st.sampled_from([2, 3]) +array_shapes = npst.array_shapes(max_dims=4) + + +@st.composite # type: ignore[misc] +def numpy_arrays( + draw: st.DrawFn, + *, + shapes: st.SearchStrategy[tuple[int, ...]] = array_shapes, + zarr_formats: st.SearchStrategy[Literal[2, 3]] = zarr_formats, +) -> Any: + """ + Generate numpy arrays that can be saved in the provided Zarr format. + """ + zarr_format = draw(zarr_formats) + return draw(npst.arrays(dtype=v3_dtypes() if zarr_format == 3 else v2_dtypes(), shape=shapes)) @st.composite # type: ignore[misc] def np_array_and_chunks( - draw: st.DrawFn, *, arrays: st.SearchStrategy[np.ndarray] = np_arrays + draw: st.DrawFn, *, arrays: st.SearchStrategy[np.ndarray] = numpy_arrays ) -> tuple[np.ndarray, tuple[int]]: # type: ignore[type-arg] """A hypothesis strategy to generate small sized random arrays. @@ -66,73 +102,49 @@ def np_array_and_chunks( def arrays( draw: st.DrawFn, *, + shapes: st.SearchStrategy[tuple[int, ...]] = array_shapes, compressors: st.SearchStrategy = compressors, stores: st.SearchStrategy[StoreLike] = stores, - arrays: st.SearchStrategy[np.ndarray] = np_arrays, paths: st.SearchStrategy[None | str] = paths, array_names: st.SearchStrategy = array_names, + arrays: st.SearchStrategy | None = None, attrs: st.SearchStrategy = attrs, - format: st.SearchStrategy = format, + zarr_formats: st.SearchStrategy = zarr_formats, ) -> Array: store = draw(stores) - nparray, chunks = draw(np_array_and_chunks(arrays=arrays)) path = draw(paths) name = draw(array_names) attributes = draw(attrs) - zarr_format = draw(format) + zarr_format = draw(zarr_formats) + if arrays is None: + arrays = numpy_arrays(shapes=shapes, zarr_formats=st.just(zarr_format)) + nparray, chunks = draw(np_array_and_chunks(arrays=arrays)) + # test that None works too. + fill_value = draw(st.one_of([st.none(), npst.from_dtype(nparray.dtype)])) # compressor = draw(compressors) - # TODO: clean this up - # if path is None and name is None: - # array_path = None - # array_name = None - # elif path is None and name is not None: - # array_path = f"{name}" - # array_name = f"/{name}" - # elif path is not None and name is None: - # array_path = path - # array_name = None - # elif path == "/": - # assert name is not None - # array_path = name - # array_name = "/" + name - # else: - # assert name is not None - # array_path = f"{path}/{name}" - # array_name = "/" + array_path - expected_attrs = {} if attributes is None else attributes array_path = path + ("/" if not path.endswith("/") else "") + name root = Group.from_store(store, zarr_format=zarr_format) - fill_value_args: tuple[Any, ...] 
= tuple() - if nparray.dtype.kind == "M": - m = re.search(r"\[(.+)\]", nparray.dtype.str) - if not m: - raise ValueError(f"Couldn't find precision for dtype '{nparray.dtype}.") - - fill_value_args = ( - # e.g. ns, D - m.groups()[0], - ) a = root.create_array( array_path, shape=nparray.shape, chunks=chunks, - dtype=nparray.dtype.str, + dtype=nparray.dtype, attributes=attributes, - # compressor=compressor, # TODO: FIXME - fill_value=nparray.dtype.type(0, *fill_value_args), + # compressor=compressor, # FIXME + fill_value=fill_value, ) assert isinstance(a, Array) + assert a.fill_value is not None + assert isinstance(root[array_path], Array) assert nparray.shape == a.shape assert chunks == a.chunks assert array_path == a.path, (path, name, array_path, a.name, a.path) - # assert array_path == a.name, (path, name, array_path, a.name, a.path) - # assert a.basename is None # TODO - # assert a.store == normalize_store_arg(store) + assert a.basename == name, (a.basename, name) assert dict(a.attrs) == expected_attrs a[:] = nparray diff --git a/tests/v3/test_properties.py b/tests/v3/test_properties.py index 250ede67b5..380a4d851e 100644 --- a/tests/v3/test_properties.py +++ b/tests/v3/test_properties.py @@ -8,13 +8,13 @@ import hypothesis.strategies as st # noqa: E402 from hypothesis import given # noqa: E402 -from zarr.testing.strategies import arrays, basic_indices, np_arrays # noqa: E402 +from zarr.testing.strategies import arrays, basic_indices, numpy_arrays, zarr_formats # noqa: E402 -@given(st.data()) -def test_roundtrip(data: st.DataObject) -> None: - nparray = data.draw(np_arrays) - zarray = data.draw(arrays(arrays=st.just(nparray))) +@given(data=st.data(), zarr_format=zarr_formats) +def test_roundtrip(data: st.DataObject, zarr_format: int) -> None: + nparray = data.draw(numpy_arrays(zarr_formats=st.just(zarr_format))) + zarray = data.draw(arrays(arrays=st.just(nparray), zarr_formats=st.just(zarr_format))) assert_array_equal(nparray, zarray[:]) From 1569eca42a5957c643bc82c78ea8e3d955a4bd3c Mon Sep 17 00:00:00 2001 From: Dimitri Papadopoulos Orfanos <3234522+DimitriPapadopoulos@users.noreply.github.com> Date: Thu, 26 Sep 2024 00:18:52 +0200 Subject: [PATCH 05/21] Enforce ruff/flynt rules (FLY) (#2240) * Apply ruff/flynt rule FLY002 FLY002 Consider f-string instead of string join * Enforce ruff/flynt rules (FLY) --- pyproject.toml | 1 + src/zarr/core/group.py | 2 +- src/zarr/store/remote.py | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index ed6c25893c..5f45d31831 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -208,6 +208,7 @@ extend-exclude = [ [tool.ruff.lint] extend-select = [ "B", # flake8-bugbear + "FLY", # flynt "I", # isort "ISC", # flake8-implicit-str-concat "PGH", # pygrep-hooks diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index b09968b62a..d7ad960b14 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -676,7 +676,7 @@ async def _members( async for child_key, val in obj._members( max_depth=max_depth, current_depth=current_depth + 1 ): - yield "/".join([key, child_key]), val + yield f"{key}/{child_key}", val except KeyError: # keyerror is raised when `key` names an object (in the object storage sense), # as opposed to a prefix, in the store under the prefix associated with this group diff --git a/src/zarr/store/remote.py b/src/zarr/store/remote.py index 02bda6b1dd..7aea8a3780 100644 --- a/src/zarr/store/remote.py +++ b/src/zarr/store/remote.py @@ -234,6 +234,6 @@ async def list_prefix(self, prefix: 
str) -> AsyncGenerator[str, None]: AsyncGenerator[str, None] """ - find_str = "/".join([self.path, prefix]) + find_str = f"{self.path}/{prefix}" for onefile in await self.fs._find(find_str, detail=False, maxdepth=None, withdirs=False): yield onefile.removeprefix(find_str) From 1d9658bb9f0ebacf2f50221b5af815ab6417e302 Mon Sep 17 00:00:00 2001 From: Dimitri Papadopoulos Orfanos <3234522+DimitriPapadopoulos@users.noreply.github.com> Date: Thu, 26 Sep 2024 00:21:51 +0200 Subject: [PATCH 06/21] Enforce ruff/flake8-return rules (RET) (#2237) * Apply ruff/flake8-return rule RET501 RET501 Do not explicitly `return None` in function if it is the only possible return value * Apply ruff/flake8-return rule RET504 RET504 Unnecessary assignment before `return` statement * Enforce ruff/flake8-return rules (RET) --- pyproject.toml | 3 +++ src/zarr/abc/store.py | 2 +- src/zarr/core/buffer/core.py | 3 +-- tests/v3/conftest.py | 6 ++---- tests/v3/test_store/test_stateful_store.py | 8 ++------ 5 files changed, 9 insertions(+), 13 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 5f45d31831..8b50870eb2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -214,6 +214,7 @@ extend-select = [ "PGH", # pygrep-hooks "PYI", # flake8-pyi "RSE", # flake8-raise + "RET", # flake8-return "RUF", "TCH", # flake8-type-checking "TRY", # tryceratops @@ -221,6 +222,8 @@ extend-select = [ ] ignore = [ "PYI013", + "RET505", + "RET506", "RUF005", "TRY003", # https://docs.astral.sh/ruff/formatter/#conflicting-lint-rules diff --git a/src/zarr/abc/store.py b/src/zarr/abc/store.py index 2f02ac36ad..c453733f00 100644 --- a/src/zarr/abc/store.py +++ b/src/zarr/abc/store.py @@ -170,7 +170,7 @@ async def _set_many(self, values: Iterable[tuple[str, Buffer]]) -> None: Insert multiple (key, value) pairs into storage. 
""" await gather(*(self.set(key, value) for key, value in values)) - return None + return @property @abstractmethod diff --git a/src/zarr/core/buffer/core.py b/src/zarr/core/buffer/core.py index 0a56db5e74..49f04aafa0 100644 --- a/src/zarr/core/buffer/core.py +++ b/src/zarr/core/buffer/core.py @@ -469,10 +469,9 @@ def all_equal(self, other: Any, equal_nan: bool = True) -> bool: return False # use array_equal to obtain equal_nan=True functionality data, other = np.broadcast_arrays(self._data, other) - result = np.array_equal( + return np.array_equal( self._data, other, equal_nan=equal_nan if self._data.dtype.kind not in "US" else False ) - return result def fill(self, value: Any) -> None: self._data.fill(value) diff --git a/tests/v3/conftest.py b/tests/v3/conftest.py index c3516f676c..4a2ef26433 100644 --- a/tests/v3/conftest.py +++ b/tests/v3/conftest.py @@ -46,8 +46,7 @@ def path_type(request: pytest.FixtureRequest) -> Any: @pytest.fixture async def store_path(tmpdir: LEGACY_PATH) -> StorePath: store = await LocalStore.open(str(tmpdir), mode="w") - p = StorePath(store) - return p + return StorePath(store) @pytest.fixture(scope="function") @@ -88,13 +87,12 @@ async def async_group(request: pytest.FixtureRequest, tmpdir: LEGACY_PATH) -> As param: AsyncGroupRequest = request.param store = await parse_store(param.store, str(tmpdir)) - agroup = await AsyncGroup.from_store( + return await AsyncGroup.from_store( store, attributes=param.attributes, zarr_format=param.zarr_format, exists_ok=False, ) - return agroup @pytest.fixture(params=["numpy", "cupy"]) diff --git a/tests/v3/test_store/test_stateful_store.py b/tests/v3/test_store/test_stateful_store.py index 85a31f9eb3..057d1a9501 100644 --- a/tests/v3/test_store/test_stateful_store.py +++ b/tests/v3/test_store/test_stateful_store.py @@ -40,16 +40,12 @@ def list(self) -> list: return self._sync_iter(self.store.list()) def get(self, key: str, prototype: BufferPrototype) -> zarr.core.buffer.Buffer: - obs = self._sync(self.store.get(key, prototype=prototype)) - return obs + return self._sync(self.store.get(key, prototype=prototype)) def get_partial_values( self, key_ranges: list, prototype: BufferPrototype ) -> zarr.core.buffer.Buffer: - obs_partial = self._sync( - self.store.get_partial_values(prototype=prototype, key_ranges=key_ranges) - ) - return obs_partial + return self._sync(self.store.get_partial_values(prototype=prototype, key_ranges=key_ranges)) def delete(self, path: str) -> None: return self._sync(self.store.delete(path)) From c06fa234d36f5cdffe4c7e156079c15d3b523d1d Mon Sep 17 00:00:00 2001 From: Dimitri Papadopoulos Orfanos <3234522+DimitriPapadopoulos@users.noreply.github.com> Date: Thu, 26 Sep 2024 00:22:23 +0200 Subject: [PATCH 07/21] Fix multiple identical imports (#2241) --- src/zarr/core/array.py | 1 - src/zarr/store/memory.py | 1 - tests/v3/test_store/test_remote.py | 1 - 3 files changed, 3 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index bc95252ee0..f7747e9b2b 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -9,7 +9,6 @@ import numpy.typing as npt from zarr._compat import _deprecate_positional_args -from zarr.abc.codec import Codec, CodecPipeline from zarr.abc.store import set_or_delete from zarr.codecs import BytesCodec from zarr.codecs._v2 import V2Compressor, V2Filters diff --git a/src/zarr/store/memory.py b/src/zarr/store/memory.py index 83734e8942..351667f646 100644 --- a/src/zarr/store/memory.py +++ b/src/zarr/store/memory.py @@ -1,6 +1,5 @@ from __future__ import 
annotations -from collections.abc import AsyncGenerator, MutableMapping from typing import TYPE_CHECKING from zarr.abc.store import Store diff --git a/tests/v3/test_store/test_remote.py b/tests/v3/test_store/test_remote.py index a6457cfebc..ac77a50f8c 100644 --- a/tests/v3/test_store/test_remote.py +++ b/tests/v3/test_store/test_remote.py @@ -2,7 +2,6 @@ import json import os -from collections.abc import Generator from typing import TYPE_CHECKING import fsspec From 9f825e1d5df4bfb9c322fe172cb24408b83f383f Mon Sep 17 00:00:00 2001 From: Dimitri Papadopoulos Orfanos <3234522+DimitriPapadopoulos@users.noreply.github.com> Date: Thu, 26 Sep 2024 00:23:45 +0200 Subject: [PATCH 08/21] Enforce ruff/flake8-pytest-style rules (PT) (#2236) * Apply ruff/flake8-pytest-style rule PT018 PT018 Assertion should be broken down into multiple parts * Apply ruff/flake8-pytest-style rule PT007 PT007 Wrong values type in `@pytest.mark.parametrize` expected `list` of `tuple` * Apply ruff/flake8-pytest-style rule PT006 PT006 Wrong type passed to first argument of `@pytest.mark.parametrize`; expected `tuple` * Apply ruff/flake8-pytest-style rule PT003 PT003 `scope='function'` is implied in `@pytest.fixture()` * Apply ruff/flake8-pytest-style rule PT001 PT001 Use `@pytest.fixture` over `@pytest.fixture()` * Enforce ruff/flake8-pytest-style rules (PT) --- pyproject.toml | 4 ++++ src/zarr/testing/store.py | 10 +++++----- tests/v3/conftest.py | 12 ++++++------ tests/v3/test_array.py | 22 +++++++++++----------- tests/v3/test_chunk_grids.py | 4 ++-- tests/v3/test_codec_entrypoints.py | 2 +- tests/v3/test_codecs/test_blosc.py | 2 +- tests/v3/test_codecs/test_codecs.py | 16 ++++++++-------- tests/v3/test_codecs/test_endian.py | 4 ++-- tests/v3/test_codecs/test_gzip.py | 2 +- tests/v3/test_codecs/test_sharding.py | 19 ++++++++++--------- tests/v3/test_codecs/test_transpose.py | 6 +++--- tests/v3/test_codecs/test_zstd.py | 2 +- tests/v3/test_config.py | 8 ++++---- tests/v3/test_group.py | 16 ++++++++-------- tests/v3/test_indexing.py | 14 +++++++------- tests/v3/test_metadata/test_v3.py | 4 ++-- tests/v3/test_store/test_memory.py | 8 ++++---- tests/v3/test_store/test_remote.py | 6 +++--- tests/v3/test_store/test_zip.py | 2 +- 20 files changed, 84 insertions(+), 79 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 8b50870eb2..a10c22d08e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -212,6 +212,7 @@ extend-select = [ "I", # isort "ISC", # flake8-implicit-str-concat "PGH", # pygrep-hooks + "PT", # flake8-pytest-style "PYI", # flake8-pyi "RSE", # flake8-raise "RET", # flake8-return @@ -221,6 +222,9 @@ extend-select = [ "UP", # pyupgrade ] ignore = [ + "PT004", # deprecated + "PT011", # TODO: apply this rule + "PT012", # TODO: apply this rule "PYI013", "RET505", "RET506", diff --git a/src/zarr/testing/store.py b/src/zarr/testing/store.py index ebd4b85c90..70d2e16efd 100644 --- a/src/zarr/testing/store.py +++ b/src/zarr/testing/store.py @@ -37,11 +37,11 @@ def get(self, store: S, key: str) -> Buffer: raise NotImplementedError - @pytest.fixture(scope="function") + @pytest.fixture def store_kwargs(self) -> dict[str, Any]: return {"mode": "r+"} - @pytest.fixture(scope="function") + @pytest.fixture async def store(self, store_kwargs: dict[str, Any]) -> Store: return await self.store_cls.open(**store_kwargs) @@ -97,7 +97,7 @@ def test_store_supports_listing(self, store: S) -> None: @pytest.mark.parametrize("key", ["c/0", "foo/c/0.0", "foo/0/0"]) @pytest.mark.parametrize("data", [b"\x01\x02\x03\x04", b""]) - 
@pytest.mark.parametrize("byte_range", (None, (0, None), (1, None), (1, 2), (None, 1))) + @pytest.mark.parametrize("byte_range", [None, (0, None), (1, None), (1, 2), (None, 1)]) async def test_get( self, store: S, key: str, data: bytes, byte_range: None | tuple[int | None, int | None] ) -> None: @@ -137,12 +137,12 @@ async def test_set_many(self, store: S) -> None: @pytest.mark.parametrize( "key_ranges", - ( + [ [], [("zarr.json", (0, 1))], [("c/0", (0, 1)), ("zarr.json", (0, None))], [("c/0/0", (0, 1)), ("c/0/1", (None, 2)), ("c/0/2", (0, 3))], - ), + ], ) async def test_get_partial_values( self, store: S, key_ranges: list[tuple[str, tuple[int | None, int | None]]] diff --git a/tests/v3/conftest.py b/tests/v3/conftest.py index 4a2ef26433..fc1f950ad4 100644 --- a/tests/v3/conftest.py +++ b/tests/v3/conftest.py @@ -49,27 +49,27 @@ async def store_path(tmpdir: LEGACY_PATH) -> StorePath: return StorePath(store) -@pytest.fixture(scope="function") +@pytest.fixture async def local_store(tmpdir: LEGACY_PATH) -> LocalStore: return await LocalStore.open(str(tmpdir), mode="w") -@pytest.fixture(scope="function") +@pytest.fixture async def remote_store(url: str) -> RemoteStore: return await RemoteStore.open(url, mode="w") -@pytest.fixture(scope="function") +@pytest.fixture async def memory_store() -> MemoryStore: return await MemoryStore.open(mode="w") -@pytest.fixture(scope="function") +@pytest.fixture async def zip_store(tmpdir: LEGACY_PATH) -> ZipStore: return await ZipStore.open(str(tmpdir / "zarr.zip"), mode="w") -@pytest.fixture(scope="function") +@pytest.fixture async def store(request: pytest.FixtureRequest, tmpdir: LEGACY_PATH) -> Store: param = request.param return await parse_store(param, str(tmpdir)) @@ -82,7 +82,7 @@ class AsyncGroupRequest: attributes: dict[str, Any] = field(default_factory=dict) -@pytest.fixture(scope="function") +@pytest.fixture async def async_group(request: pytest.FixtureRequest, tmpdir: LEGACY_PATH) -> AsyncGroup: param: AsyncGroupRequest = request.param diff --git a/tests/v3/test_array.py b/tests/v3/test_array.py index 06fd791e6e..02358cb39b 100644 --- a/tests/v3/test_array.py +++ b/tests/v3/test_array.py @@ -12,8 +12,8 @@ from zarr.store.common import StorePath -@pytest.mark.parametrize("store", ("local", "memory", "zip"), indirect=["store"]) -@pytest.mark.parametrize("zarr_format", (2, 3)) +@pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) +@pytest.mark.parametrize("zarr_format", [2, 3]) @pytest.mark.parametrize("exists_ok", [True, False]) @pytest.mark.parametrize("extant_node", ["array", "group"]) def test_array_creation_existing_node( @@ -61,8 +61,8 @@ def test_array_creation_existing_node( ) -@pytest.mark.parametrize("store", ("local", "memory", "zip"), indirect=["store"]) -@pytest.mark.parametrize("zarr_format", (2, 3)) +@pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) +@pytest.mark.parametrize("zarr_format", [2, 3]) def test_array_name_properties_no_group( store: LocalStore | MemoryStore, zarr_format: ZarrFormat ) -> None: @@ -72,8 +72,8 @@ def test_array_name_properties_no_group( assert arr.basename is None -@pytest.mark.parametrize("store", ("local", "memory", "zip"), indirect=["store"]) -@pytest.mark.parametrize("zarr_format", (2, 3)) +@pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) +@pytest.mark.parametrize("zarr_format", [2, 3]) def test_array_name_properties_with_group( store: LocalStore | MemoryStore, zarr_format: ZarrFormat ) -> None: @@ -123,7 +123,7 
@@ def test_array_v3_fill_value_default( @pytest.mark.parametrize("store", ["memory"], indirect=True) @pytest.mark.parametrize( - "dtype_str,fill_value", + ("dtype_str", "fill_value"), [("bool", True), ("uint8", 99), ("float32", -99.9), ("complex64", 3 + 4j)], ) def test_array_v3_fill_value(store: MemoryStore, fill_value: int, dtype_str: str) -> None: @@ -201,8 +201,8 @@ async def test_array_v3_nan_fill_value(store: MemoryStore) -> None: assert len([a async for a in store.list_prefix("/")]) == 0 -@pytest.mark.parametrize("store", ("local",), indirect=["store"]) -@pytest.mark.parametrize("zarr_format", (2, 3)) +@pytest.mark.parametrize("store", ["local"], indirect=["store"]) +@pytest.mark.parametrize("zarr_format", [2, 3]) async def test_serializable_async_array( store: LocalStore | MemoryStore, zarr_format: ZarrFormat ) -> None: @@ -219,8 +219,8 @@ async def test_serializable_async_array( # TODO: uncomment the parts of this test that will be impacted by the config/prototype changes in flight -@pytest.mark.parametrize("store", ("local",), indirect=["store"]) -@pytest.mark.parametrize("zarr_format", (2, 3)) +@pytest.mark.parametrize("store", ["local"], indirect=["store"]) +@pytest.mark.parametrize("zarr_format", [2, 3]) def test_serializable_sync_array(store: LocalStore, zarr_format: ZarrFormat) -> None: expected = Array.create( store=store, shape=(100,), chunks=(10,), zarr_format=zarr_format, dtype="i4" diff --git a/tests/v3/test_chunk_grids.py b/tests/v3/test_chunk_grids.py index e1b4df10a7..12166bd210 100644 --- a/tests/v3/test_chunk_grids.py +++ b/tests/v3/test_chunk_grids.py @@ -5,9 +5,9 @@ @pytest.mark.parametrize( - "shape", ((0,), (0,) * 2, (1, 2, 0, 4, 5), (10, 0), (10,), (100,) * 3, (1000000,), (10000,) * 2) + "shape", [(0,), (0,) * 2, (1, 2, 0, 4, 5), (10, 0), (10,), (100,) * 3, (1000000,), (10000,) * 2] ) -@pytest.mark.parametrize("itemsize", (1, 2, 4)) +@pytest.mark.parametrize("itemsize", [1, 2, 4]) def test_guess_chunks(shape: tuple[int, ...], itemsize: int) -> None: chunks = _guess_chunks(shape, itemsize) chunk_size = np.prod(chunks) * itemsize diff --git a/tests/v3/test_codec_entrypoints.py b/tests/v3/test_codec_entrypoints.py index 95dae68762..e1ef027dd4 100644 --- a/tests/v3/test_codec_entrypoints.py +++ b/tests/v3/test_codec_entrypoints.py @@ -10,7 +10,7 @@ here = os.path.abspath(os.path.dirname(__file__)) -@pytest.fixture() +@pytest.fixture def set_path() -> Generator[None, None, None]: sys.path.append(here) zarr.registry._collect_entrypoints() diff --git a/tests/v3/test_codecs/test_blosc.py b/tests/v3/test_codecs/test_blosc.py index 4c569055b7..982b0213b9 100644 --- a/tests/v3/test_codecs/test_blosc.py +++ b/tests/v3/test_codecs/test_blosc.py @@ -10,7 +10,7 @@ from zarr.store.common import StorePath -@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) @pytest.mark.parametrize("dtype", ["uint8", "uint16"]) async def test_blosc_evolve(store: Store, dtype: str) -> None: typesize = np.dtype(dtype).itemsize diff --git a/tests/v3/test_codecs/test_codecs.py b/tests/v3/test_codecs/test_codecs.py index 8e98cf20f5..75b1d15d00 100644 --- a/tests/v3/test_codecs/test_codecs.py +++ b/tests/v3/test_codecs/test_codecs.py @@ -59,7 +59,7 @@ def test_sharding_pickle() -> None: pass -@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) @pytest.mark.parametrize("input_order", ["F", "C"]) 
@pytest.mark.parametrize("store_order", ["F", "C"]) @pytest.mark.parametrize("runtime_write_order", ["F", "C"]) @@ -117,7 +117,7 @@ async def test_order( assert read_data.flags["C_CONTIGUOUS"] -@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) @pytest.mark.parametrize("input_order", ["F", "C"]) @pytest.mark.parametrize("runtime_write_order", ["F", "C"]) @pytest.mark.parametrize("runtime_read_order", ["F", "C"]) @@ -159,7 +159,7 @@ def test_order_implicit( assert read_data.flags["C_CONTIGUOUS"] -@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) def test_open(store: Store) -> None: spath = StorePath(store) a = Array.create( @@ -205,7 +205,7 @@ def test_morton() -> None: ] -@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) def test_write_partial_chunks(store: Store) -> None: data = np.arange(0, 256, dtype="uint16").reshape((16, 16)) spath = StorePath(store) @@ -220,7 +220,7 @@ def test_write_partial_chunks(store: Store) -> None: assert np.array_equal(a[0:16, 0:16], data) -@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) async def test_delete_empty_chunks(store: Store) -> None: data = np.ones((16, 16)) path = "delete_empty_chunks" @@ -238,7 +238,7 @@ async def test_delete_empty_chunks(store: Store) -> None: assert await store.get(f"{path}/c0/0", prototype=default_buffer_prototype()) is None -@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) async def test_dimension_names(store: Store) -> None: data = np.arange(0, 256, dtype="uint16").reshape((16, 16)) path = "dimension_names" @@ -272,7 +272,7 @@ async def test_dimension_names(store: Store) -> None: assert "dimension_names" not in json.loads(zarr_json_buffer.to_bytes()) -@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) def test_invalid_metadata(store: Store) -> None: spath = StorePath(store, "invalid_metadata") with pytest.raises(ValueError): @@ -360,7 +360,7 @@ def test_invalid_metadata(store: Store) -> None: ) -@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) async def test_resize(store: Store) -> None: data = np.zeros((16, 18), dtype="uint16") path = "resize" diff --git a/tests/v3/test_codecs/test_endian.py b/tests/v3/test_codecs/test_endian.py index 5b5b2eb899..81b24e7349 100644 --- a/tests/v3/test_codecs/test_endian.py +++ b/tests/v3/test_codecs/test_endian.py @@ -11,7 +11,7 @@ from .test_codecs import _AsyncArrayProxy -@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) @pytest.mark.parametrize("endian", ["big", "little"]) async def test_endian(store: Store, endian: Literal["big", "little"]) -> None: data = np.arange(0, 256, dtype="uint16").reshape((16, 16)) @@ -32,7 +32,7 @@ async def test_endian(store: Store, endian: Literal["big", "little"]) -> None: assert np.array_equal(data, readback_data) -@pytest.mark.parametrize("store", ("local", 
"memory"), indirect=["store"]) +@pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) @pytest.mark.parametrize("dtype_input_endian", [">u2", " None: data = np.arange(0, 256, dtype="uint16").reshape((16, 16)) diff --git a/tests/v3/test_codecs/test_sharding.py b/tests/v3/test_codecs/test_sharding.py index bd8aab5e03..ecf2ea7bd7 100644 --- a/tests/v3/test_codecs/test_sharding.py +++ b/tests/v3/test_codecs/test_sharding.py @@ -21,7 +21,7 @@ from .test_codecs import _AsyncArrayProxy, order_from_dim -@pytest.mark.parametrize("store", ("local", "memory", "zip"), indirect=["store"]) +@pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) @pytest.mark.parametrize("index_location", ["start", "end"]) @pytest.mark.parametrize( "array_fixture", @@ -76,7 +76,7 @@ def test_sharding( @pytest.mark.parametrize("index_location", ["start", "end"]) -@pytest.mark.parametrize("store", ("local", "memory", "zip"), indirect=["store"]) +@pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) @pytest.mark.parametrize( "array_fixture", [ @@ -126,7 +126,7 @@ def test_sharding_partial( indirect=["array_fixture"], ) @pytest.mark.parametrize("index_location", ["start", "end"]) -@pytest.mark.parametrize("store", ("local", "memory", "zip"), indirect=["store"]) +@pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) def test_sharding_partial_read( store: Store, array_fixture: npt.NDArray[Any], index_location: ShardingCodecIndexLocation ) -> None: @@ -163,7 +163,7 @@ def test_sharding_partial_read( indirect=["array_fixture"], ) @pytest.mark.parametrize("index_location", ["start", "end"]) -@pytest.mark.parametrize("store", ("local", "memory", "zip"), indirect=["store"]) +@pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) def test_sharding_partial_overwrite( store: Store, array_fixture: npt.NDArray[Any], index_location: ShardingCodecIndexLocation ) -> None: @@ -214,7 +214,7 @@ def test_sharding_partial_overwrite( "inner_index_location", ["start", "end"], ) -@pytest.mark.parametrize("store", ("local", "memory", "zip"), indirect=["store"]) +@pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) def test_nested_sharding( store: Store, array_fixture: npt.NDArray[Any], @@ -247,7 +247,7 @@ def test_nested_sharding( assert np.array_equal(data, read_data) -@pytest.mark.parametrize("store", ("local", "memory", "zip"), indirect=["store"]) +@pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) def test_open_sharding(store: Store) -> None: path = "open_sharding" spath = StorePath(store, path) @@ -272,7 +272,7 @@ def test_open_sharding(store: Store) -> None: assert a.metadata == b.metadata -@pytest.mark.parametrize("store", ("local", "memory", "zip"), indirect=["store"]) +@pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) def test_write_partial_sharded_chunks(store: Store) -> None: data = np.arange(0, 16 * 16, dtype="uint16").reshape((16, 16)) spath = StorePath(store) @@ -296,7 +296,7 @@ def test_write_partial_sharded_chunks(store: Store) -> None: assert np.array_equal(a[0:16, 0:16], data) -@pytest.mark.parametrize("store", ("local", "memory", "zip"), indirect=["store"]) +@pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) async def test_delete_empty_shards(store: Store) -> None: if not store.supports_deletes: pytest.skip("store does not support deletes") @@ -323,7 +323,8 @@ async def 
test_delete_empty_shards(store: Store) -> None: assert np.array_equal(data, await _AsyncArrayProxy(a)[:, :].get()) assert await store.get(f"{path}/c/1/0", prototype=default_buffer_prototype()) is None chunk_bytes = await store.get(f"{path}/c/0/0", prototype=default_buffer_prototype()) - assert chunk_bytes is not None and len(chunk_bytes) == 16 * 2 + 8 * 8 * 2 + 4 + assert chunk_bytes is not None + assert len(chunk_bytes) == 16 * 2 + 8 * 8 * 2 + 4 def test_pickle() -> None: diff --git a/tests/v3/test_codecs/test_transpose.py b/tests/v3/test_codecs/test_transpose.py index 4eff57bab9..a14ace7201 100644 --- a/tests/v3/test_codecs/test_transpose.py +++ b/tests/v3/test_codecs/test_transpose.py @@ -19,7 +19,7 @@ @pytest.mark.parametrize("runtime_write_order", ["F", "C"]) @pytest.mark.parametrize("runtime_read_order", ["F", "C"]) @pytest.mark.parametrize("with_sharding", [True, False]) -@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) async def test_transpose( store: Store, input_order: MemoryOrder, @@ -69,7 +69,7 @@ async def test_transpose( assert read_data.flags["C_CONTIGUOUS"] -@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) @pytest.mark.parametrize("order", [[1, 2, 0], [1, 2, 3, 0], [3, 2, 4, 0, 1]]) def test_transpose_non_self_inverse(store: Store, order: list[int]) -> None: shape = [i + 3 for i in range(len(order))] @@ -88,7 +88,7 @@ def test_transpose_non_self_inverse(store: Store, order: list[int]) -> None: assert np.array_equal(data, read_data) -@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) def test_transpose_invalid( store: Store, ) -> None: diff --git a/tests/v3/test_codecs/test_zstd.py b/tests/v3/test_codecs/test_zstd.py index 0726e5944c..cf80a8053c 100644 --- a/tests/v3/test_codecs/test_zstd.py +++ b/tests/v3/test_codecs/test_zstd.py @@ -7,7 +7,7 @@ from zarr.store.common import StorePath -@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) @pytest.mark.parametrize("checksum", [True, False]) def test_zstd(store: Store, checksum: bool) -> None: data = np.arange(0, 256, dtype="uint16").reshape((16, 16)) diff --git a/tests/v3/test_config.py b/tests/v3/test_config.py index 115487ba87..79b28055d6 100644 --- a/tests/v3/test_config.py +++ b/tests/v3/test_config.py @@ -69,7 +69,7 @@ def test_config_defaults_set() -> None: @pytest.mark.parametrize( - "key, old_val, new_val", + ("key", "old_val", "new_val"), [("array.order", "C", "F"), ("async.concurrency", None, 10), ("json_indent", 2, 0)], ) def test_config_defaults_can_be_overridden(key: str, old_val: Any, new_val: Any) -> None: @@ -88,7 +88,7 @@ class MockClass: ) -@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) def test_config_codec_pipeline_class(store: Store) -> None: # has default value assert get_pipeline_class().__name__ != "" @@ -139,7 +139,7 @@ class MockEnvCodecPipeline(CodecPipeline): assert get_pipeline_class(reload_config=True) == MockEnvCodecPipeline -@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) def 
test_config_codec_implementation(store: Store) -> None: # has default value assert fully_qualified_name(get_codec_class("blosc")) == config.defaults[0]["codecs"]["blosc"] @@ -172,7 +172,7 @@ async def _encode_single( assert get_codec_class("blosc", reload_config=True) == BloscCodec -@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) def test_config_ndbuffer_implementation(store: Store) -> None: # has default value assert fully_qualified_name(get_ndbuffer_class()) == config.defaults[0]["ndbuffer"] diff --git a/tests/v3/test_group.py b/tests/v3/test_group.py index 8beb344b47..22499dfb40 100644 --- a/tests/v3/test_group.py +++ b/tests/v3/test_group.py @@ -453,8 +453,8 @@ def test_group_array_creation( assert full_like_array.store_path.store == store -@pytest.mark.parametrize("store", ("local", "memory", "zip"), indirect=["store"]) -@pytest.mark.parametrize("zarr_format", (2, 3)) +@pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) +@pytest.mark.parametrize("zarr_format", [2, 3]) @pytest.mark.parametrize("exists_ok", [True, False]) @pytest.mark.parametrize("extant_node", ["array", "group"]) def test_group_creation_existing_node( @@ -601,10 +601,10 @@ async def test_asyncgroup_open_wrong_format( # should this be async? @pytest.mark.parametrize( "data", - ( + [ {"zarr_format": 3, "node_type": "group", "attributes": {"foo": 100}}, {"zarr_format": 2, "attributes": {"foo": 100}}, - ), + ], ) def test_asyncgroup_from_dict(store: Store, data: dict[str, Any]) -> None: """ @@ -744,8 +744,8 @@ async def test_asyncgroup_update_attributes(store: Store, zarr_format: ZarrForma assert agroup_new_attributes.attrs == attributes_new -@pytest.mark.parametrize("store", ("local",), indirect=["store"]) -@pytest.mark.parametrize("zarr_format", (2, 3)) +@pytest.mark.parametrize("store", ["local"], indirect=["store"]) +@pytest.mark.parametrize("zarr_format", [2, 3]) async def test_serializable_async_group(store: LocalStore, zarr_format: ZarrFormat) -> None: expected = await AsyncGroup.from_store( store=store, attributes={"foo": 999}, zarr_format=zarr_format @@ -755,8 +755,8 @@ async def test_serializable_async_group(store: LocalStore, zarr_format: ZarrForm assert actual == expected -@pytest.mark.parametrize("store", ("local",), indirect=["store"]) -@pytest.mark.parametrize("zarr_format", (2, 3)) +@pytest.mark.parametrize("store", ["local"], indirect=["store"]) +@pytest.mark.parametrize("zarr_format", [2, 3]) def test_serializable_sync_group(store: LocalStore, zarr_format: ZarrFormat) -> None: expected = Group.from_store(store=store, attributes={"foo": 999}, zarr_format=zarr_format) p = pickle.dumps(expected) diff --git a/tests/v3/test_indexing.py b/tests/v3/test_indexing.py index d2cf455e07..890c487b00 100644 --- a/tests/v3/test_indexing.py +++ b/tests/v3/test_indexing.py @@ -130,7 +130,7 @@ def test_replace_ellipsis() -> None: @pytest.mark.parametrize( - "value, dtype", + ("value", "dtype"), [ (42, "uint8"), pytest.param( @@ -138,7 +138,7 @@ def test_replace_ellipsis() -> None: ), ], ) -@pytest.mark.parametrize("use_out", (True, False)) +@pytest.mark.parametrize("use_out", [True, False]) def test_get_basic_selection_0d(store: StorePath, use_out: bool, value: Any, dtype: Any) -> None: # setup arr_np = np.array(value, dtype=dtype) @@ -385,7 +385,7 @@ def test_fancy_indexing_fallback_on_get_setitem(store: StorePath) -> None: @pytest.mark.parametrize( - "index,expected_result", + ("index", 
"expected_result"), [ # Single iterable of integers ([0, 1], [[0, 1, 2], [3, 4, 5]]), @@ -426,7 +426,7 @@ def test_orthogonal_indexing_fallback_on_getitem_2d( @pytest.mark.parametrize( - "index,expected_result", + ("index", "expected_result"), [ # Single iterable of integers ([0, 1], [[[0, 1, 2], [3, 4, 5], [6, 7, 8]], [[9, 10, 11], [12, 13, 14], [15, 16, 17]]]), @@ -466,7 +466,7 @@ def test_orthogonal_indexing_fallback_on_getitem_3d( @pytest.mark.parametrize( - "index,expected_result", + ("index", "expected_result"), [ # Single iterable of integers ([0, 1], [[1, 1, 1], [1, 1, 1], [0, 0, 0]]), @@ -509,7 +509,7 @@ def test_fancy_indexing_doesnt_mix_with_implicit_slicing(store: StorePath) -> No @pytest.mark.parametrize( - "value, dtype", + ("value", "dtype"), [ (42, "uint8"), pytest.param( @@ -1735,7 +1735,7 @@ def test_numpy_int_indexing(store: StorePath) -> None: @pytest.mark.parametrize( - "shape, chunks, ops", + ("shape", "chunks", "ops"), [ # 1D test cases ((1070,), (50,), [("__getitem__", (slice(200, 400),))]), diff --git a/tests/v3/test_metadata/test_v3.py b/tests/v3/test_metadata/test_v3.py index f8e2ebd7b3..025d59422a 100644 --- a/tests/v3/test_metadata/test_v3.py +++ b/tests/v3/test_metadata/test_v3.py @@ -82,7 +82,7 @@ def test_parse_auto_fill_value(dtype_str: str) -> None: @pytest.mark.parametrize( - "fill_value,dtype_str", + ("fill_value", "dtype_str"), [ (True, "bool"), (False, "bool"), @@ -301,7 +301,7 @@ def test_parse_invalid_dtype_raises(data): @pytest.mark.parametrize( - "data_type,fill_value", [("uint8", -1), ("int32", 22.5), ("float32", "foo")] + ("data_type", "fill_value"), [("uint8", -1), ("int32", 22.5), ("float32", "foo")] ) async def test_invalid_fill_value_raises(data_type: str, fill_value: float) -> None: metadata_dict = { diff --git a/tests/v3/test_store/test_memory.py b/tests/v3/test_store/test_memory.py index 2498cdc24a..4413047178 100644 --- a/tests/v3/test_store/test_memory.py +++ b/tests/v3/test_store/test_memory.py @@ -18,7 +18,7 @@ def set(self, store: MemoryStore, key: str, value: Buffer) -> None: def get(self, store: MemoryStore, key: str) -> Buffer: return store._store_dict[key] - @pytest.fixture(scope="function", params=[None, True]) + @pytest.fixture(params=[None, True]) def store_kwargs( self, request: pytest.FixtureRequest ) -> dict[str, str | None | dict[str, Buffer]]: @@ -27,7 +27,7 @@ def store_kwargs( kwargs["store_dict"] = {} return kwargs - @pytest.fixture(scope="function") + @pytest.fixture def store(self, store_kwargs: str | None | dict[str, Buffer]) -> MemoryStore: return self.store_cls(**store_kwargs) @@ -58,11 +58,11 @@ def set(self, store: GpuMemoryStore, key: str, value: Buffer) -> None: def get(self, store: MemoryStore, key: str) -> Buffer: return store._store_dict[key] - @pytest.fixture(scope="function", params=[None, {}]) + @pytest.fixture(params=[None, {}]) def store_kwargs(self, request) -> dict[str, str | None | dict[str, Buffer]]: return {"store_dict": request.param, "mode": "r+"} - @pytest.fixture(scope="function") + @pytest.fixture def store(self, store_kwargs: str | None | dict[str, gpu.Buffer]) -> GpuMemoryStore: return self.store_cls(**store_kwargs) diff --git a/tests/v3/test_store/test_remote.py b/tests/v3/test_store/test_remote.py index ac77a50f8c..6010f7eca8 100644 --- a/tests/v3/test_store/test_remote.py +++ b/tests/v3/test_store/test_remote.py @@ -55,7 +55,7 @@ def get_boto3_client() -> botocore.client.BaseClient: return session.create_client("s3", endpoint_url=endpoint_url) -@pytest.fixture(autouse=True, 
scope="function") +@pytest.fixture(autouse=True) def s3(s3_base: None) -> Generator[s3fs.S3FileSystem, None, None]: """ Quoting Martin Durant: @@ -110,14 +110,14 @@ class TestRemoteStoreS3(StoreTests[RemoteStore, cpu.Buffer]): store_cls = RemoteStore buffer_cls = cpu.Buffer - @pytest.fixture(scope="function") + @pytest.fixture def store_kwargs(self, request) -> dict[str, str | bool]: fs, path = fsspec.url_to_fs( f"s3://{test_bucket_name}", endpoint_url=endpoint_url, anon=False ) return {"fs": fs, "path": path, "mode": "r+"} - @pytest.fixture(scope="function") + @pytest.fixture def store(self, store_kwargs: dict[str, str | bool]) -> RemoteStore: return self.store_cls(**store_kwargs) diff --git a/tests/v3/test_store/test_zip.py b/tests/v3/test_store/test_zip.py index 7c332e9a2e..595d1a3e53 100644 --- a/tests/v3/test_store/test_zip.py +++ b/tests/v3/test_store/test_zip.py @@ -22,7 +22,7 @@ class TestZipStore(StoreTests[ZipStore, cpu.Buffer]): store_cls = ZipStore buffer_cls = cpu.Buffer - @pytest.fixture(scope="function") + @pytest.fixture def store_kwargs(self, request) -> dict[str, str | bool]: fd, temp_path = tempfile.mkstemp() os.close(fd) From 1574e8b14387bf6960dc1005edeacdac460a540f Mon Sep 17 00:00:00 2001 From: Sanket Verma Date: Thu, 26 Sep 2024 04:05:14 +0530 Subject: [PATCH 09/21] Replace Gitter with Zulip (#2254) --- .github/ISSUE_TEMPLATE/config.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index 9cb5ec9a78..809e4e31b6 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -3,9 +3,9 @@ contact_links: - name: ✨ Propose a new major feature url: https://github.com/zarr-developers/zarr-specs about: A new major feature should be discussed in the Zarr specifications repository. - - name: ❓ Discuss something on gitter - url: https://gitter.im/zarr-developers/community - about: For questions like "How do I do X with Zarr?", you can move to our Gitter channel. + - name: ❓ Discuss something on ZulipChat + url: https://ossci.zulipchat.com/ + about: For questions like "How do I do X with Zarr?", you can move to our ZulipChat. - name: ❓ Discuss something on GitHub Discussions url: https://github.com/zarr-developers/zarr-python/discussions about: For questions like "How do I do X with Zarr?", you can move to GitHub Discussions. 
From 3ce7670ee39db5b36c4a83c67955077894f527ad Mon Sep 17 00:00:00 2001 From: jakirkham Date: Wed, 25 Sep 2024 15:42:42 -0700 Subject: [PATCH 10/21] Use `map(str, *)` in `test_accessed_chunks` (#2229) * Update test_indexing.py * style: pre-commit fixes --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Joe Hamman --- tests/v3/test_indexing.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/v3/test_indexing.py b/tests/v3/test_indexing.py index 890c487b00..da358afbd6 100644 --- a/tests/v3/test_indexing.py +++ b/tests/v3/test_indexing.py @@ -1782,9 +1782,7 @@ async def test_accessed_chunks( # Combine and generate the cartesian product to determine the chunks keys that # will be accessed - chunks_accessed = [ - ".".join([str(ci) for ci in comb]) for comb in itertools.product(*chunks_per_dim) - ] + chunks_accessed = [".".join(map(str, comb)) for comb in itertools.product(*chunks_per_dim)] counts_before = store.counter.copy() From e968ac5c3d6a03728228ff65908d9b8273801f39 Mon Sep 17 00:00:00 2001 From: Dimitri Papadopoulos Orfanos <3234522+DimitriPapadopoulos@users.noreply.github.com> Date: Thu, 26 Sep 2024 00:48:21 +0200 Subject: [PATCH 11/21] Enforce ruff/flake8-comprehensions rules (C4) (#2239) * Apply ruff/flake8-comprehensions rule C401 C401 Unnecessary generator (rewrite as a `set` comprehension) * Apply ruff/flake8-comprehensions rule C408 C408 Unnecessary `dict` call (rewrite as a literal) * Apply ruff/flake8-comprehensions rule C409 C409 Unnecessary list comprehension passed to `tuple()` (rewrite as a generator) * Apply ruff/flake8-comprehensions rule C416 C416 Unnecessary `list` comprehension (rewrite using `list()`) * Enforce ruff/flake8-comprehensions rules (C4) --------- Co-authored-by: Joe Hamman --- bench/compress_normal.py | 2 +- pyproject.toml | 1 + src/zarr/codecs/sharding.py | 8 ++++---- src/zarr/store/memory.py | 2 +- src/zarr/store/zip.py | 2 +- tests/v3/test_group.py | 2 +- tests/v3/test_store/test_remote.py | 2 +- 7 files changed, 10 insertions(+), 9 deletions(-) diff --git a/bench/compress_normal.py b/bench/compress_normal.py index 608cfe8dce..179520a0e4 100644 --- a/bench/compress_normal.py +++ b/bench/compress_normal.py @@ -16,7 +16,7 @@ a, chunks=1000000, compression="blosc", - compression_opts=dict(cname="lz4", clevel=5, shuffle=2), + compression_opts={"cname": "lz4", "clevel": 5, "shuffle": 2}, ) print(z) diff --git a/pyproject.toml b/pyproject.toml index a10c22d08e..33aa538141 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -208,6 +208,7 @@ extend-exclude = [ [tool.ruff.lint] extend-select = [ "B", # flake8-bugbear + "C4", # flake8-comprehensions "FLY", # flynt "I", # isort "ISC", # flake8-implicit-str-concat diff --git a/src/zarr/codecs/sharding.py b/src/zarr/codecs/sharding.py index 3ae51ce54b..6282750f20 100644 --- a/src/zarr/codecs/sharding.py +++ b/src/zarr/codecs/sharding.py @@ -151,7 +151,7 @@ def is_dense(self, chunk_byte_length: int) -> bool: # Are all non-empty offsets unique? 
if len( - set(offset for offset, _ in sorted_offsets_and_lengths if offset != MAX_UINT_64) + {offset for offset, _ in sorted_offsets_and_lengths if offset != MAX_UINT_64} ) != len(sorted_offsets_and_lengths): return False @@ -380,8 +380,8 @@ def to_dict(self) -> dict[str, JSON]: "name": "sharding_indexed", "configuration": { "chunk_shape": self.chunk_shape, - "codecs": tuple([s.to_dict() for s in self.codecs]), - "index_codecs": tuple([s.to_dict() for s in self.index_codecs]), + "codecs": tuple(s.to_dict() for s in self.codecs), + "index_codecs": tuple(s.to_dict() for s in self.index_codecs), "index_location": self.index_location.value, }, } @@ -477,7 +477,7 @@ async def _decode_partial_single( ) indexed_chunks = list(indexer) - all_chunk_coords = set(chunk_coords for chunk_coords, _, _ in indexed_chunks) + all_chunk_coords = {chunk_coords for chunk_coords, _, _ in indexed_chunks} # reading bytes of all requested chunks shard_dict: ShardMapping = {} diff --git a/src/zarr/store/memory.py b/src/zarr/store/memory.py index 351667f646..16599b2605 100644 --- a/src/zarr/store/memory.py +++ b/src/zarr/store/memory.py @@ -135,7 +135,7 @@ async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: prefix = prefix[:-1] if prefix == "": - keys_unique = set(k.split("/")[0] for k in self._store_dict) + keys_unique = {k.split("/")[0] for k in self._store_dict} else: # Our dictionary doesn't contain directory markers, but we want to include # a pseudo directory when there's a nested item and we're listing an diff --git a/src/zarr/store/zip.py b/src/zarr/store/zip.py index 2e4927aced..2c3b16a65d 100644 --- a/src/zarr/store/zip.py +++ b/src/zarr/store/zip.py @@ -232,7 +232,7 @@ async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: keys = self._zf.namelist() seen = set() if prefix == "": - keys_unique = set(k.split("/")[0] for k in keys) + keys_unique = {k.split("/")[0] for k in keys} for key in keys_unique: if key not in seen: seen.add(key) diff --git a/tests/v3/test_group.py b/tests/v3/test_group.py index 22499dfb40..8c6464d3b5 100644 --- a/tests/v3/test_group.py +++ b/tests/v3/test_group.py @@ -271,7 +271,7 @@ def test_group_iter(store: Store, zarr_format: ZarrFormat) -> None: group = Group.from_store(store, zarr_format=zarr_format) with pytest.raises(NotImplementedError): - [x for x in group] + list(group) def test_group_len(store: Store, zarr_format: ZarrFormat) -> None: diff --git a/tests/v3/test_store/test_remote.py b/tests/v3/test_store/test_remote.py index 6010f7eca8..ca74fc1842 100644 --- a/tests/v3/test_store/test_remote.py +++ b/tests/v3/test_store/test_remote.py @@ -92,7 +92,7 @@ async def test_basic() -> None: store = RemoteStore.from_url( f"s3://{test_bucket_name}", mode="w", - storage_options=dict(endpoint_url=endpoint_url, anon=False), + storage_options={"endpoint_url": endpoint_url, "anon": False}, ) assert await _collect_aiterator(store.list()) == () assert not await store.exists("foo") From c3f5401aa328388d12a8328e25d14419e4f254f3 Mon Sep 17 00:00:00 2001 From: Dimitri Papadopoulos Orfanos <3234522+DimitriPapadopoulos@users.noreply.github.com> Date: Thu, 26 Sep 2024 14:18:19 +0200 Subject: [PATCH 12/21] Remove unnecessary lambda expression (#2260) --- src/zarr/testing/strategies.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zarr/testing/strategies.py b/src/zarr/testing/strategies.py index 588692e704..e77c14b486 100644 --- a/src/zarr/testing/strategies.py +++ b/src/zarr/testing/strategies.py @@ -60,7 +60,7 @@ def v2_dtypes() -> 
st.SearchStrategy[np.dtype]: ) array_names = node_names attrs = st.none() | st.dictionaries(_attr_keys, _attr_values) -paths = st.lists(node_names, min_size=1).map(lambda x: "/".join(x)) | st.just("/") +paths = st.lists(node_names, min_size=1).map("/".join) | st.just("/") stores = st.builds(MemoryStore, st.just({}), mode=st.just("w")) compressors = st.sampled_from([None, "default"]) zarr_formats: st.SearchStrategy[Literal[2, 3]] = st.sampled_from([2, 3]) From 265837cc7ac77fe237130a83eab36852e54d84bc Mon Sep 17 00:00:00 2001 From: Dimitri Papadopoulos Orfanos <3234522+DimitriPapadopoulos@users.noreply.github.com> Date: Thu, 26 Sep 2024 17:45:47 +0200 Subject: [PATCH 13/21] No need to run DeepSource any more - we use ruff (#2261) --- .deepsource.toml | 7 ------- 1 file changed, 7 deletions(-) delete mode 100644 .deepsource.toml diff --git a/.deepsource.toml b/.deepsource.toml deleted file mode 100644 index e68653328f..0000000000 --- a/.deepsource.toml +++ /dev/null @@ -1,7 +0,0 @@ -version = 1 - -test_patterns = ["zarr/tests/test_*.py"] - -[[analyzers]] -name = "python" -enabled = true From 19ed733f13924f162d63279f6d3c42eb31de9a31 Mon Sep 17 00:00:00 2001 From: Dimitri Papadopoulos Orfanos <3234522+DimitriPapadopoulos@users.noreply.github.com> Date: Thu, 26 Sep 2024 19:25:03 +0200 Subject: [PATCH 14/21] Apply ruff/flake8-annotations rule ANN204 (#2258) ANN204 Missing return type annotation for special method `__init__` --- src/zarr/core/metadata/v2.py | 2 +- src/zarr/core/metadata/v3.py | 2 +- src/zarr/store/zip.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/zarr/core/metadata/v2.py b/src/zarr/core/metadata/v2.py index 27c1badafd..4a9baaf8b8 100644 --- a/src/zarr/core/metadata/v2.py +++ b/src/zarr/core/metadata/v2.py @@ -52,7 +52,7 @@ def __init__( compressor: numcodecs.abc.Codec | dict[str, JSON] | None = None, filters: Iterable[numcodecs.abc.Codec | dict[str, JSON]] | None = None, attributes: dict[str, JSON] | None = None, - ): + ) -> None: """ Metadata for a Zarr version 2 array. """ diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py index f1cb07fd4c..16bfc822a5 100644 --- a/src/zarr/core/metadata/v3.py +++ b/src/zarr/core/metadata/v3.py @@ -72,7 +72,7 @@ def parse_dimension_names(data: object) -> tuple[str | None, ...] | None: class V3JsonEncoder(json.JSONEncoder): - def __init__(self, *args: Any, **kwargs: Any): + def __init__(self, *args: Any, **kwargs: Any) -> None: self.indent = kwargs.pop("indent", config.get("json_indent")) super().__init__(*args, **kwargs) diff --git a/src/zarr/store/zip.py b/src/zarr/store/zip.py index 2c3b16a65d..cd9df4d374 100644 --- a/src/zarr/store/zip.py +++ b/src/zarr/store/zip.py @@ -56,7 +56,7 @@ def __init__( mode: ZipStoreAccessModeLiteral = "r", compression: int = zipfile.ZIP_STORED, allowZip64: bool = True, - ): + ) -> None: super().__init__(mode=mode) if isinstance(path, str): From f0443dba39407365d94fbe77f2bfe683c7044ec2 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Thu, 26 Sep 2024 20:56:14 +0200 Subject: [PATCH 15/21] Add array storage helpers (#2065) * implement store.list_prefix and store._set_dict * simplify string handling * add nchunks_initialized, and necessary additions for it * rename _iter_chunks to _iter_chunk_coords * fix test name * bring in correct store list_dir implementations * bump numcodecs to dodge zstd exception * remove store._set_dict, and add _set_many and get_many instead * update deprecation warning template * add a type annotation * refactor chunk iterators. 
they are not properties any more, just methods, and they can take an origin kwarg * _get_many returns tuple[str, buffer] * stricter store types * fix types * lint * remove deprecation warnings * fix zip list_prefix * tests for nchunks_initialized, chunks_initialized; add selection_shape kwarg to grid iteration; make chunk grid iterators consistent for array and async array * add nchunks test * fix docstrings * fix docstring * revert unnecessary changes to project config --- src/zarr/abc/store.py | 47 +++-- src/zarr/codecs/sharding.py | 6 +- src/zarr/core/array.py | 275 ++++++++++++++++++++++++++++- src/zarr/core/common.py | 2 +- src/zarr/core/indexing.py | 80 +++++++++ src/zarr/store/_utils.py | 7 +- src/zarr/store/common.py | 6 +- src/zarr/store/local.py | 10 +- src/zarr/store/memory.py | 10 +- src/zarr/store/remote.py | 12 +- src/zarr/store/zip.py | 12 +- src/zarr/testing/store.py | 24 ++- tests/v3/conftest.py | 10 ++ tests/v3/test_array.py | 73 ++++++++ tests/v3/test_indexing.py | 52 ++++++ tests/v3/test_store/test_remote.py | 4 - 16 files changed, 578 insertions(+), 52 deletions(-) diff --git a/src/zarr/abc/store.py b/src/zarr/abc/store.py index c453733f00..42eb18ce0b 100644 --- a/src/zarr/abc/store.py +++ b/src/zarr/abc/store.py @@ -1,16 +1,24 @@ +from __future__ import annotations + from abc import ABC, abstractmethod from asyncio import gather from collections.abc import AsyncGenerator, Iterable -from types import TracebackType -from typing import Any, NamedTuple, Protocol, runtime_checkable +from typing import TYPE_CHECKING, Any, NamedTuple, Protocol, runtime_checkable + +if TYPE_CHECKING: + from collections.abc import AsyncGenerator, Iterable + from types import TracebackType + from typing import Any, TypeAlias -from typing_extensions import Self + from typing_extensions import Self -from zarr.core.buffer import Buffer, BufferPrototype -from zarr.core.common import AccessModeLiteral, BytesLike + from zarr.core.buffer import Buffer, BufferPrototype + from zarr.core.common import AccessModeLiteral, BytesLike __all__ = ["Store", "AccessMode", "ByteGetter", "ByteSetter", "set_or_delete"] +ByteRangeRequest: TypeAlias = tuple[int | None, int | None] + class AccessMode(NamedTuple): str: AccessModeLiteral @@ -100,14 +108,14 @@ async def get( self, key: str, prototype: BufferPrototype, - byte_range: tuple[int | None, int | None] | None = None, + byte_range: ByteRangeRequest | None = None, ) -> Buffer | None: """Retrieve the value associated with a given key. Parameters ---------- key : str - byte_range : tuple[int, Optional[int]], optional + byte_range : tuple[int | None, int | None], optional Returns ------- @@ -119,13 +127,13 @@ async def get( async def get_partial_values( self, prototype: BufferPrototype, - key_ranges: list[tuple[str, tuple[int | None, int | None]]], + key_ranges: Iterable[tuple[str, ByteRangeRequest]], ) -> list[Buffer | None]: """Retrieve possibly partial values from given key_ranges. Parameters ---------- - key_ranges : list[tuple[str, tuple[int, int]]] + key_ranges : Iterable[tuple[str, tuple[int | None, int | None]]] Ordered set of key, range pairs, a key may occur multiple times with different ranges Returns @@ -195,7 +203,9 @@ def supports_partial_writes(self) -> bool: ... @abstractmethod - async def set_partial_values(self, key_start_values: list[tuple[str, int, BytesLike]]) -> None: + async def set_partial_values( + self, key_start_values: Iterable[tuple[str, int, BytesLike]] + ) -> None: """Store values at a given key, starting at byte range_start. 
Parameters @@ -259,21 +269,32 @@ def close(self) -> None: """Close the store.""" self._is_open = False + async def _get_many( + self, requests: Iterable[tuple[str, BufferPrototype, ByteRangeRequest | None]] + ) -> AsyncGenerator[tuple[str, Buffer | None], None]: + """ + Retrieve a collection of objects from storage. In general this method does not guarantee + that objects will be retrieved in the order in which they were requested, so this method + yields tuple[str, Buffer | None] instead of just Buffer | None + """ + for req in requests: + yield (req[0], await self.get(*req)) + @runtime_checkable class ByteGetter(Protocol): async def get( - self, prototype: BufferPrototype, byte_range: tuple[int, int | None] | None = None + self, prototype: BufferPrototype, byte_range: ByteRangeRequest | None = None ) -> Buffer | None: ... @runtime_checkable class ByteSetter(Protocol): async def get( - self, prototype: BufferPrototype, byte_range: tuple[int, int | None] | None = None + self, prototype: BufferPrototype, byte_range: ByteRangeRequest | None = None ) -> Buffer | None: ... - async def set(self, value: Buffer, byte_range: tuple[int, int] | None = None) -> None: ... + async def set(self, value: Buffer, byte_range: ByteRangeRequest | None = None) -> None: ... async def delete(self) -> None: ... diff --git a/src/zarr/codecs/sharding.py b/src/zarr/codecs/sharding.py index 6282750f20..2f8946e465 100644 --- a/src/zarr/codecs/sharding.py +++ b/src/zarr/codecs/sharding.py @@ -17,7 +17,7 @@ Codec, CodecPipeline, ) -from zarr.abc.store import ByteGetter, ByteSetter +from zarr.abc.store import ByteGetter, ByteRangeRequest, ByteSetter from zarr.codecs.bytes import BytesCodec from zarr.codecs.crc32c_ import Crc32cCodec from zarr.core.array_spec import ArraySpec @@ -78,7 +78,7 @@ class _ShardingByteGetter(ByteGetter): chunk_coords: ChunkCoords async def get( - self, prototype: BufferPrototype, byte_range: tuple[int, int | None] | None = None + self, prototype: BufferPrototype, byte_range: ByteRangeRequest | None = None ) -> Buffer | None: assert byte_range is None, "byte_range is not supported within shards" assert ( @@ -91,7 +91,7 @@ async def get( class _ShardingByteSetter(_ShardingByteGetter, ByteSetter): shard_dict: ShardMutableMapping - async def set(self, value: Buffer, byte_range: tuple[int, int] | None = None) -> None: + async def set(self, value: Buffer, byte_range: ByteRangeRequest | None = None) -> None: assert byte_range is None, "byte_range is not supported within shards" self.shard_dict[self.chunk_coords] = value diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index f7747e9b2b..fac0facd7d 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -49,6 +49,8 @@ OrthogonalSelection, Selection, VIndex, + _iter_grid, + ceildiv, check_fields, check_no_multi_fields, is_pure_fancy_indexing, @@ -58,7 +60,7 @@ ) from zarr.core.metadata.v2 import ArrayV2Metadata from zarr.core.metadata.v3 import ArrayV3Metadata -from zarr.core.sync import sync +from zarr.core.sync import collect_aiterator, sync from zarr.registry import get_pipeline_class from zarr.store import StoreLike, StorePath, make_store_path from zarr.store.common import ( @@ -66,7 +68,7 @@ ) if TYPE_CHECKING: - from collections.abc import Iterable + from collections.abc import Iterable, Iterator, Sequence from zarr.abc.codec import Codec, CodecPipeline from zarr.core.metadata.common import ArrayMetadata @@ -390,10 +392,12 @@ def shape(self) -> ChunkCoords: def chunks(self) -> ChunkCoords: if 
isinstance(self.metadata.chunk_grid, RegularChunkGrid): return self.metadata.chunk_grid.chunk_shape - else: - raise TypeError( - f"chunk attribute is only available for RegularChunkGrid, this array has a {self.metadata.chunk_grid}" - ) + + msg = ( + f"The `chunks` attribute is only defined for arrays using `RegularChunkGrid`." + f"This array has a {self.metadata.chunk_grid} instead." + ) + raise NotImplementedError(msg) @property def size(self) -> int: @@ -434,6 +438,111 @@ def basename(self) -> str | None: return self.name.split("/")[-1] return None + @property + def cdata_shape(self) -> ChunkCoords: + """ + The shape of the chunk grid for this array. + """ + return tuple(ceildiv(s, c) for s, c in zip(self.shape, self.chunks, strict=False)) + + @property + def nchunks(self) -> int: + """ + The number of chunks in the stored representation of this array. + """ + return product(self.cdata_shape) + + @property + def nchunks_initialized(self) -> int: + """ + The number of chunks that have been persisted in storage. + """ + return nchunks_initialized(self) + + def _iter_chunk_coords( + self, *, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None + ) -> Iterator[ChunkCoords]: + """ + Create an iterator over the coordinates of chunks in chunk grid space. If the `origin` + keyword is used, iteration will start at the chunk index specified by `origin`. + The default behavior is to start at the origin of the grid coordinate space. + If the `selection_shape` keyword is used, iteration will be bounded over a contiguous region + ranging from `[origin, origin + selection_shape]`, where the upper bound is exclusive as + per python indexing conventions. + + Parameters + ---------- + origin: Sequence[int] | None, default=None + The origin of the selection relative to the array's chunk grid. + selection_shape: Sequence[int] | None, default=None + The shape of the selection in chunk grid coordinates. + + Yields + ------ + chunk_coords: ChunkCoords + The coordinates of each chunk in the selection. + """ + return _iter_grid(self.cdata_shape, origin=origin, selection_shape=selection_shape) + + def _iter_chunk_keys( + self, *, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None + ) -> Iterator[str]: + """ + Iterate over the storage keys of each chunk, relative to an optional origin, and optionally + limited to a contiguous region in chunk grid coordinates. + + Parameters + ---------- + origin: Sequence[int] | None, default=None + The origin of the selection relative to the array's chunk grid. + selection_shape: Sequence[int] | None, default=None + The shape of the selection in chunk grid coordinates. + + Yields + ------ + key: str + The storage key of each chunk in the selection. + """ + # Iterate over the coordinates of chunks in chunk grid space. + for k in self._iter_chunk_coords(origin=origin, selection_shape=selection_shape): + # Encode the chunk key from the chunk coordinates. + yield self.metadata.encode_chunk_key(k) + + def _iter_chunk_regions( + self, *, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None + ) -> Iterator[tuple[slice, ...]]: + """ + Iterate over the regions spanned by each chunk. + + Parameters + ---------- + origin: Sequence[int] | None, default=None + The origin of the selection relative to the array's chunk grid. + selection_shape: Sequence[int] | None, default=None + The shape of the selection in chunk grid coordinates. + + Yields + ------ + region: tuple[slice, ...] 
+ A tuple of slice objects representing the region spanned by each chunk in the selection. + """ + for cgrid_position in self._iter_chunk_coords( + origin=origin, selection_shape=selection_shape + ): + out: tuple[slice, ...] = () + for c_pos, c_shape in zip(cgrid_position, self.chunks, strict=False): + start = c_pos * c_shape + stop = start + c_shape + out += (slice(start, stop, 1),) + yield out + + @property + def nbytes(self) -> int: + """ + The number of bytes that can be stored in this array. + """ + return self.nchunks * self.dtype.itemsize + async def _get_selection( self, indexer: Indexer, @@ -742,6 +851,106 @@ def read_only(self) -> bool: def fill_value(self) -> Any: return self.metadata.fill_value + @property + def cdata_shape(self) -> ChunkCoords: + """ + The shape of the chunk grid for this array. + """ + return tuple(ceildiv(s, c) for s, c in zip(self.shape, self.chunks, strict=False)) + + @property + def nchunks(self) -> int: + """ + The number of chunks in the stored representation of this array. + """ + return self._async_array.nchunks + + def _iter_chunk_coords( + self, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None + ) -> Iterator[ChunkCoords]: + """ + Create an iterator over the coordinates of chunks in chunk grid space. If the `origin` + keyword is used, iteration will start at the chunk index specified by `origin`. + The default behavior is to start at the origin of the grid coordinate space. + If the `selection_shape` keyword is used, iteration will be bounded over a contiguous region + ranging from `[origin, origin + selection_shape]`, where the upper bound is exclusive as + per python indexing conventions. + + Parameters + ---------- + origin: Sequence[int] | None, default=None + The origin of the selection relative to the array's chunk grid. + selection_shape: Sequence[int] | None, default=None + The shape of the selection in chunk grid coordinates. + + Yields + ------ + chunk_coords: ChunkCoords + The coordinates of each chunk in the selection. + """ + yield from self._async_array._iter_chunk_coords( + origin=origin, selection_shape=selection_shape + ) + + @property + def nbytes(self) -> int: + """ + The number of bytes that can be stored in this array. + """ + return self._async_array.nbytes + + @property + def nchunks_initialized(self) -> int: + """ + The number of chunks that have been initialized in the stored representation of this array. + """ + return self._async_array.nchunks_initialized + + def _iter_chunk_keys( + self, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None + ) -> Iterator[str]: + """ + Iterate over the storage keys of each chunk, relative to an optional origin, and optionally + limited to a contiguous region in chunk grid coordinates. + + Parameters + ---------- + origin: Sequence[int] | None, default=None + The origin of the selection relative to the array's chunk grid. + selection_shape: Sequence[int] | None, default=None + The shape of the selection in chunk grid coordinates. + + Yields + ------ + key: str + The storage key of each chunk in the selection. + """ + yield from self._async_array._iter_chunk_keys( + origin=origin, selection_shape=selection_shape + ) + + def _iter_chunk_regions( + self, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None + ) -> Iterator[tuple[slice, ...]]: + """ + Iterate over the regions spanned by each chunk. 
+ + Parameters + ---------- + origin: Sequence[int] | None, default=None + The origin of the selection relative to the array's chunk grid. + selection_shape: Sequence[int] | None, default=None + The shape of the selection in chunk grid coordinates. + + Yields + ------ + region: tuple[slice, ...] + A tuple of slice objects representing the region spanned by each chunk in the selection. + """ + yield from self._async_array._iter_chunk_regions( + origin=origin, selection_shape=selection_shape + ) + def __array__( self, dtype: npt.DTypeLike | None = None, copy: bool | None = None ) -> NDArrayLike: @@ -2073,3 +2282,57 @@ def info(self) -> None: return sync( self._async_array.info(), ) + + +def nchunks_initialized(array: AsyncArray | Array) -> int: + """ + Calculate the number of chunks that have been initialized, i.e. the number of chunks that have + been persisted to the storage backend. + + Parameters + ---------- + array : Array + The array to inspect. + + Returns + ------- + nchunks_initialized : int + The number of chunks that have been initialized. + + See Also + -------- + chunks_initialized + """ + return len(chunks_initialized(array)) + + +def chunks_initialized(array: Array | AsyncArray) -> tuple[str, ...]: + """ + Return the keys of the chunks that have been persisted to the storage backend. + + Parameters + ---------- + array : Array + The array to inspect. + + Returns + ------- + chunks_initialized : tuple[str, ...] + The keys of the chunks that have been initialized. + + See Also + -------- + nchunks_initialized + + """ + # TODO: make this compose with the underlying async iterator + store_contents = list( + collect_aiterator(array.store_path.store.list_prefix(prefix=array.store_path.path)) + ) + out: list[str] = [] + + for chunk_key in array._iter_chunk_keys(): + if chunk_key in store_contents: + out.append(chunk_key) + + return tuple(out) diff --git a/src/zarr/core/common.py b/src/zarr/core/common.py index 6847bd419f..80c743cc90 100644 --- a/src/zarr/core/common.py +++ b/src/zarr/core/common.py @@ -45,7 +45,7 @@ def product(tup: ChunkCoords) -> int: async def concurrent_map( - items: list[T], func: Callable[..., Awaitable[V]], limit: int | None = None + items: Iterable[T], func: Callable[..., Awaitable[V]], limit: int | None = None ) -> list[V]: if limit is None: return await asyncio.gather(*[func(*item) for item in items]) diff --git a/src/zarr/core/indexing.py b/src/zarr/core/indexing.py index 3968a057f8..1c153fc161 100644 --- a/src/zarr/core/indexing.py +++ b/src/zarr/core/indexing.py @@ -12,8 +12,10 @@ from typing import ( TYPE_CHECKING, Any, + Literal, NamedTuple, Protocol, + TypeAlias, TypeGuard, TypeVar, cast, @@ -95,6 +97,84 @@ def ceildiv(a: float, b: float) -> int: return math.ceil(a / b) +_ArrayIndexingOrder: TypeAlias = Literal["lexicographic"] + + +def _iter_grid( + grid_shape: Sequence[int], + *, + origin: Sequence[int] | None = None, + selection_shape: Sequence[int] | None = None, + order: _ArrayIndexingOrder = "lexicographic", +) -> Iterator[ChunkCoords]: + """ + Iterate over the elements of grid of integers, with the option to restrict the domain of + iteration to a contiguous subregion of that grid. + + Parameters + ---------- + grid_shape: Sequence[int] + The size of the domain to iterate over. + origin: Sequence[int] | None, default=None + The first coordinate of the domain to return. + selection_shape: Sequence[int] | None, default=None + The shape of the selection. 
+    order: Literal["lexicographic"], default="lexicographic"
+        The linear indexing order to use.
+
+    Returns
+    -------
+    Iterator[ChunkCoords]
+        An iterator over tuples of integers.
+
+    Examples
+    --------
+    >>> tuple(_iter_grid((1,)))
+    ((0,),)
+
+    >>> tuple(_iter_grid((2,3)))
+    ((0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2))
+
+    >>> tuple(_iter_grid((2,3), origin=(1,1)))
+    ((1, 1), (1, 2))
+
+    >>> tuple(_iter_grid((3,3), origin=(1,1), selection_shape=(2,2)))
+    ((1, 1), (1, 2), (2, 1), (2, 2))
+    """
+    if origin is None:
+        origin_parsed = (0,) * len(grid_shape)
+    else:
+        if len(origin) != len(grid_shape):
+            msg = (
+                "Shape and origin parameters must have the same length. "
+                f"Got {len(grid_shape)} elements in shape, but {len(origin)} elements in origin."
+            )
+            raise ValueError(msg)
+        origin_parsed = tuple(origin)
+    if selection_shape is None:
+        selection_shape_parsed = tuple(
+            g - o for o, g in zip(origin_parsed, grid_shape, strict=True)
+        )
+    else:
+        selection_shape_parsed = tuple(selection_shape)
+    if order == "lexicographic":
+        dimensions: tuple[range, ...] = ()
+        for idx, (o, gs, ss) in enumerate(
+            zip(origin_parsed, grid_shape, selection_shape_parsed, strict=True)
+        ):
+            if o + ss > gs:
+                raise IndexError(
+                    f"Invalid selection shape ({selection_shape}) for origin ({origin}) and grid shape ({grid_shape}) at axis {idx}."
+                )
+            dimensions += (range(o, o + ss),)
+        yield from itertools.product(*(dimensions))
+
+    else:
+        msg = f"Indexing order {order} is not supported at this time."  # type: ignore[unreachable]
+        raise NotImplementedError(msg)
+
+
 def is_integer(x: Any) -> TypeGuard[int]:
     """True if x is an integer (both pure Python or NumPy)."""
     return isinstance(x, numbers.Integral) and not is_bool(x)
diff --git a/src/zarr/store/_utils.py b/src/zarr/store/_utils.py
index 04a06351c5..cbc9c42bbd 100644
--- a/src/zarr/store/_utils.py
+++ b/src/zarr/store/_utils.py
@@ -1,4 +1,9 @@
-from zarr.core.buffer import Buffer
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from zarr.core.buffer import Buffer
 
 
 def _normalize_interval_index(
diff --git a/src/zarr/store/common.py b/src/zarr/store/common.py
index 0c126c63da..f39edb19ac 100644
--- a/src/zarr/store/common.py
+++ b/src/zarr/store/common.py
@@ -4,7 +4,7 @@
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, Literal
 
-from zarr.abc.store import AccessMode, Store
+from zarr.abc.store import AccessMode, ByteRangeRequest, Store
 from zarr.core.buffer import Buffer, default_buffer_prototype
 from zarr.core.common import ZARR_JSON, ZARRAY_JSON, ZGROUP_JSON, ZarrFormat
 from zarr.errors import ContainsArrayAndGroupError, ContainsArrayError, ContainsGroupError
@@ -37,13 +37,13 @@ def __init__(self, store: Store, path: str | None = None) -> None:
     async def get(
         self,
         prototype: BufferPrototype | None = None,
-        byte_range: tuple[int, int | None] | None = None,
+        byte_range: ByteRangeRequest | None = None,
     ) -> Buffer | None:
         if prototype is None:
             prototype = default_buffer_prototype()
         return await self.store.get(self.path, prototype=prototype, byte_range=byte_range)
 
-    async def set(self, value: Buffer, byte_range: tuple[int, int] | None = None) -> None:
+    async def set(self, value: Buffer, byte_range: ByteRangeRequest | None = None) -> None:
         if byte_range is not None:
             raise NotImplementedError("Store.set does not have partial writes yet")
         await self.store.set(self.path, value)
diff --git a/src/zarr/store/local.py b/src/zarr/store/local.py
index
39a94969eb..f1bce769d2 100644 --- a/src/zarr/store/local.py +++ b/src/zarr/store/local.py @@ -6,12 +6,12 @@ from pathlib import Path from typing import TYPE_CHECKING -from zarr.abc.store import Store +from zarr.abc.store import ByteRangeRequest, Store from zarr.core.buffer import Buffer from zarr.core.common import concurrent_map, to_thread if TYPE_CHECKING: - from collections.abc import AsyncGenerator + from collections.abc import AsyncGenerator, Iterable from zarr.core.buffer import BufferPrototype from zarr.core.common import AccessModeLiteral @@ -131,7 +131,7 @@ async def get( async def get_partial_values( self, prototype: BufferPrototype, - key_ranges: list[tuple[str, tuple[int | None, int | None]]], + key_ranges: Iterable[tuple[str, ByteRangeRequest]], ) -> list[Buffer | None]: """ Read byte ranges from multiple keys. @@ -161,7 +161,9 @@ async def set(self, key: str, value: Buffer) -> None: path = self.root / key await to_thread(_put, path, value) - async def set_partial_values(self, key_start_values: list[tuple[str, int, bytes]]) -> None: + async def set_partial_values( + self, key_start_values: Iterable[tuple[str, int, bytes | bytearray | memoryview]] + ) -> None: self._check_writable() args = [] for key, start, value in key_start_values: diff --git a/src/zarr/store/memory.py b/src/zarr/store/memory.py index 16599b2605..ee4107b0ab 100644 --- a/src/zarr/store/memory.py +++ b/src/zarr/store/memory.py @@ -2,13 +2,13 @@ from typing import TYPE_CHECKING -from zarr.abc.store import Store +from zarr.abc.store import ByteRangeRequest, Store from zarr.core.buffer import Buffer, gpu from zarr.core.common import concurrent_map from zarr.store._utils import _normalize_interval_index if TYPE_CHECKING: - from collections.abc import AsyncGenerator, MutableMapping + from collections.abc import AsyncGenerator, Iterable, MutableMapping from zarr.core.buffer import BufferPrototype from zarr.core.common import AccessModeLiteral @@ -73,10 +73,10 @@ async def get( async def get_partial_values( self, prototype: BufferPrototype, - key_ranges: list[tuple[str, tuple[int | None, int | None]]], + key_ranges: Iterable[tuple[str, ByteRangeRequest]], ) -> list[Buffer | None]: # All the key-ranges arguments goes with the same prototype - async def _get(key: str, byte_range: tuple[int, int | None]) -> Buffer | None: + async def _get(key: str, byte_range: ByteRangeRequest) -> Buffer | None: return await self.get(key, prototype=prototype, byte_range=byte_range) return await concurrent_map(key_ranges, _get, limit=None) @@ -106,7 +106,7 @@ async def delete(self, key: str) -> None: except KeyError: pass # Q(JH): why not raise? 
- async def set_partial_values(self, key_start_values: list[tuple[str, int, bytes]]) -> None: + async def set_partial_values(self, key_start_values: Iterable[tuple[str, int, bytes]]) -> None: raise NotImplementedError async def list(self) -> AsyncGenerator[str, None]: diff --git a/src/zarr/store/remote.py b/src/zarr/store/remote.py index 7aea8a3780..284cd8d77f 100644 --- a/src/zarr/store/remote.py +++ b/src/zarr/store/remote.py @@ -4,11 +4,11 @@ import fsspec -from zarr.abc.store import Store +from zarr.abc.store import ByteRangeRequest, Store from zarr.store.common import _dereference_path if TYPE_CHECKING: - from collections.abc import AsyncGenerator + from collections.abc import AsyncGenerator, Iterable from fsspec.asyn import AsyncFileSystem @@ -110,7 +110,7 @@ async def get( self, key: str, prototype: BufferPrototype, - byte_range: tuple[int | None, int | None] | None = None, + byte_range: ByteRangeRequest | None = None, ) -> Buffer | None: if not self._is_open: await self._open() @@ -177,7 +177,7 @@ async def exists(self, key: str) -> bool: async def get_partial_values( self, prototype: BufferPrototype, - key_ranges: list[tuple[str, tuple[int | None, int | None]]], + key_ranges: Iterable[tuple[str, ByteRangeRequest]], ) -> list[Buffer | None]: if key_ranges: paths, starts, stops = zip( @@ -203,7 +203,9 @@ async def get_partial_values( return [None if isinstance(r, Exception) else prototype.buffer.from_bytes(r) for r in res] - async def set_partial_values(self, key_start_values: list[tuple[str, int, BytesLike]]) -> None: + async def set_partial_values( + self, key_start_values: Iterable[tuple[str, int, BytesLike]] + ) -> None: raise NotImplementedError async def list(self) -> AsyncGenerator[str, None]: diff --git a/src/zarr/store/zip.py b/src/zarr/store/zip.py index cd9df4d374..9496609138 100644 --- a/src/zarr/store/zip.py +++ b/src/zarr/store/zip.py @@ -7,11 +7,11 @@ from pathlib import Path from typing import TYPE_CHECKING, Any, Literal -from zarr.abc.store import Store +from zarr.abc.store import ByteRangeRequest, Store from zarr.core.buffer import Buffer, BufferPrototype if TYPE_CHECKING: - from collections.abc import AsyncGenerator + from collections.abc import AsyncGenerator, Iterable ZipStoreAccessModeLiteral = Literal["r", "w", "a"] @@ -128,7 +128,7 @@ def _get( self, key: str, prototype: BufferPrototype, - byte_range: tuple[int | None, int | None] | None = None, + byte_range: ByteRangeRequest | None = None, ) -> Buffer | None: try: with self._zf.open(key) as f: # will raise KeyError @@ -151,7 +151,7 @@ async def get( self, key: str, prototype: BufferPrototype, - byte_range: tuple[int | None, int | None] | None = None, + byte_range: ByteRangeRequest | None = None, ) -> Buffer | None: assert isinstance(key, str) @@ -161,7 +161,7 @@ async def get( async def get_partial_values( self, prototype: BufferPrototype, - key_ranges: list[tuple[str, tuple[int | None, int | None]]], + key_ranges: Iterable[tuple[str, ByteRangeRequest]], ) -> list[Buffer | None]: out = [] with self._lock: @@ -188,7 +188,7 @@ async def set(self, key: str, value: Buffer) -> None: with self._lock: self._set(key, value) - async def set_partial_values(self, key_start_values: list[tuple[str, int, bytes]]) -> None: + async def set_partial_values(self, key_start_values: Iterable[tuple[str, int, bytes]]) -> None: raise NotImplementedError async def delete(self, key: str) -> None: diff --git a/src/zarr/testing/store.py b/src/zarr/testing/store.py index 70d2e16efd..7b78b8ed00 100644 --- a/src/zarr/testing/store.py +++ 
b/src/zarr/testing/store.py
@@ -5,7 +5,7 @@
 
 from zarr.abc.store import AccessMode, Store
 from zarr.core.buffer import Buffer, default_buffer_prototype
-from zarr.core.sync import _collect_aiterator
+from zarr.core.sync import _collect_aiterator, collect_aiterator
 from zarr.store._utils import _normalize_interval_index
 from zarr.testing.utils import assert_bytes_equal
 
@@ -111,6 +111,28 @@ async def test_get(
         expected = data_buf[start : start + length]
         assert_bytes_equal(observed, expected)
 
+    async def test_get_many(self, store: S) -> None:
+        """
+        Ensure that multiple keys can be retrieved at once with the _get_many method.
+        """
+        keys = tuple(map(str, range(10)))
+        values = tuple(f"{k}".encode() for k in keys)
+        for k, v in zip(keys, values, strict=False):
+            self.set(store, k, self.buffer_cls.from_bytes(v))
+        observed_buffers = collect_aiterator(
+            store._get_many(
+                zip(
+                    keys,
+                    (default_buffer_prototype(),) * len(keys),
+                    (None,) * len(keys),
+                    strict=False,
+                )
+            )
+        )
+        observed_kvs = sorted(((k, b.to_bytes()) for k, b in observed_buffers))  # type: ignore[union-attr]
+        expected_kvs = sorted(((k, b) for k, b in zip(keys, values, strict=False)))
+        assert observed_kvs == expected_kvs
+
     @pytest.mark.parametrize("key", ["zarr.json", "c/0", "foo/c/0.0", "foo/0/0"])
     @pytest.mark.parametrize("data", [b"\x01\x02\x03\x04", b""])
     async def test_set(self, store: S, key: str, data: bytes) -> None:
diff --git a/tests/v3/conftest.py b/tests/v3/conftest.py
index fc1f950ad4..15a0b55b0e 100644
--- a/tests/v3/conftest.py
+++ b/tests/v3/conftest.py
@@ -129,6 +129,16 @@ def array_fixture(request: pytest.FixtureRequest) -> npt.NDArray[Any]:
     )
 
 
+@pytest.fixture(params=(2, 3))
+def zarr_format(request: pytest.FixtureRequest) -> ZarrFormat:
+    if request.param == 2:
+        return 2
+    elif request.param == 3:
+        return 3
+    msg = f"Invalid zarr format requested. Got {request.param}, expected one of (2, 3)."
+    raise ValueError(msg)
+
+
 settings.register_profile(
     "ci",
     max_examples=1000,
diff --git a/tests/v3/test_array.py b/tests/v3/test_array.py
index 02358cb39b..95bbde1740 100644
--- a/tests/v3/test_array.py
+++ b/tests/v3/test_array.py
@@ -1,12 +1,16 @@
 import pickle
+from itertools import accumulate
 from typing import Literal
 
 import numpy as np
 import pytest
 
 from zarr import Array, AsyncArray, Group
+from zarr.core.array import chunks_initialized
 from zarr.core.buffer.cpu import NDBuffer
 from zarr.core.common import ZarrFormat
+from zarr.core.indexing import ceildiv
+from zarr.core.sync import sync
 from zarr.errors import ContainsArrayError, ContainsGroupError
 from zarr.store import LocalStore, MemoryStore
 from zarr.store.common import StorePath
@@ -232,3 +236,72 @@ def test_serializable_sync_array(store: LocalStore, zarr_format: ZarrFormat) ->
 
     assert actual == expected
     np.testing.assert_array_equal(actual[:], expected[:])
+
+
+@pytest.mark.parametrize("test_cls", [Array, AsyncArray])
+@pytest.mark.parametrize("nchunks", [2, 5, 10])
+def test_nchunks(test_cls: type[Array] | type[AsyncArray], nchunks: int) -> None:
+    """
+    Test that nchunks returns the number of chunks defined for the array.
+ """ + store = MemoryStore({}, mode="w") + shape = 100 + arr = Array.create(store, shape=(shape,), chunks=(ceildiv(shape, nchunks),), dtype="i4") + expected = nchunks + if test_cls == Array: + observed = arr.nchunks + else: + observed = arr._async_array.nchunks + assert observed == expected + + +@pytest.mark.parametrize("test_cls", [Array, AsyncArray]) +def test_nchunks_initialized(test_cls: type[Array] | type[AsyncArray]) -> None: + """ + Test that nchunks_initialized accurately returns the number of stored chunks. + """ + store = MemoryStore({}, mode="w") + arr = Array.create(store, shape=(100,), chunks=(10,), dtype="i4") + + # write chunks one at a time + for idx, region in enumerate(arr._iter_chunk_regions()): + arr[region] = 1 + expected = idx + 1 + if test_cls == Array: + observed = arr.nchunks_initialized + else: + observed = arr._async_array.nchunks_initialized + assert observed == expected + + # delete chunks + for idx, key in enumerate(arr._iter_chunk_keys()): + sync(arr.store_path.store.delete(key)) + if test_cls == Array: + observed = arr.nchunks_initialized + else: + observed = arr._async_array.nchunks_initialized + expected = arr.nchunks - idx - 1 + assert observed == expected + + +@pytest.mark.parametrize("test_cls", [Array, AsyncArray]) +def test_chunks_initialized(test_cls: type[Array] | type[AsyncArray]) -> None: + """ + Test that chunks_initialized accurately returns the keys of stored chunks. + """ + store = MemoryStore({}, mode="w") + arr = Array.create(store, shape=(100,), chunks=(10,), dtype="i4") + + chunks_accumulated = tuple( + accumulate(tuple(tuple(v.split(" ")) for v in arr._iter_chunk_keys())) + ) + for keys, region in zip(chunks_accumulated, arr._iter_chunk_regions(), strict=False): + arr[region] = 1 + + if test_cls == Array: + observed = sorted(chunks_initialized(arr)) + else: + observed = sorted(chunks_initialized(arr._async_array)) + + expected = sorted(keys) + assert observed == expected diff --git a/tests/v3/test_indexing.py b/tests/v3/test_indexing.py index da358afbd6..59169c67b1 100644 --- a/tests/v3/test_indexing.py +++ b/tests/v3/test_indexing.py @@ -1,5 +1,6 @@ from __future__ import annotations +import itertools from collections import Counter from typing import TYPE_CHECKING, Any from uuid import uuid4 @@ -16,6 +17,7 @@ CoordinateSelection, OrthogonalSelection, Selection, + _iter_grid, make_slice_selection, normalize_integer_selection, oindex, @@ -1861,6 +1863,56 @@ def test_orthogonal_bool_indexing_like_numpy_ix( assert_array_equal(expected, actual, err_msg=f"{selection=}") +@pytest.mark.parametrize("ndim", [1, 2, 3]) +@pytest.mark.parametrize("origin_0d", [None, (0,), (1,)]) +@pytest.mark.parametrize("selection_shape_0d", [None, (2,), (3,)]) +def test_iter_grid( + ndim: int, origin_0d: tuple[int] | None, selection_shape_0d: tuple[int] | None +) -> None: + """ + Test that iter_grid works as expected for 1, 2, and 3 dimensions. 
+ """ + grid_shape = (5,) * ndim + + if origin_0d is not None: + origin_kwarg = origin_0d * ndim + origin = origin_kwarg + else: + origin_kwarg = None + origin = (0,) * ndim + + if selection_shape_0d is not None: + selection_shape_kwarg = selection_shape_0d * ndim + selection_shape = selection_shape_kwarg + else: + selection_shape_kwarg = None + selection_shape = tuple(gs - o for gs, o in zip(grid_shape, origin, strict=False)) + + observed = tuple( + _iter_grid(grid_shape, origin=origin_kwarg, selection_shape=selection_shape_kwarg) + ) + + # generate a numpy array of indices, and index it + coord_array = np.array(list(itertools.product(*[range(s) for s in grid_shape]))).reshape( + (*grid_shape, ndim) + ) + coord_array_indexed = coord_array[ + tuple(slice(o, o + s, 1) for o, s in zip(origin, selection_shape, strict=False)) + + (range(ndim),) + ] + + expected = tuple(map(tuple, coord_array_indexed.reshape(-1, ndim).tolist())) + assert observed == expected + + +def test_iter_grid_invalid() -> None: + """ + Ensure that a selection_shape that exceeds the grid_shape + origin produces an indexing error. + """ + with pytest.raises(IndexError): + list(_iter_grid((5,), origin=(0,), selection_shape=(10,))) + + def test_indexing_with_zarr_array(store: StorePath) -> None: # regression test for https://github.com/zarr-developers/zarr-python/issues/2133 a = np.arange(10) diff --git a/tests/v3/test_store/test_remote.py b/tests/v3/test_store/test_remote.py index ca74fc1842..18ba1e6d1c 100644 --- a/tests/v3/test_store/test_remote.py +++ b/tests/v3/test_store/test_remote.py @@ -84,10 +84,6 @@ def s3(s3_base: None) -> Generator[s3fs.S3FileSystem, None, None]: # ### end from s3fs ### # -async def alist(it): - return [a async for a in it] - - async def test_basic() -> None: store = RemoteStore.from_url( f"s3://{test_bucket_name}", From 33659289a8fb38f7ff3975cb262554bbe56cbd0e Mon Sep 17 00:00:00 2001 From: Dimitri Papadopoulos Orfanos <3234522+DimitriPapadopoulos@users.noreply.github.com> Date: Thu, 26 Sep 2024 23:40:51 +0200 Subject: [PATCH 16/21] Apply assorted ruff/flake8-simplify rules (SIM) (#2259) * Apply ruff/flake8-simplify rule SIM103 SIM103 Return the condition directly * Apply ruff/flake8-simplify rule SIM118 SIM118 Use `key in dict` instead of `key in dict.keys()` --- src/zarr/core/indexing.py | 4 +--- src/zarr/store/zip.py | 5 +---- src/zarr/testing/store.py | 4 ++-- 3 files changed, 4 insertions(+), 9 deletions(-) diff --git a/src/zarr/core/indexing.py b/src/zarr/core/indexing.py index 1c153fc161..2cc70291dd 100644 --- a/src/zarr/core/indexing.py +++ b/src/zarr/core/indexing.py @@ -218,9 +218,7 @@ def is_scalar(value: Any, dtype: np.dtype[Any]) -> bool: return True if hasattr(value, "shape") and value.shape == (): return True - if isinstance(value, tuple) and dtype.names and len(value) == len(dtype.names): - return True - return False + return isinstance(value, tuple) and dtype.names is not None and len(value) == len(dtype.names) def is_pure_fancy_indexing(selection: Any, ndim: int) -> bool: diff --git a/src/zarr/store/zip.py b/src/zarr/store/zip.py index 9496609138..f9c4587092 100644 --- a/src/zarr/store/zip.py +++ b/src/zarr/store/zip.py @@ -110,10 +110,7 @@ async def clear(self) -> None: async def empty(self) -> bool: with self._lock: - if self._zf.namelist(): - return False - else: - return True + return not self._zf.namelist() def __str__(self) -> str: return f"zip://{self.path}" diff --git a/src/zarr/testing/store.py b/src/zarr/testing/store.py index 7b78b8ed00..5c75007347 100644 --- 
a/src/zarr/testing/store.py +++ b/src/zarr/testing/store.py @@ -248,7 +248,7 @@ async def test_list_prefix(self, store: S) -> None: for prefix in prefixes: observed = tuple(sorted(await _collect_aiterator(store.list_prefix(prefix)))) expected: tuple[str, ...] = () - for key in store_dict.keys(): + for key in store_dict: if key.startswith(prefix): expected += (key.removeprefix(prefix),) expected = tuple(sorted(expected)) @@ -267,7 +267,7 @@ async def test_list_dir(self, store: S) -> None: await store._set_many(store_dict.items()) keys_observed = await _collect_aiterator(store.list_dir(root)) - keys_expected = {k.removeprefix(root + "/").split("/")[0] for k in store_dict.keys()} + keys_expected = {k.removeprefix(root + "/").split("/")[0] for k in store_dict} assert sorted(keys_observed) == sorted(keys_expected) From 6984294a87fe4af66ee218edd9351864017efb92 Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Thu, 26 Sep 2024 15:42:39 -0600 Subject: [PATCH 17/21] feature(store): add LoggingStore wrapper (#2231) * feature(store): add LoggingStore wrapper * add counter * lint --- src/zarr/store/logging.py | 162 ++++++++++++++++++++++++++++ tests/v3/test_store/test_logging.py | 50 +++++++++ 2 files changed, 212 insertions(+) create mode 100644 src/zarr/store/logging.py create mode 100644 tests/v3/test_store/test_logging.py diff --git a/src/zarr/store/logging.py b/src/zarr/store/logging.py new file mode 100644 index 0000000000..792dc66d93 --- /dev/null +++ b/src/zarr/store/logging.py @@ -0,0 +1,162 @@ +from __future__ import annotations + +import inspect +import logging +import time +from collections import defaultdict +from contextlib import contextmanager +from typing import TYPE_CHECKING + +from zarr.abc.store import AccessMode, Store + +if TYPE_CHECKING: + from collections.abc import AsyncGenerator, Generator + + from zarr.core.buffer import Buffer, BufferPrototype + + +class LoggingStore(Store): + _store: Store + counter: defaultdict[str, int] + + def __init__( + self, + store: Store, + log_level: str = "DEBUG", + log_handler: logging.Handler | None = None, + ): + self._store = store + self.counter = defaultdict(int) + + self._configure_logger(log_level, log_handler) + + def _configure_logger( + self, log_level: str = "DEBUG", log_handler: logging.Handler | None = None + ) -> None: + self.log_level = log_level + self.logger = logging.getLogger(f"LoggingStore({self._store!s})") + self.logger.setLevel(log_level) + + if not self.logger.hasHandlers(): + if not log_handler: + log_handler = self._default_handler() + # Add handler to logger + self.logger.addHandler(log_handler) + + def _default_handler(self) -> logging.Handler: + """Define a default log handler""" + handler = logging.StreamHandler() + handler.setLevel(self.log_level) + handler.setFormatter( + logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s") + ) + return handler + + @contextmanager + def log(self) -> Generator[None, None, None]: + method = inspect.stack()[2].function + op = f"{type(self._store).__name__}.{method}" + self.logger.info(f"Calling {op}") + start_time = time.time() + try: + self.counter[method] += 1 + yield + finally: + end_time = time.time() + self.logger.info(f"Finished {op} in {end_time - start_time:.2f} seconds") + + @property + def supports_writes(self) -> bool: + with self.log(): + return self._store.supports_writes + + @property + def supports_deletes(self) -> bool: + with self.log(): + return self._store.supports_deletes + + @property + def supports_partial_writes(self) -> bool: + with 
self.log():
+            return self._store.supports_partial_writes
+
+    @property
+    def supports_listing(self) -> bool:
+        with self.log():
+            return self._store.supports_listing
+
+    @property
+    def _mode(self) -> AccessMode:  # type: ignore[override]
+        with self.log():
+            return self._store._mode
+
+    @property
+    def _is_open(self) -> bool:  # type: ignore[override]
+        with self.log():
+            return self._store._is_open
+
+    async def empty(self) -> bool:
+        with self.log():
+            return await self._store.empty()
+
+    async def clear(self) -> None:
+        with self.log():
+            return await self._store.clear()
+
+    def __str__(self) -> str:
+        return f"logging-{self._store!s}"
+
+    def __repr__(self) -> str:
+        return f"LoggingStore({self._store!r})"
+
+    def __eq__(self, other: object) -> bool:
+        with self.log():
+            return self._store == other
+
+    async def get(
+        self,
+        key: str,
+        prototype: BufferPrototype,
+        byte_range: tuple[int | None, int | None] | None = None,
+    ) -> Buffer | None:
+        with self.log():
+            return await self._store.get(key=key, prototype=prototype, byte_range=byte_range)
+
+    async def get_partial_values(
+        self,
+        prototype: BufferPrototype,
+        key_ranges: list[tuple[str, tuple[int | None, int | None]]],
+    ) -> list[Buffer | None]:
+        with self.log():
+            return await self._store.get_partial_values(prototype=prototype, key_ranges=key_ranges)
+
+    async def exists(self, key: str) -> bool:
+        with self.log():
+            return await self._store.exists(key)
+
+    async def set(self, key: str, value: Buffer) -> None:
+        with self.log():
+            return await self._store.set(key=key, value=value)
+
+    async def delete(self, key: str) -> None:
+        with self.log():
+            return await self._store.delete(key=key)
+
+    async def set_partial_values(self, key_start_values: list[tuple[str, int, bytes]]) -> None:
+        with self.log():
+            return await self._store.set_partial_values(key_start_values=key_start_values)
+
+    async def list(self) -> AsyncGenerator[str, None]:
+        with self.log():
+            async for key in self._store.list():
+                yield key
+
+    async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]:
+        with self.log():
+            async for key in self._store.list_prefix(prefix=prefix):
+                yield key
+
+    async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]:
+        with self.log():
+            async for key in self._store.list_dir(prefix=prefix):
+                yield key
diff --git a/tests/v3/test_store/test_logging.py b/tests/v3/test_store/test_logging.py
new file mode 100644
index 0000000000..a263c2ae08
--- /dev/null
+++ b/tests/v3/test_store/test_logging.py
@@ -0,0 +1,50 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import pytest
+
+import zarr
+from zarr.core.buffer import default_buffer_prototype
+from zarr.store.logging import LoggingStore
+
+if TYPE_CHECKING:
+    from zarr.abc.store import Store
+
+
+@pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"])
+async def test_logging_store(store: Store, caplog) -> None:
+    wrapped = LoggingStore(store=store, log_level="DEBUG")
+    buffer = default_buffer_prototype().buffer
+
+    caplog.clear()
+    res = await wrapped.set("foo/bar/c/0", buffer.from_bytes(b"\x01\x02\x03\x04"))
+    assert res is None
+    assert len(caplog.record_tuples) == 2
+    for tup in caplog.record_tuples:
+        assert str(store) in tup[0]
+    assert f"Calling {type(store).__name__}.set" in caplog.record_tuples[0][2]
+    assert f"Finished {type(store).__name__}.set" in caplog.record_tuples[1][2]
+
+    caplog.clear()
+    keys = [k async for k in wrapped.list()]
+    assert keys == ["foo/bar/c/0"]
+    assert 
len(caplog.record_tuples) == 2
+    for tup in caplog.record_tuples:
+        assert str(store) in tup[0]
+    assert f"Calling {type(store).__name__}.list" in caplog.record_tuples[0][2]
+    assert f"Finished {type(store).__name__}.list" in caplog.record_tuples[1][2]
+
+
+@pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"])
+async def test_logging_store_counter(store: Store) -> None:
+    wrapped = LoggingStore(store=store, log_level="DEBUG")
+
+    arr = zarr.create(shape=(10,), store=wrapped, overwrite=True)
+    arr[:] = 1
+
+    assert wrapped.counter["set"] == 2
+    assert wrapped.counter["get"] == 0  # 1 if overwrite=False
+    assert wrapped.counter["list"] == 0
+    assert wrapped.counter["list_dir"] == 0
+    assert wrapped.counter["list_prefix"] == 0

From 0e4cc7e6b1e11874197993b993d1a7834821f790 Mon Sep 17 00:00:00 2001
From: Joe Hamman
Date: Thu, 26 Sep 2024 16:03:00 -0600
Subject: [PATCH 18/21] chore(deps): drop support for python 3.10 and numpy 1.24 (#2217)

* chore(deps): drop support for python 3.10 and numpy 1.24

* lint

* bump python in readthedocs

* fix array import in docs

* try on 3.12

* style: pre-commit fixes

* fixup

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 .github/workflows/test.yml       |  4 +--
 .readthedocs.yaml                |  2 +-
 docs/release.rst                 |  4 +--
 docs/tutorial.rst                | 50 ++++++++++++++++----------------
 pyproject.toml                   | 25 ++++++++--------
 src/zarr/abc/codec.py            |  2 +-
 src/zarr/abc/metadata.py         |  2 +-
 src/zarr/abc/store.py            |  5 ++--
 src/zarr/codecs/blosc.py         |  2 +-
 src/zarr/codecs/bytes.py         |  2 +-
 src/zarr/codecs/crc32c_.py       |  2 +-
 src/zarr/codecs/gzip.py          |  2 +-
 src/zarr/codecs/pipeline.py      |  2 +-
 src/zarr/codecs/sharding.py      |  3 +-
 src/zarr/codecs/transpose.py     |  4 +--
 src/zarr/codecs/zstd.py          |  2 +-
 src/zarr/core/buffer/core.py     |  3 +-
 src/zarr/core/buffer/cpu.py      |  3 +-
 src/zarr/core/buffer/gpu.py      |  3 +-
 src/zarr/core/chunk_grids.py     |  3 +-
 src/zarr/core/metadata/common.py |  3 +-
 src/zarr/core/metadata/v2.py     |  3 +-
 src/zarr/core/metadata/v3.py     |  3 +-
 src/zarr/core/sync.py            |  2 +-
 src/zarr/store/logging.py        | 10 ++++---
 src/zarr/testing/buffer.py       |  3 +-
 26 files changed, 70 insertions(+), 79 deletions(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 48e579711b..5683b62dff 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -21,8 +21,8 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ['3.10', '3.11', '3.12']
-        numpy-version: ['1.24', '1.26', '2.0']
+        python-version: ['3.11', '3.12']
+        numpy-version: ['1.25', '1.26', '2.0']
         dependency-set: ["minimal", "optional"]
 
     steps:
diff --git a/.readthedocs.yaml b/.readthedocs.yaml
index cae58c064a..32a3f0e4e1 100644
--- a/.readthedocs.yaml
+++ b/.readthedocs.yaml
@@ -3,7 +3,7 @@ version: 2
 build:
   os: ubuntu-22.04
   tools:
-    python: "3.10"
+    python: "3.12"
 
 sphinx:
   configuration: docs/conf.py
diff --git a/docs/release.rst b/docs/release.rst
index dbf390a800..5c3a43a148 100644
--- a/docs/release.rst
+++ b/docs/release.rst
@@ -1142,7 +1142,7 @@ Documentation
 * Update docs to use ``python -m pytest``.
   By :user:`Ray Bell ` :issue:`923`.
 
-* Fix versionadded tag in zarr.core.Array docstring.
+* Fix versionadded tag in zarr.Array docstring.
   By :user:`Juan Nunez-Iglesias ` :issue:`852`.
 
 * Doctest seem to be stricter now, updating tostring() to tobytes().
@@ -1896,7 +1896,7 @@ Enhancements
   :user:`John Kirkham `, :issue:`92`, :issue:`122`.
 
 * **Viewing an array as a different dtype**. 
The ``Array`` class has a new - :func:`zarr.core.Array.astype` method, which is a convenience that enables an + :func:`zarr.Array.astype` method, which is a convenience that enables an array to be viewed as a different dtype. By :user:`John Kirkham `, :issue:`94`, :issue:`96`. diff --git a/docs/tutorial.rst b/docs/tutorial.rst index 4099bac1c8..a40422490b 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -18,7 +18,7 @@ Zarr has several functions for creating arrays. For example:: >>> import zarr >>> z = zarr.zeros((10000, 10000), chunks=(1000, 1000), dtype='i4') >>> z - + The code above creates a 2-dimensional array of 32-bit integers with 10000 rows and 10000 columns, divided into chunks where each chunk has 1000 rows and 1000 @@ -168,7 +168,7 @@ compression ratio. Zarr arrays provide a ``info`` property which can be used to print some diagnostics, e.g.:: >>> z.info - Type : zarr.core.Array + Type : zarr.Array Data type : int32 Shape : (10000, 10000) Chunk shape : (1000, 1000) @@ -260,7 +260,7 @@ Here is an example using a delta filter with the Blosc compressor:: >>> data = np.arange(100000000, dtype='i4').reshape(10000, 10000) >>> z = zarr.array(data, chunks=(1000, 1000), filters=filters, compressor=compressor) >>> z.info - Type : zarr.core.Array + Type : zarr.Array Data type : int32 Shape : (10000, 10000) Chunk shape : (1000, 1000) @@ -302,7 +302,7 @@ Groups can also contain arrays, e.g.:: >>> z1 = bar.zeros('baz', shape=(10000, 10000), chunks=(1000, 1000), dtype='i4') >>> z1 - + Arrays are known as "datasets" in HDF5 terminology. For compatibility with h5py, Zarr groups also implement the ``create_dataset()`` and ``require_dataset()`` @@ -310,7 +310,7 @@ methods, e.g.:: >>> z = bar.create_dataset('quux', shape=(10000, 10000), chunks=(1000, 1000), dtype='i4') >>> z - + Members of a group can be accessed via the suffix notation, e.g.:: @@ -323,7 +323,7 @@ call, e.g.:: >>> root['foo/bar'] >>> root['foo/bar/baz'] - + The :func:`zarr.hierarchy.Group.tree` method can be used to print a tree representation of the hierarchy, e.g.:: @@ -344,7 +344,7 @@ sub-directories, e.g.:: >>> z = root.zeros('foo/bar/baz', shape=(10000, 10000), chunks=(1000, 1000), dtype='i4') >>> z - + Groups can be used as context managers (in a ``with`` statement). If the underlying store has a ``close`` method, it will be called on exit. @@ -388,7 +388,7 @@ property. E.g.:: >>> bar.info Name : /foo/bar - Type : zarr.core.Array + Type : zarr.Array Data type : int64 Shape : (1000000,) Chunk shape : (100000,) @@ -403,7 +403,7 @@ property. E.g.:: >>> baz.info Name : /foo/baz - Type : zarr.core.Array + Type : zarr.Array Data type : float32 Shape : (1000, 1000) Chunk shape : (100, 100) @@ -472,7 +472,7 @@ Note that although this functionality is similar to some of the advanced indexing capabilities available on NumPy arrays and on h5py datasets, **the Zarr API for advanced indexing is different from both NumPy and h5py**, so please read this section carefully. For a complete description of the indexing API, -see the documentation for the :class:`zarr.core.Array` class. +see the documentation for the :class:`zarr.Array` class. 
Indexing with coordinate arrays ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -880,10 +880,10 @@ Here is an example using S3Map to read an array created previously:: >>> root = zarr.group(store=store) >>> z = root['foo/bar/baz'] >>> z - + >>> z.info Name : /foo/bar/baz - Type : zarr.core.Array + Type : zarr.Array Data type : |S1 Shape : (21,) Chunk shape : (7,) @@ -1150,7 +1150,7 @@ your array, then you can use an array with a fixed-length bytes dtype. E.g.:: >>> z = zarr.zeros(10, dtype='S6') >>> z - + >>> z[0] = b'Hello' >>> z[1] = b'world!' >>> z[:] @@ -1166,7 +1166,7 @@ A fixed-length unicode dtype is also available, e.g.:: >>> text_data = greetings * 10000 >>> z = zarr.array(text_data, dtype='U20') >>> z - + >>> z[:] array(['¡Hola mundo!', 'Hej Världen!', 'Servus Woid!', ..., 'Helló, világ!', 'Zdravo svete!', 'เฮลโลเวิลด์'], @@ -1182,7 +1182,7 @@ E.g. using ``VLenUTF8``:: >>> import numcodecs >>> z = zarr.array(text_data, dtype=object, object_codec=numcodecs.VLenUTF8()) >>> z - + >>> z.filters [VLenUTF8()] >>> z[:] @@ -1194,7 +1194,7 @@ is a short-hand for ``dtype=object, object_codec=numcodecs.VLenUTF8()``, e.g.:: >>> z = zarr.array(text_data, dtype=str) >>> z - + >>> z.filters [VLenUTF8()] >>> z[:] @@ -1210,7 +1210,7 @@ e.g.:: >>> bytes_data = [g.encode('utf-8') for g in greetings] * 10000 >>> z = zarr.array(bytes_data, dtype=bytes) >>> z - + >>> z.filters [VLenBytes()] >>> z[:] @@ -1225,7 +1225,7 @@ integer. E.g.:: >>> categorize = numcodecs.Categorize(greetings, dtype=object) >>> z = zarr.array(text_data, dtype=object, object_codec=categorize) >>> z - + >>> z.filters [Categorize(dtype='|O', astype='|u1', labels=['¡Hola mundo!', 'Hej Världen!', 'Servus Woid!', ...])] >>> z[:] @@ -1275,7 +1275,7 @@ and stores the same primitive type (a.k.a. a ragged array), the >>> z = zarr.empty(4, dtype=object, object_codec=numcodecs.VLenArray(int)) >>> z - + >>> z.filters [VLenArray(dtype='>> z[0] = np.array([1, 3, 5]) @@ -1291,7 +1291,7 @@ primitive dtype such as 'i4' or 'f8'. E.g.:: >>> z = zarr.empty(4, dtype='array:i8') >>> z - + >>> z.filters [VLenArray(dtype='>> z[0] = np.array([1, 3, 5]) @@ -1367,7 +1367,7 @@ ratios, depending on the correlation structure within the data. E.g.:: >>> a = np.arange(100000000, dtype='i4').reshape(10000, 10000).T >>> c = zarr.array(a, chunks=(1000, 1000)) >>> c.info - Type : zarr.core.Array + Type : zarr.Array Data type : int32 Shape : (10000, 10000) Chunk shape : (1000, 1000) @@ -1381,7 +1381,7 @@ ratios, depending on the correlation structure within the data. E.g.:: Chunks initialized : 100/100 >>> f = zarr.array(a, chunks=(1000, 1000), order='F') >>> f.info - Type : zarr.core.Array + Type : zarr.Array Data type : int32 Shape : (10000, 10000) Chunk shape : (1000, 1000) @@ -1549,7 +1549,7 @@ with thread synchronization:: >>> z = zarr.zeros((10000, 10000), chunks=(1000, 1000), dtype='i4', ... synchronizer=zarr.ThreadSynchronizer()) >>> z - + This array is safe to read or write within a multi-threaded program. @@ -1563,7 +1563,7 @@ some networked file systems). E.g.:: ... chunks=(1000, 1000), dtype='i4', ... synchronizer=synchronizer) >>> z - + This array is safe to read or write from multiple processes. @@ -1631,7 +1631,7 @@ arrays, as long as the units are specified. 
E.g.:: >>> z = zarr.array(['2007-07-13', '2006-01-13', '2010-08-13'], dtype='M8[D]') >>> z - + >>> z[:] array(['2007-07-13', '2006-01-13', '2010-08-13'], dtype='datetime64[D]') >>> z[0] diff --git a/pyproject.toml b/pyproject.toml index 33aa538141..9b7776ea65 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,13 +20,13 @@ maintainers = [ { name = "Norman Rzepka" }, { name = "Ryan Abernathey" } ] -requires-python = ">=3.10" +requires-python = ">=3.11" # If you add a new dependency here, please also add it to .pre-commit-config.yml dependencies = [ 'asciitree', - 'numpy>=1.24', + 'numpy>=1.25', 'fasteners', - 'numcodecs>=0.10.0', + 'numcodecs>=0.10.2', 'fsspec>2024', 'crc32c', 'typing_extensions', @@ -45,7 +45,6 @@ classifiers = [ 'Topic :: Software Development :: Libraries :: Python Modules', 'Operating System :: Unix', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.10', 'Programming Language :: Python :: 3.11', 'Programming Language :: Python :: 3.12', ] @@ -130,18 +129,18 @@ dependencies = [ features = ["test", "extra"] [[tool.hatch.envs.test.matrix]] -python = ["3.10", "3.11", "3.12"] -numpy = ["1.24", "1.26", "2.0"] +python = ["3.11", "3.12"] +numpy = ["1.25", "1.26", "2.0"] version = ["minimal"] [[tool.hatch.envs.test.matrix]] -python = ["3.10", "3.11", "3.12"] -numpy = ["1.24", "1.26", "2.0"] +python = ["3.11", "3.12"] +numpy = ["1.25", "1.26", "2.0"] features = ["optional"] [[tool.hatch.envs.test.matrix]] -python = ["3.10", "3.11", "3.12"] -numpy = ["1.24", "1.26", "2.0"] +python = ["3.11", "3.12"] +numpy = ["1.25", "1.26", "2.0"] features = ["gpu"] [tool.hatch.envs.test.scripts] @@ -161,8 +160,8 @@ dependencies = [ features = ["test", "extra", "gpu"] [[tool.hatch.envs.gputest.matrix]] -python = ["3.10", "3.11", "3.12"] -numpy = ["1.24", "1.26", "2.0"] +python = ["3.11", "3.12"] +numpy = ["1.25", "1.26", "2.0"] version = ["minimal"] [tool.hatch.envs.gputest.scripts] @@ -249,7 +248,7 @@ ignore = [ ] [tool.mypy] -python_version = "3.10" +python_version = "3.11" ignore_missing_imports = true namespace_packages = false diff --git a/src/zarr/abc/codec.py b/src/zarr/abc/codec.py index 2098d989e9..057e66c4b0 100644 --- a/src/zarr/abc/codec.py +++ b/src/zarr/abc/codec.py @@ -10,9 +10,9 @@ if TYPE_CHECKING: from collections.abc import Awaitable, Callable, Iterable + from typing import Self import numpy as np - from typing_extensions import Self from zarr.abc.store import ByteGetter, ByteSetter from zarr.core.array_spec import ArraySpec diff --git a/src/zarr/abc/metadata.py b/src/zarr/abc/metadata.py index 7ea668c891..239d151c0c 100644 --- a/src/zarr/abc/metadata.py +++ b/src/zarr/abc/metadata.py @@ -4,7 +4,7 @@ from typing import TYPE_CHECKING if TYPE_CHECKING: - from typing_extensions import Self + from typing import Self from zarr.core.common import JSON diff --git a/src/zarr/abc/store.py b/src/zarr/abc/store.py index 42eb18ce0b..5f50360554 100644 --- a/src/zarr/abc/store.py +++ b/src/zarr/abc/store.py @@ -3,14 +3,13 @@ from abc import ABC, abstractmethod from asyncio import gather from collections.abc import AsyncGenerator, Iterable +from types import TracebackType from typing import TYPE_CHECKING, Any, NamedTuple, Protocol, runtime_checkable if TYPE_CHECKING: from collections.abc import AsyncGenerator, Iterable from types import TracebackType - from typing import Any, TypeAlias - - from typing_extensions import Self + from typing import Any, Self, TypeAlias from zarr.core.buffer import Buffer, BufferPrototype from zarr.core.common import 
AccessModeLiteral, BytesLike diff --git a/src/zarr/codecs/blosc.py b/src/zarr/codecs/blosc.py index 7b10d91a6a..16bcf48a34 100644 --- a/src/zarr/codecs/blosc.py +++ b/src/zarr/codecs/blosc.py @@ -14,7 +14,7 @@ from zarr.registry import register_codec if TYPE_CHECKING: - from typing_extensions import Self + from typing import Self from zarr.core.array_spec import ArraySpec from zarr.core.buffer import Buffer diff --git a/src/zarr/codecs/bytes.py b/src/zarr/codecs/bytes.py index 7a683411e9..78c7b22fbc 100644 --- a/src/zarr/codecs/bytes.py +++ b/src/zarr/codecs/bytes.py @@ -13,7 +13,7 @@ from zarr.registry import register_codec if TYPE_CHECKING: - from typing_extensions import Self + from typing import Self from zarr.core.array_spec import ArraySpec diff --git a/src/zarr/codecs/crc32c_.py b/src/zarr/codecs/crc32c_.py index f814ba15e5..3a6624ad25 100644 --- a/src/zarr/codecs/crc32c_.py +++ b/src/zarr/codecs/crc32c_.py @@ -12,7 +12,7 @@ from zarr.registry import register_codec if TYPE_CHECKING: - from typing_extensions import Self + from typing import Self from zarr.core.array_spec import ArraySpec from zarr.core.buffer import Buffer diff --git a/src/zarr/codecs/gzip.py b/src/zarr/codecs/gzip.py index 0dd31009c4..6cc8517f20 100644 --- a/src/zarr/codecs/gzip.py +++ b/src/zarr/codecs/gzip.py @@ -11,7 +11,7 @@ from zarr.registry import register_codec if TYPE_CHECKING: - from typing_extensions import Self + from typing import Self from zarr.core.array_spec import ArraySpec from zarr.core.buffer import Buffer diff --git a/src/zarr/codecs/pipeline.py b/src/zarr/codecs/pipeline.py index 182621c59f..6828377f97 100644 --- a/src/zarr/codecs/pipeline.py +++ b/src/zarr/codecs/pipeline.py @@ -21,9 +21,9 @@ if TYPE_CHECKING: from collections.abc import Iterable, Iterator + from typing import Self import numpy as np - from typing_extensions import Self from zarr.abc.store import ByteGetter, ByteSetter from zarr.core.array_spec import ArraySpec diff --git a/src/zarr/codecs/sharding.py b/src/zarr/codecs/sharding.py index 2f8946e465..c818e3c66b 100644 --- a/src/zarr/codecs/sharding.py +++ b/src/zarr/codecs/sharding.py @@ -49,8 +49,7 @@ if TYPE_CHECKING: from collections.abc import Awaitable, Callable, Iterator - - from typing_extensions import Self + from typing import Self from zarr.core.common import JSON diff --git a/src/zarr/codecs/transpose.py b/src/zarr/codecs/transpose.py index 40a4cdbf37..3a471beaf5 100644 --- a/src/zarr/codecs/transpose.py +++ b/src/zarr/codecs/transpose.py @@ -12,9 +12,7 @@ from zarr.registry import register_codec if TYPE_CHECKING: - from typing import Any - - from typing_extensions import Self + from typing import Any, Self from zarr.core.buffer import NDBuffer from zarr.core.chunk_grids import ChunkGrid diff --git a/src/zarr/codecs/zstd.py b/src/zarr/codecs/zstd.py index 572d594d53..913d0f01c7 100644 --- a/src/zarr/codecs/zstd.py +++ b/src/zarr/codecs/zstd.py @@ -13,7 +13,7 @@ from zarr.registry import register_codec if TYPE_CHECKING: - from typing_extensions import Self + from typing import Self from zarr.core.array_spec import ArraySpec from zarr.core.buffer import Buffer diff --git a/src/zarr/core/buffer/core.py b/src/zarr/core/buffer/core.py index 49f04aafa0..c59c47bb67 100644 --- a/src/zarr/core/buffer/core.py +++ b/src/zarr/core/buffer/core.py @@ -23,8 +23,7 @@ if TYPE_CHECKING: from collections.abc import Iterable, Sequence - - from typing_extensions import Self + from typing import Self from zarr.codecs.bytes import Endian from zarr.core.common import BytesLike, ChunkCoords 
diff --git a/src/zarr/core/buffer/cpu.py b/src/zarr/core/buffer/cpu.py
index a82584a477..187e2d82dc 100644
--- a/src/zarr/core/buffer/cpu.py
+++ b/src/zarr/core/buffer/cpu.py
@@ -17,8 +17,7 @@
 
 if TYPE_CHECKING:
     from collections.abc import Callable, Iterable
-
-    from typing_extensions import Self
+    from typing import Self
 
     from zarr.core.buffer.core import ArrayLike, NDArrayLike
     from zarr.core.common import BytesLike
diff --git a/src/zarr/core/buffer/gpu.py b/src/zarr/core/buffer/gpu.py
index 1227175146..d5daba0e9a 100644
--- a/src/zarr/core/buffer/gpu.py
+++ b/src/zarr/core/buffer/gpu.py
@@ -16,8 +16,7 @@
 
 if TYPE_CHECKING:
     from collections.abc import Iterable
-
-    from typing_extensions import Self
+    from typing import Self
 
     from zarr.core.common import BytesLike
diff --git a/src/zarr/core/chunk_grids.py b/src/zarr/core/chunk_grids.py
index 61723215c6..46209bd16b 100644
--- a/src/zarr/core/chunk_grids.py
+++ b/src/zarr/core/chunk_grids.py
@@ -23,8 +23,7 @@
 
 if TYPE_CHECKING:
     from collections.abc import Iterator
-
-    from typing_extensions import Self
+    from typing import Self
 
 
 def _guess_chunks(
diff --git a/src/zarr/core/metadata/common.py b/src/zarr/core/metadata/common.py
index 583375b4b7..7d71455a44 100644
--- a/src/zarr/core/metadata/common.py
+++ b/src/zarr/core/metadata/common.py
@@ -3,10 +3,9 @@
 from typing import TYPE_CHECKING
 
 if TYPE_CHECKING:
-    from typing import Any, Literal
+    from typing import Any, Literal, Self
 
     import numpy as np
-    from typing_extensions import Self
 
     from zarr.core.array_spec import ArraySpec
     from zarr.core.buffer import Buffer, BufferPrototype
diff --git a/src/zarr/core/metadata/v2.py b/src/zarr/core/metadata/v2.py
index 4a9baaf8b8..df7f2abaea 100644
--- a/src/zarr/core/metadata/v2.py
+++ b/src/zarr/core/metadata/v2.py
@@ -5,10 +5,9 @@
 from typing import TYPE_CHECKING
 
 if TYPE_CHECKING:
-    from typing import Any, Literal
+    from typing import Any, Literal, Self
 
     import numpy.typing as npt
-    from typing_extensions import Self
 
     from zarr.core.buffer import Buffer, BufferPrototype
     from zarr.core.common import JSON, ChunkCoords
diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py
index 16bfc822a5..8681c8250e 100644
--- a/src/zarr/core/metadata/v3.py
+++ b/src/zarr/core/metadata/v3.py
@@ -4,8 +4,9 @@
 from typing import TYPE_CHECKING, cast, overload
 
 if TYPE_CHECKING:
+    from typing import Self
+
     import numpy.typing as npt
-    from typing_extensions import Self
 
     from zarr.core.buffer import Buffer, BufferPrototype
     from zarr.core.chunk_grids import ChunkGrid
diff --git a/src/zarr/core/sync.py b/src/zarr/core/sync.py
index 755020ef3c..c572851e5e 100644
--- a/src/zarr/core/sync.py
+++ b/src/zarr/core/sync.py
@@ -83,7 +83,7 @@ def sync(
     finished, unfinished = wait([future], return_when=asyncio.ALL_COMPLETED, timeout=timeout)
 
     if len(unfinished) > 0:
-        raise asyncio.TimeoutError(f"Coroutine {coro} failed to finish in within {timeout}s")
+        raise TimeoutError(f"Coroutine {coro} failed to finish within {timeout}s")
 
     assert len(finished) == 1
     return_result = next(iter(finished)).result()
diff --git a/src/zarr/store/logging.py b/src/zarr/store/logging.py
index 792dc66d93..0c05b42651 100644
--- a/src/zarr/store/logging.py
+++ b/src/zarr/store/logging.py
@@ -7,10 +7,10 @@
 from contextlib import contextmanager
 from typing import TYPE_CHECKING
 
-from zarr.abc.store import AccessMode, Store
+from zarr.abc.store import AccessMode, ByteRangeRequest, Store
 
 if TYPE_CHECKING:
-    from collections.abc import AsyncGenerator, Generator
+    from collections.abc import AsyncGenerator, 
Generator, Iterable from zarr.core.buffer import Buffer, BufferPrototype @@ -125,7 +125,7 @@ async def get( async def get_partial_values( self, prototype: BufferPrototype, - key_ranges: list[tuple[str, tuple[int | None, int | None]]], + key_ranges: Iterable[tuple[str, ByteRangeRequest]], ) -> list[Buffer | None]: with self.log(): return await self._store.get_partial_values(prototype=prototype, key_ranges=key_ranges) @@ -142,7 +142,9 @@ async def delete(self, key: str) -> None: with self.log(): return await self._store.delete(key=key) - async def set_partial_values(self, key_start_values: list[tuple[str, int, bytes]]) -> None: + async def set_partial_values( + self, key_start_values: Iterable[tuple[str, int, bytes | bytearray | memoryview]] + ) -> None: with self.log(): return await self._store.set_partial_values(key_start_values=key_start_values) diff --git a/src/zarr/testing/buffer.py b/src/zarr/testing/buffer.py index 9d640d2c64..a0d70e78ea 100644 --- a/src/zarr/testing/buffer.py +++ b/src/zarr/testing/buffer.py @@ -11,8 +11,7 @@ if TYPE_CHECKING: from collections.abc import Iterable - - from typing_extensions import Self + from typing import Self __all__ = [ From 45156713a90e580ffc4aab427553aebb83248fac Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Thu, 26 Sep 2024 16:03:25 -0600 Subject: [PATCH 19/21] fix(async): set default concurrency to 10 tasks (#2256) * fix(async): set default concurrency to 10 tasks * fixup --- src/zarr/core/config.py | 2 +- tests/v3/test_config.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/zarr/core/config.py b/src/zarr/core/config.py index 45e4114389..735755616f 100644 --- a/src/zarr/core/config.py +++ b/src/zarr/core/config.py @@ -43,7 +43,7 @@ def reset(self) -> None: { "default_zarr_version": 3, "array": {"order": "C"}, - "async": {"concurrency": None, "timeout": None}, + "async": {"concurrency": 10, "timeout": None}, "json_indent": 2, "codec_pipeline": { "path": "zarr.codecs.pipeline.BatchedCodecPipeline", diff --git a/tests/v3/test_config.py b/tests/v3/test_config.py index 79b28055d6..e324367b3d 100644 --- a/tests/v3/test_config.py +++ b/tests/v3/test_config.py @@ -41,7 +41,7 @@ def test_config_defaults_set() -> None: { "default_zarr_version": 3, "array": {"order": "C"}, - "async": {"concurrency": None, "timeout": None}, + "async": {"concurrency": 10, "timeout": None}, "json_indent": 2, "codec_pipeline": { "path": "zarr.codecs.pipeline.BatchedCodecPipeline", @@ -62,7 +62,7 @@ def test_config_defaults_set() -> None: } ] assert config.get("array.order") == "C" - assert config.get("async.concurrency") is None + assert config.get("async.concurrency") == 10 assert config.get("async.timeout") is None assert config.get("codec_pipeline.batch_size") == 1 assert config.get("json_indent") == 2 @@ -70,7 +70,7 @@ def test_config_defaults_set() -> None: @pytest.mark.parametrize( ("key", "old_val", "new_val"), - [("array.order", "C", "F"), ("async.concurrency", None, 10), ("json_indent", 2, 0)], + [("array.order", "C", "F"), ("async.concurrency", 10, 20), ("json_indent", 2, 0)], ) def test_config_defaults_can_be_overridden(key: str, old_val: Any, new_val: Any) -> None: assert config.get(key) == old_val From 5ca080d0fd21dbb2b6a8b101cc99d86de5039f08 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Fri, 27 Sep 2024 08:05:02 +0200 Subject: [PATCH 20/21] feat: metadata-only support for storage transformers metadata (#2180) * feat: meager support for storage transformers metadata * remove warning, and instead error when creating v3 arrays 
with storage transformers * unbreak test fixture --- src/zarr/core/array.py | 9 ++++++++- src/zarr/core/metadata/v3.py | 21 +++++++++++++++++++++ tests/v3/test_array.py | 24 +++++++++++++++++++++++- tests/v3/test_metadata/test_v3.py | 11 +++++++++++ 4 files changed, 63 insertions(+), 2 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index fac0facd7d..fee3169e29 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -82,7 +82,14 @@ def parse_array_metadata(data: Any) -> ArrayV2Metadata | ArrayV3Metadata: return data elif isinstance(data, dict): if data["zarr_format"] == 3: - return ArrayV3Metadata.from_dict(data) + meta_out = ArrayV3Metadata.from_dict(data) + if len(meta_out.storage_transformers) > 0: + msg = ( + f"Array metadata contains storage transformers: {meta_out.storage_transformers}." + "Arrays with storage transformers are not supported in zarr-python at this time." + ) + raise ValueError(msg) + return meta_out elif data["zarr_format"] == 2: return ArrayV2Metadata.from_dict(data) raise TypeError diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py index 8681c8250e..f0c6dc6282 100644 --- a/src/zarr/core/metadata/v3.py +++ b/src/zarr/core/metadata/v3.py @@ -72,6 +72,23 @@ def parse_dimension_names(data: object) -> tuple[str | None, ...] | None: raise TypeError(msg) +def parse_storage_transformers(data: object) -> tuple[dict[str, JSON], ...]: + """ + Parse storage_transformers. Zarr python cannot use storage transformers + at this time, so this function doesn't attempt to validate them. + """ + if data is None: + return () + if isinstance(data, Iterable): + if len(tuple(data)) >= 1: + return data # type: ignore[return-value] + else: + return () + raise TypeError( + f"Invalid storage_transformers. Expected an iterable of dicts. Got {type(data)} instead." + ) + + class V3JsonEncoder(json.JSONEncoder): def __init__(self, *args: Any, **kwargs: Any) -> None: self.indent = kwargs.pop("indent", config.get("json_indent")) @@ -144,6 +161,7 @@ class ArrayV3Metadata(ArrayMetadata): dimension_names: tuple[str, ...] | None = None zarr_format: Literal[3] = field(default=3, init=False) node_type: Literal["array"] = field(default="array", init=False) + storage_transformers: tuple[dict[str, JSON], ...] 
def __init__(
         self,
@@ -156,6 +174,7 @@ def __init__(
         codecs: Iterable[Codec | dict[str, JSON]],
         attributes: None | dict[str, JSON],
         dimension_names: None | Iterable[str],
+        storage_transformers: None | Iterable[dict[str, JSON]] = None,
     ) -> None:
         """
         Because the class is a frozen dataclass, we set attributes using object.__setattr__
@@ -168,6 +187,7 @@
         fill_value_parsed = parse_fill_value(fill_value, dtype=data_type_parsed)
         attributes_parsed = parse_attributes(attributes)
         codecs_parsed_partial = parse_codecs(codecs)
+        storage_transformers_parsed = parse_storage_transformers(storage_transformers)
 
         array_spec = ArraySpec(
             shape=shape_parsed,
@@ -186,6 +206,7 @@
         object.__setattr__(self, "dimension_names", dimension_names_parsed)
         object.__setattr__(self, "fill_value", fill_value_parsed)
         object.__setattr__(self, "attributes", attributes_parsed)
+        object.__setattr__(self, "storage_transformers", storage_transformers_parsed)
 
         self._validate_metadata()
diff --git a/tests/v3/test_array.py b/tests/v3/test_array.py
index 95bbde1740..6224bc39e3 100644
--- a/tests/v3/test_array.py
+++ b/tests/v3/test_array.py
@@ -6,9 +6,10 @@
 import pytest
 
 from zarr import Array, AsyncArray, Group
+from zarr.codecs.bytes import BytesCodec
 from zarr.core.array import chunks_initialized
 from zarr.core.buffer.cpu import NDBuffer
-from zarr.core.common import ZarrFormat
+from zarr.core.common import JSON, ZarrFormat
 from zarr.core.indexing import ceildiv
 from zarr.core.sync import sync
 from zarr.errors import ContainsArrayError, ContainsGroupError
@@ -238,6 +239,27 @@ def test_serializable_sync_array(store: LocalStore, zarr_format: ZarrFormat) ->
     np.testing.assert_array_equal(actual[:], expected[:])
 
 
+@pytest.mark.parametrize("store", ["memory"], indirect=True)
+def test_storage_transformers(store: MemoryStore) -> None:
+    """
+    Test that providing an actual storage transformer raises an error, since storage
+    transformers are not supported in zarr-python at this time.
+    """
+    metadata_dict: dict[str, JSON] = {
+        "zarr_format": 3,
+        "node_type": "array",
+        "shape": (10,),
+        "chunk_grid": {"name": "regular", "configuration": {"chunk_shape": (1,)}},
+        "data_type": "uint8",
+        "chunk_key_encoding": {"name": "v2", "configuration": {"separator": "/"}},
+        "codecs": (BytesCodec().to_dict(),),
+        "fill_value": 0,
+        "storage_transformers": ({"test": "should_raise"},),
+    }
+    match = "Arrays with storage transformers are not supported in zarr-python at this time."
+ with pytest.raises(ValueError, match=match): + Array.from_dict(StorePath(store), data=metadata_dict) + + @pytest.mark.parametrize("test_cls", [Array, AsyncArray]) @pytest.mark.parametrize("nchunks", [2, 5, 10]) def test_nchunks(test_cls: type[Array] | type[AsyncArray], nchunks: int) -> None: diff --git a/tests/v3/test_metadata/test_v3.py b/tests/v3/test_metadata/test_v3.py index 025d59422a..71dc917c35 100644 --- a/tests/v3/test_metadata/test_v3.py +++ b/tests/v3/test_metadata/test_v3.py @@ -14,6 +14,7 @@ from typing import Any from zarr.abc.codec import Codec + from zarr.core.common import JSON import numpy as np @@ -196,6 +197,7 @@ def test_parse_fill_value_invalid_type_sequence(fill_value: Any, dtype_str: str) @pytest.mark.parametrize("chunk_key_encoding", ["v2", "default"]) @pytest.mark.parametrize("dimension_separator", [".", "/", None]) @pytest.mark.parametrize("dimension_names", ["nones", "strings", "missing"]) +@pytest.mark.parametrize("storage_transformers", [None, ()]) def test_metadata_to_dict( chunk_grid: str, codecs: list[Codec], @@ -204,6 +206,7 @@ def test_metadata_to_dict( dimension_separator: Literal[".", "/"] | None, dimension_names: Literal["nones", "strings", "missing"], attributes: None | dict[str, Any], + storage_transformers: None | tuple[dict[str, JSON]], ) -> None: shape = (1, 2, 3) data_type = "uint8" @@ -234,6 +237,7 @@ def test_metadata_to_dict( "chunk_key_encoding": cke, "codecs": tuple(c.to_dict() for c in codecs), "fill_value": fill_value, + "storage_transformers": storage_transformers, } if attributes is not None: @@ -244,9 +248,16 @@ def test_metadata_to_dict( metadata = ArrayV3Metadata.from_dict(metadata_dict) observed = metadata.to_dict() expected = metadata_dict.copy() + + # if unset or None or (), storage_transformers gets normalized to () + assert observed["storage_transformers"] == () + observed.pop("storage_transformers") + expected.pop("storage_transformers") + if attributes is None: assert observed["attributes"] == {} observed.pop("attributes") + if dimension_separator is None: if chunk_key_encoding == "default": expected_cke_dict = DefaultChunkKeyEncoding(separator="/").to_dict() From 1560d218c2b5990fe8f05c32a2209f31156960c8 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Fri, 27 Sep 2024 11:11:12 -0500 Subject: [PATCH 21/21] Default zarr.open to open_group if shape is not provided (#2158) * Default zarr.open to open_group if shape is not provided * linting * Address failing tests * Add check if store_path contains array to open() * Allow AsyncArray constructor to accept dictionary metadata * Explicitly construct array from metadata in open() * Check if metadata input is dict rather than ArrayMetadata * fixup --------- Co-authored-by: Joe Hamman Co-authored-by: Joe Hamman --- src/zarr/api/asynchronous.py | 14 ++++- src/zarr/core/array.py | 103 +++++++++++++++++++---------------- tests/v3/test_api.py | 7 ++- 3 files changed, 76 insertions(+), 48 deletions(-) diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index 95adcf2936..2b6f938478 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -7,7 +7,7 @@ import numpy as np import numpy.typing as npt -from zarr.core.array import Array, AsyncArray +from zarr.core.array import Array, AsyncArray, get_array_metadata from zarr.core.common import JSON, AccessModeLiteral, ChunkCoords, MemoryOrder, ZarrFormat from zarr.core.config import config from zarr.core.group import AsyncGroup @@ -230,6 +230,18 @@ async def open( if path is not None: store_path = 
store_path / path + if "shape" not in kwargs and mode in {"a", "w", "w-"}: + try: + metadata_dict = await get_array_metadata(store_path, zarr_format=zarr_format) + # for v2, the above would already have raised an exception if not an array + zarr_format = metadata_dict["zarr_format"] + is_v3_array = zarr_format == 3 and metadata_dict.get("node_type") == "array" + if is_v3_array or zarr_format == 2: + return AsyncArray(store_path=store_path, metadata=metadata_dict) + except (AssertionError, FileNotFoundError): + pass + return await open_group(store=store_path, zarr_format=zarr_format, mode=mode, **kwargs) + try: return await open_array(store=store_path, zarr_format=zarr_format, mode=mode, **kwargs) except KeyError: diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index fee3169e29..cc52dd3ac6 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -106,6 +106,53 @@ def create_codec_pipeline(metadata: ArrayV2Metadata | ArrayV3Metadata) -> CodecP raise TypeError +async def get_array_metadata( + store_path: StorePath, zarr_format: ZarrFormat | None = 3 +) -> dict[str, Any]: + if zarr_format == 2: + zarray_bytes, zattrs_bytes = await gather( + (store_path / ZARRAY_JSON).get(), (store_path / ZATTRS_JSON).get() + ) + if zarray_bytes is None: + raise FileNotFoundError(store_path) + elif zarr_format == 3: + zarr_json_bytes = await (store_path / ZARR_JSON).get() + if zarr_json_bytes is None: + raise FileNotFoundError(store_path) + elif zarr_format is None: + zarr_json_bytes, zarray_bytes, zattrs_bytes = await gather( + (store_path / ZARR_JSON).get(), + (store_path / ZARRAY_JSON).get(), + (store_path / ZATTRS_JSON).get(), + ) + if zarr_json_bytes is not None and zarray_bytes is not None: + # TODO: revisit this exception type + # alternatively, we could warn and favor v3 + raise ValueError("Both zarr.json and .zarray objects exist") + if zarr_json_bytes is None and zarray_bytes is None: + raise FileNotFoundError(store_path) + # set zarr_format based on which keys were found + if zarr_json_bytes is not None: + zarr_format = 3 + else: + zarr_format = 2 + else: + raise ValueError(f"unexpected zarr_format: {zarr_format}") + + metadata_dict: dict[str, Any] + if zarr_format == 2: + # V2 arrays are comprised of a .zarray and .zattrs objects + assert zarray_bytes is not None + metadata_dict = json.loads(zarray_bytes.to_bytes()) + zattrs_dict = json.loads(zattrs_bytes.to_bytes()) if zattrs_bytes is not None else {} + metadata_dict["attributes"] = zattrs_dict + else: + # V3 arrays are comprised of a zarr.json object + assert zarr_json_bytes is not None + metadata_dict = json.loads(zarr_json_bytes.to_bytes()) + return metadata_dict + + @dataclass(frozen=True) class AsyncArray: metadata: ArrayMetadata @@ -115,10 +162,17 @@ class AsyncArray: def __init__( self, - metadata: ArrayMetadata, + metadata: ArrayMetadata | dict[str, Any], store_path: StorePath, order: Literal["C", "F"] | None = None, ) -> None: + if isinstance(metadata, dict): + zarr_format = metadata["zarr_format"] + if zarr_format == 2: + metadata = ArrayV2Metadata.from_dict(metadata) + else: + metadata = ArrayV3Metadata.from_dict(metadata) + metadata_parsed = parse_array_metadata(metadata) order_parsed = parse_indexing_order(order or config.get("array.order")) @@ -341,51 +395,8 @@ async def open( zarr_format: ZarrFormat | None = 3, ) -> AsyncArray: store_path = await make_store_path(store) - - if zarr_format == 2: - zarray_bytes, zattrs_bytes = await gather( - (store_path / ZARRAY_JSON).get(), (store_path / ZATTRS_JSON).get() - ) 
- if zarray_bytes is None: - raise FileNotFoundError(store_path) - elif zarr_format == 3: - zarr_json_bytes = await (store_path / ZARR_JSON).get() - if zarr_json_bytes is None: - raise FileNotFoundError(store_path) - elif zarr_format is None: - zarr_json_bytes, zarray_bytes, zattrs_bytes = await gather( - (store_path / ZARR_JSON).get(), - (store_path / ZARRAY_JSON).get(), - (store_path / ZATTRS_JSON).get(), - ) - if zarr_json_bytes is not None and zarray_bytes is not None: - # TODO: revisit this exception type - # alternatively, we could warn and favor v3 - raise ValueError("Both zarr.json and .zarray objects exist") - if zarr_json_bytes is None and zarray_bytes is None: - raise FileNotFoundError(store_path) - # set zarr_format based on which keys were found - if zarr_json_bytes is not None: - zarr_format = 3 - else: - zarr_format = 2 - else: - raise ValueError(f"unexpected zarr_format: {zarr_format}") - - if zarr_format == 2: - # V2 arrays are comprised of a .zarray and .zattrs objects - assert zarray_bytes is not None - zarray_dict = json.loads(zarray_bytes.to_bytes()) - zattrs_dict = json.loads(zattrs_bytes.to_bytes()) if zattrs_bytes is not None else {} - zarray_dict["attributes"] = zattrs_dict - return cls(store_path=store_path, metadata=ArrayV2Metadata.from_dict(zarray_dict)) - else: - # V3 arrays are comprised of a zarr.json object - assert zarr_json_bytes is not None - return cls( - store_path=store_path, - metadata=ArrayV3Metadata.from_dict(json.loads(zarr_json_bytes.to_bytes())), - ) + metadata_dict = await get_array_metadata(store_path, zarr_format=zarr_format) + return cls(store_path=store_path, metadata=metadata_dict) @property def ndim(self) -> int: diff --git a/tests/v3/test_api.py b/tests/v3/test_api.py index 1b4330eef3..0717d542cf 100644 --- a/tests/v3/test_api.py +++ b/tests/v3/test_api.py @@ -140,7 +140,12 @@ def test_open_with_mode_r_plus(tmp_path: pathlib.Path) -> None: z2[:] = 3 -def test_open_with_mode_a(tmp_path: pathlib.Path) -> None: +async def test_open_with_mode_a(tmp_path: pathlib.Path) -> None: + # Open without shape argument should default to group + g = zarr.open(store=tmp_path, mode="a") + assert isinstance(g, Group) + await g.store_path.delete() + # 'a' means read/write (create if doesn't exist) arr = zarr.open(store=tmp_path, mode="a", shape=(3, 3)) assert isinstance(arr, Array)
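
---

Taken together, patches 17 and 21 add a store wrapper for observing I/O and change what
``zarr.open`` returns when no ``shape`` is given. The following is a minimal usage sketch
appended for illustration; it is not part of the patch series itself. It relies only on
APIs introduced in the diffs above (``LoggingStore`` and its ``counter`` attribute, plus
the group-by-default behavior of ``zarr.open``); the array path ``"a"`` and the printed
counter contents are arbitrary illustrative choices::

    import zarr
    from zarr.store import MemoryStore
    from zarr.store.logging import LoggingStore

    # Patch 17: wrap any store so that every store operation is logged and counted.
    store = LoggingStore(store=MemoryStore({}, mode="w"), log_level="INFO")

    # Patch 21: with no `shape` keyword, zarr.open now yields a Group ...
    group = zarr.open(store=store, mode="a")
    assert isinstance(group, zarr.Group)

    # ... while passing `shape` still opens (or creates) an Array.
    arr = zarr.open(store=store, mode="a", shape=(3, 3), path="a")
    arr[:] = 1

    # The wrapper tallies calls per method name, e.g. how many `set`s the write issued.
    print(dict(store.counter))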