From 122760fc3440f2f35f5904e9dc7973d5188bdaca Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Fri, 13 Dec 2024 17:04:00 +0100 Subject: [PATCH 01/12] Feat/latency store (#2474) * feat: add wrapperstore * feat: add latencystore * rename noisysetter -> noisygetter * rename _wrapped to _store * loggingstore inherits from wrapperstore * Update src/zarr/storage/wrapper.py Co-authored-by: Joe Hamman * back to asynciterators * update docstrings --------- Co-authored-by: Joe Hamman Co-authored-by: Deepak Cherian --- src/zarr/storage/__init__.py | 2 + src/zarr/storage/logging.py | 12 +-- src/zarr/storage/wrapper.py | 139 +++++++++++++++++++++++++++++++ src/zarr/testing/store.py | 96 +++++++++++++++------ tests/test_store/test_wrapper.py | 46 ++++++++++ 5 files changed, 266 insertions(+), 29 deletions(-) create mode 100644 src/zarr/storage/wrapper.py create mode 100644 tests/test_store/test_wrapper.py diff --git a/src/zarr/storage/__init__.py b/src/zarr/storage/__init__.py index 6703aa2723..17b11f54a6 100644 --- a/src/zarr/storage/__init__.py +++ b/src/zarr/storage/__init__.py @@ -3,6 +3,7 @@ from zarr.storage.logging import LoggingStore from zarr.storage.memory import MemoryStore from zarr.storage.remote import RemoteStore +from zarr.storage.wrapper import WrapperStore from zarr.storage.zip import ZipStore __all__ = [ @@ -12,6 +13,7 @@ "RemoteStore", "StoreLike", "StorePath", + "WrapperStore", "ZipStore", "make_store_path", ] diff --git a/src/zarr/storage/logging.py b/src/zarr/storage/logging.py index bc90b4f30f..9ec3a9be18 100644 --- a/src/zarr/storage/logging.py +++ b/src/zarr/storage/logging.py @@ -7,15 +7,19 @@ from contextlib import contextmanager from typing import TYPE_CHECKING, Any -from zarr.abc.store import ByteRangeRequest, Store +from zarr.abc.store import Store +from zarr.storage.wrapper import WrapperStore if TYPE_CHECKING: from collections.abc import AsyncIterator, Generator, Iterable + from zarr.abc.store import ByteRangeRequest from zarr.core.buffer 
import Buffer, BufferPrototype + counter: defaultdict[str, int] + -class LoggingStore(Store): +class LoggingStore(WrapperStore[Store]): """ Store wrapper that logs all calls to the wrapped store. @@ -34,7 +38,6 @@ class LoggingStore(Store): Counter of number of times each method has been called """ - _store: Store counter: defaultdict[str, int] def __init__( @@ -43,11 +46,10 @@ def __init__( log_level: str = "DEBUG", log_handler: logging.Handler | None = None, ) -> None: - self._store = store + super().__init__(store) self.counter = defaultdict(int) self.log_level = log_level self.log_handler = log_handler - self._configure_logger(log_level, log_handler) def _configure_logger( diff --git a/src/zarr/storage/wrapper.py b/src/zarr/storage/wrapper.py new file mode 100644 index 0000000000..c160100084 --- /dev/null +++ b/src/zarr/storage/wrapper.py @@ -0,0 +1,139 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Generic, TypeVar + +if TYPE_CHECKING: + from collections.abc import AsyncGenerator, AsyncIterator, Iterable + from types import TracebackType + from typing import Any, Self + + from zarr.abc.store import ByteRangeRequest + from zarr.core.buffer import Buffer, BufferPrototype + from zarr.core.common import BytesLike + +from zarr.abc.store import Store + +T_Store = TypeVar("T_Store", bound=Store) + + +class WrapperStore(Store, Generic[T_Store]): + """ + A store class that wraps an existing ``Store`` instance. + By default all of the store methods are delegated to the wrapped store instance, which is + accessible via the ``._store`` attribute of this class. + + Use this class to modify or extend the behavior of the other store classes. 
+ """ + + _store: T_Store + + def __init__(self, store: T_Store) -> None: + self._store = store + + @classmethod + async def open(cls: type[Self], store_cls: type[T_Store], *args: Any, **kwargs: Any) -> Self: + store = store_cls(*args, **kwargs) + await store._open() + return cls(store=store) + + def __enter__(self) -> Self: + return type(self)(self._store.__enter__()) + + def __exit__( + self, + exc_type: type[BaseException] | None, + exc_value: BaseException | None, + traceback: TracebackType | None, + ) -> None: + return self._store.__exit__(exc_type, exc_value, traceback) + + async def _open(self) -> None: + await self._store._open() + + async def _ensure_open(self) -> None: + await self._store._ensure_open() + + async def is_empty(self, prefix: str) -> bool: + return await self._store.is_empty(prefix) + + async def clear(self) -> None: + return await self._store.clear() + + @property + def read_only(self) -> bool: + return self._store.read_only + + def _check_writable(self) -> None: + return self._store._check_writable() + + def __eq__(self, value: object) -> bool: + return type(self) is type(value) and self._store.__eq__(value) + + async def get( + self, key: str, prototype: BufferPrototype, byte_range: ByteRangeRequest | None = None + ) -> Buffer | None: + return await self._store.get(key, prototype, byte_range) + + async def get_partial_values( + self, + prototype: BufferPrototype, + key_ranges: Iterable[tuple[str, ByteRangeRequest]], + ) -> list[Buffer | None]: + return await self._store.get_partial_values(prototype, key_ranges) + + async def exists(self, key: str) -> bool: + return await self._store.exists(key) + + async def set(self, key: str, value: Buffer) -> None: + await self._store.set(key, value) + + async def set_if_not_exists(self, key: str, value: Buffer) -> None: + return await self._store.set_if_not_exists(key, value) + + async def _set_many(self, values: Iterable[tuple[str, Buffer]]) -> None: + await self._store._set_many(values) + + 
@property + def supports_writes(self) -> bool: + return self._store.supports_writes + + @property + def supports_deletes(self) -> bool: + return self._store.supports_deletes + + async def delete(self, key: str) -> None: + await self._store.delete(key) + + @property + def supports_partial_writes(self) -> bool: + return self._store.supports_partial_writes + + async def set_partial_values( + self, key_start_values: Iterable[tuple[str, int, BytesLike]] + ) -> None: + return await self._store.set_partial_values(key_start_values) + + @property + def supports_listing(self) -> bool: + return self._store.supports_listing + + def list(self) -> AsyncIterator[str]: + return self._store.list() + + def list_prefix(self, prefix: str) -> AsyncIterator[str]: + return self._store.list_prefix(prefix) + + def list_dir(self, prefix: str) -> AsyncIterator[str]: + return self._store.list_dir(prefix) + + async def delete_dir(self, prefix: str) -> None: + return await self._store.delete_dir(prefix) + + def close(self) -> None: + self._store.close() + + async def _get_many( + self, requests: Iterable[tuple[str, BufferPrototype, ByteRangeRequest | None]] + ) -> AsyncGenerator[tuple[str, Buffer | None], None]: + async for req in self._store._get_many(requests): + yield req diff --git a/src/zarr/testing/store.py b/src/zarr/testing/store.py index b793f2d67b..53dee012bf 100644 --- a/src/zarr/testing/store.py +++ b/src/zarr/testing/store.py @@ -1,9 +1,20 @@ +from __future__ import annotations + +import asyncio import pickle -from typing import Any, Generic, TypeVar +from typing import TYPE_CHECKING, Generic, TypeVar + +from zarr.storage.wrapper import WrapperStore + +if TYPE_CHECKING: + from typing import Any + + from zarr.abc.store import ByteRangeRequest + from zarr.core.buffer.core import BufferPrototype import pytest -from zarr.abc.store import Store +from zarr.abc.store import ByteRangeRequest, Store from zarr.core.buffer import Buffer, default_buffer_prototype from zarr.core.sync import 
_collect_aiterator from zarr.storage._utils import _normalize_interval_index @@ -319,25 +330,62 @@ async def test_set_if_not_exists(self, store: S) -> None: result = await store.get("k2", default_buffer_prototype()) assert result == new - async def test_getsize(self, store: S) -> None: - key = "k" - data = self.buffer_cls.from_bytes(b"0" * 10) - await self.set(store, key, data) - - result = await store.getsize(key) - assert isinstance(result, int) - assert result > 0 - - async def test_getsize_raises(self, store: S) -> None: - with pytest.raises(FileNotFoundError): - await store.getsize("not-a-real-key") - - async def test_getsize_prefix(self, store: S) -> None: - prefix = "array/c/" - for i in range(10): - data = self.buffer_cls.from_bytes(b"0" * 10) - await self.set(store, f"{prefix}/{i}", data) - - result = await store.getsize_prefix(prefix) - assert isinstance(result, int) - assert result > 0 + +class LatencyStore(WrapperStore[Store]): + """ + A wrapper class that takes any store class in its constructor and + adds latency to the `set` and `get` methods. This can be used for + performance testing. + """ + + get_latency: float + set_latency: float + + def __init__(self, cls: Store, *, get_latency: float = 0, set_latency: float = 0) -> None: + self.get_latency = float(get_latency) + self.set_latency = float(set_latency) + self._store = cls + + async def set(self, key: str, value: Buffer) -> None: + """ + Add latency to the ``set`` method. + + Calls ``asyncio.sleep(self.set_latency)`` before invoking the wrapped ``set`` method. + + Parameters + ---------- + key : str + The key to set + value : Buffer + The value to set + + Returns + ------- + None + """ + await asyncio.sleep(self.set_latency) + await self._store.set(key, value) + + async def get( + self, key: str, prototype: BufferPrototype, byte_range: ByteRangeRequest | None = None + ) -> Buffer | None: + """ + Add latency to the ``get`` method. 
+ + Calls ``asyncio.sleep(self.get_latency)`` before invoking the wrapped ``get`` method. + + Parameters + ---------- + key : str + The key to get + prototype : BufferPrototype + The BufferPrototype to use. + byte_range : ByteRangeRequest, optional + An optional byte range. + + Returns + ------- + buffer : Buffer or None + """ + await asyncio.sleep(self.get_latency) + return await self._store.get(key, prototype=prototype, byte_range=byte_range) diff --git a/tests/test_store/test_wrapper.py b/tests/test_store/test_wrapper.py new file mode 100644 index 0000000000..1caf9c9ae4 --- /dev/null +++ b/tests/test_store/test_wrapper.py @@ -0,0 +1,46 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +import pytest + +from zarr.core.buffer.cpu import Buffer, buffer_prototype +from zarr.storage.wrapper import WrapperStore + +if TYPE_CHECKING: + from zarr.abc.store import Store + from zarr.core.buffer.core import BufferPrototype + + +@pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=True) +async def test_wrapped_set(store: Store, capsys: pytest.CaptureFixture[str]) -> None: + # define a class that prints when it sets + class NoisySetter(WrapperStore): + async def set(self, key: str, value: Buffer) -> None: + print(f"setting {key}") + await super().set(key, value) + + key = "foo" + value = Buffer.from_bytes(b"bar") + store_wrapped = NoisySetter(store) + await store_wrapped.set(key, value) + captured = capsys.readouterr() + assert f"setting {key}" in captured.out + assert await store_wrapped.get(key, buffer_prototype) == value + + +@pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=True) +async def test_wrapped_get(store: Store, capsys: pytest.CaptureFixture[str]) -> None: + # define a class that prints when it sets + class NoisyGetter(WrapperStore): + def get(self, key: str, prototype: BufferPrototype) -> None: + print(f"getting {key}") + return super().get(key, prototype=prototype) + + key = "foo" + value = 
Buffer.from_bytes(b"bar") + store_wrapped = NoisyGetter(store) + await store_wrapped.set(key, value) + assert await store_wrapped.get(key, buffer_prototype) == value + captured = capsys.readouterr() + assert f"getting {key}" in captured.out From ab1a7b3758a7e14952b546e4f18d1d9e59168f5d Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Sat, 14 Dec 2024 20:20:56 -0500 Subject: [PATCH 02/12] rename RemoteStore -> FsspecStore (#2557) * rename RemoteStore -> FsspecStore * release note * fix store type * fixup doc build --- docs/release.rst | 2 ++ src/zarr/storage/__init__.py | 4 +-- src/zarr/storage/common.py | 4 +-- src/zarr/storage/{remote.py => fsspec.py} | 22 +++++++------- tests/conftest.py | 16 +++++----- tests/test_store/test_core.py | 4 +-- .../{test_remote.py => test_fsspec.py} | 30 +++++++++---------- 7 files changed, 42 insertions(+), 40 deletions(-) rename src/zarr/storage/{remote.py => fsspec.py} (96%) rename tests/test_store/{test_remote.py => test_fsspec.py} (89%) diff --git a/docs/release.rst b/docs/release.rst index 7f424c00e2..dd60502e85 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -39,6 +39,8 @@ Dependency Changes fsspec and any relevant implementations (e.g. s3fs) before using the ``RemoteStore``. By :user:`Joe Hamman ` :issue:`2391`. +* ``RemoteStore`` was renamed to ``FsspecStore``. + By :user:`Joe Hamman ` :issue:`2557`. .. 
release_3.0.0-alpha: diff --git a/src/zarr/storage/__init__.py b/src/zarr/storage/__init__.py index 17b11f54a6..6f3ec59b01 100644 --- a/src/zarr/storage/__init__.py +++ b/src/zarr/storage/__init__.py @@ -1,16 +1,16 @@ from zarr.storage.common import StoreLike, StorePath, make_store_path +from zarr.storage.fsspec import FsspecStore from zarr.storage.local import LocalStore from zarr.storage.logging import LoggingStore from zarr.storage.memory import MemoryStore -from zarr.storage.remote import RemoteStore from zarr.storage.wrapper import WrapperStore from zarr.storage.zip import ZipStore __all__ = [ + "FsspecStore", "LocalStore", "LoggingStore", "MemoryStore", - "RemoteStore", "StoreLike", "StorePath", "WrapperStore", diff --git a/src/zarr/storage/common.py b/src/zarr/storage/common.py index e9d57197e1..973c8b13e3 100644 --- a/src/zarr/storage/common.py +++ b/src/zarr/storage/common.py @@ -281,7 +281,7 @@ async def make_store_path( TypeError If the StoreLike object is not one of the supported types. 
""" - from zarr.storage.remote import RemoteStore # circular import + from zarr.storage.fsspec import FsspecStore # circular import used_storage_options = False path_normalized = normalize_path(path) @@ -302,7 +302,7 @@ async def make_store_path( if _is_fsspec_uri(store_like): used_storage_options = True - store = RemoteStore.from_url( + store = FsspecStore.from_url( store_like, storage_options=storage_options, read_only=_read_only ) else: diff --git a/src/zarr/storage/remote.py b/src/zarr/storage/fsspec.py similarity index 96% rename from src/zarr/storage/remote.py rename to src/zarr/storage/fsspec.py index 2b8329c9fa..c9edd8f8ac 100644 --- a/src/zarr/storage/remote.py +++ b/src/zarr/storage/fsspec.py @@ -22,7 +22,7 @@ ) -class RemoteStore(Store): +class FsspecStore(Store): """ A remote Store based on FSSpec @@ -61,8 +61,8 @@ class RemoteStore(Store): See Also -------- - RemoteStore.from_upath - RemoteStore.from_url + FsspecStore.from_upath + FsspecStore.from_url """ # based on FSSpec @@ -96,7 +96,7 @@ def __init__( if "://" in path and not path.startswith("http"): # `not path.startswith("http")` is a special case for the http filesystem (¯\_(ツ)_/¯) scheme, _ = path.split("://", maxsplit=1) - raise ValueError(f"path argument to RemoteStore must not include scheme ({scheme}://)") + raise ValueError(f"path argument to FsspecStore must not include scheme ({scheme}://)") @classmethod def from_upath( @@ -104,9 +104,9 @@ def from_upath( upath: Any, read_only: bool = False, allowed_exceptions: tuple[type[Exception], ...] = ALLOWED_EXCEPTIONS, - ) -> RemoteStore: + ) -> FsspecStore: """ - Create a RemoteStore from an upath object. + Create a FsspecStore from an upath object. Parameters ---------- @@ -120,7 +120,7 @@ def from_upath( Returns ------- - RemoteStore + FsspecStore """ return cls( fs=upath.fs, @@ -136,9 +136,9 @@ def from_url( storage_options: dict[str, Any] | None = None, read_only: bool = False, allowed_exceptions: tuple[type[Exception], ...] 
= ALLOWED_EXCEPTIONS, - ) -> RemoteStore: + ) -> FsspecStore: """ - Create a RemoteStore from a URL. + Create a FsspecStore from a URL. Parameters ---------- @@ -154,7 +154,7 @@ def from_url( Returns ------- - RemoteStore + FsspecStore """ try: from fsspec import url_to_fs @@ -185,7 +185,7 @@ async def clear(self) -> None: pass def __repr__(self) -> str: - return f"" + return f"" def __eq__(self, other: object) -> bool: return ( diff --git a/tests/conftest.py b/tests/conftest.py index fbef922931..ee31d0d071 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -13,7 +13,7 @@ from zarr.abc.store import Store from zarr.core.sync import sync from zarr.storage import LocalStore, MemoryStore, StorePath, ZipStore -from zarr.storage.remote import RemoteStore +from zarr.storage.fsspec import FsspecStore if TYPE_CHECKING: from collections.abc import Generator @@ -25,14 +25,14 @@ async def parse_store( - store: Literal["local", "memory", "remote", "zip"], path: str -) -> LocalStore | MemoryStore | RemoteStore | ZipStore: + store: Literal["local", "memory", "fsspec", "zip"], path: str +) -> LocalStore | MemoryStore | FsspecStore | ZipStore: if store == "local": return await LocalStore.open(path) if store == "memory": return await MemoryStore.open() - if store == "remote": - return await RemoteStore.open(url=path) + if store == "fsspec": + return await FsspecStore.open(url=path) if store == "zip": return await ZipStore.open(path + "/zarr.zip", mode="w") raise AssertionError @@ -56,8 +56,8 @@ async def local_store(tmpdir: LEGACY_PATH) -> LocalStore: @pytest.fixture -async def remote_store(url: str) -> RemoteStore: - return await RemoteStore.open(url) +async def remote_store(url: str) -> FsspecStore: + return await FsspecStore.open(url) @pytest.fixture @@ -87,7 +87,7 @@ def sync_store(request: pytest.FixtureRequest, tmp_path: LEGACY_PATH) -> Store: @dataclass class AsyncGroupRequest: zarr_format: ZarrFormat - store: Literal["local", "remote", "memory", "zip"] + store: 
Literal["local", "fsspec", "memory", "zip"] attributes: dict[str, Any] = field(default_factory=dict) diff --git a/tests/test_store/test_core.py b/tests/test_store/test_core.py index 81ed3744a9..48f8d2a529 100644 --- a/tests/test_store/test_core.py +++ b/tests/test_store/test_core.py @@ -7,9 +7,9 @@ from zarr.core.common import AccessModeLiteral from zarr.storage._utils import normalize_path from zarr.storage.common import StoreLike, StorePath, make_store_path +from zarr.storage.fsspec import FsspecStore from zarr.storage.local import LocalStore from zarr.storage.memory import MemoryStore -from zarr.storage.remote import RemoteStore @pytest.mark.parametrize("path", [None, "", "bar"]) @@ -73,7 +73,7 @@ async def test_make_store_path_invalid() -> None: async def test_make_store_path_fsspec(monkeypatch) -> None: pytest.importorskip("fsspec") store_path = await make_store_path("http://foo.com/bar") - assert isinstance(store_path.store, RemoteStore) + assert isinstance(store_path.store, FsspecStore) @pytest.mark.parametrize( diff --git a/tests/test_store/test_remote.py b/tests/test_store/test_fsspec.py similarity index 89% rename from tests/test_store/test_remote.py rename to tests/test_store/test_fsspec.py index c7f33e4b39..b307f2cdf4 100644 --- a/tests/test_store/test_remote.py +++ b/tests/test_store/test_fsspec.py @@ -10,7 +10,7 @@ import zarr.api.asynchronous from zarr.core.buffer import Buffer, cpu, default_buffer_prototype from zarr.core.sync import _collect_aiterator, sync -from zarr.storage import RemoteStore +from zarr.storage import FsspecStore from zarr.testing.store import StoreTests if TYPE_CHECKING: @@ -84,7 +84,7 @@ def s3(s3_base: None) -> Generator[s3fs.S3FileSystem, None, None]: async def test_basic() -> None: - store = RemoteStore.from_url( + store = FsspecStore.from_url( f"s3://{test_bucket_name}/foo/spam/", storage_options={"endpoint_url": endpoint_url, "anon": False}, ) @@ -102,8 +102,8 @@ async def test_basic() -> None: assert out[0].to_bytes() == 
data[1:] -class TestRemoteStoreS3(StoreTests[RemoteStore, cpu.Buffer]): - store_cls = RemoteStore +class TestFsspecStoreS3(StoreTests[FsspecStore, cpu.Buffer]): + store_cls = FsspecStore buffer_cls = cpu.Buffer @pytest.fixture @@ -114,36 +114,36 @@ def store_kwargs(self, request) -> dict[str, str | bool]: return {"fs": fs, "path": path} @pytest.fixture - def store(self, store_kwargs: dict[str, str | bool]) -> RemoteStore: + def store(self, store_kwargs: dict[str, str | bool]) -> FsspecStore: return self.store_cls(**store_kwargs) - async def get(self, store: RemoteStore, key: str) -> Buffer: + async def get(self, store: FsspecStore, key: str) -> Buffer: # make a new, synchronous instance of the filesystem because this test is run in sync code new_fs = fsspec.filesystem( "s3", endpoint_url=store.fs.endpoint_url, anon=store.fs.anon, asynchronous=False ) return self.buffer_cls.from_bytes(new_fs.cat(f"{store.path}/{key}")) - async def set(self, store: RemoteStore, key: str, value: Buffer) -> None: + async def set(self, store: FsspecStore, key: str, value: Buffer) -> None: # make a new, synchronous instance of the filesystem because this test is run in sync code new_fs = fsspec.filesystem( "s3", endpoint_url=store.fs.endpoint_url, anon=store.fs.anon, asynchronous=False ) new_fs.write_bytes(f"{store.path}/{key}", value.to_bytes()) - def test_store_repr(self, store: RemoteStore) -> None: - assert str(store) == "" + def test_store_repr(self, store: FsspecStore) -> None: + assert str(store) == "" - def test_store_supports_writes(self, store: RemoteStore) -> None: + def test_store_supports_writes(self, store: FsspecStore) -> None: assert store.supports_writes - def test_store_supports_partial_writes(self, store: RemoteStore) -> None: + def test_store_supports_partial_writes(self, store: FsspecStore) -> None: assert not store.supports_partial_writes - def test_store_supports_listing(self, store: RemoteStore) -> None: + def test_store_supports_listing(self, store: FsspecStore) 
-> None: assert store.supports_listing - async def test_remote_store_from_uri(self, store: RemoteStore): + async def test_fsspec_store_from_uri(self, store: FsspecStore) -> None: storage_options = { "endpoint_url": endpoint_url, "anon": False, @@ -188,7 +188,7 @@ def test_from_upath(self) -> None: anon=False, asynchronous=True, ) - result = RemoteStore.from_upath(path) + result = FsspecStore.from_upath(path) assert result.fs.endpoint_url == endpoint_url assert result.fs.asynchronous assert result.path == f"{test_bucket_name}/foo/bar" @@ -197,7 +197,7 @@ def test_init_raises_if_path_has_scheme(self, store_kwargs) -> None: # regression test for https://github.com/zarr-developers/zarr-python/issues/2342 store_kwargs["path"] = "s3://" + store_kwargs["path"] with pytest.raises( - ValueError, match="path argument to RemoteStore must not include scheme .*" + ValueError, match="path argument to FsspecStore must not include scheme .*" ): self.store_cls(**store_kwargs) From 9972066af88cb5e64f1ba5d4564d66572703e6cc Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 16 Dec 2024 12:23:13 +0000 Subject: [PATCH 03/12] Bump pypa/gh-action-pypi-publish in the actions group (#2562) Bumps the actions group with 1 update: [pypa/gh-action-pypi-publish](https://github.com/pypa/gh-action-pypi-publish). Updates `pypa/gh-action-pypi-publish` from 1.12.2 to 1.12.3 - [Release notes](https://github.com/pypa/gh-action-pypi-publish/releases) - [Commits](https://github.com/pypa/gh-action-pypi-publish/compare/v1.12.2...v1.12.3) --- updated-dependencies: - dependency-name: pypa/gh-action-pypi-publish dependency-type: direct:production update-type: version-update:semver-patch dependency-group: actions ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/releases.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/releases.yml b/.github/workflows/releases.yml index 34d6696413..375d9651d5 100644 --- a/.github/workflows/releases.yml +++ b/.github/workflows/releases.yml @@ -55,7 +55,7 @@ jobs: with: name: releases path: dist - - uses: pypa/gh-action-pypi-publish@v1.12.2 + - uses: pypa/gh-action-pypi-publish@v1.12.3 with: user: __token__ password: ${{ secrets.pypi_password }} From 77d0b112cb8211d3e033c502f35c96d230b78647 Mon Sep 17 00:00:00 2001 From: David Stansby Date: Mon, 16 Dec 2024 12:43:18 +0000 Subject: [PATCH 04/12] Clean up optional dependency groups (#2541) * Clean up optional dependency groups * Fix hatch envs * Remove jupyter extra * Drop upath and tree dependency groups * Change fsspec group to remote group * Add a v3 what's new * Fix optional group * Fix spelling --------- Co-authored-by: Joe Hamman --- docs/guide/index.rst | 1 + docs/guide/whatsnew_v3.rst | 14 ++++++++++++++ pyproject.toml | 33 ++++++++++----------------------- 3 files changed, 25 insertions(+), 23 deletions(-) create mode 100644 docs/guide/whatsnew_v3.rst diff --git a/docs/guide/index.rst b/docs/guide/index.rst index f841dbb85d..e532a13e20 100644 --- a/docs/guide/index.rst +++ b/docs/guide/index.rst @@ -4,5 +4,6 @@ Guide .. toctree:: :maxdepth: 1 + whatsnew_v3 storage consolidated_metadata diff --git a/docs/guide/whatsnew_v3.rst b/docs/guide/whatsnew_v3.rst new file mode 100644 index 0000000000..302c3cf20c --- /dev/null +++ b/docs/guide/whatsnew_v3.rst @@ -0,0 +1,14 @@ +What's new in v3 +================ + +This page gives an overview of major changes and additions in version 3. + + +Dependencies +------------ +- The new ``remote`` dependency group can be used to install a supported version of + ``fsspec``, required for remote data access. 
+- The new ``gpu`` dependency group can be used to install a supported version of + ``cuda``, required for GPU functionality. +- The ``jupyter`` optional dependency group has been removed, since v3 contains no + jupyter specific functionality. diff --git a/pyproject.toml b/pyproject.toml index 5f2d7569b9..b438a2c292 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,9 +53,14 @@ license = {text = "MIT License"} keywords = ["Python", "compressed", "ndimensional-arrays", "zarr"] [project.optional-dependencies] -fsspec = [ +# User extras +remote = [ "fsspec>=2023.10.0", ] +gpu = [ + "cupy-cuda12x", +] +# Development extras test = [ "coverage", "pytest", @@ -68,15 +73,7 @@ test = [ "hypothesis", "universal-pathlib", ] - -jupyter = [ - 'notebook', - 'ipytree>=0.2.2', - 'ipywidgets>=8.0.0', -] -gpu = [ - "cupy-cuda12x", -] +optional = ["rich", "universal-pathlib"] docs = [ 'sphinx==8.1.3', 'sphinx-autobuild>=2021.3.14', @@ -87,19 +84,9 @@ docs = [ 'pydata-sphinx-theme', 'numpydoc', 'numcodecs[msgpack]', - 'msgpack', -] -extra = [ - 'msgpack', -] -optional = [ - 'universal-pathlib>=0.0.22', - 'rich' -] -tree = [ - 'rich', ] + [project.urls] "Bug Tracker" = "https://github.com/zarr-developers/zarr-python/issues" Changelog = "https://zarr.readthedocs.io/en/stable/release.html" @@ -129,7 +116,7 @@ dependencies = [ "numpy~={matrix:numpy}", "universal_pathlib", ] -features = ["test", "extra"] +features = ["test"] [[tool.hatch.envs.test.matrix]] python = ["3.11", "3.12", "3.13"] @@ -160,7 +147,7 @@ dependencies = [ "numpy~={matrix:numpy}", "universal_pathlib", ] -features = ["test", "extra", "gpu"] +features = ["test", "gpu"] [[tool.hatch.envs.gputest.matrix]] python = ["3.11", "3.12", "3.13"] From 775979fce31175149a35aee04f0f7b8cb29ab9a4 Mon Sep 17 00:00:00 2001 From: Norman Rzepka Date: Mon, 16 Dec 2024 14:57:02 +0100 Subject: [PATCH 05/12] add warnings when using non-spec features with v3 (#2556) * add warnings when using non-spec features with v3 * tweak 
signature * may change in the future * wording --- pyproject.toml | 1 + src/zarr/api/asynchronous.py | 9 +++++++++ src/zarr/codecs/vlen_utf8.py | 19 +++++++++++++++++++ src/zarr/core/array.py | 13 +++++++++++-- src/zarr/core/metadata/v3.py | 10 +++++----- 5 files changed, 45 insertions(+), 7 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index b438a2c292..7b516bbc05 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -363,6 +363,7 @@ filterwarnings = [ "ignore:The loop argument is deprecated since Python 3.8.*:DeprecationWarning", "ignore:Creating a zarr.buffer.gpu.*:UserWarning", "ignore:Duplicate name:UserWarning", # from ZipFile + "ignore:.*is currently not part in the Zarr version 3 specification.*:UserWarning", ] markers = [ "gpu: mark a test as requiring CuPy and GPU" diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index b5dbb0cfa5..2d1c26e145 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -195,6 +195,14 @@ async def consolidate_metadata( v = dataclasses.replace(v, consolidated_metadata=ConsolidatedMetadata(metadata={})) members_metadata[k] = v + if any(m.zarr_format == 3 for m in members_metadata.values()): + warnings.warn( + "Consolidated metadata is currently not part in the Zarr version 3 specification. 
It " + "may not be supported by other zarr implementations and may change in the future.", + category=UserWarning, + stacklevel=1, + ) + ConsolidatedMetadata._flat_to_nested(members_metadata) consolidated_metadata = ConsolidatedMetadata(metadata=members_metadata) @@ -203,6 +211,7 @@ async def consolidate_metadata( group, metadata=metadata, ) + await group._save_metadata() return group diff --git a/src/zarr/codecs/vlen_utf8.py b/src/zarr/codecs/vlen_utf8.py index 43544e0809..e5b895ae0c 100644 --- a/src/zarr/codecs/vlen_utf8.py +++ b/src/zarr/codecs/vlen_utf8.py @@ -2,6 +2,7 @@ from dataclasses import dataclass from typing import TYPE_CHECKING +from warnings import warn import numpy as np from numcodecs.vlen import VLenBytes, VLenUTF8 @@ -25,6 +26,15 @@ @dataclass(frozen=True) class VLenUTF8Codec(ArrayBytesCodec): + def __init__(self) -> None: + warn( + "The codec `vlen-utf8` is currently not part in the Zarr version 3 specification. It " + "may not be supported by other zarr implementations and may change in the future.", + category=UserWarning, + stacklevel=2, + ) + super().__init__() + @classmethod def from_dict(cls, data: dict[str, JSON]) -> Self: _, configuration_parsed = parse_named_configuration( @@ -71,6 +81,15 @@ def compute_encoded_size(self, input_byte_length: int, _chunk_spec: ArraySpec) - @dataclass(frozen=True) class VLenBytesCodec(ArrayBytesCodec): + def __init__(self) -> None: + warn( + "The codec `vlen-bytes` is currently not part in the Zarr version 3 specification. 
It " + "may not be supported by other zarr implementations and may change in the future.", + category=UserWarning, + stacklevel=2, + ) + super().__init__() + @classmethod def from_dict(cls, data: dict[str, JSON]) -> Self: _, configuration_parsed = parse_named_configuration( diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index aab7e2a527..b57712717b 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -6,6 +6,7 @@ from itertools import starmap from logging import getLogger from typing import TYPE_CHECKING, Any, Generic, Literal, cast, overload +from warnings import warn import numpy as np import numpy.typing as npt @@ -539,7 +540,7 @@ async def _create_v3( store_path: StorePath, *, shape: ShapeLike, - dtype: npt.DTypeLike, + dtype: np.dtype[Any], chunk_shape: ChunkCoords, fill_value: Any | None = None, order: MemoryOrder | None = None, @@ -580,6 +581,14 @@ async def _create_v3( else DefaultChunkKeyEncoding(separator=chunk_key_encoding[1]) ) + if dtype.kind in "UTS": + warn( + f"The dtype `{dtype}` is currently not part in the Zarr version 3 specification. 
It " + "may not be supported by other zarr implementations and may change in the future.", + category=UserWarning, + stacklevel=2, + ) + metadata = ArrayV3Metadata( shape=shape, data_type=dtype, @@ -601,7 +610,7 @@ async def _create_v2( store_path: StorePath, *, shape: ChunkCoords, - dtype: npt.DTypeLike, + dtype: np.dtype[Any], chunks: ChunkCoords, dimension_separator: Literal[".", "/"] | None = None, fill_value: float | None = None, diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py index b800ae4d73..3e925e08bd 100644 --- a/src/zarr/core/metadata/v3.py +++ b/src/zarr/core/metadata/v3.py @@ -95,14 +95,14 @@ def validate_codecs(codecs: tuple[Codec, ...], dtype: DataType) -> None: # we need to have special codecs if we are decoding vlen strings or bytestrings # TODO: use codec ID instead of class name - codec_id = abc.__class__.__name__ - if dtype == DataType.string and not codec_id == "VLenUTF8Codec": + codec_class_name = abc.__class__.__name__ + if dtype == DataType.string and not codec_class_name == "VLenUTF8Codec": raise ValueError( - f"For string dtype, ArrayBytesCodec must be `VLenUTF8Codec`, got `{codec_id}`." + f"For string dtype, ArrayBytesCodec must be `VLenUTF8Codec`, got `{codec_class_name}`." ) - if dtype == DataType.bytes and not codec_id == "VLenBytesCodec": + if dtype == DataType.bytes and not codec_class_name == "VLenBytesCodec": raise ValueError( - f"For bytes dtype, ArrayBytesCodec must be `VLenBytesCodec`, got `{codec_id}`." + f"For bytes dtype, ArrayBytesCodec must be `VLenBytesCodec`, got `{codec_class_name}`." 
) From c0f7ece3aba07c208431f1401f5635c072cb3033 Mon Sep 17 00:00:00 2001 From: David Stansby Date: Mon, 16 Dec 2024 16:03:02 +0000 Subject: [PATCH 06/12] Shorten contributing page title (#2565) --- docs/contributing.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/contributing.rst b/docs/contributing.rst index 8038330239..6b0567f38d 100644 --- a/docs/contributing.rst +++ b/docs/contributing.rst @@ -1,5 +1,5 @@ -Contributing to Zarr -==================== +Contributing +============ Zarr is a community maintained project. We welcome contributions in the form of bug reports, bug fixes, documentation, enhancement proposals and more. This page provides From a615ee90b0409e890998f12ad1a593173f1d729a Mon Sep 17 00:00:00 2001 From: David Stansby Date: Tue, 17 Dec 2024 11:38:56 +0000 Subject: [PATCH 07/12] Remove specs pages from docs (#2555) * Remove specs pages from docs * Add spec doc redirects * Fix redirects * Fix index page typo Co-authored-by: Josh Moore --------- Co-authored-by: Josh Moore --- docs/conf.py | 8 ++++++++ docs/contributing.rst | 3 ++- docs/index.rst | 4 ++-- docs/release.rst | 20 ++++++++++---------- docs/spec.rst | 11 ----------- docs/spec/v1.rst | 7 ------- docs/spec/v2.rst | 7 ------- docs/spec/v3.rst | 7 ------- pyproject.toml | 1 + test.py | 7 +++++++ 10 files changed, 30 insertions(+), 45 deletions(-) delete mode 100644 docs/spec.rst delete mode 100644 docs/spec/v1.rst delete mode 100644 docs/spec/v2.rst delete mode 100644 docs/spec/v3.rst create mode 100644 test.py diff --git a/docs/conf.py b/docs/conf.py index 72c6130a16..5f714421d3 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -47,6 +47,7 @@ "sphinx_issues", "sphinx_copybutton", "sphinx_design", + 'sphinx_reredirects', ] issues_github_path = "zarr-developers/zarr-python" @@ -81,6 +82,13 @@ version = get_version("zarr") release = get_version("zarr") +redirects = { + "spec": "https://zarr-specs.readthedocs.io", + "spec/v1": 
'https://zarr-specs.readthedocs.io/en/latest/v1/v1.0.html', + "spec/v2": "https://zarr-specs.readthedocs.io/en/latest/v2/v2.0.html", + "spec/v3": "https://zarr-specs.readthedocs.io/en/latest/v3/core/v3.0.html", +} + # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. # diff --git a/docs/contributing.rst b/docs/contributing.rst index 6b0567f38d..0ead6c8267 100644 --- a/docs/contributing.rst +++ b/docs/contributing.rst @@ -307,7 +307,8 @@ Data format compatibility The data format used by Zarr is defined by a specification document, which should be platform-independent and contain sufficient detail to construct an interoperable software library to read and/or write Zarr data using any programming language. The -latest version of the specification document is available from the :ref:`spec` page. +latest version of the specification document is available on the +`Zarr specifications website `_. Here, **data format compatibility** means that all software libraries that implement a particular version of the Zarr storage specification are interoperable, in the sense diff --git a/docs/index.rst b/docs/index.rst index d0b41ed634..82ed2889f4 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -12,7 +12,6 @@ Zarr-Python tutorial guide/index api/index - spec release license contributing @@ -26,7 +25,8 @@ Zarr-Python `Installation `_ | `Source Repository `_ | `Issue Tracker `_ | -`Zulip Chat `_ +`Zulip Chat `_ | +`Zarr specifications `_ Zarr is a file storage format for chunked, compressed, N-dimensional arrays based on an open-source specification. diff --git a/docs/release.rst b/docs/release.rst index dd60502e85..be0919f08b 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -218,17 +218,17 @@ Typing Maintenance ~~~~~~~~~~~ -* Remedy a situation where ``zarr-python`` was importing ``DummyStorageTransformer`` from the test suite. 
+* Remedy a situation where ``zarr-python`` was importing ``DummyStorageTransformer`` from the test suite. The dependency relationship is now reversed: the test suite imports this class from ``zarr-python``. By :user:`Davis Bennett ` :issue:`1601`. -* [V3] Update minimum supported Python and Numpy versions. +* [V3] Update minimum supported Python and Numpy versions. By :user:`Joe Hamman ` :issue:`1638` * use src layout and use hatch for packaging. By :user:`Davis Bennett ` :issue:`1592`. -* temporarily disable mypy in v3 directory. +* temporarily disable mypy in v3 directory. By :user:`Joe Hamman ` :issue:`1649`. * create hatch test env. @@ -315,10 +315,10 @@ Maintenance Documentation ~~~~~~~~~~~~~ -* Specify docs hatch env for v3 branch. +* Specify docs hatch env for v3 branch. By :user:`Max Jones ` :issue:`1655`. -* Development installation/contributing docs updates. +* Development installation/contributing docs updates. By :user:`Alden Keefe Sampson ` :issue:`1643`. * chore: update project settings per scientific python repo-review. @@ -336,7 +336,7 @@ Enhancements ~~~~~~~~~~~~ * Added support for creating a copy of data when converting a `zarr.Array` to a numpy array. - By :user:`David Stansby ` (:issue:`2106`) and + By :user:`David Stansby ` (:issue:`2106`) and :user:`Joe Hamman ` (:issue:`2123`). Maintenance @@ -2191,7 +2191,7 @@ Other changes ~~~~~~~~~~~~~ To accommodate support for hierarchies and filters, the Zarr metadata format -has been modified. See the :ref:`spec_v2` for more information. To migrate an +has been modified. See the ``spec_v2`` for more information. To migrate an array stored using Zarr version 1.x, use the :func:`zarr.storage.migrate_1to2` function. @@ -2237,14 +2237,14 @@ abstraction layer between the core array logic and data storage (:issue:`21`). In this release, any object that implements the ``MutableMapping`` interface can be used as an array store. 
See the tutorial sections on :ref:`tutorial_persist` -and :ref:`tutorial_storage`, the :ref:`spec_v1`, and the +and :ref:`tutorial_storage`, the ``spec_v1``, and the :mod:`zarr.storage` module documentation for more information. Please note also that the file organization and file name conventions used when storing a Zarr array in a directory on the file system have changed. Persistent Zarr arrays created using previous versions of the software will not be compatible with this version. See the -:mod:`zarr.storage` API docs and the :ref:`spec_v1` for more +:mod:`zarr.storage` API docs and the ``spec_v1`` for more information. Compression @@ -2257,7 +2257,7 @@ as the default compressor, however other compressors including zlib, BZ2 and LZMA are also now supported via the Python standard library. New compressors can also be dynamically registered for use with Zarr. See the tutorial sections on :ref:`tutorial_compress` and -:ref:`tutorial_tips_blosc`, the :ref:`spec_v1`, and the +:ref:`tutorial_tips_blosc`, the ``spec_v1``, and the :mod:`zarr.compressors` module documentation for more information. Synchronization diff --git a/docs/spec.rst b/docs/spec.rst deleted file mode 100644 index 8aca0bbd80..0000000000 --- a/docs/spec.rst +++ /dev/null @@ -1,11 +0,0 @@ -.. _spec: - -Specifications -============== - -.. toctree:: - :maxdepth: 1 - - spec/v3 - spec/v2 - spec/v1 diff --git a/docs/spec/v1.rst b/docs/spec/v1.rst deleted file mode 100644 index 27a0490e0a..0000000000 --- a/docs/spec/v1.rst +++ /dev/null @@ -1,7 +0,0 @@ -.. _spec_v1: - -Zarr Storage Specification Version 1 -==================================== - -The V1 Specification has been migrated to its website → -https://zarr-specs.readthedocs.io/. diff --git a/docs/spec/v2.rst b/docs/spec/v2.rst deleted file mode 100644 index deb6d46ce6..0000000000 --- a/docs/spec/v2.rst +++ /dev/null @@ -1,7 +0,0 @@ -.. 
_spec_v2: - -Zarr Storage Specification Version 2 -==================================== - -The V2 Specification has been migrated to its website → -https://zarr-specs.readthedocs.io/. diff --git a/docs/spec/v3.rst b/docs/spec/v3.rst deleted file mode 100644 index 3d39f35ba6..0000000000 --- a/docs/spec/v3.rst +++ /dev/null @@ -1,7 +0,0 @@ -.. _spec_v3: - -Zarr Storage Specification Version 3 -==================================== - -The V3 Specification has been migrated to its website → -https://zarr-specs.readthedocs.io/. diff --git a/pyproject.toml b/pyproject.toml index 7b516bbc05..6c8110cbf9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -81,6 +81,7 @@ docs = [ 'sphinx_design', 'sphinx-issues', 'sphinx-copybutton', + 'sphinx-reredirects', 'pydata-sphinx-theme', 'numpydoc', 'numcodecs[msgpack]', diff --git a/test.py b/test.py new file mode 100644 index 0000000000..29dac92c8b --- /dev/null +++ b/test.py @@ -0,0 +1,7 @@ +import zarr + +store = zarr.DirectoryStore("data") +r = zarr.open_group(store=store) +z = r.full("myArray", 42, shape=(), dtype="i4", compressor=None) + +print(z.oindex[...]) From a7714c70298d82bf1a2c9555e208fde9ad7ac3c4 Mon Sep 17 00:00:00 2001 From: David Stansby Date: Tue, 17 Dec 2024 16:13:04 +0000 Subject: [PATCH 08/12] Trim trailing whitespace (#2563) --- .github/workflows/gpu_test.yml | 2 +- .github/workflows/releases.yml | 2 +- .github/workflows/test.yml | 4 ++-- .pre-commit-config.yaml | 1 + README-v3.md | 2 +- bench/compress_normal.txt | 40 +++++++++++++++++----------------- docs/guide/storage.rst | 10 ++++----- docs/roadmap.rst | 8 +++---- docs/tutorial.rst | 4 ++-- 9 files changed, 37 insertions(+), 36 deletions(-) diff --git a/.github/workflows/gpu_test.yml b/.github/workflows/gpu_test.yml index 0403a9c06e..b13da7d36f 100644 --- a/.github/workflows/gpu_test.yml +++ b/.github/workflows/gpu_test.yml @@ -55,7 +55,7 @@ jobs: cache: 'pip' - name: Install Hatch and CuPy run: | - python -m pip install --upgrade pip + python -m pip 
install --upgrade pip pip install hatch - name: Set Up Hatch Env run: | diff --git a/.github/workflows/releases.yml b/.github/workflows/releases.yml index 375d9651d5..1b23260c2e 100644 --- a/.github/workflows/releases.yml +++ b/.github/workflows/releases.yml @@ -23,7 +23,7 @@ jobs: - name: Install PyBuild run: | - python -m pip install --upgrade pip + python -m pip install --upgrade pip pip install hatch - name: Build wheel and sdist run: hatch build diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 1c25dcb1f4..1157fccc86 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -52,7 +52,7 @@ jobs: cache: 'pip' - name: Install Hatch run: | - python -m pip install --upgrade pip + python -m pip install --upgrade pip pip install hatch - name: Set Up Hatch Env run: | @@ -84,7 +84,7 @@ jobs: cache: 'pip' - name: Install Hatch run: | - python -m pip install --upgrade pip + python -m pip install --upgrade pip pip install hatch - name: Set Up Hatch Env run: | diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b300752b31..4a93e9ce87 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -20,6 +20,7 @@ repos: rev: v5.0.0 hooks: - id: check-yaml + - id: trailing-whitespace - repo: https://github.com/pre-commit/mirrors-mypy rev: v1.13.0 hooks: diff --git a/README-v3.md b/README-v3.md index 8348038e5a..598e646377 100644 --- a/README-v3.md +++ b/README-v3.md @@ -38,7 +38,7 @@ hatch env create test ## Run the Tests ``` -hatch run test:run +hatch run test:run ``` or diff --git a/bench/compress_normal.txt b/bench/compress_normal.txt index d527cf03d4..e5d6be6aeb 100644 --- a/bench/compress_normal.txt +++ b/bench/compress_normal.txt @@ -19,7 +19,7 @@ Line # Hits Time Per Hit % Time Line Contents ============================================================== 137 def compress(source, char* cname, int clevel, int shuffle): 138 """Compress data in a numpy array. 
- 139 + 139 140 Parameters 141 ---------- 142 source : array-like @@ -30,14 +30,14 @@ Line # Hits Time Per Hit % Time Line Contents 147 Compression level. 148 shuffle : int 149 Shuffle filter. - 150 + 150 151 Returns 152 ------- 153 dest : bytes-like 154 Compressed data. - 155 + 155 156 """ - 157 + 157 158 cdef: 159 char *source_ptr 160 char *dest_ptr @@ -45,18 +45,18 @@ Line # Hits Time Per Hit % Time Line Contents 162 size_t nbytes, cbytes, itemsize 163 200 506 2.5 0.2 array.array char_array_template = array.array('b', []) 164 array.array dest - 165 + 165 166 # setup source buffer 167 200 458 2.3 0.2 PyObject_GetBuffer(source, &source_buffer, PyBUF_ANY_CONTIGUOUS) 168 200 119 0.6 0.0 source_ptr = source_buffer.buf - 169 + 169 170 # setup destination 171 200 239 1.2 0.1 nbytes = source_buffer.len 172 200 103 0.5 0.0 itemsize = source_buffer.itemsize 173 200 2286 11.4 0.8 dest = array.clone(char_array_template, nbytes + BLOSC_MAX_OVERHEAD, 174 zero=False) 175 200 129 0.6 0.0 dest_ptr = dest.data.as_voidptr - 176 + 176 177 # perform compression 178 200 1734 8.7 0.6 if _get_use_threads(): 179 # allow blosc to use threads internally @@ -67,24 +67,24 @@ Line # Hits Time Per Hit % Time Line Contents 184 cbytes = blosc_compress(clevel, shuffle, itemsize, nbytes, 185 source_ptr, dest_ptr, 186 nbytes + BLOSC_MAX_OVERHEAD) - 187 + 187 188 else: 189 with nogil: 190 cbytes = blosc_compress_ctx(clevel, shuffle, itemsize, nbytes, 191 source_ptr, dest_ptr, 192 nbytes + BLOSC_MAX_OVERHEAD, cname, 193 0, 1) - 194 + 194 195 # release source buffer 196 200 616 3.1 0.2 PyBuffer_Release(&source_buffer) - 197 + 197 198 # check compression was successful 199 200 120 0.6 0.0 if cbytes <= 0: 200 raise RuntimeError('error during blosc compression: %d' % cbytes) - 201 + 201 202 # resize after compression 203 200 1896 9.5 0.6 array.resize(dest, cbytes) - 204 + 204 205 200 186 0.9 0.1 return dest ******************************************************************************* @@ -100,19 
+100,19 @@ Line # Hits Time Per Hit % Time Line Contents ============================================================== 75 def decompress(source, dest): 76 """Decompress data. - 77 + 77 78 Parameters 79 ---------- 80 source : bytes-like 81 Compressed data, including blosc header. 82 dest : array-like 83 Object to decompress into. - 84 + 84 85 Notes 86 ----- 87 Assumes that the size of the destination buffer is correct for the size of 88 the uncompressed data. - 89 + 89 90 """ 91 cdef: 92 int ret @@ -122,7 +122,7 @@ Line # Hits Time Per Hit % Time Line Contents 96 array.array source_array 97 Py_buffer dest_buffer 98 size_t nbytes - 99 + 99 100 # setup source buffer 101 200 573 2.9 0.2 if PY2 and isinstance(source, array.array): 102 # workaround fact that array.array does not support new-style buffer @@ -134,13 +134,13 @@ Line # Hits Time Per Hit % Time Line Contents 108 200 112 0.6 0.0 release_source_buffer = True 109 200 144 0.7 0.1 PyObject_GetBuffer(source, &source_buffer, PyBUF_ANY_CONTIGUOUS) 110 200 98 0.5 0.0 source_ptr = source_buffer.buf - 111 + 111 112 # setup destination buffer 113 200 552 2.8 0.2 PyObject_GetBuffer(dest, &dest_buffer, 114 PyBUF_ANY_CONTIGUOUS | PyBUF_WRITEABLE) 115 200 100 0.5 0.0 dest_ptr = dest_buffer.buf 116 200 84 0.4 0.0 nbytes = dest_buffer.len - 117 + 117 118 # perform decompression 119 200 1856 9.3 0.8 if _get_use_threads(): 120 # allow blosc to use threads internally @@ -149,12 +149,12 @@ Line # Hits Time Per Hit % Time Line Contents 123 else: 124 with nogil: 125 ret = blosc_decompress_ctx(source_ptr, dest_ptr, nbytes, 1) - 126 + 126 127 # release buffers 128 200 754 3.8 0.3 if release_source_buffer: 129 200 326 1.6 0.1 PyBuffer_Release(&source_buffer) 130 200 165 0.8 0.1 PyBuffer_Release(&dest_buffer) - 131 + 131 132 # handle errors 133 200 128 0.6 0.1 if ret <= 0: 134 raise RuntimeError('error during blosc decompression: %d' % ret) diff --git a/docs/guide/storage.rst b/docs/guide/storage.rst index 69de796b3d..730b0bfcc8 100644 
--- a/docs/guide/storage.rst +++ b/docs/guide/storage.rst @@ -4,7 +4,7 @@ Storage Zarr-Python supports multiple storage backends, including: local file systems, Zip files, remote stores via ``fsspec`` (S3, HTTP, etc.), and in-memory stores. In Zarr-Python 3, stores must implement the abstract store API from -:class:`zarr.abc.store.Store`. +:class:`zarr.abc.store.Store`. .. note:: Unlike Zarr-Python 2 where the store interface was built around a generic ``MutableMapping`` @@ -50,8 +50,8 @@ filesystem. Zip Store ~~~~~~~~~ -The :class:`zarr.storage.ZipStore` stores the contents of a Zarr hierarchy in a single -Zip file. The `Zip Store specification_` is currently in draft form. +The :class:`zarr.storage.ZipStore` stores the contents of a Zarr hierarchy in a single +Zip file. The `Zip Store specification_` is currently in draft form. .. code-block:: python @@ -65,7 +65,7 @@ Remote Store The :class:`zarr.storage.RemoteStore` stores the contents of a Zarr hierarchy in following the same logical layout as the ``LocalStore``, except the store is assumed to be on a remote storage system -such as cloud object storage (e.g. AWS S3, Google Cloud Storage, Azure Blob Store). The +such as cloud object storage (e.g. AWS S3, Google Cloud Storage, Azure Blob Store). The :class:`zarr.storage.RemoteStore` is backed by `Fsspec_` and can support any Fsspec backend that implements the `AbstractFileSystem` API, @@ -80,7 +80,7 @@ Memory Store ~~~~~~~~~~~~ The :class:`zarr.storage.RemoteStore` a in-memory store that allows for serialization of -Zarr data (metadata and chunks) to a dictionary. +Zarr data (metadata and chunks) to a dictionary. .. code-block:: python diff --git a/docs/roadmap.rst b/docs/roadmap.rst index 93f2a26896..d9fc32b775 100644 --- a/docs/roadmap.rst +++ b/docs/roadmap.rst @@ -16,7 +16,7 @@ Roadmap - Martin Durrant / @martindurant .. note:: - + This document was written in the early stages of the 3.0 refactor. 
Some aspects of the design have changed since this was originally written. Questions and discussion about the contents of this document should be directed to @@ -227,7 +227,7 @@ expose the required methods as async methods. async def get_partial_values(self, key_ranges: List[Tuple[str, int, int]) -> bytes: ... - + async def set(self, key: str, value: Union[bytes, bytearray, memoryview]) -> None: ... # required for writable stores @@ -246,10 +246,10 @@ expose the required methods as async methods. # additional (optional methods) async def getsize(self, prefix: str) -> int: ... - + async def rename(self, src: str, dest: str) -> None ... - + Recognizing that there are many Zarr applications today that rely on the ``MutableMapping`` interface supported by Zarr-Python 2, a wrapper store diff --git a/docs/tutorial.rst b/docs/tutorial.rst index 5d977c48a5..71254900d5 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -1015,12 +1015,12 @@ class from ``fsspec``. The following example demonstrates how to access a ZIP-archived Zarr group on s3 using `s3fs `_ and ``ZipFileSystem``: >>> s3_path = "s3://path/to/my.zarr.zip" - >>> + >>> >>> s3 = s3fs.S3FileSystem() >>> f = s3.open(s3_path) >>> fs = ZipFileSystem(f, mode="r") >>> store = FSMap("", fs, check=False) - >>> + >>> >>> # caching may improve performance when repeatedly reading the same data >>> cache = zarr.storage.LRUStoreCache(store, max_size=2**28) >>> z = zarr.group(store=cache) From f360fc6c26d1b006b616a5282cb9120ec96e5531 Mon Sep 17 00:00:00 2001 From: Hannes Spitz <44113112+brokkoli71@users.noreply.github.com> Date: Wed, 18 Dec 2024 16:00:13 +0100 Subject: [PATCH 09/12] Remove config warning if only one implementation exists (#2571) * add test_warning_on_missing_codec_config * improve config tests * remove warning if only one implementation exists --- src/zarr/registry.py | 2 + tests/test_config.py | 133 ++++++++++++++++++++++++++----------------- 2 files changed, 84 insertions(+), 51 deletions(-) diff 
--git a/src/zarr/registry.py b/src/zarr/registry.py index 12b0738016..9055bb1447 100644 --- a/src/zarr/registry.py +++ b/src/zarr/registry.py @@ -138,6 +138,8 @@ def get_codec_class(key: str, reload_config: bool = False) -> type[Codec]: config_entry = config.get("codecs", {}).get(key) if config_entry is None: + if len(codec_classes) == 1: + return next(iter(codec_classes.values())) warnings.warn( f"Codec '{key}' not configured in config. Selecting any implementation.", stacklevel=2 ) diff --git a/tests/test_config.py b/tests/test_config.py index 2e919a0add..e3f5ec25e3 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -143,6 +143,7 @@ class MockEnvCodecPipeline(CodecPipeline): assert get_pipeline_class(reload_config=True) == MockEnvCodecPipeline +@pytest.mark.filterwarnings("error") @pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) def test_config_codec_implementation(store: Store) -> None: # has default value @@ -156,24 +157,29 @@ async def _encode_single( ) -> CodecOutput | None: _mock.call() - config.set({"codecs.blosc": fully_qualified_name(MockBloscCodec)}) register_codec("blosc", MockBloscCodec) - assert get_codec_class("blosc") == MockBloscCodec - - # test if codec is used - arr = Array.create( - store=store, - shape=(100,), - chunks=(10,), - zarr_format=3, - dtype="i4", - codecs=[BytesCodec(), {"name": "blosc", "configuration": {}}], - ) - arr[:] = range(100) - _mock.call.assert_called() + with config.set({"codecs.blosc": fully_qualified_name(MockBloscCodec)}): + assert get_codec_class("blosc") == MockBloscCodec + + # test if codec is used + arr = Array.create( + store=store, + shape=(100,), + chunks=(10,), + zarr_format=3, + dtype="i4", + codecs=[BytesCodec(), {"name": "blosc", "configuration": {}}], + ) + arr[:] = range(100) + _mock.call.assert_called() + + # test set codec with environment variable + class NewBloscCodec(BloscCodec): + pass - with mock.patch.dict(os.environ, {"ZARR_CODECS__BLOSC": 
fully_qualified_name(BloscCodec)}): - assert get_codec_class("blosc", reload_config=True) == BloscCodec + register_codec("blosc", NewBloscCodec) + with mock.patch.dict(os.environ, {"ZARR_CODECS__BLOSC": fully_qualified_name(NewBloscCodec)}): + assert get_codec_class("blosc", reload_config=True) == NewBloscCodec @pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) @@ -183,18 +189,17 @@ def test_config_ndbuffer_implementation(store: Store) -> None: # set custom ndbuffer with TestNDArrayLike implementation register_ndbuffer(NDBufferUsingTestNDArrayLike) - config.set({"ndbuffer": fully_qualified_name(NDBufferUsingTestNDArrayLike)}) - assert get_ndbuffer_class() == NDBufferUsingTestNDArrayLike - arr = Array.create( - store=store, - shape=(100,), - chunks=(10,), - zarr_format=3, - dtype="i4", - ) - got = arr[:] - print(type(got)) - assert isinstance(got, TestNDArrayLike) + with config.set({"ndbuffer": fully_qualified_name(NDBufferUsingTestNDArrayLike)}): + assert get_ndbuffer_class() == NDBufferUsingTestNDArrayLike + arr = Array.create( + store=store, + shape=(100,), + chunks=(10,), + zarr_format=3, + dtype="i4", + ) + got = arr[:] + assert isinstance(got, TestNDArrayLike) def test_config_buffer_implementation() -> None: @@ -208,27 +213,53 @@ def test_config_buffer_implementation() -> None: arr[:] = np.arange(100) register_buffer(TestBuffer) - config.set({"buffer": fully_qualified_name(TestBuffer)}) - assert get_buffer_class() == TestBuffer - - # no error using TestBuffer - data = np.arange(100) - arr[:] = np.arange(100) - assert np.array_equal(arr[:], data) - - data2d = np.arange(1000).reshape(100, 10) - arr_sharding = zeros( - shape=(100, 10), - store=StoreExpectingTestBuffer(), - codecs=[ShardingCodec(chunk_shape=(10, 10))], - ) - arr_sharding[:] = data2d - assert np.array_equal(arr_sharding[:], data2d) + with config.set({"buffer": fully_qualified_name(TestBuffer)}): + assert get_buffer_class() == TestBuffer - arr_Crc32c = zeros( - shape=(100, 
10), - store=StoreExpectingTestBuffer(), - codecs=[BytesCodec(), Crc32cCodec()], - ) - arr_Crc32c[:] = data2d - assert np.array_equal(arr_Crc32c[:], data2d) + # no error using TestBuffer + data = np.arange(100) + arr[:] = np.arange(100) + assert np.array_equal(arr[:], data) + + data2d = np.arange(1000).reshape(100, 10) + arr_sharding = zeros( + shape=(100, 10), + store=StoreExpectingTestBuffer(), + codecs=[ShardingCodec(chunk_shape=(10, 10))], + ) + arr_sharding[:] = data2d + assert np.array_equal(arr_sharding[:], data2d) + + arr_Crc32c = zeros( + shape=(100, 10), + store=StoreExpectingTestBuffer(), + codecs=[BytesCodec(), Crc32cCodec()], + ) + arr_Crc32c[:] = data2d + assert np.array_equal(arr_Crc32c[:], data2d) + + +@pytest.mark.filterwarnings("error") +def test_warning_on_missing_codec_config() -> None: + class NewCodec(BytesCodec): + pass + + class NewCodec2(BytesCodec): + pass + + # error if codec is not registered + with pytest.raises(KeyError): + get_codec_class("missing_codec") + + # no warning if only one implementation is available + register_codec("new_codec", NewCodec) + get_codec_class("new_codec") + + # warning because multiple implementations are available but none is selected in the config + register_codec("new_codec", NewCodec2) + with pytest.warns(UserWarning): + get_codec_class("new_codec") + + # no warning if multiple implementations are available and one is selected in the config + with config.set({"codecs.new_codec": fully_qualified_name(NewCodec)}): + get_codec_class("new_codec") From 4455726a0dbdeebb00d146b3b7a4bfa4e63374b5 Mon Sep 17 00:00:00 2001 From: David Stansby Date: Wed, 18 Dec 2024 19:10:50 +0000 Subject: [PATCH 10/12] Remove license page from docs (#2570) * Remove license page from docs * Add license redirect --- docs/conf.py | 1 + docs/index.rst | 1 - docs/license.rst | 4 ---- 3 files changed, 1 insertion(+), 5 deletions(-) delete mode 100644 docs/license.rst diff --git a/docs/conf.py b/docs/conf.py index 5f714421d3..8b22e33c6d 
100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -87,6 +87,7 @@ "spec/v1": 'https://zarr-specs.readthedocs.io/en/latest/v1/v1.0.html', "spec/v2": "https://zarr-specs.readthedocs.io/en/latest/v2/v2.0.html", "spec/v3": "https://zarr-specs.readthedocs.io/en/latest/v3/core/v3.0.html", + "license": "https://github.com/zarr-developers/zarr-python/blob/main/LICENSE.txt" } # The language for content autogenerated by Sphinx. Refer to documentation diff --git a/docs/index.rst b/docs/index.rst index 82ed2889f4..4d6188d3a0 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -13,7 +13,6 @@ Zarr-Python guide/index api/index release - license contributing roadmap diff --git a/docs/license.rst b/docs/license.rst deleted file mode 100644 index 8f93aa7d66..0000000000 --- a/docs/license.rst +++ /dev/null @@ -1,4 +0,0 @@ -License -======= - -.. include:: ../LICENSE.txt From f035d453894ec83234f69f7627b540390ee2b6eb Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Wed, 18 Dec 2024 22:57:07 +0100 Subject: [PATCH 11/12] docs/add docstrings to synchronous API (#2549) * add docstrings and complete function signatures to synchronous api, and tests for the above * clean up types and docstrings * Update src/zarr/api/synchronous.py Co-authored-by: Joe Hamman * Update src/zarr/api/synchronous.py Co-authored-by: Joe Hamman * Update src/zarr/api/synchronous.py Co-authored-by: Joe Hamman * Update src/zarr/api/synchronous.py Co-authored-by: Joe Hamman * Update src/zarr/api/synchronous.py Co-authored-by: Joe Hamman * Update src/zarr/api/synchronous.py Co-authored-by: Joe Hamman * Update src/zarr/api/synchronous.py Co-authored-by: Joe Hamman * remove doomed docstring tests * allow bool in create --------- Co-authored-by: Joe Hamman --- src/zarr/api/asynchronous.py | 8 +- src/zarr/api/synchronous.py | 685 ++++++++++++++++++++++++++++++++--- src/zarr/core/array.py | 8 +- tests/test_api.py | 4 +- tests/test_array.py | 18 +- 5 files changed, 652 insertions(+), 71 deletions(-) diff --git 
a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index 2d1c26e145..e859df44a6 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -774,9 +774,9 @@ async def open_group( async def create( - shape: ChunkCoords, + shape: ChunkCoords | int, *, # Note: this is a change from v2 - chunks: ChunkCoords | None = None, # TODO: v2 allowed chunks=True + chunks: ChunkCoords | int | None = None, # TODO: v2 allowed chunks=True dtype: npt.DTypeLike | None = None, compressor: dict[str, JSON] | None = None, # TODO: default and type change fill_value: Any | None = 0, # TODO: need type @@ -798,7 +798,7 @@ async def create( meta_array: Any | None = None, # TODO: need type attributes: dict[str, JSON] | None = None, # v3 only - chunk_shape: ChunkCoords | None = None, + chunk_shape: ChunkCoords | int | None = None, chunk_key_encoding: ( ChunkKeyEncoding | tuple[Literal["default"], Literal[".", "/"]] @@ -1104,6 +1104,8 @@ async def open_array( ---------- store : Store or str Store or path to directory in file system or name of zip file. + zarr_version : {2, 3, None}, optional + The zarr format to use when saving. Deprecated in favor of zarr_format. zarr_format : {2, 3, None}, optional The zarr format to use when saving. 
path : str, optional diff --git a/src/zarr/api/synchronous.py b/src/zarr/api/synchronous.py index 8e8ecf40b8..6ae062865c 100644 --- a/src/zarr/api/synchronous.py +++ b/src/zarr/api/synchronous.py @@ -11,8 +11,15 @@ from zarr.core.sync import sync if TYPE_CHECKING: + from collections.abc import Iterable + + import numpy.typing as npt + + from zarr.abc.codec import Codec + from zarr.api.asynchronous import ArrayLike, PathLike from zarr.core.buffer import NDArrayLike - from zarr.core.common import JSON, AccessModeLiteral, ChunkCoords, ZarrFormat + from zarr.core.chunk_key_encodings import ChunkKeyEncoding + from zarr.core.common import JSON, AccessModeLiteral, ChunkCoords, MemoryOrder, ZarrFormat from zarr.storage import StoreLike __all__ = [ @@ -44,8 +51,38 @@ ] -def consolidate_metadata(*args: Any, **kwargs: Any) -> Group: - return Group(sync(async_api.consolidate_metadata(*args, **kwargs))) +def consolidate_metadata( + store: StoreLike, + path: str | None = None, + zarr_format: ZarrFormat | None = None, +) -> Group: + """ + Consolidate the metadata of all nodes in a hierarchy. + + Upon completion, the metadata of the root node in the Zarr hierarchy will be + updated to include all the metadata of child nodes. + + Parameters + ---------- + store : StoreLike + The store-like object whose metadata you wish to consolidate. + path : str, optional + A path to a group in the store to consolidate at. Only children + below that group will be consolidated. + + By default, the root node is used so all the metadata in the + store is consolidated. + zarr_format : {2, 3, None}, optional + The zarr format of the hierarchy. By default the zarr format + is inferred. + + Returns + ------- + group: Group + The group, with the ``consolidated_metadata`` field set to include + the metadata of each child node. 
+ """ + return Group(sync(async_api.consolidate_metadata(store, path=path, zarr_format=zarr_format))) def copy(*args: Any, **kwargs: Any) -> tuple[int, int, int]: @@ -61,9 +98,39 @@ def copy_store(*args: Any, **kwargs: Any) -> tuple[int, int, int]: def load( - store: StoreLike, zarr_version: ZarrFormat | None = None, path: str | None = None + store: StoreLike, + path: str | None = None, + zarr_format: ZarrFormat | None = None, + zarr_version: ZarrFormat | None = None, ) -> NDArrayLike | dict[str, NDArrayLike]: - return sync(async_api.load(store=store, zarr_version=zarr_version, path=path)) + """Load data from an array or group into memory. + + Parameters + ---------- + store : Store or str + Store or path to directory in file system or name of zip file. + path : str or None, optional + The path within the store from which to load. + + Returns + ------- + out + If the path contains an array, out will be a numpy array. If the path contains + a group, out will be a dict-like object where keys are array names and values + are numpy arrays. + + See Also + -------- + save, savez + + Notes + ----- + If loading data from a group of arrays, data will not be immediately loaded into + memory. Rather, arrays will be loaded into memory as they are requested. + """ + return sync( + async_api.load(store=store, zarr_version=zarr_version, zarr_format=zarr_format, path=path) + ) @_deprecate_positional_args @@ -74,8 +141,36 @@ def open( zarr_version: ZarrFormat | None = None, # deprecated zarr_format: ZarrFormat | None = None, path: str | None = None, + storage_options: dict[str, Any] | None = None, **kwargs: Any, # TODO: type kwargs as valid args to async_api.open ) -> Array | Group: + """Convenience function to open a group or array using file-mode-like semantics. + + Parameters + ---------- + store : Store or str, optional + Store or path to directory in file system or name of zip file. 
+ mode : {'r', 'r+', 'a', 'w', 'w-'}, optional + Persistence mode: 'r' means read only (must exist); 'r+' means + read/write (must exist); 'a' means read/write (create if doesn't + exist); 'w' means create (overwrite if exists); 'w-' means create + (fail if exists). + zarr_format : {2, 3, None}, optional + The zarr format to use when saving. + path : str or None, optional + The path within the store to open. + storage_options : dict + If using an fsspec URL to create the store, these will be passed to + the backend implementation. Ignored otherwise. + **kwargs + Additional parameters are passed through to :func:`zarr.api.asynchronous.open_array` or + :func:`zarr.api.asynchronous.open_group`. + + Returns + ------- + z : array or group + Return type depends on what exists in the given store. + """ obj = sync( async_api.open( store=store, @@ -83,6 +178,7 @@ def open( zarr_version=zarr_version, zarr_format=zarr_format, path=path, + storage_options=storage_options, **kwargs, ) ) @@ -93,6 +189,9 @@ def open( def open_consolidated(*args: Any, use_consolidated: Literal[True] = True, **kwargs: Any) -> Group: + """ + Alias for :func:`open_group` with ``use_consolidated=True``. + """ return Group( sync(async_api.open_consolidated(*args, use_consolidated=use_consolidated, **kwargs)) ) @@ -106,6 +205,21 @@ def save( path: str | None = None, **kwargs: Any, # TODO: type kwargs as valid args to async_api.save ) -> None: + """Convenience function to save an array or group of arrays to the local file system. + + Parameters + ---------- + store : Store or str + Store or path to directory in file system or name of zip file. + *args : ndarray + NumPy arrays with data to save. + zarr_format : {2, 3, None}, optional + The zarr format to use when saving. + path : str or None, optional + The path within the group where the arrays will be saved. + **kwargs + NumPy arrays with data to save. 
+ """ return sync( async_api.save( store, *args, zarr_version=zarr_version, zarr_format=zarr_format, path=path, **kwargs @@ -121,8 +235,28 @@ def save_array( zarr_version: ZarrFormat | None = None, # deprecated zarr_format: ZarrFormat | None = None, path: str | None = None, + storage_options: dict[str, Any] | None = None, **kwargs: Any, # TODO: type kwargs as valid args to async_api.save_array ) -> None: + """Convenience function to save a NumPy array to the local file system, following a + similar API to the NumPy save() function. + + Parameters + ---------- + store : Store or str + Store or path to directory in file system or name of zip file. + arr : ndarray + NumPy array with data to save. + zarr_format : {2, 3, None}, optional + The zarr format to use when saving. + path : str or None, optional + The path within the store where the array will be saved. + storage_options : dict + If using an fsspec URL to create the store, these will be passed to + the backend implementation. Ignored otherwise. + **kwargs + Passed through to :func:`create`, e.g., compressor. + """ return sync( async_api.save_array( store=store, @@ -130,6 +264,7 @@ def save_array( zarr_version=zarr_version, zarr_format=zarr_format, path=path, + storage_options=storage_options, **kwargs, ) ) @@ -144,6 +279,26 @@ def save_group( storage_options: dict[str, Any] | None = None, **kwargs: NDArrayLike, ) -> None: + """Convenience function to save several NumPy arrays to the local file system, following a + similar API to the NumPy savez()/savez_compressed() functions. + + Parameters + ---------- + store : Store or str + Store or path to directory in file system or name of zip file. + *args : ndarray + NumPy arrays with data to save. + zarr_format : {2, 3, None}, optional + The zarr format to use when saving. + path : str or None, optional + Path within the store where the group will be saved. 
+ storage_options : dict + If using an fsspec URL to create the store, these will be passed to + the backend implementation. Ignored otherwise. + **kwargs + NumPy arrays with data to save. + """ + return sync( async_api.save_group( store, @@ -159,28 +314,98 @@ def save_group( @deprecated("Use Group.tree instead.") def tree(grp: Group, expand: bool | None = None, level: int | None = None) -> Any: + """Provide a rich display of the hierarchy. + + Parameters + ---------- + grp : Group + Zarr or h5py group. + expand : bool, optional + Only relevant for HTML representation. If True, tree will be fully expanded. + level : int, optional + Maximum depth to descend into hierarchy. + + Returns + ------- + TreeRepr + A pretty-printable object displaying the hierarchy. + + .. deprecated:: 3.0.0 + `zarr.tree()` is deprecated and will be removed in a future release. + Use `group.tree()` instead. + """ return sync(async_api.tree(grp._async_group, expand=expand, level=level)) # TODO: add type annotations for kwargs -def array(data: NDArrayLike, **kwargs: Any) -> Array: +def array(data: npt.ArrayLike, **kwargs: Any) -> Array: + """Create an array filled with `data`. + + Parameters + ---------- + data : array_like + The data to fill the array with. + **kwargs + Passed through to :func:`create`. + + Returns + ------- + array : Array + The new array. 
+ """ + return Array(sync(async_api.array(data=data, **kwargs))) @_deprecate_positional_args def group( store: StoreLike | None = None, - *, # Note: this is a change from v2 + *, overwrite: bool = False, - chunk_store: StoreLike | None = None, # not used in async_api - cache_attrs: bool | None = None, # default changed, not used in async_api - synchronizer: Any | None = None, # not used in async_api + chunk_store: StoreLike | None = None, # not used + cache_attrs: bool | None = None, # not used, default changed + synchronizer: Any | None = None, # not used path: str | None = None, zarr_version: ZarrFormat | None = None, # deprecated zarr_format: ZarrFormat | None = None, - meta_array: Any | None = None, # not used in async_api + meta_array: Any | None = None, # not used attributes: dict[str, JSON] | None = None, + storage_options: dict[str, Any] | None = None, ) -> Group: + """Create a group. + + Parameters + ---------- + store : Store or str, optional + Store or path to directory in file system. + overwrite : bool, optional + If True, delete any pre-existing data in `store` at `path` before + creating the group. + chunk_store : Store, optional + Separate storage for chunks. If not provided, `store` will be used + for storage of both chunks and metadata. + cache_attrs : bool, optional + If True (default), user attributes will be cached for attribute read + operations. If False, user attributes are reloaded from the store prior + to all attribute read operations. + synchronizer : object, optional + Array synchronizer. + path : str, optional + Group path within store. + meta_array : array-like, optional + An array instance to use for determining arrays to create and return + to users. Use `numpy.empty(())` by default. + zarr_format : {2, 3, None}, optional + The zarr format to use when saving. + storage_options : dict + If using an fsspec URL to create the store, these will be passed to + the backend implementation. Ignored otherwise. 
+ + Returns + ------- + g : Group + The new group. + """ return Group( sync( async_api.group( @@ -194,6 +419,7 @@ def group( zarr_format=zarr_format, meta_array=meta_array, attributes=attributes, + storage_options=storage_options, ) ) ) @@ -215,6 +441,67 @@ def open_group( attributes: dict[str, JSON] | None = None, use_consolidated: bool | str | None = None, ) -> Group: + """Open a group using file-mode-like semantics. + + Parameters + ---------- + store : Store, str, or mapping, optional + Store or path to directory in file system or name of zip file. + + Strings are interpreted as paths on the local file system + and used as the ``root`` argument to :class:`zarr.storage.LocalStore`. + + Dictionaries are used as the ``store_dict`` argument in + :class:`zarr.storage.MemoryStore``. + + By default (``store=None``) a new :class:`zarr.storage.MemoryStore` + is created. + + mode : {'r', 'r+', 'a', 'w', 'w-'}, optional + Persistence mode: 'r' means read only (must exist); 'r+' means + read/write (must exist); 'a' means read/write (create if doesn't + exist); 'w' means create (overwrite if exists); 'w-' means create + (fail if exists). + cache_attrs : bool, optional + If True (default), user attributes will be cached for attribute read + operations. If False, user attributes are reloaded from the store prior + to all attribute read operations. + synchronizer : object, optional + Array synchronizer. + path : str, optional + Group path within store. + chunk_store : Store or str, optional + Store or path to directory in file system or name of zip file. + storage_options : dict + If using an fsspec URL to create the store, these will be passed to + the backend implementation. Ignored otherwise. + meta_array : array-like, optional + An array instance to use for determining arrays to create and return + to users. Use `numpy.empty(())` by default. + attributes : dict + A dictionary of JSON-serializable values with user-defined attributes. 
+ use_consolidated : bool or str, default None + Whether to use consolidated metadata. + + By default, consolidated metadata is used if it's present in the + store (in the ``zarr.json`` for Zarr v3 and in the ``.zmetadata`` file + for Zarr v2). + + To explicitly require consolidated metadata, set ``use_consolidated=True``, + which will raise an exception if consolidated metadata is not found. + + To explicitly *not* use consolidated metadata, set ``use_consolidated=False``, + which will fall back to using the regular, non consolidated metadata. + + Zarr v2 allowed configuring the key storing the consolidated metadata + (``.zmetadata`` by default). Specify the custom key as ``use_consolidated`` + to load consolidated metadata from a non-default key. + + Returns + ------- + g : Group + The new group. + """ return Group( sync( async_api.open_group( @@ -236,84 +523,382 @@ def open_group( # TODO: add type annotations for kwargs -def create(*args: Any, **kwargs: Any) -> Array: - return Array(sync(async_api.create(*args, **kwargs))) +def create( + shape: ChunkCoords | int, + *, # Note: this is a change from v2 + chunks: ChunkCoords | int | bool | None = None, + dtype: npt.DTypeLike | None = None, + compressor: dict[str, JSON] | None = None, # TODO: default and type change + fill_value: Any | None = 0, # TODO: need type + order: MemoryOrder | None = None, + store: str | StoreLike | None = None, + synchronizer: Any | None = None, + overwrite: bool = False, + path: PathLike | None = None, + chunk_store: StoreLike | None = None, + filters: list[dict[str, JSON]] | None = None, # TODO: type has changed + cache_metadata: bool | None = None, + cache_attrs: bool | None = None, + read_only: bool | None = None, + object_codec: Codec | None = None, # TODO: type has changed + dimension_separator: Literal[".", "/"] | None = None, + write_empty_chunks: bool = False, # TODO: default has changed + zarr_version: ZarrFormat | None = None, # deprecated + zarr_format: ZarrFormat | None = 
None, + meta_array: Any | None = None, # TODO: need type + attributes: dict[str, JSON] | None = None, + # v3 only + chunk_shape: ChunkCoords | int | None = None, + chunk_key_encoding: ( + ChunkKeyEncoding + | tuple[Literal["default"], Literal[".", "/"]] + | tuple[Literal["v2"], Literal[".", "/"]] + | None + ) = None, + codecs: Iterable[Codec | dict[str, JSON]] | None = None, + dimension_names: Iterable[str] | None = None, + storage_options: dict[str, Any] | None = None, + **kwargs: Any, +) -> Array: + """Create an array. + + Parameters + ---------- + shape : int or tuple of ints + Array shape. + chunks : int or tuple of ints, optional + Chunk shape. If True, will be guessed from `shape` and `dtype`. If + False, will be set to `shape`, i.e., single chunk for the whole array. + If an int, the chunk size in each dimension will be given by the value + of `chunks`. Default is True. + dtype : str or dtype, optional + NumPy dtype. + compressor : Codec, optional + Primary compressor. + fill_value : object + Default value to use for uninitialized portions of the array. + order : {'C', 'F'}, optional + Memory layout to be used within each chunk. + Default is set in Zarr's config (`array.order`). + store : Store or str + Store or path to directory in file system or name of zip file. + synchronizer : object, optional + Array synchronizer. + overwrite : bool, optional + If True, delete all pre-existing data in `store` at `path` before + creating the array. + path : str, optional + Path under which array is stored. + chunk_store : MutableMapping, optional + Separate storage for chunks. If not provided, `store` will be used + for storage of both chunks and metadata. + filters : sequence of Codecs, optional + Sequence of filters to use to encode chunk data prior to compression. + cache_metadata : bool, optional + If True, array configuration metadata will be cached for the + lifetime of the object. 
If False, array metadata will be reloaded + prior to all data access and modification operations (may incur + overhead depending on storage and data access pattern). + cache_attrs : bool, optional + If True (default), user attributes will be cached for attribute read + operations. If False, user attributes are reloaded from the store prior + to all attribute read operations. + read_only : bool, optional + True if array should be protected against modification. + object_codec : Codec, optional + A codec to encode object arrays, only needed if dtype=object. + dimension_separator : {'.', '/'}, optional + Separator placed between the dimensions of a chunk. + + .. versionadded:: 2.8 + + write_empty_chunks : bool, optional + If True (default), all chunks will be stored regardless of their + contents. If False, each chunk is compared to the array's fill value + prior to storing. If a chunk is uniformly equal to the fill value, then + that chunk is not be stored, and the store entry for that chunk's key + is deleted. This setting enables sparser storage, as only chunks with + non-fill-value data are stored, at the expense of overhead associated + with checking the data of each chunk. + + .. versionadded:: 2.11 + + zarr_format : {2, 3, None}, optional + The zarr format to use when saving. + meta_array : array-like, optional + An array instance to use for determining arrays to create and return + to users. Use `numpy.empty(())` by default. + + .. versionadded:: 2.13 + storage_options : dict + If using an fsspec URL to create the store, these will be passed to + the backend implementation. Ignored otherwise. + + Returns + ------- + z : Array + The array. 
+ """ + return Array( + sync( + async_api.create( + shape=shape, + chunks=chunks, + dtype=dtype, + compressor=compressor, + fill_value=fill_value, + order=order, + store=store, + synchronizer=synchronizer, + overwrite=overwrite, + path=path, + chunk_store=chunk_store, + filters=filters, + cache_metadata=cache_metadata, + cache_attrs=cache_attrs, + read_only=read_only, + object_codec=object_codec, + dimension_separator=dimension_separator, + write_empty_chunks=write_empty_chunks, + zarr_version=zarr_version, + zarr_format=zarr_format, + meta_array=meta_array, + attributes=attributes, + chunk_shape=chunk_shape, + chunk_key_encoding=chunk_key_encoding, + codecs=codecs, + dimension_names=dimension_names, + storage_options=storage_options, + **kwargs, + ) + ) + ) # TODO: add type annotations for kwargs def empty(shape: ChunkCoords, **kwargs: Any) -> Array: + """Create an empty array. + + Parameters + ---------- + shape : int or tuple of int + Shape of the empty array. + **kwargs + Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + + Returns + ------- + Array + The new array. + + Notes + ----- + The contents of an empty Zarr array are not defined. On attempting to + retrieve data from an empty Zarr array, any values may be returned, + and these are not guaranteed to be stable from one access to the next. + """ return Array(sync(async_api.empty(shape, **kwargs))) # TODO: move ArrayLike to common module # TODO: add type annotations for kwargs -def empty_like(a: async_api.ArrayLike, **kwargs: Any) -> Array: +def empty_like(a: ArrayLike, **kwargs: Any) -> Array: + """Create an empty array like `a`. + + Parameters + ---------- + a : array-like + The array to create an empty array like. + **kwargs + Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + + Returns + ------- + Array + The new array. 
+ """ return Array(sync(async_api.empty_like(a, **kwargs))) # TODO: add type annotations for kwargs and fill_value def full(shape: ChunkCoords, fill_value: Any, **kwargs: Any) -> Array: + """Create an array, with `fill_value` being used as the default value for + uninitialized portions of the array. + + Parameters + ---------- + shape : int or tuple of int + Shape of the empty array. + fill_value : scalar + Fill value. + **kwargs + Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + + Returns + ------- + Array + The new array. + """ return Array(sync(async_api.full(shape=shape, fill_value=fill_value, **kwargs))) # TODO: move ArrayLike to common module # TODO: add type annotations for kwargs -def full_like(a: async_api.ArrayLike, **kwargs: Any) -> Array: +def full_like(a: ArrayLike, **kwargs: Any) -> Array: + """Create a filled array like `a`. + + Parameters + ---------- + a : array-like + The array to create an empty array like. + **kwargs + Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + + Returns + ------- + Array + The new array. + """ return Array(sync(async_api.full_like(a, **kwargs))) # TODO: add type annotations for kwargs def ones(shape: ChunkCoords, **kwargs: Any) -> Array: + """Create an array, with one being used as the default value for + uninitialized portions of the array. + + Parameters + ---------- + shape : int or tuple of int + Shape of the empty array. + **kwargs + Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + + Returns + ------- + Array + The new array. + """ return Array(sync(async_api.ones(shape, **kwargs))) # TODO: add type annotations for kwargs -def ones_like(a: async_api.ArrayLike, **kwargs: Any) -> Array: +def ones_like(a: ArrayLike, **kwargs: Any) -> Array: + """Create an array of ones like `a`. + + Parameters + ---------- + a : array-like + The array to create an empty array like. + **kwargs + Keyword arguments passed to :func:`zarr.api.asynchronous.create`. 
+ + Returns + ------- + Array + The new array. + """ + return Array(sync(async_api.ones_like(a, **kwargs))) # TODO: update this once async_api.open_array is fully implemented -def open_array(*args: Any, **kwargs: Any) -> Array: - return Array(sync(async_api.open_array(*args, **kwargs))) +def open_array( + store: StoreLike | None = None, + *, + zarr_version: ZarrFormat | None = None, + path: PathLike = "", + storage_options: dict[str, Any] | None = None, + **kwargs: Any, +) -> Array: + """Open an array using file-mode-like semantics. + + Parameters + ---------- + store : Store or str + Store or path to directory in file system or name of zip file. + zarr_version : {2, 3, None}, optional + The zarr format to use when saving. + path : str, optional + Path in store to array. + storage_options : dict + If using an fsspec URL to create the store, these will be passed to + the backend implementation. Ignored otherwise. + **kwargs + Any keyword arguments to pass to ``create``. + + Returns + ------- + Array + The opened array. + """ + return Array( + sync( + async_api.open_array( + store=store, + zarr_version=zarr_version, + path=path, + storage_options=storage_options, + **kwargs, + ) + ) + ) # TODO: add type annotations for kwargs -def open_like(a: async_api.ArrayLike, **kwargs: Any) -> Array: - return Array(sync(async_api.open_like(a, **kwargs))) +def open_like(a: ArrayLike, path: str, **kwargs: Any) -> Array: + """Open a persistent array like `a`. + + Parameters + ---------- + a : Array + The shape and data-type of a define these same attributes of the returned array. + path : str + The path to the new array. + **kwargs + Any keyword arguments to pass to the array constructor. + + Returns + ------- + Array + The opened array. 
+ """ + return Array(sync(async_api.open_like(a, path=path, **kwargs))) # TODO: add type annotations for kwargs -def zeros(*args: Any, **kwargs: Any) -> Array: - return Array(sync(async_api.zeros(*args, **kwargs))) +def zeros(shape: ChunkCoords, **kwargs: Any) -> Array: + """Create an array, with zero being used as the default value for + uninitialized portions of the array. + Parameters + ---------- + shape : int or tuple of int + Shape of the empty array. + **kwargs + Keyword arguments passed to :func:`zarr.api.asynchronous.create`. -# TODO: add type annotations for kwargs -def zeros_like(a: async_api.ArrayLike, **kwargs: Any) -> Array: - return Array(sync(async_api.zeros_like(a, **kwargs))) + Returns + ------- + Array + The new array. + """ + return Array(sync(async_api.zeros(shape=shape, **kwargs))) -consolidate_metadata.__doc__ = async_api.copy.__doc__ -copy.__doc__ = async_api.copy.__doc__ -copy_all.__doc__ = async_api.copy_all.__doc__ -copy_store.__doc__ = async_api.copy_store.__doc__ -load.__doc__ = async_api.load.__doc__ -open.__doc__ = async_api.open.__doc__ -open_consolidated.__doc__ = async_api.open_consolidated.__doc__ -save.__doc__ = async_api.save.__doc__ -save_array.__doc__ = async_api.save_array.__doc__ -save_group.__doc__ = async_api.save_group.__doc__ -tree.__doc__ = async_api.tree.__doc__ -array.__doc__ = async_api.array.__doc__ -group.__doc__ = async_api.group.__doc__ -open_group.__doc__ = async_api.open_group.__doc__ -create.__doc__ = async_api.create.__doc__ -empty.__doc__ = async_api.empty.__doc__ -empty_like.__doc__ = async_api.empty_like.__doc__ -full.__doc__ = async_api.full.__doc__ -full_like.__doc__ = async_api.full_like.__doc__ -ones.__doc__ = async_api.ones.__doc__ -ones_like.__doc__ = async_api.ones_like.__doc__ -open_array.__doc__ = async_api.open_array.__doc__ -open_like.__doc__ = async_api.open_like.__doc__ -zeros.__doc__ = async_api.zeros.__doc__ -zeros_like.__doc__ = async_api.zeros_like.__doc__ +# TODO: add type annotations for 
kwargs +def zeros_like(a: ArrayLike, **kwargs: Any) -> Array: + """Create an array of zeros like `a`. + + Parameters + ---------- + a : array-like + The array to create an empty array like. + **kwargs + Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + + Returns + ------- + Array + The new array. + """ + return Array(sync(async_api.zeros_like(a, **kwargs))) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index b57712717b..e5fc707f0a 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -285,7 +285,7 @@ async def create( fill_value: Any | None = None, attributes: dict[str, JSON] | None = None, # v3 only - chunk_shape: ChunkCoords | None = None, + chunk_shape: ShapeLike | None = None, chunk_key_encoding: ( ChunkKeyEncoding | tuple[Literal["default"], Literal[".", "/"]] @@ -313,7 +313,7 @@ async def create( fill_value: Any | None = None, attributes: dict[str, JSON] | None = None, # v3 only - chunk_shape: ChunkCoords | None = None, + chunk_shape: ShapeLike | None = None, chunk_key_encoding: ( ChunkKeyEncoding | tuple[Literal["default"], Literal[".", "/"]] @@ -340,7 +340,7 @@ async def create( fill_value: Any | None = None, attributes: dict[str, JSON] | None = None, # v3 only - chunk_shape: ChunkCoords | None = None, + chunk_shape: ShapeLike | None = None, chunk_key_encoding: ( ChunkKeyEncoding | tuple[Literal["default"], Literal[".", "/"]] @@ -372,7 +372,7 @@ async def create( fill_value: Any | None = None, attributes: dict[str, JSON] | None = None, # v3 only - chunk_shape: ChunkCoords | None = None, + chunk_shape: ShapeLike | None = None, chunk_key_encoding: ( ChunkKeyEncoding | tuple[Literal["default"], Literal[".", "/"]] diff --git a/tests/test_api.py b/tests/test_api.py index 90f6dae110..f98565ad68 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -49,11 +49,11 @@ def test_create_array(memory_store: Store) -> None: # create array with float shape with pytest.raises(TypeError): - z = create(shape=(400.5, 100), 
store=store, overwrite=True) + z = create(shape=(400.5, 100), store=store, overwrite=True) # type: ignore [arg-type] # create array with float chunk shape with pytest.raises(TypeError): - z = create(shape=(400, 100), chunks=(16, 16.5), store=store, overwrite=True) + z = create(shape=(400, 100), chunks=(16, 16.5), store=store, overwrite=True) # type: ignore [arg-type] @pytest.mark.parametrize("path", ["foo", "/", "/foo", "///foo/bar"]) diff --git a/tests/test_array.py b/tests/test_array.py index 263b536784..3eb317e50e 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -558,8 +558,7 @@ async def test_info_complete_async(self) -> None: @pytest.mark.parametrize("store", ["memory"], indirect=True) -@pytest.mark.parametrize("zarr_format", [2, 3]) -def test_resize_1d(store: MemoryStore, zarr_format: int) -> None: +def test_resize_1d(store: MemoryStore, zarr_format: ZarrFormat) -> None: z = zarr.create( shape=105, chunks=10, dtype="i4", fill_value=0, store=store, zarr_format=zarr_format ) @@ -597,8 +596,7 @@ def test_resize_1d(store: MemoryStore, zarr_format: int) -> None: @pytest.mark.parametrize("store", ["memory"], indirect=True) -@pytest.mark.parametrize("zarr_format", [2, 3]) -def test_resize_2d(store: MemoryStore, zarr_format: int) -> None: +def test_resize_2d(store: MemoryStore, zarr_format: ZarrFormat) -> None: z = zarr.create( shape=(105, 105), chunks=(10, 10), @@ -659,8 +657,7 @@ def test_resize_2d(store: MemoryStore, zarr_format: int) -> None: @pytest.mark.parametrize("store", ["memory"], indirect=True) -@pytest.mark.parametrize("zarr_format", [2, 3]) -def test_append_1d(store: MemoryStore, zarr_format: int) -> None: +def test_append_1d(store: MemoryStore, zarr_format: ZarrFormat) -> None: a = np.arange(105) z = zarr.create(shape=a.shape, chunks=10, dtype=a.dtype, store=store, zarr_format=zarr_format) z[:] = a @@ -689,8 +686,7 @@ def test_append_1d(store: MemoryStore, zarr_format: int) -> None: @pytest.mark.parametrize("store", ["memory"], 
indirect=True) -@pytest.mark.parametrize("zarr_format", [2, 3]) -def test_append_2d(store: MemoryStore, zarr_format: int) -> None: +def test_append_2d(store: MemoryStore, zarr_format: ZarrFormat) -> None: a = np.arange(105 * 105, dtype="i4").reshape((105, 105)) z = zarr.create( shape=a.shape, chunks=(10, 10), dtype=a.dtype, store=store, zarr_format=zarr_format @@ -713,8 +709,7 @@ def test_append_2d(store: MemoryStore, zarr_format: int) -> None: @pytest.mark.parametrize("store", ["memory"], indirect=True) -@pytest.mark.parametrize("zarr_format", [2, 3]) -def test_append_2d_axis(store: MemoryStore, zarr_format: int) -> None: +def test_append_2d_axis(store: MemoryStore, zarr_format: ZarrFormat) -> None: a = np.arange(105 * 105, dtype="i4").reshape((105, 105)) z = zarr.create( shape=a.shape, chunks=(10, 10), dtype=a.dtype, store=store, zarr_format=zarr_format @@ -735,8 +730,7 @@ def test_append_2d_axis(store: MemoryStore, zarr_format: int) -> None: @pytest.mark.parametrize("store", ["memory"], indirect=True) -@pytest.mark.parametrize("zarr_format", [2, 3]) -def test_append_bad_shape(store: MemoryStore, zarr_format: int) -> None: +def test_append_bad_shape(store: MemoryStore, zarr_format: ZarrFormat) -> None: a = np.arange(100) z = zarr.create(shape=a.shape, chunks=10, dtype=a.dtype, store=store, zarr_format=zarr_format) z[:] = a From 5bf7bcfcfe6426bea37df5d0691211f6a71daa39 Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Thu, 19 Dec 2024 01:23:27 -0800 Subject: [PATCH 12/12] deps: add packaging to required deps (#2573) * deps: add packaging to required deps * temporarily pin numpy version for mypy --- .pre-commit-config.yaml | 3 ++- pyproject.toml | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4a93e9ce87..ea1cd4dbab 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -28,9 +28,10 @@ repos: files: src|tests additional_dependencies: # Package dependencies + - packaging - 
donfig - numcodecs[crc32c] - - numpy + - numpy==2.1 # until https://github.com/numpy/numpy/issues/28034 is resolved - typing_extensions - universal-pathlib # Tests diff --git a/pyproject.toml b/pyproject.toml index 6c8110cbf9..75bbbf15d3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,6 +26,7 @@ maintainers = [ requires-python = ">=3.11" # If you add a new dependency here, please also add it to .pre-commit-config.yml dependencies = [ + 'packaging>=22.0', 'numpy>=1.25', 'numcodecs[crc32c]>=0.14', 'typing_extensions>=4.9', @@ -173,6 +174,7 @@ serve = "sphinx-autobuild docs docs/_build --host 0.0.0.0" [tool.hatch.envs.upstream] python = "3.13" dependencies = [ + 'packaging @ git+https://github.com/pypa/packaging', 'numpy', # from scientific-python-nightly-wheels 'numcodecs @ git+https://github.com/zarr-developers/numcodecs', 'fsspec @ git+https://github.com/fsspec/filesystem_spec', @@ -206,6 +208,7 @@ See Spec 0000 for details and drop schedule: https://scientific-python.org/specs """ python = "3.11" dependencies = [ + 'packaging==22.*', 'numpy==1.25.*', 'numcodecs==0.14.*', # 0.14 needed for zarr3 codecs 'fsspec==2022.10.0',