From ca261b115b26daf0b5a1fdfa417df9c2e9792082 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Fri, 13 Dec 2024 16:31:28 -0500 Subject: [PATCH 01/11] Check if store is writable for setting or deleting objects --- src/zarr/storage/object_store.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/zarr/storage/object_store.py b/src/zarr/storage/object_store.py index ee13364991..744f6bdc3e 100644 --- a/src/zarr/storage/object_store.py +++ b/src/zarr/storage/object_store.py @@ -104,10 +104,12 @@ def supports_writes(self) -> bool: return True async def set(self, key: str, value: Buffer) -> None: + self._check_writable() buf = value.to_bytes() await obs.put_async(self.store, key, buf) async def set_if_not_exists(self, key: str, value: Buffer) -> None: + self._check_writable() buf = value.to_bytes() await obs.put_async(self.store, key, buf, mode="create") @@ -116,6 +118,7 @@ def supports_deletes(self) -> bool: return True async def delete(self, key: str) -> None: + self._check_writable() await obs.delete_async(self.store, key) @property From 0eb416aaf80c1469a7b79e2b503cee167824a7a9 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Fri, 13 Dec 2024 16:57:19 -0500 Subject: [PATCH 02/11] Add test for object store repr --- tests/test_store/test_object.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/test_store/test_object.py b/tests/test_store/test_object.py index 31a3fa0ebc..b891cb3e35 100644 --- a/tests/test_store/test_object.py +++ b/tests/test_store/test_object.py @@ -20,3 +20,9 @@ def store_kwargs(self, tmpdir) -> dict[str, str | bool]: @pytest.fixture def store(self, store_kwargs: dict[str, str | bool]) -> ObjectStore: return self.store_cls(**store_kwargs) + + def test_store_repr(self, store: ObjectStore) -> None: + from fnmatch import fnmatch + + pattern = "ObjectStore(object://LocalStore(file:///*))" + assert fnmatch(f"{store!r}", pattern) From 247432f2cf10a0bd76c4f6438da2bee40f9caec9 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Fri, 13 Dec 2024 17:29:08 -0500 Subject: [PATCH 03/11] Add attribute tests --- tests/test_store/test_object.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/test_store/test_object.py b/tests/test_store/test_object.py index b891cb3e35..da435ba9d7 100644 --- a/tests/test_store/test_object.py +++ b/tests/test_store/test_object.py @@ -26,3 +26,12 @@ def test_store_repr(self, store: ObjectStore) -> None: pattern = "ObjectStore(object://LocalStore(file:///*))" assert fnmatch(f"{store!r}", pattern) + + def test_store_supports_writes(self, store: ObjectStore) -> None: + assert store.supports_writes + + def test_store_supports_partial_writes(self, store: ObjectStore) -> None: + assert not store.supports_partial_writes + + def test_store_supports_listing(self, store: ObjectStore) -> None: + assert store.supports_listing From 4b31b3c8a809cabc4da0f41b4b233f7969ab88f4 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Fri, 13 Dec 2024 20:59:42 -0500 Subject: [PATCH 04/11] Add get and set methods to test class --- src/zarr/testing/store.py | 4 ++-- tests/test_store/test_object.py | 18 +++++++++++++++++- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/src/zarr/testing/store.py b/src/zarr/testing/store.py index d26d83e566..ddd4667017 100644 --- a/src/zarr/testing/store.py +++ b/src/zarr/testing/store.py @@ -23,7 +23,7 @@ class StoreTests(Generic[S, B]): async def set(self, store: S, key: str, value: Buffer) -> None: """ Insert a value into a storage backend, with a specific key. - This should not not use any store methods. Bypassing the store methods allows them to be + This should not use any store methods. Bypassing the store methods allows them to be tested. """ raise NotImplementedError @@ -31,7 +31,7 @@ async def set(self, store: S, key: str, value: Buffer) -> None: async def get(self, store: S, key: str) -> Buffer: """ Retrieve a value from a storage backend, by key. - This should not not use any store methods. Bypassing the store methods allows them to be + This should not use any store methods. Bypassing the store methods allows them to be tested. """ diff --git a/tests/test_store/test_object.py b/tests/test_store/test_object.py index da435ba9d7..9799e19ac8 100644 --- a/tests/test_store/test_object.py +++ b/tests/test_store/test_object.py @@ -3,10 +3,14 @@ obstore = pytest.importorskip("obstore") -from zarr.core.buffer import cpu +import re + +from zarr.core.buffer import Buffer, cpu from zarr.storage.object_store import ObjectStore from zarr.testing.store import StoreTests +PATTERN = r"file://(/[\w/.-]+)" + class TestObjectStore(StoreTests[ObjectStore, cpu.Buffer]): store_cls = ObjectStore @@ -21,6 +25,18 @@ def store_kwargs(self, tmpdir) -> dict[str, str | bool]: def store(self, store_kwargs: dict[str, str | bool]) -> ObjectStore: return self.store_cls(**store_kwargs) + async def get(self, store: ObjectStore, key: str) -> Buffer: + # TODO: There must be a better way to get the path to the store + store_path = re.search(PATTERN, str(store)).group(1) + new_local_store = obstore.store.LocalStore(prefix=store_path) + return self.buffer_cls.from_bytes(obstore.get(new_local_store, key)) + + async def set(self, store: ObjectStore, key: str, value: Buffer) -> None: + # TODO: There must be a better way to get the path to the store + store_path = re.search(PATTERN, str(store)).group(1) + new_local_store = obstore.store.LocalStore(prefix=store_path) + obstore.put(new_local_store, key, value.to_bytes()) + def test_store_repr(self, store: ObjectStore) -> None: from fnmatch import fnmatch From d49d1ff903a51e68bd6d80502b319eb24cec79e9 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Fri, 13 Dec 2024 21:16:59 -0500 Subject: [PATCH 05/11] Raise an exeption for previously set key --- src/zarr/testing/store.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/zarr/testing/store.py b/src/zarr/testing/store.py index ddd4667017..6a0f73c00a 100644 --- a/src/zarr/testing/store.py +++ b/src/zarr/testing/store.py @@ -308,8 +308,9 @@ async def test_set_if_not_exists(self, store: S) -> None: data_buf = self.buffer_cls.from_bytes(b"0000") await self.set(store, key, data_buf) - new = self.buffer_cls.from_bytes(b"1111") - await store.set_if_not_exists("k", new) # no error + with pytest.raises(Exception): + new = self.buffer_cls.from_bytes(b"1111") + await store.set_if_not_exists("k", new) result = await store.get(key, default_buffer_prototype()) assert result == data_buf From c2ebc8ffe73eef6110a0a75e43560f9776e9b258 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Sat, 14 Dec 2024 15:39:06 -0500 Subject: [PATCH 06/11] Update src/zarr/testing/store.py Co-authored-by: Davis Bennett --- src/zarr/testing/store.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zarr/testing/store.py b/src/zarr/testing/store.py index 6a0f73c00a..7b70ccc51b 100644 --- a/src/zarr/testing/store.py +++ b/src/zarr/testing/store.py @@ -310,7 +310,7 @@ async def test_set_if_not_exists(self, store: S) -> None: with pytest.raises(Exception): new = self.buffer_cls.from_bytes(b"1111") - await store.set_if_not_exists("k", new) + await store.set_if_not_exists(key, new) result = await store.get(key, default_buffer_prototype()) assert result == data_buf From eb76698a18001aba21429b5b798e342f789d7bca Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Sat, 14 Dec 2024 21:05:38 -0500 Subject: [PATCH 07/11] Update _transform_list_dir to not remove all items --- src/zarr/storage/object_store.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/zarr/storage/object_store.py b/src/zarr/storage/object_store.py index 744f6bdc3e..13535c6e67 100644 --- a/src/zarr/storage/object_store.py +++ b/src/zarr/storage/object_store.py @@ -161,12 +161,13 @@ async def _transform_list_dir( # We assume that the underlying object-store implementation correctly handles the # prefix, so we don't double-check that the returned results actually start with the # given prefix. - prefix_len = len(prefix) + prefix_len = len(prefix) + 1 # If one is not added to the length, all items will contain "/" async for batch in list_stream: for item in batch: - # Yield this item if "/" does not exist after the prefix. - if "/" not in item["path"][prefix_len:]: - yield item["path"] + # Yield this item if "/" does not exist after the prefix + item_path = item["path"][prefix_len:] + if "/" not in item_path: + yield item_path class _BoundedRequest(TypedDict): From f310260df9cf75dc0822f8c9511e29bf4320e5a2 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Sat, 14 Dec 2024 21:10:42 -0500 Subject: [PATCH 08/11] Return bytes from GetResult --- tests/test_store/test_object.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_store/test_object.py b/tests/test_store/test_object.py index 9799e19ac8..ca6a1b176f 100644 --- a/tests/test_store/test_object.py +++ b/tests/test_store/test_object.py @@ -29,7 +29,7 @@ async def get(self, store: ObjectStore, key: str) -> Buffer: # TODO: There must be a better way to get the path to the store store_path = re.search(PATTERN, str(store)).group(1) new_local_store = obstore.store.LocalStore(prefix=store_path) - return self.buffer_cls.from_bytes(obstore.get(new_local_store, key)) + return self.buffer_cls.from_bytes(obstore.get(new_local_store, key).bytes()) async def set(self, store: ObjectStore, key: str, value: Buffer) -> None: # TODO: There must be a better way to get the path to the store From 86951b8eb6f87459d77318d37360b6424d2df49c Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Sat, 14 Dec 2024 22:38:54 -0500 Subject: [PATCH 09/11] Don't raise an exception on set_if_not_exists --- src/zarr/storage/object_store.py | 4 +++- src/zarr/testing/store.py | 5 ++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/zarr/storage/object_store.py b/src/zarr/storage/object_store.py index 13535c6e67..9bc3526058 100644 --- a/src/zarr/storage/object_store.py +++ b/src/zarr/storage/object_store.py @@ -1,6 +1,7 @@ from __future__ import annotations import asyncio +import contextlib from collections import defaultdict from collections.abc import Iterable from typing import TYPE_CHECKING, Any, TypedDict @@ -111,7 +112,8 @@ async def set(self, key: str, value: Buffer) -> None: async def set_if_not_exists(self, key: str, value: Buffer) -> None: self._check_writable() buf = value.to_bytes() - await obs.put_async(self.store, key, buf, mode="create") + with contextlib.suppress(obs.exceptions.AlreadyExistsError): + await obs.put_async(self.store, key, buf, mode="create") @property def supports_deletes(self) -> bool: diff --git a/src/zarr/testing/store.py b/src/zarr/testing/store.py index 7b70ccc51b..ddd4667017 100644 --- a/src/zarr/testing/store.py +++ b/src/zarr/testing/store.py @@ -308,9 +308,8 @@ async def test_set_if_not_exists(self, store: S) -> None: data_buf = self.buffer_cls.from_bytes(b"0000") await self.set(store, key, data_buf) - with pytest.raises(Exception): - new = self.buffer_cls.from_bytes(b"1111") - await store.set_if_not_exists(key, new) + new = self.buffer_cls.from_bytes(b"1111") + await store.set_if_not_exists("k", new) # no error result = await store.get(key, default_buffer_prototype()) assert result == data_buf From f989884820e6a5a8e1a35dc1139752fd57e63aef Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Sat, 14 Dec 2024 22:41:17 -0500 Subject: [PATCH 10/11] Remove test that stores empty file --- src/zarr/testing/store.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/zarr/testing/store.py b/src/zarr/testing/store.py index ddd4667017..54e5e7e9f0 100644 --- a/src/zarr/testing/store.py +++ b/src/zarr/testing/store.py @@ -103,14 +103,14 @@ def test_store_supports_listing(self, store: S) -> None: raise NotImplementedError @pytest.mark.parametrize("key", ["c/0", "foo/c/0.0", "foo/0/0"]) - @pytest.mark.parametrize("data", [b"\x01\x02\x03\x04", b""]) @pytest.mark.parametrize("byte_range", [None, (0, None), (1, None), (1, 2), (None, 1)]) async def test_get( - self, store: S, key: str, data: bytes, byte_range: None | tuple[int | None, int | None] + self, store: S, key: str, byte_range: None | tuple[int | None, int | None] ) -> None: """ Ensure that data can be read from the store using the store.get method. """ + data = b"\x01\x02\x03\x04" data_buf = self.buffer_cls.from_bytes(data) await self.set(store, key, data_buf) observed = await store.get(key, prototype=default_buffer_prototype(), byte_range=byte_range) From 40e1b25bf6ffd66572fd1c081a2ddbedc2aaefd3 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Sat, 14 Dec 2024 22:50:06 -0500 Subject: [PATCH 11/11] Handle None as start or end of byte range request --- src/zarr/storage/object_store.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/zarr/storage/object_store.py b/src/zarr/storage/object_store.py index 9bc3526058..8304e578f1 100644 --- a/src/zarr/storage/object_store.py +++ b/src/zarr/storage/object_store.py @@ -71,17 +71,22 @@ async def get( return prototype.buffer.from_bytes(await resp.bytes_async()) start, end = byte_range + if (start is None or start == 0) and end is None: + resp = await obs.get_async(self.store, key) + return prototype.buffer.from_bytes(await resp.bytes_async()) if start is not None and end is not None: resp = await obs.get_range_async(self.store, key, start=start, end=end) return prototype.buffer.from_bytes(memoryview(resp)) elif start is not None: - if start >= 0: + if start > 0: # Offset request resp = await obs.get_async(self.store, key, options={"range": {"offset": start}}) else: resp = await obs.get_async(self.store, key, options={"range": {"suffix": start}}) - return prototype.buffer.from_bytes(await resp.bytes_async()) + elif end is not None: + resp = await obs.get_range_async(self.store, key, start=0, end=end) + return prototype.buffer.from_bytes(memoryview(resp)) else: raise ValueError(f"Unexpected input to `get`: {start=}, {end=}")