From 12baaa7c5e1a854f1a386182523e281cea93f1a3 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Sun, 9 Jun 2024 14:11:21 +0200 Subject: [PATCH 1/6] set up testing infrastructure for remote store --- pyproject.toml | 5 ++- src/zarr/store/remote.py | 4 +- .../test_local.py} | 36 ----------------- tests/v3/test_store/test_memory.py | 40 +++++++++++++++++++ .../test_remote.py} | 28 ++++++++++++- 5 files changed, 72 insertions(+), 41 deletions(-) rename tests/v3/{test_store.py => test_store/test_local.py} (50%) create mode 100644 tests/v3/test_store/test_memory.py rename tests/v3/{test_remote_store.py => test_store/test_remote.py} (66%) diff --git a/pyproject.toml b/pyproject.toml index 0bab6e6652..3f24da998e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -112,8 +112,11 @@ extra-dependencies = [ "pytest-cov", "msgpack", "lmdb", + "s3fs", "pytest-asyncio", - "moto", + "moto[s3]", + "flask-cors", + "flask", "requests", "mypy" ] diff --git a/src/zarr/store/remote.py b/src/zarr/store/remote.py index b803e66ce8..28f1908037 100644 --- a/src/zarr/store/remote.py +++ b/src/zarr/store/remote.py @@ -6,7 +6,7 @@ import fsspec from zarr.abc.store import Store -from zarr.buffer import Buffer, BufferPrototype +from zarr.buffer import Buffer, BufferPrototype, default_buffer_prototype from zarr.common import OpenMode from zarr.store.core import _dereference_path @@ -79,7 +79,7 @@ def __repr__(self) -> str: async def get( self, key: str, - prototype: BufferPrototype, + prototype: BufferPrototype = default_buffer_prototype, byte_range: tuple[int | None, int | None] | None = None, ) -> Buffer | None: path = _dereference_path(self.path, key) diff --git a/tests/v3/test_store.py b/tests/v3/test_store/test_local.py similarity index 50% rename from tests/v3/test_store.py rename to tests/v3/test_store/test_local.py index 52882ea78c..191a137d46 100644 --- a/tests/v3/test_store.py +++ b/tests/v3/test_store/test_local.py @@ -1,48 +1,12 @@ from __future__ import annotations -from typing import Any - import pytest from zarr.buffer import Buffer from zarr.store.local import LocalStore -from zarr.store.memory import MemoryStore from zarr.testing.store import StoreTests -class TestMemoryStore(StoreTests[MemoryStore]): - store_cls = MemoryStore - - def set(self, store: MemoryStore, key: str, value: Buffer) -> None: - store._store_dict[key] = value - - def get(self, store: MemoryStore, key: str) -> Buffer: - return store._store_dict[key] - - @pytest.fixture(scope="function", params=[None, {}]) - def store_kwargs(self, request) -> dict[str, Any]: - return {"store_dict": request.param, "mode": "w"} - - @pytest.fixture(scope="function") - def store(self, store_kwargs: dict[str, Any]) -> MemoryStore: - return self.store_cls(**store_kwargs) - - def test_store_repr(self, store: MemoryStore) -> None: - assert str(store) == f"memory://{id(store._store_dict)}" - - def test_store_supports_writes(self, store: MemoryStore) -> None: - assert store.supports_writes - - def test_store_supports_listing(self, store: MemoryStore) -> None: - assert store.supports_listing - - def test_store_supports_partial_writes(self, store: MemoryStore) -> None: - assert store.supports_partial_writes - - def test_list_prefix(self, store: MemoryStore) -> None: - assert True - - class TestLocalStore(StoreTests[LocalStore]): store_cls = LocalStore diff --git a/tests/v3/test_store/test_memory.py b/tests/v3/test_store/test_memory.py new file mode 100644 index 0000000000..96b8b19e2c --- /dev/null +++ b/tests/v3/test_store/test_memory.py @@ -0,0 +1,40 @@ +from __future__ import annotations + +import pytest + +from zarr.buffer import Buffer +from zarr.store.memory import MemoryStore +from zarr.testing.store import StoreTests + + +class TestMemoryStore(StoreTests[MemoryStore]): + store_cls = MemoryStore + + def set(self, store: MemoryStore, key: str, value: Buffer) -> None: + store._store_dict[key] = value + + def get(self, store: MemoryStore, key: str) -> Buffer: + return store._store_dict[key] + + @pytest.fixture(scope="function", params=[None, {}]) + def store_kwargs(self, request) -> dict[str, str | None | dict[str, Buffer]]: + return {"store_dict": request.param, "mode": "w"} + + @pytest.fixture(scope="function") + def store(self, store_kwargs: str | None | dict[str, Buffer]) -> MemoryStore: + return self.store_cls(**store_kwargs) + + def test_store_repr(self, store: MemoryStore) -> None: + assert str(store) == f"memory://{id(store._store_dict)}" + + def test_store_supports_writes(self, store: MemoryStore) -> None: + assert store.supports_writes + + def test_store_supports_listing(self, store: MemoryStore) -> None: + assert store.supports_listing + + def test_store_supports_partial_writes(self, store: MemoryStore) -> None: + assert store.supports_partial_writes + + def test_list_prefix(self, store: MemoryStore) -> None: + assert True diff --git a/tests/v3/test_remote_store.py b/tests/v3/test_store/test_remote.py similarity index 66% rename from tests/v3/test_remote_store.py rename to tests/v3/test_store/test_remote.py index 56d5285508..d4b4c80a17 100644 --- a/tests/v3/test_remote_store.py +++ b/tests/v3/test_store/test_remote.py @@ -2,8 +2,9 @@ import pytest -from zarr.buffer import Buffer +from zarr.buffer import Buffer, default_buffer_prototype from zarr.store import RemoteStore +from zarr.testing.store import StoreTests s3fs = pytest.importorskip("s3fs") requests = pytest.importorskip("requests") @@ -70,5 +71,28 @@ async def test_basic(s3): await store.set("foo", Buffer.from_bytes(data)) assert await store.exists("foo") assert (await store.get("foo")).to_bytes() == data - out = await store.get_partial_values([("foo", (1, None))]) + out = await store.get_partial_values( + prototype=default_buffer_prototype, key_ranges=[("foo", (1, None))] + ) assert out[0].to_bytes() == data[1:] + + +class TestRemoteStore(StoreTests[RemoteStore]): + store_cls = RemoteStore + + @pytest.fixture(scope="function") + def store_kwargs(self) -> dict[str, str | bool]: + return {"mode": "w", "endpoint_url": endpoint_uri, "anon": False} + + @pytest.fixture(scope="function") + def store(self, store_kwargs: dict[str, str | bool]) -> RemoteStore: + return self.store_cls(url=test_bucket_name, **store_kwargs) + + def get(self, store: RemoteStore, key: str) -> Buffer: + return Buffer.from_bytes((store.root / key).read_bytes()) + + def set(self, store: RemoteStore, key: str, value: Buffer) -> None: + parent = (store.root / key).parent + if not parent.exists(): + parent.mkdir(parents=True) + (store.root / key).write_bytes(value.to_bytes()) From 095d72ea27d480582f35b3738d5786d03a1b17fc Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Sun, 9 Jun 2024 18:51:50 +0200 Subject: [PATCH 2/6] broken tests but get and set are implemented correctly for TestRemoteStoreS3 --- pyproject.toml | 3 +- tests/v3/test_store/test_remote.py | 56 +++++++++++++++++++++++++----- 2 files changed, 50 insertions(+), 9 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 3f24da998e..96a884b737 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,7 +31,8 @@ dependencies = [ 'crc32c', 'zstandard', 'typing_extensions', - 'donfig' + 'donfig', + 'pytest' ] dynamic = [ "version", diff --git a/tests/v3/test_store/test_remote.py b/tests/v3/test_store/test_remote.py index d4b4c80a17..fd1d03e640 100644 --- a/tests/v3/test_store/test_remote.py +++ b/tests/v3/test_store/test_remote.py @@ -4,7 +4,9 @@ from zarr.buffer import Buffer, default_buffer_prototype from zarr.store import RemoteStore +from zarr.store.core import _normalize_interval_index from zarr.testing.store import StoreTests +from zarr.testing.utils import assert_bytes_equal s3fs = pytest.importorskip("s3fs") requests = pytest.importorskip("requests") @@ -77,22 +79,60 @@ async def test_basic(s3): assert out[0].to_bytes() == data[1:] -class TestRemoteStore(StoreTests[RemoteStore]): +class TestRemoteStoreS3(StoreTests[RemoteStore]): store_cls = RemoteStore @pytest.fixture(scope="function") def store_kwargs(self) -> dict[str, str | bool]: - return {"mode": "w", "endpoint_url": endpoint_uri, "anon": False} + return { + "mode": "w", + "endpoint_url": endpoint_uri, + "anon": False, + "url": f"s3://{test_bucket_name}", + } @pytest.fixture(scope="function") def store(self, store_kwargs: dict[str, str | bool]) -> RemoteStore: - return self.store_cls(url=test_bucket_name, **store_kwargs) + return self.store_cls(**store_kwargs) def get(self, store: RemoteStore, key: str) -> Buffer: - return Buffer.from_bytes((store.root / key).read_bytes()) + return Buffer.from_bytes(store._fs.get_mapper()[os.path.join(store.path, key)]) def set(self, store: RemoteStore, key: str, value: Buffer) -> None: - parent = (store.root / key).parent - if not parent.exists(): - parent.mkdir(parents=True) - (store.root / key).write_bytes(value.to_bytes()) + store._fs.get_mapper()[os.path.join(store.path, key)] = value.to_bytes() + + def test_store_repr(self, store: RemoteStore) -> None: + assert str(store) == f"Remote fsspec store: {store.path}" + + def test_store_supports_writes(self, store: RemoteStore) -> None: + assert True + + def test_store_supports_partial_writes(self, store: RemoteStore) -> None: + assert True + + def test_store_supports_listing(self, store: RemoteStore) -> None: + assert True + + @pytest.mark.parametrize("key", ["c/0", "foo/c/0.0", "foo/0/0"]) + @pytest.mark.parametrize("data", [b"\x01\x02\x03\x04", b""]) + @pytest.mark.parametrize("byte_range", (None, (0, None), (1, None), (1, 2), (None, 1))) + async def test_get( + self, + store: RemoteStore, + key: str, + data: bytes, + byte_range: None | tuple[int | None, int | None], + store_kwargs, + ) -> None: + """ + Ensure that data can be read from the store using the store.get method. + """ + + s3fs.S3FileSystem.clear_instance_cache() + data_buf = Buffer.from_bytes(data) + self.set(store, key, data_buf) + store = self.store_cls(**store_kwargs) + observed = await store.get(key, prototype=default_buffer_prototype, byte_range=byte_range) + start, length = _normalize_interval_index(data_buf, interval=byte_range) + expected = data_buf[start : start + length] + assert_bytes_equal(observed, expected) From 6ab10998223d90dc65ad1776282f5ae57472f1d6 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Sun, 9 Jun 2024 19:17:59 +0200 Subject: [PATCH 3/6] remove implementation of test_get, and make s3 fixture autoused, to reveal multiple event loop error --- tests/v3/test_store/test_remote.py | 28 +--------------------------- 1 file changed, 1 insertion(+), 27 deletions(-) diff --git a/tests/v3/test_store/test_remote.py b/tests/v3/test_store/test_remote.py index fd1d03e640..5904bbf2d9 100644 --- a/tests/v3/test_store/test_remote.py +++ b/tests/v3/test_store/test_remote.py @@ -4,9 +4,7 @@ from zarr.buffer import Buffer, default_buffer_prototype from zarr.store import RemoteStore -from zarr.store.core import _normalize_interval_index from zarr.testing.store import StoreTests -from zarr.testing.utils import assert_bytes_equal s3fs = pytest.importorskip("s3fs") requests = pytest.importorskip("requests") @@ -44,7 +42,7 @@ def get_boto3_client(): return session.create_client("s3", endpoint_url=endpoint_uri) -@pytest.fixture() +@pytest.fixture(autouse=True) def s3(s3_base): client = get_boto3_client() client.create_bucket(Bucket=test_bucket_name, ACL="public-read") @@ -112,27 +110,3 @@ def test_store_supports_partial_writes(self, store: RemoteStore) -> None: def test_store_supports_listing(self, store: RemoteStore) -> None: assert True - - @pytest.mark.parametrize("key", ["c/0", "foo/c/0.0", "foo/0/0"]) - @pytest.mark.parametrize("data", [b"\x01\x02\x03\x04", b""]) - @pytest.mark.parametrize("byte_range", (None, (0, None), (1, None), (1, 2), (None, 1))) - async def test_get( - self, - store: RemoteStore, - key: str, - data: bytes, - byte_range: None | tuple[int | None, int | None], - store_kwargs, - ) -> None: - """ - Ensure that data can be read from the store using the store.get method. - """ - - s3fs.S3FileSystem.clear_instance_cache() - data_buf = Buffer.from_bytes(data) - self.set(store, key, data_buf) - store = self.store_cls(**store_kwargs) - observed = await store.get(key, prototype=default_buffer_prototype, byte_range=byte_range) - start, length = _normalize_interval_index(data_buf, interval=byte_range) - expected = data_buf[start : start + length] - assert_bytes_equal(observed, expected) From a4ca3712d6c148637c2f5214695f40dd39a1cea6 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Mon, 10 Jun 2024 16:54:50 +0200 Subject: [PATCH 4/6] Update tests/v3/test_store/test_remote.py Co-authored-by: Martin Durant --- tests/v3/test_store/test_remote.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/v3/test_store/test_remote.py b/tests/v3/test_store/test_remote.py index 5904bbf2d9..7fe1a3f589 100644 --- a/tests/v3/test_store/test_remote.py +++ b/tests/v3/test_store/test_remote.py @@ -106,7 +106,7 @@ def test_store_supports_writes(self, store: RemoteStore) -> None: assert True def test_store_supports_partial_writes(self, store: RemoteStore) -> None: - assert True + assert False def test_store_supports_listing(self, store: RemoteStore) -> None: assert True From 0896cb8a7d4bc400edf97e8f456bd50ad164ef26 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Mon, 10 Jun 2024 17:34:05 +0200 Subject: [PATCH 5/6] don't use fsmap, and don't use os.path.join --- tests/v3/test_store/test_remote.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/v3/test_store/test_remote.py b/tests/v3/test_store/test_remote.py index 5904bbf2d9..300f468aec 100644 --- a/tests/v3/test_store/test_remote.py +++ b/tests/v3/test_store/test_remote.py @@ -4,6 +4,7 @@ from zarr.buffer import Buffer, default_buffer_prototype from zarr.store import RemoteStore +from zarr.sync import sync from zarr.testing.store import StoreTests s3fs = pytest.importorskip("s3fs") @@ -63,7 +64,7 @@ async def alist(it): return out -async def test_basic(s3): +async def test_basic(): store = RemoteStore(f"s3://{test_bucket_name}", mode="w", endpoint_url=endpoint_uri, anon=False) assert not await alist(store.list()) assert not await store.exists("foo") @@ -94,10 +95,10 @@ def store(self, store_kwargs: dict[str, str | bool]) -> RemoteStore: return self.store_cls(**store_kwargs) def get(self, store: RemoteStore, key: str) -> Buffer: - return Buffer.from_bytes(store._fs.get_mapper()[os.path.join(store.path, key)]) + return Buffer.from_bytes(sync(store._fs.cat(f"{store.path}/{key}"))) def set(self, store: RemoteStore, key: str, value: Buffer) -> None: - store._fs.get_mapper()[os.path.join(store.path, key)] = value.to_bytes() + store._fs.write_bytes(f"{store.path}/{key}", value.to_bytes()) def test_store_repr(self, store: RemoteStore) -> None: assert str(store) == f"Remote fsspec store: {store.path}" From 93a2c6a3d7393f156526b63b30fac9d5b204ca90 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Mon, 10 Jun 2024 17:38:24 +0200 Subject: [PATCH 6/6] scope s3 fixture to session, mark test_store_supports_partial_writes as xfail --- tests/v3/test_store/test_remote.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/v3/test_store/test_remote.py b/tests/v3/test_store/test_remote.py index 79b3837447..fbbae6683f 100644 --- a/tests/v3/test_store/test_remote.py +++ b/tests/v3/test_store/test_remote.py @@ -43,7 +43,7 @@ def get_boto3_client(): return session.create_client("s3", endpoint_url=endpoint_uri) -@pytest.fixture(autouse=True) +@pytest.fixture(autouse=True, scope="session") def s3(s3_base): client = get_boto3_client() client.create_bucket(Bucket=test_bucket_name, ACL="public-read") @@ -106,8 +106,9 @@ def test_store_repr(self, store: RemoteStore) -> None: def test_store_supports_writes(self, store: RemoteStore) -> None: assert True + @pytest.mark.xfail def test_store_supports_partial_writes(self, store: RemoteStore) -> None: - assert False + raise AssertionError def test_store_supports_listing(self, store: RemoteStore) -> None: assert True