Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added Array.info_complete #2514

Merged
merged 3 commits into from
Nov 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 45 additions & 8 deletions src/zarr/core/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -1346,18 +1346,53 @@ def info(self) -> Any:
AsyncArray.info_complete
All information about an array, including dynamic information
like the number of bytes and chunks written.

Examples
--------

>>> arr = await zarr.api.asynchronous.create(
... path="array", shape=(3, 4, 5), chunks=(2, 2, 2)
... )
>>> arr.info
Type : Array
Zarr format : 3
Data type : DataType.float64
Shape : (3, 4, 5)
Chunk shape : (2, 2, 2)
Order : C
Read-only : False
Store type : MemoryStore
Codecs : [{'endian': <Endian.little: 'little'>}]
No. bytes : 480
"""
return self._info()

async def info_complete(self) -> Any:
# TODO: get the size of the object from the store.
extra = {
"count_chunks_initialized": await self.nchunks_initialized(),
# count_bytes_stored isn't yet implemented.
}
return self._info(extra=extra)

def _info(self, extra: dict[str, int] | None = None) -> Any:
"""
Return all the information for an array, including dynamic information such as the storage size.

In addition to the static information, this provides

- The count of chunks initialized
- The sum of the bytes written

Returns
-------
ArrayInfo

See Also
--------
AsyncArray.info
A property giving just the statically known information about an array.
"""
return self._info(
await self.nchunks_initialized(),
await self.store_path.store.getsize_prefix(self.store_path.path),
)

def _info(
self, count_chunks_initialized: int | None = None, count_bytes_stored: int | None = None
) -> Any:
kwargs: dict[str, Any] = {}
if self.metadata.zarr_format == 2:
assert isinstance(self.metadata, ArrayV2Metadata)
Expand Down Expand Up @@ -1386,6 +1421,8 @@ def _info(self, extra: dict[str, int] | None = None) -> Any:
_read_only=self.read_only,
_store_type=type(self.store_path.store).__name__,
_count_bytes=self.dtype.itemsize * self.size,
_count_bytes_stored=count_bytes_stored,
_count_chunks_initialized=count_chunks_initialized,
**kwargs,
)

Expand Down
82 changes: 82 additions & 0 deletions tests/test_array.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import dataclasses
import json
import math
import pickle
Expand Down Expand Up @@ -474,6 +475,87 @@ def test_info_v3(self) -> None:
)
assert result == expected

def test_info_complete(self) -> None:
    """Check ``info_complete`` on a fresh v3 array and again after one write.

    The array is 4x4 with 2x2 chunks. The report is compared against a
    fully spelled-out ``ArrayInfo`` twice: once before any data has been
    assigned, and once after a single chunk-sized region is written.
    """
    array = zarr.create(shape=(4, 4), chunks=(2, 2), zarr_format=3)

    # Stage 1: nothing has been written to the array yet.
    reported = array.info_complete()
    baseline = ArrayInfo(
        _zarr_format=3,
        _data_type=DataType.parse("float64"),
        _shape=(4, 4),
        _chunk_shape=(2, 2),
        _order="C",
        _read_only=False,
        _store_type="MemoryStore",
        _codecs=[BytesCodec()],
        _count_bytes=128,
        _count_chunks_initialized=0,
        # NOTE(review): presumably the size of the metadata document alone,
        # since no chunk has been written at this point -- confirm.
        _count_bytes_stored=373,
    )
    assert reported == baseline

    # Stage 2: assign the top-left 2x2 region, which spans exactly one chunk.
    array[:2, :2] = 10
    reported = array.info_complete()
    after_write = dataclasses.replace(
        baseline,
        _count_bytes_stored=405,  # 32 bytes more than the baseline
        _count_chunks_initialized=1,
    )
    assert reported == after_write

async def test_info_v2_async(self) -> None:
    """Check the static ``info`` of an async Zarr format 2 array.

    Only statically known fields are expected; the chunk and stored-byte
    counters are left at the ``ArrayInfo`` defaults.
    """
    async_array = await zarr.api.asynchronous.create(
        shape=(4, 4),
        chunks=(2, 2),
        zarr_format=2,
    )
    reported = async_array.info

    # Format 2 reports a NumPy dtype; no codecs are listed here.
    wanted = ArrayInfo(
        _zarr_format=2,
        _data_type=np.dtype("float64"),
        _shape=(4, 4),
        _chunk_shape=(2, 2),
        _order="C",
        _read_only=False,
        _store_type="MemoryStore",
        _count_bytes=128,
    )
    assert reported == wanted

async def test_info_v3_async(self) -> None:
    """Check the static ``info`` of an async Zarr format 3 array.

    Only statically known fields are expected; the chunk and stored-byte
    counters are left at the ``ArrayInfo`` defaults.
    """
    async_array = await zarr.api.asynchronous.create(
        shape=(4, 4),
        chunks=(2, 2),
        zarr_format=3,
    )
    reported = async_array.info

    # Format 3 reports a ``DataType`` and an explicit codec list.
    wanted = ArrayInfo(
        _zarr_format=3,
        _data_type=DataType.parse("float64"),
        _shape=(4, 4),
        _chunk_shape=(2, 2),
        _order="C",
        _read_only=False,
        _store_type="MemoryStore",
        _codecs=[BytesCodec()],
        _count_bytes=128,
    )
    assert reported == wanted

async def test_info_complete_async(self) -> None:
    """Check awaited ``info_complete`` on a fresh v3 array and after one write.

    Async counterpart of the synchronous ``info_complete`` test: the same
    4x4 array with 2x2 chunks is inspected before any data is written and
    again after one chunk-sized region is set.
    """
    async_array = await zarr.api.asynchronous.create(
        shape=(4, 4),
        chunks=(2, 2),
        zarr_format=3,
    )

    # Stage 1: nothing has been written to the array yet.
    reported = await async_array.info_complete()
    baseline = ArrayInfo(
        _zarr_format=3,
        _data_type=DataType.parse("float64"),
        _shape=(4, 4),
        _chunk_shape=(2, 2),
        _order="C",
        _read_only=False,
        _store_type="MemoryStore",
        _codecs=[BytesCodec()],
        _count_bytes=128,
        _count_chunks_initialized=0,
        # NOTE(review): presumably the size of the metadata document alone,
        # since no chunk has been written at this point -- confirm.
        _count_bytes_stored=373,
    )
    assert reported == baseline

    # Stage 2: set the top-left 2x2 region, which spans exactly one chunk.
    top_left = (slice(2), slice(2))
    await async_array.setitem(top_left, 10)
    reported = await async_array.info_complete()
    after_write = dataclasses.replace(
        baseline,
        _count_bytes_stored=405,  # 32 bytes more than the baseline
        _count_chunks_initialized=1,
    )
    assert reported == after_write


@pytest.mark.parametrize("store", ["memory"], indirect=True)
@pytest.mark.parametrize("zarr_format", [2, 3])
Expand Down