Skip to content

Commit

Permalink
correct array.nbytes, and add tests (#2576)
Browse files Browse the repository at this point in the history
* correct array.nbytes, and add tests

* use nbytes in array info construction

* stronger docstrings
  • Loading branch information
d-v-b authored Dec 19, 2024
1 parent 5bf7bcf commit 1cc3917
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 4 deletions.
24 changes: 20 additions & 4 deletions src/zarr/core/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -977,9 +977,17 @@ def _iter_chunk_regions(
@property
def nbytes(self) -> int:
    """
    The total number of bytes that can be stored in the chunks of this array.

    Returns
    -------
    int
        The number of elements in the array (``size``) multiplied by the size in bytes
        of a single element (``dtype.itemsize``).

    Notes
    -----
    This value is calculated by multiplying the number of elements in the array by the size
    of each element, the latter of which is determined by the dtype of the array.
    For this reason, ``nbytes`` will likely be inaccurate for arrays with variable-length
    dtypes. It is not possible to determine the size of an array with variable-length elements
    from the shape and dtype alone.
    """
    # Scale by the element count (``size``), not the chunk count (``nchunks``):
    # the capacity in bytes depends on how many elements exist, not on how
    # they are partitioned into chunks.
    return self.size * self.dtype.itemsize

async def _get_selection(
self,
Expand Down Expand Up @@ -1429,7 +1437,7 @@ def _info(
_order=self.order,
_read_only=self.read_only,
_store_type=type(self.store_path.store).__name__,
_count_bytes=self.dtype.itemsize * self.size,
_count_bytes=self.nbytes,
_count_bytes_stored=count_bytes_stored,
_count_chunks_initialized=count_chunks_initialized,
**kwargs,
Expand Down Expand Up @@ -1740,7 +1748,15 @@ def _iter_chunk_coords(
@property
def nbytes(self) -> int:
    """
    The total number of bytes that can be stored in the chunks of this array.

    Returns
    -------
    int
        The ``nbytes`` value reported by the wrapped ``_async_array``.

    Notes
    -----
    This value is calculated by multiplying the number of elements in the array by the size
    of each element, the latter of which is determined by the dtype of the array.
    For this reason, ``nbytes`` will likely be inaccurate for arrays with variable-length
    dtypes. It is not possible to determine the size of an array with variable-length elements
    from the shape and dtype alone.
    """
    # Delegate to the wrapped async array so both array types always agree.
    return self._async_array.nbytes

Expand Down
18 changes: 18 additions & 0 deletions tests/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -776,3 +776,21 @@ async def test_special_complex_fill_values_roundtrip(fill_value: Any, expected:
assert content is not None
actual = json.loads(content.to_bytes())
assert actual["fill_value"] == expected


@pytest.mark.parametrize("shape", [(1,), (2, 3), (4, 5, 6)])
@pytest.mark.parametrize("dtype", ["uint8", "float32"])
@pytest.mark.parametrize("array_type", ["async", "sync"])
async def test_nbytes(
    shape: tuple[int, ...], dtype: str, array_type: Literal["async", "sync"]
) -> None:
    """
    Check that ``nbytes`` on an ``Array`` and on the ``AsyncArray`` it wraps equals the number
    of elements in the array multiplied by the item size of its dtype.
    """
    sync_array = Array.create(store=MemoryStore(), shape=shape, dtype=dtype, fill_value=0)
    # The expected capacity is always derived from the synchronous array's shape and dtype,
    # regardless of which flavour of array is under test.
    expected_nbytes = np.prod(sync_array.shape) * sync_array.dtype.itemsize
    target = sync_array._async_array if array_type == "async" else sync_array
    assert target.nbytes == expected_nbytes

0 comments on commit 1cc3917

Please sign in to comment.