-
-
Notifications
You must be signed in to change notification settings - Fork 290
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
fix: validate v3 dtypes when loading/creating v3 metadata #2209
Changes from 1 commit
f1b01ac
30cdd89
0012508
44d310b
c23a945
eaf8063
236487d
1ba45fc
b6c33fc
9736b4b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,7 +21,7 @@ | |
from zarr.core.array_spec import ArraySpec | ||
from zarr.core.chunk_grids import RegularChunkGrid | ||
from zarr.core.chunk_key_encodings import parse_separator | ||
from zarr.core.common import ZARRAY_JSON, ZATTRS_JSON, parse_dtype, parse_shapelike | ||
from zarr.core.common import ZARRAY_JSON, ZATTRS_JSON, parse_shapelike | ||
from zarr.core.config import config, parse_indexing_order | ||
from zarr.core.metadata.common import ArrayMetadata, parse_attributes | ||
|
||
|
@@ -157,6 +157,11 @@ def update_attributes(self, attributes: dict[str, JSON]) -> Self: | |
return replace(self, attributes=attributes) | ||
|
||
|
||
def parse_dtype(data: npt.DTypeLike) -> np.dtype[Any]: | ||
# todo: real validation | ||
return np.dtype(data) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we now parse v2 and v3 dtypes differently. |
||
|
||
|
||
def parse_zarr_format(data: object) -> Literal[2]: | ||
if data == 2: | ||
return 2 | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -35,7 +35,9 @@ | |
paths = st.lists(node_names, min_size=1).map(lambda x: "/".join(x)) | st.just("/") | ||
np_arrays = npst.arrays( | ||
# TODO: re-enable timedeltas once they are supported | ||
dtype=npst.scalar_dtypes().filter(lambda x: x.kind != "m"), | ||
dtype=npst.scalar_dtypes().filter( | ||
lambda x: (x.kind not in ["m", "M"]) and (x.byteorder not in [">"]) | ||
), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I concur. It's not being handled today |
||
shape=npst.array_shapes(max_dims=4), | ||
) | ||
stores = st.builds(MemoryStore, st.just({}), mode=st.just("w")) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,11 +1,9 @@ | ||
from __future__ import annotations | ||
|
||
import json | ||
import re | ||
from typing import TYPE_CHECKING, Literal | ||
|
||
from zarr.codecs.bytes import BytesCodec | ||
from zarr.core.buffer import default_buffer_prototype | ||
from zarr.core.chunk_key_encodings import DefaultChunkKeyEncoding, V2ChunkKeyEncoding | ||
from zarr.core.metadata.v3 import ArrayV3Metadata | ||
|
||
|
@@ -19,7 +17,12 @@ | |
import numpy as np | ||
import pytest | ||
|
||
from zarr.core.metadata.v3 import parse_dimension_names, parse_fill_value, parse_zarr_format | ||
from zarr.core.metadata.v3 import ( | ||
parse_dimension_names, | ||
parse_dtype, | ||
parse_fill_value, | ||
parse_zarr_format, | ||
) | ||
|
||
bool_dtypes = ("bool",) | ||
|
||
|
@@ -234,22 +237,43 @@ def test_metadata_to_dict( | |
assert observed == expected | ||
|
||
|
||
@pytest.mark.parametrize("fill_value", [-1, 0, 1, 2932897]) | ||
@pytest.mark.parametrize("precision", ["ns", "D"]) | ||
async def test_datetime_metadata(fill_value: int, precision: str) -> None: | ||
# @pytest.mark.parametrize("fill_value", [-1, 0, 1, 2932897]) | ||
# @pytest.mark.parametrize("precision", ["ns", "D"]) | ||
# async def test_datetime_metadata(fill_value: int, precision: str) -> None: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this test was creating an invalid dtype (datetime). We can leave it in place and bring it back when there is support for this dtype in v3. |
||
# metadata_dict = { | ||
# "zarr_format": 3, | ||
# "node_type": "array", | ||
# "shape": (1,), | ||
# "chunk_grid": {"name": "regular", "configuration": {"chunk_shape": (1,)}}, | ||
# "data_type": f"<M8[{precision}]", | ||
# "chunk_key_encoding": {"name": "default", "separator": "."}, | ||
# "codecs": (), | ||
# "fill_value": np.datetime64(fill_value, precision), | ||
# } | ||
# metadata = ArrayV3Metadata.from_dict(metadata_dict) | ||
# # ensure there isn't a TypeError here. | ||
# d = metadata.to_buffer_dict(default_buffer_prototype()) | ||
|
||
# result = json.loads(d["zarr.json"].to_bytes()) | ||
# assert result["fill_value"] == fill_value | ||
|
||
|
||
async def test_invalid_dtype_raises() -> None: | ||
metadata_dict = { | ||
"zarr_format": 3, | ||
"node_type": "array", | ||
"shape": (1,), | ||
"chunk_grid": {"name": "regular", "configuration": {"chunk_shape": (1,)}}, | ||
"data_type": f"<M8[{precision}]", | ||
"data_type": "<M8[ns]", | ||
"chunk_key_encoding": {"name": "default", "separator": "."}, | ||
"codecs": (), | ||
"fill_value": np.datetime64(fill_value, precision), | ||
"fill_value": np.datetime64(0, "ns"), | ||
} | ||
metadata = ArrayV3Metadata.from_dict(metadata_dict) | ||
# ensure there isn't a TypeError here. | ||
d = metadata.to_buffer_dict(default_buffer_prototype()) | ||
with pytest.raises(ValueError, match=r"Invalid V3 data_type"): | ||
ArrayV3Metadata.from_dict(metadata_dict) | ||
|
||
|
||
result = json.loads(d["zarr.json"].to_bytes()) | ||
assert result["fill_value"] == fill_value | ||
@pytest.mark.parametrize("data", ["datetime64[s]", "foo", object()]) | ||
def test_parse_invalid_dtype_raises(data): | ||
with pytest.raises(ValueError, match=r"Invalid V3 data_type"): | ||
parse_dtype(data) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The input type is already
np.dtype[Any]
so I don't think we need to parse this.