Add support for custom metadata. Tested :) #19

Merged: 4 commits, Jan 13, 2025

Changes from all commits
19 changes: 18 additions & 1 deletion funlib/persistence/arrays/array.py
@@ -1,10 +1,11 @@
 import logging
 from functools import reduce
-from typing import Optional, Sequence, Union
+from typing import Optional, Sequence, Union, Any
 
 import dask.array as da
 import numpy as np
 from dask.array.optimization import fuse_slice
+from zarr import Array as ZarrArray
 
 from funlib.geometry import Coordinate, Roi

@@ -83,6 +84,10 @@ def __init__(
             shape=self._source_data.shape,
         )
 
+        # used for custom metadata unrelated to indexing with physical units
+        # only used if not reading from zarr and there is no built in `.attrs`
+        self._attrs: dict[str, Any] = {}
+
         if lazy_op is not None:
             self.apply_lazy_ops(lazy_op)

@@ -93,6 +98,18 @@ def __init__(
 
         self.validate()
 
+    @property
+    def attrs(self) -> dict:
+        """
+        Return dict that can be used to store custom metadata. Will be persistent
+        for zarr arrays. If reading from zarr, any existing metadata (such as
+        voxel_size, axis_names, etc.) will also be exposed here.
+        """
+        if isinstance(self._source_data, ZarrArray):
+            return self._source_data.attrs
+        else:
+            return self._attrs
+
     @property
     def chunk_shape(self) -> Coordinate:
         return Coordinate(self.data.chunksize)
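
The new attrs property gives the same read/write interface whether the array is zarr-backed or purely in-memory. A minimal usage sketch (the import path and the numpy-backed constructor call are assumptions based on the Array signature used elsewhere in this PR; attrs on a non-zarr array live only in memory):

    import numpy as np
    from funlib.persistence import Array

    # in-memory array: attrs go into the plain dict added in __init__
    array = Array(np.zeros((10, 10)), offset=(0, 0), voxel_size=(1, 1))
    array.attrs["sample"] = "A"
    assert array.attrs["sample"] == "A"

    # a zarr-backed Array would instead expose zarr's persistent .attrs here
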
28 changes: 19 additions & 9 deletions funlib/persistence/arrays/datasets.py
@@ -1,5 +1,5 @@
 import logging
-from typing import Optional, Sequence, Union
+from typing import Any, Optional, Sequence, Union
 
 import numpy as np
 import zarr
@@ -128,6 +128,7 @@ def prepare_ds(
     chunk_shape: Optional[Sequence[int]] = None,
     dtype: DTypeLike = np.float32,
     mode: str = "a",
+    custom_metadata: dict[str, Any] | None = None,
     **kwargs,
 ) -> Array:
     """Prepare a Zarr or N5 dataset.
@@ -179,6 +180,11 @@ def prepare_ds(
         The mode to open the dataset in.
         See https://zarr.readthedocs.io/en/stable/api/creation.html#zarr.creation.open_array
 
+    custom_metadata:
+
+        A dictionary of custom metadata to add to the dataset. This will be written to the
+        zarr .attrs object.
+
     kwargs:
 
         See additional arguments available here:
@@ -319,14 +325,18 @@ def prepare_ds(
         raise ArrayNotFoundError(f"Nothing found at path {store}")
 
     default_metadata_format = get_default_metadata_format()
-    ds.attrs.put(
-        {
-            default_metadata_format.axis_names_attr: combined_metadata.axis_names,
-            default_metadata_format.units_attr: combined_metadata.units,
-            default_metadata_format.voxel_size_attr: combined_metadata.voxel_size,
-            default_metadata_format.offset_attr: combined_metadata.offset,
-        }
-    )
+    our_metadata = {
+        default_metadata_format.axis_names_attr: combined_metadata.axis_names,
+        default_metadata_format.units_attr: combined_metadata.units,
+        default_metadata_format.voxel_size_attr: combined_metadata.voxel_size,
+        default_metadata_format.offset_attr: combined_metadata.offset,
+    }
+    # check keys don't conflict
+    if custom_metadata is not None:
+        assert set(our_metadata.keys()).isdisjoint(custom_metadata.keys())
+        our_metadata.update(custom_metadata)
+
+    ds.attrs.put(our_metadata)
 
     # open array
     array = Array(ds, offset, voxel_size, axis_names, units)
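
For reference, a short sketch of how the new parameter is intended to be used (the store path "example.zarr" and the metadata keys are made-up values; using a standard key such as voxel_size would trip the isdisjoint assertion above):

    from funlib.persistence import prepare_ds

    # custom keys are written to zarr .attrs next to offset/voxel_size/axis_names/units
    array = prepare_ds(
        "example.zarr",
        (100, 100),
        voxel_size=(4, 4),
        mode="w",
        custom_metadata={"sample": "A", "stain": "DAPI"},
    )
    assert array.attrs["sample"] == "A"
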
17 changes: 17 additions & 0 deletions tests/test_datasets.py
@@ -15,6 +15,23 @@
 }
 
 
+@pytest.mark.parametrize("store", stores.keys())
+def test_metadata(tmpdir, store):
+    store = tmpdir / store
+
+    # test prepare_ds creates array if it does not exist and mode is write
+    array = prepare_ds(
+        store,
+        (10, 10),
+        mode="w",
+        custom_metadata={"custom": "metadata"},
+    )
+    assert array.attrs["custom"] == "metadata"
+    array.attrs["custom2"] = "new metadata"
+
+    assert open_ds(store).attrs["custom2"] == "new metadata"
+
+
 @pytest.mark.parametrize("store", stores.keys())
 @pytest.mark.parametrize("dtype", [np.float32, np.uint8, np.uint64])
 def test_helpers(tmpdir, store, dtype):