Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Zarr compatibility functions #478

Open
wants to merge 14 commits into
base: main
Choose a base branch
from
Prev Previous commit
Next Next commit
linting
ghidalgo3 committed Jul 16, 2024
commit 54650ac583f05e112b0596c39620a9119a7a8fac
2 changes: 1 addition & 1 deletion kerchunk/fits.py
Original file line number Diff line number Diff line change
@@ -38,7 +38,7 @@ def process_file(
inline_threshold=100,
primary_attr_to_group=False,
out=None,
zarr_version=None
zarr_version=None,
):
"""
Create JSON references for a single FITS file as a zarr group
3 changes: 3 additions & 0 deletions kerchunk/tests/test_fits.py
Original file line number Diff line number Diff line change
@@ -12,6 +12,7 @@
range_im = os.path.join(testdir, "arange.fits")
var = os.path.join(testdir, "variable_length_table.fits")


@pytest.mark.parametrize("zarr_version", [2, 3])
def test_ascii_table(zarr_version):
# this one directly hits a remote server - should cache?
@@ -57,6 +58,7 @@ def test_cube(zarr_version):
expected = hdul[0].data
assert (arr[:] == expected).all()


@pytest.mark.parametrize("zarr_version", [2, 3])
def test_with_class(zarr_version):
ftz = kerchunk.fits.FitsToZarr(range_im)
@@ -70,6 +72,7 @@ def test_with_class(zarr_version):
expected = hdul[0].data
assert (arr[:] == expected).all()


@pytest.mark.parametrize("zarr_version", [2, 3])
def test_var(zarr_version):
data = fits.open(var)[1].data
1 change: 1 addition & 0 deletions kerchunk/tests/test_hdf.py
Original file line number Diff line number Diff line change
@@ -13,6 +13,7 @@

here = osp.dirname(__file__)


@pytest.mark.parametrize("zarr_version", [2, 3])
def test_single(zarr_version):
"""Test creating references for a single HDF file"""
11 changes: 9 additions & 2 deletions kerchunk/utils.py
Original file line number Diff line number Diff line change
@@ -13,12 +13,12 @@
from zarr.store import StorePath, MemoryStore
from zarr.v2.hierarchy import group
import zarr.array

_ZARR_VERSION = 3
except:
except ModuleNotFoundError:
_ZARR_VERSION = 2



def class_factory(func):
"""Experimental uniform API across function-based file scanners"""

@@ -62,17 +62,21 @@ def consolidate(refs):
out[k] = v
return {"version": 1, "refs": out}


def encode_fill_value(v, dtype, object_codec=None):
    """Encode a fill value using whichever zarr API matches ``_ZARR_VERSION``.

    The arguments are forwarded unchanged to the underlying zarr encoder
    and its result is returned as-is:

    - ``_ZARR_VERSION == 3``: ``zarr.v2.meta.Metadata2.encode_fill_value``
    - otherwise: ``zarr.meta.encode_fill_value``
    """
    if _ZARR_VERSION != 3:
        from zarr.meta import encode_fill_value as _legacy_encode

        return _legacy_encode(v, dtype, object_codec)

    # Precarious use of this function
    # https://github.com/zarr-developers/zarr-python/issues/2021
    # https://github.com/zarr-developers/VirtualiZarr/pull/182#discussion_r1673096418
    from zarr.v2.meta import Metadata2

    return Metadata2.encode_fill_value(v, dtype, object_codec)


def rename_target(refs, renames):
"""Utility to change URLs in a reference set in a predictable way

@@ -135,6 +139,7 @@ def rename_target_files(
with fsspec.open(url_out, mode="wt", **(storage_options_out or {})) as f:
ujson.dump(new, f)


def zarr_init_group_and_store(store=None, zarr_version=None):
zarr_version = zarr_version or 2
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you clarify the semantics of zarr_version here (and in zarr_open)? What's the behavior of each of these cases?

| zarr-python-library-version | zarr_version | behavior |
| --- | --- | --- |
| 2.x | None | write zarr v2 |
| 2.x | 2 | write zarr v2 |
| 2.x | 3 | error |
| 3.x | None | write zarr v2 or v3? |
| 3.x | 2 | write zarr v2 |
| 3.x | 3 | write zarr v3 |

Really it's just the case of zarr_version=None with zarr-python 3.x that I'm unsure about. i.e. what's the default behavior: write zarr v2, or write whatever version is the default for that version of zarr-python?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also worth confirming that we do raise an error for zarr_version=3 with zarr-python=2, so that the keyword argument is not silently ignored.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Surprisingly, if you have zarrv2 installed and you set zarr_version=3 then zarr will accept that, issue a warning, and give you a valid group. I need to re-run these tests with zarrv3 installed and see what happens in those cases.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Zarr2 (i.e., the mainline released version) has had v3 support internally for quite a while, but the implementation is different. It should still conform to the v3 spec, though!

if _ZARR_VERSION == 3 and zarr_version == 2:
@@ -147,6 +152,7 @@ def zarr_init_group_and_store(store=None, zarr_version=None):
store = store or {}
return zarr.group(store, overwrite=True, zarr_version=zarr_version), store


def zarr_open(store, zarr_version=None):
if _ZARR_VERSION == 3:
store = store or StorePath(MemoryStore(mode="w"))
@@ -155,6 +161,7 @@ def zarr_open(store, zarr_version=None):
store = store or {}
return zarr.open(store, zarr_version=zarr_version)


def _encode_for_JSON(store, zarr_version=2):
"""Make store JSON encodable"""
if _ZARR_VERSION == 2 or zarr_version == 2: