From bbaf3baae31a04e52d957142862157c550aa77f8 Mon Sep 17 00:00:00 2001 From: Matthew Iannucci Date: Thu, 10 Oct 2024 16:37:42 -0400 Subject: [PATCH] Fix array dimensions --- .../tests/test_writers/test_icechunk.py | 6 ++++-- virtualizarr/writers/icechunk.py | 21 ++++++++++++------- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/virtualizarr/tests/test_writers/test_icechunk.py b/virtualizarr/tests/test_writers/test_icechunk.py index 72c4fe6..bce95a7 100644 --- a/virtualizarr/tests/test_writers/test_icechunk.py +++ b/virtualizarr/tests/test_writers/test_icechunk.py @@ -8,7 +8,7 @@ import numpy as np import numpy.testing as npt -from xarray import Dataset, open_dataset +from xarray import Dataset, open_dataset, open_zarr from xarray.core.variable import Variable from zarr import Array, Group, group @@ -70,7 +70,7 @@ def test_write_new_virtual_variable( # assert dict(arr.attrs) == {"units": "km"} # check dimensions - assert arr.attrs["DIMENSION_NAMES"] == ["x", "y"] + assert arr.attrs["_ARRAY_DIMENSIONS"] == ["x", "y"] def test_set_single_virtual_ref_without_encoding( self, icechunk_filestore: "IcechunkStore", simple_netcdf4: Path @@ -111,6 +111,8 @@ def test_set_single_virtual_ref_without_encoding( expected_array = expected_ds["foo"].to_numpy() npt.assert_equal(array, expected_array) + #ds = open_zarr(store=icechunk_filestore, group='foo', zarr_format=3, consolidated=False) + # note: we don't need to test that committing works, because now we have confirmed # the refs are in the store (even uncommitted) it's icechunk's problem to manage them now. diff --git a/virtualizarr/writers/icechunk.py b/virtualizarr/writers/icechunk.py index 84fbe1b..091835b 100644 --- a/virtualizarr/writers/icechunk.py +++ b/virtualizarr/writers/icechunk.py @@ -25,7 +25,7 @@ } -def dataset_to_icechunk(ds: Dataset, store: "IcechunkStore") -> None: +async def dataset_to_icechunk_async(ds: Dataset, store: "IcechunkStore") -> None: """ Write an xarray dataset whose variables wrap ManifestArrays to an Icechunk store. @@ -52,13 +52,17 @@ def dataset_to_icechunk(ds: Dataset, store: "IcechunkStore") -> None: # root_group.attrs = ds.attrs for k, v in ds.attrs.items(): root_group.attrs[k] = encode_zarr_attr_value(v) + + return await write_variables_to_icechunk_group( + ds.variables, + store=store, + group=root_group, + ) + +def dataset_to_icechunk(ds: Dataset, store: "IcechunkStore") -> None: asyncio.run( - write_variables_to_icechunk_group( - ds.variables, - store=store, - group=root_group, - ) + dataset_to_icechunk_async(ds=ds, store=store) ) @@ -113,12 +117,13 @@ async def write_virtual_variable_to_icechunk( zarray = ma.zarray # creates array if it doesn't already exist + codecs = zarray._v3_codec_pipeline() arr = group.require_array( name=name, shape=zarray.shape, chunk_shape=zarray.chunks, dtype=encode_dtype(zarray.dtype), - codecs=zarray._v3_codec_pipeline(), + #codecs=codecs, dimension_names=var.dims, fill_value=zarray.fill_value, # TODO fill_value? @@ -127,7 +132,7 @@ async def write_virtual_variable_to_icechunk( # TODO it would be nice if we could assign directly to the .attrs property for k, v in var.attrs.items(): arr.attrs[k] = encode_zarr_attr_value(v) - arr.attrs["DIMENSION_NAMES"] = encode_zarr_attr_value(var.dims) + arr.attrs["_ARRAY_DIMENSIONS"] = encode_zarr_attr_value(var.dims) _encoding_keys = {"_FillValue", "missing_value", "scale_factor", "add_offset"} for k, v in var.encoding.items():