Skip to content
This repository has been archived by the owner on Nov 12, 2024. It is now read-only.

Switch to _ARRAY_DIMENSIONS for xarray #3

Merged
merged 1 commit into from
Oct 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions virtualizarr/tests/test_writers/test_icechunk.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

import numpy as np
import numpy.testing as npt
from xarray import Dataset, open_dataset
from xarray import Dataset, open_dataset, open_zarr
from xarray.core.variable import Variable
from zarr import Array, Group, group

Expand Down Expand Up @@ -70,7 +70,7 @@ def test_write_new_virtual_variable(
# assert dict(arr.attrs) == {"units": "km"}

# check dimensions
assert arr.attrs["DIMENSION_NAMES"] == ["x", "y"]
assert arr.attrs["_ARRAY_DIMENSIONS"] == ["x", "y"]

def test_set_single_virtual_ref_without_encoding(
self, icechunk_filestore: "IcechunkStore", simple_netcdf4: Path
Expand Down Expand Up @@ -111,6 +111,8 @@ def test_set_single_virtual_ref_without_encoding(
expected_array = expected_ds["foo"].to_numpy()
npt.assert_equal(array, expected_array)

#ds = open_zarr(store=icechunk_filestore, group='foo', zarr_format=3, consolidated=False)

# note: we don't need to test that committing works, because now we have confirmed
# the refs are in the store (even uncommitted) it's icechunk's problem to manage them now.

Expand Down
21 changes: 13 additions & 8 deletions virtualizarr/writers/icechunk.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
}


def dataset_to_icechunk(ds: Dataset, store: "IcechunkStore") -> None:
async def dataset_to_icechunk_async(ds: Dataset, store: "IcechunkStore") -> None:

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wouldn't changing this without changing the test break the test?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Didn't change this, extracted it and made both async and sync versions

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh yes - sorry I couldn't see that easily when I looked from my phone haha

"""
Write an xarray dataset whose variables wrap ManifestArrays to an Icechunk store.

Expand All @@ -52,13 +52,17 @@ def dataset_to_icechunk(ds: Dataset, store: "IcechunkStore") -> None:
# root_group.attrs = ds.attrs
for k, v in ds.attrs.items():
root_group.attrs[k] = encode_zarr_attr_value(v)

return await write_variables_to_icechunk_group(
ds.variables,
store=store,
group=root_group,
)


def dataset_to_icechunk(ds: Dataset, store: "IcechunkStore") -> None:
asyncio.run(
write_variables_to_icechunk_group(
ds.variables,
store=store,
group=root_group,
)
dataset_to_icechunk_async(ds=ds, store=store)
)


Expand Down Expand Up @@ -113,12 +117,13 @@ async def write_virtual_variable_to_icechunk(
zarray = ma.zarray

# creates array if it doesn't already exist
codecs = zarray._v3_codec_pipeline()
arr = group.require_array(
name=name,
shape=zarray.shape,
chunk_shape=zarray.chunks,
dtype=encode_dtype(zarray.dtype),
codecs=zarray._v3_codec_pipeline(),
#codecs=codecs,

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does the codec pipeline stuff not work at all?

Copy link
Author

@mpiannucci mpiannucci Oct 10, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Currently crashing when zlib is included. I'm gunna wait till the next cut of zarr v3 to try again there

dimension_names=var.dims,
fill_value=zarray.fill_value,
# TODO fill_value?
Expand All @@ -127,7 +132,7 @@ async def write_virtual_variable_to_icechunk(
# TODO it would be nice if we could assign directly to the .attrs property
for k, v in var.attrs.items():
arr.attrs[k] = encode_zarr_attr_value(v)
arr.attrs["DIMENSION_NAMES"] = encode_zarr_attr_value(var.dims)
arr.attrs["_ARRAY_DIMENSIONS"] = encode_zarr_attr_value(var.dims)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Did I just make DIMENSION_NAMES up?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure haha but this is what xarray needs

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Zarr V3 actual has native support for dimension names, but it's not supported in Xarray yet. So put a flag that we should change this eventually.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we are just going to have both for now with this pr.


_encoding_keys = {"_FillValue", "missing_value", "scale_factor", "add_offset"}
for k, v in var.encoding.items():
Expand Down