From 370c651861d72f9ee3a7765c03a4416e22765c25 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Tue, 26 Mar 2024 14:49:08 -0400 Subject: [PATCH 01/14] test passing indexes={} --- virtualizarr/tests/test_xarray.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/virtualizarr/tests/test_xarray.py b/virtualizarr/tests/test_xarray.py index 4355d033..29558c0e 100644 --- a/virtualizarr/tests/test_xarray.py +++ b/virtualizarr/tests/test_xarray.py @@ -1,6 +1,8 @@ import numpy as np +import pytest import xarray as xr +from virtualizarr import open_virtual_dataset from virtualizarr.manifests import ChunkManifest, ManifestArray from virtualizarr.zarr import ZArray @@ -104,6 +106,7 @@ def test_concat_along_existing_dim(self): ds2 = xr.Dataset({"a": (["x", "y"], marr2)}) result = xr.concat([ds1, ds2], dim="x")["a"] + assert result.indexes == {} assert result.shape == (2, 20) assert result.chunks == (1, 10) @@ -150,6 +153,7 @@ def test_concat_along_new_dim(self): ds2 = xr.Dataset({"a": (["x", "y"], marr2)}) result = xr.concat([ds1, ds2], dim="z")["a"] + assert result.indexes == {} # xarray.concat adds new dimensions along axis=0 assert result.shape == (2, 5, 20) @@ -201,6 +205,7 @@ def test_concat_dim_coords_along_existing_dim(self): ds2 = xr.Dataset(coords=coords) result = xr.concat([ds1, ds2], dim="t")["t"] + assert result.indexes == {} assert result.shape == (40,) assert result.chunks == (10,) @@ -215,3 +220,21 @@ def test_concat_dim_coords_along_existing_dim(self): assert result.data.zarray.fill_value == zarray.fill_value assert result.data.zarray.order == zarray.order assert result.data.zarray.zarr_format == zarray.zarr_format + + +@pytest.fixture +def netcdf4_file(tmpdir): + # Set up example xarray dataset + ds = xr.tutorial.open_dataset("air_temperature") + + # Save it to disk as netCDF (in temporary directory) + filepath = f"{tmpdir}/air.nc" + ds.to_netcdf(filepath) + + return filepath + + +class TestOpenVirtualDataset: + def 
test_no_indexes(self, netcdf4_file): + vds = open_virtual_dataset(netcdf4_file, indexes={}) + assert vds.indexes == {} From bc32a825e1261165a0389a243b2711b6189d4d81 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Tue, 26 Mar 2024 15:53:16 -0400 Subject: [PATCH 02/14] test creating default indexes by passing indexes=None --- virtualizarr/tests/test_xarray.py | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/virtualizarr/tests/test_xarray.py b/virtualizarr/tests/test_xarray.py index 29558c0e..45f12dc4 100644 --- a/virtualizarr/tests/test_xarray.py +++ b/virtualizarr/tests/test_xarray.py @@ -1,6 +1,9 @@ +from typing import Mapping + import numpy as np import pytest import xarray as xr +from xarray.core.indexes import Index from virtualizarr import open_virtual_dataset from virtualizarr.manifests import ChunkManifest, ManifestArray @@ -234,7 +237,31 @@ def netcdf4_file(tmpdir): return filepath -class TestOpenVirtualDataset: +class TestOpenVirtualDataseIndexes: def test_no_indexes(self, netcdf4_file): vds = open_virtual_dataset(netcdf4_file, indexes={}) assert vds.indexes == {} + + def test_create_default_indexes(self, netcdf4_file): + vds = open_virtual_dataset(netcdf4_file, indexes=None) + ds = xr.open_dataset(netcdf4_file) + print(vds.indexes) + print(ds.indexes) + # TODO use xr.testing.assert_identical(vds.indexes, ds.indexes) instead once class supported by assertion comparison, see https://github.com/pydata/xarray/issues/5812 + assert index_mappings_equal(vds.xindexes, ds.xindexes) + + +def index_mappings_equal(indexes1: Mapping[str, Index], indexes2: Mapping[str, Index]): + # Check if the mappings have the same keys + if set(indexes1.keys()) != set(indexes2.keys()): + return False + + # Check if the values for each key are identical + for key in indexes1.keys(): + index1 = indexes1[key] + index2 = indexes2[key] + + if not index1.equals(index2): + return False + + return True From 
6d63446829edbdaabbc6c62323d625651c49fc23 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Tue, 26 Mar 2024 15:53:34 -0400 Subject: [PATCH 03/14] implementation of creating default indexes --- virtualizarr/xarray.py | 63 +++++++++++++++++++++++++----------------- 1 file changed, 38 insertions(+), 25 deletions(-) diff --git a/virtualizarr/xarray.py b/virtualizarr/xarray.py index f6912c4e..d1253a7a 100644 --- a/virtualizarr/xarray.py +++ b/virtualizarr/xarray.py @@ -1,9 +1,10 @@ -from typing import List, Literal, Optional, Union, overload +from typing import List, Literal, Mapping, Optional, Union, overload import ujson # type: ignore import xarray as xr from xarray import register_dataset_accessor from xarray.backends import BackendArray +from xarray.core.indexes import Index import virtualizarr.kerchunk as kerchunk from virtualizarr.kerchunk import KerchunkStoreRefs @@ -20,8 +21,8 @@ def open_virtual_dataset( filepath: str, filetype: Optional[str] = None, drop_variables: Optional[List[str]] = None, + indexes: Mapping[str, Index] | None = None, virtual_array_class=ManifestArray, - indexes={}, ) -> xr.Dataset: """ Open a file or store as an xarray Dataset wrapping virtualized zarr arrays. @@ -38,27 +39,38 @@ def open_virtual_dataset( If not provided will attempt to automatically infer the correct filetype from the the filepath's extension. drop_variables: list[str], default is None Variables in the file to drop before returning. + indexes : Mapping[str, Index], default is None + Default is None, which will read any 1D coordinate data to create in-memory Pandas indexes. + To avoid creating any indexes, pass indexes={}. virtual_array_class Virtual array class to use to represent the references to the chunks in each on-disk array. Currently can only be ManifestArray, but once VirtualZarrArray is implemented the default should be changed to that. 
""" # this is the only place we actually always need to use kerchunk directly - ds_refs = kerchunk.read_kerchunk_references_from_file( + vds_refs = kerchunk.read_kerchunk_references_from_file( filepath=filepath, filetype=filetype, ) - ds = dataset_from_kerchunk_refs( - ds_refs, + if indexes is None: + # add default indexes by reading data from file + # TODO we are reading a bunch of stuff we know we won't need here, e.g. all of the data variables... + # TODO it would also be nice if we could somehow consolidate this with the reading of the kerchunk references + ds = xr.open_dataset(filepath) + indexes = ds.xindexes + ds.close() + + vds = dataset_from_kerchunk_refs( + vds_refs, drop_variables=drop_variables, virtual_array_class=virtual_array_class, indexes=indexes, ) - # TODO we should probably also use ds.set_close() to tell xarray how to close the file we opened + # TODO we should probably also use vds.set_close() to tell xarray how to close the file we opened - return ds + return vds def dataset_from_kerchunk_refs( @@ -86,14 +98,9 @@ def dataset_from_kerchunk_refs( vars = {} for var_name in var_names_to_keep: - # TODO abstract all this parsing into a function/method? 
- arr_refs = kerchunk.extract_array_refs(refs, var_name) - chunk_dict, zarray, zattrs = kerchunk.parse_array_refs(arr_refs) - manifest = ChunkManifest.from_kerchunk_chunk_dict(chunk_dict) - dims = zattrs["_ARRAY_DIMENSIONS"] - - varr = virtual_array_class(zarray=zarray, chunkmanifest=manifest) - vars[var_name] = xr.Variable(data=varr, dims=dims, attrs=zattrs) + vars[var_name] = variable_from_kerchunk_refs( + refs, var_name, virtual_array_class + ) data_vars, coords = separate_coords(vars, indexes) @@ -109,6 +116,20 @@ def dataset_from_kerchunk_refs( return ds +def variable_from_kerchunk_refs( + refs: KerchunkStoreRefs, var_name: str, virtual_array_class +) -> xr.Variable: + """Create a single xarray Variable by reading specific keys of a kerchunk references dict.""" + + arr_refs = kerchunk.extract_array_refs(refs, var_name) + chunk_dict, zarray, zattrs = kerchunk.parse_array_refs(arr_refs) + manifest = ChunkManifest.from_kerchunk_chunk_dict(chunk_dict) + dims = zattrs["_ARRAY_DIMENSIONS"] + varr = virtual_array_class(zarray=zarray, chunkmanifest=manifest) + + return xr.Variable(data=varr, dims=dims, attrs=zattrs) + + def separate_coords( vars: dict[str, xr.Variable], indexes={}, @@ -121,6 +142,7 @@ def separate_coords( # this would normally come from CF decoding, let's hope the fact we're skipping that doesn't cause any problems... 
coord_names: List[str] = [] + # split data and coordinate variables (promote dimension coordinates) data_vars = {} coord_vars = {} @@ -135,16 +157,7 @@ def separate_coords( else: data_vars[name] = var - # this is stolen from https://github.com/pydata/xarray/pull/8051 - # needed otherwise xarray errors whilst trying to turn the KerchunkArrays for the 1D coordinate variables into indexes - # but it doesn't appear to work with `main` since #8107, which is why the workaround above is needed - # EDIT: actually even the workaround doesn't work - to avoid creating indexes I had to checkout xarray v2023.08.0, the last one before #8107 was merged - set_indexes = False - if set_indexes: - coords = coord_vars - else: - # explict Coordinates object with no index passed - coords = xr.Coordinates(coord_vars, indexes=indexes) + coords = xr.Coordinates(coord_vars, indexes=indexes) return data_vars, coords From 3cc5cab8a6b1aedbc50cfdff96468d4187defe58 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Tue, 26 Mar 2024 15:59:05 -0400 Subject: [PATCH 04/14] typo --- virtualizarr/tests/test_xarray.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virtualizarr/tests/test_xarray.py b/virtualizarr/tests/test_xarray.py index 45f12dc4..296a7d93 100644 --- a/virtualizarr/tests/test_xarray.py +++ b/virtualizarr/tests/test_xarray.py @@ -237,7 +237,7 @@ def netcdf4_file(tmpdir): return filepath -class TestOpenVirtualDataseIndexes: +class TestOpenVirtualDatasetIndexes: def test_no_indexes(self, netcdf4_file): vds = open_virtual_dataset(netcdf4_file, indexes={}) assert vds.indexes == {} From 8b929cbf229b41b310a117fc3bf92cc7fa4008ae Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Tue, 26 Mar 2024 16:00:59 -0400 Subject: [PATCH 05/14] clarify docstring --- virtualizarr/xarray.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/virtualizarr/xarray.py b/virtualizarr/xarray.py index d1253a7a..59e7326e 100644 --- a/virtualizarr/xarray.py +++ 
b/virtualizarr/xarray.py @@ -27,7 +27,9 @@ def open_virtual_dataset( """ Open a file or store as an xarray Dataset wrapping virtualized zarr arrays. - It's important that we avoid creating any IndexVariables, as our virtualized zarr array objects don't actually contain a collection that can be turned into a pandas.Index. + No data variables will be loaded. + + Xarray indexes can optionally be created (the default behaviour). To avoid creating any xarray indexes pass indexes={}. Parameters ---------- From cb5f2e108c1fec9ae70852e6a6dc7e5bde637490 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 26 Mar 2024 20:01:15 +0000 Subject: [PATCH 06/14] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- virtualizarr/xarray.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virtualizarr/xarray.py b/virtualizarr/xarray.py index 59e7326e..d441aa99 100644 --- a/virtualizarr/xarray.py +++ b/virtualizarr/xarray.py @@ -28,7 +28,7 @@ def open_virtual_dataset( Open a file or store as an xarray Dataset wrapping virtualized zarr arrays. No data variables will be loaded. - + Xarray indexes can optionally be created (the default behaviour). To avoid creating any xarray indexes pass indexes={}. Parameters From cb7397ac26b92c1d4774cc04b425c8d08e3afa74 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Tue, 26 Mar 2024 16:02:27 -0400 Subject: [PATCH 07/14] clarify docstring further --- virtualizarr/xarray.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/virtualizarr/xarray.py b/virtualizarr/xarray.py index 59e7326e..d3b89665 100644 --- a/virtualizarr/xarray.py +++ b/virtualizarr/xarray.py @@ -28,7 +28,7 @@ def open_virtual_dataset( Open a file or store as an xarray Dataset wrapping virtualized zarr arrays. No data variables will be loaded. - + Xarray indexes can optionally be created (the default behaviour). 
To avoid creating any xarray indexes pass indexes={}. Parameters @@ -42,6 +42,7 @@ def open_virtual_dataset( drop_variables: list[str], default is None Variables in the file to drop before returning. indexes : Mapping[str, Index], default is None + Indexes to use on the returned xarray Dataset. Default is None, which will read any 1D coordinate data to create in-memory Pandas indexes. To avoid creating any indexes, pass indexes={}. virtual_array_class From 809c83ed0a9cfb632928e28100032f9d1e513dd3 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Tue, 26 Mar 2024 16:04:29 -0400 Subject: [PATCH 08/14] add pooch to test dependencies --- ci/environment.yml | 1 + pyproject.toml | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/ci/environment.yml b/ci/environment.yml index 876c9a5c..25a189b4 100644 --- a/ci/environment.yml +++ b/ci/environment.yml @@ -14,5 +14,6 @@ dependencies: - kerchunk - ujson - pydantic + - pooch - pip: - git+https://github.com/TomNicholas/xarray.git@concat-no-indexes#egg=xarray diff --git a/pyproject.toml b/pyproject.toml index 159451cf..8c5f7111 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,7 +33,8 @@ test = [ "pre-commit", "pytest-mypy", "pytest", - "scipy" + "scipy", + "pooch", ] From 11cc55d3be6c51efb93a32f6679b48cbd394ee02 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Tue, 26 Mar 2024 16:17:44 -0400 Subject: [PATCH 09/14] remove | character --- virtualizarr/xarray.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virtualizarr/xarray.py b/virtualizarr/xarray.py index d3b89665..6ba8f7a3 100644 --- a/virtualizarr/xarray.py +++ b/virtualizarr/xarray.py @@ -21,7 +21,7 @@ def open_virtual_dataset( filepath: str, filetype: Optional[str] = None, drop_variables: Optional[List[str]] = None, - indexes: Mapping[str, Index] | None = None, + indexes: Optional[Mapping[str, Index]] = None, virtual_array_class=ManifestArray, ) -> xr.Dataset: """ From 2f654edf95b303112ea6cf60edb54c0e021dd046 Mon Sep 17 
00:00:00 2001
From: TomNicholas
Date: Tue, 26 Mar 2024 16:26:32 -0400
Subject: [PATCH 10/14] remove rogue print statements

---
 virtualizarr/tests/test_xarray.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/virtualizarr/tests/test_xarray.py b/virtualizarr/tests/test_xarray.py
index 296a7d93..6cb04647 100644
--- a/virtualizarr/tests/test_xarray.py
+++ b/virtualizarr/tests/test_xarray.py
@@ -245,8 +245,7 @@ def test_no_indexes(self):
     def test_create_default_indexes(self, netcdf4_file):
         vds = open_virtual_dataset(netcdf4_file, indexes=None)
         ds = xr.open_dataset(netcdf4_file)
-        print(vds.indexes)
-        print(ds.indexes)
+
         # TODO use xr.testing.assert_identical(vds.indexes, ds.indexes) instead once class supported by assertion comparison, see https://github.com/pydata/xarray/issues/5812
         assert index_mappings_equal(vds.xindexes, ds.xindexes)
 
From 5ffd401c55e1fe3f4ddca10f803b491e73ecd271 Mon Sep 17 00:00:00 2001
From: TomNicholas
Date: Tue, 26 Mar 2024 22:40:20 -0400
Subject: [PATCH 11/14] test using combine_by_coords

---
 virtualizarr/tests/test_xarray.py | 32 +++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/virtualizarr/tests/test_xarray.py b/virtualizarr/tests/test_xarray.py
index 6cb04647..4951d36c 100644
--- a/virtualizarr/tests/test_xarray.py
+++ b/virtualizarr/tests/test_xarray.py
@@ -237,6 +237,24 @@ def netcdf4_file(tmpdir):
     return filepath
 
 
+@pytest.fixture
+def netcdf4_files(tmpdir):
+    # Set up example xarray dataset
+    ds = xr.tutorial.open_dataset("air_temperature")
+
+    # split into equal chunks so we can concatenate them back together later
+    ds1 = ds.isel(time=slice(None, 1460))
+    ds2 = ds.isel(time=slice(1460, None))
+
+    # Save it to disk as netCDF (in temporary directory)
+    filepath1 = f"{tmpdir}/air1.nc"
+    filepath2 = f"{tmpdir}/air2.nc"
+    ds1.to_netcdf(filepath1)
+    ds2.to_netcdf(filepath2)
+
+    return filepath1, filepath2
+
+
 class TestOpenVirtualDatasetIndexes:
     def test_no_indexes(self,
netcdf4_file): vds = open_virtual_dataset(netcdf4_file, indexes={}) @@ -264,3 +282,17 @@ def index_mappings_equal(indexes1: Mapping[str, Index], indexes2: Mapping[str, I return False return True + + +class TestCombineUsingIndexes: + def test_combine_by_coords(self, netcdf4_files): + filepath1, filepath2 = netcdf4_files + + vds1 = open_virtual_dataset(filepath1) + vds2 = open_virtual_dataset(filepath2) + + combined_vds = xr.combine_by_coords( + [vds2, vds1], + ) + + assert combined_vds.xindexes["time"].to_pandas_index().is_monotonic_increasing From 6dc8d8d0e6f9d14f5863cab0ce537fb1dab92ad9 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Tue, 26 Mar 2024 22:40:58 -0400 Subject: [PATCH 12/14] document how to create virtual datasets with in-memory indexes --- docs/usage.md | 77 ++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 73 insertions(+), 4 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 4fcd7793..a6f50ab0 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -175,7 +175,7 @@ ds1 = ds.isel(time=slice(None, 1460)) ds2 = ds.isel(time=slice(1460, None)) ds1.to_netcdf('air1.nc') -ds1.to_netcdf('air2.nc') +ds2.to_netcdf('air2.nc') ``` Note that we have created these in such a way that each dataset has one equally-sized chunk. @@ -195,6 +195,16 @@ vds1 = open_virtual_dataset('air1.nc', indexes={}) vds2 = open_virtual_dataset('air2.nc', indexes={}) ``` +We can see that the datasets have no indexes. + +```python +vds1 +``` +``` +Indexes: + *empty* +``` + ```{note} Passing `indexes={}` will only work if you use a [specific branch of xarray](https://github.com/TomNicholas/xarray/tree/concat-no-indexes), as it requires multiple in-progress PR's, see [GH issue #14](https://github.com/TomNicholas/VirtualiZarr/issues/14#issuecomment-2018369470). 
``` @@ -249,10 +259,11 @@ In future we would like for it to be possible to just use `xr.open_mfdataset` to vds = xr.open_mfdataset( ['air1.nc', 'air2.nc'], - combine='nested, + combine='nested', concat_dim=['time'], coords='minimal', compat='override', + indexes={}, ) but this requires some [upstream changes](https://github.com/TomNicholas/VirtualiZarr/issues/35) in xarray. @@ -260,9 +271,67 @@ but this requires some [upstream changes](https://github.com/TomNicholas/Virtual ### Automatic ordering using coordinate data -TODO: How to concatenate with order inferred from indexes automatically +Sometimes we don't have a priori knowledge of which files contain what content, and we would like to concatenate them in an order dictated by their coordinates (e.g. so that a `time` coordinate monotonically increases into the future). + +For this we will actually want to create xarray indexes, so that we can use the values in them to determine the correct concatenation order. This requires loading coordinate values into memory, the same way that `xarray.open_dataset` does by default. + +To open a virtual dataset but with in-memory indexes along 1D [dimension coordinates](), pass `indexes=None` to `open_virtual_dataset` (which is the default). + +```python +vds1 = open_virtual_dataset('air1.nc') +vds2 = open_virtual_dataset('air2.nc') +``` + +Now we can see that some indexes have been created by default. -TODO: Note on how this could be done using `open_mfdataset(..., combine='by_coords')` in future +```python +vds1.xindexes +``` +``` +Indexes: + lat PandasIndex + lon PandasIndex + time PandasIndex +``` + +To use these indexes to infer concatenation order we can use `xarray.combine_by_coords`. + +```python +combined_vds = xr.combine_by_coords([vds2, vds1]) +combined_vds +``` +``` + Size: 8MB +Dimensions: (time: 2920, lat: 25, lon: 53) +Coordinates: + * lat (lat) float32 100B 75.0 72.5 70.0 67.5 65.0 ... 
22.5 20.0 17.5 15.0 + * lon (lon) float32 212B 200.0 202.5 205.0 207.5 ... 325.0 327.5 330.0 + * time (time) datetime64[ns] 23kB 2013-01-01 ... 2014-12-31T18:00:00 +Data variables: + air (time, lat, lon) int16 8MB ManifestArray Date: Wed, 27 Mar 2024 10:40:25 -0400 Subject: [PATCH 13/14] correct .indexes example --- docs/usage.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/usage.md b/docs/usage.md index a6f50ab0..4cd36b4e 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -198,7 +198,7 @@ vds2 = open_virtual_dataset('air2.nc', indexes={}) We can see that the datasets have no indexes. ```python -vds1 +vds1.indexes ``` ``` Indexes: From 709eee15461ca4d10a10dbbdd863fe35ec2d6a8e Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Wed, 27 Mar 2024 14:14:30 -0400 Subject: [PATCH 14/14] note about work on writing to Zarr v3 --- docs/usage.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/usage.md b/docs/usage.md index 4cd36b4e..2d8e9397 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -351,7 +351,7 @@ To write out all the references in the virtual dataset as a single kerchunk-comp combined_vds.virtualize.to_kerchunk('combined.json', format='json') ``` -These references can now be interpreted like they were a Zarr store by [fsspec](https://github.com/fsspec/filesystem_spec), using its built-in kerchunk xarray backend. +These references can now be interpreted like they were a Zarr store by [fsspec](https://github.com/fsspec/filesystem_spec), using kerchunk's built-in xarray backend (so you need kerchunk to be installed to use `engine='kerchunk'`). 
```python import fsspec @@ -364,4 +364,6 @@ combined_ds = xr.open_dataset(mapper, engine="kerchunk") ### Writing as Zarr +TODO: Write out references as a Zarr v3 store following the [Chunk Manifest ZEP](https://github.com/zarr-developers/zarr-specs/issues/287), see [PR #45](https://github.com/TomNicholas/VirtualiZarr/pull/45) + TODO: Explanation of how this requires changes in zarr upstream to be able to read it