Skip to content

Commit

Permalink
Compatibility for zarr-python 3.x (#9552)
Browse files (browse the repository at this point in the history)
* Remove zarr pin

* Define zarr_v3 helper

* zarr-v3: filters / compressors -> codecs

* zarr-v3: update tests to avoid values equal to fillValue

* Various test fixes

* zarr_version fixes

* removed open_consolidated workarounds
* removed _store_version check
* pass through zarr_version

* fixup! zarr-v3: filters / compressors -> codecs

* fixup! fixup! zarr-v3: filters / compressors -> codecs

* fixup

* path / key normalization in set_variables

* fixes

* workaround nested consolidated metadata

* test: avoid fill_value

* test: Adjust call counts

* zarr-python 3.x Array.resize doesn't mutate

* test compatibility

- skip write_empty_chunks on 3.x
- update patch targets

* skip ZipStore with_mode

* test: more fill_value avoidance

* test: more fill_value avoidance

* v3 compat for instrumented test

* Handle zarr_version / zarr_format deprecation

* wip

* most Zarr tests passing

* unskip tests

* add custom Zarr _FillValue encoding / decoding

* relax dtype comparison in test_roundtrip_empty_vlen_string_array

* fix test_explicitly_omit_fill_value_via_encoding_kwarg

* fix test_append_string_length_mismatch_raises

* fix test_check_encoding_is_consistent_after_append for v3

* skip roundtrip_endian for zarr v3

* unskip datetimes and fix test_compressor_encoding

* unskip tests

* add back dtype skip

* point upstream to v3 branch

* Create temporary directory before using it

* Avoid zarr.storage.zip on v2

* fixed close_store_on_close bug

* Remove workaround, fixed upstream

* Restore original `w` mode.

* workaround for store closing with mode=w

* typing fixes

* compat

* Remove unnecessary pop

* fixed skip

* fixup types

* fixup types

* [test-upstream]

* Update install-upstream-wheels.sh

* set use_consolidated to false when user provides consolidated=False

* fix: import consolidated_metadata from package root

* fix: relax instrumented store checks for v3

* Adjust 2.18.3 thresholds

* skip datatree zarr tests w/ zarr 3 for now

* fixed kvstore usage

* typing fixes

* move zarr.codecs import

* fixup ignores

* storage options fix, skip

* fixed types

* Update ci/install-upstream-wheels.sh

* type fixes

* whats-new

* Update xarray/tests/test_backends_datatree.py

* fix type import

* set mapper, chunk_mapper

* Pass through zarr_format

* Fixup

* more cleanup

* revert test changes

* Update xarray/backends/zarr.py

* cleanup

* update docstring

* fix rtd

* tweak

---------

Co-authored-by: Ryan Abernathey <[email protected]>
Co-authored-by: Joe Hamman <[email protected]>
Co-authored-by: Deepak Cherian <[email protected]>
Co-authored-by: Deepak Cherian <[email protected]>
  • Loading branch information
5 people authored Oct 23, 2024
1 parent 4798707 commit b133fdc
Show file tree
Hide file tree
Showing 17 changed files with 674 additions and 245 deletions.
6 changes: 3 additions & 3 deletions ci/install-upstream-wheels.sh
Original file line number Diff line number Diff line change
Expand Up @@ -45,15 +45,15 @@ python -m pip install \
--pre \
--upgrade \
pyarrow
# manually install `pint` to pull in new dependencies
python -m pip install --upgrade pint
# manually install `pint`, `donfig`, and `crc32c` to pull in new dependencies
python -m pip install --upgrade pint donfig crc32c
python -m pip install \
--no-deps \
--upgrade \
git+https://github.com/dask/dask \
git+https://github.com/dask/dask-expr \
git+https://github.com/dask/distributed \
git+https://github.com/zarr-developers/zarr.git@main \
git+https://github.com/zarr-developers/zarr \
git+https://github.com/Unidata/cftime \
git+https://github.com/pypa/packaging \
git+https://github.com/hgrecco/pint \
Expand Down
3 changes: 2 additions & 1 deletion doc/user-guide/io.rst
Original file line number Diff line number Diff line change
Expand Up @@ -823,8 +823,9 @@ For example:
.. ipython:: python
import zarr
from numcodecs.blosc import Blosc
compressor = zarr.Blosc(cname="zstd", clevel=3, shuffle=2)
compressor = Blosc(cname="zstd", clevel=3, shuffle=2)
ds.to_zarr("foo.zarr", encoding={"foo": {"compressor": compressor}})
.. note::
Expand Down
4 changes: 4 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@ New Features
By `Holly Mandel <https://github.com/hollymandel>`_.
- Implement handling of complex numbers (netcdf4/h5netcdf) and enums (h5netcdf) (:issue:`9246`, :issue:`3297`, :pull:`9509`).
By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_.
- Support for Zarr-Python 3 (:issue:`9515`, :pull:`9552`).
By `Tom Augspurger <https://github.com/TomAugspurger>`_,
`Ryan Abernathey <https://github.com/rabernat>`_ and
`Joe Hamman <https://github.com/jhamman>`_.

Breaking changes
~~~~~~~~~~~~~~~~
Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ dev = [
"sphinx_autosummary_accessors",
"xarray[complete]",
]
io = ["netCDF4", "h5netcdf", "scipy", 'pydap; python_version<"3.10"', "zarr<3", "fsspec", "cftime", "pooch"]
io = ["netCDF4", "h5netcdf", "scipy", 'pydap; python_version<"3.10"', "zarr", "fsspec", "cftime", "pooch"]
etc = ["sparse"]
parallel = ["dask[complete]"]
viz = ["cartopy", "matplotlib", "nc-time-axis", "seaborn"]
Expand Down Expand Up @@ -124,6 +124,7 @@ module = [
"nc_time_axis.*",
"netCDF4.*",
"netcdftime.*",
"numcodecs.*",
"opt_einsum.*",
"pint.*",
"pooch.*",
Expand Down
30 changes: 17 additions & 13 deletions xarray/backends/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
_normalize_path,
)
from xarray.backends.locks import _get_scheduler
from xarray.backends.zarr import _zarr_v3
from xarray.core import indexing
from xarray.core.combine import (
_infer_concat_order_from_positions,
Expand Down Expand Up @@ -1685,6 +1686,7 @@ def to_zarr(
safe_chunks: bool = True,
storage_options: dict[str, str] | None = None,
zarr_version: int | None = None,
zarr_format: int | None = None,
write_empty_chunks: bool | None = None,
chunkmanager_store_kwargs: dict[str, Any] | None = None,
) -> backends.ZarrStore | Delayed:
Expand All @@ -1703,21 +1705,28 @@ def to_zarr(
store = _normalize_path(store)
chunk_store = _normalize_path(chunk_store)

kwargs = {}
if storage_options is None:
mapper = store
chunk_mapper = chunk_store
else:
from fsspec import get_mapper

if not isinstance(store, str):
raise ValueError(
f"store must be a string to use storage_options. Got {type(store)}"
)
mapper = get_mapper(store, **storage_options)
if chunk_store is not None:
chunk_mapper = get_mapper(chunk_store, **storage_options)
else:

if _zarr_v3():
kwargs["storage_options"] = storage_options
mapper = store
chunk_mapper = chunk_store
else:
from fsspec import get_mapper

mapper = get_mapper(store, **storage_options)
if chunk_store is not None:
chunk_mapper = get_mapper(chunk_store, **storage_options)
else:
chunk_mapper = chunk_store

if encoding is None:
encoding = {}
Expand Down Expand Up @@ -1747,13 +1756,6 @@ def to_zarr(
# validate Dataset keys, DataArray names
_validate_dataset_names(dataset)

if zarr_version is None:
# default to 2 if store doesn't specify its version (e.g. a path)
zarr_version = int(getattr(store, "_store_version", 2))

if consolidated is None and zarr_version > 2:
consolidated = False

if mode == "r+":
already_consolidated = consolidated
consolidate_on_close = False
Expand All @@ -1773,7 +1775,9 @@ def to_zarr(
safe_chunks=safe_chunks,
stacklevel=4, # for Dataset.to_zarr()
zarr_version=zarr_version,
zarr_format=zarr_format,
write_empty=write_empty_chunks,
**kwargs,
)

if region is not None:
Expand Down
Loading

0 comments on commit b133fdc

Please sign in to comment.