Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: opening a group with unspecified format finds either v2 or v3 #2183

Merged
merged 1 commit into from
Sep 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 16 additions & 16 deletions src/zarr/api/asynchronous.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

from zarr.core.array import Array, AsyncArray
from zarr.core.common import JSON, AccessModeLiteral, ChunkCoords, MemoryOrder, ZarrFormat
from zarr.core.config import config
from zarr.core.group import AsyncGroup
from zarr.core.metadata.v2 import ArrayV2Metadata
from zarr.core.metadata.v3 import ArrayV3Metadata
Expand Down Expand Up @@ -126,8 +127,7 @@ def _handle_zarr_version_or_format(

def _default_zarr_version() -> ZarrFormat:
"""return the default zarr_version"""
# TODO: set default value from config
return 3
return cast(ZarrFormat, int(config.get("default_zarr_version", 3)))


async def consolidate_metadata(*args: Any, **kwargs: Any) -> AsyncGroup:
Expand Down Expand Up @@ -337,7 +337,10 @@ async def save_group(
kwargs
NumPy arrays with data to save.
"""
zarr_format = _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format)
zarr_format = (
_handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Minor, but maybe handle_zarr_version_or_format could do the fallback to _default_zarr_version(). Then we can remove all the ors.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see what you are saying but we don't always do the fallback -- or in some cases, where we try to open without the fallback before creating with it.

or _default_zarr_version()
)

if len(args) == 0 and len(kwargs) == 0:
raise ValueError("at least one array must be provided")
Expand Down Expand Up @@ -448,10 +451,7 @@ async def group(
The new group.
"""

zarr_format = (
_handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format)
or _default_zarr_version()
)
zarr_format = _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format)

store_path = await make_store_path(store)
if path is not None:
Expand All @@ -474,7 +474,7 @@ async def group(
except (KeyError, FileNotFoundError):
return await AsyncGroup.create(
store=store_path,
zarr_format=zarr_format,
zarr_format=zarr_format or _default_zarr_version(),
exists_ok=overwrite,
attributes=attributes,
)
Expand All @@ -483,7 +483,7 @@ async def group(
async def open_group(
*, # Note: this is a change from v2
store: StoreLike | None = None,
mode: AccessModeLiteral | None = None, # not used
mode: AccessModeLiteral | None = None,
cache_attrs: bool | None = None, # not used, default changed
synchronizer: Any = None, # not used
path: str | None = None,
Expand Down Expand Up @@ -538,10 +538,7 @@ async def open_group(
The new group.
"""

zarr_format = (
_handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format)
or _default_zarr_version()
)
zarr_format = _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format)

if cache_attrs is not None:
warnings.warn("cache_attrs is not yet implemented", RuntimeWarning, stacklevel=2)
Expand All @@ -565,7 +562,10 @@ async def open_group(
return await AsyncGroup.open(store_path, zarr_format=zarr_format)
except (KeyError, FileNotFoundError):
return await AsyncGroup.create(
store_path, zarr_format=zarr_format, exists_ok=True, attributes=attributes
store_path,
zarr_format=zarr_format or _default_zarr_version(),
exists_ok=True,
attributes=attributes,
)


Expand Down Expand Up @@ -687,7 +687,7 @@ async def create(

if zarr_format == 2 and chunks is None:
chunks = shape
if zarr_format == 3 and chunk_shape is None:
elif zarr_format == 3 and chunk_shape is None:
if chunks is not None:
chunk_shape = chunks
chunks = None
Expand Down Expand Up @@ -908,7 +908,7 @@ async def open_array(
if store_path.store.mode.create:
return await create(
store=store_path,
zarr_format=zarr_format,
zarr_format=zarr_format or _default_zarr_version(),
overwrite=store_path.store.mode.overwrite,
**kwargs,
)
Expand Down
21 changes: 10 additions & 11 deletions src/zarr/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,21 +28,20 @@ def reset(self) -> None:
self.refresh()


"""
The config module is responsible for managing the configuration of zarr and is based on the Donfig python library.
For selecting custom implementations of codecs, pipelines, buffers and ndbuffers, first register the implementations
in the registry and then select them in the config.
e.g. an implementation of the bytes codec in a class "NewBytesCodec", requires the value of codecs.bytes.name to be
"NewBytesCodec".
Donfig can be configured programmatically, by environment variables, or from YAML files in standard locations
e.g. export ZARR_CODECS__BYTES__NAME="NewBytesCodec"
(for more information see github.com/pytroll/donfig)
Default values below point to the standard implementations of zarr-python
"""
# The config module is responsible for managing the configuration of zarr and is based on the Donfig python library.
# For selecting custom implementations of codecs, pipelines, buffers and ndbuffers, first register the implementations
# in the registry and then select them in the config.
# e.g. an implementation of the bytes codec in a class "NewBytesCodec", requires the value of codecs.bytes.name to be
# "NewBytesCodec".
# Donfig can be configured programmatically, by environment variables, or from YAML files in standard locations
# e.g. export ZARR_CODECS__BYTES__NAME="NewBytesCodec"
# (for more information see github.com/pytroll/donfig)
# Default values below point to the standard implementations of zarr-python
config = Config(
"zarr",
defaults=[
{
"default_zarr_version": 3,
"array": {"order": "C"},
"async": {"concurrency": None, "timeout": None},
"json_indent": 2,
Expand Down
21 changes: 21 additions & 0 deletions tests/v3/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from zarr import Array, Group
from zarr.abc.store import Store
from zarr.api.synchronous import create, load, open, open_group, save, save_array, save_group
from zarr.core.common import ZarrFormat
from zarr.store.memory import MemoryStore


Expand Down Expand Up @@ -81,6 +82,26 @@ async def test_open_group(memory_store: MemoryStore) -> None:
# assert g.read_only


@pytest.mark.parametrize("zarr_format", [None, 2, 3])
async def test_open_group_unspecified_version(
tmpdir: pathlib.Path, zarr_format: ZarrFormat
) -> None:
"""regression test for https://github.com/zarr-developers/zarr-python/issues/2175"""

# create a group with specified zarr format (could be 2, 3, or None)
_ = await zarr.api.asynchronous.open_group(
store=str(tmpdir), mode="w", zarr_format=zarr_format, attributes={"foo": "bar"}
)

# now open that group without specifying the format
g2 = await zarr.api.asynchronous.open_group(store=str(tmpdir), mode="r")

assert g2.attrs == {"foo": "bar"}

if zarr_format is not None:
assert g2.metadata.zarr_format == zarr_format


def test_save_errors() -> None:
with pytest.raises(ValueError):
# no arrays provided
Expand Down
1 change: 1 addition & 0 deletions tests/v3/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ def test_config_defaults_set() -> None:
# regression test for available defaults
assert config.defaults == [
{
"default_zarr_version": 3,
"array": {"order": "C"},
"async": {"concurrency": None, "timeout": None},
"json_indent": 2,
Expand Down