Skip to content

Commit

Permalink
Merge branch 'v3' into generalize-stateful-store
Browse files Browse the repository at this point in the history
* v3:
  chore: update pre-commit hooks (zarr-developers#2222)
  fix: validate v3 dtypes when loading/creating v3 metadata (zarr-developers#2209)
  fix typo in store integration test (zarr-developers#2223)
  Basic Zarr-python 2.x compatibility changes (zarr-developers#2098)
  Make Group.arrays, groups compatible with v2 (zarr-developers#2213)
  Typing fixes to test_indexing (zarr-developers#2193)
  Default to RemoteStore for fsspec URIs (zarr-developers#2198)
  Make MemoryStore serializable (zarr-developers#2204)
  [v3] Implement Group methods for empty, full, ones, and zeros (zarr-developers#2210)
  implement `store.list_prefix` and `store._set_many` (zarr-developers#2064)
  Fixed codec for v2 data with no fill value (zarr-developers#2207)
  • Loading branch information
dcherian committed Sep 24, 2024
2 parents 7e8c1c2 + cd7321b commit ad3df88
Show file tree
Hide file tree
Showing 34 changed files with 1,321 additions and 520 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ default_language_version:
python: python3
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.6.5
rev: v0.6.7
hooks:
- id: ruff
args: ["--fix", "--show-fixes"]
Expand Down
4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,8 @@ test = [
"flask",
"requests",
"mypy",
"hypothesis"
"hypothesis",
"universal-pathlib",
]

jupyter = [
Expand Down Expand Up @@ -273,6 +274,7 @@ filterwarnings = [
"ignore:PY_SSIZE_T_CLEAN will be required.*:DeprecationWarning",
"ignore:The loop argument is deprecated since Python 3.8.*:DeprecationWarning",
"ignore:Creating a zarr.buffer.gpu.*:UserWarning",
"ignore:Duplicate name:UserWarning", # from ZipFile
]
markers = [
"gpu: mark a test as requiring CuPy and GPU"
Expand Down
68 changes: 68 additions & 0 deletions src/zarr/_compat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
import warnings
from collections.abc import Callable
from functools import wraps
from inspect import Parameter, signature
from typing import Any, TypeVar

T = TypeVar("T")

# Based off https://github.com/scikit-learn/scikit-learn/blob/e87b32a81c70abed8f2e97483758eb64df8255e9/sklearn/utils/validation.py#L63


def _deprecate_positional_args(
func: Callable[..., T] | None = None, *, version: str = "3.1.0"
) -> Callable[..., T]:
"""Decorator for methods that issues warnings for positional arguments.
Using the keyword-only argument syntax in pep 3102, arguments after the
* will issue a warning when passed as a positional argument.
Parameters
----------
func : callable, default=None
Function to check arguments on.
version : callable, default="3.1.0"
The version when positional arguments will result in error.
"""

def _inner_deprecate_positional_args(f: Callable[..., T]) -> Callable[..., T]:
sig = signature(f)
kwonly_args = []
all_args = []

for name, param in sig.parameters.items():
if param.kind == Parameter.POSITIONAL_OR_KEYWORD:
all_args.append(name)
elif param.kind == Parameter.KEYWORD_ONLY:
kwonly_args.append(name)

@wraps(f)
def inner_f(*args: Any, **kwargs: Any) -> T:
extra_args = len(args) - len(all_args)
if extra_args <= 0:
return f(*args, **kwargs)

# extra_args > 0
args_msg = [
f"{name}={arg}"
for name, arg in zip(kwonly_args[:extra_args], args[-extra_args:], strict=False)
]
formatted_args_msg = ", ".join(args_msg)
warnings.warn(
(
f"Pass {formatted_args_msg} as keyword args. From version "
f"{version} passing these as positional arguments "
"will result in an error"
),
FutureWarning,
stacklevel=2,
)
kwargs.update(zip(sig.parameters, args, strict=False))
return f(**kwargs)

return inner_f

if func is not None:
return _inner_deprecate_positional_args(func)

return _inner_deprecate_positional_args # type: ignore[return-value]
14 changes: 12 additions & 2 deletions src/zarr/abc/store.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from abc import ABC, abstractmethod
from collections.abc import AsyncGenerator
from asyncio import gather
from collections.abc import AsyncGenerator, Iterable
from typing import Any, NamedTuple, Protocol, runtime_checkable

from typing_extensions import Self
Expand Down Expand Up @@ -158,6 +159,13 @@ async def set(self, key: str, value: Buffer) -> None:
"""
...

async def _set_many(self, values: Iterable[tuple[str, Buffer]]) -> None:
    """
    Insert multiple (key, value) pairs into storage.

    One ``self.set`` call is created per pair and all of them are awaited
    together through ``asyncio.gather``.
    """
    pending_writes = [self.set(key, value) for key, value in values]
    await gather(*pending_writes)

@property
@abstractmethod
def supports_deletes(self) -> bool:
Expand Down Expand Up @@ -211,7 +219,9 @@ def list(self) -> AsyncGenerator[str, None]:

@abstractmethod
def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]:
"""Retrieve all keys in the store with a given prefix.
"""
Retrieve all keys in the store that begin with a given prefix. Keys are returned with the
common leading prefix removed.
Parameters
----------
Expand Down
71 changes: 54 additions & 17 deletions src/zarr/api/asynchronous.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,7 @@ async def open(
zarr_version: ZarrFormat | None = None, # deprecated
zarr_format: ZarrFormat | None = None,
path: str | None = None,
storage_options: dict[str, Any] | None = None,
**kwargs: Any, # TODO: type kwargs as valid args to open_array
) -> AsyncArray | AsyncGroup:
"""Convenience function to open a group or array using file-mode-like semantics.
Expand All @@ -211,6 +212,9 @@ async def open(
The zarr format to use when saving.
path : str or None, optional
The path within the store to open.
storage_options : dict
If using an fsspec URL to create the store, these will be passed to
the backend implementation. Ignored otherwise.
**kwargs
Additional parameters are passed through to :func:`zarr.creation.open_array` or
:func:`zarr.hierarchy.open_group`.
Expand All @@ -221,7 +225,7 @@ async def open(
Return type depends on what exists in the given store.
"""
zarr_format = _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format)
store_path = await make_store_path(store, mode=mode)
store_path = await make_store_path(store, mode=mode, storage_options=storage_options)

if path is not None:
store_path = store_path / path
Expand Down Expand Up @@ -276,6 +280,7 @@ async def save_array(
zarr_version: ZarrFormat | None = None, # deprecated
zarr_format: ZarrFormat | None = None,
path: str | None = None,
storage_options: dict[str, Any] | None = None,
**kwargs: Any, # TODO: type kwargs as valid args to create
) -> None:
"""Convenience function to save a NumPy array to the local file system, following a
Expand All @@ -291,6 +296,9 @@ async def save_array(
The zarr format to use when saving.
path : str or None, optional
The path within the store where the array will be saved.
storage_options : dict
If using an fsspec URL to create the store, these will be passed to
the backend implementation. Ignored otherwise.
kwargs
Passed through to :func:`create`, e.g., compressor.
"""
Expand All @@ -299,7 +307,7 @@ async def save_array(
or _default_zarr_version()
)

store_path = await make_store_path(store, mode="w")
store_path = await make_store_path(store, mode="w", storage_options=storage_options)
if path is not None:
store_path = store_path / path
new = await AsyncArray.create(
Expand All @@ -319,6 +327,7 @@ async def save_group(
zarr_version: ZarrFormat | None = None, # deprecated
zarr_format: ZarrFormat | None = None,
path: str | None = None,
storage_options: dict[str, Any] | None = None,
**kwargs: NDArrayLike,
) -> None:
"""Convenience function to save several NumPy arrays to the local file system, following a
Expand All @@ -334,22 +343,40 @@ async def save_group(
The zarr format to use when saving.
path : str or None, optional
Path within the store where the group will be saved.
storage_options : dict
If using an fsspec URL to create the store, these will be passed to
the backend implementation. Ignored otherwise.
kwargs
NumPy arrays with data to save.
"""
zarr_format = (
_handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format)
_handle_zarr_version_or_format(
zarr_version=zarr_version,
zarr_format=zarr_format,
)
or _default_zarr_version()
)

if len(args) == 0 and len(kwargs) == 0:
raise ValueError("at least one array must be provided")
aws = []
for i, arr in enumerate(args):
aws.append(save_array(store, arr, zarr_format=zarr_format, path=f"{path}/arr_{i}"))
aws.append(
save_array(
store,
arr,
zarr_format=zarr_format,
path=f"{path}/arr_{i}",
storage_options=storage_options,
)
)
for k, arr in kwargs.items():
_path = f"{path}/{k}" if path is not None else k
aws.append(save_array(store, arr, zarr_format=zarr_format, path=_path))
aws.append(
save_array(
store, arr, zarr_format=zarr_format, path=_path, storage_options=storage_options
)
)
await asyncio.gather(*aws)


Expand Down Expand Up @@ -418,6 +445,7 @@ async def group(
zarr_format: ZarrFormat | None = None,
meta_array: Any | None = None, # not used
attributes: dict[str, JSON] | None = None,
storage_options: dict[str, Any] | None = None,
) -> AsyncGroup:
"""Create a group.
Expand All @@ -444,6 +472,9 @@ async def group(
to users. Use `numpy.empty(())` by default.
zarr_format : {2, 3, None}, optional
The zarr format to use when saving.
storage_options : dict
If using an fsspec URL to create the store, these will be passed to
the backend implementation. Ignored otherwise.
Returns
-------
Expand All @@ -453,7 +484,7 @@ async def group(

zarr_format = _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format)

store_path = await make_store_path(store)
store_path = await make_store_path(store, storage_options=storage_options)
if path is not None:
store_path = store_path / path

Expand All @@ -472,7 +503,7 @@ async def group(
try:
return await AsyncGroup.open(store=store_path, zarr_format=zarr_format)
except (KeyError, FileNotFoundError):
return await AsyncGroup.create(
return await AsyncGroup.from_store(
store=store_path,
zarr_format=zarr_format or _default_zarr_version(),
exists_ok=overwrite,
Expand All @@ -481,14 +512,14 @@ async def group(


async def open_group(
*, # Note: this is a change from v2
store: StoreLike | None = None,
*, # Note: this is a change from v2
mode: AccessModeLiteral | None = None,
cache_attrs: bool | None = None, # not used, default changed
synchronizer: Any = None, # not used
path: str | None = None,
chunk_store: StoreLike | None = None, # not used
storage_options: dict[str, Any] | None = None, # not used
storage_options: dict[str, Any] | None = None,
zarr_version: ZarrFormat | None = None, # deprecated
zarr_format: ZarrFormat | None = None,
meta_array: Any | None = None, # not used
Expand Down Expand Up @@ -548,10 +579,8 @@ async def open_group(
warnings.warn("meta_array is not yet implemented", RuntimeWarning, stacklevel=2)
if chunk_store is not None:
warnings.warn("chunk_store is not yet implemented", RuntimeWarning, stacklevel=2)
if storage_options is not None:
warnings.warn("storage_options is not yet implemented", RuntimeWarning, stacklevel=2)

store_path = await make_store_path(store, mode=mode)
store_path = await make_store_path(store, mode=mode, storage_options=storage_options)
if path is not None:
store_path = store_path / path

Expand All @@ -561,7 +590,7 @@ async def open_group(
try:
return await AsyncGroup.open(store_path, zarr_format=zarr_format)
except (KeyError, FileNotFoundError):
return await AsyncGroup.create(
return await AsyncGroup.from_store(
store_path,
zarr_format=zarr_format or _default_zarr_version(),
exists_ok=True,
Expand All @@ -575,7 +604,7 @@ async def create(
chunks: ChunkCoords | None = None, # TODO: v2 allowed chunks=True
dtype: npt.DTypeLike | None = None,
compressor: dict[str, JSON] | None = None, # TODO: default and type change
fill_value: Any = 0, # TODO: need type
fill_value: Any | None = 0, # TODO: need type
order: MemoryOrder | None = None, # TODO: default change
store: str | StoreLike | None = None,
synchronizer: Any | None = None,
Expand Down Expand Up @@ -603,6 +632,7 @@ async def create(
) = None,
codecs: Iterable[Codec | dict[str, JSON]] | None = None,
dimension_names: Iterable[str] | None = None,
storage_options: dict[str, Any] | None = None,
**kwargs: Any,
) -> AsyncArray:
"""Create an array.
Expand Down Expand Up @@ -674,6 +704,9 @@ async def create(
to users. Use `numpy.empty(())` by default.
.. versionadded:: 2.13
storage_options : dict
If using an fsspec URL to create the store, these will be passed to
the backend implementation. Ignored otherwise.
Returns
-------
Expand Down Expand Up @@ -725,7 +758,7 @@ async def create(
warnings.warn("meta_array is not yet implemented", RuntimeWarning, stacklevel=2)

mode = kwargs.pop("mode", cast(AccessModeLiteral, "r" if read_only else "w"))
store_path = await make_store_path(store, mode=mode)
store_path = await make_store_path(store, mode=mode, storage_options=storage_options)
if path is not None:
store_path = store_path / path

Expand Down Expand Up @@ -827,7 +860,7 @@ async def full_like(a: ArrayLike, **kwargs: Any) -> AsyncArray:
"""
like_kwargs = _like_args(a, kwargs)
if isinstance(a, AsyncArray):
kwargs.setdefault("fill_value", a.metadata.fill_value)
like_kwargs.setdefault("fill_value", a.metadata.fill_value)
return await full(**like_kwargs)


Expand Down Expand Up @@ -875,6 +908,7 @@ async def open_array(
zarr_version: ZarrFormat | None = None, # deprecated
zarr_format: ZarrFormat | None = None,
path: PathLike | None = None,
storage_options: dict[str, Any] | None = None,
**kwargs: Any, # TODO: type kwargs as valid args to save
) -> AsyncArray:
"""Open an array using file-mode-like semantics.
Expand All @@ -887,6 +921,9 @@ async def open_array(
The zarr format to use when saving.
path : string, optional
Path in store to array.
storage_options : dict
If using an fsspec URL to create the store, these will be passed to
the backend implementation. Ignored otherwise.
**kwargs
Any keyword arguments to pass to the array constructor.
Expand All @@ -896,7 +933,7 @@ async def open_array(
The opened array.
"""

store_path = await make_store_path(store)
store_path = await make_store_path(store, storage_options=storage_options)
if path is not None:
store_path = store_path / path

Expand Down
Loading

0 comments on commit ad3df88

Please sign in to comment.