From 207c4b5cb411637070dc9a5f7011a0e0c98ef877 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 19 May 2024 21:34:26 +0000
Subject: [PATCH] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 virtualizarr/readers/hdf.py                   | 16 ++++----------
 virtualizarr/readers/hdf_filters.py           | 22 ++++++++++++-------
 virtualizarr/tests/test_readers/conftest.py   | 18 +++++++--------
 virtualizarr/tests/test_readers/test_hdf.py   |  5 +----
 .../tests/test_readers/test_hdf_filters.py    |  2 +-
 .../test_readers/test_hdf_integration.py      |  6 ++---
 virtualizarr/xarray.py                        |  5 ++---
 7 files changed, 33 insertions(+), 41 deletions(-)

diff --git a/virtualizarr/readers/hdf.py b/virtualizarr/readers/hdf.py
index 7d95d996..78e718e4 100644
--- a/virtualizarr/readers/hdf.py
+++ b/virtualizarr/readers/hdf.py
@@ -36,15 +36,11 @@ def _dataset_chunk_manifest(path: str, dataset: h5py.Dataset) -> ChunkManifest:
             key_list = [0] * (len(dataset.shape) or 1)
             key = ".".join(map(str, key_list))
             chunk_entry = ChunkEntry(
-                path=path,
-                offset=dsid.get_offset(),
-                length=dsid.get_storage_size()
+                path=path, offset=dsid.get_offset(), length=dsid.get_storage_size()
             )
             chunk_key = ChunkKey(key)
             chunk_entries = {chunk_key: chunk_entry}
-            chunk_manifest = ChunkManifest(
-                entries=chunk_entries
-            )
+            chunk_manifest = ChunkManifest(entries=chunk_entries)
             return chunk_manifest
     else:
         num_chunks = dsid.get_num_chunks()
@@ -60,9 +56,7 @@ def get_key(blob):
 
         def store_chunk_entry(blob):
             chunk_entries[get_key(blob)] = ChunkEntry(
-                path=path,
-                offset=blob.byte_offset,
-                length=blob.size
+                path=path, offset=blob.byte_offset, length=blob.size
             )
 
         has_chunk_iter = callable(getattr(dsid, "chunk_iter", None))
@@ -72,9 +66,7 @@ def store_chunk_entry(blob):
             for index in range(num_chunks):
                 store_chunk_entry(dsid.get_chunk_info(index))
 
-        chunk_manifest = ChunkManifest(
-            entries=chunk_entries
-        )
+        chunk_manifest = ChunkManifest(entries=chunk_entries)
         return chunk_manifest
 
diff --git a/virtualizarr/readers/hdf_filters.py b/virtualizarr/readers/hdf_filters.py
index 75f06bdc..77e7037e 100644
--- a/virtualizarr/readers/hdf_filters.py
+++ b/virtualizarr/readers/hdf_filters.py
@@ -6,9 +6,7 @@
 from numcodecs.abc import Codec
 from pydantic import BaseModel, validator
 
-_non_standard_filters = {
-    "gzip": "zlib"
-}
+_non_standard_filters = {"gzip": "zlib"}
 
 
 class BloscProperties(BaseModel):
@@ -20,12 +18,15 @@ class BloscProperties(BaseModel):
     @validator("cname", pre=True)
     def get_cname_from_code(cls, v):
         blosc_compressor_codes = {
-            value: key for key, value in hdf5plugin._filters.Blosc._Blosc__COMPRESSIONS.items()
+            value: key
+            for key, value in hdf5plugin._filters.Blosc._Blosc__COMPRESSIONS.items()
         }
         return blosc_compressor_codes[v]
 
 
-def _filter_to_codec(filter_id: str, filter_properties: Union[int, Tuple] = None) -> Codec:
+def _filter_to_codec(
+    filter_id: str, filter_properties: Union[int, Tuple] = None
+) -> Codec:
     try:
         id = int(filter_id)
     except ValueError:
@@ -41,9 +42,14 @@ def _filter_to_codec(filter_id: str, filter_properties: Union[int, Tuple] = None
     filter = hdf5plugin.get_filters(id)[0]
     id = filter.filter_name
     if id == "blosc":
-        blosc_props = BloscProperties(**{k: v for k, v in
-                                         zip(BloscProperties.__fields__.keys(),
-                                             filter_properties[-4:])})
+        blosc_props = BloscProperties(
+            **{
+                k: v
+                for k, v in zip(
+                    BloscProperties.__fields__.keys(), filter_properties[-4:]
+                )
+            }
+        )
         conf = blosc_props.model_dump()
         conf["id"] = id
 
diff --git a/virtualizarr/tests/test_readers/conftest.py b/virtualizarr/tests/test_readers/conftest.py
index aa66f933..53c9630e 100644
--- a/virtualizarr/tests/test_readers/conftest.py
+++ b/virtualizarr/tests/test_readers/conftest.py
@@ -138,12 +138,15 @@ def filter_encoded_netcdf4_file(tmpdir, np_uncompressed, request):
     filepath = f"{tmpdir}/{request.param}.nc"
     f = h5py.File(filepath, "w")
     if request.param == "gzip":
-        f.create_dataset(name="data", data=np_uncompressed, compression="gzip", compression_opts=1)
+        f.create_dataset(
+            name="data", data=np_uncompressed, compression="gzip", compression_opts=1
+        )
     if request.param == "blosc":
-        f.create_dataset(name="data", data=np_uncompressed,
-                         **hdf5plugin.Blosc(
-                             cname="lz4", clevel=9, shuffle=hdf5plugin.Blosc.SHUFFLE
-                         ))
+        f.create_dataset(
+            name="data",
+            data=np_uncompressed,
+            **hdf5plugin.Blosc(cname="lz4", clevel=9, shuffle=hdf5plugin.Blosc.SHUFFLE),
+        )
     return filepath
 
 
@@ -152,10 +155,7 @@ def filter_encoded_xarray_netcdf4_files(tmpdir, request):
     ds = xr.tutorial.open_dataset("air_temperature")
     encoding = {}
     if request.param == "gzip":
-        encoding_config = {
-            "zlib": True,
-            "complevel": 1
-        }
+        encoding_config = {"zlib": True, "complevel": 1}
 
     for var_name in ds.variables:
         encoding[var_name] = encoding_config
diff --git a/virtualizarr/tests/test_readers/test_hdf.py b/virtualizarr/tests/test_readers/test_hdf.py
index 0d5a16db..a83bfc39 100644
--- a/virtualizarr/tests/test_readers/test_hdf.py
+++ b/virtualizarr/tests/test_readers/test_hdf.py
@@ -105,8 +105,5 @@ def test_groups_not_implemented(self, group_netcdf4_file):
             virtual_vars_from_hdf(group_netcdf4_file)
 
     def test_drop_variables(self, multiple_datasets_netcdf4_file):
-        variables = virtual_vars_from_hdf(
-            multiple_datasets_netcdf4_file,
-            ["data2"]
-        )
+        variables = virtual_vars_from_hdf(multiple_datasets_netcdf4_file, ["data2"])
         assert "data2" not in variables.keys()
diff --git a/virtualizarr/tests/test_readers/test_hdf_filters.py b/virtualizarr/tests/test_readers/test_hdf_filters.py
index 8094d4cf..28b5d69f 100644
--- a/virtualizarr/tests/test_readers/test_hdf_filters.py
+++ b/virtualizarr/tests/test_readers/test_hdf_filters.py
@@ -36,7 +36,7 @@ def test_numcodec_decoding(self, np_uncompressed, filter_encoded_netcdf4_file):
         ds = f["data"]
         chunk_info = ds.id.get_chunk_info(0)
         codecs = codecs_from_dataset(ds)
-        with open(filter_encoded_netcdf4_file, 'rb') as file:
+        with open(filter_encoded_netcdf4_file, "rb") as file:
             file.seek(chunk_info.byte_offset)
             bytes_read = file.read(chunk_info.size)
             decoded = codecs[0].decode(bytes_read)
diff --git a/virtualizarr/tests/test_readers/test_hdf_integration.py b/virtualizarr/tests/test_readers/test_hdf_integration.py
index 94fc0c1c..b31289c0 100644
--- a/virtualizarr/tests/test_readers/test_hdf_integration.py
+++ b/virtualizarr/tests/test_readers/test_hdf_integration.py
@@ -7,11 +7,9 @@
 
 
 class TestIntegration:
-    def test_filters_end_to_end(self, tmpdir,
-                                filter_encoded_xarray_netcdf4_files):
+    def test_filters_end_to_end(self, tmpdir, filter_encoded_xarray_netcdf4_files):
         virtual_ds = virtualizarr.open_virtual_dataset(
-            filter_encoded_xarray_netcdf4_files,
-            filetype=FileType("netcdf4")
+            filter_encoded_xarray_netcdf4_files, filetype=FileType("netcdf4")
         )
         kerchunk_file = f"{tmpdir}/kerchunk.json"
         virtual_ds.virtualize.to_kerchunk(kerchunk_file, format="json")
diff --git a/virtualizarr/xarray.py b/virtualizarr/xarray.py
index 72645654..d8b6a080 100644
--- a/virtualizarr/xarray.py
+++ b/virtualizarr/xarray.py
@@ -20,8 +20,8 @@
     _automatically_determine_filetype,
 )
 from virtualizarr.manifests import ChunkManifest, ManifestArray
-from virtualizarr.utils import _fsspec_openfile_from_filepath
 from virtualizarr.readers.hdf import attrs_from_root_group, virtual_vars_from_hdf
+from virtualizarr.utils import _fsspec_openfile_from_filepath
 from virtualizarr.zarr import (
     attrs_from_zarr_group_json,
     dataset_to_zarr,
@@ -109,8 +109,7 @@ def open_virtual_dataset(
     if filetype.name.lower() == "netcdf4":
         print("wat")
         virtual_vars = virtual_vars_from_hdf(
-            path=filepath,
-            drop_variables=drop_variables
+            path=filepath, drop_variables=drop_variables
         )
         ds_attrs = attrs_from_root_group(path=filepath)
     if filetype == "zarr_v3":
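
As context for the blosc hunk in virtualizarr/readers/hdf_filters.py above: HDF5 reports a filter's settings as a flat tuple of integers (cd_values), and for the blosc filter the last four are blocksize, clevel, shuffle, and a compressor code, which the code zips onto BloscProperties' fields in declaration order. Below is a minimal, self-contained sketch of that mapping; the NamedTuple and the compressor table are illustrative stand-ins (the real module uses a pydantic model and inverts hdf5plugin's private _Blosc__COMPRESSIONS table), and the example cd_values tuple is hypothetical.

    from typing import NamedTuple, Tuple


    class BloscProperties(NamedTuple):
        # Same field order as the pydantic model in hdf_filters.py; the order
        # is what makes zip(field_names, cd_values[-4:]) line up correctly.
        blocksize: int
        clevel: int
        shuffle: int
        cname: str


    # Assumed c-blosc compressor codes (code -> name); hardcoded here purely
    # for illustration.
    _BLOSC_COMPRESSORS = {
        0: "blosclz", 1: "lz4", 2: "lz4hc", 3: "snappy", 4: "zlib", 5: "zstd"
    }


    def blosc_props_from_cd_values(cd_values: Tuple[int, ...]) -> BloscProperties:
        """Zip the trailing four cd_values onto the field names, as the hunk does."""
        props = dict(zip(BloscProperties._fields, cd_values[-4:]))
        # The @validator("cname", pre=True) in the patch performs this same
        # code -> name lookup before the model is built.
        props["cname"] = _BLOSC_COMPRESSORS[props["cname"]]
        return BloscProperties(**props)


    # Hypothetical cd_values for an lz4-compressed dataset, in the order
    # (filter rev, blosc version, typesize, blocksize, clevel, shuffle, compressor):
    print(blosc_props_from_cd_values((2, 2, 8, 2048, 9, 1, 1)))
    # -> BloscProperties(blocksize=2048, clevel=9, shuffle=1, cname='lz4')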