Skip to content

Commit

Permalink
Fix compatibility for read_nested_dict_from_hdf() (#61)
Browse files Browse the repository at this point in the history
* Fix compatibility for read_nested_dict_from_hdf()

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
jan-janssen and pre-commit-ci[bot] authored Aug 17, 2024
1 parent 44bd78a commit 2495813
Show file tree
Hide file tree
Showing 2 changed files with 111 additions and 38 deletions.
110 changes: 72 additions & 38 deletions h5io_browser/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
"csr_matrix",
"csc_array",
"csr_array",
"dict",
"multiarray",
)

Expand Down Expand Up @@ -81,19 +82,9 @@ def read_dict_from_hdf(file_name, h5_path, recursive=False, slash="ignore"):
any type supported by ``write_hdf5``.
"""
with h5py.File(file_name, "r") as hdf:
if recursive:
nodes_lst = _get_hdf_content(
hdf=hdf[h5_path], recursive=recursive, only_nodes=True
)
else:
nodes_lst = [h5_path]
if len(nodes_lst) > 0 and nodes_lst[0] != "/":
return {
n: _read_hdf(hdf_filehandle=hdf, h5_path=n, slash=slash)
for n in nodes_lst
}
else:
return {}
return _read_dict_from_open_hdf(
hdf_filehandle=hdf, h5_path=h5_path, recursive=recursive, slash=slash
)


def read_nested_dict_from_hdf(
Expand All @@ -119,32 +110,44 @@ def read_nested_dict_from_hdf(
if h5_path[0] != "/":
h5_path = "/" + h5_path
with h5py.File(file_name, "r") as hdf:
nodes_lst = _get_hdf_content(
hdf=hdf[h5_path], recursive=recursive, only_nodes=True
)
if not recursive and len(nodes_lst) == 0 and h5_path != "/":
nodes_lst += [h5_path]
if len(group_paths) > 0:
for group in group_paths:
nodes_lst += _get_hdf_content(
hdf=hdf[posixpath.join(h5_path, group)],
recursive=recursive,
only_nodes=True,
)
if len(nodes_lst) > 0:
return_dict = {}
for n in nodes_lst:
return_dict = _merge_nested_dict(
main_dict=return_dict,
add_dict=_get_nested_dict_item(
key=n,
value=_read_hdf(hdf_filehandle=hdf, h5_path=n, slash=slash),
h5_path=h5_path,
),
)
return return_dict
group_attrs_dict = hdf[h5_path].attrs
if (
"TITLE" in group_attrs_dict.keys()
and group_attrs_dict["TITLE"] in H5IO_GROUP_TYPES
):
return _read_dict_from_open_hdf(
hdf_filehandle=hdf,
h5_path=h5_path[1:],
recursive=recursive,
slash=slash,
)
else:
return {}
nodes_lst = _get_hdf_content(
hdf=hdf[h5_path], recursive=recursive, only_nodes=True
)
if not recursive and len(nodes_lst) == 0 and h5_path != "/":
nodes_lst += [h5_path]
if len(group_paths) > 0:
for group in group_paths:
nodes_lst += _get_hdf_content(
hdf=hdf[posixpath.join(h5_path, group)],
recursive=recursive,
only_nodes=True,
)
if len(nodes_lst) > 0:
return_dict = {}
for n in nodes_lst:
return_dict = _merge_nested_dict(
main_dict=return_dict,
add_dict=_get_nested_dict_item(
key=n,
value=_read_hdf(hdf_filehandle=hdf, h5_path=n, slash=slash),
h5_path=h5_path,
),
)
return return_dict
else:
return {}


def write_dict_to_hdf(file_name, data_dict, compression=4, slash="error"):
Expand Down Expand Up @@ -293,6 +296,37 @@ def _read_hdf(hdf_filehandle, h5_path, slash="ignore"):
)


def _read_dict_from_open_hdf(hdf_filehandle, h5_path, recursive=False, slash="ignore"):
"""
Read data from an open HDF5 file into a dictionary - by default only the nodes are converted to dictionaries,
additional sub groups can be converted using the recursive parameter.
Args:
hdf_filehandle (h5py.File): Open HDF5 file
h5_path (str): Path to a group in the HDF5 file from where the data is read
recursive (bool/int): Recursively browse through the HDF5 file, either a boolean flag or an integer
which specifies the level of recursion.
slash (str): 'ignore' | 'replace' Whether to replace the string {FWDSLASH} with the value /. This does
not apply to the top level name (title). If 'ignore', nothing will be replaced.
Returns:
dict: The loaded data as dictionary, with the keys being the path inside the HDF5 file. The values can be of
any type supported by ``write_hdf5``.
"""
if recursive:
nodes_lst = _get_hdf_content(
hdf=hdf_filehandle[h5_path], recursive=recursive, only_nodes=True
)
else:
nodes_lst = [h5_path]
if len(nodes_lst) > 0 and nodes_lst[0] != "/":
return {
n: _read_hdf(hdf_filehandle=hdf_filehandle, h5_path=n, slash=slash)
for n in nodes_lst
}
else:
return {}


def _write_hdf(
hdf_filehandle,
h5_path,
Expand Down
39 changes: 39 additions & 0 deletions tests/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import h5py
from unittest import TestCase
import posixpath
import h5io
from h5io_browser import (
delete_item,
list_hdf,
Expand Down Expand Up @@ -398,3 +399,41 @@ def test_delete(self):
nodes, groups = list_hdf(file_name=self.file_name, h5_path="/data_json")
self.assertEqual(groups, [])
self.assertEqual(nodes, ["/data_json/a"])


class TestCompatibility(TestCase):
    """
    Round-trip checks for a file written directly with ``h5io.write_hdf5``: the data has to be
    readable with ``h5io.read_hdf5`` as well as with ``read_dict_from_hdf`` and
    ``read_nested_dict_from_hdf``.
    """

    def setUp(self):
        # The reference file is created with h5io itself before every test.
        self.file_name = "testcomp.h5"
        # NOTE(review): presumably "h5io" is the title h5io.write_hdf5 uses by default - confirm.
        self.h5_path = "h5io"
        self.data = {"array": np.ones(4) * 42, "b": 42}
        h5io.write_hdf5(self.file_name, self.data)

    def tearDown(self):
        # Remove the reference file again so that tests do not leak files.
        os.remove(self.file_name)

    def _check_against_reference(self, loaded):
        """Compare every entry of the reference data with the matching entry in ``loaded``."""
        for key, expected in self.data.items():
            if isinstance(expected, np.ndarray):
                # Arrays are compared element by element.
                self.assertTrue(all(np.equal(expected, loaded[key])))
            else:
                self.assertTrue(expected == loaded[key])

    def test_h5io(self):
        # h5io returns the stored dictionary directly.
        self._check_against_reference(h5io.read_hdf5(self.file_name, self.h5_path))

    def test_read_dict_from_hdf(self):
        # The browser functions key their result by the path that was read.
        result = read_dict_from_hdf(self.file_name, self.h5_path)
        self._check_against_reference(result[self.h5_path])

    def test_read_nested_dict_from_hdf(self):
        result = read_nested_dict_from_hdf(self.file_name, self.h5_path)
        self._check_against_reference(result[self.h5_path])

0 comments on commit 2495813

Please sign in to comment.