Skip to content

Commit

Permalink
Optimize hdf5 IO
Browse files Browse the repository at this point in the history
-  Don't evaluate data field twice in data_grabber
-  Allow data_grabber to load a single dataset (when the path is a dataset rather than a group with items)
-  Prevent load_data_field from loading more fields than necessary
  • Loading branch information
jnoelke committed Mar 22, 2023
1 parent 287ae69 commit 3917f7a
Showing 1 changed file with 10 additions and 7 deletions.
17 changes: 10 additions & 7 deletions simpa/io_handling/io_hdf5.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,13 +119,18 @@ def data_grabber(file, path):
:param file: hdf5 file instance to load the data from.
:param path: Current group path in hdf5 file group structure.
:returns: Dictionary
:returns: Dictionary or np.array
"""

if isinstance(h5file[path], h5py._hl.dataset.Dataset):
return h5file[path][()]

dictionary = {}
for key, item in h5file[path].items():
if isinstance(item, h5py._hl.dataset.Dataset):
if item[()] is not None:
dictionary[key] = item[()]
item = item[()]
if item is not None:
dictionary[key] = item
if isinstance(dictionary[key], bytes):
dictionary[key] = dictionary[key].decode("utf-8")
elif isinstance(dictionary[key], np.bool_):
Expand Down Expand Up @@ -163,10 +168,8 @@ def data_grabber(file, path):


def load_data_field(file_path, data_field, wavelength=None):
# Load the entry identified by data_field (and optionally wavelength) from the
# file at file_path and return it.
#
# NOTE(review): this is a diff hunk whose +/- markers and indentation were
# lost. The two variants below are presumably the removed lines followed by
# the added lines, not statements meant to run back to back; confirm against
# the repository.
#
# Variant 1 (presumably removed by this commit): build the path, take its
# second-to-last "/"-separated component as the key, drop the last two
# components, load the remaining parent path and index the result by the key.
# Presumably the generated path ends with "/", so [-2] is the last named
# component; verify against generate_dict_path.
dict_path = generate_dict_path(data_field, wavelength=wavelength)
data_field_key = dict_path.split("/")[-2]
dict_path = "/".join(dict_path.split("/")[:-2]) + "/"
data = load_hdf5(file_path, dict_path)[data_field_key]
# Variant 2 (presumably added by this commit): pass the generated path
# straight to load_hdf5, so only that path is read rather than its parent.
path = generate_dict_path(data_field, wavelength=wavelength)
data = load_hdf5(file_path, path)
return data


Expand Down

3 comments on commit 3917f7a

@cbender98
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Couldn't you use the same trick again further down in the code?
i.e. at line 145/150

                            if item[listkey][()] is not None:
                                list_item = item[listkey][()]

to

                            listkey_item = item[listkey][()]
                            if listkey_item is not None:
                                list_item = listkey_item

@cbender98
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

see new commit

@jnoelke
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I agree, that is the same thing

Please sign in to comment.