Skip to content

Commit

Permalink
Merge pull request #121 from ManuelHu/lh5concat-struct
Browse files (browse the repository at this point in the history)
lh5concat: allow concatenation of group-like structs
  • Loading branch information
gipert authored Nov 21, 2024
2 parents 05e6061 + 0104086 commit 9d1ad8f
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 2 deletions.
32 changes: 30 additions & 2 deletions src/lgdo/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import logging
import sys

from . import Array, Table, VectorOfVectors, __version__, lh5
from . import Array, Scalar, Struct, Table, VectorOfVectors, __version__, lh5
from . import logging as lgdogging # eheheh

log = logging.getLogger(__name__)
Expand Down Expand Up @@ -212,6 +212,7 @@ def lh5concat(args=None):
store = lh5.LH5Store()
h5f0 = store.gimme_file(file0)
lgdos = {}
lgdo_structs = {}
# loop over object list in the first file
for name in obj_list:
# now loop over groups starting from root
Expand All @@ -222,7 +223,7 @@ def lh5concat(args=None):
if current in lgdos:
break

# not even an LGDO!
# not even an LGDO (i.e. a plain HDF5 group)!
if "datatype" not in h5f0[current].attrs:
continue

Expand All @@ -232,14 +233,30 @@ def lh5concat(args=None):
# read all!
obj, _ = store.read(current, h5f0)
lgdos[current] = obj
elif isinstance(obj, Struct):
# structs might be used in a "group-like" fashion (i.e. they might only
# contain array-like objects).
# note: handle after handling tables, as tables also satisfy this check.
lgdo_structs[current] = obj.attrs["datatype"]
continue
elif isinstance(obj, Scalar):
msg = f"cannot concat scalar field {current}"
log.warning(msg)

break

msg = f"first-level, array-like objects: {lgdos.keys()}"
log.debug(msg)
msg = f"nested structs: {lgdo_structs.keys()}"
log.debug(msg)

h5f0.close()

if lgdos == {}:
msg = "did not find any field to concatenate, exit"
log.error(msg)
return

# 2. remove (nested) table fields based on obj_list

def _inplace_table_filter(name, table, obj_list):
Expand Down Expand Up @@ -298,3 +315,14 @@ def _inplace_table_filter(name, table, obj_list):
_inplace_table_filter(name, obj, obj_list)

store.write(obj, name, args.output, wo_mode="append")

# 5. reset datatypes of the "group-like" structs

if lgdo_structs != {}:
output_file = store.gimme_file(args.output, mode="a")
for struct, struct_dtype in lgdo_structs.items():
msg = f"reset datatype of struct {struct} to {struct_dtype}"
log.debug(msg)

output_file[struct].attrs["datatype"] = struct_dtype
output_file.close()
18 changes: 18 additions & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,3 +149,21 @@ def test_lh5concat(lgnd_test_data, tmptestdir):
assert tbl.packet_id[i] == tbl2.packet_id[i - 10]
assert np.array_equal(tbl.tracelist[i], tbl2.tracelist[i - 10])
assert np.array_equal(tbl.waveform.values[i], tbl2.waveform.values[i - 10])

# test concatenating arrays in structs.
infile1 = f"{tmptestdir}/concat_test_struct_0.lh5"
tb1 = types.Table(col_dict={"col": types.Array(np.zeros(4))})
struct1 = types.Struct({"x": tb1})
store.write(struct1, "stp", infile1, wo_mode="overwrite_file")

infile2 = f"{tmptestdir}/concat_test_struct_1.lh5"
tb2 = types.Table(col_dict={"col": types.Array(np.ones(7))})
struct2 = types.Struct({"x": tb2})
store.write(struct2, "stp", infile2, wo_mode="overwrite_file")

outfile = f"{tmptestdir}/concat_test_struct_out.lh5"
cli.lh5concat(["--output", outfile, "--", infile1, infile2])

out_stp = store.read("stp", outfile)[0]
assert out_stp.attrs["datatype"] == "struct{x}"
assert np.all(out_stp.x["col"].nda == np.array([0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1]))

0 comments on commit 9d1ad8f

Please sign in to comment.