Skip to content

Commit

Permalink
add the option to copy RUN and CONTROL data to the output files
Browse files Browse the repository at this point in the history
  • Loading branch information
tmichela committed Nov 24, 2023
1 parent 8ec2f26 commit 3f42eef
Showing 1 changed file with 48 additions and 8 deletions.
56 changes: 48 additions & 8 deletions extra_data/tests/utils/gendata.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

from ..utils import progress_bar

__all__ = ["clone"]


def progress(processed, total, *, show=True):
"""Show progress information"""
Expand All @@ -19,7 +21,9 @@ def progress(processed, total, *, show=True):
print(pbar, file=sys.stderr)


def clone_file_structure(h5file: Path, output: Path) -> None:
def _clone_file_structure(
h5file: Path, output: Path, *, run_data=False, control_data=False
) -> None:
clone = h5py.File(output / h5file.name, "w")

def visitor(name, obj):
Expand All @@ -28,8 +32,8 @@ def visitor(name, obj):
elif isinstance(obj, h5py.Dataset):
if (
name.startswith("INSTRUMENT")
or name.startswith("CONTROL")
or name.startswith("RUN")
or (name.startswith("CONTROL") and not control_data)
or (name.startswith("RUN") and not run_data)
):
clone.create_dataset_like(name, obj)
else:
Expand All @@ -39,19 +43,33 @@ def visitor(name, obj):
original.visititems(visitor)


def clone(input: Path, output: Path, *, term_progress=False) -> None:
def clone(
input: Path,
output: Path,
*,
run_data=False,
control_data=False,
term_progress=False,
) -> None:
"""Clone EuXFEL HDF5 file structure without any of its data.
Clone the input file or files present the input directory.
The cloned files will be writen to output.
The cloned files will be written to output.
args:
run_data: Copy data in RUN group if set to True
control_data: Copy data in CONTROL group if set to True
term_progress: show progress in terminal if set to True
"""
if not output.is_dir():
raise ValueError(f"The given output directory does not exist: {output}")

if h5py.is_hdf5(input):
if output == input.parent:
raise ValueError("Input and output must be different directories.")
clone_file_structure(input, output)
_clone_file_structure(
input, output, run_data=run_data, control_data=control_data
)
elif input.is_dir():
if output == input:
raise ValueError("Input and output must be different directories.")
Expand All @@ -60,7 +78,9 @@ def clone(input: Path, output: Path, *, term_progress=False) -> None:

progress(0, len(h5files), show=term_progress)
for n, file_ in enumerate(h5files, start=1):
clone_file_structure(file_, output)
_clone_file_structure(
file_, output, run_data=run_data, control_data=control_data
)
progress(n, len(h5files), show=term_progress)
else:
raise ValueError(f"invalid input: {input}")
Expand All @@ -72,14 +92,34 @@ def main(argv=None):
ap.add_argument(
"output", type=str, help="Output directory to write the cloned files."
)
ap.add_argument(
"--copy-run-data",
"-cr",
action="store_true",
default=False,
help="Copy data present in the RUN group.",
)
ap.add_argument(
"--copy-control-data",
"-cc",
action="store_true",
default=False,
help="Copy dara present in the CONTROL group.",
)

args = ap.parse_args()

path_in = Path(args.input).expanduser()
path_out = Path(args.output).expanduser()

print(f"Cloning file(s) structure:\ninput: {path_in}\nOutput: {path_out}\n")
clone(path_in, path_out, term_progress=True)
clone(
path_in,
path_out,
run_data=args.copy_run_data,
control_data=args.copy_control_data,
term_progress=True,
)
print("Done.")


Expand Down

0 comments on commit 3f42eef

Please sign in to comment.