From cf15210fdca88f0968189a4a69ee8d04a3bf8dda Mon Sep 17 00:00:00 2001 From: Valerio Dandrea Date: Tue, 28 Nov 2023 12:30:43 +0100 Subject: [PATCH 001/103] first version of dplms script and merging of lh5 par files --- rules/dsp.smk | 47 +++++++++++++++++ scripts/merge_channels.py | 47 +++++++++++++++++ scripts/pars_dsp_dplms.py | 103 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 197 insertions(+) create mode 100644 scripts/pars_dsp_dplms.py diff --git a/rules/dsp.smk b/rules/dsp.smk index 428ecd2..969bc3d 100644 --- a/rules/dsp.smk +++ b/rules/dsp.smk @@ -98,11 +98,52 @@ rule build_pars_dsp_eopt: "--final_dsp_pars {output.dsp_pars}" +# This rule builds the dplms energy filter for the dsp using fft and cal files +rule build_pars_dsp_dplms: + input: + fft_files=os.path.join( + filelist_path(setup), "all-{experiment}-{period}-{run}-fft-raw.filelist" + ), + cal_files=os.path.join( + filelist_path(setup), "all-{experiment}-{period}-{run}-cal-raw.filelist" + ), + database=get_pattern_pars_tmp_channel(setup, "dsp", "decay_constant"), + params: + timestamp="{timestamp}", + datatype="cal", + channel="{channel}", + output: + dsp_pars=temp(get_pattern_pars_tmp_channel(setup, "dsp")), + lh5_path=temp(get_pattern_pars_tmp_channel(setup, "dsp", extension="lh5")), + plots=temp(get_pattern_plts_tmp_channel(setup, "dsp", "dplms")), + log: + get_pattern_log_channel(setup, "pars_dsp_dplms"), + group: + "par-dsp" + resources: + runtime=300, + shell: + "{swenv} python3 -B " + f"{workflow.source_path('../scripts/pars_dsp_dplms.py')} " + "--fft_raw_filelist {input.fft_files}" + "--cal_raw_filelist {input.cal_files}" + "--database {input.database} " + "--configs {configs} " + "--log {log} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--channel {params.channel} " + "--dsp_pars {output.dsp_pars}" + "--lh5_path {output.lh5_path}" + "--plot_path {output.plots} " + + rule build_pars_dsp: input: lambda wildcards: read_filelist_pars_cal_channel(wildcards, "dsp"), lambda wildcards: read_filelist_plts_cal_channel(wildcards, "dsp"), lambda wildcards: read_filelist_pars_cal_channel(wildcards, "dsp_objects_pkl"), + lambda wildcards: read_filelist_pars_cal_channel(wildcards, "dsp"), output: get_pattern_pars(setup, "dsp", check_in_cycle=check_in_cycle), get_pattern_pars( @@ -113,6 +154,12 @@ rule build_pars_dsp: check_in_cycle=check_in_cycle, ), get_pattern_plts(setup, "dsp"), + get_pattern_pars( + setup, + "dsp", + extension="lh5", + check_in_cycle=check_in_cycle, + ), group: "merge-dsp" shell: diff --git a/scripts/merge_channels.py b/scripts/merge_channels.py index 1d43e6f..b7d7a59 100644 --- a/scripts/merge_channels.py +++ b/scripts/merge_channels.py @@ -5,6 +5,10 @@ import pickle as pkl import shelve +import lgdo.lh5_store as lh5 +from lgdo import Array +sto = lh5.LH5Store() + argparser = argparse.ArgumentParser() argparser.add_argument("--input", help="input file", nargs="*", type=str) argparser.add_argument("--output", help="output file", nargs="*", type=str) @@ -31,6 +35,14 @@ name, ) = os.path.basename(channel).split("-") out_dict[channel_name] = channel_dict + + for key in channel_dict.keys(): + key_dict = channel_dict[key] + for key_pars in key_dict.keys(): + if isinstance(key_dict[key_pars], str): + if "loadlh5" in key_dict[key_pars]: + out_lh5 = outfile.replace(".json",".lh5") + out_dict[channel_name][key][key_pars] = f"loadlh5('{out_lh5}', '{channel_name}/{key}')" else: pass @@ -86,3 +98,38 @@ pass if len(common_dict) > 0: shelf["common"] = common_dict + + elif 
file_extension == ".lh5": + for channel in channel_files: + if os.path.splitext(channel)[0].split("-")[-1] == processing_step: + with open(channel) as r: + channel_dict = json.load(r) + ( + experiment, + period, + run, + datatype, + timestamp, + channel_name, + name, + ) = os.path.basename(channel).split("-") + + out_dict[channel_name] = channel_dict + + for key in channel_dict.keys(): + key_dict = channel_dict[key] + for key_pars in key_dict.keys(): + if isinstance(key_dict[key_pars], str): + if "loadlh5" in key_dict[key_pars]: + path_to_file = key_dict[key_pars].split("'")[1] + path_in_file = key_dict[key_pars].split("'")[3] + data = sto.read_object(path_in_file, path_to_file)[0].nda + sto.write_object( + Array(data), + name=key, + lh5_file=out_file, + wo_mode="overwrite", + group=channel_name + ) + else: + pass \ No newline at end of file diff --git a/scripts/pars_dsp_dplms.py b/scripts/pars_dsp_dplms.py new file mode 100644 index 0000000..9f5c0ca --- /dev/null +++ b/scripts/pars_dsp_dplms.py @@ -0,0 +1,103 @@ +from pygama.dsp.utils import numba_defaults + +numba_defaults.cache = False +numba_defaults.boundscheck = True + +import argparse +import json +import logging +import os +import pathlib +import pickle as pkl +import time + +import pygama.pargen.dplms_ge_dict as pdd +from legendmeta import LegendMetadata + +argparser = argparse.ArgumentParser() +argparser.add_argument("--fft_raw_filelist", help="fft_raw_filelist", type=str) +argparser.add_argument("--cal_raw_filelist", help="cal_raw_filelist", type=str) +argparser.add_argument("--database", help="database", type=str, required=True) +argparser.add_argument("--configs", help="configs", type=str, required=True) + +argparser.add_argument("--log", help="log_file", type=str) + +argparser.add_argument("--datatype", help="Datatype", type=str, required=True) +argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) +argparser.add_argument("--channel", help="Channel", type=str, required=True) + +argparser.add_argument("--dsp_pars", help="dsp_pars", type=str, required=True) +argparser.add_argument("--lh5_path", help="lh5_path", type=str, required=True) +argparser.add_argument("--plot_path", help="plot_path", type=str) + +args = argparser.parse_args() + +logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") +logging.getLogger("numba").setLevel(logging.INFO) +logging.getLogger("parse").setLevel(logging.INFO) +logging.getLogger("pygama.lgdo.lh5_store").setLevel(logging.INFO) +logging.getLogger("h5py._conv").setLevel(logging.INFO) +logging.getLogger("pygama.dsp.processing_chain").setLevel(logging.INFO) + +log = logging.getLogger(__name__) + + +t0 = time.time() + +conf = LegendMetadata(path=args.configs) +configs = configs.on(args.timestamp, system=args.datatype) +dsp_config = config_dict['snakemake_rules']['pars_dsp_dplms']["inputs"]['proc_chain'][args.channel] + +dplms_json = config_dict['snakemake_rules']['pars_dsp_dplms']["inputs"]['dplms_pars'][args.channel] +with open(dplms_json) as r: + dplms_dict = json.load(r) + +with open(args.database) as t: + db_dict = json.load(t) + +if opt_dict["run_dplms"] is True: + with open(args.fft_raw_filelist) as f: + fft_files = f.read().splitlines() + with open(args.cal_raw_filelist) as f: + cal_files = f.read().splitlines() + + fft_files = sorted(fft_files) + cal_files = sorted(cal_files) + + if isinstance(dsp_config, str): + with open(dsp_config) as r: + dsp_config = json.load(r) + + if args.plot_path: + out_dict, plot_dict = pdd.dplms_ge_dict( + args.channel, + 
fft_files, + cal_files, + dsp_config, + db_dict, + args.lh5_path, + dplms_dict, + display=1 + ) + pathlib.Path(os.path.dirname(args.plot_path)).mkdir(parents=True, exist_ok=True) + with open(args.plot_path, "wb") as f: + pkl.dump(plot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) + else: + out_dict, plot_dict = pdd.dplms_ge_dict( + args.channel, + fft_files, + cal_files, + dsp_config, + db_dict, + args.lh5_path, + dplms_dict, + ) + + t1 = time.time() + log.info(f"DPLMS creation finished in {(t1-t0)/60} minutes") +else: + out_dict = {} + +pathlib.Path(os.path.dirname(args.dsp_pars)).mkdir(parents=True, exist_ok=True) +with open(args.dsp_pars, "w") as w: + json.dump(out_dict, w, indent=2) From c3d3525155b49f2c911395b98cb95004437a73dc Mon Sep 17 00:00:00 2001 From: Valerio Dandrea Date: Tue, 28 Nov 2023 12:53:13 +0100 Subject: [PATCH 002/103] style fixes --- scripts/merge_channels.py | 30 ++++++++++++++---------------- scripts/pars_dsp_dplms.py | 17 ++++++++--------- 2 files changed, 22 insertions(+), 25 deletions(-) diff --git a/scripts/merge_channels.py b/scripts/merge_channels.py index b7d7a59..8e97bd6 100644 --- a/scripts/merge_channels.py +++ b/scripts/merge_channels.py @@ -39,10 +39,9 @@ for key in channel_dict.keys(): key_dict = channel_dict[key] for key_pars in key_dict.keys(): - if isinstance(key_dict[key_pars], str): - if "loadlh5" in key_dict[key_pars]: - out_lh5 = outfile.replace(".json",".lh5") - out_dict[channel_name][key][key_pars] = f"loadlh5('{out_lh5}', '{channel_name}/{key}')" + if isinstance(key_dict[key_pars], str) and ("loadlh5" in key_dict[key_pars]): + out_lh5 = out_file.replace(".json",".lh5") + out_dict[channel_name][key][key_pars] = f"loadlh5('{out_lh5}', '{channel_name}/{key}')" else: pass @@ -119,17 +118,16 @@ for key in channel_dict.keys(): key_dict = channel_dict[key] for key_pars in key_dict.keys(): - if isinstance(key_dict[key_pars], str): - if "loadlh5" in key_dict[key_pars]: - path_to_file = key_dict[key_pars].split("'")[1] - path_in_file = key_dict[key_pars].split("'")[3] - data = sto.read_object(path_in_file, path_to_file)[0].nda - sto.write_object( - Array(data), - name=key, - lh5_file=out_file, - wo_mode="overwrite", - group=channel_name - ) + if isinstance(key_dict[key_pars], str) and ("loadlh5" in key_dict[key_pars]): + path_to_file = key_dict[key_pars].split("'")[1] + path_in_file = key_dict[key_pars].split("'")[3] + data = sto.read_object(path_in_file, path_to_file)[0].nda + sto.write_object( + Array(data), + name=key, + lh5_file=out_file, + wo_mode="overwrite", + group=channel_name + ) else: pass \ No newline at end of file diff --git a/scripts/pars_dsp_dplms.py b/scripts/pars_dsp_dplms.py index 9f5c0ca..23db5b6 100644 --- a/scripts/pars_dsp_dplms.py +++ b/scripts/pars_dsp_dplms.py @@ -1,8 +1,3 @@ -from pygama.dsp.utils import numba_defaults - -numba_defaults.cache = False -numba_defaults.boundscheck = True - import argparse import json import logging @@ -11,9 +6,13 @@ import pickle as pkl import time +from pygama.dsp.utils import numba_defaults import pygama.pargen.dplms_ge_dict as pdd from legendmeta import LegendMetadata +numba_defaults.cache = False +numba_defaults.boundscheck = True + argparser = argparse.ArgumentParser() argparser.add_argument("--fft_raw_filelist", help="fft_raw_filelist", type=str) argparser.add_argument("--cal_raw_filelist", help="cal_raw_filelist", type=str) @@ -45,17 +44,17 @@ t0 = time.time() conf = LegendMetadata(path=args.configs) -configs = configs.on(args.timestamp, system=args.datatype) -dsp_config = 
config_dict['snakemake_rules']['pars_dsp_dplms']["inputs"]['proc_chain'][args.channel] +configs = conf.on(args.timestamp, system=args.datatype) +dsp_config = configs['snakemake_rules']['pars_dsp_dplms']["inputs"]['proc_chain'][args.channel] -dplms_json = config_dict['snakemake_rules']['pars_dsp_dplms']["inputs"]['dplms_pars'][args.channel] +dplms_json = configs['snakemake_rules']['pars_dsp_dplms']["inputs"]['dplms_pars'][args.channel] with open(dplms_json) as r: dplms_dict = json.load(r) with open(args.database) as t: db_dict = json.load(t) -if opt_dict["run_dplms"] is True: +if dplms_dict["run_dplms"] is True: with open(args.fft_raw_filelist) as f: fft_files = f.read().splitlines() with open(args.cal_raw_filelist) as f: From 1825128a0487e96f0f32a2a18ac30c6f6aa0b56a Mon Sep 17 00:00:00 2001 From: valerioda Date: Tue, 28 Nov 2023 12:54:27 +0100 Subject: [PATCH 003/103] run pre-commmit --- scripts/merge_channels.py | 33 ++++++++++++++++++++------------- scripts/pars_dsp_dplms.py | 8 ++++---- 2 files changed, 24 insertions(+), 17 deletions(-) diff --git a/scripts/merge_channels.py b/scripts/merge_channels.py index 8e97bd6..6df04bd 100644 --- a/scripts/merge_channels.py +++ b/scripts/merge_channels.py @@ -7,6 +7,7 @@ import lgdo.lh5_store as lh5 from lgdo import Array + sto = lh5.LH5Store() argparser = argparse.ArgumentParser() @@ -35,13 +36,17 @@ name, ) = os.path.basename(channel).split("-") out_dict[channel_name] = channel_dict - - for key in channel_dict.keys(): + + for key in channel_dict: key_dict = channel_dict[key] - for key_pars in key_dict.keys(): - if isinstance(key_dict[key_pars], str) and ("loadlh5" in key_dict[key_pars]): - out_lh5 = out_file.replace(".json",".lh5") - out_dict[channel_name][key][key_pars] = f"loadlh5('{out_lh5}', '{channel_name}/{key}')" + for key_pars in key_dict: + if isinstance(key_dict[key_pars], str) and ( + "loadlh5" in key_dict[key_pars] + ): + out_lh5 = out_file.replace(".json", ".lh5") + out_dict[channel_name][key][ + key_pars + ] = f"loadlh5('{out_lh5}', '{channel_name}/{key}')" else: pass @@ -112,13 +117,15 @@ channel_name, name, ) = os.path.basename(channel).split("-") - + out_dict[channel_name] = channel_dict - - for key in channel_dict.keys(): + + for key in channel_dict: key_dict = channel_dict[key] - for key_pars in key_dict.keys(): - if isinstance(key_dict[key_pars], str) and ("loadlh5" in key_dict[key_pars]): + for key_pars in key_dict: + if isinstance(key_dict[key_pars], str) and ( + "loadlh5" in key_dict[key_pars] + ): path_to_file = key_dict[key_pars].split("'")[1] path_in_file = key_dict[key_pars].split("'")[3] data = sto.read_object(path_in_file, path_to_file)[0].nda @@ -127,7 +134,7 @@ name=key, lh5_file=out_file, wo_mode="overwrite", - group=channel_name + group=channel_name, ) else: - pass \ No newline at end of file + pass diff --git a/scripts/pars_dsp_dplms.py b/scripts/pars_dsp_dplms.py index 23db5b6..f4c7296 100644 --- a/scripts/pars_dsp_dplms.py +++ b/scripts/pars_dsp_dplms.py @@ -6,9 +6,9 @@ import pickle as pkl import time -from pygama.dsp.utils import numba_defaults import pygama.pargen.dplms_ge_dict as pdd from legendmeta import LegendMetadata +from pygama.dsp.utils import numba_defaults numba_defaults.cache = False numba_defaults.boundscheck = True @@ -45,9 +45,9 @@ conf = LegendMetadata(path=args.configs) configs = conf.on(args.timestamp, system=args.datatype) -dsp_config = configs['snakemake_rules']['pars_dsp_dplms']["inputs"]['proc_chain'][args.channel] +dsp_config = 
configs["snakemake_rules"]["pars_dsp_dplms"]["inputs"]["proc_chain"][args.channel] -dplms_json = configs['snakemake_rules']['pars_dsp_dplms']["inputs"]['dplms_pars'][args.channel] +dplms_json = configs["snakemake_rules"]["pars_dsp_dplms"]["inputs"]["dplms_pars"][args.channel] with open(dplms_json) as r: dplms_dict = json.load(r) @@ -76,7 +76,7 @@ db_dict, args.lh5_path, dplms_dict, - display=1 + display=1, ) pathlib.Path(os.path.dirname(args.plot_path)).mkdir(parents=True, exist_ok=True) with open(args.plot_path, "wb") as f: From aa1d48082e2ecf30a2a0cd27a74ae6ba5f28a304 Mon Sep 17 00:00:00 2001 From: valerioda Date: Tue, 28 Nov 2023 18:37:48 +0100 Subject: [PATCH 004/103] moved load data out of pargen routine --- scripts/pars_dsp_dplms.py | 48 +++++++++++++++++++++++++++++++-------- 1 file changed, 39 insertions(+), 9 deletions(-) diff --git a/scripts/pars_dsp_dplms.py b/scripts/pars_dsp_dplms.py index f4c7296..131dd1c 100644 --- a/scripts/pars_dsp_dplms.py +++ b/scripts/pars_dsp_dplms.py @@ -6,9 +6,15 @@ import pickle as pkl import time +import lgdo.lh5_store as lh5 +import numpy as np import pygama.pargen.dplms_ge_dict as pdd from legendmeta import LegendMetadata from pygama.dsp.utils import numba_defaults +from pygama.pargen.energy_optimisation import ( + event_selection, + index_data, +) numba_defaults.cache = False numba_defaults.boundscheck = True @@ -39,9 +45,7 @@ logging.getLogger("pygama.dsp.processing_chain").setLevel(logging.INFO) log = logging.getLogger(__name__) - - -t0 = time.time() +sto = lh5.LH5Store() conf = LegendMetadata(path=args.configs) configs = conf.on(args.timestamp, system=args.datatype) @@ -63,6 +67,33 @@ fft_files = sorted(fft_files) cal_files = sorted(cal_files) + t0 = time.time() + log.info("\nLoad fft data") + energies = sto.read_object(f"{args.channel}/raw/daqenergy", fft_files)[0] + idxs = np.where(energies.nda == 0)[0] + raw_fft = sto.read_object( + f"{args.channel}/raw", fft_files, n_rows=dplms_dict["n_baselines"], idx=idxs + )[0] + t1 = time.time() + log.info(f"Time to load fft data {(t1-t0):.2f} s, total events {len(raw_fft)}") + + log.info("\nRunning event selection") + peaks_keV = np.array(dplms_dict["peaks_keV"]) + kev_widths = [tuple(kev_width) for kev_width in dplms_dict["kev_widths"]] + raw_cal, idx_list = event_selection( + cal_files, + f"{args.channel}/raw", + dsp_config, + db_dict[args.channel], + peaks_keV, + np.arange(0, len(peaks_keV), 1).tolist(), + kev_widths, + cut_parameters=dplms_dict["wfs_cut_pars"], + n_events=dplms_dict["n_signals"], + ) + raw_cal = index_data(raw_cal, idx_list[-1]) + log.info(f"Time to run event selection {(time.time()-t1):.2f} s, total events {len(raw_cal)}") + if isinstance(dsp_config, str): with open(dsp_config) as r: dsp_config = json.load(r) @@ -70,8 +101,8 @@ if args.plot_path: out_dict, plot_dict = pdd.dplms_ge_dict( args.channel, - fft_files, - cal_files, + raw_fft, + raw_cal, dsp_config, db_dict, args.lh5_path, @@ -84,16 +115,15 @@ else: out_dict, plot_dict = pdd.dplms_ge_dict( args.channel, - fft_files, - cal_files, + raw_fft, + raw_cal, dsp_config, db_dict, args.lh5_path, dplms_dict, ) - t1 = time.time() - log.info(f"DPLMS creation finished in {(t1-t0)/60} minutes") + log.info(f"DPLMS creation finished in {(time.time()-t0)/60} minutes") else: out_dict = {} From f52d125d0690dc0889a24144a093ea6177f2bb6f Mon Sep 17 00:00:00 2001 From: valerioda Date: Tue, 16 Jan 2024 18:28:06 +0100 Subject: [PATCH 005/103] modification to account for lgdo changes --- scripts/pars_dsp_dplms.py | 37 
++++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/scripts/pars_dsp_dplms.py b/scripts/pars_dsp_dplms.py index 131dd1c..524397c 100644 --- a/scripts/pars_dsp_dplms.py +++ b/scripts/pars_dsp_dplms.py @@ -6,18 +6,16 @@ import pickle as pkl import time +os.environ["LGDO_CACHE"] = "false" +os.environ["LGDO_BOUNDSCHECK"] = "false" +os.environ["DSPEED_CACHE"] = "false" +os.environ["DSPEED_BOUNDSCHECK"] = "false" + import lgdo.lh5_store as lh5 import numpy as np -import pygama.pargen.dplms_ge_dict as pdd from legendmeta import LegendMetadata -from pygama.dsp.utils import numba_defaults -from pygama.pargen.energy_optimisation import ( - event_selection, - index_data, -) - -numba_defaults.cache = False -numba_defaults.boundscheck = True +from pygama.pargen.dplms_ge_dict import dplms_ge_dict +from pygama.pargen.energy_optimisation import event_selection argparser = argparse.ArgumentParser() argparser.add_argument("--fft_raw_filelist", help="fft_raw_filelist", type=str) @@ -40,8 +38,9 @@ logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") logging.getLogger("numba").setLevel(logging.INFO) logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("pygama.lgdo.lh5_store").setLevel(logging.INFO) -logging.getLogger("h5py._conv").setLevel(logging.INFO) +logging.getLogger("lgdo").setLevel(logging.INFO) +logging.getLogger("h5py").setLevel(logging.INFO) +logging.getLogger("matplotlib").setLevel(logging.INFO) logging.getLogger("pygama.dsp.processing_chain").setLevel(logging.INFO) log = logging.getLogger(__name__) @@ -69,9 +68,9 @@ t0 = time.time() log.info("\nLoad fft data") - energies = sto.read_object(f"{args.channel}/raw/daqenergy", fft_files)[0] + energies = sto.read(f"{args.channel}/raw/daqenergy", fft_files)[0] idxs = np.where(energies.nda == 0)[0] - raw_fft = sto.read_object( + raw_fft = sto.read( f"{args.channel}/raw", fft_files, n_rows=dplms_dict["n_baselines"], idx=idxs )[0] t1 = time.time() @@ -80,7 +79,7 @@ log.info("\nRunning event selection") peaks_keV = np.array(dplms_dict["peaks_keV"]) kev_widths = [tuple(kev_width) for kev_width in dplms_dict["kev_widths"]] - raw_cal, idx_list = event_selection( + idx_events, idx_list = event_selection( cal_files, f"{args.channel}/raw", dsp_config, @@ -91,7 +90,11 @@ cut_parameters=dplms_dict["wfs_cut_pars"], n_events=dplms_dict["n_signals"], ) - raw_cal = index_data(raw_cal, idx_list[-1]) + raw_cal = sto.read( + f"{args.channel}/raw", + cal_files, + idx=idx_events, + )[0] log.info(f"Time to run event selection {(time.time()-t1):.2f} s, total events {len(raw_cal)}") if isinstance(dsp_config, str): @@ -99,7 +102,7 @@ dsp_config = json.load(r) if args.plot_path: - out_dict, plot_dict = pdd.dplms_ge_dict( + out_dict, plot_dict = dplms_ge_dict( args.channel, raw_fft, raw_cal, @@ -113,7 +116,7 @@ with open(args.plot_path, "wb") as f: pkl.dump(plot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) else: - out_dict, plot_dict = pdd.dplms_ge_dict( + out_dict, plot_dict = dplms_ge_dict( args.channel, raw_fft, raw_cal, From 9b851cce5008f32cd2045cbee7312f946fc5a497 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 20 Feb 2024 19:21:43 +0100 Subject: [PATCH 006/103] changes for upgrades to optimisation --- scripts/pars_dsp_eopt.py | 46 ++++++++++++++++++++++++++++------------ 1 file changed, 33 insertions(+), 13 deletions(-) diff --git a/scripts/pars_dsp_eopt.py b/scripts/pars_dsp_eopt.py index 9f39691..06b4ebd 100644 --- a/scripts/pars_dsp_eopt.py +++ b/scripts/pars_dsp_eopt.py @@ -5,22 +5,26 @@ 
import pathlib import pickle as pkl import time +import warnings os.environ["LGDO_CACHE"] = "false" os.environ["LGDO_BOUNDSCHECK"] = "false" os.environ["DSPEED_CACHE"] = "false" os.environ["DSPEED_BOUNDSCHECK"] = "false" -import lgdo.lh5_store as lh5 +import lgdo.lh5 as lh5 import numpy as np import pygama.math.peak_fitting as pgf import pygama.pargen.energy_optimisation as om import sklearn.gaussian_process.kernels as ker +from dspeed.units import unit_registry as ureg from legendmeta import LegendMetadata from legendmeta.catalog import Props from pygama.pargen.dsp_optimize import run_one_dsp from pygama.pargen.utils import get_tcm_pulser_ids +warnings.filterwarnings(action="ignore", category=RuntimeWarning) + argparser = argparse.ArgumentParser() argparser.add_argument("--raw_filelist", help="raw_filelist", type=str) argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, required=True) @@ -161,7 +165,7 @@ wf_field=opt_dict["wf_field"], ) - tb_data = sto.read_object( + tb_data = sto.read( f"{args.channel}/raw", raw_files, idx=idx_events, @@ -172,12 +176,14 @@ log.info(f"Data Loaded in {(t1-t0)/60} minutes") if isinstance(dsp_config, str): - with open(dsp_config) as r: - dsp_config = json.load(r) + dsp_config = Props.read_from(dsp_config) + + dsp_config["outputs"] = ["tp_99", "tp_0_est", "dt_eff"] init_data = run_one_dsp(tb_data, dsp_config, db_dict=db_dict, verbosity=0) full_dt = (init_data["tp_99"].nda - init_data["tp_0_est"].nda)[idx_list[-1]] flat_val = np.ceil(1.1 * np.nanpercentile(full_dt, 99) / 100) / 10 + if flat_val < 1.0: flat_val = 1.0 elif flat_val > 4: @@ -291,23 +297,37 @@ + ker.WhiteKernel(noise_level=0.1, noise_level_bounds=(1e-5, 1e1)) ) + lambda_param = 5 + sampling_rate = tb_data["waveform_presummed"]["dt"][0] + sampling_unit = ureg.Quantity(tb_data["waveform_presummed"]["dt"].attrs["units"]) + waveform_sampling = sampling_rate * sampling_unit + bopt_cusp = om.BayesianOptimizer( - acq_func=opt_dict["acq_func"], batch_size=opt_dict["batch_size"], kernel=kernel + acq_func=opt_dict["acq_func"], + batch_size=opt_dict["batch_size"], + kernel=kernel, + sampling_rate=waveform_sampling, ) - bopt_cusp.lambda_param = 1 - bopt_cusp.add_dimension("cusp", "sigma", 1, 16, 2, "us") + bopt_cusp.lambda_param = lambda_param + bopt_cusp.add_dimension("cusp", "sigma", 0.5, 16, True, "us") bopt_zac = om.BayesianOptimizer( - acq_func=opt_dict["acq_func"], batch_size=opt_dict["batch_size"], kernel=kernel + acq_func=opt_dict["acq_func"], + batch_size=opt_dict["batch_size"], + kernel=kernel, + sampling_rate=waveform_sampling, ) - bopt_zac.lambda_param = 1 - bopt_zac.add_dimension("zac", "sigma", 1, 16, 2, "us") + bopt_zac.lambda_param = lambda_param + bopt_zac.add_dimension("zac", "sigma", 0.5, 16, True, "us") bopt_trap = om.BayesianOptimizer( - acq_func=opt_dict["acq_func"], batch_size=opt_dict["batch_size"], kernel=kernel + acq_func=opt_dict["acq_func"], + batch_size=opt_dict["batch_size"], + kernel=kernel, + sampling_rate=waveform_sampling, ) - bopt_trap.lambda_param = 1 - bopt_trap.add_dimension("etrap", "rise", 1, 12, 2, "us") + bopt_trap.lambda_param = lambda_param + bopt_trap.add_dimension("etrap", "rise", 1, 12, True, "us") bopt_cusp.add_initial_values(x_init=sample_x, y_init=sample_y_cusp, yerr_init=err_y_cusp) bopt_zac.add_initial_values(x_init=sample_x, y_init=sample_y_zac, yerr_init=err_y_zac) From d73d49c3626c253e26697e6ad4ef4b0112f11fcb Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sun, 10 Mar 2024 21:27:38 +0100 Subject: [PATCH 007/103] add legendmeta to info 
for logs --- scripts/pars_hit_aoe.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/pars_hit_aoe.py b/scripts/pars_hit_aoe.py index b4fad6d..6017f79 100644 --- a/scripts/pars_hit_aoe.py +++ b/scripts/pars_hit_aoe.py @@ -99,6 +99,7 @@ def aoe_calibration( logging.getLogger("lgdo").setLevel(logging.INFO) logging.getLogger("h5py").setLevel(logging.INFO) logging.getLogger("matplotlib").setLevel(logging.INFO) +logging.getLogger("legendmeta").setLevel(logging.INFO) configs = LegendMetadata(path=args.configs) channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ From 7a0f6d11afe738fa6e11b081f2f0a0980b01e799 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sun, 10 Mar 2024 21:27:57 +0100 Subject: [PATCH 008/103] first changes for new ecal --- scripts/pars_hit_ecal.py | 135 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 130 insertions(+), 5 deletions(-) diff --git a/scripts/pars_hit_ecal.py b/scripts/pars_hit_ecal.py index 169ed35..ac92032 100644 --- a/scripts/pars_hit_ecal.py +++ b/scripts/pars_hit_ecal.py @@ -125,6 +125,70 @@ def baseline_tracking_plots(files, lh5_path, plot_options=None): plot_dict[key] = item["function"](data) return plot_dict +def get_results_dict(ecal_class, data): + if np.isnan(ecal_class.pars).all(): + return {} + else: + fwhm_linear = ecal_class.fwhm_fit_linear.copy() + fwhm_linear["parameters"] = fwhm_linear["parameters"].to_dict() + fwhm_linear["uncertainties"] = fwhm_linear["uncertainties"].to_dict() + fwhm_linear["cov"] = fwhm_linear["cov"].tolist() + fwhm_quad = ecal_class.fwhm_fit_quadratic.copy() + fwhm_quad["parameters"] = fwhm_quad["parameters"].to_dict() + fwhm_quad["uncertainties"] = fwhm_quad["uncertainties"].to_dict() + fwhm_quad["cov"] = fwhm_quad["cov"].tolist() + + pk_dict = { + Ei: { + "function": func_i.__name__, + "module": func_i.__module__, + "parameters_in_ADC": parsi.to_dict(), + "uncertainties_in_ADC": errorsi.to_dict(), + "p_val": pvali, + "fwhm_in_keV": list(fwhmi), + "pk_position":(posi, posuni), + } + for i, (Ei, parsi, errorsi, pvali, fwhmi, posi, posuni, func_i) in enumerate( + zip( + ecal_class.results["fitted_keV"], + ecal_class.results["pk_pars"][ecal_class.results["pk_validities"]], + ecal_class.results["pk_errors"][ecal_class.results["pk_validities"]], + ecal_class.results["pk_pvals"][ecal_class.results["pk_validities"]], + ecal_class.results["pk_fwhms"], + ecal_class.results["pk_pos"], + ecal_class.results["pk_pos_uncertainties"], + ecal_class.funcs, + ) + ) + } + + return { + "total_fep": len( + data.query( + f"{ecal_class.cal_energy_param}>2604&{ecal_class.cal_energy_param}<2624" + ) + ), + "total_dep": len( + data.query( + f"{ecal_class.cal_energy_param}>1587&{ecal_class.cal_energy_param}<1597" + ) + ), + "pass_fep": len( + data.query( + f"{ecal_class.cal_energy_param}>2604&{ecal_class.cal_energy_param}<2624&{ecal_class.selection_string}" + ) + ), + "pass_dep": len( + data.query( + f"{ecal_class.cal_energy_param}>1587&{ecal_class.cal_energy_param}<1597&{ecal_class.selection_string}" + ) + ), + "eres_linear": fwhm_linear, + "eres_quadratic": fwhm_quad, + "fitted_peaks": ecal_class.results["fitted_keV"].tolist(), + "pk_fits": pk_dict, + "mode":ecal_class.results["mode"], + } def energy_cal_th( data: pd.Dataframe, @@ -153,12 +217,58 @@ def energy_cal_th( if cal_energy_params is None: cal_energy_params = [energy_param + "_cal" for energy_param in energy_params] + + glines = [ + # 238.632, + 583.191, + 727.330, + 860.564, + 1592.53, + 1620.50, + 2103.53, + 2614.50, + ] # gamma lines used for 
calibration
+    range_keV = [
+        # (8, 8),
+        (20, 20),
+        (30, 30),
+        (30, 30),
+        (40, 20),
+        (20, 40),
+        (40, 40),
+        (60, 60),
+    ]  # side-band widths
+    funcs = [
+        # pgf.extended_gauss_step_pdf,
+        pgf.extended_radford_pdf,
+        pgf.extended_radford_pdf,
+        pgf.extended_radford_pdf,
+        pgf.extended_radford_pdf,
+        pgf.extended_radford_pdf,
+        pgf.extended_radford_pdf,
+        pgf.extended_radford_pdf,
+    ]
+    gof_funcs = [
+        # pgf.gauss_step_pdf,
+        pgf.radford_pdf,
+        pgf.radford_pdf,
+        pgf.radford_pdf,
+        pgf.radford_pdf,
+        pgf.radford_pdf,
+        pgf.radford_pdf,
+        pgf.radford_pdf,
+    ]
+
     results_dict = {}
     plot_dict = {}
     full_object_dict = {}
     for energy_param, cal_energy_param in zip(energy_params, cal_energy_params):
         full_object_dict[cal_energy_param] = calibrate_parameter(
             energy_param,
+            glines,
+            range_keV,
+            funcs,
+            gof_funcs,
             selection_string,
             plot_options,
             guess_keV,
@@ -168,9 +278,10 @@ def energy_cal_th(
             simplex,
             deg,
             tail_weight=tail_weight,
+            cal_energy_param=cal_energy_param,
         )
         full_object_dict[cal_energy_param].calibrate_parameter(data)
-        results_dict[cal_energy_param] = full_object_dict[cal_energy_param].get_results_dict(data)
+        results_dict[cal_energy_param] = get_results_dict(full_object_dict[cal_energy_param], data)
         hit_dict.update(full_object_dict[cal_energy_param].hit_dict)
         if ~np.isnan(full_object_dict[cal_energy_param].pars).all():
             plot_dict[cal_energy_param] = (
@@ -186,6 +297,8 @@
     argparser.add_argument("--files", help="files", nargs="*", type=str)
     argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, required=True)
     argparser.add_argument("--ctc_dict", help="ctc_dict", nargs="*")
+    argparser.add_argument("--in_hit_dict", help="in_hit_dict", nargs="*", required=False)
+    argparser.add_argument("--inplot_dict", help="inplot_dict", nargs="*", required=False)

     argparser.add_argument("--configs", help="config", type=str, required=True)
     argparser.add_argument("--datatype", help="Datatype", type=str, required=True)
@@ -206,10 +319,14 @@
     logging.getLogger("lgdo").setLevel(logging.INFO)
     logging.getLogger("h5py").setLevel(logging.INFO)
     logging.getLogger("matplotlib").setLevel(logging.INFO)
+    logging.getLogger("legendmeta").setLevel(logging.INFO)
+
+    if args.in_hit_dict:
+        hit_dict = Props.read_from(args.in_hit_dict)
+    else:
+        hit_dict = {}

     database_dic = Props.read_from(args.ctc_dict)

-    hit_dict = database_dic[args.channel]["ctc_params"]
+    hit_dict.update(database_dic[args.channel]["ctc_params"])

     # get metadata dictionary
     configs = LegendMetadata(path=args.configs)
@@ -274,8 +391,6 @@
         plot_item = common_dict.pop(plot)
         plot_dict.update({plot: plot_item})

-    pathlib.Path(os.path.dirname(args.plot_path)).mkdir(parents=True, exist_ok=True)
-
     for key, item in plot_dict.items():
         if isinstance(item, dict) and len(item) > 0:
             param_dict = {}
             if plot in item:
                 param_dict.update({plot: item[plot]})
             common_dict.update({key: param_dict})
-    plot_dict["common"] = common_dict

+    if args.inplot_dict:
+        with open(args.inplot_dict, "rb") as f:
+            total_plot_dict = pkl.load(f)
+        if "common" in total_plot_dict:
+            total_plot_dict["common"].update(common_dict)
+        else:
+            total_plot_dict["common"] = common_dict
+        total_plot_dict.update(plot_dict)
+    else:
+        plot_dict["common"] = common_dict
+        total_plot_dict = plot_dict
+
+    pathlib.Path(os.path.dirname(args.plot_path)).mkdir(parents=True, exist_ok=True)
     with open(args.plot_path, "wb") as f:
-        pkl.dump(plot_dict, f, protocol=pkl.HIGHEST_PROTOCOL)
+        pkl.dump(total_plot_dict, f, protocol=pkl.HIGHEST_PROTOCOL)

From 5d5f075f370515f9c9c3a51e854823b593f8693c Mon 
Sep 17 00:00:00 2001 From: ggmarshall Date: Sun, 10 Mar 2024 21:30:38 +0100 Subject: [PATCH 009/103] add dplms --- rules/dsp.smk | 152 ++++++++++++++++++++++++++------------ scripts/pars_dsp_dplms.py | 73 ++++++++++++------ 2 files changed, 154 insertions(+), 71 deletions(-) diff --git a/rules/dsp.smk b/rules/dsp.smk index 49caaa2..002496f 100644 --- a/rules/dsp.smk +++ b/rules/dsp.smk @@ -92,98 +92,111 @@ rule build_pars_dsp_nopt: "--raw_filelist {input.files}" -# This rule builds the optimal energy filter parameters for the dsp using calibration dsp files -rule build_pars_dsp_eopt: +# This rule builds the dplms energy filter for the dsp using fft and cal files +rule build_pars_dsp_dplms: input: - files=os.path.join( + fft_files=os.path.join( + filelist_path(setup), "all-{experiment}-{period}-{run}-fft-raw.filelist" + ), + cal_files=os.path.join( filelist_path(setup), "all-{experiment}-{period}-{run}-cal-raw.filelist" ), - tcm_filelist=os.path.join( + tcm_files=os.path.join( filelist_path(setup), "all-{experiment}-{period}-{run}-cal-tcm.filelist" ), - decay_const=get_pattern_pars_tmp_channel(setup, "dsp", "noise_optimization"), + database=get_pattern_pars_tmp_channel(setup, "dsp", "noise_optimization"), inplots=get_pattern_plts_tmp_channel(setup, "dsp", "noise_optimization"), params: timestamp="{timestamp}", datatype="cal", channel="{channel}", output: - dsp_pars=temp(get_pattern_pars_tmp_channel(setup, "dsp")), - qbb_grid=temp( - get_pattern_pars_tmp_channel(setup, "dsp", "objects", extension="pkl") - ), - plots=temp(get_pattern_plts_tmp_channel(setup, "dsp")), + dsp_pars=temp(get_pattern_pars_tmp_channel(setup, "dsp",'dplms')), + lh5_path=temp(get_pattern_pars_tmp_channel(setup, "dsp","dplms",extension="lh5")), + plots=temp(get_pattern_plts_tmp_channel(setup, "dsp", "dplms")), log: - get_pattern_log_channel(setup, "pars_dsp_eopt"), + get_pattern_log_channel(setup, "pars_dsp_dplms"), group: "par-dsp" resources: runtime=300, shell: "{swenv} python3 -B " - f"{workflow.source_path('../scripts/pars_dsp_eopt.py')} " - "--log {log} " + f"{workflow.source_path('../scripts/pars_dsp_dplms.py')} " + "--fft_raw_filelist {input.fft_files} " + "--cal_raw_filelist {input.cal_files} " + "--tcm_filelist {input.tcm_files} " + "--database {input.database} " + "--inplots {input.inplots} " "--configs {configs} " + "--log {log} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " - "--raw_filelist {input.files} " - "--tcm_filelist {input.tcm_filelist} " - "--inplots {input.inplots} " - "--decay_const {input.decay_const} " + "--dsp_pars {output.dsp_pars} " + "--lh5_path {output.lh5_path} " "--plot_path {output.plots} " - "--qbb_grid_path {output.qbb_grid} " - "--final_dsp_pars {output.dsp_pars}" - -# This rule builds the dplms energy filter for the dsp using fft and cal files -rule build_pars_dsp_dplms: +# This rule builds the optimal energy filter parameters for the dsp using calibration dsp files +rule build_pars_dsp_eopt: input: - fft_files=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-fft-raw.filelist" - ), - cal_files=os.path.join( + files=os.path.join( filelist_path(setup), "all-{experiment}-{period}-{run}-cal-raw.filelist" ), - database=get_pattern_pars_tmp_channel(setup, "dsp", "decay_constant"), + tcm_filelist=os.path.join( + filelist_path(setup), "all-{experiment}-{period}-{run}-cal-tcm.filelist" + ), + decay_const=get_pattern_pars_tmp_channel(setup, "dsp", "dplms"), + inplots=get_pattern_plts_tmp_channel(setup, "dsp", "dplms"), 
params: timestamp="{timestamp}", datatype="cal", channel="{channel}", output: dsp_pars=temp(get_pattern_pars_tmp_channel(setup, "dsp")), - lh5_path=temp(get_pattern_pars_tmp_channel(setup, "dsp", extension="lh5")), - plots=temp(get_pattern_plts_tmp_channel(setup, "dsp", "dplms")), + qbb_grid=temp( + get_pattern_pars_tmp_channel(setup, "dsp", "objects", extension="pkl") + ), + plots=temp(get_pattern_plts_tmp_channel(setup, "dsp")), log: - get_pattern_log_channel(setup, "pars_dsp_dplms"), + get_pattern_log_channel(setup, "pars_dsp_eopt"), group: "par-dsp" resources: runtime=300, shell: "{swenv} python3 -B " - f"{workflow.source_path('../scripts/pars_dsp_dplms.py')} " - "--fft_raw_filelist {input.fft_files}" - "--cal_raw_filelist {input.cal_files}" - "--database {input.database} " - "--configs {configs} " + f"{workflow.source_path('../scripts/pars_dsp_eopt.py')} " "--log {log} " + "--configs {configs} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " - "--dsp_pars {output.dsp_pars}" - "--lh5_path {output.lh5_path}" + "--raw_filelist {input.files} " + "--tcm_filelist {input.tcm_filelist} " + "--inplots {input.inplots} " + "--decay_const {input.decay_const} " "--plot_path {output.plots} " + "--qbb_grid_path {output.qbb_grid} " + "--final_dsp_pars {output.dsp_pars}" - -rule build_pars_dsp: +rule build_plts_dsp: input: - lambda wildcards: read_filelist_pars_cal_channel(wildcards, "dsp"), lambda wildcards: read_filelist_plts_cal_channel(wildcards, "dsp"), + output: + get_pattern_plts(setup, "dsp"), + group: + "merge-dsp" + shell: + "{swenv} python3 -B " + f"{basedir}/../scripts/merge_channels.py " + "--input {input} " + "--output {output} " + +rule build_pars_dsp_objects: + input: lambda wildcards: read_filelist_pars_cal_channel(wildcards, "dsp_objects_pkl"), - lambda wildcards: read_filelist_pars_cal_channel(wildcards, "dsp"), output: - get_pattern_pars(setup, "dsp", check_in_cycle=check_in_cycle), get_pattern_pars( setup, "dsp", @@ -191,26 +204,69 @@ rule build_pars_dsp: extension="dir", check_in_cycle=check_in_cycle, ), - get_pattern_plts(setup, "dsp"), - get_pattern_pars( + group: + "merge-dsp" + shell: + "{swenv} python3 -B " + f"{basedir}/../scripts/merge_channels.py " + "--input {input} " + "--output {output} " + +rule build_pars_dsp_db: + input: + lambda wildcards: read_filelist_pars_cal_channel(wildcards, "dsp"), + output: + temp(get_pattern_pars_tmp( + setup, + "dsp", + datatype="cal", + )), + group: + "merge-dsp" + shell: + "{swenv} python3 -B " + f"{basedir}/../scripts/merge_channels.py " + "--input {input} " + "--output {output} " + +rule build_pars_dsp: + input: + in_files = lambda wildcards: read_filelist_pars_cal_channel(wildcards, "dsp_dplms_lh5"), + in_db = get_pattern_pars_tmp( + setup, + "dsp", + datatype="cal", + ), + plts = get_pattern_plts(setup, "dsp"), + objects = get_pattern_pars( + setup, + "dsp", + name="objects", + extension="dir", + check_in_cycle=check_in_cycle, + ), + output: + out_file = get_pattern_pars( setup, "dsp", extension="lh5", check_in_cycle=check_in_cycle, ), + out_db = get_pattern_pars(setup, "dsp", check_in_cycle=check_in_cycle), group: "merge-dsp" shell: "{swenv} python3 -B " - f"{workflow.source_path('../scripts/merge_channels.py')} " - "--input {input} " - "--output {output} " + f"{basedir}/../scripts/merge_channels.py " + "--output {output.out_file} " + "--in_db {input.in_db} " + "--out_db {output.out_db} " + "--input {input.in_files} " rule build_dsp: input: raw_file=get_pattern_tier_raw(setup), - 
tcm_file=get_pattern_tier_tcm(setup), pars_file=ancient( lambda wildcards: pars_catalog.get_par_file( setup, wildcards.timestamp, "dsp" diff --git a/scripts/pars_dsp_dplms.py b/scripts/pars_dsp_dplms.py index 524397c..52bb811 100644 --- a/scripts/pars_dsp_dplms.py +++ b/scripts/pars_dsp_dplms.py @@ -11,19 +11,24 @@ os.environ["DSPEED_CACHE"] = "false" os.environ["DSPEED_BOUNDSCHECK"] = "false" -import lgdo.lh5_store as lh5 +import lgdo.lh5 as lh5 import numpy as np from legendmeta import LegendMetadata +from legendmeta.catalog import Props from pygama.pargen.dplms_ge_dict import dplms_ge_dict from pygama.pargen.energy_optimisation import event_selection +from pygama.pargen.utils import get_tcm_pulser_ids +from lgdo import Array, Table argparser = argparse.ArgumentParser() argparser.add_argument("--fft_raw_filelist", help="fft_raw_filelist", type=str) argparser.add_argument("--cal_raw_filelist", help="cal_raw_filelist", type=str) -argparser.add_argument("--database", help="database", type=str, required=True) -argparser.add_argument("--configs", help="configs", type=str, required=True) +argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, required=True) +argparser.add_argument("--inplots", help="in_plot_path", type=str) argparser.add_argument("--log", help="log_file", type=str) +argparser.add_argument("--database", help="database", type=str, required=True) +argparser.add_argument("--configs", help="configs", type=str, required=True) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) @@ -41,7 +46,8 @@ logging.getLogger("lgdo").setLevel(logging.INFO) logging.getLogger("h5py").setLevel(logging.INFO) logging.getLogger("matplotlib").setLevel(logging.INFO) -logging.getLogger("pygama.dsp.processing_chain").setLevel(logging.INFO) +logging.getLogger("dspeed.processing_chain").setLevel(logging.INFO) +logging.getLogger("legendmeta").setLevel(logging.INFO) log = logging.getLogger(__name__) sto = lh5.LH5Store() @@ -51,20 +57,15 @@ dsp_config = configs["snakemake_rules"]["pars_dsp_dplms"]["inputs"]["proc_chain"][args.channel] dplms_json = configs["snakemake_rules"]["pars_dsp_dplms"]["inputs"]["dplms_pars"][args.channel] -with open(dplms_json) as r: - dplms_dict = json.load(r) +dplms_dict = Props.read_from(dplms_json) -with open(args.database) as t: - db_dict = json.load(t) +db_dict = Props.read_from(args.database) if dplms_dict["run_dplms"] is True: with open(args.fft_raw_filelist) as f: - fft_files = f.read().splitlines() + fft_files = sorted(f.read().splitlines()) with open(args.cal_raw_filelist) as f: - cal_files = f.read().splitlines() - - fft_files = sorted(fft_files) - cal_files = sorted(cal_files) + cal_files = sorted(f.read().splitlines()) t0 = time.time() log.info("\nLoad fft data") @@ -76,6 +77,15 @@ t1 = time.time() log.info(f"Time to load fft data {(t1-t0):.2f} s, total events {len(raw_fft)}") + log.info("\nRemoving pulser") + # get pulser mask from tcm files + with open(args.tcm_filelist) as f: + tcm_files = f.read().splitlines() + tcm_files = sorted(np.unique(tcm_files)) + ids, mask = get_tcm_pulser_ids( + tcm_files, args.channel, dplms_dict.pop("pulser_multiplicity_threshold") + ) + log.info("\nRunning event selection") peaks_keV = np.array(dplms_dict["peaks_keV"]) kev_widths = [tuple(kev_width) for kev_width in dplms_dict["kev_widths"]] @@ -83,12 +93,14 @@ cal_files, f"{args.channel}/raw", dsp_config, - db_dict[args.channel], + db_dict, peaks_keV, np.arange(0, 
len(peaks_keV), 1).tolist(),
         kev_widths,
+        pulser_mask=mask,
         cut_parameters=dplms_dict["wfs_cut_pars"],
         n_events=dplms_dict["n_signals"],
+        threshold=dplms_dict["threshold"],
     )
+    raw_cal = sto.read(
+        f"{args.channel}/raw",
+        cal_files,
+        idx=idx_events,
+    )[0]
     log.info(f"Time to run event selection {(time.time()-t1):.2f} s, total events {len(raw_cal)}")

-    if isinstance(dsp_config, str):
-        with open(dsp_config) as r:
-            dsp_config = json.load(r)
+    if isinstance(dsp_config, (str, list)):
+        dsp_config = Props.read_from(dsp_config)

     if args.plot_path:
         out_dict, plot_dict = dplms_ge_dict(
-            args.channel,
             raw_fft,
             raw_cal,
             dsp_config,
             db_dict,
-            args.lh5_path,
             dplms_dict,
             display=1,
         )
+        if args.inplots:
+            with open(args.inplots, "rb") as r:
+                inplot_dict = pkl.load(r)
+            inplot_dict.update({"dplms": plot_dict})
+        else:
+            inplot_dict = {"dplms": plot_dict}
+
         pathlib.Path(os.path.dirname(args.plot_path)).mkdir(parents=True, exist_ok=True)
         with open(args.plot_path, "wb") as f:
-            pkl.dump(plot_dict, f, protocol=pkl.HIGHEST_PROTOCOL)
+            pkl.dump(inplot_dict, f, protocol=pkl.HIGHEST_PROTOCOL)
     else:
-        out_dict, plot_dict = dplms_ge_dict(
-            args.channel,
+        out_dict = dplms_ge_dict(
             raw_fft,
             raw_cal,
             dsp_config,
             db_dict,
-            args.lh5_path,
             dplms_dict,
         )

+    coeffs = out_dict["dplms"].pop("coefficients")
+    dplms_pars = Table(col_dict={"coefficients": Array(coeffs)})
+    out_dict["dplms"]["coefficients"] = f"loadlh5('{args.lh5_path}', '{args.channel}/dplms/coefficients')"
+
     log.info(f"DPLMS creation finished in {(time.time()-t0)/60} minutes")
 else:
     out_dict = {}
+    dplms_pars = Table(col_dict={"coefficients": Array([])})
+
+db_dict.update(out_dict)
+
+pathlib.Path(os.path.dirname(args.lh5_path)).mkdir(parents=True, exist_ok=True)
+sto.write(
+    Table(col_dict={"dplms": dplms_pars}),
+    name=args.channel,
+    lh5_file=args.lh5_path,
+    wo_mode="overwrite",
+)

 pathlib.Path(os.path.dirname(args.dsp_pars)).mkdir(parents=True, exist_ok=True)
 with open(args.dsp_pars, "w") as w:
-    json.dump(out_dict, w, indent=2)
+    json.dump(db_dict, w, indent=2)

From 41d45c07b62e76573bd93e6c69ace58beaf67bc3 Mon Sep 17 00:00:00 2001
From: ggmarshall
Date: Sun, 10 Mar 2024 21:31:02 +0100
Subject: [PATCH 010/103] split out merging into separate rules

---
 rules/pht.smk | 56 +++++++++++++++++++++++++++++++++++----------------
 1 file changed, 45 insertions(+), 11 deletions(-)

diff --git a/rules/pht.smk b/rules/pht.smk
index 71f9acd..f375fe6 100644
--- a/rules/pht.smk
+++ b/rules/pht.smk
@@ -7,6 +7,7 @@
 Snakemake rules for processing pht (partition hit) tier data. 
This is done in 4 """ from scripts.util.pars_loading import pars_catalog +import scripts.util.create_pars_keylist import pars_key_resolve from scripts.util.utils import filelist_path, par_pht_path, set_last_rule_name from scripts.util.patterns import ( get_pattern_pars_tmp_channel, @@ -21,6 +22,13 @@ from scripts.util.patterns import ( get_pattern_pars, ) +ds.pars_key_resolve.write_par_catalog( + ["-*-*-*-cal"], + os.path.join(pars_path(setup), "pht", "validity.jsonl"), + get_pattern_tier_raw(setup), + {"cal": ["par_pht"], "lar": ["par_pht"]}, +) + # This rule builds the energy calibration using the calibration dsp files rule build_per_energy_calibration: @@ -69,33 +77,62 @@ rule build_per_energy_calibration: "--tcm_filelist {input.tcm_filelist} " "--files {input.files}" - -rule build_pars_pht: +rule build_pars_pht_objects: input: - lambda wildcards: read_filelist_pars_cal_channel(wildcards, "pht"), - lambda wildcards: read_filelist_plts_cal_channel(wildcards, "pht"), lambda wildcards: read_filelist_pars_cal_channel( wildcards, "pht_objects_pkl", ), output: - get_pattern_pars(setup, "pht", check_in_cycle=check_in_cycle), get_pattern_pars( setup, "pht", name="objects", extension="dir", check_in_cycle=check_in_cycle, - ), - get_pattern_plts(setup, "pht"), + ) group: "merge-hit" shell: "{swenv} python3 -B " - f"{workflow.source_path('../scripts/merge_channels.py')} " + f"{basedir}/../scripts/merge_channels.py " "--input {input} " "--output {output} " +rule build_plts_pht: + input: + lambda wildcards: read_filelist_plts_cal_channel(wildcards, "pht"), + output: + get_pattern_plts(setup, "pht") + group: + "merge-hit" + shell: + "{swenv} python3 -B " + f"{basedir}/../scripts/merge_channels.py " + "--input {input} " + "--output {output} " + +rule build_pars_pht: + input: + infiles = lambda wildcards: read_filelist_pars_cal_channel(wildcards, "pht"), + plts = get_pattern_plts(setup, "pht"), + objects = get_pattern_pars( + setup, + "pht", + name="objects", + extension="dir", + check_in_cycle=check_in_cycle, + ) + output: + get_pattern_pars(setup, "pht", check_in_cycle=check_in_cycle), + group: + "merge-hit" + shell: + "{swenv} python3 -B " + f"{basedir}/../scripts/merge_channels.py " + "--input {input.infiles} " + "--output {output} " + rule build_pht: input: @@ -134,9 +171,6 @@ rule build_pht: part_pht_rules = {} for key, dataset in part.datasets.items(): for partition in dataset.keys(): - print( - part.get_wildcard_constraints(partition, key), - ) rule: input: From fd67c2f0ce271ba67b70d0756f714fadad54cf4e Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sun, 10 Mar 2024 21:31:24 +0100 Subject: [PATCH 011/103] first draft psp --- rules/psp.smk | 256 +++++++++++++++++++++++++++++++++++++++++++++ scripts/par_psp.py | 100 ++++++++++++++++++ 2 files changed, 356 insertions(+) create mode 100644 rules/psp.smk create mode 100644 scripts/par_psp.py diff --git a/rules/psp.smk b/rules/psp.smk new file mode 100644 index 0000000..811893e --- /dev/null +++ b/rules/psp.smk @@ -0,0 +1,256 @@ +""" +Snakemake rules for processing pht (partition hit) tier data. 
This is done in 4 steps:
+- extraction of calibration curve(s) for each run for each channel from cal data
+- extraction of psd calibration parameters and partition level energy fitting for each channel over whole partition from cal data
+- combining of all channels into single pars files with associated plot and results files
+- running build hit over all channels using par file
+"""
+
+from scripts.util.pars_loading import pars_catalog
+from scripts.util.create_pars_keylist import pars_key_resolve
+from scripts.util.utils import par_psp_path, set_last_rule_name
+from scripts.util.patterns import (
+    get_pattern_pars_tmp_channel,
+    get_pattern_plts_tmp_channel,
+    get_pattern_log_channel,
+    get_pattern_plts,
+    get_pattern_tier,
+    get_pattern_pars_tmp,
+    get_pattern_log,
+    get_pattern_pars,
+)
+
+pars_key_resolve.write_par_catalog(
+    ["-*-*-*-cal"],
+    os.path.join(pars_path(setup), "psp", "validity.jsonl"),
+    get_pattern_tier_raw(setup),
+    {"cal": ["par_psp"], "lar": ["par_psp"]},
+)
+
+part_psp_rules = {}
+for key, dataset in part.datasets.items():
+    for partition in dataset.keys():
+
+        rule:
+            input:
+                dsp_pars=part.get_par_files(
+                    f"{par_psp_path(setup)}/validity.jsonl",
+                    partition,
+                    key,
+                    tier="dsp",
+                ),
+                dsp_objs=part.get_par_files(
+                    f"{par_psp_path(setup)}/validity.jsonl",
+                    partition,
+                    key,
+                    tier="dsp",
+                    name="objects",
+                    extension="pkl",
+                ),
+                dsp_plots=part.get_plt_files(
+                    f"{par_psp_path(setup)}/validity.jsonl",
+                    partition,
+                    key,
+                    tier="dsp",
+                ),
+            wildcard_constraints:
+                channel=part.get_wildcard_constraints(partition, key),
+            params:
+                datatype="cal",
+                channel="{channel}" if key == "default" else key,
+                timestamp=part.get_timestamp(
+                    f"{par_psp_path(setup)}/validity.jsonl", partition, key, tier="psp"
+                ),
+            output:
+                psp_pars=part.get_par_files(
+                    f"{par_psp_path(setup)}/validity.jsonl",
+                    partition,
+                    key,
+                    tier="psp",
+                ),
+                psp_objs=part.get_par_files(
+                    f"{par_psp_path(setup)}/validity.jsonl",
+                    partition,
+                    key,
+                    tier="psp",
+                    name="objects",
+                    extension="pkl",
+                ),
+                psp_plots=part.get_plt_files(
+                    f"{par_psp_path(setup)}/validity.jsonl",
+                    partition,
+                    key,
+                    tier="psp",
+                ),
+            log:
+                part.get_log_file(
+                    f"{par_psp_path(setup)}/validity.jsonl",
+                    partition,
+                    key,
+                    "psp",
+                    name="par_psp",
+                ),
+            group:
+                "par-psp"
+            resources:
+                runtime=300,
+            shell:
+                "{swenv} python3 -B "
+                f"{basedir}/../scripts/par_psp.py "
+                "--log {log} "
+                "--configs {configs} "
+                "--datatype {params.datatype} "
+                "--timestamp {params.timestamp} "
+                "--channel {params.channel} "
+                "--in_plots {input.dsp_plots} "
+                "--out_plots {output.psp_plots} "
+                "--in_obj {input.dsp_objs} "
+                "--out_obj {output.psp_objs} "
+                "--input {input.dsp_pars} "
+                "--output {output.psp_pars} "
+
+        set_last_rule_name(
+            workflow, f"{key}-{partition}-build_par_psp"
+        )
+
+        if key in part_psp_rules:
+            part_psp_rules[key].append(list(workflow.rules)[-1])
+        else:
+            part_psp_rules[key] = [list(workflow.rules)[-1]]
+
+
+# This rule builds the psp pars for a single run from that run's dsp pars,
+# the single-run analogue of the partition rules above
+rule build_par_psp:
+    input:
+        dsp_pars=get_pattern_pars_tmp_channel(setup, "dsp"),
+        dsp_objs=get_pattern_pars_tmp_channel(
+            setup, "dsp", "objects", extension="pkl"
+        ),
+        dsp_plots=get_pattern_plts_tmp_channel(setup, "dsp"),
+    params:
+        datatype="cal",
+        channel="{channel}",
+        timestamp="{timestamp}",
+    output:
+        psp_pars=temp(get_pattern_pars_tmp_channel(setup, "psp")),
+        psp_objs=temp(
+            get_pattern_pars_tmp_channel(
+                setup, "psp", "objects", extension="pkl"
+            )
+        ),
+        psp_plots=temp(get_pattern_plts_tmp_channel(setup, "psp")),
+    log:
+        get_pattern_log_channel(setup, "pars_psp"),
+    group:
+        "par-psp"
+    resources:
+        runtime=300,
+    shell:
+        "{swenv} python3 -B "
+        f"{basedir}/../scripts/par_psp.py "
+        "--log {log} "
+        "--configs {configs} "
+        "--datatype {params.datatype} "
+        "--timestamp {params.timestamp} "
+        "--channel {params.channel} "
+        "--in_plots {input.dsp_plots} "
+        "--out_plots {output.psp_plots} "
+        "--in_obj {input.dsp_objs} "
+        "--out_obj {output.psp_objs} "
+        "--input {input.dsp_pars} "
+        "--output {output.psp_pars} "
+
+
+rule build_pars_psp_objects:
+    input:
+        lambda wildcards: read_filelist_pars_cal_channel(
+            wildcards,
+            "psp_objects_pkl",
+        ),
+    output:
+        get_pattern_pars(
+            setup,
+            "psp",
+            name="objects",
+            extension="dir",
+            check_in_cycle=check_in_cycle,
+        ),
+    group:
+        "merge-psp"
+    shell:
+        "{swenv} python3 -B "
+        f"{basedir}/../scripts/merge_channels.py "
+        "--input {input} "
+        "--output {output} "
+
+rule build_plts_psp:
+    input:
+        lambda wildcards: read_filelist_plts_cal_channel(wildcards, "psp"),
+    output:
+        get_pattern_plts(setup, "psp"),
+    group:
+        "merge-psp"
+    shell:
+        "{swenv} python3 -B "
+        f"{basedir}/../scripts/merge_channels.py "
+        "--input {input} "
+        "--output {output} "
+
+rule build_pars_psp:
+    input:
+        infiles=lambda wildcards: read_filelist_pars_cal_channel(wildcards, "psp"),
+        plts=get_pattern_plts(setup, "psp"),
+        objects=get_pattern_pars(
+            setup,
+            "psp",
+            name="objects",
+            extension="dir",
+            check_in_cycle=check_in_cycle,
+        ),
+    output:
+        get_pattern_pars(setup, "psp", check_in_cycle=check_in_cycle),
+    group:
+        "merge-psp"
+    shell:
+        "{swenv} python3 -B "
+        f"{basedir}/../scripts/merge_channels.py "
+        "--input {input.infiles} "
+        "--output {output} "
+
+
+rule build_psp:
+    input:
+        raw_file=get_pattern_tier_raw(setup),
+        pars_file=ancient(
+            lambda wildcards: pars_catalog.get_par_file(
+                setup, wildcards.timestamp, "psp"
+            )
+        ),
+    params:
+        timestamp="{timestamp}",
+        datatype="{datatype}",
+    output:
+        tier_file=get_pattern_tier(setup, "psp", check_in_cycle=check_in_cycle),
+        db_file=get_pattern_pars_tmp(setup, "psp_db"),
+    log:
+        get_pattern_log(setup, "tier_psp"),
+    group:
+        "tier-psp"
+    resources:
+        runtime=300,
+        mem_swap=50,
+    shell:
+        "{swenv} python3 -B "
+        f"{workflow.source_path('../scripts/build_dsp.py')} "
+        "--log {log} "
+        "--configs {configs} "
+        "--datatype {params.datatype} "
+        "--timestamp {params.timestamp} "
+        "--input {input.raw_file} "
+        "--output {output.tier_file} "
+        "--db_file {output.db_file} "
+        "--pars_file {input.pars_file}"
\ No newline at end of file
diff --git a/scripts/par_psp.py b/scripts/par_psp.py
new file mode 100644
index 0000000..7ef0fad
--- /dev/null
+++ b/scripts/par_psp.py
@@ -0,0 +1,100 @@
+import argparse
+import json
+import os
+import pickle as pkl
+from datetime import datetime
+
+import matplotlib as mpl
+
+mpl.use("Agg")
+
+import matplotlib.pyplot as plt
+import numpy as np
+from legendmeta import LegendMetadata
+from legendmeta.catalog import Props
+from util.FileKey import ChannelProcKey
+
+
+argparser = argparse.ArgumentParser()
+argparser.add_argument("--input", help="input files", nargs="*", type=str, required=True)
+argparser.add_argument("--output", help="output file", nargs="*", type=str, required=True)
+argparser.add_argument("--in_plots", help="input plot files", nargs="*", type=str, required=False)
+argparser.add_argument("--out_plots", help="output plot files", nargs="*", type=str, required=False)
+argparser.add_argument("--in_obj", help="input object files", nargs="*", type=str, required=False)
+argparser.add_argument("--out_obj", help="output object files", nargs="*", type=str, required=False)
+argparser.add_argument("--configs", help="configs", type=str, required=True)
+argparser.add_argument("--datatype", help="Datatype", type=str, required=True)
+argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True)
+argparser.add_argument("--channel", help="Channel", type=str, required=True)
+args = argparser.parse_args()
+
+conf = LegendMetadata(path=args.configs)
+configs = conf.on(args.timestamp, system=args.datatype)
+merge_config = configs["snakemake_rules"]["pars_psp"]["inputs"]["config"][
+    args.channel
+]
+
+ave_fields = merge_config["average_fields"]
+
+# partitions could be different for different channels - do separately for each channel
+in_dicts = {}
+for file in args.input:
+    tstamp = ChannelProcKey.get_filekey_from_pattern(os.path.basename(file)).timestamp
+    in_dicts[tstamp] = Props.read_from(file)
+
+plot_dict = {}
+for field in ave_fields:
+    keys = field.split(".")
+    vals = []
+    for tstamp in in_dicts:
+        val = in_dicts[tstamp]
+        for key in keys:
+            val = val[key]
+        vals.append(val)
+    vals = np.array(vals)
+    if len(vals[~np.isnan(vals)]) == 0:
+        mean = np.nan
+    else:
+        mean = float(np.nanmean(vals))
+    # write the partition average back into every run's dictionary
+    for tstamp in in_dicts:
+        val = in_dicts[tstamp]
+        for key in keys[:-1]:
+            val = val[key]
+        val[keys[-1]] = mean
+
+    fig = plt.figure()
+    plt.scatter([datetime.strptime(tstamp, "%Y%m%dT%H%M%SZ") for tstamp in in_dicts], vals)
+    plt.axhline(y=mean, color="r", linestyle="-")
+    plt.xlabel("time")
+    plt.ylabel("value")
+    plt.title(f"{field} over time")
+    plot_dict[field] = fig
+    plt.close()
+
+for file in args.output:
+    tstamp = ChannelProcKey.get_filekey_from_pattern(os.path.basename(file)).timestamp
+    with open(file, "w") as f:
+        json.dump(in_dicts[tstamp], f, indent=2)
+
+
+if args.out_plots:
+    for file in args.out_plots:
+        tstamp = ChannelProcKey.get_filekey_from_pattern(os.path.basename(file)).timestamp
+        if args.in_plots:
+            for infile in args.in_plots:
+                if tstamp in infile:
+                    with open(infile, "rb") as f:
+                        old_plot_dict = pkl.load(f)
+                    break
+            old_plot_dict.update({"psp": plot_dict})
+            new_plot_dict = old_plot_dict
+        else:
+            new_plot_dict = {"psp": plot_dict}
+        with open(file, "wb") as f:
+            pkl.dump(new_plot_dict, f, protocol=pkl.HIGHEST_PROTOCOL)
+
+if args.out_obj:
+    for file in args.out_obj:
+        tstamp = ChannelProcKey.get_filekey_from_pattern(os.path.basename(file)).timestamp
+        if args.in_obj:
+            for infile in args.in_obj:
+                if tstamp in infile:
+                    with open(infile, "rb") as f:
+                        old_obj_dict = pkl.load(f)
+                    break
+            new_obj_dict = old_obj_dict
+        else:
+            new_obj_dict = {}
+        with open(file, "wb") as f:
+            pkl.dump(new_obj_dict, f, protocol=pkl.HIGHEST_PROTOCOL)
\ No newline at end of file

From 086fe6dd6424444df0a13f30e3a8e81fa6f5a511 Mon Sep 17 00:00:00 2001
From: ggmarshall
Date: Sun, 10 Mar 2024 21:31:59 +0100
Subject: [PATCH 012/103] add lh5 support to merge channels

---
 scripts/merge_channels.py | 239 ++++++++++++++++++--------------------
 1 file changed, 115 insertions(+), 124 deletions(-)

diff --git a/scripts/merge_channels.py b/scripts/merge_channels.py
index 6df04bd..b45d16e 100644
--- a/scripts/merge_channels.py
+++ b/scripts/merge_channels.py
@@ -4,137 +4,128 @@
 import pathlib
 import pickle as pkl
 import shelve
+from legendmeta.catalog import Props
+from util.FileKey import ChannelProcKey
+import numpy as np

-import lgdo.lh5_store as lh5
+
+import lgdo.lh5 as lh5
 from lgdo import Array

-sto = lh5.LH5Store()

+def replace_path(d, old_path, new_path):
+    if isinstance(d, dict):
+        for k, v in d.items():
+            d[k] = replace_path(v, old_path, new_path)
+    elif isinstance(d, list):
+        for i in range(len(d)):
+            d[i] = replace_path(d[i], old_path, new_path)
+    elif isinstance(d, str):
+        if 
old_path in d: + d = d.replace(old_path, new_path) + return d argparser = argparse.ArgumentParser() -argparser.add_argument("--input", help="input file", nargs="*", type=str) -argparser.add_argument("--output", help="output file", nargs="*", type=str) +argparser.add_argument("--input", help="input file", nargs="*", type=str, required=True) +argparser.add_argument("--output", help="output file", type=str, required=True) +argparser.add_argument("--in_db", help="in db file (used for when lh5 files refered to in db)", type=str, required=False) +argparser.add_argument("--out_db", help="lh5 file (used for when lh5 files refered to in db)", type=str, required=False) args = argparser.parse_args() +# change to only have 1 output file for mutliple inputs +# don't care about processing step, check if extension matches + + channel_files = args.input -for _i, out_file in enumerate(args.output): - file_extension = pathlib.Path(out_file).suffix - processing_step = os.path.splitext(out_file)[0].split("-")[-1] - if file_extension == ".json": - out_dict = {} - for channel in channel_files: - if os.path.splitext(channel)[0].split("-")[-1] == processing_step: - with open(channel) as r: - channel_dict = json.load(r) - ( - experiment, - period, - run, - datatype, - timestamp, - channel_name, - name, - ) = os.path.basename(channel).split("-") - out_dict[channel_name] = channel_dict - - for key in channel_dict: - key_dict = channel_dict[key] - for key_pars in key_dict: - if isinstance(key_dict[key_pars], str) and ( - "loadlh5" in key_dict[key_pars] - ): - out_lh5 = out_file.replace(".json", ".lh5") - out_dict[channel_name][key][ - key_pars - ] = f"loadlh5('{out_lh5}', '{channel_name}/{key}')" - else: - pass - - pathlib.Path(os.path.dirname(out_file)).mkdir(parents=True, exist_ok=True) - with open(out_file, "w") as w: - json.dump(out_dict, w, indent=4) - - elif file_extension == ".pkl": - out_dict = {} - for channel in channel_files: - if os.path.splitext(channel)[0].split("-")[-1] == processing_step: - with open(channel, "rb") as r: - channel_dict = pkl.load(r) - ( - experiment, - period, - run, - datatype, - timestamp, - channel_name, - name, - ) = os.path.basename(channel).split("-") - out_dict[channel_name] = channel_dict - else: - pass - pathlib.Path(os.path.dirname(out_file)).mkdir(parents=True, exist_ok=True) - with open(out_file, "wb") as w: - pkl.dump(out_dict, w, protocol=pkl.HIGHEST_PROTOCOL) - - elif file_extension == ".dat" or file_extension == ".dir": - _out_file = os.path.splitext(out_file)[0] - pathlib.Path(os.path.dirname(_out_file)).mkdir(parents=True, exist_ok=True) - common_dict = {} - with shelve.open(_out_file, "c", protocol=pkl.HIGHEST_PROTOCOL) as shelf: - for channel in channel_files: - if os.path.splitext(channel)[0].split("-")[-1] == processing_step: - with open(channel, "rb") as r: - channel_dict = pkl.load(r) - ( - experiment, - period, - run, - datatype, - timestamp, - channel_name, - name, - ) = os.path.basename(channel).split("-") - if isinstance(channel_dict, dict) and "common" in list(channel_dict): - chan_common_dict = channel_dict.pop("common") - common_dict[channel_name] = chan_common_dict - shelf[channel_name] = channel_dict - else: - pass - if len(common_dict) > 0: - shelf["common"] = common_dict - - elif file_extension == ".lh5": + +file_extension = pathlib.Path(args.output).suffix + +if file_extension == ".dat" or file_extension == ".dir": + out_file = os.path.splitext(args.output)[0] +else: + out_file = args.output + +rng = np.random.default_rng() +rand_num = 
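To make the behaviour of replace_path concrete, a small usage sketch; it assumes the function defined above is in scope, and the paths and keys are hypothetical:

```python
# replace_path recurses through nested dicts and lists, rewriting any
# string value that contains old_path; non-string leaves pass through.
db = {
    "ch1": {"dplms": "loadlh5('/tmp/ch1-par_dsp.lh5', 'ch1/dplms')"},
    "files": ["/tmp/ch1-par_dsp.lh5", 42],
}
merged = replace_path(db, "/tmp/ch1-par_dsp.lh5", "/prod/par_dsp.lh5")
# merged["ch1"]["dplms"] == "loadlh5('/prod/par_dsp.lh5', 'ch1/dplms')"
# merged["files"]        == ["/prod/par_dsp.lh5", 42]
```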
f"{rng.integers(0,99999):05d}" +temp_output = f"{out_file}.{rand_num}" + +pathlib.Path(os.path.dirname(args.output)).mkdir(parents=True, exist_ok=True) + + +if file_extension == ".json": + out_dict = {} + for channel in channel_files: + if pathlib.Path(channel).suffix == file_extension: + channel_dict = Props.read_from(channel) + + fkey = ChannelProcKey.get_filekey_from_pattern(os.path.basename(channel)) + channel_name = fkey.channel + out_dict[channel_name] = channel_dict + else: + raise RuntimeError("Output file extension does not match input file extension") + + with open(temp_output, "w") as w: + json.dump(out_dict, w, indent=4) + + os.rename(temp_output, out_file) + +elif file_extension == ".pkl": + out_dict = {} + for channel in channel_files: + with open(channel, "rb") as r: + channel_dict = pkl.load(r) + fkey = ChannelProcKey.get_filekey_from_pattern(os.path.basename(channel)) + channel_name = fkey.channel + out_dict[channel_name] = channel_dict + + with open(temp_output, "wb") as w: + pkl.dump(out_dict, w, protocol=pkl.HIGHEST_PROTOCOL) + + os.rename(temp_output, out_file) + +elif file_extension == ".dat" or file_extension == ".dir": + common_dict = {} + with shelve.open(out_file, "c", protocol=pkl.HIGHEST_PROTOCOL) as shelf: for channel in channel_files: - if os.path.splitext(channel)[0].split("-")[-1] == processing_step: - with open(channel) as r: - channel_dict = json.load(r) - ( - experiment, - period, - run, - datatype, - timestamp, - channel_name, - name, - ) = os.path.basename(channel).split("-") - - out_dict[channel_name] = channel_dict - - for key in channel_dict: - key_dict = channel_dict[key] - for key_pars in key_dict: - if isinstance(key_dict[key_pars], str) and ( - "loadlh5" in key_dict[key_pars] - ): - path_to_file = key_dict[key_pars].split("'")[1] - path_in_file = key_dict[key_pars].split("'")[3] - data = sto.read_object(path_in_file, path_to_file)[0].nda - sto.write_object( - Array(data), - name=key, - lh5_file=out_file, - wo_mode="overwrite", - group=channel_name, - ) - else: - pass + with open(channel, "rb") as r: + channel_dict = pkl.load(r) + fkey = ChannelProcKey.get_filekey_from_pattern(os.path.basename(channel)) + channel_name = fkey.channel + if isinstance(channel_dict, dict) and "common" in list(channel_dict): + chan_common_dict = channel_dict.pop("common") + common_dict[channel_name] = chan_common_dict + shelf[channel_name] = channel_dict + if len(common_dict) > 0: + shelf["common"] = common_dict + + +elif file_extension == ".lh5": + sto = lh5.LH5Store() + + if args.in_db: + db_dict = Props.read_from(args.in_db) + for channel in channel_files: + if pathlib.Path(channel).suffix == file_extension: + fkey = ChannelProcKey.get_filekey_from_pattern(os.path.basename(channel)) + channel_name = fkey.channel + + tb_in = sto.read( + f"{channel_name}", + channel + )[0] + + sto.write( + tb_in, + name = channel_name, + lh5_file = temp_output, + wo_mode="a", + ) + if args.in_db: + db_dict[channel_name] = replace_path(db_dict[channel_name], channel, args.output) + else: + raise RuntimeError("Output file extension does not match input file extension") + if args.out_db: + with open(args.out_db, "w") as w: + json.dump(db_dict, w, indent=4) + + os.rename(temp_output, out_file) \ No newline at end of file From 8e4b2b14317348dd1cbe33f74c9cc3e3e120380f Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sun, 10 Mar 2024 21:33:00 +0100 Subject: [PATCH 013/103] legendmeta info logging --- scripts/pars_dsp_nopt.py | 5 +++-- scripts/pars_dsp_tau.py | 1 + scripts/pars_pht_aoecal.py | 
1 + scripts/pars_pht_lqcal.py | 1 + 4 files changed, 6 insertions(+), 2 deletions(-) diff --git a/scripts/pars_dsp_nopt.py b/scripts/pars_dsp_nopt.py index 1b2e798..bed75bf 100644 --- a/scripts/pars_dsp_nopt.py +++ b/scripts/pars_dsp_nopt.py @@ -43,7 +43,8 @@ logging.getLogger("parse").setLevel(logging.INFO) logging.getLogger("lgdo").setLevel(logging.INFO) logging.getLogger("h5py._conv").setLevel(logging.INFO) -logging.getLogger("pygama.dsp.processing_chain").setLevel(logging.INFO) +logging.getLogger("dspeed.processing_chain").setLevel(logging.INFO) +logging.getLogger("legendmeta").setLevel(logging.INFO) log = logging.getLogger(__name__) @@ -87,7 +88,7 @@ if args.plot_path: out_dict, plot_dict = pno.noise_optimization( - tb_data, dsp_config, db_dict, opt_dict, args.channel, display=1 + tb_data, dsp_config, db_dict.copy(), opt_dict, args.channel, display=1 ) else: out_dict = pno.noise_optimization( diff --git a/scripts/pars_dsp_tau.py b/scripts/pars_dsp_tau.py index 0a315ff..04d4cdc 100644 --- a/scripts/pars_dsp_tau.py +++ b/scripts/pars_dsp_tau.py @@ -35,6 +35,7 @@ logging.getLogger("lgdo").setLevel(logging.INFO) logging.getLogger("h5py").setLevel(logging.INFO) logging.getLogger("matplotlib").setLevel(logging.INFO) +logging.getLogger("legendmeta").setLevel(logging.INFO) sto = lh5.LH5Store() diff --git a/scripts/pars_pht_aoecal.py b/scripts/pars_pht_aoecal.py index a646857..49303e7 100644 --- a/scripts/pars_pht_aoecal.py +++ b/scripts/pars_pht_aoecal.py @@ -98,6 +98,7 @@ def aoe_calibration( logging.getLogger("lgdo").setLevel(logging.INFO) logging.getLogger("h5py").setLevel(logging.INFO) logging.getLogger("matplotlib").setLevel(logging.INFO) +logging.getLogger("legendmeta").setLevel(logging.INFO) def run_splitter(files): diff --git a/scripts/pars_pht_lqcal.py b/scripts/pars_pht_lqcal.py index 2d1bc06..2e656d6 100644 --- a/scripts/pars_pht_lqcal.py +++ b/scripts/pars_pht_lqcal.py @@ -116,6 +116,7 @@ def lq_calibration( logging.getLogger("lgdo").setLevel(logging.INFO) logging.getLogger("h5py").setLevel(logging.INFO) logging.getLogger("matplotlib").setLevel(logging.INFO) +logging.getLogger("legendmeta").setLevel(logging.INFO) def run_splitter(files): From ed5a32c720c469bb386f4627bf88761198e4e144 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sun, 10 Mar 2024 21:33:35 +0100 Subject: [PATCH 014/103] add psp --- scripts/util/patterns.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/scripts/util/patterns.py b/scripts/util/patterns.py index 21fa1a5..c27ed5e 100644 --- a/scripts/util/patterns.py +++ b/scripts/util/patterns.py @@ -432,16 +432,18 @@ def get_pattern_pars_overwrite(setup, tier, name=None): ) -def get_pattern_pars_tmp(setup, tier, name=None): +def get_pattern_pars_tmp(setup, tier, name=None, datatype=None): + if datatype is None: + datatype = "{datatype}" if name is None: return os.path.join( f"{tmp_par_path(setup)}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-par_" + tier + ".json", + "{experiment}-{period}-{run}-"+datatype+"-{timestamp}-par_" + tier + ".json", ) else: return os.path.join( f"{tmp_par_path(setup)}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-par_" + "{experiment}-{period}-{run}-"+datatype+"-{timestamp}-par_" + tier + "_" + name From d645ed34055561655ff7f4ed3d3660b46294632a Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sun, 10 Mar 2024 21:34:10 +0100 Subject: [PATCH 015/103] update logging and better out dict handling --- scripts/pars_dsp_eopt.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git 
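The get_pattern_pars_tmp change in the "add psp" patch is easiest to see by rendering both cases. A stripped-down sketch with the tmp-par directory inlined as a plain string:

```python
import os

def pars_tmp_pattern(tier, datatype=None):
    # None keeps the "{datatype}" wildcard for Snakemake to resolve;
    # passing e.g. "cal" pins the datatype at rule-definition time.
    if datatype is None:
        datatype = "{datatype}"
    return os.path.join(
        "/tmp/par",
        "{experiment}-{period}-{run}-" + datatype + "-{timestamp}-par_" + tier + ".json",
    )

print(pars_tmp_pattern("psp"))
# /tmp/par/{experiment}-{period}-{run}-{datatype}-{timestamp}-par_psp.json
print(pars_tmp_pattern("psp", datatype="cal"))
# /tmp/par/{experiment}-{period}-{run}-cal-{timestamp}-par_psp.json
```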
a/scripts/pars_dsp_eopt.py b/scripts/pars_dsp_eopt.py index b61663c..ff9aa72 100644 --- a/scripts/pars_dsp_eopt.py +++ b/scripts/pars_dsp_eopt.py @@ -52,7 +52,8 @@ logging.getLogger("lgdo").setLevel(logging.INFO) logging.getLogger("h5py").setLevel(logging.INFO) logging.getLogger("matplotlib").setLevel(logging.INFO) -logging.getLogger("pygama.dsp.processing_chain").setLevel(logging.INFO) +logging.getLogger("dspeed.processing_chain").setLevel(logging.INFO) +logging.getLogger("legendmeta").setLevel(logging.INFO) log = logging.getLogger(__name__) @@ -395,8 +396,10 @@ "expression": "trapEftp*(1+dt_eff*a)", "parameters": {"a": round(bopt_trap.optimal_results["alpha"], 9)}, } - - db_dict.update({"ctc_params": out_alpha_dict}) + if "ctc_params" in db_dict: + db_dict["ctc_params"].update(out_alpha_dict) + else: + db_dict.update({"ctc_params": out_alpha_dict}) pathlib.Path(os.path.dirname(args.qbb_grid_path)).mkdir(parents=True, exist_ok=True) with open(args.qbb_grid_path, "wb") as f: From 85d135abedfbf545960543cde26a3737ebefc604 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sun, 10 Mar 2024 21:34:42 +0100 Subject: [PATCH 016/103] 2stage handling --- scripts/pars_pht_partcal.py | 149 +++++++++++++++++++++++++++++++++--- 1 file changed, 139 insertions(+), 10 deletions(-) diff --git a/scripts/pars_pht_partcal.py b/scripts/pars_pht_partcal.py index a148946..f3c926e 100644 --- a/scripts/pars_pht_partcal.py +++ b/scripts/pars_pht_partcal.py @@ -33,6 +33,49 @@ def update_cal_dicts(cal_dicts, update_dict): cal_dicts.update(update_dict) return cal_dicts +def get_results_dict(ecal_class, data): + if ecal_class.results: + fwhm_linear = ecal_class.fwhm_fit_linear.copy() + fwhm_linear["parameters"] = fwhm_linear["parameters"].to_dict() + fwhm_linear["uncertainties"] = fwhm_linear["uncertainties"].to_dict() + fwhm_linear["cov"] = fwhm_linear["cov"].tolist() + fwhm_quad = ecal_class.fwhm_fit_quadratic.copy() + fwhm_quad["parameters"] = fwhm_quad["parameters"].to_dict() + fwhm_quad["uncertainties"] = fwhm_quad["uncertainties"].to_dict() + fwhm_quad["cov"] = fwhm_quad["cov"].tolist() + + pk_dict = { + Ei: { + "function": func_i.__name__, + "module": func_i.__module__, + "parameters_in_keV": parsi.to_dict(), + "uncertainties_in_keV": errorsi.to_dict(), + "p_val": pvali, + "fwhm_in_keV": list(fwhmi), + "pk_position":(posi, posuni), + } + for i, (Ei, parsi, errorsi, pvali, fwhmi, posi, posuni, func_i) in enumerate( + zip( + ecal_class.results["fitted_keV"], + ecal_class.results["pk_pars"][ecal_class.results["pk_validities"]], + ecal_class.results["pk_errors"][ecal_class.results["pk_validities"]], + ecal_class.results["pk_pvals"][ecal_class.results["pk_validities"]], + ecal_class.results["pk_fwhms"], + ecal_class.results["pk_pos"], + ecal_class.results["pk_pos_uncertainties"], + ecal_class.funcs, + ) + ) + } + + return { + "eres_linear": fwhm_linear, + "eres_quadratic": fwhm_quad, + "fitted_peaks": ecal_class.results["fitted_keV"].tolist(), + "pk_fits": pk_dict, + } + else: + return {} def partition_energy_cal_th( data: pd.Datframe, @@ -44,28 +87,113 @@ def partition_energy_cal_th( plot_options: dict | None = None, simplex: bool = True, tail_weight: int = 20, - # cal_energy_params: list = None, - # deg:int=2, + cal_energy_params: list = None, + deg:int=2, ) -> tuple(dict, dict, dict, dict): results_dict = {} plot_dict = {} full_object_dict = {} - # if cal_energy_params is None: - # cal_energy_params = [energy_param + "_cal" for energy_param in energy_params] - for energy_param in energy_params: - 
full_object_dict[energy_param] = high_stats_fitting( + if cal_energy_params is None: + cal_energy_params = [energy_param + "_cal" for energy_param in energy_params] + glines = [ + 238.632, + 511, + 583.191, + 727.330, + 763, + 785, + 860.564, + 893, + 1079, + 1513, + 1592.53, + 1620.50, + 2103.53, + 2614.50, + 3125, + 3198, + 3474, + ] # gamma lines used for calibration + range_keV = [ + (10, 10), + (30, 30), + (30, 30), + (30, 30), + (30, 15), + (15, 30), + (30, 25), + (25, 30), + (30, 30), + (30, 30), + (30, 20), + (20, 30), + (30, 30), + (30, 30), + (30, 30), + (30, 30), + (30, 30), + ] # side bands width + funcs = [ + pgf.extended_gauss_step_pdf, # probably should be gauss on exp + pgf.extended_gauss_step_pdf, + pgf.extended_radford_pdf, + pgf.extended_radford_pdf, + pgf.extended_gauss_step_pdf, + pgf.extended_gauss_step_pdf, + pgf.extended_radford_pdf, + pgf.extended_gauss_step_pdf, + pgf.extended_gauss_step_pdf, + pgf.extended_gauss_step_pdf, + pgf.extended_radford_pdf, + pgf.extended_radford_pdf, + pgf.extended_radford_pdf, + pgf.extended_radford_pdf, + pgf.extended_gauss_step_pdf, + pgf.extended_gauss_step_pdf, + pgf.extended_gauss_step_pdf, + ] + gof_funcs = [ + pgf.gauss_step_pdf, + pgf.gauss_step_pdf, + pgf.radford_pdf, + pgf.radford_pdf, + pgf.gauss_step_pdf, + pgf.gauss_step_pdf, + pgf.radford_pdf, + pgf.gauss_step_pdf, + pgf.gauss_step_pdf, + pgf.gauss_step_pdf, + pgf.radford_pdf, + pgf.radford_pdf, + pgf.radford_pdf, + pgf.radford_pdf, + pgf.gauss_step_pdf, + pgf.gauss_step_pdf, + pgf.gauss_step_pdf, + ] + + for energy_param, cal_energy_param in zip(energy_params, cal_energy_params): + full_object_dict[cal_energy_param] = high_stats_fitting( energy_param=energy_param, + glines=glines, + range_keV=range_keV, + funcs=funcs, + gof_funcs=gof_funcs, selection_string=selection_string, threshold=threshold, p_val=p_val, plot_options=plot_options, simplex=simplex, tail_weight=tail_weight, + cal_energy_param=cal_energy_param, + deg=deg, + fixed={1:1} ) - full_object_dict[energy_param].fit_peaks(data) - results_dict[energy_param] = full_object_dict[energy_param].get_results_dict(data) - if full_object_dict[energy_param].results: - plot_dict[energy_param] = full_object_dict[energy_param].fill_plot_dict(data).copy() + full_object_dict[cal_energy_param].update_calibration(data) + results_dict[cal_energy_param] = get_results_dict(full_object_dict[cal_energy_param], data) + hit_dicts = update_cal_dicts(hit_dicts, full_object_dict[cal_energy_param].hit_dict) + if full_object_dict[cal_energy_param].results: + plot_dict[cal_energy_param] = full_object_dict[cal_energy_param].fill_plot_dict(data).copy() log.info("Finished all calibrations") return hit_dicts, results_dict, plot_dict, full_object_dict @@ -96,6 +224,7 @@ def partition_energy_cal_th( logging.getLogger("lgdo").setLevel(logging.INFO) logging.getLogger("h5py").setLevel(logging.INFO) logging.getLogger("matplotlib").setLevel(logging.INFO) +logging.getLogger("legendmeta").setLevel(logging.INFO) def run_splitter(files): From d6570fa64e1c13cc108c9c1d81223e78a7351c14 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sun, 10 Mar 2024 21:35:04 +0100 Subject: [PATCH 017/103] first versions --- scripts/pars_dsp_event_selection.py | 108 ++++++++++++++++++++++++++++ scripts/pars_hit_qc.py | 104 +++++++++++++++++++++++++++ 2 files changed, 212 insertions(+) create mode 100644 scripts/pars_dsp_event_selection.py create mode 100644 scripts/pars_hit_qc.py diff --git a/scripts/pars_dsp_event_selection.py b/scripts/pars_dsp_event_selection.py new file 
mode 100644 index 0000000..9fc7197 --- /dev/null +++ b/scripts/pars_dsp_event_selection.py @@ -0,0 +1,108 @@ +import argparse +import json +import logging +import os +import pathlib +import pickle as pkl +import time +import warnings + +os.environ["LGDO_CACHE"] = "false" +os.environ["LGDO_BOUNDSCHECK"] = "false" +os.environ["DSPEED_CACHE"] = "false" +os.environ["DSPEED_BOUNDSCHECK"] = "false" + +import lgdo.lh5 as lh5 +import numpy as np +import pygama.pargen.energy_optimisation as om +from legendmeta import LegendMetadata +from legendmeta.catalog import Props +from pygama.pargen.utils import get_tcm_pulser_ids + +warnings.filterwarnings(action="ignore", category=RuntimeWarning) + +argparser = argparse.ArgumentParser() +argparser.add_argument("--raw_filelist", help="raw_filelist", type=str) +argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, required=True) +argparser.add_argument("--decay_const", help="decay_const", type=str, required=True) +argparser.add_argument("--configs", help="configs", type=str, required=True) + +argparser.add_argument("--log", help="log_file", type=str) + +argparser.add_argument("--datatype", help="Datatype", type=str, required=True) +argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) +argparser.add_argument("--channel", help="Channel", type=str, required=True) + +argparser.add_argument("--peak_file", help="peak_file", type=str, required=True) +args = argparser.parse_args() + +logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") +logging.getLogger("numba").setLevel(logging.INFO) +logging.getLogger("parse").setLevel(logging.INFO) +logging.getLogger("lgdo").setLevel(logging.INFO) +logging.getLogger("h5py").setLevel(logging.INFO) +logging.getLogger("matplotlib").setLevel(logging.INFO) +logging.getLogger("dspeed.processing_chain").setLevel(logging.INFO) + + +log = logging.getLogger(__name__) + +t0 = time.time() + +conf = LegendMetadata(path=args.configs) +configs = conf.on(args.timestamp, system=args.datatype) +dsp_config = configs["snakemake_rules"]["pars_dsp_peak_selection"]["inputs"]["processing_chain"][ + args.channel +] +peak_json = configs["snakemake_rules"]["pars_dsp_peak_selection"]["inputs"]["peak_config"][args.channel] + +peak_dict = Props.read_from(opt_json) +db_dict = Props.read_from(args.decay_const) + +if opt_dict.pop("run_selection") is True: + with open(args.raw_filelist) as f: + files = f.read().splitlines() + + raw_files = sorted(files) + + # get pulser mask from tcm files + with open(args.tcm_filelist) as f: + tcm_files = f.read().splitlines() + tcm_files = sorted(np.unique(tcm_files)) + ids, mask = get_tcm_pulser_ids( + tcm_files, args.channel, peak_dict["pulser_multiplicity_threshold"] + ) + + sto = lh5.LH5Store() + idx_events, idx_list = om.event_selection( + raw_files, + f"{args.channel}/raw", + dsp_config, + db_dict, + peaks_keV, + np.arange(0, len(peaks_keV), 1).tolist(), + kev_widths, + pulser_mask=mask, + cut_parameters=peak_dict["cut_parameters"], + n_events=peak_dict["n_events"], + threshold=peak_dict["threshold"], + wf_field=peak_dict["wf_field"], + ) + + tb_data = sto.read( + f"{args.channel}/raw", + raw_files, + idx=idx_events, + n_rows=opt_dict["n_events"], + )[0] + + pathlib.Path(os.path.dirname(args.peak_file)).mkdir(parents=True, exist_ok=True) + sto.write( + tb_data, + name="raw", + lh5_file=args.peak_file, + wo_mode="overwrite", + ) +else: + pathlib.Path(os.path.dirname(args.peak_file)).mkdir(parents=True, exist_ok=True) + pathlib.Path(args.peak_file).touch() \ 
No newline at end of file
diff --git a/scripts/pars_hit_qc.py b/scripts/pars_hit_qc.py
new file mode 100644
index 0000000..05254d8
--- /dev/null
+++ b/scripts/pars_hit_qc.py
@@ -0,0 +1,105 @@
+from __future__ import annotations
+
+import argparse
+import json
+import logging
+import os
+import pathlib
+import pickle as pkl
+import warnings
+
+import numpy as np
+from legendmeta import LegendMetadata
+from legendmeta.catalog import Props
+from pygama.pargen.utils import get_tcm_pulser_ids, load_data
+from pygama.pargen.cuts import generate_cuts
+
+log = logging.getLogger(__name__)
+
+warnings.filterwarnings(action="ignore", category=RuntimeWarning)
+
+
+if __name__ == "__main__":
+    argparser = argparse.ArgumentParser()
+    argparser.add_argument("--files", help="files", nargs="*", type=str)
+    argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, required=True)
+
+    argparser.add_argument("--configs", help="config", type=str, required=True)
+    argparser.add_argument("--datatype", help="Datatype", type=str, required=True)
+    argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True)
+    argparser.add_argument("--channel", help="Channel", type=str, required=True)
+    argparser.add_argument("--tier", help="tier", type=str, default="hit")
+
+    argparser.add_argument("--log", help="log_file", type=str)
+
+    argparser.add_argument("--plot_path", help="plot_path", type=str, required=False, nargs="*")
+    argparser.add_argument("--save_path", help="save_path", type=str, nargs="*")
+    args = argparser.parse_args()
+
+    logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w")
+    logging.getLogger("numba").setLevel(logging.INFO)
+    logging.getLogger("parse").setLevel(logging.INFO)
+    logging.getLogger("lgdo").setLevel(logging.INFO)
+    logging.getLogger("h5py").setLevel(logging.INFO)
+    logging.getLogger("matplotlib").setLevel(logging.INFO)
+    logging.getLogger("legendmeta").setLevel(logging.INFO)
+
+
+    # get metadata dictionary
+    configs = LegendMetadata(path=args.configs)
+    channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]
+    if args.tier == "hit":
+        channel_dict = channel_dict["pars_hit_qc"]["inputs"]["ecal_config"][args.channel]
+    elif args.tier == "pht":
+        channel_dict = channel_dict["pars_pht_qc"]["inputs"]["ecal_config"][args.channel]
+    else:
+        msg = "invalid tier"
+        raise ValueError(msg)
+
+    kwarg_dict = Props.read_from(channel_dict)
+
+    # load data in
+    data, threshold_mask = load_data(
+        args.files,
+        f"{args.channel}/dsp",
+        {},  # no calibration operations to apply at this stage
+        list(kwarg_dict["cut_parameters"])
+        + ["timestamp", "trapTmax"],
+        threshold=kwarg_dict["threshold"],
+        return_selection_mask=True,
+        cal_energy_param="trapTmax",
+    )
+
+    # get pulser mask from tcm files
+    with open(args.tcm_filelist) as f:
+        tcm_files = f.read().splitlines()
+    tcm_files = sorted(np.unique(tcm_files))
+    ids, mask = get_tcm_pulser_ids(
+        tcm_files, args.channel, kwarg_dict.pop("pulser_multiplicity_threshold")
+    )
+    data["is_pulser"] = mask[threshold_mask]
+
+    hit_dict, plot_dict = generate_cuts(
+        data,
+        kwarg_dict["cut_parameters"],
+        kwarg_dict.get("rounding", 4),
+        display=1 if args.plot_path else 0,
+    )
+    if isinstance(args.save_path, str):
+        save_path = [args.save_path]
+    else:
+        save_path = args.save_path
+    for file in save_path:
+        pathlib.Path(os.path.dirname(file)).mkdir(parents=True, exist_ok=True)
+        with open(file, "w") as f:
+            json.dump(hit_dict, f, indent=4)
+
+    if args.plot_path:
+        if isinstance(args.plot_path, str):
+            plot_path = [args.plot_path]
+        else:
+            plot_path = args.plot_path
+        for 
file in plot_path: + pathlib.Path(os.path.dirname(plot_path)).mkdir(parents=True, exist_ok=True) + with open(plot_path, "wb") as f: + pkl.dump({"qc":plot_dict}, f, protocol=pkl.HIGHEST_PROTOCOL) \ No newline at end of file From 98d5117e6236b5032b886008a8b44532253f595b Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sun, 10 Mar 2024 21:35:42 +0100 Subject: [PATCH 018/103] wildcard constraints and move pht dict gen to rules --- Snakefile | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/Snakefile b/Snakefile index 17fa780..279f320 100644 --- a/Snakefile +++ b/Snakefile @@ -40,24 +40,17 @@ swenv = runcmd(setup) part = ds.dataset_file(setup, os.path.join(configs, "partitions.json")) basedir = workflow.basedir +wildcard_constraints: + experiment="\w+", + period="\w+", + run="\w+", + datatype="\w+", + timestamp="\w+", + channel="\w+", + include: "rules/common.smk" include: "rules/main.smk" - - -localrules: - gen_filelist, - autogen_output, - - -ds.pars_key_resolve.write_par_catalog( - ["-*-*-*-cal"], - os.path.join(pars_path(setup), "pht", "validity.jsonl"), - get_pattern_tier_raw(setup), - {"cal": ["par_pht"], "lar": ["par_pht"]}, -) - - include: "rules/tcm.smk" include: "rules/dsp.smk" include: "rules/hit.smk" @@ -66,6 +59,10 @@ include: "rules/evt.smk" include: "rules/skm.smk" include: "rules/blinding_calibration.smk" +localrules: + gen_filelist, + autogen_output, + onstart: print("Starting workflow") From 6139322036fe09082a5d4173c8de187b251c5419 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Thu, 21 Mar 2024 00:27:22 +0100 Subject: [PATCH 019/103] add psp paths --- scripts/util/patterns.py | 5 +++-- scripts/util/utils.py | 10 ++++++++-- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/scripts/util/patterns.py b/scripts/util/patterns.py index c27ed5e..90c8f2c 100644 --- a/scripts/util/patterns.py +++ b/scripts/util/patterns.py @@ -10,6 +10,7 @@ par_hit_path, par_overwrite_path, par_pht_path, + par_psp_path, par_raw_path, par_tcm_path, pars_path, @@ -317,7 +318,7 @@ def get_pattern_par_evt(setup, name=None, extension="json"): def get_pattern_par_psp(setup, name=None, extension="json"): if name is not None: return os.path.join( - f"{par_evt_path(setup)}", + f"{par_psp_path(setup)}", "cal", "{period}", "{run}", @@ -325,7 +326,7 @@ def get_pattern_par_psp(setup, name=None, extension="json"): ) else: return os.path.join( - f"{par_evt_path(setup)}", + f"{par_psp_path(setup)}", "cal", "{period}", "{run}", diff --git a/scripts/util/utils.py b/scripts/util/utils.py index d767610..8b11b3b 100644 --- a/scripts/util/utils.py +++ b/scripts/util/utils.py @@ -128,6 +128,8 @@ def par_dsp_path(setup): def par_hit_path(setup): return setup["paths"]["par_hit"] +def par_psp_path(setup): + return setup["paths"]["par_psp"] def par_pht_path(setup): return setup["paths"]["par_pht"] @@ -150,10 +152,14 @@ def get_pars_path(setup, tier): return par_dsp_path(setup) elif tier == "hit": return par_hit_path(setup) - elif tier == "pht": - return par_pht_path(setup) elif tier == "evt": return par_evt_path(setup) + elif tier == "psp": + return par_psp_path(setup) + elif tier == "pht": + return par_pht_path(setup) + elif tier == "pet": + return par_pet_path(setup) else: msg = f"no tier matching:{tier}" raise ValueError(msg) From 0a741cb0ff6e13f3c915bbc70d87d624701b14b7 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Thu, 21 Mar 2024 00:27:42 +0100 Subject: [PATCH 020/103] updates for pargen refactor --- scripts/pars_dsp_dplms.py | 55 ++--- scripts/pars_dsp_eopt.py | 
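The wildcard_constraints block added to the Snakefile in patch 018 relies on the fact that \w does not match "-", so wildcards can no longer swallow the dashes that separate the fields of a file key. A sketch of the underlying regex behaviour, with a hypothetical file key:

```python
# Without constraints a wildcard can greedily absorb "-" separators;
# pinning each field to \w+ forbids that, since \w excludes "-".
import re

pattern = r"(?P<experiment>\w+)-(?P<period>\w+)-(?P<run>\w+)"
m = re.fullmatch(pattern, "l200-p03-r001")
assert m.group("period") == "p03"
# a key with an extra dashed field no longer matches at all
assert re.fullmatch(pattern, "l200-p03-r001-extra") is None
```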
178 ++++++-------- scripts/pars_dsp_event_selection.py | 350 ++++++++++++++++++++++------ scripts/pars_dsp_nopt.py | 4 +- scripts/pars_dsp_tau.py | 40 +++- 5 files changed, 401 insertions(+), 226 deletions(-) diff --git a/scripts/pars_dsp_dplms.py b/scripts/pars_dsp_dplms.py index 52bb811..67b8bdd 100644 --- a/scripts/pars_dsp_dplms.py +++ b/scripts/pars_dsp_dplms.py @@ -16,14 +16,11 @@ from legendmeta import LegendMetadata from legendmeta.catalog import Props from pygama.pargen.dplms_ge_dict import dplms_ge_dict -from pygama.pargen.energy_optimisation import event_selection -from pygama.pargen.utils import get_tcm_pulser_ids from lgdo import Array, Table argparser = argparse.ArgumentParser() argparser.add_argument("--fft_raw_filelist", help="fft_raw_filelist", type=str) -argparser.add_argument("--cal_raw_filelist", help="cal_raw_filelist", type=str) -argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, required=True) +argparser.add_argument("--peak_file", help="tcm_filelist", type=str, required=True) argparser.add_argument("--inplots", help="in_plot_path", type=str) argparser.add_argument("--log", help="log_file", type=str) @@ -64,8 +61,6 @@ if dplms_dict["run_dplms"] is True: with open(args.fft_raw_filelist) as f: fft_files = sorted(f.read().splitlines()) - with open(args.cal_raw_filelist) as f: - cal_files = sorted(f.read().splitlines()) t0 = time.time() log.info("\nLoad fft data") @@ -77,35 +72,20 @@ t1 = time.time() log.info(f"Time to load fft data {(t1-t0):.2f} s, total events {len(raw_fft)}") - log.info("\nRemoving pulser") - # get pulser mask from tcm files - with open(args.tcm_filelist) as f: - tcm_files = f.read().splitlines() - tcm_files = sorted(np.unique(tcm_files)) - ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, dplms_dict.pop("pulser_multiplicity_threshold") - ) - log.info("\nRunning event selection") - peaks_keV = np.array(dplms_dict["peaks_keV"]) + peaks_kev = np.array(dplms_dict["peaks_kev"]) kev_widths = [tuple(kev_width) for kev_width in dplms_dict["kev_widths"]] - idx_events, idx_list = event_selection( - cal_files, - f"{args.channel}/raw", - dsp_config, - db_dict, - peaks_keV, - np.arange(0, len(peaks_keV), 1).tolist(), - kev_widths, - pulser_mask=mask, - cut_parameters=dplms_dict["wfs_cut_pars"], - n_events=dplms_dict["n_signals"], - threshold=dplms_dict["threshold"], - ) + + peaks_rounded = [int(peak) for peak in peaks_kev] + peaks = sto.read(f"{args.channel}/raw", args.peak_file , field_mask=["peak"]) [0]["peak"].nda + ids = np.in1d(peaks, peaks_rounded) + peaks = peaks[ids] + idx_list = [np.where(peaks==peak)[0] for peak in peaks_rounded] + raw_cal = sto.read( f"{args.channel}/raw", - cal_files, - idx=idx_events, + args.peak_file, + idx=ids )[0] log.info(f"Time to run event selection {(time.time()-t1):.2f} s, total events {len(raw_cal)}") @@ -126,9 +106,6 @@ inplot_dict = pkl.load(r) inplot_dict.update({"dplms":plot_dict}) - pathlib.Path(os.path.dirname(args.plot_path)).mkdir(parents=True, exist_ok=True) - with open(args.plot_path, "wb") as f: - pkl.dump(plot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) else: out_dict = dplms_ge_dict( raw_fft, @@ -146,6 +123,11 @@ else: out_dict = {} dplms_pars = Table(col_dict={"coefficients":Array([])}) + if args.inplots: + with open(args.inplots, "rb") as r: + inplot_dict = pkl.load(r) + else: + inplot_dict={} db_dict.update(out_dict) @@ -160,3 +142,8 @@ pathlib.Path(os.path.dirname(args.dsp_pars)).mkdir(parents=True, exist_ok=True) with open(args.dsp_pars, "w") as w: json.dump(db_dict, w, indent=2) + 
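The dplms rewrite above swaps on-the-fly event selection for a pre-built peak file that carries an integer peak label per stored waveform. A minimal numpy sketch of the index bookkeeping, where np.isin is the modern spelling of the np.in1d used in the patch:

```python
import numpy as np

peaks_kev = np.array([583.191, 1592.53, 2614.50])
peaks_rounded = [int(p) for p in peaks_kev]

# hypothetical "peak" column: one integer label per stored waveform
peak_col = np.array([583, 2614, 1592, 583, 42, 2614])

ids = np.isin(peak_col, peaks_rounded)  # keep only the wanted peaks
peaks = peak_col[ids]
idx_list = [np.where(peaks == p)[0] for p in peaks_rounded]
# idx_list[0] -> array([0, 3]): positions of the 583 keV events within
# the selected subset, which is what the fitting routines consume
```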
+if args.plot_path: + pathlib.Path(os.path.dirname(args.plot_path)).mkdir(parents=True, exist_ok=True) + with open(args.plot_path, "wb") as f: + pkl.dump(inplot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) \ No newline at end of file diff --git a/scripts/pars_dsp_eopt.py b/scripts/pars_dsp_eopt.py index ff9aa72..b176c65 100644 --- a/scripts/pars_dsp_eopt.py +++ b/scripts/pars_dsp_eopt.py @@ -14,20 +14,20 @@ import lgdo.lh5 as lh5 import numpy as np -import pygama.math.peak_fitting as pgf +from pygama.math.distributions import hpge_peak import pygama.pargen.energy_optimisation as om import sklearn.gaussian_process.kernels as ker from dspeed.units import unit_registry as ureg from legendmeta import LegendMetadata from legendmeta.catalog import Props -from pygama.pargen.dsp_optimize import run_one_dsp -from pygama.pargen.utils import get_tcm_pulser_ids +from pygama.pargen.dsp_optimize import run_one_dsp, BayesianOptimizer, run_bayesian_optimisation warnings.filterwarnings(action="ignore", category=RuntimeWarning) argparser = argparse.ArgumentParser() -argparser.add_argument("--raw_filelist", help="raw_filelist", type=str) -argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, required=True) + +argparser.add_argument("--peak_file", help="tcm_filelist", type=str, required=True) + argparser.add_argument("--decay_const", help="decay_const", type=str, required=True) argparser.add_argument("--configs", help="configs", type=str, required=True) argparser.add_argument("--inplots", help="in_plot_path", type=str) @@ -57,8 +57,7 @@ log = logging.getLogger(__name__) - - +sto = lh5.LH5Store() t0 = time.time() conf = LegendMetadata(path=args.configs) @@ -72,105 +71,53 @@ db_dict = Props.read_from(args.decay_const) if opt_dict.pop("run_eopt") is True: - with open(args.raw_filelist) as f: - files = f.read().splitlines() - - raw_files = sorted(files) - # get pulser mask from tcm files - with open(args.tcm_filelist) as f: - tcm_files = f.read().splitlines() - tcm_files = sorted(np.unique(tcm_files)) - ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, opt_dict.pop("pulser_multiplicity_threshold") - ) - peaks_keV = np.array(opt_dict["peaks"]) + peaks_kev = np.array(opt_dict["peaks"]) kev_widths = [tuple(kev_width) for kev_width in opt_dict["kev_widths"]] kwarg_dicts_cusp = [] kwarg_dicts_trap = [] kwarg_dicts_zac = [] - for peak in peaks_keV: - peak_idx = np.where(peaks_keV == peak)[0][0] + for peak in peaks_kev: + peak_idx = np.where(peaks_kev == peak)[0][0] kev_width = kev_widths[peak_idx] - if peak == 238.632: - kwarg_dicts_cusp.append( - { - "parameter": "cuspEmax", - "func": pgf.extended_gauss_step_pdf, - "gof_func": pgf.gauss_step_pdf, - "peak": peak, - "kev_width": kev_width, - } - ) - kwarg_dicts_zac.append( - { - "parameter": "zacEmax", - "func": pgf.extended_gauss_step_pdf, - "gof_func": pgf.gauss_step_pdf, - "peak": peak, - "kev_width": kev_width, - } - ) - kwarg_dicts_trap.append( - { - "parameter": "trapEmax", - "func": pgf.extended_gauss_step_pdf, - "gof_func": pgf.gauss_step_pdf, - "peak": peak, - "kev_width": kev_width, - } - ) - else: - kwarg_dicts_cusp.append( - { - "parameter": "cuspEmax", - "func": pgf.extended_radford_pdf, - "gof_func": pgf.radford_pdf, - "peak": peak, - "kev_width": kev_width, - } - ) - kwarg_dicts_zac.append( - { - "parameter": "zacEmax", - "func": pgf.extended_radford_pdf, - "gof_func": pgf.radford_pdf, - "peak": peak, - "kev_width": kev_width, - } - ) - kwarg_dicts_trap.append( - { - "parameter": "trapEmax", - "func": pgf.extended_radford_pdf, - 
"gof_func": pgf.radford_pdf, - "peak": peak, - "kev_width": kev_width, - } - ) - sto = lh5.LH5Store() - idx_events, idx_list = om.event_selection( - raw_files, - f"{args.channel}/raw", - dsp_config, - db_dict, - peaks_keV, - np.arange(0, len(peaks_keV), 1).tolist(), - kev_widths, - pulser_mask=mask, - cut_parameters=opt_dict["cut_parameters"], - n_events=opt_dict["n_events"], - threshold=opt_dict["threshold"], - wf_field=opt_dict["wf_field"], - ) + + kwarg_dicts_cusp.append( + { + "parameter": "cuspEmax", + "func": hpge_peak, + "peak": peak, + "kev_width": kev_width, + } + ) + kwarg_dicts_zac.append( + { + "parameter": "zacEmax", + "func": hpge_peak, + "peak": peak, + "kev_width": kev_width, + } + ) + kwarg_dicts_trap.append( + { + "parameter": "trapEmax", + "func": hpge_peak, + "peak": peak, + "kev_width": kev_width, + } + ) + + peaks_rounded = [int(peak) for peak in peaks_kev] + peaks = sto.read(f"{args.channel}/raw",args.peak_file , field_mask=["peak"]) [0]["peak"].nda + ids = np.in1d(peaks, peaks_rounded) + peaks = peaks[ids] + idx_list = [np.where(peaks==peak)[0] for peak in peaks_rounded] tb_data = sto.read( f"{args.channel}/raw", - raw_files, - idx=idx_events, - n_rows=opt_dict["n_events"], + args.peak_file, + idx=ids )[0] t1 = time.time() @@ -204,26 +151,27 @@ kwarg_dict = [ { "peak_dicts": kwarg_dicts_cusp, - "ctc_param": "QDrift", + "ctc_param": "dt_eff", "idx_list": idx_list, - "peaks_keV": peaks_keV, + "peaks_kev": peaks_kev, }, { "peak_dicts": kwarg_dicts_zac, - "ctc_param": "QDrift", + "ctc_param": "dt_eff", "idx_list": idx_list, - "peaks_keV": peaks_keV, + "peaks_kev": peaks_kev, }, { "peak_dicts": kwarg_dicts_trap, - "ctc_param": "QDrift", + "ctc_param": "dt_eff", "idx_list": idx_list, - "peaks_keV": peaks_keV, + "peaks_kev": peaks_kev, }, ] fom = eval(opt_dict["fom"]) - + out_field = opt_dict["fom_field"] + out_err_field = opt_dict["fom_err_field"] sample_x = np.array(opt_dict["initial_samples"]) results_cusp = [] @@ -249,18 +197,18 @@ res = fom(tb_out, kwarg_dict[0]) results_cusp.append(res) - sample_y_cusp.append(res["y_val"]) - err_y_cusp.append(res["y_err"]) + sample_y_cusp.append(res[out_field]) + err_y_cusp.append(res[out_err_field]) res = fom(tb_out, kwarg_dict[1]) results_zac.append(res) - sample_y_zac.append(res["y_val"]) - err_y_zac.append(res["y_err"]) + sample_y_zac.append(res[out_field]) + err_y_zac.append(res[out_err_field]) res = fom(tb_out, kwarg_dict[2]) results_trap.append(res) - sample_y_trap.append(res["y_val"]) - err_y_trap.append(res["y_err"]) + sample_y_trap.append(res[out_field]) + err_y_trap.append(res[out_err_field]) log.info(f"{i+1} Finished") @@ -303,29 +251,35 @@ sampling_unit = ureg.Quantity(tb_data["waveform_presummed"]["dt"].attrs["units"]) waveform_sampling = sampling_rate * sampling_unit - bopt_cusp = om.BayesianOptimizer( + bopt_cusp = BayesianOptimizer( acq_func=opt_dict["acq_func"], batch_size=opt_dict["batch_size"], kernel=kernel, sampling_rate=waveform_sampling, + fom_value = out_field, + fom_error = out_err_field ) bopt_cusp.lambda_param = lambda_param bopt_cusp.add_dimension("cusp", "sigma", 0.5, 16, True, "us") - bopt_zac = om.BayesianOptimizer( + bopt_zac = BayesianOptimizer( acq_func=opt_dict["acq_func"], batch_size=opt_dict["batch_size"], kernel=kernel, sampling_rate=waveform_sampling, + fom_value = out_field, + fom_error = out_err_field ) bopt_zac.lambda_param = lambda_param bopt_zac.add_dimension("zac", "sigma", 0.5, 16, True, "us") - bopt_trap = om.BayesianOptimizer( + bopt_trap = BayesianOptimizer( 
acq_func=opt_dict["acq_func"], batch_size=opt_dict["batch_size"], kernel=kernel, sampling_rate=waveform_sampling, + fom_value = out_field, + fom_error = out_err_field ) bopt_trap.lambda_param = lambda_param bopt_trap.add_dimension("etrap", "rise", 1, 12, True, "us") @@ -348,7 +302,7 @@ optimisers = [bopt_cusp, bopt_zac, bopt_trap] - out_param_dict, out_results_list = om.run_optimisation( + out_param_dict, out_results_list = run_bayesian_optimisation( tb_data, dsp_config, [fom], diff --git a/scripts/pars_dsp_event_selection.py b/scripts/pars_dsp_event_selection.py index 9fc7197..6fc8292 100644 --- a/scripts/pars_dsp_event_selection.py +++ b/scripts/pars_dsp_event_selection.py @@ -12,97 +12,303 @@ os.environ["DSPEED_CACHE"] = "false" os.environ["DSPEED_BOUNDSCHECK"] = "false" +import lgdo import lgdo.lh5 as lh5 import numpy as np -import pygama.pargen.energy_optimisation as om +from bisect import bisect_left from legendmeta import LegendMetadata from legendmeta.catalog import Props -from pygama.pargen.utils import get_tcm_pulser_ids +from pygama.pargen.data_cleaning import get_tcm_pulser_ids, generate_cuts, get_keys +import pygama.math.histogram as pgh +import pygama.pargen.energy_cal as pgc +from pygama.pargen.dsp_optimize import run_one_dsp warnings.filterwarnings(action="ignore", category=RuntimeWarning) -argparser = argparse.ArgumentParser() -argparser.add_argument("--raw_filelist", help="raw_filelist", type=str) -argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, required=True) -argparser.add_argument("--decay_const", help="decay_const", type=str, required=True) -argparser.add_argument("--configs", help="configs", type=str, required=True) +def get_out_data(raw_data, dsp_data,cut_dict, e_lower_lim, e_upper_lim, + ecal_pars, raw_dict, peak, final_cut_field="is_valid_cal", + energy_param="trapTmax"): + for outname, info in cut_dict.items(): + outcol = dsp_data.eval(info["expression"], info.get("parameters", None)) + dsp_data.add_column(outname, outcol) + + for outname, info in raw_dict.items(): + outcol = raw_data.eval(info["expression"], info.get("parameters", None)) + raw_data.add_column(outname, outcol) -argparser.add_argument("--log", help="log_file", type=str) + final_mask = (dsp_data[energy_param].nda > e_lower_lim) & (dsp_data[energy_param].nda < e_upper_lim)&(dsp_data[final_cut_field].nda) + + wavefrom_windowed = lgdo.WaveformTable( + t0=raw_data["waveform_windowed"]["t0"].nda[final_mask], + t0_units=raw_data["waveform_windowed"]["t0"].attrs["units"], + dt=raw_data["waveform_windowed"]["dt"].nda[final_mask], + dt_units=raw_data["waveform_windowed"]["dt"].attrs["units"], + values=raw_data["waveform_windowed"]["values"].nda[final_mask] +) + wavefrom_presummed = lgdo.WaveformTable( + t0=raw_data["waveform_presummed"]["t0"].nda[final_mask], + t0_units=raw_data["waveform_presummed"]["t0"].attrs["units"], + dt=raw_data["waveform_presummed"]["dt"].nda[final_mask], + dt_units=raw_data["waveform_presummed"]["dt"].attrs["units"], + values=raw_data["waveform_presummed"]["values"].nda[final_mask] +) + + + out_tbl = lgdo.Table(col_dict = {"waveform_presummed": wavefrom_presummed, + "waveform_windowed":wavefrom_windowed, + "presum_rate":lgdo.Array(raw_data["presum_rate"].nda[final_mask]), + "timestamp":lgdo.Array(raw_data["timestamp"].nda[final_mask]), + "baseline":lgdo.Array(raw_data["baseline"].nda[final_mask]), + "daqenergy":lgdo.Array(raw_data["daqenergy"].nda[final_mask]), + "daqenergy_cal":lgdo.Array(raw_data["daqenergy_cal"].nda[final_mask]), + 
"trapTmax_cal":lgdo.Array(dsp_data["trapTmax"].nda[final_mask]*ecal_pars), + "peak":lgdo.Array(np.full(len(np.where(final_mask)[0]),int(peak))) + }) + return out_tbl, len(np.where(final_mask)[0]) -argparser.add_argument("--datatype", help="Datatype", type=str, required=True) -argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) -argparser.add_argument("--channel", help="Channel", type=str, required=True) -argparser.add_argument("--peak_file", help="peak_file", type=str, required=True) -args = argparser.parse_args() +if __name__ == "__main__": -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") -logging.getLogger("numba").setLevel(logging.INFO) -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("h5py").setLevel(logging.INFO) -logging.getLogger("matplotlib").setLevel(logging.INFO) -logging.getLogger("dspeed.processing_chain").setLevel(logging.INFO) + argparser = argparse.ArgumentParser() + argparser.add_argument("--raw_filelist", help="raw_filelist", type=str) + argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, required=False) + argparser.add_argument("--pulser_file", help="pulser_file", type=str, required=False) + argparser.add_argument("--decay_const", help="decay_const", type=str, required=True) + argparser.add_argument("--configs", help="configs", type=str, required=True) + argparser.add_argument("--raw_cal", help="raw_cal", type=str, required=True) -log = logging.getLogger(__name__) + argparser.add_argument("--log", help="log_file", type=str) -t0 = time.time() + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) + argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) + argparser.add_argument("--channel", help="Channel", type=str, required=True) -conf = LegendMetadata(path=args.configs) -configs = conf.on(args.timestamp, system=args.datatype) -dsp_config = configs["snakemake_rules"]["pars_dsp_peak_selection"]["inputs"]["processing_chain"][ - args.channel -] -peak_json = configs["snakemake_rules"]["pars_dsp_peak_selection"]["inputs"]["peak_config"][args.channel] + argparser.add_argument("--peak_file", help="peak_file", type=str, required=True) + args = argparser.parse_args() -peak_dict = Props.read_from(opt_json) -db_dict = Props.read_from(args.decay_const) + logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") + logging.getLogger("numba").setLevel(logging.INFO) + logging.getLogger("parse").setLevel(logging.INFO) + logging.getLogger("lgdo").setLevel(logging.INFO) + logging.getLogger("h5py").setLevel(logging.INFO) + logging.getLogger("matplotlib").setLevel(logging.INFO) + logging.getLogger("dspeed.processing_chain").setLevel(logging.INFO) -if opt_dict.pop("run_selection") is True: - with open(args.raw_filelist) as f: - files = f.read().splitlines() - raw_files = sorted(files) + log = logging.getLogger(__name__) + sto = lh5.LH5Store() + t0 = time.time() - # get pulser mask from tcm files - with open(args.tcm_filelist) as f: - tcm_files = f.read().splitlines() - tcm_files = sorted(np.unique(tcm_files)) - ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, peak_dict["pulser_multiplicity_threshold"] - ) + conf = LegendMetadata(path=args.configs) + configs = conf.on(args.timestamp, system=args.datatype) + dsp_config = configs["snakemake_rules"]["pars_dsp_peak_selection"]["inputs"]["processing_chain"][ + args.channel + ] + peak_json = 
configs["snakemake_rules"]["pars_dsp_peak_selection"]["inputs"]["peak_config"][args.channel] - sto = lh5.LH5Store() - idx_events, idx_list = om.event_selection( - raw_files, - f"{args.channel}/raw", - dsp_config, - db_dict, - peaks_keV, - np.arange(0, len(peaks_keV), 1).tolist(), - kev_widths, - pulser_mask=mask, - cut_parameters=peak_dict["cut_parameters"], - n_events=peak_dict["n_events"], - threshold=peak_dict["threshold"], - wf_field=peak_dict["wf_field"], - ) - - tb_data = sto.read( - f"{args.channel}/raw", - raw_files, - idx=idx_events, - n_rows=opt_dict["n_events"], - )[0] + peak_dict = Props.read_from(peak_json) + db_dict = Props.read_from(args.decay_const) pathlib.Path(os.path.dirname(args.peak_file)).mkdir(parents=True, exist_ok=True) - sto.write( - tb_data, - name="raw", - lh5_file=args.peak_file, - wo_mode="overwrite", - ) -else: - pathlib.Path(os.path.dirname(args.peak_file)).mkdir(parents=True, exist_ok=True) - pathlib.Path(args.peak_file).touch() \ No newline at end of file + if peak_dict.pop("run_selection") is True: + + rng = np.random.default_rng() + rand_num = f"{rng.integers(0,99999):05d}" + temp_output = f"{args.peak_file}.{rand_num}" + + + with open(args.raw_filelist) as f: + files = f.read().splitlines() + raw_files = sorted(files) + + if args.pulser_file: + with open(args.pulser_file) as f: + pulser_dict = json.load(f) + mask = np.array(pulser_dict["mask"]) + + elif args.tcm_filelist: + # get pulser mask from tcm files + with open(args.tcm_filelist) as f: + tcm_files = f.read().splitlines() + tcm_files = sorted(np.unique(tcm_files)) + ids, mask = get_tcm_pulser_ids( + tcm_files, args.channel, peak_dict["pulser_multiplicity_threshold"] + ) + else: + raise ValueError("No pulser file or tcm filelist provided") + + raw_dict = Props.read_from(args.raw_cal)[args.channel]["pars"]["operations"] + + peaks_kev = peak_dict["peaks"] + kev_widths = peak_dict["kev_widths"] + cut_parameters = peak_dict["cut_parameters"] + n_events = peak_dict["n_events"] + final_cut_field = peak_dict["final_cut_field"] + energy_parameter = peak_dict.get("energy_parameter", "trapTmax") + + lh5_path = f"{args.channel}/raw" + + + if not isinstance(kev_widths, list): + kev_widths = [kev_widths] + + if lh5_path[-1] != "/": + lh5_path += "/" + + raw_fields = [ + field.replace(lh5_path, "") for field in lh5.ls(raw_files[0], lh5_path) + ] + + tb = sto.read(lh5_path, raw_files, field_mask=["daqenergy"])[0] + + for outname, info in raw_dict.items(): + outcol = tb.eval(info["expression"], info.get("parameters", None)) + tb.add_column(outname, outcol) + + rough_energy = tb["daqenergy_cal"].nda + + masks = {} + for peak, kev_width in zip(peaks_kev,kev_widths) : + e_mask = (rough_energy > peak - 1.1* kev_width[0]) & (rough_energy < peak + 1.1* kev_width[0]) & (~mask) + masks[peak] = np.where(e_mask)[0] + log.debug(f"{len(masks[peak])} events found in energy range for {peak}") + + input_data = sto.read(f"{lh5_path}", raw_files, n_rows=10000)[0] + + if isinstance(dsp_config, str): + dsp_config = Props.read_from(dsp_config) + + dsp_config["outputs"] = get_keys(dsp_config["outputs"], cut_parameters) + [ + energy_parameter + ] + + log.debug("Processing data") + tb_data = run_one_dsp(input_data, dsp_config, db_dict=db_dict) + + if cut_parameters is not None: + cut_dict = generate_cuts(tb_data, cut_parameters) + log.debug(f"Cuts are calculated: {cut_dict}") + else: + cut_dict = None + + pk_dicts = {} + for peak, kev_width in zip(peaks_kev,kev_widths): + pk_dicts[peak] = {"idxs":(masks[peak],), + "n_rows_read":0, + 
"obj_buf_start":0, + "obj_buf":None, + "kev_width":kev_width + } + + for i,file in enumerate(raw_files): + log.debug(os.path.basename(file)) + for peak, peak_dict in pk_dicts.items(): + if peak_dict["idxs"] is not None: + # idx is a long continuous array + n_rows_i = sto.read_n_rows(lh5_path, file) + # find the length of the subset of idx that contains indices + # that are less than n_rows_i + n_rows_to_read_i = bisect_left(peak_dict["idxs"][0], n_rows_i) + # now split idx into idx_i and the remainder + idx_i = (peak_dict["idxs"][0][:n_rows_to_read_i],) + peak_dict["idxs"] = (peak_dict["idxs"][0][n_rows_to_read_i:] - n_rows_i,) + if len(idx_i[0])>0: + peak_dict["obj_buf"], n_rows_read_i = sto.read( + lh5_path, + file, + start_row=0, + idx=idx_i, + obj_buf=peak_dict["obj_buf"], + obj_buf_start=peak_dict["obj_buf_start"], + ) + + peak_dict["n_rows_read"] += n_rows_read_i + log.debug(f'{peak}: {peak_dict["n_rows_read"]}') + peak_dict["obj_buf_start"] += n_rows_read_i + if peak_dict["n_rows_read"] >=10000 or file ==raw_files[-1]: + if "e_lower_lim" not in peak_dict: + + tb_out = run_one_dsp(peak_dict["obj_buf"], dsp_config, db_dict=db_dict) + energy = tb_out[energy_parameter].nda + + hist, bins, var = pgh.get_hist( + energy, + range=(np.floor(np.nanmin(energy)), np.ceil(np.nanmax(energy))), + dx=peak / (np.nanpercentile(energy, 50)), + ) + peak_loc = pgh.get_bin_centers(bins)[np.nanargmax(hist)] + + mu, _, _ = pgc.hpge_fit_energy_peak_tops( + hist, + bins, + var, + [peak_loc], + n_to_fit=7, + )[ + 0 + ][0] + + if mu is None or np.isnan(mu): + log.debug("Fit failed, using max guess") + rough_adc_to_kev = peak / peak_loc + e_lower_lim = peak_loc - (1.5 * peak_dict["kev_width"][0]) / rough_adc_to_kev + e_upper_lim = peak_loc + (1.5 * peak_dict["kev_width"][1]) / rough_adc_to_kev + hist, bins, var = pgh.get_hist( + energy, range=(int(e_lower_lim), int(e_upper_lim)), dx=1 + ) + mu = pgh.get_bin_centers(bins)[np.nanargmax(hist)] + + updated_adc_to_kev = peak / mu + e_lower_lim = mu - (peak_dict["kev_width"][0]) / updated_adc_to_kev + e_upper_lim = mu + (peak_dict["kev_width"][1]) / updated_adc_to_kev + log.info(f"{peak}: lower lim is :{e_lower_lim}, upper lim is {e_upper_lim}") + peak_dict["e_lower_lim"] = e_lower_lim + peak_dict["e_upper_lim"] = e_upper_lim + peak_dict["ecal_par"] = updated_adc_to_kev + + out_tbl, n_wfs = get_out_data(peak_dict["obj_buf"], + tb_out, + cut_dict, + e_lower_lim, + e_upper_lim, + peak_dict["ecal_par"], + raw_dict, + int(peak), + final_cut_field=final_cut_field, + energy_param=energy_parameter + ) + sto.write(out_tbl ,name= lh5_path, + lh5_file=temp_output, + wo_mode="a") + peak_dict["obj_buf"] = None + peak_dict["obj_buf_start"] = 0 + peak_dict["n_events"] = n_wfs + else: + tb_out = run_one_dsp(peak_dict["obj_buf"], dsp_config, db_dict=db_dict) + out_tbl, n_wfs = get_out_data(peak_dict["obj_buf"], + tb_out, + cut_dict, + peak_dict["e_lower_lim"], + peak_dict["e_upper_lim"], + peak_dict["ecal_par"], + raw_dict, + int(peak), + final_cut_field=final_cut_field, + energy_param=energy_parameter + ) + peak_dict["n_events"] += n_wfs + sto.write(out_tbl ,name= lh5_path, + lh5_file=temp_output, + wo_mode="a") + peak_dict["obj_buf"] = None + peak_dict["obj_buf_start"] = 0 + if peak_dict["n_events"] >= n_events: + peak_dict["idxs"] = None + log.debug(f"{peak} has reached the required number of events") + log.debug(f"{peak}: {peak_dict['idxs']}, {peak_dict['idxs'] is not None}") + + else: + pathlib.Path(temp_output).touch() + + os.rename(temp_output, args.peak_file) \ No newline at 
end of file diff --git a/scripts/pars_dsp_nopt.py b/scripts/pars_dsp_nopt.py index bed75bf..d412e92 100644 --- a/scripts/pars_dsp_nopt.py +++ b/scripts/pars_dsp_nopt.py @@ -16,7 +16,7 @@ import pygama.pargen.noise_optimization as pno from legendmeta import LegendMetadata from legendmeta.catalog import Props -from pygama.pargen.cuts import generate_cuts, get_cut_indexes +from pygama.pargen.data_cleaning import generate_cuts, get_cut_indexes from pygama.pargen.dsp_optimize import run_one_dsp sto = lh5.LH5Store() @@ -76,7 +76,7 @@ log.info(f"Select baselines {len(tb_data)}") dsp_data = run_one_dsp(tb_data, dsp_config) - cut_dict = generate_cuts(dsp_data, parameters=opt_dict.pop("cut_pars")) + cut_dict = generate_cuts(dsp_data, cut_dict=opt_dict.pop("cut_pars")) cut_idxs = get_cut_indexes(dsp_data, cut_dict) tb_data = sto.read( f"{args.channel}/raw", raw_files, n_rows=opt_dict.pop("n_events"), idx=idxs[cut_idxs] diff --git a/scripts/pars_dsp_tau.py b/scripts/pars_dsp_tau.py index 04d4cdc..0c150d2 100644 --- a/scripts/pars_dsp_tau.py +++ b/scripts/pars_dsp_tau.py @@ -14,8 +14,9 @@ import numpy as np from legendmeta import LegendMetadata from legendmeta.catalog import Props -from pygama.pargen.extract_tau import dsp_preprocess_decay_const -from pygama.pargen.utils import get_tcm_pulser_ids +from pygama.pargen.extract_tau import ExtractTau +from pygama.pargen.data_cleaning import get_tcm_pulser_ids, get_cut_indexes +from pygama.pargen.dsp_optimize import run_one_dsp argparser = argparse.ArgumentParser() argparser.add_argument("--configs", help="configs path", type=str, required=True) @@ -25,6 +26,9 @@ argparser.add_argument("--channel", help="Channel", type=str, required=True) argparser.add_argument("--plot_path", help="plot path", type=str, required=False) argparser.add_argument("--output_file", help="output file", type=str, required=True) + +argparser.add_argument("--pulser_file", help="pulser file", type=str, required=False) + argparser.add_argument("--raw_files", help="input files", nargs="*", type=str) argparser.add_argument("--tcm_files", help="tcm_files", nargs="*", type=str) args = argparser.parse_args() @@ -38,6 +42,7 @@ logging.getLogger("legendmeta").setLevel(logging.INFO) sto = lh5.LH5Store() +log = logging.getLogger(__name__) configs = LegendMetadata(path=args.configs) config_dict = configs.on(args.timestamp, system=args.datatype) @@ -49,6 +54,7 @@ kwarg_dict = Props.read_from(kwarg_dict) if kwarg_dict["run_tau"] is True: + dsp_config = Props.read_from(channel_dict) kwarg_dict.pop("run_tau") if isinstance(args.raw_files, list) and args.raw_files[0].split(".")[-1] == "filelist": input_file = args.raw_files[0] @@ -81,17 +87,39 @@ n_rows=kwarg_dict.pop("n_events"), )[0] - out_dict, plot_dict = dsp_preprocess_decay_const( - tb_data, channel_dict, **kwarg_dict, display=1 - ) + tb_out = run_one_dsp(tb_data, dsp_config) + log.debug("Processed Data") + cut_parameters = kwarg_dict.get("cut_parameters", None) + if cut_parameters is not None: + idxs = get_cut_indexes(tb_out, cut_parameters=cut_parameters) + log.debug("Applied cuts") + log.debug(f"{len(idxs)} events passed cuts") + else: + idxs = np.full(len(tb_out), True, dtype=bool) + + tau = ExtractTau(dsp_config, kwarg_dict["wf_field"]) + slopes = tb_out["tail_slope"].nda + log.debug("Calculating pz constant") + + tau.get_decay_constant(slopes[idxs], tb_data[kwarg_dict["wf_field"]]) if args.plot_path: pathlib.Path(os.path.dirname(args.plot_path)).mkdir(parents=True, exist_ok=True) + + plot_dict = tau.plot_waveforms_after_correction(tb_data, 
"wf_pz", + norm_param=kwarg_dict.get("norm_param", "pz_mean")) + plot_dict.update(tau.plot_slopes(slopes[idxs])) + with open(args.plot_path, "wb") as f: pkl.dump({"tau": plot_dict}, f, protocol=pkl.HIGHEST_PROTOCOL) else: out_dict = {} +if args.pulser_file: + pathlib.Path(os.path.dirname(args.pulser_file)).mkdir(parents=True, exist_ok=True) + with open(args.pulser_file, "w") as f: + json.dump({"idxs": ids.tolist(), "mask": mask.tolist()} , f, indent=4) + pathlib.Path(os.path.dirname(args.output_file)).mkdir(parents=True, exist_ok=True) with open(args.output_file, "w") as f: - json.dump(out_dict, f, indent=4) + json.dump(tau.output_dict, f, indent=4) \ No newline at end of file From eb973d69d364db220d4ee6bfaff0707edee8c1d9 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Thu, 21 Mar 2024 00:27:53 +0100 Subject: [PATCH 021/103] first version --- scripts/par_psp.py | 83 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 66 insertions(+), 17 deletions(-) diff --git a/scripts/par_psp.py b/scripts/par_psp.py index 7ef0fad..400edbc 100644 --- a/scripts/par_psp.py +++ b/scripts/par_psp.py @@ -3,10 +3,12 @@ import os import pathlib from legendmeta.catalog import Props +from legendmeta import LegendMetadata from util.FileKey import ChannelProcKey import numpy as np -import matplotlib.pyplot as pyplot +import matplotlib.pyplot as plt import matplotlib as mpl +import matplotlib.dates as mdates from datetime import datetime import pickle as pkl mpl.use("Agg") @@ -19,13 +21,20 @@ argparser.add_argument("--out_plots", help="output plot files", nargs="*", type=str, required=False) argparser.add_argument("--in_obj", help="input object files", nargs="*", type=str, required=False) argparser.add_argument("--out_obj", help="output object files", nargs="*", type=str, required=False) + +argparser.add_argument("--log", help="log_file", type=str) +argparser.add_argument("--configs", help="configs", type=str, required=True) + +argparser.add_argument("--datatype", help="Datatype", type=str, required=True) +argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) +argparser.add_argument("--channel", help="Channel", type=str, required=True) args = argparser.parse_args() conf = LegendMetadata(path=args.configs) configs = conf.on(args.timestamp, system=args.datatype) -merge_config = configs["snakemake_rules"]["pars_psp"]["inputs"]["config"][ +merge_config = Props.read_from(configs["snakemake_rules"]["pars_psp"]["inputs"]["psp_config"][ args.channel -] +]) ave_fields = merge_config["average_fields"] @@ -44,22 +53,61 @@ for key in keys: val = val[key] vals.append(val) + if "dsp" in in_dicts[tstamp]: + tmp_dict = in_dicts[tstamp]["dsp"] + else: + tmp_dict = {} + in_dicts[tstamp]["dsp"] = tmp_dict + for i,key in enumerate(keys): + if i == len(keys)-1: + tmp_dict[key] = val + else: + if key in tmp_dict: + tmp_dict = tmp_dict[key] + else: + tmp_dict[key] = {} + tmp_dict = tmp_dict[key] + if isinstance(vals[0], str): + if "*" in vals[0]: + unit = vals[0].split("*")[1] + if "." 
in vals[0]: + rounding = len(val.split("*")[0].split(".")[-1]) + else: + rounding = 16 + vals = np.array([float(val.split("*")[0]) for val in vals]) + else: + unit = None + rounding = 16 + else: + vals=np.array(vals) + unit = None if len(vals[~np.isnan(vals)]) ==0: - mean = np.nan + mean_val = np.nan else: - mean = np.nanmean(vals) + mean_val = np.nanmean(vals) + if unit is not None: + mean = f"{round(mean_val, rounding)}*{unit}" + else: + mean = mean_val for tstamp in in_dicts: val = in_dicts[tstamp] - for key in keys: - val = val[key] - val = mean - + for i, key in enumerate(keys): + if i == len(keys)-1: + val[key]= mean + else: + val = val[key] + fig = plt.figure() plt.scatter([datetime.strptime(tstamp,'%Y%m%dT%H%M%SZ') for tstamp in in_dicts] , vals) - plt.axhline(y=mean, color='r', linestyle='-') + plt.axhline(y=mean_val, color='r', linestyle='-') plt.xlabel("time") - plt.ylabel("value") - plt.title(f"{field} over time") + if unit is not None: + plt.ylabel(f"value {unit}") + else: + plt.ylabel("value") + plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%d/%m/%y')) + plt.gcf().autofmt_xdate() + plt.title(f"{field}") plot_dict[field] = fig plt.close() @@ -78,11 +126,12 @@ with open(infile, "rb") as f: old_plot_dict = pkl.load(f) break - new_plot_dict = old_plot_dict.update({"psp": plot_dict}) + old_plot_dict.update({"psp": plot_dict}) + new_plot_dict = old_plot_dict else: new_plot_dict = {"psp": plot_dict} - with open(file, "w") as f: - pkl.dump(new_plot_dict, file, protocol=pkl.HIGHEST_PROTOCOL) + with open(file, "wb") as f: + pkl.dump(new_plot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) if args.out_obj: for file in args.out_obj: @@ -96,5 +145,5 @@ new_obj_dict = old_obj_dict else: new_obj_dict = {} - with open(file, "w") as f: - pkl.dump(new_obj_dict, file, protocol=pkl.HIGHEST_PROTOCOL) \ No newline at end of file + with open(file, "wb") as f: + pkl.dump(new_obj_dict, f, protocol=pkl.HIGHEST_PROTOCOL) \ No newline at end of file From b160497382f2813afeccf916cc87e21930e53cd0 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Thu, 21 Mar 2024 00:28:31 +0100 Subject: [PATCH 022/103] fix pht bug, change to os.removes --- scripts/complete_run.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/scripts/complete_run.py b/scripts/complete_run.py index b266d50..f5e900d 100644 --- a/scripts/complete_run.py +++ b/scripts/complete_run.py @@ -148,7 +148,9 @@ def build_valid_keys(input_files, output_dir): with open(out_file, "w") as w: w.write(out_string) - os.system(f"rm {input_files}") + for input_file in input_files: + if os.path.isfile(input_file): + os.remove(input_file) def build_file_dbs(input_files, output_dir): @@ -188,7 +190,7 @@ def build_file_dbs(input_files, output_dir): ut.tier_hit_path(setup), "" ), "pht": pat.get_pattern_tier(setup, "pht", check_in_cycle=False).replace( - ut.tier_hit_path(setup), "" + ut.tier_pht_path(setup), "" ), "evt": pat.get_pattern_tier(setup, "evt", check_in_cycle=False).replace( ut.tier_evt_path(setup), "" @@ -233,7 +235,7 @@ def build_file_dbs(input_files, output_dir): ut.tier_hit_path(setup), "" ), "pht": pat.get_pattern_tier(setup, "pht", check_in_cycle=False).replace( - ut.tier_hit_path(setup), "" + ut.tier_pht_path(setup), "" ), "evt": pat.get_pattern_tier(setup, "evt", check_in_cycle=False).replace( ut.tier_evt_path(setup), "" @@ -269,8 +271,9 @@ def build_file_dbs(input_files, output_dir): json.dump(file_db_config, w, indent=2) build_file_dbs(snakemake.params.tmp_par_path, snakemake.params.filedb_path) - os.system(f"rm 
{os.path.join(snakemake.params.filedb_path, 'file_db_config.json')}") + os.remove(os.path.join(snakemake.params.filedb_path, 'file_db_config.json')) build_valid_keys(snakemake.params.tmp_par_path, snakemake.params.valid_keys_path) pathlib.Path(snakemake.output.gen_output).touch() + From 1dea790df9f6d9bea9231f7940e296f86e62dfd8 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Thu, 21 Mar 2024 00:29:24 +0100 Subject: [PATCH 023/103] increase delta --- scripts/check_blinding.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/check_blinding.py b/scripts/check_blinding.py index 550f5a8..4829608 100644 --- a/scripts/check_blinding.py +++ b/scripts/check_blinding.py @@ -71,7 +71,7 @@ # bin with 1 keV bins and get maxs hist, bins, var = get_hist(daqenergy_cal, np.arange(0, 3000, 1)) -maxs = get_i_local_maxima(hist, delta=5) +maxs = get_i_local_maxima(hist, delta=25) log.info(f"peaks found at : {maxs}") # plot the energy spectrum to check calibration From 8af1000ca7aba4bad84e8ab3bdde8d4314b4a702 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Thu, 21 Mar 2024 00:29:49 +0100 Subject: [PATCH 024/103] first version psp --- rules/psp.smk | 79 ++++++++++++++++++++++++++------------------------- 1 file changed, 41 insertions(+), 38 deletions(-) diff --git a/rules/psp.smk b/rules/psp.smk index 811893e..df9b7be 100644 --- a/rules/psp.smk +++ b/rules/psp.smk @@ -7,8 +7,8 @@ Snakemake rules for processing pht (partition hit) tier data. This is done in 4 """ from scripts.util.pars_loading import pars_catalog -import scripts.util.create_pars_keylist import pars_key_resolve -from scripts.util.utils import par_psp_path, set_last_rule_name +from scripts.util.create_pars_keylist import pars_key_resolve +from scripts.util.utils import par_psp_path, par_dsp_path, set_last_rule_name from scripts.util.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, @@ -27,20 +27,19 @@ pars_key_resolve.write_par_catalog( {"cal": ["par_psp"], "lar": ["par_psp"]}, ) -part_pht_rules = {} +psp_rules = {} for key, dataset in part.datasets.items(): for partition in dataset.keys(): - rule: input: dsp_pars=part.get_par_files( - f"{par_psp_path(setup)}/validity.jsonl", + f"{par_dsp_path(setup)}/validity.jsonl", partition, key, tier="dsp" ), dsp_objs=part.get_par_files( - f"{par_psp_path(setup)}/validity.jsonl", + f"{par_dsp_path(setup)}/validity.jsonl", partition, key, tier="dsp", @@ -48,7 +47,7 @@ for key, dataset in part.datasets.items(): extension="pkl", ), dsp_plots=part.get_plt_files( - f"{par_psp_path(setup)}/validity.jsonl", + f"{par_dsp_path(setup)}/validity.jsonl", partition, key, tier="dsp" @@ -62,26 +61,26 @@ for key, dataset in part.datasets.items(): f"{par_psp_path(setup)}/validity.jsonl", partition, key, tier="psp" ), output: - psp_pars=part.get_par_files( + psp_pars=temp(part.get_par_files( f"{par_psp_path(setup)}/validity.jsonl", partition, key, tier="psp" - ), - psp_objs=part.get_par_files( + )), + psp_objs=temp(part.get_par_files( f"{par_psp_path(setup)}/validity.jsonl", partition, key, tier="psp", name="objects", extension="pkl", - ), - psp_plots=part.get_plt_files( + )), + psp_plots=temp(part.get_plt_files( f"{par_psp_path(setup)}/validity.jsonl", partition, key, tier="psp" - ), + )), log: part.get_log_file( f"{par_psp_path(setup)}/validity.jsonl", @@ -103,44 +102,40 @@ for key, dataset in part.datasets.items(): "--timestamp {params.timestamp} " "--channel {params.channel} " "--in_plots {input.dsp_plots} " - "--out_plots {input.psp_plots} " + "--out_plots 
{output.psp_plots} " "--in_obj {input.dsp_objs} " - "--out_obj {input.psp_objs} " - "--input {input.plot_files} " - "--output {input.dsp_plots} " + "--out_obj {output.psp_objs} " + "--input {input.dsp_pars} " + "--output {output.psp_pars} " set_last_rule_name( workflow, f"{key}-{partition}-build_par_psp" ) - if key in part_pht_rules: - part_pht_rules[key].append(list(workflow.rules)[-1]) + if key in psp_rules: + psp_rules[key].append(list(workflow.rules)[-1]) else: - part_pht_rules[key] = [list(workflow.rules)[-1]] + psp_rules[key] = [list(workflow.rules)[-1]] # Merged energy and a/e supercalibrations to reduce number of rules as they have same inputs/outputs # This rule builds the a/e calibration using the calibration dsp files for the whole partition -rule build_psp: +rule build_par_psp: input: - dsp_pars=temp(get_pattern_pars_tmp_channel(setup, "dsp")), - dsp_objs=temp( - get_pattern_pars_tmp_channel( + dsp_pars=get_pattern_pars_tmp_channel(setup, "dsp"), + dsp_objs=get_pattern_pars_tmp_channel( setup, "dsp", "objects", extension="pkl" - ) - ), - dsp_plots=temp(get_pattern_plts_tmp_channel(setup, "dsp")), + ), + dsp_plots=get_pattern_plts_tmp_channel(setup, "dsp"), params: datatype="cal", channel="{channel}", timestamp="{timestamp}", output: psp_pars=temp(get_pattern_pars_tmp_channel(setup, "psp")), - psp_objs=temp( - get_pattern_pars_tmp_channel( + psp_objs=temp(get_pattern_pars_tmp_channel( setup, "psp", "objects", extension="pkl" - ) - ), + )), psp_plots=temp(get_pattern_plts_tmp_channel(setup, "psp")), log: get_pattern_log_channel(setup, "pars_psp"), @@ -157,12 +152,20 @@ rule build_psp: "--timestamp {params.timestamp} " "--channel {params.channel} " "--in_plots {input.dsp_plots} " - "--out_plots {input.psp_plots} " + "--out_plots {output.psp_plots} " "--in_obj {input.dsp_objs} " - "--out_obj {input.psp_objs} " - "--input {input.plot_files} " - "--output {input.dsp_plots} " - + "--out_obj {output.psp_objs} " + "--input {input.dsp_pars} " + "--output {output.psp_pars} " + +fallback_psp_rule = list(workflow.rules)[-1] +rule_order_list = [] +ordered = OrderedDict(psp_rules) +ordered.move_to_end("default") +for key, items in ordered.items(): + rule_order_list += [item.name for item in items] +rule_order_list.append(fallback_psp_rule.name) +workflow._ruleorder.add(*rule_order_list) # [::-1] rule build_pars_psp_objects: @@ -187,7 +190,7 @@ rule build_pars_psp_objects: "--input {input} " "--output {output} " -rule build_plts_pht: +rule build_plts_psp: input: lambda wildcards: read_filelist_plts_cal_channel(wildcards, "psp"), output: @@ -200,7 +203,7 @@ rule build_plts_pht: "--input {input} " "--output {output} " -rule build_pars_pht: +rule build_pars_psp: input: infiles = lambda wildcards: read_filelist_pars_cal_channel(wildcards, "psp"), plts = get_pattern_plts(setup, "psp"), From d9f7bd898aa51e0ecfbc43fad51f7bc5c5299f4e Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Thu, 21 Mar 2024 00:30:17 +0100 Subject: [PATCH 025/103] new event selection and get pulser mask from tau script --- rules/dsp.smk | 65 ++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 49 insertions(+), 16 deletions(-) diff --git a/rules/dsp.smk b/rules/dsp.smk index 002496f..349b67b 100644 --- a/rules/dsp.smk +++ b/rules/dsp.smk @@ -20,6 +20,15 @@ from scripts.util.patterns import ( get_pattern_pars, ) +onstart: + if os.path.isfile(os.path.join(pars_path(setup), "dsp", "validity.jsonl")): + os.remove(os.path.join(pars_path(setup), "dsp", "validity.jsonl")) + ds.pars_key_resolve.write_par_catalog( + 
["-*-*-*-cal"], + os.path.join(pars_path(setup), "dsp", "validity.jsonl"), + get_pattern_tier_raw(setup), + {"cal": ["par_dsp"], "lar": ["par_dsp"]}, + ) rule build_pars_dsp_tau: input: @@ -34,6 +43,7 @@ rule build_pars_dsp_tau: output: decay_const=temp(get_pattern_pars_tmp_channel(setup, "dsp", "decay_constant")), plots=temp(get_pattern_plts_tmp_channel(setup, "dsp", "decay_constant")), + pulser=temp(get_pattern_pars_tmp_channel(setup, "dsp", "pulser_ids")), log: get_pattern_log_channel(setup, "par_dsp_decay_constant"), group: @@ -50,9 +60,44 @@ rule build_pars_dsp_tau: "--channel {params.channel} " "--plot_path {output.plots} " "--output_file {output.decay_const} " + "--pulser_file {output.pulser} " "--tcm_files {input.tcm_files} " "--raw_files {input.files}" +rule build_pars_event_selection: + input: + files=os.path.join( + filelist_path(setup), "all-{experiment}-{period}-{run}-cal-raw.filelist" + ), + pulser_file=get_pattern_pars_tmp_channel(setup, "dsp", "pulser_ids"), + database=get_pattern_pars_tmp_channel(setup, "dsp", "decay_constant"), + raw_cal=get_blinding_curve_file, + params: + timestamp="{timestamp}", + datatype="cal", + channel="{channel}", + output: + peak_file=temp(get_pattern_pars_tmp_channel(setup, "dsp", "peaks", "lh5")), + log: + get_pattern_log_channel(setup, "par_dsp_event_selection"), + group: + "par-dsp" + resources: + runtime=300, + shell: + "{swenv} python3 -B " + f"{workflow.source_path('../scripts/pars_dsp_event_selection.py')} " + "--configs {configs} " + "--log {log} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--channel {params.channel} " + "--peak_file {output.peak_file} " + "--pulser_file {input.pulser_file} " + "--decay_const {input.database} " + "--raw_cal {input.raw_cal} " + "--raw_filelist {input.files}" + # This rule builds the optimal energy filter parameters for the dsp using fft files rule build_pars_dsp_nopt: @@ -98,12 +143,7 @@ rule build_pars_dsp_dplms: fft_files=os.path.join( filelist_path(setup), "all-{experiment}-{period}-{run}-fft-raw.filelist" ), - cal_files=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-cal-raw.filelist" - ), - tcm_files=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-cal-tcm.filelist" - ), + peak_file=get_pattern_pars_tmp_channel(setup, "dsp", "peaks", "lh5"), database=get_pattern_pars_tmp_channel(setup, "dsp", "noise_optimization"), inplots=get_pattern_plts_tmp_channel(setup, "dsp", "noise_optimization"), params: @@ -124,8 +164,7 @@ rule build_pars_dsp_dplms: "{swenv} python3 -B " f"{workflow.source_path('../scripts/pars_dsp_dplms.py')} " "--fft_raw_filelist {input.fft_files} " - "--cal_raw_filelist {input.cal_files} " - "--tcm_filelist {input.tcm_files} " + "--peak_file {input.peak_file} " "--database {input.database} " "--inplots {input.inplots} " "--configs {configs} " @@ -140,12 +179,7 @@ rule build_pars_dsp_dplms: # This rule builds the optimal energy filter parameters for the dsp using calibration dsp files rule build_pars_dsp_eopt: input: - files=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-cal-raw.filelist" - ), - tcm_filelist=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-cal-tcm.filelist" - ), + peak_file=get_pattern_pars_tmp_channel(setup, "dsp", "peaks", "lh5"), decay_const=get_pattern_pars_tmp_channel(setup, "dsp", "dplms"), inplots=get_pattern_plts_tmp_channel(setup, "dsp", "dplms"), params: @@ -172,8 +206,7 @@ rule build_pars_dsp_eopt: "--datatype {params.datatype} " "--timestamp 
{params.timestamp} " "--channel {params.channel} " - "--raw_filelist {input.files} " - "--tcm_filelist {input.tcm_filelist} " + "--peak_file {input.peak_file} " "--inplots {input.inplots} " "--decay_const {input.decay_const} " "--plot_path {output.plots} " From 11b4dddbcf0fcf24fae29d0fe667fcaee552f265 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Thu, 21 Mar 2024 00:30:58 +0100 Subject: [PATCH 026/103] include psp, move validity generation to rules, rms -> os.removes --- Snakefile | 63 ++++++++++++++++++++++++++----------------------------- 1 file changed, 30 insertions(+), 33 deletions(-) diff --git a/Snakefile b/Snakefile index 279f320..e71e7ad 100644 --- a/Snakefile +++ b/Snakefile @@ -10,7 +10,7 @@ This includes: - the same for partition level tiers """ -import pathlib, os, json, sys +import pathlib, os, json, sys, glob import scripts.util as ds from scripts.util.pars_loading import pars_catalog from scripts.util.patterns import get_pattern_tier_raw @@ -21,6 +21,7 @@ from scripts.util.utils import ( chan_map_path, filelist_path, metadata_path, + tmp_log_path, ) from datetime import datetime from collections import OrderedDict @@ -55,6 +56,7 @@ include: "rules/tcm.smk" include: "rules/dsp.smk" include: "rules/hit.smk" include: "rules/pht.smk" +include: "rules/psp.smk" include: "rules/evt.smk" include: "rules/skm.smk" include: "rules/blinding_calibration.smk" @@ -66,39 +68,10 @@ localrules: onstart: print("Starting workflow") - shell(f"rm {pars_path(setup)}/dsp/validity.jsonl || true") - shell(f"rm {pars_path(setup)}/hit/validity.jsonl || true") - shell(f"rm {pars_path(setup)}/pht/validity.jsonl || true") - shell(f"rm {pars_path(setup)}/raw/validity.jsonl || true") - ds.pars_key_resolve.write_par_catalog( - ["-*-*-*-cal"], - os.path.join(pars_path(setup), "raw", "validity.jsonl"), - get_pattern_tier_raw(setup), - {"cal": ["par_raw"]}, - ) - ds.pars_key_resolve.write_par_catalog( - ["-*-*-*-cal"], - os.path.join(pars_path(setup), "dsp", "validity.jsonl"), - get_pattern_tier_raw(setup), - {"cal": ["par_dsp"], "lar": ["par_dsp"]}, - ) - ds.pars_key_resolve.write_par_catalog( - ["-*-*-*-cal"], - os.path.join(pars_path(setup), "hit", "validity.jsonl"), - get_pattern_tier_raw(setup), - {"cal": ["par_hit"], "lar": ["par_hit"]}, - ) - ds.pars_key_resolve.write_par_catalog( - ["-*-*-*-cal"], - os.path.join(pars_path(setup), "pht", "validity.jsonl"), - get_pattern_tier_raw(setup), - {"cal": ["par_pht"], "lar": ["par_pht"]}, - ) - onsuccess: from snakemake.report import auto_report - + rep_dir = f"{log_path(setup)}/report-{datetime.strftime(datetime.utcnow(), '%Y%m%dT%H%M%SZ')}" pathlib.Path(rep_dir).mkdir(parents=True, exist_ok=True) # auto_report(workflow.persistence.dag, f"{rep_dir}/report.html") @@ -109,8 +82,32 @@ onsuccess: f.writelines(str(workflow.persistence.dag.rule_dot())) # shell(f"cat {rep_dir}/rg.txt | dot -Tpdf > {rep_dir}/rg.pdf") print("Workflow finished, no error") - shell("rm *.gen || true") - shell(f"rm {filelist_path(setup)}/* || true") + + # remove .gen files + files = glob.glob("*.gen") + for file in files: + if os.path.isfile(file): + os.remove(file) + + # remove filelists + files = glob.glob(os.path.join(filelist_path(setup), "*")) + for file in files: + if os.path.isfile(file): + os.remove(file) + if os.path.exists(filelist_path(setup)): + os.rmdir(filelist_path(setup)) + + # remove logs + files = glob.glob(os.path.join(tmp_log_path(setup), "*", "*.log")) + for file in files: + if os.path.isfile(file): + os.remove(file) + dirs = 
glob.glob(os.path.join(tmp_log_path(setup), "*")) + for d in dirs: + if os.path.isdir(d): + os.rmdir(d) + if os.path.exists(tmp_log_path(setup)): + os.rmdir(tmp_log_path(setup)) # Placeholder, can email or maybe put message in slack From c17107e99a2a71589532813f9f1544478cd0b1ee Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Thu, 21 Mar 2024 00:41:42 +0100 Subject: [PATCH 027/103] pre-commit fixes --- Snakefile | 13 +- rules/dsp.smk | 40 +++-- rules/psp.smk | 90 ++++++------ scripts/complete_run.py | 3 +- scripts/merge_channels.py | 53 ++++--- scripts/par_psp.py | 69 +++++---- scripts/pars_dsp_dplms.py | 42 +++--- scripts/pars_dsp_eopt.py | 35 ++--- scripts/pars_dsp_event_selection.py | 219 +++++++++++++++------------- scripts/pars_dsp_tau.py | 17 ++- scripts/util/patterns.py | 6 +- scripts/util/utils.py | 10 +- 12 files changed, 322 insertions(+), 275 deletions(-) diff --git a/Snakefile b/Snakefile index e71e7ad..c5149e8 100644 --- a/Snakefile +++ b/Snakefile @@ -41,6 +41,7 @@ swenv = runcmd(setup) part = ds.dataset_file(setup, os.path.join(configs, "partitions.json")) basedir = workflow.basedir + wildcard_constraints: experiment="\w+", period="\w+", @@ -61,6 +62,7 @@ include: "rules/evt.smk" include: "rules/skm.smk" include: "rules/blinding_calibration.smk" + localrules: gen_filelist, autogen_output, @@ -69,9 +71,10 @@ localrules: onstart: print("Starting workflow") + onsuccess: from snakemake.report import auto_report - + rep_dir = f"{log_path(setup)}/report-{datetime.strftime(datetime.utcnow(), '%Y%m%dT%H%M%SZ')}" pathlib.Path(rep_dir).mkdir(parents=True, exist_ok=True) # auto_report(workflow.persistence.dag, f"{rep_dir}/report.html") @@ -82,22 +85,22 @@ onsuccess: f.writelines(str(workflow.persistence.dag.rule_dot())) # shell(f"cat {rep_dir}/rg.txt | dot -Tpdf > {rep_dir}/rg.pdf") print("Workflow finished, no error") - + # remove .gen files files = glob.glob("*.gen") for file in files: if os.path.isfile(file): os.remove(file) - # remove filelists + # remove filelists files = glob.glob(os.path.join(filelist_path(setup), "*")) for file in files: if os.path.isfile(file): os.remove(file) if os.path.exists(filelist_path(setup)): os.rmdir(filelist_path(setup)) - - # remove logs + + # remove logs files = glob.glob(os.path.join(tmp_log_path(setup), "*", "*.log")) for file in files: if os.path.isfile(file): diff --git a/rules/dsp.smk b/rules/dsp.smk index 349b67b..5c27f42 100644 --- a/rules/dsp.smk +++ b/rules/dsp.smk @@ -20,6 +20,7 @@ from scripts.util.patterns import ( get_pattern_pars, ) + onstart: if os.path.isfile(os.path.join(pars_path(setup), "dsp", "validity.jsonl")): os.remove(os.path.join(pars_path(setup), "dsp", "validity.jsonl")) @@ -30,6 +31,7 @@ onstart: {"cal": ["par_dsp"], "lar": ["par_dsp"]}, ) + rule build_pars_dsp_tau: input: files=os.path.join( @@ -64,6 +66,7 @@ rule build_pars_dsp_tau: "--tcm_files {input.tcm_files} " "--raw_files {input.files}" + rule build_pars_event_selection: input: files=os.path.join( @@ -151,8 +154,10 @@ rule build_pars_dsp_dplms: datatype="cal", channel="{channel}", output: - dsp_pars=temp(get_pattern_pars_tmp_channel(setup, "dsp",'dplms')), - lh5_path=temp(get_pattern_pars_tmp_channel(setup, "dsp","dplms",extension="lh5")), + dsp_pars=temp(get_pattern_pars_tmp_channel(setup, "dsp", "dplms")), + lh5_path=temp( + get_pattern_pars_tmp_channel(setup, "dsp", "dplms", extension="lh5") + ), plots=temp(get_pattern_plts_tmp_channel(setup, "dsp", "dplms")), log: get_pattern_log_channel(setup, "pars_dsp_dplms"), @@ -176,6 +181,7 @@ rule build_pars_dsp_dplms: 
"--lh5_path {output.lh5_path} " "--plot_path {output.plots} " + # This rule builds the optimal energy filter parameters for the dsp using calibration dsp files rule build_pars_dsp_eopt: input: @@ -213,6 +219,7 @@ rule build_pars_dsp_eopt: "--qbb_grid_path {output.qbb_grid} " "--final_dsp_pars {output.dsp_pars}" + rule build_plts_dsp: input: lambda wildcards: read_filelist_plts_cal_channel(wildcards, "dsp"), @@ -226,6 +233,7 @@ rule build_plts_dsp: "--input {input} " "--output {output} " + rule build_pars_dsp_objects: input: lambda wildcards: read_filelist_pars_cal_channel(wildcards, "dsp_objects_pkl"), @@ -245,15 +253,18 @@ rule build_pars_dsp_objects: "--input {input} " "--output {output} " + rule build_pars_dsp_db: input: lambda wildcards: read_filelist_pars_cal_channel(wildcards, "dsp"), output: - temp(get_pattern_pars_tmp( - setup, - "dsp", - datatype="cal", - )), + temp( + get_pattern_pars_tmp( + setup, + "dsp", + datatype="cal", + ) + ), group: "merge-dsp" shell: @@ -262,16 +273,19 @@ rule build_pars_dsp_db: "--input {input} " "--output {output} " + rule build_pars_dsp: input: - in_files = lambda wildcards: read_filelist_pars_cal_channel(wildcards, "dsp_dplms_lh5"), - in_db = get_pattern_pars_tmp( + in_files=lambda wildcards: read_filelist_pars_cal_channel( + wildcards, "dsp_dplms_lh5" + ), + in_db=get_pattern_pars_tmp( setup, "dsp", datatype="cal", ), - plts = get_pattern_plts(setup, "dsp"), - objects = get_pattern_pars( + plts=get_pattern_plts(setup, "dsp"), + objects=get_pattern_pars( setup, "dsp", name="objects", @@ -279,13 +293,13 @@ rule build_pars_dsp: check_in_cycle=check_in_cycle, ), output: - out_file = get_pattern_pars( + out_file=get_pattern_pars( setup, "dsp", extension="lh5", check_in_cycle=check_in_cycle, ), - out_db = get_pattern_pars(setup, "dsp", check_in_cycle=check_in_cycle), + out_db=get_pattern_pars(setup, "dsp", check_in_cycle=check_in_cycle), group: "merge-dsp" shell: diff --git a/rules/psp.smk b/rules/psp.smk index df9b7be..a957e4b 100644 --- a/rules/psp.smk +++ b/rules/psp.smk @@ -30,13 +30,11 @@ pars_key_resolve.write_par_catalog( psp_rules = {} for key, dataset in part.datasets.items(): for partition in dataset.keys(): + rule: input: dsp_pars=part.get_par_files( - f"{par_dsp_path(setup)}/validity.jsonl", - partition, - key, - tier="dsp" + f"{par_dsp_path(setup)}/validity.jsonl", partition, key, tier="dsp" ), dsp_objs=part.get_par_files( f"{par_dsp_path(setup)}/validity.jsonl", @@ -47,10 +45,7 @@ for key, dataset in part.datasets.items(): extension="pkl", ), dsp_plots=part.get_plt_files( - f"{par_dsp_path(setup)}/validity.jsonl", - partition, - key, - tier="dsp" + f"{par_dsp_path(setup)}/validity.jsonl", partition, key, tier="dsp" ), wildcard_constraints: channel=part.get_wildcard_constraints(partition, key), @@ -61,26 +56,32 @@ for key, dataset in part.datasets.items(): f"{par_psp_path(setup)}/validity.jsonl", partition, key, tier="psp" ), output: - psp_pars=temp(part.get_par_files( - f"{par_psp_path(setup)}/validity.jsonl", - partition, - key, - tier="psp" - )), - psp_objs=temp(part.get_par_files( - f"{par_psp_path(setup)}/validity.jsonl", - partition, - key, - tier="psp", - name="objects", - extension="pkl", - )), - psp_plots=temp(part.get_plt_files( - f"{par_psp_path(setup)}/validity.jsonl", - partition, - key, - tier="psp" - )), + psp_pars=temp( + part.get_par_files( + f"{par_psp_path(setup)}/validity.jsonl", + partition, + key, + tier="psp", + ) + ), + psp_objs=temp( + part.get_par_files( + f"{par_psp_path(setup)}/validity.jsonl", + partition, + key, 
+ tier="psp", + name="objects", + extension="pkl", + ) + ), + psp_plots=temp( + part.get_plt_files( + f"{par_psp_path(setup)}/validity.jsonl", + partition, + key, + tier="psp", + ) + ), log: part.get_log_file( f"{par_psp_path(setup)}/validity.jsonl", @@ -108,9 +109,7 @@ for key, dataset in part.datasets.items(): "--input {input.dsp_pars} " "--output {output.psp_pars} " - set_last_rule_name( - workflow, f"{key}-{partition}-build_par_psp" - ) + set_last_rule_name(workflow, f"{key}-{partition}-build_par_psp") if key in psp_rules: psp_rules[key].append(list(workflow.rules)[-1]) @@ -123,9 +122,7 @@ for key, dataset in part.datasets.items(): rule build_par_psp: input: dsp_pars=get_pattern_pars_tmp_channel(setup, "dsp"), - dsp_objs=get_pattern_pars_tmp_channel( - setup, "dsp", "objects", extension="pkl" - ), + dsp_objs=get_pattern_pars_tmp_channel(setup, "dsp", "objects", extension="pkl"), dsp_plots=get_pattern_plts_tmp_channel(setup, "dsp"), params: datatype="cal", @@ -133,9 +130,9 @@ rule build_par_psp: timestamp="{timestamp}", output: psp_pars=temp(get_pattern_pars_tmp_channel(setup, "psp")), - psp_objs=temp(get_pattern_pars_tmp_channel( - setup, "psp", "objects", extension="pkl" - )), + psp_objs=temp( + get_pattern_pars_tmp_channel(setup, "psp", "objects", extension="pkl") + ), psp_plots=temp(get_pattern_plts_tmp_channel(setup, "psp")), log: get_pattern_log_channel(setup, "pars_psp"), @@ -158,6 +155,7 @@ rule build_par_psp: "--input {input.dsp_pars} " "--output {output.psp_pars} " + fallback_psp_rule = list(workflow.rules)[-1] rule_order_list = [] ordered = OrderedDict(psp_rules) @@ -165,7 +163,7 @@ ordered.move_to_end("default") for key, items in ordered.items(): rule_order_list += [item.name for item in items] rule_order_list.append(fallback_psp_rule.name) -workflow._ruleorder.add(*rule_order_list) # [::-1] +workflow._ruleorder.add(*rule_order_list) # [::-1] rule build_pars_psp_objects: @@ -181,7 +179,7 @@ rule build_pars_psp_objects: name="objects", extension="dir", check_in_cycle=check_in_cycle, - ) + ), group: "merge-hit" shell: @@ -190,11 +188,12 @@ rule build_pars_psp_objects: "--input {input} " "--output {output} " + rule build_plts_psp: input: lambda wildcards: read_filelist_plts_cal_channel(wildcards, "psp"), output: - get_pattern_plts(setup, "psp") + get_pattern_plts(setup, "psp"), group: "merge-hit" shell: @@ -203,17 +202,18 @@ rule build_plts_psp: "--input {input} " "--output {output} " + rule build_pars_psp: input: - infiles = lambda wildcards: read_filelist_pars_cal_channel(wildcards, "psp"), - plts = get_pattern_plts(setup, "psp"), - objects = get_pattern_pars( + infiles=lambda wildcards: read_filelist_pars_cal_channel(wildcards, "psp"), + plts=get_pattern_plts(setup, "psp"), + objects=get_pattern_pars( setup, "psp", name="objects", extension="dir", check_in_cycle=check_in_cycle, - ) + ), output: get_pattern_pars(setup, "psp", check_in_cycle=check_in_cycle), group: @@ -256,4 +256,4 @@ rule build_psp: "--input {input.raw_file} " "--output {output.tier_file} " "--db_file {output.db_file} " - "--pars_file {input.pars_file}" \ No newline at end of file + "--pars_file {input.pars_file}" diff --git a/scripts/complete_run.py b/scripts/complete_run.py index f5e900d..5829f1a 100644 --- a/scripts/complete_run.py +++ b/scripts/complete_run.py @@ -271,9 +271,8 @@ def build_file_dbs(input_files, output_dir): json.dump(file_db_config, w, indent=2) build_file_dbs(snakemake.params.tmp_par_path, snakemake.params.filedb_path) - os.remove(os.path.join(snakemake.params.filedb_path, 
'file_db_config.json')) + os.remove(os.path.join(snakemake.params.filedb_path, "file_db_config.json")) build_valid_keys(snakemake.params.tmp_par_path, snakemake.params.valid_keys_path) pathlib.Path(snakemake.output.gen_output).touch() - diff --git a/scripts/merge_channels.py b/scripts/merge_channels.py index b45d16e..bc8337c 100644 --- a/scripts/merge_channels.py +++ b/scripts/merge_channels.py @@ -4,13 +4,12 @@ import pathlib import pickle as pkl import shelve + +import lgdo.lh5 as lh5 +import numpy as np from legendmeta.catalog import Props from util.FileKey import ChannelProcKey -import numpy as np - -import lgdo.lh5 as lh5 -from lgdo import Array def replace_path(d, old_path, new_path): if isinstance(d, dict): @@ -19,23 +18,32 @@ def replace_path(d, old_path, new_path): elif isinstance(d, list): for i in range(len(d)): d[i] = replace_path(d[i], old_path, new_path) - elif isinstance(d, str): - if old_path in d: - d = d.replace(old_path, new_path) + elif isinstance(d, str) and old_path in d: + d = d.replace(old_path, new_path) return d + argparser = argparse.ArgumentParser() argparser.add_argument("--input", help="input file", nargs="*", type=str, required=True) argparser.add_argument("--output", help="output file", type=str, required=True) -argparser.add_argument("--in_db", help="in db file (used for when lh5 files refered to in db)", type=str, required=False) -argparser.add_argument("--out_db", help="lh5 file (used for when lh5 files refered to in db)", type=str, required=False) +argparser.add_argument( + "--in_db", + help="in db file (used for when lh5 files referred to in db)", + type=str, + required=False, +) +argparser.add_argument( + "--out_db", + help="lh5 file (used for when lh5 files referred to in db)", + type=str, + required=False, +) args = argparser.parse_args() -# change to only have 1 output file for mutliple inputs +# change to only have 1 output file for multiple inputs # don't care about processing step, check if extension matches - channel_files = args.input file_extension = pathlib.Path(args.output).suffix @@ -51,7 +59,7 @@ def replace_path(d, old_path, new_path): pathlib.Path(os.path.dirname(args.output)).mkdir(parents=True, exist_ok=True) - + if file_extension == ".json": out_dict = {} for channel in channel_files: @@ -62,7 +70,8 @@ def replace_path(d, old_path, new_path): channel_name = fkey.channel out_dict[channel_name] = channel_dict else: - raise RuntimeError("Output file extension does not match input file extension") + msg = "Output file extension does not match input file extension" + raise RuntimeError(msg) with open(temp_output, "w") as w: json.dump(out_dict, w, indent=4) @@ -77,7 +86,7 @@ def replace_path(d, old_path, new_path): fkey = ChannelProcKey.get_filekey_from_pattern(os.path.basename(channel)) channel_name = fkey.channel out_dict[channel_name] = channel_dict - + with open(temp_output, "wb") as w: pkl.dump(out_dict, w, protocol=pkl.HIGHEST_PROTOCOL) @@ -97,7 +106,7 @@ def replace_path(d, old_path, new_path): shelf[channel_name] = channel_dict if len(common_dict) > 0: shelf["common"] = common_dict - + elif file_extension == ".lh5": sto = lh5.LH5Store() @@ -109,23 +118,21 @@ def replace_path(d, old_path, new_path): fkey = ChannelProcKey.get_filekey_from_pattern(os.path.basename(channel)) channel_name = fkey.channel - tb_in = sto.read( - f"{channel_name}", - channel - )[0] + tb_in = sto.read(f"{channel_name}", channel)[0] sto.write( tb_in, - name = channel_name, - lh5_file = temp_output, + name=channel_name, + lh5_file=temp_output, wo_mode="a", ) if 
args.in_db: db_dict[channel_name] = replace_path(db_dict[channel_name], channel, args.output) else: - raise RuntimeError("Output file extension does not match input file extension") + msg = "Output file extension does not match input file extension" + raise RuntimeError(msg) if args.out_db: with open(args.out_db, "w") as w: json.dump(db_dict, w, indent=4) - os.rename(temp_output, out_file) \ No newline at end of file + os.rename(temp_output, out_file) diff --git a/scripts/par_psp.py b/scripts/par_psp.py index 400edbc..3b07edf 100644 --- a/scripts/par_psp.py +++ b/scripts/par_psp.py @@ -1,16 +1,17 @@ import argparse import json import os -import pathlib -from legendmeta.catalog import Props +import pickle as pkl +from datetime import datetime + +import matplotlib as mpl +import matplotlib.dates as mdates +import matplotlib.pyplot as plt +import numpy as np from legendmeta import LegendMetadata +from legendmeta.catalog import Props from util.FileKey import ChannelProcKey -import numpy as np -import matplotlib.pyplot as plt -import matplotlib as mpl -import matplotlib.dates as mdates -from datetime import datetime -import pickle as pkl + mpl.use("Agg") @@ -18,9 +19,13 @@ argparser.add_argument("--input", help="input files", nargs="*", type=str, required=True) argparser.add_argument("--output", help="output file", nargs="*", type=str, required=True) argparser.add_argument("--in_plots", help="input plot files", nargs="*", type=str, required=False) -argparser.add_argument("--out_plots", help="output plot files", nargs="*", type=str, required=False) +argparser.add_argument( + "--out_plots", help="output plot files", nargs="*", type=str, required=False +) argparser.add_argument("--in_obj", help="input object files", nargs="*", type=str, required=False) -argparser.add_argument("--out_obj", help="output object files", nargs="*", type=str, required=False) +argparser.add_argument( + "--out_obj", help="output object files", nargs="*", type=str, required=False +) argparser.add_argument("--log", help="log_file", type=str) argparser.add_argument("--configs", help="configs", type=str, required=True) @@ -32,9 +37,9 @@ conf = LegendMetadata(path=args.configs) configs = conf.on(args.timestamp, system=args.datatype) -merge_config = Props.read_from(configs["snakemake_rules"]["pars_psp"]["inputs"]["psp_config"][ - args.channel -]) +merge_config = Props.read_from( + configs["snakemake_rules"]["pars_psp"]["inputs"]["psp_config"][args.channel] +) ave_fields = merge_config["average_fields"] @@ -58,8 +63,8 @@ else: tmp_dict = {} in_dicts[tstamp]["dsp"] = tmp_dict - for i,key in enumerate(keys): - if i == len(keys)-1: + for i, key in enumerate(keys): + if i == len(keys) - 1: tmp_dict[key] = val else: if key in tmp_dict: @@ -70,42 +75,36 @@ if isinstance(vals[0], str): if "*" in vals[0]: unit = vals[0].split("*")[1] - if "." in vals[0]: - rounding = len(val.split("*")[0].split(".")[-1]) - else: - rounding = 16 + rounding = len(val.split("*")[0].split(".")[-1]) if "." 
in vals[0] else 16 vals = np.array([float(val.split("*")[0]) for val in vals]) else: unit = None rounding = 16 else: - vals=np.array(vals) + vals = np.array(vals) unit = None - if len(vals[~np.isnan(vals)]) ==0: - mean_val = np.nan - else: - mean_val = np.nanmean(vals) - if unit is not None: - mean = f"{round(mean_val, rounding)}*{unit}" - else: - mean = mean_val + rounding = 16 + + mean_val = np.nan if len(vals[~np.isnan(vals)]) == 0 else np.nanmean(vals) + mean = f"{round(mean_val, rounding)}*{unit}" if unit is not None else mean_val + for tstamp in in_dicts: val = in_dicts[tstamp] for i, key in enumerate(keys): - if i == len(keys)-1: - val[key]= mean + if i == len(keys) - 1: + val[key] = mean else: val = val[key] - + fig = plt.figure() - plt.scatter([datetime.strptime(tstamp,'%Y%m%dT%H%M%SZ') for tstamp in in_dicts] , vals) - plt.axhline(y=mean_val, color='r', linestyle='-') + plt.scatter([datetime.strptime(tstamp, "%Y%m%dT%H%M%SZ") for tstamp in in_dicts], vals) + plt.axhline(y=mean_val, color="r", linestyle="-") plt.xlabel("time") if unit is not None: plt.ylabel(f"value {unit}") else: plt.ylabel("value") - plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%d/%m/%y')) + plt.gca().xaxis.set_major_formatter(mdates.DateFormatter("%d/%m/%y")) plt.gcf().autofmt_xdate() plt.title(f"{field}") plot_dict[field] = fig @@ -146,4 +145,4 @@ else: new_obj_dict = {} with open(file, "wb") as f: - pkl.dump(new_obj_dict, f, protocol=pkl.HIGHEST_PROTOCOL) \ No newline at end of file + pkl.dump(new_obj_dict, f, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_dsp_dplms.py b/scripts/pars_dsp_dplms.py index 67b8bdd..60143e7 100644 --- a/scripts/pars_dsp_dplms.py +++ b/scripts/pars_dsp_dplms.py @@ -15,8 +15,8 @@ import numpy as np from legendmeta import LegendMetadata from legendmeta.catalog import Props -from pygama.pargen.dplms_ge_dict import dplms_ge_dict from lgdo import Array, Table +from pygama.pargen.dplms_ge_dict import dplms_ge_dict argparser = argparse.ArgumentParser() argparser.add_argument("--fft_raw_filelist", help="fft_raw_filelist", type=str) @@ -54,7 +54,7 @@ dsp_config = configs["snakemake_rules"]["pars_dsp_dplms"]["inputs"]["proc_chain"][args.channel] dplms_json = configs["snakemake_rules"]["pars_dsp_dplms"]["inputs"]["dplms_pars"][args.channel] -dplms_dict = Props.read_from(dplms_json) +dplms_dict = Props.read_from(dplms_json) db_dict = Props.read_from(args.database) @@ -75,18 +75,14 @@ log.info("\nRunning event selection") peaks_kev = np.array(dplms_dict["peaks_kev"]) kev_widths = [tuple(kev_width) for kev_width in dplms_dict["kev_widths"]] - + peaks_rounded = [int(peak) for peak in peaks_kev] - peaks = sto.read(f"{args.channel}/raw", args.peak_file , field_mask=["peak"]) [0]["peak"].nda + peaks = sto.read(f"{args.channel}/raw", args.peak_file, field_mask=["peak"])[0]["peak"].nda ids = np.in1d(peaks, peaks_rounded) peaks = peaks[ids] - idx_list = [np.where(peaks==peak)[0] for peak in peaks_rounded] + idx_list = [np.where(peaks == peak)[0] for peak in peaks_rounded] - raw_cal = sto.read( - f"{args.channel}/raw", - args.peak_file, - idx=ids - )[0] + raw_cal = sto.read(f"{args.channel}/raw", args.peak_file, idx=ids)[0] log.info(f"Time to run event selection {(time.time()-t1):.2f} s, total events {len(raw_cal)}") if isinstance(dsp_config, (str, list)): @@ -104,8 +100,8 @@ if args.inplots: with open(args.inplots, "rb") as r: inplot_dict = pkl.load(r) - inplot_dict.update({"dplms":plot_dict}) - + inplot_dict.update({"dplms": plot_dict}) + else: out_dict = dplms_ge_dict( raw_fft, 
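The selection above makes two passes over the peak file produced by pars_dsp_event_selection.py: a cheap read of just the integer "peak" label column, followed by a single indexed read of the full waveform table for the matching rows. A minimal standalone sketch of that pattern, mirroring the read calls in the script (the file path, channel name and peak energies are placeholders, and the boolean mask is passed straight to idx= exactly as above):

    import numpy as np
    import lgdo.lh5 as lh5

    sto = lh5.LH5Store()
    peak_file = "par_dsp_peaks.lh5"  # placeholder path to the peak-selected raw file
    channel = "ch1027201"            # placeholder channel name

    peaks_rounded = [583, 2614]      # placeholder peak energies in keV

    # pass 1: read only the per-event peak label, no waveforms
    peaks = sto.read(f"{channel}/raw", peak_file, field_mask=["peak"])[0]["peak"].nda
    ids = np.in1d(peaks, peaks_rounded)
    peaks = peaks[ids]

    # pass 2: read the full rows (waveforms included) for the selected events only
    raw_cal = sto.read(f"{channel}/raw", peak_file, idx=ids)[0]

    # per-peak index lists into the selected table, as consumed by dplms_ge_dict
    idx_list = [np.where(peaks == peak)[0] for peak in peaks_rounded]
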
@@ -115,28 +111,30 @@ dplms_dict, ) - coeffs = out_dict["dplms"].pop("coefficients") - dplms_pars = Table(col_dict={"coefficients":Array(coeffs)}) - out_dict["dplms"]["coefficients"] =f"loadlh5('{args.lh5_path}', '{args.channel}/dplms/coefficients')" + coeffs = out_dict["dplms"].pop("coefficients") + dplms_pars = Table(col_dict={"coefficients": Array(coeffs)}) + out_dict["dplms"][ + "coefficients" + ] = f"loadlh5('{args.lh5_path}', '{args.channel}/dplms/coefficients')" log.info(f"DPLMS creation finished in {(time.time()-t0)/60} minutes") else: out_dict = {} - dplms_pars = Table(col_dict={"coefficients":Array([])}) + dplms_pars = Table(col_dict={"coefficients": Array([])}) if args.inplots: with open(args.inplots, "rb") as r: inplot_dict = pkl.load(r) else: - inplot_dict={} + inplot_dict = {} db_dict.update(out_dict) -pathlib.Path(os.path.dirname(args.lh5_path)).mkdir(parents=True, exist_ok=True) +pathlib.Path(os.path.dirname(args.lh5_path)).mkdir(parents=True, exist_ok=True) sto.write( - Table(col_dict={"dplms":dplms_pars}), - name = args.channel, + Table(col_dict={"dplms": dplms_pars}), + name=args.channel, lh5_file=args.lh5_path, - wo_mode="overwrite" + wo_mode="overwrite", ) pathlib.Path(os.path.dirname(args.dsp_pars)).mkdir(parents=True, exist_ok=True) @@ -146,4 +144,4 @@ if args.plot_path: pathlib.Path(os.path.dirname(args.plot_path)).mkdir(parents=True, exist_ok=True) with open(args.plot_path, "wb") as f: - pkl.dump(inplot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) \ No newline at end of file + pkl.dump(inplot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_dsp_eopt.py b/scripts/pars_dsp_eopt.py index b176c65..4b37737 100644 --- a/scripts/pars_dsp_eopt.py +++ b/scripts/pars_dsp_eopt.py @@ -14,13 +14,16 @@ import lgdo.lh5 as lh5 import numpy as np -from pygama.math.distributions import hpge_peak -import pygama.pargen.energy_optimisation as om import sklearn.gaussian_process.kernels as ker from dspeed.units import unit_registry as ureg from legendmeta import LegendMetadata from legendmeta.catalog import Props -from pygama.pargen.dsp_optimize import run_one_dsp, BayesianOptimizer, run_bayesian_optimisation +from pygama.math.distributions import hpge_peak +from pygama.pargen.dsp_optimize import ( + BayesianOptimizer, + run_bayesian_optimisation, + run_one_dsp, +) warnings.filterwarnings(action="ignore", category=RuntimeWarning) @@ -71,8 +74,6 @@ db_dict = Props.read_from(args.decay_const) if opt_dict.pop("run_eopt") is True: - - peaks_kev = np.array(opt_dict["peaks"]) kev_widths = [tuple(kev_width) for kev_width in opt_dict["kev_widths"]] @@ -109,16 +110,12 @@ ) peaks_rounded = [int(peak) for peak in peaks_kev] - peaks = sto.read(f"{args.channel}/raw",args.peak_file , field_mask=["peak"]) [0]["peak"].nda + peaks = sto.read(f"{args.channel}/raw", args.peak_file, field_mask=["peak"])[0]["peak"].nda ids = np.in1d(peaks, peaks_rounded) peaks = peaks[ids] - idx_list = [np.where(peaks==peak)[0] for peak in peaks_rounded] + idx_list = [np.where(peaks == peak)[0] for peak in peaks_rounded] - tb_data = sto.read( - f"{args.channel}/raw", - args.peak_file, - idx=ids - )[0] + tb_data = sto.read(f"{args.channel}/raw", args.peak_file, idx=ids)[0] t1 = time.time() log.info(f"Data Loaded in {(t1-t0)/60} minutes") @@ -256,8 +253,8 @@ batch_size=opt_dict["batch_size"], kernel=kernel, sampling_rate=waveform_sampling, - fom_value = out_field, - fom_error = out_err_field + fom_value=out_field, + fom_error=out_err_field, ) bopt_cusp.lambda_param = lambda_param bopt_cusp.add_dimension("cusp", 
"sigma", 0.5, 16, True, "us") @@ -267,8 +264,8 @@ batch_size=opt_dict["batch_size"], kernel=kernel, sampling_rate=waveform_sampling, - fom_value = out_field, - fom_error = out_err_field + fom_value=out_field, + fom_error=out_err_field, ) bopt_zac.lambda_param = lambda_param bopt_zac.add_dimension("zac", "sigma", 0.5, 16, True, "us") @@ -278,8 +275,8 @@ batch_size=opt_dict["batch_size"], kernel=kernel, sampling_rate=waveform_sampling, - fom_value = out_field, - fom_error = out_err_field + fom_value=out_field, + fom_error=out_err_field, ) bopt_trap.lambda_param = lambda_param bopt_trap.add_dimension("etrap", "rise", 1, 12, True, "us") @@ -350,7 +347,7 @@ "expression": "trapEftp*(1+dt_eff*a)", "parameters": {"a": round(bopt_trap.optimal_results["alpha"], 9)}, } - if "ctc_params" in db_dict: + if "ctc_params" in db_dict: db_dict["ctc_params"].update(out_alpha_dict) else: db_dict.update({"ctc_params": out_alpha_dict}) diff --git a/scripts/pars_dsp_event_selection.py b/scripts/pars_dsp_event_selection.py index 6fc8292..3802056 100644 --- a/scripts/pars_dsp_event_selection.py +++ b/scripts/pars_dsp_event_selection.py @@ -3,7 +3,6 @@ import logging import os import pathlib -import pickle as pkl import time import warnings @@ -12,63 +11,79 @@ os.environ["DSPEED_CACHE"] = "false" os.environ["DSPEED_BOUNDSCHECK"] = "false" +from bisect import bisect_left + import lgdo import lgdo.lh5 as lh5 import numpy as np -from bisect import bisect_left -from legendmeta import LegendMetadata -from legendmeta.catalog import Props -from pygama.pargen.data_cleaning import get_tcm_pulser_ids, generate_cuts, get_keys import pygama.math.histogram as pgh import pygama.pargen.energy_cal as pgc +from legendmeta import LegendMetadata +from legendmeta.catalog import Props +from pygama.pargen.data_cleaning import generate_cuts, get_keys, get_tcm_pulser_ids from pygama.pargen.dsp_optimize import run_one_dsp warnings.filterwarnings(action="ignore", category=RuntimeWarning) -def get_out_data(raw_data, dsp_data,cut_dict, e_lower_lim, e_upper_lim, - ecal_pars, raw_dict, peak, final_cut_field="is_valid_cal", - energy_param="trapTmax"): + +def get_out_data( + raw_data, + dsp_data, + cut_dict, + e_lower_lim, + e_upper_lim, + ecal_pars, + raw_dict, + peak, + final_cut_field="is_valid_cal", + energy_param="trapTmax", +): for outname, info in cut_dict.items(): outcol = dsp_data.eval(info["expression"], info.get("parameters", None)) dsp_data.add_column(outname, outcol) - + for outname, info in raw_dict.items(): outcol = raw_data.eval(info["expression"], info.get("parameters", None)) raw_data.add_column(outname, outcol) - final_mask = (dsp_data[energy_param].nda > e_lower_lim) & (dsp_data[energy_param].nda < e_upper_lim)&(dsp_data[final_cut_field].nda) - + final_mask = ( + (dsp_data[energy_param].nda > e_lower_lim) + & (dsp_data[energy_param].nda < e_upper_lim) + & (dsp_data[final_cut_field].nda) + ) + wavefrom_windowed = lgdo.WaveformTable( - t0=raw_data["waveform_windowed"]["t0"].nda[final_mask], - t0_units=raw_data["waveform_windowed"]["t0"].attrs["units"], - dt=raw_data["waveform_windowed"]["dt"].nda[final_mask], - dt_units=raw_data["waveform_windowed"]["dt"].attrs["units"], - values=raw_data["waveform_windowed"]["values"].nda[final_mask] -) + t0=raw_data["waveform_windowed"]["t0"].nda[final_mask], + t0_units=raw_data["waveform_windowed"]["t0"].attrs["units"], + dt=raw_data["waveform_windowed"]["dt"].nda[final_mask], + dt_units=raw_data["waveform_windowed"]["dt"].attrs["units"], + 
values=raw_data["waveform_windowed"]["values"].nda[final_mask], + ) wavefrom_presummed = lgdo.WaveformTable( - t0=raw_data["waveform_presummed"]["t0"].nda[final_mask], - t0_units=raw_data["waveform_presummed"]["t0"].attrs["units"], - dt=raw_data["waveform_presummed"]["dt"].nda[final_mask], - dt_units=raw_data["waveform_presummed"]["dt"].attrs["units"], - values=raw_data["waveform_presummed"]["values"].nda[final_mask] -) - - - out_tbl = lgdo.Table(col_dict = {"waveform_presummed": wavefrom_presummed, - "waveform_windowed":wavefrom_windowed, - "presum_rate":lgdo.Array(raw_data["presum_rate"].nda[final_mask]), - "timestamp":lgdo.Array(raw_data["timestamp"].nda[final_mask]), - "baseline":lgdo.Array(raw_data["baseline"].nda[final_mask]), - "daqenergy":lgdo.Array(raw_data["daqenergy"].nda[final_mask]), - "daqenergy_cal":lgdo.Array(raw_data["daqenergy_cal"].nda[final_mask]), - "trapTmax_cal":lgdo.Array(dsp_data["trapTmax"].nda[final_mask]*ecal_pars), - "peak":lgdo.Array(np.full(len(np.where(final_mask)[0]),int(peak))) - }) + t0=raw_data["waveform_presummed"]["t0"].nda[final_mask], + t0_units=raw_data["waveform_presummed"]["t0"].attrs["units"], + dt=raw_data["waveform_presummed"]["dt"].nda[final_mask], + dt_units=raw_data["waveform_presummed"]["dt"].attrs["units"], + values=raw_data["waveform_presummed"]["values"].nda[final_mask], + ) + + out_tbl = lgdo.Table( + col_dict={ + "waveform_presummed": wavefrom_presummed, + "waveform_windowed": wavefrom_windowed, + "presum_rate": lgdo.Array(raw_data["presum_rate"].nda[final_mask]), + "timestamp": lgdo.Array(raw_data["timestamp"].nda[final_mask]), + "baseline": lgdo.Array(raw_data["baseline"].nda[final_mask]), + "daqenergy": lgdo.Array(raw_data["daqenergy"].nda[final_mask]), + "daqenergy_cal": lgdo.Array(raw_data["daqenergy_cal"].nda[final_mask]), + "trapTmax_cal": lgdo.Array(dsp_data["trapTmax"].nda[final_mask] * ecal_pars), + "peak": lgdo.Array(np.full(len(np.where(final_mask)[0]), int(peak))), + } + ) return out_tbl, len(np.where(final_mask)[0]) if __name__ == "__main__": - argparser = argparse.ArgumentParser() argparser.add_argument("--raw_filelist", help="raw_filelist", type=str) argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, required=False) @@ -95,29 +110,28 @@ def get_out_data(raw_data, dsp_data,cut_dict, e_lower_lim, e_upper_lim, logging.getLogger("matplotlib").setLevel(logging.INFO) logging.getLogger("dspeed.processing_chain").setLevel(logging.INFO) - log = logging.getLogger(__name__) sto = lh5.LH5Store() t0 = time.time() conf = LegendMetadata(path=args.configs) configs = conf.on(args.timestamp, system=args.datatype) - dsp_config = configs["snakemake_rules"]["pars_dsp_peak_selection"]["inputs"]["processing_chain"][ + dsp_config = configs["snakemake_rules"]["pars_dsp_peak_selection"]["inputs"][ + "processing_chain" + ][args.channel] + peak_json = configs["snakemake_rules"]["pars_dsp_peak_selection"]["inputs"]["peak_config"][ args.channel ] - peak_json = configs["snakemake_rules"]["pars_dsp_peak_selection"]["inputs"]["peak_config"][args.channel] peak_dict = Props.read_from(peak_json) db_dict = Props.read_from(args.decay_const) pathlib.Path(os.path.dirname(args.peak_file)).mkdir(parents=True, exist_ok=True) if peak_dict.pop("run_selection") is True: - rng = np.random.default_rng() rand_num = f"{rng.integers(0,99999):05d}" temp_output = f"{args.peak_file}.{rand_num}" - with open(args.raw_filelist) as f: files = f.read().splitlines() raw_files = sorted(files) @@ -136,7 +150,8 @@ def get_out_data(raw_data, dsp_data,cut_dict, 
e_lower_lim, e_upper_lim, tcm_files, args.channel, peak_dict["pulser_multiplicity_threshold"] ) else: - raise ValueError("No pulser file or tcm filelist provided") + msg = "No pulser file or tcm filelist provided" + raise ValueError(msg) raw_dict = Props.read_from(args.raw_cal)[args.channel]["pars"]["operations"] @@ -149,16 +164,13 @@ def get_out_data(raw_data, dsp_data,cut_dict, e_lower_lim, e_upper_lim, lh5_path = f"{args.channel}/raw" - if not isinstance(kev_widths, list): kev_widths = [kev_widths] if lh5_path[-1] != "/": lh5_path += "/" - raw_fields = [ - field.replace(lh5_path, "") for field in lh5.ls(raw_files[0], lh5_path) - ] + raw_fields = [field.replace(lh5_path, "") for field in lh5.ls(raw_files[0], lh5_path)] tb = sto.read(lh5_path, raw_files, field_mask=["daqenergy"])[0] @@ -169,18 +181,23 @@ def get_out_data(raw_data, dsp_data,cut_dict, e_lower_lim, e_upper_lim, rough_energy = tb["daqenergy_cal"].nda masks = {} - for peak, kev_width in zip(peaks_kev,kev_widths) : - e_mask = (rough_energy > peak - 1.1* kev_width[0]) & (rough_energy < peak + 1.1* kev_width[0]) & (~mask) + for peak, kev_width in zip(peaks_kev, kev_widths): + e_mask = ( + (rough_energy > peak - 1.1 * kev_width[0]) + & (rough_energy < peak + 1.1 * kev_width[0]) + & (~mask) + ) masks[peak] = np.where(e_mask)[0] log.debug(f"{len(masks[peak])} events found in energy range for {peak}") - input_data = sto.read(f"{lh5_path}", raw_files, n_rows=10000)[0] + input_data = sto.read(f"{lh5_path}", raw_files, n_rows=10000)[0] if isinstance(dsp_config, str): dsp_config = Props.read_from(dsp_config) - dsp_config["outputs"] = get_keys(dsp_config["outputs"], cut_parameters) + [ - energy_parameter + dsp_config["outputs"] = [ + *get_keys(dsp_config["outputs"], cut_parameters), + energy_parameter, ] log.debug("Processing data") @@ -193,15 +210,16 @@ def get_out_data(raw_data, dsp_data,cut_dict, e_lower_lim, e_upper_lim, cut_dict = None pk_dicts = {} - for peak, kev_width in zip(peaks_kev,kev_widths): - pk_dicts[peak] = {"idxs":(masks[peak],), - "n_rows_read":0, - "obj_buf_start":0, - "obj_buf":None, - "kev_width":kev_width - } - - for i,file in enumerate(raw_files): + for peak, kev_width in zip(peaks_kev, kev_widths): + pk_dicts[peak] = { + "idxs": (masks[peak],), + "n_rows_read": 0, + "obj_buf_start": 0, + "obj_buf": None, + "kev_width": kev_width, + } + + for file in raw_files: log.debug(os.path.basename(file)) for peak, peak_dict in pk_dicts.items(): if peak_dict["idxs"] is not None: @@ -213,7 +231,7 @@ def get_out_data(raw_data, dsp_data,cut_dict, e_lower_lim, e_upper_lim, # now split idx into idx_i and the remainder idx_i = (peak_dict["idxs"][0][:n_rows_to_read_i],) peak_dict["idxs"] = (peak_dict["idxs"][0][n_rows_to_read_i:] - n_rows_i,) - if len(idx_i[0])>0: + if len(idx_i[0]) > 0: peak_dict["obj_buf"], n_rows_read_i = sto.read( lh5_path, file, @@ -226,9 +244,8 @@ def get_out_data(raw_data, dsp_data,cut_dict, e_lower_lim, e_upper_lim, peak_dict["n_rows_read"] += n_rows_read_i log.debug(f'{peak}: {peak_dict["n_rows_read"]}') peak_dict["obj_buf_start"] += n_rows_read_i - if peak_dict["n_rows_read"] >=10000 or file ==raw_files[-1]: + if peak_dict["n_rows_read"] >= 10000 or file == raw_files[-1]: if "e_lower_lim" not in peak_dict: - tb_out = run_one_dsp(peak_dict["obj_buf"], dsp_config, db_dict=db_dict) energy = tb_out[energy_parameter].nda @@ -245,15 +262,17 @@ def get_out_data(raw_data, dsp_data,cut_dict, e_lower_lim, e_upper_lim, var, [peak_loc], n_to_fit=7, - )[ - 0 - ][0] + )[0][0] if mu is None or np.isnan(mu): 
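                            # fit failed: fall back to the raw histogram maximum for the
                            # peak position and open a generous 1.5x keV window before
                            # re-histogramming and re-fitting the peak top below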
log.debug("Fit failed, using max guess") rough_adc_to_kev = peak / peak_loc - e_lower_lim = peak_loc - (1.5 * peak_dict["kev_width"][0]) / rough_adc_to_kev - e_upper_lim = peak_loc + (1.5 * peak_dict["kev_width"][1]) / rough_adc_to_kev + e_lower_lim = ( + peak_loc - (1.5 * peak_dict["kev_width"][0]) / rough_adc_to_kev + ) + e_upper_lim = ( + peak_loc + (1.5 * peak_dict["kev_width"][1]) / rough_adc_to_kev + ) hist, bins, var = pgh.get_hist( energy, range=(int(e_lower_lim), int(e_upper_lim)), dx=1 ) @@ -262,53 +281,55 @@ def get_out_data(raw_data, dsp_data,cut_dict, e_lower_lim, e_upper_lim, updated_adc_to_kev = peak / mu e_lower_lim = mu - (peak_dict["kev_width"][0]) / updated_adc_to_kev e_upper_lim = mu + (peak_dict["kev_width"][1]) / updated_adc_to_kev - log.info(f"{peak}: lower lim is :{e_lower_lim}, upper lim is {e_upper_lim}") + log.info( + f"{peak}: lower lim is :{e_lower_lim}, upper lim is {e_upper_lim}" + ) peak_dict["e_lower_lim"] = e_lower_lim peak_dict["e_upper_lim"] = e_upper_lim peak_dict["ecal_par"] = updated_adc_to_kev - out_tbl, n_wfs = get_out_data(peak_dict["obj_buf"], - tb_out, - cut_dict, - e_lower_lim, - e_upper_lim, - peak_dict["ecal_par"], - raw_dict, - int(peak), - final_cut_field=final_cut_field, - energy_param=energy_parameter - ) - sto.write(out_tbl ,name= lh5_path, - lh5_file=temp_output, - wo_mode="a") + out_tbl, n_wfs = get_out_data( + peak_dict["obj_buf"], + tb_out, + cut_dict, + e_lower_lim, + e_upper_lim, + peak_dict["ecal_par"], + raw_dict, + int(peak), + final_cut_field=final_cut_field, + energy_param=energy_parameter, + ) + sto.write(out_tbl, name=lh5_path, lh5_file=temp_output, wo_mode="a") peak_dict["obj_buf"] = None peak_dict["obj_buf_start"] = 0 peak_dict["n_events"] = n_wfs else: tb_out = run_one_dsp(peak_dict["obj_buf"], dsp_config, db_dict=db_dict) - out_tbl, n_wfs = get_out_data(peak_dict["obj_buf"], - tb_out, - cut_dict, - peak_dict["e_lower_lim"], - peak_dict["e_upper_lim"], - peak_dict["ecal_par"], - raw_dict, - int(peak), - final_cut_field=final_cut_field, - energy_param=energy_parameter - ) + out_tbl, n_wfs = get_out_data( + peak_dict["obj_buf"], + tb_out, + cut_dict, + peak_dict["e_lower_lim"], + peak_dict["e_upper_lim"], + peak_dict["ecal_par"], + raw_dict, + int(peak), + final_cut_field=final_cut_field, + energy_param=energy_parameter, + ) peak_dict["n_events"] += n_wfs - sto.write(out_tbl ,name= lh5_path, - lh5_file=temp_output, - wo_mode="a") + sto.write(out_tbl, name=lh5_path, lh5_file=temp_output, wo_mode="a") peak_dict["obj_buf"] = None peak_dict["obj_buf_start"] = 0 if peak_dict["n_events"] >= n_events: peak_dict["idxs"] = None log.debug(f"{peak} has reached the required number of events") - log.debug(f"{peak}: {peak_dict['idxs']}, {peak_dict['idxs'] is not None}") + log.debug( + f"{peak}: {peak_dict['idxs']}, {peak_dict['idxs'] is not None}" + ) else: pathlib.Path(temp_output).touch() - os.rename(temp_output, args.peak_file) \ No newline at end of file + os.rename(temp_output, args.peak_file) diff --git a/scripts/pars_dsp_tau.py b/scripts/pars_dsp_tau.py index 0c150d2..c5b939e 100644 --- a/scripts/pars_dsp_tau.py +++ b/scripts/pars_dsp_tau.py @@ -14,9 +14,9 @@ import numpy as np from legendmeta import LegendMetadata from legendmeta.catalog import Props -from pygama.pargen.extract_tau import ExtractTau -from pygama.pargen.data_cleaning import get_tcm_pulser_ids, get_cut_indexes +from pygama.pargen.data_cleaning import get_cut_indexes, get_tcm_pulser_ids from pygama.pargen.dsp_optimize import run_one_dsp +from 
pygama.pargen.extract_tau import ExtractTau argparser = argparse.ArgumentParser() argparser.add_argument("--configs", help="configs path", type=str, required=True) @@ -100,14 +100,15 @@ tau = ExtractTau(dsp_config, kwarg_dict["wf_field"]) slopes = tb_out["tail_slope"].nda log.debug("Calculating pz constant") - - tau.get_decay_constant(slopes[idxs], tb_data[kwarg_dict["wf_field"]]) + + tau.get_decay_constant(slopes[idxs], tb_data[kwarg_dict["wf_field"]]) if args.plot_path: pathlib.Path(os.path.dirname(args.plot_path)).mkdir(parents=True, exist_ok=True) - plot_dict = tau.plot_waveforms_after_correction(tb_data, "wf_pz", - norm_param=kwarg_dict.get("norm_param", "pz_mean")) + plot_dict = tau.plot_waveforms_after_correction( + tb_data, "wf_pz", norm_param=kwarg_dict.get("norm_param", "pz_mean") + ) plot_dict.update(tau.plot_slopes(slopes[idxs])) with open(args.plot_path, "wb") as f: @@ -118,8 +119,8 @@ if args.pulser_file: pathlib.Path(os.path.dirname(args.pulser_file)).mkdir(parents=True, exist_ok=True) with open(args.pulser_file, "w") as f: - json.dump({"idxs": ids.tolist(), "mask": mask.tolist()} , f, indent=4) + json.dump({"idxs": ids.tolist(), "mask": mask.tolist()}, f, indent=4) pathlib.Path(os.path.dirname(args.output_file)).mkdir(parents=True, exist_ok=True) with open(args.output_file, "w") as f: - json.dump(tau.output_dict, f, indent=4) \ No newline at end of file + json.dump(tau.output_dict, f, indent=4) diff --git a/scripts/util/patterns.py b/scripts/util/patterns.py index 90c8f2c..e1538d0 100644 --- a/scripts/util/patterns.py +++ b/scripts/util/patterns.py @@ -439,12 +439,14 @@ def get_pattern_pars_tmp(setup, tier, name=None, datatype=None): if name is None: return os.path.join( f"{tmp_par_path(setup)}", - "{experiment}-{period}-{run}-"+datatype+"-{timestamp}-par_" + tier + ".json", + "{experiment}-{period}-{run}-" + datatype + "-{timestamp}-par_" + tier + ".json", ) else: return os.path.join( f"{tmp_par_path(setup)}", - "{experiment}-{period}-{run}-"+datatype+"-{timestamp}-par_" + "{experiment}-{period}-{run}-" + + datatype + + "-{timestamp}-par_" + tier + "_" + name diff --git a/scripts/util/utils.py b/scripts/util/utils.py index 8b11b3b..903187a 100644 --- a/scripts/util/utils.py +++ b/scripts/util/utils.py @@ -128,15 +128,21 @@ def par_dsp_path(setup): def par_hit_path(setup): return setup["paths"]["par_hit"] + +def par_evt_path(setup): + return setup["paths"]["par_evt"] + + def par_psp_path(setup): return setup["paths"]["par_psp"] + def par_pht_path(setup): return setup["paths"]["par_pht"] -def par_evt_path(setup): - return setup["paths"]["par_evt"] +def par_pet_path(setup): + return setup["paths"]["par_pet"] def pars_path(setup): From 162369995c28ffdd575ed53ea8489123dbae13f2 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Fri, 22 Mar 2024 16:36:00 +0100 Subject: [PATCH 028/103] Update build_evt.py to support latest pygama.build_evt() --- scripts/build_evt.py | 43 ++++++++++++++++------------------------ scripts/pars_hit_aoe.py | 8 ++++---- scripts/pars_hit_ecal.py | 6 +++--- scripts/pars_hit_lq.py | 8 ++++---- 4 files changed, 28 insertions(+), 37 deletions(-) diff --git a/scripts/build_evt.py b/scripts/build_evt.py index e5febca..606dc50 100644 --- a/scripts/build_evt.py +++ b/scripts/build_evt.py @@ -15,19 +15,6 @@ sto = lh5.LH5Store() -def replace_evt_with_key(dic, new_key): - for key, d in dic.items(): - if isinstance(d, dict): - dic[key] = replace_evt_with_key(d, new_key) - elif isinstance(d, list): - dic[key] = [item.replace("evt", new_key) for item in d] - elif 
isinstance(d, str): - dic[key] = d.replace("evt", new_key) - else: - pass - return dic - - argparser = argparse.ArgumentParser() argparser.add_argument("--hit_file", help="hit file", type=str) argparser.add_argument("--dsp_file", help="dsp file", type=str) @@ -45,8 +32,12 @@ def replace_evt_with_key(dic, new_key): argparser.add_argument("--output", help="output file", type=str) args = argparser.parse_args() -pathlib.Path(os.path.dirname(args.log)).mkdir(parents=True, exist_ok=True) -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") +if args.log is not None: + pathlib.Path(os.path.dirname(args.log)).mkdir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") +else: + logging.basicConfig(level=logging.DEBUG) + logging.getLogger("numba").setLevel(logging.INFO) logging.getLogger("parse").setLevel(logging.INFO) logging.getLogger("lgdo").setLevel(logging.INFO) @@ -88,7 +79,8 @@ def replace_evt_with_key(dic, new_key): else: chans = [] _evt_config["channels"][field] = chans - evt_config[key] = replace_evt_with_key(_evt_config, f"evt/{key}") + + evt_config[key] = _evt_config else: evt_config = {"all": Props.read_from(evt_config_file)} # block for snakemake to fill in channel lists @@ -118,17 +110,16 @@ def replace_evt_with_key(dic, new_key): tables = {} for key, config in evt_config.items(): + datainfo = { + "tcm": (args.tcm_file, "hardware_tcm_1", "ch{}"), + "dsp": (args.dsp_file, "dsp", "ch{}"), + "hit": (args.hit_file, "hit", "ch{}"), + "evt": (None, "evt"), + } + tables[key] = build_evt( - f_tcm=args.tcm_file, - f_dsp=args.dsp_file, - f_hit=args.hit_file, - f_evt=None, - evt_config=config, - evt_group=f"evt/{key}" if key != "all" else "evt", - tcm_group="hardware_tcm_1", - dsp_group="dsp", - hit_group="hit", - tcm_id_table_pattern="ch{}", + datainfo, + config, ) tbl = Table(col_dict=tables) diff --git a/scripts/pars_hit_aoe.py b/scripts/pars_hit_aoe.py index 3b6831f..2f7167b 100644 --- a/scripts/pars_hit_aoe.py +++ b/scripts/pars_hit_aoe.py @@ -221,7 +221,7 @@ def eres_func(x): pkl.dump(out_plot_dict, w, protocol=pkl.HIGHEST_PROTOCOL) pathlib.Path(os.path.dirname(args.hit_pars)).mkdir(parents=True, exist_ok=True) -results_dict = dict(**ecal_dict["results"], aoe = out_dict) +results_dict = dict(**ecal_dict["results"], aoe=out_dict) with open(args.hit_pars, "w") as w: final_hit_dict = { "pars": {"operations": cal_dict}, @@ -231,8 +231,8 @@ def eres_func(x): pathlib.Path(os.path.dirname(args.aoe_results)).mkdir(parents=True, exist_ok=True) final_object_dict = dict( - **object_dict, - aoe=obj, - ) + **object_dict, + aoe=obj, +) with open(args.aoe_results, "wb") as w: pkl.dump(final_object_dict, w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_hit_ecal.py b/scripts/pars_hit_ecal.py index 7f14c54..a7b399e 100644 --- a/scripts/pars_hit_ecal.py +++ b/scripts/pars_hit_ecal.py @@ -283,14 +283,14 @@ def energy_cal_th( if plot in item: param_dict.update({plot: item[plot]}) common_dict.update({key: param_dict}) - plot_dict = {"ecal":plot_dict} + plot_dict = {"ecal": plot_dict} plot_dict["common"] = common_dict with open(args.plot_path, "wb") as f: pkl.dump(plot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) # save output dictionary - output_dict = {"pars": out_dict, "results": {"ecal":result_dict}} + output_dict = {"pars": out_dict, "results": {"ecal": result_dict}} with open(args.save_path, "w") as fp: pathlib.Path(os.path.dirname(args.save_path)).mkdir(parents=True, exist_ok=True) json.dump(output_dict, fp, indent=4) @@ -298,4 
+298,4 @@ def energy_cal_th( # save calibration objects with open(args.results_path, "wb") as fp: pathlib.Path(os.path.dirname(args.results_path)).mkdir(parents=True, exist_ok=True) - pkl.dump({"ecal":ecal_object}, fp, protocol=pkl.HIGHEST_PROTOCOL) + pkl.dump({"ecal": ecal_object}, fp, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_hit_lq.py b/scripts/pars_hit_lq.py index 860029f..3a43a45 100644 --- a/scripts/pars_hit_lq.py +++ b/scripts/pars_hit_lq.py @@ -220,7 +220,7 @@ def eres_func(x): pkl.dump(out_plot_dict, w, protocol=pkl.HIGHEST_PROTOCOL) -results_dict = dict(**eres_dict,lq = out_dict) +results_dict = dict(**eres_dict, lq=out_dict) pathlib.Path(os.path.dirname(args.hit_pars)).mkdir(parents=True, exist_ok=True) with open(args.hit_pars, "w") as w: final_hit_dict = { @@ -231,8 +231,8 @@ def eres_func(x): pathlib.Path(os.path.dirname(args.lq_results)).mkdir(parents=True, exist_ok=True) final_object_dict = dict( - **object_dict, - lq=obj, - ) + **object_dict, + lq=obj, +) with open(args.lq_results, "wb") as w: pkl.dump(final_object_dict, w, protocol=pkl.HIGHEST_PROTOCOL) From 9812097708473dd9bb296f747f39c2c32d038816 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Fri, 29 Mar 2024 13:48:36 +0100 Subject: [PATCH 029/103] make sure arrays float32 --- scripts/build_dsp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/build_dsp.py b/scripts/build_dsp.py index 9906782..a94d547 100644 --- a/scripts/build_dsp.py +++ b/scripts/build_dsp.py @@ -51,7 +51,7 @@ def replace_list_with_array(dic): if isinstance(value, dict): dic[key] = replace_list_with_array(value) elif isinstance(value, list): - dic[key] = np.array(value) + dic[key] = np.array(value, dtype="float32") else: pass return dic From b17cf7b5e0e780bbc36ab8f7c476b7787bbdace5 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Fri, 29 Mar 2024 13:48:54 +0100 Subject: [PATCH 030/103] use median value --- scripts/par_psp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/par_psp.py b/scripts/par_psp.py index 3b07edf..03bfeaf 100644 --- a/scripts/par_psp.py +++ b/scripts/par_psp.py @@ -85,7 +85,7 @@ unit = None rounding = 16 - mean_val = np.nan if len(vals[~np.isnan(vals)]) == 0 else np.nanmean(vals) + mean_val = np.nan if len(vals[~np.isnan(vals)]) == 0 else np.nanmedian(vals) mean = f"{round(mean_val, rounding)}*{unit}" if unit is not None else mean_val for tstamp in in_dicts: From 82e6b3a575f30841be86f638e9b84de342f8dc80 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Fri, 29 Mar 2024 13:49:58 +0100 Subject: [PATCH 031/103] pygama math settings --- scripts/pars_dsp_dplms.py | 2 ++ scripts/pars_dsp_eopt.py | 5 ++++- scripts/pars_dsp_nopt.py | 2 ++ scripts/pars_dsp_tau.py | 28 ++++++++++++++++++---------- 4 files changed, 26 insertions(+), 11 deletions(-) diff --git a/scripts/pars_dsp_dplms.py b/scripts/pars_dsp_dplms.py index 60143e7..bcf1ac0 100644 --- a/scripts/pars_dsp_dplms.py +++ b/scripts/pars_dsp_dplms.py @@ -10,6 +10,8 @@ os.environ["LGDO_BOUNDSCHECK"] = "false" os.environ["DSPEED_CACHE"] = "false" os.environ["DSPEED_BOUNDSCHECK"] = "false" +os.environ["PYGAMA_PARALLEL"] = "false" +os.environ["PYGAMA_FASTMATH"] = "false" import lgdo.lh5 as lh5 import numpy as np diff --git a/scripts/pars_dsp_eopt.py b/scripts/pars_dsp_eopt.py index 4b37737..4af1c37 100644 --- a/scripts/pars_dsp_eopt.py +++ b/scripts/pars_dsp_eopt.py @@ -11,6 +11,8 @@ os.environ["LGDO_BOUNDSCHECK"] = "false" os.environ["DSPEED_CACHE"] = "false" os.environ["DSPEED_BOUNDSCHECK"] = "false" 
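+# NOTE: set before the pygama/dspeed imports below; like the cache/boundscheck
+# toggles above, these are assumed to be read once at library import time
+# (matching the pattern in the sibling pars_dsp_* scripts).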
+os.environ["PYGAMA_PARALLEL"] = "false" +os.environ["PYGAMA_FASTMATH"] = "false" import lgdo.lh5 as lh5 import numpy as np @@ -26,6 +28,8 @@ ) warnings.filterwarnings(action="ignore", category=RuntimeWarning) +warnings.filterwarnings(action="ignore", category=np.RankWarning) + argparser = argparse.ArgumentParser() @@ -45,7 +49,6 @@ argparser.add_argument("--qbb_grid_path", help="qbb_grid_path", type=str) argparser.add_argument("--plot_path", help="plot_path", type=str) - argparser.add_argument("--plot_save_path", help="plot_save_path", type=str, required=False) args = argparser.parse_args() diff --git a/scripts/pars_dsp_nopt.py b/scripts/pars_dsp_nopt.py index d412e92..c2c393d 100644 --- a/scripts/pars_dsp_nopt.py +++ b/scripts/pars_dsp_nopt.py @@ -10,6 +10,8 @@ os.environ["LGDO_BOUNDSCHECK"] = "false" os.environ["DSPEED_CACHE"] = "false" os.environ["DSPEED_BOUNDSCHECK"] = "false" +os.environ["PYGAMA_PARALLEL"] = "false" +os.environ["PYGAMA_FASTMATH"] = "false" import lgdo.lh5 as lh5 import numpy as np diff --git a/scripts/pars_dsp_tau.py b/scripts/pars_dsp_tau.py index c5b939e..1e10ea5 100644 --- a/scripts/pars_dsp_tau.py +++ b/scripts/pars_dsp_tau.py @@ -9,6 +9,8 @@ os.environ["LGDO_BOUNDSCHECK"] = "false" os.environ["DSPEED_CACHE"] = "false" os.environ["DSPEED_BOUNDSCHECK"] = "false" +os.environ["PYGAMA_PARALLEL"] = "false" +os.environ["PYGAMA_FASTMATH"] = "false" import lgdo.lh5 as lh5 import numpy as np @@ -30,7 +32,7 @@ argparser.add_argument("--pulser_file", help="pulser file", type=str, required=False) argparser.add_argument("--raw_files", help="input files", nargs="*", type=str) -argparser.add_argument("--tcm_files", help="tcm_files", nargs="*", type=str) +argparser.add_argument("--tcm_files", help="tcm_files", nargs="*", type=str, required=False) args = argparser.parse_args() logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") @@ -63,17 +65,23 @@ else: input_file = args.raw_files - if isinstance(args.tcm_files, list) and args.tcm_files[0].split(".")[-1] == "filelist": - tcm_files = args.tcm_files[0] - with open(tcm_files) as f: + if args.pulser_file: + with open(args.pulser_file) as f: + pulser_dict = json.load(f) + mask = np.array(pulser_dict["mask"]) + + elif args.tcm_filelist: + # get pulser mask from tcm files + with open(args.tcm_filelist) as f: tcm_files = f.read().splitlines() + tcm_files = sorted(np.unique(tcm_files)) + ids, mask = get_tcm_pulser_ids( + tcm_files, args.channel, kwarg_dict["pulser_multiplicity_threshold"] + ) else: - tcm_files = args.tcm_files - # get pulser mask from tcm files - tcm_files = sorted(np.unique(tcm_files)) - ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict.pop("pulser_multiplicity_threshold") - ) + msg = "No pulser file or tcm filelist provided" + raise ValueError(msg) + data = sto.read(f"{args.channel}/raw", input_file, field_mask=["daqenergy", "timestamp"])[ 0 ].view_as("pd") From 489e3e5e9c0ea0c69f4d776faf20d871fb873eb7 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Fri, 29 Mar 2024 13:50:32 +0100 Subject: [PATCH 032/103] add more checks and debug --- scripts/pars_dsp_event_selection.py | 79 +++++++++++++++++++---------- 1 file changed, 51 insertions(+), 28 deletions(-) diff --git a/scripts/pars_dsp_event_selection.py b/scripts/pars_dsp_event_selection.py index 3802056..44c1604 100644 --- a/scripts/pars_dsp_event_selection.py +++ b/scripts/pars_dsp_event_selection.py @@ -10,6 +10,8 @@ os.environ["LGDO_BOUNDSCHECK"] = "false" os.environ["DSPEED_CACHE"] = "false" os.environ["DSPEED_BOUNDSCHECK"] = 
"false" +os.environ["PYGAMA_PARALLEL"] = "false" +os.environ["PYGAMA_FASTMATH"] = "false" from bisect import bisect_left @@ -91,7 +93,7 @@ def get_out_data( argparser.add_argument("--decay_const", help="decay_const", type=str, required=True) argparser.add_argument("--configs", help="configs", type=str, required=True) - argparser.add_argument("--raw_cal", help="raw_cal", type=str, required=True) + argparser.add_argument("--raw_cal", help="raw_cal", type=str, nargs="*", required=True) argparser.add_argument("--log", help="log_file", type=str) @@ -108,6 +110,7 @@ def get_out_data( logging.getLogger("lgdo").setLevel(logging.INFO) logging.getLogger("h5py").setLevel(logging.INFO) logging.getLogger("matplotlib").setLevel(logging.INFO) + logging.getLogger("legendmeta").setLevel(logging.INFO) logging.getLogger("dspeed.processing_chain").setLevel(logging.INFO) log = logging.getLogger(__name__) @@ -128,6 +131,7 @@ def get_out_data( pathlib.Path(os.path.dirname(args.peak_file)).mkdir(parents=True, exist_ok=True) if peak_dict.pop("run_selection") is True: + log.debug("Starting peak selection") rng = np.random.default_rng() rand_num = f"{rng.integers(0,99999):05d}" temp_output = f"{args.peak_file}.{rand_num}" @@ -190,7 +194,7 @@ def get_out_data( masks[peak] = np.where(e_mask)[0] log.debug(f"{len(masks[peak])} events found in energy range for {peak}") - input_data = sto.read(f"{lh5_path}", raw_files, n_rows=10000)[0] + input_data = sto.read(f"{lh5_path}", raw_files, n_rows=10000, idx=np.where(~mask)[0])[0] if isinstance(dsp_config, str): dsp_config = Props.read_from(dsp_config) @@ -205,7 +209,7 @@ def get_out_data( if cut_parameters is not None: cut_dict = generate_cuts(tb_data, cut_parameters) - log.debug(f"Cuts are calculated: {cut_dict}") + log.debug(f"Cuts are calculated: {json.dumps(cut_dict, indent=2)}") else: cut_dict = None @@ -249,10 +253,22 @@ def get_out_data( tb_out = run_one_dsp(peak_dict["obj_buf"], dsp_config, db_dict=db_dict) energy = tb_out[energy_parameter].nda + init_bin_width = ( + 2 + * (np.nanpercentile(energy, 75) - np.nanpercentile(energy, 25)) + * len(energy) ** (-1 / 3) + ) + + if init_bin_width > 2: + init_bin_width = 2 + hist, bins, var = pgh.get_hist( energy, - range=(np.floor(np.nanmin(energy)), np.ceil(np.nanmax(energy))), - dx=peak / (np.nanpercentile(energy, 50)), + range=( + np.floor(np.nanpercentile(energy, 1)), + np.ceil(np.nanpercentile(energy, 99)), + ), + dx=init_bin_width, ) peak_loc = pgh.get_bin_centers(bins)[np.nanargmax(hist)] @@ -274,7 +290,9 @@ def get_out_data( peak_loc + (1.5 * peak_dict["kev_width"][1]) / rough_adc_to_kev ) hist, bins, var = pgh.get_hist( - energy, range=(int(e_lower_lim), int(e_upper_lim)), dx=1 + energy, + range=(int(e_lower_lim), int(e_upper_lim)), + dx=init_bin_width, ) mu = pgh.get_bin_centers(bins)[np.nanargmax(hist)] @@ -304,32 +322,37 @@ def get_out_data( peak_dict["obj_buf"] = None peak_dict["obj_buf_start"] = 0 peak_dict["n_events"] = n_wfs + log.debug(f'found {peak_dict["n_events"]} events for {peak}') else: - tb_out = run_one_dsp(peak_dict["obj_buf"], dsp_config, db_dict=db_dict) - out_tbl, n_wfs = get_out_data( - peak_dict["obj_buf"], - tb_out, - cut_dict, - peak_dict["e_lower_lim"], - peak_dict["e_upper_lim"], - peak_dict["ecal_par"], - raw_dict, - int(peak), - final_cut_field=final_cut_field, - energy_param=energy_parameter, - ) - peak_dict["n_events"] += n_wfs - sto.write(out_tbl, name=lh5_path, lh5_file=temp_output, wo_mode="a") - peak_dict["obj_buf"] = None - peak_dict["obj_buf_start"] = 0 - if peak_dict["n_events"] >= 
n_events: - peak_dict["idxs"] = None - log.debug(f"{peak} has reached the required number of events") - log.debug( - f"{peak}: {peak_dict['idxs']}, {peak_dict['idxs'] is not None}" + if peak_dict["obj_buf"] is not None and len(peak_dict["obj_buf"]) > 0: + tb_out = run_one_dsp( + peak_dict["obj_buf"], dsp_config, db_dict=db_dict + ) + out_tbl, n_wfs = get_out_data( + peak_dict["obj_buf"], + tb_out, + cut_dict, + peak_dict["e_lower_lim"], + peak_dict["e_upper_lim"], + peak_dict["ecal_par"], + raw_dict, + int(peak), + final_cut_field=final_cut_field, + energy_param=energy_parameter, + ) + peak_dict["n_events"] += n_wfs + sto.write( + out_tbl, name=lh5_path, lh5_file=temp_output, wo_mode="a" ) + peak_dict["obj_buf"] = None + peak_dict["obj_buf_start"] = 0 + log.debug(f'found {peak_dict["n_events"]} events for {peak}') + if peak_dict["n_events"] >= n_events: + peak_dict["idxs"] = None + log.debug(f"{peak} has reached the required number of events") else: pathlib.Path(temp_output).touch() + log.debug(f"event selection completed in {time.time()-t0} seconds") os.rename(temp_output, args.peak_file) From 5c4ca7b0194b583b09d63b2f90f607dfabb2b259 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Fri, 29 Mar 2024 13:51:07 +0100 Subject: [PATCH 033/103] changes for pargen refactor and add qc and pulser routines --- scripts/pars_hit_aoe.py | 117 ++++-- scripts/pars_hit_ecal.py | 635 +++++++++++++++++++++--------- scripts/pars_hit_lq.py | 86 +++-- scripts/pars_hit_qc.py | 156 +++++--- scripts/pars_pht_aoecal.py | 144 ++++--- scripts/pars_pht_lqcal.py | 73 +++- scripts/pars_pht_partcal.py | 748 ++++++++++++++++++------------------ scripts/pars_pht_qc.py | 224 +++++++++++ scripts/pars_tcm_pulser.py | 64 +++ 9 files changed, 1518 insertions(+), 729 deletions(-) create mode 100644 scripts/pars_pht_qc.py create mode 100644 scripts/pars_tcm_pulser.py diff --git a/scripts/pars_hit_aoe.py b/scripts/pars_hit_aoe.py index a425e20..afb90a8 100644 --- a/scripts/pars_hit_aoe.py +++ b/scripts/pars_hit_aoe.py @@ -9,18 +9,48 @@ import warnings from typing import Callable +os.environ["PYGAMA_PARALLEL"] = "false" +os.environ["PYGAMA_FASTMATH"] = "false" + import numpy as np import pandas as pd from legendmeta import LegendMetadata from legendmeta.catalog import Props from pygama.pargen.AoE_cal import * # noqa: F403 -from pygama.pargen.AoE_cal import cal_aoe, pol1, sigma_fit, standard_aoe -from pygama.pargen.utils import get_tcm_pulser_ids, load_data +from pygama.pargen.AoE_cal import CalAoE, Pol1, SigmaFit, aoe_peak +from pygama.pargen.data_cleaning import get_tcm_pulser_ids +from pygama.pargen.utils import load_data log = logging.getLogger(__name__) warnings.filterwarnings(action="ignore", category=RuntimeWarning) +def get_results_dict(aoe_class): + return { + "cal_energy_param": aoe_class.cal_energy_param, + "dt_param": aoe_class.dt_param, + "rt_correction": aoe_class.dt_corr, + "1000-1300keV": aoe_class.timecorr_df.to_dict("index"), + "correction_fit_results": aoe_class.energy_corr_res_dict, + "low_cut": aoe_class.low_cut_val, + "high_cut": aoe_class.high_cut_val, + "low_side_sfs": aoe_class.low_side_sfs.to_dict("index"), + "2_side_sfs": aoe_class.two_side_sfs.to_dict("index"), + } + + +def fill_plot_dict(aoe_class, data, plot_options, plot_dict=None): + if plot_dict is not None: + for key, item in plot_options.items(): + if item["options"] is not None: + plot_dict[key] = item["function"](aoe_class, data, **item["options"]) + else: + plot_dict[key] = item["function"](aoe_class, data) + else: + plot_dict = {} + return 
plot_dict + + def aoe_calibration( data: pd.Dataframe, cal_dicts: dict, @@ -28,36 +58,34 @@ def aoe_calibration( energy_param: str, cal_energy_param: str, eres_func: Callable, - pdf: Callable = standard_aoe, + pdf: Callable = aoe_peak, selection_string: str = "", dt_corr: bool = False, dep_correct: bool = False, dt_cut: dict | None = None, high_cut_val: int = 3, - mean_func: Callable = pol1, - sigma_func: Callable = sigma_fit, - dep_acc: float = 0.9, + mean_func: Callable = Pol1, + sigma_func: Callable = SigmaFit, + # dep_acc: float = 0.9, dt_param: str = "dt_eff", comptBands_width: int = 20, plot_options: dict | None = None, ): data["AoE_Uncorr"] = data[current_param] / data[energy_param] - aoe = cal_aoe( - cal_dicts, - cal_energy_param, - eres_func, - pdf, - selection_string, - dt_corr, - dep_acc, - dep_correct, - dt_cut, - dt_param, - high_cut_val, - mean_func, - sigma_func, - comptBands_width, - plot_options if plot_options is not None else {}, + aoe = CalAoE( + cal_dicts=cal_dicts, + cal_energy_param=cal_energy_param, + eres_func=eres_func, + pdf=pdf, + selection_string=selection_string, + dt_corr=dt_corr, + dep_correct=dep_correct, + dt_cut=dt_cut, + dt_param=dt_param, + high_cut_val=high_cut_val, + mean_func=mean_func, + sigma_func=sigma_func, + compt_bands_width=comptBands_width, ) aoe.update_cal_dicts( @@ -71,12 +99,13 @@ def aoe_calibration( aoe.calibrate(data, "AoE_Uncorr") log.info("Calibrated A/E") - return cal_dicts, aoe.get_results_dict(), aoe.fill_plot_dict(data), aoe + return cal_dicts, get_results_dict(aoe), fill_plot_dict(aoe, data, plot_options), aoe argparser = argparse.ArgumentParser() argparser.add_argument("files", help="files", nargs="*", type=str) -argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, required=True) +argparser.add_argument("--pulser_file", help="pulser_file", type=str, required=False) +argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, required=False) argparser.add_argument("--ecal_file", help="ecal_file", type=str, required=True) argparser.add_argument("--eres_file", help="eres_file", type=str, required=True) argparser.add_argument("--inplots", help="in_plot_path", type=str, required=False) @@ -119,11 +148,11 @@ def aoe_calibration( if kwarg_dict["run_aoe"] is True: kwarg_dict.pop("run_aoe") - pdf = eval(kwarg_dict.pop("pdf")) if "pdf" in kwarg_dict else standard_aoe + pdf = eval(kwarg_dict.pop("pdf")) if "pdf" in kwarg_dict else aoe_peak - sigma_func = eval(kwarg_dict.pop("sigma_func")) if "sigma_func" in kwarg_dict else sigma_fit + sigma_func = eval(kwarg_dict.pop("sigma_func")) if "sigma_func" in kwarg_dict else SigmaFit - mean_func = eval(kwarg_dict.pop("mean_func")) if "mean_func" in kwarg_dict else pol1 + mean_func = eval(kwarg_dict.pop("mean_func")) if "mean_func" in kwarg_dict else Pol1 if "plot_options" in kwarg_dict: for field, item in kwarg_dict["plot_options"].items(): @@ -173,13 +202,25 @@ def eres_func(x): return_selection_mask=True, ) - # get pulser mask from tcm files - with open(args.tcm_filelist) as f: - tcm_files = f.read().splitlines() - tcm_files = sorted(tcm_files) - ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict.pop("pulser_multiplicity_threshold") - ) + if args.pulser_file: + with open(args.pulser_file) as f: + pulser_dict = json.load(f) + mask = np.array(pulser_dict["mask"]) + if "pulser_multiplicity_threshold" in kwarg_dict: + kwarg_dict.pop("pulser_multiplicity_threshold") + + elif args.tcm_filelist: + # get pulser mask from tcm files + with 
open(args.tcm_filelist) as f: + tcm_files = f.read().splitlines() + tcm_files = sorted(np.unique(tcm_files)) + ids, mask = get_tcm_pulser_ids( + tcm_files, args.channel, kwarg_dict.pop("pulser_multiplicity_threshold") + ) + else: + msg = "No pulser file or tcm filelist provided" + raise ValueError(msg) + data["is_pulser"] = mask[threshold_mask] cal_dict, out_dict, plot_dict, obj = aoe_calibration( @@ -222,7 +263,7 @@ def eres_func(x): pkl.dump(out_plot_dict, w, protocol=pkl.HIGHEST_PROTOCOL) pathlib.Path(os.path.dirname(args.hit_pars)).mkdir(parents=True, exist_ok=True) -results_dict = dict(**ecal_dict["results"], aoe = out_dict) +results_dict = dict(**ecal_dict["results"], aoe=out_dict) with open(args.hit_pars, "w") as w: final_hit_dict = { "pars": {"operations": cal_dict}, @@ -232,8 +273,8 @@ def eres_func(x): pathlib.Path(os.path.dirname(args.aoe_results)).mkdir(parents=True, exist_ok=True) final_object_dict = dict( - **object_dict, - aoe=obj, - ) + **object_dict, + aoe=obj, +) with open(args.aoe_results, "wb") as w: pkl.dump(final_object_dict, w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_hit_ecal.py b/scripts/pars_hit_ecal.py index ad44d6d..1d7d436 100644 --- a/scripts/pars_hit_ecal.py +++ b/scripts/pars_hit_ecal.py @@ -1,6 +1,7 @@ from __future__ import annotations import argparse +import copy import json import logging import os @@ -9,18 +10,22 @@ import warnings from datetime import datetime +os.environ["PYGAMA_PARALLEL"] = "false" +os.environ["PYGAMA_FASTMATH"] = "false" + import lgdo.lh5 as lh5 import matplotlib as mpl import matplotlib.pyplot as plt import numpy as np -import pandas as pd +import pygama.math.distributions as pgf import pygama.math.histogram as pgh from legendmeta import LegendMetadata from legendmeta.catalog import Props from matplotlib.colors import LogNorm -from pygama.pargen.ecal_th import * # noqa: F403 -from pygama.pargen.ecal_th import apply_cuts, calibrate_parameter -from pygama.pargen.utils import get_tcm_pulser_ids, load_data +from pygama.math.distributions import nb_poly +from pygama.pargen.data_cleaning import get_tcm_pulser_ids +from pygama.pargen.energy_cal import FWHMLinear, FWHMQuadratic, HPGeCalibration +from pygama.pargen.utils import load_data from scipy.stats import binned_statistic log = logging.getLogger(__name__) @@ -28,6 +33,238 @@ sto = lh5.LH5Store() warnings.filterwarnings(action="ignore", category=RuntimeWarning) +warnings.filterwarnings(action="ignore", category=np.RankWarning) + + +def plot_2614_timemap( + data, + cal_energy_param, + selection_string, + figsize=(12, 8), + fontsize=12, + erange=(2580, 2630), + dx=1, + time_dx=180, +): + plt.rcParams["figure.figsize"] = figsize + plt.rcParams["font.size"] = fontsize + + selection = data.query(f"{cal_energy_param}>2560&{cal_energy_param}<2660&{selection_string}") + + fig = plt.figure() + if len(selection) == 0: + pass + else: + time_bins = np.arange( + (np.amin(data["timestamp"]) // time_dx) * time_dx, + ((np.amax(data["timestamp"]) // time_dx) + 2) * time_dx, + time_dx, + ) + + plt.hist2d( + selection["timestamp"], + selection[cal_energy_param], + bins=[time_bins, np.arange(erange[0], erange[1] + dx, dx)], + norm=LogNorm(), + ) + + ticks, labels = plt.xticks() + plt.xlabel(f"Time starting : {datetime.utcfromtimestamp(ticks[0]).strftime('%d/%m/%y %H:%M')}") + plt.ylabel("Energy(keV)") + plt.ylim([erange[0], erange[1]]) + + plt.xticks( + ticks, + [datetime.utcfromtimestamp(tick).strftime("%H:%M") for tick in ticks], + ) + plt.close() + return fig + + +def 
plot_pulser_timemap(
+    data,
+    cal_energy_param,
+    selection_string,  # noqa: ARG001
+    pulser_field="is_pulser",
+    figsize=(12, 8),
+    fontsize=12,
+    dx=0.2,
+    time_dx=180,
+    n_spread=3,
+):
+    plt.rcParams["figure.figsize"] = figsize
+    plt.rcParams["font.size"] = fontsize
+
+    time_bins = np.arange(
+        (np.amin(data["timestamp"]) // time_dx) * time_dx,
+        ((np.amax(data["timestamp"]) // time_dx) + 2) * time_dx,
+        time_dx,
+    )
+
+    selection = data.query(pulser_field)
+    fig = plt.figure()
+    if len(selection) == 0:
+        pass
+
+    else:
+        mean = np.nanpercentile(selection[cal_energy_param], 50)
+        spread = mean - np.nanpercentile(selection[cal_energy_param], 10)
+
+        plt.hist2d(
+            selection["timestamp"],
+            selection[cal_energy_param],
+            bins=[
+                time_bins,
+                np.arange(mean - n_spread * spread, mean + n_spread * spread + dx, dx),
+            ],
+            norm=LogNorm(),
+        )
+        plt.ylim([mean - n_spread * spread, mean + n_spread * spread])
+        ticks, labels = plt.xticks()
+        plt.xlabel(f"Time starting : {datetime.utcfromtimestamp(ticks[0]).strftime('%d/%m/%y %H:%M')}")
+        plt.ylabel("Energy(keV)")
+
+        plt.xticks(
+            ticks,
+            [datetime.utcfromtimestamp(tick).strftime("%H:%M") for tick in ticks],
+        )
+    plt.close()
+    return fig
+
+
+def get_median(x):
+    if len(x[~np.isnan(x)]) < 10:
+        return np.nan
+    else:
+        return np.nanpercentile(x, 50)
+
+
+def get_err(x):
+    if len(x[~np.isnan(x)]) < 10:
+        return np.nan
+    else:
+        return np.nanvar(x) / np.sqrt(len(x))
+
+
+def bin_pulser_stability(
+    data,
+    cal_energy_param,
+    selection_string,  # noqa: ARG001
+    pulser_field="is_pulser",
+    time_slice=180,
+):
+    selection = data.query(pulser_field)
+
+    utime_array = data["timestamp"]
+    select_energies = selection[cal_energy_param].to_numpy()
+
+    time_bins = np.arange(
+        (np.amin(utime_array) // time_slice) * time_slice,
+        ((np.amax(utime_array) // time_slice) + 2) * time_slice,
+        time_slice,
+    )
+    # bin time values
+    times_average = (time_bins[:-1] + time_bins[1:]) / 2
+
+    if len(selection) == 0:
+        return {
+            "time": times_average,
+            "energy": np.full_like(times_average, np.nan),
+            "spread": np.full_like(times_average, np.nan),
+        }
+
+    par_average, _, _ = binned_statistic(
+        selection["timestamp"], select_energies, statistic=get_median, bins=time_bins
+    )
+    par_error, _, _ = binned_statistic(
+        selection["timestamp"], select_energies, statistic=get_err, bins=time_bins
+    )
+
+    return {"time": times_average, "energy": par_average, "spread": par_error}
+
+
+def bin_stability(
+    data,
+    cal_energy_param,
+    selection_string,
+    time_slice=180,
+    energy_range=(2585, 2660),
+):
+    selection = data.query(
+        f"{cal_energy_param}>{energy_range[0]}&{cal_energy_param}<{energy_range[1]}&{selection_string}"
+    )
+
+    utime_array = data["timestamp"]
+    select_energies = selection[cal_energy_param].to_numpy()
+
+    time_bins = np.arange(
+        (np.amin(utime_array) // time_slice) * time_slice,
+        ((np.amax(utime_array) // time_slice) + 2) * time_slice,
+        time_slice,
+    )
+    # bin time values
+    times_average = (time_bins[:-1] + time_bins[1:]) / 2
+
+    if len(selection) == 0:
+        return {
+            "time": times_average,
+            "energy": np.full_like(times_average, np.nan),
+            "spread": np.full_like(times_average, np.nan),
+        }
+
+    par_average, _, _ = binned_statistic(
+        selection["timestamp"], select_energies, statistic=get_median, bins=time_bins
+    )
+    par_error, _, _ = binned_statistic(
+        selection["timestamp"], select_energies, statistic=get_err, bins=time_bins
+    )
+
+    return {"time": times_average, "energy": par_average, "spread": par_error}
+
+
+def bin_spectrum(
+    data,
+    cal_energy_param,
+ selection_string, + cut_field="is_valid_cal", + pulser_field="is_pulser", + erange=(0, 3000), + dx=2, +): + bins = np.arange(erange[0], erange[1] + dx, dx) + return { + "bins": pgh.get_bin_centers(bins), + "counts": np.histogram(data.query(selection_string)[cal_energy_param], bins)[0], + "cut_counts": np.histogram( + data.query(f"(~{cut_field})&(~{pulser_field})")[cal_energy_param], + bins, + )[0], + "pulser_counts": np.histogram( + data.query(pulser_field)[cal_energy_param], + bins, + )[0], + } + + +def bin_survival_fraction( + data, + cal_energy_param, + selection_string, + cut_field="is_valid_cal", + pulser_field="is_pulser", + erange=(0, 3000), + dx=6, +): + counts_pass, bins_pass, _ = pgh.get_hist( + data.query(selection_string)[cal_energy_param], + bins=np.arange(erange[0], erange[1] + dx, dx), + ) + counts_fail, bins_fail, _ = pgh.get_hist( + data.query(f"(~{cut_field})&(~{pulser_field})")[cal_energy_param], + bins=np.arange(erange[0], erange[1] + dx, dx), + ) + sf = 100 * (counts_pass + 10 ** (-6)) / (counts_pass + counts_fail + 10 ** (-6)) + return {"bins": pgh.get_bin_centers(bins_pass), "sf": sf} def plot_baseline_timemap( @@ -125,180 +362,64 @@ def baseline_tracking_plots(files, lh5_path, plot_options=None): plot_dict[key] = item["function"](data) return plot_dict -def get_results_dict(ecal_class, data): + +def get_results_dict(ecal_class, data, cal_energy_param, selection_string): if np.isnan(ecal_class.pars).all(): return {} else: - fwhm_linear = ecal_class.fwhm_fit_linear.copy() - fwhm_linear["parameters"] = fwhm_linear["parameters"].to_dict() - fwhm_linear["uncertainties"] = fwhm_linear["uncertainties"].to_dict() - fwhm_linear["cov"] = fwhm_linear["cov"].tolist() - fwhm_quad = ecal_class.fwhm_fit_quadratic.copy() - fwhm_quad["parameters"] = fwhm_quad["parameters"].to_dict() - fwhm_quad["uncertainties"] = fwhm_quad["uncertainties"].to_dict() - fwhm_quad["cov"] = fwhm_quad["cov"].tolist() - - pk_dict = { - Ei: { - "function": func_i.__name__, - "module": func_i.__module__, - "parameters_in_ADC": parsi.to_dict(), - "uncertainties_in_ADC": errorsi.to_dict(), - "p_val": pvali, - "fwhm_in_keV": list(fwhmi), - "pk_position":(posi, posuni), - } - for i, (Ei, parsi, errorsi, pvali, fwhmi, posi, posuni, func_i) in enumerate( - zip( - ecal_class.results["fitted_keV"], - ecal_class.results["pk_pars"][ecal_class.results["pk_validities"]], - ecal_class.results["pk_errors"][ecal_class.results["pk_validities"]], - ecal_class.results["pk_pvals"][ecal_class.results["pk_validities"]], - ecal_class.results["pk_fwhms"], - ecal_class.results["pk_pos"], - ecal_class.results["pk_pos_uncertainties"], - ecal_class.funcs, - ) - ) - } + results_dict = copy.deepcopy(ecal_class.results["hpge_fit_energy_peaks_1"]) + + if "FWHMLinear" in results_dict: + fwhm_linear = results_dict["FWHMLinear"] + fwhm_linear["function"] = fwhm_linear["function"].__name__ + fwhm_linear["parameters"] = fwhm_linear["parameters"].to_dict() + fwhm_linear["uncertainties"] = fwhm_linear["uncertainties"].to_dict() + fwhm_linear["cov"] = fwhm_linear["cov"].tolist() + else: + fwhm_linear = None + + if "FWHMQuadratic" in results_dict: + fwhm_quad = results_dict["FWHMQuadratic"] + fwhm_quad["function"] = fwhm_quad["function"].__name__ + fwhm_quad["parameters"] = fwhm_quad["parameters"].to_dict() + fwhm_quad["uncertainties"] = fwhm_quad["uncertainties"].to_dict() + fwhm_quad["cov"] = fwhm_quad["cov"].tolist() + else: + fwhm_quad = None + + pk_dict = results_dict["peak_parameters"] + + for _, dic in pk_dict.items(): + dic["function"] 
= dic["function"].name + dic["parameters"] = dic["parameters"].to_dict() + dic["uncertainties"] = dic["uncertainties"].to_dict() + dic.pop("covariance") return { - "total_fep": len( - data.query( - f"{ecal_class.cal_energy_param}>2604&{ecal_class.cal_energy_param}<2624" - ) - ), - "total_dep": len( - data.query( - f"{ecal_class.cal_energy_param}>1587&{ecal_class.cal_energy_param}<1597" - ) - ), + "total_fep": len(data.query(f"{cal_energy_param}>2604&{cal_energy_param}<2624")), + "total_dep": len(data.query(f"{cal_energy_param}>1587&{cal_energy_param}<1597")), "pass_fep": len( - data.query( - f"{ecal_class.cal_energy_param}>2604&{ecal_class.cal_energy_param}<2624&{ecal_class.selection_string}" - ) + data.query(f"{cal_energy_param}>2604&{cal_energy_param}<2624&{selection_string}") ), "pass_dep": len( - data.query( - f"{ecal_class.cal_energy_param}>1587&{ecal_class.cal_energy_param}<1597&{ecal_class.selection_string}" - ) + data.query(f"{cal_energy_param}>1587&{cal_energy_param}<1597&{selection_string}") ), "eres_linear": fwhm_linear, "eres_quadratic": fwhm_quad, - "fitted_peaks": ecal_class.results["fitted_keV"].tolist(), + "fitted_peaks": ecal_class.peaks_kev.tolist(), "pk_fits": pk_dict, - "mode":ecal_class.results["mode"], } -def energy_cal_th( - data: pd.Dataframe, - energy_params: list[str], - cal_energy_params: list | None = None, - selection_string: str = "", - hit_dict: dict | None = None, - cut_parameters: dict[str, int] | None = None, - plot_options: dict | None = None, - threshold: int = 0, - p_val: float = 0, - n_events: int | None = None, - final_cut_field: str = "is_valid_cal", - simplex: bool = True, - guess_keV: float | None = None, - tail_weight=100, - deg: int = 1, -) -> tuple(dict, dict, dict, dict): - data, hit_dict = apply_cuts( - data, - hit_dict if hit_dict is not None else {}, - cut_parameters if cut_parameters is not None else {}, - final_cut_field, - ) - - if cal_energy_params is None: - cal_energy_params = [energy_param + "_cal" for energy_param in energy_params] - - - glines = [ - # 238.632, - 583.191, - 727.330, - 860.564, - 1592.53, - 1620.50, - 2103.53, - 2614.50, - ] # gamma lines used for calibration - range_keV = [ - # (8, 8), - (20, 20), - (30, 30), - (30, 30), - (40, 20), - (20, 40), - (40, 40), - (60, 60), - ] # side bands width - funcs = [ - # pgf.extended_gauss_step_pdf, - pgf.extended_radford_pdf, - pgf.extended_radford_pdf, - pgf.extended_radford_pdf, - pgf.extended_radford_pdf, - pgf.extended_radford_pdf, - pgf.extended_radford_pdf, - pgf.extended_radford_pdf, - ] - gof_funcs = [ - # pgf.gauss_step_pdf, - pgf.radford_pdf, - pgf.radford_pdf, - pgf.radford_pdf, - pgf.radford_pdf, - pgf.radford_pdf, - pgf.radford_pdf, - pgf.radford_pdf, - ] - - results_dict = {} - plot_dict = {} - full_object_dict = {} - for energy_param, cal_energy_param in zip(energy_params, cal_energy_params): - full_object_dict[cal_energy_param] = calibrate_parameter( - energy_param, - glines, - range_keV, - funcs, - gof_funcs, - selection_string, - plot_options, - guess_keV, - threshold, - p_val, - n_events, - simplex, - deg, - tail_weight=tail_weight, - cal_energy_param=cal_energy_param, - ) - full_object_dict[cal_energy_param].calibrate_parameter(data) - results_dict[cal_energy_param] = get_results_dict(full_object_dict[cal_energy_param], data) - hit_dict.update(full_object_dict[cal_energy_param].hit_dict) - if ~np.isnan(full_object_dict[cal_energy_param].pars).all(): - plot_dict[cal_energy_param] = ( - full_object_dict[cal_energy_param].fill_plot_dict(data).copy() - ) - - 
log.info("Finished all calibrations") - return hit_dict, results_dict, plot_dict, full_object_dict - if __name__ == "__main__": argparser = argparse.ArgumentParser() - argparser.add_argument("--files", help="files", nargs="*", type=str) - argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, required=True) + argparser.add_argument("--files", help="filelist", nargs="*", type=str) + argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, required=False) + argparser.add_argument("--pulser_file", help="pulser_file", type=str, required=False) + argparser.add_argument("--ctc_dict", help="ctc_dict", nargs="*") - argparser.add_argument("--in_hit_dict", help="in_hit_dict", nargs="*", required=False) - argparser.add_argument("--inplot_dict", help="inplot_dict", nargs="*", required=False) + argparser.add_argument("--in_hit_dict", help="in_hit_dict", required=False) + argparser.add_argument("--inplot_dict", help="inplot_dict", required=False) argparser.add_argument("--configs", help="config", type=str, required=True) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) @@ -306,6 +427,8 @@ def energy_cal_th( argparser.add_argument("--channel", help="Channel", type=str, required=True) argparser.add_argument("--tier", help="tier", type=str, default="hit") + argparser.add_argument("--metadata", help="metadata path", type=str, required=True) + argparser.add_argument("--log", help="log_file", type=str) argparser.add_argument("--plot_path", help="plot_path", type=str, required=False) @@ -321,12 +444,17 @@ def energy_cal_th( logging.getLogger("matplotlib").setLevel(logging.INFO) logging.getLogger("legendmeta").setLevel(logging.INFO) + meta = LegendMetadata(path=args.metadata) + chmap = meta.channelmap(args.timestamp) + + det_status = chmap.map("daq.rawid")[int(args.channel[2:])]["analysis"]["usability"] + if args.in_hit_dict: hit_dict = Props.read_from(args.in_hit_dict) database_dic = Props.read_from(args.ctc_dict) - hit_dict = hit_dict.update(database_dic[args.channel]["ctc_params"]) + hit_dict.update(database_dic[args.channel]["ctc_params"]) # get metadata dictionary configs = LegendMetadata(path=args.configs) @@ -350,40 +478,164 @@ def energy_cal_th( bl_plots[field]["function"] = eval(item["function"]) common_plots = kwarg_dict.pop("common_plots") + with open(args.files[0]) as f: + files = f.read().splitlines() + files = sorted(files) + # load data in data, threshold_mask = load_data( - args.files, + files, f"{args.channel}/dsp", hit_dict, - params=kwarg_dict["energy_params"] - + list(kwarg_dict["cut_parameters"]) - + ["timestamp", "trapTmax"], + params=[*kwarg_dict["energy_params"], kwarg_dict["cut_param"], "timestamp", "trapTmax"], threshold=kwarg_dict["threshold"], return_selection_mask=True, cal_energy_param="trapTmax", ) - # get pulser mask from tcm files - with open(args.tcm_filelist) as f: - tcm_files = f.read().splitlines() - tcm_files = sorted(np.unique(tcm_files)) - ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict.pop("pulser_multiplicity_threshold") - ) + if args.pulser_file: + with open(args.pulser_file) as f: + pulser_dict = json.load(f) + mask = np.array(pulser_dict["mask"]) + + elif args.tcm_filelist: + # get pulser mask from tcm files + with open(args.tcm_filelist) as f: + tcm_files = f.read().splitlines() + tcm_files = sorted(np.unique(tcm_files)) + ids, mask = get_tcm_pulser_ids( + tcm_files, args.channel, kwarg_dict["pulser_multiplicity_threshold"] + ) + else: + msg = "No pulser file or tcm filelist 
provided" + raise ValueError(msg) + data["is_pulser"] = mask[threshold_mask] - # run energy calibration - out_dict, result_dict, plot_dict, ecal_object = energy_cal_th( - data, - hit_dict=hit_dict, - selection_string=f"({kwarg_dict['final_cut_field']})&(~is_pulser)", - **kwarg_dict, - ) + pk_pars = [ + (583.191, (20, 20), pgf.hpge_peak), + (727.330, (30, 30), pgf.hpge_peak), + (860.564, (30, 25), pgf.hpge_peak), + (1592.53, (40, 20), pgf.gauss_on_step), + (1620.50, (20, 40), pgf.gauss_on_step), + (2103.53, (40, 40), pgf.gauss_on_step), + (2614.50, (60, 60), pgf.hpge_peak), + ] + + glines = [pk_par[0] for pk_par in pk_pars] + + if "cal_energy_params" not in kwarg_dict: + cal_energy_params = [energy_param + "_cal" for energy_param in kwarg_dict["energy_params"]] + else: + cal_energy_params = kwarg_dict["cal_energy_params"] + + selection_string = f"~is_pulser&{kwarg_dict['cut_param']}" + + results_dict = {} + plot_dict = {} + full_object_dict = {} + + for energy_param, cal_energy_param in zip(kwarg_dict["energy_params"], cal_energy_params): + e_uncal = data.query(selection_string)[energy_param].to_numpy() + + hist, bins, bar = pgh.get_hist( + e_uncal[ + (e_uncal > np.nanpercentile(e_uncal, 95)) + & (e_uncal < np.nanpercentile(e_uncal, 99.9)) + ], + dx=1, + range=[np.nanpercentile(e_uncal, 95), np.nanpercentile(e_uncal, 99.9)], + ) + + guess = 2614.553 / bins[np.nanargmax(hist)] + full_object_dict[cal_energy_param] = HPGeCalibration( + energy_param, + glines, + guess, + kwarg_dict.get("deg", 0), + ) + full_object_dict[cal_energy_param].hpge_get_energy_peaks(e_uncal) + got_peaks_kev = full_object_dict[cal_energy_param].peaks_kev.copy() + full_object_dict[cal_energy_param].hpge_fit_energy_peaks( + e_uncal, + peaks_kev=[2614.50], + peak_pars=pk_pars, + tail_weight=kwarg_dict.get("tail_weight", 0), + n_events=kwarg_dict.get("n_events", None), + allowed_p_val=kwarg_dict.get("p_val", 0), + update_cal_pars=bool(det_status == "on"), + ) + full_object_dict[cal_energy_param].hpge_fit_energy_peaks( + e_uncal, + peaks_kev=got_peaks_kev, + peak_pars=pk_pars, + tail_weight=kwarg_dict.get("tail_weight", 0), + n_events=kwarg_dict.get("n_events", None), + allowed_p_val=kwarg_dict.get("p_val", 0), + update_cal_pars=False, + ) + + full_object_dict[cal_energy_param].get_energy_res_curve( + FWHMLinear, + interp_energy_kev={"Qbb": 2039.0}, + ) + full_object_dict[cal_energy_param].get_energy_res_curve( + FWHMQuadratic, + interp_energy_kev={"Qbb": 2039.0}, + ) + + data[cal_energy_param] = nb_poly( + data[energy_param].to_numpy(), full_object_dict[cal_energy_param].pars + ) + + results_dict[cal_energy_param] = get_results_dict( + full_object_dict[cal_energy_param], data, cal_energy_param, selection_string + ) + + hit_dict.update({cal_energy_param: full_object_dict[cal_energy_param].gen_pars_dict()}) + if args.plot_path: + param_plot_dict = {} + if ~np.isnan(full_object_dict[cal_energy_param].pars).all(): + param_plot_dict["fwhm_fit"] = full_object_dict[cal_energy_param].plot_eres_fit( + e_uncal + ) + param_plot_dict["cal_fit"] = full_object_dict[cal_energy_param].plot_cal_fit( + e_uncal + ) + param_plot_dict["peak_fits"] = full_object_dict[cal_energy_param].plot_fits( + e_uncal + ) + + if "plot_options" in kwarg_dict: + for key, item in kwarg_dict["plot_options"].items(): + if item["options"] is not None: + param_plot_dict[key] = item["function"]( + data, + cal_energy_param, + selection_string, + **item["options"], + ) + else: + param_plot_dict[key] = item["function"]( + data, + cal_energy_param, + selection_string, + 
) + plot_dict[cal_energy_param] = param_plot_dict + + for peak_dict in full_object_dict[cal_energy_param].results["hpge_fit_energy_peaks_1"]["peak_parameters"].values(): + peak_dict["function"] = peak_dict["function"].name + peak_dict["parameters"] = peak_dict["parameters"].to_dict() + peak_dict["uncertainties"] = peak_dict["uncertainties"].to_dict() + for peak_dict in full_object_dict[cal_energy_param].results["hpge_fit_energy_peaks"]["peak_parameters"].values(): + peak_dict["function"] = peak_dict["function"].name + peak_dict["parameters"] = peak_dict["parameters"].to_dict() + peak_dict["uncertainties"] = peak_dict["uncertainties"].to_dict() # get baseline plots and save all plots to file if args.plot_path: common_dict = baseline_tracking_plots( - sorted(args.files), f"{args.channel}/dsp", plot_options=bl_plots + sorted(files), f"{args.channel}/dsp", plot_options=bl_plots ) for plot in list(common_dict): @@ -398,25 +650,26 @@ def energy_cal_th( if plot in item: param_dict.update({plot: item[plot]}) common_dict.update({key: param_dict}) - plot_dict = {"ecal":plot_dict} - plot_dict["common"] = common_dict if args.inplot_dict: with open(args.inplot_dict, "rb") as f: - total_plot_dict = pkl.load(args.inplot_dict, protocol=pkl.HIGHEST_PROTOCOL) - if "common" in total_plot_dict: - total_plot_dict["common"].update(common_dict) - else: - plot_dict["common"] = common_dict + total_plot_dict = pkl.load(f) + else: + total_plot_dict = {} + + if "common" in total_plot_dict: + total_plot_dict["common"].update(common_dict) + else: + total_plot_dict["common"] = common_dict - total_plot_dict = total_plot_dict.update(plot_dict) + total_plot_dict.update({"ecal": plot_dict}) pathlib.Path(os.path.dirname(args.plot_path)).mkdir(parents=True, exist_ok=True) with open(args.plot_path, "wb") as f: - pkl.dump(plot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) + pkl.dump(total_plot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) # save output dictionary - output_dict = {"pars": out_dict, "results": {"ecal":result_dict}} + output_dict = {"pars": hit_dict, "results": {"ecal": results_dict}} with open(args.save_path, "w") as fp: pathlib.Path(os.path.dirname(args.save_path)).mkdir(parents=True, exist_ok=True) json.dump(output_dict, fp, indent=4) @@ -424,4 +677,4 @@ def energy_cal_th( # save calibration objects with open(args.results_path, "wb") as fp: pathlib.Path(os.path.dirname(args.results_path)).mkdir(parents=True, exist_ok=True) - pkl.dump({"ecal":ecal_object}, fp, protocol=pkl.HIGHEST_PROTOCOL) + pkl.dump({"ecal": full_object_dict}, fp, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_hit_lq.py b/scripts/pars_hit_lq.py index 860029f..ca4cd80 100644 --- a/scripts/pars_hit_lq.py +++ b/scripts/pars_hit_lq.py @@ -8,26 +8,53 @@ import pickle as pkl import warnings +os.environ["PYGAMA_PARALLEL"] = "false" +os.environ["PYGAMA_FASTMATH"] = "false" + import numpy as np import pandas as pd from legendmeta import LegendMetadata from legendmeta.catalog import Props -from pygama.math.peak_fitting import gauss_cdf +from pygama.math.distributions import gaussian +from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.lq_cal import * # noqa: F403 -from pygama.pargen.lq_cal import cal_lq -from pygama.pargen.utils import get_tcm_pulser_ids, load_data +from pygama.pargen.lq_cal import LQCal +from pygama.pargen.utils import load_data log = logging.getLogger(__name__) warnings.filterwarnings(action="ignore", category=RuntimeWarning) +def get_results_dict(lq_class): + return { + "cal_energy_param": 
lq_class.cal_energy_param, + "rt_correction": lq_class.dt_fit_pars, + # "cdf": lq_class.cdf.name, + "1590-1596keV": lq_class.timecorr_df.to_dict("index"), + "cut_value": lq_class.cut_val, + "sfs": lq_class.low_side_sf.to_dict("index"), + } + + +def fill_plot_dict(lq_class, data, plot_options, plot_dict=None): + if plot_dict is not None: + for key, item in plot_options.items(): + if item["options"] is not None: + plot_dict[key] = item["function"](lq_class, data, **item["options"]) + else: + plot_dict[key] = item["function"](lq_class, data) + else: + plot_dict = {} + return plot_dict + + def lq_calibration( data: pd.DataFrame, cal_dicts: dict, energy_param: str, cal_energy_param: str, eres_func: callable, - cdf: callable = gauss_cdf, + cdf: callable = gaussian, selection_string: str = "", plot_options: dict | None = None, ): @@ -62,17 +89,16 @@ def lq_calibration( A dict containing the results of the LQ calibration plot_dict: dict A dict containing all the figures specified by the plot options - lq: cal_lq class - The cal_lq object used for the LQ calibration + lq: LQCal class + The LQCal object used for the LQ calibration """ - lq = cal_lq( + lq = LQCal( cal_dicts, cal_energy_param, eres_func, cdf, selection_string, - plot_options, ) data["LQ_Ecorr"] = np.divide(data["lq80"], data[energy_param]) @@ -88,12 +114,14 @@ def lq_calibration( lq.calibrate(data, "LQ_Ecorr") log.info("Calibrated LQ") - return cal_dicts, lq.get_results_dict(), lq.fill_plot_dict(data), lq + return cal_dicts, get_results_dict(lq), fill_plot_dict(lq, data, plot_options), lq argparser = argparse.ArgumentParser() argparser.add_argument("files", help="files", nargs="*", type=str) -argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, required=True) +argparser.add_argument("--pulser_file", help="pulser_file", type=str, required=False) +argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, required=False) + argparser.add_argument("--ecal_file", help="ecal_file", type=str, required=True) argparser.add_argument("--eres_file", help="eres_file", type=str, required=True) argparser.add_argument("--inplots", help="in_plot_path", type=str, required=False) @@ -126,7 +154,7 @@ def lq_calibration( ecal_dict = Props.read_from(args.ecal_file) cal_dict = ecal_dict["pars"]["operations"] -eres_dict = ecal_dict["results"] +eres_dict = ecal_dict["results"]["ecal"] with open(args.eres_file, "rb") as o: object_dict = pkl.load(o) @@ -134,7 +162,7 @@ def lq_calibration( if kwarg_dict["run_lq"] is True: kwarg_dict.pop("run_lq") - cdf = eval(kwarg_dict.pop("cdf")) if "cdf" in kwarg_dict else gauss_cdf + cdf = eval(kwarg_dict.pop("cdf")) if "cdf" in kwarg_dict else gaussian if "plot_options" in kwarg_dict: for field, item in kwarg_dict["plot_options"].items(): @@ -173,13 +201,25 @@ def eres_func(x): return_selection_mask=True, ) - # get pulser mask from tcm files - with open(args.tcm_filelist) as f: - tcm_files = f.read().splitlines() - tcm_files = sorted(tcm_files) - ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict.pop("pulser_multiplicity_threshold") - ) + if args.pulser_file: + with open(args.pulser_file) as f: + pulser_dict = json.load(f) + mask = np.array(pulser_dict["mask"]) + if "pulser_multiplicity_threshold" in kwarg_dict: + kwarg_dict.pop("pulser_multiplicity_threshold") + + elif args.tcm_filelist: + # get pulser mask from tcm files + with open(args.tcm_filelist) as f: + tcm_files = f.read().splitlines() + tcm_files = sorted(np.unique(tcm_files)) + ids, mask = get_tcm_pulser_ids( + 
tcm_files, args.channel, kwarg_dict.pop("pulser_multiplicity_threshold") + ) + else: + msg = "No pulser file or tcm filelist provided" + raise ValueError(msg) + data["is_pulser"] = mask[threshold_mask] cal_dict, out_dict, plot_dict, obj = lq_calibration( @@ -220,7 +260,7 @@ def eres_func(x): pkl.dump(out_plot_dict, w, protocol=pkl.HIGHEST_PROTOCOL) -results_dict = dict(**eres_dict,lq = out_dict) +results_dict = dict(**eres_dict, lq=out_dict) pathlib.Path(os.path.dirname(args.hit_pars)).mkdir(parents=True, exist_ok=True) with open(args.hit_pars, "w") as w: final_hit_dict = { @@ -231,8 +271,8 @@ def eres_func(x): pathlib.Path(os.path.dirname(args.lq_results)).mkdir(parents=True, exist_ok=True) final_object_dict = dict( - **object_dict, - lq=obj, - ) + **object_dict, + lq=obj, +) with open(args.lq_results, "wb") as w: pkl.dump(final_object_dict, w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_hit_qc.py b/scripts/pars_hit_qc.py index 05254d8..09e14c6 100644 --- a/scripts/pars_hit_qc.py +++ b/scripts/pars_hit_qc.py @@ -6,12 +6,18 @@ import os import pathlib import pickle as pkl +import re import warnings +os.environ["PYGAMA_PARALLEL"] = "false" +os.environ["PYGAMA_FASTMATH"] = "false" + +import numpy as np from legendmeta import LegendMetadata from legendmeta.catalog import Props -from pygama.pargen.utils import get_tcm_pulser_ids, load_data -from pygama.pargen.cuts import generate_cuts +from lgdo.lh5 import ls +from pygama.pargen.data_cleaning import generate_cuts, get_keys, get_tcm_pulser_ids, generate_cut_classifiers +from pygama.pargen.utils import load_data log = logging.getLogger(__name__) @@ -20,8 +26,10 @@ if __name__ == "__main__": argparser = argparse.ArgumentParser() - argparser.add_argument("--files", help="files", nargs="*", type=str) - argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, required=True) + argparser.add_argument("--cal_files", help="cal_files", nargs="*", type=str) + argparser.add_argument("--fft_files", help="fft_files", nargs="*", type=str) + argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, required=False) + argparser.add_argument("--pulser_file", help="pulser_file", type=str, required=False) argparser.add_argument("--configs", help="config", type=str, required=True) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) @@ -31,8 +39,8 @@ argparser.add_argument("--log", help="log_file", type=str) - argparser.add_argument("--plot_path", help="plot_path", type=str, required=False, nargs="*") - argparser.add_argument("--save_path", help="save_path", type=str, nargs="*") + argparser.add_argument("--plot_path", help="plot_path", type=str, required=False) + argparser.add_argument("--save_path", help="save_path", type=str) args = argparser.parse_args() logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") @@ -43,62 +51,122 @@ logging.getLogger("matplotlib").setLevel(logging.INFO) logging.getLogger("legendmeta").setLevel(logging.INFO) - # get metadata dictionary configs = LegendMetadata(path=args.configs) channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"] - if args.tier == "hit": - channel_dict = channel_dict["pars_hit_qc"]["inputs"]["ecal_config"][args.channel] - elif args.tier == "pht": - channel_dict = channel_dict["pars_pht_qc"]["inputs"]["ecal_config"][args.channel] - else: - msg = "invalid tier" - raise ValueError(msg) + channel_dict = channel_dict["pars_hit_qc"]["inputs"]["qc_config"][args.channel] kwarg_dict = 
Props.read_from(channel_dict) + kwarg_dict_cal = kwarg_dict["cal_fields"] + + cut_fields = get_keys( + [ + key.replace(f"{args.channel}/dsp/", "") + for key in ls(args.cal_files[0], f"{args.channel}/dsp/") + ], + kwarg_dict_cal["cut_parameters"], + ) + if "initial_cal_cuts" in kwarg_dict: + init_cal = kwarg_dict["initial_cal_cuts"] + cut_fields += get_keys( + [ + key.replace(f"{args.channel}/dsp/", "") + for key in ls(args.cal_files[0], f"{args.channel}/dsp/") + ], + init_cal["cut_parameters"], + ) + # load data in data, threshold_mask = load_data( - args.files, + args.cal_files, f"{args.channel}/dsp", - hit_dict, - list(kwarg_dict["cut_parameters"]) - + ["timestamp", "trapTmax"], - threshold=kwarg_dict["threshold"], + {}, + [*cut_fields, "timestamp", "trapTmax"], + threshold=kwarg_dict_cal.get("threshold", 0), return_selection_mask=True, cal_energy_param="trapTmax", ) - # get pulser mask from tcm files - with open(args.tcm_filelist) as f: - tcm_files = f.read().splitlines() - tcm_files = sorted(np.unique(tcm_files)) - ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict.pop("pulser_multiplicity_threshold") - ) + if args.pulser_file: + with open(args.pulser_file) as f: + pulser_dict = json.load(f) + mask = np.array(pulser_dict["mask"]) + + elif args.tcm_filelist: + # get pulser mask from tcm files + with open(args.tcm_filelist) as f: + tcm_files = f.read().splitlines() + tcm_files = sorted(np.unique(tcm_files)) + ids, mask = get_tcm_pulser_ids( + tcm_files, args.channel, kwarg_dict["pulser_multiplicity_threshold"] + ) + else: + msg = "No pulser file or tcm filelist provided" + raise ValueError(msg) + data["is_pulser"] = mask[threshold_mask] - hit_dict, plot_dict = generate_cuts( + if "initial_cal_cuts" in kwarg_dict: + init_cal = kwarg_dict["initial_cal_cuts"] + hit_dict_init_cal, plot_dict_init_cal = generate_cut_classifiers( data, - cut_dict, - kwarg_dict.get("rounding",4), + init_cal["cut_parameters"], + init_cal.get("rounding", 4), + display=1 if args.plot_path else 0, + ) + ct_mask = np.full(len(data), True, dtype=bool) + for outname, info in hit_dict_init_cal.items(): + # convert to pandas eval + exp = info["expression"] + for key in info.get("parameters", None): + exp = re.sub(f"(? 
0:
+        fft_data = load_data(
+            args.fft_files,
+            f"{args.channel}/dsp",
+            {},
+            [*list(kwarg_dict_fft["cut_parameters"]), "timestamp", "trapTmax"],
+            threshold=kwarg_dict_fft["threshold"],
+            return_selection_mask=False,
+            cal_energy_param="trapTmax",
+        )
+
+        hit_dict_fft, plot_dict_fft = generate_cut_classifiers(
+            fft_data,
+            kwarg_dict_fft["cut_parameters"],
+            kwarg_dict.get("rounding", 4),
+            display=1 if args.plot_path else 0,
+        )
-    if isinstance(args.save_path, string):
-        save_path = [args.save_path]
     else:
-        save_path = args.save_path
-    for file in save_path
-        pathlib.Path(os.path.dirname(save_path)).mkdir(parents=True, exist_ok=True)
-        with open(file, "w") as f:
-            json.dump(hit_dict, f, indent=4)
+        hit_dict_fft = {}
+        plot_dict_fft = {}
+
+    hit_dict = {**hit_dict_init_cal, **hit_dict_cal, **hit_dict_fft}
+    plot_dict = {**plot_dict_init_cal, **plot_dict_cal, **plot_dict_fft}
+
+    pathlib.Path(os.path.dirname(args.save_path)).mkdir(parents=True, exist_ok=True)
+    with open(args.save_path, "w") as f:
+        json.dump(hit_dict, f, indent=4)
 
     if args.plot_path:
-        if isinstance(args.plot_path, string):
-            plot_path = [args.plot_path]
-        else:
-            plot_path = args.plot_path
-        for file in plot_path:
-            pathlib.Path(os.path.dirname(plot_path)).mkdir(parents=True, exist_ok=True)
-            with open(plot_path, "wb") as f:
-                pkl.dump({"qc":plot_dict}, f, protocol=pkl.HIGHEST_PROTOCOL)
\ No newline at end of file
+        pathlib.Path(os.path.dirname(args.plot_path)).mkdir(parents=True, exist_ok=True)
+        with open(args.plot_path, "wb") as f:
+            pkl.dump({"qc": plot_dict}, f, protocol=pkl.HIGHEST_PROTOCOL)
diff --git a/scripts/pars_pht_aoecal.py b/scripts/pars_pht_aoecal.py
index 49303e7..34fa8f8 100644
--- a/scripts/pars_pht_aoecal.py
+++ b/scripts/pars_pht_aoecal.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import argparse
+import copy
 import json
 import logging
 import os
@@ -9,19 +10,54 @@
 import warnings
 from typing import Callable
 
+os.environ["PYGAMA_PARALLEL"] = "false"
+os.environ["PYGAMA_FASTMATH"] = "false"
+
 import numpy as np
 import pandas as pd
 from legendmeta import LegendMetadata
 from legendmeta.catalog import Props
 from pygama.pargen.AoE_cal import *  # noqa: F403
-from pygama.pargen.AoE_cal import cal_aoe, pol1, sigma_fit, standard_aoe
-from pygama.pargen.utils import get_tcm_pulser_ids, load_data
+from pygama.pargen.AoE_cal import CalAoE, Pol1, SigmaFit, aoe_peak
+from pygama.pargen.data_cleaning import get_tcm_pulser_ids
+from pygama.pargen.utils import load_data
 from util.FileKey import ChannelProcKey, ProcessingFileKey
 
 log = logging.getLogger(__name__)
 warnings.filterwarnings(action="ignore", category=RuntimeWarning)
 
 
+def get_results_dict(aoe_class):
+    result_dict = {}
+    for tstamp in aoe_class.low_side_sfs_by_run:
+        result_dict[tstamp] = {
+            "cal_energy_param": aoe_class.cal_energy_param,
+            "dt_param": aoe_class.dt_param,
+            "rt_correction": aoe_class.dt_corr,
+            "1000-1300keV": aoe_class.timecorr_df.to_dict("index"),
+            "correction_fit_results": aoe_class.energy_corr_res_dict,
+            "low_cut": aoe_class.low_cut_val,
+            "high_cut": aoe_class.high_cut_val,
+            "low_side_sfs": aoe_class.low_side_sfs.to_dict("index"),
+            "2_side_sfs": aoe_class.two_side_sfs.to_dict("index"),
+            "low_side_sfs_by_run": aoe_class.low_side_sfs_by_run[tstamp].to_dict("index"),
+            "2_side_sfs_by_run": aoe_class.two_side_sfs_by_run[tstamp].to_dict("index"),
+        }
+    return result_dict
+
+
+def fill_plot_dict(aoe_class, data, plot_options, plot_dict=None):
+    if plot_dict is None:
+        plot_dict = {}
+    for key, item in plot_options.items():
+        if 
item["options"] is not None: + plot_dict[key] = item["function"](aoe_class, data, **item["options"]) + else: + plot_dict[key] = item["function"](aoe_class, data) + + return plot_dict + + def aoe_calibration( data: pd.Dataframe, cal_dicts: dict, @@ -29,36 +65,34 @@ def aoe_calibration( energy_param: str, cal_energy_param: str, eres_func: Callable, - pdf: Callable = standard_aoe, + pdf: Callable = aoe_peak, selection_string: str = "", dt_corr: bool = False, dep_correct: bool = False, dt_cut: dict | None = None, high_cut_val: int = 3, - mean_func: Callable = pol1, - sigma_func: Callable = sigma_fit, - dep_acc: float = 0.9, + mean_func: Callable = Pol1, + sigma_func: Callable = SigmaFit, + # dep_acc: float = 0.9, dt_param: str = "dt_eff", comptBands_width: int = 20, plot_options: dict | None = None, ): data["AoE_Uncorr"] = data[current_param] / data[energy_param] - aoe = cal_aoe( - cal_dicts, - cal_energy_param, - eres_func, - pdf, - selection_string, - dt_corr, - dep_acc, - dep_correct, - dt_cut, - dt_param, - high_cut_val, - mean_func, - sigma_func, - comptBands_width, - plot_options if plot_options is not None else {}, + aoe = CalAoE( + cal_dicts=cal_dicts, + cal_energy_param=cal_energy_param, + eres_func=eres_func, + pdf=pdf, + selection_string=selection_string, + dt_corr=dt_corr, + dep_correct=dep_correct, + dt_cut=dt_cut, + dt_param=dt_param, + high_cut_val=high_cut_val, + mean_func=mean_func, + sigma_func=sigma_func, + compt_bands_width=comptBands_width, ) aoe.update_cal_dicts( { @@ -70,12 +104,13 @@ def aoe_calibration( ) aoe.calibrate(data, "AoE_Uncorr") log.info("Calibrated A/E") - return cal_dicts, aoe.get_results_dict(), aoe.fill_plot_dict(data), aoe + return cal_dicts, get_results_dict(aoe), fill_plot_dict(aoe, data, plot_options), aoe argparser = argparse.ArgumentParser() argparser.add_argument("--input_files", help="files", type=str, nargs="*", required=True) -argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, nargs="*", required=True) +argparser.add_argument("--pulser_files", help="pulser_file", nargs="*", type=str, required=False) +argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, nargs="*", required=False) argparser.add_argument("--ecal_file", help="ecal_file", type=str, nargs="*", required=True) argparser.add_argument("--eres_file", help="eres_file", type=str, nargs="*", required=True) argparser.add_argument("--inplots", help="eres_file", type=str, nargs="*", required=True) @@ -226,20 +261,28 @@ def run_splitter(files): return_selection_mask=True, ) - # get pulser mask from tcm files - if isinstance(args.tcm_filelist, list): - tcm_files = [] - for file in args.tcm_filelist: + if args.pulser_files: + mask = np.array([], dtype=bool) + for file in args.pulser_files: with open(file) as f: - tcm_files += f.read().splitlines() - else: + pulser_dict = json.load(f) + pulser_mask = np.array(pulser_dict["mask"]) + mask = np.append(mask, pulser_mask) + if "pulser_multiplicity_threshold" in kwarg_dict: + kwarg_dict.pop("pulser_multiplicity_threshold") + + elif args.tcm_filelist: + # get pulser mask from tcm files with open(args.tcm_filelist) as f: tcm_files = f.read().splitlines() + tcm_files = sorted(np.unique(tcm_files)) + ids, mask = get_tcm_pulser_ids( + tcm_files, args.channel, kwarg_dict["pulser_multiplicity_threshold"] + ) + else: + msg = "No pulser file or tcm filelist provided" + raise ValueError(msg) - tcm_files = sorted(np.unique(tcm_files)) - ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, 
kwarg_dict.pop("pulser_multiplicity_threshold") - ) data["is_pulser"] = mask[threshold_mask] for tstamp in cal_dict: @@ -249,19 +292,18 @@ def run_splitter(files): row = pd.DataFrame(row) data = pd.concat([data, row]) - pdf = eval(kwarg_dict.pop("pdf")) if "pdf" in kwarg_dict else standard_aoe + pdf = eval(kwarg_dict.pop("pdf")) if "pdf" in kwarg_dict else aoe_peak - mean_func = eval(kwarg_dict.pop("mean_func")) if "mean_func" in kwarg_dict else pol1 + mean_func = eval(kwarg_dict.pop("mean_func")) if "mean_func" in kwarg_dict else Pol1 - if "sigma_func" in kwarg_dict: - sigma_func = eval(kwarg_dict.pop("sigma_func")) - else: - sigma_func = sigma_fit + sigma_func = eval(kwarg_dict.pop("sigma_func")) if "sigma_func" in kwarg_dict else SigmaFit try: - eres = results_dicts[next(iter(results_dicts))]["partition_ecal"][ - kwarg_dict["cal_energy_param"] - ]["eres_linear"].copy() + eres = copy.deepcopy( + results_dicts[next(iter(results_dicts))]["partition_ecal"][ + kwarg_dict["cal_energy_param"] + ]["eres_linear"] + ) def eres_func(x): return eval(eres["expression"], dict(x=x, **eres["parameters"])) @@ -270,9 +312,11 @@ def eres_func(x): raise RuntimeError except (KeyError, RuntimeError): try: - eres = results_dicts[next(iter(results_dicts))]["ecal"][ - kwarg_dict["cal_energy_param"] - ]["eres_linear"].copy() + eres = copy.deepcopy( + results_dicts[next(iter(results_dicts))]["ecal"][kwarg_dict["cal_energy_param"]][ + "eres_linear" + ] + ) def eres_func(x): return eval(eres["expression"], dict(x=x, **eres["parameters"])) @@ -292,16 +336,16 @@ def eres_func(x): sigma_func=sigma_func, **kwarg_dict, ) - + aoe_obj.pdf = aoe_obj.pdf.name # need to change eres func as can't pickle lambdas try: aoe_obj.eres_func = results_dicts[next(iter(results_dicts))]["partition_ecal"][ kwarg_dict["cal_energy_param"] - ]["eres_linear"].copy() + ]["eres_linear"] except KeyError: aoe_obj.eres_func = {} else: - out_dict = {} + out_dict = {tstamp:None for tstamp in cal_dict} plot_dict = {} aoe_obj = None @@ -346,7 +390,7 @@ def eres_func(x): "pars": {"operations": cal_dict[fk.timestamp]}, "results": dict( **results_dicts[fk.timestamp], - aoe=out_dict, + aoe=out_dict[fk.timestamp], ), } pathlib.Path(os.path.dirname(out)).mkdir(parents=True, exist_ok=True) diff --git a/scripts/pars_pht_lqcal.py b/scripts/pars_pht_lqcal.py index 2e656d6..9937281 100644 --- a/scripts/pars_pht_lqcal.py +++ b/scripts/pars_pht_lqcal.py @@ -8,27 +8,54 @@ import pickle as pkl import warnings +os.environ["PYGAMA_PARALLEL"] = "false" +os.environ["PYGAMA_FASTMATH"] = "false" + import numpy as np import pandas as pd from legendmeta import LegendMetadata from legendmeta.catalog import Props -from pygama.math.peak_fitting import gauss_cdf +from pygama.math.distributions import gaussian +from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.lq_cal import * # noqa: F403 -from pygama.pargen.lq_cal import cal_lq -from pygama.pargen.utils import get_tcm_pulser_ids, load_data +from pygama.pargen.lq_cal import LQCal +from pygama.pargen.utils import load_data from util.FileKey import ChannelProcKey, ProcessingFileKey log = logging.getLogger(__name__) warnings.filterwarnings(action="ignore", category=RuntimeWarning) +def get_results_dict(lq_class): + return { + "cal_energy_param": lq_class.cal_energy_param, + "rt_correction": lq_class.dt_fit_pars, + # "cdf": lq_class.cdf.name, + "1590-1596keV": lq_class.timecorr_df.to_dict("index"), + "cut_value": lq_class.cut_val, + "sfs": lq_class.low_side_sf.to_dict("index"), + } + + +def 
fill_plot_dict(lq_class, data, plot_options, plot_dict=None): + if plot_dict is None: + plot_dict = {} + for key, item in plot_options.items(): + if item["options"] is not None: + plot_dict[key] = item["function"](lq_class, data, **item["options"]) + else: + plot_dict[key] = item["function"](lq_class, data) + + return plot_dict + + def lq_calibration( data: pd.DataFrame, cal_dicts: dict, energy_param: str, cal_energy_param: str, eres_func: callable, - cdf: callable = gauss_cdf, + cdf: callable = gaussian, selection_string: str = "", plot_options: dict | None = None, ): @@ -66,13 +93,12 @@ def lq_calibration( The cal_lq object used for the LQ calibration """ - lq = cal_lq( + lq = LQCal( cal_dicts, cal_energy_param, eres_func, cdf, selection_string, - plot_options, ) data["LQ_Ecorr"] = np.divide(data["lq80"], data[energy_param]) @@ -88,12 +114,13 @@ def lq_calibration( lq.calibrate(data, "LQ_Ecorr") log.info("Calibrated LQ") - return cal_dicts, lq.get_results_dict(), lq.fill_plot_dict(data), lq + return cal_dicts, get_results_dict(lq), fill_plot_dict(lq, data, plot_options), lq argparser = argparse.ArgumentParser() argparser.add_argument("--input_files", help="files", type=str, nargs="*", required=True) -argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, nargs="*", required=True) +argparser.add_argument("--pulser_files", help="pulser_file", type=str, nargs="*", required=False) +argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, nargs="*", required=False) argparser.add_argument("--ecal_file", help="ecal_file", type=str, nargs="*", required=True) argparser.add_argument("--eres_file", help="eres_file", type=str, nargs="*", required=True) argparser.add_argument("--inplots", help="eres_file", type=str, nargs="*", required=True) @@ -233,20 +260,28 @@ def run_splitter(files): return_selection_mask=True, ) - # get pulser mask from tcm files - if isinstance(args.tcm_filelist, list): - tcm_files = [] - for file in args.tcm_filelist: + if args.pulser_files: + mask = np.array([], dtype=bool) + for file in args.pulser_files: with open(file) as f: - tcm_files += f.read().splitlines() - else: + pulser_dict = json.load(f) + pulser_mask = np.array(pulser_dict["mask"]) + mask = np.append(mask, pulser_mask) + if "pulser_multiplicity_threshold" in kwarg_dict: + kwarg_dict.pop("pulser_multiplicity_threshold") + + elif args.tcm_filelist: + # get pulser mask from tcm files with open(args.tcm_filelist) as f: tcm_files = f.read().splitlines() + tcm_files = sorted(np.unique(tcm_files)) + ids, mask = get_tcm_pulser_ids( + tcm_files, args.channel, kwarg_dict["pulser_multiplicity_threshold"] + ) + else: + msg = "No pulser file or tcm filelist provided" + raise ValueError(msg) - tcm_files = sorted(np.unique(tcm_files)) - ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict.pop("pulser_multiplicity_threshold") - ) data["is_pulser"] = mask[threshold_mask] for tstamp in cal_dict: @@ -256,7 +291,7 @@ def run_splitter(files): row = pd.DataFrame(row) data = pd.concat([data, row]) - cdf = eval(kwarg_dict.pop("cdf")) if "cdf" in kwarg_dict else gauss_cdf + cdf = eval(kwarg_dict.pop("cdf")) if "cdf" in kwarg_dict else gaussian try: eres = results_dicts[next(iter(results_dicts))]["partition_ecal"][ diff --git a/scripts/pars_pht_partcal.py b/scripts/pars_pht_partcal.py index f3c926e..e11f965 100644 --- a/scripts/pars_pht_partcal.py +++ b/scripts/pars_pht_partcal.py @@ -1,6 +1,7 @@ from __future__ import annotations import argparse +import copy import json import logging 
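For reference, both the A/E and LQ refactors above replace the old class methods with module-level get_results_dict/fill_plot_dict helpers that all consume the same plot_options contract: a mapping of plot name to {"function": callable, "options": dict or None}. A minimal self-contained sketch of that contract follows; dummy_plot is a hypothetical stand-in for the real pygama plotting functions and is purely illustrative.

import pandas as pd

def dummy_plot(cal_class, data, bins=100):
    # stand-in: a real plot option would return a matplotlib figure
    return f"{len(data)} events, {bins} bins"

def fill_plot_dict(cal_class, data, plot_options, plot_dict=None):
    # mirrors the helpers above: call each entry's function, forwarding
    # its optional keyword arguments when present
    if plot_dict is None:
        plot_dict = {}
    for key, item in plot_options.items():
        if item["options"] is not None:
            plot_dict[key] = item["function"](cal_class, data, **item["options"])
        else:
            plot_dict[key] = item["function"](cal_class, data)
    return plot_dict

plot_options = {"spectrum": {"function": dummy_plot, "options": {"bins": 50}}}
plots = fill_plot_dict(None, pd.DataFrame({"e": [1.0, 2.0]}), plot_options)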
import os @@ -9,19 +10,43 @@ import re import warnings +os.environ["PYGAMA_PARALLEL"] = "false" +os.environ["PYGAMA_FASTMATH"] = "false" + import numpy as np import pandas as pd +import pygama.math.distributions as pgf +import pygama.math.histogram as pgh from legendmeta import LegendMetadata from legendmeta.catalog import Props -from pygama.pargen.ecal_th import * # noqa: F403 -from pygama.pargen.ecal_th import high_stats_fitting -from pygama.pargen.utils import get_tcm_pulser_ids, load_data +from pygama.math.distributions import nb_poly +from pygama.pargen.data_cleaning import get_tcm_pulser_ids +from pygama.pargen.energy_cal import FWHMLinear, FWHMQuadratic, HPGeCalibration +from pygama.pargen.utils import load_data from util.FileKey import ChannelProcKey, ProcessingFileKey log = logging.getLogger(__name__) warnings.filterwarnings(action="ignore", category=RuntimeWarning) +def run_splitter(files): + """ + Returns list containing lists of each run + """ + + runs = [] + run_files = [] + for file in files: + fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(file)) + if f"{fk.period}-{fk.run}" not in runs: + runs.append(f"{fk.period}-{fk.run}") + run_files.append([]) + for i, run in enumerate(runs): + if run == f"{fk.period}-{fk.run}": + run_files[i].append(file) + return run_files + + def update_cal_dicts(cal_dicts, update_dict): if re.match(r"(\d{8})T(\d{6})Z", next(iter(cal_dicts))): for tstamp in cal_dicts: @@ -33,398 +58,393 @@ def update_cal_dicts(cal_dicts, update_dict): cal_dicts.update(update_dict) return cal_dicts -def get_results_dict(ecal_class, data): - if ecal_class.results: - fwhm_linear = ecal_class.fwhm_fit_linear.copy() - fwhm_linear["parameters"] = fwhm_linear["parameters"].to_dict() - fwhm_linear["uncertainties"] = fwhm_linear["uncertainties"].to_dict() - fwhm_linear["cov"] = fwhm_linear["cov"].tolist() - fwhm_quad = ecal_class.fwhm_fit_quadratic.copy() - fwhm_quad["parameters"] = fwhm_quad["parameters"].to_dict() - fwhm_quad["uncertainties"] = fwhm_quad["uncertainties"].to_dict() - fwhm_quad["cov"] = fwhm_quad["cov"].tolist() - - pk_dict = { - Ei: { - "function": func_i.__name__, - "module": func_i.__module__, - "parameters_in_keV": parsi.to_dict(), - "uncertainties_in_keV": errorsi.to_dict(), - "p_val": pvali, - "fwhm_in_keV": list(fwhmi), - "pk_position":(posi, posuni), - } - for i, (Ei, parsi, errorsi, pvali, fwhmi, posi, posuni, func_i) in enumerate( - zip( - ecal_class.results["fitted_keV"], - ecal_class.results["pk_pars"][ecal_class.results["pk_validities"]], - ecal_class.results["pk_errors"][ecal_class.results["pk_validities"]], - ecal_class.results["pk_pvals"][ecal_class.results["pk_validities"]], - ecal_class.results["pk_fwhms"], - ecal_class.results["pk_pos"], - ecal_class.results["pk_pos_uncertainties"], - ecal_class.funcs, - ) - ) - } + +def bin_spectrum( + data, + cal_energy_param, + selection_string, + cut_field="is_valid_cal", + pulser_field="is_pulser", + erange=(0, 3000), + dx=2, +): + bins = np.arange(erange[0], erange[1] + dx, dx) + return { + "bins": pgh.get_bin_centers(bins), + "counts": np.histogram(data.query(selection_string)[cal_energy_param], bins)[0], + "cut_counts": np.histogram( + data.query(f"(~{cut_field})&(~{pulser_field})")[cal_energy_param], + bins, + )[0], + "pulser_counts": np.histogram( + data.query(pulser_field)[cal_energy_param], + bins, + )[0], + } + + +def get_results_dict(ecal_class, data, cal_energy_param, selection_string): + if np.isnan(ecal_class.pars).all(): + return {} + else: + results_dict = 
copy.deepcopy(ecal_class.results["hpge_fit_energy_peaks"]) + + if "FWHMLinear" in results_dict: + fwhm_linear = results_dict["FWHMLinear"] + fwhm_linear["function"] = fwhm_linear["function"].__name__ + fwhm_linear["parameters"] = fwhm_linear["parameters"].to_dict() + fwhm_linear["uncertainties"] = fwhm_linear["uncertainties"].to_dict() + fwhm_linear["cov"] = fwhm_linear["cov"].tolist() + else: + fwhm_linear = None + + if "FWHMQuadratic" in results_dict: + fwhm_quad = results_dict["FWHMQuadratic"] + fwhm_quad["function"] = fwhm_quad["function"].__name__ + fwhm_quad["parameters"] = fwhm_quad["parameters"].to_dict() + fwhm_quad["uncertainties"] = fwhm_quad["uncertainties"].to_dict() + fwhm_quad["cov"] = fwhm_quad["cov"].tolist() + else: + fwhm_quad = None + + pk_dict = results_dict["peak_parameters"] + + for _, dic in pk_dict.items(): + dic["function"] = dic["function"].name + dic["parameters"] = dic["parameters"].to_dict() + dic["uncertainties"] = dic["uncertainties"].to_dict() + dic.pop("covariance") return { + "total_fep": len(data.query(f"{cal_energy_param}>2604&{cal_energy_param}<2624")), + "total_dep": len(data.query(f"{cal_energy_param}>1587&{cal_energy_param}<1597")), + "pass_fep": len( + data.query(f"{cal_energy_param}>2604&{cal_energy_param}<2624&{selection_string}") + ), + "pass_dep": len( + data.query(f"{cal_energy_param}>1587&{cal_energy_param}<1597&{selection_string}") + ), "eres_linear": fwhm_linear, "eres_quadratic": fwhm_quad, - "fitted_peaks": ecal_class.results["fitted_keV"].tolist(), + "fitted_peaks": ecal_class.peaks_kev.tolist(), "pk_fits": pk_dict, + "peak_param":results_dict["peak_param"] } - else: - return {} - -def partition_energy_cal_th( - data: pd.Datframe, - hit_dicts: dict, - energy_params: list[str], - selection_string: str = "", - threshold: int = 0, - p_val: float = 0, - plot_options: dict | None = None, - simplex: bool = True, - tail_weight: int = 20, - cal_energy_params: list = None, - deg:int=2, -) -> tuple(dict, dict, dict, dict): - results_dict = {} - plot_dict = {} - full_object_dict = {} - if cal_energy_params is None: - cal_energy_params = [energy_param + "_cal" for energy_param in energy_params] - glines = [ - 238.632, - 511, - 583.191, - 727.330, - 763, - 785, - 860.564, - 893, - 1079, - 1513, - 1592.53, - 1620.50, - 2103.53, - 2614.50, - 3125, - 3198, - 3474, - ] # gamma lines used for calibration - range_keV = [ - (10, 10), - (30, 30), - (30, 30), - (30, 30), - (30, 15), - (15, 30), - (30, 25), - (25, 30), - (30, 30), - (30, 30), - (30, 20), - (20, 30), - (30, 30), - (30, 30), - (30, 30), - (30, 30), - (30, 30), - ] # side bands width - funcs = [ - pgf.extended_gauss_step_pdf, # probably should be gauss on exp - pgf.extended_gauss_step_pdf, - pgf.extended_radford_pdf, - pgf.extended_radford_pdf, - pgf.extended_gauss_step_pdf, - pgf.extended_gauss_step_pdf, - pgf.extended_radford_pdf, - pgf.extended_gauss_step_pdf, - pgf.extended_gauss_step_pdf, - pgf.extended_gauss_step_pdf, - pgf.extended_radford_pdf, - pgf.extended_radford_pdf, - pgf.extended_radford_pdf, - pgf.extended_radford_pdf, - pgf.extended_gauss_step_pdf, - pgf.extended_gauss_step_pdf, - pgf.extended_gauss_step_pdf, - ] - gof_funcs = [ - pgf.gauss_step_pdf, - pgf.gauss_step_pdf, - pgf.radford_pdf, - pgf.radford_pdf, - pgf.gauss_step_pdf, - pgf.gauss_step_pdf, - pgf.radford_pdf, - pgf.gauss_step_pdf, - pgf.gauss_step_pdf, - pgf.gauss_step_pdf, - pgf.radford_pdf, - pgf.radford_pdf, - pgf.radford_pdf, - pgf.radford_pdf, - pgf.gauss_step_pdf, - pgf.gauss_step_pdf, - pgf.gauss_step_pdf, - ] - 
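The .to_dict()/.tolist()/__name__ juggling in the get_results_dict rewrite above exists because the new calibration results carry callables, pandas Series and numpy arrays, none of which json.dump can encode. A minimal sketch of the same flattening, with made-up fit values for illustration only:

import json

import numpy as np
import pandas as pd

def hpge_peak():
    # stand-in for a pdf object carried in the fit results
    pass

peak_result = {
    "function": hpge_peak,
    "parameters": pd.Series({"mu": 2614.5, "sigma": 1.2}),
    "uncertainties": pd.Series({"mu": 0.1, "sigma": 0.05}),
    "covariance": np.eye(2),
}

flat = {
    "function": peak_result["function"].__name__,
    "parameters": peak_result["parameters"].to_dict(),
    "uncertainties": peak_result["uncertainties"].to_dict(),
    # covariance is dropped, as with the pop("covariance") above:
    # it is bulky and not needed downstream
}
print(json.dumps(flat, indent=2))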
- for energy_param, cal_energy_param in zip(energy_params, cal_energy_params): - full_object_dict[cal_energy_param] = high_stats_fitting( - energy_param=energy_param, - glines=glines, - range_keV=range_keV, - funcs=funcs, - gof_funcs=gof_funcs, - selection_string=selection_string, - threshold=threshold, - p_val=p_val, - plot_options=plot_options, - simplex=simplex, - tail_weight=tail_weight, - cal_energy_param=cal_energy_param, - deg=deg, - fixed={1:1} - ) - full_object_dict[cal_energy_param].update_calibration(data) - results_dict[cal_energy_param] = get_results_dict(full_object_dict[cal_energy_param], data) - hit_dicts = update_cal_dicts(hit_dicts, full_object_dict[cal_energy_param].hit_dict) - if full_object_dict[cal_energy_param].results: - plot_dict[cal_energy_param] = full_object_dict[cal_energy_param].fill_plot_dict(data).copy() - log.info("Finished all calibrations") - return hit_dicts, results_dict, plot_dict, full_object_dict +if __name__ == "__main__": + argparser = argparse.ArgumentParser() + argparser.add_argument("--input_files", help="files", type=str, nargs="*", required=True) + argparser.add_argument( + "--pulser_files", help="pulser_file", nargs="*", type=str, required=False + ) + argparser.add_argument( + "--tcm_filelist", help="tcm_filelist", type=str, nargs="*", required=False + ) + argparser.add_argument("--ecal_file", help="ecal_file", type=str, nargs="*", required=True) + argparser.add_argument("--eres_file", help="eres_file", type=str, nargs="*", required=True) + argparser.add_argument("--inplots", help="eres_file", type=str, nargs="*", required=True) + + argparser.add_argument("--configs", help="configs", type=str, required=True) + argparser.add_argument("--timestamp", help="Datatype", type=str, required=True) + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) + argparser.add_argument("--channel", help="Channel", type=str, required=True) + + argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--metadata", help="metadata path", type=str, required=True) + + argparser.add_argument("--plot_file", help="plot_file", type=str, nargs="*", required=False) + argparser.add_argument("--hit_pars", help="hit_pars", nargs="*", type=str) + argparser.add_argument("--fit_results", help="fit_results", nargs="*", type=str) + args = argparser.parse_args() + + logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") + logging.getLogger("numba").setLevel(logging.INFO) + logging.getLogger("parse").setLevel(logging.INFO) + logging.getLogger("lgdo").setLevel(logging.INFO) + logging.getLogger("h5py").setLevel(logging.INFO) + logging.getLogger("matplotlib").setLevel(logging.INFO) + logging.getLogger("legendmeta").setLevel(logging.INFO) + + meta = LegendMetadata(path=args.metadata) + chmap = meta.channelmap(args.timestamp) + + det_status = chmap.map("daq.rawid")[int(args.channel[2:])]["analysis"]["usability"] + + configs = LegendMetadata(path=args.configs) + channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ + "pars_pht_partcal" + ]["inputs"]["pars_pht_partcal_config"][args.channel] + + kwarg_dict = Props.read_from(channel_dict) + + cal_dict = {} + results_dicts = {} + if isinstance(args.ecal_file, list): + for ecal in args.ecal_file: + cal = Props.read_from(ecal) -argparser = argparse.ArgumentParser() -argparser.add_argument("--input_files", help="files", type=str, nargs="*", required=True) -argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, nargs="*", 
required=True) -argparser.add_argument("--ecal_file", help="ecal_file", type=str, nargs="*", required=True) -argparser.add_argument("--eres_file", help="eres_file", type=str, nargs="*", required=True) -argparser.add_argument("--inplots", help="eres_file", type=str, nargs="*", required=True) - -argparser.add_argument("--configs", help="configs", type=str, required=True) -argparser.add_argument("--timestamp", help="Datatype", type=str, required=True) -argparser.add_argument("--datatype", help="Datatype", type=str, required=True) -argparser.add_argument("--channel", help="Channel", type=str, required=True) + fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + cal_dict[fk.timestamp] = cal["pars"] + results_dicts[fk.timestamp] = cal["results"] + else: + cal = Props.read_from(args.ecal_file) -argparser.add_argument("--log", help="log_file", type=str) + fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(args.ecal_file)) + cal_dict[fk.timestamp] = cal["pars"] + results_dicts[fk.timestamp] = cal["results"] -argparser.add_argument("--plot_file", help="plot_file", type=str, nargs="*", required=False) -argparser.add_argument("--hit_pars", help="hit_pars", nargs="*", type=str) -argparser.add_argument("--fit_results", help="fit_results", nargs="*", type=str) -args = argparser.parse_args() + object_dict = {} + if isinstance(args.eres_file, list): + for ecal in args.eres_file: + with open(ecal, "rb") as o: + cal = pkl.load(o) + fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + object_dict[fk.timestamp] = cal + else: + with open(args.eres_file, "rb") as o: + cal = pkl.load(o) + fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(args.eres_file)) + object_dict[fk.timestamp] = cal -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") -logging.getLogger("numba").setLevel(logging.INFO) -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("h5py").setLevel(logging.INFO) -logging.getLogger("matplotlib").setLevel(logging.INFO) -logging.getLogger("legendmeta").setLevel(logging.INFO) + inplots_dict = {} + if args.inplots: + if isinstance(args.inplots, list): + for ecal in args.inplots: + with open(ecal, "rb") as o: + cal = pkl.load(o) + fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + inplots_dict[fk.timestamp] = cal + else: + with open(args.inplots, "rb") as o: + cal = pkl.load(o) + fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(args.inplots)) + inplots_dict[fk.timestamp] = cal + if "plot_options" in kwarg_dict: + for field, item in kwarg_dict["plot_options"].items(): + kwarg_dict["plot_options"][field]["function"] = eval(item["function"]) -def run_splitter(files): - """ - Returns list containing lists of each run - """ + # sort files in dictionary where keys are first timestamp from run + if isinstance(args.input_files, list): + files = [] + for file in args.input_files: + with open(file) as f: + files += f.read().splitlines() + else: + with open(args.input_files) as f: + files = f.read().splitlines() + + files = sorted( + np.unique(files) + ) # need this as sometimes files get double counted as it somehow puts in the p%-* filelist and individual runs also + + final_dict = {} + all_file = run_splitter(sorted(files)) + for filelist in all_file: + fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(sorted(filelist)[0])) + timestamp = fk.timestamp + final_dict[timestamp] = sorted(filelist) + + params = [ + 
kwarg_dict["final_cut_field"], + "timestamp", + ] + params += kwarg_dict["energy_params"] + + # load data in + data, threshold_mask = load_data( + final_dict, + f"{args.channel}/dsp", + cal_dict, + params=params, + threshold=kwarg_dict["threshold"], + return_selection_mask=True, + cal_energy_param=kwarg_dict["energy_params"][0], + ) + + if args.pulser_files: + mask = np.array([], dtype=bool) + for file in args.pulser_files: + with open(file) as f: + pulser_dict = json.load(f) + pulser_mask = np.array(pulser_dict["mask"]) + mask = np.append(mask, pulser_mask) + if "pulser_multiplicity_threshold" in kwarg_dict: + kwarg_dict.pop("pulser_multiplicity_threshold") + + elif args.tcm_filelist: + # get pulser mask from tcm files + with open(args.tcm_filelist) as f: + tcm_files = f.read().splitlines() + tcm_files = sorted(np.unique(tcm_files)) + ids, mask = get_tcm_pulser_ids( + tcm_files, args.channel, kwarg_dict["pulser_multiplicity_threshold"] + ) + else: + msg = "No pulser file or tcm filelist provided" + raise ValueError(msg) + + data["is_pulser"] = mask[threshold_mask] + + for tstamp in cal_dict: + if tstamp not in np.unique(data["run_timestamp"]): + row = {key: [False] if data.dtypes[key] == "bool" else [np.nan] for key in data} + row["run_timestamp"] = tstamp + row = pd.DataFrame(row) + data = pd.concat([data, row]) + + pk_pars = [ + (238.632, (10, 10), pgf.gauss_on_step), + (511, (30, 30), pgf.gauss_on_step), + (583.191, (30, 30), pgf.hpge_peak), + (727.330, (30, 30), pgf.hpge_peak), + (763, (30, 15), pgf.gauss_on_step), + (785, (15, 30), pgf.gauss_on_step), + (860.564, (30, 25), pgf.hpge_peak), + (893, (25, 30), pgf.gauss_on_step), + (1079, (30, 30), pgf.gauss_on_step), + (1513, (30, 30), pgf.gauss_on_step), + (1592.53, (30, 20), pgf.hpge_peak), + (1620.50, (20, 30), pgf.hpge_peak), + (2103.53, (30, 30), pgf.hpge_peak), + (2614.50, (30, 30), pgf.hpge_peak), + (3125, (30, 30), pgf.gauss_on_step), + (3198, (30, 30), pgf.gauss_on_step), + (3474, (30, 30), pgf.gauss_on_step), + ] - runs = [] - run_files = [] - for file in files: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(file)) - if f"{fk.period}-{fk.run}" not in runs: - runs.append(f"{fk.period}-{fk.run}") - run_files.append([]) - for i, run in enumerate(runs): - if run == f"{fk.period}-{fk.run}": - run_files[i].append(file) - return run_files + glines = [pk_par[0] for pk_par in pk_pars] + if "cal_energy_params" not in kwarg_dict: + cal_energy_params = [energy_param + "_cal" for energy_param in kwarg_dict["energy_params"]] + else: + cal_energy_params = kwarg_dict["cal_energy_params"] -configs = LegendMetadata(path=args.configs) -channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ - "pars_pht_partcal" -]["inputs"]["pars_pht_partcal_config"][args.channel] + selection_string = f"~is_pulser&{kwarg_dict['final_cut_field']}" -kwarg_dict = Props.read_from(channel_dict) + ecal_results = {} + plot_dict = {} + full_object_dict = {} -cal_dict = {} -results_dicts = {} -if isinstance(args.ecal_file, list): - for ecal in args.ecal_file: - cal = Props.read_from(ecal) + for energy_param, cal_energy_param in zip(kwarg_dict["energy_params"], cal_energy_params): + energy = data.query(selection_string)[energy_param].to_numpy() + full_object_dict[cal_energy_param] = HPGeCalibration( + energy_param, glines, 1, kwarg_dict.get("deg", 0), fixed={1: 1} + ) + full_object_dict[cal_energy_param].hpge_get_energy_peaks(energy) + full_object_dict[cal_energy_param].hpge_fit_energy_peaks( + energy, + peak_pars=pk_pars, + 
tail_weight=kwarg_dict.get("tail_weight", 0), + n_events=kwarg_dict.get("n_events", None), + allowed_p_val=kwarg_dict.get("p_val", 0), + update_cal_pars=bool(det_status == "on"), + ) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) - cal_dict[fk.timestamp] = cal["pars"] - results_dicts[fk.timestamp] = cal["results"] -else: - cal = Props.read_from(args.ecal_file) + full_object_dict[cal_energy_param].get_energy_res_curve( + FWHMLinear, + interp_energy_kev={"Qbb": 2039.0}, + ) + full_object_dict[cal_energy_param].get_energy_res_curve( + FWHMQuadratic, + interp_energy_kev={"Qbb": 2039.0}, + ) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(args.ecal_file)) - cal_dict[fk.timestamp] = cal["pars"] - results_dicts[fk.timestamp] = cal["results"] + data[cal_energy_param] = nb_poly( + data[energy_param].to_numpy(), full_object_dict[cal_energy_param].pars + ) -object_dict = {} -if isinstance(args.eres_file, list): - for ecal in args.eres_file: - with open(ecal, "rb") as o: - cal = pkl.load(o) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) - object_dict[fk.timestamp] = cal -else: - with open(args.eres_file, "rb") as o: - cal = pkl.load(o) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(args.eres_file)) - object_dict[fk.timestamp] = cal - -inplots_dict = {} -if args.inplots: - if isinstance(args.inplots, list): - for ecal in args.inplots: - with open(ecal, "rb") as o: - cal = pkl.load(o) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) - inplots_dict[fk.timestamp] = cal - else: - with open(args.inplots, "rb") as o: - cal = pkl.load(o) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(args.inplots)) - inplots_dict[fk.timestamp] = cal - - -if "plot_options" in kwarg_dict: - for field, item in kwarg_dict["plot_options"].items(): - kwarg_dict["plot_options"][field]["function"] = eval(item["function"]) - - -# sort files in dictionary where keys are first timestamp from run -if isinstance(args.input_files, list): - files = [] - for file in args.input_files: - with open(file) as f: - files += f.read().splitlines() -else: - with open(args.input_files) as f: - files = f.read().splitlines() - -files = sorted( - np.unique(files) -) # need this as sometimes files get double counted as it somehow puts in the p%-* filelist and individual runs also - -final_dict = {} -all_file = run_splitter(sorted(files)) -for filelist in all_file: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(sorted(filelist)[0])) - timestamp = fk.timestamp - final_dict[timestamp] = sorted(filelist) - -params = [ - kwarg_dict["final_cut_field"], - "timestamp", -] -params += kwarg_dict["energy_params"] - -# load data in -data, threshold_mask = load_data( - final_dict, - f"{args.channel}/dsp", - cal_dict, - params=params, - threshold=kwarg_dict["threshold"], - return_selection_mask=True, - cal_energy_param=kwarg_dict["energy_params"][0], -) - -# get pulser mask from tcm files -if isinstance(args.tcm_filelist, list): - tcm_files = [] - for file in args.tcm_filelist: - with open(file) as f: - tcm_files += f.read().splitlines() -else: - with open(args.tcm_filelist) as f: - tcm_files = f.read().splitlines() - -tcm_files = sorted(np.unique(tcm_files)) -ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict.pop("pulser_multiplicity_threshold") -) -data["is_pulser"] = mask[threshold_mask] - -for tstamp in cal_dict: - if tstamp not in np.unique(data["run_timestamp"]): - row = {key: [False] if data.dtypes[key] == 
"bool" else [np.nan] for key in data} - row["run_timestamp"] = tstamp - row = pd.DataFrame(row) - data = pd.concat([data, row]) - -# run energy supercal -hit_dicts, ecal_results, plot_dict, ecal_obj = partition_energy_cal_th( - data, - cal_dict, - selection_string=f"{kwarg_dict.pop('final_cut_field')}&(~is_pulser)", - **kwarg_dict, -) + ecal_results[cal_energy_param] = get_results_dict( + full_object_dict[cal_energy_param], data, cal_energy_param, selection_string + ) + cal_dict = update_cal_dicts( + cal_dict, {cal_energy_param: full_object_dict[cal_energy_param].gen_pars_dict()} + ) -if args.plot_file: - common_dict = plot_dict.pop("common") if "common" in list(plot_dict) else None + if args.plot_file: + param_plot_dict = {} + if ~np.isnan(full_object_dict[cal_energy_param].pars).all(): + param_plot_dict["fwhm_fit"] = full_object_dict[cal_energy_param].plot_eres_fit( + energy + ) + param_plot_dict["cal_fit"] = full_object_dict[cal_energy_param].plot_cal_fit( + energy + ) + param_plot_dict["peak_fits"] = full_object_dict[cal_energy_param].plot_fits( + energy, ncols=4, nrows=5 + ) - if isinstance(args.plot_file, list): - for plot_file in args.plot_file: - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(plot_file)) + if "plot_options" in kwarg_dict: + for key, item in kwarg_dict["plot_options"].items(): + if item["options"] is not None: + param_plot_dict[key] = item["function"]( + data, + cal_energy_param, + selection_string, + **item["options"], + ) + else: + param_plot_dict[key] = item["function"]( + data, + cal_energy_param, + selection_string, + ) + plot_dict[cal_energy_param] = param_plot_dict + + for peak_dict in full_object_dict[cal_energy_param].results["hpge_fit_energy_peaks"]["peak_parameters"].values(): + peak_dict["function"] = peak_dict["function"].name + peak_dict["parameters"] = peak_dict["parameters"].to_dict() + peak_dict["uncertainties"] = peak_dict["uncertainties"].to_dict() + + if args.plot_file: + common_dict = plot_dict.pop("common") if "common" in list(plot_dict) else None + + if isinstance(args.plot_file, list): + for plot_file in args.plot_file: + fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(plot_file)) + if args.inplots: + out_plot_dict = inplots_dict[fk.timestamp] + out_plot_dict.update({"partition_ecal": plot_dict}) + else: + out_plot_dict = {"partition_ecal": plot_dict} + + if "common" in list(out_plot_dict) and common_dict is not None: + out_plot_dict["common"].update(common_dict) + elif common_dict is not None: + out_plot_dict["common"] = common_dict + + pathlib.Path(os.path.dirname(plot_file)).mkdir(parents=True, exist_ok=True) + with open(plot_file, "wb") as w: + pkl.dump(out_plot_dict, w, protocol=pkl.HIGHEST_PROTOCOL) + else: if args.inplots: + fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(args.plot_file)) out_plot_dict = inplots_dict[fk.timestamp] out_plot_dict.update({"partition_ecal": plot_dict}) else: out_plot_dict = {"partition_ecal": plot_dict} - if "common" in list(out_plot_dict) and common_dict is not None: out_plot_dict["common"].update(common_dict) elif common_dict is not None: out_plot_dict["common"] = common_dict - - pathlib.Path(os.path.dirname(plot_file)).mkdir(parents=True, exist_ok=True) - with open(plot_file, "wb") as w: + pathlib.Path(os.path.dirname(args.plot_file)).mkdir(parents=True, exist_ok=True) + with open(args.plot_file, "wb") as w: pkl.dump(out_plot_dict, w, protocol=pkl.HIGHEST_PROTOCOL) - else: - if args.inplots: - fk = 
ChannelProcKey.get_filekey_from_pattern(os.path.basename(args.plot_file)) - out_plot_dict = inplots_dict[fk.timestamp] - out_plot_dict.update({"partition_ecal": plot_dict}) - else: - out_plot_dict = {"partition_ecal": plot_dict} - if "common" in list(out_plot_dict) and common_dict is not None: - out_plot_dict["common"].update(common_dict) - elif common_dict is not None: - out_plot_dict["common"] = common_dict - pathlib.Path(os.path.dirname(args.plot_file)).mkdir(parents=True, exist_ok=True) - with open(args.plot_file, "wb") as w: - pkl.dump(out_plot_dict, w, protocol=pkl.HIGHEST_PROTOCOL) - - -for out in sorted(args.hit_pars): - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(out)) - final_hit_dict = { - "pars": hit_dicts[fk.timestamp], - "results": { - "ecal": results_dicts[fk.timestamp], - "partition_ecal": ecal_results, - }, - } - pathlib.Path(os.path.dirname(out)).mkdir(parents=True, exist_ok=True) - with open(out, "w") as w: - json.dump(final_hit_dict, w, indent=4) - -for out in args.fit_results: - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(out)) - final_object_dict = { - "ecal": object_dict[fk.timestamp], - "partition_ecal": ecal_obj, - } - pathlib.Path(os.path.dirname(out)).mkdir(parents=True, exist_ok=True) - with open(out, "wb") as w: - pkl.dump(final_object_dict, w, protocol=pkl.HIGHEST_PROTOCOL) + + for out in sorted(args.hit_pars): + fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(out)) + final_hit_dict = { + "pars": cal_dict[fk.timestamp], + "results": dict(**results_dicts[fk.timestamp], partition_ecal= ecal_results) + } + pathlib.Path(os.path.dirname(out)).mkdir(parents=True, exist_ok=True) + with open(out, "w") as w: + json.dump(final_hit_dict, w, indent=4) + + for out in args.fit_results: + fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(out)) + final_object_dict = dict(**object_dict[fk.timestamp], partition_ecal = full_object_dict) + pathlib.Path(os.path.dirname(out)).mkdir(parents=True, exist_ok=True) + with open(out, "wb") as w: + pkl.dump(final_object_dict, w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_pht_qc.py b/scripts/pars_pht_qc.py new file mode 100644 index 0000000..c9801be --- /dev/null +++ b/scripts/pars_pht_qc.py @@ -0,0 +1,224 @@ +from __future__ import annotations + +import argparse +import json +import logging +import os +import pathlib +import pickle as pkl +import re +import warnings + +os.environ["PYGAMA_PARALLEL"] = "false" +os.environ["PYGAMA_FASTMATH"] = "false" + +import numpy as np +from legendmeta import LegendMetadata +from legendmeta.catalog import Props +from lgdo.lh5 import ls +from pygama.pargen.data_cleaning import get_tcm_pulser_ids, generate_cuts, get_keys, generate_cut_classifiers +from pygama.pargen.utils import load_data +from util.FileKey import ChannelProcKey, ProcessingFileKey + +log = logging.getLogger(__name__) + +warnings.filterwarnings(action="ignore", category=RuntimeWarning) + + +if __name__ == "__main__": + argparser = argparse.ArgumentParser() + argparser.add_argument("--cal_files", help="cal_files", nargs="*", type=str) + argparser.add_argument("--fft_files", help="fft_files", nargs="*", type=str) + argparser.add_argument("--tcm_filelist", help="tcm_filelist", nargs="*", type=str, required=False) + argparser.add_argument("--pulser_files", help="pulser_file", nargs="*", type=str, required=False) + + argparser.add_argument("--configs", help="config", type=str, required=True) + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) 
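The same pulser-mask resolution recurs in every calibration script touched by this patch, including pars_pht_qc.py below: prefer the precomputed pulser JSON files, fall back to deriving the ids from the tcm files, and fail loudly if neither is given. A condensed sketch of that selection logic; the function name load_pulser_mask is mine, and the id finder (in the scripts, get_tcm_pulser_ids) is injected so the snippet stays self-contained:

import json

import numpy as np

def load_pulser_mask(pulser_files, tcm_filelist, channel, threshold, get_ids):
    if pulser_files:
        # masks from the per-run pulser files are simply concatenated
        mask = np.array([], dtype=bool)
        for file in pulser_files:
            with open(file) as f:
                mask = np.append(mask, np.array(json.load(f)["mask"]))
        return mask
    if tcm_filelist:
        # fall back to recomputing the pulser ids from the tcm files
        with open(tcm_filelist) as f:
            tcm_files = sorted(np.unique(f.read().splitlines()))
        _, mask = get_ids(tcm_files, channel, threshold)
        return mask
    msg = "No pulser file or tcm filelist provided"
    raise ValueError(msg)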
+ argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) + argparser.add_argument("--channel", help="Channel", type=str, required=True) + + argparser.add_argument("--log", help="log_file", type=str) + + argparser.add_argument("--plot_path", help="plot_path", type=str, nargs="*", required=False) + argparser.add_argument("--save_path", help="save_path", type=str, nargs="*", ) + args = argparser.parse_args() + + logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") + logging.getLogger("numba").setLevel(logging.INFO) + logging.getLogger("parse").setLevel(logging.INFO) + logging.getLogger("lgdo").setLevel(logging.INFO) + logging.getLogger("h5py").setLevel(logging.INFO) + logging.getLogger("matplotlib").setLevel(logging.INFO) + logging.getLogger("legendmeta").setLevel(logging.INFO) + + # get metadata dictionary + configs = LegendMetadata(path=args.configs) + channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"] + channel_dict = channel_dict["pars_pht_qc"]["inputs"]["qc_config"][args.channel] + + + # sort files in dictionary where keys are first timestamp from run + if isinstance(args.cal_files, list): + cal_files = [] + for file in args.cal_files: + with open(file) as f: + cal_files += f.read().splitlines() + else: + with open(args.cal_files) as f: + cal_files = f.read().splitlines() + + cal_files = sorted( + np.unique(cal_files) + ) # need this as sometimes files get double counted as it somehow puts in the p%-* filelist and individual runs also + + + + kwarg_dict = Props.read_from(channel_dict) + kwarg_dict_cal = kwarg_dict["cal_fields"] + + cut_fields = get_keys( + [ + key.replace(f"{args.channel}/dsp/", "") + for key in ls(cal_files[0], f"{args.channel}/dsp/") + ], + kwarg_dict_cal["cut_parameters"], + ) + if "initial_cal_cuts" in kwarg_dict: + init_cal = kwarg_dict["initial_cal_cuts"] + cut_fields += get_keys( + [ + key.replace(f"{args.channel}/dsp/", "") + for key in ls(cal_files[0], f"{args.channel}/dsp/") + ], + init_cal["cut_parameters"], + ) + + # load data in + data, threshold_mask = load_data( + cal_files, + f"{args.channel}/dsp", + {}, + [*cut_fields, "timestamp", "trapTmax"], + threshold=kwarg_dict_cal.get("threshold", 0), + return_selection_mask=True, + cal_energy_param="trapTmax", + ) + + if args.pulser_files: + mask = np.array([], dtype=bool) + for file in args.pulser_files: + with open(file, 'r') as f: + pulser_dict = json.load(f) + pulser_mask = np.array(pulser_dict["mask"]) + mask = np.append(mask, pulser_mask) + if "pulser_multiplicity_threshold" in kwarg_dict: + kwarg_dict.pop("pulser_multiplicity_threshold") + + elif args.tcm_filelist: + # get pulser mask from tcm files + with open(args.tcm_filelist) as f: + tcm_files = f.read().splitlines() + tcm_files = sorted(np.unique(tcm_files)) + ids, mask = get_tcm_pulser_ids( + tcm_files, args.channel, kwarg_dict["pulser_multiplicity_threshold"] + ) + else: + msg = "No pulser file or tcm filelist provided" + raise ValueError(msg) + + data["is_pulser"] = mask[threshold_mask] + + if "initial_cal_cuts" in kwarg_dict: + init_cal = kwarg_dict["initial_cal_cuts"] + hit_dict_init_cal, plot_dict_init_cal = generate_cut_classifiers( + data, + init_cal["cut_parameters"], + init_cal.get("rounding", 4), + display=1 if args.plot_path else 0, + ) + ct_mask = np.full(len(data), True, dtype=bool) + for outname, info in hit_dict_init_cal.items(): + # convert to pandas eval + exp = info["expression"] + for key in info.get("parameters", None): + exp = re.sub(f"(? 
0: + + # sort files in dictionary where keys are first timestamp from run + if isinstance(args.fft_files, list): + fft_files = [] + for file in args.fft_files: + with open(file) as f: + fft_files += f.read().splitlines() + else: + with open(args.fft_files) as f: + fft_files = f.read().splitlines() + + fft_files = sorted( + np.unique(fft_files) + ) # need this as sometimes files get double counted as it somehow puts in the p%-* filelist and individual runs also + + if len(fft_files)>0: + fft_data = load_data( + fft_files, + f"{args.channel}/dsp", + {}, + [*list(kwarg_dict_fft["cut_parameters"]), "timestamp", "trapTmax"], + threshold=kwarg_dict_fft["threshold"], + return_selection_mask=False, + cal_energy_param="trapTmax", + ) + + hit_dict_fft, plot_dict_fft = generate_cut_classifiers( + data, + kwarg_dict_fft["cut_parameters"], + kwarg_dict.get("rounding", 4), + display=1 if args.plot_path else 0, + ) + + log.debug("fft cuts applied") + log.debug(f"cut_dict is: {json.dumps(hit_dict_fft, indent=2)}") + + else: + hit_dict_fft = {} + plot_dict_fft = {} + else: + hit_dict_fft = {} + plot_dict_fft = {} + + hit_dict = {**hit_dict_init_cal, **hit_dict_cal, **hit_dict_fft} + plot_dict = {**plot_dict_init_cal, **plot_dict_cal, **plot_dict_fft} + + for file in args.save_path: + pathlib.Path(os.path.dirname(file)).mkdir(parents=True, exist_ok=True) + with open(file, "w") as f: + json.dump(hit_dict, f, indent=4) + + if args.plot_path: + for file in args.plot_path: + pathlib.Path(os.path.dirname(file)).mkdir(parents=True, exist_ok=True) + with open(file, "wb") as f: + pkl.dump({"qc": plot_dict}, f, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_tcm_pulser.py b/scripts/pars_tcm_pulser.py new file mode 100644 index 0000000..3c6d88a --- /dev/null +++ b/scripts/pars_tcm_pulser.py @@ -0,0 +1,64 @@ +import argparse +import json +import logging +import os +import pathlib +import pickle as pkl + +os.environ["LGDO_CACHE"] = "false" +os.environ["LGDO_BOUNDSCHECK"] = "false" +os.environ["PYGAMA_PARALLEL"] = "false" +os.environ["PYGAMA_FASTMATH"] = "false" + +import lgdo.lh5 as lh5 +import numpy as np +from legendmeta import LegendMetadata +from legendmeta.catalog import Props +from pygama.pargen.data_cleaning import get_tcm_pulser_ids + + +argparser = argparse.ArgumentParser() +argparser.add_argument("--configs", help="configs path", type=str, required=True) +argparser.add_argument("--log", help="log file", type=str) + +argparser.add_argument("--datatype", help="Datatype", type=str, required=True) +argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) +argparser.add_argument("--channel", help="Channel", type=str, required=True) + +argparser.add_argument("--pulser_file", help="pulser file", type=str, required=False) + +argparser.add_argument("--tcm_files", help="tcm_files", nargs="*", type=str) +args = argparser.parse_args() + +logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") +logging.getLogger("numba").setLevel(logging.INFO) +logging.getLogger("parse").setLevel(logging.INFO) +logging.getLogger("lgdo").setLevel(logging.INFO) +logging.getLogger("h5py").setLevel(logging.INFO) +logging.getLogger("matplotlib").setLevel(logging.INFO) +logging.getLogger("legendmeta").setLevel(logging.INFO) + +sto = lh5.LH5Store() +log = logging.getLogger(__name__) + +configs = LegendMetadata(path=args.configs) +config_dict = configs.on(args.timestamp, system=args.datatype) +kwarg_dict = config_dict["snakemake_rules"]["pars_tcm_pulser"]["inputs"]["pulser_config"] + +kwarg_dict 
= Props.read_from(kwarg_dict) + +if isinstance(args.tcm_files, list) and args.tcm_files[0].split(".")[-1] == "filelist": + tcm_files = args.tcm_files[0] + with open(tcm_files) as f: + tcm_files = f.read().splitlines() +else: + tcm_files = args.tcm_files +# get pulser mask from tcm files +tcm_files = sorted(np.unique(tcm_files)) +ids, mask = get_tcm_pulser_ids( + tcm_files, args.channel, kwarg_dict.pop("pulser_multiplicity_threshold") + ) + +pathlib.Path(os.path.dirname(args.pulser_file)).mkdir(parents=True, exist_ok=True) +with open(args.pulser_file, "w") as f: + json.dump({"idxs": ids.tolist(), "mask": mask.tolist()}, f, indent=4) \ No newline at end of file From 49869d95e172aa3984b193cd75fe5b8812bf3e9f Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Fri, 29 Mar 2024 13:51:28 +0100 Subject: [PATCH 034/103] fix psp path --- scripts/util/patterns.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/util/patterns.py b/scripts/util/patterns.py index e1538d0..7d381b2 100644 --- a/scripts/util/patterns.py +++ b/scripts/util/patterns.py @@ -23,6 +23,7 @@ tier_path, tier_pet_path, tier_pht_path, + tier_psp_path, tier_raw_blind_path, tier_raw_path, tier_skm_path, @@ -148,7 +149,7 @@ def get_pattern_tier_evt(setup): def get_pattern_tier_psp(setup): return os.path.join( - f"{tier_evt_path(setup)}", + f"{tier_psp_path(setup)}", "{datatype}", "{period}", "{run}", From 904685a93baf2ea6a19e7a17e2f1d5382af0e034 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Fri, 29 Mar 2024 13:51:46 +0100 Subject: [PATCH 035/103] add fft read --- rules/common.smk | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/rules/common.smk b/rules/common.smk index 1d4282b..068100b 100644 --- a/rules/common.smk +++ b/rules/common.smk @@ -38,6 +38,15 @@ def read_filelist_cal(wildcards, tier): return files +def read_filelist_fft(wildcards, tier): + label = f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-fft" + with checkpoints.gen_filelist.get(label=label, tier=tier, extension="file").output[ + 0 + ].open() as f: + files = f.read().splitlines() + return files + + def read_filelist_pars_cal_channel(wildcards, tier): """ This function will read the filelist of the channels and return a list of dsp files one for each channel From 5f8e4a88e3e0dec19cba00f3481fc3102a74934b Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Fri, 29 Mar 2024 13:52:09 +0100 Subject: [PATCH 036/103] split pulser into own rule --- rules/dsp.smk | 9 ++++----- rules/tcm.smk | 30 ++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 5 deletions(-) diff --git a/rules/dsp.smk b/rules/dsp.smk index 5c27f42..94ccf13 100644 --- a/rules/dsp.smk +++ b/rules/dsp.smk @@ -37,7 +37,7 @@ rule build_pars_dsp_tau: files=os.path.join( filelist_path(setup), "all-{experiment}-{period}-{run}-cal-raw.filelist" ), - tcm_files=lambda wildcards: read_filelist_cal(wildcards, "tcm"), + pulser=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), params: timestamp="{timestamp}", datatype="cal", @@ -45,7 +45,6 @@ rule build_pars_dsp_tau: output: decay_const=temp(get_pattern_pars_tmp_channel(setup, "dsp", "decay_constant")), plots=temp(get_pattern_plts_tmp_channel(setup, "dsp", "decay_constant")), - pulser=temp(get_pattern_pars_tmp_channel(setup, "dsp", "pulser_ids")), log: get_pattern_log_channel(setup, "par_dsp_decay_constant"), group: @@ -62,8 +61,7 @@ rule build_pars_dsp_tau: "--channel {params.channel} " "--plot_path {output.plots} " "--output_file {output.decay_const} " - "--pulser_file {output.pulser} " - "--tcm_files 
{input.tcm_files} "
+        "--pulser_file {input.pulser} "
         "--raw_files {input.files}"
 
 
@@ -72,7 +70,7 @@ rule build_pars_event_selection:
         files=os.path.join(
             filelist_path(setup), "all-{experiment}-{period}-{run}-cal-raw.filelist"
         ),
-        pulser_file=get_pattern_pars_tmp_channel(setup, "dsp", "pulser_ids"),
+        pulser_file=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"),
         database=get_pattern_pars_tmp_channel(setup, "dsp", "decay_constant"),
         raw_cal=get_blinding_curve_file,
     params:
@@ -87,6 +85,7 @@
         "par-dsp"
     resources:
         runtime=300,
+        mem_swap=70,
     shell:
         "{swenv} python3 -B "
         f"{workflow.source_path('../scripts/pars_dsp_event_selection.py')} "
diff --git a/rules/tcm.smk b/rules/tcm.smk
index 380c882..cfdf72c 100644
--- a/rules/tcm.smk
+++ b/rules/tcm.smk
@@ -6,6 +6,8 @@ from scripts.util.patterns import (
     get_pattern_tier_raw,
     get_pattern_tier,
     get_pattern_log,
+    get_pattern_pars_tmp_channel,
+    get_pattern_log_channel,
 )
 
 
@@ -33,3 +35,31 @@ rule build_tier_tcm:
         "--timestamp {params.timestamp} "
         "{input} "
         "{output}"
+
+
+# This rule builds the pulser ids from the tcm files for each channel
+rule build_pulser_ids:
+    input:
+        tcm_files=lambda wildcards: read_filelist_cal(wildcards, "tcm"),
+    params:
+        timestamp="{timestamp}",
+        datatype="cal",
+        channel="{channel}",
+    output:
+        pulser=temp(get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids")),
+    log:
+        get_pattern_log_channel(setup, "tcm_pulsers"),
+    group:
+        "tier-tcm"
+    resources:
+        runtime=300,
+    shell:
+        "{swenv} python3 -B "
+        f"{workflow.source_path('../scripts/pars_tcm_pulser.py')} "
+        "--log {log} "
+        "--configs {configs} "
+        "--datatype {params.datatype} "
+        "--timestamp {params.timestamp} "
+        "--channel {params.channel} "
+        "--tcm_files {input.tcm_files} "
+        "--pulser_file {output.pulser} "

From 57cdd606facfc31cf80ece546536e79449c53a37 Mon Sep 17 00:00:00 2001
From: ggmarshall
Date: Fri, 29 Mar 2024 13:52:33 +0100
Subject: [PATCH 037/103] add qc and use pulser files

---
 rules/hit.smk |  72 +++++++--
 rules/pht.smk | 435 ++++++++++++++++++++++++++++++++++++--------------
 2 files changed, 371 insertions(+), 136 deletions(-)

diff --git a/rules/hit.smk b/rules/hit.smk
index de918b3..bbce0bd 100644
--- a/rules/hit.smk
+++ b/rules/hit.smk
@@ -21,18 +21,67 @@ from scripts.util.patterns import (
 )
 
 
+onstart:
+    if os.path.isfile(os.path.join(pars_path(setup), "hit", "validity.jsonl")):
+        os.remove(os.path.join(pars_path(setup), "hit", "validity.jsonl"))
+
+    ds.pars_key_resolve.write_par_catalog(
+        ["-*-*-*-cal"],
+        os.path.join(pars_path(setup), "hit", "validity.jsonl"),
+        get_pattern_tier_raw(setup),
+        {"cal": ["par_hit"], "lar": ["par_hit"]},
+    )
+
+
+# This rule builds the qc using the calibration dsp files and fft files
+rule build_qc:
+    input:
+        files=lambda wildcards: read_filelist_cal(wildcards, "dsp"),
+        fft_files=lambda wildcards: read_filelist_fft(wildcards, "dsp"),
+        pulser=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"),
+    params:
+        timestamp="{timestamp}",
+        datatype="cal",
+        channel="{channel}",
+    output:
+        qc_file=temp(get_pattern_pars_tmp_channel(setup, "hit", "qc")),
+        plot_file=temp(get_pattern_plts_tmp_channel(setup, "hit", "qc")),
+    log:
+        get_pattern_log_channel(setup, "pars_hit_qc"),
+    group:
+        "par-hit"
+    resources:
+        runtime=300,
+    shell:
+        "{swenv} python3 -B "
+        f"{workflow.source_path('../scripts/pars_hit_qc.py')} "
+        "--log {log} "
+        "--datatype {params.datatype} "
+        "--timestamp {params.timestamp} "
+        "--channel {params.channel} "
+        "--configs {configs} "
+        "--plot_path {output.plot_file} "
+        
"--save_path {output.qc_file} " + "--pulser_file {input.pulser} " + "--cal_files {input.files} " + "--fft_files {input.fft_files} " + + # This rule builds the energy calibration using the calibration dsp files rule build_energy_calibration: input: - files=lambda wildcards: read_filelist_cal(wildcards, "dsp"), - tcm_filelist=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-cal-tcm.filelist" + files=os.path.join( + filelist_path(setup), "all-{experiment}-{period}-{run}-cal-dsp.filelist" ), + pulser=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), ctc_dict=ancient( lambda wildcards: pars_catalog.get_par_file( setup, wildcards.timestamp, "dsp" ) ), + inplots=get_pattern_plts_tmp_channel(setup, "hit", "qc"), + in_hit_dict=get_pattern_pars_tmp_channel(setup, "hit", "qc"), params: timestamp="{timestamp}", datatype="cal", @@ -59,11 +108,14 @@ rule build_energy_calibration: "--timestamp {params.timestamp} " "--channel {params.channel} " "--configs {configs} " + "--metadata {meta} " "--plot_path {output.plot_file} " "--results_path {output.results_file} " "--save_path {output.ecal_file} " + "--inplot_dict {input.inplots} " + "--in_hit_dict {input.in_hit_dict} " "--ctc_dict {input.ctc_dict} " - "--tcm_filelist {input.tcm_filelist} " + "--pulser_file {input.pulser} " "--files {input.files}" @@ -73,9 +125,7 @@ rule build_aoe_calibration: files=os.path.join( filelist_path(setup), "all-{experiment}-{period}-{run}-cal-dsp.filelist" ), - tcm_filelist=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-cal-tcm.filelist" - ), + pulser=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), ecal_file=get_pattern_pars_tmp_channel(setup, "hit", "energy_cal"), eres_file=get_pattern_pars_tmp_channel( setup, "hit", "energy_cal_objects", extension="pkl" @@ -112,7 +162,7 @@ rule build_aoe_calibration: "--eres_file {input.eres_file} " "--hit_pars {output.hit_pars} " "--plot_file {output.plot_file} " - "--tcm_filelist {input.tcm_filelist} " + "--pulser_file {input.pulser} " "--ecal_file {input.ecal_file} " "{input.files}" @@ -123,9 +173,7 @@ rule build_lq_calibration: files=os.path.join( filelist_path(setup), "all-{experiment}-{period}-{run}-cal-dsp.filelist" ), - tcm_filelist=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-cal-tcm.filelist" - ), + pulser=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), ecal_file=get_pattern_pars_tmp_channel(setup, "hit", "aoe_cal"), eres_file=get_pattern_pars_tmp_channel( setup, "hit", "aoe_cal_objects", extension="pkl" @@ -160,7 +208,7 @@ rule build_lq_calibration: "--eres_file {input.eres_file} " "--hit_pars {output.hit_pars} " "--plot_file {output.plot_file} " - "--tcm_filelist {input.tcm_filelist} " + "--pulser_file {input.pulser} " "--ecal_file {input.ecal_file} " "{input.files}" diff --git a/rules/pht.smk b/rules/pht.smk index f375fe6..cd11a9b 100644 --- a/rules/pht.smk +++ b/rules/pht.smk @@ -7,7 +7,7 @@ Snakemake rules for processing pht (partition hit) tier data. 
This is done in 4 """ from scripts.util.pars_loading import pars_catalog -import scripts.util.create_pars_keylist import pars_key_resolve +from scripts.util.create_pars_keylist import pars_key_resolve from scripts.util.utils import filelist_path, par_pht_path, set_last_rule_name from scripts.util.patterns import ( get_pattern_pars_tmp_channel, @@ -15,7 +15,6 @@ from scripts.util.patterns import ( get_pattern_log_channel, get_pattern_par_pht, get_pattern_plts, - get_pattern_tier_dsp, get_pattern_tier, get_pattern_pars_tmp, get_pattern_log, @@ -29,17 +28,174 @@ ds.pars_key_resolve.write_par_catalog( {"cal": ["par_pht"], "lar": ["par_pht"]}, ) +intier = "dsp" + + +rule pht_checkpoint: + input: + files=lambda wildcards: read_filelist_cal(wildcards, intier), + output: + get_pattern_pars_tmp_channel(setup, "pht", "check"), + shell: + "touch {output}" + + +qc_pht_rules = {} +for key, dataset in part.datasets.items(): + for partition in dataset.keys(): + + rule: + input: + cal_files=part.get_filelists(partition, key, intier), + fft_files=part.get_filelists(partition, key, intier, datatype="fft"), + pulser_files=[ + file.replace("pht", "tcm") + for file in part.get_par_files( + f"{par_pht_path(setup)}/validity.jsonl", + partition, + key, + tier="pht", + name="pulser_ids", + ) + ], + check_files=part.get_par_files( + f"{par_pht_path(setup)}/validity.jsonl", + partition, + key, + tier="pht", + name="check", + ), + wildcard_constraints: + channel=part.get_wildcard_constraints(partition, key), + params: + datatype="cal", + channel="{channel}" if key == "default" else key, + timestamp=part.get_timestamp( + f"{par_pht_path(setup)}/validity.jsonl", partition, key, tier="pht" + ), + output: + hit_pars=[ + temp(file) + for file in part.get_par_files( + f"{par_pht_path(setup)}/validity.jsonl", + partition, + key, + tier="pht", + name="qc", + ) + ], + plot_file=[ + temp(file) + for file in part.get_plt_files( + f"{par_pht_path(setup)}/validity.jsonl", + partition, + key, + tier="pht", + name="qc", + ) + ], + log: + part.get_log_file( + f"{par_pht_path(setup)}/validity.jsonl", + partition, + key, + "pht", + name="par_pht_qc", + ), + group: + "par-pht" + resources: + mem_swap=len(part.get_filelists(partition, key, intier)) * 20, + runtime=300, + shell: + "{swenv} python3 -B " + f"{basedir}/../scripts/pars_pht_qc.py " + "--log {log} " + "--configs {configs} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--channel {params.channel} " + "--save_path {output.hit_pars} " + "--plot_path {output.plot_file} " + "--pulser_files {input.pulser_files} " + "--fft_files {input.fft_files} " + "--cal_files {input.cal_files}" + + set_last_rule_name(workflow, f"{key}-{partition}-build_pht_qc") + + if key in qc_pht_rules: + qc_pht_rules[key].append(list(workflow.rules)[-1]) + else: + qc_pht_rules[key] = [list(workflow.rules)[-1]] + + +# Merged energy and a/e supercalibrations to reduce number of rules as they have same inputs/outputs +# This rule builds the a/e calibration using the calibration dsp files for the whole partition +rule build_pht_qc: + input: + cal_files=os.path.join( + filelist_path(setup), + "all-{experiment}-{period}-{run}-cal-" + f"{intier}.filelist", + ), + fft_files=os.path.join( + filelist_path(setup), + "all-{experiment}-{period}-{run}-fft-" + f"{intier}.filelist", + ), + pulser_file=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), + check_file=get_pattern_pars_tmp_channel(setup, "pht", "check"), + params: + datatype="cal", + channel="{channel}", + timestamp="{timestamp}", 
+ output: + hit_pars=temp(get_pattern_pars_tmp_channel(setup, "pht", "qc")), + plot_file=temp(get_pattern_plts_tmp_channel(setup, "pht", "qc")), + log: + get_pattern_log_channel(setup, "pars_pht_qc"), + group: + "par-pht" + resources: + mem_swap=60, + runtime=300, + shell: + "{swenv} python3 -B " + f"{basedir}/../scripts/pars_pht_qc.py " + "--log {log} " + "--configs {configs} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--channel {params.channel} " + "--save_path {output.hit_pars} " + "--plot_path {output.plot_file} " + "--pulser_files {input.pulser_files} " + "--fft_files {input.fft_files} " + "--cal_files {input.cal_files}" + + +fallback_qc_rule = list(workflow.rules)[-1] + +rule_order_list = [] +ordered = OrderedDict(qc_pht_rules) +ordered.move_to_end("default") +for key, items in ordered.items(): + rule_order_list += [item.name for item in items] +rule_order_list.append(fallback_qc_rule.name) +workflow._ruleorder.add(*rule_order_list) # [::-1] + # This rule builds the energy calibration using the calibration dsp files rule build_per_energy_calibration: input: - files=lambda wildcards: read_filelist_cal(wildcards, "dsp"), - tcm_filelist=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-cal-tcm.filelist" + files=os.path.join( + filelist_path(setup), + "all-{experiment}-{period}-{run}-cal-" + f"{intier}.filelist", ), + pulser=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), + pht_dict=get_pattern_pars_tmp_channel(setup, "pht", "qc"), + inplots=get_pattern_plts_tmp_channel(setup, "pht", "qc"), ctc_dict=ancient( lambda wildcards: pars_catalog.get_par_file( - setup, wildcards.timestamp, "dsp" + setup, wildcards.timestamp, intier ) ), params: @@ -70,103 +226,16 @@ rule build_per_energy_calibration: "--channel {params.channel} " "--configs {configs} " "--tier {params.tier} " + "--metadata {meta} " "--plot_path {output.plot_file} " "--results_path {output.results_file} " "--save_path {output.ecal_file} " + "--inplot_dict {input.inplots} " + "--in_hit_dict {input.pht_dict} " "--ctc_dict {input.ctc_dict} " - "--tcm_filelist {input.tcm_filelist} " + "--pulser_file {input.pulser} " "--files {input.files}" -rule build_pars_pht_objects: - input: - lambda wildcards: read_filelist_pars_cal_channel( - wildcards, - "pht_objects_pkl", - ), - output: - get_pattern_pars( - setup, - "pht", - name="objects", - extension="dir", - check_in_cycle=check_in_cycle, - ) - group: - "merge-hit" - shell: - "{swenv} python3 -B " - f"{basedir}/../scripts/merge_channels.py " - "--input {input} " - "--output {output} " - -rule build_plts_pht: - input: - lambda wildcards: read_filelist_plts_cal_channel(wildcards, "pht"), - output: - get_pattern_plts(setup, "pht") - group: - "merge-hit" - shell: - "{swenv} python3 -B " - f"{basedir}/../scripts/merge_channels.py " - "--input {input} " - "--output {output} " - -rule build_pars_pht: - input: - infiles = lambda wildcards: read_filelist_pars_cal_channel(wildcards, "pht"), - plts = get_pattern_plts(setup, "pht"), - objects = get_pattern_pars( - setup, - "pht", - name="objects", - extension="dir", - check_in_cycle=check_in_cycle, - ) - output: - get_pattern_pars(setup, "pht", check_in_cycle=check_in_cycle), - group: - "merge-hit" - shell: - "{swenv} python3 -B " - f"{basedir}/../scripts/merge_channels.py " - "--input {input.infiles} " - "--output {output} " - - -rule build_pht: - input: - dsp_file=get_pattern_tier_dsp(setup), - #hit_file = get_pattern_tier_hit(setup), - pars_file=lambda wildcards: 
pars_catalog.get_par_file( - setup, wildcards.timestamp, "pht" - ), - output: - tier_file=get_pattern_tier(setup, "pht", check_in_cycle=check_in_cycle), - db_file=get_pattern_pars_tmp(setup, "pht_db"), - params: - timestamp="{timestamp}", - datatype="{datatype}", - tier="pht", - log: - get_pattern_log(setup, "tier_pht"), - group: - "tier-pht" - resources: - runtime=300, - shell: - "{swenv} python3 -B " - f"{workflow.source_path('../scripts/build_hit.py')} " - "--configs {configs} " - "--log {log} " - "--tier {params.tier} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--pars_file {input.pars_file} " - "--output {output.tier_file} " - "--input {input.dsp_file} " - "--db_file {output.db_file}" - part_pht_rules = {} for key, dataset in part.datasets.items(): @@ -174,8 +243,17 @@ for key, dataset in part.datasets.items(): rule: input: - files=part.get_filelists(partition, key, "dsp"), - tcm_files=part.get_filelists(partition, key, "tcm"), + files=part.get_filelists(partition, key, intier), + pulser_files=[ + file.replace("pht", "tcm") + for file in part.get_par_files( + f"{par_pht_path(setup)}/validity.jsonl", + partition, + key, + tier="pht", + name="pulser_ids", + ) + ], ecal_file=part.get_par_files( f"{par_pht_path(setup)}/validity.jsonl", partition, @@ -249,7 +327,7 @@ for key, dataset in part.datasets.items(): group: "par-pht" resources: - mem_swap=300, + mem_swap=len(part.get_filelists(partition, key, intier)) * 20, runtime=300, shell: "{swenv} python3 -B " @@ -260,12 +338,13 @@ for key, dataset in part.datasets.items(): "--timestamp {params.timestamp} " "--inplots {input.inplots} " "--channel {params.channel} " + "--metadata {meta} " "--fit_results {output.partcal_results} " "--eres_file {input.eres_file} " "--hit_pars {output.hit_pars} " "--plot_file {output.plot_file} " "--ecal_file {input.ecal_file} " - "--tcm_filelist {input.tcm_files} " + "--pulser_files {input.pulser_files} " "--input_files {input.files}" set_last_rule_name( @@ -283,11 +362,10 @@ for key, dataset in part.datasets.items(): rule build_pht_energy_super_calibrations: input: files=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-cal-dsp.filelist" - ), - tcm_files=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-cal-tcm.filelist" + filelist_path(setup), + "all-{experiment}-{period}-{run}-cal" + f"-{intier}.filelist", ), + pulser_files=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), ecal_file=get_pattern_pars_tmp_channel(setup, "pht", "energy_cal"), eres_file=get_pattern_pars_tmp_channel( setup, "pht", "energy_cal_objects", extension="pkl" @@ -320,13 +398,14 @@ rule build_pht_energy_super_calibrations: "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " + "--metadata {meta} " "--inplots {input.inplots} " "--fit_results {output.partcal_results} " "--eres_file {input.eres_file} " "--hit_pars {output.hit_pars} " "--plot_file {output.plot_file} " "--ecal_file {input.ecal_file} " - "--tcm_filelist {input.tcm_files} " + "--pulser_files {input.pulser_files} " "--input_files {input.files}" @@ -346,8 +425,17 @@ for key, dataset in part.datasets.items(): rule: input: - files=part.get_filelists(partition, key, "dsp"), - tcm_files=part.get_filelists(partition, key, "tcm"), + files=part.get_filelists(partition, key, intier), + pulser_files=[ + file.replace("pht", "tcm") + for file in part.get_par_files( + f"{par_pht_path(setup)}/validity.jsonl", + partition, + key, + tier="pht", + name="pulser_ids", + ) + ], 
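+            # the str.replace above turns each pht-tier par path returned by
+            # get_par_files into its tcm-tier counterpart, where the pulser
+            # ids were actually written; schematically (hypothetical name):
+            #   .../par/pht/...-par_pht_pulser_ids.json
+            #   -> .../par/tcm/...-par_tcm_pulser_ids.json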
ecal_file=part.get_par_files( f"{par_pht_path(setup)}/validity.jsonl", partition, @@ -421,7 +509,7 @@ for key, dataset in part.datasets.items(): group: "par-pht" resources: - mem_swap=300, + mem_swap=len(part.get_filelists(partition, key, intier)) * 20, runtime=300, shell: "{swenv} python3 -B " @@ -437,7 +525,7 @@ for key, dataset in part.datasets.items(): "--hit_pars {output.hit_pars} " "--plot_file {output.plot_file} " "--ecal_file {input.ecal_file} " - "--tcm_filelist {input.tcm_files} " + "--pulser_files {input.pulser_files} " "--input_files {input.files}" set_last_rule_name( @@ -455,11 +543,10 @@ for key, dataset in part.datasets.items(): rule build_pht_aoe_calibrations: input: files=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-cal-dsp.filelist" - ), - tcm_filelist=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-cal-tcm.filelist" + filelist_path(setup), + "all-{experiment}-{period}-{run}-cal-" + f"{intier}.filelist", ), + pulser_files=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), ecal_file=get_pattern_pars_tmp_channel(setup, "pht", "partcal"), eres_file=get_pattern_pars_tmp_channel( setup, "pht", "partcal_objects", extension="pkl" @@ -498,7 +585,7 @@ rule build_pht_aoe_calibrations: "--hit_pars {output.hit_pars} " "--plot_file {output.plot_file} " "--ecal_file {input.ecal_file} " - "--tcm_filelist {input.tcm_filelist} " + "--pulser_files {input.pulser_files} " "--input_files {input.files}" @@ -518,8 +605,17 @@ for key, dataset in part.datasets.items(): rule: input: - files=part.get_filelists(partition, key, "dsp"), - tcm_files=part.get_filelists(partition, key, "tcm"), + files=part.get_filelists(partition, key, intier), + pulser_files=[ + file.replace("pht", "tcm") + for file in part.get_par_files( + f"{par_pht_path(setup)}/validity.jsonl", + partition, + key, + tier="pht", + name="pulser_ids", + ) + ], ecal_file=part.get_par_files( f"{par_pht_path(setup)}/validity.jsonl", partition, @@ -591,7 +687,7 @@ for key, dataset in part.datasets.items(): group: "par-pht" resources: - mem_swap=300, + mem_swap=len(part.get_filelists(partition, key, intier)) * 20, runtime=300, shell: "{swenv} python3 -B " @@ -607,7 +703,7 @@ for key, dataset in part.datasets.items(): "--hit_pars {output.hit_pars} " "--plot_file {output.plot_file} " "--ecal_file {input.ecal_file} " - "--tcm_filelist {input.tcm_files} " + "--pulser_files {input.pulser_files} " "--input_files {input.files}" set_last_rule_name(workflow, f"{key}-{partition}-build_pht_lq_calibration") @@ -622,11 +718,10 @@ for key, dataset in part.datasets.items(): rule build_pht_lq_calibration: input: files=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-cal-dsp.filelist" - ), - tcm_filelist=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-cal-tcm.filelist" + filelist_path(setup), + "all-{experiment}-{period}-{run}-cal-" + f"{intier}.filelist", ), + pulser_files=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), ecal_file=get_pattern_pars_tmp_channel(setup, "pht", "aoecal"), eres_file=get_pattern_pars_tmp_channel( setup, "pht", "aoecal_objects", extension="pkl" @@ -663,7 +758,7 @@ rule build_pht_lq_calibration: "--hit_pars {output.hit_pars} " "--plot_file {output.plot_file} " "--ecal_file {input.ecal_file} " - "--tcm_filelist {input.tcm_filelist} " + "--pulser_files {input.pulser_files} " "--input_files {input.files}" @@ -676,3 +771,95 @@ for key, items in ordered.items(): rule_order_list += [item.name for item in items] 
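+# Illustration of the precedence list handed to workflow._ruleorder below
+# (rule names hypothetical): partition-keyed rules come first, "default"-keyed
+# ones after them, and the run-level fallback rule is appended last, e.g.
+#   ["key1-part1-build_pht_lq_calibration",
+#    "default-part2-build_pht_lq_calibration",
+#    "build_pht_lq_calibration"]
+# so when several rules could provide the same par file, Snakemake prefers
+# the partition-level rules over the single-run fallback.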
rule_order_list.append(fallback_pht_rule.name) workflow._ruleorder.add(*rule_order_list) # [::-1] + + +rule build_pars_pht_objects: + input: + lambda wildcards: read_filelist_pars_cal_channel( + wildcards, + "pht_objects_pkl", + ), + output: + get_pattern_pars( + setup, + "pht", + name="objects", + extension="dir", + check_in_cycle=check_in_cycle, + ), + group: + "merge-hit" + shell: + "{swenv} python3 -B " + f"{basedir}/../scripts/merge_channels.py " + "--input {input} " + "--output {output} " + + +rule build_plts_pht: + input: + lambda wildcards: read_filelist_plts_cal_channel(wildcards, "pht"), + output: + get_pattern_plts(setup, "pht"), + group: + "merge-hit" + shell: + "{swenv} python3 -B " + f"{basedir}/../scripts/merge_channels.py " + "--input {input} " + "--output {output} " + + +rule build_pars_pht: + input: + infiles=lambda wildcards: read_filelist_pars_cal_channel(wildcards, "pht"), + plts=get_pattern_plts(setup, "pht"), + objects=get_pattern_pars( + setup, + "pht", + name="objects", + extension="dir", + check_in_cycle=check_in_cycle, + ), + output: + get_pattern_pars(setup, "pht", check_in_cycle=check_in_cycle), + group: + "merge-hit" + shell: + "{swenv} python3 -B " + f"{basedir}/../scripts/merge_channels.py " + "--input {input.infiles} " + "--output {output} " + + +rule build_pht: + input: + dsp_file=get_pattern_tier(setup, intier, check_in_cycle=False), + pars_file=lambda wildcards: pars_catalog.get_par_file( + setup, wildcards.timestamp, "pht" + ), + output: + tier_file=get_pattern_tier(setup, "pht", check_in_cycle=check_in_cycle), + db_file=get_pattern_pars_tmp(setup, "pht_db"), + params: + timestamp="{timestamp}", + datatype="{datatype}", + tier="pht", + log: + get_pattern_log(setup, "tier_pht"), + group: + "tier-pht" + resources: + runtime=300, + shell: + "{swenv} python3 -B " + f"{workflow.source_path('../scripts/build_hit.py')} " + "--configs {configs} " + "--log {log} " + "--tier {params.tier} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--pars_file {input.pars_file} " + "--output {output.tier_file} " + "--input {input.dsp_file} " + "--db_file {output.db_file}" From 03a85674f949935d6239b7092a92b4186928bacf Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sun, 31 Mar 2024 19:29:20 +0200 Subject: [PATCH 038/103] move onstart to snakefile --- rules/dsp.smk | 11 ----------- rules/hit.smk | 13 ------------- 2 files changed, 24 deletions(-) diff --git a/rules/dsp.smk b/rules/dsp.smk index 94ccf13..9ea2e7f 100644 --- a/rules/dsp.smk +++ b/rules/dsp.smk @@ -21,17 +21,6 @@ from scripts.util.patterns import ( ) -onstart: - if os.path.isfile(os.path.join(pars_path(setup), "dsp", "validity.jsonl")): - os.remove(os.path.join(pars_path(setup), "dsp", "validity.jsonl")) - ds.pars_key_resolve.write_par_catalog( - ["-*-*-*-cal"], - os.path.join(pars_path(setup), "dsp", "validity.jsonl"), - get_pattern_tier_raw(setup), - {"cal": ["par_dsp"], "lar": ["par_dsp"]}, - ) - - rule build_pars_dsp_tau: input: files=os.path.join( diff --git a/rules/hit.smk b/rules/hit.smk index bbce0bd..3d2c292 100644 --- a/rules/hit.smk +++ b/rules/hit.smk @@ -21,19 +21,6 @@ from scripts.util.patterns import ( ) -onstart: - if os.path.isfile(os.path.join(pars_path(setup), "hit", "validity.jsonl")): - os.remove(os.path.join(pars_path(setup), "hit", "validity.jsonl")) - - - ds.pars_key_resolve.write_par_catalog( - ["-*-*-*-cal"], - os.path.join(pars_path(setup), "hit", "validity.jsonl"), - get_pattern_tier_raw(setup), - {"cal": ["par_hit"], "lar": ["par_hit"]}, - ) - - # This rule 
builds the qc using the calibration dsp files and fft files rule build_qc: input: From bfe4bf4b148ce8cb4676dd0117365ae4eccf726c Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sun, 31 Mar 2024 19:29:36 +0200 Subject: [PATCH 039/103] bugfix --- rules/pht.smk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rules/pht.smk b/rules/pht.smk index cd11a9b..028e0e3 100644 --- a/rules/pht.smk +++ b/rules/pht.smk @@ -141,7 +141,7 @@ rule build_pht_qc: filelist_path(setup), "all-{experiment}-{period}-{run}-fft-" + f"{intier}.filelist", ), - pulser_file=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), + pulser_files=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), check_file=get_pattern_pars_tmp_channel(setup, "pht", "check"), params: datatype="cal", From d44e737d342efc500dc02c15dbb963fd82e9a95d Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sun, 31 Mar 2024 19:31:36 +0200 Subject: [PATCH 040/103] add muon table --- scripts/build_evt.py | 41 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 36 insertions(+), 5 deletions(-) diff --git a/scripts/build_evt.py b/scripts/build_evt.py index e5febca..9fe9724 100644 --- a/scripts/build_evt.py +++ b/scripts/build_evt.py @@ -58,9 +58,8 @@ def replace_evt_with_key(dic, new_key): # load in config configs = LegendMetadata(path=args.configs) if args.tier == "evt" or args.tier == "pet": - evt_config_file = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ - "tier_evt" - ]["inputs"]["evt_config"] + config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_evt"]["inputs"] + evt_config_file = config_dict["evt_config"] else: msg = "unknown tier" raise ValueError(msg) @@ -88,7 +87,6 @@ def replace_evt_with_key(dic, new_key): else: chans = [] _evt_config["channels"][field] = chans - evt_config[key] = replace_evt_with_key(_evt_config, f"evt/{key}") else: evt_config = {"all": Props.read_from(evt_config_file)} # block for snakemake to fill in channel lists @@ -124,15 +122,48 @@ def replace_evt_with_key(dic, new_key): f_hit=args.hit_file, f_evt=None, evt_config=config, - evt_group=f"evt/{key}" if key != "all" else "evt", + evt_group="evt", tcm_group="hardware_tcm_1", dsp_group="dsp", hit_group="hit", tcm_id_table_pattern="ch{}", ) +if "muon_config" in config_dict and config_dict["muon_config"] is not None: + muon_config = Props.read_from(config_dict["muon_config"]) + # block for snakemake to fill in channel lists + for field, dic in muon_config["channels"].items(): + if isinstance(dic, dict): + chans = chmap.map("system", unique=False)[dic["system"]] + if "selectors" in dic: + try: + for k, val in dic["selectors"].items(): + chans = chans.map(k, unique=False)[val] + except KeyError: + chans = None + if chans is not None: + chans = [f"ch{chan}" for chan in list(chans.map("daq.rawid"))] + else: + chans = [] + muon_config["channels"][field] = chans + +muon_table = build_evt( + f_tcm=args.tcm_file, + f_dsp=args.dsp_file, + f_hit=args.hit_file, + f_evt=None, + evt_config=muon_config, + evt_group="evt", + tcm_group="hardware_tcm_2", + dsp_group="dsp", + hit_group="hit", + tcm_id_table_pattern="ch{}", +) + tbl = Table(col_dict=tables) sto.write(obj=tbl, name="evt", lh5_file=temp_output, wo_mode="a") +sto.write(obj=muon_table, name="muon", lh5_file=temp_output, wo_mode="a") + os.rename(temp_output, args.output) t_elap = time.time() - t_start From 56c5d5d49e499f43be748720ad7a104d701c3ed0 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sun, 31 Mar 2024 19:32:07 +0200 Subject: [PATCH 041/103] 
fix pickling --- scripts/pars_hit_ecal.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/scripts/pars_hit_ecal.py b/scripts/pars_hit_ecal.py index 1d7d436..c859eea 100644 --- a/scripts/pars_hit_ecal.py +++ b/scripts/pars_hit_ecal.py @@ -352,9 +352,7 @@ def baseline_tracking_plots(files, lh5_path, plot_options=None): if plot_options is None: plot_options = {} plot_dict = {} - data = sto.read(lh5_path, files, field_mask=["bl_mean", "baseline", "timestamp"])[0].view_as( - "pd" - ) + data = lh5.read_as(lh5_path, files, "pd", field_mask=["bl_mean", "baseline", "timestamp"]) for key, item in plot_options.items(): if item["options"] is not None: plot_dict[key] = item["function"](data, **item["options"]) @@ -362,6 +360,13 @@ def baseline_tracking_plots(files, lh5_path, plot_options=None): plot_dict[key] = item["function"](data) return plot_dict +def monitor_parameters(files, lh5_path, parameters): + data = lh5.read_as(lh5_path, files, "pd", field_mask=parameters) + out_dict = {} + for param in parameters: + mode, stdev = get_mode_stdev(data[param].to_numpy()) + out_dict[param] = {"mode": mode, "stdev": stdev} + return out_dict def get_results_dict(ecal_class, data, cal_energy_param, selection_string): if np.isnan(ecal_class.pars).all(): @@ -632,6 +637,10 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): peak_dict["parameters"] = peak_dict["parameters"].to_dict() peak_dict["uncertainties"] = peak_dict["uncertainties"].to_dict() + if "monitor_parameters" in kwarg_dict: + monitor_dict = monitor_parameters(files, f"{args.channel}/dsp", kwarg_dict["monitor_parameters"]) + results_dict.update({"monitoring_parameters":monitor_dict}) + # get baseline plots and save all plots to file if args.plot_path: common_dict = baseline_tracking_plots( From 207acedc90700b7932a5f43070c35ca6e1f622ff Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sun, 31 Mar 2024 19:32:44 +0200 Subject: [PATCH 042/103] add onstart --- Snakefile | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/Snakefile b/Snakefile index c5149e8..bee9673 100644 --- a/Snakefile +++ b/Snakefile @@ -22,6 +22,7 @@ from scripts.util.utils import ( filelist_path, metadata_path, tmp_log_path, + pars_path ) from datetime import datetime from collections import OrderedDict @@ -53,14 +54,20 @@ wildcard_constraints: include: "rules/common.smk" include: "rules/main.smk" + include: "rules/tcm.smk" + include: "rules/dsp.smk" +include: "rules/psp.smk" + include: "rules/hit.smk" include: "rules/pht.smk" -include: "rules/psp.smk" + include: "rules/evt.smk" include: "rules/skm.smk" + include: "rules/blinding_calibration.smk" +include: "rules/qc_phy.smk" localrules: @@ -70,6 +77,26 @@ localrules: onstart: print("Starting workflow") + if os.path.isfile(os.path.join(pars_path(setup), "hit", "validity.jsonl")): + os.remove(os.path.join(pars_path(setup), "hit", "validity.jsonl")) + + + ds.pars_key_resolve.write_par_catalog( + ["-*-*-*-cal"], + os.path.join(pars_path(setup), "hit", "validity.jsonl"), + get_pattern_tier_raw(setup), + {"cal": ["par_hit"], "lar": ["par_hit"]}, + ) + + if os.path.isfile(os.path.join(pars_path(setup), "dsp", "validity.jsonl")): + os.remove(os.path.join(pars_path(setup), "dsp", "validity.jsonl")) + ds.pars_key_resolve.write_par_catalog( + ["-*-*-*-cal"], + os.path.join(pars_path(setup), "dsp", "validity.jsonl"), + get_pattern_tier_raw(setup), + {"cal": ["par_dsp"], "lar": ["par_dsp"]}, + ) + onsuccess: From 
297b20ce142282bf7aa138a1b1063c7518247942 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sun, 31 Mar 2024 19:33:55 +0200 Subject: [PATCH 043/103] add scripts for qc from phy files for overrides --- rules/qc_phy.smk | 160 +++++++++++++++++++++++++++++++++++++ scripts/pars_pht_qc_phy.py | 116 +++++++++++++++++++++++++++ 2 files changed, 276 insertions(+) create mode 100644 rules/qc_phy.smk create mode 100644 scripts/pars_pht_qc_phy.py diff --git a/rules/qc_phy.smk b/rules/qc_phy.smk new file mode 100644 index 0000000..f4f3b7e --- /dev/null +++ b/rules/qc_phy.smk @@ -0,0 +1,160 @@ +from scripts.util.pars_loading import pars_catalog +from scripts.util.create_pars_keylist import pars_key_resolve +from scripts.util.utils import filelist_path, par_pht_path, set_last_rule_name +from scripts.util.patterns import ( + get_pattern_pars_tmp_channel, + get_pattern_plts_tmp_channel, + get_pattern_log_channel, + get_pattern_par_pht, + get_pattern_plts, + get_pattern_tier, + get_pattern_pars_tmp, + get_pattern_log, + get_pattern_pars, +) + +intier = "dsp" + + +qc_pht_rules = {} +for key, dataset in part.datasets.items(): + for partition in dataset.keys(): + + rule: + input: + phy_files=part.get_filelists(partition, key, intier, datatype="phy"), + wildcard_constraints: + channel=part.get_wildcard_constraints(partition, key), + params: + datatype="cal", + channel="{channel}" if key == "default" else key, + timestamp=part.get_timestamp( + f"{par_pht_path(setup)}/validity.jsonl", partition, key, tier="pht" + ), + output: + hit_pars=[ + temp(file) + for file in part.get_par_files( + f"{par_pht_path(setup)}/validity.jsonl", + partition, + key, + tier="pht", + name="qcphy", + ) + ], + plot_file=[ + temp(file) + for file in part.get_plt_files( + f"{par_pht_path(setup)}/validity.jsonl", + partition, + key, + tier="pht", + name="qcphy", + ) + ], + log: + part.get_log_file( + f"{par_pht_path(setup)}/validity.jsonl", + partition, + key, + "pht", + name="par_pht_qc_phy", + ), + group: + "par-pht" + resources: + mem_swap=len(part.get_filelists(partition, key, intier)) * 20, + runtime=300, + shell: + "{swenv} python3 -B " + f"{basedir}/../scripts/pars_pht_qc_phy.py " + "--log {log} " + "--configs {configs} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--channel {params.channel} " + "--save_path {output.hit_pars} " + "--plot_path {output.plot_file} " + "--phy_files {input.phy_files}" + + set_last_rule_name(workflow, f"{key}-{partition}-build_pht_qc_phy") + + if key in qc_pht_rules: + qc_pht_rules[key].append(list(workflow.rules)[-1]) + else: + qc_pht_rules[key] = [list(workflow.rules)[-1]] + + +# Merged energy and a/e supercalibrations to reduce number of rules as they have same inputs/outputs +# This rule builds the a/e calibration using the calibration dsp files for the whole partition +rule build_pht_qc_phy: + input: + phy_files=os.path.join( + filelist_path(setup), + "all-{experiment}-{period}-{run}-phy-" + f"{intier}.filelist", + ), + params: + datatype="cal", + channel="{channel}", + timestamp="{timestamp}", + output: + hit_pars=temp(get_pattern_pars_tmp_channel(setup, "pht", "qcphy")), + plot_file=temp(get_pattern_plts_tmp_channel(setup, "pht", "qcphy")), + log: + get_pattern_log_channel(setup, "pars_pht_qc_phy"), + group: + "par-pht" + resources: + mem_swap=60, + runtime=300, + shell: + "{swenv} python3 -B " + f"{basedir}/../scripts/pars_pht_qc_phy.py " + "--log {log} " + "--configs {configs} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--channel 
{params.channel} " + "--save_path {output.hit_pars} " + "--plot_path {output.plot_file} " + "--phy_files {input.phy_files}" + + +fallback_qc_rule = list(workflow.rules)[-1] + +rule_order_list = [] +ordered = OrderedDict(qc_pht_rules) +ordered.move_to_end("default") +for key, items in ordered.items(): + rule_order_list += [item.name for item in items] +rule_order_list.append(fallback_qc_rule.name) +workflow._ruleorder.add(*rule_order_list) # [::-1] + +rule build_plts_pht_phy: + input: + lambda wildcards: read_filelist_plts_cal_channel(wildcards, "pht_qcphy"), + output: + get_pattern_plts(setup, "pht", "qc_phy"), + group: + "merge-hit" + shell: + "{swenv} python3 -B " + f"{basedir}/../scripts/merge_channels.py " + "--input {input} " + "--output {output} " + + +rule build_pars_pht_phy: + input: + infiles=lambda wildcards: read_filelist_pars_cal_channel(wildcards, "pht_qcphy"), + plts=get_pattern_plts(setup, "pht" , "qc_phy"), + output: + get_pattern_pars(setup, "pht", name= "qc_phy", check_in_cycle=check_in_cycle), + group: + "merge-hit" + shell: + "{swenv} python3 -B " + f"{basedir}/../scripts/merge_channels.py " + "--input {input.infiles} " + "--output {output} " + diff --git a/scripts/pars_pht_qc_phy.py b/scripts/pars_pht_qc_phy.py new file mode 100644 index 0000000..804ffd4 --- /dev/null +++ b/scripts/pars_pht_qc_phy.py @@ -0,0 +1,116 @@ +from __future__ import annotations + +import argparse +import json +import logging +import os +import pathlib +import pickle as pkl +import re +import warnings + +os.environ["PYGAMA_PARALLEL"] = "false" +os.environ["PYGAMA_FASTMATH"] = "false" + +import numpy as np +from legendmeta import LegendMetadata +from legendmeta.catalog import Props +from lgdo.lh5 import ls +import lgdo.lh5 as lh5 +from pygama.pargen.data_cleaning import get_tcm_pulser_ids, generate_cuts, get_keys, generate_cut_classifiers +from pygama.pargen.utils import load_data +from util.FileKey import ChannelProcKey, ProcessingFileKey + +log = logging.getLogger(__name__) + +warnings.filterwarnings(action="ignore", category=RuntimeWarning) + + +if __name__ == "__main__": + argparser = argparse.ArgumentParser() + argparser.add_argument("--phy_files", help="cal_files", nargs="*", type=str) + + argparser.add_argument("--configs", help="config", type=str, required=True) + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) + argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) + argparser.add_argument("--channel", help="Channel", type=str, required=True) + + argparser.add_argument("--log", help="log_file", type=str) + + argparser.add_argument("--plot_path", help="plot_path", type=str, nargs="*", required=False) + argparser.add_argument("--save_path", help="save_path", type=str, nargs="*", ) + args = argparser.parse_args() + + logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") + logging.getLogger("numba").setLevel(logging.INFO) + logging.getLogger("parse").setLevel(logging.INFO) + logging.getLogger("lgdo").setLevel(logging.INFO) + logging.getLogger("h5py").setLevel(logging.INFO) + logging.getLogger("matplotlib").setLevel(logging.INFO) + logging.getLogger("legendmeta").setLevel(logging.INFO) + + # get metadata dictionary + configs = LegendMetadata(path=args.configs) + channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"] + channel_dict = channel_dict["pars_pht_qc"]["inputs"]["qc_config"][args.channel] + + sto = lh5.LH5Store() + + # sort files in dictionary where keys are first timestamp from 
run + bl_mask = np.array([], dtype=bool) + if isinstance(args.phy_files, list): + phy_files = [] + for file in sorted(args.phy_files): + with open(file) as f: + run_files = f.read().splitlines() + if len(run_files) == 0: + continue + else: + run_files = sorted(np.unique(run_files)) + phy_files += run_files + bls = sto.read("ch1027200/dsp/", run_files, field_mask = ["wf_max", "bl_mean"])[0] + puls = sto.read("ch1027201/dsp/", run_files, field_mask = ["trapTmax"])[0] + bl_idxs = ((bls["wf_max"].nda - bls["bl_mean"].nda)>1000) &(puls["trapTmax"].nda<200) + bl_mask = np.append(bl_mask, bl_idxs) + else: + with open(args.phy_files) as f: + phy_files = f.read().splitlines() + phy_files = sorted(np.unique(phy_files)) + bls = sto.read("ch1027200/dsp/", phy_files, field_mask = ["wf_max", "bl_mean"])[0] + puls = sto.read("ch1027201/dsp/", phy_files, field_mask = ["trapTmax"])[0] + bl_mask = ((bls["wf_max"].nda - bls["bl_mean"].nda)>1000) &(puls["trapTmax"].nda<200) + + kwarg_dict = Props.read_from(channel_dict) + kwarg_dict_fft = kwarg_dict["fft_fields"] + + cut_fields = get_keys( + [ + key.replace(f"{args.channel}/dsp/", "") + for key in ls(phy_files[0], f"{args.channel}/dsp/") + ], + kwarg_dict_fft["cut_parameters"], + ) + + data = sto.read(f"{args.channel}/dsp/", phy_files, + field_mask=cut_fields, idx = np.where(bl_mask)[0])[0] + + hit_dict, plot_dict = generate_cut_classifiers( + data, + kwarg_dict_fft["cut_parameters"], + kwarg_dict.get("rounding", 4), + display=1 if args.plot_path else 0, + ) + + log.debug("fft cuts applied") + log.debug(f"cut_dict is: {json.dumps(hit_dict, indent=2)}") + + for file in args.save_path: + pathlib.Path(os.path.dirname(file)).mkdir(parents=True, exist_ok=True) + with open(file, "w") as f: + json.dump({"pars":{"operations":hit_dict}}, f, indent=4) + + if args.plot_path: + for file in args.plot_path: + pathlib.Path(os.path.dirname(file)).mkdir(parents=True, exist_ok=True) + with open(file, "wb") as f: + pkl.dump({"qc": plot_dict}, f, protocol=pkl.HIGHEST_PROTOCOL) From 2f8a5b2d46c5e62bfd03270b80f25c74df7338a3 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sun, 31 Mar 2024 19:42:14 +0200 Subject: [PATCH 044/103] pre-commit --- Snakefile | 8 +------- rules/qc_phy.smk | 6 +++--- scripts/build_evt.py | 6 ++++-- scripts/pars_hit_ecal.py | 26 ++++++++++++++++++------- scripts/pars_hit_qc.py | 6 +++++- scripts/pars_pht_aoecal.py | 4 ++-- scripts/pars_pht_lqcal.py | 2 +- scripts/pars_pht_partcal.py | 12 ++++++++---- scripts/pars_pht_qc.py | 30 ++++++++++++++++++----------- scripts/pars_pht_qc_phy.py | 38 ++++++++++++++++++++++--------------- scripts/pars_tcm_pulser.py | 6 ++---- 11 files changed, 87 insertions(+), 57 deletions(-) diff --git a/Snakefile b/Snakefile index bee9673..4d732bf 100644 --- a/Snakefile +++ b/Snakefile @@ -22,7 +22,7 @@ from scripts.util.utils import ( filelist_path, metadata_path, tmp_log_path, - pars_path + pars_path, ) from datetime import datetime from collections import OrderedDict @@ -54,18 +54,13 @@ wildcard_constraints: include: "rules/common.smk" include: "rules/main.smk" - include: "rules/tcm.smk" - include: "rules/dsp.smk" include: "rules/psp.smk" - include: "rules/hit.smk" include: "rules/pht.smk" - include: "rules/evt.smk" include: "rules/skm.smk" - include: "rules/blinding_calibration.smk" include: "rules/qc_phy.smk" @@ -96,7 +91,6 @@ onstart: get_pattern_tier_raw(setup), {"cal": ["par_dsp"], "lar": ["par_dsp"]}, ) - onsuccess: diff --git a/rules/qc_phy.smk b/rules/qc_phy.smk index f4f3b7e..10eceb9 100644 --- a/rules/qc_phy.smk +++ 
b/rules/qc_phy.smk @@ -130,6 +130,7 @@ for key, items in ordered.items(): rule_order_list.append(fallback_qc_rule.name) workflow._ruleorder.add(*rule_order_list) # [::-1] + rule build_plts_pht_phy: input: lambda wildcards: read_filelist_plts_cal_channel(wildcards, "pht_qcphy"), @@ -147,9 +148,9 @@ rule build_plts_pht_phy: rule build_pars_pht_phy: input: infiles=lambda wildcards: read_filelist_pars_cal_channel(wildcards, "pht_qcphy"), - plts=get_pattern_plts(setup, "pht" , "qc_phy"), + plts=get_pattern_plts(setup, "pht", "qc_phy"), output: - get_pattern_pars(setup, "pht", name= "qc_phy", check_in_cycle=check_in_cycle), + get_pattern_pars(setup, "pht", name="qc_phy", check_in_cycle=check_in_cycle), group: "merge-hit" shell: @@ -157,4 +158,3 @@ rule build_pars_pht_phy: f"{basedir}/../scripts/merge_channels.py " "--input {input.infiles} " "--output {output} " - diff --git a/scripts/build_evt.py b/scripts/build_evt.py index 9fe9724..f109871 100644 --- a/scripts/build_evt.py +++ b/scripts/build_evt.py @@ -58,7 +58,9 @@ def replace_evt_with_key(dic, new_key): # load in config configs = LegendMetadata(path=args.configs) if args.tier == "evt" or args.tier == "pet": - config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_evt"]["inputs"] + config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_evt"][ + "inputs" + ] evt_config_file = config_dict["evt_config"] else: msg = "unknown tier" @@ -69,7 +71,7 @@ def replace_evt_with_key(dic, new_key): if isinstance(evt_config_file, dict): evt_config = {} - for key, _evt_config in evt_config_file.items(): + for _evt_config in evt_config_file.values(): if _evt_config is not None: _evt_config = Props.read_from(_evt_config) # block for snakemake to fill in channel lists diff --git a/scripts/pars_hit_ecal.py b/scripts/pars_hit_ecal.py index c859eea..4efc19f 100644 --- a/scripts/pars_hit_ecal.py +++ b/scripts/pars_hit_ecal.py @@ -23,7 +23,7 @@ from legendmeta.catalog import Props from matplotlib.colors import LogNorm from pygama.math.distributions import nb_poly -from pygama.pargen.data_cleaning import get_tcm_pulser_ids +from pygama.pargen.data_cleaning import get_mode_stdev, get_tcm_pulser_ids from pygama.pargen.energy_cal import FWHMLinear, FWHMQuadratic, HPGeCalibration from pygama.pargen.utils import load_data from scipy.stats import binned_statistic @@ -360,6 +360,7 @@ def baseline_tracking_plots(files, lh5_path, plot_options=None): plot_dict[key] = item["function"](data) return plot_dict + def monitor_parameters(files, lh5_path, parameters): data = lh5.read_as(lh5_path, files, "pd", field_mask=parameters) out_dict = {} @@ -368,6 +369,7 @@ def monitor_parameters(files, lh5_path, parameters): out_dict[param] = {"mode": mode, "stdev": stdev} return out_dict + def get_results_dict(ecal_class, data, cal_energy_param, selection_string): if np.isnan(ecal_class.pars).all(): return {} @@ -627,19 +629,29 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): selection_string, ) plot_dict[cal_energy_param] = param_plot_dict - - for peak_dict in full_object_dict[cal_energy_param].results["hpge_fit_energy_peaks_1"]["peak_parameters"].values(): + + for peak_dict in ( + full_object_dict[cal_energy_param] + .results["hpge_fit_energy_peaks_1"]["peak_parameters"] + .values() + ): peak_dict["function"] = peak_dict["function"].name peak_dict["parameters"] = peak_dict["parameters"].to_dict() peak_dict["uncertainties"] = peak_dict["uncertainties"].to_dict() - for peak_dict in 
full_object_dict[cal_energy_param].results["hpge_fit_energy_peaks"]["peak_parameters"].values(): + for peak_dict in ( + full_object_dict[cal_energy_param] + .results["hpge_fit_energy_peaks"]["peak_parameters"] + .values() + ): peak_dict["function"] = peak_dict["function"].name peak_dict["parameters"] = peak_dict["parameters"].to_dict() peak_dict["uncertainties"] = peak_dict["uncertainties"].to_dict() - if "monitor_parameters" in kwarg_dict: - monitor_dict = monitor_parameters(files, f"{args.channel}/dsp", kwarg_dict["monitor_parameters"]) - results_dict.update({"monitoring_parameters":monitor_dict}) + if "monitoring_parameters" in kwarg_dict: + monitor_dict = monitor_parameters( + files, f"{args.channel}/dsp", kwarg_dict["monitor_parameters"] + ) + results_dict.update({"monitoring_parameters": monitor_dict}) # get baseline plots and save all plots to file if args.plot_path: diff --git a/scripts/pars_hit_qc.py b/scripts/pars_hit_qc.py index 09e14c6..110dfa9 100644 --- a/scripts/pars_hit_qc.py +++ b/scripts/pars_hit_qc.py @@ -16,7 +16,11 @@ from legendmeta import LegendMetadata from legendmeta.catalog import Props from lgdo.lh5 import ls -from pygama.pargen.data_cleaning import generate_cuts, get_keys, get_tcm_pulser_ids, generate_cut_classifiers +from pygama.pargen.data_cleaning import ( + generate_cut_classifiers, + get_keys, + get_tcm_pulser_ids, +) from pygama.pargen.utils import load_data log = logging.getLogger(__name__) diff --git a/scripts/pars_pht_aoecal.py b/scripts/pars_pht_aoecal.py index 34fa8f8..30e1a9e 100644 --- a/scripts/pars_pht_aoecal.py +++ b/scripts/pars_pht_aoecal.py @@ -47,7 +47,7 @@ def get_results_dict(aoe_class): def fill_plot_dict(aoe_class, data, plot_options, plot_dict=None): - if plot_dict is None: + if plot_dict is None: plot_dict = {} for key, item in plot_options.items(): if item["options"] is not None: @@ -345,7 +345,7 @@ def eres_func(x): except KeyError: aoe_obj.eres_func = {} else: - out_dict = {tstamp:None for tstamp in cal_dict} + out_dict = {tstamp: None for tstamp in cal_dict} plot_dict = {} aoe_obj = None diff --git a/scripts/pars_pht_lqcal.py b/scripts/pars_pht_lqcal.py index 9937281..c5ba80b 100644 --- a/scripts/pars_pht_lqcal.py +++ b/scripts/pars_pht_lqcal.py @@ -38,7 +38,7 @@ def get_results_dict(lq_class): def fill_plot_dict(lq_class, data, plot_options, plot_dict=None): - if plot_dict is None: + if plot_dict is None: plot_dict = {} for key, item in plot_options.items(): if item["options"] is not None: diff --git a/scripts/pars_pht_partcal.py b/scripts/pars_pht_partcal.py index e11f965..7063f8a 100644 --- a/scripts/pars_pht_partcal.py +++ b/scripts/pars_pht_partcal.py @@ -128,7 +128,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): "eres_quadratic": fwhm_quad, "fitted_peaks": ecal_class.peaks_kev.tolist(), "pk_fits": pk_dict, - "peak_param":results_dict["peak_param"] + "peak_param": results_dict["peak_param"], } @@ -392,7 +392,11 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): ) plot_dict[cal_energy_param] = param_plot_dict - for peak_dict in full_object_dict[cal_energy_param].results["hpge_fit_energy_peaks"]["peak_parameters"].values(): + for peak_dict in ( + full_object_dict[cal_energy_param] + .results["hpge_fit_energy_peaks"]["peak_parameters"] + .values() + ): peak_dict["function"] = peak_dict["function"].name peak_dict["parameters"] = peak_dict["parameters"].to_dict() peak_dict["uncertainties"] = peak_dict["uncertainties"].to_dict() @@ -436,7 +440,7 @@ def 
get_results_dict(ecal_class, data, cal_energy_param, selection_string): fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(out)) final_hit_dict = { "pars": cal_dict[fk.timestamp], - "results": dict(**results_dicts[fk.timestamp], partition_ecal= ecal_results) + "results": dict(**results_dicts[fk.timestamp], partition_ecal=ecal_results), } pathlib.Path(os.path.dirname(out)).mkdir(parents=True, exist_ok=True) with open(out, "w") as w: @@ -444,7 +448,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): for out in args.fit_results: fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(out)) - final_object_dict = dict(**object_dict[fk.timestamp], partition_ecal = full_object_dict) + final_object_dict = dict(**object_dict[fk.timestamp], partition_ecal=full_object_dict) pathlib.Path(os.path.dirname(out)).mkdir(parents=True, exist_ok=True) with open(out, "wb") as w: pkl.dump(final_object_dict, w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_pht_qc.py b/scripts/pars_pht_qc.py index c9801be..8eff510 100644 --- a/scripts/pars_pht_qc.py +++ b/scripts/pars_pht_qc.py @@ -16,9 +16,12 @@ from legendmeta import LegendMetadata from legendmeta.catalog import Props from lgdo.lh5 import ls -from pygama.pargen.data_cleaning import get_tcm_pulser_ids, generate_cuts, get_keys, generate_cut_classifiers +from pygama.pargen.data_cleaning import ( + generate_cut_classifiers, + get_keys, + get_tcm_pulser_ids, +) from pygama.pargen.utils import load_data -from util.FileKey import ChannelProcKey, ProcessingFileKey log = logging.getLogger(__name__) @@ -29,8 +32,12 @@ argparser = argparse.ArgumentParser() argparser.add_argument("--cal_files", help="cal_files", nargs="*", type=str) argparser.add_argument("--fft_files", help="fft_files", nargs="*", type=str) - argparser.add_argument("--tcm_filelist", help="tcm_filelist", nargs="*", type=str, required=False) - argparser.add_argument("--pulser_files", help="pulser_file", nargs="*", type=str, required=False) + argparser.add_argument( + "--tcm_filelist", help="tcm_filelist", nargs="*", type=str, required=False + ) + argparser.add_argument( + "--pulser_files", help="pulser_file", nargs="*", type=str, required=False + ) argparser.add_argument("--configs", help="config", type=str, required=True) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) @@ -40,7 +47,12 @@ argparser.add_argument("--log", help="log_file", type=str) argparser.add_argument("--plot_path", help="plot_path", type=str, nargs="*", required=False) - argparser.add_argument("--save_path", help="save_path", type=str, nargs="*", ) + argparser.add_argument( + "--save_path", + help="save_path", + type=str, + nargs="*", + ) args = argparser.parse_args() logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") @@ -56,7 +68,6 @@ channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"] channel_dict = channel_dict["pars_pht_qc"]["inputs"]["qc_config"][args.channel] - # sort files in dictionary where keys are first timestamp from run if isinstance(args.cal_files, list): cal_files = [] @@ -71,8 +82,6 @@ np.unique(cal_files) ) # need this as sometimes files get double counted as it somehow puts in the p%-* filelist and individual runs also - - kwarg_dict = Props.read_from(channel_dict) kwarg_dict_cal = kwarg_dict["cal_fields"] @@ -107,7 +116,7 @@ if args.pulser_files: mask = np.array([], dtype=bool) for file in args.pulser_files: - with open(file, 'r') as f: + with open(file) as f: pulser_dict = 
json.load(f) pulser_mask = np.array(pulser_dict["mask"]) mask = np.append(mask, pulser_mask) @@ -166,7 +175,6 @@ kwarg_dict_fft = kwarg_dict["fft_fields"] if len(args.fft_files) > 0: - # sort files in dictionary where keys are first timestamp from run if isinstance(args.fft_files, list): fft_files = [] @@ -181,7 +189,7 @@ np.unique(fft_files) ) # need this as sometimes files get double counted as it somehow puts in the p%-* filelist and individual runs also - if len(fft_files)>0: + if len(fft_files) > 0: fft_data = load_data( fft_files, f"{args.channel}/dsp", diff --git a/scripts/pars_pht_qc_phy.py b/scripts/pars_pht_qc_phy.py index 804ffd4..8fe0a1f 100644 --- a/scripts/pars_pht_qc_phy.py +++ b/scripts/pars_pht_qc_phy.py @@ -6,20 +6,20 @@ import os import pathlib import pickle as pkl -import re import warnings os.environ["PYGAMA_PARALLEL"] = "false" os.environ["PYGAMA_FASTMATH"] = "false" +import lgdo.lh5 as lh5 import numpy as np from legendmeta import LegendMetadata from legendmeta.catalog import Props from lgdo.lh5 import ls -import lgdo.lh5 as lh5 -from pygama.pargen.data_cleaning import get_tcm_pulser_ids, generate_cuts, get_keys, generate_cut_classifiers -from pygama.pargen.utils import load_data -from util.FileKey import ChannelProcKey, ProcessingFileKey +from pygama.pargen.data_cleaning import ( + generate_cut_classifiers, + get_keys, +) log = logging.getLogger(__name__) @@ -38,7 +38,12 @@ argparser.add_argument("--log", help="log_file", type=str) argparser.add_argument("--plot_path", help="plot_path", type=str, nargs="*", required=False) - argparser.add_argument("--save_path", help="save_path", type=str, nargs="*", ) + argparser.add_argument( + "--save_path", + help="save_path", + type=str, + nargs="*", + ) args = argparser.parse_args() logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") @@ -68,17 +73,19 @@ else: run_files = sorted(np.unique(run_files)) phy_files += run_files - bls = sto.read("ch1027200/dsp/", run_files, field_mask = ["wf_max", "bl_mean"])[0] - puls = sto.read("ch1027201/dsp/", run_files, field_mask = ["trapTmax"])[0] - bl_idxs = ((bls["wf_max"].nda - bls["bl_mean"].nda)>1000) &(puls["trapTmax"].nda<200) + bls = sto.read("ch1027200/dsp/", run_files, field_mask=["wf_max", "bl_mean"])[0] + puls = sto.read("ch1027201/dsp/", run_files, field_mask=["trapTmax"])[0] + bl_idxs = ((bls["wf_max"].nda - bls["bl_mean"].nda) > 1000) & ( + puls["trapTmax"].nda < 200 + ) bl_mask = np.append(bl_mask, bl_idxs) else: with open(args.phy_files) as f: phy_files = f.read().splitlines() phy_files = sorted(np.unique(phy_files)) - bls = sto.read("ch1027200/dsp/", phy_files, field_mask = ["wf_max", "bl_mean"])[0] - puls = sto.read("ch1027201/dsp/", phy_files, field_mask = ["trapTmax"])[0] - bl_mask = ((bls["wf_max"].nda - bls["bl_mean"].nda)>1000) &(puls["trapTmax"].nda<200) + bls = sto.read("ch1027200/dsp/", phy_files, field_mask=["wf_max", "bl_mean"])[0] + puls = sto.read("ch1027201/dsp/", phy_files, field_mask=["trapTmax"])[0] + bl_mask = ((bls["wf_max"].nda - bls["bl_mean"].nda) > 1000) & (puls["trapTmax"].nda < 200) kwarg_dict = Props.read_from(channel_dict) kwarg_dict_fft = kwarg_dict["fft_fields"] @@ -91,8 +98,9 @@ kwarg_dict_fft["cut_parameters"], ) - data = sto.read(f"{args.channel}/dsp/", phy_files, - field_mask=cut_fields, idx = np.where(bl_mask)[0])[0] + data = sto.read( + f"{args.channel}/dsp/", phy_files, field_mask=cut_fields, idx=np.where(bl_mask)[0] + )[0] hit_dict, plot_dict = generate_cut_classifiers( data, @@ -107,7 +115,7 @@ for file in 
args.save_path:
         pathlib.Path(os.path.dirname(file)).mkdir(parents=True, exist_ok=True)
         with open(file, "w") as f:
-            json.dump({"pars":{"operations":hit_dict}}, f, indent=4)
+            json.dump({"pars": {"operations": hit_dict}}, f, indent=4)
 
     if args.plot_path:
         for file in args.plot_path:
diff --git a/scripts/pars_tcm_pulser.py b/scripts/pars_tcm_pulser.py
index 3c6d88a..5a6a336 100644
--- a/scripts/pars_tcm_pulser.py
+++ b/scripts/pars_tcm_pulser.py
@@ -3,7 +3,6 @@
 import logging
 import os
 import pathlib
-import pickle as pkl
 
 os.environ["LGDO_CACHE"] = "false"
 os.environ["LGDO_BOUNDSCHECK"] = "false"
@@ -16,7 +15,6 @@
 from legendmeta.catalog import Props
 from pygama.pargen.data_cleaning import get_tcm_pulser_ids
 
-
 argparser = argparse.ArgumentParser()
 argparser.add_argument("--configs", help="configs path", type=str, required=True)
 argparser.add_argument("--log", help="log file", type=str)
@@ -57,8 +55,8 @@
     tcm_files = sorted(np.unique(tcm_files))
     ids, mask = get_tcm_pulser_ids(
         tcm_files, args.channel, kwarg_dict.pop("pulser_multiplicity_threshold")
-    )
+)
 
 pathlib.Path(os.path.dirname(args.pulser_file)).mkdir(parents=True, exist_ok=True)
 with open(args.pulser_file, "w") as f:
-    json.dump({"idxs": ids.tolist(), "mask": mask.tolist()}, f, indent=4)
\ No newline at end of file
+    json.dump({"idxs": ids.tolist(), "mask": mask.tolist()}, f, indent=4)

From efbee94cb1d2ed003cdc42224be772f03ef52a2c Mon Sep 17 00:00:00 2001
From: ggmarshall
Date: Sun, 31 Mar 2024 19:58:59 +0200
Subject: [PATCH 045/103] fix monitoring fields

---
 scripts/pars_hit_ecal.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/pars_hit_ecal.py b/scripts/pars_hit_ecal.py
index 4efc19f..07a3c8f 100644
--- a/scripts/pars_hit_ecal.py
+++ b/scripts/pars_hit_ecal.py
@@ -649,7 +649,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string):
 
     if "monitoring_parameters" in kwarg_dict:
         monitor_dict = monitor_parameters(
-            files, f"{args.channel}/dsp", kwarg_dict["monitor_parameters"]
+            files, f"{args.channel}/dsp", kwarg_dict["monitoring_parameters"]
         )
         results_dict.update({"monitoring_parameters": monitor_dict})
 

From 174fd8984077490d47290f7b39e9077414c97e83 Mon Sep 17 00:00:00 2001
From: ggmarshall
Date: Sun, 31 Mar 2024 20:01:33 +0200
Subject: [PATCH 046/103] update packages

---
 templates/config.json | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/templates/config.json b/templates/config.json
index fcb4a8d..1884061 100644
--- a/templates/config.json
+++ b/templates/config.json
@@ -53,11 +53,11 @@
       "arg": "/data2/public/prodenv/containers/legendexp_legend-base_latest_20221021210158.sif"
     },
     "pkg_versions": {
-      "pygama": "pygama==1.6.0a1",
-      "pylegendmeta": "pylegendmeta==0.9.0a2",
-      "dspeed": "dspeed==1.3.0a4",
-      "legend-pydataobj": "legend-pydataobj==1.5.0a5",
-      "legend-daq2lh5": "legend-daq2lh5==1.2.0a1"
+      "pygama": "pygama==1.6.0",
+      "pylegendmeta": "pylegendmeta==0.9.0",
+      "dspeed": "dspeed==1.3.0",
+      "legend-pydataobj": "legend-pydataobj==1.5.1",
+      "legend-daq2lh5": "legend-daq2lh5==1.2.0"
     }
   }
 }

From f6028b37ab1e56719781ab4239cb475fc97ac5ae Mon Sep 17 00:00:00 2001
From: ggmarshall
Date: Mon, 1 Apr 2024 15:07:50 +0200
Subject: [PATCH 047/103] fix muon indentations

---
 scripts/build_evt.py | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/scripts/build_evt.py b/scripts/build_evt.py
index f109871..c3560bf 100644
--- a/scripts/build_evt.py
+++ b/scripts/build_evt.py
@@ -149,22 +149,23 @@ def replace_evt_with_key(dic,
new_key): chans = [] muon_config["channels"][field] = chans -muon_table = build_evt( - f_tcm=args.tcm_file, - f_dsp=args.dsp_file, - f_hit=args.hit_file, - f_evt=None, - evt_config=muon_config, - evt_group="evt", - tcm_group="hardware_tcm_2", - dsp_group="dsp", - hit_group="hit", - tcm_id_table_pattern="ch{}", -) + muon_table = build_evt( + f_tcm=args.tcm_file, + f_dsp=args.dsp_file, + f_hit=args.hit_file, + f_evt=None, + evt_config=muon_config, + evt_group="evt", + tcm_group="hardware_tcm_2", + dsp_group="dsp", + hit_group="hit", + tcm_id_table_pattern="ch{}", + ) + muon_tbl = Table(col_dict={"muon": muon_table}) + sto.write(obj=muon_tbl, name="evt2", lh5_file=temp_output, wo_mode="a") tbl = Table(col_dict=tables) sto.write(obj=tbl, name="evt", lh5_file=temp_output, wo_mode="a") -sto.write(obj=muon_table, name="muon", lh5_file=temp_output, wo_mode="a") os.rename(temp_output, args.output) From 56ea627bef0b260fee131d7457054ceef0b3217d Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 1 Apr 2024 17:43:22 +0200 Subject: [PATCH 048/103] fix config loop --- scripts/build_evt.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/build_evt.py b/scripts/build_evt.py index c3560bf..e060ce7 100644 --- a/scripts/build_evt.py +++ b/scripts/build_evt.py @@ -71,7 +71,7 @@ def replace_evt_with_key(dic, new_key): if isinstance(evt_config_file, dict): evt_config = {} - for _evt_config in evt_config_file.values(): + for key, _evt_config in evt_config_file.items(): if _evt_config is not None: _evt_config = Props.read_from(_evt_config) # block for snakemake to fill in channel lists @@ -89,6 +89,7 @@ def replace_evt_with_key(dic, new_key): else: chans = [] _evt_config["channels"][field] = chans + evt_config[key] = _evt_config else: evt_config = {"all": Props.read_from(evt_config_file)} # block for snakemake to fill in channel lists From 829139384fbda28fea76cd9a2cff6a0e35b817dc Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 1 Apr 2024 17:48:02 +0200 Subject: [PATCH 049/103] make check files temp --- rules/pht.smk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rules/pht.smk b/rules/pht.smk index 028e0e3..142ca72 100644 --- a/rules/pht.smk +++ b/rules/pht.smk @@ -35,7 +35,7 @@ rule pht_checkpoint: input: files=lambda wildcards: read_filelist_cal(wildcards, intier), output: - get_pattern_pars_tmp_channel(setup, "pht", "check"), + temp(get_pattern_pars_tmp_channel(setup, "pht", "check")), shell: "touch {output}" From b7f652bac148ef9e24a0be9aee68ed7f1c1fd07e Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 1 Apr 2024 22:14:41 +0200 Subject: [PATCH 050/103] check if hardware_tcm_2 in tcm before running muon evt build --- scripts/build_evt.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/scripts/build_evt.py b/scripts/build_evt.py index e060ce7..7c2ed11 100644 --- a/scripts/build_evt.py +++ b/scripts/build_evt.py @@ -149,21 +149,21 @@ def replace_evt_with_key(dic, new_key): else: chans = [] muon_config["channels"][field] = chans - - muon_table = build_evt( - f_tcm=args.tcm_file, - f_dsp=args.dsp_file, - f_hit=args.hit_file, - f_evt=None, - evt_config=muon_config, - evt_group="evt", - tcm_group="hardware_tcm_2", - dsp_group="dsp", - hit_group="hit", - tcm_id_table_pattern="ch{}", - ) - muon_tbl = Table(col_dict={"muon": muon_table}) - sto.write(obj=muon_tbl, name="evt2", lh5_file=temp_output, wo_mode="a") + if "hardware_tcm_2" in lh5.ls(args.tcm_file): + muon_table = build_evt( + f_tcm=args.tcm_file, + 
f_dsp=args.dsp_file, + f_hit=args.hit_file, + f_evt=None, + evt_config=muon_config, + evt_group="evt", + tcm_group="hardware_tcm_2", + dsp_group="dsp", + hit_group="hit", + tcm_id_table_pattern="ch{}", + ) + muon_tbl = Table(col_dict={"muon": muon_table}) + sto.write(obj=muon_tbl, name="evt2", lh5_file=temp_output, wo_mode="a") tbl = Table(col_dict=tables) sto.write(obj=tbl, name="evt", lh5_file=temp_output, wo_mode="a") From 340e36df704774f062cc4f80b2577bed159d23e2 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 2 Apr 2024 00:34:30 +0200 Subject: [PATCH 051/103] remove threshold for fft files --- scripts/pars_hit_qc.py | 3 --- scripts/pars_pht_qc.py | 3 --- 2 files changed, 6 deletions(-) diff --git a/scripts/pars_hit_qc.py b/scripts/pars_hit_qc.py index 110dfa9..2d6e47f 100644 --- a/scripts/pars_hit_qc.py +++ b/scripts/pars_hit_qc.py @@ -148,9 +148,6 @@ f"{args.channel}/dsp", {}, [*list(kwarg_dict_fft["cut_parameters"]), "timestamp", "trapTmax"], - threshold=kwarg_dict_fft["threshold"], - return_selection_mask=False, - cal_energy_param="trapTmax", ) hit_dict_fft, plot_dict_fft = generate_cut_classifiers( diff --git a/scripts/pars_pht_qc.py b/scripts/pars_pht_qc.py index 8eff510..6613084 100644 --- a/scripts/pars_pht_qc.py +++ b/scripts/pars_pht_qc.py @@ -195,9 +195,6 @@ f"{args.channel}/dsp", {}, [*list(kwarg_dict_fft["cut_parameters"]), "timestamp", "trapTmax"], - threshold=kwarg_dict_fft["threshold"], - return_selection_mask=False, - cal_energy_param="trapTmax", ) hit_dict_fft, plot_dict_fft = generate_cut_classifiers( From ecc5481b59688b11222eb0add5b1204360b7d888 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 2 Apr 2024 00:41:56 +0200 Subject: [PATCH 052/103] bugfix to use correct fft data --- scripts/pars_hit_qc.py | 2 +- scripts/pars_pht_qc.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/pars_hit_qc.py b/scripts/pars_hit_qc.py index 2d6e47f..08e317b 100644 --- a/scripts/pars_hit_qc.py +++ b/scripts/pars_hit_qc.py @@ -151,7 +151,7 @@ ) hit_dict_fft, plot_dict_fft = generate_cut_classifiers( - data, + fft_data, kwarg_dict_fft["cut_parameters"], kwarg_dict.get("rounding", 4), display=1 if args.plot_path else 0, diff --git a/scripts/pars_pht_qc.py b/scripts/pars_pht_qc.py index 6613084..1e2f712 100644 --- a/scripts/pars_pht_qc.py +++ b/scripts/pars_pht_qc.py @@ -198,7 +198,7 @@ ) hit_dict_fft, plot_dict_fft = generate_cut_classifiers( - data, + fft_data, kwarg_dict_fft["cut_parameters"], kwarg_dict.get("rounding", 4), display=1 if args.plot_path else 0, From 0898a47bf1c6d90a69e872919795e93b1c5ecadc Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 2 Apr 2024 00:55:54 +0200 Subject: [PATCH 053/103] fix fft field loading --- scripts/pars_hit_qc.py | 10 +++++++++- scripts/pars_pht_qc.py | 10 +++++++++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/scripts/pars_hit_qc.py b/scripts/pars_hit_qc.py index 08e317b..a214941 100644 --- a/scripts/pars_hit_qc.py +++ b/scripts/pars_hit_qc.py @@ -143,11 +143,19 @@ kwarg_dict_fft = kwarg_dict["fft_fields"] if len(args.fft_files) > 0: + fft_fields = get_keys( + [ + key.replace(f"{args.channel}/dsp/", "") + for key in ls(args.fft_files[0], f"{args.channel}/dsp/") + ], + kwarg_dict_fft["cut_parameters"], + ) + fft_data = load_data( args.fft_files, f"{args.channel}/dsp", {}, - [*list(kwarg_dict_fft["cut_parameters"]), "timestamp", "trapTmax"], + [*fft_fields, "timestamp", "trapTmax"], ) hit_dict_fft, plot_dict_fft = generate_cut_classifiers( diff --git a/scripts/pars_pht_qc.py 
b/scripts/pars_pht_qc.py index 1e2f712..1c9bc19 100644 --- a/scripts/pars_pht_qc.py +++ b/scripts/pars_pht_qc.py @@ -190,11 +190,19 @@ ) # need this as sometimes files get double counted as it somehow puts in the p%-* filelist and individual runs also if len(fft_files) > 0: + fft_fields = get_keys( + [ + key.replace(f"{args.channel}/dsp/", "") + for key in ls(fft_files[0], f"{args.channel}/dsp/") + ], + kwarg_dict_fft["cut_parameters"], + ) + fft_data = load_data( fft_files, f"{args.channel}/dsp", {}, - [*list(kwarg_dict_fft["cut_parameters"]), "timestamp", "trapTmax"], + [*fft_fields, "timestamp", "trapTmax"], ) hit_dict_fft, plot_dict_fft = generate_cut_classifiers( From d25354d7dceb4144014edc39ed1de33db706e8b1 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 2 Apr 2024 16:27:20 +0200 Subject: [PATCH 054/103] higher tol for ac channels --- scripts/pars_hit_ecal.py | 6 ++++-- scripts/pars_pht_partcal.py | 4 +++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/scripts/pars_hit_ecal.py b/scripts/pars_hit_ecal.py index 07a3c8f..0f9138d 100644 --- a/scripts/pars_hit_ecal.py +++ b/scripts/pars_hit_ecal.py @@ -550,7 +550,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): (e_uncal > np.nanpercentile(e_uncal, 95)) & (e_uncal < np.nanpercentile(e_uncal, 99.9)) ], - dx=1, + dx=9, range=[np.nanpercentile(e_uncal, 95), np.nanpercentile(e_uncal, 99.9)], ) @@ -561,7 +561,9 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): guess, kwarg_dict.get("deg", 0), ) - full_object_dict[cal_energy_param].hpge_get_energy_peaks(e_uncal) + full_object_dict[cal_energy_param].hpge_get_energy_peaks( + e_uncal, etol_kev=5 if det_status == "on" else 10 + ) got_peaks_kev = full_object_dict[cal_energy_param].peaks_kev.copy() full_object_dict[cal_energy_param].hpge_fit_energy_peaks( e_uncal, diff --git a/scripts/pars_pht_partcal.py b/scripts/pars_pht_partcal.py index 7063f8a..623be81 100644 --- a/scripts/pars_pht_partcal.py +++ b/scripts/pars_pht_partcal.py @@ -332,7 +332,9 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): full_object_dict[cal_energy_param] = HPGeCalibration( energy_param, glines, 1, kwarg_dict.get("deg", 0), fixed={1: 1} ) - full_object_dict[cal_energy_param].hpge_get_energy_peaks(energy) + full_object_dict[cal_energy_param].hpge_get_energy_peaks( + energy, etol_kev=5 if det_status == "on" else 10 + ) full_object_dict[cal_energy_param].hpge_fit_energy_peaks( energy, peak_pars=pk_pars, From 824c4f0435d459a0060afc0f0e2554f20dcc0ee9 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 2 Apr 2024 16:41:44 +0200 Subject: [PATCH 055/103] add bin widths --- scripts/pars_hit_ecal.py | 2 ++ scripts/pars_pht_partcal.py | 1 + 2 files changed, 3 insertions(+) diff --git a/scripts/pars_hit_ecal.py b/scripts/pars_hit_ecal.py index 0f9138d..f94f803 100644 --- a/scripts/pars_hit_ecal.py +++ b/scripts/pars_hit_ecal.py @@ -573,6 +573,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): n_events=kwarg_dict.get("n_events", None), allowed_p_val=kwarg_dict.get("p_val", 0), update_cal_pars=bool(det_status == "on"), + bin_width_kev=0.5, ) full_object_dict[cal_energy_param].hpge_fit_energy_peaks( e_uncal, @@ -582,6 +583,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): n_events=kwarg_dict.get("n_events", None), allowed_p_val=kwarg_dict.get("p_val", 0), update_cal_pars=False, + bin_width_kev=0.5, ) full_object_dict[cal_energy_param].get_energy_res_curve( diff --git 
a/scripts/pars_pht_partcal.py b/scripts/pars_pht_partcal.py index 623be81..73461f4 100644 --- a/scripts/pars_pht_partcal.py +++ b/scripts/pars_pht_partcal.py @@ -342,6 +342,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): n_events=kwarg_dict.get("n_events", None), allowed_p_val=kwarg_dict.get("p_val", 0), update_cal_pars=bool(det_status == "on"), + bin_width_kev=0.25, ) full_object_dict[cal_energy_param].get_energy_res_curve( From 667c06716172c41daa5e9e37ee0dec173f3d730d Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Wed, 3 Apr 2024 13:40:55 +0200 Subject: [PATCH 056/103] add muon flag code --- scripts/build_evt.py | 40 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/scripts/build_evt.py b/scripts/build_evt.py index 7c2ed11..f1897fa 100644 --- a/scripts/build_evt.py +++ b/scripts/build_evt.py @@ -9,7 +9,7 @@ import numpy as np from legendmeta import LegendMetadata from legendmeta.catalog import Props -from lgdo.types import Table +from lgdo.types import Array, Table from pygama.evt.build_evt import build_evt sto = lh5.LH5Store() @@ -28,6 +28,22 @@ def replace_evt_with_key(dic, new_key): return dic +def find_matching_values_with_delay(arr1, arr2, jit_delay): + matching_values = [] + + # Create an array with all possible delay values + delays = np.arange(0, int(1e9 * jit_delay)) * jit_delay + + for delay in delays: + arr2_delayed = arr2 + delay + + # Find matching values and indices + mask = np.isin(arr1, arr2_delayed, assume_unique=True) + matching_values.extend(arr1[mask]) + + return np.unique(matching_values) + + argparser = argparse.ArgumentParser() argparser.add_argument("--hit_file", help="hit file", type=str) argparser.add_argument("--dsp_file", help="dsp file", type=str) @@ -133,7 +149,8 @@ def replace_evt_with_key(dic, new_key): ) if "muon_config" in config_dict and config_dict["muon_config"] is not None: - muon_config = Props.read_from(config_dict["muon_config"]) + muon_config = Props.read_from(config_dict["muon_config"]["evt_config"]) + field_config = Props.read_from(config_dict["muon_config"]["field_config"]) # block for snakemake to fill in channel lists for field, dic in muon_config["channels"].items(): if isinstance(dic, dict): @@ -149,6 +166,10 @@ def replace_evt_with_key(dic, new_key): else: chans = [] muon_config["channels"][field] = chans + + trigger_timestamp = tables[field_config["ged_timestamp"]["table"]][ + field_config["ged_timestamp"]["field"] + ].nda if "hardware_tcm_2" in lh5.ls(args.tcm_file): muon_table = build_evt( f_tcm=args.tcm_file, @@ -165,6 +186,21 @@ def replace_evt_with_key(dic, new_key): muon_tbl = Table(col_dict={"muon": muon_table}) sto.write(obj=muon_tbl, name="evt2", lh5_file=temp_output, wo_mode="a") + muon_timestamp = muon_table[field_config["muon_timestamp"]["field"]].nda + muon_tbl_flag = muon_table[field_config["muon_flag"]["field"]].nda + if len(muon_timestamp[muon_tbl_flag]) > 0: + is_muon_veto_triggered = find_matching_values_with_delay( + trigger_timestamp, muon_timestamp[muon_tbl_flag], field_config["jitter"] + ) + muon_flag = np.isin(trigger_timestamp, is_muon_veto_triggered) + else: + muon_flag = np.zeros(len(trigger_timestamp), dtype=bool) + else: + muon_flag = np.zeros(len(trigger_timestamp), dtype=bool) + tables[field_config["output_field"]["table"]].add_column( + field_config["output_field"]["field"], Array(muon_flag) + ) + tbl = Table(col_dict=tables) sto.write(obj=tbl, name="evt", lh5_file=temp_output, wo_mode="a") From 
35c91d2bfe23f091b45ad61b46b8bdc560b57327 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sun, 7 Apr 2024 00:41:17 +0200 Subject: [PATCH 057/103] use pulser for cut determ --- scripts/pars_hit_qc.py | 2 +- scripts/pars_pht_qc.py | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/scripts/pars_hit_qc.py b/scripts/pars_hit_qc.py index a214941..3eeef8d 100644 --- a/scripts/pars_hit_qc.py +++ b/scripts/pars_hit_qc.py @@ -109,7 +109,7 @@ msg = "No pulser file or tcm filelist provided" raise ValueError(msg) - data["is_pulser"] = mask[threshold_mask] + data = data[mask[threshold_mask]] if "initial_cal_cuts" in kwarg_dict: init_cal = kwarg_dict["initial_cal_cuts"] diff --git a/scripts/pars_pht_qc.py b/scripts/pars_pht_qc.py index 1c9bc19..d427fe3 100644 --- a/scripts/pars_pht_qc.py +++ b/scripts/pars_pht_qc.py @@ -135,7 +135,11 @@ msg = "No pulser file or tcm filelist provided" raise ValueError(msg) - data["is_pulser"] = mask[threshold_mask] + if len(mask[threshold_mask])==0: + mask= np.random.choice(len(data), 20000) + data = data[mask] + else: + data = data[mask[threshold_mask]] if "initial_cal_cuts" in kwarg_dict: init_cal = kwarg_dict["initial_cal_cuts"] From d8c7976b501cc52fa01e796f001fd55b33b8eb14 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sun, 7 Apr 2024 00:41:45 +0200 Subject: [PATCH 058/103] fix log names, update memory requirements --- rules/pht.smk | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/rules/pht.smk b/rules/pht.smk index 142ca72..c19e35e 100644 --- a/rules/pht.smk +++ b/rules/pht.smk @@ -105,7 +105,7 @@ for key, dataset in part.datasets.items(): group: "par-pht" resources: - mem_swap=len(part.get_filelists(partition, key, intier)) * 20, + mem_swap=len(part.get_filelists(partition, key, intier)) * 15, runtime=300, shell: "{swenv} python3 -B " @@ -151,7 +151,7 @@ rule build_pht_qc: hit_pars=temp(get_pattern_pars_tmp_channel(setup, "pht", "qc")), plot_file=temp(get_pattern_plts_tmp_channel(setup, "pht", "qc")), log: - get_pattern_log_channel(setup, "pars_pht_qc"), + get_pattern_log_channel(setup, "par_pht_qc"), group: "par-pht" resources: @@ -212,7 +212,7 @@ rule build_per_energy_calibration: ), plot_file=temp(get_pattern_plts_tmp_channel(setup, "pht", "energy_cal")), log: - get_pattern_log_channel(setup, "pars_pht_energy_cal"), + get_pattern_log_channel(setup, "par_pht_energy_cal"), group: "par-pht" resources: @@ -327,7 +327,7 @@ for key, dataset in part.datasets.items(): group: "par-pht" resources: - mem_swap=len(part.get_filelists(partition, key, intier)) * 20, + mem_swap=len(part.get_filelists(partition, key, intier)) * 15, runtime=300, shell: "{swenv} python3 -B " @@ -384,7 +384,7 @@ rule build_pht_energy_super_calibrations: ), plot_file=temp(get_pattern_plts_tmp_channel(setup, "pht", "partcal")), log: - get_pattern_log_channel(setup, "pars_pht_partcal"), + get_pattern_log_channel(setup, "par_pht_partcal"), group: "par-pht" resources: @@ -509,7 +509,7 @@ for key, dataset in part.datasets.items(): group: "par-pht" resources: - mem_swap=len(part.get_filelists(partition, key, intier)) * 20, + mem_swap=len(part.get_filelists(partition, key, intier)) * 15, runtime=300, shell: "{swenv} python3 -B " @@ -565,7 +565,7 @@ rule build_pht_aoe_calibrations: ), plot_file=temp(get_pattern_plts_tmp_channel(setup, "pht", "aoecal")), log: - get_pattern_log_channel(setup, "pars_pht_aoe_cal"), + get_pattern_log_channel(setup, "par_pht_aoe_cal"), group: "par-pht" resources: @@ -687,7 +687,7 @@ for key, dataset in 
part.datasets.items(): group: "par-pht" resources: - mem_swap=len(part.get_filelists(partition, key, intier)) * 20, + mem_swap=len(part.get_filelists(partition, key, intier)) * 15, runtime=300, shell: "{swenv} python3 -B " @@ -738,7 +738,7 @@ rule build_pht_lq_calibration: ), plot_file=temp(get_pattern_plts_tmp_channel(setup, "pht")), log: - get_pattern_log_channel(setup, "pars_pht_lq_cal"), + get_pattern_log_channel(setup, "par_pht_lq_cal"), group: "par-pht" resources: From 699ff4588cf46b8f3fd0c63e8b37c82bb3c79245 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sun, 7 Apr 2024 12:13:11 +0200 Subject: [PATCH 059/103] widen window for ac dets and fix for qc for det with no pulser --- Snakefile | 6 +++--- scripts/pars_hit_ecal.py | 2 +- scripts/pars_hit_qc.py | 6 +++++- scripts/pars_pht_qc.py | 4 ++-- 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/Snakefile b/Snakefile index 4d732bf..ae61549 100644 --- a/Snakefile +++ b/Snakefile @@ -55,9 +55,9 @@ wildcard_constraints: include: "rules/common.smk" include: "rules/main.smk" include: "rules/tcm.smk" -include: "rules/dsp.smk" -include: "rules/psp.smk" -include: "rules/hit.smk" +# include: "rules/dsp.smk" +# include: "rules/psp.smk" +# include: "rules/hit.smk" include: "rules/pht.smk" include: "rules/evt.smk" include: "rules/skm.smk" diff --git a/scripts/pars_hit_ecal.py b/scripts/pars_hit_ecal.py index f94f803..553c051 100644 --- a/scripts/pars_hit_ecal.py +++ b/scripts/pars_hit_ecal.py @@ -562,7 +562,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): kwarg_dict.get("deg", 0), ) full_object_dict[cal_energy_param].hpge_get_energy_peaks( - e_uncal, etol_kev=5 if det_status == "on" else 10 + e_uncal, etol_kev=5 if det_status == "on" else 20 ) got_peaks_kev = full_object_dict[cal_energy_param].peaks_kev.copy() full_object_dict[cal_energy_param].hpge_fit_energy_peaks( diff --git a/scripts/pars_hit_qc.py b/scripts/pars_hit_qc.py index 3eeef8d..d5917e8 100644 --- a/scripts/pars_hit_qc.py +++ b/scripts/pars_hit_qc.py @@ -109,7 +109,11 @@ msg = "No pulser file or tcm filelist provided" raise ValueError(msg) - data = data[mask[threshold_mask]] + if len(mask[threshold_mask]) < 100: + mask = np.random.Generator.choice(len(data), 4000 * len(args.cal_files), replace=False) + data = data[mask] + else: + data = data[mask[threshold_mask]] if "initial_cal_cuts" in kwarg_dict: init_cal = kwarg_dict["initial_cal_cuts"] diff --git a/scripts/pars_pht_qc.py b/scripts/pars_pht_qc.py index d427fe3..510d00c 100644 --- a/scripts/pars_pht_qc.py +++ b/scripts/pars_pht_qc.py @@ -135,8 +135,8 @@ msg = "No pulser file or tcm filelist provided" raise ValueError(msg) - if len(mask[threshold_mask])==0: - mask= np.random.choice(len(data), 20000) + if len(mask[threshold_mask]) < 100: + mask = np.random.Generator.choice(len(data), 4000 * len(args.cal_files), replace=False) data = data[mask] else: data = data[mask[threshold_mask]] From 3f63f5d1b4435d897e4339aa01e9a536b67442b8 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sun, 7 Apr 2024 13:25:24 +0200 Subject: [PATCH 060/103] lower find peaks threshold as some dets have low events --- scripts/pars_hit_ecal.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/scripts/pars_hit_ecal.py b/scripts/pars_hit_ecal.py index 553c051..edee334 100644 --- a/scripts/pars_hit_ecal.py +++ b/scripts/pars_hit_ecal.py @@ -550,7 +550,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): (e_uncal > np.nanpercentile(e_uncal, 95)) & (e_uncal < 
np.nanpercentile(e_uncal, 99.9)) ], - dx=9, + dx=1, range=[np.nanpercentile(e_uncal, 95), np.nanpercentile(e_uncal, 99.9)], ) @@ -564,6 +564,10 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): full_object_dict[cal_energy_param].hpge_get_energy_peaks( e_uncal, etol_kev=5 if det_status == "on" else 20 ) + if 2614.50 not in full_object_dict[cal_energy_param].peaks_kev: + full_object_dict[cal_energy_param].hpge_get_energy_peaks( + e_uncal, peaks_kev=glines, etol_kev=5 if det_status == "on" else 30, n_sigma=2 + ) got_peaks_kev = full_object_dict[cal_energy_param].peaks_kev.copy() full_object_dict[cal_energy_param].hpge_fit_energy_peaks( e_uncal, From b99c9881833915e8cab5709377f554827f3c0ec7 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sun, 7 Apr 2024 13:25:36 +0200 Subject: [PATCH 061/103] require more events --- scripts/pars_pht_qc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/pars_pht_qc.py b/scripts/pars_pht_qc.py index 510d00c..1015840 100644 --- a/scripts/pars_pht_qc.py +++ b/scripts/pars_pht_qc.py @@ -135,7 +135,7 @@ msg = "No pulser file or tcm filelist provided" raise ValueError(msg) - if len(mask[threshold_mask]) < 100: + if len(mask[threshold_mask]) < 100 * len(args.cal_files): mask = np.random.Generator.choice(len(data), 4000 * len(args.cal_files), replace=False) data = data[mask] else: From e037915f929aa88501b9a5d1149e94bb93a18ea1 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sun, 7 Apr 2024 15:43:49 +0200 Subject: [PATCH 062/103] initial cuts use non pulser waveforms, normal use pulser if possible --- scripts/pars_hit_qc.py | 17 +++++++++++------ scripts/pars_pht_qc.py | 17 +++++++++++------ 2 files changed, 22 insertions(+), 12 deletions(-) diff --git a/scripts/pars_hit_qc.py b/scripts/pars_hit_qc.py index d5917e8..841a5c1 100644 --- a/scripts/pars_hit_qc.py +++ b/scripts/pars_hit_qc.py @@ -109,16 +109,16 @@ msg = "No pulser file or tcm filelist provided" raise ValueError(msg) - if len(mask[threshold_mask]) < 100: - mask = np.random.Generator.choice(len(data), 4000 * len(args.cal_files), replace=False) - data = data[mask] - else: - data = data[mask[threshold_mask]] + data["is_pulser"] = mask[threshold_mask] + + mask = np.random.Generator.choice( + len(data.query("~is_pulser")), 4000 * len(args.cal_files), replace=False + ) if "initial_cal_cuts" in kwarg_dict: init_cal = kwarg_dict["initial_cal_cuts"] hit_dict_init_cal, plot_dict_init_cal = generate_cut_classifiers( - data, + data.query("~is_pulser")[mask], init_cal["cut_parameters"], init_cal.get("rounding", 4), display=1 if args.plot_path else 0, @@ -138,6 +138,11 @@ hit_dict_init_cal = {} plot_dict_init_cal = {} + if len(data.query("is_pulser")) > 500: + data = data.query("is_pulser") + else: + data = data.query("~is_pulser")[mask] + hit_dict_cal, plot_dict_cal = generate_cut_classifiers( data, kwarg_dict_cal["cut_parameters"], diff --git a/scripts/pars_pht_qc.py b/scripts/pars_pht_qc.py index 1015840..6376f02 100644 --- a/scripts/pars_pht_qc.py +++ b/scripts/pars_pht_qc.py @@ -135,16 +135,16 @@ msg = "No pulser file or tcm filelist provided" raise ValueError(msg) - if len(mask[threshold_mask]) < 100 * len(args.cal_files): - mask = np.random.Generator.choice(len(data), 4000 * len(args.cal_files), replace=False) - data = data[mask] - else: - data = data[mask[threshold_mask]] + data["is_pulser"] = mask[threshold_mask] + + mask = np.random.Generator.choice( + len(data.query("~is_pulser")), 4000 * len(args.cal_files), replace=False + ) if "initial_cal_cuts" in 
kwarg_dict: init_cal = kwarg_dict["initial_cal_cuts"] hit_dict_init_cal, plot_dict_init_cal = generate_cut_classifiers( - data, + data.query("~is_pulser")[mask], init_cal["cut_parameters"], init_cal.get("rounding", 4), display=1 if args.plot_path else 0, @@ -167,6 +167,11 @@ hit_dict_init_cal = {} plot_dict_init_cal = {} + if len(data.query("is_pulser")) > 500 * len(args.cal_files): + data = data.query("is_pulser") + else: + data = data.query("~is_pulser")[mask] + hit_dict_cal, plot_dict_cal = generate_cut_classifiers( data, kwarg_dict_cal["cut_parameters"], From 0316188af4983b6b2eb73ee04d9e929e07112102 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sun, 7 Apr 2024 17:27:05 +0200 Subject: [PATCH 063/103] fix numpy choice --- scripts/pars_hit_qc.py | 6 ++---- scripts/pars_pht_qc.py | 5 +++-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/scripts/pars_hit_qc.py b/scripts/pars_hit_qc.py index 841a5c1..beae8f1 100644 --- a/scripts/pars_hit_qc.py +++ b/scripts/pars_hit_qc.py @@ -110,10 +110,8 @@ raise ValueError(msg) data["is_pulser"] = mask[threshold_mask] - - mask = np.random.Generator.choice( - len(data.query("~is_pulser")), 4000 * len(args.cal_files), replace=False - ) + rng = np.random.default_rng() + mask = sorted(rng.choice(len(data.query("~is_pulser")), 4000, replace=False)) if "initial_cal_cuts" in kwarg_dict: init_cal = kwarg_dict["initial_cal_cuts"] diff --git a/scripts/pars_pht_qc.py b/scripts/pars_pht_qc.py index 6376f02..bfad2b7 100644 --- a/scripts/pars_pht_qc.py +++ b/scripts/pars_pht_qc.py @@ -137,8 +137,9 @@ data["is_pulser"] = mask[threshold_mask] - mask = np.random.Generator.choice( - len(data.query("~is_pulser")), 4000 * len(args.cal_files), replace=False + rng = np.random.default_rng() + mask = sorted( + rng.choice(len(data.query("~is_pulser")), 4000 * len(args.cal_files), replace=False) ) if "initial_cal_cuts" in kwarg_dict: From bb96fa45ed6f40520430caf6bde8d466c411b558 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sun, 7 Apr 2024 21:56:09 +0200 Subject: [PATCH 064/103] fix mask --- scripts/pars_hit_qc.py | 3 ++- scripts/pars_pht_qc.py | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/scripts/pars_hit_qc.py b/scripts/pars_hit_qc.py index beae8f1..e3cf429 100644 --- a/scripts/pars_hit_qc.py +++ b/scripts/pars_hit_qc.py @@ -111,7 +111,8 @@ data["is_pulser"] = mask[threshold_mask] rng = np.random.default_rng() - mask = sorted(rng.choice(len(data.query("~is_pulser")), 4000, replace=False)) + mask = np.full(len(data.query("~is_pulser")), False, dtype=bool) + mask[rng.choice(len(data.query("~is_pulser")), 4000, replace=False)] = True if "initial_cal_cuts" in kwarg_dict: init_cal = kwarg_dict["initial_cal_cuts"] diff --git a/scripts/pars_pht_qc.py b/scripts/pars_pht_qc.py index bfad2b7..a13e8cb 100644 --- a/scripts/pars_pht_qc.py +++ b/scripts/pars_pht_qc.py @@ -138,9 +138,10 @@ data["is_pulser"] = mask[threshold_mask] rng = np.random.default_rng() - mask = sorted( + mask = np.full(len(data.query("~is_pulser")), False, dtype=bool) + mask[ rng.choice(len(data.query("~is_pulser")), 4000 * len(args.cal_files), replace=False) - ) + ] = True if "initial_cal_cuts" in kwarg_dict: init_cal = kwarg_dict["initial_cal_cuts"] From e0d5c278b99933bb892ac0fb837894d0e0bb524e Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 8 Apr 2024 11:52:18 +0200 Subject: [PATCH 065/103] fix mask --- scripts/pars_hit_qc.py | 1 + scripts/pars_pht_qc.py | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/scripts/pars_hit_qc.py 
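Patches 063 and 064 above settle on the standard NumPy sampling idiom: np.random.Generator.choice is an instance method, so calling it on the class (as patch 059 did) raises a TypeError, and the drawn indices are then turned into a boolean mask so the random selection can be combined with the other event masks. A minimal sketch:

import numpy as np

rng = np.random.default_rng()  # seedable, e.g. np.random.default_rng(1234)

n_events, n_draw = 100_000, 4_000
mask = np.full(n_events, False, dtype=bool)
mask[rng.choice(n_events, n_draw, replace=False)] = True  # unique indices

assert mask.sum() == n_draw
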
b/scripts/pars_hit_qc.py index e3cf429..c59e99d 100644 --- a/scripts/pars_hit_qc.py +++ b/scripts/pars_hit_qc.py @@ -132,6 +132,7 @@ ct_mask = ct_mask & data[outname] data = data[ct_mask] + mask = mask[ct_mask] else: hit_dict_init_cal = {} diff --git a/scripts/pars_pht_qc.py b/scripts/pars_pht_qc.py index a13e8cb..5e9a722 100644 --- a/scripts/pars_pht_qc.py +++ b/scripts/pars_pht_qc.py @@ -140,7 +140,7 @@ rng = np.random.default_rng() mask = np.full(len(data.query("~is_pulser")), False, dtype=bool) mask[ - rng.choice(len(data.query("~is_pulser")), 4000 * len(args.cal_files), replace=False) + rng.choice(len(data.query("~is_pulser")), 2000 * len(args.cal_files), replace=False) ] = True if "initial_cal_cuts" in kwarg_dict: @@ -162,6 +162,7 @@ ct_mask = ct_mask & data[outname] data = data[ct_mask] + mask = mask[ct_mask] log.debug("initial cal cuts applied") log.debug(f"cut_dict is: {json.dumps(hit_dict_init_cal, indent=2)}") @@ -169,7 +170,7 @@ hit_dict_init_cal = {} plot_dict_init_cal = {} - if len(data.query("is_pulser")) > 500 * len(args.cal_files): + if len(data.query("is_pulser")) > 200 * len(args.cal_files): data = data.query("is_pulser") else: data = data.query("~is_pulser")[mask] From e930a53fb99c94d12447d979967f433f099daa6e Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Tue, 9 Apr 2024 15:34:26 +0200 Subject: [PATCH 066/103] Update build_evt.py to support latest pygama build_evt() --- scripts/build_evt.py | 81 ++++++++++++++------------------------------ 1 file changed, 26 insertions(+), 55 deletions(-) diff --git a/scripts/build_evt.py b/scripts/build_evt.py index 606dc50..aabd961 100644 --- a/scripts/build_evt.py +++ b/scripts/build_evt.py @@ -9,7 +9,6 @@ import numpy as np from legendmeta import LegendMetadata from legendmeta.catalog import Props -from lgdo.types import Table from pygama.evt.build_evt import build_evt sto = lh5.LH5Store() @@ -38,6 +37,7 @@ else: logging.basicConfig(level=logging.DEBUG) +logging.getLogger("legendmeta").setLevel(logging.INFO) logging.getLogger("numba").setLevel(logging.INFO) logging.getLogger("parse").setLevel(logging.INFO) logging.getLogger("lgdo").setLevel(logging.INFO) @@ -59,47 +59,25 @@ meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp) -if isinstance(evt_config_file, dict): - evt_config = {} - for key, _evt_config in evt_config_file.items(): - if _evt_config is not None: - _evt_config = Props.read_from(_evt_config) - # block for snakemake to fill in channel lists - for field, dic in _evt_config["channels"].items(): - if isinstance(dic, dict): - chans = chmap.map("system", unique=False)[dic["system"]] - if "selectors" in dic: - try: - for k, val in dic["selectors"].items(): - chans = chans.map(k, unique=False)[val] - except KeyError: - chans = None - if chans is not None: - chans = [f"ch{chan}" for chan in list(chans.map("daq.rawid"))] - else: - chans = [] - _evt_config["channels"][field] = chans - - evt_config[key] = _evt_config -else: - evt_config = {"all": Props.read_from(evt_config_file)} - # block for snakemake to fill in channel lists - for field, dic in evt_config["channels"].items(): - if isinstance(dic, dict): - chans = chmap.map("system", unique=False)[dic["system"]] - if "selectors" in dic: - try: - for k, val in dic["selectors"].items(): - chans = chans.map(k, unique=False)[val] - except KeyError: - chans = None - if chans is not None: - chans = [f"ch{chan}" for chan in list(chans.map("daq.rawid"))] - else: - chans = [] - evt_config["channels"][field] = chans - -log.debug(json.dumps(evt_config, 
indent=2)) +evt_config = Props.read_from(evt_config_file) + +# block for snakemake to fill in channel lists +for field, dic in evt_config["channels"].items(): + if isinstance(dic, dict): + chans = chmap.map("system", unique=False)[dic["system"]] + if "selectors" in dic: + try: + for k, val in dic["selectors"].items(): + chans = chans.map(k, unique=False)[val] + except KeyError: + chans = None + if chans is not None: + chans = [f"ch{chan}" for chan in list(chans.map("daq.rawid"))] + else: + chans = [] + evt_config["channels"][field] = chans + +log.debug(json.dumps(evt_config["channels"], indent=2)) t_start = time.time() pathlib.Path(os.path.dirname(args.output)).mkdir(parents=True, exist_ok=True) @@ -108,22 +86,15 @@ rand_num = f"{rng.integers(0,99999):05d}" temp_output = f"{args.output}.{rand_num}" -tables = {} -for key, config in evt_config.items(): - datainfo = { +build_evt( + { "tcm": (args.tcm_file, "hardware_tcm_1", "ch{}"), "dsp": (args.dsp_file, "dsp", "ch{}"), "hit": (args.hit_file, "hit", "ch{}"), - "evt": (None, "evt"), - } - - tables[key] = build_evt( - datainfo, - config, - ) - -tbl = Table(col_dict=tables) -sto.write(obj=tbl, name="evt", lh5_file=temp_output, wo_mode="a") + "evt": (temp_output, "evt"), + }, + evt_config, +) os.rename(temp_output, args.output) t_elap = time.time() - t_start From 1d381aaa4e1fd3f334172a91570d6302d11ab976 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 16 Apr 2024 08:33:44 +0000 Subject: [PATCH 067/103] style: pre-commit fixes --- scripts/pars_hit_ecal.py | 2 +- scripts/pars_pht_qc.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/pars_hit_ecal.py b/scripts/pars_hit_ecal.py index e6cb61a..edee334 100644 --- a/scripts/pars_hit_ecal.py +++ b/scripts/pars_hit_ecal.py @@ -706,4 +706,4 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): # save calibration objects with open(args.results_path, "wb") as fp: pathlib.Path(os.path.dirname(args.results_path)).mkdir(parents=True, exist_ok=True) - pkl.dump({"ecal": full_object_dict}, fp, protocol=pkl.HIGHEST_PROTOCOL) \ No newline at end of file + pkl.dump({"ecal": full_object_dict}, fp, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_pht_qc.py b/scripts/pars_pht_qc.py index 5e9a722..2390097 100644 --- a/scripts/pars_pht_qc.py +++ b/scripts/pars_pht_qc.py @@ -139,9 +139,9 @@ rng = np.random.default_rng() mask = np.full(len(data.query("~is_pulser")), False, dtype=bool) - mask[ - rng.choice(len(data.query("~is_pulser")), 2000 * len(args.cal_files), replace=False) - ] = True + mask[rng.choice(len(data.query("~is_pulser")), 2000 * len(args.cal_files), replace=False)] = ( + True + ) if "initial_cal_cuts" in kwarg_dict: init_cal = kwarg_dict["initial_cal_cuts"] From 34c32c0b0929e3f03bb90dbe9b2ecb9e995f1ab5 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Tue, 16 Apr 2024 11:22:05 +0200 Subject: [PATCH 068/103] Simplify evt.smk --- rules/common.smk | 25 ++++++++++++ rules/evt.smk | 99 ++++++++++++++++++------------------------------ 2 files changed, 62 insertions(+), 62 deletions(-) diff --git a/rules/common.smk b/rules/common.smk index b5fba4d..427d465 100644 --- a/rules/common.smk +++ b/rules/common.smk @@ -99,3 +99,28 @@ def get_pattern(tier): return get_pattern_tier_daq(setup) else: return get_pattern_tier_raw(setup) + + +def set_last_rule_name(workflow, new_name): + """Sets the name of the most recently created rule to be `new_name`. 
+ Useful when creating rules dynamically (i.e. unnamed). + + Warning + ------- + This could mess up the workflow. Use at your own risk. + """ + rules = workflow._rules + last_key = next(reversed(rules)) + assert last_key == rules[last_key].name + + rules[new_name] = rules.pop(last_key) + rules[new_name].name = new_name + + if workflow.default_target == last_key: + workflow.default_target = new_name + + if last_key in workflow._localrules: + workflow._localrules.remove(last_key) + workflow._localrules.add(new_name) + + workflow.check_localrules() diff --git a/rules/evt.smk b/rules/evt.smk index 9cc6e13..c880c88 100644 --- a/rules/evt.smk +++ b/rules/evt.smk @@ -14,67 +14,42 @@ from scripts.util.patterns import ( ) -rule build_evt: - input: - dsp_file=get_pattern_tier_dsp(setup), - hit_file=get_pattern_tier_hit(setup), - tcm_file=get_pattern_tier_tcm(setup), - output: - evt_file=get_pattern_tier(setup, "evt", check_in_cycle=check_in_cycle), - params: - timestamp="{timestamp}", - datatype="{datatype}", - tier="evt", - log: - get_pattern_log(setup, "tier_evt"), - group: - "tier-evt" - resources: - runtime=300, - mem_swap=70, - shell: - "{swenv} python3 -B " - f"{workflow.source_path('../scripts/build_evt.py')} " - "--configs {configs} " - "--metadata {meta} " - "--log {log} " - "--tier {params.tier} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--hit_file {input.hit_file} " - "--tcm_file {input.tcm_file} " - "--dsp_file {input.dsp_file} " - "--output {output.evt_file} " +for tier in ("evt", "pet"): + rule: + input: + dsp_file=get_pattern_tier_dsp(setup), + hit_file=( + get_pattern_tier_hit(setup) + if tier == "evt" + else get_pattern_tier_pht(setup) + ), + tcm_file=get_pattern_tier_tcm(setup), + output: + evt_file=get_pattern_tier(setup, tier, check_in_cycle=check_in_cycle), + params: + timestamp="{timestamp}", + datatype="{datatype}", + tier=tier, + log: + get_pattern_log(setup, f"tier_{tier}"), + group: + "tier-evt" + resources: + runtime=300, + mem_swap=70, + shell: + "{swenv} python3 -B " + f"{workflow.source_path('../scripts/build_evt.py')} " + "--configs {configs} " + "--metadata {meta} " + "--log {log} " + "--tier {params.tier} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--hit_file {input.hit_file} " + "--tcm_file {input.tcm_file} " + "--dsp_file {input.dsp_file} " + "--output {output.evt_file} " -rule build_pet: - input: - dsp_file=get_pattern_tier_dsp(setup), - hit_file=get_pattern_tier_pht(setup), - tcm_file=get_pattern_tier_tcm(setup), - output: - evt_file=get_pattern_tier(setup, "pet", check_in_cycle=check_in_cycle), - params: - timestamp="{timestamp}", - datatype="{datatype}", - tier="pet", - log: - get_pattern_log(setup, "tier_pet"), - group: - "tier-evt" - resources: - runtime=300, - mem_swap=70, - shell: - "{swenv} python3 -B " - f"{workflow.source_path('../scripts/build_evt.py')} " - "--configs {configs} " - "--log {log} " - "--tier {params.tier} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--metadata {meta} " - "--hit_file {input.hit_file} " - "--tcm_file {input.tcm_file} " - "--dsp_file {input.dsp_file} " - "--output {output.evt_file} " + set_last_rule_name(workflow, f"build_{tier}") From a39dcae9f85d129249d1da181945d1515f838f53 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Tue, 16 Apr 2024 12:11:09 +0200 Subject: [PATCH 069/103] Add rule to concatenate evt files in a run --- rules/evt.smk | 19 +++++++++++++++++++ scripts/util/utils.py | 2 +- 2 files changed, 20 insertions(+), 1 
deletion(-) diff --git a/rules/evt.smk b/rules/evt.smk index c880c88..c84dce8 100644 --- a/rules/evt.smk +++ b/rules/evt.smk @@ -53,3 +53,22 @@ for tier in ("evt", "pet"): "--output {output.evt_file} " set_last_rule_name(workflow, f"build_{tier}") + + rule: + input: + lambda wildcards: sorted(read_filelist_phy(wildcards, tier)), + output: + get_pattern_tier(setup, tier, check_in_cycle=check_in_cycle), + params: + timestamp="all", + datatype="{datatype}", + log: + get_pattern_log(setup, "tier_skm"), + group: + "tier-evt" + shell: + "{swenv} lh5concat --verbose --overwrite " + "--output {output} " + "-- {input} &> {log}" + + set_last_rule_name(workflow, f"concat_{tier}") diff --git a/scripts/util/utils.py b/scripts/util/utils.py index d767610..b4cbdcf 100644 --- a/scripts/util/utils.py +++ b/scripts/util/utils.py @@ -191,7 +191,7 @@ def runcmd(setup): exec_cmd = setup["execenv"]["cmd"] exec_arg = setup["execenv"]["arg"] path_install = setup["paths"]["install"] - return f"PYTHONUSERBASE={path_install} {exec_cmd} {exec_arg}" + return f"PYTHONUSERBASE={path_install} APPTAINERENV_PREPEND_PATH={path_install}/bin {exec_cmd} {exec_arg}" def subst_vars_impl(x, var_values, ignore_missing=False): From 915aa326b7be573d1c3c9c84580629d7d6026be8 Mon Sep 17 00:00:00 2001 From: Legend Data Management User Date: Thu, 18 Apr 2024 14:37:08 +0200 Subject: [PATCH 070/103] Add better wildcard_constraints --- Snakefile | 9 ++++----- Snakefile-build-raw | 8 ++++++++ rules/evt.smk | 2 ++ scripts/build_evt.py | 2 +- 4 files changed, 15 insertions(+), 6 deletions(-) diff --git a/Snakefile b/Snakefile index ae61549..5d0f359 100644 --- a/Snakefile +++ b/Snakefile @@ -45,11 +45,10 @@ basedir = workflow.basedir wildcard_constraints: experiment="\w+", - period="\w+", - run="\w+", - datatype="\w+", - timestamp="\w+", - channel="\w+", + period="p\d{2}", + run="r\d{3}", + datatype="\w{3}", + timestamp="\d{8}T\d{6}Z" include: "rules/common.smk" diff --git a/Snakefile-build-raw b/Snakefile-build-raw index 02362c6..ecb08b4 100644 --- a/Snakefile-build-raw +++ b/Snakefile-build-raw @@ -40,6 +40,14 @@ meta = metadata_path(setup) basedir = workflow.basedir +wildcard_constraints: + experiment="\w+", + period="p\d{2}", + run="r\d{3}", + datatype="\w{3}", + timestamp="\d{8}T\d{6}Z" + + localrules: gen_filelist, autogen_output, diff --git a/rules/evt.smk b/rules/evt.smk index c84dce8..c399808 100644 --- a/rules/evt.smk +++ b/rules/evt.smk @@ -55,6 +55,8 @@ for tier in ("evt", "pet"): set_last_rule_name(workflow, f"build_{tier}") rule: + wildcard_constraints: + timestamp="(?!\d{8}T\d{6}Z)" input: lambda wildcards: sorted(read_filelist_phy(wildcards, tier)), output: diff --git a/scripts/build_evt.py b/scripts/build_evt.py index baef99d..bba8084 100644 --- a/scripts/build_evt.py +++ b/scripts/build_evt.py @@ -109,7 +109,7 @@ def find_matching_values_with_delay(arr1, arr2, jit_delay): "tcm": (args.tcm_file, "hardware_tcm_1", "ch{}"), "dsp": (args.dsp_file, "dsp", "ch{}"), "hit": (args.hit_file, "hit", "ch{}"), - "evt": (temp_output, "evt"), + "evt": (None, "evt"), }, evt_config, ) From efb11aadc5241cd4b7f9d33355afdf95fc0d849d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 18 Apr 2024 12:37:42 +0000 Subject: [PATCH 071/103] style: pre-commit fixes --- Snakefile | 2 +- Snakefile-build-raw | 2 +- rules/evt.smk | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Snakefile b/Snakefile index 5d0f359..7a0cbb9 100644 --- a/Snakefile +++ b/Snakefile @@ -48,7 
+48,7 @@ wildcard_constraints: period="p\d{2}", run="r\d{3}", datatype="\w{3}", - timestamp="\d{8}T\d{6}Z" + timestamp="\d{8}T\d{6}Z", include: "rules/common.smk" diff --git a/Snakefile-build-raw b/Snakefile-build-raw index ecb08b4..edbc7d8 100644 --- a/Snakefile-build-raw +++ b/Snakefile-build-raw @@ -45,7 +45,7 @@ wildcard_constraints: period="p\d{2}", run="r\d{3}", datatype="\w{3}", - timestamp="\d{8}T\d{6}Z" + timestamp="\d{8}T\d{6}Z", localrules: diff --git a/rules/evt.smk b/rules/evt.smk index c399808..9da6d63 100644 --- a/rules/evt.smk +++ b/rules/evt.smk @@ -56,7 +56,7 @@ for tier in ("evt", "pet"): rule: wildcard_constraints: - timestamp="(?!\d{8}T\d{6}Z)" + timestamp="(?!\d{8}T\d{6}Z)", input: lambda wildcards: sorted(read_filelist_phy(wildcards, tier)), output: From 313f7b6c3e53105059440218265cef2c4412bc0c Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Thu, 18 Apr 2024 23:24:51 +0200 Subject: [PATCH 072/103] switch to psp --- rules/evt.smk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rules/evt.smk b/rules/evt.smk index 9cc6e13..0ab5f4d 100644 --- a/rules/evt.smk +++ b/rules/evt.smk @@ -49,7 +49,7 @@ rule build_evt: rule build_pet: input: - dsp_file=get_pattern_tier_dsp(setup), + dsp_file=get_pattern_tier_psp(setup), hit_file=get_pattern_tier_pht(setup), tcm_file=get_pattern_tier_tcm(setup), output: From 808e8c4a69a8441d1ac682fc42a41cc8fda64eae Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Thu, 18 Apr 2024 23:25:56 +0200 Subject: [PATCH 073/103] fix missing rules --- Snakefile | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Snakefile b/Snakefile index ae61549..83b7f3e 100644 --- a/Snakefile +++ b/Snakefile @@ -55,9 +55,9 @@ wildcard_constraints: include: "rules/common.smk" include: "rules/main.smk" include: "rules/tcm.smk" -# include: "rules/dsp.smk" -# include: "rules/psp.smk" -# include: "rules/hit.smk" +include: "rules/dsp.smk" +include: "rules/psp.smk" +include: "rules/hit.smk" include: "rules/pht.smk" include: "rules/evt.smk" include: "rules/skm.smk" @@ -113,7 +113,7 @@ onsuccess: if os.path.isfile(file): os.remove(file) - # remove filelists + # remove filelists files = glob.glob(os.path.join(filelist_path(setup), "*")) for file in files: if os.path.isfile(file): @@ -121,7 +121,7 @@ onsuccess: if os.path.exists(filelist_path(setup)): os.rmdir(filelist_path(setup)) - # remove logs + # remove logs files = glob.glob(os.path.join(tmp_log_path(setup), "*", "*.log")) for file in files: if os.path.isfile(file): From ce1329f66586973cdf7e2ca6950170f142c82954 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Thu, 18 Apr 2024 23:26:27 +0200 Subject: [PATCH 074/103] v1 svm --- rules/dsp.smk | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/rules/dsp.smk b/rules/dsp.smk index 9ea2e7f..f526132 100644 --- a/rules/dsp.smk +++ b/rules/dsp.smk @@ -208,6 +208,28 @@ rule build_pars_dsp_eopt: "--final_dsp_pars {output.dsp_pars}" +# This rule builds the optimal energy filter parameters for the dsp using calibration dsp files +# rule build_pars_dsp_svm: +# input: +# hyperpars="", +# train_data="", +# output: +# dsp_pars=get_pattern_pars(setup, "dsp", "svm"), +# log: +# get_pattern_log_channel(setup, "pars_dsp_svm"), +# group: +# "par-dsp" +# resources: +# runtime=300, +# shell: +# "{swenv} python3 -B " +# f"{workflow.source_path('../scripts/pars_dsp_svm.py')} " +# "--log {log} " +# "--train_data {input.train_data} " +# "--train_hyperpars {input.hyperpars} " +# "--output_file {output.dsp_pars}" + + rule build_plts_dsp: 
input: lambda wildcards: read_filelist_plts_cal_channel(wildcards, "dsp"), From bdd6adbb81fe133527865f123043ee05cad446c6 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Thu, 18 Apr 2024 23:27:04 +0200 Subject: [PATCH 075/103] use non pulser evts --- scripts/pars_pht_qc.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/scripts/pars_pht_qc.py b/scripts/pars_pht_qc.py index 5e9a722..a7b6657 100644 --- a/scripts/pars_pht_qc.py +++ b/scripts/pars_pht_qc.py @@ -114,12 +114,12 @@ ) if args.pulser_files: - mask = np.array([], dtype=bool) + total_mask = np.array([], dtype=bool) for file in args.pulser_files: with open(file) as f: pulser_dict = json.load(f) pulser_mask = np.array(pulser_dict["mask"]) - mask = np.append(mask, pulser_mask) + total_mask = np.append(total_mask, pulser_mask) if "pulser_multiplicity_threshold" in kwarg_dict: kwarg_dict.pop("pulser_multiplicity_threshold") @@ -128,14 +128,14 @@ with open(args.tcm_filelist) as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) - ids, mask = get_tcm_pulser_ids( + ids, total_mask = get_tcm_pulser_ids( tcm_files, args.channel, kwarg_dict["pulser_multiplicity_threshold"] ) else: msg = "No pulser file or tcm filelist provided" raise ValueError(msg) - data["is_pulser"] = mask[threshold_mask] + data["is_pulser"] = total_mask[threshold_mask] rng = np.random.default_rng() mask = np.full(len(data.query("~is_pulser")), False, dtype=bool) @@ -161,8 +161,8 @@ if "classifier" not in outname: ct_mask = ct_mask & data[outname] - data = data[ct_mask] - mask = mask[ct_mask] + mask = mask[ct_mask[~data["is_pulser"].to_numpy()]] + data = data[ct_mask] log.debug("initial cal cuts applied") log.debug(f"cut_dict is: {json.dumps(hit_dict_init_cal, indent=2)}") @@ -170,10 +170,7 @@ hit_dict_init_cal = {} plot_dict_init_cal = {} - if len(data.query("is_pulser")) > 200 * len(args.cal_files): - data = data.query("is_pulser") - else: - data = data.query("~is_pulser")[mask] + data = data.query("~is_pulser")[mask] hit_dict_cal, plot_dict_cal = generate_cut_classifiers( data, From cfc9fd542e6d4021e9081ec9f49a9c0af57f60a0 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Fri, 19 Apr 2024 14:54:11 +0200 Subject: [PATCH 076/103] support evt and skm 1 file per run --- rules/evt.smk | 5 +++-- rules/skm.smk | 24 ++++++------------------ scripts/create_filelist.py | 28 +++++++++++++++++++--------- scripts/util/patterns.py | 38 ++++++++++++++++++++++++++++++-------- 4 files changed, 58 insertions(+), 37 deletions(-) diff --git a/rules/evt.smk b/rules/evt.smk index fb86875..2e29306 100644 --- a/rules/evt.smk +++ b/rules/evt.smk @@ -11,6 +11,7 @@ from scripts.util.patterns import ( get_pattern_tier, get_pattern_log, get_pattern_pars, + get_pattern_log_concat, ) @@ -64,12 +65,12 @@ for tier in ("evt", "pet"): input: lambda wildcards: sorted(read_filelist_phy(wildcards, tier)), output: - get_pattern_tier(setup, tier, check_in_cycle=check_in_cycle), + get_pattern_tier(setup, f"{tier}_concat", check_in_cycle=check_in_cycle), params: timestamp="all", datatype="{datatype}", log: - get_pattern_log(setup, "tier_skm"), + get_pattern_log_concat(setup, f"tier_{tier}_concat"), group: "tier-evt" shell: diff --git a/rules/skm.smk b/rules/skm.smk index c4356fa..d83b8a8 100644 --- a/rules/skm.smk +++ b/rules/skm.smk @@ -6,28 +6,20 @@ from scripts.util.patterns import ( get_pattern_tier, get_pattern_log, get_pattern_pars, + get_pattern_log_concat, ) rule build_skm: input: - dsp_files=os.path.join( - filelist_path(setup), 
"all-{experiment}-{period}-{run}-phy-dsp.filelist" - ), - hit_files=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-phy-pht.filelist" - ), - tcm_files=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-phy-tcm.filelist" - ), - evt_files=lambda wildcards: read_filelist_phy(wildcards, "pet"), + evt_file = get_pattern_tier(setup, "pet_concat", check_in_cycle=False), output: skm_file=get_pattern_tier(setup, "skm", check_in_cycle=check_in_cycle), params: - timestamp="{timestamp}", - datatype="{datatype}", + timestamp="all", + datatype="phy", log: - get_pattern_log(setup, "tier_skm"), + get_pattern_log_concat(setup, "tier_skm"), group: "tier-skm" resources: @@ -39,9 +31,5 @@ rule build_skm: "--metadata {meta} " "--log {log} " "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--hit_files {input.hit_files} " - "--tcm_files {input.tcm_files} " - "--dsp_files {input.dsp_files} " - "--evt_files {input.evt_files} " + "--evt_file {input.evt_file} " "--output {output.skm_file} " diff --git a/scripts/create_filelist.py b/scripts/create_filelist.py index 8900343..1de40e2 100644 --- a/scripts/create_filelist.py +++ b/scripts/create_filelist.py @@ -57,6 +57,8 @@ other_filenames = [] if tier == "blind": fn_pattern = get_pattern_tier(setup, "raw", check_in_cycle=False) +elif tier == "skm": + fn_pattern = get_pattern_tier(setup, "pet", check_in_cycle=False) else: fn_pattern = get_pattern_tier(setup, tier, check_in_cycle=False) @@ -70,7 +72,7 @@ else: if tier == "blind" and _key.datatype == "phy": filename = FileKey.get_path_from_filekey(_key, get_pattern_tier_raw_blind(setup)) - elif tier == "skm" and _key.datatype != "phy": + elif tier == "skm": #and _key.datatype != "phy" filename = FileKey.get_path_from_filekey( _key, get_pattern_tier(setup, "pet", check_in_cycle=False) ) @@ -101,17 +103,25 @@ phy_filenames = sorted(phy_filenames) other_filenames = sorted(other_filenames) -if tier == "skm": +if tier == "skm" or tier == "pet" or tier == "evt": sorted_phy_filenames = run_grouper(phy_filenames) phy_filenames = [] for run in sorted_phy_filenames: - run_files = sorted( - run, - key=lambda filename: FileKey.get_filekey_from_pattern( - filename, fn_pattern - ).get_unix_timestamp(), - ) - phy_filenames.append(run_files[0]) + key = FileKey.get_filekey_from_pattern(run[0], fn_pattern) + if tier == "skm": + out_key = FileKey.get_path_from_filekey( + key, get_pattern_tier(setup, "skm", check_in_cycle=False) + )[0] + elif tier == "pet": + out_key = FileKey.get_path_from_filekey( + key, get_pattern_tier(setup, "pet_concat", check_in_cycle=False) + )[0] + elif tier == "evt": + out_key = FileKey.get_path_from_filekey( + key, get_pattern_tier(setup, "evt_concat", check_in_cycle=False) + )[0] + + phy_filenames.append(out_key) filenames = phy_filenames + other_filenames diff --git a/scripts/util/patterns.py b/scripts/util/patterns.py index 7d381b2..52c9f9e 100644 --- a/scripts/util/patterns.py +++ b/scripts/util/patterns.py @@ -3,6 +3,7 @@ """ import os +import pathlib from .utils import ( par_dsp_path, @@ -146,6 +147,13 @@ def get_pattern_tier_evt(setup): "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_evt.lh5", ) +def get_pattern_tier_evt_concat(setup): + return os.path.join( + f"{tier_evt_path(setup)}", + "{datatype}", + "{experiment}-{period}-{run}-{datatype}-tier_evt.lh5", + ) + def get_pattern_tier_psp(setup): return os.path.join( @@ -176,14 +184,19 @@ def get_pattern_tier_pet(setup): 
"{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_pet.lh5", ) +def get_pattern_tier_pet_concat(setup): + return os.path.join( + f"{tier_pet_path(setup)}", + "{datatype}", + "{experiment}-{period}-{run}-{datatype}-tier_pet.lh5", + ) + def get_pattern_tier_skm(setup): return os.path.join( f"{tier_skm_path(setup)}", - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_skm.lh5", + "phy", + "{experiment}-{period}-{run}-{datatype}-tier_skm.lh5", ) @@ -200,21 +213,24 @@ def get_pattern_tier(setup, tier, check_in_cycle=True): file_pattern = get_pattern_tier_hit(setup) elif tier == "evt": file_pattern = get_pattern_tier_evt(setup) + elif tier == "evt_concat": + file_pattern = get_pattern_tier_evt_concat(setup) elif tier == "psp": file_pattern = get_pattern_tier_psp(setup) elif tier == "pht": file_pattern = get_pattern_tier_pht(setup) elif tier == "pet": file_pattern = get_pattern_tier_pet(setup) + elif tier == "pet_concat": + file_pattern = get_pattern_tier_pet_concat(setup) elif tier == "skm": file_pattern = get_pattern_tier_skm(setup) else: msg = "invalid tier" raise Exception(msg) if ( - tier_path(setup) not in file_pattern + tier_path(setup) not in str(pathlib.Path(file_pattern).resolve()) and check_in_cycle is True - and ".." not in file_pattern ): return "/tmp/{experiment}-{period}-{run}-{datatype}-{timestamp}" + f"tier_{tier}.lh5" else: @@ -394,9 +410,8 @@ def get_pattern_pars(setup, tier, name=None, extension="json", check_in_cycle=Tr msg = "invalid tier" raise Exception(msg) if ( - pars_path(setup) not in file_pattern + pars_path(setup) not in str(pathlib.Path(file_pattern).resolve()) and check_in_cycle is True - and ".." not in file_pattern ): if name is None: return "/tmp/{experiment}-{period}-{run}-cal-{timestamp}" + f"par_{tier}.{extension}" @@ -527,6 +542,13 @@ def get_pattern_log(setup, processing_step): "{experiment}-{period}-{run}-{datatype}-{timestamp}-" + processing_step + ".log", ) +def get_pattern_log_concat(setup, processing_step): + return os.path.join( + f"{tmp_log_path(setup)}", + processing_step, + "{experiment}-{period}-{run}-{datatype}-" + processing_step + ".log", + ) + def get_pattern_log_channel(setup, processing_step): return os.path.join( From d364f490d345bfe1b8f238942edf2ffb48b32bce Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 19 Apr 2024 12:54:29 +0000 Subject: [PATCH 077/103] style: pre-commit fixes --- Snakefile | 4 ++-- rules/skm.smk | 2 +- scripts/create_filelist.py | 14 +++++++------- scripts/pars_pht_qc.py | 2 +- scripts/util/patterns.py | 3 +++ 5 files changed, 14 insertions(+), 11 deletions(-) diff --git a/Snakefile b/Snakefile index 3440d8f..67bfaba 100644 --- a/Snakefile +++ b/Snakefile @@ -112,7 +112,7 @@ onsuccess: if os.path.isfile(file): os.remove(file) - # remove filelists + # remove filelists files = glob.glob(os.path.join(filelist_path(setup), "*")) for file in files: if os.path.isfile(file): @@ -120,7 +120,7 @@ onsuccess: if os.path.exists(filelist_path(setup)): os.rmdir(filelist_path(setup)) - # remove logs + # remove logs files = glob.glob(os.path.join(tmp_log_path(setup), "*", "*.log")) for file in files: if os.path.isfile(file): diff --git a/rules/skm.smk b/rules/skm.smk index d83b8a8..3c9a619 100644 --- a/rules/skm.smk +++ b/rules/skm.smk @@ -12,7 +12,7 @@ from scripts.util.patterns import ( rule build_skm: input: - evt_file = get_pattern_tier(setup, "pet_concat", check_in_cycle=False), + 
evt_file=get_pattern_tier(setup, "pet_concat", check_in_cycle=False), output: skm_file=get_pattern_tier(setup, "skm", check_in_cycle=check_in_cycle), params: diff --git a/scripts/create_filelist.py b/scripts/create_filelist.py index 1de40e2..217b6bb 100644 --- a/scripts/create_filelist.py +++ b/scripts/create_filelist.py @@ -72,7 +72,7 @@ else: if tier == "blind" and _key.datatype == "phy": filename = FileKey.get_path_from_filekey(_key, get_pattern_tier_raw_blind(setup)) - elif tier == "skm": #and _key.datatype != "phy" + elif tier == "skm": # and _key.datatype != "phy" filename = FileKey.get_path_from_filekey( _key, get_pattern_tier(setup, "pet", check_in_cycle=False) ) @@ -110,16 +110,16 @@ key = FileKey.get_filekey_from_pattern(run[0], fn_pattern) if tier == "skm": out_key = FileKey.get_path_from_filekey( - key, get_pattern_tier(setup, "skm", check_in_cycle=False) - )[0] + key, get_pattern_tier(setup, "skm", check_in_cycle=False) + )[0] elif tier == "pet": out_key = FileKey.get_path_from_filekey( - key, get_pattern_tier(setup, "pet_concat", check_in_cycle=False) - )[0] + key, get_pattern_tier(setup, "pet_concat", check_in_cycle=False) + )[0] elif tier == "evt": out_key = FileKey.get_path_from_filekey( - key, get_pattern_tier(setup, "evt_concat", check_in_cycle=False) - )[0] + key, get_pattern_tier(setup, "evt_concat", check_in_cycle=False) + )[0] phy_filenames.append(out_key) diff --git a/scripts/pars_pht_qc.py b/scripts/pars_pht_qc.py index b560db3..3d142b2 100644 --- a/scripts/pars_pht_qc.py +++ b/scripts/pars_pht_qc.py @@ -162,7 +162,7 @@ ct_mask = ct_mask & data[outname] mask = mask[ct_mask[~data["is_pulser"].to_numpy()]] - data = data[ct_mask] + data = data[ct_mask] log.debug("initial cal cuts applied") log.debug(f"cut_dict is: {json.dumps(hit_dict_init_cal, indent=2)}") diff --git a/scripts/util/patterns.py b/scripts/util/patterns.py index 52c9f9e..9f4338a 100644 --- a/scripts/util/patterns.py +++ b/scripts/util/patterns.py @@ -147,6 +147,7 @@ def get_pattern_tier_evt(setup): "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_evt.lh5", ) + def get_pattern_tier_evt_concat(setup): return os.path.join( f"{tier_evt_path(setup)}", @@ -184,6 +185,7 @@ def get_pattern_tier_pet(setup): "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_pet.lh5", ) + def get_pattern_tier_pet_concat(setup): return os.path.join( f"{tier_pet_path(setup)}", @@ -542,6 +544,7 @@ def get_pattern_log(setup, processing_step): "{experiment}-{period}-{run}-{datatype}-{timestamp}-" + processing_step + ".log", ) + def get_pattern_log_concat(setup, processing_step): return os.path.join( f"{tmp_log_path(setup)}", From 479e3ac1c638d5b0b22f4e20b804fb5f340e7dbf Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Fri, 19 Apr 2024 15:26:31 +0200 Subject: [PATCH 078/103] fix cyclic dependence --- scripts/create_filelist.py | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/scripts/create_filelist.py b/scripts/create_filelist.py index 1de40e2..c06af68 100644 --- a/scripts/create_filelist.py +++ b/scripts/create_filelist.py @@ -57,8 +57,10 @@ other_filenames = [] if tier == "blind": fn_pattern = get_pattern_tier(setup, "raw", check_in_cycle=False) -elif tier == "skm": +elif tier == "skm" or tier=="pet_concat": fn_pattern = get_pattern_tier(setup, "pet", check_in_cycle=False) +elif tier == "skm" or tier=="evt_concat": + fn_pattern = get_pattern_tier(setup, "evt", check_in_cycle=False) else: fn_pattern = get_pattern_tier(setup, tier, check_in_cycle=False) @@ -103,23 +105,14 @@ 
phy_filenames = sorted(phy_filenames) other_filenames = sorted(other_filenames) -if tier == "skm" or tier == "pet" or tier == "evt": +if tier == "skm" or tier == "pet_concat" or tier == "evt_concat": sorted_phy_filenames = run_grouper(phy_filenames) phy_filenames = [] for run in sorted_phy_filenames: key = FileKey.get_filekey_from_pattern(run[0], fn_pattern) - if tier == "skm": - out_key = FileKey.get_path_from_filekey( - key, get_pattern_tier(setup, "skm", check_in_cycle=False) - )[0] - elif tier == "pet": - out_key = FileKey.get_path_from_filekey( - key, get_pattern_tier(setup, "pet_concat", check_in_cycle=False) - )[0] - elif tier == "evt": - out_key = FileKey.get_path_from_filekey( - key, get_pattern_tier(setup, "evt_concat", check_in_cycle=False) - )[0] + out_key = FileKey.get_path_from_filekey( + key, get_pattern_tier(setup, tier, check_in_cycle=False) + )[0] phy_filenames.append(out_key) From 22400642bb5f24256b363689581bd687830a9c55 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 19 Apr 2024 13:27:53 +0000 Subject: [PATCH 079/103] style: pre-commit fixes --- Snakefile | 2 +- scripts/create_filelist.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Snakefile b/Snakefile index 67bfaba..e9e0a03 100644 --- a/Snakefile +++ b/Snakefile @@ -112,7 +112,7 @@ onsuccess: if os.path.isfile(file): os.remove(file) - # remove filelists + # remove filelists files = glob.glob(os.path.join(filelist_path(setup), "*")) for file in files: if os.path.isfile(file): diff --git a/scripts/create_filelist.py b/scripts/create_filelist.py index 8a8596b..9ea6b4e 100644 --- a/scripts/create_filelist.py +++ b/scripts/create_filelist.py @@ -57,9 +57,9 @@ other_filenames = [] if tier == "blind": fn_pattern = get_pattern_tier(setup, "raw", check_in_cycle=False) -elif tier == "skm" or tier=="pet_concat": +elif tier == "skm" or tier == "pet_concat": fn_pattern = get_pattern_tier(setup, "pet", check_in_cycle=False) -elif tier == "skm" or tier=="evt_concat": +elif tier == "skm" or tier == "evt_concat": fn_pattern = get_pattern_tier(setup, "evt", check_in_cycle=False) else: fn_pattern = get_pattern_tier(setup, tier, check_in_cycle=False) @@ -111,8 +111,8 @@ for run in sorted_phy_filenames: key = FileKey.get_filekey_from_pattern(run[0], fn_pattern) out_key = FileKey.get_path_from_filekey( - key, get_pattern_tier(setup, tier, check_in_cycle=False) - )[0] + key, get_pattern_tier(setup, tier, check_in_cycle=False) + )[0] phy_filenames.append(out_key) From 9062bfd62314e95b3a22ae78d35e9945cd3b7c9b Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 22 Apr 2024 18:51:11 +0200 Subject: [PATCH 080/103] bugfixes and formatting --- Snakefile | 4 ++-- scripts/create_filelist.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Snakefile b/Snakefile index 67bfaba..a806425 100644 --- a/Snakefile +++ b/Snakefile @@ -112,7 +112,7 @@ onsuccess: if os.path.isfile(file): os.remove(file) - # remove filelists + #remove filelists files = glob.glob(os.path.join(filelist_path(setup), "*")) for file in files: if os.path.isfile(file): @@ -120,7 +120,7 @@ onsuccess: if os.path.exists(filelist_path(setup)): os.rmdir(filelist_path(setup)) - # remove logs + # remove logs files = glob.glob(os.path.join(tmp_log_path(setup), "*", "*.log")) for file in files: if os.path.isfile(file): diff --git a/scripts/create_filelist.py b/scripts/create_filelist.py index 8a8596b..a40b77c 100644 --- a/scripts/create_filelist.py +++ 
b/scripts/create_filelist.py @@ -57,9 +57,9 @@ other_filenames = [] if tier == "blind": fn_pattern = get_pattern_tier(setup, "raw", check_in_cycle=False) -elif tier == "skm" or tier=="pet_concat": +elif tier == "skm" or tier == "pet_concat": fn_pattern = get_pattern_tier(setup, "pet", check_in_cycle=False) -elif tier == "skm" or tier=="evt_concat": +elif tier == "evt_concat": fn_pattern = get_pattern_tier(setup, "evt", check_in_cycle=False) else: fn_pattern = get_pattern_tier(setup, tier, check_in_cycle=False) @@ -111,8 +111,8 @@ for run in sorted_phy_filenames: key = FileKey.get_filekey_from_pattern(run[0], fn_pattern) out_key = FileKey.get_path_from_filekey( - key, get_pattern_tier(setup, tier, check_in_cycle=False) - )[0] + key, get_pattern_tier(setup, tier, check_in_cycle=False) + )[0] phy_filenames.append(out_key) From 198e4c924560db19f804b67d60c34127afe63407 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 22 Apr 2024 16:52:16 +0000 Subject: [PATCH 081/103] style: pre-commit fixes --- Snakefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Snakefile b/Snakefile index a806425..67bfaba 100644 --- a/Snakefile +++ b/Snakefile @@ -112,7 +112,7 @@ onsuccess: if os.path.isfile(file): os.remove(file) - #remove filelists + # remove filelists files = glob.glob(os.path.join(filelist_path(setup), "*")) for file in files: if os.path.isfile(file): @@ -120,7 +120,7 @@ onsuccess: if os.path.exists(filelist_path(setup)): os.rmdir(filelist_path(setup)) - # remove logs + # remove logs files = glob.glob(os.path.join(tmp_log_path(setup), "*", "*.log")) for file in files: if os.path.isfile(file): From f4dd3288ab7b65d87d3d2c01c6cc98ef0a7773d7 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Wed, 24 Apr 2024 17:47:10 +0200 Subject: [PATCH 082/103] add svm scripts --- scripts/pars_dsp_build_svm.py | 59 +++++++++++++++++++++++++++++++++++ scripts/pars_dsp_svm.py | 36 +++++++++++++++++++++ 2 files changed, 95 insertions(+) create mode 100644 scripts/pars_dsp_build_svm.py create mode 100644 scripts/pars_dsp_svm.py diff --git a/scripts/pars_dsp_build_svm.py b/scripts/pars_dsp_build_svm.py new file mode 100644 index 0000000..6a44fec --- /dev/null +++ b/scripts/pars_dsp_build_svm.py @@ -0,0 +1,59 @@ +import argparse +import json +import logging +import os +import pickle as pkl + +os.environ["LGDO_CACHE"] = "false" +os.environ["LGDO_BOUNDSCHECK"] = "false" +os.environ["DSPEED_CACHE"] = "false" +os.environ["DSPEED_BOUNDSCHECK"] = "false" + +import lgdo.lh5 as lh5 +from sklearn.svm import SVC + +argparser = argparse.ArgumentParser() +argparser.add_argument("--log", help="log file", type=str) +argparser.add_argument("--output_file", help="output SVM file", type=str, required=True) +argparser.add_argument("--train_data", help="input data file", type=str, required=True) +argparser.add_argument("--train_hyperpars", help="input hyperparameter file", required=True) +args = argparser.parse_args() + +logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") +logging.getLogger("parse").setLevel(logging.INFO) +logging.getLogger("lgdo").setLevel(logging.INFO) +logging.getLogger("h5py").setLevel(logging.INFO) + +sto = lh5.LH5Store() +log = logging.getLogger(__name__) + +# Load files +tb, _ = sto.read("ml_train/dsp", args.train_data) +log.debug("loaded data") + +with open(args.train_hyperpars) as hyperpars_file: + hyperpars = json.load(hyperpars_file) + +# Define training inputs +dwts_norm = tb["dwt_norm"].nda +labels = 
tb["dc_label"].nda + + +log.debug("training model") +# Initialize and train SVM +svm = SVC( + random_state=int(hyperpars["random_state"]), + kernel=hyperpars["kernel"], + decision_function_shape=hyperpars["decision_function_shape"], + class_weight=hyperpars["class_weight"], + C=float(hyperpars["C"]), + gamma=float(hyperpars["gamma"]), +) + +svm.fit(dwts_norm, labels) + +log.debug("trained model") + +# Save trained model with pickle +with open(args.output_file, "wb") as svm_file: + pkl.dump(svm, svm_file, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_dsp_svm.py b/scripts/pars_dsp_svm.py new file mode 100644 index 0000000..40f0a25 --- /dev/null +++ b/scripts/pars_dsp_svm.py @@ -0,0 +1,36 @@ +import argparse +import json +import logging +import os +import pathlib + +argparser = argparse.ArgumentParser() +argparser.add_argument("--log", help="log file", type=str) +argparser.add_argument("--output_file", help="output par file", type=str, required=True) +argparser.add_argument("--input_file", help="input par file", type=str, required=True) +argparser.add_argument("--svm_file", help="svm file", required=True) +args = argparser.parse_args() + + +if args.log is not None: + pathlib.Path(os.path.dirname(args.log)).mkdir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") +else: + logging.basicConfig(level=logging.DEBUG) + +logging.getLogger("parse").setLevel(logging.INFO) +logging.getLogger("lgdo").setLevel(logging.INFO) +logging.getLogger("h5py").setLevel(logging.INFO) + +log = logging.getLogger(__name__) + +with open(args.input_file) as r: + par_data = json.load(r) + +file = f"'$_/{os.path.basename(args.svm_file)}'" + +par_data["svm"] = {"model_file": file} + +pathlib.Path(os.path.dirname(args.output_file)).mkdir(parents=True, exist_ok=True) +with open(args.output_file, "w") as w: + json.dump(par_data, w, indent=4) From 7d051627812397f596b7e23042cd17acf12be251 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Wed, 24 Apr 2024 17:52:12 +0200 Subject: [PATCH 083/103] update cuts --- scripts/pars_dsp_eopt.py | 4 ++++ scripts/pars_dsp_event_selection.py | 21 +++++++++++++++++---- scripts/pars_dsp_tau.py | 23 ++++++++++++++--------- 3 files changed, 35 insertions(+), 13 deletions(-) diff --git a/scripts/pars_dsp_eopt.py b/scripts/pars_dsp_eopt.py index 4af1c37..86b5f7b 100644 --- a/scripts/pars_dsp_eopt.py +++ b/scripts/pars_dsp_eopt.py @@ -16,6 +16,7 @@ import lgdo.lh5 as lh5 import numpy as np +import pygama.pargen.energy_optimisation as om # noqa: F401 import sklearn.gaussian_process.kernels as ker from dspeed.units import unit_registry as ureg from legendmeta import LegendMetadata @@ -93,6 +94,7 @@ "func": hpge_peak, "peak": peak, "kev_width": kev_width, + "bin_width": 5, } ) kwarg_dicts_zac.append( @@ -101,6 +103,7 @@ "func": hpge_peak, "peak": peak, "kev_width": kev_width, + "bin_width": 5, } ) kwarg_dicts_trap.append( @@ -109,6 +112,7 @@ "func": hpge_peak, "peak": peak, "kev_width": kev_width, + "bin_width": 5, } ) diff --git a/scripts/pars_dsp_event_selection.py b/scripts/pars_dsp_event_selection.py index 44c1604..9100689 100644 --- a/scripts/pars_dsp_event_selection.py +++ b/scripts/pars_dsp_event_selection.py @@ -176,7 +176,17 @@ def get_out_data( raw_fields = [field.replace(lh5_path, "") for field in lh5.ls(raw_files[0], lh5_path)] - tb = sto.read(lh5_path, raw_files, field_mask=["daqenergy"])[0] + tb = sto.read(lh5_path, raw_files, field_mask=["daqenergy", "t_sat_lo", "timestamp"])[0] + + discharges = tb["t_sat_lo"].nda > 0 + 
discharge_timestamps = tb["timestamp"].nda[discharges]
+    is_recovering = np.full(len(tb), False, dtype=bool)
+    for tstamp in discharge_timestamps:
+        is_recovering = is_recovering | np.where(
+            (((tb["timestamp"].nda - tstamp) < 0.01) & ((tb["timestamp"].nda - tstamp) > 0)),
+            True,
+            False,
+        )

     for outname, info in raw_dict.items():
         outcol = tb.eval(info["expression"], info.get("parameters", None))
@@ -191,7 +201,7 @@ def get_out_data(
             & (rough_energy < peak + 1.1 * kev_width[0])
             & (~mask)
         )
-        masks[peak] = np.where(e_mask)[0]
+        masks[peak] = np.where(e_mask & (~is_recovering))[0]
         log.debug(f"{len(masks[peak])} events found in energy range for {peak}")

     input_data = sto.read(f"{lh5_path}", raw_files, n_rows=10000, idx=np.where(~mask)[0])[0]
@@ -272,14 +282,17 @@ def get_out_data(
     )
     peak_loc = pgh.get_bin_centers(bins)[np.nanargmax(hist)]

-    mu, _, _ = pgc.hpge_fit_energy_peak_tops(
+    peak_top_pars = pgc.hpge_fit_energy_peak_tops(
         hist,
         bins,
         var,
         [peak_loc],
         n_to_fit=7,
     )[0][0]
-
+    try:
+        mu = peak_top_pars[0]
+    except Exception:
+        mu = np.nan
     if mu is None or np.isnan(mu):
         log.debug("Fit failed, using max guess")
         rough_adc_to_kev = peak / peak_loc
diff --git a/scripts/pars_dsp_tau.py b/scripts/pars_dsp_tau.py
index 1e10ea5..8064308 100644
--- a/scripts/pars_dsp_tau.py
+++ b/scripts/pars_dsp_tau.py
@@ -82,11 +82,21 @@
         msg = "No pulser file or tcm filelist provided"
         raise ValueError(msg)

-    data = sto.read(f"{args.channel}/raw", input_file, field_mask=["daqenergy", "timestamp"])[
-        0
-    ].view_as("pd")
+    data = sto.read(
+        f"{args.channel}/raw", input_file, field_mask=["daqenergy", "timestamp", "t_sat_lo"]
+    )[0].view_as("pd")
     threshold = kwarg_dict.pop("threshold")
-    cuts = np.where((data.daqenergy.to_numpy() > threshold) & (~mask))[0]
+
+    discharges = data["t_sat_lo"] > 0
+    discharge_timestamps = data["timestamp"][discharges].to_numpy()
+    is_recovering = np.full(len(data), False, dtype=bool)
+    for tstamp in discharge_timestamps:
+        is_recovering = is_recovering | np.where(
+            (((data["timestamp"] - tstamp) < 0.01) & ((data["timestamp"] - tstamp) > 0)),
+            True,
+            False,
+        )
+    cuts = np.where((data.daqenergy.to_numpy() > threshold) & (~mask) & (~is_recovering))[0]

     tb_data = sto.read(
         f"{args.channel}/raw",
@@ -124,11 +134,6 @@
 else:
     out_dict = {}

-if args.pulser_file:
-    pathlib.Path(os.path.dirname(args.pulser_file)).mkdir(parents=True, exist_ok=True)
-    with open(args.pulser_file, "w") as f:
-        json.dump({"idxs": ids.tolist(), "mask": mask.tolist()}, f, indent=4)
-
 pathlib.Path(os.path.dirname(args.output_file)).mkdir(parents=True, exist_ok=True)
 with open(args.output_file, "w") as f:
     json.dump(tau.output_dict, f, indent=4)

From fbafe64a215d3220137ce210edb7c5bf07d5b073 Mon Sep 17 00:00:00 2001
From: ggmarshall
Date: Wed, 24 Apr 2024 17:59:33 +0200
Subject: [PATCH 084/103] filter db files to json or yaml

---
 scripts/pars_hit_ecal.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/scripts/pars_hit_ecal.py b/scripts/pars_hit_ecal.py
index edee334..e84e51f 100644
--- a/scripts/pars_hit_ecal.py
+++ b/scripts/pars_hit_ecal.py
@@ -459,7 +459,13 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string):
     if args.in_hit_dict:
         hit_dict = Props.read_from(args.in_hit_dict)

-    database_dic = Props.read_from(args.ctc_dict)
+    db_files = [
+        par_file
+        for par_file in args.ctc_dict
+        if os.path.splitext(par_file)[1] == ".json" or os.path.splitext(par_file)[1] == ".yml"
+    ]
+
+    database_dic = Props.read_from(db_files)

     hit_dict.update(database_dic[args.channel]["ctc_params"])


From 0d50328e779726d0e3ce3bee4152bf1f7fa3fea1 Mon Sep 17 00:00:00 2001
From: ggmarshall
Date: Wed, 24 Apr 2024 18:00:55 +0200
Subject: [PATCH 085/103] add new qc

---
 scripts/pars_hit_qc.py     | 120 +++++++++++++++++-------
 scripts/pars_pht_qc.py     | 182 +++++++++++++++++++++++++------------
 scripts/pars_pht_qc_phy.py |  55 +++++++++--
 3 files changed, 257 insertions(+), 100 deletions(-)

diff --git a/scripts/pars_hit_qc.py b/scripts/pars_hit_qc.py
index c59e99d..c432d69 100644
--- a/scripts/pars_hit_qc.py
+++ b/scripts/pars_hit_qc.py
@@ -62,6 +62,71 @@

     kwarg_dict = Props.read_from(channel_dict)

+    kwarg_dict_fft = kwarg_dict["fft_fields"]
+    if len(args.fft_files) > 0:
+        fft_fields = get_keys(
+            [
+                key.replace(f"{args.channel}/dsp/", "")
+                for key in ls(args.fft_files[0], f"{args.channel}/dsp/")
+            ],
+            kwarg_dict_fft["cut_parameters"],
+        )
+
+        fft_data = load_data(
+            args.fft_files,
+            f"{args.channel}/dsp",
+            {},
+            [*fft_fields, "timestamp", "trapTmax", "t_sat_lo"],
+        )
+
+        discharges = fft_data["t_sat_lo"] > 0
+        discharge_timestamps = fft_data["timestamp"][discharges].to_numpy()
+        is_recovering = np.full(len(fft_data), False, dtype=bool)
+        for tstamp in discharge_timestamps:
+            is_recovering = is_recovering | np.where(
+                (
+                    ((fft_data["timestamp"] - tstamp) < 0.01)
+                    & ((fft_data["timestamp"] - tstamp) > 0)
+                ),
+                True,
+                False,
+            )
+        fft_data["is_recovering"] = is_recovering
+
+        hit_dict_fft = {}
+        plot_dict_fft = {}
+        cut_data = fft_data.query("is_recovering==0")
+        log.debug(f"cut_data shape: {len(cut_data)}")
+        for name, cut in kwarg_dict_fft["cut_parameters"].items():
+            cut_dict, cut_plots = generate_cut_classifiers(
+                cut_data,
+                {name: cut},
+                kwarg_dict.get("rounding", 4),
+                display=1 if args.plot_path else 0,
+            )
+            hit_dict_fft.update(cut_dict)
+            plot_dict_fft.update(cut_plots)
+
+            log.debug(f"{name} calculated cut_dict is: {json.dumps(cut_dict, indent=2)}")
+
+            ct_mask = np.full(len(cut_data), True, dtype=bool)
+            for outname, info in cut_dict.items():
+                # convert to pandas eval
+                exp = info["expression"]
+                for key in info.get("parameters", {}):
+                    exp = re.sub(f"(?<![a-zA-Z0-9]){key}(?![a-zA-Z0-9])", f"@{key}", exp)

+    discharges = data["t_sat_lo"] > 0
+    discharge_timestamps = data["timestamp"][discharges].to_numpy()
+    is_recovering = np.full(len(data), False, dtype=bool)
+    for tstamp in discharge_timestamps:
+        is_recovering = is_recovering | np.where(
+            (((data["timestamp"] - tstamp) < 0.01) & ((data["timestamp"] - tstamp) > 0)),
+            True,
+            False,
+        )
+    data["is_recovering"] = is_recovering
+
     rng = np.random.default_rng()
-    mask = np.full(len(data.query("~is_pulser")), False, dtype=bool)
-    mask[rng.choice(len(data.query("~is_pulser")), 4000, replace=False)] = True
+    mask = np.full(len(data.query("~is_pulser & ~is_recovering")), False, dtype=bool)
+    mask[rng.choice(len(data.query("~is_pulser & ~is_recovering")), 4000, replace=False)] = True

     if "initial_cal_cuts" in kwarg_dict:
         init_cal = kwarg_dict["initial_cal_cuts"]
         hit_dict_init_cal, plot_dict_init_cal = generate_cut_classifiers(
-            data.query("~is_pulser")[mask],
+            data.query("~is_pulser & ~is_recovering")[mask],
             init_cal["cut_parameters"],
             init_cal.get("rounding", 4),
             display=1 if args.plot_path else 0,
@@ -138,10 +215,10 @@
         hit_dict_init_cal = {}
         plot_dict_init_cal = {}

-    if len(data.query("is_pulser")) > 500:
-        data = data.query("is_pulser")
+    if len(data.query("is_pulser & ~is_recovering")) > 500:
+        data = data.query("is_pulser & ~is_recovering")
     else:
-        data = data.query("~is_pulser")[mask]
+        data = data.query("~is_pulser & ~is_recovering")[mask]

     hit_dict_cal, plot_dict_cal = generate_cut_classifiers(
         data,
@@ -150,35 +227,8 @@
         display=1 if args.plot_path else 0,
     )

-    kwarg_dict_fft = kwarg_dict["fft_fields"]
-    if len(args.fft_files) > 0:
-        fft_fields = get_keys(
-            [
-                key.replace(f"{args.channel}/dsp/", "")
-                for key in ls(args.fft_files[0], f"{args.channel}/dsp/")
-            ],
-            kwarg_dict_fft["cut_parameters"],
-        )
-
-        fft_data = load_data(
-            args.fft_files,
-            f"{args.channel}/dsp",
-            {},
-            [*fft_fields, "timestamp", "trapTmax"],
-        )
-
-        hit_dict_fft, plot_dict_fft = generate_cut_classifiers(
-            fft_data,
-            kwarg_dict_fft["cut_parameters"],
-            kwarg_dict.get("rounding", 4),
-            display=1 if args.plot_path else 0,
-        )
-    else:
-        hit_dict_fft = {}
-        plot_dict_fft = {}
-
-    hit_dict = {**hit_dict_init_cal, **hit_dict_cal, **hit_dict_fft}
-    plot_dict = {**plot_dict_init_cal, **plot_dict_cal, **plot_dict_fft}
+    hit_dict = {**hit_dict_fft, **hit_dict_init_cal, **hit_dict_cal}
+    plot_dict = {**plot_dict_fft, **plot_dict_init_cal, **plot_dict_cal}

     pathlib.Path(os.path.dirname(args.save_path)).mkdir(parents=True, exist_ok=True)
     with open(args.save_path, "w") as f:
diff --git a/scripts/pars_pht_qc.py b/scripts/pars_pht_qc.py
index 3d142b2..18ff865 100644
--- a/scripts/pars_pht_qc.py
+++ b/scripts/pars_pht_qc.py
@@ -38,6 +38,9 @@
     argparser.add_argument(
         "--pulser_files", help="pulser_file", nargs="*", type=str, required=False
     )
+    argparser.add_argument(
+        "--overwrite_files", help="overwrite_files", nargs="*", type=str, required=False
+    )

     argparser.add_argument("--configs", help="config", type=str, required=True)
     argparser.add_argument("--datatype", help="Datatype", type=str, required=True)
@@ -83,6 +86,101 @@
     )  # need this as sometimes files get double counted as it somehow puts in the p%-* filelist and individual runs also

     kwarg_dict = Props.read_from(channel_dict)
+
+    if args.overwrite_files:
+        overwrite = Props.read_from(args.overwrite_files)[args.channel]["pars"]["operations"]
+    else:
+        overwrite = None
+
+    kwarg_dict_fft = kwarg_dict["fft_fields"]
+    if len(args.fft_files) > 0:
+        # sort files in dictionary where keys are first timestamp from run
+        if isinstance(args.fft_files, list):
+            fft_files = []
+            for file in args.fft_files:
+                with open(file) as f:
+                    fft_files += f.read().splitlines()
+        else:
+            with open(args.fft_files) as f:
+                fft_files = f.read().splitlines()
+
+        fft_files = sorted(
+            np.unique(fft_files)
+        )  # need this as sometimes files get double counted as it somehow puts in the p%-* filelist and individual runs also
+
+        if len(fft_files) > 0:
+            fft_fields = get_keys(
+                [
+                    key.replace(f"{args.channel}/dsp/", "")
+                    for key in ls(fft_files[0], f"{args.channel}/dsp/")
+                ],
+                kwarg_dict_fft["cut_parameters"],
+            )
+
+            fft_data = load_data(
+                fft_files,
+                f"{args.channel}/dsp",
+                {},
+                [*fft_fields, "timestamp", "trapTmax", "t_sat_lo"],
+            )
+
+            discharges = fft_data["t_sat_lo"] > 0
+            discharge_timestamps = fft_data["timestamp"][discharges].to_numpy()
+            is_recovering = np.full(len(fft_data), False, dtype=bool)
+            for tstamp in discharge_timestamps:
+                is_recovering = is_recovering | np.where(
+                    (
+                        ((fft_data["timestamp"] - tstamp) < 0.01)
+                        & ((fft_data["timestamp"] - tstamp) > 0)
+                    ),
+                    True,
+                    False,
+                )
+            fft_data["is_recovering"] = is_recovering
+
+            hit_dict_fft = {}
+            plot_dict_fft = {}
+            cut_data = fft_data.query("is_recovering==0")
+            log.debug(f"cut_data shape: {len(cut_data)}")
+            for name, cut in kwarg_dict_fft["cut_parameters"].items():
+                cut_dict, cut_plots = generate_cut_classifiers(
+                    cut_data,
+                    {name: cut},
+                    kwarg_dict.get("rounding", 4),
+                    display=1 if args.plot_path else 0,
+                )
+                hit_dict_fft.update(cut_dict)
+                plot_dict_fft.update(cut_plots)
+
+                log.debug(f"{name} calculated cut_dict is: {json.dumps(cut_dict, indent=2)}")
+
+                ct_mask = np.full(len(cut_data), True, dtype=bool)
+                for outname, info in cut_dict.items():
+                    # convert to pandas eval
+                    exp = info["expression"]
+                    for key in info.get("parameters", {}):
+                        exp = re.sub(f"(?<![a-zA-Z0-9]){key}(?![a-zA-Z0-9])", f"@{key}", exp)

+    discharges = data["t_sat_lo"] > 0
+    discharge_timestamps = data["timestamp"][discharges].to_numpy()
+    is_recovering = np.full(len(data), False, dtype=bool)
+    for tstamp in discharge_timestamps:
+        is_recovering = is_recovering | np.where(
+            (((data["timestamp"] - tstamp) < 0.01) & ((data["timestamp"] - tstamp) > 0)),
+            True,
+            False,
+        )
+    data["is_recovering"] = is_recovering
+
     rng = np.random.default_rng()
-    mask = np.full(len(data.query("~is_pulser")), False, dtype=bool)
-    mask[rng.choice(len(data.query("~is_pulser")), 2000 * len(args.cal_files), replace=False)] = (
-        True
-    )
+    mask = np.full(len(data.query("~is_pulser & ~is_recovering")), False, dtype=bool)
+    mask[
+        rng.choice(
+            len(data.query("~is_pulser & ~is_recovering")),
+            2000 * len(args.cal_files),
+            replace=False,
+        )
+    ] = True

     if "initial_cal_cuts" in kwarg_dict:
         init_cal = kwarg_dict["initial_cal_cuts"]
@@ -161,7 +274,7 @@
             if "classifier" not in outname:
                 ct_mask = ct_mask & data[outname]

-    mask = mask[ct_mask[~data["is_pulser"].to_numpy()]]
+    mask = mask[ct_mask[(~data["is_pulser"] & ~data["is_recovering"]).to_numpy()]]
     data = data[ct_mask]
     log.debug("initial cal cuts applied")
     log.debug(f"cut_dict is: {json.dumps(hit_dict_init_cal, indent=2)}")
@@ -170,7 +283,7 @@
         hit_dict_init_cal = {}
         plot_dict_init_cal = {}

-    data = data.query("~is_pulser")[mask]
+    data = data.query("~is_pulser & ~is_recovering")[mask]

     hit_dict_cal, plot_dict_cal = generate_cut_classifiers(
         data,
@@ -182,57 +295,14 @@
     log.debug("initial cuts applied")
     log.debug(f"cut_dict is: {json.dumps(hit_dict_cal, indent=2)}")

-    kwarg_dict_fft = kwarg_dict["fft_fields"]
-    if len(args.fft_files) > 0:
-        # sort files in dictionary where keys are first timestamp from run
-        if isinstance(args.fft_files, list):
-            fft_files = []
-            for file in args.fft_files:
-                with open(file) as f:
-                    fft_files += f.read().splitlines()
-        else:
-            with open(args.fft_files) as f:
-                fft_files = f.read().splitlines()
-
-        fft_files = sorted(
-            np.unique(fft_files)
-        )  # need this as sometimes files get double counted as it somehow puts in the p%-* filelist and individual runs also
-
-        if len(fft_files) > 0:
-            fft_fields = get_keys(
-                [
-                    key.replace(f"{args.channel}/dsp/", "")
-                    for key in ls(fft_files[0], f"{args.channel}/dsp/")
-                ],
-                kwarg_dict_fft["cut_parameters"],
-            )
-
-            fft_data = load_data(
-                fft_files,
-                f"{args.channel}/dsp",
-                {},
-                [*fft_fields, "timestamp", "trapTmax"],
-            )
-
-            hit_dict_fft, plot_dict_fft = generate_cut_classifiers(
-                fft_data,
-                kwarg_dict_fft["cut_parameters"],
-                kwarg_dict.get("rounding", 4),
-                display=1 if args.plot_path else 0,
-            )
-
-            log.debug("fft cuts applied")
-            log.debug(f"cut_dict is: {json.dumps(hit_dict_fft, indent=2)}")
-
-        else:
-            hit_dict_fft = {}
-            plot_dict_fft = {}
-    else:
-        hit_dict_fft = {}
-        plot_dict_fft = {}
+    if overwrite is not None:
+        for name in kwarg_dict_cal["cut_parameters"]:
+            for cut_name, cut_dict in overwrite.items():
+                if name in cut_name:
+                    hit_dict_cal.update({cut_name: cut_dict})

-    hit_dict = {**hit_dict_init_cal, **hit_dict_cal, **hit_dict_fft}
-    plot_dict = {**plot_dict_init_cal, **plot_dict_cal, **plot_dict_fft}
+    hit_dict = {**hit_dict_fft, **hit_dict_init_cal, **hit_dict_cal}
+    plot_dict = {**plot_dict_fft, **plot_dict_init_cal, **plot_dict_cal}

     for file in args.save_path:
         pathlib.Path(os.path.dirname(file)).mkdir(parents=True, exist_ok=True)
diff --git a/scripts/pars_pht_qc_phy.py b/scripts/pars_pht_qc_phy.py
index 8fe0a1f..8d26fdf 100644
--- a/scripts/pars_pht_qc_phy.py
+++ b/scripts/pars_pht_qc_phy.py
@@ -6,6 +6,7 @@
 import os
 import pathlib
 import pickle as pkl
+import re
 import warnings

 os.environ["PYGAMA_PARALLEL"] = "false"
@@ -99,15 +100,51 @@
     )

     data = sto.read(
-        f"{args.channel}/dsp/", phy_files, field_mask=cut_fields, idx=np.where(bl_mask)[0]
-    )[0]
-
-    hit_dict, plot_dict = generate_cut_classifiers(
-        data,
-        kwarg_dict_fft["cut_parameters"],
-        kwarg_dict.get("rounding", 4),
-        display=1 if args.plot_path else 0,
-    )
+        f"{args.channel}/dsp/",
+        phy_files,
+        field_mask=[*cut_fields, "daqenergy", "t_sat_lo", "timestamp"],
+        idx=np.where(bl_mask)[0],
+    )[0].view_as("pd")
+
+    discharges = data["t_sat_lo"] > 0
+    discharge_timestamps = data["timestamp"][discharges].to_numpy()
+    is_recovering = np.full(len(data), False, dtype=bool)
+    for tstamp in discharge_timestamps:
+        is_recovering = is_recovering | np.where(
+            (((data["timestamp"] - tstamp) < 0.01) & ((data["timestamp"] - tstamp) > 0)),
+            True,
+            False,
+        )
+    data["is_recovering"] = is_recovering
+
+    log.debug(f"{len(discharge_timestamps)} discharges found in {len(data)} events")
+
+    hit_dict = {}
+    plot_dict = {}
+    cut_data = data.query("is_recovering==0")
+    log.debug(f"cut_data shape: {len(cut_data)}")
+    for name, cut in kwarg_dict_fft["cut_parameters"].items():
+        cut_dict, cut_plots = generate_cut_classifiers(
+            cut_data,
+            {name: cut},
+            kwarg_dict.get("rounding", 4),
+            display=1 if args.plot_path else 0,
+        )
+        hit_dict.update(cut_dict)
+        plot_dict.update(cut_plots)
+
+        log.debug(f"{name} calculated cut_dict is: {json.dumps(cut_dict, indent=2)}")
+
+        ct_mask = np.full(len(cut_data), True, dtype=bool)
+        for outname, info in cut_dict.items():
+            # convert to pandas eval
+            exp = info["expression"]
+            for key in info.get("parameters", {}):
+                exp = re.sub(f"(?<![a-zA-Z0-9]){key}(?![a-zA-Z0-9])", f"@{key}", exp)

From: ggmarshall
Date: Wed, 24 Apr 2024 18:01:16 +0200
Subject: [PATCH 086/103] replace paths with relative

---
 scripts/merge_channels.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/merge_channels.py b/scripts/merge_channels.py
index bc8337c..c2698eb 100644
--- a/scripts/merge_channels.py
+++ b/scripts/merge_channels.py
@@ -20,6 +20,7 @@ def replace_path(d, old_path, new_path):
             d[i] = replace_path(d[i], old_path, new_path)
     elif isinstance(d, str) and old_path in d:
         d = d.replace(old_path, new_path)
+        d = f"$_/{os.path.basename(new_path)}"

     return d


From 81306feb6da232f744962ddefbee8b3bc77ffaa2 Mon Sep 17 00:00:00 2001
From: ggmarshall
Date: Wed, 24 Apr 2024 18:01:44 +0200
Subject: [PATCH 087/103] lists to flat32 arrays filter pars files

---
 scripts/build_dsp.py | 36 +++++++++++++++++++++---------------
 1 file changed, 21 insertions(+), 15 deletions(-)

diff --git a/scripts/build_dsp.py b/scripts/build_dsp.py
index a94d547..2fd2248 100644
--- a/scripts/build_dsp.py
+++ b/scripts/build_dsp.py
@@ -18,6 +18,18 @@
 from legendmeta import LegendMetadata
 from legendmeta.catalog import Props

+
+def replace_list_with_array(dic):
+    for key, value in dic.items():
+        if isinstance(value, dict):
+            dic[key] = replace_list_with_array(value)
+        elif isinstance(value, list):
+            dic[key] = np.array(value, dtype="float32")
+        else:
+            pass
+    return dic
+
+
 warnings.filterwarnings(action="ignore", category=RuntimeWarning)

 argparser = argparse.ArgumentParser()
@@ -43,20 +55,14 @@
     "inputs"
 ]["processing_chain"]

-database_dic = Props.read_from(args.pars_file)
-
-
-def replace_list_with_array(dic):
-    for key, value in dic.items():
-        if isinstance(value, dict):
-            dic[key] = replace_list_with_array(value)
-        elif isinstance(value, list):
-            dic[key] = np.array(value, dtype="float32")
-        else:
-            pass
-    return dic
-
+channel_dict = {chan: Props.read_from(file) for chan,
file in channel_dict.items()} +db_files = [ + par_file + for par_file in args.pars_file + if os.path.splitext(par_file)[1] == ".json" or os.path.splitext(par_file)[1] == ".yml" +] +database_dic = Props.read_from(db_files, subst_pathvar=True) database_dic = replace_list_with_array(database_dic) pathlib.Path(os.path.dirname(args.output)).mkdir(parents=True, exist_ok=True) @@ -88,8 +94,8 @@ def replace_list_with_array(dic): outputs = {} channels = [] -for channel, file in channel_dict.items(): - output = Props.read_from(file)["outputs"] +for channel, chan_dict in channel_dict.items(): + output = chan_dict["outputs"] in_dict = False for entry in outputs: if outputs[entry]["fields"] == output: From 221a15437b3aedbfca37b3554022c35e93d3fd6b Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Wed, 24 Apr 2024 18:02:50 +0200 Subject: [PATCH 088/103] change order --- scripts/complete_run.py | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/scripts/complete_run.py b/scripts/complete_run.py index 5829f1a..da65b49 100644 --- a/scripts/complete_run.py +++ b/scripts/complete_run.py @@ -167,6 +167,23 @@ def build_file_dbs(input_files, output_dir): setup = snakemake.params.setup basedir = snakemake.params.basedir +check_log_files( + snakemake.params.log_path, + snakemake.output.summary_log, + snakemake.output.gen_output, + warning_file=snakemake.output.warning_log, +) + +if snakemake.wildcards.tier != "daq": + os.makedirs(snakemake.params.filedb_path, exist_ok=True) + with open(os.path.join(snakemake.params.filedb_path, "file_db_config.json"), "w") as w: + json.dump(file_db_config, w, indent=2) + + build_file_dbs(snakemake.params.tmp_par_path, snakemake.params.filedb_path) + os.remove(os.path.join(snakemake.params.filedb_path, "file_db_config.json")) + + build_valid_keys(snakemake.params.tmp_par_path, snakemake.params.valid_keys_path) + if os.getenv("PRODENV") in snakemake.params.filedb_path: file_db_config = { "data_dir": "$PRODENV", @@ -258,21 +275,4 @@ def build_file_dbs(input_files, output_dir): }, } -check_log_files( - snakemake.params.log_path, - snakemake.output.summary_log, - snakemake.output.gen_output, - warning_file=snakemake.output.warning_log, -) - -if snakemake.wildcards.tier != "daq": - os.makedirs(snakemake.params.filedb_path, exist_ok=True) - with open(os.path.join(snakemake.params.filedb_path, "file_db_config.json"), "w") as w: - json.dump(file_db_config, w, indent=2) - - build_file_dbs(snakemake.params.tmp_par_path, snakemake.params.filedb_path) - os.remove(os.path.join(snakemake.params.filedb_path, "file_db_config.json")) - - build_valid_keys(snakemake.params.tmp_par_path, snakemake.params.valid_keys_path) - pathlib.Path(snakemake.output.gen_output).touch() From 1cac12c037b9619b399e09958fdecea597b7e0ff Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Wed, 24 Apr 2024 18:04:08 +0200 Subject: [PATCH 089/103] add svm paths, update tmp paths and add concat paths --- scripts/util/patterns.py | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/scripts/util/patterns.py b/scripts/util/patterns.py index 9f4338a..91a4fa1 100644 --- a/scripts/util/patterns.py +++ b/scripts/util/patterns.py @@ -234,7 +234,7 @@ def get_pattern_tier(setup, tier, check_in_cycle=True): tier_path(setup) not in str(pathlib.Path(file_pattern).resolve()) and check_in_cycle is True ): - return "/tmp/{experiment}-{period}-{run}-{datatype}-{timestamp}" + f"tier_{tier}.lh5" + return 
"/tmp/{experiment}-{period}-{run}-{datatype}-{timestamp}-" + f"tier_{tier}.lh5" else: return file_pattern @@ -416,16 +416,37 @@ def get_pattern_pars(setup, tier, name=None, extension="json", check_in_cycle=Tr and check_in_cycle is True ): if name is None: - return "/tmp/{experiment}-{period}-{run}-cal-{timestamp}" + f"par_{tier}.{extension}" + return "/tmp/{experiment}-{period}-{run}-cal-{timestamp}-" + f"par_{tier}.{extension}" else: return ( - "/tmp/{experiment}-{period}-{run}-cal-{timestamp}" + "/tmp/{experiment}-{period}-{run}-cal-{timestamp}-" + f"par_{tier}_{name}.{extension}" ) else: return file_pattern +def get_pattern_pars_svm(setup, tier, name=None, ext="json"): + if name is not None: + return os.path.join( + f"{par_overwrite_path(setup)}", + tier, + "cal", + "{period}", + "{run}", + "{experiment}-{period}-{run}-cal-{timestamp}-" + f"par_{tier}_{name}.{ext}", + ) + else: + return os.path.join( + f"{par_overwrite_path(setup)}", + tier, + "cal", + "{period}", + "{run}", + "{experiment}-{period}-{run}-cal-{timestamp}-" + f"par_{tier}.{ext}", + ) + + def get_pattern_pars_overwrite(setup, tier, name=None): if name is not None: return os.path.join( From 61c1acc94c1725678f2365d7f3f0a1990f9f1eb6 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Wed, 24 Apr 2024 18:05:12 +0200 Subject: [PATCH 090/103] svm rules --- rules/dsp.smk | 67 ++++++++++++++++++++++----------- rules/psp.smk | 102 ++++++++++++++++++++++++++++++++++++++------------ 2 files changed, 123 insertions(+), 46 deletions(-) diff --git a/rules/dsp.smk b/rules/dsp.smk index bb05278..7617d48 100644 --- a/rules/dsp.smk +++ b/rules/dsp.smk @@ -7,6 +7,7 @@ Snakemake rules for processing dsp tier. This is done in 4 steps: """ from scripts.util.pars_loading import pars_catalog +from scripts.util.utils import par_dsp_path from scripts.util.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, @@ -19,6 +20,8 @@ from scripts.util.patterns import ( get_pattern_pars_tmp, get_pattern_log, get_pattern_pars, + get_pattern_pars_overwrite, + get_pattern_pars_svm, ) @@ -182,7 +185,7 @@ rule build_pars_dsp_eopt: datatype="cal", channel="{channel}", output: - dsp_pars=temp(get_pattern_pars_tmp_channel(setup, "dsp")), + dsp_pars=temp(get_pattern_pars_tmp_channel(setup, "dsp_eopt")), qbb_grid=temp( get_pattern_pars_tmp_channel(setup, "dsp", "objects", extension="pkl") ), @@ -209,26 +212,46 @@ rule build_pars_dsp_eopt: "--final_dsp_pars {output.dsp_pars}" -# This rule builds the optimal energy filter parameters for the dsp using calibration dsp files -# rule build_pars_dsp_svm: -# input: -# hyperpars="", -# train_data="", -# output: -# dsp_pars=get_pattern_pars(setup, "dsp", "svm"), -# log: -# get_pattern_log_channel(setup, "pars_dsp_svm"), -# group: -# "par-dsp" -# resources: -# runtime=300, -# shell: -# "{swenv} python3 -B " -# f"{workflow.source_path('../scripts/pars_dsp_svm.py')} " -# "--log {log} " -# "--train_data {input.train_data} " -# "--train_hyperpars {input.hyperpars} " -# "--output_file {output.dsp_pars}" +rule build_svm_dsp: + input: + hyperpars=lambda wildcards: get_svm_file(wildcards, "dsp", "svm_hyperpars"), + train_data=lambda wildcards: get_svm_file(wildcards, "dsp", "svm_train"), + output: + dsp_pars=get_pattern_pars(setup, "dsp", "svm", "pkl"), + log: + get_pattern_log(setup, "pars_dsp_svm").replace("{datatype}", "cal"), + group: + "par-dsp-svm" + resources: + runtime=300, + shell: + "{swenv} python3 -B " + f"{workflow.source_path('../scripts/pars_dsp_build_svm.py')} " + "--log {log} " + "--train_data 
{input.train_data} " + "--train_hyperpars {input.hyperpars} " + "--output_file {output.dsp_pars}" + + +rule build_pars_dsp_svm: + input: + dsp_pars=get_pattern_pars_tmp_channel(setup, "dsp_eopt"), + svm_file=get_pattern_pars(setup, "dsp", "svm", "pkl"), + output: + dsp_pars=temp(get_pattern_pars_tmp_channel(setup, "dsp")), + log: + get_pattern_log_channel(setup, "pars_dsp_svm"), + group: + "par-dsp" + resources: + runtime=300, + shell: + "{swenv} python3 -B " + f"{workflow.source_path('../scripts/pars_dsp_svm.py')} " + "--log {log} " + "--input_file {input.dsp_pars} " + "--output_file {output.dsp_pars} " + "--svm_file {input.svm_file}" rule build_plts_dsp: @@ -353,4 +376,4 @@ rule build_dsp: "--input {input.raw_file} " "--output {output.tier_file} " "--db_file {output.db_file} " - "--pars_file {input.pars_file}" + "--pars_file {input.pars_file} " diff --git a/rules/psp.smk b/rules/psp.smk index 6b60b0e..6591f1b 100644 --- a/rules/psp.smk +++ b/rules/psp.smk @@ -20,6 +20,13 @@ from scripts.util.patterns import ( get_pattern_pars, ) +pars_key_resolve.write_par_catalog( + ["-*-*-*-cal"], + os.path.join(pars_path(setup), "dsp", "validity.jsonl"), + get_pattern_tier_raw(setup), + {"cal": ["par_dsp"], "lar": ["par_dsp"]}, +) + pars_key_resolve.write_par_catalog( ["-*-*-*-cal"], os.path.join(pars_path(setup), "psp", "validity.jsonl"), @@ -34,7 +41,11 @@ for key, dataset in part.datasets.items(): rule: input: dsp_pars=part.get_par_files( - f"{par_dsp_path(setup)}/validity.jsonl", partition, key, tier="dsp" + f"{par_dsp_path(setup)}/validity.jsonl", + partition, + key, + tier="dsp", + name="eopt", ), dsp_objs=part.get_par_files( f"{par_dsp_path(setup)}/validity.jsonl", @@ -62,6 +73,7 @@ for key, dataset in part.datasets.items(): partition, key, tier="psp", + name="eopt", ) ), psp_objs=temp( @@ -121,7 +133,7 @@ for key, dataset in part.datasets.items(): # This rule builds the a/e calibration using the calibration dsp files for the whole partition rule build_par_psp: input: - dsp_pars=get_pattern_pars_tmp_channel(setup, "dsp"), + dsp_pars=get_pattern_pars_tmp_channel(setup, "dsp", "eopt"), dsp_objs=get_pattern_pars_tmp_channel(setup, "dsp", "objects", extension="pkl"), dsp_plots=get_pattern_plts_tmp_channel(setup, "dsp"), params: @@ -129,7 +141,7 @@ rule build_par_psp: channel="{channel}", timestamp="{timestamp}", output: - psp_pars=temp(get_pattern_pars_tmp_channel(setup, "psp")), + psp_pars=temp(get_pattern_pars_tmp_channel(setup, "psp", "eopt")), psp_objs=temp( get_pattern_pars_tmp_channel(setup, "psp", "objects", extension="pkl") ), @@ -166,6 +178,48 @@ rule_order_list.append(fallback_psp_rule.name) workflow._ruleorder.add(*rule_order_list) # [::-1] +rule build_svm_psp: + input: + hyperpars=lambda wildcards: get_svm_file(wildcards, "psp", "svm_hyperpars"), + train_data=lambda wildcards: get_svm_file(wildcards, "psp", "svm_train"), + output: + dsp_pars=get_pattern_pars(setup, "psp", "svm", "pkl"), + log: + get_pattern_log(setup, "pars_psp_svm").replace("{datatype}", "cal"), + group: + "par-dsp-svm" + resources: + runtime=300, + shell: + "{swenv} python3 -B " + f"{workflow.source_path('../scripts/pars_dsp_build_svm.py')} " + "--log {log} " + "--train_data {input.train_data} " + "--train_hyperpars {input.hyperpars} " + "--output_file {output.dsp_pars}" + + +rule build_pars_psp_svm: + input: + dsp_pars=get_pattern_pars_tmp_channel(setup, "psp_eopt"), + svm_model=get_pattern_pars(setup, "psp", "svm", "pkl"), + output: + dsp_pars=temp(get_pattern_pars_tmp_channel(setup, "psp")), + log: + 
get_pattern_log_channel(setup, "pars_dsp_svm"), + group: + "par-dsp" + resources: + runtime=300, + shell: + "{swenv} python3 -B " + f"{workflow.source_path('../scripts/pars_dsp_svm.py')} " + "--log {log} " + "--input_file {input.dsp_pars} " + "--output_file {output.dsp_pars} " + "--svm_file {input.svm_model}" + + rule build_pars_psp_objects: input: lambda wildcards: read_filelist_pars_cal_channel( @@ -203,26 +257,26 @@ rule build_plts_psp: "--output {output} " -# rule build_pars_psp: -# input: -# infiles=lambda wildcards: read_filelist_pars_cal_channel(wildcards, "psp"), -# plts=get_pattern_plts(setup, "psp"), -# objects=get_pattern_pars( -# setup, -# "psp", -# name="objects", -# extension="dir", -# check_in_cycle=check_in_cycle, -# ), -# output: -# get_pattern_pars(setup, "psp", check_in_cycle=check_in_cycle), -# group: -# "merge-hit" -# shell: -# "{swenv} python3 -B " -# f"{basedir}/../scripts/merge_channels.py " -# "--input {input.infiles} " -# "--output {output} " +rule build_pars_psp: + input: + infiles=lambda wildcards: read_filelist_pars_cal_channel(wildcards, "psp"), + plts=get_pattern_plts(setup, "psp"), + objects=get_pattern_pars( + setup, + "psp", + name="objects", + extension="dir", + check_in_cycle=check_in_cycle, + ), + output: + get_pattern_pars(setup, "psp", check_in_cycle=check_in_cycle), + group: + "merge-hit" + shell: + "{swenv} python3 -B " + f"{basedir}/../scripts/merge_channels.py " + "--input {input.infiles} " + "--output {output} " rule build_psp: @@ -256,4 +310,4 @@ rule build_psp: "--input {input.raw_file} " "--output {output.tier_file} " "--db_file {output.db_file} " - "--pars_file {input.pars_file}" + "--pars_file {input.pars_file} " From c3dc6ef2045fe2ae13e10fcb79d2ae7b284057a4 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Wed, 24 Apr 2024 18:05:41 +0200 Subject: [PATCH 091/103] add overwrites for qc --- rules/pht.smk | 52 +++++++++++++++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 20 deletions(-) diff --git a/rules/pht.smk b/rules/pht.smk index cbb05e4..e3efae7 100644 --- a/rules/pht.smk +++ b/rules/pht.smk @@ -65,6 +65,15 @@ for key, dataset in part.datasets.items(): tier="pht", name="check", ), + overwrite_files=get_overwrite_file( + "pht", + timestamp=part.get_timestamp( + f"{par_pht_path(setup)}/validity.jsonl", + partition, + key, + tier="pht", + ), + ), wildcard_constraints: channel=part.get_wildcard_constraints(partition, key), params: @@ -117,6 +126,7 @@ for key, dataset in part.datasets.items(): "--channel {params.channel} " "--save_path {output.hit_pars} " "--plot_path {output.plot_file} " + "--overwrite_files {input.overwrite_files} " "--pulser_files {input.pulser_files} " "--fft_files {input.fft_files} " "--cal_files {input.cal_files}" @@ -143,6 +153,7 @@ rule build_pht_qc: ), pulser_files=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), check_file=get_pattern_pars_tmp_channel(setup, "pht", "check"), + overwrite_files=lambda wildcards: get_overwrite_file("pht", wildcards=wildcards), params: datatype="cal", channel="{channel}", @@ -167,6 +178,7 @@ rule build_pht_qc: "--channel {params.channel} " "--save_path {output.hit_pars} " "--plot_path {output.plot_file} " + "--overwrite_files {input.overwrite_files} " "--pulser_files {input.pulser_files} " "--fft_files {input.fft_files} " "--cal_files {input.cal_files}" @@ -810,26 +822,26 @@ rule build_plts_pht: "--output {output} " -rule build_pars_pht: - input: - infiles=lambda wildcards: read_filelist_pars_cal_channel(wildcards, "pht"), - plts=get_pattern_plts(setup, 
"pht"), - objects=get_pattern_pars( - setup, - "pht", - name="objects", - extension="dir", - check_in_cycle=check_in_cycle, - ), - output: - get_pattern_pars(setup, "pht", check_in_cycle=check_in_cycle), - group: - "merge-hit" - shell: - "{swenv} python3 -B " - f"{basedir}/../scripts/merge_channels.py " - "--input {input.infiles} " - "--output {output} " +# rule build_pars_pht: +# input: +# infiles=lambda wildcards: read_filelist_pars_cal_channel(wildcards, "pht"), +# plts=get_pattern_plts(setup, "pht"), +# objects=get_pattern_pars( +# setup, +# "pht", +# name="objects", +# extension="dir", +# check_in_cycle=check_in_cycle, +# ), +# output: +# get_pattern_pars(setup, "pht", check_in_cycle=check_in_cycle), +# group: +# "merge-hit" +# shell: +# "{swenv} python3 -B " +# f"{basedir}/../scripts/merge_channels.py " +# "--input {input.infiles} " +# "--output {output} " rule build_pht: From 6102536d0ba18e58572335855a6c68c29e18e786 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Wed, 24 Apr 2024 18:05:57 +0200 Subject: [PATCH 092/103] func for svm --- rules/common.smk | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/rules/common.smk b/rules/common.smk index 6359ded..6cb5d40 100644 --- a/rules/common.smk +++ b/rules/common.smk @@ -11,6 +11,7 @@ from scripts.util.patterns import ( get_pattern_tier_raw, get_pattern_plts_tmp_channel, ) +from scripts.util import ProcessingFileKey def read_filelist(wildcards): @@ -133,3 +134,38 @@ def set_last_rule_name(workflow, new_name): workflow._localrules.add(new_name) workflow.check_localrules() + + +def get_svm_file(wildcards, tier, name): + par_overwrite_file = os.path.join(par_overwrite_path(setup), tier, "validity.jsonl") + pars_files_overwrite = pars_catalog.get_calib_files( + par_overwrite_file, wildcards.timestamp + ) + for pars_file in pars_files_overwrite: + if name in pars_file: + return os.path.join(par_overwrite_path(setup), tier, pars_file) + raise ValueError(f"Could not find model in {pars_files_overwrite}") + + +def get_overwrite_file(tier, wildcards=None, timestamp=None, name=None): + par_overwrite_file = os.path.join(par_overwrite_path(setup), tier, "validity.jsonl") + if timestamp is not None: + pars_files_overwrite = pars_catalog.get_calib_files( + par_overwrite_file, timestamp + ) + else: + pars_files_overwrite = pars_catalog.get_calib_files( + par_overwrite_file, wildcards.timestamp + ) + if name is None: + fullname = f"{tier}-overwrite.json" + else: + fullname = f"{tier}_{name}-overwrite.json" + out_files = [] + for pars_file in pars_files_overwrite: + if fullname in pars_file: + out_files.append(os.path.join(par_overwrite_path(setup), tier, pars_file)) + if len(out_files) == 0: + raise ValueError(f"Could not find name in {pars_files_overwrite}") + else: + return out_files From 5149b694674f1372a4692203780ea5e4a5a1733c Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Wed, 24 Apr 2024 18:06:22 +0200 Subject: [PATCH 093/103] increase number simultaneous jobs --- rules/evt.smk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rules/evt.smk b/rules/evt.smk index 2e29306..7454957 100644 --- a/rules/evt.smk +++ b/rules/evt.smk @@ -42,7 +42,7 @@ for tier in ("evt", "pet"): "tier-evt" resources: runtime=300, - mem_swap=70, + mem_swap=50, shell: "{swenv} python3 -B " f"{workflow.source_path('../scripts/build_evt.py')} " From 5de206ce8f3402f33c9ccd2588bfce22bb5eb296 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Fri, 26 Apr 2024 15:37:14 +0200 Subject: [PATCH 094/103] update ac to do top fit --- 
scripts/pars_hit_ecal.py | 13 ++++++++++--- scripts/pars_pht_partcal.py | 12 ++++++++++-- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/scripts/pars_hit_ecal.py b/scripts/pars_hit_ecal.py index e84e51f..b324b62 100644 --- a/scripts/pars_hit_ecal.py +++ b/scripts/pars_hit_ecal.py @@ -532,7 +532,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): (1592.53, (40, 20), pgf.gauss_on_step), (1620.50, (20, 40), pgf.gauss_on_step), (2103.53, (40, 40), pgf.gauss_on_step), - (2614.50, (60, 60), pgf.hpge_peak), + (2614.553, (60, 60), pgf.hpge_peak), ] glines = [pk_par[0] for pk_par in pk_pars] @@ -570,14 +570,21 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): full_object_dict[cal_energy_param].hpge_get_energy_peaks( e_uncal, etol_kev=5 if det_status == "on" else 20 ) - if 2614.50 not in full_object_dict[cal_energy_param].peaks_kev: + if 2614.553 not in full_object_dict[cal_energy_param].peaks_kev: full_object_dict[cal_energy_param].hpge_get_energy_peaks( e_uncal, peaks_kev=glines, etol_kev=5 if det_status == "on" else 30, n_sigma=2 ) got_peaks_kev = full_object_dict[cal_energy_param].peaks_kev.copy() + if det_status != "on": + full_object_dict[cal_energy_param].hpge_cal_energy_peak_tops( + e_uncal, + peaks_kev=got_peaks_kev, + update_cal_pars=True, + allowed_p_val=0, + ) full_object_dict[cal_energy_param].hpge_fit_energy_peaks( e_uncal, - peaks_kev=[2614.50], + peaks_kev=[2614.553], peak_pars=pk_pars, tail_weight=kwarg_dict.get("tail_weight", 0), n_events=kwarg_dict.get("n_events", None), diff --git a/scripts/pars_pht_partcal.py b/scripts/pars_pht_partcal.py index 73461f4..21a2654 100644 --- a/scripts/pars_pht_partcal.py +++ b/scripts/pars_pht_partcal.py @@ -308,7 +308,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): (1592.53, (30, 20), pgf.hpge_peak), (1620.50, (20, 30), pgf.hpge_peak), (2103.53, (30, 30), pgf.hpge_peak), - (2614.50, (30, 30), pgf.hpge_peak), + (2614.553, (30, 30), pgf.hpge_peak), (3125, (30, 30), pgf.gauss_on_step), (3198, (30, 30), pgf.gauss_on_step), (3474, (30, 30), pgf.gauss_on_step), @@ -330,11 +330,19 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): for energy_param, cal_energy_param in zip(kwarg_dict["energy_params"], cal_energy_params): energy = data.query(selection_string)[energy_param].to_numpy() full_object_dict[cal_energy_param] = HPGeCalibration( - energy_param, glines, 1, kwarg_dict.get("deg", 0), fixed={1: 1} + energy_param, glines, 1, kwarg_dict.get("deg", 0) # , fixed={1: 1} ) full_object_dict[cal_energy_param].hpge_get_energy_peaks( energy, etol_kev=5 if det_status == "on" else 10 ) + + if det_status != "on": + full_object_dict[cal_energy_param].hpge_cal_energy_peak_tops( + energy, + update_cal_pars=True, + allowed_p_val=0, + ) + full_object_dict[cal_energy_param].hpge_fit_energy_peaks( energy, peak_pars=pk_pars, From b0530263fd7267808378368cfbb73c3f44604a19 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Fri, 26 Apr 2024 15:37:32 +0200 Subject: [PATCH 095/103] fix psp merging --- rules/psp.smk | 49 ++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 42 insertions(+), 7 deletions(-) diff --git a/rules/psp.smk b/rules/psp.smk index 6591f1b..de08064 100644 --- a/rules/psp.smk +++ b/rules/psp.smk @@ -235,7 +235,7 @@ rule build_pars_psp_objects: check_in_cycle=check_in_cycle, ), group: - "merge-hit" + "merge-psp" shell: "{swenv} python3 -B " f"{basedir}/../scripts/merge_channels.py " @@ -249,7 +249,27 @@ rule 
build_plts_psp: output: get_pattern_plts(setup, "psp"), group: - "merge-hit" + "merge-psp" + shell: + "{swenv} python3 -B " + f"{basedir}/../scripts/merge_channels.py " + "--input {input} " + "--output {output} " + + +rule build_pars_psp_db: + input: + lambda wildcards: read_filelist_pars_cal_channel(wildcards, "psp"), + output: + temp( + get_pattern_pars_tmp( + setup, + "psp", + datatype="cal", + ) + ), + group: + "merge-psp" shell: "{swenv} python3 -B " f"{basedir}/../scripts/merge_channels.py " @@ -259,7 +279,14 @@ rule build_plts_psp: rule build_pars_psp: input: - infiles=lambda wildcards: read_filelist_pars_cal_channel(wildcards, "psp"), + in_files=lambda wildcards: read_filelist_pars_cal_channel( + wildcards, "dsp_dplms_lh5" + ), + in_db=get_pattern_pars_tmp( + setup, + "psp", + datatype="cal", + ), plts=get_pattern_plts(setup, "psp"), objects=get_pattern_pars( setup, @@ -269,14 +296,22 @@ rule build_pars_psp: check_in_cycle=check_in_cycle, ), output: - get_pattern_pars(setup, "psp", check_in_cycle=check_in_cycle), + out_file=get_pattern_pars( + setup, + "psp", + extension="lh5", + check_in_cycle=check_in_cycle, + ), + out_db=get_pattern_pars(setup, "psp", check_in_cycle=check_in_cycle), group: - "merge-hit" + "merge-psp" shell: "{swenv} python3 -B " f"{basedir}/../scripts/merge_channels.py " - "--input {input.infiles} " - "--output {output} " + "--output {output.out_file} " + "--in_db {input.in_db} " + "--out_db {output.out_db} " + "--input {input.in_files} " rule build_psp: From 8ef060e99e5508ce46ffde8133c412d8380f35b6 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 26 Apr 2024 13:55:51 +0000 Subject: [PATCH 096/103] style: pre-commit fixes --- Snakefile | 4 ++-- rules/main.smk | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Snakefile b/Snakefile index 67bfaba..3c7d486 100644 --- a/Snakefile +++ b/Snakefile @@ -95,7 +95,7 @@ onstart: onsuccess: from snakemake.report import auto_report - rep_dir = f"{log_path(setup)}/report-{datetime.strftime(datetime.utcnow(), '%Y%m%dT%H%M%SZ')}" + rep_dir = f"{log_path(setup)}/report-{datetime.strftime(datetime.utcnow() , '%Y%m%dT%H%M%SZ')}" pathlib.Path(rep_dir).mkdir(parents=True, exist_ok=True) # auto_report(workflow.persistence.dag, f"{rep_dir}/report.html") with open(os.path.join(rep_dir, "dag.txt"), "w") as f: @@ -112,7 +112,7 @@ onsuccess: if os.path.isfile(file): os.remove(file) - # remove filelists + # remove filelists files = glob.glob(os.path.join(filelist_path(setup), "*")) for file in files: if os.path.isfile(file): diff --git a/rules/main.smk b/rules/main.smk index b67ea46..86d940a 100644 --- a/rules/main.smk +++ b/rules/main.smk @@ -29,10 +29,10 @@ rule autogen_output: gen_output="{label}-{tier}.gen", summary_log=f"{log_path(setup)}/summary-" + "{label}-{tier}" - + f"-{datetime.strftime(datetime.utcnow(), '%Y%m%dT%H%M%SZ')}.log", + + f"-{datetime.strftime(datetime.utcnow() , '%Y%m%dT%H%M%SZ')}.log", warning_log=f"{log_path(setup)}/warning-" + "{label}-{tier}" - + f"-{datetime.strftime(datetime.utcnow(), '%Y%m%dT%H%M%SZ')}.log", + + f"-{datetime.strftime(datetime.utcnow() , '%Y%m%dT%H%M%SZ')}.log", params: log_path=tmp_log_path(setup), tmp_par_path=os.path.join(tmp_par_path(setup), "*_db.json"), From c88b3c8c85925fe13ff1b0649cf724ad28e66e0d Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sun, 28 Apr 2024 00:09:22 +0200 Subject: [PATCH 097/103] update svm rules and increase dsp jobs --- rules/dsp.smk | 4 ++-- rules/psp.smk | 6 +++--- 2 files 
changed, 5 insertions(+), 5 deletions(-) diff --git a/rules/dsp.smk b/rules/dsp.smk index 7617d48..5f4f355 100644 --- a/rules/dsp.smk +++ b/rules/dsp.smk @@ -215,7 +215,7 @@ rule build_pars_dsp_eopt: rule build_svm_dsp: input: hyperpars=lambda wildcards: get_svm_file(wildcards, "dsp", "svm_hyperpars"), - train_data=lambda wildcards: get_svm_file(wildcards, "dsp", "svm_train"), + train_data=lambda wildcards: get_svm_file(wildcards, "dsp", "svm_hyperpars").replace("hyperpars.json", "train.lh5"), output: dsp_pars=get_pattern_pars(setup, "dsp", "svm", "pkl"), log: @@ -365,7 +365,7 @@ rule build_dsp: "tier-dsp" resources: runtime=300, - mem_swap=50, + mem_swap=40, shell: "{swenv} python3 -B " f"{workflow.source_path('../scripts/build_dsp.py')} " diff --git a/rules/psp.smk b/rules/psp.smk index de08064..7ec81a2 100644 --- a/rules/psp.smk +++ b/rules/psp.smk @@ -181,7 +181,7 @@ workflow._ruleorder.add(*rule_order_list) # [::-1] rule build_svm_psp: input: hyperpars=lambda wildcards: get_svm_file(wildcards, "psp", "svm_hyperpars"), - train_data=lambda wildcards: get_svm_file(wildcards, "psp", "svm_train"), + train_data=lambda wildcards: get_svm_file(wildcards, "psp", "svm_hyperpars").replace("hyperpars.json", "train.lh5"), output: dsp_pars=get_pattern_pars(setup, "psp", "svm", "pkl"), log: @@ -329,12 +329,12 @@ rule build_psp: tier_file=get_pattern_tier(setup, "psp", check_in_cycle=check_in_cycle), db_file=get_pattern_pars_tmp(setup, "psp_db"), log: - get_pattern_log(setup, "tier_dsp"), + get_pattern_log(setup, "tier_psp"), group: "tier-dsp" resources: runtime=300, - mem_swap=50, + mem_swap=40, shell: "{swenv} python3 -B " f"{workflow.source_path('../scripts/build_dsp.py')} " From b26896f385ac8710ae4bd91918300869cc277541 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sun, 28 Apr 2024 00:09:37 +0200 Subject: [PATCH 098/103] bugfix for merging --- scripts/merge_channels.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/merge_channels.py b/scripts/merge_channels.py index c2698eb..b169d29 100644 --- a/scripts/merge_channels.py +++ b/scripts/merge_channels.py @@ -20,7 +20,7 @@ def replace_path(d, old_path, new_path): d[i] = replace_path(d[i], old_path, new_path) elif isinstance(d, str) and old_path in d: d = d.replace(old_path, new_path) - d = f"$_/{os.path.basename(new_path)}" + d = d.replace(new_path, f"$_/{os.path.basename(new_path)}") return d From 7c4435b3914ac843a46be53ca6fc4e980f1953dc Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sun, 28 Apr 2024 00:09:46 +0200 Subject: [PATCH 099/103] update to latest versions --- templates/config.json | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/templates/config.json b/templates/config.json index 1884061..86091e0 100644 --- a/templates/config.json +++ b/templates/config.json @@ -53,11 +53,11 @@ "arg": "/data2/public/prodenv/containers/legendexp_legend-base_latest_20221021210158.sif" }, "pkg_versions": { - "pygama": "pygama==1.6.0", - "pylegendmeta": "pylegendmeta==0.9.0", - "dspeed": "dspeed==1.3.0", - "legend-pydataobj": "legend-pydataobj==1.5.1", - "legend-daq2lh5": "legend-daq2lh5==1.2.0" + "pygama": "pygama==2.0.0a1", + "pylegendmeta": "pylegendmeta==0.10.0", + "dspeed": "dspeed==1.3.0a6", + "legend-pydataobj": "legend-pydataobj==1.6.1", + "legend-daq2lh5": "legend-daq2lh5==1.2.1" } } } From c775e01ee32454475dc4dd7e4e4b9021de311891 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sun, 28 Apr 2024 00:11:56 +0200 Subject: [PATCH 100/103] pc fixes --- rules/dsp.smk | 4 +++- rules/psp.smk | 4 +++- 2 
files changed, 6 insertions(+), 2 deletions(-) diff --git a/rules/dsp.smk b/rules/dsp.smk index 5f4f355..3a917b6 100644 --- a/rules/dsp.smk +++ b/rules/dsp.smk @@ -215,7 +215,9 @@ rule build_pars_dsp_eopt: rule build_svm_dsp: input: hyperpars=lambda wildcards: get_svm_file(wildcards, "dsp", "svm_hyperpars"), - train_data=lambda wildcards: get_svm_file(wildcards, "dsp", "svm_hyperpars").replace("hyperpars.json", "train.lh5"), + train_data=lambda wildcards: get_svm_file( + wildcards, "dsp", "svm_hyperpars" + ).replace("hyperpars.json", "train.lh5"), output: dsp_pars=get_pattern_pars(setup, "dsp", "svm", "pkl"), log: diff --git a/rules/psp.smk b/rules/psp.smk index 7ec81a2..84c3f03 100644 --- a/rules/psp.smk +++ b/rules/psp.smk @@ -181,7 +181,9 @@ workflow._ruleorder.add(*rule_order_list) # [::-1] rule build_svm_psp: input: hyperpars=lambda wildcards: get_svm_file(wildcards, "psp", "svm_hyperpars"), - train_data=lambda wildcards: get_svm_file(wildcards, "psp", "svm_hyperpars").replace("hyperpars.json", "train.lh5"), + train_data=lambda wildcards: get_svm_file( + wildcards, "psp", "svm_hyperpars" + ).replace("hyperpars.json", "train.lh5"), output: dsp_pars=get_pattern_pars(setup, "psp", "svm", "pkl"), log: From 5cd871f06d19bbba490565db57eb6cb990777dd6 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 29 Apr 2024 12:57:28 +0200 Subject: [PATCH 101/103] dsp job increase --- rules/dsp.smk | 2 +- rules/psp.smk | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/rules/dsp.smk b/rules/dsp.smk index 3a917b6..d44a6db 100644 --- a/rules/dsp.smk +++ b/rules/dsp.smk @@ -367,7 +367,7 @@ rule build_dsp: "tier-dsp" resources: runtime=300, - mem_swap=40, + mem_swap=25, shell: "{swenv} python3 -B " f"{workflow.source_path('../scripts/build_dsp.py')} " diff --git a/rules/psp.smk b/rules/psp.smk index 84c3f03..d581107 100644 --- a/rules/psp.smk +++ b/rules/psp.smk @@ -336,7 +336,7 @@ rule build_psp: "tier-dsp" resources: runtime=300, - mem_swap=40, + mem_swap=25, shell: "{swenv} python3 -B " f"{workflow.source_path('../scripts/build_dsp.py')} " From a049094e66ed338d6ba2378838919562bdd228d6 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 29 Apr 2024 12:59:38 +0200 Subject: [PATCH 102/103] bugfix --- scripts/complete_run.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/scripts/complete_run.py b/scripts/complete_run.py index da65b49..722b244 100644 --- a/scripts/complete_run.py +++ b/scripts/complete_run.py @@ -174,16 +174,6 @@ def build_file_dbs(input_files, output_dir): warning_file=snakemake.output.warning_log, ) -if snakemake.wildcards.tier != "daq": - os.makedirs(snakemake.params.filedb_path, exist_ok=True) - with open(os.path.join(snakemake.params.filedb_path, "file_db_config.json"), "w") as w: - json.dump(file_db_config, w, indent=2) - - build_file_dbs(snakemake.params.tmp_par_path, snakemake.params.filedb_path) - os.remove(os.path.join(snakemake.params.filedb_path, "file_db_config.json")) - - build_valid_keys(snakemake.params.tmp_par_path, snakemake.params.valid_keys_path) - if os.getenv("PRODENV") in snakemake.params.filedb_path: file_db_config = { "data_dir": "$PRODENV", @@ -275,4 +265,14 @@ def build_file_dbs(input_files, output_dir): }, } +if snakemake.wildcards.tier != "daq": + os.makedirs(snakemake.params.filedb_path, exist_ok=True) + with open(os.path.join(snakemake.params.filedb_path, "file_db_config.json"), "w") as w: + json.dump(file_db_config, w, indent=2) + + build_file_dbs(snakemake.params.tmp_par_path, 
snakemake.params.filedb_path) + os.remove(os.path.join(snakemake.params.filedb_path, "file_db_config.json")) + + build_valid_keys(snakemake.params.tmp_par_path, snakemake.params.valid_keys_path) + pathlib.Path(snakemake.output.gen_output).touch() From 90a38fae4e0041376861d5c9857f1f94737dbcfe Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 29 Apr 2024 13:00:04 +0200 Subject: [PATCH 103/103] increment dspeed version --- templates/config.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/templates/config.json b/templates/config.json index 86091e0..5c46803 100644 --- a/templates/config.json +++ b/templates/config.json @@ -55,7 +55,7 @@ "pkg_versions": { "pygama": "pygama==2.0.0a1", "pylegendmeta": "pylegendmeta==0.10.0", - "dspeed": "dspeed==1.3.0a6", + "dspeed": "dspeed==1.4.0a1", "legend-pydataobj": "legend-pydataobj==1.6.1", "legend-daq2lh5": "legend-daq2lh5==1.2.1" }
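
A note on the discharge-recovery veto introduced in PATCH 083 and PATCH 085: each script flags events that land within 0.01 s after a saturated discharge (t_sat_lo > 0) and drops them before fitting or cut generation. The committed form loops over every discharge timestamp, which is O(n_events * n_discharges); the same mask can be built with one binary search per event. A minimal standalone sketch of that idea (illustrative only; the helper name is not part of the repository scripts, and event_ts/discharge_ts are assumed to be numpy arrays of unix-second timestamps):

    import numpy as np

    def is_recovering_mask(event_ts, discharge_ts, window=0.01):
        # Flag events strictly within `window` seconds after any discharge;
        # equivalent to the per-discharge loop in pars_dsp_tau.py and the
        # pars_*_qc.py scripts, but O(n log n) via np.searchsorted.
        event_ts = np.asarray(event_ts)
        discharge_ts = np.sort(np.asarray(discharge_ts))
        if discharge_ts.size == 0:
            return np.zeros(event_ts.shape, dtype=bool)
        # index of the most recent discharge at or before each event
        idx = np.searchsorted(discharge_ts, event_ts, side="right") - 1
        dt = event_ts - discharge_ts[np.clip(idx, 0, None)]
        return (idx >= 0) & (dt > 0) & (dt < window)

Whichever form is used, the window test only makes sense against timestamp values in seconds: substituting np.where(...)[0] indices for the values turns is_recovering into an all-False mask and the veto into a no-op, which is why the scripts above take the timestamps of the discharge events directly.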