From cf15210fdca88f0968189a4a69ee8d04a3bf8dda Mon Sep 17 00:00:00 2001 From: Valerio Dandrea Date: Tue, 28 Nov 2023 12:30:43 +0100 Subject: [PATCH 001/103] first version of dplms script and merging of lh5 par files --- rules/dsp.smk | 47 +++++++++++++++++ scripts/merge_channels.py | 47 +++++++++++++++++ scripts/pars_dsp_dplms.py | 103 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 197 insertions(+) create mode 100644 scripts/pars_dsp_dplms.py diff --git a/rules/dsp.smk b/rules/dsp.smk index 428ecd2..969bc3d 100644 --- a/rules/dsp.smk +++ b/rules/dsp.smk @@ -98,11 +98,52 @@ rule build_pars_dsp_eopt: "--final_dsp_pars {output.dsp_pars}" +# This rule builds the dplms energy filter for the dsp using fft and cal files +rule build_pars_dsp_dplms: + input: + fft_files=os.path.join( + filelist_path(setup), "all-{experiment}-{period}-{run}-fft-raw.filelist" + ), + cal_files=os.path.join( + filelist_path(setup), "all-{experiment}-{period}-{run}-cal-raw.filelist" + ), + database=get_pattern_pars_tmp_channel(setup, "dsp", "decay_constant"), + params: + timestamp="{timestamp}", + datatype="cal", + channel="{channel}", + output: + dsp_pars=temp(get_pattern_pars_tmp_channel(setup, "dsp")), + lh5_path=temp(get_pattern_pars_tmp_channel(setup, "dsp", extension="lh5")), + plots=temp(get_pattern_plts_tmp_channel(setup, "dsp", "dplms")), + log: + get_pattern_log_channel(setup, "pars_dsp_dplms"), + group: + "par-dsp" + resources: + runtime=300, + shell: + "{swenv} python3 -B " + f"{workflow.source_path('../scripts/pars_dsp_dplms.py')} " + "--fft_raw_filelist {input.fft_files}" + "--cal_raw_filelist {input.cal_files}" + "--database {input.database} " + "--configs {configs} " + "--log {log} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--channel {params.channel} " + "--dsp_pars {output.dsp_pars}" + "--lh5_path {output.lh5_path}" + "--plot_path {output.plots} " + + rule build_pars_dsp: input: lambda wildcards: read_filelist_pars_cal_channel(wildcards, "dsp"), lambda wildcards: read_filelist_plts_cal_channel(wildcards, "dsp"), lambda wildcards: read_filelist_pars_cal_channel(wildcards, "dsp_objects_pkl"), + lambda wildcards: read_filelist_pars_cal_channel(wildcards, "dsp"), output: get_pattern_pars(setup, "dsp", check_in_cycle=check_in_cycle), get_pattern_pars( @@ -113,6 +154,12 @@ rule build_pars_dsp: check_in_cycle=check_in_cycle, ), get_pattern_plts(setup, "dsp"), + get_pattern_pars( + setup, + "dsp", + extension="lh5", + check_in_cycle=check_in_cycle, + ), group: "merge-dsp" shell: diff --git a/scripts/merge_channels.py b/scripts/merge_channels.py index 1d43e6f..b7d7a59 100644 --- a/scripts/merge_channels.py +++ b/scripts/merge_channels.py @@ -5,6 +5,10 @@ import pickle as pkl import shelve +import lgdo.lh5_store as lh5 +from lgdo import Array +sto = lh5.LH5Store() + argparser = argparse.ArgumentParser() argparser.add_argument("--input", help="input file", nargs="*", type=str) argparser.add_argument("--output", help="output file", nargs="*", type=str) @@ -31,6 +35,14 @@ name, ) = os.path.basename(channel).split("-") out_dict[channel_name] = channel_dict + + for key in channel_dict.keys(): + key_dict = channel_dict[key] + for key_pars in key_dict.keys(): + if isinstance(key_dict[key_pars], str): + if "loadlh5" in key_dict[key_pars]: + out_lh5 = outfile.replace(".json",".lh5") + out_dict[channel_name][key][key_pars] = f"loadlh5('{out_lh5}', '{channel_name}/{key}')" else: pass @@ -86,3 +98,38 @@ pass if len(common_dict) > 0: shelf["common"] = common_dict + + elif 
file_extension == ".lh5": + for channel in channel_files: + if os.path.splitext(channel)[0].split("-")[-1] == processing_step: + with open(channel) as r: + channel_dict = json.load(r) + ( + experiment, + period, + run, + datatype, + timestamp, + channel_name, + name, + ) = os.path.basename(channel).split("-") + + out_dict[channel_name] = channel_dict + + for key in channel_dict.keys(): + key_dict = channel_dict[key] + for key_pars in key_dict.keys(): + if isinstance(key_dict[key_pars], str): + if "loadlh5" in key_dict[key_pars]: + path_to_file = key_dict[key_pars].split("'")[1] + path_in_file = key_dict[key_pars].split("'")[3] + data = sto.read_object(path_in_file, path_to_file)[0].nda + sto.write_object( + Array(data), + name=key, + lh5_file=out_file, + wo_mode="overwrite", + group=channel_name + ) + else: + pass \ No newline at end of file diff --git a/scripts/pars_dsp_dplms.py b/scripts/pars_dsp_dplms.py new file mode 100644 index 0000000..9f5c0ca --- /dev/null +++ b/scripts/pars_dsp_dplms.py @@ -0,0 +1,103 @@ +from pygama.dsp.utils import numba_defaults + +numba_defaults.cache = False +numba_defaults.boundscheck = True + +import argparse +import json +import logging +import os +import pathlib +import pickle as pkl +import time + +import pygama.pargen.dplms_ge_dict as pdd +from legendmeta import LegendMetadata + +argparser = argparse.ArgumentParser() +argparser.add_argument("--fft_raw_filelist", help="fft_raw_filelist", type=str) +argparser.add_argument("--cal_raw_filelist", help="cal_raw_filelist", type=str) +argparser.add_argument("--database", help="database", type=str, required=True) +argparser.add_argument("--configs", help="configs", type=str, required=True) + +argparser.add_argument("--log", help="log_file", type=str) + +argparser.add_argument("--datatype", help="Datatype", type=str, required=True) +argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) +argparser.add_argument("--channel", help="Channel", type=str, required=True) + +argparser.add_argument("--dsp_pars", help="dsp_pars", type=str, required=True) +argparser.add_argument("--lh5_path", help="lh5_path", type=str, required=True) +argparser.add_argument("--plot_path", help="plot_path", type=str) + +args = argparser.parse_args() + +logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") +logging.getLogger("numba").setLevel(logging.INFO) +logging.getLogger("parse").setLevel(logging.INFO) +logging.getLogger("pygama.lgdo.lh5_store").setLevel(logging.INFO) +logging.getLogger("h5py._conv").setLevel(logging.INFO) +logging.getLogger("pygama.dsp.processing_chain").setLevel(logging.INFO) + +log = logging.getLogger(__name__) + + +t0 = time.time() + +conf = LegendMetadata(path=args.configs) +configs = configs.on(args.timestamp, system=args.datatype) +dsp_config = config_dict['snakemake_rules']['pars_dsp_dplms']["inputs"]['proc_chain'][args.channel] + +dplms_json = config_dict['snakemake_rules']['pars_dsp_dplms']["inputs"]['dplms_pars'][args.channel] +with open(dplms_json) as r: + dplms_dict = json.load(r) + +with open(args.database) as t: + db_dict = json.load(t) + +if opt_dict["run_dplms"] is True: + with open(args.fft_raw_filelist) as f: + fft_files = f.read().splitlines() + with open(args.cal_raw_filelist) as f: + cal_files = f.read().splitlines() + + fft_files = sorted(fft_files) + cal_files = sorted(cal_files) + + if isinstance(dsp_config, str): + with open(dsp_config) as r: + dsp_config = json.load(r) + + if args.plot_path: + out_dict, plot_dict = pdd.dplms_ge_dict( + args.channel, + 
fft_files, + cal_files, + dsp_config, + db_dict, + args.lh5_path, + dplms_dict, + display=1 + ) + pathlib.Path(os.path.dirname(args.plot_path)).mkdir(parents=True, exist_ok=True) + with open(args.plot_path, "wb") as f: + pkl.dump(plot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) + else: + out_dict, plot_dict = pdd.dplms_ge_dict( + args.channel, + fft_files, + cal_files, + dsp_config, + db_dict, + args.lh5_path, + dplms_dict, + ) + + t1 = time.time() + log.info(f"DPLMS creation finished in {(t1-t0)/60} minutes") +else: + out_dict = {} + +pathlib.Path(os.path.dirname(args.dsp_pars)).mkdir(parents=True, exist_ok=True) +with open(args.dsp_pars, "w") as w: + json.dump(out_dict, w, indent=2) From c3d3525155b49f2c911395b98cb95004437a73dc Mon Sep 17 00:00:00 2001 From: Valerio Dandrea Date: Tue, 28 Nov 2023 12:53:13 +0100 Subject: [PATCH 002/103] style fixes --- scripts/merge_channels.py | 30 ++++++++++++++---------------- scripts/pars_dsp_dplms.py | 17 ++++++++--------- 2 files changed, 22 insertions(+), 25 deletions(-) diff --git a/scripts/merge_channels.py b/scripts/merge_channels.py index b7d7a59..8e97bd6 100644 --- a/scripts/merge_channels.py +++ b/scripts/merge_channels.py @@ -39,10 +39,9 @@ for key in channel_dict.keys(): key_dict = channel_dict[key] for key_pars in key_dict.keys(): - if isinstance(key_dict[key_pars], str): - if "loadlh5" in key_dict[key_pars]: - out_lh5 = outfile.replace(".json",".lh5") - out_dict[channel_name][key][key_pars] = f"loadlh5('{out_lh5}', '{channel_name}/{key}')" + if isinstance(key_dict[key_pars], str) and ("loadlh5" in key_dict[key_pars]): + out_lh5 = out_file.replace(".json",".lh5") + out_dict[channel_name][key][key_pars] = f"loadlh5('{out_lh5}', '{channel_name}/{key}')" else: pass @@ -119,17 +118,16 @@ for key in channel_dict.keys(): key_dict = channel_dict[key] for key_pars in key_dict.keys(): - if isinstance(key_dict[key_pars], str): - if "loadlh5" in key_dict[key_pars]: - path_to_file = key_dict[key_pars].split("'")[1] - path_in_file = key_dict[key_pars].split("'")[3] - data = sto.read_object(path_in_file, path_to_file)[0].nda - sto.write_object( - Array(data), - name=key, - lh5_file=out_file, - wo_mode="overwrite", - group=channel_name - ) + if isinstance(key_dict[key_pars], str) and ("loadlh5" in key_dict[key_pars]): + path_to_file = key_dict[key_pars].split("'")[1] + path_in_file = key_dict[key_pars].split("'")[3] + data = sto.read_object(path_in_file, path_to_file)[0].nda + sto.write_object( + Array(data), + name=key, + lh5_file=out_file, + wo_mode="overwrite", + group=channel_name + ) else: pass \ No newline at end of file diff --git a/scripts/pars_dsp_dplms.py b/scripts/pars_dsp_dplms.py index 9f5c0ca..23db5b6 100644 --- a/scripts/pars_dsp_dplms.py +++ b/scripts/pars_dsp_dplms.py @@ -1,8 +1,3 @@ -from pygama.dsp.utils import numba_defaults - -numba_defaults.cache = False -numba_defaults.boundscheck = True - import argparse import json import logging @@ -11,9 +6,13 @@ import pickle as pkl import time +from pygama.dsp.utils import numba_defaults import pygama.pargen.dplms_ge_dict as pdd from legendmeta import LegendMetadata +numba_defaults.cache = False +numba_defaults.boundscheck = True + argparser = argparse.ArgumentParser() argparser.add_argument("--fft_raw_filelist", help="fft_raw_filelist", type=str) argparser.add_argument("--cal_raw_filelist", help="cal_raw_filelist", type=str) @@ -45,17 +44,17 @@ t0 = time.time() conf = LegendMetadata(path=args.configs) -configs = configs.on(args.timestamp, system=args.datatype) -dsp_config = 
config_dict['snakemake_rules']['pars_dsp_dplms']["inputs"]['proc_chain'][args.channel] +configs = conf.on(args.timestamp, system=args.datatype) +dsp_config = configs['snakemake_rules']['pars_dsp_dplms']["inputs"]['proc_chain'][args.channel] -dplms_json = config_dict['snakemake_rules']['pars_dsp_dplms']["inputs"]['dplms_pars'][args.channel] +dplms_json = configs['snakemake_rules']['pars_dsp_dplms']["inputs"]['dplms_pars'][args.channel] with open(dplms_json) as r: dplms_dict = json.load(r) with open(args.database) as t: db_dict = json.load(t) -if opt_dict["run_dplms"] is True: +if dplms_dict["run_dplms"] is True: with open(args.fft_raw_filelist) as f: fft_files = f.read().splitlines() with open(args.cal_raw_filelist) as f: From 1825128a0487e96f0f32a2a18ac30c6f6aa0b56a Mon Sep 17 00:00:00 2001 From: valerioda Date: Tue, 28 Nov 2023 12:54:27 +0100 Subject: [PATCH 003/103] run pre-commmit --- scripts/merge_channels.py | 33 ++++++++++++++++++++------------- scripts/pars_dsp_dplms.py | 8 ++++---- 2 files changed, 24 insertions(+), 17 deletions(-) diff --git a/scripts/merge_channels.py b/scripts/merge_channels.py index 8e97bd6..6df04bd 100644 --- a/scripts/merge_channels.py +++ b/scripts/merge_channels.py @@ -7,6 +7,7 @@ import lgdo.lh5_store as lh5 from lgdo import Array + sto = lh5.LH5Store() argparser = argparse.ArgumentParser() @@ -35,13 +36,17 @@ name, ) = os.path.basename(channel).split("-") out_dict[channel_name] = channel_dict - - for key in channel_dict.keys(): + + for key in channel_dict: key_dict = channel_dict[key] - for key_pars in key_dict.keys(): - if isinstance(key_dict[key_pars], str) and ("loadlh5" in key_dict[key_pars]): - out_lh5 = out_file.replace(".json",".lh5") - out_dict[channel_name][key][key_pars] = f"loadlh5('{out_lh5}', '{channel_name}/{key}')" + for key_pars in key_dict: + if isinstance(key_dict[key_pars], str) and ( + "loadlh5" in key_dict[key_pars] + ): + out_lh5 = out_file.replace(".json", ".lh5") + out_dict[channel_name][key][ + key_pars + ] = f"loadlh5('{out_lh5}', '{channel_name}/{key}')" else: pass @@ -112,13 +117,15 @@ channel_name, name, ) = os.path.basename(channel).split("-") - + out_dict[channel_name] = channel_dict - - for key in channel_dict.keys(): + + for key in channel_dict: key_dict = channel_dict[key] - for key_pars in key_dict.keys(): - if isinstance(key_dict[key_pars], str) and ("loadlh5" in key_dict[key_pars]): + for key_pars in key_dict: + if isinstance(key_dict[key_pars], str) and ( + "loadlh5" in key_dict[key_pars] + ): path_to_file = key_dict[key_pars].split("'")[1] path_in_file = key_dict[key_pars].split("'")[3] data = sto.read_object(path_in_file, path_to_file)[0].nda @@ -127,7 +134,7 @@ name=key, lh5_file=out_file, wo_mode="overwrite", - group=channel_name + group=channel_name, ) else: - pass \ No newline at end of file + pass diff --git a/scripts/pars_dsp_dplms.py b/scripts/pars_dsp_dplms.py index 23db5b6..f4c7296 100644 --- a/scripts/pars_dsp_dplms.py +++ b/scripts/pars_dsp_dplms.py @@ -6,9 +6,9 @@ import pickle as pkl import time -from pygama.dsp.utils import numba_defaults import pygama.pargen.dplms_ge_dict as pdd from legendmeta import LegendMetadata +from pygama.dsp.utils import numba_defaults numba_defaults.cache = False numba_defaults.boundscheck = True @@ -45,9 +45,9 @@ conf = LegendMetadata(path=args.configs) configs = conf.on(args.timestamp, system=args.datatype) -dsp_config = configs['snakemake_rules']['pars_dsp_dplms']["inputs"]['proc_chain'][args.channel] +dsp_config = 
configs["snakemake_rules"]["pars_dsp_dplms"]["inputs"]["proc_chain"][args.channel] -dplms_json = configs['snakemake_rules']['pars_dsp_dplms']["inputs"]['dplms_pars'][args.channel] +dplms_json = configs["snakemake_rules"]["pars_dsp_dplms"]["inputs"]["dplms_pars"][args.channel] with open(dplms_json) as r: dplms_dict = json.load(r) @@ -76,7 +76,7 @@ db_dict, args.lh5_path, dplms_dict, - display=1 + display=1, ) pathlib.Path(os.path.dirname(args.plot_path)).mkdir(parents=True, exist_ok=True) with open(args.plot_path, "wb") as f: From aa1d48082e2ecf30a2a0cd27a74ae6ba5f28a304 Mon Sep 17 00:00:00 2001 From: valerioda Date: Tue, 28 Nov 2023 18:37:48 +0100 Subject: [PATCH 004/103] moved load data out of pargen routine --- scripts/pars_dsp_dplms.py | 48 +++++++++++++++++++++++++++++++-------- 1 file changed, 39 insertions(+), 9 deletions(-) diff --git a/scripts/pars_dsp_dplms.py b/scripts/pars_dsp_dplms.py index f4c7296..131dd1c 100644 --- a/scripts/pars_dsp_dplms.py +++ b/scripts/pars_dsp_dplms.py @@ -6,9 +6,15 @@ import pickle as pkl import time +import lgdo.lh5_store as lh5 +import numpy as np import pygama.pargen.dplms_ge_dict as pdd from legendmeta import LegendMetadata from pygama.dsp.utils import numba_defaults +from pygama.pargen.energy_optimisation import ( + event_selection, + index_data, +) numba_defaults.cache = False numba_defaults.boundscheck = True @@ -39,9 +45,7 @@ logging.getLogger("pygama.dsp.processing_chain").setLevel(logging.INFO) log = logging.getLogger(__name__) - - -t0 = time.time() +sto = lh5.LH5Store() conf = LegendMetadata(path=args.configs) configs = conf.on(args.timestamp, system=args.datatype) @@ -63,6 +67,33 @@ fft_files = sorted(fft_files) cal_files = sorted(cal_files) + t0 = time.time() + log.info("\nLoad fft data") + energies = sto.read_object(f"{args.channel}/raw/daqenergy", fft_files)[0] + idxs = np.where(energies.nda == 0)[0] + raw_fft = sto.read_object( + f"{args.channel}/raw", fft_files, n_rows=dplms_dict["n_baselines"], idx=idxs + )[0] + t1 = time.time() + log.info(f"Time to load fft data {(t1-t0):.2f} s, total events {len(raw_fft)}") + + log.info("\nRunning event selection") + peaks_keV = np.array(dplms_dict["peaks_keV"]) + kev_widths = [tuple(kev_width) for kev_width in dplms_dict["kev_widths"]] + raw_cal, idx_list = event_selection( + cal_files, + f"{args.channel}/raw", + dsp_config, + db_dict[args.channel], + peaks_keV, + np.arange(0, len(peaks_keV), 1).tolist(), + kev_widths, + cut_parameters=dplms_dict["wfs_cut_pars"], + n_events=dplms_dict["n_signals"], + ) + raw_cal = index_data(raw_cal, idx_list[-1]) + log.info(f"Time to run event selection {(time.time()-t1):.2f} s, total events {len(raw_cal)}") + if isinstance(dsp_config, str): with open(dsp_config) as r: dsp_config = json.load(r) @@ -70,8 +101,8 @@ if args.plot_path: out_dict, plot_dict = pdd.dplms_ge_dict( args.channel, - fft_files, - cal_files, + raw_fft, + raw_cal, dsp_config, db_dict, args.lh5_path, @@ -84,16 +115,15 @@ else: out_dict, plot_dict = pdd.dplms_ge_dict( args.channel, - fft_files, - cal_files, + raw_fft, + raw_cal, dsp_config, db_dict, args.lh5_path, dplms_dict, ) - t1 = time.time() - log.info(f"DPLMS creation finished in {(t1-t0)/60} minutes") + log.info(f"DPLMS creation finished in {(time.time()-t0)/60} minutes") else: out_dict = {} From f52d125d0690dc0889a24144a093ea6177f2bb6f Mon Sep 17 00:00:00 2001 From: valerioda Date: Tue, 16 Jan 2024 18:28:06 +0100 Subject: [PATCH 005/103] modification to account for lgdo changes --- scripts/pars_dsp_dplms.py | 37 
++++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/scripts/pars_dsp_dplms.py b/scripts/pars_dsp_dplms.py index 131dd1c..524397c 100644 --- a/scripts/pars_dsp_dplms.py +++ b/scripts/pars_dsp_dplms.py @@ -6,18 +6,16 @@ import pickle as pkl import time +os.environ["LGDO_CACHE"] = "false" +os.environ["LGDO_BOUNDSCHECK"] = "false" +os.environ["DSPEED_CACHE"] = "false" +os.environ["DSPEED_BOUNDSCHECK"] = "false" + import lgdo.lh5_store as lh5 import numpy as np -import pygama.pargen.dplms_ge_dict as pdd from legendmeta import LegendMetadata -from pygama.dsp.utils import numba_defaults -from pygama.pargen.energy_optimisation import ( - event_selection, - index_data, -) - -numba_defaults.cache = False -numba_defaults.boundscheck = True +from pygama.pargen.dplms_ge_dict import dplms_ge_dict +from pygama.pargen.energy_optimisation import event_selection argparser = argparse.ArgumentParser() argparser.add_argument("--fft_raw_filelist", help="fft_raw_filelist", type=str) @@ -40,8 +38,9 @@ logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") logging.getLogger("numba").setLevel(logging.INFO) logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("pygama.lgdo.lh5_store").setLevel(logging.INFO) -logging.getLogger("h5py._conv").setLevel(logging.INFO) +logging.getLogger("lgdo").setLevel(logging.INFO) +logging.getLogger("h5py").setLevel(logging.INFO) +logging.getLogger("matplotlib").setLevel(logging.INFO) logging.getLogger("pygama.dsp.processing_chain").setLevel(logging.INFO) log = logging.getLogger(__name__) @@ -69,9 +68,9 @@ t0 = time.time() log.info("\nLoad fft data") - energies = sto.read_object(f"{args.channel}/raw/daqenergy", fft_files)[0] + energies = sto.read(f"{args.channel}/raw/daqenergy", fft_files)[0] idxs = np.where(energies.nda == 0)[0] - raw_fft = sto.read_object( + raw_fft = sto.read( f"{args.channel}/raw", fft_files, n_rows=dplms_dict["n_baselines"], idx=idxs )[0] t1 = time.time() @@ -80,7 +79,7 @@ log.info("\nRunning event selection") peaks_keV = np.array(dplms_dict["peaks_keV"]) kev_widths = [tuple(kev_width) for kev_width in dplms_dict["kev_widths"]] - raw_cal, idx_list = event_selection( + idx_events, idx_list = event_selection( cal_files, f"{args.channel}/raw", dsp_config, @@ -91,7 +90,11 @@ cut_parameters=dplms_dict["wfs_cut_pars"], n_events=dplms_dict["n_signals"], ) - raw_cal = index_data(raw_cal, idx_list[-1]) + raw_cal = sto.read( + f"{args.channel}/raw", + cal_files, + idx=idx_events, + )[0] log.info(f"Time to run event selection {(time.time()-t1):.2f} s, total events {len(raw_cal)}") if isinstance(dsp_config, str): @@ -99,7 +102,7 @@ dsp_config = json.load(r) if args.plot_path: - out_dict, plot_dict = pdd.dplms_ge_dict( + out_dict, plot_dict = dplms_ge_dict( args.channel, raw_fft, raw_cal, @@ -113,7 +116,7 @@ with open(args.plot_path, "wb") as f: pkl.dump(plot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) else: - out_dict, plot_dict = pdd.dplms_ge_dict( + out_dict, plot_dict = dplms_ge_dict( args.channel, raw_fft, raw_cal, From 9b851cce5008f32cd2045cbee7312f946fc5a497 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 20 Feb 2024 19:21:43 +0100 Subject: [PATCH 006/103] changes for upgrades to optimisation --- scripts/pars_dsp_eopt.py | 46 ++++++++++++++++++++++++++++------------ 1 file changed, 33 insertions(+), 13 deletions(-) diff --git a/scripts/pars_dsp_eopt.py b/scripts/pars_dsp_eopt.py index 9f39691..06b4ebd 100644 --- a/scripts/pars_dsp_eopt.py +++ b/scripts/pars_dsp_eopt.py @@ -5,22 +5,26 @@ 
import pathlib import pickle as pkl import time +import warnings os.environ["LGDO_CACHE"] = "false" os.environ["LGDO_BOUNDSCHECK"] = "false" os.environ["DSPEED_CACHE"] = "false" os.environ["DSPEED_BOUNDSCHECK"] = "false" -import lgdo.lh5_store as lh5 +import lgdo.lh5 as lh5 import numpy as np import pygama.math.peak_fitting as pgf import pygama.pargen.energy_optimisation as om import sklearn.gaussian_process.kernels as ker +from dspeed.units import unit_registry as ureg from legendmeta import LegendMetadata from legendmeta.catalog import Props from pygama.pargen.dsp_optimize import run_one_dsp from pygama.pargen.utils import get_tcm_pulser_ids +warnings.filterwarnings(action="ignore", category=RuntimeWarning) + argparser = argparse.ArgumentParser() argparser.add_argument("--raw_filelist", help="raw_filelist", type=str) argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, required=True) @@ -161,7 +165,7 @@ wf_field=opt_dict["wf_field"], ) - tb_data = sto.read_object( + tb_data = sto.read( f"{args.channel}/raw", raw_files, idx=idx_events, @@ -172,12 +176,14 @@ log.info(f"Data Loaded in {(t1-t0)/60} minutes") if isinstance(dsp_config, str): - with open(dsp_config) as r: - dsp_config = json.load(r) + dsp_config = Props.read_from(dsp_config) + + dsp_config["outputs"] = ["tp_99", "tp_0_est", "dt_eff"] init_data = run_one_dsp(tb_data, dsp_config, db_dict=db_dict, verbosity=0) full_dt = (init_data["tp_99"].nda - init_data["tp_0_est"].nda)[idx_list[-1]] flat_val = np.ceil(1.1 * np.nanpercentile(full_dt, 99) / 100) / 10 + if flat_val < 1.0: flat_val = 1.0 elif flat_val > 4: @@ -291,23 +297,37 @@ + ker.WhiteKernel(noise_level=0.1, noise_level_bounds=(1e-5, 1e1)) ) + lambda_param = 5 + sampling_rate = tb_data["waveform_presummed"]["dt"][0] + sampling_unit = ureg.Quantity(tb_data["waveform_presummed"]["dt"].attrs["units"]) + waveform_sampling = sampling_rate * sampling_unit + bopt_cusp = om.BayesianOptimizer( - acq_func=opt_dict["acq_func"], batch_size=opt_dict["batch_size"], kernel=kernel + acq_func=opt_dict["acq_func"], + batch_size=opt_dict["batch_size"], + kernel=kernel, + sampling_rate=waveform_sampling, ) - bopt_cusp.lambda_param = 1 - bopt_cusp.add_dimension("cusp", "sigma", 1, 16, 2, "us") + bopt_cusp.lambda_param = lambda_param + bopt_cusp.add_dimension("cusp", "sigma", 0.5, 16, True, "us") bopt_zac = om.BayesianOptimizer( - acq_func=opt_dict["acq_func"], batch_size=opt_dict["batch_size"], kernel=kernel + acq_func=opt_dict["acq_func"], + batch_size=opt_dict["batch_size"], + kernel=kernel, + sampling_rate=waveform_sampling, ) - bopt_zac.lambda_param = 1 - bopt_zac.add_dimension("zac", "sigma", 1, 16, 2, "us") + bopt_zac.lambda_param = lambda_param + bopt_zac.add_dimension("zac", "sigma", 0.5, 16, True, "us") bopt_trap = om.BayesianOptimizer( - acq_func=opt_dict["acq_func"], batch_size=opt_dict["batch_size"], kernel=kernel + acq_func=opt_dict["acq_func"], + batch_size=opt_dict["batch_size"], + kernel=kernel, + sampling_rate=waveform_sampling, ) - bopt_trap.lambda_param = 1 - bopt_trap.add_dimension("etrap", "rise", 1, 12, 2, "us") + bopt_trap.lambda_param = lambda_param + bopt_trap.add_dimension("etrap", "rise", 1, 12, True, "us") bopt_cusp.add_initial_values(x_init=sample_x, y_init=sample_y_cusp, yerr_init=err_y_cusp) bopt_zac.add_initial_values(x_init=sample_x, y_init=sample_y_zac, yerr_init=err_y_zac) From d73d49c3626c253e26697e6ad4ef4b0112f11fcb Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sun, 10 Mar 2024 21:27:38 +0100 Subject: [PATCH 007/103] add legendmeta to info 
for logs --- scripts/pars_hit_aoe.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/pars_hit_aoe.py b/scripts/pars_hit_aoe.py index b4fad6d..6017f79 100644 --- a/scripts/pars_hit_aoe.py +++ b/scripts/pars_hit_aoe.py @@ -99,6 +99,7 @@ def aoe_calibration( logging.getLogger("lgdo").setLevel(logging.INFO) logging.getLogger("h5py").setLevel(logging.INFO) logging.getLogger("matplotlib").setLevel(logging.INFO) +logging.getLogger("legendmeta").setLevel(logging.INFO) configs = LegendMetadata(path=args.configs) channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ From 7a0f6d11afe738fa6e11b081f2f0a0980b01e799 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sun, 10 Mar 2024 21:27:57 +0100 Subject: [PATCH 008/103] first changes for new ecal --- scripts/pars_hit_ecal.py | 135 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 130 insertions(+), 5 deletions(-) diff --git a/scripts/pars_hit_ecal.py b/scripts/pars_hit_ecal.py index 169ed35..ac92032 100644 --- a/scripts/pars_hit_ecal.py +++ b/scripts/pars_hit_ecal.py @@ -125,6 +125,70 @@ def baseline_tracking_plots(files, lh5_path, plot_options=None): plot_dict[key] = item["function"](data) return plot_dict +def get_results_dict(ecal_class, data): + if np.isnan(ecal_class.pars).all(): + return {} + else: + fwhm_linear = ecal_class.fwhm_fit_linear.copy() + fwhm_linear["parameters"] = fwhm_linear["parameters"].to_dict() + fwhm_linear["uncertainties"] = fwhm_linear["uncertainties"].to_dict() + fwhm_linear["cov"] = fwhm_linear["cov"].tolist() + fwhm_quad = ecal_class.fwhm_fit_quadratic.copy() + fwhm_quad["parameters"] = fwhm_quad["parameters"].to_dict() + fwhm_quad["uncertainties"] = fwhm_quad["uncertainties"].to_dict() + fwhm_quad["cov"] = fwhm_quad["cov"].tolist() + + pk_dict = { + Ei: { + "function": func_i.__name__, + "module": func_i.__module__, + "parameters_in_ADC": parsi.to_dict(), + "uncertainties_in_ADC": errorsi.to_dict(), + "p_val": pvali, + "fwhm_in_keV": list(fwhmi), + "pk_position":(posi, posuni), + } + for i, (Ei, parsi, errorsi, pvali, fwhmi, posi, posuni, func_i) in enumerate( + zip( + ecal_class.results["fitted_keV"], + ecal_class.results["pk_pars"][ecal_class.results["pk_validities"]], + ecal_class.results["pk_errors"][ecal_class.results["pk_validities"]], + ecal_class.results["pk_pvals"][ecal_class.results["pk_validities"]], + ecal_class.results["pk_fwhms"], + ecal_class.results["pk_pos"], + ecal_class.results["pk_pos_uncertainties"], + ecal_class.funcs, + ) + ) + } + + return { + "total_fep": len( + data.query( + f"{ecal_class.cal_energy_param}>2604&{ecal_class.cal_energy_param}<2624" + ) + ), + "total_dep": len( + data.query( + f"{ecal_class.cal_energy_param}>1587&{ecal_class.cal_energy_param}<1597" + ) + ), + "pass_fep": len( + data.query( + f"{ecal_class.cal_energy_param}>2604&{ecal_class.cal_energy_param}<2624&{ecal_class.selection_string}" + ) + ), + "pass_dep": len( + data.query( + f"{ecal_class.cal_energy_param}>1587&{ecal_class.cal_energy_param}<1597&{ecal_class.selection_string}" + ) + ), + "eres_linear": fwhm_linear, + "eres_quadratic": fwhm_quad, + "fitted_peaks": ecal_class.results["fitted_keV"].tolist(), + "pk_fits": pk_dict, + "mode":ecal_class.results["mode"], + } def energy_cal_th( data: pd.Dataframe, @@ -153,12 +217,58 @@ def energy_cal_th( if cal_energy_params is None: cal_energy_params = [energy_param + "_cal" for energy_param in energy_params] + + glines = [ + # 238.632, + 583.191, + 727.330, + 860.564, + 1592.53, + 1620.50, + 2103.53, + 2614.50, + ] # gamma lines used for 
calibration
+    range_keV = [
+        # (8, 8),
+        (20, 20),
+        (30, 30),
+        (30, 30),
+        (40, 20),
+        (20, 40),
+        (40, 40),
+        (60, 60),
+    ]  # side-band widths
+    funcs = [
+        # pgf.extended_gauss_step_pdf,
+        pgf.extended_radford_pdf,
+        pgf.extended_radford_pdf,
+        pgf.extended_radford_pdf,
+        pgf.extended_radford_pdf,
+        pgf.extended_radford_pdf,
+        pgf.extended_radford_pdf,
+        pgf.extended_radford_pdf,
+    ]
+    gof_funcs = [
+        # pgf.gauss_step_pdf,
+        pgf.radford_pdf,
+        pgf.radford_pdf,
+        pgf.radford_pdf,
+        pgf.radford_pdf,
+        pgf.radford_pdf,
+        pgf.radford_pdf,
+        pgf.radford_pdf,
+    ]
+
     results_dict = {}
     plot_dict = {}
     full_object_dict = {}
     for energy_param, cal_energy_param in zip(energy_params, cal_energy_params):
         full_object_dict[cal_energy_param] = calibrate_parameter(
             energy_param,
+            glines,
+            range_keV,
+            funcs,
+            gof_funcs,
             selection_string,
             plot_options,
             guess_keV,
@@ -168,9 +278,10 @@ def energy_cal_th(
             simplex,
             deg,
             tail_weight=tail_weight,
+            cal_energy_param=cal_energy_param,
         )
         full_object_dict[cal_energy_param].calibrate_parameter(data)
-        results_dict[cal_energy_param] = full_object_dict[cal_energy_param].get_results_dict(data)
+        results_dict[cal_energy_param] = get_results_dict(full_object_dict[cal_energy_param], data)
         hit_dict.update(full_object_dict[cal_energy_param].hit_dict)
         if ~np.isnan(full_object_dict[cal_energy_param].pars).all():
             plot_dict[cal_energy_param] = (
@@ -186,6 +297,8 @@
     argparser.add_argument("--files", help="files", nargs="*", type=str)
     argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, required=True)
     argparser.add_argument("--ctc_dict", help="ctc_dict", nargs="*")
+    argparser.add_argument("--in_hit_dict", help="in_hit_dict", nargs="*", required=False)
+    argparser.add_argument("--inplot_dict", help="inplot_dict", nargs="*", required=False)

     argparser.add_argument("--configs", help="config", type=str, required=True)
     argparser.add_argument("--datatype", help="Datatype", type=str, required=True)
@@ -206,10 +319,14 @@
     logging.getLogger("lgdo").setLevel(logging.INFO)
     logging.getLogger("h5py").setLevel(logging.INFO)
     logging.getLogger("matplotlib").setLevel(logging.INFO)
+    logging.getLogger("legendmeta").setLevel(logging.INFO)
+
+    if args.in_hit_dict:
+        hit_dict = Props.read_from(args.in_hit_dict)
+    else:
+        hit_dict = {}

     database_dic = Props.read_from(args.ctc_dict)

-    hit_dict = database_dic[args.channel]["ctc_params"]
+    hit_dict.update(database_dic[args.channel]["ctc_params"])

     # get metadata dictionary
     configs = LegendMetadata(path=args.configs)
@@ -274,8 +391,6 @@
         plot_item = common_dict.pop(plot)
         plot_dict.update({plot: plot_item})

-    pathlib.Path(os.path.dirname(args.plot_path)).mkdir(parents=True, exist_ok=True)
-
     for key, item in plot_dict.items():
         if isinstance(item, dict) and len(item) > 0:
             param_dict = {}
             if plot in item:
                 param_dict.update({plot: item[plot]})
             common_dict.update({key: param_dict})
-    plot_dict["common"] = common_dict

+    if args.inplot_dict:
+        with open(args.inplot_dict, "rb") as f:
+            total_plot_dict = pkl.load(f)
+        if "common" in total_plot_dict:
+            total_plot_dict["common"].update(common_dict)
+        else:
+            total_plot_dict["common"] = common_dict
+        total_plot_dict.update(plot_dict)
+    else:
+        plot_dict["common"] = common_dict
+        total_plot_dict = plot_dict
+
+    pathlib.Path(os.path.dirname(args.plot_path)).mkdir(parents=True, exist_ok=True)
     with open(args.plot_path, "wb") as f:
-        pkl.dump(plot_dict, f, protocol=pkl.HIGHEST_PROTOCOL)
+        pkl.dump(total_plot_dict, f, protocol=pkl.HIGHEST_PROTOCOL)

From 5d5f075f370515f9c9c3a51e854823b593f8693c Mon 
Sep 17 00:00:00 2001 From: ggmarshall Date: Sun, 10 Mar 2024 21:30:38 +0100 Subject: [PATCH 009/103] add dplms --- rules/dsp.smk | 152 ++++++++++++++++++++++++++------------ scripts/pars_dsp_dplms.py | 73 ++++++++++++------ 2 files changed, 154 insertions(+), 71 deletions(-) diff --git a/rules/dsp.smk b/rules/dsp.smk index 49caaa2..002496f 100644 --- a/rules/dsp.smk +++ b/rules/dsp.smk @@ -92,98 +92,111 @@ rule build_pars_dsp_nopt: "--raw_filelist {input.files}" -# This rule builds the optimal energy filter parameters for the dsp using calibration dsp files -rule build_pars_dsp_eopt: +# This rule builds the dplms energy filter for the dsp using fft and cal files +rule build_pars_dsp_dplms: input: - files=os.path.join( + fft_files=os.path.join( + filelist_path(setup), "all-{experiment}-{period}-{run}-fft-raw.filelist" + ), + cal_files=os.path.join( filelist_path(setup), "all-{experiment}-{period}-{run}-cal-raw.filelist" ), - tcm_filelist=os.path.join( + tcm_files=os.path.join( filelist_path(setup), "all-{experiment}-{period}-{run}-cal-tcm.filelist" ), - decay_const=get_pattern_pars_tmp_channel(setup, "dsp", "noise_optimization"), + database=get_pattern_pars_tmp_channel(setup, "dsp", "noise_optimization"), inplots=get_pattern_plts_tmp_channel(setup, "dsp", "noise_optimization"), params: timestamp="{timestamp}", datatype="cal", channel="{channel}", output: - dsp_pars=temp(get_pattern_pars_tmp_channel(setup, "dsp")), - qbb_grid=temp( - get_pattern_pars_tmp_channel(setup, "dsp", "objects", extension="pkl") - ), - plots=temp(get_pattern_plts_tmp_channel(setup, "dsp")), + dsp_pars=temp(get_pattern_pars_tmp_channel(setup, "dsp",'dplms')), + lh5_path=temp(get_pattern_pars_tmp_channel(setup, "dsp","dplms",extension="lh5")), + plots=temp(get_pattern_plts_tmp_channel(setup, "dsp", "dplms")), log: - get_pattern_log_channel(setup, "pars_dsp_eopt"), + get_pattern_log_channel(setup, "pars_dsp_dplms"), group: "par-dsp" resources: runtime=300, shell: "{swenv} python3 -B " - f"{workflow.source_path('../scripts/pars_dsp_eopt.py')} " - "--log {log} " + f"{workflow.source_path('../scripts/pars_dsp_dplms.py')} " + "--fft_raw_filelist {input.fft_files} " + "--cal_raw_filelist {input.cal_files} " + "--tcm_filelist {input.tcm_files} " + "--database {input.database} " + "--inplots {input.inplots} " "--configs {configs} " + "--log {log} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " - "--raw_filelist {input.files} " - "--tcm_filelist {input.tcm_filelist} " - "--inplots {input.inplots} " - "--decay_const {input.decay_const} " + "--dsp_pars {output.dsp_pars} " + "--lh5_path {output.lh5_path} " "--plot_path {output.plots} " - "--qbb_grid_path {output.qbb_grid} " - "--final_dsp_pars {output.dsp_pars}" - -# This rule builds the dplms energy filter for the dsp using fft and cal files -rule build_pars_dsp_dplms: +# This rule builds the optimal energy filter parameters for the dsp using calibration dsp files +rule build_pars_dsp_eopt: input: - fft_files=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-fft-raw.filelist" - ), - cal_files=os.path.join( + files=os.path.join( filelist_path(setup), "all-{experiment}-{period}-{run}-cal-raw.filelist" ), - database=get_pattern_pars_tmp_channel(setup, "dsp", "decay_constant"), + tcm_filelist=os.path.join( + filelist_path(setup), "all-{experiment}-{period}-{run}-cal-tcm.filelist" + ), + decay_const=get_pattern_pars_tmp_channel(setup, "dsp", "dplms"), + inplots=get_pattern_plts_tmp_channel(setup, "dsp", "dplms"), 
params: timestamp="{timestamp}", datatype="cal", channel="{channel}", output: dsp_pars=temp(get_pattern_pars_tmp_channel(setup, "dsp")), - lh5_path=temp(get_pattern_pars_tmp_channel(setup, "dsp", extension="lh5")), - plots=temp(get_pattern_plts_tmp_channel(setup, "dsp", "dplms")), + qbb_grid=temp( + get_pattern_pars_tmp_channel(setup, "dsp", "objects", extension="pkl") + ), + plots=temp(get_pattern_plts_tmp_channel(setup, "dsp")), log: - get_pattern_log_channel(setup, "pars_dsp_dplms"), + get_pattern_log_channel(setup, "pars_dsp_eopt"), group: "par-dsp" resources: runtime=300, shell: "{swenv} python3 -B " - f"{workflow.source_path('../scripts/pars_dsp_dplms.py')} " - "--fft_raw_filelist {input.fft_files}" - "--cal_raw_filelist {input.cal_files}" - "--database {input.database} " - "--configs {configs} " + f"{workflow.source_path('../scripts/pars_dsp_eopt.py')} " "--log {log} " + "--configs {configs} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " - "--dsp_pars {output.dsp_pars}" - "--lh5_path {output.lh5_path}" + "--raw_filelist {input.files} " + "--tcm_filelist {input.tcm_filelist} " + "--inplots {input.inplots} " + "--decay_const {input.decay_const} " "--plot_path {output.plots} " + "--qbb_grid_path {output.qbb_grid} " + "--final_dsp_pars {output.dsp_pars}" - -rule build_pars_dsp: +rule build_plts_dsp: input: - lambda wildcards: read_filelist_pars_cal_channel(wildcards, "dsp"), lambda wildcards: read_filelist_plts_cal_channel(wildcards, "dsp"), + output: + get_pattern_plts(setup, "dsp"), + group: + "merge-dsp" + shell: + "{swenv} python3 -B " + f"{basedir}/../scripts/merge_channels.py " + "--input {input} " + "--output {output} " + +rule build_pars_dsp_objects: + input: lambda wildcards: read_filelist_pars_cal_channel(wildcards, "dsp_objects_pkl"), - lambda wildcards: read_filelist_pars_cal_channel(wildcards, "dsp"), output: - get_pattern_pars(setup, "dsp", check_in_cycle=check_in_cycle), get_pattern_pars( setup, "dsp", @@ -191,26 +204,69 @@ rule build_pars_dsp: extension="dir", check_in_cycle=check_in_cycle, ), - get_pattern_plts(setup, "dsp"), - get_pattern_pars( + group: + "merge-dsp" + shell: + "{swenv} python3 -B " + f"{basedir}/../scripts/merge_channels.py " + "--input {input} " + "--output {output} " + +rule build_pars_dsp_db: + input: + lambda wildcards: read_filelist_pars_cal_channel(wildcards, "dsp"), + output: + temp(get_pattern_pars_tmp( + setup, + "dsp", + datatype="cal", + )), + group: + "merge-dsp" + shell: + "{swenv} python3 -B " + f"{basedir}/../scripts/merge_channels.py " + "--input {input} " + "--output {output} " + +rule build_pars_dsp: + input: + in_files = lambda wildcards: read_filelist_pars_cal_channel(wildcards, "dsp_dplms_lh5"), + in_db = get_pattern_pars_tmp( + setup, + "dsp", + datatype="cal", + ), + plts = get_pattern_plts(setup, "dsp"), + objects = get_pattern_pars( + setup, + "dsp", + name="objects", + extension="dir", + check_in_cycle=check_in_cycle, + ), + output: + out_file = get_pattern_pars( setup, "dsp", extension="lh5", check_in_cycle=check_in_cycle, ), + out_db = get_pattern_pars(setup, "dsp", check_in_cycle=check_in_cycle), group: "merge-dsp" shell: "{swenv} python3 -B " - f"{workflow.source_path('../scripts/merge_channels.py')} " - "--input {input} " - "--output {output} " + f"{basedir}/../scripts/merge_channels.py " + "--output {output.out_file} " + "--in_db {input.in_db} " + "--out_db {output.out_db} " + "--input {input.in_files} " rule build_dsp: input: raw_file=get_pattern_tier_raw(setup), - 
tcm_file=get_pattern_tier_tcm(setup), pars_file=ancient( lambda wildcards: pars_catalog.get_par_file( setup, wildcards.timestamp, "dsp" diff --git a/scripts/pars_dsp_dplms.py b/scripts/pars_dsp_dplms.py index 524397c..52bb811 100644 --- a/scripts/pars_dsp_dplms.py +++ b/scripts/pars_dsp_dplms.py @@ -11,19 +11,24 @@ os.environ["DSPEED_CACHE"] = "false" os.environ["DSPEED_BOUNDSCHECK"] = "false" -import lgdo.lh5_store as lh5 +import lgdo.lh5 as lh5 import numpy as np from legendmeta import LegendMetadata +from legendmeta.catalog import Props from pygama.pargen.dplms_ge_dict import dplms_ge_dict from pygama.pargen.energy_optimisation import event_selection +from pygama.pargen.utils import get_tcm_pulser_ids +from lgdo import Array, Table argparser = argparse.ArgumentParser() argparser.add_argument("--fft_raw_filelist", help="fft_raw_filelist", type=str) argparser.add_argument("--cal_raw_filelist", help="cal_raw_filelist", type=str) -argparser.add_argument("--database", help="database", type=str, required=True) -argparser.add_argument("--configs", help="configs", type=str, required=True) +argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, required=True) +argparser.add_argument("--inplots", help="in_plot_path", type=str) argparser.add_argument("--log", help="log_file", type=str) +argparser.add_argument("--database", help="database", type=str, required=True) +argparser.add_argument("--configs", help="configs", type=str, required=True) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) @@ -41,7 +46,8 @@ logging.getLogger("lgdo").setLevel(logging.INFO) logging.getLogger("h5py").setLevel(logging.INFO) logging.getLogger("matplotlib").setLevel(logging.INFO) -logging.getLogger("pygama.dsp.processing_chain").setLevel(logging.INFO) +logging.getLogger("dspeed.processing_chain").setLevel(logging.INFO) +logging.getLogger("legendmeta").setLevel(logging.INFO) log = logging.getLogger(__name__) sto = lh5.LH5Store() @@ -51,20 +57,15 @@ dsp_config = configs["snakemake_rules"]["pars_dsp_dplms"]["inputs"]["proc_chain"][args.channel] dplms_json = configs["snakemake_rules"]["pars_dsp_dplms"]["inputs"]["dplms_pars"][args.channel] -with open(dplms_json) as r: - dplms_dict = json.load(r) +dplms_dict = Props.read_from(dplms_json) -with open(args.database) as t: - db_dict = json.load(t) +db_dict = Props.read_from(args.database) if dplms_dict["run_dplms"] is True: with open(args.fft_raw_filelist) as f: - fft_files = f.read().splitlines() + fft_files = sorted(f.read().splitlines()) with open(args.cal_raw_filelist) as f: - cal_files = f.read().splitlines() - - fft_files = sorted(fft_files) - cal_files = sorted(cal_files) + cal_files = sorted(f.read().splitlines()) t0 = time.time() log.info("\nLoad fft data") @@ -76,6 +77,15 @@ t1 = time.time() log.info(f"Time to load fft data {(t1-t0):.2f} s, total events {len(raw_fft)}") + log.info("\nRemoving pulser") + # get pulser mask from tcm files + with open(args.tcm_filelist) as f: + tcm_files = f.read().splitlines() + tcm_files = sorted(np.unique(tcm_files)) + ids, mask = get_tcm_pulser_ids( + tcm_files, args.channel, dplms_dict.pop("pulser_multiplicity_threshold") + ) + log.info("\nRunning event selection") peaks_keV = np.array(dplms_dict["peaks_keV"]) kev_widths = [tuple(kev_width) for kev_width in dplms_dict["kev_widths"]] @@ -83,12 +93,14 @@ cal_files, f"{args.channel}/raw", dsp_config, - db_dict[args.channel], + db_dict, peaks_keV, np.arange(0, 
len(peaks_keV), 1).tolist(),
         kev_widths,
+        pulser_mask=mask,
         cut_parameters=dplms_dict["wfs_cut_pars"],
         n_events=dplms_dict["n_signals"],
+        threshold=dplms_dict["threshold"],
     )
+    raw_cal = sto.read(
+        f"{args.channel}/raw",
+        cal_files,
+        idx=idx_events,
+    )[0]
     log.info(f"Time to run event selection {(time.time()-t1):.2f} s, total events {len(raw_cal)}")

-    if isinstance(dsp_config, str):
-        with open(dsp_config) as r:
-            dsp_config = json.load(r)
+    if isinstance(dsp_config, (str, list)):
+        dsp_config = Props.read_from(dsp_config)

     if args.plot_path:
         out_dict, plot_dict = dplms_ge_dict(
-            args.channel,
             raw_fft,
             raw_cal,
             dsp_config,
             db_dict,
-            args.lh5_path,
             dplms_dict,
             display=1,
         )
+        if args.inplots:
+            with open(args.inplots, "rb") as r:
+                inplot_dict = pkl.load(r)
+            inplot_dict.update({"dplms": plot_dict})
+        else:
+            inplot_dict = {"dplms": plot_dict}
+
         pathlib.Path(os.path.dirname(args.plot_path)).mkdir(parents=True, exist_ok=True)
         with open(args.plot_path, "wb") as f:
-            pkl.dump(plot_dict, f, protocol=pkl.HIGHEST_PROTOCOL)
+            pkl.dump(inplot_dict, f, protocol=pkl.HIGHEST_PROTOCOL)
     else:
-        out_dict, plot_dict = dplms_ge_dict(
-            args.channel,
+        out_dict = dplms_ge_dict(
             raw_fft,
             raw_cal,
             dsp_config,
             db_dict,
-            args.lh5_path,
             dplms_dict,
         )

+    coeffs = out_dict["dplms"].pop("coefficients")
+    dplms_pars = Table(col_dict={"coefficients": Array(coeffs)})
+    out_dict["dplms"]["coefficients"] = f"loadlh5('{args.lh5_path}', '{args.channel}/dplms/coefficients')"
+
     log.info(f"DPLMS creation finished in {(time.time()-t0)/60} minutes")
 else:
     out_dict = {}
+    dplms_pars = Table(col_dict={"coefficients": Array([])})
+
+db_dict.update(out_dict)
+
+pathlib.Path(os.path.dirname(args.lh5_path)).mkdir(parents=True, exist_ok=True)
+sto.write(
+    Table(col_dict={"dplms": dplms_pars}),
+    name=args.channel,
+    lh5_file=args.lh5_path,
+    wo_mode="overwrite",
+)

 pathlib.Path(os.path.dirname(args.dsp_pars)).mkdir(parents=True, exist_ok=True)
 with open(args.dsp_pars, "w") as w:
-    json.dump(out_dict, w, indent=2)
+    json.dump(db_dict, w, indent=2)

From 41d45c07b62e76573bd93e6c69ace58beaf67bc3 Mon Sep 17 00:00:00 2001
From: ggmarshall
Date: Sun, 10 Mar 2024 21:31:02 +0100
Subject: [PATCH 010/103] split out merging into separate rules

---
 rules/pht.smk | 56 +++++++++++++++++++++++++++++++++++----------------
 1 file changed, 45 insertions(+), 11 deletions(-)

diff --git a/rules/pht.smk b/rules/pht.smk
index 71f9acd..f375fe6 100644
--- a/rules/pht.smk
+++ b/rules/pht.smk
@@ -7,6 +7,7 @@
 Snakemake rules for processing pht (partition hit) tier data. 
This is done in 4 """ from scripts.util.pars_loading import pars_catalog +import scripts.util.create_pars_keylist import pars_key_resolve from scripts.util.utils import filelist_path, par_pht_path, set_last_rule_name from scripts.util.patterns import ( get_pattern_pars_tmp_channel, @@ -21,6 +22,13 @@ from scripts.util.patterns import ( get_pattern_pars, ) +ds.pars_key_resolve.write_par_catalog( + ["-*-*-*-cal"], + os.path.join(pars_path(setup), "pht", "validity.jsonl"), + get_pattern_tier_raw(setup), + {"cal": ["par_pht"], "lar": ["par_pht"]}, +) + # This rule builds the energy calibration using the calibration dsp files rule build_per_energy_calibration: @@ -69,33 +77,62 @@ rule build_per_energy_calibration: "--tcm_filelist {input.tcm_filelist} " "--files {input.files}" - -rule build_pars_pht: +rule build_pars_pht_objects: input: - lambda wildcards: read_filelist_pars_cal_channel(wildcards, "pht"), - lambda wildcards: read_filelist_plts_cal_channel(wildcards, "pht"), lambda wildcards: read_filelist_pars_cal_channel( wildcards, "pht_objects_pkl", ), output: - get_pattern_pars(setup, "pht", check_in_cycle=check_in_cycle), get_pattern_pars( setup, "pht", name="objects", extension="dir", check_in_cycle=check_in_cycle, - ), - get_pattern_plts(setup, "pht"), + ) group: "merge-hit" shell: "{swenv} python3 -B " - f"{workflow.source_path('../scripts/merge_channels.py')} " + f"{basedir}/../scripts/merge_channels.py " "--input {input} " "--output {output} " +rule build_plts_pht: + input: + lambda wildcards: read_filelist_plts_cal_channel(wildcards, "pht"), + output: + get_pattern_plts(setup, "pht") + group: + "merge-hit" + shell: + "{swenv} python3 -B " + f"{basedir}/../scripts/merge_channels.py " + "--input {input} " + "--output {output} " + +rule build_pars_pht: + input: + infiles = lambda wildcards: read_filelist_pars_cal_channel(wildcards, "pht"), + plts = get_pattern_plts(setup, "pht"), + objects = get_pattern_pars( + setup, + "pht", + name="objects", + extension="dir", + check_in_cycle=check_in_cycle, + ) + output: + get_pattern_pars(setup, "pht", check_in_cycle=check_in_cycle), + group: + "merge-hit" + shell: + "{swenv} python3 -B " + f"{basedir}/../scripts/merge_channels.py " + "--input {input.infiles} " + "--output {output} " + rule build_pht: input: @@ -134,9 +171,6 @@ rule build_pht: part_pht_rules = {} for key, dataset in part.datasets.items(): for partition in dataset.keys(): - print( - part.get_wildcard_constraints(partition, key), - ) rule: input: From fd67c2f0ce271ba67b70d0756f714fadad54cf4e Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sun, 10 Mar 2024 21:31:24 +0100 Subject: [PATCH 011/103] first draft psp --- rules/psp.smk | 256 +++++++++++++++++++++++++++++++++++++++++++++ scripts/par_psp.py | 100 ++++++++++++++++++ 2 files changed, 356 insertions(+) create mode 100644 rules/psp.smk create mode 100644 scripts/par_psp.py diff --git a/rules/psp.smk b/rules/psp.smk new file mode 100644 index 0000000..811893e --- /dev/null +++ b/rules/psp.smk @@ -0,0 +1,256 @@ +""" +Snakemake rules for processing pht (partition hit) tier data. 
This is done in 4 steps:
+- extraction of calibration curve(s) for each run for each channel from cal data
+- extraction of psd calibration parameters and partition level energy fitting for each channel over whole partition from cal data
+- combining of all channels into single pars files with associated plot and results files
+- running build hit over all channels using par file
+"""
+
+from scripts.util.pars_loading import pars_catalog
+from scripts.util.create_pars_keylist import pars_key_resolve
+from scripts.util.utils import par_psp_path, set_last_rule_name
+from scripts.util.patterns import (
+    get_pattern_pars_tmp_channel,
+    get_pattern_plts_tmp_channel,
+    get_pattern_log_channel,
+    get_pattern_plts,
+    get_pattern_tier,
+    get_pattern_pars_tmp,
+    get_pattern_log,
+    get_pattern_pars,
+)
+
+pars_key_resolve.write_par_catalog(
+    ["-*-*-*-cal"],
+    os.path.join(pars_path(setup), "psp", "validity.jsonl"),
+    get_pattern_tier_raw(setup),
+    {"cal": ["par_psp"], "lar": ["par_psp"]},
+)
+
+part_psp_rules = {}
+for key, dataset in part.datasets.items():
+    for partition in dataset.keys():
+
+        rule:
+            input:
+                dsp_pars=part.get_par_files(
+                    f"{par_psp_path(setup)}/validity.jsonl",
+                    partition,
+                    key,
+                    tier="dsp",
+                ),
+                dsp_objs=part.get_par_files(
+                    f"{par_psp_path(setup)}/validity.jsonl",
+                    partition,
+                    key,
+                    tier="dsp",
+                    name="objects",
+                    extension="pkl",
+                ),
+                dsp_plots=part.get_plt_files(
+                    f"{par_psp_path(setup)}/validity.jsonl",
+                    partition,
+                    key,
+                    tier="dsp",
+                ),
+            wildcard_constraints:
+                channel=part.get_wildcard_constraints(partition, key),
+            params:
+                datatype="cal",
+                channel="{channel}" if key == "default" else key,
+                timestamp=part.get_timestamp(
+                    f"{par_psp_path(setup)}/validity.jsonl", partition, key, tier="psp"
+                ),
+            output:
+                psp_pars=part.get_par_files(
+                    f"{par_psp_path(setup)}/validity.jsonl",
+                    partition,
+                    key,
+                    tier="psp",
+                ),
+                psp_objs=part.get_par_files(
+                    f"{par_psp_path(setup)}/validity.jsonl",
+                    partition,
+                    key,
+                    tier="psp",
+                    name="objects",
+                    extension="pkl",
+                ),
+                psp_plots=part.get_plt_files(
+                    f"{par_psp_path(setup)}/validity.jsonl",
+                    partition,
+                    key,
+                    tier="psp",
+                ),
+            log:
+                part.get_log_file(
+                    f"{par_psp_path(setup)}/validity.jsonl",
+                    partition,
+                    key,
+                    "psp",
+                    name="par_psp",
+                ),
+            group:
+                "par-psp"
+            resources:
+                runtime=300,
+            shell:
+                "{swenv} python3 -B "
+                f"{basedir}/../scripts/par_psp.py "
+                "--log {log} "
+                "--configs {configs} "
+                "--datatype {params.datatype} "
+                "--timestamp {params.timestamp} "
+                "--channel {params.channel} "
+                "--in_plots {input.dsp_plots} "
+                "--out_plots {output.psp_plots} "
+                "--in_obj {input.dsp_objs} "
+                "--out_obj {output.psp_objs} "
+                "--input {input.dsp_pars} "
+                "--output {output.psp_pars} "
+
+        set_last_rule_name(
+            workflow, f"{key}-{partition}-build_par_psp"
+        )
+
+        if key in part_psp_rules:
+            part_psp_rules[key].append(list(workflow.rules)[-1])
+        else:
+            part_psp_rules[key] = [list(workflow.rules)[-1]]
+
+
+# This rule builds the psp pars for a single run from that run's dsp pars,
+# the single-run analogue of the partition rules above
+rule build_par_psp:
+    input:
+        dsp_pars=get_pattern_pars_tmp_channel(setup, "dsp"),
+        dsp_objs=get_pattern_pars_tmp_channel(
+            setup, "dsp", "objects", extension="pkl"
+        ),
+        dsp_plots=get_pattern_plts_tmp_channel(setup, "dsp"),
+    params:
+        datatype="cal",
+        channel="{channel}",
+        timestamp="{timestamp}",
+    output:
+        psp_pars=temp(get_pattern_pars_tmp_channel(setup, "psp")),
+        psp_objs=temp(
+            get_pattern_pars_tmp_channel(
+                setup, "psp", "objects", extension="pkl"
+            )
+        ),
+        psp_plots=temp(get_pattern_plts_tmp_channel(setup, "psp")),
+    log:
+        get_pattern_log_channel(setup, "pars_psp"),
+    group:
+        "par-psp"
+    resources:
+        runtime=300,
+    shell:
+        "{swenv} python3 -B "
+        f"{basedir}/../scripts/par_psp.py "
+        "--log {log} "
+        "--configs {configs} "
+        "--datatype {params.datatype} "
+        "--timestamp {params.timestamp} "
+        "--channel {params.channel} "
+        "--in_plots {input.dsp_plots} "
+        "--out_plots {output.psp_plots} "
+        "--in_obj {input.dsp_objs} "
+        "--out_obj {output.psp_objs} "
+        "--input {input.dsp_pars} "
+        "--output {output.psp_pars} "
+
+
+rule build_pars_psp_objects:
+    input:
+        lambda wildcards: read_filelist_pars_cal_channel(
+            wildcards,
+            "psp_objects_pkl",
+        ),
+    output:
+        get_pattern_pars(
+            setup,
+            "psp",
+            name="objects",
+            extension="dir",
+            check_in_cycle=check_in_cycle,
+        ),
+    group:
+        "merge-psp"
+    shell:
+        "{swenv} python3 -B "
+        f"{basedir}/../scripts/merge_channels.py "
+        "--input {input} "
+        "--output {output} "
+
+rule build_plts_psp:
+    input:
+        lambda wildcards: read_filelist_plts_cal_channel(wildcards, "psp"),
+    output:
+        get_pattern_plts(setup, "psp"),
+    group:
+        "merge-psp"
+    shell:
+        "{swenv} python3 -B "
+        f"{basedir}/../scripts/merge_channels.py "
+        "--input {input} "
+        "--output {output} "
+
+rule build_pars_psp:
+    input:
+        infiles=lambda wildcards: read_filelist_pars_cal_channel(wildcards, "psp"),
+        plts=get_pattern_plts(setup, "psp"),
+        objects=get_pattern_pars(
+            setup,
+            "psp",
+            name="objects",
+            extension="dir",
+            check_in_cycle=check_in_cycle,
+        ),
+    output:
+        get_pattern_pars(setup, "psp", check_in_cycle=check_in_cycle),
+    group:
+        "merge-psp"
+    shell:
+        "{swenv} python3 -B "
+        f"{basedir}/../scripts/merge_channels.py "
+        "--input {input.infiles} "
+        "--output {output} "
+
+
+rule build_psp:
+    input:
+        raw_file=get_pattern_tier_raw(setup),
+        pars_file=ancient(
+            lambda wildcards: pars_catalog.get_par_file(
+                setup, wildcards.timestamp, "psp"
+            )
+        ),
+    params:
+        timestamp="{timestamp}",
+        datatype="{datatype}",
+    output:
+        tier_file=get_pattern_tier(setup, "psp", check_in_cycle=check_in_cycle),
+        db_file=get_pattern_pars_tmp(setup, "psp_db"),
+    log:
+        get_pattern_log(setup, "tier_psp"),
+    group:
+        "tier-psp"
+    resources:
+        runtime=300,
+        mem_swap=50,
+    shell:
+        "{swenv} python3 -B "
+        f"{workflow.source_path('../scripts/build_dsp.py')} "
+        "--log {log} "
+        "--configs {configs} "
+        "--datatype {params.datatype} "
+        "--timestamp {params.timestamp} "
+        "--input {input.raw_file} "
+        "--output {output.tier_file} "
+        "--db_file {output.db_file} "
+        "--pars_file {input.pars_file}"
\ No newline at end of file
diff --git a/scripts/par_psp.py b/scripts/par_psp.py
new file mode 100644
index 0000000..7ef0fad
--- /dev/null
+++ b/scripts/par_psp.py
@@ -0,0 +1,100 @@
+import argparse
+import json
+import os
+import pickle as pkl
+from datetime import datetime
+
+import matplotlib as mpl
+
+mpl.use("Agg")
+
+import matplotlib.pyplot as plt
+import numpy as np
+from legendmeta import LegendMetadata
+from legendmeta.catalog import Props
+from util.FileKey import ChannelProcKey
+
+
+argparser = argparse.ArgumentParser()
+argparser.add_argument("--input", help="input files", nargs="*", type=str, required=True)
+argparser.add_argument("--output", help="output file", nargs="*", type=str, required=True)
+argparser.add_argument("--in_plots", help="input plot files", nargs="*", type=str, required=False)
+argparser.add_argument("--out_plots", help="output plot files", nargs="*", type=str, required=False)
+argparser.add_argument("--in_obj", help="input object files", nargs="*", type=str, required=False)
+argparser.add_argument("--out_obj", help="output object files", nargs="*", type=str, required=False)
+argparser.add_argument("--configs", help="configs", type=str, required=True)
+argparser.add_argument("--datatype", help="Datatype", type=str, required=True)
+argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True)
+argparser.add_argument("--channel", help="Channel", type=str, required=True)
+args = argparser.parse_args()
+
+conf = LegendMetadata(path=args.configs)
+configs = conf.on(args.timestamp, system=args.datatype)
+merge_config = configs["snakemake_rules"]["pars_psp"]["inputs"]["config"][
+    args.channel
+]
+
+ave_fields = merge_config["average_fields"]
+
+# partitions could be different for different channels - do separately for each channel
+in_dicts = {}
+for file in args.input:
+    tstamp = ChannelProcKey.get_filekey_from_pattern(os.path.basename(file)).timestamp
+    in_dicts[tstamp] = Props.read_from(file)
+
+plot_dict = {}
+for field in ave_fields:
+    keys = field.split(".")
+    vals = []
+    for tstamp in in_dicts:
+        val = in_dicts[tstamp]
+        for key in keys:
+            val = val[key]
+        vals.append(val)
+    vals = np.array(vals)
+    if len(vals[~np.isnan(vals)]) == 0:
+        mean = np.nan
+    else:
+        mean = float(np.nanmean(vals))
+    # write the partition average back into every run's dictionary
+    for tstamp in in_dicts:
+        val = in_dicts[tstamp]
+        for key in keys[:-1]:
+            val = val[key]
+        val[keys[-1]] = mean
+
+    fig = plt.figure()
+    plt.scatter([datetime.strptime(tstamp, "%Y%m%dT%H%M%SZ") for tstamp in in_dicts], vals)
+    plt.axhline(y=mean, color="r", linestyle="-")
+    plt.xlabel("time")
+    plt.ylabel("value")
+    plt.title(f"{field} over time")
+    plot_dict[field] = fig
+    plt.close()
+
+for file in args.output:
+    tstamp = ChannelProcKey.get_filekey_from_pattern(os.path.basename(file)).timestamp
+    with open(file, "w") as f:
+        json.dump(in_dicts[tstamp], f, indent=2)
+
+
+if args.out_plots:
+    for file in args.out_plots:
+        tstamp = ChannelProcKey.get_filekey_from_pattern(os.path.basename(file)).timestamp
+        if args.in_plots:
+            for infile in args.in_plots:
+                if tstamp in infile:
+                    with open(infile, "rb") as f:
+                        old_plot_dict = pkl.load(f)
+                    break
+            old_plot_dict.update({"psp": plot_dict})
+            new_plot_dict = old_plot_dict
+        else:
+            new_plot_dict = {"psp": plot_dict}
+        with open(file, "wb") as f:
+            pkl.dump(new_plot_dict, f, protocol=pkl.HIGHEST_PROTOCOL)
+
+if args.out_obj:
+    for file in args.out_obj:
+        tstamp = ChannelProcKey.get_filekey_from_pattern(os.path.basename(file)).timestamp
+        if args.in_obj:
+            for infile in args.in_obj:
+                if tstamp in infile:
+                    with open(infile, "rb") as f:
+                        old_obj_dict = pkl.load(f)
+                    break
+            new_obj_dict = old_obj_dict
+        else:
+            new_obj_dict = {}
+        with open(file, "wb") as f:
+            pkl.dump(new_obj_dict, f, protocol=pkl.HIGHEST_PROTOCOL)
\ No newline at end of file

From 086fe6dd6424444df0a13f30e3a8e81fa6f5a511 Mon Sep 17 00:00:00 2001
From: ggmarshall
Date: Sun, 10 Mar 2024 21:31:59 +0100
Subject: [PATCH 012/103] add lh5 support to merge channels

---
 scripts/merge_channels.py | 239 ++++++++++++++++++--------------------
 1 file changed, 115 insertions(+), 124 deletions(-)

diff --git a/scripts/merge_channels.py b/scripts/merge_channels.py
index 6df04bd..b45d16e 100644
--- a/scripts/merge_channels.py
+++ b/scripts/merge_channels.py
@@ -4,137 +4,128 @@
 import pathlib
 import pickle as pkl
 import shelve
+from legendmeta.catalog import Props
+from util.FileKey import ChannelProcKey
+import numpy as np

-import lgdo.lh5_store as lh5
+
+import lgdo.lh5 as lh5
 from lgdo import Array

-sto = lh5.LH5Store()

+def replace_path(d, old_path, new_path):
+    if isinstance(d, dict):
+        for k, v in d.items():
+            d[k] = replace_path(v, old_path, new_path)
+    elif isinstance(d, list):
+        for i in range(len(d)):
+            d[i] = replace_path(d[i], old_path, new_path)
+    elif isinstance(d, str):
+        if 
old_path in d: + d = d.replace(old_path, new_path) + return d argparser = argparse.ArgumentParser() -argparser.add_argument("--input", help="input file", nargs="*", type=str) -argparser.add_argument("--output", help="output file", nargs="*", type=str) +argparser.add_argument("--input", help="input file", nargs="*", type=str, required=True) +argparser.add_argument("--output", help="output file", type=str, required=True) +argparser.add_argument("--in_db", help="in db file (used for when lh5 files refered to in db)", type=str, required=False) +argparser.add_argument("--out_db", help="lh5 file (used for when lh5 files refered to in db)", type=str, required=False) args = argparser.parse_args() +# change to only have 1 output file for mutliple inputs +# don't care about processing step, check if extension matches + + channel_files = args.input -for _i, out_file in enumerate(args.output): - file_extension = pathlib.Path(out_file).suffix - processing_step = os.path.splitext(out_file)[0].split("-")[-1] - if file_extension == ".json": - out_dict = {} - for channel in channel_files: - if os.path.splitext(channel)[0].split("-")[-1] == processing_step: - with open(channel) as r: - channel_dict = json.load(r) - ( - experiment, - period, - run, - datatype, - timestamp, - channel_name, - name, - ) = os.path.basename(channel).split("-") - out_dict[channel_name] = channel_dict - - for key in channel_dict: - key_dict = channel_dict[key] - for key_pars in key_dict: - if isinstance(key_dict[key_pars], str) and ( - "loadlh5" in key_dict[key_pars] - ): - out_lh5 = out_file.replace(".json", ".lh5") - out_dict[channel_name][key][ - key_pars - ] = f"loadlh5('{out_lh5}', '{channel_name}/{key}')" - else: - pass - - pathlib.Path(os.path.dirname(out_file)).mkdir(parents=True, exist_ok=True) - with open(out_file, "w") as w: - json.dump(out_dict, w, indent=4) - - elif file_extension == ".pkl": - out_dict = {} - for channel in channel_files: - if os.path.splitext(channel)[0].split("-")[-1] == processing_step: - with open(channel, "rb") as r: - channel_dict = pkl.load(r) - ( - experiment, - period, - run, - datatype, - timestamp, - channel_name, - name, - ) = os.path.basename(channel).split("-") - out_dict[channel_name] = channel_dict - else: - pass - pathlib.Path(os.path.dirname(out_file)).mkdir(parents=True, exist_ok=True) - with open(out_file, "wb") as w: - pkl.dump(out_dict, w, protocol=pkl.HIGHEST_PROTOCOL) - - elif file_extension == ".dat" or file_extension == ".dir": - _out_file = os.path.splitext(out_file)[0] - pathlib.Path(os.path.dirname(_out_file)).mkdir(parents=True, exist_ok=True) - common_dict = {} - with shelve.open(_out_file, "c", protocol=pkl.HIGHEST_PROTOCOL) as shelf: - for channel in channel_files: - if os.path.splitext(channel)[0].split("-")[-1] == processing_step: - with open(channel, "rb") as r: - channel_dict = pkl.load(r) - ( - experiment, - period, - run, - datatype, - timestamp, - channel_name, - name, - ) = os.path.basename(channel).split("-") - if isinstance(channel_dict, dict) and "common" in list(channel_dict): - chan_common_dict = channel_dict.pop("common") - common_dict[channel_name] = chan_common_dict - shelf[channel_name] = channel_dict - else: - pass - if len(common_dict) > 0: - shelf["common"] = common_dict - - elif file_extension == ".lh5": + +file_extension = pathlib.Path(args.output).suffix + +if file_extension == ".dat" or file_extension == ".dir": + out_file = os.path.splitext(args.output)[0] +else: + out_file = args.output + +rng = np.random.default_rng() +rand_num = 
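To make the behaviour of replace_path concrete, a small usage sketch; it assumes the function defined above is in scope, and the paths and keys are hypothetical:

```python
# replace_path recurses through nested dicts and lists, rewriting any
# string value that contains old_path; non-string leaves pass through.
db = {
    "ch1": {"dplms": "loadlh5('/tmp/ch1-par_dsp.lh5', 'ch1/dplms')"},
    "files": ["/tmp/ch1-par_dsp.lh5", 42],
}
merged = replace_path(db, "/tmp/ch1-par_dsp.lh5", "/prod/par_dsp.lh5")
# merged["ch1"]["dplms"] == "loadlh5('/prod/par_dsp.lh5', 'ch1/dplms')"
# merged["files"]        == ["/prod/par_dsp.lh5", 42]
```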
f"{rng.integers(0,99999):05d}" +temp_output = f"{out_file}.{rand_num}" + +pathlib.Path(os.path.dirname(args.output)).mkdir(parents=True, exist_ok=True) + + +if file_extension == ".json": + out_dict = {} + for channel in channel_files: + if pathlib.Path(channel).suffix == file_extension: + channel_dict = Props.read_from(channel) + + fkey = ChannelProcKey.get_filekey_from_pattern(os.path.basename(channel)) + channel_name = fkey.channel + out_dict[channel_name] = channel_dict + else: + raise RuntimeError("Output file extension does not match input file extension") + + with open(temp_output, "w") as w: + json.dump(out_dict, w, indent=4) + + os.rename(temp_output, out_file) + +elif file_extension == ".pkl": + out_dict = {} + for channel in channel_files: + with open(channel, "rb") as r: + channel_dict = pkl.load(r) + fkey = ChannelProcKey.get_filekey_from_pattern(os.path.basename(channel)) + channel_name = fkey.channel + out_dict[channel_name] = channel_dict + + with open(temp_output, "wb") as w: + pkl.dump(out_dict, w, protocol=pkl.HIGHEST_PROTOCOL) + + os.rename(temp_output, out_file) + +elif file_extension == ".dat" or file_extension == ".dir": + common_dict = {} + with shelve.open(out_file, "c", protocol=pkl.HIGHEST_PROTOCOL) as shelf: for channel in channel_files: - if os.path.splitext(channel)[0].split("-")[-1] == processing_step: - with open(channel) as r: - channel_dict = json.load(r) - ( - experiment, - period, - run, - datatype, - timestamp, - channel_name, - name, - ) = os.path.basename(channel).split("-") - - out_dict[channel_name] = channel_dict - - for key in channel_dict: - key_dict = channel_dict[key] - for key_pars in key_dict: - if isinstance(key_dict[key_pars], str) and ( - "loadlh5" in key_dict[key_pars] - ): - path_to_file = key_dict[key_pars].split("'")[1] - path_in_file = key_dict[key_pars].split("'")[3] - data = sto.read_object(path_in_file, path_to_file)[0].nda - sto.write_object( - Array(data), - name=key, - lh5_file=out_file, - wo_mode="overwrite", - group=channel_name, - ) - else: - pass + with open(channel, "rb") as r: + channel_dict = pkl.load(r) + fkey = ChannelProcKey.get_filekey_from_pattern(os.path.basename(channel)) + channel_name = fkey.channel + if isinstance(channel_dict, dict) and "common" in list(channel_dict): + chan_common_dict = channel_dict.pop("common") + common_dict[channel_name] = chan_common_dict + shelf[channel_name] = channel_dict + if len(common_dict) > 0: + shelf["common"] = common_dict + + +elif file_extension == ".lh5": + sto = lh5.LH5Store() + + if args.in_db: + db_dict = Props.read_from(args.in_db) + for channel in channel_files: + if pathlib.Path(channel).suffix == file_extension: + fkey = ChannelProcKey.get_filekey_from_pattern(os.path.basename(channel)) + channel_name = fkey.channel + + tb_in = sto.read( + f"{channel_name}", + channel + )[0] + + sto.write( + tb_in, + name = channel_name, + lh5_file = temp_output, + wo_mode="a", + ) + if args.in_db: + db_dict[channel_name] = replace_path(db_dict[channel_name], channel, args.output) + else: + raise RuntimeError("Output file extension does not match input file extension") + if args.out_db: + with open(args.out_db, "w") as w: + json.dump(db_dict, w, indent=4) + + os.rename(temp_output, out_file) \ No newline at end of file From 8e4b2b14317348dd1cbe33f74c9cc3e3e120380f Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sun, 10 Mar 2024 21:33:00 +0100 Subject: [PATCH 013/103] legendmeta info logging --- scripts/pars_dsp_nopt.py | 5 +++-- scripts/pars_dsp_tau.py | 1 + scripts/pars_pht_aoecal.py | 
1 + scripts/pars_pht_lqcal.py | 1 + 4 files changed, 6 insertions(+), 2 deletions(-) diff --git a/scripts/pars_dsp_nopt.py b/scripts/pars_dsp_nopt.py index 1b2e798..bed75bf 100644 --- a/scripts/pars_dsp_nopt.py +++ b/scripts/pars_dsp_nopt.py @@ -43,7 +43,8 @@ logging.getLogger("parse").setLevel(logging.INFO) logging.getLogger("lgdo").setLevel(logging.INFO) logging.getLogger("h5py._conv").setLevel(logging.INFO) -logging.getLogger("pygama.dsp.processing_chain").setLevel(logging.INFO) +logging.getLogger("dspeed.processing_chain").setLevel(logging.INFO) +logging.getLogger("legendmeta").setLevel(logging.INFO) log = logging.getLogger(__name__) @@ -87,7 +88,7 @@ if args.plot_path: out_dict, plot_dict = pno.noise_optimization( - tb_data, dsp_config, db_dict, opt_dict, args.channel, display=1 + tb_data, dsp_config, db_dict.copy(), opt_dict, args.channel, display=1 ) else: out_dict = pno.noise_optimization( diff --git a/scripts/pars_dsp_tau.py b/scripts/pars_dsp_tau.py index 0a315ff..04d4cdc 100644 --- a/scripts/pars_dsp_tau.py +++ b/scripts/pars_dsp_tau.py @@ -35,6 +35,7 @@ logging.getLogger("lgdo").setLevel(logging.INFO) logging.getLogger("h5py").setLevel(logging.INFO) logging.getLogger("matplotlib").setLevel(logging.INFO) +logging.getLogger("legendmeta").setLevel(logging.INFO) sto = lh5.LH5Store() diff --git a/scripts/pars_pht_aoecal.py b/scripts/pars_pht_aoecal.py index a646857..49303e7 100644 --- a/scripts/pars_pht_aoecal.py +++ b/scripts/pars_pht_aoecal.py @@ -98,6 +98,7 @@ def aoe_calibration( logging.getLogger("lgdo").setLevel(logging.INFO) logging.getLogger("h5py").setLevel(logging.INFO) logging.getLogger("matplotlib").setLevel(logging.INFO) +logging.getLogger("legendmeta").setLevel(logging.INFO) def run_splitter(files): diff --git a/scripts/pars_pht_lqcal.py b/scripts/pars_pht_lqcal.py index 2d1bc06..2e656d6 100644 --- a/scripts/pars_pht_lqcal.py +++ b/scripts/pars_pht_lqcal.py @@ -116,6 +116,7 @@ def lq_calibration( logging.getLogger("lgdo").setLevel(logging.INFO) logging.getLogger("h5py").setLevel(logging.INFO) logging.getLogger("matplotlib").setLevel(logging.INFO) +logging.getLogger("legendmeta").setLevel(logging.INFO) def run_splitter(files): From ed5a32c720c469bb386f4627bf88761198e4e144 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sun, 10 Mar 2024 21:33:35 +0100 Subject: [PATCH 014/103] add psp --- scripts/util/patterns.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/scripts/util/patterns.py b/scripts/util/patterns.py index 21fa1a5..c27ed5e 100644 --- a/scripts/util/patterns.py +++ b/scripts/util/patterns.py @@ -432,16 +432,18 @@ def get_pattern_pars_overwrite(setup, tier, name=None): ) -def get_pattern_pars_tmp(setup, tier, name=None): +def get_pattern_pars_tmp(setup, tier, name=None, datatype=None): + if datatype is None: + datatype = "{datatype}" if name is None: return os.path.join( f"{tmp_par_path(setup)}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-par_" + tier + ".json", + "{experiment}-{period}-{run}-"+datatype+"-{timestamp}-par_" + tier + ".json", ) else: return os.path.join( f"{tmp_par_path(setup)}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-par_" + "{experiment}-{period}-{run}-"+datatype+"-{timestamp}-par_" + tier + "_" + name From d645ed34055561655ff7f4ed3d3660b46294632a Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sun, 10 Mar 2024 21:34:10 +0100 Subject: [PATCH 015/103] update logging and better out dict handling --- scripts/pars_dsp_eopt.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git 
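The get_pattern_pars_tmp change in the "add psp" patch is easiest to see by rendering both cases. A stripped-down sketch with the tmp-par directory inlined as a plain string:

```python
import os

def pars_tmp_pattern(tier, datatype=None):
    # None keeps the "{datatype}" wildcard for Snakemake to resolve;
    # passing e.g. "cal" pins the datatype at rule-definition time.
    if datatype is None:
        datatype = "{datatype}"
    return os.path.join(
        "/tmp/par",
        "{experiment}-{period}-{run}-" + datatype + "-{timestamp}-par_" + tier + ".json",
    )

print(pars_tmp_pattern("psp"))
# /tmp/par/{experiment}-{period}-{run}-{datatype}-{timestamp}-par_psp.json
print(pars_tmp_pattern("psp", datatype="cal"))
# /tmp/par/{experiment}-{period}-{run}-cal-{timestamp}-par_psp.json
```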
a/scripts/pars_dsp_eopt.py b/scripts/pars_dsp_eopt.py index b61663c..ff9aa72 100644 --- a/scripts/pars_dsp_eopt.py +++ b/scripts/pars_dsp_eopt.py @@ -52,7 +52,8 @@ logging.getLogger("lgdo").setLevel(logging.INFO) logging.getLogger("h5py").setLevel(logging.INFO) logging.getLogger("matplotlib").setLevel(logging.INFO) -logging.getLogger("pygama.dsp.processing_chain").setLevel(logging.INFO) +logging.getLogger("dspeed.processing_chain").setLevel(logging.INFO) +logging.getLogger("legendmeta").setLevel(logging.INFO) log = logging.getLogger(__name__) @@ -395,8 +396,10 @@ "expression": "trapEftp*(1+dt_eff*a)", "parameters": {"a": round(bopt_trap.optimal_results["alpha"], 9)}, } - - db_dict.update({"ctc_params": out_alpha_dict}) + if "ctc_params" in db_dict: + db_dict["ctc_params"].update(out_alpha_dict) + else: + db_dict.update({"ctc_params": out_alpha_dict}) pathlib.Path(os.path.dirname(args.qbb_grid_path)).mkdir(parents=True, exist_ok=True) with open(args.qbb_grid_path, "wb") as f: From 85d135abedfbf545960543cde26a3737ebefc604 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sun, 10 Mar 2024 21:34:42 +0100 Subject: [PATCH 016/103] 2stage handling --- scripts/pars_pht_partcal.py | 149 +++++++++++++++++++++++++++++++++--- 1 file changed, 139 insertions(+), 10 deletions(-) diff --git a/scripts/pars_pht_partcal.py b/scripts/pars_pht_partcal.py index a148946..f3c926e 100644 --- a/scripts/pars_pht_partcal.py +++ b/scripts/pars_pht_partcal.py @@ -33,6 +33,49 @@ def update_cal_dicts(cal_dicts, update_dict): cal_dicts.update(update_dict) return cal_dicts +def get_results_dict(ecal_class, data): + if ecal_class.results: + fwhm_linear = ecal_class.fwhm_fit_linear.copy() + fwhm_linear["parameters"] = fwhm_linear["parameters"].to_dict() + fwhm_linear["uncertainties"] = fwhm_linear["uncertainties"].to_dict() + fwhm_linear["cov"] = fwhm_linear["cov"].tolist() + fwhm_quad = ecal_class.fwhm_fit_quadratic.copy() + fwhm_quad["parameters"] = fwhm_quad["parameters"].to_dict() + fwhm_quad["uncertainties"] = fwhm_quad["uncertainties"].to_dict() + fwhm_quad["cov"] = fwhm_quad["cov"].tolist() + + pk_dict = { + Ei: { + "function": func_i.__name__, + "module": func_i.__module__, + "parameters_in_keV": parsi.to_dict(), + "uncertainties_in_keV": errorsi.to_dict(), + "p_val": pvali, + "fwhm_in_keV": list(fwhmi), + "pk_position":(posi, posuni), + } + for i, (Ei, parsi, errorsi, pvali, fwhmi, posi, posuni, func_i) in enumerate( + zip( + ecal_class.results["fitted_keV"], + ecal_class.results["pk_pars"][ecal_class.results["pk_validities"]], + ecal_class.results["pk_errors"][ecal_class.results["pk_validities"]], + ecal_class.results["pk_pvals"][ecal_class.results["pk_validities"]], + ecal_class.results["pk_fwhms"], + ecal_class.results["pk_pos"], + ecal_class.results["pk_pos_uncertainties"], + ecal_class.funcs, + ) + ) + } + + return { + "eres_linear": fwhm_linear, + "eres_quadratic": fwhm_quad, + "fitted_peaks": ecal_class.results["fitted_keV"].tolist(), + "pk_fits": pk_dict, + } + else: + return {} def partition_energy_cal_th( data: pd.Datframe, @@ -44,28 +87,113 @@ def partition_energy_cal_th( plot_options: dict | None = None, simplex: bool = True, tail_weight: int = 20, - # cal_energy_params: list = None, - # deg:int=2, + cal_energy_params: list = None, + deg:int=2, ) -> tuple(dict, dict, dict, dict): results_dict = {} plot_dict = {} full_object_dict = {} - # if cal_energy_params is None: - # cal_energy_params = [energy_param + "_cal" for energy_param in energy_params] - for energy_param in energy_params: - 
full_object_dict[energy_param] = high_stats_fitting( + if cal_energy_params is None: + cal_energy_params = [energy_param + "_cal" for energy_param in energy_params] + glines = [ + 238.632, + 511, + 583.191, + 727.330, + 763, + 785, + 860.564, + 893, + 1079, + 1513, + 1592.53, + 1620.50, + 2103.53, + 2614.50, + 3125, + 3198, + 3474, + ] # gamma lines used for calibration + range_keV = [ + (10, 10), + (30, 30), + (30, 30), + (30, 30), + (30, 15), + (15, 30), + (30, 25), + (25, 30), + (30, 30), + (30, 30), + (30, 20), + (20, 30), + (30, 30), + (30, 30), + (30, 30), + (30, 30), + (30, 30), + ] # side bands width + funcs = [ + pgf.extended_gauss_step_pdf, # probably should be gauss on exp + pgf.extended_gauss_step_pdf, + pgf.extended_radford_pdf, + pgf.extended_radford_pdf, + pgf.extended_gauss_step_pdf, + pgf.extended_gauss_step_pdf, + pgf.extended_radford_pdf, + pgf.extended_gauss_step_pdf, + pgf.extended_gauss_step_pdf, + pgf.extended_gauss_step_pdf, + pgf.extended_radford_pdf, + pgf.extended_radford_pdf, + pgf.extended_radford_pdf, + pgf.extended_radford_pdf, + pgf.extended_gauss_step_pdf, + pgf.extended_gauss_step_pdf, + pgf.extended_gauss_step_pdf, + ] + gof_funcs = [ + pgf.gauss_step_pdf, + pgf.gauss_step_pdf, + pgf.radford_pdf, + pgf.radford_pdf, + pgf.gauss_step_pdf, + pgf.gauss_step_pdf, + pgf.radford_pdf, + pgf.gauss_step_pdf, + pgf.gauss_step_pdf, + pgf.gauss_step_pdf, + pgf.radford_pdf, + pgf.radford_pdf, + pgf.radford_pdf, + pgf.radford_pdf, + pgf.gauss_step_pdf, + pgf.gauss_step_pdf, + pgf.gauss_step_pdf, + ] + + for energy_param, cal_energy_param in zip(energy_params, cal_energy_params): + full_object_dict[cal_energy_param] = high_stats_fitting( energy_param=energy_param, + glines=glines, + range_keV=range_keV, + funcs=funcs, + gof_funcs=gof_funcs, selection_string=selection_string, threshold=threshold, p_val=p_val, plot_options=plot_options, simplex=simplex, tail_weight=tail_weight, + cal_energy_param=cal_energy_param, + deg=deg, + fixed={1:1} ) - full_object_dict[energy_param].fit_peaks(data) - results_dict[energy_param] = full_object_dict[energy_param].get_results_dict(data) - if full_object_dict[energy_param].results: - plot_dict[energy_param] = full_object_dict[energy_param].fill_plot_dict(data).copy() + full_object_dict[cal_energy_param].update_calibration(data) + results_dict[cal_energy_param] = get_results_dict(full_object_dict[cal_energy_param], data) + hit_dicts = update_cal_dicts(hit_dicts, full_object_dict[cal_energy_param].hit_dict) + if full_object_dict[cal_energy_param].results: + plot_dict[cal_energy_param] = full_object_dict[cal_energy_param].fill_plot_dict(data).copy() log.info("Finished all calibrations") return hit_dicts, results_dict, plot_dict, full_object_dict @@ -96,6 +224,7 @@ def partition_energy_cal_th( logging.getLogger("lgdo").setLevel(logging.INFO) logging.getLogger("h5py").setLevel(logging.INFO) logging.getLogger("matplotlib").setLevel(logging.INFO) +logging.getLogger("legendmeta").setLevel(logging.INFO) def run_splitter(files): From d6570fa64e1c13cc108c9c1d81223e78a7351c14 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sun, 10 Mar 2024 21:35:04 +0100 Subject: [PATCH 017/103] first versions --- scripts/pars_dsp_event_selection.py | 108 ++++++++++++++++++++++++++++ scripts/pars_hit_qc.py | 104 +++++++++++++++++++++++++++ 2 files changed, 212 insertions(+) create mode 100644 scripts/pars_dsp_event_selection.py create mode 100644 scripts/pars_hit_qc.py diff --git a/scripts/pars_dsp_event_selection.py b/scripts/pars_dsp_event_selection.py new file 
mode 100644 index 0000000..9fc7197 --- /dev/null +++ b/scripts/pars_dsp_event_selection.py @@ -0,0 +1,108 @@ +import argparse +import json +import logging +import os +import pathlib +import pickle as pkl +import time +import warnings + +os.environ["LGDO_CACHE"] = "false" +os.environ["LGDO_BOUNDSCHECK"] = "false" +os.environ["DSPEED_CACHE"] = "false" +os.environ["DSPEED_BOUNDSCHECK"] = "false" + +import lgdo.lh5 as lh5 +import numpy as np +import pygama.pargen.energy_optimisation as om +from legendmeta import LegendMetadata +from legendmeta.catalog import Props +from pygama.pargen.utils import get_tcm_pulser_ids + +warnings.filterwarnings(action="ignore", category=RuntimeWarning) + +argparser = argparse.ArgumentParser() +argparser.add_argument("--raw_filelist", help="raw_filelist", type=str) +argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, required=True) +argparser.add_argument("--decay_const", help="decay_const", type=str, required=True) +argparser.add_argument("--configs", help="configs", type=str, required=True) + +argparser.add_argument("--log", help="log_file", type=str) + +argparser.add_argument("--datatype", help="Datatype", type=str, required=True) +argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) +argparser.add_argument("--channel", help="Channel", type=str, required=True) + +argparser.add_argument("--peak_file", help="peak_file", type=str, required=True) +args = argparser.parse_args() + +logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") +logging.getLogger("numba").setLevel(logging.INFO) +logging.getLogger("parse").setLevel(logging.INFO) +logging.getLogger("lgdo").setLevel(logging.INFO) +logging.getLogger("h5py").setLevel(logging.INFO) +logging.getLogger("matplotlib").setLevel(logging.INFO) +logging.getLogger("dspeed.processing_chain").setLevel(logging.INFO) + + +log = logging.getLogger(__name__) + +t0 = time.time() + +conf = LegendMetadata(path=args.configs) +configs = conf.on(args.timestamp, system=args.datatype) +dsp_config = configs["snakemake_rules"]["pars_dsp_peak_selection"]["inputs"]["processing_chain"][ + args.channel +] +peak_json = configs["snakemake_rules"]["pars_dsp_peak_selection"]["inputs"]["peak_config"][args.channel] + +peak_dict = Props.read_from(opt_json) +db_dict = Props.read_from(args.decay_const) + +if opt_dict.pop("run_selection") is True: + with open(args.raw_filelist) as f: + files = f.read().splitlines() + + raw_files = sorted(files) + + # get pulser mask from tcm files + with open(args.tcm_filelist) as f: + tcm_files = f.read().splitlines() + tcm_files = sorted(np.unique(tcm_files)) + ids, mask = get_tcm_pulser_ids( + tcm_files, args.channel, peak_dict["pulser_multiplicity_threshold"] + ) + + sto = lh5.LH5Store() + idx_events, idx_list = om.event_selection( + raw_files, + f"{args.channel}/raw", + dsp_config, + db_dict, + peaks_keV, + np.arange(0, len(peaks_keV), 1).tolist(), + kev_widths, + pulser_mask=mask, + cut_parameters=peak_dict["cut_parameters"], + n_events=peak_dict["n_events"], + threshold=peak_dict["threshold"], + wf_field=peak_dict["wf_field"], + ) + + tb_data = sto.read( + f"{args.channel}/raw", + raw_files, + idx=idx_events, + n_rows=opt_dict["n_events"], + )[0] + + pathlib.Path(os.path.dirname(args.peak_file)).mkdir(parents=True, exist_ok=True) + sto.write( + tb_data, + name="raw", + lh5_file=args.peak_file, + wo_mode="overwrite", + ) +else: + pathlib.Path(os.path.dirname(args.peak_file)).mkdir(parents=True, exist_ok=True) + pathlib.Path(args.peak_file).touch() \ 
No newline at end of file
diff --git a/scripts/pars_hit_qc.py b/scripts/pars_hit_qc.py
new file mode 100644
index 0000000..05254d8
--- /dev/null
+++ b/scripts/pars_hit_qc.py
@@ -0,0 +1,105 @@
+from __future__ import annotations
+
+import argparse
+import json
+import logging
+import os
+import pathlib
+import pickle as pkl
+import warnings
+
+import numpy as np
+from legendmeta import LegendMetadata
+from legendmeta.catalog import Props
+from pygama.pargen.utils import get_tcm_pulser_ids, load_data
+from pygama.pargen.cuts import generate_cuts
+
+log = logging.getLogger(__name__)
+
+warnings.filterwarnings(action="ignore", category=RuntimeWarning)
+
+
+if __name__ == "__main__":
+    argparser = argparse.ArgumentParser()
+    argparser.add_argument("--files", help="files", nargs="*", type=str)
+    argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, required=True)
+
+    argparser.add_argument("--configs", help="config", type=str, required=True)
+    argparser.add_argument("--datatype", help="Datatype", type=str, required=True)
+    argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True)
+    argparser.add_argument("--channel", help="Channel", type=str, required=True)
+    argparser.add_argument("--tier", help="tier", type=str, default="hit")
+
+    argparser.add_argument("--log", help="log_file", type=str)
+
+    argparser.add_argument("--plot_path", help="plot_path", type=str, required=False, nargs="*")
+    argparser.add_argument("--save_path", help="save_path", type=str, nargs="*")
+    args = argparser.parse_args()
+
+    logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w")
+    logging.getLogger("numba").setLevel(logging.INFO)
+    logging.getLogger("parse").setLevel(logging.INFO)
+    logging.getLogger("lgdo").setLevel(logging.INFO)
+    logging.getLogger("h5py").setLevel(logging.INFO)
+    logging.getLogger("matplotlib").setLevel(logging.INFO)
+    logging.getLogger("legendmeta").setLevel(logging.INFO)
+
+
+    # get metadata dictionary
+    configs = LegendMetadata(path=args.configs)
+    channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]
+    if args.tier == "hit":
+        channel_dict = channel_dict["pars_hit_qc"]["inputs"]["ecal_config"][args.channel]
+    elif args.tier == "pht":
+        channel_dict = channel_dict["pars_pht_qc"]["inputs"]["ecal_config"][args.channel]
+    else:
+        msg = "invalid tier"
+        raise ValueError(msg)
+
+    kwarg_dict = Props.read_from(channel_dict)
+
+    # load data in
+    data, threshold_mask = load_data(
+        args.files,
+        f"{args.channel}/dsp",
+        {},  # no calibration operations to apply at this stage
+        list(kwarg_dict["cut_parameters"])
+        + ["timestamp", "trapTmax"],
+        threshold=kwarg_dict["threshold"],
+        return_selection_mask=True,
+        cal_energy_param="trapTmax",
+    )
+
+    # get pulser mask from tcm files
+    with open(args.tcm_filelist) as f:
+        tcm_files = f.read().splitlines()
+    tcm_files = sorted(np.unique(tcm_files))
+    ids, mask = get_tcm_pulser_ids(
+        tcm_files, args.channel, kwarg_dict.pop("pulser_multiplicity_threshold")
+    )
+    data["is_pulser"] = mask[threshold_mask]
+
+    hit_dict, plot_dict = generate_cuts(
+        data,
+        kwarg_dict["cut_parameters"],
+        kwarg_dict.get("rounding", 4),
+        display=1 if args.plot_path else 0,
+    )
+    if isinstance(args.save_path, str):
+        save_path = [args.save_path]
+    else:
+        save_path = args.save_path
+    for file in save_path:
+        pathlib.Path(os.path.dirname(file)).mkdir(parents=True, exist_ok=True)
+        with open(file, "w") as f:
+            json.dump(hit_dict, f, indent=4)
+
+    if args.plot_path:
+        if isinstance(args.plot_path, str):
+            plot_path = [args.plot_path]
+        else:
+            plot_path = args.plot_path
+        for 
file in plot_path: + pathlib.Path(os.path.dirname(plot_path)).mkdir(parents=True, exist_ok=True) + with open(plot_path, "wb") as f: + pkl.dump({"qc":plot_dict}, f, protocol=pkl.HIGHEST_PROTOCOL) \ No newline at end of file From 98d5117e6236b5032b886008a8b44532253f595b Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sun, 10 Mar 2024 21:35:42 +0100 Subject: [PATCH 018/103] wildcard constraints and move pht dict gen to rules --- Snakefile | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/Snakefile b/Snakefile index 17fa780..279f320 100644 --- a/Snakefile +++ b/Snakefile @@ -40,24 +40,17 @@ swenv = runcmd(setup) part = ds.dataset_file(setup, os.path.join(configs, "partitions.json")) basedir = workflow.basedir +wildcard_constraints: + experiment="\w+", + period="\w+", + run="\w+", + datatype="\w+", + timestamp="\w+", + channel="\w+", + include: "rules/common.smk" include: "rules/main.smk" - - -localrules: - gen_filelist, - autogen_output, - - -ds.pars_key_resolve.write_par_catalog( - ["-*-*-*-cal"], - os.path.join(pars_path(setup), "pht", "validity.jsonl"), - get_pattern_tier_raw(setup), - {"cal": ["par_pht"], "lar": ["par_pht"]}, -) - - include: "rules/tcm.smk" include: "rules/dsp.smk" include: "rules/hit.smk" @@ -66,6 +59,10 @@ include: "rules/evt.smk" include: "rules/skm.smk" include: "rules/blinding_calibration.smk" +localrules: + gen_filelist, + autogen_output, + onstart: print("Starting workflow") From 6139322036fe09082a5d4173c8de187b251c5419 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Thu, 21 Mar 2024 00:27:22 +0100 Subject: [PATCH 019/103] add psp paths --- scripts/util/patterns.py | 5 +++-- scripts/util/utils.py | 10 ++++++++-- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/scripts/util/patterns.py b/scripts/util/patterns.py index c27ed5e..90c8f2c 100644 --- a/scripts/util/patterns.py +++ b/scripts/util/patterns.py @@ -10,6 +10,7 @@ par_hit_path, par_overwrite_path, par_pht_path, + par_psp_path, par_raw_path, par_tcm_path, pars_path, @@ -317,7 +318,7 @@ def get_pattern_par_evt(setup, name=None, extension="json"): def get_pattern_par_psp(setup, name=None, extension="json"): if name is not None: return os.path.join( - f"{par_evt_path(setup)}", + f"{par_psp_path(setup)}", "cal", "{period}", "{run}", @@ -325,7 +326,7 @@ def get_pattern_par_psp(setup, name=None, extension="json"): ) else: return os.path.join( - f"{par_evt_path(setup)}", + f"{par_psp_path(setup)}", "cal", "{period}", "{run}", diff --git a/scripts/util/utils.py b/scripts/util/utils.py index d767610..8b11b3b 100644 --- a/scripts/util/utils.py +++ b/scripts/util/utils.py @@ -128,6 +128,8 @@ def par_dsp_path(setup): def par_hit_path(setup): return setup["paths"]["par_hit"] +def par_psp_path(setup): + return setup["paths"]["par_psp"] def par_pht_path(setup): return setup["paths"]["par_pht"] @@ -150,10 +152,14 @@ def get_pars_path(setup, tier): return par_dsp_path(setup) elif tier == "hit": return par_hit_path(setup) - elif tier == "pht": - return par_pht_path(setup) elif tier == "evt": return par_evt_path(setup) + elif tier == "psp": + return par_psp_path(setup) + elif tier == "pht": + return par_pht_path(setup) + elif tier == "pet": + return par_pet_path(setup) else: msg = f"no tier matching:{tier}" raise ValueError(msg) From 0a741cb0ff6e13f3c915bbc70d87d624701b14b7 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Thu, 21 Mar 2024 00:27:42 +0100 Subject: [PATCH 020/103] updates for pargen refactor --- scripts/pars_dsp_dplms.py | 55 ++--- scripts/pars_dsp_eopt.py | 
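The wildcard_constraints block added to the Snakefile in patch 018 relies on the fact that \w does not match "-", so wildcards can no longer swallow the dashes that separate the fields of a file key. A sketch of the underlying regex behaviour, with a hypothetical file key:

```python
# Without constraints a wildcard can greedily absorb "-" separators;
# pinning each field to \w+ forbids that, since \w excludes "-".
import re

pattern = r"(?P<experiment>\w+)-(?P<period>\w+)-(?P<run>\w+)"
m = re.fullmatch(pattern, "l200-p03-r001")
assert m.group("period") == "p03"
# a key with an extra dashed field no longer matches at all
assert re.fullmatch(pattern, "l200-p03-r001-extra") is None
```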
178 ++++++-------- scripts/pars_dsp_event_selection.py | 350 ++++++++++++++++++++++------ scripts/pars_dsp_nopt.py | 4 +- scripts/pars_dsp_tau.py | 40 +++- 5 files changed, 401 insertions(+), 226 deletions(-) diff --git a/scripts/pars_dsp_dplms.py b/scripts/pars_dsp_dplms.py index 52bb811..67b8bdd 100644 --- a/scripts/pars_dsp_dplms.py +++ b/scripts/pars_dsp_dplms.py @@ -16,14 +16,11 @@ from legendmeta import LegendMetadata from legendmeta.catalog import Props from pygama.pargen.dplms_ge_dict import dplms_ge_dict -from pygama.pargen.energy_optimisation import event_selection -from pygama.pargen.utils import get_tcm_pulser_ids from lgdo import Array, Table argparser = argparse.ArgumentParser() argparser.add_argument("--fft_raw_filelist", help="fft_raw_filelist", type=str) -argparser.add_argument("--cal_raw_filelist", help="cal_raw_filelist", type=str) -argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, required=True) +argparser.add_argument("--peak_file", help="tcm_filelist", type=str, required=True) argparser.add_argument("--inplots", help="in_plot_path", type=str) argparser.add_argument("--log", help="log_file", type=str) @@ -64,8 +61,6 @@ if dplms_dict["run_dplms"] is True: with open(args.fft_raw_filelist) as f: fft_files = sorted(f.read().splitlines()) - with open(args.cal_raw_filelist) as f: - cal_files = sorted(f.read().splitlines()) t0 = time.time() log.info("\nLoad fft data") @@ -77,35 +72,20 @@ t1 = time.time() log.info(f"Time to load fft data {(t1-t0):.2f} s, total events {len(raw_fft)}") - log.info("\nRemoving pulser") - # get pulser mask from tcm files - with open(args.tcm_filelist) as f: - tcm_files = f.read().splitlines() - tcm_files = sorted(np.unique(tcm_files)) - ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, dplms_dict.pop("pulser_multiplicity_threshold") - ) - log.info("\nRunning event selection") - peaks_keV = np.array(dplms_dict["peaks_keV"]) + peaks_kev = np.array(dplms_dict["peaks_kev"]) kev_widths = [tuple(kev_width) for kev_width in dplms_dict["kev_widths"]] - idx_events, idx_list = event_selection( - cal_files, - f"{args.channel}/raw", - dsp_config, - db_dict, - peaks_keV, - np.arange(0, len(peaks_keV), 1).tolist(), - kev_widths, - pulser_mask=mask, - cut_parameters=dplms_dict["wfs_cut_pars"], - n_events=dplms_dict["n_signals"], - threshold=dplms_dict["threshold"], - ) + + peaks_rounded = [int(peak) for peak in peaks_kev] + peaks = sto.read(f"{args.channel}/raw", args.peak_file , field_mask=["peak"]) [0]["peak"].nda + ids = np.in1d(peaks, peaks_rounded) + peaks = peaks[ids] + idx_list = [np.where(peaks==peak)[0] for peak in peaks_rounded] + raw_cal = sto.read( f"{args.channel}/raw", - cal_files, - idx=idx_events, + args.peak_file, + idx=ids )[0] log.info(f"Time to run event selection {(time.time()-t1):.2f} s, total events {len(raw_cal)}") @@ -126,9 +106,6 @@ inplot_dict = pkl.load(r) inplot_dict.update({"dplms":plot_dict}) - pathlib.Path(os.path.dirname(args.plot_path)).mkdir(parents=True, exist_ok=True) - with open(args.plot_path, "wb") as f: - pkl.dump(plot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) else: out_dict = dplms_ge_dict( raw_fft, @@ -146,6 +123,11 @@ else: out_dict = {} dplms_pars = Table(col_dict={"coefficients":Array([])}) + if args.inplots: + with open(args.inplots, "rb") as r: + inplot_dict = pkl.load(r) + else: + inplot_dict={} db_dict.update(out_dict) @@ -160,3 +142,8 @@ pathlib.Path(os.path.dirname(args.dsp_pars)).mkdir(parents=True, exist_ok=True) with open(args.dsp_pars, "w") as w: json.dump(db_dict, w, indent=2) + 
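The dplms rewrite above swaps on-the-fly event selection for a pre-built peak file that carries an integer peak label per stored waveform. A minimal numpy sketch of the index bookkeeping, where np.isin is the modern spelling of the np.in1d used in the patch:

```python
import numpy as np

peaks_kev = np.array([583.191, 1592.53, 2614.50])
peaks_rounded = [int(p) for p in peaks_kev]

# hypothetical "peak" column: one integer label per stored waveform
peak_col = np.array([583, 2614, 1592, 583, 42, 2614])

ids = np.isin(peak_col, peaks_rounded)  # keep only the wanted peaks
peaks = peak_col[ids]
idx_list = [np.where(peaks == p)[0] for p in peaks_rounded]
# idx_list[0] -> array([0, 3]): positions of the 583 keV events within
# the selected subset, which is what the fitting routines consume
```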
+if args.plot_path: + pathlib.Path(os.path.dirname(args.plot_path)).mkdir(parents=True, exist_ok=True) + with open(args.plot_path, "wb") as f: + pkl.dump(inplot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) \ No newline at end of file diff --git a/scripts/pars_dsp_eopt.py b/scripts/pars_dsp_eopt.py index ff9aa72..b176c65 100644 --- a/scripts/pars_dsp_eopt.py +++ b/scripts/pars_dsp_eopt.py @@ -14,20 +14,20 @@ import lgdo.lh5 as lh5 import numpy as np -import pygama.math.peak_fitting as pgf +from pygama.math.distributions import hpge_peak import pygama.pargen.energy_optimisation as om import sklearn.gaussian_process.kernels as ker from dspeed.units import unit_registry as ureg from legendmeta import LegendMetadata from legendmeta.catalog import Props -from pygama.pargen.dsp_optimize import run_one_dsp -from pygama.pargen.utils import get_tcm_pulser_ids +from pygama.pargen.dsp_optimize import run_one_dsp, BayesianOptimizer, run_bayesian_optimisation warnings.filterwarnings(action="ignore", category=RuntimeWarning) argparser = argparse.ArgumentParser() -argparser.add_argument("--raw_filelist", help="raw_filelist", type=str) -argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, required=True) + +argparser.add_argument("--peak_file", help="tcm_filelist", type=str, required=True) + argparser.add_argument("--decay_const", help="decay_const", type=str, required=True) argparser.add_argument("--configs", help="configs", type=str, required=True) argparser.add_argument("--inplots", help="in_plot_path", type=str) @@ -57,8 +57,7 @@ log = logging.getLogger(__name__) - - +sto = lh5.LH5Store() t0 = time.time() conf = LegendMetadata(path=args.configs) @@ -72,105 +71,53 @@ db_dict = Props.read_from(args.decay_const) if opt_dict.pop("run_eopt") is True: - with open(args.raw_filelist) as f: - files = f.read().splitlines() - - raw_files = sorted(files) - # get pulser mask from tcm files - with open(args.tcm_filelist) as f: - tcm_files = f.read().splitlines() - tcm_files = sorted(np.unique(tcm_files)) - ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, opt_dict.pop("pulser_multiplicity_threshold") - ) - peaks_keV = np.array(opt_dict["peaks"]) + peaks_kev = np.array(opt_dict["peaks"]) kev_widths = [tuple(kev_width) for kev_width in opt_dict["kev_widths"]] kwarg_dicts_cusp = [] kwarg_dicts_trap = [] kwarg_dicts_zac = [] - for peak in peaks_keV: - peak_idx = np.where(peaks_keV == peak)[0][0] + for peak in peaks_kev: + peak_idx = np.where(peaks_kev == peak)[0][0] kev_width = kev_widths[peak_idx] - if peak == 238.632: - kwarg_dicts_cusp.append( - { - "parameter": "cuspEmax", - "func": pgf.extended_gauss_step_pdf, - "gof_func": pgf.gauss_step_pdf, - "peak": peak, - "kev_width": kev_width, - } - ) - kwarg_dicts_zac.append( - { - "parameter": "zacEmax", - "func": pgf.extended_gauss_step_pdf, - "gof_func": pgf.gauss_step_pdf, - "peak": peak, - "kev_width": kev_width, - } - ) - kwarg_dicts_trap.append( - { - "parameter": "trapEmax", - "func": pgf.extended_gauss_step_pdf, - "gof_func": pgf.gauss_step_pdf, - "peak": peak, - "kev_width": kev_width, - } - ) - else: - kwarg_dicts_cusp.append( - { - "parameter": "cuspEmax", - "func": pgf.extended_radford_pdf, - "gof_func": pgf.radford_pdf, - "peak": peak, - "kev_width": kev_width, - } - ) - kwarg_dicts_zac.append( - { - "parameter": "zacEmax", - "func": pgf.extended_radford_pdf, - "gof_func": pgf.radford_pdf, - "peak": peak, - "kev_width": kev_width, - } - ) - kwarg_dicts_trap.append( - { - "parameter": "trapEmax", - "func": pgf.extended_radford_pdf, - 
"gof_func": pgf.radford_pdf, - "peak": peak, - "kev_width": kev_width, - } - ) - sto = lh5.LH5Store() - idx_events, idx_list = om.event_selection( - raw_files, - f"{args.channel}/raw", - dsp_config, - db_dict, - peaks_keV, - np.arange(0, len(peaks_keV), 1).tolist(), - kev_widths, - pulser_mask=mask, - cut_parameters=opt_dict["cut_parameters"], - n_events=opt_dict["n_events"], - threshold=opt_dict["threshold"], - wf_field=opt_dict["wf_field"], - ) + + kwarg_dicts_cusp.append( + { + "parameter": "cuspEmax", + "func": hpge_peak, + "peak": peak, + "kev_width": kev_width, + } + ) + kwarg_dicts_zac.append( + { + "parameter": "zacEmax", + "func": hpge_peak, + "peak": peak, + "kev_width": kev_width, + } + ) + kwarg_dicts_trap.append( + { + "parameter": "trapEmax", + "func": hpge_peak, + "peak": peak, + "kev_width": kev_width, + } + ) + + peaks_rounded = [int(peak) for peak in peaks_kev] + peaks = sto.read(f"{args.channel}/raw",args.peak_file , field_mask=["peak"]) [0]["peak"].nda + ids = np.in1d(peaks, peaks_rounded) + peaks = peaks[ids] + idx_list = [np.where(peaks==peak)[0] for peak in peaks_rounded] tb_data = sto.read( f"{args.channel}/raw", - raw_files, - idx=idx_events, - n_rows=opt_dict["n_events"], + args.peak_file, + idx=ids )[0] t1 = time.time() @@ -204,26 +151,27 @@ kwarg_dict = [ { "peak_dicts": kwarg_dicts_cusp, - "ctc_param": "QDrift", + "ctc_param": "dt_eff", "idx_list": idx_list, - "peaks_keV": peaks_keV, + "peaks_kev": peaks_kev, }, { "peak_dicts": kwarg_dicts_zac, - "ctc_param": "QDrift", + "ctc_param": "dt_eff", "idx_list": idx_list, - "peaks_keV": peaks_keV, + "peaks_kev": peaks_kev, }, { "peak_dicts": kwarg_dicts_trap, - "ctc_param": "QDrift", + "ctc_param": "dt_eff", "idx_list": idx_list, - "peaks_keV": peaks_keV, + "peaks_kev": peaks_kev, }, ] fom = eval(opt_dict["fom"]) - + out_field = opt_dict["fom_field"] + out_err_field = opt_dict["fom_err_field"] sample_x = np.array(opt_dict["initial_samples"]) results_cusp = [] @@ -249,18 +197,18 @@ res = fom(tb_out, kwarg_dict[0]) results_cusp.append(res) - sample_y_cusp.append(res["y_val"]) - err_y_cusp.append(res["y_err"]) + sample_y_cusp.append(res[out_field]) + err_y_cusp.append(res[out_err_field]) res = fom(tb_out, kwarg_dict[1]) results_zac.append(res) - sample_y_zac.append(res["y_val"]) - err_y_zac.append(res["y_err"]) + sample_y_zac.append(res[out_field]) + err_y_zac.append(res[out_err_field]) res = fom(tb_out, kwarg_dict[2]) results_trap.append(res) - sample_y_trap.append(res["y_val"]) - err_y_trap.append(res["y_err"]) + sample_y_trap.append(res[out_field]) + err_y_trap.append(res[out_err_field]) log.info(f"{i+1} Finished") @@ -303,29 +251,35 @@ sampling_unit = ureg.Quantity(tb_data["waveform_presummed"]["dt"].attrs["units"]) waveform_sampling = sampling_rate * sampling_unit - bopt_cusp = om.BayesianOptimizer( + bopt_cusp = BayesianOptimizer( acq_func=opt_dict["acq_func"], batch_size=opt_dict["batch_size"], kernel=kernel, sampling_rate=waveform_sampling, + fom_value = out_field, + fom_error = out_err_field ) bopt_cusp.lambda_param = lambda_param bopt_cusp.add_dimension("cusp", "sigma", 0.5, 16, True, "us") - bopt_zac = om.BayesianOptimizer( + bopt_zac = BayesianOptimizer( acq_func=opt_dict["acq_func"], batch_size=opt_dict["batch_size"], kernel=kernel, sampling_rate=waveform_sampling, + fom_value = out_field, + fom_error = out_err_field ) bopt_zac.lambda_param = lambda_param bopt_zac.add_dimension("zac", "sigma", 0.5, 16, True, "us") - bopt_trap = om.BayesianOptimizer( + bopt_trap = BayesianOptimizer( 
acq_func=opt_dict["acq_func"], batch_size=opt_dict["batch_size"], kernel=kernel, sampling_rate=waveform_sampling, + fom_value = out_field, + fom_error = out_err_field ) bopt_trap.lambda_param = lambda_param bopt_trap.add_dimension("etrap", "rise", 1, 12, True, "us") @@ -348,7 +302,7 @@ optimisers = [bopt_cusp, bopt_zac, bopt_trap] - out_param_dict, out_results_list = om.run_optimisation( + out_param_dict, out_results_list = run_bayesian_optimisation( tb_data, dsp_config, [fom], diff --git a/scripts/pars_dsp_event_selection.py b/scripts/pars_dsp_event_selection.py index 9fc7197..6fc8292 100644 --- a/scripts/pars_dsp_event_selection.py +++ b/scripts/pars_dsp_event_selection.py @@ -12,97 +12,303 @@ os.environ["DSPEED_CACHE"] = "false" os.environ["DSPEED_BOUNDSCHECK"] = "false" +import lgdo import lgdo.lh5 as lh5 import numpy as np -import pygama.pargen.energy_optimisation as om +from bisect import bisect_left from legendmeta import LegendMetadata from legendmeta.catalog import Props -from pygama.pargen.utils import get_tcm_pulser_ids +from pygama.pargen.data_cleaning import get_tcm_pulser_ids, generate_cuts, get_keys +import pygama.math.histogram as pgh +import pygama.pargen.energy_cal as pgc +from pygama.pargen.dsp_optimize import run_one_dsp warnings.filterwarnings(action="ignore", category=RuntimeWarning) -argparser = argparse.ArgumentParser() -argparser.add_argument("--raw_filelist", help="raw_filelist", type=str) -argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, required=True) -argparser.add_argument("--decay_const", help="decay_const", type=str, required=True) -argparser.add_argument("--configs", help="configs", type=str, required=True) +def get_out_data(raw_data, dsp_data,cut_dict, e_lower_lim, e_upper_lim, + ecal_pars, raw_dict, peak, final_cut_field="is_valid_cal", + energy_param="trapTmax"): + for outname, info in cut_dict.items(): + outcol = dsp_data.eval(info["expression"], info.get("parameters", None)) + dsp_data.add_column(outname, outcol) + + for outname, info in raw_dict.items(): + outcol = raw_data.eval(info["expression"], info.get("parameters", None)) + raw_data.add_column(outname, outcol) -argparser.add_argument("--log", help="log_file", type=str) + final_mask = (dsp_data[energy_param].nda > e_lower_lim) & (dsp_data[energy_param].nda < e_upper_lim)&(dsp_data[final_cut_field].nda) + + wavefrom_windowed = lgdo.WaveformTable( + t0=raw_data["waveform_windowed"]["t0"].nda[final_mask], + t0_units=raw_data["waveform_windowed"]["t0"].attrs["units"], + dt=raw_data["waveform_windowed"]["dt"].nda[final_mask], + dt_units=raw_data["waveform_windowed"]["dt"].attrs["units"], + values=raw_data["waveform_windowed"]["values"].nda[final_mask] +) + wavefrom_presummed = lgdo.WaveformTable( + t0=raw_data["waveform_presummed"]["t0"].nda[final_mask], + t0_units=raw_data["waveform_presummed"]["t0"].attrs["units"], + dt=raw_data["waveform_presummed"]["dt"].nda[final_mask], + dt_units=raw_data["waveform_presummed"]["dt"].attrs["units"], + values=raw_data["waveform_presummed"]["values"].nda[final_mask] +) + + + out_tbl = lgdo.Table(col_dict = {"waveform_presummed": wavefrom_presummed, + "waveform_windowed":wavefrom_windowed, + "presum_rate":lgdo.Array(raw_data["presum_rate"].nda[final_mask]), + "timestamp":lgdo.Array(raw_data["timestamp"].nda[final_mask]), + "baseline":lgdo.Array(raw_data["baseline"].nda[final_mask]), + "daqenergy":lgdo.Array(raw_data["daqenergy"].nda[final_mask]), + "daqenergy_cal":lgdo.Array(raw_data["daqenergy_cal"].nda[final_mask]), + 
"trapTmax_cal":lgdo.Array(dsp_data["trapTmax"].nda[final_mask]*ecal_pars), + "peak":lgdo.Array(np.full(len(np.where(final_mask)[0]),int(peak))) + }) + return out_tbl, len(np.where(final_mask)[0]) -argparser.add_argument("--datatype", help="Datatype", type=str, required=True) -argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) -argparser.add_argument("--channel", help="Channel", type=str, required=True) -argparser.add_argument("--peak_file", help="peak_file", type=str, required=True) -args = argparser.parse_args() +if __name__ == "__main__": -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") -logging.getLogger("numba").setLevel(logging.INFO) -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("h5py").setLevel(logging.INFO) -logging.getLogger("matplotlib").setLevel(logging.INFO) -logging.getLogger("dspeed.processing_chain").setLevel(logging.INFO) + argparser = argparse.ArgumentParser() + argparser.add_argument("--raw_filelist", help="raw_filelist", type=str) + argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, required=False) + argparser.add_argument("--pulser_file", help="pulser_file", type=str, required=False) + argparser.add_argument("--decay_const", help="decay_const", type=str, required=True) + argparser.add_argument("--configs", help="configs", type=str, required=True) + argparser.add_argument("--raw_cal", help="raw_cal", type=str, required=True) -log = logging.getLogger(__name__) + argparser.add_argument("--log", help="log_file", type=str) -t0 = time.time() + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) + argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) + argparser.add_argument("--channel", help="Channel", type=str, required=True) -conf = LegendMetadata(path=args.configs) -configs = conf.on(args.timestamp, system=args.datatype) -dsp_config = configs["snakemake_rules"]["pars_dsp_peak_selection"]["inputs"]["processing_chain"][ - args.channel -] -peak_json = configs["snakemake_rules"]["pars_dsp_peak_selection"]["inputs"]["peak_config"][args.channel] + argparser.add_argument("--peak_file", help="peak_file", type=str, required=True) + args = argparser.parse_args() -peak_dict = Props.read_from(opt_json) -db_dict = Props.read_from(args.decay_const) + logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") + logging.getLogger("numba").setLevel(logging.INFO) + logging.getLogger("parse").setLevel(logging.INFO) + logging.getLogger("lgdo").setLevel(logging.INFO) + logging.getLogger("h5py").setLevel(logging.INFO) + logging.getLogger("matplotlib").setLevel(logging.INFO) + logging.getLogger("dspeed.processing_chain").setLevel(logging.INFO) -if opt_dict.pop("run_selection") is True: - with open(args.raw_filelist) as f: - files = f.read().splitlines() - raw_files = sorted(files) + log = logging.getLogger(__name__) + sto = lh5.LH5Store() + t0 = time.time() - # get pulser mask from tcm files - with open(args.tcm_filelist) as f: - tcm_files = f.read().splitlines() - tcm_files = sorted(np.unique(tcm_files)) - ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, peak_dict["pulser_multiplicity_threshold"] - ) + conf = LegendMetadata(path=args.configs) + configs = conf.on(args.timestamp, system=args.datatype) + dsp_config = configs["snakemake_rules"]["pars_dsp_peak_selection"]["inputs"]["processing_chain"][ + args.channel + ] + peak_json = 
configs["snakemake_rules"]["pars_dsp_peak_selection"]["inputs"]["peak_config"][args.channel] - sto = lh5.LH5Store() - idx_events, idx_list = om.event_selection( - raw_files, - f"{args.channel}/raw", - dsp_config, - db_dict, - peaks_keV, - np.arange(0, len(peaks_keV), 1).tolist(), - kev_widths, - pulser_mask=mask, - cut_parameters=peak_dict["cut_parameters"], - n_events=peak_dict["n_events"], - threshold=peak_dict["threshold"], - wf_field=peak_dict["wf_field"], - ) - - tb_data = sto.read( - f"{args.channel}/raw", - raw_files, - idx=idx_events, - n_rows=opt_dict["n_events"], - )[0] + peak_dict = Props.read_from(peak_json) + db_dict = Props.read_from(args.decay_const) pathlib.Path(os.path.dirname(args.peak_file)).mkdir(parents=True, exist_ok=True) - sto.write( - tb_data, - name="raw", - lh5_file=args.peak_file, - wo_mode="overwrite", - ) -else: - pathlib.Path(os.path.dirname(args.peak_file)).mkdir(parents=True, exist_ok=True) - pathlib.Path(args.peak_file).touch() \ No newline at end of file + if peak_dict.pop("run_selection") is True: + + rng = np.random.default_rng() + rand_num = f"{rng.integers(0,99999):05d}" + temp_output = f"{args.peak_file}.{rand_num}" + + + with open(args.raw_filelist) as f: + files = f.read().splitlines() + raw_files = sorted(files) + + if args.pulser_file: + with open(args.pulser_file) as f: + pulser_dict = json.load(f) + mask = np.array(pulser_dict["mask"]) + + elif args.tcm_filelist: + # get pulser mask from tcm files + with open(args.tcm_filelist) as f: + tcm_files = f.read().splitlines() + tcm_files = sorted(np.unique(tcm_files)) + ids, mask = get_tcm_pulser_ids( + tcm_files, args.channel, peak_dict["pulser_multiplicity_threshold"] + ) + else: + raise ValueError("No pulser file or tcm filelist provided") + + raw_dict = Props.read_from(args.raw_cal)[args.channel]["pars"]["operations"] + + peaks_kev = peak_dict["peaks"] + kev_widths = peak_dict["kev_widths"] + cut_parameters = peak_dict["cut_parameters"] + n_events = peak_dict["n_events"] + final_cut_field = peak_dict["final_cut_field"] + energy_parameter = peak_dict.get("energy_parameter", "trapTmax") + + lh5_path = f"{args.channel}/raw" + + + if not isinstance(kev_widths, list): + kev_widths = [kev_widths] + + if lh5_path[-1] != "/": + lh5_path += "/" + + raw_fields = [ + field.replace(lh5_path, "") for field in lh5.ls(raw_files[0], lh5_path) + ] + + tb = sto.read(lh5_path, raw_files, field_mask=["daqenergy"])[0] + + for outname, info in raw_dict.items(): + outcol = tb.eval(info["expression"], info.get("parameters", None)) + tb.add_column(outname, outcol) + + rough_energy = tb["daqenergy_cal"].nda + + masks = {} + for peak, kev_width in zip(peaks_kev,kev_widths) : + e_mask = (rough_energy > peak - 1.1* kev_width[0]) & (rough_energy < peak + 1.1* kev_width[0]) & (~mask) + masks[peak] = np.where(e_mask)[0] + log.debug(f"{len(masks[peak])} events found in energy range for {peak}") + + input_data = sto.read(f"{lh5_path}", raw_files, n_rows=10000)[0] + + if isinstance(dsp_config, str): + dsp_config = Props.read_from(dsp_config) + + dsp_config["outputs"] = get_keys(dsp_config["outputs"], cut_parameters) + [ + energy_parameter + ] + + log.debug("Processing data") + tb_data = run_one_dsp(input_data, dsp_config, db_dict=db_dict) + + if cut_parameters is not None: + cut_dict = generate_cuts(tb_data, cut_parameters) + log.debug(f"Cuts are calculated: {cut_dict}") + else: + cut_dict = None + + pk_dicts = {} + for peak, kev_width in zip(peaks_kev,kev_widths): + pk_dicts[peak] = {"idxs":(masks[peak],), + "n_rows_read":0, + 
"obj_buf_start":0, + "obj_buf":None, + "kev_width":kev_width + } + + for i,file in enumerate(raw_files): + log.debug(os.path.basename(file)) + for peak, peak_dict in pk_dicts.items(): + if peak_dict["idxs"] is not None: + # idx is a long continuous array + n_rows_i = sto.read_n_rows(lh5_path, file) + # find the length of the subset of idx that contains indices + # that are less than n_rows_i + n_rows_to_read_i = bisect_left(peak_dict["idxs"][0], n_rows_i) + # now split idx into idx_i and the remainder + idx_i = (peak_dict["idxs"][0][:n_rows_to_read_i],) + peak_dict["idxs"] = (peak_dict["idxs"][0][n_rows_to_read_i:] - n_rows_i,) + if len(idx_i[0])>0: + peak_dict["obj_buf"], n_rows_read_i = sto.read( + lh5_path, + file, + start_row=0, + idx=idx_i, + obj_buf=peak_dict["obj_buf"], + obj_buf_start=peak_dict["obj_buf_start"], + ) + + peak_dict["n_rows_read"] += n_rows_read_i + log.debug(f'{peak}: {peak_dict["n_rows_read"]}') + peak_dict["obj_buf_start"] += n_rows_read_i + if peak_dict["n_rows_read"] >=10000 or file ==raw_files[-1]: + if "e_lower_lim" not in peak_dict: + + tb_out = run_one_dsp(peak_dict["obj_buf"], dsp_config, db_dict=db_dict) + energy = tb_out[energy_parameter].nda + + hist, bins, var = pgh.get_hist( + energy, + range=(np.floor(np.nanmin(energy)), np.ceil(np.nanmax(energy))), + dx=peak / (np.nanpercentile(energy, 50)), + ) + peak_loc = pgh.get_bin_centers(bins)[np.nanargmax(hist)] + + mu, _, _ = pgc.hpge_fit_energy_peak_tops( + hist, + bins, + var, + [peak_loc], + n_to_fit=7, + )[ + 0 + ][0] + + if mu is None or np.isnan(mu): + log.debug("Fit failed, using max guess") + rough_adc_to_kev = peak / peak_loc + e_lower_lim = peak_loc - (1.5 * peak_dict["kev_width"][0]) / rough_adc_to_kev + e_upper_lim = peak_loc + (1.5 * peak_dict["kev_width"][1]) / rough_adc_to_kev + hist, bins, var = pgh.get_hist( + energy, range=(int(e_lower_lim), int(e_upper_lim)), dx=1 + ) + mu = pgh.get_bin_centers(bins)[np.nanargmax(hist)] + + updated_adc_to_kev = peak / mu + e_lower_lim = mu - (peak_dict["kev_width"][0]) / updated_adc_to_kev + e_upper_lim = mu + (peak_dict["kev_width"][1]) / updated_adc_to_kev + log.info(f"{peak}: lower lim is :{e_lower_lim}, upper lim is {e_upper_lim}") + peak_dict["e_lower_lim"] = e_lower_lim + peak_dict["e_upper_lim"] = e_upper_lim + peak_dict["ecal_par"] = updated_adc_to_kev + + out_tbl, n_wfs = get_out_data(peak_dict["obj_buf"], + tb_out, + cut_dict, + e_lower_lim, + e_upper_lim, + peak_dict["ecal_par"], + raw_dict, + int(peak), + final_cut_field=final_cut_field, + energy_param=energy_parameter + ) + sto.write(out_tbl ,name= lh5_path, + lh5_file=temp_output, + wo_mode="a") + peak_dict["obj_buf"] = None + peak_dict["obj_buf_start"] = 0 + peak_dict["n_events"] = n_wfs + else: + tb_out = run_one_dsp(peak_dict["obj_buf"], dsp_config, db_dict=db_dict) + out_tbl, n_wfs = get_out_data(peak_dict["obj_buf"], + tb_out, + cut_dict, + peak_dict["e_lower_lim"], + peak_dict["e_upper_lim"], + peak_dict["ecal_par"], + raw_dict, + int(peak), + final_cut_field=final_cut_field, + energy_param=energy_parameter + ) + peak_dict["n_events"] += n_wfs + sto.write(out_tbl ,name= lh5_path, + lh5_file=temp_output, + wo_mode="a") + peak_dict["obj_buf"] = None + peak_dict["obj_buf_start"] = 0 + if peak_dict["n_events"] >= n_events: + peak_dict["idxs"] = None + log.debug(f"{peak} has reached the required number of events") + log.debug(f"{peak}: {peak_dict['idxs']}, {peak_dict['idxs'] is not None}") + + else: + pathlib.Path(temp_output).touch() + + os.rename(temp_output, args.peak_file) \ No newline at 
end of file diff --git a/scripts/pars_dsp_nopt.py b/scripts/pars_dsp_nopt.py index bed75bf..d412e92 100644 --- a/scripts/pars_dsp_nopt.py +++ b/scripts/pars_dsp_nopt.py @@ -16,7 +16,7 @@ import pygama.pargen.noise_optimization as pno from legendmeta import LegendMetadata from legendmeta.catalog import Props -from pygama.pargen.cuts import generate_cuts, get_cut_indexes +from pygama.pargen.data_cleaning import generate_cuts, get_cut_indexes from pygama.pargen.dsp_optimize import run_one_dsp sto = lh5.LH5Store() @@ -76,7 +76,7 @@ log.info(f"Select baselines {len(tb_data)}") dsp_data = run_one_dsp(tb_data, dsp_config) - cut_dict = generate_cuts(dsp_data, parameters=opt_dict.pop("cut_pars")) + cut_dict = generate_cuts(dsp_data, cut_dict=opt_dict.pop("cut_pars")) cut_idxs = get_cut_indexes(dsp_data, cut_dict) tb_data = sto.read( f"{args.channel}/raw", raw_files, n_rows=opt_dict.pop("n_events"), idx=idxs[cut_idxs] diff --git a/scripts/pars_dsp_tau.py b/scripts/pars_dsp_tau.py index 04d4cdc..0c150d2 100644 --- a/scripts/pars_dsp_tau.py +++ b/scripts/pars_dsp_tau.py @@ -14,8 +14,9 @@ import numpy as np from legendmeta import LegendMetadata from legendmeta.catalog import Props -from pygama.pargen.extract_tau import dsp_preprocess_decay_const -from pygama.pargen.utils import get_tcm_pulser_ids +from pygama.pargen.extract_tau import ExtractTau +from pygama.pargen.data_cleaning import get_tcm_pulser_ids, get_cut_indexes +from pygama.pargen.dsp_optimize import run_one_dsp argparser = argparse.ArgumentParser() argparser.add_argument("--configs", help="configs path", type=str, required=True) @@ -25,6 +26,9 @@ argparser.add_argument("--channel", help="Channel", type=str, required=True) argparser.add_argument("--plot_path", help="plot path", type=str, required=False) argparser.add_argument("--output_file", help="output file", type=str, required=True) + +argparser.add_argument("--pulser_file", help="pulser file", type=str, required=False) + argparser.add_argument("--raw_files", help="input files", nargs="*", type=str) argparser.add_argument("--tcm_files", help="tcm_files", nargs="*", type=str) args = argparser.parse_args() @@ -38,6 +42,7 @@ logging.getLogger("legendmeta").setLevel(logging.INFO) sto = lh5.LH5Store() +log = logging.getLogger(__name__) configs = LegendMetadata(path=args.configs) config_dict = configs.on(args.timestamp, system=args.datatype) @@ -49,6 +54,7 @@ kwarg_dict = Props.read_from(kwarg_dict) if kwarg_dict["run_tau"] is True: + dsp_config = Props.read_from(channel_dict) kwarg_dict.pop("run_tau") if isinstance(args.raw_files, list) and args.raw_files[0].split(".")[-1] == "filelist": input_file = args.raw_files[0] @@ -81,17 +87,39 @@ n_rows=kwarg_dict.pop("n_events"), )[0] - out_dict, plot_dict = dsp_preprocess_decay_const( - tb_data, channel_dict, **kwarg_dict, display=1 - ) + tb_out = run_one_dsp(tb_data, dsp_config) + log.debug("Processed Data") + cut_parameters = kwarg_dict.get("cut_parameters", None) + if cut_parameters is not None: + idxs = get_cut_indexes(tb_out, cut_parameters=cut_parameters) + log.debug("Applied cuts") + log.debug(f"{len(idxs)} events passed cuts") + else: + idxs = np.full(len(tb_out), True, dtype=bool) + + tau = ExtractTau(dsp_config, kwarg_dict["wf_field"]) + slopes = tb_out["tail_slope"].nda + log.debug("Calculating pz constant") + + tau.get_decay_constant(slopes[idxs], tb_data[kwarg_dict["wf_field"]]) if args.plot_path: pathlib.Path(os.path.dirname(args.plot_path)).mkdir(parents=True, exist_ok=True) + + plot_dict = tau.plot_waveforms_after_correction(tb_data, 
"wf_pz", + norm_param=kwarg_dict.get("norm_param", "pz_mean")) + plot_dict.update(tau.plot_slopes(slopes[idxs])) + with open(args.plot_path, "wb") as f: pkl.dump({"tau": plot_dict}, f, protocol=pkl.HIGHEST_PROTOCOL) else: out_dict = {} +if args.pulser_file: + pathlib.Path(os.path.dirname(args.pulser_file)).mkdir(parents=True, exist_ok=True) + with open(args.pulser_file, "w") as f: + json.dump({"idxs": ids.tolist(), "mask": mask.tolist()} , f, indent=4) + pathlib.Path(os.path.dirname(args.output_file)).mkdir(parents=True, exist_ok=True) with open(args.output_file, "w") as f: - json.dump(out_dict, f, indent=4) + json.dump(tau.output_dict, f, indent=4) \ No newline at end of file From eb973d69d364db220d4ee6bfaff0707edee8c1d9 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Thu, 21 Mar 2024 00:27:53 +0100 Subject: [PATCH 021/103] first version --- scripts/par_psp.py | 83 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 66 insertions(+), 17 deletions(-) diff --git a/scripts/par_psp.py b/scripts/par_psp.py index 7ef0fad..400edbc 100644 --- a/scripts/par_psp.py +++ b/scripts/par_psp.py @@ -3,10 +3,12 @@ import os import pathlib from legendmeta.catalog import Props +from legendmeta import LegendMetadata from util.FileKey import ChannelProcKey import numpy as np -import matplotlib.pyplot as pyplot +import matplotlib.pyplot as plt import matplotlib as mpl +import matplotlib.dates as mdates from datetime import datetime import pickle as pkl mpl.use("Agg") @@ -19,13 +21,20 @@ argparser.add_argument("--out_plots", help="output plot files", nargs="*", type=str, required=False) argparser.add_argument("--in_obj", help="input object files", nargs="*", type=str, required=False) argparser.add_argument("--out_obj", help="output object files", nargs="*", type=str, required=False) + +argparser.add_argument("--log", help="log_file", type=str) +argparser.add_argument("--configs", help="configs", type=str, required=True) + +argparser.add_argument("--datatype", help="Datatype", type=str, required=True) +argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) +argparser.add_argument("--channel", help="Channel", type=str, required=True) args = argparser.parse_args() conf = LegendMetadata(path=args.configs) configs = conf.on(args.timestamp, system=args.datatype) -merge_config = configs["snakemake_rules"]["pars_psp"]["inputs"]["config"][ +merge_config = Props.read_from(configs["snakemake_rules"]["pars_psp"]["inputs"]["psp_config"][ args.channel -] +]) ave_fields = merge_config["average_fields"] @@ -44,22 +53,61 @@ for key in keys: val = val[key] vals.append(val) + if "dsp" in in_dicts[tstamp]: + tmp_dict = in_dicts[tstamp]["dsp"] + else: + tmp_dict = {} + in_dicts[tstamp]["dsp"] = tmp_dict + for i,key in enumerate(keys): + if i == len(keys)-1: + tmp_dict[key] = val + else: + if key in tmp_dict: + tmp_dict = tmp_dict[key] + else: + tmp_dict[key] = {} + tmp_dict = tmp_dict[key] + if isinstance(vals[0], str): + if "*" in vals[0]: + unit = vals[0].split("*")[1] + if "." 
in vals[0]: + rounding = len(val.split("*")[0].split(".")[-1]) + else: + rounding = 16 + vals = np.array([float(val.split("*")[0]) for val in vals]) + else: + unit = None + rounding = 16 + else: + vals=np.array(vals) + unit = None if len(vals[~np.isnan(vals)]) ==0: - mean = np.nan + mean_val = np.nan else: - mean = np.nanmean(vals) + mean_val = np.nanmean(vals) + if unit is not None: + mean = f"{round(mean_val, rounding)}*{unit}" + else: + mean = mean_val for tstamp in in_dicts: val = in_dicts[tstamp] - for key in keys: - val = val[key] - val = mean - + for i, key in enumerate(keys): + if i == len(keys)-1: + val[key]= mean + else: + val = val[key] + fig = plt.figure() plt.scatter([datetime.strptime(tstamp,'%Y%m%dT%H%M%SZ') for tstamp in in_dicts] , vals) - plt.axhline(y=mean, color='r', linestyle='-') + plt.axhline(y=mean_val, color='r', linestyle='-') plt.xlabel("time") - plt.ylabel("value") - plt.title(f"{field} over time") + if unit is not None: + plt.ylabel(f"value {unit}") + else: + plt.ylabel("value") + plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%d/%m/%y')) + plt.gcf().autofmt_xdate() + plt.title(f"{field}") plot_dict[field] = fig plt.close() @@ -78,11 +126,12 @@ with open(infile, "rb") as f: old_plot_dict = pkl.load(f) break - new_plot_dict = old_plot_dict.update({"psp": plot_dict}) + old_plot_dict.update({"psp": plot_dict}) + new_plot_dict = old_plot_dict else: new_plot_dict = {"psp": plot_dict} - with open(file, "w") as f: - pkl.dump(new_plot_dict, file, protocol=pkl.HIGHEST_PROTOCOL) + with open(file, "wb") as f: + pkl.dump(new_plot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) if args.out_obj: for file in args.out_obj: @@ -96,5 +145,5 @@ new_obj_dict = old_obj_dict else: new_obj_dict = {} - with open(file, "w") as f: - pkl.dump(new_obj_dict, file, protocol=pkl.HIGHEST_PROTOCOL) \ No newline at end of file + with open(file, "wb") as f: + pkl.dump(new_obj_dict, f, protocol=pkl.HIGHEST_PROTOCOL) \ No newline at end of file From b160497382f2813afeccf916cc87e21930e53cd0 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Thu, 21 Mar 2024 00:28:31 +0100 Subject: [PATCH 022/103] fix pht bug, change to os.removes --- scripts/complete_run.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/scripts/complete_run.py b/scripts/complete_run.py index b266d50..f5e900d 100644 --- a/scripts/complete_run.py +++ b/scripts/complete_run.py @@ -148,7 +148,9 @@ def build_valid_keys(input_files, output_dir): with open(out_file, "w") as w: w.write(out_string) - os.system(f"rm {input_files}") + for input_file in input_files: + if os.path.isfile(input_file): + os.remove(input_file) def build_file_dbs(input_files, output_dir): @@ -188,7 +190,7 @@ def build_file_dbs(input_files, output_dir): ut.tier_hit_path(setup), "" ), "pht": pat.get_pattern_tier(setup, "pht", check_in_cycle=False).replace( - ut.tier_hit_path(setup), "" + ut.tier_pht_path(setup), "" ), "evt": pat.get_pattern_tier(setup, "evt", check_in_cycle=False).replace( ut.tier_evt_path(setup), "" @@ -233,7 +235,7 @@ def build_file_dbs(input_files, output_dir): ut.tier_hit_path(setup), "" ), "pht": pat.get_pattern_tier(setup, "pht", check_in_cycle=False).replace( - ut.tier_hit_path(setup), "" + ut.tier_pht_path(setup), "" ), "evt": pat.get_pattern_tier(setup, "evt", check_in_cycle=False).replace( ut.tier_evt_path(setup), "" @@ -269,8 +271,9 @@ def build_file_dbs(input_files, output_dir): json.dump(file_db_config, w, indent=2) build_file_dbs(snakemake.params.tmp_par_path, snakemake.params.filedb_path) - os.system(f"rm 
{os.path.join(snakemake.params.filedb_path, 'file_db_config.json')}") + os.remove(os.path.join(snakemake.params.filedb_path, 'file_db_config.json')) build_valid_keys(snakemake.params.tmp_par_path, snakemake.params.valid_keys_path) pathlib.Path(snakemake.output.gen_output).touch() + From 1dea790df9f6d9bea9231f7940e296f86e62dfd8 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Thu, 21 Mar 2024 00:29:24 +0100 Subject: [PATCH 023/103] increase delta --- scripts/check_blinding.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/check_blinding.py b/scripts/check_blinding.py index 550f5a8..4829608 100644 --- a/scripts/check_blinding.py +++ b/scripts/check_blinding.py @@ -71,7 +71,7 @@ # bin with 1 keV bins and get maxs hist, bins, var = get_hist(daqenergy_cal, np.arange(0, 3000, 1)) -maxs = get_i_local_maxima(hist, delta=5) +maxs = get_i_local_maxima(hist, delta=25) log.info(f"peaks found at : {maxs}") # plot the energy spectrum to check calibration From 8af1000ca7aba4bad84e8ab3bdde8d4314b4a702 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Thu, 21 Mar 2024 00:29:49 +0100 Subject: [PATCH 024/103] first version psp --- rules/psp.smk | 79 ++++++++++++++++++++++++++------------------------- 1 file changed, 41 insertions(+), 38 deletions(-) diff --git a/rules/psp.smk b/rules/psp.smk index 811893e..df9b7be 100644 --- a/rules/psp.smk +++ b/rules/psp.smk @@ -7,8 +7,8 @@ Snakemake rules for processing pht (partition hit) tier data. This is done in 4 """ from scripts.util.pars_loading import pars_catalog -import scripts.util.create_pars_keylist import pars_key_resolve -from scripts.util.utils import par_psp_path, set_last_rule_name +from scripts.util.create_pars_keylist import pars_key_resolve +from scripts.util.utils import par_psp_path, par_dsp_path, set_last_rule_name from scripts.util.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, @@ -27,20 +27,19 @@ pars_key_resolve.write_par_catalog( {"cal": ["par_psp"], "lar": ["par_psp"]}, ) -part_pht_rules = {} +psp_rules = {} for key, dataset in part.datasets.items(): for partition in dataset.keys(): - rule: input: dsp_pars=part.get_par_files( - f"{par_psp_path(setup)}/validity.jsonl", + f"{par_dsp_path(setup)}/validity.jsonl", partition, key, tier="dsp" ), dsp_objs=part.get_par_files( - f"{par_psp_path(setup)}/validity.jsonl", + f"{par_dsp_path(setup)}/validity.jsonl", partition, key, tier="dsp", @@ -48,7 +47,7 @@ for key, dataset in part.datasets.items(): extension="pkl", ), dsp_plots=part.get_plt_files( - f"{par_psp_path(setup)}/validity.jsonl", + f"{par_dsp_path(setup)}/validity.jsonl", partition, key, tier="dsp" @@ -62,26 +61,26 @@ for key, dataset in part.datasets.items(): f"{par_psp_path(setup)}/validity.jsonl", partition, key, tier="psp" ), output: - psp_pars=part.get_par_files( + psp_pars=temp(part.get_par_files( f"{par_psp_path(setup)}/validity.jsonl", partition, key, tier="psp" - ), - psp_objs=part.get_par_files( + )), + psp_objs=temp(part.get_par_files( f"{par_psp_path(setup)}/validity.jsonl", partition, key, tier="psp", name="objects", extension="pkl", - ), - psp_plots=part.get_plt_files( + )), + psp_plots=temp(part.get_plt_files( f"{par_psp_path(setup)}/validity.jsonl", partition, key, tier="psp" - ), + )), log: part.get_log_file( f"{par_psp_path(setup)}/validity.jsonl", @@ -103,44 +102,40 @@ for key, dataset in part.datasets.items(): "--timestamp {params.timestamp} " "--channel {params.channel} " "--in_plots {input.dsp_plots} " - "--out_plots {input.psp_plots} " + "--out_plots 
{output.psp_plots} " "--in_obj {input.dsp_objs} " - "--out_obj {input.psp_objs} " - "--input {input.plot_files} " - "--output {input.dsp_plots} " + "--out_obj {output.psp_objs} " + "--input {input.dsp_pars} " + "--output {output.psp_pars} " set_last_rule_name( workflow, f"{key}-{partition}-build_par_psp" ) - if key in part_pht_rules: - part_pht_rules[key].append(list(workflow.rules)[-1]) + if key in psp_rules: + psp_rules[key].append(list(workflow.rules)[-1]) else: - part_pht_rules[key] = [list(workflow.rules)[-1]] + psp_rules[key] = [list(workflow.rules)[-1]] # Merged energy and a/e supercalibrations to reduce number of rules as they have same inputs/outputs # This rule builds the a/e calibration using the calibration dsp files for the whole partition -rule build_psp: +rule build_par_psp: input: - dsp_pars=temp(get_pattern_pars_tmp_channel(setup, "dsp")), - dsp_objs=temp( - get_pattern_pars_tmp_channel( + dsp_pars=get_pattern_pars_tmp_channel(setup, "dsp"), + dsp_objs=get_pattern_pars_tmp_channel( setup, "dsp", "objects", extension="pkl" - ) - ), - dsp_plots=temp(get_pattern_plts_tmp_channel(setup, "dsp")), + ), + dsp_plots=get_pattern_plts_tmp_channel(setup, "dsp"), params: datatype="cal", channel="{channel}", timestamp="{timestamp}", output: psp_pars=temp(get_pattern_pars_tmp_channel(setup, "psp")), - psp_objs=temp( - get_pattern_pars_tmp_channel( + psp_objs=temp(get_pattern_pars_tmp_channel( setup, "psp", "objects", extension="pkl" - ) - ), + )), psp_plots=temp(get_pattern_plts_tmp_channel(setup, "psp")), log: get_pattern_log_channel(setup, "pars_psp"), @@ -157,12 +152,20 @@ rule build_psp: "--timestamp {params.timestamp} " "--channel {params.channel} " "--in_plots {input.dsp_plots} " - "--out_plots {input.psp_plots} " + "--out_plots {output.psp_plots} " "--in_obj {input.dsp_objs} " - "--out_obj {input.psp_objs} " - "--input {input.plot_files} " - "--output {input.dsp_plots} " - + "--out_obj {output.psp_objs} " + "--input {input.dsp_pars} " + "--output {output.psp_pars} " + +fallback_psp_rule = list(workflow.rules)[-1] +rule_order_list = [] +ordered = OrderedDict(psp_rules) +ordered.move_to_end("default") +for key, items in ordered.items(): + rule_order_list += [item.name for item in items] +rule_order_list.append(fallback_psp_rule.name) +workflow._ruleorder.add(*rule_order_list) # [::-1] rule build_pars_psp_objects: @@ -187,7 +190,7 @@ rule build_pars_psp_objects: "--input {input} " "--output {output} " -rule build_plts_pht: +rule build_plts_psp: input: lambda wildcards: read_filelist_plts_cal_channel(wildcards, "psp"), output: @@ -200,7 +203,7 @@ rule build_plts_pht: "--input {input} " "--output {output} " -rule build_pars_pht: +rule build_pars_psp: input: infiles = lambda wildcards: read_filelist_pars_cal_channel(wildcards, "psp"), plts = get_pattern_plts(setup, "psp"), From d9f7bd898aa51e0ecfbc43fad51f7bc5c5299f4e Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Thu, 21 Mar 2024 00:30:17 +0100 Subject: [PATCH 025/103] new event selection and get pulser mask from tau script --- rules/dsp.smk | 65 ++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 49 insertions(+), 16 deletions(-) diff --git a/rules/dsp.smk b/rules/dsp.smk index 002496f..349b67b 100644 --- a/rules/dsp.smk +++ b/rules/dsp.smk @@ -20,6 +20,15 @@ from scripts.util.patterns import ( get_pattern_pars, ) +onstart: + if os.path.isfile(os.path.join(pars_path(setup), "dsp", "validity.jsonl")): + os.remove(os.path.join(pars_path(setup), "dsp", "validity.jsonl")) + ds.pars_key_resolve.write_par_catalog( + 
["-*-*-*-cal"], + os.path.join(pars_path(setup), "dsp", "validity.jsonl"), + get_pattern_tier_raw(setup), + {"cal": ["par_dsp"], "lar": ["par_dsp"]}, + ) rule build_pars_dsp_tau: input: @@ -34,6 +43,7 @@ rule build_pars_dsp_tau: output: decay_const=temp(get_pattern_pars_tmp_channel(setup, "dsp", "decay_constant")), plots=temp(get_pattern_plts_tmp_channel(setup, "dsp", "decay_constant")), + pulser=temp(get_pattern_pars_tmp_channel(setup, "dsp", "pulser_ids")), log: get_pattern_log_channel(setup, "par_dsp_decay_constant"), group: @@ -50,9 +60,44 @@ rule build_pars_dsp_tau: "--channel {params.channel} " "--plot_path {output.plots} " "--output_file {output.decay_const} " + "--pulser_file {output.pulser} " "--tcm_files {input.tcm_files} " "--raw_files {input.files}" +rule build_pars_event_selection: + input: + files=os.path.join( + filelist_path(setup), "all-{experiment}-{period}-{run}-cal-raw.filelist" + ), + pulser_file=get_pattern_pars_tmp_channel(setup, "dsp", "pulser_ids"), + database=get_pattern_pars_tmp_channel(setup, "dsp", "decay_constant"), + raw_cal=get_blinding_curve_file, + params: + timestamp="{timestamp}", + datatype="cal", + channel="{channel}", + output: + peak_file=temp(get_pattern_pars_tmp_channel(setup, "dsp", "peaks", "lh5")), + log: + get_pattern_log_channel(setup, "par_dsp_event_selection"), + group: + "par-dsp" + resources: + runtime=300, + shell: + "{swenv} python3 -B " + f"{workflow.source_path('../scripts/pars_dsp_event_selection.py')} " + "--configs {configs} " + "--log {log} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--channel {params.channel} " + "--peak_file {output.peak_file} " + "--pulser_file {input.pulser_file} " + "--decay_const {input.database} " + "--raw_cal {input.raw_cal} " + "--raw_filelist {input.files}" + # This rule builds the optimal energy filter parameters for the dsp using fft files rule build_pars_dsp_nopt: @@ -98,12 +143,7 @@ rule build_pars_dsp_dplms: fft_files=os.path.join( filelist_path(setup), "all-{experiment}-{period}-{run}-fft-raw.filelist" ), - cal_files=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-cal-raw.filelist" - ), - tcm_files=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-cal-tcm.filelist" - ), + peak_file=get_pattern_pars_tmp_channel(setup, "dsp", "peaks", "lh5"), database=get_pattern_pars_tmp_channel(setup, "dsp", "noise_optimization"), inplots=get_pattern_plts_tmp_channel(setup, "dsp", "noise_optimization"), params: @@ -124,8 +164,7 @@ rule build_pars_dsp_dplms: "{swenv} python3 -B " f"{workflow.source_path('../scripts/pars_dsp_dplms.py')} " "--fft_raw_filelist {input.fft_files} " - "--cal_raw_filelist {input.cal_files} " - "--tcm_filelist {input.tcm_files} " + "--peak_file {input.peak_file} " "--database {input.database} " "--inplots {input.inplots} " "--configs {configs} " @@ -140,12 +179,7 @@ rule build_pars_dsp_dplms: # This rule builds the optimal energy filter parameters for the dsp using calibration dsp files rule build_pars_dsp_eopt: input: - files=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-cal-raw.filelist" - ), - tcm_filelist=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-cal-tcm.filelist" - ), + peak_file=get_pattern_pars_tmp_channel(setup, "dsp", "peaks", "lh5"), decay_const=get_pattern_pars_tmp_channel(setup, "dsp", "dplms"), inplots=get_pattern_plts_tmp_channel(setup, "dsp", "dplms"), params: @@ -172,8 +206,7 @@ rule build_pars_dsp_eopt: "--datatype {params.datatype} " "--timestamp 
{params.timestamp} " "--channel {params.channel} " - "--raw_filelist {input.files} " - "--tcm_filelist {input.tcm_filelist} " + "--peak_file {input.peak_file} " "--inplots {input.inplots} " "--decay_const {input.decay_const} " "--plot_path {output.plots} " From 11b4dddbcf0fcf24fae29d0fe667fcaee552f265 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Thu, 21 Mar 2024 00:30:58 +0100 Subject: [PATCH 026/103] include psp, move validity generation to rules, rms -> os.removes --- Snakefile | 63 ++++++++++++++++++++++++++----------------------------- 1 file changed, 30 insertions(+), 33 deletions(-) diff --git a/Snakefile b/Snakefile index 279f320..e71e7ad 100644 --- a/Snakefile +++ b/Snakefile @@ -10,7 +10,7 @@ This includes: - the same for partition level tiers """ -import pathlib, os, json, sys +import pathlib, os, json, sys, glob import scripts.util as ds from scripts.util.pars_loading import pars_catalog from scripts.util.patterns import get_pattern_tier_raw @@ -21,6 +21,7 @@ from scripts.util.utils import ( chan_map_path, filelist_path, metadata_path, + tmp_log_path, ) from datetime import datetime from collections import OrderedDict @@ -55,6 +56,7 @@ include: "rules/tcm.smk" include: "rules/dsp.smk" include: "rules/hit.smk" include: "rules/pht.smk" +include: "rules/psp.smk" include: "rules/evt.smk" include: "rules/skm.smk" include: "rules/blinding_calibration.smk" @@ -66,39 +68,10 @@ localrules: onstart: print("Starting workflow") - shell(f"rm {pars_path(setup)}/dsp/validity.jsonl || true") - shell(f"rm {pars_path(setup)}/hit/validity.jsonl || true") - shell(f"rm {pars_path(setup)}/pht/validity.jsonl || true") - shell(f"rm {pars_path(setup)}/raw/validity.jsonl || true") - ds.pars_key_resolve.write_par_catalog( - ["-*-*-*-cal"], - os.path.join(pars_path(setup), "raw", "validity.jsonl"), - get_pattern_tier_raw(setup), - {"cal": ["par_raw"]}, - ) - ds.pars_key_resolve.write_par_catalog( - ["-*-*-*-cal"], - os.path.join(pars_path(setup), "dsp", "validity.jsonl"), - get_pattern_tier_raw(setup), - {"cal": ["par_dsp"], "lar": ["par_dsp"]}, - ) - ds.pars_key_resolve.write_par_catalog( - ["-*-*-*-cal"], - os.path.join(pars_path(setup), "hit", "validity.jsonl"), - get_pattern_tier_raw(setup), - {"cal": ["par_hit"], "lar": ["par_hit"]}, - ) - ds.pars_key_resolve.write_par_catalog( - ["-*-*-*-cal"], - os.path.join(pars_path(setup), "pht", "validity.jsonl"), - get_pattern_tier_raw(setup), - {"cal": ["par_pht"], "lar": ["par_pht"]}, - ) - onsuccess: from snakemake.report import auto_report - + rep_dir = f"{log_path(setup)}/report-{datetime.strftime(datetime.utcnow(), '%Y%m%dT%H%M%SZ')}" pathlib.Path(rep_dir).mkdir(parents=True, exist_ok=True) # auto_report(workflow.persistence.dag, f"{rep_dir}/report.html") @@ -109,8 +82,32 @@ onsuccess: f.writelines(str(workflow.persistence.dag.rule_dot())) # shell(f"cat {rep_dir}/rg.txt | dot -Tpdf > {rep_dir}/rg.pdf") print("Workflow finished, no error") - shell("rm *.gen || true") - shell(f"rm {filelist_path(setup)}/* || true") + + # remove .gen files + files = glob.glob("*.gen") + for file in files: + if os.path.isfile(file): + os.remove(file) + + # remove filelists + files = glob.glob(os.path.join(filelist_path(setup), "*")) + for file in files: + if os.path.isfile(file): + os.remove(file) + if os.path.exists(filelist_path(setup)): + os.rmdir(filelist_path(setup)) + + # remove logs + files = glob.glob(os.path.join(tmp_log_path(setup), "*", "*.log")) + for file in files: + if os.path.isfile(file): + os.remove(file) + dirs = 
glob.glob(os.path.join(tmp_log_path(setup), "*")) + for d in dirs: + if os.path.isdir(d): + os.rmdir(d) + if os.path.exists(tmp_log_path(setup)): + os.rmdir(tmp_log_path(setup)) # Placeholder, can email or maybe put message in slack From c17107e99a2a71589532813f9f1544478cd0b1ee Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Thu, 21 Mar 2024 00:41:42 +0100 Subject: [PATCH 027/103] pre-commit fixes --- Snakefile | 13 +- rules/dsp.smk | 40 +++-- rules/psp.smk | 90 ++++++------ scripts/complete_run.py | 3 +- scripts/merge_channels.py | 53 ++++--- scripts/par_psp.py | 69 +++++---- scripts/pars_dsp_dplms.py | 42 +++--- scripts/pars_dsp_eopt.py | 35 ++--- scripts/pars_dsp_event_selection.py | 219 +++++++++++++++------------- scripts/pars_dsp_tau.py | 17 ++- scripts/util/patterns.py | 6 +- scripts/util/utils.py | 10 +- 12 files changed, 322 insertions(+), 275 deletions(-) diff --git a/Snakefile b/Snakefile index e71e7ad..c5149e8 100644 --- a/Snakefile +++ b/Snakefile @@ -41,6 +41,7 @@ swenv = runcmd(setup) part = ds.dataset_file(setup, os.path.join(configs, "partitions.json")) basedir = workflow.basedir + wildcard_constraints: experiment="\w+", period="\w+", @@ -61,6 +62,7 @@ include: "rules/evt.smk" include: "rules/skm.smk" include: "rules/blinding_calibration.smk" + localrules: gen_filelist, autogen_output, @@ -69,9 +71,10 @@ localrules: onstart: print("Starting workflow") + onsuccess: from snakemake.report import auto_report - + rep_dir = f"{log_path(setup)}/report-{datetime.strftime(datetime.utcnow(), '%Y%m%dT%H%M%SZ')}" pathlib.Path(rep_dir).mkdir(parents=True, exist_ok=True) # auto_report(workflow.persistence.dag, f"{rep_dir}/report.html") @@ -82,22 +85,22 @@ onsuccess: f.writelines(str(workflow.persistence.dag.rule_dot())) # shell(f"cat {rep_dir}/rg.txt | dot -Tpdf > {rep_dir}/rg.pdf") print("Workflow finished, no error") - + # remove .gen files files = glob.glob("*.gen") for file in files: if os.path.isfile(file): os.remove(file) - # remove filelists + # remove filelists files = glob.glob(os.path.join(filelist_path(setup), "*")) for file in files: if os.path.isfile(file): os.remove(file) if os.path.exists(filelist_path(setup)): os.rmdir(filelist_path(setup)) - - # remove logs + + # remove logs files = glob.glob(os.path.join(tmp_log_path(setup), "*", "*.log")) for file in files: if os.path.isfile(file): diff --git a/rules/dsp.smk b/rules/dsp.smk index 349b67b..5c27f42 100644 --- a/rules/dsp.smk +++ b/rules/dsp.smk @@ -20,6 +20,7 @@ from scripts.util.patterns import ( get_pattern_pars, ) + onstart: if os.path.isfile(os.path.join(pars_path(setup), "dsp", "validity.jsonl")): os.remove(os.path.join(pars_path(setup), "dsp", "validity.jsonl")) @@ -30,6 +31,7 @@ onstart: {"cal": ["par_dsp"], "lar": ["par_dsp"]}, ) + rule build_pars_dsp_tau: input: files=os.path.join( @@ -64,6 +66,7 @@ rule build_pars_dsp_tau: "--tcm_files {input.tcm_files} " "--raw_files {input.files}" + rule build_pars_event_selection: input: files=os.path.join( @@ -151,8 +154,10 @@ rule build_pars_dsp_dplms: datatype="cal", channel="{channel}", output: - dsp_pars=temp(get_pattern_pars_tmp_channel(setup, "dsp",'dplms')), - lh5_path=temp(get_pattern_pars_tmp_channel(setup, "dsp","dplms",extension="lh5")), + dsp_pars=temp(get_pattern_pars_tmp_channel(setup, "dsp", "dplms")), + lh5_path=temp( + get_pattern_pars_tmp_channel(setup, "dsp", "dplms", extension="lh5") + ), plots=temp(get_pattern_plts_tmp_channel(setup, "dsp", "dplms")), log: get_pattern_log_channel(setup, "pars_dsp_dplms"), @@ -176,6 +181,7 @@ rule build_pars_dsp_dplms: 
"--lh5_path {output.lh5_path} " "--plot_path {output.plots} " + # This rule builds the optimal energy filter parameters for the dsp using calibration dsp files rule build_pars_dsp_eopt: input: @@ -213,6 +219,7 @@ rule build_pars_dsp_eopt: "--qbb_grid_path {output.qbb_grid} " "--final_dsp_pars {output.dsp_pars}" + rule build_plts_dsp: input: lambda wildcards: read_filelist_plts_cal_channel(wildcards, "dsp"), @@ -226,6 +233,7 @@ rule build_plts_dsp: "--input {input} " "--output {output} " + rule build_pars_dsp_objects: input: lambda wildcards: read_filelist_pars_cal_channel(wildcards, "dsp_objects_pkl"), @@ -245,15 +253,18 @@ rule build_pars_dsp_objects: "--input {input} " "--output {output} " + rule build_pars_dsp_db: input: lambda wildcards: read_filelist_pars_cal_channel(wildcards, "dsp"), output: - temp(get_pattern_pars_tmp( - setup, - "dsp", - datatype="cal", - )), + temp( + get_pattern_pars_tmp( + setup, + "dsp", + datatype="cal", + ) + ), group: "merge-dsp" shell: @@ -262,16 +273,19 @@ rule build_pars_dsp_db: "--input {input} " "--output {output} " + rule build_pars_dsp: input: - in_files = lambda wildcards: read_filelist_pars_cal_channel(wildcards, "dsp_dplms_lh5"), - in_db = get_pattern_pars_tmp( + in_files=lambda wildcards: read_filelist_pars_cal_channel( + wildcards, "dsp_dplms_lh5" + ), + in_db=get_pattern_pars_tmp( setup, "dsp", datatype="cal", ), - plts = get_pattern_plts(setup, "dsp"), - objects = get_pattern_pars( + plts=get_pattern_plts(setup, "dsp"), + objects=get_pattern_pars( setup, "dsp", name="objects", @@ -279,13 +293,13 @@ rule build_pars_dsp: check_in_cycle=check_in_cycle, ), output: - out_file = get_pattern_pars( + out_file=get_pattern_pars( setup, "dsp", extension="lh5", check_in_cycle=check_in_cycle, ), - out_db = get_pattern_pars(setup, "dsp", check_in_cycle=check_in_cycle), + out_db=get_pattern_pars(setup, "dsp", check_in_cycle=check_in_cycle), group: "merge-dsp" shell: diff --git a/rules/psp.smk b/rules/psp.smk index df9b7be..a957e4b 100644 --- a/rules/psp.smk +++ b/rules/psp.smk @@ -30,13 +30,11 @@ pars_key_resolve.write_par_catalog( psp_rules = {} for key, dataset in part.datasets.items(): for partition in dataset.keys(): + rule: input: dsp_pars=part.get_par_files( - f"{par_dsp_path(setup)}/validity.jsonl", - partition, - key, - tier="dsp" + f"{par_dsp_path(setup)}/validity.jsonl", partition, key, tier="dsp" ), dsp_objs=part.get_par_files( f"{par_dsp_path(setup)}/validity.jsonl", @@ -47,10 +45,7 @@ for key, dataset in part.datasets.items(): extension="pkl", ), dsp_plots=part.get_plt_files( - f"{par_dsp_path(setup)}/validity.jsonl", - partition, - key, - tier="dsp" + f"{par_dsp_path(setup)}/validity.jsonl", partition, key, tier="dsp" ), wildcard_constraints: channel=part.get_wildcard_constraints(partition, key), @@ -61,26 +56,32 @@ for key, dataset in part.datasets.items(): f"{par_psp_path(setup)}/validity.jsonl", partition, key, tier="psp" ), output: - psp_pars=temp(part.get_par_files( - f"{par_psp_path(setup)}/validity.jsonl", - partition, - key, - tier="psp" - )), - psp_objs=temp(part.get_par_files( - f"{par_psp_path(setup)}/validity.jsonl", - partition, - key, - tier="psp", - name="objects", - extension="pkl", - )), - psp_plots=temp(part.get_plt_files( - f"{par_psp_path(setup)}/validity.jsonl", - partition, - key, - tier="psp" - )), + psp_pars=temp( + part.get_par_files( + f"{par_psp_path(setup)}/validity.jsonl", + partition, + key, + tier="psp", + ) + ), + psp_objs=temp( + part.get_par_files( + f"{par_psp_path(setup)}/validity.jsonl", + partition, + key, 
+ tier="psp", + name="objects", + extension="pkl", + ) + ), + psp_plots=temp( + part.get_plt_files( + f"{par_psp_path(setup)}/validity.jsonl", + partition, + key, + tier="psp", + ) + ), log: part.get_log_file( f"{par_psp_path(setup)}/validity.jsonl", @@ -108,9 +109,7 @@ for key, dataset in part.datasets.items(): "--input {input.dsp_pars} " "--output {output.psp_pars} " - set_last_rule_name( - workflow, f"{key}-{partition}-build_par_psp" - ) + set_last_rule_name(workflow, f"{key}-{partition}-build_par_psp") if key in psp_rules: psp_rules[key].append(list(workflow.rules)[-1]) @@ -123,9 +122,7 @@ for key, dataset in part.datasets.items(): rule build_par_psp: input: dsp_pars=get_pattern_pars_tmp_channel(setup, "dsp"), - dsp_objs=get_pattern_pars_tmp_channel( - setup, "dsp", "objects", extension="pkl" - ), + dsp_objs=get_pattern_pars_tmp_channel(setup, "dsp", "objects", extension="pkl"), dsp_plots=get_pattern_plts_tmp_channel(setup, "dsp"), params: datatype="cal", @@ -133,9 +130,9 @@ rule build_par_psp: timestamp="{timestamp}", output: psp_pars=temp(get_pattern_pars_tmp_channel(setup, "psp")), - psp_objs=temp(get_pattern_pars_tmp_channel( - setup, "psp", "objects", extension="pkl" - )), + psp_objs=temp( + get_pattern_pars_tmp_channel(setup, "psp", "objects", extension="pkl") + ), psp_plots=temp(get_pattern_plts_tmp_channel(setup, "psp")), log: get_pattern_log_channel(setup, "pars_psp"), @@ -158,6 +155,7 @@ rule build_par_psp: "--input {input.dsp_pars} " "--output {output.psp_pars} " + fallback_psp_rule = list(workflow.rules)[-1] rule_order_list = [] ordered = OrderedDict(psp_rules) @@ -165,7 +163,7 @@ ordered.move_to_end("default") for key, items in ordered.items(): rule_order_list += [item.name for item in items] rule_order_list.append(fallback_psp_rule.name) -workflow._ruleorder.add(*rule_order_list) # [::-1] +workflow._ruleorder.add(*rule_order_list) # [::-1] rule build_pars_psp_objects: @@ -181,7 +179,7 @@ rule build_pars_psp_objects: name="objects", extension="dir", check_in_cycle=check_in_cycle, - ) + ), group: "merge-hit" shell: @@ -190,11 +188,12 @@ rule build_pars_psp_objects: "--input {input} " "--output {output} " + rule build_plts_psp: input: lambda wildcards: read_filelist_plts_cal_channel(wildcards, "psp"), output: - get_pattern_plts(setup, "psp") + get_pattern_plts(setup, "psp"), group: "merge-hit" shell: @@ -203,17 +202,18 @@ rule build_plts_psp: "--input {input} " "--output {output} " + rule build_pars_psp: input: - infiles = lambda wildcards: read_filelist_pars_cal_channel(wildcards, "psp"), - plts = get_pattern_plts(setup, "psp"), - objects = get_pattern_pars( + infiles=lambda wildcards: read_filelist_pars_cal_channel(wildcards, "psp"), + plts=get_pattern_plts(setup, "psp"), + objects=get_pattern_pars( setup, "psp", name="objects", extension="dir", check_in_cycle=check_in_cycle, - ) + ), output: get_pattern_pars(setup, "psp", check_in_cycle=check_in_cycle), group: @@ -256,4 +256,4 @@ rule build_psp: "--input {input.raw_file} " "--output {output.tier_file} " "--db_file {output.db_file} " - "--pars_file {input.pars_file}" \ No newline at end of file + "--pars_file {input.pars_file}" diff --git a/scripts/complete_run.py b/scripts/complete_run.py index f5e900d..5829f1a 100644 --- a/scripts/complete_run.py +++ b/scripts/complete_run.py @@ -271,9 +271,8 @@ def build_file_dbs(input_files, output_dir): json.dump(file_db_config, w, indent=2) build_file_dbs(snakemake.params.tmp_par_path, snakemake.params.filedb_path) - os.remove(os.path.join(snakemake.params.filedb_path, 
'file_db_config.json')) + os.remove(os.path.join(snakemake.params.filedb_path, "file_db_config.json")) build_valid_keys(snakemake.params.tmp_par_path, snakemake.params.valid_keys_path) pathlib.Path(snakemake.output.gen_output).touch() - diff --git a/scripts/merge_channels.py b/scripts/merge_channels.py index b45d16e..bc8337c 100644 --- a/scripts/merge_channels.py +++ b/scripts/merge_channels.py @@ -4,13 +4,12 @@ import pathlib import pickle as pkl import shelve + +import lgdo.lh5 as lh5 +import numpy as np from legendmeta.catalog import Props from util.FileKey import ChannelProcKey -import numpy as np - -import lgdo.lh5 as lh5 -from lgdo import Array def replace_path(d, old_path, new_path): if isinstance(d, dict): @@ -19,23 +18,32 @@ def replace_path(d, old_path, new_path): elif isinstance(d, list): for i in range(len(d)): d[i] = replace_path(d[i], old_path, new_path) - elif isinstance(d, str): - if old_path in d: - d = d.replace(old_path, new_path) + elif isinstance(d, str) and old_path in d: + d = d.replace(old_path, new_path) return d + argparser = argparse.ArgumentParser() argparser.add_argument("--input", help="input file", nargs="*", type=str, required=True) argparser.add_argument("--output", help="output file", type=str, required=True) -argparser.add_argument("--in_db", help="in db file (used for when lh5 files refered to in db)", type=str, required=False) -argparser.add_argument("--out_db", help="lh5 file (used for when lh5 files refered to in db)", type=str, required=False) +argparser.add_argument( + "--in_db", + help="in db file (used for when lh5 files referred to in db)", + type=str, + required=False, +) +argparser.add_argument( + "--out_db", + help="lh5 file (used for when lh5 files referred to in db)", + type=str, + required=False, +) args = argparser.parse_args() -# change to only have 1 output file for mutliple inputs +# change to only have 1 output file for multiple inputs # don't care about processing step, check if extension matches - channel_files = args.input file_extension = pathlib.Path(args.output).suffix @@ -51,7 +59,7 @@ def replace_path(d, old_path, new_path): pathlib.Path(os.path.dirname(args.output)).mkdir(parents=True, exist_ok=True) - + if file_extension == ".json": out_dict = {} for channel in channel_files: @@ -62,7 +70,8 @@ def replace_path(d, old_path, new_path): channel_name = fkey.channel out_dict[channel_name] = channel_dict else: - raise RuntimeError("Output file extension does not match input file extension") + msg = "Output file extension does not match input file extension" + raise RuntimeError(msg) with open(temp_output, "w") as w: json.dump(out_dict, w, indent=4) @@ -77,7 +86,7 @@ def replace_path(d, old_path, new_path): fkey = ChannelProcKey.get_filekey_from_pattern(os.path.basename(channel)) channel_name = fkey.channel out_dict[channel_name] = channel_dict - + with open(temp_output, "wb") as w: pkl.dump(out_dict, w, protocol=pkl.HIGHEST_PROTOCOL) @@ -97,7 +106,7 @@ def replace_path(d, old_path, new_path): shelf[channel_name] = channel_dict if len(common_dict) > 0: shelf["common"] = common_dict - + elif file_extension == ".lh5": sto = lh5.LH5Store() @@ -109,23 +118,21 @@ def replace_path(d, old_path, new_path): fkey = ChannelProcKey.get_filekey_from_pattern(os.path.basename(channel)) channel_name = fkey.channel - tb_in = sto.read( - f"{channel_name}", - channel - )[0] + tb_in = sto.read(f"{channel_name}", channel)[0] sto.write( tb_in, - name = channel_name, - lh5_file = temp_output, + name=channel_name, + lh5_file=temp_output, wo_mode="a", ) if 
args.in_db: db_dict[channel_name] = replace_path(db_dict[channel_name], channel, args.output) else: - raise RuntimeError("Output file extension does not match input file extension") + msg = "Output file extension does not match input file extension" + raise RuntimeError(msg) if args.out_db: with open(args.out_db, "w") as w: json.dump(db_dict, w, indent=4) - os.rename(temp_output, out_file) \ No newline at end of file + os.rename(temp_output, out_file) diff --git a/scripts/par_psp.py b/scripts/par_psp.py index 400edbc..3b07edf 100644 --- a/scripts/par_psp.py +++ b/scripts/par_psp.py @@ -1,16 +1,17 @@ import argparse import json import os -import pathlib -from legendmeta.catalog import Props +import pickle as pkl +from datetime import datetime + +import matplotlib as mpl +import matplotlib.dates as mdates +import matplotlib.pyplot as plt +import numpy as np from legendmeta import LegendMetadata +from legendmeta.catalog import Props from util.FileKey import ChannelProcKey -import numpy as np -import matplotlib.pyplot as plt -import matplotlib as mpl -import matplotlib.dates as mdates -from datetime import datetime -import pickle as pkl + mpl.use("Agg") @@ -18,9 +19,13 @@ argparser.add_argument("--input", help="input files", nargs="*", type=str, required=True) argparser.add_argument("--output", help="output file", nargs="*", type=str, required=True) argparser.add_argument("--in_plots", help="input plot files", nargs="*", type=str, required=False) -argparser.add_argument("--out_plots", help="output plot files", nargs="*", type=str, required=False) +argparser.add_argument( + "--out_plots", help="output plot files", nargs="*", type=str, required=False +) argparser.add_argument("--in_obj", help="input object files", nargs="*", type=str, required=False) -argparser.add_argument("--out_obj", help="output object files", nargs="*", type=str, required=False) +argparser.add_argument( + "--out_obj", help="output object files", nargs="*", type=str, required=False +) argparser.add_argument("--log", help="log_file", type=str) argparser.add_argument("--configs", help="configs", type=str, required=True) @@ -32,9 +37,9 @@ conf = LegendMetadata(path=args.configs) configs = conf.on(args.timestamp, system=args.datatype) -merge_config = Props.read_from(configs["snakemake_rules"]["pars_psp"]["inputs"]["psp_config"][ - args.channel -]) +merge_config = Props.read_from( + configs["snakemake_rules"]["pars_psp"]["inputs"]["psp_config"][args.channel] +) ave_fields = merge_config["average_fields"] @@ -58,8 +63,8 @@ else: tmp_dict = {} in_dicts[tstamp]["dsp"] = tmp_dict - for i,key in enumerate(keys): - if i == len(keys)-1: + for i, key in enumerate(keys): + if i == len(keys) - 1: tmp_dict[key] = val else: if key in tmp_dict: @@ -70,42 +75,36 @@ if isinstance(vals[0], str): if "*" in vals[0]: unit = vals[0].split("*")[1] - if "." in vals[0]: - rounding = len(val.split("*")[0].split(".")[-1]) - else: - rounding = 16 + rounding = len(val.split("*")[0].split(".")[-1]) if "." 
in vals[0] else 16 vals = np.array([float(val.split("*")[0]) for val in vals]) else: unit = None rounding = 16 else: - vals=np.array(vals) + vals = np.array(vals) unit = None - if len(vals[~np.isnan(vals)]) ==0: - mean_val = np.nan - else: - mean_val = np.nanmean(vals) - if unit is not None: - mean = f"{round(mean_val, rounding)}*{unit}" - else: - mean = mean_val + rounding = 16 + + mean_val = np.nan if len(vals[~np.isnan(vals)]) == 0 else np.nanmean(vals) + mean = f"{round(mean_val, rounding)}*{unit}" if unit is not None else mean_val + for tstamp in in_dicts: val = in_dicts[tstamp] for i, key in enumerate(keys): - if i == len(keys)-1: - val[key]= mean + if i == len(keys) - 1: + val[key] = mean else: val = val[key] - + fig = plt.figure() - plt.scatter([datetime.strptime(tstamp,'%Y%m%dT%H%M%SZ') for tstamp in in_dicts] , vals) - plt.axhline(y=mean_val, color='r', linestyle='-') + plt.scatter([datetime.strptime(tstamp, "%Y%m%dT%H%M%SZ") for tstamp in in_dicts], vals) + plt.axhline(y=mean_val, color="r", linestyle="-") plt.xlabel("time") if unit is not None: plt.ylabel(f"value {unit}") else: plt.ylabel("value") - plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%d/%m/%y')) + plt.gca().xaxis.set_major_formatter(mdates.DateFormatter("%d/%m/%y")) plt.gcf().autofmt_xdate() plt.title(f"{field}") plot_dict[field] = fig @@ -146,4 +145,4 @@ else: new_obj_dict = {} with open(file, "wb") as f: - pkl.dump(new_obj_dict, f, protocol=pkl.HIGHEST_PROTOCOL) \ No newline at end of file + pkl.dump(new_obj_dict, f, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_dsp_dplms.py b/scripts/pars_dsp_dplms.py index 67b8bdd..60143e7 100644 --- a/scripts/pars_dsp_dplms.py +++ b/scripts/pars_dsp_dplms.py @@ -15,8 +15,8 @@ import numpy as np from legendmeta import LegendMetadata from legendmeta.catalog import Props -from pygama.pargen.dplms_ge_dict import dplms_ge_dict from lgdo import Array, Table +from pygama.pargen.dplms_ge_dict import dplms_ge_dict argparser = argparse.ArgumentParser() argparser.add_argument("--fft_raw_filelist", help="fft_raw_filelist", type=str) @@ -54,7 +54,7 @@ dsp_config = configs["snakemake_rules"]["pars_dsp_dplms"]["inputs"]["proc_chain"][args.channel] dplms_json = configs["snakemake_rules"]["pars_dsp_dplms"]["inputs"]["dplms_pars"][args.channel] -dplms_dict = Props.read_from(dplms_json) +dplms_dict = Props.read_from(dplms_json) db_dict = Props.read_from(args.database) @@ -75,18 +75,14 @@ log.info("\nRunning event selection") peaks_kev = np.array(dplms_dict["peaks_kev"]) kev_widths = [tuple(kev_width) for kev_width in dplms_dict["kev_widths"]] - + peaks_rounded = [int(peak) for peak in peaks_kev] - peaks = sto.read(f"{args.channel}/raw", args.peak_file , field_mask=["peak"]) [0]["peak"].nda + peaks = sto.read(f"{args.channel}/raw", args.peak_file, field_mask=["peak"])[0]["peak"].nda ids = np.in1d(peaks, peaks_rounded) peaks = peaks[ids] - idx_list = [np.where(peaks==peak)[0] for peak in peaks_rounded] + idx_list = [np.where(peaks == peak)[0] for peak in peaks_rounded] - raw_cal = sto.read( - f"{args.channel}/raw", - args.peak_file, - idx=ids - )[0] + raw_cal = sto.read(f"{args.channel}/raw", args.peak_file, idx=ids)[0] log.info(f"Time to run event selection {(time.time()-t1):.2f} s, total events {len(raw_cal)}") if isinstance(dsp_config, (str, list)): @@ -104,8 +100,8 @@ if args.inplots: with open(args.inplots, "rb") as r: inplot_dict = pkl.load(r) - inplot_dict.update({"dplms":plot_dict}) - + inplot_dict.update({"dplms": plot_dict}) + else: out_dict = dplms_ge_dict( raw_fft, 
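The selection above makes two passes over the peak file produced by pars_dsp_event_selection.py: a cheap read of just the integer "peak" label column, followed by a single indexed read of the full waveform table for the matching rows. A minimal standalone sketch of that pattern, mirroring the read calls in the script (the file path, channel name and peak energies are placeholders, and the boolean mask is passed straight to idx= exactly as above):

    import numpy as np
    import lgdo.lh5 as lh5

    sto = lh5.LH5Store()
    peak_file = "par_dsp_peaks.lh5"  # placeholder path to the peak-selected raw file
    channel = "ch1027201"            # placeholder channel name

    peaks_rounded = [583, 2614]      # placeholder peak energies in keV

    # pass 1: read only the per-event peak label, no waveforms
    peaks = sto.read(f"{channel}/raw", peak_file, field_mask=["peak"])[0]["peak"].nda
    ids = np.in1d(peaks, peaks_rounded)
    peaks = peaks[ids]

    # pass 2: read the full rows (waveforms included) for the selected events only
    raw_cal = sto.read(f"{channel}/raw", peak_file, idx=ids)[0]

    # per-peak index lists into the selected table, as consumed by dplms_ge_dict
    idx_list = [np.where(peaks == peak)[0] for peak in peaks_rounded]
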
@@ -115,28 +111,30 @@ dplms_dict, ) - coeffs = out_dict["dplms"].pop("coefficients") - dplms_pars = Table(col_dict={"coefficients":Array(coeffs)}) - out_dict["dplms"]["coefficients"] =f"loadlh5('{args.lh5_path}', '{args.channel}/dplms/coefficients')" + coeffs = out_dict["dplms"].pop("coefficients") + dplms_pars = Table(col_dict={"coefficients": Array(coeffs)}) + out_dict["dplms"][ + "coefficients" + ] = f"loadlh5('{args.lh5_path}', '{args.channel}/dplms/coefficients')" log.info(f"DPLMS creation finished in {(time.time()-t0)/60} minutes") else: out_dict = {} - dplms_pars = Table(col_dict={"coefficients":Array([])}) + dplms_pars = Table(col_dict={"coefficients": Array([])}) if args.inplots: with open(args.inplots, "rb") as r: inplot_dict = pkl.load(r) else: - inplot_dict={} + inplot_dict = {} db_dict.update(out_dict) -pathlib.Path(os.path.dirname(args.lh5_path)).mkdir(parents=True, exist_ok=True) +pathlib.Path(os.path.dirname(args.lh5_path)).mkdir(parents=True, exist_ok=True) sto.write( - Table(col_dict={"dplms":dplms_pars}), - name = args.channel, + Table(col_dict={"dplms": dplms_pars}), + name=args.channel, lh5_file=args.lh5_path, - wo_mode="overwrite" + wo_mode="overwrite", ) pathlib.Path(os.path.dirname(args.dsp_pars)).mkdir(parents=True, exist_ok=True) @@ -146,4 +144,4 @@ if args.plot_path: pathlib.Path(os.path.dirname(args.plot_path)).mkdir(parents=True, exist_ok=True) with open(args.plot_path, "wb") as f: - pkl.dump(inplot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) \ No newline at end of file + pkl.dump(inplot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_dsp_eopt.py b/scripts/pars_dsp_eopt.py index b176c65..4b37737 100644 --- a/scripts/pars_dsp_eopt.py +++ b/scripts/pars_dsp_eopt.py @@ -14,13 +14,16 @@ import lgdo.lh5 as lh5 import numpy as np -from pygama.math.distributions import hpge_peak -import pygama.pargen.energy_optimisation as om import sklearn.gaussian_process.kernels as ker from dspeed.units import unit_registry as ureg from legendmeta import LegendMetadata from legendmeta.catalog import Props -from pygama.pargen.dsp_optimize import run_one_dsp, BayesianOptimizer, run_bayesian_optimisation +from pygama.math.distributions import hpge_peak +from pygama.pargen.dsp_optimize import ( + BayesianOptimizer, + run_bayesian_optimisation, + run_one_dsp, +) warnings.filterwarnings(action="ignore", category=RuntimeWarning) @@ -71,8 +74,6 @@ db_dict = Props.read_from(args.decay_const) if opt_dict.pop("run_eopt") is True: - - peaks_kev = np.array(opt_dict["peaks"]) kev_widths = [tuple(kev_width) for kev_width in opt_dict["kev_widths"]] @@ -109,16 +110,12 @@ ) peaks_rounded = [int(peak) for peak in peaks_kev] - peaks = sto.read(f"{args.channel}/raw",args.peak_file , field_mask=["peak"]) [0]["peak"].nda + peaks = sto.read(f"{args.channel}/raw", args.peak_file, field_mask=["peak"])[0]["peak"].nda ids = np.in1d(peaks, peaks_rounded) peaks = peaks[ids] - idx_list = [np.where(peaks==peak)[0] for peak in peaks_rounded] + idx_list = [np.where(peaks == peak)[0] for peak in peaks_rounded] - tb_data = sto.read( - f"{args.channel}/raw", - args.peak_file, - idx=ids - )[0] + tb_data = sto.read(f"{args.channel}/raw", args.peak_file, idx=ids)[0] t1 = time.time() log.info(f"Data Loaded in {(t1-t0)/60} minutes") @@ -256,8 +253,8 @@ batch_size=opt_dict["batch_size"], kernel=kernel, sampling_rate=waveform_sampling, - fom_value = out_field, - fom_error = out_err_field + fom_value=out_field, + fom_error=out_err_field, ) bopt_cusp.lambda_param = lambda_param bopt_cusp.add_dimension("cusp", 
"sigma", 0.5, 16, True, "us") @@ -267,8 +264,8 @@ batch_size=opt_dict["batch_size"], kernel=kernel, sampling_rate=waveform_sampling, - fom_value = out_field, - fom_error = out_err_field + fom_value=out_field, + fom_error=out_err_field, ) bopt_zac.lambda_param = lambda_param bopt_zac.add_dimension("zac", "sigma", 0.5, 16, True, "us") @@ -278,8 +275,8 @@ batch_size=opt_dict["batch_size"], kernel=kernel, sampling_rate=waveform_sampling, - fom_value = out_field, - fom_error = out_err_field + fom_value=out_field, + fom_error=out_err_field, ) bopt_trap.lambda_param = lambda_param bopt_trap.add_dimension("etrap", "rise", 1, 12, True, "us") @@ -350,7 +347,7 @@ "expression": "trapEftp*(1+dt_eff*a)", "parameters": {"a": round(bopt_trap.optimal_results["alpha"], 9)}, } - if "ctc_params" in db_dict: + if "ctc_params" in db_dict: db_dict["ctc_params"].update(out_alpha_dict) else: db_dict.update({"ctc_params": out_alpha_dict}) diff --git a/scripts/pars_dsp_event_selection.py b/scripts/pars_dsp_event_selection.py index 6fc8292..3802056 100644 --- a/scripts/pars_dsp_event_selection.py +++ b/scripts/pars_dsp_event_selection.py @@ -3,7 +3,6 @@ import logging import os import pathlib -import pickle as pkl import time import warnings @@ -12,63 +11,79 @@ os.environ["DSPEED_CACHE"] = "false" os.environ["DSPEED_BOUNDSCHECK"] = "false" +from bisect import bisect_left + import lgdo import lgdo.lh5 as lh5 import numpy as np -from bisect import bisect_left -from legendmeta import LegendMetadata -from legendmeta.catalog import Props -from pygama.pargen.data_cleaning import get_tcm_pulser_ids, generate_cuts, get_keys import pygama.math.histogram as pgh import pygama.pargen.energy_cal as pgc +from legendmeta import LegendMetadata +from legendmeta.catalog import Props +from pygama.pargen.data_cleaning import generate_cuts, get_keys, get_tcm_pulser_ids from pygama.pargen.dsp_optimize import run_one_dsp warnings.filterwarnings(action="ignore", category=RuntimeWarning) -def get_out_data(raw_data, dsp_data,cut_dict, e_lower_lim, e_upper_lim, - ecal_pars, raw_dict, peak, final_cut_field="is_valid_cal", - energy_param="trapTmax"): + +def get_out_data( + raw_data, + dsp_data, + cut_dict, + e_lower_lim, + e_upper_lim, + ecal_pars, + raw_dict, + peak, + final_cut_field="is_valid_cal", + energy_param="trapTmax", +): for outname, info in cut_dict.items(): outcol = dsp_data.eval(info["expression"], info.get("parameters", None)) dsp_data.add_column(outname, outcol) - + for outname, info in raw_dict.items(): outcol = raw_data.eval(info["expression"], info.get("parameters", None)) raw_data.add_column(outname, outcol) - final_mask = (dsp_data[energy_param].nda > e_lower_lim) & (dsp_data[energy_param].nda < e_upper_lim)&(dsp_data[final_cut_field].nda) - + final_mask = ( + (dsp_data[energy_param].nda > e_lower_lim) + & (dsp_data[energy_param].nda < e_upper_lim) + & (dsp_data[final_cut_field].nda) + ) + wavefrom_windowed = lgdo.WaveformTable( - t0=raw_data["waveform_windowed"]["t0"].nda[final_mask], - t0_units=raw_data["waveform_windowed"]["t0"].attrs["units"], - dt=raw_data["waveform_windowed"]["dt"].nda[final_mask], - dt_units=raw_data["waveform_windowed"]["dt"].attrs["units"], - values=raw_data["waveform_windowed"]["values"].nda[final_mask] -) + t0=raw_data["waveform_windowed"]["t0"].nda[final_mask], + t0_units=raw_data["waveform_windowed"]["t0"].attrs["units"], + dt=raw_data["waveform_windowed"]["dt"].nda[final_mask], + dt_units=raw_data["waveform_windowed"]["dt"].attrs["units"], + 
values=raw_data["waveform_windowed"]["values"].nda[final_mask], + ) wavefrom_presummed = lgdo.WaveformTable( - t0=raw_data["waveform_presummed"]["t0"].nda[final_mask], - t0_units=raw_data["waveform_presummed"]["t0"].attrs["units"], - dt=raw_data["waveform_presummed"]["dt"].nda[final_mask], - dt_units=raw_data["waveform_presummed"]["dt"].attrs["units"], - values=raw_data["waveform_presummed"]["values"].nda[final_mask] -) - - - out_tbl = lgdo.Table(col_dict = {"waveform_presummed": wavefrom_presummed, - "waveform_windowed":wavefrom_windowed, - "presum_rate":lgdo.Array(raw_data["presum_rate"].nda[final_mask]), - "timestamp":lgdo.Array(raw_data["timestamp"].nda[final_mask]), - "baseline":lgdo.Array(raw_data["baseline"].nda[final_mask]), - "daqenergy":lgdo.Array(raw_data["daqenergy"].nda[final_mask]), - "daqenergy_cal":lgdo.Array(raw_data["daqenergy_cal"].nda[final_mask]), - "trapTmax_cal":lgdo.Array(dsp_data["trapTmax"].nda[final_mask]*ecal_pars), - "peak":lgdo.Array(np.full(len(np.where(final_mask)[0]),int(peak))) - }) + t0=raw_data["waveform_presummed"]["t0"].nda[final_mask], + t0_units=raw_data["waveform_presummed"]["t0"].attrs["units"], + dt=raw_data["waveform_presummed"]["dt"].nda[final_mask], + dt_units=raw_data["waveform_presummed"]["dt"].attrs["units"], + values=raw_data["waveform_presummed"]["values"].nda[final_mask], + ) + + out_tbl = lgdo.Table( + col_dict={ + "waveform_presummed": wavefrom_presummed, + "waveform_windowed": wavefrom_windowed, + "presum_rate": lgdo.Array(raw_data["presum_rate"].nda[final_mask]), + "timestamp": lgdo.Array(raw_data["timestamp"].nda[final_mask]), + "baseline": lgdo.Array(raw_data["baseline"].nda[final_mask]), + "daqenergy": lgdo.Array(raw_data["daqenergy"].nda[final_mask]), + "daqenergy_cal": lgdo.Array(raw_data["daqenergy_cal"].nda[final_mask]), + "trapTmax_cal": lgdo.Array(dsp_data["trapTmax"].nda[final_mask] * ecal_pars), + "peak": lgdo.Array(np.full(len(np.where(final_mask)[0]), int(peak))), + } + ) return out_tbl, len(np.where(final_mask)[0]) if __name__ == "__main__": - argparser = argparse.ArgumentParser() argparser.add_argument("--raw_filelist", help="raw_filelist", type=str) argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, required=False) @@ -95,29 +110,28 @@ def get_out_data(raw_data, dsp_data,cut_dict, e_lower_lim, e_upper_lim, logging.getLogger("matplotlib").setLevel(logging.INFO) logging.getLogger("dspeed.processing_chain").setLevel(logging.INFO) - log = logging.getLogger(__name__) sto = lh5.LH5Store() t0 = time.time() conf = LegendMetadata(path=args.configs) configs = conf.on(args.timestamp, system=args.datatype) - dsp_config = configs["snakemake_rules"]["pars_dsp_peak_selection"]["inputs"]["processing_chain"][ + dsp_config = configs["snakemake_rules"]["pars_dsp_peak_selection"]["inputs"][ + "processing_chain" + ][args.channel] + peak_json = configs["snakemake_rules"]["pars_dsp_peak_selection"]["inputs"]["peak_config"][ args.channel ] - peak_json = configs["snakemake_rules"]["pars_dsp_peak_selection"]["inputs"]["peak_config"][args.channel] peak_dict = Props.read_from(peak_json) db_dict = Props.read_from(args.decay_const) pathlib.Path(os.path.dirname(args.peak_file)).mkdir(parents=True, exist_ok=True) if peak_dict.pop("run_selection") is True: - rng = np.random.default_rng() rand_num = f"{rng.integers(0,99999):05d}" temp_output = f"{args.peak_file}.{rand_num}" - with open(args.raw_filelist) as f: files = f.read().splitlines() raw_files = sorted(files) @@ -136,7 +150,8 @@ def get_out_data(raw_data, dsp_data,cut_dict, 
e_lower_lim, e_upper_lim, tcm_files, args.channel, peak_dict["pulser_multiplicity_threshold"] ) else: - raise ValueError("No pulser file or tcm filelist provided") + msg = "No pulser file or tcm filelist provided" + raise ValueError(msg) raw_dict = Props.read_from(args.raw_cal)[args.channel]["pars"]["operations"] @@ -149,16 +164,13 @@ def get_out_data(raw_data, dsp_data,cut_dict, e_lower_lim, e_upper_lim, lh5_path = f"{args.channel}/raw" - if not isinstance(kev_widths, list): kev_widths = [kev_widths] if lh5_path[-1] != "/": lh5_path += "/" - raw_fields = [ - field.replace(lh5_path, "") for field in lh5.ls(raw_files[0], lh5_path) - ] + raw_fields = [field.replace(lh5_path, "") for field in lh5.ls(raw_files[0], lh5_path)] tb = sto.read(lh5_path, raw_files, field_mask=["daqenergy"])[0] @@ -169,18 +181,23 @@ def get_out_data(raw_data, dsp_data,cut_dict, e_lower_lim, e_upper_lim, rough_energy = tb["daqenergy_cal"].nda masks = {} - for peak, kev_width in zip(peaks_kev,kev_widths) : - e_mask = (rough_energy > peak - 1.1* kev_width[0]) & (rough_energy < peak + 1.1* kev_width[0]) & (~mask) + for peak, kev_width in zip(peaks_kev, kev_widths): + e_mask = ( + (rough_energy > peak - 1.1 * kev_width[0]) + & (rough_energy < peak + 1.1 * kev_width[0]) + & (~mask) + ) masks[peak] = np.where(e_mask)[0] log.debug(f"{len(masks[peak])} events found in energy range for {peak}") - input_data = sto.read(f"{lh5_path}", raw_files, n_rows=10000)[0] + input_data = sto.read(f"{lh5_path}", raw_files, n_rows=10000)[0] if isinstance(dsp_config, str): dsp_config = Props.read_from(dsp_config) - dsp_config["outputs"] = get_keys(dsp_config["outputs"], cut_parameters) + [ - energy_parameter + dsp_config["outputs"] = [ + *get_keys(dsp_config["outputs"], cut_parameters), + energy_parameter, ] log.debug("Processing data") @@ -193,15 +210,16 @@ def get_out_data(raw_data, dsp_data,cut_dict, e_lower_lim, e_upper_lim, cut_dict = None pk_dicts = {} - for peak, kev_width in zip(peaks_kev,kev_widths): - pk_dicts[peak] = {"idxs":(masks[peak],), - "n_rows_read":0, - "obj_buf_start":0, - "obj_buf":None, - "kev_width":kev_width - } - - for i,file in enumerate(raw_files): + for peak, kev_width in zip(peaks_kev, kev_widths): + pk_dicts[peak] = { + "idxs": (masks[peak],), + "n_rows_read": 0, + "obj_buf_start": 0, + "obj_buf": None, + "kev_width": kev_width, + } + + for file in raw_files: log.debug(os.path.basename(file)) for peak, peak_dict in pk_dicts.items(): if peak_dict["idxs"] is not None: @@ -213,7 +231,7 @@ def get_out_data(raw_data, dsp_data,cut_dict, e_lower_lim, e_upper_lim, # now split idx into idx_i and the remainder idx_i = (peak_dict["idxs"][0][:n_rows_to_read_i],) peak_dict["idxs"] = (peak_dict["idxs"][0][n_rows_to_read_i:] - n_rows_i,) - if len(idx_i[0])>0: + if len(idx_i[0]) > 0: peak_dict["obj_buf"], n_rows_read_i = sto.read( lh5_path, file, @@ -226,9 +244,8 @@ def get_out_data(raw_data, dsp_data,cut_dict, e_lower_lim, e_upper_lim, peak_dict["n_rows_read"] += n_rows_read_i log.debug(f'{peak}: {peak_dict["n_rows_read"]}') peak_dict["obj_buf_start"] += n_rows_read_i - if peak_dict["n_rows_read"] >=10000 or file ==raw_files[-1]: + if peak_dict["n_rows_read"] >= 10000 or file == raw_files[-1]: if "e_lower_lim" not in peak_dict: - tb_out = run_one_dsp(peak_dict["obj_buf"], dsp_config, db_dict=db_dict) energy = tb_out[energy_parameter].nda @@ -245,15 +262,17 @@ def get_out_data(raw_data, dsp_data,cut_dict, e_lower_lim, e_upper_lim, var, [peak_loc], n_to_fit=7, - )[ - 0 - ][0] + )[0][0] if mu is None or np.isnan(mu): 
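                            # fit failed: fall back to the raw histogram maximum for the
                            # peak position and open a generous 1.5x keV window before
                            # re-histogramming and re-fitting the peak top below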
log.debug("Fit failed, using max guess") rough_adc_to_kev = peak / peak_loc - e_lower_lim = peak_loc - (1.5 * peak_dict["kev_width"][0]) / rough_adc_to_kev - e_upper_lim = peak_loc + (1.5 * peak_dict["kev_width"][1]) / rough_adc_to_kev + e_lower_lim = ( + peak_loc - (1.5 * peak_dict["kev_width"][0]) / rough_adc_to_kev + ) + e_upper_lim = ( + peak_loc + (1.5 * peak_dict["kev_width"][1]) / rough_adc_to_kev + ) hist, bins, var = pgh.get_hist( energy, range=(int(e_lower_lim), int(e_upper_lim)), dx=1 ) @@ -262,53 +281,55 @@ def get_out_data(raw_data, dsp_data,cut_dict, e_lower_lim, e_upper_lim, updated_adc_to_kev = peak / mu e_lower_lim = mu - (peak_dict["kev_width"][0]) / updated_adc_to_kev e_upper_lim = mu + (peak_dict["kev_width"][1]) / updated_adc_to_kev - log.info(f"{peak}: lower lim is :{e_lower_lim}, upper lim is {e_upper_lim}") + log.info( + f"{peak}: lower lim is :{e_lower_lim}, upper lim is {e_upper_lim}" + ) peak_dict["e_lower_lim"] = e_lower_lim peak_dict["e_upper_lim"] = e_upper_lim peak_dict["ecal_par"] = updated_adc_to_kev - out_tbl, n_wfs = get_out_data(peak_dict["obj_buf"], - tb_out, - cut_dict, - e_lower_lim, - e_upper_lim, - peak_dict["ecal_par"], - raw_dict, - int(peak), - final_cut_field=final_cut_field, - energy_param=energy_parameter - ) - sto.write(out_tbl ,name= lh5_path, - lh5_file=temp_output, - wo_mode="a") + out_tbl, n_wfs = get_out_data( + peak_dict["obj_buf"], + tb_out, + cut_dict, + e_lower_lim, + e_upper_lim, + peak_dict["ecal_par"], + raw_dict, + int(peak), + final_cut_field=final_cut_field, + energy_param=energy_parameter, + ) + sto.write(out_tbl, name=lh5_path, lh5_file=temp_output, wo_mode="a") peak_dict["obj_buf"] = None peak_dict["obj_buf_start"] = 0 peak_dict["n_events"] = n_wfs else: tb_out = run_one_dsp(peak_dict["obj_buf"], dsp_config, db_dict=db_dict) - out_tbl, n_wfs = get_out_data(peak_dict["obj_buf"], - tb_out, - cut_dict, - peak_dict["e_lower_lim"], - peak_dict["e_upper_lim"], - peak_dict["ecal_par"], - raw_dict, - int(peak), - final_cut_field=final_cut_field, - energy_param=energy_parameter - ) + out_tbl, n_wfs = get_out_data( + peak_dict["obj_buf"], + tb_out, + cut_dict, + peak_dict["e_lower_lim"], + peak_dict["e_upper_lim"], + peak_dict["ecal_par"], + raw_dict, + int(peak), + final_cut_field=final_cut_field, + energy_param=energy_parameter, + ) peak_dict["n_events"] += n_wfs - sto.write(out_tbl ,name= lh5_path, - lh5_file=temp_output, - wo_mode="a") + sto.write(out_tbl, name=lh5_path, lh5_file=temp_output, wo_mode="a") peak_dict["obj_buf"] = None peak_dict["obj_buf_start"] = 0 if peak_dict["n_events"] >= n_events: peak_dict["idxs"] = None log.debug(f"{peak} has reached the required number of events") - log.debug(f"{peak}: {peak_dict['idxs']}, {peak_dict['idxs'] is not None}") + log.debug( + f"{peak}: {peak_dict['idxs']}, {peak_dict['idxs'] is not None}" + ) else: pathlib.Path(temp_output).touch() - os.rename(temp_output, args.peak_file) \ No newline at end of file + os.rename(temp_output, args.peak_file) diff --git a/scripts/pars_dsp_tau.py b/scripts/pars_dsp_tau.py index 0c150d2..c5b939e 100644 --- a/scripts/pars_dsp_tau.py +++ b/scripts/pars_dsp_tau.py @@ -14,9 +14,9 @@ import numpy as np from legendmeta import LegendMetadata from legendmeta.catalog import Props -from pygama.pargen.extract_tau import ExtractTau -from pygama.pargen.data_cleaning import get_tcm_pulser_ids, get_cut_indexes +from pygama.pargen.data_cleaning import get_cut_indexes, get_tcm_pulser_ids from pygama.pargen.dsp_optimize import run_one_dsp +from 
pygama.pargen.extract_tau import ExtractTau argparser = argparse.ArgumentParser() argparser.add_argument("--configs", help="configs path", type=str, required=True) @@ -100,14 +100,15 @@ tau = ExtractTau(dsp_config, kwarg_dict["wf_field"]) slopes = tb_out["tail_slope"].nda log.debug("Calculating pz constant") - - tau.get_decay_constant(slopes[idxs], tb_data[kwarg_dict["wf_field"]]) + + tau.get_decay_constant(slopes[idxs], tb_data[kwarg_dict["wf_field"]]) if args.plot_path: pathlib.Path(os.path.dirname(args.plot_path)).mkdir(parents=True, exist_ok=True) - plot_dict = tau.plot_waveforms_after_correction(tb_data, "wf_pz", - norm_param=kwarg_dict.get("norm_param", "pz_mean")) + plot_dict = tau.plot_waveforms_after_correction( + tb_data, "wf_pz", norm_param=kwarg_dict.get("norm_param", "pz_mean") + ) plot_dict.update(tau.plot_slopes(slopes[idxs])) with open(args.plot_path, "wb") as f: @@ -118,8 +119,8 @@ if args.pulser_file: pathlib.Path(os.path.dirname(args.pulser_file)).mkdir(parents=True, exist_ok=True) with open(args.pulser_file, "w") as f: - json.dump({"idxs": ids.tolist(), "mask": mask.tolist()} , f, indent=4) + json.dump({"idxs": ids.tolist(), "mask": mask.tolist()}, f, indent=4) pathlib.Path(os.path.dirname(args.output_file)).mkdir(parents=True, exist_ok=True) with open(args.output_file, "w") as f: - json.dump(tau.output_dict, f, indent=4) \ No newline at end of file + json.dump(tau.output_dict, f, indent=4) diff --git a/scripts/util/patterns.py b/scripts/util/patterns.py index 90c8f2c..e1538d0 100644 --- a/scripts/util/patterns.py +++ b/scripts/util/patterns.py @@ -439,12 +439,14 @@ def get_pattern_pars_tmp(setup, tier, name=None, datatype=None): if name is None: return os.path.join( f"{tmp_par_path(setup)}", - "{experiment}-{period}-{run}-"+datatype+"-{timestamp}-par_" + tier + ".json", + "{experiment}-{period}-{run}-" + datatype + "-{timestamp}-par_" + tier + ".json", ) else: return os.path.join( f"{tmp_par_path(setup)}", - "{experiment}-{period}-{run}-"+datatype+"-{timestamp}-par_" + "{experiment}-{period}-{run}-" + + datatype + + "-{timestamp}-par_" + tier + "_" + name diff --git a/scripts/util/utils.py b/scripts/util/utils.py index 8b11b3b..903187a 100644 --- a/scripts/util/utils.py +++ b/scripts/util/utils.py @@ -128,15 +128,21 @@ def par_dsp_path(setup): def par_hit_path(setup): return setup["paths"]["par_hit"] + +def par_evt_path(setup): + return setup["paths"]["par_evt"] + + def par_psp_path(setup): return setup["paths"]["par_psp"] + def par_pht_path(setup): return setup["paths"]["par_pht"] -def par_evt_path(setup): - return setup["paths"]["par_evt"] +def par_pet_path(setup): + return setup["paths"]["par_pet"] def pars_path(setup): From 162369995c28ffdd575ed53ea8489123dbae13f2 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Fri, 22 Mar 2024 16:36:00 +0100 Subject: [PATCH 028/103] Update build_evt.py to support latest pygama.build_evt() --- scripts/build_evt.py | 43 ++++++++++++++++------------------------ scripts/pars_hit_aoe.py | 8 ++++---- scripts/pars_hit_ecal.py | 6 +++--- scripts/pars_hit_lq.py | 8 ++++---- 4 files changed, 28 insertions(+), 37 deletions(-) diff --git a/scripts/build_evt.py b/scripts/build_evt.py index e5febca..606dc50 100644 --- a/scripts/build_evt.py +++ b/scripts/build_evt.py @@ -15,19 +15,6 @@ sto = lh5.LH5Store() -def replace_evt_with_key(dic, new_key): - for key, d in dic.items(): - if isinstance(d, dict): - dic[key] = replace_evt_with_key(d, new_key) - elif isinstance(d, list): - dic[key] = [item.replace("evt", new_key) for item in d] - elif 
isinstance(d, str): - dic[key] = d.replace("evt", new_key) - else: - pass - return dic - - argparser = argparse.ArgumentParser() argparser.add_argument("--hit_file", help="hit file", type=str) argparser.add_argument("--dsp_file", help="dsp file", type=str) @@ -45,8 +32,12 @@ def replace_evt_with_key(dic, new_key): argparser.add_argument("--output", help="output file", type=str) args = argparser.parse_args() -pathlib.Path(os.path.dirname(args.log)).mkdir(parents=True, exist_ok=True) -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") +if args.log is not None: + pathlib.Path(os.path.dirname(args.log)).mkdir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") +else: + logging.basicConfig(level=logging.DEBUG) + logging.getLogger("numba").setLevel(logging.INFO) logging.getLogger("parse").setLevel(logging.INFO) logging.getLogger("lgdo").setLevel(logging.INFO) @@ -88,7 +79,8 @@ def replace_evt_with_key(dic, new_key): else: chans = [] _evt_config["channels"][field] = chans - evt_config[key] = replace_evt_with_key(_evt_config, f"evt/{key}") + + evt_config[key] = _evt_config else: evt_config = {"all": Props.read_from(evt_config_file)} # block for snakemake to fill in channel lists @@ -118,17 +110,16 @@ def replace_evt_with_key(dic, new_key): tables = {} for key, config in evt_config.items(): + datainfo = { + "tcm": (args.tcm_file, "hardware_tcm_1", "ch{}"), + "dsp": (args.dsp_file, "dsp", "ch{}"), + "hit": (args.hit_file, "hit", "ch{}"), + "evt": (None, "evt"), + } + tables[key] = build_evt( - f_tcm=args.tcm_file, - f_dsp=args.dsp_file, - f_hit=args.hit_file, - f_evt=None, - evt_config=config, - evt_group=f"evt/{key}" if key != "all" else "evt", - tcm_group="hardware_tcm_1", - dsp_group="dsp", - hit_group="hit", - tcm_id_table_pattern="ch{}", + datainfo, + config, ) tbl = Table(col_dict=tables) diff --git a/scripts/pars_hit_aoe.py b/scripts/pars_hit_aoe.py index 3b6831f..2f7167b 100644 --- a/scripts/pars_hit_aoe.py +++ b/scripts/pars_hit_aoe.py @@ -221,7 +221,7 @@ def eres_func(x): pkl.dump(out_plot_dict, w, protocol=pkl.HIGHEST_PROTOCOL) pathlib.Path(os.path.dirname(args.hit_pars)).mkdir(parents=True, exist_ok=True) -results_dict = dict(**ecal_dict["results"], aoe = out_dict) +results_dict = dict(**ecal_dict["results"], aoe=out_dict) with open(args.hit_pars, "w") as w: final_hit_dict = { "pars": {"operations": cal_dict}, @@ -231,8 +231,8 @@ def eres_func(x): pathlib.Path(os.path.dirname(args.aoe_results)).mkdir(parents=True, exist_ok=True) final_object_dict = dict( - **object_dict, - aoe=obj, - ) + **object_dict, + aoe=obj, +) with open(args.aoe_results, "wb") as w: pkl.dump(final_object_dict, w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_hit_ecal.py b/scripts/pars_hit_ecal.py index 7f14c54..a7b399e 100644 --- a/scripts/pars_hit_ecal.py +++ b/scripts/pars_hit_ecal.py @@ -283,14 +283,14 @@ def energy_cal_th( if plot in item: param_dict.update({plot: item[plot]}) common_dict.update({key: param_dict}) - plot_dict = {"ecal":plot_dict} + plot_dict = {"ecal": plot_dict} plot_dict["common"] = common_dict with open(args.plot_path, "wb") as f: pkl.dump(plot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) # save output dictionary - output_dict = {"pars": out_dict, "results": {"ecal":result_dict}} + output_dict = {"pars": out_dict, "results": {"ecal": result_dict}} with open(args.save_path, "w") as fp: pathlib.Path(os.path.dirname(args.save_path)).mkdir(parents=True, exist_ok=True) json.dump(output_dict, fp, indent=4) @@ -298,4 
+298,4 @@ def energy_cal_th( # save calibration objects with open(args.results_path, "wb") as fp: pathlib.Path(os.path.dirname(args.results_path)).mkdir(parents=True, exist_ok=True) - pkl.dump({"ecal":ecal_object}, fp, protocol=pkl.HIGHEST_PROTOCOL) + pkl.dump({"ecal": ecal_object}, fp, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_hit_lq.py b/scripts/pars_hit_lq.py index 860029f..3a43a45 100644 --- a/scripts/pars_hit_lq.py +++ b/scripts/pars_hit_lq.py @@ -220,7 +220,7 @@ def eres_func(x): pkl.dump(out_plot_dict, w, protocol=pkl.HIGHEST_PROTOCOL) -results_dict = dict(**eres_dict,lq = out_dict) +results_dict = dict(**eres_dict, lq=out_dict) pathlib.Path(os.path.dirname(args.hit_pars)).mkdir(parents=True, exist_ok=True) with open(args.hit_pars, "w") as w: final_hit_dict = { @@ -231,8 +231,8 @@ def eres_func(x): pathlib.Path(os.path.dirname(args.lq_results)).mkdir(parents=True, exist_ok=True) final_object_dict = dict( - **object_dict, - lq=obj, - ) + **object_dict, + lq=obj, +) with open(args.lq_results, "wb") as w: pkl.dump(final_object_dict, w, protocol=pkl.HIGHEST_PROTOCOL) From 9812097708473dd9bb296f747f39c2c32d038816 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Fri, 29 Mar 2024 13:48:36 +0100 Subject: [PATCH 029/103] make sure arrays float32 --- scripts/build_dsp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/build_dsp.py b/scripts/build_dsp.py index 9906782..a94d547 100644 --- a/scripts/build_dsp.py +++ b/scripts/build_dsp.py @@ -51,7 +51,7 @@ def replace_list_with_array(dic): if isinstance(value, dict): dic[key] = replace_list_with_array(value) elif isinstance(value, list): - dic[key] = np.array(value) + dic[key] = np.array(value, dtype="float32") else: pass return dic From b17cf7b5e0e780bbc36ab8f7c476b7787bbdace5 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Fri, 29 Mar 2024 13:48:54 +0100 Subject: [PATCH 030/103] use median value --- scripts/par_psp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/par_psp.py b/scripts/par_psp.py index 3b07edf..03bfeaf 100644 --- a/scripts/par_psp.py +++ b/scripts/par_psp.py @@ -85,7 +85,7 @@ unit = None rounding = 16 - mean_val = np.nan if len(vals[~np.isnan(vals)]) == 0 else np.nanmean(vals) + mean_val = np.nan if len(vals[~np.isnan(vals)]) == 0 else np.nanmedian(vals) mean = f"{round(mean_val, rounding)}*{unit}" if unit is not None else mean_val for tstamp in in_dicts: From 82e6b3a575f30841be86f638e9b84de342f8dc80 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Fri, 29 Mar 2024 13:49:58 +0100 Subject: [PATCH 031/103] pygama math settings --- scripts/pars_dsp_dplms.py | 2 ++ scripts/pars_dsp_eopt.py | 5 ++++- scripts/pars_dsp_nopt.py | 2 ++ scripts/pars_dsp_tau.py | 28 ++++++++++++++++++---------- 4 files changed, 26 insertions(+), 11 deletions(-) diff --git a/scripts/pars_dsp_dplms.py b/scripts/pars_dsp_dplms.py index 60143e7..bcf1ac0 100644 --- a/scripts/pars_dsp_dplms.py +++ b/scripts/pars_dsp_dplms.py @@ -10,6 +10,8 @@ os.environ["LGDO_BOUNDSCHECK"] = "false" os.environ["DSPEED_CACHE"] = "false" os.environ["DSPEED_BOUNDSCHECK"] = "false" +os.environ["PYGAMA_PARALLEL"] = "false" +os.environ["PYGAMA_FASTMATH"] = "false" import lgdo.lh5 as lh5 import numpy as np diff --git a/scripts/pars_dsp_eopt.py b/scripts/pars_dsp_eopt.py index 4b37737..4af1c37 100644 --- a/scripts/pars_dsp_eopt.py +++ b/scripts/pars_dsp_eopt.py @@ -11,6 +11,8 @@ os.environ["LGDO_BOUNDSCHECK"] = "false" os.environ["DSPEED_CACHE"] = "false" os.environ["DSPEED_BOUNDSCHECK"] = "false" 
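+# NOTE: set before the pygama/dspeed imports below; like the cache/boundscheck
+# toggles above, these are assumed to be read once at library import time
+# (matching the pattern in the sibling pars_dsp_* scripts).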
+os.environ["PYGAMA_PARALLEL"] = "false" +os.environ["PYGAMA_FASTMATH"] = "false" import lgdo.lh5 as lh5 import numpy as np @@ -26,6 +28,8 @@ ) warnings.filterwarnings(action="ignore", category=RuntimeWarning) +warnings.filterwarnings(action="ignore", category=np.RankWarning) + argparser = argparse.ArgumentParser() @@ -45,7 +49,6 @@ argparser.add_argument("--qbb_grid_path", help="qbb_grid_path", type=str) argparser.add_argument("--plot_path", help="plot_path", type=str) - argparser.add_argument("--plot_save_path", help="plot_save_path", type=str, required=False) args = argparser.parse_args() diff --git a/scripts/pars_dsp_nopt.py b/scripts/pars_dsp_nopt.py index d412e92..c2c393d 100644 --- a/scripts/pars_dsp_nopt.py +++ b/scripts/pars_dsp_nopt.py @@ -10,6 +10,8 @@ os.environ["LGDO_BOUNDSCHECK"] = "false" os.environ["DSPEED_CACHE"] = "false" os.environ["DSPEED_BOUNDSCHECK"] = "false" +os.environ["PYGAMA_PARALLEL"] = "false" +os.environ["PYGAMA_FASTMATH"] = "false" import lgdo.lh5 as lh5 import numpy as np diff --git a/scripts/pars_dsp_tau.py b/scripts/pars_dsp_tau.py index c5b939e..1e10ea5 100644 --- a/scripts/pars_dsp_tau.py +++ b/scripts/pars_dsp_tau.py @@ -9,6 +9,8 @@ os.environ["LGDO_BOUNDSCHECK"] = "false" os.environ["DSPEED_CACHE"] = "false" os.environ["DSPEED_BOUNDSCHECK"] = "false" +os.environ["PYGAMA_PARALLEL"] = "false" +os.environ["PYGAMA_FASTMATH"] = "false" import lgdo.lh5 as lh5 import numpy as np @@ -30,7 +32,7 @@ argparser.add_argument("--pulser_file", help="pulser file", type=str, required=False) argparser.add_argument("--raw_files", help="input files", nargs="*", type=str) -argparser.add_argument("--tcm_files", help="tcm_files", nargs="*", type=str) +argparser.add_argument("--tcm_files", help="tcm_files", nargs="*", type=str, required=False) args = argparser.parse_args() logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") @@ -63,17 +65,23 @@ else: input_file = args.raw_files - if isinstance(args.tcm_files, list) and args.tcm_files[0].split(".")[-1] == "filelist": - tcm_files = args.tcm_files[0] - with open(tcm_files) as f: + if args.pulser_file: + with open(args.pulser_file) as f: + pulser_dict = json.load(f) + mask = np.array(pulser_dict["mask"]) + + elif args.tcm_filelist: + # get pulser mask from tcm files + with open(args.tcm_filelist) as f: tcm_files = f.read().splitlines() + tcm_files = sorted(np.unique(tcm_files)) + ids, mask = get_tcm_pulser_ids( + tcm_files, args.channel, kwarg_dict["pulser_multiplicity_threshold"] + ) else: - tcm_files = args.tcm_files - # get pulser mask from tcm files - tcm_files = sorted(np.unique(tcm_files)) - ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict.pop("pulser_multiplicity_threshold") - ) + msg = "No pulser file or tcm filelist provided" + raise ValueError(msg) + data = sto.read(f"{args.channel}/raw", input_file, field_mask=["daqenergy", "timestamp"])[ 0 ].view_as("pd") From 489e3e5e9c0ea0c69f4d776faf20d871fb873eb7 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Fri, 29 Mar 2024 13:50:32 +0100 Subject: [PATCH 032/103] add more checks and debug --- scripts/pars_dsp_event_selection.py | 79 +++++++++++++++++++---------- 1 file changed, 51 insertions(+), 28 deletions(-) diff --git a/scripts/pars_dsp_event_selection.py b/scripts/pars_dsp_event_selection.py index 3802056..44c1604 100644 --- a/scripts/pars_dsp_event_selection.py +++ b/scripts/pars_dsp_event_selection.py @@ -10,6 +10,8 @@ os.environ["LGDO_BOUNDSCHECK"] = "false" os.environ["DSPEED_CACHE"] = "false" os.environ["DSPEED_BOUNDSCHECK"] = 
"false" +os.environ["PYGAMA_PARALLEL"] = "false" +os.environ["PYGAMA_FASTMATH"] = "false" from bisect import bisect_left @@ -91,7 +93,7 @@ def get_out_data( argparser.add_argument("--decay_const", help="decay_const", type=str, required=True) argparser.add_argument("--configs", help="configs", type=str, required=True) - argparser.add_argument("--raw_cal", help="raw_cal", type=str, required=True) + argparser.add_argument("--raw_cal", help="raw_cal", type=str, nargs="*", required=True) argparser.add_argument("--log", help="log_file", type=str) @@ -108,6 +110,7 @@ def get_out_data( logging.getLogger("lgdo").setLevel(logging.INFO) logging.getLogger("h5py").setLevel(logging.INFO) logging.getLogger("matplotlib").setLevel(logging.INFO) + logging.getLogger("legendmeta").setLevel(logging.INFO) logging.getLogger("dspeed.processing_chain").setLevel(logging.INFO) log = logging.getLogger(__name__) @@ -128,6 +131,7 @@ def get_out_data( pathlib.Path(os.path.dirname(args.peak_file)).mkdir(parents=True, exist_ok=True) if peak_dict.pop("run_selection") is True: + log.debug("Starting peak selection") rng = np.random.default_rng() rand_num = f"{rng.integers(0,99999):05d}" temp_output = f"{args.peak_file}.{rand_num}" @@ -190,7 +194,7 @@ def get_out_data( masks[peak] = np.where(e_mask)[0] log.debug(f"{len(masks[peak])} events found in energy range for {peak}") - input_data = sto.read(f"{lh5_path}", raw_files, n_rows=10000)[0] + input_data = sto.read(f"{lh5_path}", raw_files, n_rows=10000, idx=np.where(~mask)[0])[0] if isinstance(dsp_config, str): dsp_config = Props.read_from(dsp_config) @@ -205,7 +209,7 @@ def get_out_data( if cut_parameters is not None: cut_dict = generate_cuts(tb_data, cut_parameters) - log.debug(f"Cuts are calculated: {cut_dict}") + log.debug(f"Cuts are calculated: {json.dumps(cut_dict, indent=2)}") else: cut_dict = None @@ -249,10 +253,22 @@ def get_out_data( tb_out = run_one_dsp(peak_dict["obj_buf"], dsp_config, db_dict=db_dict) energy = tb_out[energy_parameter].nda + init_bin_width = ( + 2 + * (np.nanpercentile(energy, 75) - np.nanpercentile(energy, 25)) + * len(energy) ** (-1 / 3) + ) + + if init_bin_width > 2: + init_bin_width = 2 + hist, bins, var = pgh.get_hist( energy, - range=(np.floor(np.nanmin(energy)), np.ceil(np.nanmax(energy))), - dx=peak / (np.nanpercentile(energy, 50)), + range=( + np.floor(np.nanpercentile(energy, 1)), + np.ceil(np.nanpercentile(energy, 99)), + ), + dx=init_bin_width, ) peak_loc = pgh.get_bin_centers(bins)[np.nanargmax(hist)] @@ -274,7 +290,9 @@ def get_out_data( peak_loc + (1.5 * peak_dict["kev_width"][1]) / rough_adc_to_kev ) hist, bins, var = pgh.get_hist( - energy, range=(int(e_lower_lim), int(e_upper_lim)), dx=1 + energy, + range=(int(e_lower_lim), int(e_upper_lim)), + dx=init_bin_width, ) mu = pgh.get_bin_centers(bins)[np.nanargmax(hist)] @@ -304,32 +322,37 @@ def get_out_data( peak_dict["obj_buf"] = None peak_dict["obj_buf_start"] = 0 peak_dict["n_events"] = n_wfs + log.debug(f'found {peak_dict["n_events"]} events for {peak}') else: - tb_out = run_one_dsp(peak_dict["obj_buf"], dsp_config, db_dict=db_dict) - out_tbl, n_wfs = get_out_data( - peak_dict["obj_buf"], - tb_out, - cut_dict, - peak_dict["e_lower_lim"], - peak_dict["e_upper_lim"], - peak_dict["ecal_par"], - raw_dict, - int(peak), - final_cut_field=final_cut_field, - energy_param=energy_parameter, - ) - peak_dict["n_events"] += n_wfs - sto.write(out_tbl, name=lh5_path, lh5_file=temp_output, wo_mode="a") - peak_dict["obj_buf"] = None - peak_dict["obj_buf_start"] = 0 - if peak_dict["n_events"] >= 
n_events: - peak_dict["idxs"] = None - log.debug(f"{peak} has reached the required number of events") - log.debug( - f"{peak}: {peak_dict['idxs']}, {peak_dict['idxs'] is not None}" + if peak_dict["obj_buf"] is not None and len(peak_dict["obj_buf"]) > 0: + tb_out = run_one_dsp( + peak_dict["obj_buf"], dsp_config, db_dict=db_dict + ) + out_tbl, n_wfs = get_out_data( + peak_dict["obj_buf"], + tb_out, + cut_dict, + peak_dict["e_lower_lim"], + peak_dict["e_upper_lim"], + peak_dict["ecal_par"], + raw_dict, + int(peak), + final_cut_field=final_cut_field, + energy_param=energy_parameter, + ) + peak_dict["n_events"] += n_wfs + sto.write( + out_tbl, name=lh5_path, lh5_file=temp_output, wo_mode="a" ) + peak_dict["obj_buf"] = None + peak_dict["obj_buf_start"] = 0 + log.debug(f'found {peak_dict["n_events"]} events for {peak}') + if peak_dict["n_events"] >= n_events: + peak_dict["idxs"] = None + log.debug(f"{peak} has reached the required number of events") else: pathlib.Path(temp_output).touch() + log.debug(f"event selection completed in {time.time()-t0} seconds") os.rename(temp_output, args.peak_file) From 5c4ca7b0194b583b09d63b2f90f607dfabb2b259 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Fri, 29 Mar 2024 13:51:07 +0100 Subject: [PATCH 033/103] changes for pargen refactor and add qc and pulser routines --- scripts/pars_hit_aoe.py | 117 ++++-- scripts/pars_hit_ecal.py | 635 +++++++++++++++++++++--------- scripts/pars_hit_lq.py | 86 +++-- scripts/pars_hit_qc.py | 156 +++++--- scripts/pars_pht_aoecal.py | 144 ++++--- scripts/pars_pht_lqcal.py | 73 +++- scripts/pars_pht_partcal.py | 748 ++++++++++++++++++------------------ scripts/pars_pht_qc.py | 224 +++++++++++ scripts/pars_tcm_pulser.py | 64 +++ 9 files changed, 1518 insertions(+), 729 deletions(-) create mode 100644 scripts/pars_pht_qc.py create mode 100644 scripts/pars_tcm_pulser.py diff --git a/scripts/pars_hit_aoe.py b/scripts/pars_hit_aoe.py index a425e20..afb90a8 100644 --- a/scripts/pars_hit_aoe.py +++ b/scripts/pars_hit_aoe.py @@ -9,18 +9,48 @@ import warnings from typing import Callable +os.environ["PYGAMA_PARALLEL"] = "false" +os.environ["PYGAMA_FASTMATH"] = "false" + import numpy as np import pandas as pd from legendmeta import LegendMetadata from legendmeta.catalog import Props from pygama.pargen.AoE_cal import * # noqa: F403 -from pygama.pargen.AoE_cal import cal_aoe, pol1, sigma_fit, standard_aoe -from pygama.pargen.utils import get_tcm_pulser_ids, load_data +from pygama.pargen.AoE_cal import CalAoE, Pol1, SigmaFit, aoe_peak +from pygama.pargen.data_cleaning import get_tcm_pulser_ids +from pygama.pargen.utils import load_data log = logging.getLogger(__name__) warnings.filterwarnings(action="ignore", category=RuntimeWarning) +def get_results_dict(aoe_class): + return { + "cal_energy_param": aoe_class.cal_energy_param, + "dt_param": aoe_class.dt_param, + "rt_correction": aoe_class.dt_corr, + "1000-1300keV": aoe_class.timecorr_df.to_dict("index"), + "correction_fit_results": aoe_class.energy_corr_res_dict, + "low_cut": aoe_class.low_cut_val, + "high_cut": aoe_class.high_cut_val, + "low_side_sfs": aoe_class.low_side_sfs.to_dict("index"), + "2_side_sfs": aoe_class.two_side_sfs.to_dict("index"), + } + + +def fill_plot_dict(aoe_class, data, plot_options, plot_dict=None): + if plot_dict is not None: + for key, item in plot_options.items(): + if item["options"] is not None: + plot_dict[key] = item["function"](aoe_class, data, **item["options"]) + else: + plot_dict[key] = item["function"](aoe_class, data) + else: + plot_dict = {} + return 
plot_dict + + def aoe_calibration( data: pd.Dataframe, cal_dicts: dict, @@ -28,36 +58,34 @@ def aoe_calibration( energy_param: str, cal_energy_param: str, eres_func: Callable, - pdf: Callable = standard_aoe, + pdf: Callable = aoe_peak, selection_string: str = "", dt_corr: bool = False, dep_correct: bool = False, dt_cut: dict | None = None, high_cut_val: int = 3, - mean_func: Callable = pol1, - sigma_func: Callable = sigma_fit, - dep_acc: float = 0.9, + mean_func: Callable = Pol1, + sigma_func: Callable = SigmaFit, + # dep_acc: float = 0.9, dt_param: str = "dt_eff", comptBands_width: int = 20, plot_options: dict | None = None, ): data["AoE_Uncorr"] = data[current_param] / data[energy_param] - aoe = cal_aoe( - cal_dicts, - cal_energy_param, - eres_func, - pdf, - selection_string, - dt_corr, - dep_acc, - dep_correct, - dt_cut, - dt_param, - high_cut_val, - mean_func, - sigma_func, - comptBands_width, - plot_options if plot_options is not None else {}, + aoe = CalAoE( + cal_dicts=cal_dicts, + cal_energy_param=cal_energy_param, + eres_func=eres_func, + pdf=pdf, + selection_string=selection_string, + dt_corr=dt_corr, + dep_correct=dep_correct, + dt_cut=dt_cut, + dt_param=dt_param, + high_cut_val=high_cut_val, + mean_func=mean_func, + sigma_func=sigma_func, + compt_bands_width=comptBands_width, ) aoe.update_cal_dicts( @@ -71,12 +99,13 @@ def aoe_calibration( aoe.calibrate(data, "AoE_Uncorr") log.info("Calibrated A/E") - return cal_dicts, aoe.get_results_dict(), aoe.fill_plot_dict(data), aoe + return cal_dicts, get_results_dict(aoe), fill_plot_dict(aoe, data, plot_options), aoe argparser = argparse.ArgumentParser() argparser.add_argument("files", help="files", nargs="*", type=str) -argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, required=True) +argparser.add_argument("--pulser_file", help="pulser_file", type=str, required=False) +argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, required=False) argparser.add_argument("--ecal_file", help="ecal_file", type=str, required=True) argparser.add_argument("--eres_file", help="eres_file", type=str, required=True) argparser.add_argument("--inplots", help="in_plot_path", type=str, required=False) @@ -119,11 +148,11 @@ def aoe_calibration( if kwarg_dict["run_aoe"] is True: kwarg_dict.pop("run_aoe") - pdf = eval(kwarg_dict.pop("pdf")) if "pdf" in kwarg_dict else standard_aoe + pdf = eval(kwarg_dict.pop("pdf")) if "pdf" in kwarg_dict else aoe_peak - sigma_func = eval(kwarg_dict.pop("sigma_func")) if "sigma_func" in kwarg_dict else sigma_fit + sigma_func = eval(kwarg_dict.pop("sigma_func")) if "sigma_func" in kwarg_dict else SigmaFit - mean_func = eval(kwarg_dict.pop("mean_func")) if "mean_func" in kwarg_dict else pol1 + mean_func = eval(kwarg_dict.pop("mean_func")) if "mean_func" in kwarg_dict else Pol1 if "plot_options" in kwarg_dict: for field, item in kwarg_dict["plot_options"].items(): @@ -173,13 +202,25 @@ def eres_func(x): return_selection_mask=True, ) - # get pulser mask from tcm files - with open(args.tcm_filelist) as f: - tcm_files = f.read().splitlines() - tcm_files = sorted(tcm_files) - ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict.pop("pulser_multiplicity_threshold") - ) + if args.pulser_file: + with open(args.pulser_file) as f: + pulser_dict = json.load(f) + mask = np.array(pulser_dict["mask"]) + if "pulser_multiplicity_threshold" in kwarg_dict: + kwarg_dict.pop("pulser_multiplicity_threshold") + + elif args.tcm_filelist: + # get pulser mask from tcm files + with 
open(args.tcm_filelist) as f: + tcm_files = f.read().splitlines() + tcm_files = sorted(np.unique(tcm_files)) + ids, mask = get_tcm_pulser_ids( + tcm_files, args.channel, kwarg_dict.pop("pulser_multiplicity_threshold") + ) + else: + msg = "No pulser file or tcm filelist provided" + raise ValueError(msg) + data["is_pulser"] = mask[threshold_mask] cal_dict, out_dict, plot_dict, obj = aoe_calibration( @@ -222,7 +263,7 @@ def eres_func(x): pkl.dump(out_plot_dict, w, protocol=pkl.HIGHEST_PROTOCOL) pathlib.Path(os.path.dirname(args.hit_pars)).mkdir(parents=True, exist_ok=True) -results_dict = dict(**ecal_dict["results"], aoe = out_dict) +results_dict = dict(**ecal_dict["results"], aoe=out_dict) with open(args.hit_pars, "w") as w: final_hit_dict = { "pars": {"operations": cal_dict}, @@ -232,8 +273,8 @@ def eres_func(x): pathlib.Path(os.path.dirname(args.aoe_results)).mkdir(parents=True, exist_ok=True) final_object_dict = dict( - **object_dict, - aoe=obj, - ) + **object_dict, + aoe=obj, +) with open(args.aoe_results, "wb") as w: pkl.dump(final_object_dict, w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_hit_ecal.py b/scripts/pars_hit_ecal.py index ad44d6d..1d7d436 100644 --- a/scripts/pars_hit_ecal.py +++ b/scripts/pars_hit_ecal.py @@ -1,6 +1,7 @@ from __future__ import annotations import argparse +import copy import json import logging import os @@ -9,18 +10,22 @@ import warnings from datetime import datetime +os.environ["PYGAMA_PARALLEL"] = "false" +os.environ["PYGAMA_FASTMATH"] = "false" + import lgdo.lh5 as lh5 import matplotlib as mpl import matplotlib.pyplot as plt import numpy as np -import pandas as pd +import pygama.math.distributions as pgf import pygama.math.histogram as pgh from legendmeta import LegendMetadata from legendmeta.catalog import Props from matplotlib.colors import LogNorm -from pygama.pargen.ecal_th import * # noqa: F403 -from pygama.pargen.ecal_th import apply_cuts, calibrate_parameter -from pygama.pargen.utils import get_tcm_pulser_ids, load_data +from pygama.math.distributions import nb_poly +from pygama.pargen.data_cleaning import get_tcm_pulser_ids +from pygama.pargen.energy_cal import FWHMLinear, FWHMQuadratic, HPGeCalibration +from pygama.pargen.utils import load_data from scipy.stats import binned_statistic log = logging.getLogger(__name__) @@ -28,6 +33,238 @@ sto = lh5.LH5Store() warnings.filterwarnings(action="ignore", category=RuntimeWarning) +warnings.filterwarnings(action="ignore", category=np.RankWarning) + + +def plot_2614_timemap( + data, + cal_energy_param, + selection_string, + figsize=(12, 8), + fontsize=12, + erange=(2580, 2630), + dx=1, + time_dx=180, +): + plt.rcParams["figure.figsize"] = figsize + plt.rcParams["font.size"] = fontsize + + selection = data.query(f"{cal_energy_param}>2560&{cal_energy_param}<2660&{selection_string}") + + fig = plt.figure() + if len(selection) == 0: + pass + else: + time_bins = np.arange( + (np.amin(data["timestamp"]) // time_dx) * time_dx, + ((np.amax(data["timestamp"]) // time_dx) + 2) * time_dx, + time_dx, + ) + + plt.hist2d( + selection["timestamp"], + selection[cal_energy_param], + bins=[time_bins, np.arange(erange[0], erange[1] + dx, dx)], + norm=LogNorm(), + ) + + ticks, labels = plt.xticks() + plt.xlabel(f"Time starting : {datetime.utcfromtimestamp(ticks[0]).strftime('%d/%m/%y %H:%M')}") + plt.ylabel("Energy(keV)") + plt.ylim([erange[0], erange[1]]) + + plt.xticks( + ticks, + [datetime.utcfromtimestamp(tick).strftime("%H:%M") for tick in ticks], + ) + plt.close() + return fig + + +def 
plot_pulser_timemap(
+    data,
+    cal_energy_param,
+    selection_string,  # noqa: ARG001
+    pulser_field="is_pulser",
+    figsize=(12, 8),
+    fontsize=12,
+    dx=0.2,
+    time_dx=180,
+    n_spread=3,
+):
+    plt.rcParams["figure.figsize"] = figsize
+    plt.rcParams["font.size"] = fontsize
+
+    time_bins = np.arange(
+        (np.amin(data["timestamp"]) // time_dx) * time_dx,
+        ((np.amax(data["timestamp"]) // time_dx) + 2) * time_dx,
+        time_dx,
+    )
+
+    selection = data.query(pulser_field)
+    fig = plt.figure()
+    if len(selection) == 0:
+        pass
+
+    else:
+        mean = np.nanpercentile(selection[cal_energy_param], 50)
+        spread = mean - np.nanpercentile(selection[cal_energy_param], 10)
+
+        plt.hist2d(
+            selection["timestamp"],
+            selection[cal_energy_param],
+            bins=[
+                time_bins,
+                np.arange(mean - n_spread * spread, mean + n_spread * spread + dx, dx),
+            ],
+            norm=LogNorm(),
+        )
+        plt.ylim([mean - n_spread * spread, mean + n_spread * spread])
+        ticks, labels = plt.xticks()
+        plt.xlabel(f"Time starting : {datetime.utcfromtimestamp(ticks[0]).strftime('%d/%m/%y %H:%M')}")
+        plt.ylabel("Energy(keV)")
+
+        plt.xticks(
+            ticks,
+            [datetime.utcfromtimestamp(tick).strftime("%H:%M") for tick in ticks],
+        )
+    plt.close()
+    return fig
+
+
+def get_median(x):
+    if len(x[~np.isnan(x)]) < 10:
+        return np.nan
+    else:
+        return np.nanpercentile(x, 50)
+
+
+def get_err(x):
+    if len(x[~np.isnan(x)]) < 10:
+        return np.nan
+    else:
+        return np.nanvar(x) / np.sqrt(len(x))
+
+
+def bin_pulser_stability(
+    data,
+    cal_energy_param,
+    selection_string,  # noqa: ARG001
+    pulser_field="is_pulser",
+    time_slice=180,
+):
+    selection = data.query(pulser_field)
+
+    utime_array = data["timestamp"]
+    select_energies = selection[cal_energy_param].to_numpy()
+
+    time_bins = np.arange(
+        (np.amin(utime_array) // time_slice) * time_slice,
+        ((np.amax(utime_array) // time_slice) + 2) * time_slice,
+        time_slice,
+    )
+    # bin time values
+    times_average = (time_bins[:-1] + time_bins[1:]) / 2
+
+    if len(selection) == 0:
+        return {
+            "time": times_average,
+            "energy": np.full_like(times_average, np.nan),
+            "spread": np.full_like(times_average, np.nan),
+        }
+
+    par_average, _, _ = binned_statistic(
+        selection["timestamp"], select_energies, statistic=get_median, bins=time_bins
+    )
+    par_error, _, _ = binned_statistic(
+        selection["timestamp"], select_energies, statistic=get_err, bins=time_bins
+    )
+
+    return {"time": times_average, "energy": par_average, "spread": par_error}
+
+
+def bin_stability(
+    data,
+    cal_energy_param,
+    selection_string,
+    time_slice=180,
+    energy_range=(2585, 2660),
+):
+    selection = data.query(
+        f"{cal_energy_param}>{energy_range[0]}&{cal_energy_param}<{energy_range[1]}&{selection_string}"
+    )
+
+    utime_array = data["timestamp"]
+    select_energies = selection[cal_energy_param].to_numpy()
+
+    time_bins = np.arange(
+        (np.amin(utime_array) // time_slice) * time_slice,
+        ((np.amax(utime_array) // time_slice) + 2) * time_slice,
+        time_slice,
+    )
+    # bin time values
+    times_average = (time_bins[:-1] + time_bins[1:]) / 2
+
+    if len(selection) == 0:
+        return {
+            "time": times_average,
+            "energy": np.full_like(times_average, np.nan),
+            "spread": np.full_like(times_average, np.nan),
+        }
+
+    par_average, _, _ = binned_statistic(
+        selection["timestamp"], select_energies, statistic=get_median, bins=time_bins
+    )
+    par_error, _, _ = binned_statistic(
+        selection["timestamp"], select_energies, statistic=get_err, bins=time_bins
+    )
+
+    return {"time": times_average, "energy": par_average, "spread": par_error}
+
+
+def bin_spectrum(
+    data,
+    cal_energy_param,
+ selection_string, + cut_field="is_valid_cal", + pulser_field="is_pulser", + erange=(0, 3000), + dx=2, +): + bins = np.arange(erange[0], erange[1] + dx, dx) + return { + "bins": pgh.get_bin_centers(bins), + "counts": np.histogram(data.query(selection_string)[cal_energy_param], bins)[0], + "cut_counts": np.histogram( + data.query(f"(~{cut_field})&(~{pulser_field})")[cal_energy_param], + bins, + )[0], + "pulser_counts": np.histogram( + data.query(pulser_field)[cal_energy_param], + bins, + )[0], + } + + +def bin_survival_fraction( + data, + cal_energy_param, + selection_string, + cut_field="is_valid_cal", + pulser_field="is_pulser", + erange=(0, 3000), + dx=6, +): + counts_pass, bins_pass, _ = pgh.get_hist( + data.query(selection_string)[cal_energy_param], + bins=np.arange(erange[0], erange[1] + dx, dx), + ) + counts_fail, bins_fail, _ = pgh.get_hist( + data.query(f"(~{cut_field})&(~{pulser_field})")[cal_energy_param], + bins=np.arange(erange[0], erange[1] + dx, dx), + ) + sf = 100 * (counts_pass + 10 ** (-6)) / (counts_pass + counts_fail + 10 ** (-6)) + return {"bins": pgh.get_bin_centers(bins_pass), "sf": sf} def plot_baseline_timemap( @@ -125,180 +362,64 @@ def baseline_tracking_plots(files, lh5_path, plot_options=None): plot_dict[key] = item["function"](data) return plot_dict -def get_results_dict(ecal_class, data): + +def get_results_dict(ecal_class, data, cal_energy_param, selection_string): if np.isnan(ecal_class.pars).all(): return {} else: - fwhm_linear = ecal_class.fwhm_fit_linear.copy() - fwhm_linear["parameters"] = fwhm_linear["parameters"].to_dict() - fwhm_linear["uncertainties"] = fwhm_linear["uncertainties"].to_dict() - fwhm_linear["cov"] = fwhm_linear["cov"].tolist() - fwhm_quad = ecal_class.fwhm_fit_quadratic.copy() - fwhm_quad["parameters"] = fwhm_quad["parameters"].to_dict() - fwhm_quad["uncertainties"] = fwhm_quad["uncertainties"].to_dict() - fwhm_quad["cov"] = fwhm_quad["cov"].tolist() - - pk_dict = { - Ei: { - "function": func_i.__name__, - "module": func_i.__module__, - "parameters_in_ADC": parsi.to_dict(), - "uncertainties_in_ADC": errorsi.to_dict(), - "p_val": pvali, - "fwhm_in_keV": list(fwhmi), - "pk_position":(posi, posuni), - } - for i, (Ei, parsi, errorsi, pvali, fwhmi, posi, posuni, func_i) in enumerate( - zip( - ecal_class.results["fitted_keV"], - ecal_class.results["pk_pars"][ecal_class.results["pk_validities"]], - ecal_class.results["pk_errors"][ecal_class.results["pk_validities"]], - ecal_class.results["pk_pvals"][ecal_class.results["pk_validities"]], - ecal_class.results["pk_fwhms"], - ecal_class.results["pk_pos"], - ecal_class.results["pk_pos_uncertainties"], - ecal_class.funcs, - ) - ) - } + results_dict = copy.deepcopy(ecal_class.results["hpge_fit_energy_peaks_1"]) + + if "FWHMLinear" in results_dict: + fwhm_linear = results_dict["FWHMLinear"] + fwhm_linear["function"] = fwhm_linear["function"].__name__ + fwhm_linear["parameters"] = fwhm_linear["parameters"].to_dict() + fwhm_linear["uncertainties"] = fwhm_linear["uncertainties"].to_dict() + fwhm_linear["cov"] = fwhm_linear["cov"].tolist() + else: + fwhm_linear = None + + if "FWHMQuadratic" in results_dict: + fwhm_quad = results_dict["FWHMQuadratic"] + fwhm_quad["function"] = fwhm_quad["function"].__name__ + fwhm_quad["parameters"] = fwhm_quad["parameters"].to_dict() + fwhm_quad["uncertainties"] = fwhm_quad["uncertainties"].to_dict() + fwhm_quad["cov"] = fwhm_quad["cov"].tolist() + else: + fwhm_quad = None + + pk_dict = results_dict["peak_parameters"] + + for _, dic in pk_dict.items(): + dic["function"] 
= dic["function"].name + dic["parameters"] = dic["parameters"].to_dict() + dic["uncertainties"] = dic["uncertainties"].to_dict() + dic.pop("covariance") return { - "total_fep": len( - data.query( - f"{ecal_class.cal_energy_param}>2604&{ecal_class.cal_energy_param}<2624" - ) - ), - "total_dep": len( - data.query( - f"{ecal_class.cal_energy_param}>1587&{ecal_class.cal_energy_param}<1597" - ) - ), + "total_fep": len(data.query(f"{cal_energy_param}>2604&{cal_energy_param}<2624")), + "total_dep": len(data.query(f"{cal_energy_param}>1587&{cal_energy_param}<1597")), "pass_fep": len( - data.query( - f"{ecal_class.cal_energy_param}>2604&{ecal_class.cal_energy_param}<2624&{ecal_class.selection_string}" - ) + data.query(f"{cal_energy_param}>2604&{cal_energy_param}<2624&{selection_string}") ), "pass_dep": len( - data.query( - f"{ecal_class.cal_energy_param}>1587&{ecal_class.cal_energy_param}<1597&{ecal_class.selection_string}" - ) + data.query(f"{cal_energy_param}>1587&{cal_energy_param}<1597&{selection_string}") ), "eres_linear": fwhm_linear, "eres_quadratic": fwhm_quad, - "fitted_peaks": ecal_class.results["fitted_keV"].tolist(), + "fitted_peaks": ecal_class.peaks_kev.tolist(), "pk_fits": pk_dict, - "mode":ecal_class.results["mode"], } -def energy_cal_th( - data: pd.Dataframe, - energy_params: list[str], - cal_energy_params: list | None = None, - selection_string: str = "", - hit_dict: dict | None = None, - cut_parameters: dict[str, int] | None = None, - plot_options: dict | None = None, - threshold: int = 0, - p_val: float = 0, - n_events: int | None = None, - final_cut_field: str = "is_valid_cal", - simplex: bool = True, - guess_keV: float | None = None, - tail_weight=100, - deg: int = 1, -) -> tuple(dict, dict, dict, dict): - data, hit_dict = apply_cuts( - data, - hit_dict if hit_dict is not None else {}, - cut_parameters if cut_parameters is not None else {}, - final_cut_field, - ) - - if cal_energy_params is None: - cal_energy_params = [energy_param + "_cal" for energy_param in energy_params] - - - glines = [ - # 238.632, - 583.191, - 727.330, - 860.564, - 1592.53, - 1620.50, - 2103.53, - 2614.50, - ] # gamma lines used for calibration - range_keV = [ - # (8, 8), - (20, 20), - (30, 30), - (30, 30), - (40, 20), - (20, 40), - (40, 40), - (60, 60), - ] # side bands width - funcs = [ - # pgf.extended_gauss_step_pdf, - pgf.extended_radford_pdf, - pgf.extended_radford_pdf, - pgf.extended_radford_pdf, - pgf.extended_radford_pdf, - pgf.extended_radford_pdf, - pgf.extended_radford_pdf, - pgf.extended_radford_pdf, - ] - gof_funcs = [ - # pgf.gauss_step_pdf, - pgf.radford_pdf, - pgf.radford_pdf, - pgf.radford_pdf, - pgf.radford_pdf, - pgf.radford_pdf, - pgf.radford_pdf, - pgf.radford_pdf, - ] - - results_dict = {} - plot_dict = {} - full_object_dict = {} - for energy_param, cal_energy_param in zip(energy_params, cal_energy_params): - full_object_dict[cal_energy_param] = calibrate_parameter( - energy_param, - glines, - range_keV, - funcs, - gof_funcs, - selection_string, - plot_options, - guess_keV, - threshold, - p_val, - n_events, - simplex, - deg, - tail_weight=tail_weight, - cal_energy_param=cal_energy_param, - ) - full_object_dict[cal_energy_param].calibrate_parameter(data) - results_dict[cal_energy_param] = get_results_dict(full_object_dict[cal_energy_param], data) - hit_dict.update(full_object_dict[cal_energy_param].hit_dict) - if ~np.isnan(full_object_dict[cal_energy_param].pars).all(): - plot_dict[cal_energy_param] = ( - full_object_dict[cal_energy_param].fill_plot_dict(data).copy() - ) - - 
log.info("Finished all calibrations") - return hit_dict, results_dict, plot_dict, full_object_dict - if __name__ == "__main__": argparser = argparse.ArgumentParser() - argparser.add_argument("--files", help="files", nargs="*", type=str) - argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, required=True) + argparser.add_argument("--files", help="filelist", nargs="*", type=str) + argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, required=False) + argparser.add_argument("--pulser_file", help="pulser_file", type=str, required=False) + argparser.add_argument("--ctc_dict", help="ctc_dict", nargs="*") - argparser.add_argument("--in_hit_dict", help="in_hit_dict", nargs="*", required=False) - argparser.add_argument("--inplot_dict", help="inplot_dict", nargs="*", required=False) + argparser.add_argument("--in_hit_dict", help="in_hit_dict", required=False) + argparser.add_argument("--inplot_dict", help="inplot_dict", required=False) argparser.add_argument("--configs", help="config", type=str, required=True) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) @@ -306,6 +427,8 @@ def energy_cal_th( argparser.add_argument("--channel", help="Channel", type=str, required=True) argparser.add_argument("--tier", help="tier", type=str, default="hit") + argparser.add_argument("--metadata", help="metadata path", type=str, required=True) + argparser.add_argument("--log", help="log_file", type=str) argparser.add_argument("--plot_path", help="plot_path", type=str, required=False) @@ -321,12 +444,17 @@ def energy_cal_th( logging.getLogger("matplotlib").setLevel(logging.INFO) logging.getLogger("legendmeta").setLevel(logging.INFO) + meta = LegendMetadata(path=args.metadata) + chmap = meta.channelmap(args.timestamp) + + det_status = chmap.map("daq.rawid")[int(args.channel[2:])]["analysis"]["usability"] + if args.in_hit_dict: hit_dict = Props.read_from(args.in_hit_dict) database_dic = Props.read_from(args.ctc_dict) - hit_dict = hit_dict.update(database_dic[args.channel]["ctc_params"]) + hit_dict.update(database_dic[args.channel]["ctc_params"]) # get metadata dictionary configs = LegendMetadata(path=args.configs) @@ -350,40 +478,164 @@ def energy_cal_th( bl_plots[field]["function"] = eval(item["function"]) common_plots = kwarg_dict.pop("common_plots") + with open(args.files[0]) as f: + files = f.read().splitlines() + files = sorted(files) + # load data in data, threshold_mask = load_data( - args.files, + files, f"{args.channel}/dsp", hit_dict, - params=kwarg_dict["energy_params"] - + list(kwarg_dict["cut_parameters"]) - + ["timestamp", "trapTmax"], + params=[*kwarg_dict["energy_params"], kwarg_dict["cut_param"], "timestamp", "trapTmax"], threshold=kwarg_dict["threshold"], return_selection_mask=True, cal_energy_param="trapTmax", ) - # get pulser mask from tcm files - with open(args.tcm_filelist) as f: - tcm_files = f.read().splitlines() - tcm_files = sorted(np.unique(tcm_files)) - ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict.pop("pulser_multiplicity_threshold") - ) + if args.pulser_file: + with open(args.pulser_file) as f: + pulser_dict = json.load(f) + mask = np.array(pulser_dict["mask"]) + + elif args.tcm_filelist: + # get pulser mask from tcm files + with open(args.tcm_filelist) as f: + tcm_files = f.read().splitlines() + tcm_files = sorted(np.unique(tcm_files)) + ids, mask = get_tcm_pulser_ids( + tcm_files, args.channel, kwarg_dict["pulser_multiplicity_threshold"] + ) + else: + msg = "No pulser file or tcm filelist 
provided" + raise ValueError(msg) + data["is_pulser"] = mask[threshold_mask] - # run energy calibration - out_dict, result_dict, plot_dict, ecal_object = energy_cal_th( - data, - hit_dict=hit_dict, - selection_string=f"({kwarg_dict['final_cut_field']})&(~is_pulser)", - **kwarg_dict, - ) + pk_pars = [ + (583.191, (20, 20), pgf.hpge_peak), + (727.330, (30, 30), pgf.hpge_peak), + (860.564, (30, 25), pgf.hpge_peak), + (1592.53, (40, 20), pgf.gauss_on_step), + (1620.50, (20, 40), pgf.gauss_on_step), + (2103.53, (40, 40), pgf.gauss_on_step), + (2614.50, (60, 60), pgf.hpge_peak), + ] + + glines = [pk_par[0] for pk_par in pk_pars] + + if "cal_energy_params" not in kwarg_dict: + cal_energy_params = [energy_param + "_cal" for energy_param in kwarg_dict["energy_params"]] + else: + cal_energy_params = kwarg_dict["cal_energy_params"] + + selection_string = f"~is_pulser&{kwarg_dict['cut_param']}" + + results_dict = {} + plot_dict = {} + full_object_dict = {} + + for energy_param, cal_energy_param in zip(kwarg_dict["energy_params"], cal_energy_params): + e_uncal = data.query(selection_string)[energy_param].to_numpy() + + hist, bins, bar = pgh.get_hist( + e_uncal[ + (e_uncal > np.nanpercentile(e_uncal, 95)) + & (e_uncal < np.nanpercentile(e_uncal, 99.9)) + ], + dx=1, + range=[np.nanpercentile(e_uncal, 95), np.nanpercentile(e_uncal, 99.9)], + ) + + guess = 2614.553 / bins[np.nanargmax(hist)] + full_object_dict[cal_energy_param] = HPGeCalibration( + energy_param, + glines, + guess, + kwarg_dict.get("deg", 0), + ) + full_object_dict[cal_energy_param].hpge_get_energy_peaks(e_uncal) + got_peaks_kev = full_object_dict[cal_energy_param].peaks_kev.copy() + full_object_dict[cal_energy_param].hpge_fit_energy_peaks( + e_uncal, + peaks_kev=[2614.50], + peak_pars=pk_pars, + tail_weight=kwarg_dict.get("tail_weight", 0), + n_events=kwarg_dict.get("n_events", None), + allowed_p_val=kwarg_dict.get("p_val", 0), + update_cal_pars=bool(det_status == "on"), + ) + full_object_dict[cal_energy_param].hpge_fit_energy_peaks( + e_uncal, + peaks_kev=got_peaks_kev, + peak_pars=pk_pars, + tail_weight=kwarg_dict.get("tail_weight", 0), + n_events=kwarg_dict.get("n_events", None), + allowed_p_val=kwarg_dict.get("p_val", 0), + update_cal_pars=False, + ) + + full_object_dict[cal_energy_param].get_energy_res_curve( + FWHMLinear, + interp_energy_kev={"Qbb": 2039.0}, + ) + full_object_dict[cal_energy_param].get_energy_res_curve( + FWHMQuadratic, + interp_energy_kev={"Qbb": 2039.0}, + ) + + data[cal_energy_param] = nb_poly( + data[energy_param].to_numpy(), full_object_dict[cal_energy_param].pars + ) + + results_dict[cal_energy_param] = get_results_dict( + full_object_dict[cal_energy_param], data, cal_energy_param, selection_string + ) + + hit_dict.update({cal_energy_param: full_object_dict[cal_energy_param].gen_pars_dict()}) + if args.plot_path: + param_plot_dict = {} + if ~np.isnan(full_object_dict[cal_energy_param].pars).all(): + param_plot_dict["fwhm_fit"] = full_object_dict[cal_energy_param].plot_eres_fit( + e_uncal + ) + param_plot_dict["cal_fit"] = full_object_dict[cal_energy_param].plot_cal_fit( + e_uncal + ) + param_plot_dict["peak_fits"] = full_object_dict[cal_energy_param].plot_fits( + e_uncal + ) + + if "plot_options" in kwarg_dict: + for key, item in kwarg_dict["plot_options"].items(): + if item["options"] is not None: + param_plot_dict[key] = item["function"]( + data, + cal_energy_param, + selection_string, + **item["options"], + ) + else: + param_plot_dict[key] = item["function"]( + data, + cal_energy_param, + selection_string, + 
) + plot_dict[cal_energy_param] = param_plot_dict + + for peak_dict in full_object_dict[cal_energy_param].results["hpge_fit_energy_peaks_1"]["peak_parameters"].values(): + peak_dict["function"] = peak_dict["function"].name + peak_dict["parameters"] = peak_dict["parameters"].to_dict() + peak_dict["uncertainties"] = peak_dict["uncertainties"].to_dict() + for peak_dict in full_object_dict[cal_energy_param].results["hpge_fit_energy_peaks"]["peak_parameters"].values(): + peak_dict["function"] = peak_dict["function"].name + peak_dict["parameters"] = peak_dict["parameters"].to_dict() + peak_dict["uncertainties"] = peak_dict["uncertainties"].to_dict() # get baseline plots and save all plots to file if args.plot_path: common_dict = baseline_tracking_plots( - sorted(args.files), f"{args.channel}/dsp", plot_options=bl_plots + sorted(files), f"{args.channel}/dsp", plot_options=bl_plots ) for plot in list(common_dict): @@ -398,25 +650,26 @@ def energy_cal_th( if plot in item: param_dict.update({plot: item[plot]}) common_dict.update({key: param_dict}) - plot_dict = {"ecal":plot_dict} - plot_dict["common"] = common_dict if args.inplot_dict: with open(args.inplot_dict, "rb") as f: - total_plot_dict = pkl.load(args.inplot_dict, protocol=pkl.HIGHEST_PROTOCOL) - if "common" in total_plot_dict: - total_plot_dict["common"].update(common_dict) - else: - plot_dict["common"] = common_dict + total_plot_dict = pkl.load(f) + else: + total_plot_dict = {} + + if "common" in total_plot_dict: + total_plot_dict["common"].update(common_dict) + else: + total_plot_dict["common"] = common_dict - total_plot_dict = total_plot_dict.update(plot_dict) + total_plot_dict.update({"ecal": plot_dict}) pathlib.Path(os.path.dirname(args.plot_path)).mkdir(parents=True, exist_ok=True) with open(args.plot_path, "wb") as f: - pkl.dump(plot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) + pkl.dump(total_plot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) # save output dictionary - output_dict = {"pars": out_dict, "results": {"ecal":result_dict}} + output_dict = {"pars": hit_dict, "results": {"ecal": results_dict}} with open(args.save_path, "w") as fp: pathlib.Path(os.path.dirname(args.save_path)).mkdir(parents=True, exist_ok=True) json.dump(output_dict, fp, indent=4) @@ -424,4 +677,4 @@ def energy_cal_th( # save calibration objects with open(args.results_path, "wb") as fp: pathlib.Path(os.path.dirname(args.results_path)).mkdir(parents=True, exist_ok=True) - pkl.dump({"ecal":ecal_object}, fp, protocol=pkl.HIGHEST_PROTOCOL) + pkl.dump({"ecal": full_object_dict}, fp, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_hit_lq.py b/scripts/pars_hit_lq.py index 860029f..ca4cd80 100644 --- a/scripts/pars_hit_lq.py +++ b/scripts/pars_hit_lq.py @@ -8,26 +8,53 @@ import pickle as pkl import warnings +os.environ["PYGAMA_PARALLEL"] = "false" +os.environ["PYGAMA_FASTMATH"] = "false" + import numpy as np import pandas as pd from legendmeta import LegendMetadata from legendmeta.catalog import Props -from pygama.math.peak_fitting import gauss_cdf +from pygama.math.distributions import gaussian +from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.lq_cal import * # noqa: F403 -from pygama.pargen.lq_cal import cal_lq -from pygama.pargen.utils import get_tcm_pulser_ids, load_data +from pygama.pargen.lq_cal import LQCal +from pygama.pargen.utils import load_data log = logging.getLogger(__name__) warnings.filterwarnings(action="ignore", category=RuntimeWarning) +def get_results_dict(lq_class): + return { + "cal_energy_param": 
lq_class.cal_energy_param, + "rt_correction": lq_class.dt_fit_pars, + # "cdf": lq_class.cdf.name, + "1590-1596keV": lq_class.timecorr_df.to_dict("index"), + "cut_value": lq_class.cut_val, + "sfs": lq_class.low_side_sf.to_dict("index"), + } + + +def fill_plot_dict(lq_class, data, plot_options, plot_dict=None): + if plot_dict is not None: + for key, item in plot_options.items(): + if item["options"] is not None: + plot_dict[key] = item["function"](lq_class, data, **item["options"]) + else: + plot_dict[key] = item["function"](lq_class, data) + else: + plot_dict = {} + return plot_dict + + def lq_calibration( data: pd.DataFrame, cal_dicts: dict, energy_param: str, cal_energy_param: str, eres_func: callable, - cdf: callable = gauss_cdf, + cdf: callable = gaussian, selection_string: str = "", plot_options: dict | None = None, ): @@ -62,17 +89,16 @@ def lq_calibration( A dict containing the results of the LQ calibration plot_dict: dict A dict containing all the figures specified by the plot options - lq: cal_lq class - The cal_lq object used for the LQ calibration + lq: LQCal class + The LQCal object used for the LQ calibration """ - lq = cal_lq( + lq = LQCal( cal_dicts, cal_energy_param, eres_func, cdf, selection_string, - plot_options, ) data["LQ_Ecorr"] = np.divide(data["lq80"], data[energy_param]) @@ -88,12 +114,14 @@ def lq_calibration( lq.calibrate(data, "LQ_Ecorr") log.info("Calibrated LQ") - return cal_dicts, lq.get_results_dict(), lq.fill_plot_dict(data), lq + return cal_dicts, get_results_dict(lq), fill_plot_dict(lq, data, plot_options), lq argparser = argparse.ArgumentParser() argparser.add_argument("files", help="files", nargs="*", type=str) -argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, required=True) +argparser.add_argument("--pulser_file", help="pulser_file", type=str, required=False) +argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, required=False) + argparser.add_argument("--ecal_file", help="ecal_file", type=str, required=True) argparser.add_argument("--eres_file", help="eres_file", type=str, required=True) argparser.add_argument("--inplots", help="in_plot_path", type=str, required=False) @@ -126,7 +154,7 @@ def lq_calibration( ecal_dict = Props.read_from(args.ecal_file) cal_dict = ecal_dict["pars"]["operations"] -eres_dict = ecal_dict["results"] +eres_dict = ecal_dict["results"]["ecal"] with open(args.eres_file, "rb") as o: object_dict = pkl.load(o) @@ -134,7 +162,7 @@ def lq_calibration( if kwarg_dict["run_lq"] is True: kwarg_dict.pop("run_lq") - cdf = eval(kwarg_dict.pop("cdf")) if "cdf" in kwarg_dict else gauss_cdf + cdf = eval(kwarg_dict.pop("cdf")) if "cdf" in kwarg_dict else gaussian if "plot_options" in kwarg_dict: for field, item in kwarg_dict["plot_options"].items(): @@ -173,13 +201,25 @@ def eres_func(x): return_selection_mask=True, ) - # get pulser mask from tcm files - with open(args.tcm_filelist) as f: - tcm_files = f.read().splitlines() - tcm_files = sorted(tcm_files) - ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict.pop("pulser_multiplicity_threshold") - ) + if args.pulser_file: + with open(args.pulser_file) as f: + pulser_dict = json.load(f) + mask = np.array(pulser_dict["mask"]) + if "pulser_multiplicity_threshold" in kwarg_dict: + kwarg_dict.pop("pulser_multiplicity_threshold") + + elif args.tcm_filelist: + # get pulser mask from tcm files + with open(args.tcm_filelist) as f: + tcm_files = f.read().splitlines() + tcm_files = sorted(np.unique(tcm_files)) + ids, mask = get_tcm_pulser_ids( + 
tcm_files, args.channel, kwarg_dict.pop("pulser_multiplicity_threshold") + ) + else: + msg = "No pulser file or tcm filelist provided" + raise ValueError(msg) + data["is_pulser"] = mask[threshold_mask] cal_dict, out_dict, plot_dict, obj = lq_calibration( @@ -220,7 +260,7 @@ def eres_func(x): pkl.dump(out_plot_dict, w, protocol=pkl.HIGHEST_PROTOCOL) -results_dict = dict(**eres_dict,lq = out_dict) +results_dict = dict(**eres_dict, lq=out_dict) pathlib.Path(os.path.dirname(args.hit_pars)).mkdir(parents=True, exist_ok=True) with open(args.hit_pars, "w") as w: final_hit_dict = { @@ -231,8 +271,8 @@ def eres_func(x): pathlib.Path(os.path.dirname(args.lq_results)).mkdir(parents=True, exist_ok=True) final_object_dict = dict( - **object_dict, - lq=obj, - ) + **object_dict, + lq=obj, +) with open(args.lq_results, "wb") as w: pkl.dump(final_object_dict, w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_hit_qc.py b/scripts/pars_hit_qc.py index 05254d8..09e14c6 100644 --- a/scripts/pars_hit_qc.py +++ b/scripts/pars_hit_qc.py @@ -6,12 +6,18 @@ import os import pathlib import pickle as pkl +import re import warnings +os.environ["PYGAMA_PARALLEL"] = "false" +os.environ["PYGAMA_FASTMATH"] = "false" + +import numpy as np from legendmeta import LegendMetadata from legendmeta.catalog import Props -from pygama.pargen.utils import get_tcm_pulser_ids, load_data -from pygama.pargen.cuts import generate_cuts +from lgdo.lh5 import ls +from pygama.pargen.data_cleaning import generate_cuts, get_keys, get_tcm_pulser_ids, generate_cut_classifiers +from pygama.pargen.utils import load_data log = logging.getLogger(__name__) @@ -20,8 +26,10 @@ if __name__ == "__main__": argparser = argparse.ArgumentParser() - argparser.add_argument("--files", help="files", nargs="*", type=str) - argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, required=True) + argparser.add_argument("--cal_files", help="cal_files", nargs="*", type=str) + argparser.add_argument("--fft_files", help="fft_files", nargs="*", type=str) + argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, required=False) + argparser.add_argument("--pulser_file", help="pulser_file", type=str, required=False) argparser.add_argument("--configs", help="config", type=str, required=True) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) @@ -31,8 +39,8 @@ argparser.add_argument("--log", help="log_file", type=str) - argparser.add_argument("--plot_path", help="plot_path", type=str, required=False, nargs="*") - argparser.add_argument("--save_path", help="save_path", type=str, nargs="*") + argparser.add_argument("--plot_path", help="plot_path", type=str, required=False) + argparser.add_argument("--save_path", help="save_path", type=str) args = argparser.parse_args() logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") @@ -43,62 +51,122 @@ logging.getLogger("matplotlib").setLevel(logging.INFO) logging.getLogger("legendmeta").setLevel(logging.INFO) - # get metadata dictionary configs = LegendMetadata(path=args.configs) channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"] - if args.tier == "hit": - channel_dict = channel_dict["pars_hit_qc"]["inputs"]["ecal_config"][args.channel] - elif args.tier == "pht": - channel_dict = channel_dict["pars_pht_qc"]["inputs"]["ecal_config"][args.channel] - else: - msg = "invalid tier" - raise ValueError(msg) + channel_dict = channel_dict["pars_hit_qc"]["inputs"]["qc_config"][args.channel] kwarg_dict = 
Props.read_from(channel_dict) + kwarg_dict_cal = kwarg_dict["cal_fields"] + + cut_fields = get_keys( + [ + key.replace(f"{args.channel}/dsp/", "") + for key in ls(args.cal_files[0], f"{args.channel}/dsp/") + ], + kwarg_dict_cal["cut_parameters"], + ) + if "initial_cal_cuts" in kwarg_dict: + init_cal = kwarg_dict["initial_cal_cuts"] + cut_fields += get_keys( + [ + key.replace(f"{args.channel}/dsp/", "") + for key in ls(args.cal_files[0], f"{args.channel}/dsp/") + ], + init_cal["cut_parameters"], + ) + # load data in data, threshold_mask = load_data( - args.files, + args.cal_files, f"{args.channel}/dsp", - hit_dict, - list(kwarg_dict["cut_parameters"]) - + ["timestamp", "trapTmax"], - threshold=kwarg_dict["threshold"], + {}, + [*cut_fields, "timestamp", "trapTmax"], + threshold=kwarg_dict_cal.get("threshold", 0), return_selection_mask=True, cal_energy_param="trapTmax", ) - # get pulser mask from tcm files - with open(args.tcm_filelist) as f: - tcm_files = f.read().splitlines() - tcm_files = sorted(np.unique(tcm_files)) - ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict.pop("pulser_multiplicity_threshold") - ) + if args.pulser_file: + with open(args.pulser_file) as f: + pulser_dict = json.load(f) + mask = np.array(pulser_dict["mask"]) + + elif args.tcm_filelist: + # get pulser mask from tcm files + with open(args.tcm_filelist) as f: + tcm_files = f.read().splitlines() + tcm_files = sorted(np.unique(tcm_files)) + ids, mask = get_tcm_pulser_ids( + tcm_files, args.channel, kwarg_dict["pulser_multiplicity_threshold"] + ) + else: + msg = "No pulser file or tcm filelist provided" + raise ValueError(msg) + data["is_pulser"] = mask[threshold_mask] - hit_dict, plot_dict = generate_cuts( + if "initial_cal_cuts" in kwarg_dict: + init_cal = kwarg_dict["initial_cal_cuts"] + hit_dict_init_cal, plot_dict_init_cal = generate_cut_classifiers( data, - cut_dict, - kwarg_dict.get("rounding",4), + init_cal["cut_parameters"], + init_cal.get("rounding", 4), + display=1 if args.plot_path else 0, + ) + ct_mask = np.full(len(data), True, dtype=bool) + for outname, info in hit_dict_init_cal.items(): + # convert to pandas eval + exp = info["expression"] + for key in info.get("parameters", None): + exp = re.sub(f"(? 
0:
+        fft_data = load_data(
+            args.fft_files,
+            f"{args.channel}/dsp",
+            {},
+            [*list(kwarg_dict_fft["cut_parameters"]), "timestamp", "trapTmax"],
+            threshold=kwarg_dict_fft["threshold"],
+            return_selection_mask=False,
+            cal_energy_param="trapTmax",
+        )
+
+        hit_dict_fft, plot_dict_fft = generate_cut_classifiers(
+            fft_data,
+            kwarg_dict_fft["cut_parameters"],
+            kwarg_dict.get("rounding", 4),
+            display=1 if args.plot_path else 0,
+        )
-    if isinstance(args.save_path, string):
-        save_path = [args.save_path]
     else:
-        save_path = args.save_path
-    for file in save_path
-        pathlib.Path(os.path.dirname(save_path)).mkdir(parents=True, exist_ok=True)
-        with open(file, "w") as f:
-            json.dump(hit_dict, f, indent=4)
+        hit_dict_fft = {}
+        plot_dict_fft = {}
+
+    hit_dict = {**hit_dict_init_cal, **hit_dict_cal, **hit_dict_fft}
+    plot_dict = {**plot_dict_init_cal, **plot_dict_cal, **plot_dict_fft}
+
+    pathlib.Path(os.path.dirname(args.save_path)).mkdir(parents=True, exist_ok=True)
+    with open(args.save_path, "w") as f:
+        json.dump(hit_dict, f, indent=4)
 
     if args.plot_path:
-        if isinstance(args.plot_path, string):
-            plot_path = [args.plot_path]
-        else:
-            plot_path = args.plot_path
-        for file in plot_path:
-            pathlib.Path(os.path.dirname(plot_path)).mkdir(parents=True, exist_ok=True)
-            with open(plot_path, "wb") as f:
-                pkl.dump({"qc":plot_dict}, f, protocol=pkl.HIGHEST_PROTOCOL)
\ No newline at end of file
+        pathlib.Path(os.path.dirname(args.plot_path)).mkdir(parents=True, exist_ok=True)
+        with open(args.plot_path, "wb") as f:
+            pkl.dump({"qc": plot_dict}, f, protocol=pkl.HIGHEST_PROTOCOL)
diff --git a/scripts/pars_pht_aoecal.py b/scripts/pars_pht_aoecal.py
index 49303e7..34fa8f8 100644
--- a/scripts/pars_pht_aoecal.py
+++ b/scripts/pars_pht_aoecal.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import argparse
+import copy
 import json
 import logging
 import os
@@ -9,19 +10,54 @@
 import warnings
 from typing import Callable
 
+os.environ["PYGAMA_PARALLEL"] = "false"
+os.environ["PYGAMA_FASTMATH"] = "false"
+
 import numpy as np
 import pandas as pd
 from legendmeta import LegendMetadata
 from legendmeta.catalog import Props
 from pygama.pargen.AoE_cal import *  # noqa: F403
-from pygama.pargen.AoE_cal import cal_aoe, pol1, sigma_fit, standard_aoe
-from pygama.pargen.utils import get_tcm_pulser_ids, load_data
+from pygama.pargen.AoE_cal import CalAoE, Pol1, SigmaFit, aoe_peak
+from pygama.pargen.data_cleaning import get_tcm_pulser_ids
+from pygama.pargen.utils import load_data
 from util.FileKey import ChannelProcKey, ProcessingFileKey
 
 log = logging.getLogger(__name__)
 warnings.filterwarnings(action="ignore", category=RuntimeWarning)
 
 
+def get_results_dict(aoe_class):
+    result_dict = {}
+    for tstamp in aoe_class.low_side_sfs_by_run:
+        result_dict[tstamp] = {
+            "cal_energy_param": aoe_class.cal_energy_param,
+            "dt_param": aoe_class.dt_param,
+            "rt_correction": aoe_class.dt_corr,
+            "1000-1300keV": aoe_class.timecorr_df.to_dict("index"),
+            "correction_fit_results": aoe_class.energy_corr_res_dict,
+            "low_cut": aoe_class.low_cut_val,
+            "high_cut": aoe_class.high_cut_val,
+            "low_side_sfs": aoe_class.low_side_sfs.to_dict("index"),
+            "2_side_sfs": aoe_class.two_side_sfs.to_dict("index"),
+            "low_side_sfs_by_run": aoe_class.low_side_sfs_by_run[tstamp].to_dict("index"),
+            "2_side_sfs_by_run": aoe_class.two_side_sfs_by_run[tstamp].to_dict("index"),
+        }
+    return result_dict
+
+
+def fill_plot_dict(aoe_class, data, plot_options, plot_dict=None):
+    if plot_dict is None:
+        plot_dict = {}
+    for key, item in plot_options.items():
+        if 
item["options"] is not None: + plot_dict[key] = item["function"](aoe_class, data, **item["options"]) + else: + plot_dict[key] = item["function"](aoe_class, data) + + return plot_dict + + def aoe_calibration( data: pd.Dataframe, cal_dicts: dict, @@ -29,36 +65,34 @@ def aoe_calibration( energy_param: str, cal_energy_param: str, eres_func: Callable, - pdf: Callable = standard_aoe, + pdf: Callable = aoe_peak, selection_string: str = "", dt_corr: bool = False, dep_correct: bool = False, dt_cut: dict | None = None, high_cut_val: int = 3, - mean_func: Callable = pol1, - sigma_func: Callable = sigma_fit, - dep_acc: float = 0.9, + mean_func: Callable = Pol1, + sigma_func: Callable = SigmaFit, + # dep_acc: float = 0.9, dt_param: str = "dt_eff", comptBands_width: int = 20, plot_options: dict | None = None, ): data["AoE_Uncorr"] = data[current_param] / data[energy_param] - aoe = cal_aoe( - cal_dicts, - cal_energy_param, - eres_func, - pdf, - selection_string, - dt_corr, - dep_acc, - dep_correct, - dt_cut, - dt_param, - high_cut_val, - mean_func, - sigma_func, - comptBands_width, - plot_options if plot_options is not None else {}, + aoe = CalAoE( + cal_dicts=cal_dicts, + cal_energy_param=cal_energy_param, + eres_func=eres_func, + pdf=pdf, + selection_string=selection_string, + dt_corr=dt_corr, + dep_correct=dep_correct, + dt_cut=dt_cut, + dt_param=dt_param, + high_cut_val=high_cut_val, + mean_func=mean_func, + sigma_func=sigma_func, + compt_bands_width=comptBands_width, ) aoe.update_cal_dicts( { @@ -70,12 +104,13 @@ def aoe_calibration( ) aoe.calibrate(data, "AoE_Uncorr") log.info("Calibrated A/E") - return cal_dicts, aoe.get_results_dict(), aoe.fill_plot_dict(data), aoe + return cal_dicts, get_results_dict(aoe), fill_plot_dict(aoe, data, plot_options), aoe argparser = argparse.ArgumentParser() argparser.add_argument("--input_files", help="files", type=str, nargs="*", required=True) -argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, nargs="*", required=True) +argparser.add_argument("--pulser_files", help="pulser_file", nargs="*", type=str, required=False) +argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, nargs="*", required=False) argparser.add_argument("--ecal_file", help="ecal_file", type=str, nargs="*", required=True) argparser.add_argument("--eres_file", help="eres_file", type=str, nargs="*", required=True) argparser.add_argument("--inplots", help="eres_file", type=str, nargs="*", required=True) @@ -226,20 +261,28 @@ def run_splitter(files): return_selection_mask=True, ) - # get pulser mask from tcm files - if isinstance(args.tcm_filelist, list): - tcm_files = [] - for file in args.tcm_filelist: + if args.pulser_files: + mask = np.array([], dtype=bool) + for file in args.pulser_files: with open(file) as f: - tcm_files += f.read().splitlines() - else: + pulser_dict = json.load(f) + pulser_mask = np.array(pulser_dict["mask"]) + mask = np.append(mask, pulser_mask) + if "pulser_multiplicity_threshold" in kwarg_dict: + kwarg_dict.pop("pulser_multiplicity_threshold") + + elif args.tcm_filelist: + # get pulser mask from tcm files with open(args.tcm_filelist) as f: tcm_files = f.read().splitlines() + tcm_files = sorted(np.unique(tcm_files)) + ids, mask = get_tcm_pulser_ids( + tcm_files, args.channel, kwarg_dict["pulser_multiplicity_threshold"] + ) + else: + msg = "No pulser file or tcm filelist provided" + raise ValueError(msg) - tcm_files = sorted(np.unique(tcm_files)) - ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, 
kwarg_dict.pop("pulser_multiplicity_threshold") - ) data["is_pulser"] = mask[threshold_mask] for tstamp in cal_dict: @@ -249,19 +292,18 @@ def run_splitter(files): row = pd.DataFrame(row) data = pd.concat([data, row]) - pdf = eval(kwarg_dict.pop("pdf")) if "pdf" in kwarg_dict else standard_aoe + pdf = eval(kwarg_dict.pop("pdf")) if "pdf" in kwarg_dict else aoe_peak - mean_func = eval(kwarg_dict.pop("mean_func")) if "mean_func" in kwarg_dict else pol1 + mean_func = eval(kwarg_dict.pop("mean_func")) if "mean_func" in kwarg_dict else Pol1 - if "sigma_func" in kwarg_dict: - sigma_func = eval(kwarg_dict.pop("sigma_func")) - else: - sigma_func = sigma_fit + sigma_func = eval(kwarg_dict.pop("sigma_func")) if "sigma_func" in kwarg_dict else SigmaFit try: - eres = results_dicts[next(iter(results_dicts))]["partition_ecal"][ - kwarg_dict["cal_energy_param"] - ]["eres_linear"].copy() + eres = copy.deepcopy( + results_dicts[next(iter(results_dicts))]["partition_ecal"][ + kwarg_dict["cal_energy_param"] + ]["eres_linear"] + ) def eres_func(x): return eval(eres["expression"], dict(x=x, **eres["parameters"])) @@ -270,9 +312,11 @@ def eres_func(x): raise RuntimeError except (KeyError, RuntimeError): try: - eres = results_dicts[next(iter(results_dicts))]["ecal"][ - kwarg_dict["cal_energy_param"] - ]["eres_linear"].copy() + eres = copy.deepcopy( + results_dicts[next(iter(results_dicts))]["ecal"][kwarg_dict["cal_energy_param"]][ + "eres_linear" + ] + ) def eres_func(x): return eval(eres["expression"], dict(x=x, **eres["parameters"])) @@ -292,16 +336,16 @@ def eres_func(x): sigma_func=sigma_func, **kwarg_dict, ) - + aoe_obj.pdf = aoe_obj.pdf.name # need to change eres func as can't pickle lambdas try: aoe_obj.eres_func = results_dicts[next(iter(results_dicts))]["partition_ecal"][ kwarg_dict["cal_energy_param"] - ]["eres_linear"].copy() + ]["eres_linear"] except KeyError: aoe_obj.eres_func = {} else: - out_dict = {} + out_dict = {tstamp:None for tstamp in cal_dict} plot_dict = {} aoe_obj = None @@ -346,7 +390,7 @@ def eres_func(x): "pars": {"operations": cal_dict[fk.timestamp]}, "results": dict( **results_dicts[fk.timestamp], - aoe=out_dict, + aoe=out_dict[fk.timestamp], ), } pathlib.Path(os.path.dirname(out)).mkdir(parents=True, exist_ok=True) diff --git a/scripts/pars_pht_lqcal.py b/scripts/pars_pht_lqcal.py index 2e656d6..9937281 100644 --- a/scripts/pars_pht_lqcal.py +++ b/scripts/pars_pht_lqcal.py @@ -8,27 +8,54 @@ import pickle as pkl import warnings +os.environ["PYGAMA_PARALLEL"] = "false" +os.environ["PYGAMA_FASTMATH"] = "false" + import numpy as np import pandas as pd from legendmeta import LegendMetadata from legendmeta.catalog import Props -from pygama.math.peak_fitting import gauss_cdf +from pygama.math.distributions import gaussian +from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.lq_cal import * # noqa: F403 -from pygama.pargen.lq_cal import cal_lq -from pygama.pargen.utils import get_tcm_pulser_ids, load_data +from pygama.pargen.lq_cal import LQCal +from pygama.pargen.utils import load_data from util.FileKey import ChannelProcKey, ProcessingFileKey log = logging.getLogger(__name__) warnings.filterwarnings(action="ignore", category=RuntimeWarning) +def get_results_dict(lq_class): + return { + "cal_energy_param": lq_class.cal_energy_param, + "rt_correction": lq_class.dt_fit_pars, + # "cdf": lq_class.cdf.name, + "1590-1596keV": lq_class.timecorr_df.to_dict("index"), + "cut_value": lq_class.cut_val, + "sfs": lq_class.low_side_sf.to_dict("index"), + } + + +def 
fill_plot_dict(lq_class, data, plot_options, plot_dict=None): + if plot_dict is None: + plot_dict = {} + for key, item in plot_options.items(): + if item["options"] is not None: + plot_dict[key] = item["function"](lq_class, data, **item["options"]) + else: + plot_dict[key] = item["function"](lq_class, data) + + return plot_dict + + def lq_calibration( data: pd.DataFrame, cal_dicts: dict, energy_param: str, cal_energy_param: str, eres_func: callable, - cdf: callable = gauss_cdf, + cdf: callable = gaussian, selection_string: str = "", plot_options: dict | None = None, ): @@ -66,13 +93,12 @@ def lq_calibration( The cal_lq object used for the LQ calibration """ - lq = cal_lq( + lq = LQCal( cal_dicts, cal_energy_param, eres_func, cdf, selection_string, - plot_options, ) data["LQ_Ecorr"] = np.divide(data["lq80"], data[energy_param]) @@ -88,12 +114,13 @@ def lq_calibration( lq.calibrate(data, "LQ_Ecorr") log.info("Calibrated LQ") - return cal_dicts, lq.get_results_dict(), lq.fill_plot_dict(data), lq + return cal_dicts, get_results_dict(lq), fill_plot_dict(lq, data, plot_options), lq argparser = argparse.ArgumentParser() argparser.add_argument("--input_files", help="files", type=str, nargs="*", required=True) -argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, nargs="*", required=True) +argparser.add_argument("--pulser_files", help="pulser_file", type=str, nargs="*", required=False) +argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, nargs="*", required=False) argparser.add_argument("--ecal_file", help="ecal_file", type=str, nargs="*", required=True) argparser.add_argument("--eres_file", help="eres_file", type=str, nargs="*", required=True) argparser.add_argument("--inplots", help="eres_file", type=str, nargs="*", required=True) @@ -233,20 +260,28 @@ def run_splitter(files): return_selection_mask=True, ) - # get pulser mask from tcm files - if isinstance(args.tcm_filelist, list): - tcm_files = [] - for file in args.tcm_filelist: + if args.pulser_files: + mask = np.array([], dtype=bool) + for file in args.pulser_files: with open(file) as f: - tcm_files += f.read().splitlines() - else: + pulser_dict = json.load(f) + pulser_mask = np.array(pulser_dict["mask"]) + mask = np.append(mask, pulser_mask) + if "pulser_multiplicity_threshold" in kwarg_dict: + kwarg_dict.pop("pulser_multiplicity_threshold") + + elif args.tcm_filelist: + # get pulser mask from tcm files with open(args.tcm_filelist) as f: tcm_files = f.read().splitlines() + tcm_files = sorted(np.unique(tcm_files)) + ids, mask = get_tcm_pulser_ids( + tcm_files, args.channel, kwarg_dict["pulser_multiplicity_threshold"] + ) + else: + msg = "No pulser file or tcm filelist provided" + raise ValueError(msg) - tcm_files = sorted(np.unique(tcm_files)) - ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict.pop("pulser_multiplicity_threshold") - ) data["is_pulser"] = mask[threshold_mask] for tstamp in cal_dict: @@ -256,7 +291,7 @@ def run_splitter(files): row = pd.DataFrame(row) data = pd.concat([data, row]) - cdf = eval(kwarg_dict.pop("cdf")) if "cdf" in kwarg_dict else gauss_cdf + cdf = eval(kwarg_dict.pop("cdf")) if "cdf" in kwarg_dict else gaussian try: eres = results_dicts[next(iter(results_dicts))]["partition_ecal"][ diff --git a/scripts/pars_pht_partcal.py b/scripts/pars_pht_partcal.py index f3c926e..e11f965 100644 --- a/scripts/pars_pht_partcal.py +++ b/scripts/pars_pht_partcal.py @@ -1,6 +1,7 @@ from __future__ import annotations import argparse +import copy import json import logging 
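For reference, both the A/E and LQ refactors above replace the old class methods with module-level get_results_dict/fill_plot_dict helpers that all consume the same plot_options contract: a mapping of plot name to {"function": callable, "options": dict or None}. A minimal self-contained sketch of that contract follows; dummy_plot is a hypothetical stand-in for the real pygama plotting functions and is purely illustrative.

import pandas as pd

def dummy_plot(cal_class, data, bins=100):
    # stand-in: a real plot option would return a matplotlib figure
    return f"{len(data)} events, {bins} bins"

def fill_plot_dict(cal_class, data, plot_options, plot_dict=None):
    # mirrors the helpers above: call each entry's function, forwarding
    # its optional keyword arguments when present
    if plot_dict is None:
        plot_dict = {}
    for key, item in plot_options.items():
        if item["options"] is not None:
            plot_dict[key] = item["function"](cal_class, data, **item["options"])
        else:
            plot_dict[key] = item["function"](cal_class, data)
    return plot_dict

plot_options = {"spectrum": {"function": dummy_plot, "options": {"bins": 50}}}
plots = fill_plot_dict(None, pd.DataFrame({"e": [1.0, 2.0]}), plot_options)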
import os @@ -9,19 +10,43 @@ import re import warnings +os.environ["PYGAMA_PARALLEL"] = "false" +os.environ["PYGAMA_FASTMATH"] = "false" + import numpy as np import pandas as pd +import pygama.math.distributions as pgf +import pygama.math.histogram as pgh from legendmeta import LegendMetadata from legendmeta.catalog import Props -from pygama.pargen.ecal_th import * # noqa: F403 -from pygama.pargen.ecal_th import high_stats_fitting -from pygama.pargen.utils import get_tcm_pulser_ids, load_data +from pygama.math.distributions import nb_poly +from pygama.pargen.data_cleaning import get_tcm_pulser_ids +from pygama.pargen.energy_cal import FWHMLinear, FWHMQuadratic, HPGeCalibration +from pygama.pargen.utils import load_data from util.FileKey import ChannelProcKey, ProcessingFileKey log = logging.getLogger(__name__) warnings.filterwarnings(action="ignore", category=RuntimeWarning) +def run_splitter(files): + """ + Returns list containing lists of each run + """ + + runs = [] + run_files = [] + for file in files: + fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(file)) + if f"{fk.period}-{fk.run}" not in runs: + runs.append(f"{fk.period}-{fk.run}") + run_files.append([]) + for i, run in enumerate(runs): + if run == f"{fk.period}-{fk.run}": + run_files[i].append(file) + return run_files + + def update_cal_dicts(cal_dicts, update_dict): if re.match(r"(\d{8})T(\d{6})Z", next(iter(cal_dicts))): for tstamp in cal_dicts: @@ -33,398 +58,393 @@ def update_cal_dicts(cal_dicts, update_dict): cal_dicts.update(update_dict) return cal_dicts -def get_results_dict(ecal_class, data): - if ecal_class.results: - fwhm_linear = ecal_class.fwhm_fit_linear.copy() - fwhm_linear["parameters"] = fwhm_linear["parameters"].to_dict() - fwhm_linear["uncertainties"] = fwhm_linear["uncertainties"].to_dict() - fwhm_linear["cov"] = fwhm_linear["cov"].tolist() - fwhm_quad = ecal_class.fwhm_fit_quadratic.copy() - fwhm_quad["parameters"] = fwhm_quad["parameters"].to_dict() - fwhm_quad["uncertainties"] = fwhm_quad["uncertainties"].to_dict() - fwhm_quad["cov"] = fwhm_quad["cov"].tolist() - - pk_dict = { - Ei: { - "function": func_i.__name__, - "module": func_i.__module__, - "parameters_in_keV": parsi.to_dict(), - "uncertainties_in_keV": errorsi.to_dict(), - "p_val": pvali, - "fwhm_in_keV": list(fwhmi), - "pk_position":(posi, posuni), - } - for i, (Ei, parsi, errorsi, pvali, fwhmi, posi, posuni, func_i) in enumerate( - zip( - ecal_class.results["fitted_keV"], - ecal_class.results["pk_pars"][ecal_class.results["pk_validities"]], - ecal_class.results["pk_errors"][ecal_class.results["pk_validities"]], - ecal_class.results["pk_pvals"][ecal_class.results["pk_validities"]], - ecal_class.results["pk_fwhms"], - ecal_class.results["pk_pos"], - ecal_class.results["pk_pos_uncertainties"], - ecal_class.funcs, - ) - ) - } + +def bin_spectrum( + data, + cal_energy_param, + selection_string, + cut_field="is_valid_cal", + pulser_field="is_pulser", + erange=(0, 3000), + dx=2, +): + bins = np.arange(erange[0], erange[1] + dx, dx) + return { + "bins": pgh.get_bin_centers(bins), + "counts": np.histogram(data.query(selection_string)[cal_energy_param], bins)[0], + "cut_counts": np.histogram( + data.query(f"(~{cut_field})&(~{pulser_field})")[cal_energy_param], + bins, + )[0], + "pulser_counts": np.histogram( + data.query(pulser_field)[cal_energy_param], + bins, + )[0], + } + + +def get_results_dict(ecal_class, data, cal_energy_param, selection_string): + if np.isnan(ecal_class.pars).all(): + return {} + else: + results_dict = 
copy.deepcopy(ecal_class.results["hpge_fit_energy_peaks"]) + + if "FWHMLinear" in results_dict: + fwhm_linear = results_dict["FWHMLinear"] + fwhm_linear["function"] = fwhm_linear["function"].__name__ + fwhm_linear["parameters"] = fwhm_linear["parameters"].to_dict() + fwhm_linear["uncertainties"] = fwhm_linear["uncertainties"].to_dict() + fwhm_linear["cov"] = fwhm_linear["cov"].tolist() + else: + fwhm_linear = None + + if "FWHMQuadratic" in results_dict: + fwhm_quad = results_dict["FWHMQuadratic"] + fwhm_quad["function"] = fwhm_quad["function"].__name__ + fwhm_quad["parameters"] = fwhm_quad["parameters"].to_dict() + fwhm_quad["uncertainties"] = fwhm_quad["uncertainties"].to_dict() + fwhm_quad["cov"] = fwhm_quad["cov"].tolist() + else: + fwhm_quad = None + + pk_dict = results_dict["peak_parameters"] + + for _, dic in pk_dict.items(): + dic["function"] = dic["function"].name + dic["parameters"] = dic["parameters"].to_dict() + dic["uncertainties"] = dic["uncertainties"].to_dict() + dic.pop("covariance") return { + "total_fep": len(data.query(f"{cal_energy_param}>2604&{cal_energy_param}<2624")), + "total_dep": len(data.query(f"{cal_energy_param}>1587&{cal_energy_param}<1597")), + "pass_fep": len( + data.query(f"{cal_energy_param}>2604&{cal_energy_param}<2624&{selection_string}") + ), + "pass_dep": len( + data.query(f"{cal_energy_param}>1587&{cal_energy_param}<1597&{selection_string}") + ), "eres_linear": fwhm_linear, "eres_quadratic": fwhm_quad, - "fitted_peaks": ecal_class.results["fitted_keV"].tolist(), + "fitted_peaks": ecal_class.peaks_kev.tolist(), "pk_fits": pk_dict, + "peak_param":results_dict["peak_param"] } - else: - return {} - -def partition_energy_cal_th( - data: pd.Datframe, - hit_dicts: dict, - energy_params: list[str], - selection_string: str = "", - threshold: int = 0, - p_val: float = 0, - plot_options: dict | None = None, - simplex: bool = True, - tail_weight: int = 20, - cal_energy_params: list = None, - deg:int=2, -) -> tuple(dict, dict, dict, dict): - results_dict = {} - plot_dict = {} - full_object_dict = {} - if cal_energy_params is None: - cal_energy_params = [energy_param + "_cal" for energy_param in energy_params] - glines = [ - 238.632, - 511, - 583.191, - 727.330, - 763, - 785, - 860.564, - 893, - 1079, - 1513, - 1592.53, - 1620.50, - 2103.53, - 2614.50, - 3125, - 3198, - 3474, - ] # gamma lines used for calibration - range_keV = [ - (10, 10), - (30, 30), - (30, 30), - (30, 30), - (30, 15), - (15, 30), - (30, 25), - (25, 30), - (30, 30), - (30, 30), - (30, 20), - (20, 30), - (30, 30), - (30, 30), - (30, 30), - (30, 30), - (30, 30), - ] # side bands width - funcs = [ - pgf.extended_gauss_step_pdf, # probably should be gauss on exp - pgf.extended_gauss_step_pdf, - pgf.extended_radford_pdf, - pgf.extended_radford_pdf, - pgf.extended_gauss_step_pdf, - pgf.extended_gauss_step_pdf, - pgf.extended_radford_pdf, - pgf.extended_gauss_step_pdf, - pgf.extended_gauss_step_pdf, - pgf.extended_gauss_step_pdf, - pgf.extended_radford_pdf, - pgf.extended_radford_pdf, - pgf.extended_radford_pdf, - pgf.extended_radford_pdf, - pgf.extended_gauss_step_pdf, - pgf.extended_gauss_step_pdf, - pgf.extended_gauss_step_pdf, - ] - gof_funcs = [ - pgf.gauss_step_pdf, - pgf.gauss_step_pdf, - pgf.radford_pdf, - pgf.radford_pdf, - pgf.gauss_step_pdf, - pgf.gauss_step_pdf, - pgf.radford_pdf, - pgf.gauss_step_pdf, - pgf.gauss_step_pdf, - pgf.gauss_step_pdf, - pgf.radford_pdf, - pgf.radford_pdf, - pgf.radford_pdf, - pgf.radford_pdf, - pgf.gauss_step_pdf, - pgf.gauss_step_pdf, - pgf.gauss_step_pdf, - ] - 
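The .to_dict()/.tolist()/__name__ juggling in the get_results_dict rewrite above exists because the new calibration results carry callables, pandas Series and numpy arrays, none of which json.dump can encode. A minimal sketch of the same flattening, with made-up fit values for illustration only:

import json

import numpy as np
import pandas as pd

def hpge_peak():
    # stand-in for a pdf object carried in the fit results
    pass

peak_result = {
    "function": hpge_peak,
    "parameters": pd.Series({"mu": 2614.5, "sigma": 1.2}),
    "uncertainties": pd.Series({"mu": 0.1, "sigma": 0.05}),
    "covariance": np.eye(2),
}

flat = {
    "function": peak_result["function"].__name__,
    "parameters": peak_result["parameters"].to_dict(),
    "uncertainties": peak_result["uncertainties"].to_dict(),
    # covariance is dropped, as with the pop("covariance") above:
    # it is bulky and not needed downstream
}
print(json.dumps(flat, indent=2))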
- for energy_param, cal_energy_param in zip(energy_params, cal_energy_params): - full_object_dict[cal_energy_param] = high_stats_fitting( - energy_param=energy_param, - glines=glines, - range_keV=range_keV, - funcs=funcs, - gof_funcs=gof_funcs, - selection_string=selection_string, - threshold=threshold, - p_val=p_val, - plot_options=plot_options, - simplex=simplex, - tail_weight=tail_weight, - cal_energy_param=cal_energy_param, - deg=deg, - fixed={1:1} - ) - full_object_dict[cal_energy_param].update_calibration(data) - results_dict[cal_energy_param] = get_results_dict(full_object_dict[cal_energy_param], data) - hit_dicts = update_cal_dicts(hit_dicts, full_object_dict[cal_energy_param].hit_dict) - if full_object_dict[cal_energy_param].results: - plot_dict[cal_energy_param] = full_object_dict[cal_energy_param].fill_plot_dict(data).copy() - log.info("Finished all calibrations") - return hit_dicts, results_dict, plot_dict, full_object_dict +if __name__ == "__main__": + argparser = argparse.ArgumentParser() + argparser.add_argument("--input_files", help="files", type=str, nargs="*", required=True) + argparser.add_argument( + "--pulser_files", help="pulser_file", nargs="*", type=str, required=False + ) + argparser.add_argument( + "--tcm_filelist", help="tcm_filelist", type=str, nargs="*", required=False + ) + argparser.add_argument("--ecal_file", help="ecal_file", type=str, nargs="*", required=True) + argparser.add_argument("--eres_file", help="eres_file", type=str, nargs="*", required=True) + argparser.add_argument("--inplots", help="eres_file", type=str, nargs="*", required=True) + + argparser.add_argument("--configs", help="configs", type=str, required=True) + argparser.add_argument("--timestamp", help="Datatype", type=str, required=True) + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) + argparser.add_argument("--channel", help="Channel", type=str, required=True) + + argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--metadata", help="metadata path", type=str, required=True) + + argparser.add_argument("--plot_file", help="plot_file", type=str, nargs="*", required=False) + argparser.add_argument("--hit_pars", help="hit_pars", nargs="*", type=str) + argparser.add_argument("--fit_results", help="fit_results", nargs="*", type=str) + args = argparser.parse_args() + + logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") + logging.getLogger("numba").setLevel(logging.INFO) + logging.getLogger("parse").setLevel(logging.INFO) + logging.getLogger("lgdo").setLevel(logging.INFO) + logging.getLogger("h5py").setLevel(logging.INFO) + logging.getLogger("matplotlib").setLevel(logging.INFO) + logging.getLogger("legendmeta").setLevel(logging.INFO) + + meta = LegendMetadata(path=args.metadata) + chmap = meta.channelmap(args.timestamp) + + det_status = chmap.map("daq.rawid")[int(args.channel[2:])]["analysis"]["usability"] + + configs = LegendMetadata(path=args.configs) + channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ + "pars_pht_partcal" + ]["inputs"]["pars_pht_partcal_config"][args.channel] + + kwarg_dict = Props.read_from(channel_dict) + + cal_dict = {} + results_dicts = {} + if isinstance(args.ecal_file, list): + for ecal in args.ecal_file: + cal = Props.read_from(ecal) -argparser = argparse.ArgumentParser() -argparser.add_argument("--input_files", help="files", type=str, nargs="*", required=True) -argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, nargs="*", 
required=True) -argparser.add_argument("--ecal_file", help="ecal_file", type=str, nargs="*", required=True) -argparser.add_argument("--eres_file", help="eres_file", type=str, nargs="*", required=True) -argparser.add_argument("--inplots", help="eres_file", type=str, nargs="*", required=True) - -argparser.add_argument("--configs", help="configs", type=str, required=True) -argparser.add_argument("--timestamp", help="Datatype", type=str, required=True) -argparser.add_argument("--datatype", help="Datatype", type=str, required=True) -argparser.add_argument("--channel", help="Channel", type=str, required=True) + fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + cal_dict[fk.timestamp] = cal["pars"] + results_dicts[fk.timestamp] = cal["results"] + else: + cal = Props.read_from(args.ecal_file) -argparser.add_argument("--log", help="log_file", type=str) + fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(args.ecal_file)) + cal_dict[fk.timestamp] = cal["pars"] + results_dicts[fk.timestamp] = cal["results"] -argparser.add_argument("--plot_file", help="plot_file", type=str, nargs="*", required=False) -argparser.add_argument("--hit_pars", help="hit_pars", nargs="*", type=str) -argparser.add_argument("--fit_results", help="fit_results", nargs="*", type=str) -args = argparser.parse_args() + object_dict = {} + if isinstance(args.eres_file, list): + for ecal in args.eres_file: + with open(ecal, "rb") as o: + cal = pkl.load(o) + fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + object_dict[fk.timestamp] = cal + else: + with open(args.eres_file, "rb") as o: + cal = pkl.load(o) + fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(args.eres_file)) + object_dict[fk.timestamp] = cal -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") -logging.getLogger("numba").setLevel(logging.INFO) -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("h5py").setLevel(logging.INFO) -logging.getLogger("matplotlib").setLevel(logging.INFO) -logging.getLogger("legendmeta").setLevel(logging.INFO) + inplots_dict = {} + if args.inplots: + if isinstance(args.inplots, list): + for ecal in args.inplots: + with open(ecal, "rb") as o: + cal = pkl.load(o) + fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + inplots_dict[fk.timestamp] = cal + else: + with open(args.inplots, "rb") as o: + cal = pkl.load(o) + fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(args.inplots)) + inplots_dict[fk.timestamp] = cal + if "plot_options" in kwarg_dict: + for field, item in kwarg_dict["plot_options"].items(): + kwarg_dict["plot_options"][field]["function"] = eval(item["function"]) -def run_splitter(files): - """ - Returns list containing lists of each run - """ + # sort files in dictionary where keys are first timestamp from run + if isinstance(args.input_files, list): + files = [] + for file in args.input_files: + with open(file) as f: + files += f.read().splitlines() + else: + with open(args.input_files) as f: + files = f.read().splitlines() + + files = sorted( + np.unique(files) + ) # need this as sometimes files get double counted as it somehow puts in the p%-* filelist and individual runs also + + final_dict = {} + all_file = run_splitter(sorted(files)) + for filelist in all_file: + fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(sorted(filelist)[0])) + timestamp = fk.timestamp + final_dict[timestamp] = sorted(filelist) + + params = [ + 
kwarg_dict["final_cut_field"], + "timestamp", + ] + params += kwarg_dict["energy_params"] + + # load data in + data, threshold_mask = load_data( + final_dict, + f"{args.channel}/dsp", + cal_dict, + params=params, + threshold=kwarg_dict["threshold"], + return_selection_mask=True, + cal_energy_param=kwarg_dict["energy_params"][0], + ) + + if args.pulser_files: + mask = np.array([], dtype=bool) + for file in args.pulser_files: + with open(file) as f: + pulser_dict = json.load(f) + pulser_mask = np.array(pulser_dict["mask"]) + mask = np.append(mask, pulser_mask) + if "pulser_multiplicity_threshold" in kwarg_dict: + kwarg_dict.pop("pulser_multiplicity_threshold") + + elif args.tcm_filelist: + # get pulser mask from tcm files + with open(args.tcm_filelist) as f: + tcm_files = f.read().splitlines() + tcm_files = sorted(np.unique(tcm_files)) + ids, mask = get_tcm_pulser_ids( + tcm_files, args.channel, kwarg_dict["pulser_multiplicity_threshold"] + ) + else: + msg = "No pulser file or tcm filelist provided" + raise ValueError(msg) + + data["is_pulser"] = mask[threshold_mask] + + for tstamp in cal_dict: + if tstamp not in np.unique(data["run_timestamp"]): + row = {key: [False] if data.dtypes[key] == "bool" else [np.nan] for key in data} + row["run_timestamp"] = tstamp + row = pd.DataFrame(row) + data = pd.concat([data, row]) + + pk_pars = [ + (238.632, (10, 10), pgf.gauss_on_step), + (511, (30, 30), pgf.gauss_on_step), + (583.191, (30, 30), pgf.hpge_peak), + (727.330, (30, 30), pgf.hpge_peak), + (763, (30, 15), pgf.gauss_on_step), + (785, (15, 30), pgf.gauss_on_step), + (860.564, (30, 25), pgf.hpge_peak), + (893, (25, 30), pgf.gauss_on_step), + (1079, (30, 30), pgf.gauss_on_step), + (1513, (30, 30), pgf.gauss_on_step), + (1592.53, (30, 20), pgf.hpge_peak), + (1620.50, (20, 30), pgf.hpge_peak), + (2103.53, (30, 30), pgf.hpge_peak), + (2614.50, (30, 30), pgf.hpge_peak), + (3125, (30, 30), pgf.gauss_on_step), + (3198, (30, 30), pgf.gauss_on_step), + (3474, (30, 30), pgf.gauss_on_step), + ] - runs = [] - run_files = [] - for file in files: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(file)) - if f"{fk.period}-{fk.run}" not in runs: - runs.append(f"{fk.period}-{fk.run}") - run_files.append([]) - for i, run in enumerate(runs): - if run == f"{fk.period}-{fk.run}": - run_files[i].append(file) - return run_files + glines = [pk_par[0] for pk_par in pk_pars] + if "cal_energy_params" not in kwarg_dict: + cal_energy_params = [energy_param + "_cal" for energy_param in kwarg_dict["energy_params"]] + else: + cal_energy_params = kwarg_dict["cal_energy_params"] -configs = LegendMetadata(path=args.configs) -channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ - "pars_pht_partcal" -]["inputs"]["pars_pht_partcal_config"][args.channel] + selection_string = f"~is_pulser&{kwarg_dict['final_cut_field']}" -kwarg_dict = Props.read_from(channel_dict) + ecal_results = {} + plot_dict = {} + full_object_dict = {} -cal_dict = {} -results_dicts = {} -if isinstance(args.ecal_file, list): - for ecal in args.ecal_file: - cal = Props.read_from(ecal) + for energy_param, cal_energy_param in zip(kwarg_dict["energy_params"], cal_energy_params): + energy = data.query(selection_string)[energy_param].to_numpy() + full_object_dict[cal_energy_param] = HPGeCalibration( + energy_param, glines, 1, kwarg_dict.get("deg", 0), fixed={1: 1} + ) + full_object_dict[cal_energy_param].hpge_get_energy_peaks(energy) + full_object_dict[cal_energy_param].hpge_fit_energy_peaks( + energy, + peak_pars=pk_pars, + 
tail_weight=kwarg_dict.get("tail_weight", 0), + n_events=kwarg_dict.get("n_events", None), + allowed_p_val=kwarg_dict.get("p_val", 0), + update_cal_pars=bool(det_status == "on"), + ) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) - cal_dict[fk.timestamp] = cal["pars"] - results_dicts[fk.timestamp] = cal["results"] -else: - cal = Props.read_from(args.ecal_file) + full_object_dict[cal_energy_param].get_energy_res_curve( + FWHMLinear, + interp_energy_kev={"Qbb": 2039.0}, + ) + full_object_dict[cal_energy_param].get_energy_res_curve( + FWHMQuadratic, + interp_energy_kev={"Qbb": 2039.0}, + ) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(args.ecal_file)) - cal_dict[fk.timestamp] = cal["pars"] - results_dicts[fk.timestamp] = cal["results"] + data[cal_energy_param] = nb_poly( + data[energy_param].to_numpy(), full_object_dict[cal_energy_param].pars + ) -object_dict = {} -if isinstance(args.eres_file, list): - for ecal in args.eres_file: - with open(ecal, "rb") as o: - cal = pkl.load(o) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) - object_dict[fk.timestamp] = cal -else: - with open(args.eres_file, "rb") as o: - cal = pkl.load(o) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(args.eres_file)) - object_dict[fk.timestamp] = cal - -inplots_dict = {} -if args.inplots: - if isinstance(args.inplots, list): - for ecal in args.inplots: - with open(ecal, "rb") as o: - cal = pkl.load(o) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) - inplots_dict[fk.timestamp] = cal - else: - with open(args.inplots, "rb") as o: - cal = pkl.load(o) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(args.inplots)) - inplots_dict[fk.timestamp] = cal - - -if "plot_options" in kwarg_dict: - for field, item in kwarg_dict["plot_options"].items(): - kwarg_dict["plot_options"][field]["function"] = eval(item["function"]) - - -# sort files in dictionary where keys are first timestamp from run -if isinstance(args.input_files, list): - files = [] - for file in args.input_files: - with open(file) as f: - files += f.read().splitlines() -else: - with open(args.input_files) as f: - files = f.read().splitlines() - -files = sorted( - np.unique(files) -) # need this as sometimes files get double counted as it somehow puts in the p%-* filelist and individual runs also - -final_dict = {} -all_file = run_splitter(sorted(files)) -for filelist in all_file: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(sorted(filelist)[0])) - timestamp = fk.timestamp - final_dict[timestamp] = sorted(filelist) - -params = [ - kwarg_dict["final_cut_field"], - "timestamp", -] -params += kwarg_dict["energy_params"] - -# load data in -data, threshold_mask = load_data( - final_dict, - f"{args.channel}/dsp", - cal_dict, - params=params, - threshold=kwarg_dict["threshold"], - return_selection_mask=True, - cal_energy_param=kwarg_dict["energy_params"][0], -) - -# get pulser mask from tcm files -if isinstance(args.tcm_filelist, list): - tcm_files = [] - for file in args.tcm_filelist: - with open(file) as f: - tcm_files += f.read().splitlines() -else: - with open(args.tcm_filelist) as f: - tcm_files = f.read().splitlines() - -tcm_files = sorted(np.unique(tcm_files)) -ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict.pop("pulser_multiplicity_threshold") -) -data["is_pulser"] = mask[threshold_mask] - -for tstamp in cal_dict: - if tstamp not in np.unique(data["run_timestamp"]): - row = {key: [False] if data.dtypes[key] == 
"bool" else [np.nan] for key in data} - row["run_timestamp"] = tstamp - row = pd.DataFrame(row) - data = pd.concat([data, row]) - -# run energy supercal -hit_dicts, ecal_results, plot_dict, ecal_obj = partition_energy_cal_th( - data, - cal_dict, - selection_string=f"{kwarg_dict.pop('final_cut_field')}&(~is_pulser)", - **kwarg_dict, -) + ecal_results[cal_energy_param] = get_results_dict( + full_object_dict[cal_energy_param], data, cal_energy_param, selection_string + ) + cal_dict = update_cal_dicts( + cal_dict, {cal_energy_param: full_object_dict[cal_energy_param].gen_pars_dict()} + ) -if args.plot_file: - common_dict = plot_dict.pop("common") if "common" in list(plot_dict) else None + if args.plot_file: + param_plot_dict = {} + if ~np.isnan(full_object_dict[cal_energy_param].pars).all(): + param_plot_dict["fwhm_fit"] = full_object_dict[cal_energy_param].plot_eres_fit( + energy + ) + param_plot_dict["cal_fit"] = full_object_dict[cal_energy_param].plot_cal_fit( + energy + ) + param_plot_dict["peak_fits"] = full_object_dict[cal_energy_param].plot_fits( + energy, ncols=4, nrows=5 + ) - if isinstance(args.plot_file, list): - for plot_file in args.plot_file: - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(plot_file)) + if "plot_options" in kwarg_dict: + for key, item in kwarg_dict["plot_options"].items(): + if item["options"] is not None: + param_plot_dict[key] = item["function"]( + data, + cal_energy_param, + selection_string, + **item["options"], + ) + else: + param_plot_dict[key] = item["function"]( + data, + cal_energy_param, + selection_string, + ) + plot_dict[cal_energy_param] = param_plot_dict + + for peak_dict in full_object_dict[cal_energy_param].results["hpge_fit_energy_peaks"]["peak_parameters"].values(): + peak_dict["function"] = peak_dict["function"].name + peak_dict["parameters"] = peak_dict["parameters"].to_dict() + peak_dict["uncertainties"] = peak_dict["uncertainties"].to_dict() + + if args.plot_file: + common_dict = plot_dict.pop("common") if "common" in list(plot_dict) else None + + if isinstance(args.plot_file, list): + for plot_file in args.plot_file: + fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(plot_file)) + if args.inplots: + out_plot_dict = inplots_dict[fk.timestamp] + out_plot_dict.update({"partition_ecal": plot_dict}) + else: + out_plot_dict = {"partition_ecal": plot_dict} + + if "common" in list(out_plot_dict) and common_dict is not None: + out_plot_dict["common"].update(common_dict) + elif common_dict is not None: + out_plot_dict["common"] = common_dict + + pathlib.Path(os.path.dirname(plot_file)).mkdir(parents=True, exist_ok=True) + with open(plot_file, "wb") as w: + pkl.dump(out_plot_dict, w, protocol=pkl.HIGHEST_PROTOCOL) + else: if args.inplots: + fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(args.plot_file)) out_plot_dict = inplots_dict[fk.timestamp] out_plot_dict.update({"partition_ecal": plot_dict}) else: out_plot_dict = {"partition_ecal": plot_dict} - if "common" in list(out_plot_dict) and common_dict is not None: out_plot_dict["common"].update(common_dict) elif common_dict is not None: out_plot_dict["common"] = common_dict - - pathlib.Path(os.path.dirname(plot_file)).mkdir(parents=True, exist_ok=True) - with open(plot_file, "wb") as w: + pathlib.Path(os.path.dirname(args.plot_file)).mkdir(parents=True, exist_ok=True) + with open(args.plot_file, "wb") as w: pkl.dump(out_plot_dict, w, protocol=pkl.HIGHEST_PROTOCOL) - else: - if args.inplots: - fk = 
ChannelProcKey.get_filekey_from_pattern(os.path.basename(args.plot_file)) - out_plot_dict = inplots_dict[fk.timestamp] - out_plot_dict.update({"partition_ecal": plot_dict}) - else: - out_plot_dict = {"partition_ecal": plot_dict} - if "common" in list(out_plot_dict) and common_dict is not None: - out_plot_dict["common"].update(common_dict) - elif common_dict is not None: - out_plot_dict["common"] = common_dict - pathlib.Path(os.path.dirname(args.plot_file)).mkdir(parents=True, exist_ok=True) - with open(args.plot_file, "wb") as w: - pkl.dump(out_plot_dict, w, protocol=pkl.HIGHEST_PROTOCOL) - - -for out in sorted(args.hit_pars): - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(out)) - final_hit_dict = { - "pars": hit_dicts[fk.timestamp], - "results": { - "ecal": results_dicts[fk.timestamp], - "partition_ecal": ecal_results, - }, - } - pathlib.Path(os.path.dirname(out)).mkdir(parents=True, exist_ok=True) - with open(out, "w") as w: - json.dump(final_hit_dict, w, indent=4) - -for out in args.fit_results: - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(out)) - final_object_dict = { - "ecal": object_dict[fk.timestamp], - "partition_ecal": ecal_obj, - } - pathlib.Path(os.path.dirname(out)).mkdir(parents=True, exist_ok=True) - with open(out, "wb") as w: - pkl.dump(final_object_dict, w, protocol=pkl.HIGHEST_PROTOCOL) + + for out in sorted(args.hit_pars): + fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(out)) + final_hit_dict = { + "pars": cal_dict[fk.timestamp], + "results": dict(**results_dicts[fk.timestamp], partition_ecal= ecal_results) + } + pathlib.Path(os.path.dirname(out)).mkdir(parents=True, exist_ok=True) + with open(out, "w") as w: + json.dump(final_hit_dict, w, indent=4) + + for out in args.fit_results: + fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(out)) + final_object_dict = dict(**object_dict[fk.timestamp], partition_ecal = full_object_dict) + pathlib.Path(os.path.dirname(out)).mkdir(parents=True, exist_ok=True) + with open(out, "wb") as w: + pkl.dump(final_object_dict, w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_pht_qc.py b/scripts/pars_pht_qc.py new file mode 100644 index 0000000..c9801be --- /dev/null +++ b/scripts/pars_pht_qc.py @@ -0,0 +1,224 @@ +from __future__ import annotations + +import argparse +import json +import logging +import os +import pathlib +import pickle as pkl +import re +import warnings + +os.environ["PYGAMA_PARALLEL"] = "false" +os.environ["PYGAMA_FASTMATH"] = "false" + +import numpy as np +from legendmeta import LegendMetadata +from legendmeta.catalog import Props +from lgdo.lh5 import ls +from pygama.pargen.data_cleaning import get_tcm_pulser_ids, generate_cuts, get_keys, generate_cut_classifiers +from pygama.pargen.utils import load_data +from util.FileKey import ChannelProcKey, ProcessingFileKey + +log = logging.getLogger(__name__) + +warnings.filterwarnings(action="ignore", category=RuntimeWarning) + + +if __name__ == "__main__": + argparser = argparse.ArgumentParser() + argparser.add_argument("--cal_files", help="cal_files", nargs="*", type=str) + argparser.add_argument("--fft_files", help="fft_files", nargs="*", type=str) + argparser.add_argument("--tcm_filelist", help="tcm_filelist", nargs="*", type=str, required=False) + argparser.add_argument("--pulser_files", help="pulser_file", nargs="*", type=str, required=False) + + argparser.add_argument("--configs", help="config", type=str, required=True) + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) 
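The same pulser-mask resolution recurs in every calibration script touched by this patch, including pars_pht_qc.py below: prefer the precomputed pulser JSON files, fall back to deriving the ids from the tcm files, and fail loudly if neither is given. A condensed sketch of that selection logic; the function name load_pulser_mask is mine, and the id finder (in the scripts, get_tcm_pulser_ids) is injected so the snippet stays self-contained:

import json

import numpy as np

def load_pulser_mask(pulser_files, tcm_filelist, channel, threshold, get_ids):
    if pulser_files:
        # masks from the per-run pulser files are simply concatenated
        mask = np.array([], dtype=bool)
        for file in pulser_files:
            with open(file) as f:
                mask = np.append(mask, np.array(json.load(f)["mask"]))
        return mask
    if tcm_filelist:
        # fall back to recomputing the pulser ids from the tcm files
        with open(tcm_filelist) as f:
            tcm_files = sorted(np.unique(f.read().splitlines()))
        _, mask = get_ids(tcm_files, channel, threshold)
        return mask
    msg = "No pulser file or tcm filelist provided"
    raise ValueError(msg)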
+ argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) + argparser.add_argument("--channel", help="Channel", type=str, required=True) + + argparser.add_argument("--log", help="log_file", type=str) + + argparser.add_argument("--plot_path", help="plot_path", type=str, nargs="*", required=False) + argparser.add_argument("--save_path", help="save_path", type=str, nargs="*", ) + args = argparser.parse_args() + + logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") + logging.getLogger("numba").setLevel(logging.INFO) + logging.getLogger("parse").setLevel(logging.INFO) + logging.getLogger("lgdo").setLevel(logging.INFO) + logging.getLogger("h5py").setLevel(logging.INFO) + logging.getLogger("matplotlib").setLevel(logging.INFO) + logging.getLogger("legendmeta").setLevel(logging.INFO) + + # get metadata dictionary + configs = LegendMetadata(path=args.configs) + channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"] + channel_dict = channel_dict["pars_pht_qc"]["inputs"]["qc_config"][args.channel] + + + # sort files in dictionary where keys are first timestamp from run + if isinstance(args.cal_files, list): + cal_files = [] + for file in args.cal_files: + with open(file) as f: + cal_files += f.read().splitlines() + else: + with open(args.cal_files) as f: + cal_files = f.read().splitlines() + + cal_files = sorted( + np.unique(cal_files) + ) # need this as sometimes files get double counted as it somehow puts in the p%-* filelist and individual runs also + + + + kwarg_dict = Props.read_from(channel_dict) + kwarg_dict_cal = kwarg_dict["cal_fields"] + + cut_fields = get_keys( + [ + key.replace(f"{args.channel}/dsp/", "") + for key in ls(cal_files[0], f"{args.channel}/dsp/") + ], + kwarg_dict_cal["cut_parameters"], + ) + if "initial_cal_cuts" in kwarg_dict: + init_cal = kwarg_dict["initial_cal_cuts"] + cut_fields += get_keys( + [ + key.replace(f"{args.channel}/dsp/", "") + for key in ls(cal_files[0], f"{args.channel}/dsp/") + ], + init_cal["cut_parameters"], + ) + + # load data in + data, threshold_mask = load_data( + cal_files, + f"{args.channel}/dsp", + {}, + [*cut_fields, "timestamp", "trapTmax"], + threshold=kwarg_dict_cal.get("threshold", 0), + return_selection_mask=True, + cal_energy_param="trapTmax", + ) + + if args.pulser_files: + mask = np.array([], dtype=bool) + for file in args.pulser_files: + with open(file, 'r') as f: + pulser_dict = json.load(f) + pulser_mask = np.array(pulser_dict["mask"]) + mask = np.append(mask, pulser_mask) + if "pulser_multiplicity_threshold" in kwarg_dict: + kwarg_dict.pop("pulser_multiplicity_threshold") + + elif args.tcm_filelist: + # get pulser mask from tcm files + with open(args.tcm_filelist) as f: + tcm_files = f.read().splitlines() + tcm_files = sorted(np.unique(tcm_files)) + ids, mask = get_tcm_pulser_ids( + tcm_files, args.channel, kwarg_dict["pulser_multiplicity_threshold"] + ) + else: + msg = "No pulser file or tcm filelist provided" + raise ValueError(msg) + + data["is_pulser"] = mask[threshold_mask] + + if "initial_cal_cuts" in kwarg_dict: + init_cal = kwarg_dict["initial_cal_cuts"] + hit_dict_init_cal, plot_dict_init_cal = generate_cut_classifiers( + data, + init_cal["cut_parameters"], + init_cal.get("rounding", 4), + display=1 if args.plot_path else 0, + ) + ct_mask = np.full(len(data), True, dtype=bool) + for outname, info in hit_dict_init_cal.items(): + # convert to pandas eval + exp = info["expression"] + for key in info.get("parameters", None): + exp = re.sub(f"(? 
0: + + # sort files in dictionary where keys are first timestamp from run + if isinstance(args.fft_files, list): + fft_files = [] + for file in args.fft_files: + with open(file) as f: + fft_files += f.read().splitlines() + else: + with open(args.fft_files) as f: + fft_files = f.read().splitlines() + + fft_files = sorted( + np.unique(fft_files) + ) # need this as sometimes files get double counted as it somehow puts in the p%-* filelist and individual runs also + + if len(fft_files)>0: + fft_data = load_data( + fft_files, + f"{args.channel}/dsp", + {}, + [*list(kwarg_dict_fft["cut_parameters"]), "timestamp", "trapTmax"], + threshold=kwarg_dict_fft["threshold"], + return_selection_mask=False, + cal_energy_param="trapTmax", + ) + + hit_dict_fft, plot_dict_fft = generate_cut_classifiers( + data, + kwarg_dict_fft["cut_parameters"], + kwarg_dict.get("rounding", 4), + display=1 if args.plot_path else 0, + ) + + log.debug("fft cuts applied") + log.debug(f"cut_dict is: {json.dumps(hit_dict_fft, indent=2)}") + + else: + hit_dict_fft = {} + plot_dict_fft = {} + else: + hit_dict_fft = {} + plot_dict_fft = {} + + hit_dict = {**hit_dict_init_cal, **hit_dict_cal, **hit_dict_fft} + plot_dict = {**plot_dict_init_cal, **plot_dict_cal, **plot_dict_fft} + + for file in args.save_path: + pathlib.Path(os.path.dirname(file)).mkdir(parents=True, exist_ok=True) + with open(file, "w") as f: + json.dump(hit_dict, f, indent=4) + + if args.plot_path: + for file in args.plot_path: + pathlib.Path(os.path.dirname(file)).mkdir(parents=True, exist_ok=True) + with open(file, "wb") as f: + pkl.dump({"qc": plot_dict}, f, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_tcm_pulser.py b/scripts/pars_tcm_pulser.py new file mode 100644 index 0000000..3c6d88a --- /dev/null +++ b/scripts/pars_tcm_pulser.py @@ -0,0 +1,64 @@ +import argparse +import json +import logging +import os +import pathlib +import pickle as pkl + +os.environ["LGDO_CACHE"] = "false" +os.environ["LGDO_BOUNDSCHECK"] = "false" +os.environ["PYGAMA_PARALLEL"] = "false" +os.environ["PYGAMA_FASTMATH"] = "false" + +import lgdo.lh5 as lh5 +import numpy as np +from legendmeta import LegendMetadata +from legendmeta.catalog import Props +from pygama.pargen.data_cleaning import get_tcm_pulser_ids + + +argparser = argparse.ArgumentParser() +argparser.add_argument("--configs", help="configs path", type=str, required=True) +argparser.add_argument("--log", help="log file", type=str) + +argparser.add_argument("--datatype", help="Datatype", type=str, required=True) +argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) +argparser.add_argument("--channel", help="Channel", type=str, required=True) + +argparser.add_argument("--pulser_file", help="pulser file", type=str, required=False) + +argparser.add_argument("--tcm_files", help="tcm_files", nargs="*", type=str) +args = argparser.parse_args() + +logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") +logging.getLogger("numba").setLevel(logging.INFO) +logging.getLogger("parse").setLevel(logging.INFO) +logging.getLogger("lgdo").setLevel(logging.INFO) +logging.getLogger("h5py").setLevel(logging.INFO) +logging.getLogger("matplotlib").setLevel(logging.INFO) +logging.getLogger("legendmeta").setLevel(logging.INFO) + +sto = lh5.LH5Store() +log = logging.getLogger(__name__) + +configs = LegendMetadata(path=args.configs) +config_dict = configs.on(args.timestamp, system=args.datatype) +kwarg_dict = config_dict["snakemake_rules"]["pars_tcm_pulser"]["inputs"]["pulser_config"] + +kwarg_dict 
= Props.read_from(kwarg_dict) + +if isinstance(args.tcm_files, list) and args.tcm_files[0].split(".")[-1] == "filelist": + tcm_files = args.tcm_files[0] + with open(tcm_files) as f: + tcm_files = f.read().splitlines() +else: + tcm_files = args.tcm_files +# get pulser mask from tcm files +tcm_files = sorted(np.unique(tcm_files)) +ids, mask = get_tcm_pulser_ids( + tcm_files, args.channel, kwarg_dict.pop("pulser_multiplicity_threshold") + ) + +pathlib.Path(os.path.dirname(args.pulser_file)).mkdir(parents=True, exist_ok=True) +with open(args.pulser_file, "w") as f: + json.dump({"idxs": ids.tolist(), "mask": mask.tolist()}, f, indent=4) \ No newline at end of file From 49869d95e172aa3984b193cd75fe5b8812bf3e9f Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Fri, 29 Mar 2024 13:51:28 +0100 Subject: [PATCH 034/103] fix psp path --- scripts/util/patterns.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/util/patterns.py b/scripts/util/patterns.py index e1538d0..7d381b2 100644 --- a/scripts/util/patterns.py +++ b/scripts/util/patterns.py @@ -23,6 +23,7 @@ tier_path, tier_pet_path, tier_pht_path, + tier_psp_path, tier_raw_blind_path, tier_raw_path, tier_skm_path, @@ -148,7 +149,7 @@ def get_pattern_tier_evt(setup): def get_pattern_tier_psp(setup): return os.path.join( - f"{tier_evt_path(setup)}", + f"{tier_psp_path(setup)}", "{datatype}", "{period}", "{run}", From 904685a93baf2ea6a19e7a17e2f1d5382af0e034 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Fri, 29 Mar 2024 13:51:46 +0100 Subject: [PATCH 035/103] add fft read --- rules/common.smk | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/rules/common.smk b/rules/common.smk index 1d4282b..068100b 100644 --- a/rules/common.smk +++ b/rules/common.smk @@ -38,6 +38,15 @@ def read_filelist_cal(wildcards, tier): return files +def read_filelist_fft(wildcards, tier): + label = f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-fft" + with checkpoints.gen_filelist.get(label=label, tier=tier, extension="file").output[ + 0 + ].open() as f: + files = f.read().splitlines() + return files + + def read_filelist_pars_cal_channel(wildcards, tier): """ This function will read the filelist of the channels and return a list of dsp files one for each channel From 5f8e4a88e3e0dec19cba00f3481fc3102a74934b Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Fri, 29 Mar 2024 13:52:09 +0100 Subject: [PATCH 036/103] split pulser into own rule --- rules/dsp.smk | 9 ++++----- rules/tcm.smk | 30 ++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 5 deletions(-) diff --git a/rules/dsp.smk b/rules/dsp.smk index 5c27f42..94ccf13 100644 --- a/rules/dsp.smk +++ b/rules/dsp.smk @@ -37,7 +37,7 @@ rule build_pars_dsp_tau: files=os.path.join( filelist_path(setup), "all-{experiment}-{period}-{run}-cal-raw.filelist" ), - tcm_files=lambda wildcards: read_filelist_cal(wildcards, "tcm"), + pulser=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), params: timestamp="{timestamp}", datatype="cal", @@ -45,7 +45,6 @@ rule build_pars_dsp_tau: output: decay_const=temp(get_pattern_pars_tmp_channel(setup, "dsp", "decay_constant")), plots=temp(get_pattern_plts_tmp_channel(setup, "dsp", "decay_constant")), - pulser=temp(get_pattern_pars_tmp_channel(setup, "dsp", "pulser_ids")), log: get_pattern_log_channel(setup, "par_dsp_decay_constant"), group: @@ -62,8 +61,7 @@ rule build_pars_dsp_tau: "--channel {params.channel} " "--plot_path {output.plots} " "--output_file {output.decay_const} " - "--pulser_file {output.pulser} " - "--tcm_files 
{input.tcm_files} "
+        "--pulser_file {input.pulser} "
         "--raw_files {input.files}"
 
 
@@ -72,7 +70,7 @@ rule build_pars_event_selection:
         files=os.path.join(
             filelist_path(setup), "all-{experiment}-{period}-{run}-cal-raw.filelist"
         ),
-        pulser_file=get_pattern_pars_tmp_channel(setup, "dsp", "pulser_ids"),
+        pulser_file=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"),
         database=get_pattern_pars_tmp_channel(setup, "dsp", "decay_constant"),
         raw_cal=get_blinding_curve_file,
     params:
@@ -87,6 +85,7 @@
         "par-dsp"
     resources:
         runtime=300,
+        mem_swap=70,
     shell:
         "{swenv} python3 -B "
         f"{workflow.source_path('../scripts/pars_dsp_event_selection.py')} "
diff --git a/rules/tcm.smk b/rules/tcm.smk
index 380c882..cfdf72c 100644
--- a/rules/tcm.smk
+++ b/rules/tcm.smk
@@ -6,6 +6,8 @@ from scripts.util.patterns import (
     get_pattern_tier_raw,
     get_pattern_tier,
     get_pattern_log,
+    get_pattern_pars_tmp_channel,
+    get_pattern_log_channel,
 )
 
 
@@ -33,3 +35,31 @@ rule build_tier_tcm:
         "--timestamp {params.timestamp} "
         "{input} "
         "{output}"
+
+
+# This rule builds the pulser ids from the tcm files for each channel
+rule build_pulser_ids:
+    input:
+        tcm_files=lambda wildcards: read_filelist_cal(wildcards, "tcm"),
+    params:
+        timestamp="{timestamp}",
+        datatype="cal",
+        channel="{channel}",
+    output:
+        pulser=temp(get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids")),
+    log:
+        get_pattern_log_channel(setup, "tcm_pulsers"),
+    group:
+        "tier-tcm"
+    resources:
+        runtime=300,
+    shell:
+        "{swenv} python3 -B "
+        f"{workflow.source_path('../scripts/pars_tcm_pulser.py')} "
+        "--log {log} "
+        "--configs {configs} "
+        "--datatype {params.datatype} "
+        "--timestamp {params.timestamp} "
+        "--channel {params.channel} "
+        "--tcm_files {input.tcm_files} "
+        "--pulser_file {output.pulser} "

From 57cdd606facfc31cf80ece546536e79449c53a37 Mon Sep 17 00:00:00 2001
From: ggmarshall
Date: Fri, 29 Mar 2024 13:52:33 +0100
Subject: [PATCH 037/103] add qc and use pulser files

---
 rules/hit.smk |  72 +++++++--
 rules/pht.smk | 435 ++++++++++++++++++++++++++++++++++++--------------
 2 files changed, 371 insertions(+), 136 deletions(-)

diff --git a/rules/hit.smk b/rules/hit.smk
index de918b3..bbce0bd 100644
--- a/rules/hit.smk
+++ b/rules/hit.smk
@@ -21,18 +21,67 @@ from scripts.util.patterns import (
 )
 
 
+onstart:
+    if os.path.isfile(os.path.join(pars_path(setup), "hit", "validity.jsonl")):
+        os.remove(os.path.join(pars_path(setup), "hit", "validity.jsonl"))
+
+    ds.pars_key_resolve.write_par_catalog(
+        ["-*-*-*-cal"],
+        os.path.join(pars_path(setup), "hit", "validity.jsonl"),
+        get_pattern_tier_raw(setup),
+        {"cal": ["par_hit"], "lar": ["par_hit"]},
+    )
+
+
+# This rule builds the qc using the calibration dsp files and fft files
+rule build_qc:
+    input:
+        files=lambda wildcards: read_filelist_cal(wildcards, "dsp"),
+        fft_files=lambda wildcards: read_filelist_fft(wildcards, "dsp"),
+        pulser=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"),
+    params:
+        timestamp="{timestamp}",
+        datatype="cal",
+        channel="{channel}",
+    output:
+        qc_file=temp(get_pattern_pars_tmp_channel(setup, "hit", "qc")),
+        plot_file=temp(get_pattern_plts_tmp_channel(setup, "hit", "qc")),
+    log:
+        get_pattern_log_channel(setup, "pars_hit_qc"),
+    group:
+        "par-hit"
+    resources:
+        runtime=300,
+    shell:
+        "{swenv} python3 -B "
+        f"{workflow.source_path('../scripts/pars_hit_qc.py')} "
+        "--log {log} "
+        "--datatype {params.datatype} "
+        "--timestamp {params.timestamp} "
+        "--channel {params.channel} "
+        "--configs {configs} "
+        "--plot_path {output.plot_file} "
+        
"--save_path {output.qc_file} " + "--pulser_file {input.pulser} " + "--cal_files {input.files} " + "--fft_files {input.fft_files} " + + # This rule builds the energy calibration using the calibration dsp files rule build_energy_calibration: input: - files=lambda wildcards: read_filelist_cal(wildcards, "dsp"), - tcm_filelist=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-cal-tcm.filelist" + files=os.path.join( + filelist_path(setup), "all-{experiment}-{period}-{run}-cal-dsp.filelist" ), + pulser=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), ctc_dict=ancient( lambda wildcards: pars_catalog.get_par_file( setup, wildcards.timestamp, "dsp" ) ), + inplots=get_pattern_plts_tmp_channel(setup, "hit", "qc"), + in_hit_dict=get_pattern_pars_tmp_channel(setup, "hit", "qc"), params: timestamp="{timestamp}", datatype="cal", @@ -59,11 +108,14 @@ rule build_energy_calibration: "--timestamp {params.timestamp} " "--channel {params.channel} " "--configs {configs} " + "--metadata {meta} " "--plot_path {output.plot_file} " "--results_path {output.results_file} " "--save_path {output.ecal_file} " + "--inplot_dict {input.inplots} " + "--in_hit_dict {input.in_hit_dict} " "--ctc_dict {input.ctc_dict} " - "--tcm_filelist {input.tcm_filelist} " + "--pulser_file {input.pulser} " "--files {input.files}" @@ -73,9 +125,7 @@ rule build_aoe_calibration: files=os.path.join( filelist_path(setup), "all-{experiment}-{period}-{run}-cal-dsp.filelist" ), - tcm_filelist=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-cal-tcm.filelist" - ), + pulser=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), ecal_file=get_pattern_pars_tmp_channel(setup, "hit", "energy_cal"), eres_file=get_pattern_pars_tmp_channel( setup, "hit", "energy_cal_objects", extension="pkl" @@ -112,7 +162,7 @@ rule build_aoe_calibration: "--eres_file {input.eres_file} " "--hit_pars {output.hit_pars} " "--plot_file {output.plot_file} " - "--tcm_filelist {input.tcm_filelist} " + "--pulser_file {input.pulser} " "--ecal_file {input.ecal_file} " "{input.files}" @@ -123,9 +173,7 @@ rule build_lq_calibration: files=os.path.join( filelist_path(setup), "all-{experiment}-{period}-{run}-cal-dsp.filelist" ), - tcm_filelist=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-cal-tcm.filelist" - ), + pulser=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), ecal_file=get_pattern_pars_tmp_channel(setup, "hit", "aoe_cal"), eres_file=get_pattern_pars_tmp_channel( setup, "hit", "aoe_cal_objects", extension="pkl" @@ -160,7 +208,7 @@ rule build_lq_calibration: "--eres_file {input.eres_file} " "--hit_pars {output.hit_pars} " "--plot_file {output.plot_file} " - "--tcm_filelist {input.tcm_filelist} " + "--pulser_file {input.pulser} " "--ecal_file {input.ecal_file} " "{input.files}" diff --git a/rules/pht.smk b/rules/pht.smk index f375fe6..cd11a9b 100644 --- a/rules/pht.smk +++ b/rules/pht.smk @@ -7,7 +7,7 @@ Snakemake rules for processing pht (partition hit) tier data. 
This is done in 4 """ from scripts.util.pars_loading import pars_catalog -import scripts.util.create_pars_keylist import pars_key_resolve +from scripts.util.create_pars_keylist import pars_key_resolve from scripts.util.utils import filelist_path, par_pht_path, set_last_rule_name from scripts.util.patterns import ( get_pattern_pars_tmp_channel, @@ -15,7 +15,6 @@ from scripts.util.patterns import ( get_pattern_log_channel, get_pattern_par_pht, get_pattern_plts, - get_pattern_tier_dsp, get_pattern_tier, get_pattern_pars_tmp, get_pattern_log, @@ -29,17 +28,174 @@ ds.pars_key_resolve.write_par_catalog( {"cal": ["par_pht"], "lar": ["par_pht"]}, ) +intier = "dsp" + + +rule pht_checkpoint: + input: + files=lambda wildcards: read_filelist_cal(wildcards, intier), + output: + get_pattern_pars_tmp_channel(setup, "pht", "check"), + shell: + "touch {output}" + + +qc_pht_rules = {} +for key, dataset in part.datasets.items(): + for partition in dataset.keys(): + + rule: + input: + cal_files=part.get_filelists(partition, key, intier), + fft_files=part.get_filelists(partition, key, intier, datatype="fft"), + pulser_files=[ + file.replace("pht", "tcm") + for file in part.get_par_files( + f"{par_pht_path(setup)}/validity.jsonl", + partition, + key, + tier="pht", + name="pulser_ids", + ) + ], + check_files=part.get_par_files( + f"{par_pht_path(setup)}/validity.jsonl", + partition, + key, + tier="pht", + name="check", + ), + wildcard_constraints: + channel=part.get_wildcard_constraints(partition, key), + params: + datatype="cal", + channel="{channel}" if key == "default" else key, + timestamp=part.get_timestamp( + f"{par_pht_path(setup)}/validity.jsonl", partition, key, tier="pht" + ), + output: + hit_pars=[ + temp(file) + for file in part.get_par_files( + f"{par_pht_path(setup)}/validity.jsonl", + partition, + key, + tier="pht", + name="qc", + ) + ], + plot_file=[ + temp(file) + for file in part.get_plt_files( + f"{par_pht_path(setup)}/validity.jsonl", + partition, + key, + tier="pht", + name="qc", + ) + ], + log: + part.get_log_file( + f"{par_pht_path(setup)}/validity.jsonl", + partition, + key, + "pht", + name="par_pht_qc", + ), + group: + "par-pht" + resources: + mem_swap=len(part.get_filelists(partition, key, intier)) * 20, + runtime=300, + shell: + "{swenv} python3 -B " + f"{basedir}/../scripts/pars_pht_qc.py " + "--log {log} " + "--configs {configs} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--channel {params.channel} " + "--save_path {output.hit_pars} " + "--plot_path {output.plot_file} " + "--pulser_files {input.pulser_files} " + "--fft_files {input.fft_files} " + "--cal_files {input.cal_files}" + + set_last_rule_name(workflow, f"{key}-{partition}-build_pht_qc") + + if key in qc_pht_rules: + qc_pht_rules[key].append(list(workflow.rules)[-1]) + else: + qc_pht_rules[key] = [list(workflow.rules)[-1]] + + +# Merged energy and a/e supercalibrations to reduce number of rules as they have same inputs/outputs +# This rule builds the a/e calibration using the calibration dsp files for the whole partition +rule build_pht_qc: + input: + cal_files=os.path.join( + filelist_path(setup), + "all-{experiment}-{period}-{run}-cal-" + f"{intier}.filelist", + ), + fft_files=os.path.join( + filelist_path(setup), + "all-{experiment}-{period}-{run}-fft-" + f"{intier}.filelist", + ), + pulser_file=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), + check_file=get_pattern_pars_tmp_channel(setup, "pht", "check"), + params: + datatype="cal", + channel="{channel}", + timestamp="{timestamp}", 
+ output: + hit_pars=temp(get_pattern_pars_tmp_channel(setup, "pht", "qc")), + plot_file=temp(get_pattern_plts_tmp_channel(setup, "pht", "qc")), + log: + get_pattern_log_channel(setup, "pars_pht_qc"), + group: + "par-pht" + resources: + mem_swap=60, + runtime=300, + shell: + "{swenv} python3 -B " + f"{basedir}/../scripts/pars_pht_qc.py " + "--log {log} " + "--configs {configs} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--channel {params.channel} " + "--save_path {output.hit_pars} " + "--plot_path {output.plot_file} " + "--pulser_files {input.pulser_files} " + "--fft_files {input.fft_files} " + "--cal_files {input.cal_files}" + + +fallback_qc_rule = list(workflow.rules)[-1] + +rule_order_list = [] +ordered = OrderedDict(qc_pht_rules) +ordered.move_to_end("default") +for key, items in ordered.items(): + rule_order_list += [item.name for item in items] +rule_order_list.append(fallback_qc_rule.name) +workflow._ruleorder.add(*rule_order_list) # [::-1] + # This rule builds the energy calibration using the calibration dsp files rule build_per_energy_calibration: input: - files=lambda wildcards: read_filelist_cal(wildcards, "dsp"), - tcm_filelist=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-cal-tcm.filelist" + files=os.path.join( + filelist_path(setup), + "all-{experiment}-{period}-{run}-cal-" + f"{intier}.filelist", ), + pulser=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), + pht_dict=get_pattern_pars_tmp_channel(setup, "pht", "qc"), + inplots=get_pattern_plts_tmp_channel(setup, "pht", "qc"), ctc_dict=ancient( lambda wildcards: pars_catalog.get_par_file( - setup, wildcards.timestamp, "dsp" + setup, wildcards.timestamp, intier ) ), params: @@ -70,103 +226,16 @@ rule build_per_energy_calibration: "--channel {params.channel} " "--configs {configs} " "--tier {params.tier} " + "--metadata {meta} " "--plot_path {output.plot_file} " "--results_path {output.results_file} " "--save_path {output.ecal_file} " + "--inplot_dict {input.inplots} " + "--in_hit_dict {input.pht_dict} " "--ctc_dict {input.ctc_dict} " - "--tcm_filelist {input.tcm_filelist} " + "--pulser_file {input.pulser} " "--files {input.files}" -rule build_pars_pht_objects: - input: - lambda wildcards: read_filelist_pars_cal_channel( - wildcards, - "pht_objects_pkl", - ), - output: - get_pattern_pars( - setup, - "pht", - name="objects", - extension="dir", - check_in_cycle=check_in_cycle, - ) - group: - "merge-hit" - shell: - "{swenv} python3 -B " - f"{basedir}/../scripts/merge_channels.py " - "--input {input} " - "--output {output} " - -rule build_plts_pht: - input: - lambda wildcards: read_filelist_plts_cal_channel(wildcards, "pht"), - output: - get_pattern_plts(setup, "pht") - group: - "merge-hit" - shell: - "{swenv} python3 -B " - f"{basedir}/../scripts/merge_channels.py " - "--input {input} " - "--output {output} " - -rule build_pars_pht: - input: - infiles = lambda wildcards: read_filelist_pars_cal_channel(wildcards, "pht"), - plts = get_pattern_plts(setup, "pht"), - objects = get_pattern_pars( - setup, - "pht", - name="objects", - extension="dir", - check_in_cycle=check_in_cycle, - ) - output: - get_pattern_pars(setup, "pht", check_in_cycle=check_in_cycle), - group: - "merge-hit" - shell: - "{swenv} python3 -B " - f"{basedir}/../scripts/merge_channels.py " - "--input {input.infiles} " - "--output {output} " - - -rule build_pht: - input: - dsp_file=get_pattern_tier_dsp(setup), - #hit_file = get_pattern_tier_hit(setup), - pars_file=lambda wildcards: 
pars_catalog.get_par_file( - setup, wildcards.timestamp, "pht" - ), - output: - tier_file=get_pattern_tier(setup, "pht", check_in_cycle=check_in_cycle), - db_file=get_pattern_pars_tmp(setup, "pht_db"), - params: - timestamp="{timestamp}", - datatype="{datatype}", - tier="pht", - log: - get_pattern_log(setup, "tier_pht"), - group: - "tier-pht" - resources: - runtime=300, - shell: - "{swenv} python3 -B " - f"{workflow.source_path('../scripts/build_hit.py')} " - "--configs {configs} " - "--log {log} " - "--tier {params.tier} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--pars_file {input.pars_file} " - "--output {output.tier_file} " - "--input {input.dsp_file} " - "--db_file {output.db_file}" - part_pht_rules = {} for key, dataset in part.datasets.items(): @@ -174,8 +243,17 @@ for key, dataset in part.datasets.items(): rule: input: - files=part.get_filelists(partition, key, "dsp"), - tcm_files=part.get_filelists(partition, key, "tcm"), + files=part.get_filelists(partition, key, intier), + pulser_files=[ + file.replace("pht", "tcm") + for file in part.get_par_files( + f"{par_pht_path(setup)}/validity.jsonl", + partition, + key, + tier="pht", + name="pulser_ids", + ) + ], ecal_file=part.get_par_files( f"{par_pht_path(setup)}/validity.jsonl", partition, @@ -249,7 +327,7 @@ for key, dataset in part.datasets.items(): group: "par-pht" resources: - mem_swap=300, + mem_swap=len(part.get_filelists(partition, key, intier)) * 20, runtime=300, shell: "{swenv} python3 -B " @@ -260,12 +338,13 @@ for key, dataset in part.datasets.items(): "--timestamp {params.timestamp} " "--inplots {input.inplots} " "--channel {params.channel} " + "--metadata {meta} " "--fit_results {output.partcal_results} " "--eres_file {input.eres_file} " "--hit_pars {output.hit_pars} " "--plot_file {output.plot_file} " "--ecal_file {input.ecal_file} " - "--tcm_filelist {input.tcm_files} " + "--pulser_files {input.pulser_files} " "--input_files {input.files}" set_last_rule_name( @@ -283,11 +362,10 @@ for key, dataset in part.datasets.items(): rule build_pht_energy_super_calibrations: input: files=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-cal-dsp.filelist" - ), - tcm_files=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-cal-tcm.filelist" + filelist_path(setup), + "all-{experiment}-{period}-{run}-cal" + f"-{intier}.filelist", ), + pulser_files=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), ecal_file=get_pattern_pars_tmp_channel(setup, "pht", "energy_cal"), eres_file=get_pattern_pars_tmp_channel( setup, "pht", "energy_cal_objects", extension="pkl" @@ -320,13 +398,14 @@ rule build_pht_energy_super_calibrations: "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " + "--metadata {meta} " "--inplots {input.inplots} " "--fit_results {output.partcal_results} " "--eres_file {input.eres_file} " "--hit_pars {output.hit_pars} " "--plot_file {output.plot_file} " "--ecal_file {input.ecal_file} " - "--tcm_filelist {input.tcm_files} " + "--pulser_files {input.pulser_files} " "--input_files {input.files}" @@ -346,8 +425,17 @@ for key, dataset in part.datasets.items(): rule: input: - files=part.get_filelists(partition, key, "dsp"), - tcm_files=part.get_filelists(partition, key, "tcm"), + files=part.get_filelists(partition, key, intier), + pulser_files=[ + file.replace("pht", "tcm") + for file in part.get_par_files( + f"{par_pht_path(setup)}/validity.jsonl", + partition, + key, + tier="pht", + name="pulser_ids", + ) + ], 
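+            # the str.replace above turns each pht-tier par path returned by
+            # get_par_files into its tcm-tier counterpart, where the pulser
+            # ids were actually written; schematically (hypothetical name):
+            #   .../par/pht/...-par_pht_pulser_ids.json
+            #   -> .../par/tcm/...-par_tcm_pulser_ids.json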
ecal_file=part.get_par_files( f"{par_pht_path(setup)}/validity.jsonl", partition, @@ -421,7 +509,7 @@ for key, dataset in part.datasets.items(): group: "par-pht" resources: - mem_swap=300, + mem_swap=len(part.get_filelists(partition, key, intier)) * 20, runtime=300, shell: "{swenv} python3 -B " @@ -437,7 +525,7 @@ for key, dataset in part.datasets.items(): "--hit_pars {output.hit_pars} " "--plot_file {output.plot_file} " "--ecal_file {input.ecal_file} " - "--tcm_filelist {input.tcm_files} " + "--pulser_files {input.pulser_files} " "--input_files {input.files}" set_last_rule_name( @@ -455,11 +543,10 @@ for key, dataset in part.datasets.items(): rule build_pht_aoe_calibrations: input: files=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-cal-dsp.filelist" - ), - tcm_filelist=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-cal-tcm.filelist" + filelist_path(setup), + "all-{experiment}-{period}-{run}-cal-" + f"{intier}.filelist", ), + pulser_files=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), ecal_file=get_pattern_pars_tmp_channel(setup, "pht", "partcal"), eres_file=get_pattern_pars_tmp_channel( setup, "pht", "partcal_objects", extension="pkl" @@ -498,7 +585,7 @@ rule build_pht_aoe_calibrations: "--hit_pars {output.hit_pars} " "--plot_file {output.plot_file} " "--ecal_file {input.ecal_file} " - "--tcm_filelist {input.tcm_filelist} " + "--pulser_files {input.pulser_files} " "--input_files {input.files}" @@ -518,8 +605,17 @@ for key, dataset in part.datasets.items(): rule: input: - files=part.get_filelists(partition, key, "dsp"), - tcm_files=part.get_filelists(partition, key, "tcm"), + files=part.get_filelists(partition, key, intier), + pulser_files=[ + file.replace("pht", "tcm") + for file in part.get_par_files( + f"{par_pht_path(setup)}/validity.jsonl", + partition, + key, + tier="pht", + name="pulser_ids", + ) + ], ecal_file=part.get_par_files( f"{par_pht_path(setup)}/validity.jsonl", partition, @@ -591,7 +687,7 @@ for key, dataset in part.datasets.items(): group: "par-pht" resources: - mem_swap=300, + mem_swap=len(part.get_filelists(partition, key, intier)) * 20, runtime=300, shell: "{swenv} python3 -B " @@ -607,7 +703,7 @@ for key, dataset in part.datasets.items(): "--hit_pars {output.hit_pars} " "--plot_file {output.plot_file} " "--ecal_file {input.ecal_file} " - "--tcm_filelist {input.tcm_files} " + "--pulser_files {input.pulser_files} " "--input_files {input.files}" set_last_rule_name(workflow, f"{key}-{partition}-build_pht_lq_calibration") @@ -622,11 +718,10 @@ for key, dataset in part.datasets.items(): rule build_pht_lq_calibration: input: files=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-cal-dsp.filelist" - ), - tcm_filelist=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-cal-tcm.filelist" + filelist_path(setup), + "all-{experiment}-{period}-{run}-cal-" + f"{intier}.filelist", ), + pulser_files=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), ecal_file=get_pattern_pars_tmp_channel(setup, "pht", "aoecal"), eres_file=get_pattern_pars_tmp_channel( setup, "pht", "aoecal_objects", extension="pkl" @@ -663,7 +758,7 @@ rule build_pht_lq_calibration: "--hit_pars {output.hit_pars} " "--plot_file {output.plot_file} " "--ecal_file {input.ecal_file} " - "--tcm_filelist {input.tcm_filelist} " + "--pulser_files {input.pulser_files} " "--input_files {input.files}" @@ -676,3 +771,95 @@ for key, items in ordered.items(): rule_order_list += [item.name for item in items] 
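+# Illustration of the precedence list handed to workflow._ruleorder below
+# (rule names hypothetical): partition-keyed rules come first, "default"-keyed
+# ones after them, and the run-level fallback rule is appended last, e.g.
+#   ["key1-part1-build_pht_lq_calibration",
+#    "default-part2-build_pht_lq_calibration",
+#    "build_pht_lq_calibration"]
+# so when several rules could provide the same par file, Snakemake prefers
+# the partition-level rules over the single-run fallback.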
rule_order_list.append(fallback_pht_rule.name) workflow._ruleorder.add(*rule_order_list) # [::-1] + + +rule build_pars_pht_objects: + input: + lambda wildcards: read_filelist_pars_cal_channel( + wildcards, + "pht_objects_pkl", + ), + output: + get_pattern_pars( + setup, + "pht", + name="objects", + extension="dir", + check_in_cycle=check_in_cycle, + ), + group: + "merge-hit" + shell: + "{swenv} python3 -B " + f"{basedir}/../scripts/merge_channels.py " + "--input {input} " + "--output {output} " + + +rule build_plts_pht: + input: + lambda wildcards: read_filelist_plts_cal_channel(wildcards, "pht"), + output: + get_pattern_plts(setup, "pht"), + group: + "merge-hit" + shell: + "{swenv} python3 -B " + f"{basedir}/../scripts/merge_channels.py " + "--input {input} " + "--output {output} " + + +rule build_pars_pht: + input: + infiles=lambda wildcards: read_filelist_pars_cal_channel(wildcards, "pht"), + plts=get_pattern_plts(setup, "pht"), + objects=get_pattern_pars( + setup, + "pht", + name="objects", + extension="dir", + check_in_cycle=check_in_cycle, + ), + output: + get_pattern_pars(setup, "pht", check_in_cycle=check_in_cycle), + group: + "merge-hit" + shell: + "{swenv} python3 -B " + f"{basedir}/../scripts/merge_channels.py " + "--input {input.infiles} " + "--output {output} " + + +rule build_pht: + input: + dsp_file=get_pattern_tier(setup, intier, check_in_cycle=False), + pars_file=lambda wildcards: pars_catalog.get_par_file( + setup, wildcards.timestamp, "pht" + ), + output: + tier_file=get_pattern_tier(setup, "pht", check_in_cycle=check_in_cycle), + db_file=get_pattern_pars_tmp(setup, "pht_db"), + params: + timestamp="{timestamp}", + datatype="{datatype}", + tier="pht", + log: + get_pattern_log(setup, "tier_pht"), + group: + "tier-pht" + resources: + runtime=300, + shell: + "{swenv} python3 -B " + f"{workflow.source_path('../scripts/build_hit.py')} " + "--configs {configs} " + "--log {log} " + "--tier {params.tier} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--pars_file {input.pars_file} " + "--output {output.tier_file} " + "--input {input.dsp_file} " + "--db_file {output.db_file}" From 03a85674f949935d6239b7092a92b4186928bacf Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sun, 31 Mar 2024 19:29:20 +0200 Subject: [PATCH 038/103] move onstart to snakefile --- rules/dsp.smk | 11 ----------- rules/hit.smk | 13 ------------- 2 files changed, 24 deletions(-) diff --git a/rules/dsp.smk b/rules/dsp.smk index 94ccf13..9ea2e7f 100644 --- a/rules/dsp.smk +++ b/rules/dsp.smk @@ -21,17 +21,6 @@ from scripts.util.patterns import ( ) -onstart: - if os.path.isfile(os.path.join(pars_path(setup), "dsp", "validity.jsonl")): - os.remove(os.path.join(pars_path(setup), "dsp", "validity.jsonl")) - ds.pars_key_resolve.write_par_catalog( - ["-*-*-*-cal"], - os.path.join(pars_path(setup), "dsp", "validity.jsonl"), - get_pattern_tier_raw(setup), - {"cal": ["par_dsp"], "lar": ["par_dsp"]}, - ) - - rule build_pars_dsp_tau: input: files=os.path.join( diff --git a/rules/hit.smk b/rules/hit.smk index bbce0bd..3d2c292 100644 --- a/rules/hit.smk +++ b/rules/hit.smk @@ -21,19 +21,6 @@ from scripts.util.patterns import ( ) -onstart: - if os.path.isfile(os.path.join(pars_path(setup), "hit", "validity.jsonl")): - os.remove(os.path.join(pars_path(setup), "hit", "validity.jsonl")) - - - ds.pars_key_resolve.write_par_catalog( - ["-*-*-*-cal"], - os.path.join(pars_path(setup), "hit", "validity.jsonl"), - get_pattern_tier_raw(setup), - {"cal": ["par_hit"], "lar": ["par_hit"]}, - ) - - # This rule 
builds the qc using the calibration dsp files and fft files rule build_qc: input: From bfe4bf4b148ce8cb4676dd0117365ae4eccf726c Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sun, 31 Mar 2024 19:29:36 +0200 Subject: [PATCH 039/103] bugfix --- rules/pht.smk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rules/pht.smk b/rules/pht.smk index cd11a9b..028e0e3 100644 --- a/rules/pht.smk +++ b/rules/pht.smk @@ -141,7 +141,7 @@ rule build_pht_qc: filelist_path(setup), "all-{experiment}-{period}-{run}-fft-" + f"{intier}.filelist", ), - pulser_file=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), + pulser_files=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), check_file=get_pattern_pars_tmp_channel(setup, "pht", "check"), params: datatype="cal", From d44e737d342efc500dc02c15dbb963fd82e9a95d Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sun, 31 Mar 2024 19:31:36 +0200 Subject: [PATCH 040/103] add muon table --- scripts/build_evt.py | 41 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 36 insertions(+), 5 deletions(-) diff --git a/scripts/build_evt.py b/scripts/build_evt.py index e5febca..9fe9724 100644 --- a/scripts/build_evt.py +++ b/scripts/build_evt.py @@ -58,9 +58,8 @@ def replace_evt_with_key(dic, new_key): # load in config configs = LegendMetadata(path=args.configs) if args.tier == "evt" or args.tier == "pet": - evt_config_file = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ - "tier_evt" - ]["inputs"]["evt_config"] + config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_evt"]["inputs"] + evt_config_file = config_dict["evt_config"] else: msg = "unknown tier" raise ValueError(msg) @@ -88,7 +87,6 @@ def replace_evt_with_key(dic, new_key): else: chans = [] _evt_config["channels"][field] = chans - evt_config[key] = replace_evt_with_key(_evt_config, f"evt/{key}") else: evt_config = {"all": Props.read_from(evt_config_file)} # block for snakemake to fill in channel lists @@ -124,15 +122,48 @@ def replace_evt_with_key(dic, new_key): f_hit=args.hit_file, f_evt=None, evt_config=config, - evt_group=f"evt/{key}" if key != "all" else "evt", + evt_group="evt", tcm_group="hardware_tcm_1", dsp_group="dsp", hit_group="hit", tcm_id_table_pattern="ch{}", ) +if "muon_config" in config_dict and config_dict["muon_config"] is not None: + muon_config = Props.read_from(config_dict["muon_config"]) + # block for snakemake to fill in channel lists + for field, dic in muon_config["channels"].items(): + if isinstance(dic, dict): + chans = chmap.map("system", unique=False)[dic["system"]] + if "selectors" in dic: + try: + for k, val in dic["selectors"].items(): + chans = chans.map(k, unique=False)[val] + except KeyError: + chans = None + if chans is not None: + chans = [f"ch{chan}" for chan in list(chans.map("daq.rawid"))] + else: + chans = [] + muon_config["channels"][field] = chans + +muon_table = build_evt( + f_tcm=args.tcm_file, + f_dsp=args.dsp_file, + f_hit=args.hit_file, + f_evt=None, + evt_config=muon_config, + evt_group="evt", + tcm_group="hardware_tcm_2", + dsp_group="dsp", + hit_group="hit", + tcm_id_table_pattern="ch{}", +) + tbl = Table(col_dict=tables) sto.write(obj=tbl, name="evt", lh5_file=temp_output, wo_mode="a") +sto.write(obj=muon_table, name="muon", lh5_file=temp_output, wo_mode="a") + os.rename(temp_output, args.output) t_elap = time.time() - t_start From 56c5d5d49e499f43be748720ad7a104d701c3ed0 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sun, 31 Mar 2024 19:32:07 +0200 Subject: [PATCH 041/103] 
fix pickling --- scripts/pars_hit_ecal.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/scripts/pars_hit_ecal.py b/scripts/pars_hit_ecal.py index 1d7d436..c859eea 100644 --- a/scripts/pars_hit_ecal.py +++ b/scripts/pars_hit_ecal.py @@ -352,9 +352,7 @@ def baseline_tracking_plots(files, lh5_path, plot_options=None): if plot_options is None: plot_options = {} plot_dict = {} - data = sto.read(lh5_path, files, field_mask=["bl_mean", "baseline", "timestamp"])[0].view_as( - "pd" - ) + data = lh5.read_as(lh5_path, files, "pd", field_mask=["bl_mean", "baseline", "timestamp"]) for key, item in plot_options.items(): if item["options"] is not None: plot_dict[key] = item["function"](data, **item["options"]) @@ -362,6 +360,13 @@ def baseline_tracking_plots(files, lh5_path, plot_options=None): plot_dict[key] = item["function"](data) return plot_dict +def monitor_parameters(files, lh5_path, parameters): + data = lh5.read_as(lh5_path, files, "pd", field_mask=parameters) + out_dict = {} + for param in parameters: + mode, stdev = get_mode_stdev(data[param].to_numpy()) + out_dict[param] = {"mode": mode, "stdev": stdev} + return out_dict def get_results_dict(ecal_class, data, cal_energy_param, selection_string): if np.isnan(ecal_class.pars).all(): @@ -632,6 +637,10 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): peak_dict["parameters"] = peak_dict["parameters"].to_dict() peak_dict["uncertainties"] = peak_dict["uncertainties"].to_dict() + if "monitor_parameters" in kwarg_dict: + monitor_dict = monitor_parameters(files, f"{args.channel}/dsp", kwarg_dict["monitor_parameters"]) + results_dict.update({"monitoring_parameters":monitor_dict}) + # get baseline plots and save all plots to file if args.plot_path: common_dict = baseline_tracking_plots( From 207acedc90700b7932a5f43070c35ca6e1f622ff Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sun, 31 Mar 2024 19:32:44 +0200 Subject: [PATCH 042/103] add onstart --- Snakefile | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/Snakefile b/Snakefile index c5149e8..bee9673 100644 --- a/Snakefile +++ b/Snakefile @@ -22,6 +22,7 @@ from scripts.util.utils import ( filelist_path, metadata_path, tmp_log_path, + pars_path ) from datetime import datetime from collections import OrderedDict @@ -53,14 +54,20 @@ wildcard_constraints: include: "rules/common.smk" include: "rules/main.smk" + include: "rules/tcm.smk" + include: "rules/dsp.smk" +include: "rules/psp.smk" + include: "rules/hit.smk" include: "rules/pht.smk" -include: "rules/psp.smk" + include: "rules/evt.smk" include: "rules/skm.smk" + include: "rules/blinding_calibration.smk" +include: "rules/qc_phy.smk" localrules: @@ -70,6 +77,26 @@ localrules: onstart: print("Starting workflow") + if os.path.isfile(os.path.join(pars_path(setup), "hit", "validity.jsonl")): + os.remove(os.path.join(pars_path(setup), "hit", "validity.jsonl")) + + + ds.pars_key_resolve.write_par_catalog( + ["-*-*-*-cal"], + os.path.join(pars_path(setup), "hit", "validity.jsonl"), + get_pattern_tier_raw(setup), + {"cal": ["par_hit"], "lar": ["par_hit"]}, + ) + + if os.path.isfile(os.path.join(pars_path(setup), "dsp", "validity.jsonl")): + os.remove(os.path.join(pars_path(setup), "dsp", "validity.jsonl")) + ds.pars_key_resolve.write_par_catalog( + ["-*-*-*-cal"], + os.path.join(pars_path(setup), "dsp", "validity.jsonl"), + get_pattern_tier_raw(setup), + {"cal": ["par_dsp"], "lar": ["par_dsp"]}, + ) + onsuccess: From 
297b20ce142282bf7aa138a1b1063c7518247942 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sun, 31 Mar 2024 19:33:55 +0200 Subject: [PATCH 043/103] add scripts for qc from phy files for overrides --- rules/qc_phy.smk | 160 +++++++++++++++++++++++++++++++++++++ scripts/pars_pht_qc_phy.py | 116 +++++++++++++++++++++++++++ 2 files changed, 276 insertions(+) create mode 100644 rules/qc_phy.smk create mode 100644 scripts/pars_pht_qc_phy.py diff --git a/rules/qc_phy.smk b/rules/qc_phy.smk new file mode 100644 index 0000000..f4f3b7e --- /dev/null +++ b/rules/qc_phy.smk @@ -0,0 +1,160 @@ +from scripts.util.pars_loading import pars_catalog +from scripts.util.create_pars_keylist import pars_key_resolve +from scripts.util.utils import filelist_path, par_pht_path, set_last_rule_name +from scripts.util.patterns import ( + get_pattern_pars_tmp_channel, + get_pattern_plts_tmp_channel, + get_pattern_log_channel, + get_pattern_par_pht, + get_pattern_plts, + get_pattern_tier, + get_pattern_pars_tmp, + get_pattern_log, + get_pattern_pars, +) + +intier = "dsp" + + +qc_pht_rules = {} +for key, dataset in part.datasets.items(): + for partition in dataset.keys(): + + rule: + input: + phy_files=part.get_filelists(partition, key, intier, datatype="phy"), + wildcard_constraints: + channel=part.get_wildcard_constraints(partition, key), + params: + datatype="cal", + channel="{channel}" if key == "default" else key, + timestamp=part.get_timestamp( + f"{par_pht_path(setup)}/validity.jsonl", partition, key, tier="pht" + ), + output: + hit_pars=[ + temp(file) + for file in part.get_par_files( + f"{par_pht_path(setup)}/validity.jsonl", + partition, + key, + tier="pht", + name="qcphy", + ) + ], + plot_file=[ + temp(file) + for file in part.get_plt_files( + f"{par_pht_path(setup)}/validity.jsonl", + partition, + key, + tier="pht", + name="qcphy", + ) + ], + log: + part.get_log_file( + f"{par_pht_path(setup)}/validity.jsonl", + partition, + key, + "pht", + name="par_pht_qc_phy", + ), + group: + "par-pht" + resources: + mem_swap=len(part.get_filelists(partition, key, intier)) * 20, + runtime=300, + shell: + "{swenv} python3 -B " + f"{basedir}/../scripts/pars_pht_qc_phy.py " + "--log {log} " + "--configs {configs} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--channel {params.channel} " + "--save_path {output.hit_pars} " + "--plot_path {output.plot_file} " + "--phy_files {input.phy_files}" + + set_last_rule_name(workflow, f"{key}-{partition}-build_pht_qc_phy") + + if key in qc_pht_rules: + qc_pht_rules[key].append(list(workflow.rules)[-1]) + else: + qc_pht_rules[key] = [list(workflow.rules)[-1]] + + +# Merged energy and a/e supercalibrations to reduce number of rules as they have same inputs/outputs +# This rule builds the a/e calibration using the calibration dsp files for the whole partition +rule build_pht_qc_phy: + input: + phy_files=os.path.join( + filelist_path(setup), + "all-{experiment}-{period}-{run}-phy-" + f"{intier}.filelist", + ), + params: + datatype="cal", + channel="{channel}", + timestamp="{timestamp}", + output: + hit_pars=temp(get_pattern_pars_tmp_channel(setup, "pht", "qcphy")), + plot_file=temp(get_pattern_plts_tmp_channel(setup, "pht", "qcphy")), + log: + get_pattern_log_channel(setup, "pars_pht_qc_phy"), + group: + "par-pht" + resources: + mem_swap=60, + runtime=300, + shell: + "{swenv} python3 -B " + f"{basedir}/../scripts/pars_pht_qc_phy.py " + "--log {log} " + "--configs {configs} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--channel 
{params.channel} " + "--save_path {output.hit_pars} " + "--plot_path {output.plot_file} " + "--phy_files {input.phy_files}" + + +fallback_qc_rule = list(workflow.rules)[-1] + +rule_order_list = [] +ordered = OrderedDict(qc_pht_rules) +ordered.move_to_end("default") +for key, items in ordered.items(): + rule_order_list += [item.name for item in items] +rule_order_list.append(fallback_qc_rule.name) +workflow._ruleorder.add(*rule_order_list) # [::-1] + +rule build_plts_pht_phy: + input: + lambda wildcards: read_filelist_plts_cal_channel(wildcards, "pht_qcphy"), + output: + get_pattern_plts(setup, "pht", "qc_phy"), + group: + "merge-hit" + shell: + "{swenv} python3 -B " + f"{basedir}/../scripts/merge_channels.py " + "--input {input} " + "--output {output} " + + +rule build_pars_pht_phy: + input: + infiles=lambda wildcards: read_filelist_pars_cal_channel(wildcards, "pht_qcphy"), + plts=get_pattern_plts(setup, "pht" , "qc_phy"), + output: + get_pattern_pars(setup, "pht", name= "qc_phy", check_in_cycle=check_in_cycle), + group: + "merge-hit" + shell: + "{swenv} python3 -B " + f"{basedir}/../scripts/merge_channels.py " + "--input {input.infiles} " + "--output {output} " + diff --git a/scripts/pars_pht_qc_phy.py b/scripts/pars_pht_qc_phy.py new file mode 100644 index 0000000..804ffd4 --- /dev/null +++ b/scripts/pars_pht_qc_phy.py @@ -0,0 +1,116 @@ +from __future__ import annotations + +import argparse +import json +import logging +import os +import pathlib +import pickle as pkl +import re +import warnings + +os.environ["PYGAMA_PARALLEL"] = "false" +os.environ["PYGAMA_FASTMATH"] = "false" + +import numpy as np +from legendmeta import LegendMetadata +from legendmeta.catalog import Props +from lgdo.lh5 import ls +import lgdo.lh5 as lh5 +from pygama.pargen.data_cleaning import get_tcm_pulser_ids, generate_cuts, get_keys, generate_cut_classifiers +from pygama.pargen.utils import load_data +from util.FileKey import ChannelProcKey, ProcessingFileKey + +log = logging.getLogger(__name__) + +warnings.filterwarnings(action="ignore", category=RuntimeWarning) + + +if __name__ == "__main__": + argparser = argparse.ArgumentParser() + argparser.add_argument("--phy_files", help="cal_files", nargs="*", type=str) + + argparser.add_argument("--configs", help="config", type=str, required=True) + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) + argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) + argparser.add_argument("--channel", help="Channel", type=str, required=True) + + argparser.add_argument("--log", help="log_file", type=str) + + argparser.add_argument("--plot_path", help="plot_path", type=str, nargs="*", required=False) + argparser.add_argument("--save_path", help="save_path", type=str, nargs="*", ) + args = argparser.parse_args() + + logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") + logging.getLogger("numba").setLevel(logging.INFO) + logging.getLogger("parse").setLevel(logging.INFO) + logging.getLogger("lgdo").setLevel(logging.INFO) + logging.getLogger("h5py").setLevel(logging.INFO) + logging.getLogger("matplotlib").setLevel(logging.INFO) + logging.getLogger("legendmeta").setLevel(logging.INFO) + + # get metadata dictionary + configs = LegendMetadata(path=args.configs) + channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"] + channel_dict = channel_dict["pars_pht_qc"]["inputs"]["qc_config"][args.channel] + + sto = lh5.LH5Store() + + # sort files in dictionary where keys are first timestamp from 
run + bl_mask = np.array([], dtype=bool) + if isinstance(args.phy_files, list): + phy_files = [] + for file in sorted(args.phy_files): + with open(file) as f: + run_files = f.read().splitlines() + if len(run_files) == 0: + continue + else: + run_files = sorted(np.unique(run_files)) + phy_files += run_files + bls = sto.read("ch1027200/dsp/", run_files, field_mask = ["wf_max", "bl_mean"])[0] + puls = sto.read("ch1027201/dsp/", run_files, field_mask = ["trapTmax"])[0] + bl_idxs = ((bls["wf_max"].nda - bls["bl_mean"].nda)>1000) &(puls["trapTmax"].nda<200) + bl_mask = np.append(bl_mask, bl_idxs) + else: + with open(args.phy_files) as f: + phy_files = f.read().splitlines() + phy_files = sorted(np.unique(phy_files)) + bls = sto.read("ch1027200/dsp/", phy_files, field_mask = ["wf_max", "bl_mean"])[0] + puls = sto.read("ch1027201/dsp/", phy_files, field_mask = ["trapTmax"])[0] + bl_mask = ((bls["wf_max"].nda - bls["bl_mean"].nda)>1000) &(puls["trapTmax"].nda<200) + + kwarg_dict = Props.read_from(channel_dict) + kwarg_dict_fft = kwarg_dict["fft_fields"] + + cut_fields = get_keys( + [ + key.replace(f"{args.channel}/dsp/", "") + for key in ls(phy_files[0], f"{args.channel}/dsp/") + ], + kwarg_dict_fft["cut_parameters"], + ) + + data = sto.read(f"{args.channel}/dsp/", phy_files, + field_mask=cut_fields, idx = np.where(bl_mask)[0])[0] + + hit_dict, plot_dict = generate_cut_classifiers( + data, + kwarg_dict_fft["cut_parameters"], + kwarg_dict.get("rounding", 4), + display=1 if args.plot_path else 0, + ) + + log.debug("fft cuts applied") + log.debug(f"cut_dict is: {json.dumps(hit_dict, indent=2)}") + + for file in args.save_path: + pathlib.Path(os.path.dirname(file)).mkdir(parents=True, exist_ok=True) + with open(file, "w") as f: + json.dump({"pars":{"operations":hit_dict}}, f, indent=4) + + if args.plot_path: + for file in args.plot_path: + pathlib.Path(os.path.dirname(file)).mkdir(parents=True, exist_ok=True) + with open(file, "wb") as f: + pkl.dump({"qc": plot_dict}, f, protocol=pkl.HIGHEST_PROTOCOL) From 2f8a5b2d46c5e62bfd03270b80f25c74df7338a3 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sun, 31 Mar 2024 19:42:14 +0200 Subject: [PATCH 044/103] pre-commit --- Snakefile | 8 +------- rules/qc_phy.smk | 6 +++--- scripts/build_evt.py | 6 ++++-- scripts/pars_hit_ecal.py | 26 ++++++++++++++++++------- scripts/pars_hit_qc.py | 6 +++++- scripts/pars_pht_aoecal.py | 4 ++-- scripts/pars_pht_lqcal.py | 2 +- scripts/pars_pht_partcal.py | 12 ++++++++---- scripts/pars_pht_qc.py | 30 ++++++++++++++++++----------- scripts/pars_pht_qc_phy.py | 38 ++++++++++++++++++++++--------------- scripts/pars_tcm_pulser.py | 6 ++---- 11 files changed, 87 insertions(+), 57 deletions(-) diff --git a/Snakefile b/Snakefile index bee9673..4d732bf 100644 --- a/Snakefile +++ b/Snakefile @@ -22,7 +22,7 @@ from scripts.util.utils import ( filelist_path, metadata_path, tmp_log_path, - pars_path + pars_path, ) from datetime import datetime from collections import OrderedDict @@ -54,18 +54,13 @@ wildcard_constraints: include: "rules/common.smk" include: "rules/main.smk" - include: "rules/tcm.smk" - include: "rules/dsp.smk" include: "rules/psp.smk" - include: "rules/hit.smk" include: "rules/pht.smk" - include: "rules/evt.smk" include: "rules/skm.smk" - include: "rules/blinding_calibration.smk" include: "rules/qc_phy.smk" @@ -96,7 +91,6 @@ onstart: get_pattern_tier_raw(setup), {"cal": ["par_dsp"], "lar": ["par_dsp"]}, ) - onsuccess: diff --git a/rules/qc_phy.smk b/rules/qc_phy.smk index f4f3b7e..10eceb9 100644 --- a/rules/qc_phy.smk +++ 
b/rules/qc_phy.smk @@ -130,6 +130,7 @@ for key, items in ordered.items(): rule_order_list.append(fallback_qc_rule.name) workflow._ruleorder.add(*rule_order_list) # [::-1] + rule build_plts_pht_phy: input: lambda wildcards: read_filelist_plts_cal_channel(wildcards, "pht_qcphy"), @@ -147,9 +148,9 @@ rule build_plts_pht_phy: rule build_pars_pht_phy: input: infiles=lambda wildcards: read_filelist_pars_cal_channel(wildcards, "pht_qcphy"), - plts=get_pattern_plts(setup, "pht" , "qc_phy"), + plts=get_pattern_plts(setup, "pht", "qc_phy"), output: - get_pattern_pars(setup, "pht", name= "qc_phy", check_in_cycle=check_in_cycle), + get_pattern_pars(setup, "pht", name="qc_phy", check_in_cycle=check_in_cycle), group: "merge-hit" shell: @@ -157,4 +158,3 @@ rule build_pars_pht_phy: f"{basedir}/../scripts/merge_channels.py " "--input {input.infiles} " "--output {output} " - diff --git a/scripts/build_evt.py b/scripts/build_evt.py index 9fe9724..f109871 100644 --- a/scripts/build_evt.py +++ b/scripts/build_evt.py @@ -58,7 +58,9 @@ def replace_evt_with_key(dic, new_key): # load in config configs = LegendMetadata(path=args.configs) if args.tier == "evt" or args.tier == "pet": - config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_evt"]["inputs"] + config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_evt"][ + "inputs" + ] evt_config_file = config_dict["evt_config"] else: msg = "unknown tier" @@ -69,7 +71,7 @@ def replace_evt_with_key(dic, new_key): if isinstance(evt_config_file, dict): evt_config = {} - for key, _evt_config in evt_config_file.items(): + for _evt_config in evt_config_file.values(): if _evt_config is not None: _evt_config = Props.read_from(_evt_config) # block for snakemake to fill in channel lists diff --git a/scripts/pars_hit_ecal.py b/scripts/pars_hit_ecal.py index c859eea..4efc19f 100644 --- a/scripts/pars_hit_ecal.py +++ b/scripts/pars_hit_ecal.py @@ -23,7 +23,7 @@ from legendmeta.catalog import Props from matplotlib.colors import LogNorm from pygama.math.distributions import nb_poly -from pygama.pargen.data_cleaning import get_tcm_pulser_ids +from pygama.pargen.data_cleaning import get_mode_stdev, get_tcm_pulser_ids from pygama.pargen.energy_cal import FWHMLinear, FWHMQuadratic, HPGeCalibration from pygama.pargen.utils import load_data from scipy.stats import binned_statistic @@ -360,6 +360,7 @@ def baseline_tracking_plots(files, lh5_path, plot_options=None): plot_dict[key] = item["function"](data) return plot_dict + def monitor_parameters(files, lh5_path, parameters): data = lh5.read_as(lh5_path, files, "pd", field_mask=parameters) out_dict = {} @@ -368,6 +369,7 @@ def monitor_parameters(files, lh5_path, parameters): out_dict[param] = {"mode": mode, "stdev": stdev} return out_dict + def get_results_dict(ecal_class, data, cal_energy_param, selection_string): if np.isnan(ecal_class.pars).all(): return {} @@ -627,19 +629,29 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): selection_string, ) plot_dict[cal_energy_param] = param_plot_dict - - for peak_dict in full_object_dict[cal_energy_param].results["hpge_fit_energy_peaks_1"]["peak_parameters"].values(): + + for peak_dict in ( + full_object_dict[cal_energy_param] + .results["hpge_fit_energy_peaks_1"]["peak_parameters"] + .values() + ): peak_dict["function"] = peak_dict["function"].name peak_dict["parameters"] = peak_dict["parameters"].to_dict() peak_dict["uncertainties"] = peak_dict["uncertainties"].to_dict() - for peak_dict in 
full_object_dict[cal_energy_param].results["hpge_fit_energy_peaks"]["peak_parameters"].values(): + for peak_dict in ( + full_object_dict[cal_energy_param] + .results["hpge_fit_energy_peaks"]["peak_parameters"] + .values() + ): peak_dict["function"] = peak_dict["function"].name peak_dict["parameters"] = peak_dict["parameters"].to_dict() peak_dict["uncertainties"] = peak_dict["uncertainties"].to_dict() - if "monitor_parameters" in kwarg_dict: - monitor_dict = monitor_parameters(files, f"{args.channel}/dsp", kwarg_dict["monitor_parameters"]) - results_dict.update({"monitoring_parameters":monitor_dict}) + if "monitoring_parameters" in kwarg_dict: + monitor_dict = monitor_parameters( + files, f"{args.channel}/dsp", kwarg_dict["monitor_parameters"] + ) + results_dict.update({"monitoring_parameters": monitor_dict}) # get baseline plots and save all plots to file if args.plot_path: diff --git a/scripts/pars_hit_qc.py b/scripts/pars_hit_qc.py index 09e14c6..110dfa9 100644 --- a/scripts/pars_hit_qc.py +++ b/scripts/pars_hit_qc.py @@ -16,7 +16,11 @@ from legendmeta import LegendMetadata from legendmeta.catalog import Props from lgdo.lh5 import ls -from pygama.pargen.data_cleaning import generate_cuts, get_keys, get_tcm_pulser_ids, generate_cut_classifiers +from pygama.pargen.data_cleaning import ( + generate_cut_classifiers, + get_keys, + get_tcm_pulser_ids, +) from pygama.pargen.utils import load_data log = logging.getLogger(__name__) diff --git a/scripts/pars_pht_aoecal.py b/scripts/pars_pht_aoecal.py index 34fa8f8..30e1a9e 100644 --- a/scripts/pars_pht_aoecal.py +++ b/scripts/pars_pht_aoecal.py @@ -47,7 +47,7 @@ def get_results_dict(aoe_class): def fill_plot_dict(aoe_class, data, plot_options, plot_dict=None): - if plot_dict is None: + if plot_dict is None: plot_dict = {} for key, item in plot_options.items(): if item["options"] is not None: @@ -345,7 +345,7 @@ def eres_func(x): except KeyError: aoe_obj.eres_func = {} else: - out_dict = {tstamp:None for tstamp in cal_dict} + out_dict = {tstamp: None for tstamp in cal_dict} plot_dict = {} aoe_obj = None diff --git a/scripts/pars_pht_lqcal.py b/scripts/pars_pht_lqcal.py index 9937281..c5ba80b 100644 --- a/scripts/pars_pht_lqcal.py +++ b/scripts/pars_pht_lqcal.py @@ -38,7 +38,7 @@ def get_results_dict(lq_class): def fill_plot_dict(lq_class, data, plot_options, plot_dict=None): - if plot_dict is None: + if plot_dict is None: plot_dict = {} for key, item in plot_options.items(): if item["options"] is not None: diff --git a/scripts/pars_pht_partcal.py b/scripts/pars_pht_partcal.py index e11f965..7063f8a 100644 --- a/scripts/pars_pht_partcal.py +++ b/scripts/pars_pht_partcal.py @@ -128,7 +128,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): "eres_quadratic": fwhm_quad, "fitted_peaks": ecal_class.peaks_kev.tolist(), "pk_fits": pk_dict, - "peak_param":results_dict["peak_param"] + "peak_param": results_dict["peak_param"], } @@ -392,7 +392,11 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): ) plot_dict[cal_energy_param] = param_plot_dict - for peak_dict in full_object_dict[cal_energy_param].results["hpge_fit_energy_peaks"]["peak_parameters"].values(): + for peak_dict in ( + full_object_dict[cal_energy_param] + .results["hpge_fit_energy_peaks"]["peak_parameters"] + .values() + ): peak_dict["function"] = peak_dict["function"].name peak_dict["parameters"] = peak_dict["parameters"].to_dict() peak_dict["uncertainties"] = peak_dict["uncertainties"].to_dict() @@ -436,7 +440,7 @@ def 
get_results_dict(ecal_class, data, cal_energy_param, selection_string): fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(out)) final_hit_dict = { "pars": cal_dict[fk.timestamp], - "results": dict(**results_dicts[fk.timestamp], partition_ecal= ecal_results) + "results": dict(**results_dicts[fk.timestamp], partition_ecal=ecal_results), } pathlib.Path(os.path.dirname(out)).mkdir(parents=True, exist_ok=True) with open(out, "w") as w: @@ -444,7 +448,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): for out in args.fit_results: fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(out)) - final_object_dict = dict(**object_dict[fk.timestamp], partition_ecal = full_object_dict) + final_object_dict = dict(**object_dict[fk.timestamp], partition_ecal=full_object_dict) pathlib.Path(os.path.dirname(out)).mkdir(parents=True, exist_ok=True) with open(out, "wb") as w: pkl.dump(final_object_dict, w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_pht_qc.py b/scripts/pars_pht_qc.py index c9801be..8eff510 100644 --- a/scripts/pars_pht_qc.py +++ b/scripts/pars_pht_qc.py @@ -16,9 +16,12 @@ from legendmeta import LegendMetadata from legendmeta.catalog import Props from lgdo.lh5 import ls -from pygama.pargen.data_cleaning import get_tcm_pulser_ids, generate_cuts, get_keys, generate_cut_classifiers +from pygama.pargen.data_cleaning import ( + generate_cut_classifiers, + get_keys, + get_tcm_pulser_ids, +) from pygama.pargen.utils import load_data -from util.FileKey import ChannelProcKey, ProcessingFileKey log = logging.getLogger(__name__) @@ -29,8 +32,12 @@ argparser = argparse.ArgumentParser() argparser.add_argument("--cal_files", help="cal_files", nargs="*", type=str) argparser.add_argument("--fft_files", help="fft_files", nargs="*", type=str) - argparser.add_argument("--tcm_filelist", help="tcm_filelist", nargs="*", type=str, required=False) - argparser.add_argument("--pulser_files", help="pulser_file", nargs="*", type=str, required=False) + argparser.add_argument( + "--tcm_filelist", help="tcm_filelist", nargs="*", type=str, required=False + ) + argparser.add_argument( + "--pulser_files", help="pulser_file", nargs="*", type=str, required=False + ) argparser.add_argument("--configs", help="config", type=str, required=True) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) @@ -40,7 +47,12 @@ argparser.add_argument("--log", help="log_file", type=str) argparser.add_argument("--plot_path", help="plot_path", type=str, nargs="*", required=False) - argparser.add_argument("--save_path", help="save_path", type=str, nargs="*", ) + argparser.add_argument( + "--save_path", + help="save_path", + type=str, + nargs="*", + ) args = argparser.parse_args() logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") @@ -56,7 +68,6 @@ channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"] channel_dict = channel_dict["pars_pht_qc"]["inputs"]["qc_config"][args.channel] - # sort files in dictionary where keys are first timestamp from run if isinstance(args.cal_files, list): cal_files = [] @@ -71,8 +82,6 @@ np.unique(cal_files) ) # need this as sometimes files get double counted as it somehow puts in the p%-* filelist and individual runs also - - kwarg_dict = Props.read_from(channel_dict) kwarg_dict_cal = kwarg_dict["cal_fields"] @@ -107,7 +116,7 @@ if args.pulser_files: mask = np.array([], dtype=bool) for file in args.pulser_files: - with open(file, 'r') as f: + with open(file) as f: pulser_dict = 
json.load(f) pulser_mask = np.array(pulser_dict["mask"]) mask = np.append(mask, pulser_mask) @@ -166,7 +175,6 @@ kwarg_dict_fft = kwarg_dict["fft_fields"] if len(args.fft_files) > 0: - # sort files in dictionary where keys are first timestamp from run if isinstance(args.fft_files, list): fft_files = [] @@ -181,7 +189,7 @@ np.unique(fft_files) ) # need this as sometimes files get double counted as it somehow puts in the p%-* filelist and individual runs also - if len(fft_files)>0: + if len(fft_files) > 0: fft_data = load_data( fft_files, f"{args.channel}/dsp", diff --git a/scripts/pars_pht_qc_phy.py b/scripts/pars_pht_qc_phy.py index 804ffd4..8fe0a1f 100644 --- a/scripts/pars_pht_qc_phy.py +++ b/scripts/pars_pht_qc_phy.py @@ -6,20 +6,20 @@ import os import pathlib import pickle as pkl -import re import warnings os.environ["PYGAMA_PARALLEL"] = "false" os.environ["PYGAMA_FASTMATH"] = "false" +import lgdo.lh5 as lh5 import numpy as np from legendmeta import LegendMetadata from legendmeta.catalog import Props from lgdo.lh5 import ls -import lgdo.lh5 as lh5 -from pygama.pargen.data_cleaning import get_tcm_pulser_ids, generate_cuts, get_keys, generate_cut_classifiers -from pygama.pargen.utils import load_data -from util.FileKey import ChannelProcKey, ProcessingFileKey +from pygama.pargen.data_cleaning import ( + generate_cut_classifiers, + get_keys, +) log = logging.getLogger(__name__) @@ -38,7 +38,12 @@ argparser.add_argument("--log", help="log_file", type=str) argparser.add_argument("--plot_path", help="plot_path", type=str, nargs="*", required=False) - argparser.add_argument("--save_path", help="save_path", type=str, nargs="*", ) + argparser.add_argument( + "--save_path", + help="save_path", + type=str, + nargs="*", + ) args = argparser.parse_args() logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") @@ -68,17 +73,19 @@ else: run_files = sorted(np.unique(run_files)) phy_files += run_files - bls = sto.read("ch1027200/dsp/", run_files, field_mask = ["wf_max", "bl_mean"])[0] - puls = sto.read("ch1027201/dsp/", run_files, field_mask = ["trapTmax"])[0] - bl_idxs = ((bls["wf_max"].nda - bls["bl_mean"].nda)>1000) &(puls["trapTmax"].nda<200) + bls = sto.read("ch1027200/dsp/", run_files, field_mask=["wf_max", "bl_mean"])[0] + puls = sto.read("ch1027201/dsp/", run_files, field_mask=["trapTmax"])[0] + bl_idxs = ((bls["wf_max"].nda - bls["bl_mean"].nda) > 1000) & ( + puls["trapTmax"].nda < 200 + ) bl_mask = np.append(bl_mask, bl_idxs) else: with open(args.phy_files) as f: phy_files = f.read().splitlines() phy_files = sorted(np.unique(phy_files)) - bls = sto.read("ch1027200/dsp/", phy_files, field_mask = ["wf_max", "bl_mean"])[0] - puls = sto.read("ch1027201/dsp/", phy_files, field_mask = ["trapTmax"])[0] - bl_mask = ((bls["wf_max"].nda - bls["bl_mean"].nda)>1000) &(puls["trapTmax"].nda<200) + bls = sto.read("ch1027200/dsp/", phy_files, field_mask=["wf_max", "bl_mean"])[0] + puls = sto.read("ch1027201/dsp/", phy_files, field_mask=["trapTmax"])[0] + bl_mask = ((bls["wf_max"].nda - bls["bl_mean"].nda) > 1000) & (puls["trapTmax"].nda < 200) kwarg_dict = Props.read_from(channel_dict) kwarg_dict_fft = kwarg_dict["fft_fields"] @@ -91,8 +98,9 @@ kwarg_dict_fft["cut_parameters"], ) - data = sto.read(f"{args.channel}/dsp/", phy_files, - field_mask=cut_fields, idx = np.where(bl_mask)[0])[0] + data = sto.read( + f"{args.channel}/dsp/", phy_files, field_mask=cut_fields, idx=np.where(bl_mask)[0] + )[0] hit_dict, plot_dict = generate_cut_classifiers( data, @@ -107,7 +115,7 @@ for file in 
args.save_path:
         pathlib.Path(os.path.dirname(file)).mkdir(parents=True, exist_ok=True)
         with open(file, "w") as f:
-            json.dump({"pars":{"operations":hit_dict}}, f, indent=4)
+            json.dump({"pars": {"operations": hit_dict}}, f, indent=4)
 
     if args.plot_path:
         for file in args.plot_path:
diff --git a/scripts/pars_tcm_pulser.py b/scripts/pars_tcm_pulser.py
index 3c6d88a..5a6a336 100644
--- a/scripts/pars_tcm_pulser.py
+++ b/scripts/pars_tcm_pulser.py
@@ -3,7 +3,6 @@
 import logging
 import os
 import pathlib
-import pickle as pkl
 
 os.environ["LGDO_CACHE"] = "false"
 os.environ["LGDO_BOUNDSCHECK"] = "false"
@@ -16,7 +15,6 @@
 from legendmeta.catalog import Props
 from pygama.pargen.data_cleaning import get_tcm_pulser_ids
 
-
 argparser = argparse.ArgumentParser()
 argparser.add_argument("--configs", help="configs path", type=str, required=True)
 argparser.add_argument("--log", help="log file", type=str)
@@ -57,8 +55,8 @@
     tcm_files = sorted(np.unique(tcm_files))
     ids, mask = get_tcm_pulser_ids(
         tcm_files, args.channel, kwarg_dict.pop("pulser_multiplicity_threshold")
-    )
+)
 
 pathlib.Path(os.path.dirname(args.pulser_file)).mkdir(parents=True, exist_ok=True)
 with open(args.pulser_file, "w") as f:
-    json.dump({"idxs": ids.tolist(), "mask": mask.tolist()}, f, indent=4)
\ No newline at end of file
+    json.dump({"idxs": ids.tolist(), "mask": mask.tolist()}, f, indent=4)

From efbee94cb1d2ed003cdc42224be772f03ef52a2c Mon Sep 17 00:00:00 2001
From: ggmarshall
Date: Sun, 31 Mar 2024 19:58:59 +0200
Subject: [PATCH 045/103] fix monitoring fields

---
 scripts/pars_hit_ecal.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/pars_hit_ecal.py b/scripts/pars_hit_ecal.py
index 4efc19f..07a3c8f 100644
--- a/scripts/pars_hit_ecal.py
+++ b/scripts/pars_hit_ecal.py
@@ -649,7 +649,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string):
 
     if "monitoring_parameters" in kwarg_dict:
         monitor_dict = monitor_parameters(
-            files, f"{args.channel}/dsp", kwarg_dict["monitor_parameters"]
+            files, f"{args.channel}/dsp", kwarg_dict["monitoring_parameters"]
         )
         results_dict.update({"monitoring_parameters": monitor_dict})
 

From 174fd8984077490d47290f7b39e9077414c97e83 Mon Sep 17 00:00:00 2001
From: ggmarshall
Date: Sun, 31 Mar 2024 20:01:33 +0200
Subject: [PATCH 046/103] update packages

---
 templates/config.json | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/templates/config.json b/templates/config.json
index fcb4a8d..1884061 100644
--- a/templates/config.json
+++ b/templates/config.json
@@ -53,11 +53,11 @@
       "arg": "/data2/public/prodenv/containers/legendexp_legend-base_latest_20221021210158.sif"
     },
     "pkg_versions": {
-      "pygama": "pygama==1.6.0a1",
-      "pylegendmeta": "pylegendmeta==0.9.0a2",
-      "dspeed": "dspeed==1.3.0a4",
-      "legend-pydataobj": "legend-pydataobj==1.5.0a5",
-      "legend-daq2lh5": "legend-daq2lh5==1.2.0a1"
+      "pygama": "pygama==1.6.0",
+      "pylegendmeta": "pylegendmeta==0.9.0",
+      "dspeed": "dspeed==1.3.0",
+      "legend-pydataobj": "legend-pydataobj==1.5.1",
+      "legend-daq2lh5": "legend-daq2lh5==1.2.0"
     }
   }
 }

From f6028b37ab1e56719781ab4239cb475fc97ac5ae Mon Sep 17 00:00:00 2001
From: ggmarshall
Date: Mon, 1 Apr 2024 15:07:50 +0200
Subject: [PATCH 047/103] fix muon indentations

---
 scripts/build_evt.py | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/scripts/build_evt.py b/scripts/build_evt.py
index f109871..c3560bf 100644
--- a/scripts/build_evt.py
+++ b/scripts/build_evt.py
@@ -149,22 +149,23 @@ def replace_evt_with_key(dic,
new_key): chans = [] muon_config["channels"][field] = chans -muon_table = build_evt( - f_tcm=args.tcm_file, - f_dsp=args.dsp_file, - f_hit=args.hit_file, - f_evt=None, - evt_config=muon_config, - evt_group="evt", - tcm_group="hardware_tcm_2", - dsp_group="dsp", - hit_group="hit", - tcm_id_table_pattern="ch{}", -) + muon_table = build_evt( + f_tcm=args.tcm_file, + f_dsp=args.dsp_file, + f_hit=args.hit_file, + f_evt=None, + evt_config=muon_config, + evt_group="evt", + tcm_group="hardware_tcm_2", + dsp_group="dsp", + hit_group="hit", + tcm_id_table_pattern="ch{}", + ) + muon_tbl = Table(col_dict={"muon": muon_table}) + sto.write(obj=muon_tbl, name="evt2", lh5_file=temp_output, wo_mode="a") tbl = Table(col_dict=tables) sto.write(obj=tbl, name="evt", lh5_file=temp_output, wo_mode="a") -sto.write(obj=muon_table, name="muon", lh5_file=temp_output, wo_mode="a") os.rename(temp_output, args.output) From 56ea627bef0b260fee131d7457054ceef0b3217d Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 1 Apr 2024 17:43:22 +0200 Subject: [PATCH 048/103] fix config loop --- scripts/build_evt.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/build_evt.py b/scripts/build_evt.py index c3560bf..e060ce7 100644 --- a/scripts/build_evt.py +++ b/scripts/build_evt.py @@ -71,7 +71,7 @@ def replace_evt_with_key(dic, new_key): if isinstance(evt_config_file, dict): evt_config = {} - for _evt_config in evt_config_file.values(): + for key, _evt_config in evt_config_file.items(): if _evt_config is not None: _evt_config = Props.read_from(_evt_config) # block for snakemake to fill in channel lists @@ -89,6 +89,7 @@ def replace_evt_with_key(dic, new_key): else: chans = [] _evt_config["channels"][field] = chans + evt_config[key] = _evt_config else: evt_config = {"all": Props.read_from(evt_config_file)} # block for snakemake to fill in channel lists From 829139384fbda28fea76cd9a2cff6a0e35b817dc Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 1 Apr 2024 17:48:02 +0200 Subject: [PATCH 049/103] make check files temp --- rules/pht.smk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rules/pht.smk b/rules/pht.smk index 028e0e3..142ca72 100644 --- a/rules/pht.smk +++ b/rules/pht.smk @@ -35,7 +35,7 @@ rule pht_checkpoint: input: files=lambda wildcards: read_filelist_cal(wildcards, intier), output: - get_pattern_pars_tmp_channel(setup, "pht", "check"), + temp(get_pattern_pars_tmp_channel(setup, "pht", "check")), shell: "touch {output}" From b7f652bac148ef9e24a0be9aee68ed7f1c1fd07e Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 1 Apr 2024 22:14:41 +0200 Subject: [PATCH 050/103] check if hardware_tcm_2 in tcm before running muon evt build --- scripts/build_evt.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/scripts/build_evt.py b/scripts/build_evt.py index e060ce7..7c2ed11 100644 --- a/scripts/build_evt.py +++ b/scripts/build_evt.py @@ -149,21 +149,21 @@ def replace_evt_with_key(dic, new_key): else: chans = [] muon_config["channels"][field] = chans - - muon_table = build_evt( - f_tcm=args.tcm_file, - f_dsp=args.dsp_file, - f_hit=args.hit_file, - f_evt=None, - evt_config=muon_config, - evt_group="evt", - tcm_group="hardware_tcm_2", - dsp_group="dsp", - hit_group="hit", - tcm_id_table_pattern="ch{}", - ) - muon_tbl = Table(col_dict={"muon": muon_table}) - sto.write(obj=muon_tbl, name="evt2", lh5_file=temp_output, wo_mode="a") + if "hardware_tcm_2" in lh5.ls(args.tcm_file): + muon_table = build_evt( + f_tcm=args.tcm_file, + 
f_dsp=args.dsp_file, + f_hit=args.hit_file, + f_evt=None, + evt_config=muon_config, + evt_group="evt", + tcm_group="hardware_tcm_2", + dsp_group="dsp", + hit_group="hit", + tcm_id_table_pattern="ch{}", + ) + muon_tbl = Table(col_dict={"muon": muon_table}) + sto.write(obj=muon_tbl, name="evt2", lh5_file=temp_output, wo_mode="a") tbl = Table(col_dict=tables) sto.write(obj=tbl, name="evt", lh5_file=temp_output, wo_mode="a") From 340e36df704774f062cc4f80b2577bed159d23e2 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 2 Apr 2024 00:34:30 +0200 Subject: [PATCH 051/103] remove threshold for fft files --- scripts/pars_hit_qc.py | 3 --- scripts/pars_pht_qc.py | 3 --- 2 files changed, 6 deletions(-) diff --git a/scripts/pars_hit_qc.py b/scripts/pars_hit_qc.py index 110dfa9..2d6e47f 100644 --- a/scripts/pars_hit_qc.py +++ b/scripts/pars_hit_qc.py @@ -148,9 +148,6 @@ f"{args.channel}/dsp", {}, [*list(kwarg_dict_fft["cut_parameters"]), "timestamp", "trapTmax"], - threshold=kwarg_dict_fft["threshold"], - return_selection_mask=False, - cal_energy_param="trapTmax", ) hit_dict_fft, plot_dict_fft = generate_cut_classifiers( diff --git a/scripts/pars_pht_qc.py b/scripts/pars_pht_qc.py index 8eff510..6613084 100644 --- a/scripts/pars_pht_qc.py +++ b/scripts/pars_pht_qc.py @@ -195,9 +195,6 @@ f"{args.channel}/dsp", {}, [*list(kwarg_dict_fft["cut_parameters"]), "timestamp", "trapTmax"], - threshold=kwarg_dict_fft["threshold"], - return_selection_mask=False, - cal_energy_param="trapTmax", ) hit_dict_fft, plot_dict_fft = generate_cut_classifiers( From ecc5481b59688b11222eb0add5b1204360b7d888 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 2 Apr 2024 00:41:56 +0200 Subject: [PATCH 052/103] bugfix to use correct fft data --- scripts/pars_hit_qc.py | 2 +- scripts/pars_pht_qc.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/pars_hit_qc.py b/scripts/pars_hit_qc.py index 2d6e47f..08e317b 100644 --- a/scripts/pars_hit_qc.py +++ b/scripts/pars_hit_qc.py @@ -151,7 +151,7 @@ ) hit_dict_fft, plot_dict_fft = generate_cut_classifiers( - data, + fft_data, kwarg_dict_fft["cut_parameters"], kwarg_dict.get("rounding", 4), display=1 if args.plot_path else 0, diff --git a/scripts/pars_pht_qc.py b/scripts/pars_pht_qc.py index 6613084..1e2f712 100644 --- a/scripts/pars_pht_qc.py +++ b/scripts/pars_pht_qc.py @@ -198,7 +198,7 @@ ) hit_dict_fft, plot_dict_fft = generate_cut_classifiers( - data, + fft_data, kwarg_dict_fft["cut_parameters"], kwarg_dict.get("rounding", 4), display=1 if args.plot_path else 0, From 0898a47bf1c6d90a69e872919795e93b1c5ecadc Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 2 Apr 2024 00:55:54 +0200 Subject: [PATCH 053/103] fix fft field loading --- scripts/pars_hit_qc.py | 10 +++++++++- scripts/pars_pht_qc.py | 10 +++++++++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/scripts/pars_hit_qc.py b/scripts/pars_hit_qc.py index 08e317b..a214941 100644 --- a/scripts/pars_hit_qc.py +++ b/scripts/pars_hit_qc.py @@ -143,11 +143,19 @@ kwarg_dict_fft = kwarg_dict["fft_fields"] if len(args.fft_files) > 0: + fft_fields = get_keys( + [ + key.replace(f"{args.channel}/dsp/", "") + for key in ls(args.fft_files[0], f"{args.channel}/dsp/") + ], + kwarg_dict_fft["cut_parameters"], + ) + fft_data = load_data( args.fft_files, f"{args.channel}/dsp", {}, - [*list(kwarg_dict_fft["cut_parameters"]), "timestamp", "trapTmax"], + [*fft_fields, "timestamp", "trapTmax"], ) hit_dict_fft, plot_dict_fft = generate_cut_classifiers( diff --git a/scripts/pars_pht_qc.py 
b/scripts/pars_pht_qc.py index 1e2f712..1c9bc19 100644 --- a/scripts/pars_pht_qc.py +++ b/scripts/pars_pht_qc.py @@ -190,11 +190,19 @@ ) # need this as sometimes files get double counted as it somehow puts in the p%-* filelist and individual runs also if len(fft_files) > 0: + fft_fields = get_keys( + [ + key.replace(f"{args.channel}/dsp/", "") + for key in ls(fft_files[0], f"{args.channel}/dsp/") + ], + kwarg_dict_fft["cut_parameters"], + ) + fft_data = load_data( fft_files, f"{args.channel}/dsp", {}, - [*list(kwarg_dict_fft["cut_parameters"]), "timestamp", "trapTmax"], + [*fft_fields, "timestamp", "trapTmax"], ) hit_dict_fft, plot_dict_fft = generate_cut_classifiers( From d25354d7dceb4144014edc39ed1de33db706e8b1 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 2 Apr 2024 16:27:20 +0200 Subject: [PATCH 054/103] higher tol for ac channels --- scripts/pars_hit_ecal.py | 6 ++++-- scripts/pars_pht_partcal.py | 4 +++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/scripts/pars_hit_ecal.py b/scripts/pars_hit_ecal.py index 07a3c8f..0f9138d 100644 --- a/scripts/pars_hit_ecal.py +++ b/scripts/pars_hit_ecal.py @@ -550,7 +550,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): (e_uncal > np.nanpercentile(e_uncal, 95)) & (e_uncal < np.nanpercentile(e_uncal, 99.9)) ], - dx=1, + dx=9, range=[np.nanpercentile(e_uncal, 95), np.nanpercentile(e_uncal, 99.9)], ) @@ -561,7 +561,9 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): guess, kwarg_dict.get("deg", 0), ) - full_object_dict[cal_energy_param].hpge_get_energy_peaks(e_uncal) + full_object_dict[cal_energy_param].hpge_get_energy_peaks( + e_uncal, etol_kev=5 if det_status == "on" else 10 + ) got_peaks_kev = full_object_dict[cal_energy_param].peaks_kev.copy() full_object_dict[cal_energy_param].hpge_fit_energy_peaks( e_uncal, diff --git a/scripts/pars_pht_partcal.py b/scripts/pars_pht_partcal.py index 7063f8a..623be81 100644 --- a/scripts/pars_pht_partcal.py +++ b/scripts/pars_pht_partcal.py @@ -332,7 +332,9 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): full_object_dict[cal_energy_param] = HPGeCalibration( energy_param, glines, 1, kwarg_dict.get("deg", 0), fixed={1: 1} ) - full_object_dict[cal_energy_param].hpge_get_energy_peaks(energy) + full_object_dict[cal_energy_param].hpge_get_energy_peaks( + energy, etol_kev=5 if det_status == "on" else 10 + ) full_object_dict[cal_energy_param].hpge_fit_energy_peaks( energy, peak_pars=pk_pars, From 824c4f0435d459a0060afc0f0e2554f20dcc0ee9 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 2 Apr 2024 16:41:44 +0200 Subject: [PATCH 055/103] add bin widths --- scripts/pars_hit_ecal.py | 2 ++ scripts/pars_pht_partcal.py | 1 + 2 files changed, 3 insertions(+) diff --git a/scripts/pars_hit_ecal.py b/scripts/pars_hit_ecal.py index 0f9138d..f94f803 100644 --- a/scripts/pars_hit_ecal.py +++ b/scripts/pars_hit_ecal.py @@ -573,6 +573,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): n_events=kwarg_dict.get("n_events", None), allowed_p_val=kwarg_dict.get("p_val", 0), update_cal_pars=bool(det_status == "on"), + bin_width_kev=0.5, ) full_object_dict[cal_energy_param].hpge_fit_energy_peaks( e_uncal, @@ -582,6 +583,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): n_events=kwarg_dict.get("n_events", None), allowed_p_val=kwarg_dict.get("p_val", 0), update_cal_pars=False, + bin_width_kev=0.5, ) full_object_dict[cal_energy_param].get_energy_res_curve( diff --git 
a/scripts/pars_pht_partcal.py b/scripts/pars_pht_partcal.py index 623be81..73461f4 100644 --- a/scripts/pars_pht_partcal.py +++ b/scripts/pars_pht_partcal.py @@ -342,6 +342,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): n_events=kwarg_dict.get("n_events", None), allowed_p_val=kwarg_dict.get("p_val", 0), update_cal_pars=bool(det_status == "on"), + bin_width_kev=0.25, ) full_object_dict[cal_energy_param].get_energy_res_curve( From 667c06716172c41daa5e9e37ee0dec173f3d730d Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Wed, 3 Apr 2024 13:40:55 +0200 Subject: [PATCH 056/103] add muon flag code --- scripts/build_evt.py | 40 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/scripts/build_evt.py b/scripts/build_evt.py index 7c2ed11..f1897fa 100644 --- a/scripts/build_evt.py +++ b/scripts/build_evt.py @@ -9,7 +9,7 @@ import numpy as np from legendmeta import LegendMetadata from legendmeta.catalog import Props -from lgdo.types import Table +from lgdo.types import Array, Table from pygama.evt.build_evt import build_evt sto = lh5.LH5Store() @@ -28,6 +28,22 @@ def replace_evt_with_key(dic, new_key): return dic +def find_matching_values_with_delay(arr1, arr2, jit_delay): + matching_values = [] + + # Create an array with all possible delay values + delays = np.arange(0, int(1e9 * jit_delay)) * jit_delay + + for delay in delays: + arr2_delayed = arr2 + delay + + # Find matching values and indices + mask = np.isin(arr1, arr2_delayed, assume_unique=True) + matching_values.extend(arr1[mask]) + + return np.unique(matching_values) + + argparser = argparse.ArgumentParser() argparser.add_argument("--hit_file", help="hit file", type=str) argparser.add_argument("--dsp_file", help="dsp file", type=str) @@ -133,7 +149,8 @@ def replace_evt_with_key(dic, new_key): ) if "muon_config" in config_dict and config_dict["muon_config"] is not None: - muon_config = Props.read_from(config_dict["muon_config"]) + muon_config = Props.read_from(config_dict["muon_config"]["evt_config"]) + field_config = Props.read_from(config_dict["muon_config"]["field_config"]) # block for snakemake to fill in channel lists for field, dic in muon_config["channels"].items(): if isinstance(dic, dict): @@ -149,6 +166,10 @@ def replace_evt_with_key(dic, new_key): else: chans = [] muon_config["channels"][field] = chans + + trigger_timestamp = tables[field_config["ged_timestamp"]["table"]][ + field_config["ged_timestamp"]["field"] + ].nda if "hardware_tcm_2" in lh5.ls(args.tcm_file): muon_table = build_evt( f_tcm=args.tcm_file, @@ -165,6 +186,21 @@ def replace_evt_with_key(dic, new_key): muon_tbl = Table(col_dict={"muon": muon_table}) sto.write(obj=muon_tbl, name="evt2", lh5_file=temp_output, wo_mode="a") + muon_timestamp = muon_table[field_config["muon_timestamp"]["field"]].nda + muon_tbl_flag = muon_table[field_config["muon_flag"]["field"]].nda + if len(muon_timestamp[muon_tbl_flag]) > 0: + is_muon_veto_triggered = find_matching_values_with_delay( + trigger_timestamp, muon_timestamp[muon_tbl_flag], field_config["jitter"] + ) + muon_flag = np.isin(trigger_timestamp, is_muon_veto_triggered) + else: + muon_flag = np.zeros(len(trigger_timestamp), dtype=bool) + else: + muon_flag = np.zeros(len(trigger_timestamp), dtype=bool) + tables[field_config["output_field"]["table"]].add_column( + field_config["output_field"]["field"], Array(muon_flag) + ) + tbl = Table(col_dict=tables) sto.write(obj=tbl, name="evt", lh5_file=temp_output, wo_mode="a") From 
35c91d2bfe23f091b45ad61b46b8bdc560b57327 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sun, 7 Apr 2024 00:41:17 +0200 Subject: [PATCH 057/103] use pulser for cut determ --- scripts/pars_hit_qc.py | 2 +- scripts/pars_pht_qc.py | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/scripts/pars_hit_qc.py b/scripts/pars_hit_qc.py index a214941..3eeef8d 100644 --- a/scripts/pars_hit_qc.py +++ b/scripts/pars_hit_qc.py @@ -109,7 +109,7 @@ msg = "No pulser file or tcm filelist provided" raise ValueError(msg) - data["is_pulser"] = mask[threshold_mask] + data = data[mask[threshold_mask]] if "initial_cal_cuts" in kwarg_dict: init_cal = kwarg_dict["initial_cal_cuts"] diff --git a/scripts/pars_pht_qc.py b/scripts/pars_pht_qc.py index 1c9bc19..d427fe3 100644 --- a/scripts/pars_pht_qc.py +++ b/scripts/pars_pht_qc.py @@ -135,7 +135,11 @@ msg = "No pulser file or tcm filelist provided" raise ValueError(msg) - data["is_pulser"] = mask[threshold_mask] + if len(mask[threshold_mask])==0: + mask= np.random.choice(len(data), 20000) + data = data[mask] + else: + data = data[mask[threshold_mask]] if "initial_cal_cuts" in kwarg_dict: init_cal = kwarg_dict["initial_cal_cuts"] From d8c7976b501cc52fa01e796f001fd55b33b8eb14 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sun, 7 Apr 2024 00:41:45 +0200 Subject: [PATCH 058/103] fix log names, update memory requirements --- rules/pht.smk | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/rules/pht.smk b/rules/pht.smk index 142ca72..c19e35e 100644 --- a/rules/pht.smk +++ b/rules/pht.smk @@ -105,7 +105,7 @@ for key, dataset in part.datasets.items(): group: "par-pht" resources: - mem_swap=len(part.get_filelists(partition, key, intier)) * 20, + mem_swap=len(part.get_filelists(partition, key, intier)) * 15, runtime=300, shell: "{swenv} python3 -B " @@ -151,7 +151,7 @@ rule build_pht_qc: hit_pars=temp(get_pattern_pars_tmp_channel(setup, "pht", "qc")), plot_file=temp(get_pattern_plts_tmp_channel(setup, "pht", "qc")), log: - get_pattern_log_channel(setup, "pars_pht_qc"), + get_pattern_log_channel(setup, "par_pht_qc"), group: "par-pht" resources: @@ -212,7 +212,7 @@ rule build_per_energy_calibration: ), plot_file=temp(get_pattern_plts_tmp_channel(setup, "pht", "energy_cal")), log: - get_pattern_log_channel(setup, "pars_pht_energy_cal"), + get_pattern_log_channel(setup, "par_pht_energy_cal"), group: "par-pht" resources: @@ -327,7 +327,7 @@ for key, dataset in part.datasets.items(): group: "par-pht" resources: - mem_swap=len(part.get_filelists(partition, key, intier)) * 20, + mem_swap=len(part.get_filelists(partition, key, intier)) * 15, runtime=300, shell: "{swenv} python3 -B " @@ -384,7 +384,7 @@ rule build_pht_energy_super_calibrations: ), plot_file=temp(get_pattern_plts_tmp_channel(setup, "pht", "partcal")), log: - get_pattern_log_channel(setup, "pars_pht_partcal"), + get_pattern_log_channel(setup, "par_pht_partcal"), group: "par-pht" resources: @@ -509,7 +509,7 @@ for key, dataset in part.datasets.items(): group: "par-pht" resources: - mem_swap=len(part.get_filelists(partition, key, intier)) * 20, + mem_swap=len(part.get_filelists(partition, key, intier)) * 15, runtime=300, shell: "{swenv} python3 -B " @@ -565,7 +565,7 @@ rule build_pht_aoe_calibrations: ), plot_file=temp(get_pattern_plts_tmp_channel(setup, "pht", "aoecal")), log: - get_pattern_log_channel(setup, "pars_pht_aoe_cal"), + get_pattern_log_channel(setup, "par_pht_aoe_cal"), group: "par-pht" resources: @@ -687,7 +687,7 @@ for key, dataset in 
part.datasets.items(): group: "par-pht" resources: - mem_swap=len(part.get_filelists(partition, key, intier)) * 20, + mem_swap=len(part.get_filelists(partition, key, intier)) * 15, runtime=300, shell: "{swenv} python3 -B " @@ -738,7 +738,7 @@ rule build_pht_lq_calibration: ), plot_file=temp(get_pattern_plts_tmp_channel(setup, "pht")), log: - get_pattern_log_channel(setup, "pars_pht_lq_cal"), + get_pattern_log_channel(setup, "par_pht_lq_cal"), group: "par-pht" resources: From 699ff4588cf46b8f3fd0c63e8b37c82bb3c79245 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sun, 7 Apr 2024 12:13:11 +0200 Subject: [PATCH 059/103] widen window for ac dets and fix for qc for det with no pulser --- Snakefile | 6 +++--- scripts/pars_hit_ecal.py | 2 +- scripts/pars_hit_qc.py | 6 +++++- scripts/pars_pht_qc.py | 4 ++-- 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/Snakefile b/Snakefile index 4d732bf..ae61549 100644 --- a/Snakefile +++ b/Snakefile @@ -55,9 +55,9 @@ wildcard_constraints: include: "rules/common.smk" include: "rules/main.smk" include: "rules/tcm.smk" -include: "rules/dsp.smk" -include: "rules/psp.smk" -include: "rules/hit.smk" +# include: "rules/dsp.smk" +# include: "rules/psp.smk" +# include: "rules/hit.smk" include: "rules/pht.smk" include: "rules/evt.smk" include: "rules/skm.smk" diff --git a/scripts/pars_hit_ecal.py b/scripts/pars_hit_ecal.py index f94f803..553c051 100644 --- a/scripts/pars_hit_ecal.py +++ b/scripts/pars_hit_ecal.py @@ -562,7 +562,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): kwarg_dict.get("deg", 0), ) full_object_dict[cal_energy_param].hpge_get_energy_peaks( - e_uncal, etol_kev=5 if det_status == "on" else 10 + e_uncal, etol_kev=5 if det_status == "on" else 20 ) got_peaks_kev = full_object_dict[cal_energy_param].peaks_kev.copy() full_object_dict[cal_energy_param].hpge_fit_energy_peaks( diff --git a/scripts/pars_hit_qc.py b/scripts/pars_hit_qc.py index 3eeef8d..d5917e8 100644 --- a/scripts/pars_hit_qc.py +++ b/scripts/pars_hit_qc.py @@ -109,7 +109,11 @@ msg = "No pulser file or tcm filelist provided" raise ValueError(msg) - data = data[mask[threshold_mask]] + if len(mask[threshold_mask]) < 100: + mask = np.random.Generator.choice(len(data), 4000 * len(args.cal_files), replace=False) + data = data[mask] + else: + data = data[mask[threshold_mask]] if "initial_cal_cuts" in kwarg_dict: init_cal = kwarg_dict["initial_cal_cuts"] diff --git a/scripts/pars_pht_qc.py b/scripts/pars_pht_qc.py index d427fe3..510d00c 100644 --- a/scripts/pars_pht_qc.py +++ b/scripts/pars_pht_qc.py @@ -135,8 +135,8 @@ msg = "No pulser file or tcm filelist provided" raise ValueError(msg) - if len(mask[threshold_mask])==0: - mask= np.random.choice(len(data), 20000) + if len(mask[threshold_mask]) < 100: + mask = np.random.Generator.choice(len(data), 4000 * len(args.cal_files), replace=False) data = data[mask] else: data = data[mask[threshold_mask]] From 3f63f5d1b4435d897e4339aa01e9a536b67442b8 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sun, 7 Apr 2024 13:25:24 +0200 Subject: [PATCH 060/103] lower find peaks threshold as some dets have low events --- scripts/pars_hit_ecal.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/scripts/pars_hit_ecal.py b/scripts/pars_hit_ecal.py index 553c051..edee334 100644 --- a/scripts/pars_hit_ecal.py +++ b/scripts/pars_hit_ecal.py @@ -550,7 +550,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): (e_uncal > np.nanpercentile(e_uncal, 95)) & (e_uncal < 
np.nanpercentile(e_uncal, 99.9)) ], - dx=9, + dx=1, range=[np.nanpercentile(e_uncal, 95), np.nanpercentile(e_uncal, 99.9)], ) @@ -564,6 +564,10 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): full_object_dict[cal_energy_param].hpge_get_energy_peaks( e_uncal, etol_kev=5 if det_status == "on" else 20 ) + if 2614.50 not in full_object_dict[cal_energy_param].peaks_kev: + full_object_dict[cal_energy_param].hpge_get_energy_peaks( + e_uncal, peaks_kev=glines, etol_kev=5 if det_status == "on" else 30, n_sigma=2 + ) got_peaks_kev = full_object_dict[cal_energy_param].peaks_kev.copy() full_object_dict[cal_energy_param].hpge_fit_energy_peaks( e_uncal, From b99c9881833915e8cab5709377f554827f3c0ec7 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sun, 7 Apr 2024 13:25:36 +0200 Subject: [PATCH 061/103] require more events --- scripts/pars_pht_qc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/pars_pht_qc.py b/scripts/pars_pht_qc.py index 510d00c..1015840 100644 --- a/scripts/pars_pht_qc.py +++ b/scripts/pars_pht_qc.py @@ -135,7 +135,7 @@ msg = "No pulser file or tcm filelist provided" raise ValueError(msg) - if len(mask[threshold_mask]) < 100: + if len(mask[threshold_mask]) < 100 * len(args.cal_files): mask = np.random.Generator.choice(len(data), 4000 * len(args.cal_files), replace=False) data = data[mask] else: From e037915f929aa88501b9a5d1149e94bb93a18ea1 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sun, 7 Apr 2024 15:43:49 +0200 Subject: [PATCH 062/103] initial cuts use non pulser waveforms, normal use pulser if possible --- scripts/pars_hit_qc.py | 17 +++++++++++------ scripts/pars_pht_qc.py | 17 +++++++++++------ 2 files changed, 22 insertions(+), 12 deletions(-) diff --git a/scripts/pars_hit_qc.py b/scripts/pars_hit_qc.py index d5917e8..841a5c1 100644 --- a/scripts/pars_hit_qc.py +++ b/scripts/pars_hit_qc.py @@ -109,16 +109,16 @@ msg = "No pulser file or tcm filelist provided" raise ValueError(msg) - if len(mask[threshold_mask]) < 100: - mask = np.random.Generator.choice(len(data), 4000 * len(args.cal_files), replace=False) - data = data[mask] - else: - data = data[mask[threshold_mask]] + data["is_pulser"] = mask[threshold_mask] + + mask = np.random.Generator.choice( + len(data.query("~is_pulser")), 4000 * len(args.cal_files), replace=False + ) if "initial_cal_cuts" in kwarg_dict: init_cal = kwarg_dict["initial_cal_cuts"] hit_dict_init_cal, plot_dict_init_cal = generate_cut_classifiers( - data, + data.query("~is_pulser")[mask], init_cal["cut_parameters"], init_cal.get("rounding", 4), display=1 if args.plot_path else 0, @@ -138,6 +138,11 @@ hit_dict_init_cal = {} plot_dict_init_cal = {} + if len(data.query("is_pulser")) > 500: + data = data.query("is_pulser") + else: + data = data.query("~is_pulser")[mask] + hit_dict_cal, plot_dict_cal = generate_cut_classifiers( data, kwarg_dict_cal["cut_parameters"], diff --git a/scripts/pars_pht_qc.py b/scripts/pars_pht_qc.py index 1015840..6376f02 100644 --- a/scripts/pars_pht_qc.py +++ b/scripts/pars_pht_qc.py @@ -135,16 +135,16 @@ msg = "No pulser file or tcm filelist provided" raise ValueError(msg) - if len(mask[threshold_mask]) < 100 * len(args.cal_files): - mask = np.random.Generator.choice(len(data), 4000 * len(args.cal_files), replace=False) - data = data[mask] - else: - data = data[mask[threshold_mask]] + data["is_pulser"] = mask[threshold_mask] + + mask = np.random.Generator.choice( + len(data.query("~is_pulser")), 4000 * len(args.cal_files), replace=False + ) if "initial_cal_cuts" in 
kwarg_dict: init_cal = kwarg_dict["initial_cal_cuts"] hit_dict_init_cal, plot_dict_init_cal = generate_cut_classifiers( - data, + data.query("~is_pulser")[mask], init_cal["cut_parameters"], init_cal.get("rounding", 4), display=1 if args.plot_path else 0, @@ -167,6 +167,11 @@ hit_dict_init_cal = {} plot_dict_init_cal = {} + if len(data.query("is_pulser")) > 500 * len(args.cal_files): + data = data.query("is_pulser") + else: + data = data.query("~is_pulser")[mask] + hit_dict_cal, plot_dict_cal = generate_cut_classifiers( data, kwarg_dict_cal["cut_parameters"], From 0316188af4983b6b2eb73ee04d9e929e07112102 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sun, 7 Apr 2024 17:27:05 +0200 Subject: [PATCH 063/103] fix numpy choice --- scripts/pars_hit_qc.py | 6 ++---- scripts/pars_pht_qc.py | 5 +++-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/scripts/pars_hit_qc.py b/scripts/pars_hit_qc.py index 841a5c1..beae8f1 100644 --- a/scripts/pars_hit_qc.py +++ b/scripts/pars_hit_qc.py @@ -110,10 +110,8 @@ raise ValueError(msg) data["is_pulser"] = mask[threshold_mask] - - mask = np.random.Generator.choice( - len(data.query("~is_pulser")), 4000 * len(args.cal_files), replace=False - ) + rng = np.random.default_rng() + mask = sorted(rng.choice(len(data.query("~is_pulser")), 4000, replace=False)) if "initial_cal_cuts" in kwarg_dict: init_cal = kwarg_dict["initial_cal_cuts"] diff --git a/scripts/pars_pht_qc.py b/scripts/pars_pht_qc.py index 6376f02..bfad2b7 100644 --- a/scripts/pars_pht_qc.py +++ b/scripts/pars_pht_qc.py @@ -137,8 +137,9 @@ data["is_pulser"] = mask[threshold_mask] - mask = np.random.Generator.choice( - len(data.query("~is_pulser")), 4000 * len(args.cal_files), replace=False + rng = np.random.default_rng() + mask = sorted( + rng.choice(len(data.query("~is_pulser")), 4000 * len(args.cal_files), replace=False) ) if "initial_cal_cuts" in kwarg_dict: From bb96fa45ed6f40520430caf6bde8d466c411b558 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sun, 7 Apr 2024 21:56:09 +0200 Subject: [PATCH 064/103] fix mask --- scripts/pars_hit_qc.py | 3 ++- scripts/pars_pht_qc.py | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/scripts/pars_hit_qc.py b/scripts/pars_hit_qc.py index beae8f1..e3cf429 100644 --- a/scripts/pars_hit_qc.py +++ b/scripts/pars_hit_qc.py @@ -111,7 +111,8 @@ data["is_pulser"] = mask[threshold_mask] rng = np.random.default_rng() - mask = sorted(rng.choice(len(data.query("~is_pulser")), 4000, replace=False)) + mask = np.full(len(data.query("~is_pulser")), False, dtype=bool) + mask[rng.choice(len(data.query("~is_pulser")), 4000, replace=False)] = True if "initial_cal_cuts" in kwarg_dict: init_cal = kwarg_dict["initial_cal_cuts"] diff --git a/scripts/pars_pht_qc.py b/scripts/pars_pht_qc.py index bfad2b7..a13e8cb 100644 --- a/scripts/pars_pht_qc.py +++ b/scripts/pars_pht_qc.py @@ -138,9 +138,10 @@ data["is_pulser"] = mask[threshold_mask] rng = np.random.default_rng() - mask = sorted( + mask = np.full(len(data.query("~is_pulser")), False, dtype=bool) + mask[ rng.choice(len(data.query("~is_pulser")), 4000 * len(args.cal_files), replace=False) - ) + ] = True if "initial_cal_cuts" in kwarg_dict: init_cal = kwarg_dict["initial_cal_cuts"] From e0d5c278b99933bb892ac0fb837894d0e0bb524e Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 8 Apr 2024 11:52:18 +0200 Subject: [PATCH 065/103] fix mask --- scripts/pars_hit_qc.py | 1 + scripts/pars_pht_qc.py | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/scripts/pars_hit_qc.py 
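Patches 063 and 064 above settle on the standard NumPy sampling idiom: np.random.Generator.choice is an instance method, so calling it on the class (as patch 059 did) raises a TypeError, and the drawn indices are then turned into a boolean mask so the random selection can be combined with the other event masks. A minimal sketch:

import numpy as np

rng = np.random.default_rng()  # seedable, e.g. np.random.default_rng(1234)

n_events, n_draw = 100_000, 4_000
mask = np.full(n_events, False, dtype=bool)
mask[rng.choice(n_events, n_draw, replace=False)] = True  # unique indices

assert mask.sum() == n_draw
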
b/scripts/pars_hit_qc.py index e3cf429..c59e99d 100644 --- a/scripts/pars_hit_qc.py +++ b/scripts/pars_hit_qc.py @@ -132,6 +132,7 @@ ct_mask = ct_mask & data[outname] data = data[ct_mask] + mask = mask[ct_mask] else: hit_dict_init_cal = {} diff --git a/scripts/pars_pht_qc.py b/scripts/pars_pht_qc.py index a13e8cb..5e9a722 100644 --- a/scripts/pars_pht_qc.py +++ b/scripts/pars_pht_qc.py @@ -140,7 +140,7 @@ rng = np.random.default_rng() mask = np.full(len(data.query("~is_pulser")), False, dtype=bool) mask[ - rng.choice(len(data.query("~is_pulser")), 4000 * len(args.cal_files), replace=False) + rng.choice(len(data.query("~is_pulser")), 2000 * len(args.cal_files), replace=False) ] = True if "initial_cal_cuts" in kwarg_dict: @@ -162,6 +162,7 @@ ct_mask = ct_mask & data[outname] data = data[ct_mask] + mask = mask[ct_mask] log.debug("initial cal cuts applied") log.debug(f"cut_dict is: {json.dumps(hit_dict_init_cal, indent=2)}") @@ -169,7 +170,7 @@ hit_dict_init_cal = {} plot_dict_init_cal = {} - if len(data.query("is_pulser")) > 500 * len(args.cal_files): + if len(data.query("is_pulser")) > 200 * len(args.cal_files): data = data.query("is_pulser") else: data = data.query("~is_pulser")[mask] From e930a53fb99c94d12447d979967f433f099daa6e Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Tue, 9 Apr 2024 15:34:26 +0200 Subject: [PATCH 066/103] Update build_evt.py to support latest pygama build_evt() --- scripts/build_evt.py | 81 ++++++++++++++------------------------------ 1 file changed, 26 insertions(+), 55 deletions(-) diff --git a/scripts/build_evt.py b/scripts/build_evt.py index 606dc50..aabd961 100644 --- a/scripts/build_evt.py +++ b/scripts/build_evt.py @@ -9,7 +9,6 @@ import numpy as np from legendmeta import LegendMetadata from legendmeta.catalog import Props -from lgdo.types import Table from pygama.evt.build_evt import build_evt sto = lh5.LH5Store() @@ -38,6 +37,7 @@ else: logging.basicConfig(level=logging.DEBUG) +logging.getLogger("legendmeta").setLevel(logging.INFO) logging.getLogger("numba").setLevel(logging.INFO) logging.getLogger("parse").setLevel(logging.INFO) logging.getLogger("lgdo").setLevel(logging.INFO) @@ -59,47 +59,25 @@ meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp) -if isinstance(evt_config_file, dict): - evt_config = {} - for key, _evt_config in evt_config_file.items(): - if _evt_config is not None: - _evt_config = Props.read_from(_evt_config) - # block for snakemake to fill in channel lists - for field, dic in _evt_config["channels"].items(): - if isinstance(dic, dict): - chans = chmap.map("system", unique=False)[dic["system"]] - if "selectors" in dic: - try: - for k, val in dic["selectors"].items(): - chans = chans.map(k, unique=False)[val] - except KeyError: - chans = None - if chans is not None: - chans = [f"ch{chan}" for chan in list(chans.map("daq.rawid"))] - else: - chans = [] - _evt_config["channels"][field] = chans - - evt_config[key] = _evt_config -else: - evt_config = {"all": Props.read_from(evt_config_file)} - # block for snakemake to fill in channel lists - for field, dic in evt_config["channels"].items(): - if isinstance(dic, dict): - chans = chmap.map("system", unique=False)[dic["system"]] - if "selectors" in dic: - try: - for k, val in dic["selectors"].items(): - chans = chans.map(k, unique=False)[val] - except KeyError: - chans = None - if chans is not None: - chans = [f"ch{chan}" for chan in list(chans.map("daq.rawid"))] - else: - chans = [] - evt_config["channels"][field] = chans - -log.debug(json.dumps(evt_config, 
indent=2)) +evt_config = Props.read_from(evt_config_file) + +# block for snakemake to fill in channel lists +for field, dic in evt_config["channels"].items(): + if isinstance(dic, dict): + chans = chmap.map("system", unique=False)[dic["system"]] + if "selectors" in dic: + try: + for k, val in dic["selectors"].items(): + chans = chans.map(k, unique=False)[val] + except KeyError: + chans = None + if chans is not None: + chans = [f"ch{chan}" for chan in list(chans.map("daq.rawid"))] + else: + chans = [] + evt_config["channels"][field] = chans + +log.debug(json.dumps(evt_config["channels"], indent=2)) t_start = time.time() pathlib.Path(os.path.dirname(args.output)).mkdir(parents=True, exist_ok=True) @@ -108,22 +86,15 @@ rand_num = f"{rng.integers(0,99999):05d}" temp_output = f"{args.output}.{rand_num}" -tables = {} -for key, config in evt_config.items(): - datainfo = { +build_evt( + { "tcm": (args.tcm_file, "hardware_tcm_1", "ch{}"), "dsp": (args.dsp_file, "dsp", "ch{}"), "hit": (args.hit_file, "hit", "ch{}"), - "evt": (None, "evt"), - } - - tables[key] = build_evt( - datainfo, - config, - ) - -tbl = Table(col_dict=tables) -sto.write(obj=tbl, name="evt", lh5_file=temp_output, wo_mode="a") + "evt": (temp_output, "evt"), + }, + evt_config, +) os.rename(temp_output, args.output) t_elap = time.time() - t_start From 1d381aaa4e1fd3f334172a91570d6302d11ab976 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 16 Apr 2024 08:33:44 +0000 Subject: [PATCH 067/103] style: pre-commit fixes --- scripts/pars_hit_ecal.py | 2 +- scripts/pars_pht_qc.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/pars_hit_ecal.py b/scripts/pars_hit_ecal.py index e6cb61a..edee334 100644 --- a/scripts/pars_hit_ecal.py +++ b/scripts/pars_hit_ecal.py @@ -706,4 +706,4 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): # save calibration objects with open(args.results_path, "wb") as fp: pathlib.Path(os.path.dirname(args.results_path)).mkdir(parents=True, exist_ok=True) - pkl.dump({"ecal": full_object_dict}, fp, protocol=pkl.HIGHEST_PROTOCOL) \ No newline at end of file + pkl.dump({"ecal": full_object_dict}, fp, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_pht_qc.py b/scripts/pars_pht_qc.py index 5e9a722..2390097 100644 --- a/scripts/pars_pht_qc.py +++ b/scripts/pars_pht_qc.py @@ -139,9 +139,9 @@ rng = np.random.default_rng() mask = np.full(len(data.query("~is_pulser")), False, dtype=bool) - mask[ - rng.choice(len(data.query("~is_pulser")), 2000 * len(args.cal_files), replace=False) - ] = True + mask[rng.choice(len(data.query("~is_pulser")), 2000 * len(args.cal_files), replace=False)] = ( + True + ) if "initial_cal_cuts" in kwarg_dict: init_cal = kwarg_dict["initial_cal_cuts"] From 34c32c0b0929e3f03bb90dbe9b2ecb9e995f1ab5 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Tue, 16 Apr 2024 11:22:05 +0200 Subject: [PATCH 068/103] Simplify evt.smk --- rules/common.smk | 25 ++++++++++++ rules/evt.smk | 99 ++++++++++++++++++------------------------------ 2 files changed, 62 insertions(+), 62 deletions(-) diff --git a/rules/common.smk b/rules/common.smk index b5fba4d..427d465 100644 --- a/rules/common.smk +++ b/rules/common.smk @@ -99,3 +99,28 @@ def get_pattern(tier): return get_pattern_tier_daq(setup) else: return get_pattern_tier_raw(setup) + + +def set_last_rule_name(workflow, new_name): + """Sets the name of the most recently created rule to be `new_name`. 
+ Useful when creating rules dynamically (i.e. unnamed). + + Warning + ------- + This could mess up the workflow. Use at your own risk. + """ + rules = workflow._rules + last_key = next(reversed(rules)) + assert last_key == rules[last_key].name + + rules[new_name] = rules.pop(last_key) + rules[new_name].name = new_name + + if workflow.default_target == last_key: + workflow.default_target = new_name + + if last_key in workflow._localrules: + workflow._localrules.remove(last_key) + workflow._localrules.add(new_name) + + workflow.check_localrules() diff --git a/rules/evt.smk b/rules/evt.smk index 9cc6e13..c880c88 100644 --- a/rules/evt.smk +++ b/rules/evt.smk @@ -14,67 +14,42 @@ from scripts.util.patterns import ( ) -rule build_evt: - input: - dsp_file=get_pattern_tier_dsp(setup), - hit_file=get_pattern_tier_hit(setup), - tcm_file=get_pattern_tier_tcm(setup), - output: - evt_file=get_pattern_tier(setup, "evt", check_in_cycle=check_in_cycle), - params: - timestamp="{timestamp}", - datatype="{datatype}", - tier="evt", - log: - get_pattern_log(setup, "tier_evt"), - group: - "tier-evt" - resources: - runtime=300, - mem_swap=70, - shell: - "{swenv} python3 -B " - f"{workflow.source_path('../scripts/build_evt.py')} " - "--configs {configs} " - "--metadata {meta} " - "--log {log} " - "--tier {params.tier} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--hit_file {input.hit_file} " - "--tcm_file {input.tcm_file} " - "--dsp_file {input.dsp_file} " - "--output {output.evt_file} " +for tier in ("evt", "pet"): + rule: + input: + dsp_file=get_pattern_tier_dsp(setup), + hit_file=( + get_pattern_tier_hit(setup) + if tier == "evt" + else get_pattern_tier_pht(setup) + ), + tcm_file=get_pattern_tier_tcm(setup), + output: + evt_file=get_pattern_tier(setup, tier, check_in_cycle=check_in_cycle), + params: + timestamp="{timestamp}", + datatype="{datatype}", + tier=tier, + log: + get_pattern_log(setup, f"tier_{tier}"), + group: + "tier-evt" + resources: + runtime=300, + mem_swap=70, + shell: + "{swenv} python3 -B " + f"{workflow.source_path('../scripts/build_evt.py')} " + "--configs {configs} " + "--metadata {meta} " + "--log {log} " + "--tier {params.tier} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--hit_file {input.hit_file} " + "--tcm_file {input.tcm_file} " + "--dsp_file {input.dsp_file} " + "--output {output.evt_file} " -rule build_pet: - input: - dsp_file=get_pattern_tier_dsp(setup), - hit_file=get_pattern_tier_pht(setup), - tcm_file=get_pattern_tier_tcm(setup), - output: - evt_file=get_pattern_tier(setup, "pet", check_in_cycle=check_in_cycle), - params: - timestamp="{timestamp}", - datatype="{datatype}", - tier="pet", - log: - get_pattern_log(setup, "tier_pet"), - group: - "tier-evt" - resources: - runtime=300, - mem_swap=70, - shell: - "{swenv} python3 -B " - f"{workflow.source_path('../scripts/build_evt.py')} " - "--configs {configs} " - "--log {log} " - "--tier {params.tier} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--metadata {meta} " - "--hit_file {input.hit_file} " - "--tcm_file {input.tcm_file} " - "--dsp_file {input.dsp_file} " - "--output {output.evt_file} " + set_last_rule_name(workflow, f"build_{tier}") From a39dcae9f85d129249d1da181945d1515f838f53 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Tue, 16 Apr 2024 12:11:09 +0200 Subject: [PATCH 069/103] Add rule to concatenate evt files in a run --- rules/evt.smk | 19 +++++++++++++++++++ scripts/util/utils.py | 2 +- 2 files changed, 20 insertions(+), 1 
deletion(-) diff --git a/rules/evt.smk b/rules/evt.smk index c880c88..c84dce8 100644 --- a/rules/evt.smk +++ b/rules/evt.smk @@ -53,3 +53,22 @@ for tier in ("evt", "pet"): "--output {output.evt_file} " set_last_rule_name(workflow, f"build_{tier}") + + rule: + input: + lambda wildcards: sorted(read_filelist_phy(wildcards, tier)), + output: + get_pattern_tier(setup, tier, check_in_cycle=check_in_cycle), + params: + timestamp="all", + datatype="{datatype}", + log: + get_pattern_log(setup, "tier_skm"), + group: + "tier-evt" + shell: + "{swenv} lh5concat --verbose --overwrite " + "--output {output} " + "-- {input} &> {log}" + + set_last_rule_name(workflow, f"concat_{tier}") diff --git a/scripts/util/utils.py b/scripts/util/utils.py index d767610..b4cbdcf 100644 --- a/scripts/util/utils.py +++ b/scripts/util/utils.py @@ -191,7 +191,7 @@ def runcmd(setup): exec_cmd = setup["execenv"]["cmd"] exec_arg = setup["execenv"]["arg"] path_install = setup["paths"]["install"] - return f"PYTHONUSERBASE={path_install} {exec_cmd} {exec_arg}" + return f"PYTHONUSERBASE={path_install} APPTAINERENV_PREPEND_PATH={path_install}/bin {exec_cmd} {exec_arg}" def subst_vars_impl(x, var_values, ignore_missing=False): From 915aa326b7be573d1c3c9c84580629d7d6026be8 Mon Sep 17 00:00:00 2001 From: Legend Data Management User Date: Thu, 18 Apr 2024 14:37:08 +0200 Subject: [PATCH 070/103] Add better wildcard_constraints --- Snakefile | 9 ++++----- Snakefile-build-raw | 8 ++++++++ rules/evt.smk | 2 ++ scripts/build_evt.py | 2 +- 4 files changed, 15 insertions(+), 6 deletions(-) diff --git a/Snakefile b/Snakefile index ae61549..5d0f359 100644 --- a/Snakefile +++ b/Snakefile @@ -45,11 +45,10 @@ basedir = workflow.basedir wildcard_constraints: experiment="\w+", - period="\w+", - run="\w+", - datatype="\w+", - timestamp="\w+", - channel="\w+", + period="p\d{2}", + run="r\d{3}", + datatype="\w{3}", + timestamp="\d{8}T\d{6}Z" include: "rules/common.smk" diff --git a/Snakefile-build-raw b/Snakefile-build-raw index 02362c6..ecb08b4 100644 --- a/Snakefile-build-raw +++ b/Snakefile-build-raw @@ -40,6 +40,14 @@ meta = metadata_path(setup) basedir = workflow.basedir +wildcard_constraints: + experiment="\w+", + period="p\d{2}", + run="r\d{3}", + datatype="\w{3}", + timestamp="\d{8}T\d{6}Z" + + localrules: gen_filelist, autogen_output, diff --git a/rules/evt.smk b/rules/evt.smk index c84dce8..c399808 100644 --- a/rules/evt.smk +++ b/rules/evt.smk @@ -55,6 +55,8 @@ for tier in ("evt", "pet"): set_last_rule_name(workflow, f"build_{tier}") rule: + wildcard_constraints: + timestamp="(?!\d{8}T\d{6}Z)" input: lambda wildcards: sorted(read_filelist_phy(wildcards, tier)), output: diff --git a/scripts/build_evt.py b/scripts/build_evt.py index baef99d..bba8084 100644 --- a/scripts/build_evt.py +++ b/scripts/build_evt.py @@ -109,7 +109,7 @@ def find_matching_values_with_delay(arr1, arr2, jit_delay): "tcm": (args.tcm_file, "hardware_tcm_1", "ch{}"), "dsp": (args.dsp_file, "dsp", "ch{}"), "hit": (args.hit_file, "hit", "ch{}"), - "evt": (temp_output, "evt"), + "evt": (None, "evt"), }, evt_config, ) From efb11aadc5241cd4b7f9d33355afdf95fc0d849d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 18 Apr 2024 12:37:42 +0000 Subject: [PATCH 071/103] style: pre-commit fixes --- Snakefile | 2 +- Snakefile-build-raw | 2 +- rules/evt.smk | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Snakefile b/Snakefile index 5d0f359..7a0cbb9 100644 --- a/Snakefile +++ b/Snakefile @@ -48,7 
+48,7 @@ wildcard_constraints: period="p\d{2}", run="r\d{3}", datatype="\w{3}", - timestamp="\d{8}T\d{6}Z" + timestamp="\d{8}T\d{6}Z", include: "rules/common.smk" diff --git a/Snakefile-build-raw b/Snakefile-build-raw index ecb08b4..edbc7d8 100644 --- a/Snakefile-build-raw +++ b/Snakefile-build-raw @@ -45,7 +45,7 @@ wildcard_constraints: period="p\d{2}", run="r\d{3}", datatype="\w{3}", - timestamp="\d{8}T\d{6}Z" + timestamp="\d{8}T\d{6}Z", localrules: diff --git a/rules/evt.smk b/rules/evt.smk index c399808..9da6d63 100644 --- a/rules/evt.smk +++ b/rules/evt.smk @@ -56,7 +56,7 @@ for tier in ("evt", "pet"): rule: wildcard_constraints: - timestamp="(?!\d{8}T\d{6}Z)" + timestamp="(?!\d{8}T\d{6}Z)", input: lambda wildcards: sorted(read_filelist_phy(wildcards, tier)), output: From 313f7b6c3e53105059440218265cef2c4412bc0c Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Thu, 18 Apr 2024 23:24:51 +0200 Subject: [PATCH 072/103] switch to psp --- rules/evt.smk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rules/evt.smk b/rules/evt.smk index 9cc6e13..0ab5f4d 100644 --- a/rules/evt.smk +++ b/rules/evt.smk @@ -49,7 +49,7 @@ rule build_evt: rule build_pet: input: - dsp_file=get_pattern_tier_dsp(setup), + dsp_file=get_pattern_tier_psp(setup), hit_file=get_pattern_tier_pht(setup), tcm_file=get_pattern_tier_tcm(setup), output: From 808e8c4a69a8441d1ac682fc42a41cc8fda64eae Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Thu, 18 Apr 2024 23:25:56 +0200 Subject: [PATCH 073/103] fix missing rules --- Snakefile | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Snakefile b/Snakefile index ae61549..83b7f3e 100644 --- a/Snakefile +++ b/Snakefile @@ -55,9 +55,9 @@ wildcard_constraints: include: "rules/common.smk" include: "rules/main.smk" include: "rules/tcm.smk" -# include: "rules/dsp.smk" -# include: "rules/psp.smk" -# include: "rules/hit.smk" +include: "rules/dsp.smk" +include: "rules/psp.smk" +include: "rules/hit.smk" include: "rules/pht.smk" include: "rules/evt.smk" include: "rules/skm.smk" @@ -113,7 +113,7 @@ onsuccess: if os.path.isfile(file): os.remove(file) - # remove filelists + # remove filelists files = glob.glob(os.path.join(filelist_path(setup), "*")) for file in files: if os.path.isfile(file): @@ -121,7 +121,7 @@ onsuccess: if os.path.exists(filelist_path(setup)): os.rmdir(filelist_path(setup)) - # remove logs + # remove logs files = glob.glob(os.path.join(tmp_log_path(setup), "*", "*.log")) for file in files: if os.path.isfile(file): From ce1329f66586973cdf7e2ca6950170f142c82954 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Thu, 18 Apr 2024 23:26:27 +0200 Subject: [PATCH 074/103] v1 svm --- rules/dsp.smk | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/rules/dsp.smk b/rules/dsp.smk index 9ea2e7f..f526132 100644 --- a/rules/dsp.smk +++ b/rules/dsp.smk @@ -208,6 +208,28 @@ rule build_pars_dsp_eopt: "--final_dsp_pars {output.dsp_pars}" +# This rule builds the optimal energy filter parameters for the dsp using calibration dsp files +# rule build_pars_dsp_svm: +# input: +# hyperpars="", +# train_data="", +# output: +# dsp_pars=get_pattern_pars(setup, "dsp", "svm"), +# log: +# get_pattern_log_channel(setup, "pars_dsp_svm"), +# group: +# "par-dsp" +# resources: +# runtime=300, +# shell: +# "{swenv} python3 -B " +# f"{workflow.source_path('../scripts/pars_dsp_svm.py')} " +# "--log {log} " +# "--train_data {input.train_data} " +# "--train_hyperpars {input.hyperpars} " +# "--output_file {output.dsp_pars}" + + rule build_plts_dsp: 
input: lambda wildcards: read_filelist_plts_cal_channel(wildcards, "dsp"), From bdd6adbb81fe133527865f123043ee05cad446c6 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Thu, 18 Apr 2024 23:27:04 +0200 Subject: [PATCH 075/103] use non pulser evts --- scripts/pars_pht_qc.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/scripts/pars_pht_qc.py b/scripts/pars_pht_qc.py index 5e9a722..a7b6657 100644 --- a/scripts/pars_pht_qc.py +++ b/scripts/pars_pht_qc.py @@ -114,12 +114,12 @@ ) if args.pulser_files: - mask = np.array([], dtype=bool) + total_mask = np.array([], dtype=bool) for file in args.pulser_files: with open(file) as f: pulser_dict = json.load(f) pulser_mask = np.array(pulser_dict["mask"]) - mask = np.append(mask, pulser_mask) + total_mask = np.append(total_mask, pulser_mask) if "pulser_multiplicity_threshold" in kwarg_dict: kwarg_dict.pop("pulser_multiplicity_threshold") @@ -128,14 +128,14 @@ with open(args.tcm_filelist) as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) - ids, mask = get_tcm_pulser_ids( + ids, total_mask = get_tcm_pulser_ids( tcm_files, args.channel, kwarg_dict["pulser_multiplicity_threshold"] ) else: msg = "No pulser file or tcm filelist provided" raise ValueError(msg) - data["is_pulser"] = mask[threshold_mask] + data["is_pulser"] = total_mask[threshold_mask] rng = np.random.default_rng() mask = np.full(len(data.query("~is_pulser")), False, dtype=bool) @@ -161,8 +161,8 @@ if "classifier" not in outname: ct_mask = ct_mask & data[outname] - data = data[ct_mask] - mask = mask[ct_mask] + mask = mask[ct_mask[~data["is_pulser"].to_numpy()]] + data = data[ct_mask] log.debug("initial cal cuts applied") log.debug(f"cut_dict is: {json.dumps(hit_dict_init_cal, indent=2)}") @@ -170,10 +170,7 @@ hit_dict_init_cal = {} plot_dict_init_cal = {} - if len(data.query("is_pulser")) > 200 * len(args.cal_files): - data = data.query("is_pulser") - else: - data = data.query("~is_pulser")[mask] + data = data.query("~is_pulser")[mask] hit_dict_cal, plot_dict_cal = generate_cut_classifiers( data, From cfc9fd542e6d4021e9081ec9f49a9c0af57f60a0 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Fri, 19 Apr 2024 14:54:11 +0200 Subject: [PATCH 076/103] support evt and skm 1 file per run --- rules/evt.smk | 5 +++-- rules/skm.smk | 24 ++++++------------------ scripts/create_filelist.py | 28 +++++++++++++++++++--------- scripts/util/patterns.py | 38 ++++++++++++++++++++++++++++++-------- 4 files changed, 58 insertions(+), 37 deletions(-) diff --git a/rules/evt.smk b/rules/evt.smk index fb86875..2e29306 100644 --- a/rules/evt.smk +++ b/rules/evt.smk @@ -11,6 +11,7 @@ from scripts.util.patterns import ( get_pattern_tier, get_pattern_log, get_pattern_pars, + get_pattern_log_concat, ) @@ -64,12 +65,12 @@ for tier in ("evt", "pet"): input: lambda wildcards: sorted(read_filelist_phy(wildcards, tier)), output: - get_pattern_tier(setup, tier, check_in_cycle=check_in_cycle), + get_pattern_tier(setup, f"{tier}_concat", check_in_cycle=check_in_cycle), params: timestamp="all", datatype="{datatype}", log: - get_pattern_log(setup, "tier_skm"), + get_pattern_log_concat(setup, f"tier_{tier}_concat"), group: "tier-evt" shell: diff --git a/rules/skm.smk b/rules/skm.smk index c4356fa..d83b8a8 100644 --- a/rules/skm.smk +++ b/rules/skm.smk @@ -6,28 +6,20 @@ from scripts.util.patterns import ( get_pattern_tier, get_pattern_log, get_pattern_pars, + get_pattern_log_concat, ) rule build_skm: input: - dsp_files=os.path.join( - filelist_path(setup), 
"all-{experiment}-{period}-{run}-phy-dsp.filelist" - ), - hit_files=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-phy-pht.filelist" - ), - tcm_files=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-phy-tcm.filelist" - ), - evt_files=lambda wildcards: read_filelist_phy(wildcards, "pet"), + evt_file = get_pattern_tier(setup, "pet_concat", check_in_cycle=False), output: skm_file=get_pattern_tier(setup, "skm", check_in_cycle=check_in_cycle), params: - timestamp="{timestamp}", - datatype="{datatype}", + timestamp="all", + datatype="phy", log: - get_pattern_log(setup, "tier_skm"), + get_pattern_log_concat(setup, "tier_skm"), group: "tier-skm" resources: @@ -39,9 +31,5 @@ rule build_skm: "--metadata {meta} " "--log {log} " "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--hit_files {input.hit_files} " - "--tcm_files {input.tcm_files} " - "--dsp_files {input.dsp_files} " - "--evt_files {input.evt_files} " + "--evt_file {input.evt_file} " "--output {output.skm_file} " diff --git a/scripts/create_filelist.py b/scripts/create_filelist.py index 8900343..1de40e2 100644 --- a/scripts/create_filelist.py +++ b/scripts/create_filelist.py @@ -57,6 +57,8 @@ other_filenames = [] if tier == "blind": fn_pattern = get_pattern_tier(setup, "raw", check_in_cycle=False) +elif tier == "skm": + fn_pattern = get_pattern_tier(setup, "pet", check_in_cycle=False) else: fn_pattern = get_pattern_tier(setup, tier, check_in_cycle=False) @@ -70,7 +72,7 @@ else: if tier == "blind" and _key.datatype == "phy": filename = FileKey.get_path_from_filekey(_key, get_pattern_tier_raw_blind(setup)) - elif tier == "skm" and _key.datatype != "phy": + elif tier == "skm": #and _key.datatype != "phy" filename = FileKey.get_path_from_filekey( _key, get_pattern_tier(setup, "pet", check_in_cycle=False) ) @@ -101,17 +103,25 @@ phy_filenames = sorted(phy_filenames) other_filenames = sorted(other_filenames) -if tier == "skm": +if tier == "skm" or tier == "pet" or tier == "evt": sorted_phy_filenames = run_grouper(phy_filenames) phy_filenames = [] for run in sorted_phy_filenames: - run_files = sorted( - run, - key=lambda filename: FileKey.get_filekey_from_pattern( - filename, fn_pattern - ).get_unix_timestamp(), - ) - phy_filenames.append(run_files[0]) + key = FileKey.get_filekey_from_pattern(run[0], fn_pattern) + if tier == "skm": + out_key = FileKey.get_path_from_filekey( + key, get_pattern_tier(setup, "skm", check_in_cycle=False) + )[0] + elif tier == "pet": + out_key = FileKey.get_path_from_filekey( + key, get_pattern_tier(setup, "pet_concat", check_in_cycle=False) + )[0] + elif tier == "evt": + out_key = FileKey.get_path_from_filekey( + key, get_pattern_tier(setup, "evt_concat", check_in_cycle=False) + )[0] + + phy_filenames.append(out_key) filenames = phy_filenames + other_filenames diff --git a/scripts/util/patterns.py b/scripts/util/patterns.py index 7d381b2..52c9f9e 100644 --- a/scripts/util/patterns.py +++ b/scripts/util/patterns.py @@ -3,6 +3,7 @@ """ import os +import pathlib from .utils import ( par_dsp_path, @@ -146,6 +147,13 @@ def get_pattern_tier_evt(setup): "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_evt.lh5", ) +def get_pattern_tier_evt_concat(setup): + return os.path.join( + f"{tier_evt_path(setup)}", + "{datatype}", + "{experiment}-{period}-{run}-{datatype}-tier_evt.lh5", + ) + def get_pattern_tier_psp(setup): return os.path.join( @@ -176,14 +184,19 @@ def get_pattern_tier_pet(setup): 
"{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_pet.lh5", ) +def get_pattern_tier_pet_concat(setup): + return os.path.join( + f"{tier_pet_path(setup)}", + "{datatype}", + "{experiment}-{period}-{run}-{datatype}-tier_pet.lh5", + ) + def get_pattern_tier_skm(setup): return os.path.join( f"{tier_skm_path(setup)}", - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_skm.lh5", + "phy", + "{experiment}-{period}-{run}-{datatype}-tier_skm.lh5", ) @@ -200,21 +213,24 @@ def get_pattern_tier(setup, tier, check_in_cycle=True): file_pattern = get_pattern_tier_hit(setup) elif tier == "evt": file_pattern = get_pattern_tier_evt(setup) + elif tier == "evt_concat": + file_pattern = get_pattern_tier_evt_concat(setup) elif tier == "psp": file_pattern = get_pattern_tier_psp(setup) elif tier == "pht": file_pattern = get_pattern_tier_pht(setup) elif tier == "pet": file_pattern = get_pattern_tier_pet(setup) + elif tier == "pet_concat": + file_pattern = get_pattern_tier_pet_concat(setup) elif tier == "skm": file_pattern = get_pattern_tier_skm(setup) else: msg = "invalid tier" raise Exception(msg) if ( - tier_path(setup) not in file_pattern + tier_path(setup) not in str(pathlib.Path(file_pattern).resolve()) and check_in_cycle is True - and ".." not in file_pattern ): return "/tmp/{experiment}-{period}-{run}-{datatype}-{timestamp}" + f"tier_{tier}.lh5" else: @@ -394,9 +410,8 @@ def get_pattern_pars(setup, tier, name=None, extension="json", check_in_cycle=Tr msg = "invalid tier" raise Exception(msg) if ( - pars_path(setup) not in file_pattern + pars_path(setup) not in str(pathlib.Path(file_pattern).resolve()) and check_in_cycle is True - and ".." not in file_pattern ): if name is None: return "/tmp/{experiment}-{period}-{run}-cal-{timestamp}" + f"par_{tier}.{extension}" @@ -527,6 +542,13 @@ def get_pattern_log(setup, processing_step): "{experiment}-{period}-{run}-{datatype}-{timestamp}-" + processing_step + ".log", ) +def get_pattern_log_concat(setup, processing_step): + return os.path.join( + f"{tmp_log_path(setup)}", + processing_step, + "{experiment}-{period}-{run}-{datatype}-" + processing_step + ".log", + ) + def get_pattern_log_channel(setup, processing_step): return os.path.join( From d364f490d345bfe1b8f238942edf2ffb48b32bce Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 19 Apr 2024 12:54:29 +0000 Subject: [PATCH 077/103] style: pre-commit fixes --- Snakefile | 4 ++-- rules/skm.smk | 2 +- scripts/create_filelist.py | 14 +++++++------- scripts/pars_pht_qc.py | 2 +- scripts/util/patterns.py | 3 +++ 5 files changed, 14 insertions(+), 11 deletions(-) diff --git a/Snakefile b/Snakefile index 3440d8f..67bfaba 100644 --- a/Snakefile +++ b/Snakefile @@ -112,7 +112,7 @@ onsuccess: if os.path.isfile(file): os.remove(file) - # remove filelists + # remove filelists files = glob.glob(os.path.join(filelist_path(setup), "*")) for file in files: if os.path.isfile(file): @@ -120,7 +120,7 @@ onsuccess: if os.path.exists(filelist_path(setup)): os.rmdir(filelist_path(setup)) - # remove logs + # remove logs files = glob.glob(os.path.join(tmp_log_path(setup), "*", "*.log")) for file in files: if os.path.isfile(file): diff --git a/rules/skm.smk b/rules/skm.smk index d83b8a8..3c9a619 100644 --- a/rules/skm.smk +++ b/rules/skm.smk @@ -12,7 +12,7 @@ from scripts.util.patterns import ( rule build_skm: input: - evt_file = get_pattern_tier(setup, "pet_concat", check_in_cycle=False), + 
evt_file=get_pattern_tier(setup, "pet_concat", check_in_cycle=False), output: skm_file=get_pattern_tier(setup, "skm", check_in_cycle=check_in_cycle), params: diff --git a/scripts/create_filelist.py b/scripts/create_filelist.py index 1de40e2..217b6bb 100644 --- a/scripts/create_filelist.py +++ b/scripts/create_filelist.py @@ -72,7 +72,7 @@ else: if tier == "blind" and _key.datatype == "phy": filename = FileKey.get_path_from_filekey(_key, get_pattern_tier_raw_blind(setup)) - elif tier == "skm": #and _key.datatype != "phy" + elif tier == "skm": # and _key.datatype != "phy" filename = FileKey.get_path_from_filekey( _key, get_pattern_tier(setup, "pet", check_in_cycle=False) ) @@ -110,16 +110,16 @@ key = FileKey.get_filekey_from_pattern(run[0], fn_pattern) if tier == "skm": out_key = FileKey.get_path_from_filekey( - key, get_pattern_tier(setup, "skm", check_in_cycle=False) - )[0] + key, get_pattern_tier(setup, "skm", check_in_cycle=False) + )[0] elif tier == "pet": out_key = FileKey.get_path_from_filekey( - key, get_pattern_tier(setup, "pet_concat", check_in_cycle=False) - )[0] + key, get_pattern_tier(setup, "pet_concat", check_in_cycle=False) + )[0] elif tier == "evt": out_key = FileKey.get_path_from_filekey( - key, get_pattern_tier(setup, "evt_concat", check_in_cycle=False) - )[0] + key, get_pattern_tier(setup, "evt_concat", check_in_cycle=False) + )[0] phy_filenames.append(out_key) diff --git a/scripts/pars_pht_qc.py b/scripts/pars_pht_qc.py index b560db3..3d142b2 100644 --- a/scripts/pars_pht_qc.py +++ b/scripts/pars_pht_qc.py @@ -162,7 +162,7 @@ ct_mask = ct_mask & data[outname] mask = mask[ct_mask[~data["is_pulser"].to_numpy()]] - data = data[ct_mask] + data = data[ct_mask] log.debug("initial cal cuts applied") log.debug(f"cut_dict is: {json.dumps(hit_dict_init_cal, indent=2)}") diff --git a/scripts/util/patterns.py b/scripts/util/patterns.py index 52c9f9e..9f4338a 100644 --- a/scripts/util/patterns.py +++ b/scripts/util/patterns.py @@ -147,6 +147,7 @@ def get_pattern_tier_evt(setup): "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_evt.lh5", ) + def get_pattern_tier_evt_concat(setup): return os.path.join( f"{tier_evt_path(setup)}", @@ -184,6 +185,7 @@ def get_pattern_tier_pet(setup): "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_pet.lh5", ) + def get_pattern_tier_pet_concat(setup): return os.path.join( f"{tier_pet_path(setup)}", @@ -542,6 +544,7 @@ def get_pattern_log(setup, processing_step): "{experiment}-{period}-{run}-{datatype}-{timestamp}-" + processing_step + ".log", ) + def get_pattern_log_concat(setup, processing_step): return os.path.join( f"{tmp_log_path(setup)}", From 479e3ac1c638d5b0b22f4e20b804fb5f340e7dbf Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Fri, 19 Apr 2024 15:26:31 +0200 Subject: [PATCH 078/103] fix cyclic dependence --- scripts/create_filelist.py | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/scripts/create_filelist.py b/scripts/create_filelist.py index 1de40e2..c06af68 100644 --- a/scripts/create_filelist.py +++ b/scripts/create_filelist.py @@ -57,8 +57,10 @@ other_filenames = [] if tier == "blind": fn_pattern = get_pattern_tier(setup, "raw", check_in_cycle=False) -elif tier == "skm": +elif tier == "skm" or tier=="pet_concat": fn_pattern = get_pattern_tier(setup, "pet", check_in_cycle=False) +elif tier == "skm" or tier=="evt_concat": + fn_pattern = get_pattern_tier(setup, "evt", check_in_cycle=False) else: fn_pattern = get_pattern_tier(setup, tier, check_in_cycle=False) @@ -103,23 +105,14 @@ 
phy_filenames = sorted(phy_filenames) other_filenames = sorted(other_filenames) -if tier == "skm" or tier == "pet" or tier == "evt": +if tier == "skm" or tier == "pet_concat" or tier == "evt_concat": sorted_phy_filenames = run_grouper(phy_filenames) phy_filenames = [] for run in sorted_phy_filenames: key = FileKey.get_filekey_from_pattern(run[0], fn_pattern) - if tier == "skm": - out_key = FileKey.get_path_from_filekey( - key, get_pattern_tier(setup, "skm", check_in_cycle=False) - )[0] - elif tier == "pet": - out_key = FileKey.get_path_from_filekey( - key, get_pattern_tier(setup, "pet_concat", check_in_cycle=False) - )[0] - elif tier == "evt": - out_key = FileKey.get_path_from_filekey( - key, get_pattern_tier(setup, "evt_concat", check_in_cycle=False) - )[0] + out_key = FileKey.get_path_from_filekey( + key, get_pattern_tier(setup, tier, check_in_cycle=False) + )[0] phy_filenames.append(out_key) From 22400642bb5f24256b363689581bd687830a9c55 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 19 Apr 2024 13:27:53 +0000 Subject: [PATCH 079/103] style: pre-commit fixes --- Snakefile | 2 +- scripts/create_filelist.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Snakefile b/Snakefile index 67bfaba..e9e0a03 100644 --- a/Snakefile +++ b/Snakefile @@ -112,7 +112,7 @@ onsuccess: if os.path.isfile(file): os.remove(file) - # remove filelists + # remove filelists files = glob.glob(os.path.join(filelist_path(setup), "*")) for file in files: if os.path.isfile(file): diff --git a/scripts/create_filelist.py b/scripts/create_filelist.py index 8a8596b..9ea6b4e 100644 --- a/scripts/create_filelist.py +++ b/scripts/create_filelist.py @@ -57,9 +57,9 @@ other_filenames = [] if tier == "blind": fn_pattern = get_pattern_tier(setup, "raw", check_in_cycle=False) -elif tier == "skm" or tier=="pet_concat": +elif tier == "skm" or tier == "pet_concat": fn_pattern = get_pattern_tier(setup, "pet", check_in_cycle=False) -elif tier == "skm" or tier=="evt_concat": +elif tier == "skm" or tier == "evt_concat": fn_pattern = get_pattern_tier(setup, "evt", check_in_cycle=False) else: fn_pattern = get_pattern_tier(setup, tier, check_in_cycle=False) @@ -111,8 +111,8 @@ for run in sorted_phy_filenames: key = FileKey.get_filekey_from_pattern(run[0], fn_pattern) out_key = FileKey.get_path_from_filekey( - key, get_pattern_tier(setup, tier, check_in_cycle=False) - )[0] + key, get_pattern_tier(setup, tier, check_in_cycle=False) + )[0] phy_filenames.append(out_key) From 9062bfd62314e95b3a22ae78d35e9945cd3b7c9b Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 22 Apr 2024 18:51:11 +0200 Subject: [PATCH 080/103] bugfixes and formatting --- Snakefile | 4 ++-- scripts/create_filelist.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Snakefile b/Snakefile index 67bfaba..a806425 100644 --- a/Snakefile +++ b/Snakefile @@ -112,7 +112,7 @@ onsuccess: if os.path.isfile(file): os.remove(file) - # remove filelists + #remove filelists files = glob.glob(os.path.join(filelist_path(setup), "*")) for file in files: if os.path.isfile(file): @@ -120,7 +120,7 @@ onsuccess: if os.path.exists(filelist_path(setup)): os.rmdir(filelist_path(setup)) - # remove logs + # remove logs files = glob.glob(os.path.join(tmp_log_path(setup), "*", "*.log")) for file in files: if os.path.isfile(file): diff --git a/scripts/create_filelist.py b/scripts/create_filelist.py index 8a8596b..a40b77c 100644 --- a/scripts/create_filelist.py +++ 
b/scripts/create_filelist.py @@ -57,9 +57,9 @@ other_filenames = [] if tier == "blind": fn_pattern = get_pattern_tier(setup, "raw", check_in_cycle=False) -elif tier == "skm" or tier=="pet_concat": +elif tier == "skm" or tier == "pet_concat": fn_pattern = get_pattern_tier(setup, "pet", check_in_cycle=False) -elif tier == "skm" or tier=="evt_concat": +elif tier == "evt_concat": fn_pattern = get_pattern_tier(setup, "evt", check_in_cycle=False) else: fn_pattern = get_pattern_tier(setup, tier, check_in_cycle=False) @@ -111,8 +111,8 @@ for run in sorted_phy_filenames: key = FileKey.get_filekey_from_pattern(run[0], fn_pattern) out_key = FileKey.get_path_from_filekey( - key, get_pattern_tier(setup, tier, check_in_cycle=False) - )[0] + key, get_pattern_tier(setup, tier, check_in_cycle=False) + )[0] phy_filenames.append(out_key) From 198e4c924560db19f804b67d60c34127afe63407 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 22 Apr 2024 16:52:16 +0000 Subject: [PATCH 081/103] style: pre-commit fixes --- Snakefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Snakefile b/Snakefile index a806425..67bfaba 100644 --- a/Snakefile +++ b/Snakefile @@ -112,7 +112,7 @@ onsuccess: if os.path.isfile(file): os.remove(file) - #remove filelists + # remove filelists files = glob.glob(os.path.join(filelist_path(setup), "*")) for file in files: if os.path.isfile(file): @@ -120,7 +120,7 @@ onsuccess: if os.path.exists(filelist_path(setup)): os.rmdir(filelist_path(setup)) - # remove logs + # remove logs files = glob.glob(os.path.join(tmp_log_path(setup), "*", "*.log")) for file in files: if os.path.isfile(file): From f4dd3288ab7b65d87d3d2c01c6cc98ef0a7773d7 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Wed, 24 Apr 2024 17:47:10 +0200 Subject: [PATCH 082/103] add svm scripts --- scripts/pars_dsp_build_svm.py | 59 +++++++++++++++++++++++++++++++++++ scripts/pars_dsp_svm.py | 36 +++++++++++++++++++++ 2 files changed, 95 insertions(+) create mode 100644 scripts/pars_dsp_build_svm.py create mode 100644 scripts/pars_dsp_svm.py diff --git a/scripts/pars_dsp_build_svm.py b/scripts/pars_dsp_build_svm.py new file mode 100644 index 0000000..6a44fec --- /dev/null +++ b/scripts/pars_dsp_build_svm.py @@ -0,0 +1,59 @@ +import argparse +import json +import logging +import os +import pickle as pkl + +os.environ["LGDO_CACHE"] = "false" +os.environ["LGDO_BOUNDSCHECK"] = "false" +os.environ["DSPEED_CACHE"] = "false" +os.environ["DSPEED_BOUNDSCHECK"] = "false" + +import lgdo.lh5 as lh5 +from sklearn.svm import SVC + +argparser = argparse.ArgumentParser() +argparser.add_argument("--log", help="log file", type=str) +argparser.add_argument("--output_file", help="output SVM file", type=str, required=True) +argparser.add_argument("--train_data", help="input data file", type=str, required=True) +argparser.add_argument("--train_hyperpars", help="input hyperparameter file", required=True) +args = argparser.parse_args() + +logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") +logging.getLogger("parse").setLevel(logging.INFO) +logging.getLogger("lgdo").setLevel(logging.INFO) +logging.getLogger("h5py").setLevel(logging.INFO) + +sto = lh5.LH5Store() +log = logging.getLogger(__name__) + +# Load files +tb, _ = sto.read("ml_train/dsp", args.train_data) +log.debug("loaded data") + +with open(args.train_hyperpars) as hyperpars_file: + hyperpars = json.load(hyperpars_file) + +# Define training inputs +dwts_norm = tb["dwt_norm"].nda +labels = 
tb["dc_label"].nda + + +log.debug("training model") +# Initialize and train SVM +svm = SVC( + random_state=int(hyperpars["random_state"]), + kernel=hyperpars["kernel"], + decision_function_shape=hyperpars["decision_function_shape"], + class_weight=hyperpars["class_weight"], + C=float(hyperpars["C"]), + gamma=float(hyperpars["gamma"]), +) + +svm.fit(dwts_norm, labels) + +log.debug("trained model") + +# Save trained model with pickle +with open(args.output_file, "wb") as svm_file: + pkl.dump(svm, svm_file, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_dsp_svm.py b/scripts/pars_dsp_svm.py new file mode 100644 index 0000000..40f0a25 --- /dev/null +++ b/scripts/pars_dsp_svm.py @@ -0,0 +1,36 @@ +import argparse +import json +import logging +import os +import pathlib + +argparser = argparse.ArgumentParser() +argparser.add_argument("--log", help="log file", type=str) +argparser.add_argument("--output_file", help="output par file", type=str, required=True) +argparser.add_argument("--input_file", help="input par file", type=str, required=True) +argparser.add_argument("--svm_file", help="svm file", required=True) +args = argparser.parse_args() + + +if args.log is not None: + pathlib.Path(os.path.dirname(args.log)).mkdir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") +else: + logging.basicConfig(level=logging.DEBUG) + +logging.getLogger("parse").setLevel(logging.INFO) +logging.getLogger("lgdo").setLevel(logging.INFO) +logging.getLogger("h5py").setLevel(logging.INFO) + +log = logging.getLogger(__name__) + +with open(args.input_file) as r: + par_data = json.load(r) + +file = f"'$_/{os.path.basename(args.svm_file)}'" + +par_data["svm"] = {"model_file": file} + +pathlib.Path(os.path.dirname(args.output_file)).mkdir(parents=True, exist_ok=True) +with open(args.output_file, "w") as w: + json.dump(par_data, w, indent=4) From 7d051627812397f596b7e23042cd17acf12be251 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Wed, 24 Apr 2024 17:52:12 +0200 Subject: [PATCH 083/103] update cuts --- scripts/pars_dsp_eopt.py | 4 ++++ scripts/pars_dsp_event_selection.py | 21 +++++++++++++++++---- scripts/pars_dsp_tau.py | 23 ++++++++++++++--------- 3 files changed, 35 insertions(+), 13 deletions(-) diff --git a/scripts/pars_dsp_eopt.py b/scripts/pars_dsp_eopt.py index 4af1c37..86b5f7b 100644 --- a/scripts/pars_dsp_eopt.py +++ b/scripts/pars_dsp_eopt.py @@ -16,6 +16,7 @@ import lgdo.lh5 as lh5 import numpy as np +import pygama.pargen.energy_optimisation as om # noqa: F401 import sklearn.gaussian_process.kernels as ker from dspeed.units import unit_registry as ureg from legendmeta import LegendMetadata @@ -93,6 +94,7 @@ "func": hpge_peak, "peak": peak, "kev_width": kev_width, + "bin_width": 5, } ) kwarg_dicts_zac.append( @@ -101,6 +103,7 @@ "func": hpge_peak, "peak": peak, "kev_width": kev_width, + "bin_width": 5, } ) kwarg_dicts_trap.append( @@ -109,6 +112,7 @@ "func": hpge_peak, "peak": peak, "kev_width": kev_width, + "bin_width": 5, } ) diff --git a/scripts/pars_dsp_event_selection.py b/scripts/pars_dsp_event_selection.py index 44c1604..9100689 100644 --- a/scripts/pars_dsp_event_selection.py +++ b/scripts/pars_dsp_event_selection.py @@ -176,7 +176,17 @@ def get_out_data( raw_fields = [field.replace(lh5_path, "") for field in lh5.ls(raw_files[0], lh5_path)] - tb = sto.read(lh5_path, raw_files, field_mask=["daqenergy"])[0] + tb = sto.read(lh5_path, raw_files, field_mask=["daqenergy", "t_sat_lo", "timestamp"])[0] + + discharges = tb["t_sat_lo"].nda > 0 + 
discharge_timestamps = tb["timestamp"].nda[discharges]
+    is_recovering = np.full(len(tb), False, dtype=bool)
+    for tstamp in discharge_timestamps:
+        is_recovering = is_recovering | np.where(
+            (((tb["timestamp"].nda - tstamp) < 0.01) & ((tb["timestamp"].nda - tstamp) > 0)),
+            True,
+            False,
+        )

     for outname, info in raw_dict.items():
         outcol = tb.eval(info["expression"], info.get("parameters", None))
@@ -191,7 +201,7 @@ def get_out_data(
             & (rough_energy < peak + 1.1 * kev_width[0])
             & (~mask)
         )
-        masks[peak] = np.where(e_mask)[0]
+        masks[peak] = np.where(e_mask & (~is_recovering))[0]
         log.debug(f"{len(masks[peak])} events found in energy range for {peak}")

     input_data = sto.read(f"{lh5_path}", raw_files, n_rows=10000, idx=np.where(~mask)[0])[0]
@@ -272,14 +282,17 @@ def get_out_data(
     )
     peak_loc = pgh.get_bin_centers(bins)[np.nanargmax(hist)]

-    mu, _, _ = pgc.hpge_fit_energy_peak_tops(
+    peak_top_pars = pgc.hpge_fit_energy_peak_tops(
         hist,
         bins,
         var,
         [peak_loc],
         n_to_fit=7,
     )[0][0]
-
+    try:
+        mu = peak_top_pars[0]
+    except Exception:
+        mu = np.nan
     if mu is None or np.isnan(mu):
         log.debug("Fit failed, using max guess")
         rough_adc_to_kev = peak / peak_loc
diff --git a/scripts/pars_dsp_tau.py b/scripts/pars_dsp_tau.py
index 1e10ea5..8064308 100644
--- a/scripts/pars_dsp_tau.py
+++ b/scripts/pars_dsp_tau.py
@@ -82,11 +82,21 @@
         msg = "No pulser file or tcm filelist provided"
         raise ValueError(msg)

-    data = sto.read(f"{args.channel}/raw", input_file, field_mask=["daqenergy", "timestamp"])[
-        0
-    ].view_as("pd")
+    data = sto.read(
+        f"{args.channel}/raw", input_file, field_mask=["daqenergy", "timestamp", "t_sat_lo"]
+    )[0].view_as("pd")
     threshold = kwarg_dict.pop("threshold")
-    cuts = np.where((data.daqenergy.to_numpy() > threshold) & (~mask))[0]
+
+    discharges = data["t_sat_lo"] > 0
+    discharge_timestamps = data["timestamp"][discharges].to_numpy()
+    is_recovering = np.full(len(data), False, dtype=bool)
+    for tstamp in discharge_timestamps:
+        is_recovering = is_recovering | np.where(
+            (((data["timestamp"] - tstamp) < 0.01) & ((data["timestamp"] - tstamp) > 0)),
+            True,
+            False,
+        )
+    cuts = np.where((data.daqenergy.to_numpy() > threshold) & (~mask) & (~is_recovering))[0]

     tb_data = sto.read(
         f"{args.channel}/raw",
@@ -124,11 +134,6 @@
 else:
     out_dict = {}

-if args.pulser_file:
-    pathlib.Path(os.path.dirname(args.pulser_file)).mkdir(parents=True, exist_ok=True)
-    with open(args.pulser_file, "w") as f:
-        json.dump({"idxs": ids.tolist(), "mask": mask.tolist()}, f, indent=4)
-
 pathlib.Path(os.path.dirname(args.output_file)).mkdir(parents=True, exist_ok=True)
 with open(args.output_file, "w") as f:
     json.dump(tau.output_dict, f, indent=4)

From fbafe64a215d3220137ce210edb7c5bf07d5b073 Mon Sep 17 00:00:00 2001
From: ggmarshall
Date: Wed, 24 Apr 2024 17:59:33 +0200
Subject: [PATCH 084/103] filter db files to json or yaml

---
 scripts/pars_hit_ecal.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/scripts/pars_hit_ecal.py b/scripts/pars_hit_ecal.py
index edee334..e84e51f 100644
--- a/scripts/pars_hit_ecal.py
+++ b/scripts/pars_hit_ecal.py
@@ -459,7 +459,13 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string):
     if args.in_hit_dict:
         hit_dict = Props.read_from(args.in_hit_dict)

-    database_dic = Props.read_from(args.ctc_dict)
+    db_files = [
+        par_file
+        for par_file in args.ctc_dict
+        if os.path.splitext(par_file)[1] == ".json" or os.path.splitext(par_file)[1] == ".yml"
+    ]
+
+    database_dic = Props.read_from(db_files)

     hit_dict.update(database_dic[args.channel]["ctc_params"])


From 0d50328e779726d0e3ce3bee4152bf1f7fa3fea1 Mon Sep 17 00:00:00 2001
From: ggmarshall
Date: Wed, 24 Apr 2024 18:00:55 +0200
Subject: [PATCH 085/103] add new qc

---
 scripts/pars_hit_qc.py     | 120 +++++++++++++++++-------
 scripts/pars_pht_qc.py     | 182 +++++++++++++++++++++++++------------
 scripts/pars_pht_qc_phy.py |  55 +++++++++--
 3 files changed, 257 insertions(+), 100 deletions(-)

diff --git a/scripts/pars_hit_qc.py b/scripts/pars_hit_qc.py
index c59e99d..c432d69 100644
--- a/scripts/pars_hit_qc.py
+++ b/scripts/pars_hit_qc.py
@@ -62,6 +62,71 @@

     kwarg_dict = Props.read_from(channel_dict)

+    kwarg_dict_fft = kwarg_dict["fft_fields"]
+    if len(args.fft_files) > 0:
+        fft_fields = get_keys(
+            [
+                key.replace(f"{args.channel}/dsp/", "")
+                for key in ls(args.fft_files[0], f"{args.channel}/dsp/")
+            ],
+            kwarg_dict_fft["cut_parameters"],
+        )
+
+        fft_data = load_data(
+            args.fft_files,
+            f"{args.channel}/dsp",
+            {},
+            [*fft_fields, "timestamp", "trapTmax", "t_sat_lo"],
+        )
+
+        discharges = fft_data["t_sat_lo"] > 0
+        discharge_timestamps = fft_data["timestamp"][discharges].to_numpy()
+        is_recovering = np.full(len(fft_data), False, dtype=bool)
+        for tstamp in discharge_timestamps:
+            is_recovering = is_recovering | np.where(
+                (
+                    ((fft_data["timestamp"] - tstamp) < 0.01)
+                    & ((fft_data["timestamp"] - tstamp) > 0)
+                ),
+                True,
+                False,
+            )
+        fft_data["is_recovering"] = is_recovering
+
+        hit_dict_fft = {}
+        plot_dict_fft = {}
+        cut_data = fft_data.query("is_recovering==0")
+        log.debug(f"cut_data shape: {len(cut_data)}")
+        for name, cut in kwarg_dict_fft["cut_parameters"].items():
+            cut_dict, cut_plots = generate_cut_classifiers(
+                cut_data,
+                {name: cut},
+                kwarg_dict.get("rounding", 4),
+                display=1 if args.plot_path else 0,
+            )
+            hit_dict_fft.update(cut_dict)
+            plot_dict_fft.update(cut_plots)
+
+            log.debug(f"{name} calculated cut_dict is: {json.dumps(cut_dict, indent=2)}")
+
+            ct_mask = np.full(len(cut_data), True, dtype=bool)
+            for outname, info in cut_dict.items():
+                # convert to pandas eval
+                exp = info["expression"]
+                for key in info.get("parameters", {}):
+                    exp = re.sub(f"(?<![a-zA-Z0-9]){key}(?![a-zA-Z0-9])", f"@{key}", exp)

+    discharges = data["t_sat_lo"] > 0
+    discharge_timestamps = data["timestamp"][discharges].to_numpy()
+    is_recovering = np.full(len(data), False, dtype=bool)
+    for tstamp in discharge_timestamps:
+        is_recovering = is_recovering | np.where(
+            (((data["timestamp"] - tstamp) < 0.01) & ((data["timestamp"] - tstamp) > 0)),
+            True,
+            False,
+        )
+    data["is_recovering"] = is_recovering
+
     rng = np.random.default_rng()
-    mask = np.full(len(data.query("~is_pulser")), False, dtype=bool)
-    mask[rng.choice(len(data.query("~is_pulser")), 4000, replace=False)] = True
+    mask = np.full(len(data.query("~is_pulser & ~is_recovering")), False, dtype=bool)
+    mask[rng.choice(len(data.query("~is_pulser & ~is_recovering")), 4000, replace=False)] = True

     if "initial_cal_cuts" in kwarg_dict:
         init_cal = kwarg_dict["initial_cal_cuts"]
         hit_dict_init_cal, plot_dict_init_cal = generate_cut_classifiers(
-            data.query("~is_pulser")[mask],
+            data.query("~is_pulser & ~is_recovering")[mask],
             init_cal["cut_parameters"],
             init_cal.get("rounding", 4),
             display=1 if args.plot_path else 0,
@@ -138,10 +215,10 @@
         hit_dict_init_cal = {}
         plot_dict_init_cal = {}

-    if len(data.query("is_pulser")) > 500:
-        data = data.query("is_pulser")
+    if len(data.query("is_pulser & ~is_recovering")) > 500:
+        data = data.query("is_pulser & ~is_recovering")
     else:
-        data = data.query("~is_pulser")[mask]
+        data = data.query("~is_pulser & ~is_recovering")[mask]

     hit_dict_cal, plot_dict_cal = generate_cut_classifiers(
         data,
@@ -150,35 +227,8 @@
         display=1 if args.plot_path else 0,
     )

-    kwarg_dict_fft = kwarg_dict["fft_fields"]
-    if len(args.fft_files) > 0:
-        fft_fields = get_keys(
-            [
-                key.replace(f"{args.channel}/dsp/", "")
-                for key in ls(args.fft_files[0], f"{args.channel}/dsp/")
-            ],
-            kwarg_dict_fft["cut_parameters"],
-        )
-
-        fft_data = load_data(
-            args.fft_files,
-            f"{args.channel}/dsp",
-            {},
-            [*fft_fields, "timestamp", "trapTmax"],
-        )
-
-        hit_dict_fft, plot_dict_fft = generate_cut_classifiers(
-            fft_data,
-            kwarg_dict_fft["cut_parameters"],
-            kwarg_dict.get("rounding", 4),
-            display=1 if args.plot_path else 0,
-        )
-    else:
-        hit_dict_fft = {}
-        plot_dict_fft = {}
-
-    hit_dict = {**hit_dict_init_cal, **hit_dict_cal, **hit_dict_fft}
-    plot_dict = {**plot_dict_init_cal, **plot_dict_cal, **plot_dict_fft}
+    hit_dict = {**hit_dict_fft, **hit_dict_init_cal, **hit_dict_cal}
+    plot_dict = {**plot_dict_fft, **plot_dict_init_cal, **plot_dict_cal}

     pathlib.Path(os.path.dirname(args.save_path)).mkdir(parents=True, exist_ok=True)
     with open(args.save_path, "w") as f:
diff --git a/scripts/pars_pht_qc.py b/scripts/pars_pht_qc.py
index 3d142b2..18ff865 100644
--- a/scripts/pars_pht_qc.py
+++ b/scripts/pars_pht_qc.py
@@ -38,6 +38,9 @@
     argparser.add_argument(
         "--pulser_files", help="pulser_file", nargs="*", type=str, required=False
     )
+    argparser.add_argument(
+        "--overwrite_files", help="overwrite_files", nargs="*", type=str, required=False
+    )

     argparser.add_argument("--configs", help="config", type=str, required=True)
     argparser.add_argument("--datatype", help="Datatype", type=str, required=True)
@@ -83,6 +86,101 @@
     )  # need this as sometimes files get double counted as it somehow puts in the p%-* filelist and individual runs also

     kwarg_dict = Props.read_from(channel_dict)
+
+    if args.overwrite_files:
+        overwrite = Props.read_from(args.overwrite_files)[args.channel]["pars"]["operations"]
+    else:
+        overwrite = None
+
+    kwarg_dict_fft = kwarg_dict["fft_fields"]
+    if len(args.fft_files) > 0:
+        # sort files in dictionary where keys are first timestamp from run
+        if isinstance(args.fft_files, list):
+            fft_files = []
+            for file in args.fft_files:
+                with open(file) as f:
+                    fft_files += f.read().splitlines()
+        else:
+            with open(args.fft_files) as f:
+                fft_files = f.read().splitlines()
+
+        fft_files = sorted(
+            np.unique(fft_files)
+        )  # need this as sometimes files get double counted as it somehow puts in the p%-* filelist and individual runs also
+
+        if len(fft_files) > 0:
+            fft_fields = get_keys(
+                [
+                    key.replace(f"{args.channel}/dsp/", "")
+                    for key in ls(fft_files[0], f"{args.channel}/dsp/")
+                ],
+                kwarg_dict_fft["cut_parameters"],
+            )
+
+            fft_data = load_data(
+                fft_files,
+                f"{args.channel}/dsp",
+                {},
+                [*fft_fields, "timestamp", "trapTmax", "t_sat_lo"],
+            )
+
+            discharges = fft_data["t_sat_lo"] > 0
+            discharge_timestamps = fft_data["timestamp"][discharges].to_numpy()
+            is_recovering = np.full(len(fft_data), False, dtype=bool)
+            for tstamp in discharge_timestamps:
+                is_recovering = is_recovering | np.where(
+                    (
+                        ((fft_data["timestamp"] - tstamp) < 0.01)
+                        & ((fft_data["timestamp"] - tstamp) > 0)
+                    ),
+                    True,
+                    False,
+                )
+            fft_data["is_recovering"] = is_recovering
+
+            hit_dict_fft = {}
+            plot_dict_fft = {}
+            cut_data = fft_data.query("is_recovering==0")
+            log.debug(f"cut_data shape: {len(cut_data)}")
+            for name, cut in kwarg_dict_fft["cut_parameters"].items():
+                cut_dict, cut_plots = generate_cut_classifiers(
+                    cut_data,
+                    {name: cut},
+                    kwarg_dict.get("rounding", 4),
+                    display=1 if args.plot_path else 0,
+                )
+                hit_dict_fft.update(cut_dict)
+                plot_dict_fft.update(cut_plots)
+
+                log.debug(f"{name} calculated cut_dict is: {json.dumps(cut_dict, indent=2)}")
+
+                ct_mask = np.full(len(cut_data), True, dtype=bool)
+                for outname, info in cut_dict.items():
+                    # convert to pandas eval
+                    exp = info["expression"]
+                    for key in info.get("parameters", {}):
+                        exp = re.sub(f"(?<![a-zA-Z0-9]){key}(?![a-zA-Z0-9])", f"@{key}", exp)

+    discharges = data["t_sat_lo"] > 0
+    discharge_timestamps = data["timestamp"][discharges].to_numpy()
+    is_recovering = np.full(len(data), False, dtype=bool)
+    for tstamp in discharge_timestamps:
+        is_recovering = is_recovering | np.where(
+            (((data["timestamp"] - tstamp) < 0.01) & ((data["timestamp"] - tstamp) > 0)),
+            True,
+            False,
+        )
+    data["is_recovering"] = is_recovering
+
     rng = np.random.default_rng()
-    mask = np.full(len(data.query("~is_pulser")), False, dtype=bool)
-    mask[rng.choice(len(data.query("~is_pulser")), 2000 * len(args.cal_files), replace=False)] = (
-        True
-    )
+    mask = np.full(len(data.query("~is_pulser & ~is_recovering")), False, dtype=bool)
+    mask[
+        rng.choice(
+            len(data.query("~is_pulser & ~is_recovering")),
+            2000 * len(args.cal_files),
+            replace=False,
+        )
+    ] = True

     if "initial_cal_cuts" in kwarg_dict:
         init_cal = kwarg_dict["initial_cal_cuts"]
@@ -161,7 +274,7 @@
             if "classifier" not in outname:
                 ct_mask = ct_mask & data[outname]

-    mask = mask[ct_mask[~data["is_pulser"].to_numpy()]]
+    mask = mask[ct_mask[(~data["is_pulser"] & ~data["is_recovering"]).to_numpy()]]
     data = data[ct_mask]
     log.debug("initial cal cuts applied")
     log.debug(f"cut_dict is: {json.dumps(hit_dict_init_cal, indent=2)}")
@@ -170,7 +283,7 @@
         hit_dict_init_cal = {}
         plot_dict_init_cal = {}

-    data = data.query("~is_pulser")[mask]
+    data = data.query("~is_pulser & ~is_recovering")[mask]

     hit_dict_cal, plot_dict_cal = generate_cut_classifiers(
         data,
@@ -182,57 +295,14 @@
     log.debug("initial cuts applied")
     log.debug(f"cut_dict is: {json.dumps(hit_dict_cal, indent=2)}")

-    kwarg_dict_fft = kwarg_dict["fft_fields"]
-    if len(args.fft_files) > 0:
-        # sort files in dictionary where keys are first timestamp from run
-        if isinstance(args.fft_files, list):
-            fft_files = []
-            for file in args.fft_files:
-                with open(file) as f:
-                    fft_files += f.read().splitlines()
-        else:
-            with open(args.fft_files) as f:
-                fft_files = f.read().splitlines()
-
-        fft_files = sorted(
-            np.unique(fft_files)
-        )  # need this as sometimes files get double counted as it somehow puts in the p%-* filelist and individual runs also
-
-        if len(fft_files) > 0:
-            fft_fields = get_keys(
-                [
-                    key.replace(f"{args.channel}/dsp/", "")
-                    for key in ls(fft_files[0], f"{args.channel}/dsp/")
-                ],
-                kwarg_dict_fft["cut_parameters"],
-            )
-
-            fft_data = load_data(
-                fft_files,
-                f"{args.channel}/dsp",
-                {},
-                [*fft_fields, "timestamp", "trapTmax"],
-            )
-
-            hit_dict_fft, plot_dict_fft = generate_cut_classifiers(
-                fft_data,
-                kwarg_dict_fft["cut_parameters"],
-                kwarg_dict.get("rounding", 4),
-                display=1 if args.plot_path else 0,
-            )
-
-            log.debug("fft cuts applied")
-            log.debug(f"cut_dict is: {json.dumps(hit_dict_fft, indent=2)}")
-
-        else:
-            hit_dict_fft = {}
-            plot_dict_fft = {}
-    else:
-        hit_dict_fft = {}
-        plot_dict_fft = {}
+    if overwrite is not None:
+        for name in kwarg_dict_cal["cut_parameters"]:
+            for cut_name, cut_dict in overwrite.items():
+                if name in cut_name:
+                    hit_dict_cal.update({cut_name: cut_dict})

-    hit_dict = {**hit_dict_init_cal, **hit_dict_cal, **hit_dict_fft}
-    plot_dict = {**plot_dict_init_cal, **plot_dict_cal, **plot_dict_fft}
+    hit_dict = {**hit_dict_fft, **hit_dict_init_cal, **hit_dict_cal}
+    plot_dict = {**plot_dict_fft, **plot_dict_init_cal, **plot_dict_cal}

     for file in args.save_path:
         pathlib.Path(os.path.dirname(file)).mkdir(parents=True, exist_ok=True)
diff --git a/scripts/pars_pht_qc_phy.py b/scripts/pars_pht_qc_phy.py
index 8fe0a1f..8d26fdf 100644
--- a/scripts/pars_pht_qc_phy.py
+++ b/scripts/pars_pht_qc_phy.py
@@ -6,6 +6,7 @@
 import os
 import pathlib
 import pickle as pkl
+import re
 import warnings

 os.environ["PYGAMA_PARALLEL"] = "false"
@@ -99,15 +100,51 @@
     )

     data = sto.read(
-        f"{args.channel}/dsp/", phy_files, field_mask=cut_fields, idx=np.where(bl_mask)[0]
-    )[0]
-
-    hit_dict, plot_dict = generate_cut_classifiers(
-        data,
-        kwarg_dict_fft["cut_parameters"],
-        kwarg_dict.get("rounding", 4),
-        display=1 if args.plot_path else 0,
-    )
+        f"{args.channel}/dsp/",
+        phy_files,
+        field_mask=[*cut_fields, "daqenergy", "t_sat_lo", "timestamp"],
+        idx=np.where(bl_mask)[0],
+    )[0].view_as("pd")
+
+    discharges = data["t_sat_lo"] > 0
+    discharge_timestamps = data["timestamp"][discharges].to_numpy()
+    is_recovering = np.full(len(data), False, dtype=bool)
+    for tstamp in discharge_timestamps:
+        is_recovering = is_recovering | np.where(
+            (((data["timestamp"] - tstamp) < 0.01) & ((data["timestamp"] - tstamp) > 0)),
+            True,
+            False,
+        )
+    data["is_recovering"] = is_recovering
+
+    log.debug(f"{len(discharge_timestamps)} discharges found in {len(data)} events")
+
+    hit_dict = {}
+    plot_dict = {}
+    cut_data = data.query("is_recovering==0")
+    log.debug(f"cut_data shape: {len(cut_data)}")
+    for name, cut in kwarg_dict_fft["cut_parameters"].items():
+        cut_dict, cut_plots = generate_cut_classifiers(
+            cut_data,
+            {name: cut},
+            kwarg_dict.get("rounding", 4),
+            display=1 if args.plot_path else 0,
+        )
+        hit_dict.update(cut_dict)
+        plot_dict.update(cut_plots)
+
+        log.debug(f"{name} calculated cut_dict is: {json.dumps(cut_dict, indent=2)}")
+
+        ct_mask = np.full(len(cut_data), True, dtype=bool)
+        for outname, info in cut_dict.items():
+            # convert to pandas eval
+            exp = info["expression"]
+            for key in info.get("parameters", {}):
+                exp = re.sub(f"(?<![a-zA-Z0-9]){key}(?![a-zA-Z0-9])", f"@{key}", exp)

From: ggmarshall
Date: Wed, 24 Apr 2024 18:01:16 +0200
Subject: [PATCH 086/103] replace paths with relative

---
 scripts/merge_channels.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/merge_channels.py b/scripts/merge_channels.py
index bc8337c..c2698eb 100644
--- a/scripts/merge_channels.py
+++ b/scripts/merge_channels.py
@@ -20,6 +20,7 @@ def replace_path(d, old_path, new_path):
             d[i] = replace_path(d[i], old_path, new_path)
     elif isinstance(d, str) and old_path in d:
         d = d.replace(old_path, new_path)
+        d = f"$_/{os.path.basename(new_path)}"

     return d


From 81306feb6da232f744962ddefbee8b3bc77ffaa2 Mon Sep 17 00:00:00 2001
From: ggmarshall
Date: Wed, 24 Apr 2024 18:01:44 +0200
Subject: [PATCH 087/103] lists to flat32 arrays filter pars files

---
 scripts/build_dsp.py | 36 +++++++++++++++++++++---------------
 1 file changed, 21 insertions(+), 15 deletions(-)

diff --git a/scripts/build_dsp.py b/scripts/build_dsp.py
index a94d547..2fd2248 100644
--- a/scripts/build_dsp.py
+++ b/scripts/build_dsp.py
@@ -18,6 +18,18 @@
 from legendmeta import LegendMetadata
 from legendmeta.catalog import Props

+
+def replace_list_with_array(dic):
+    for key, value in dic.items():
+        if isinstance(value, dict):
+            dic[key] = replace_list_with_array(value)
+        elif isinstance(value, list):
+            dic[key] = np.array(value, dtype="float32")
+        else:
+            pass
+    return dic
+
+
 warnings.filterwarnings(action="ignore", category=RuntimeWarning)

 argparser = argparse.ArgumentParser()
@@ -43,20 +55,14 @@
     "inputs"
 ]["processing_chain"]

-database_dic = Props.read_from(args.pars_file)
-
-
-def replace_list_with_array(dic):
-    for key, value in dic.items():
-        if isinstance(value, dict):
-            dic[key] = replace_list_with_array(value)
-        elif isinstance(value, list):
-            dic[key] = np.array(value, dtype="float32")
-        else:
-            pass
-    return dic
-
+channel_dict = {chan: Props.read_from(file) for chan,
file in channel_dict.items()} +db_files = [ + par_file + for par_file in args.pars_file + if os.path.splitext(par_file)[1] == ".json" or os.path.splitext(par_file)[1] == ".yml" +] +database_dic = Props.read_from(db_files, subst_pathvar=True) database_dic = replace_list_with_array(database_dic) pathlib.Path(os.path.dirname(args.output)).mkdir(parents=True, exist_ok=True) @@ -88,8 +94,8 @@ def replace_list_with_array(dic): outputs = {} channels = [] -for channel, file in channel_dict.items(): - output = Props.read_from(file)["outputs"] +for channel, chan_dict in channel_dict.items(): + output = chan_dict["outputs"] in_dict = False for entry in outputs: if outputs[entry]["fields"] == output: From 221a15437b3aedbfca37b3554022c35e93d3fd6b Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Wed, 24 Apr 2024 18:02:50 +0200 Subject: [PATCH 088/103] change order --- scripts/complete_run.py | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/scripts/complete_run.py b/scripts/complete_run.py index 5829f1a..da65b49 100644 --- a/scripts/complete_run.py +++ b/scripts/complete_run.py @@ -167,6 +167,23 @@ def build_file_dbs(input_files, output_dir): setup = snakemake.params.setup basedir = snakemake.params.basedir +check_log_files( + snakemake.params.log_path, + snakemake.output.summary_log, + snakemake.output.gen_output, + warning_file=snakemake.output.warning_log, +) + +if snakemake.wildcards.tier != "daq": + os.makedirs(snakemake.params.filedb_path, exist_ok=True) + with open(os.path.join(snakemake.params.filedb_path, "file_db_config.json"), "w") as w: + json.dump(file_db_config, w, indent=2) + + build_file_dbs(snakemake.params.tmp_par_path, snakemake.params.filedb_path) + os.remove(os.path.join(snakemake.params.filedb_path, "file_db_config.json")) + + build_valid_keys(snakemake.params.tmp_par_path, snakemake.params.valid_keys_path) + if os.getenv("PRODENV") in snakemake.params.filedb_path: file_db_config = { "data_dir": "$PRODENV", @@ -258,21 +275,4 @@ def build_file_dbs(input_files, output_dir): }, } -check_log_files( - snakemake.params.log_path, - snakemake.output.summary_log, - snakemake.output.gen_output, - warning_file=snakemake.output.warning_log, -) - -if snakemake.wildcards.tier != "daq": - os.makedirs(snakemake.params.filedb_path, exist_ok=True) - with open(os.path.join(snakemake.params.filedb_path, "file_db_config.json"), "w") as w: - json.dump(file_db_config, w, indent=2) - - build_file_dbs(snakemake.params.tmp_par_path, snakemake.params.filedb_path) - os.remove(os.path.join(snakemake.params.filedb_path, "file_db_config.json")) - - build_valid_keys(snakemake.params.tmp_par_path, snakemake.params.valid_keys_path) - pathlib.Path(snakemake.output.gen_output).touch() From 1cac12c037b9619b399e09958fdecea597b7e0ff Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Wed, 24 Apr 2024 18:04:08 +0200 Subject: [PATCH 089/103] add svm paths, update tmp paths and add concat paths --- scripts/util/patterns.py | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/scripts/util/patterns.py b/scripts/util/patterns.py index 9f4338a..91a4fa1 100644 --- a/scripts/util/patterns.py +++ b/scripts/util/patterns.py @@ -234,7 +234,7 @@ def get_pattern_tier(setup, tier, check_in_cycle=True): tier_path(setup) not in str(pathlib.Path(file_pattern).resolve()) and check_in_cycle is True ): - return "/tmp/{experiment}-{period}-{run}-{datatype}-{timestamp}" + f"tier_{tier}.lh5" + return 
"/tmp/{experiment}-{period}-{run}-{datatype}-{timestamp}-" + f"tier_{tier}.lh5" else: return file_pattern @@ -416,16 +416,37 @@ def get_pattern_pars(setup, tier, name=None, extension="json", check_in_cycle=Tr and check_in_cycle is True ): if name is None: - return "/tmp/{experiment}-{period}-{run}-cal-{timestamp}" + f"par_{tier}.{extension}" + return "/tmp/{experiment}-{period}-{run}-cal-{timestamp}-" + f"par_{tier}.{extension}" else: return ( - "/tmp/{experiment}-{period}-{run}-cal-{timestamp}" + "/tmp/{experiment}-{period}-{run}-cal-{timestamp}-" + f"par_{tier}_{name}.{extension}" ) else: return file_pattern +def get_pattern_pars_svm(setup, tier, name=None, ext="json"): + if name is not None: + return os.path.join( + f"{par_overwrite_path(setup)}", + tier, + "cal", + "{period}", + "{run}", + "{experiment}-{period}-{run}-cal-{timestamp}-" + f"par_{tier}_{name}.{ext}", + ) + else: + return os.path.join( + f"{par_overwrite_path(setup)}", + tier, + "cal", + "{period}", + "{run}", + "{experiment}-{period}-{run}-cal-{timestamp}-" + f"par_{tier}.{ext}", + ) + + def get_pattern_pars_overwrite(setup, tier, name=None): if name is not None: return os.path.join( From 61c1acc94c1725678f2365d7f3f0a1990f9f1eb6 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Wed, 24 Apr 2024 18:05:12 +0200 Subject: [PATCH 090/103] svm rules --- rules/dsp.smk | 67 ++++++++++++++++++++++----------- rules/psp.smk | 102 ++++++++++++++++++++++++++++++++++++++------------ 2 files changed, 123 insertions(+), 46 deletions(-) diff --git a/rules/dsp.smk b/rules/dsp.smk index bb05278..7617d48 100644 --- a/rules/dsp.smk +++ b/rules/dsp.smk @@ -7,6 +7,7 @@ Snakemake rules for processing dsp tier. This is done in 4 steps: """ from scripts.util.pars_loading import pars_catalog +from scripts.util.utils import par_dsp_path from scripts.util.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, @@ -19,6 +20,8 @@ from scripts.util.patterns import ( get_pattern_pars_tmp, get_pattern_log, get_pattern_pars, + get_pattern_pars_overwrite, + get_pattern_pars_svm, ) @@ -182,7 +185,7 @@ rule build_pars_dsp_eopt: datatype="cal", channel="{channel}", output: - dsp_pars=temp(get_pattern_pars_tmp_channel(setup, "dsp")), + dsp_pars=temp(get_pattern_pars_tmp_channel(setup, "dsp_eopt")), qbb_grid=temp( get_pattern_pars_tmp_channel(setup, "dsp", "objects", extension="pkl") ), @@ -209,26 +212,46 @@ rule build_pars_dsp_eopt: "--final_dsp_pars {output.dsp_pars}" -# This rule builds the optimal energy filter parameters for the dsp using calibration dsp files -# rule build_pars_dsp_svm: -# input: -# hyperpars="", -# train_data="", -# output: -# dsp_pars=get_pattern_pars(setup, "dsp", "svm"), -# log: -# get_pattern_log_channel(setup, "pars_dsp_svm"), -# group: -# "par-dsp" -# resources: -# runtime=300, -# shell: -# "{swenv} python3 -B " -# f"{workflow.source_path('../scripts/pars_dsp_svm.py')} " -# "--log {log} " -# "--train_data {input.train_data} " -# "--train_hyperpars {input.hyperpars} " -# "--output_file {output.dsp_pars}" +rule build_svm_dsp: + input: + hyperpars=lambda wildcards: get_svm_file(wildcards, "dsp", "svm_hyperpars"), + train_data=lambda wildcards: get_svm_file(wildcards, "dsp", "svm_train"), + output: + dsp_pars=get_pattern_pars(setup, "dsp", "svm", "pkl"), + log: + get_pattern_log(setup, "pars_dsp_svm").replace("{datatype}", "cal"), + group: + "par-dsp-svm" + resources: + runtime=300, + shell: + "{swenv} python3 -B " + f"{workflow.source_path('../scripts/pars_dsp_build_svm.py')} " + "--log {log} " + "--train_data 
{input.train_data} " + "--train_hyperpars {input.hyperpars} " + "--output_file {output.dsp_pars}" + + +rule build_pars_dsp_svm: + input: + dsp_pars=get_pattern_pars_tmp_channel(setup, "dsp_eopt"), + svm_file=get_pattern_pars(setup, "dsp", "svm", "pkl"), + output: + dsp_pars=temp(get_pattern_pars_tmp_channel(setup, "dsp")), + log: + get_pattern_log_channel(setup, "pars_dsp_svm"), + group: + "par-dsp" + resources: + runtime=300, + shell: + "{swenv} python3 -B " + f"{workflow.source_path('../scripts/pars_dsp_svm.py')} " + "--log {log} " + "--input_file {input.dsp_pars} " + "--output_file {output.dsp_pars} " + "--svm_file {input.svm_file}" rule build_plts_dsp: @@ -353,4 +376,4 @@ rule build_dsp: "--input {input.raw_file} " "--output {output.tier_file} " "--db_file {output.db_file} " - "--pars_file {input.pars_file}" + "--pars_file {input.pars_file} " diff --git a/rules/psp.smk b/rules/psp.smk index 6b60b0e..6591f1b 100644 --- a/rules/psp.smk +++ b/rules/psp.smk @@ -20,6 +20,13 @@ from scripts.util.patterns import ( get_pattern_pars, ) +pars_key_resolve.write_par_catalog( + ["-*-*-*-cal"], + os.path.join(pars_path(setup), "dsp", "validity.jsonl"), + get_pattern_tier_raw(setup), + {"cal": ["par_dsp"], "lar": ["par_dsp"]}, +) + pars_key_resolve.write_par_catalog( ["-*-*-*-cal"], os.path.join(pars_path(setup), "psp", "validity.jsonl"), @@ -34,7 +41,11 @@ for key, dataset in part.datasets.items(): rule: input: dsp_pars=part.get_par_files( - f"{par_dsp_path(setup)}/validity.jsonl", partition, key, tier="dsp" + f"{par_dsp_path(setup)}/validity.jsonl", + partition, + key, + tier="dsp", + name="eopt", ), dsp_objs=part.get_par_files( f"{par_dsp_path(setup)}/validity.jsonl", @@ -62,6 +73,7 @@ for key, dataset in part.datasets.items(): partition, key, tier="psp", + name="eopt", ) ), psp_objs=temp( @@ -121,7 +133,7 @@ for key, dataset in part.datasets.items(): # This rule builds the a/e calibration using the calibration dsp files for the whole partition rule build_par_psp: input: - dsp_pars=get_pattern_pars_tmp_channel(setup, "dsp"), + dsp_pars=get_pattern_pars_tmp_channel(setup, "dsp", "eopt"), dsp_objs=get_pattern_pars_tmp_channel(setup, "dsp", "objects", extension="pkl"), dsp_plots=get_pattern_plts_tmp_channel(setup, "dsp"), params: @@ -129,7 +141,7 @@ rule build_par_psp: channel="{channel}", timestamp="{timestamp}", output: - psp_pars=temp(get_pattern_pars_tmp_channel(setup, "psp")), + psp_pars=temp(get_pattern_pars_tmp_channel(setup, "psp", "eopt")), psp_objs=temp( get_pattern_pars_tmp_channel(setup, "psp", "objects", extension="pkl") ), @@ -166,6 +178,48 @@ rule_order_list.append(fallback_psp_rule.name) workflow._ruleorder.add(*rule_order_list) # [::-1] +rule build_svm_psp: + input: + hyperpars=lambda wildcards: get_svm_file(wildcards, "psp", "svm_hyperpars"), + train_data=lambda wildcards: get_svm_file(wildcards, "psp", "svm_train"), + output: + dsp_pars=get_pattern_pars(setup, "psp", "svm", "pkl"), + log: + get_pattern_log(setup, "pars_psp_svm").replace("{datatype}", "cal"), + group: + "par-dsp-svm" + resources: + runtime=300, + shell: + "{swenv} python3 -B " + f"{workflow.source_path('../scripts/pars_dsp_build_svm.py')} " + "--log {log} " + "--train_data {input.train_data} " + "--train_hyperpars {input.hyperpars} " + "--output_file {output.dsp_pars}" + + +rule build_pars_psp_svm: + input: + dsp_pars=get_pattern_pars_tmp_channel(setup, "psp_eopt"), + svm_model=get_pattern_pars(setup, "psp", "svm", "pkl"), + output: + dsp_pars=temp(get_pattern_pars_tmp_channel(setup, "psp")), + log: + 
get_pattern_log_channel(setup, "pars_dsp_svm"), + group: + "par-dsp" + resources: + runtime=300, + shell: + "{swenv} python3 -B " + f"{workflow.source_path('../scripts/pars_dsp_svm.py')} " + "--log {log} " + "--input_file {input.dsp_pars} " + "--output_file {output.dsp_pars} " + "--svm_file {input.svm_model}" + + rule build_pars_psp_objects: input: lambda wildcards: read_filelist_pars_cal_channel( @@ -203,26 +257,26 @@ rule build_plts_psp: "--output {output} " -# rule build_pars_psp: -# input: -# infiles=lambda wildcards: read_filelist_pars_cal_channel(wildcards, "psp"), -# plts=get_pattern_plts(setup, "psp"), -# objects=get_pattern_pars( -# setup, -# "psp", -# name="objects", -# extension="dir", -# check_in_cycle=check_in_cycle, -# ), -# output: -# get_pattern_pars(setup, "psp", check_in_cycle=check_in_cycle), -# group: -# "merge-hit" -# shell: -# "{swenv} python3 -B " -# f"{basedir}/../scripts/merge_channels.py " -# "--input {input.infiles} " -# "--output {output} " +rule build_pars_psp: + input: + infiles=lambda wildcards: read_filelist_pars_cal_channel(wildcards, "psp"), + plts=get_pattern_plts(setup, "psp"), + objects=get_pattern_pars( + setup, + "psp", + name="objects", + extension="dir", + check_in_cycle=check_in_cycle, + ), + output: + get_pattern_pars(setup, "psp", check_in_cycle=check_in_cycle), + group: + "merge-hit" + shell: + "{swenv} python3 -B " + f"{basedir}/../scripts/merge_channels.py " + "--input {input.infiles} " + "--output {output} " rule build_psp: @@ -256,4 +310,4 @@ rule build_psp: "--input {input.raw_file} " "--output {output.tier_file} " "--db_file {output.db_file} " - "--pars_file {input.pars_file}" + "--pars_file {input.pars_file} " From c3dc6ef2045fe2ae13e10fcb79d2ae7b284057a4 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Wed, 24 Apr 2024 18:05:41 +0200 Subject: [PATCH 091/103] add overwrites for qc --- rules/pht.smk | 52 +++++++++++++++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 20 deletions(-) diff --git a/rules/pht.smk b/rules/pht.smk index cbb05e4..e3efae7 100644 --- a/rules/pht.smk +++ b/rules/pht.smk @@ -65,6 +65,15 @@ for key, dataset in part.datasets.items(): tier="pht", name="check", ), + overwrite_files=get_overwrite_file( + "pht", + timestamp=part.get_timestamp( + f"{par_pht_path(setup)}/validity.jsonl", + partition, + key, + tier="pht", + ), + ), wildcard_constraints: channel=part.get_wildcard_constraints(partition, key), params: @@ -117,6 +126,7 @@ for key, dataset in part.datasets.items(): "--channel {params.channel} " "--save_path {output.hit_pars} " "--plot_path {output.plot_file} " + "--overwrite_files {input.overwrite_files} " "--pulser_files {input.pulser_files} " "--fft_files {input.fft_files} " "--cal_files {input.cal_files}" @@ -143,6 +153,7 @@ rule build_pht_qc: ), pulser_files=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), check_file=get_pattern_pars_tmp_channel(setup, "pht", "check"), + overwrite_files=lambda wildcards: get_overwrite_file("pht", wildcards=wildcards), params: datatype="cal", channel="{channel}", @@ -167,6 +178,7 @@ rule build_pht_qc: "--channel {params.channel} " "--save_path {output.hit_pars} " "--plot_path {output.plot_file} " + "--overwrite_files {input.overwrite_files} " "--pulser_files {input.pulser_files} " "--fft_files {input.fft_files} " "--cal_files {input.cal_files}" @@ -810,26 +822,26 @@ rule build_plts_pht: "--output {output} " -rule build_pars_pht: - input: - infiles=lambda wildcards: read_filelist_pars_cal_channel(wildcards, "pht"), - plts=get_pattern_plts(setup, 
"pht"), - objects=get_pattern_pars( - setup, - "pht", - name="objects", - extension="dir", - check_in_cycle=check_in_cycle, - ), - output: - get_pattern_pars(setup, "pht", check_in_cycle=check_in_cycle), - group: - "merge-hit" - shell: - "{swenv} python3 -B " - f"{basedir}/../scripts/merge_channels.py " - "--input {input.infiles} " - "--output {output} " +# rule build_pars_pht: +# input: +# infiles=lambda wildcards: read_filelist_pars_cal_channel(wildcards, "pht"), +# plts=get_pattern_plts(setup, "pht"), +# objects=get_pattern_pars( +# setup, +# "pht", +# name="objects", +# extension="dir", +# check_in_cycle=check_in_cycle, +# ), +# output: +# get_pattern_pars(setup, "pht", check_in_cycle=check_in_cycle), +# group: +# "merge-hit" +# shell: +# "{swenv} python3 -B " +# f"{basedir}/../scripts/merge_channels.py " +# "--input {input.infiles} " +# "--output {output} " rule build_pht: From 6102536d0ba18e58572335855a6c68c29e18e786 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Wed, 24 Apr 2024 18:05:57 +0200 Subject: [PATCH 092/103] func for svm --- rules/common.smk | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/rules/common.smk b/rules/common.smk index 6359ded..6cb5d40 100644 --- a/rules/common.smk +++ b/rules/common.smk @@ -11,6 +11,7 @@ from scripts.util.patterns import ( get_pattern_tier_raw, get_pattern_plts_tmp_channel, ) +from scripts.util import ProcessingFileKey def read_filelist(wildcards): @@ -133,3 +134,38 @@ def set_last_rule_name(workflow, new_name): workflow._localrules.add(new_name) workflow.check_localrules() + + +def get_svm_file(wildcards, tier, name): + par_overwrite_file = os.path.join(par_overwrite_path(setup), tier, "validity.jsonl") + pars_files_overwrite = pars_catalog.get_calib_files( + par_overwrite_file, wildcards.timestamp + ) + for pars_file in pars_files_overwrite: + if name in pars_file: + return os.path.join(par_overwrite_path(setup), tier, pars_file) + raise ValueError(f"Could not find model in {pars_files_overwrite}") + + +def get_overwrite_file(tier, wildcards=None, timestamp=None, name=None): + par_overwrite_file = os.path.join(par_overwrite_path(setup), tier, "validity.jsonl") + if timestamp is not None: + pars_files_overwrite = pars_catalog.get_calib_files( + par_overwrite_file, timestamp + ) + else: + pars_files_overwrite = pars_catalog.get_calib_files( + par_overwrite_file, wildcards.timestamp + ) + if name is None: + fullname = f"{tier}-overwrite.json" + else: + fullname = f"{tier}_{name}-overwrite.json" + out_files = [] + for pars_file in pars_files_overwrite: + if fullname in pars_file: + out_files.append(os.path.join(par_overwrite_path(setup), tier, pars_file)) + if len(out_files) == 0: + raise ValueError(f"Could not find name in {pars_files_overwrite}") + else: + return out_files From 5149b694674f1372a4692203780ea5e4a5a1733c Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Wed, 24 Apr 2024 18:06:22 +0200 Subject: [PATCH 093/103] increase number simultaneous jobs --- rules/evt.smk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rules/evt.smk b/rules/evt.smk index 2e29306..7454957 100644 --- a/rules/evt.smk +++ b/rules/evt.smk @@ -42,7 +42,7 @@ for tier in ("evt", "pet"): "tier-evt" resources: runtime=300, - mem_swap=70, + mem_swap=50, shell: "{swenv} python3 -B " f"{workflow.source_path('../scripts/build_evt.py')} " From 5de206ce8f3402f33c9ccd2588bfce22bb5eb296 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Fri, 26 Apr 2024 15:37:14 +0200 Subject: [PATCH 094/103] update ac to do top fit --- 
scripts/pars_hit_ecal.py | 13 ++++++++++--- scripts/pars_pht_partcal.py | 12 ++++++++++-- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/scripts/pars_hit_ecal.py b/scripts/pars_hit_ecal.py index e84e51f..b324b62 100644 --- a/scripts/pars_hit_ecal.py +++ b/scripts/pars_hit_ecal.py @@ -532,7 +532,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): (1592.53, (40, 20), pgf.gauss_on_step), (1620.50, (20, 40), pgf.gauss_on_step), (2103.53, (40, 40), pgf.gauss_on_step), - (2614.50, (60, 60), pgf.hpge_peak), + (2614.553, (60, 60), pgf.hpge_peak), ] glines = [pk_par[0] for pk_par in pk_pars] @@ -570,14 +570,21 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): full_object_dict[cal_energy_param].hpge_get_energy_peaks( e_uncal, etol_kev=5 if det_status == "on" else 20 ) - if 2614.50 not in full_object_dict[cal_energy_param].peaks_kev: + if 2614.553 not in full_object_dict[cal_energy_param].peaks_kev: full_object_dict[cal_energy_param].hpge_get_energy_peaks( e_uncal, peaks_kev=glines, etol_kev=5 if det_status == "on" else 30, n_sigma=2 ) got_peaks_kev = full_object_dict[cal_energy_param].peaks_kev.copy() + if det_status != "on": + full_object_dict[cal_energy_param].hpge_cal_energy_peak_tops( + e_uncal, + peaks_kev=got_peaks_kev, + update_cal_pars=True, + allowed_p_val=0, + ) full_object_dict[cal_energy_param].hpge_fit_energy_peaks( e_uncal, - peaks_kev=[2614.50], + peaks_kev=[2614.553], peak_pars=pk_pars, tail_weight=kwarg_dict.get("tail_weight", 0), n_events=kwarg_dict.get("n_events", None), diff --git a/scripts/pars_pht_partcal.py b/scripts/pars_pht_partcal.py index 73461f4..21a2654 100644 --- a/scripts/pars_pht_partcal.py +++ b/scripts/pars_pht_partcal.py @@ -308,7 +308,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): (1592.53, (30, 20), pgf.hpge_peak), (1620.50, (20, 30), pgf.hpge_peak), (2103.53, (30, 30), pgf.hpge_peak), - (2614.50, (30, 30), pgf.hpge_peak), + (2614.553, (30, 30), pgf.hpge_peak), (3125, (30, 30), pgf.gauss_on_step), (3198, (30, 30), pgf.gauss_on_step), (3474, (30, 30), pgf.gauss_on_step), @@ -330,11 +330,19 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): for energy_param, cal_energy_param in zip(kwarg_dict["energy_params"], cal_energy_params): energy = data.query(selection_string)[energy_param].to_numpy() full_object_dict[cal_energy_param] = HPGeCalibration( - energy_param, glines, 1, kwarg_dict.get("deg", 0), fixed={1: 1} + energy_param, glines, 1, kwarg_dict.get("deg", 0) # , fixed={1: 1} ) full_object_dict[cal_energy_param].hpge_get_energy_peaks( energy, etol_kev=5 if det_status == "on" else 10 ) + + if det_status != "on": + full_object_dict[cal_energy_param].hpge_cal_energy_peak_tops( + energy, + update_cal_pars=True, + allowed_p_val=0, + ) + full_object_dict[cal_energy_param].hpge_fit_energy_peaks( energy, peak_pars=pk_pars, From b0530263fd7267808378368cfbb73c3f44604a19 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Fri, 26 Apr 2024 15:37:32 +0200 Subject: [PATCH 095/103] fix psp merging --- rules/psp.smk | 49 ++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 42 insertions(+), 7 deletions(-) diff --git a/rules/psp.smk b/rules/psp.smk index 6591f1b..de08064 100644 --- a/rules/psp.smk +++ b/rules/psp.smk @@ -235,7 +235,7 @@ rule build_pars_psp_objects: check_in_cycle=check_in_cycle, ), group: - "merge-hit" + "merge-psp" shell: "{swenv} python3 -B " f"{basedir}/../scripts/merge_channels.py " @@ -249,7 +249,27 @@ rule 
build_plts_psp: output: get_pattern_plts(setup, "psp"), group: - "merge-hit" + "merge-psp" + shell: + "{swenv} python3 -B " + f"{basedir}/../scripts/merge_channels.py " + "--input {input} " + "--output {output} " + + +rule build_pars_psp_db: + input: + lambda wildcards: read_filelist_pars_cal_channel(wildcards, "psp"), + output: + temp( + get_pattern_pars_tmp( + setup, + "psp", + datatype="cal", + ) + ), + group: + "merge-psp" shell: "{swenv} python3 -B " f"{basedir}/../scripts/merge_channels.py " @@ -259,7 +279,14 @@ rule build_plts_psp: rule build_pars_psp: input: - infiles=lambda wildcards: read_filelist_pars_cal_channel(wildcards, "psp"), + in_files=lambda wildcards: read_filelist_pars_cal_channel( + wildcards, "dsp_dplms_lh5" + ), + in_db=get_pattern_pars_tmp( + setup, + "psp", + datatype="cal", + ), plts=get_pattern_plts(setup, "psp"), objects=get_pattern_pars( setup, @@ -269,14 +296,22 @@ rule build_pars_psp: check_in_cycle=check_in_cycle, ), output: - get_pattern_pars(setup, "psp", check_in_cycle=check_in_cycle), + out_file=get_pattern_pars( + setup, + "psp", + extension="lh5", + check_in_cycle=check_in_cycle, + ), + out_db=get_pattern_pars(setup, "psp", check_in_cycle=check_in_cycle), group: - "merge-hit" + "merge-psp" shell: "{swenv} python3 -B " f"{basedir}/../scripts/merge_channels.py " - "--input {input.infiles} " - "--output {output} " + "--output {output.out_file} " + "--in_db {input.in_db} " + "--out_db {output.out_db} " + "--input {input.in_files} " rule build_psp: From 8ef060e99e5508ce46ffde8133c412d8380f35b6 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 26 Apr 2024 13:55:51 +0000 Subject: [PATCH 096/103] style: pre-commit fixes --- Snakefile | 4 ++-- rules/main.smk | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Snakefile b/Snakefile index 67bfaba..3c7d486 100644 --- a/Snakefile +++ b/Snakefile @@ -95,7 +95,7 @@ onstart: onsuccess: from snakemake.report import auto_report - rep_dir = f"{log_path(setup)}/report-{datetime.strftime(datetime.utcnow(), '%Y%m%dT%H%M%SZ')}" + rep_dir = f"{log_path(setup)}/report-{datetime.strftime(datetime.utcnow() , '%Y%m%dT%H%M%SZ')}" pathlib.Path(rep_dir).mkdir(parents=True, exist_ok=True) # auto_report(workflow.persistence.dag, f"{rep_dir}/report.html") with open(os.path.join(rep_dir, "dag.txt"), "w") as f: @@ -112,7 +112,7 @@ onsuccess: if os.path.isfile(file): os.remove(file) - # remove filelists + # remove filelists files = glob.glob(os.path.join(filelist_path(setup), "*")) for file in files: if os.path.isfile(file): diff --git a/rules/main.smk b/rules/main.smk index b67ea46..86d940a 100644 --- a/rules/main.smk +++ b/rules/main.smk @@ -29,10 +29,10 @@ rule autogen_output: gen_output="{label}-{tier}.gen", summary_log=f"{log_path(setup)}/summary-" + "{label}-{tier}" - + f"-{datetime.strftime(datetime.utcnow(), '%Y%m%dT%H%M%SZ')}.log", + + f"-{datetime.strftime(datetime.utcnow() , '%Y%m%dT%H%M%SZ')}.log", warning_log=f"{log_path(setup)}/warning-" + "{label}-{tier}" - + f"-{datetime.strftime(datetime.utcnow(), '%Y%m%dT%H%M%SZ')}.log", + + f"-{datetime.strftime(datetime.utcnow() , '%Y%m%dT%H%M%SZ')}.log", params: log_path=tmp_log_path(setup), tmp_par_path=os.path.join(tmp_par_path(setup), "*_db.json"), From c88b3c8c85925fe13ff1b0649cf724ad28e66e0d Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sun, 28 Apr 2024 00:09:22 +0200 Subject: [PATCH 097/103] update svm rules and increase dsp jobs --- rules/dsp.smk | 4 ++-- rules/psp.smk | 6 +++--- 2 files 
changed, 5 insertions(+), 5 deletions(-) diff --git a/rules/dsp.smk b/rules/dsp.smk index 7617d48..5f4f355 100644 --- a/rules/dsp.smk +++ b/rules/dsp.smk @@ -215,7 +215,7 @@ rule build_pars_dsp_eopt: rule build_svm_dsp: input: hyperpars=lambda wildcards: get_svm_file(wildcards, "dsp", "svm_hyperpars"), - train_data=lambda wildcards: get_svm_file(wildcards, "dsp", "svm_train"), + train_data=lambda wildcards: get_svm_file(wildcards, "dsp", "svm_hyperpars").replace("hyperpars.json", "train.lh5"), output: dsp_pars=get_pattern_pars(setup, "dsp", "svm", "pkl"), log: @@ -365,7 +365,7 @@ rule build_dsp: "tier-dsp" resources: runtime=300, - mem_swap=50, + mem_swap=40, shell: "{swenv} python3 -B " f"{workflow.source_path('../scripts/build_dsp.py')} " diff --git a/rules/psp.smk b/rules/psp.smk index de08064..7ec81a2 100644 --- a/rules/psp.smk +++ b/rules/psp.smk @@ -181,7 +181,7 @@ workflow._ruleorder.add(*rule_order_list) # [::-1] rule build_svm_psp: input: hyperpars=lambda wildcards: get_svm_file(wildcards, "psp", "svm_hyperpars"), - train_data=lambda wildcards: get_svm_file(wildcards, "psp", "svm_train"), + train_data=lambda wildcards: get_svm_file(wildcards, "psp", "svm_hyperpars").replace("hyperpars.json", "train.lh5"), output: dsp_pars=get_pattern_pars(setup, "psp", "svm", "pkl"), log: @@ -329,12 +329,12 @@ rule build_psp: tier_file=get_pattern_tier(setup, "psp", check_in_cycle=check_in_cycle), db_file=get_pattern_pars_tmp(setup, "psp_db"), log: - get_pattern_log(setup, "tier_dsp"), + get_pattern_log(setup, "tier_psp"), group: "tier-dsp" resources: runtime=300, - mem_swap=50, + mem_swap=40, shell: "{swenv} python3 -B " f"{workflow.source_path('../scripts/build_dsp.py')} " From b26896f385ac8710ae4bd91918300869cc277541 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sun, 28 Apr 2024 00:09:37 +0200 Subject: [PATCH 098/103] bugfix for merging --- scripts/merge_channels.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/merge_channels.py b/scripts/merge_channels.py index c2698eb..b169d29 100644 --- a/scripts/merge_channels.py +++ b/scripts/merge_channels.py @@ -20,7 +20,7 @@ def replace_path(d, old_path, new_path): d[i] = replace_path(d[i], old_path, new_path) elif isinstance(d, str) and old_path in d: d = d.replace(old_path, new_path) - d = f"$_/{os.path.basename(new_path)}" + d = d.replace(new_path, f"$_/{os.path.basename(new_path)}") return d From 7c4435b3914ac843a46be53ca6fc4e980f1953dc Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sun, 28 Apr 2024 00:09:46 +0200 Subject: [PATCH 099/103] update to latest versions --- templates/config.json | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/templates/config.json b/templates/config.json index 1884061..86091e0 100644 --- a/templates/config.json +++ b/templates/config.json @@ -53,11 +53,11 @@ "arg": "/data2/public/prodenv/containers/legendexp_legend-base_latest_20221021210158.sif" }, "pkg_versions": { - "pygama": "pygama==1.6.0", - "pylegendmeta": "pylegendmeta==0.9.0", - "dspeed": "dspeed==1.3.0", - "legend-pydataobj": "legend-pydataobj==1.5.1", - "legend-daq2lh5": "legend-daq2lh5==1.2.0" + "pygama": "pygama==2.0.0a1", + "pylegendmeta": "pylegendmeta==0.10.0", + "dspeed": "dspeed==1.3.0a6", + "legend-pydataobj": "legend-pydataobj==1.6.1", + "legend-daq2lh5": "legend-daq2lh5==1.2.1" } } } From c775e01ee32454475dc4dd7e4e4b9021de311891 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sun, 28 Apr 2024 00:11:56 +0200 Subject: [PATCH 100/103] pc fixes --- rules/dsp.smk | 4 +++- rules/psp.smk | 4 +++- 2 
files changed, 6 insertions(+), 2 deletions(-) diff --git a/rules/dsp.smk b/rules/dsp.smk index 5f4f355..3a917b6 100644 --- a/rules/dsp.smk +++ b/rules/dsp.smk @@ -215,7 +215,9 @@ rule build_pars_dsp_eopt: rule build_svm_dsp: input: hyperpars=lambda wildcards: get_svm_file(wildcards, "dsp", "svm_hyperpars"), - train_data=lambda wildcards: get_svm_file(wildcards, "dsp", "svm_hyperpars").replace("hyperpars.json", "train.lh5"), + train_data=lambda wildcards: get_svm_file( + wildcards, "dsp", "svm_hyperpars" + ).replace("hyperpars.json", "train.lh5"), output: dsp_pars=get_pattern_pars(setup, "dsp", "svm", "pkl"), log: diff --git a/rules/psp.smk b/rules/psp.smk index 7ec81a2..84c3f03 100644 --- a/rules/psp.smk +++ b/rules/psp.smk @@ -181,7 +181,9 @@ workflow._ruleorder.add(*rule_order_list) # [::-1] rule build_svm_psp: input: hyperpars=lambda wildcards: get_svm_file(wildcards, "psp", "svm_hyperpars"), - train_data=lambda wildcards: get_svm_file(wildcards, "psp", "svm_hyperpars").replace("hyperpars.json", "train.lh5"), + train_data=lambda wildcards: get_svm_file( + wildcards, "psp", "svm_hyperpars" + ).replace("hyperpars.json", "train.lh5"), output: dsp_pars=get_pattern_pars(setup, "psp", "svm", "pkl"), log: From 5cd871f06d19bbba490565db57eb6cb990777dd6 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 29 Apr 2024 12:57:28 +0200 Subject: [PATCH 101/103] dsp job increase --- rules/dsp.smk | 2 +- rules/psp.smk | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/rules/dsp.smk b/rules/dsp.smk index 3a917b6..d44a6db 100644 --- a/rules/dsp.smk +++ b/rules/dsp.smk @@ -367,7 +367,7 @@ rule build_dsp: "tier-dsp" resources: runtime=300, - mem_swap=40, + mem_swap=25, shell: "{swenv} python3 -B " f"{workflow.source_path('../scripts/build_dsp.py')} " diff --git a/rules/psp.smk b/rules/psp.smk index 84c3f03..d581107 100644 --- a/rules/psp.smk +++ b/rules/psp.smk @@ -336,7 +336,7 @@ rule build_psp: "tier-dsp" resources: runtime=300, - mem_swap=40, + mem_swap=25, shell: "{swenv} python3 -B " f"{workflow.source_path('../scripts/build_dsp.py')} " From a049094e66ed338d6ba2378838919562bdd228d6 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 29 Apr 2024 12:59:38 +0200 Subject: [PATCH 102/103] bugfix --- scripts/complete_run.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/scripts/complete_run.py b/scripts/complete_run.py index da65b49..722b244 100644 --- a/scripts/complete_run.py +++ b/scripts/complete_run.py @@ -174,16 +174,6 @@ def build_file_dbs(input_files, output_dir): warning_file=snakemake.output.warning_log, ) -if snakemake.wildcards.tier != "daq": - os.makedirs(snakemake.params.filedb_path, exist_ok=True) - with open(os.path.join(snakemake.params.filedb_path, "file_db_config.json"), "w") as w: - json.dump(file_db_config, w, indent=2) - - build_file_dbs(snakemake.params.tmp_par_path, snakemake.params.filedb_path) - os.remove(os.path.join(snakemake.params.filedb_path, "file_db_config.json")) - - build_valid_keys(snakemake.params.tmp_par_path, snakemake.params.valid_keys_path) - if os.getenv("PRODENV") in snakemake.params.filedb_path: file_db_config = { "data_dir": "$PRODENV", @@ -275,4 +265,14 @@ def build_file_dbs(input_files, output_dir): }, } +if snakemake.wildcards.tier != "daq": + os.makedirs(snakemake.params.filedb_path, exist_ok=True) + with open(os.path.join(snakemake.params.filedb_path, "file_db_config.json"), "w") as w: + json.dump(file_db_config, w, indent=2) + + build_file_dbs(snakemake.params.tmp_par_path, 
snakemake.params.filedb_path) + os.remove(os.path.join(snakemake.params.filedb_path, "file_db_config.json")) + + build_valid_keys(snakemake.params.tmp_par_path, snakemake.params.valid_keys_path) + pathlib.Path(snakemake.output.gen_output).touch() From 90a38fae4e0041376861d5c9857f1f94737dbcfe Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 29 Apr 2024 13:00:04 +0200 Subject: [PATCH 103/103] increment dspeed version --- templates/config.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/templates/config.json b/templates/config.json index 86091e0..5c46803 100644 --- a/templates/config.json +++ b/templates/config.json @@ -55,7 +55,7 @@ "pkg_versions": { "pygama": "pygama==2.0.0a1", "pylegendmeta": "pylegendmeta==0.10.0", - "dspeed": "dspeed==1.3.0a6", + "dspeed": "dspeed==1.4.0a1", "legend-pydataobj": "legend-pydataobj==1.6.1", "legend-daq2lh5": "legend-daq2lh5==1.2.1" }
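
A note on the discharge-recovery veto introduced in PATCH 083 and PATCH 085: each script flags events that land within 0.01 s after a saturated discharge (t_sat_lo > 0) and drops them before fitting or cut generation. The committed form loops over every discharge timestamp, which is O(n_events * n_discharges); the same mask can be built with one binary search per event. A minimal standalone sketch of that idea (illustrative only; the helper name is not part of the repository scripts, and event_ts/discharge_ts are assumed to be numpy arrays of unix-second timestamps):

    import numpy as np

    def is_recovering_mask(event_ts, discharge_ts, window=0.01):
        # Flag events strictly within `window` seconds after any discharge;
        # equivalent to the per-discharge loop in pars_dsp_tau.py and the
        # pars_*_qc.py scripts, but O(n log n) via np.searchsorted.
        event_ts = np.asarray(event_ts)
        discharge_ts = np.sort(np.asarray(discharge_ts))
        if discharge_ts.size == 0:
            return np.zeros(event_ts.shape, dtype=bool)
        # index of the most recent discharge at or before each event
        idx = np.searchsorted(discharge_ts, event_ts, side="right") - 1
        dt = event_ts - discharge_ts[np.clip(idx, 0, None)]
        return (idx >= 0) & (dt > 0) & (dt < window)

Whichever form is used, the window test only makes sense against timestamp values in seconds: substituting np.where(...)[0] indices for the values turns is_recovering into an all-False mask and the veto into a no-op, which is why the scripts above take the timestamps of the discharge events directly.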