add averaging consecutive and interpolation between consecutive samples

legend-exp · Nov 18, 2024 · c7b6d7d · c7b6d7d
1 parent e82937b
commit c7b6d7d
Showing 1 changed file with 151 additions and 10 deletions.
diff --git a/src/pygama/pargen/AoE_cal.py b/src/pygama/pargen/AoE_cal.py
@@ -421,8 +421,18 @@ def unbinned_aoe_fit(
 
     fit2 = (m2.values, m2.errors, m2.covariance, cs2, pdf, mask, valid2, m2)
 
-    frac_errors1 = np.sum(np.abs(np.array(m.errors)[mask] / np.array(m.values)[mask]))
-    frac_errors2 = np.sum(np.abs(np.array(m2.errors)[mask] / np.array(m2.values)[mask]))
+    frac_errors1 = np.sum(
+        np.abs(
+            np.array(m.errors)[mask & (np.array(m.values) > 0)]
+            / np.array(m.values)[mask & (np.array(m.values) > 0)]
+        )
+    )
+    frac_errors2 = np.sum(
+        np.abs(
+            np.array(m2.errors)[mask & (np.array(m2.values) > 0)]
+            / np.array(m2.values)[mask & (np.array(m2.values) > 0)]
+        )
+    )
 
     if valid2 is False:
         fit = fit1
@@ -545,6 +555,73 @@ def fit_time_means(tstamps, means, sigmas):
     return out_dict
 
 
+def average_consecutive(tstamps, means):
+    """
+    Fit the time dependence of the means of the A/E distribution by average consecutive entries
+
+    Parameters
+    ----------
+    tstamps: np.array
+        Timestamps of the data
+    means: np.array
+        Means of the A/E distribution
+    sigmas: np.array
+        Sigmas of the A/E distribution
+
+    Returns: dict
+        Dictionary of the time dependence of the means
+    """
+    out_dict = {}
+    for i, tstamp in enumerate(tstamps):
+        if i + 1 == len(means):
+            out_dict[tstamp] = means[i]
+        else:
+            out_dict[tstamp] = np.mean([means[i], means[i + 1]])
+    return out_dict
+
+
+def interpolate_consecutive(tstamps, means, times, aoe_param, output_name):
+    """
+    Fit the time dependence of the means of the A/E distribution by average consecutive entries
+
+    Parameters
+    ----------
+    tstamps: np.array
+        Timestamps of the data
+    means: np.array
+        Means of the A/E distribution
+    sigmas: np.array
+        Sigmas of the A/E distribution
+    times: np.array
+        Times of the mean samples in unix time format
+
+    Returns: dict
+        Dictionary of the time dependence of the means
+    """
+    out_dict = {}
+    for i, tstamp in enumerate(tstamps):
+        if i + 1 == len(means):
+            out_dict[tstamp] = {
+                output_name: {
+                    "expression": f"{aoe_param}/a",
+                    "parameters": {"a": means[i]},
+                }
+            }
+        else:
+            out_dict[tstamp] = {
+                output_name: {
+                    "expression": f"{aoe_param} / ((timestamp - a ) * b + c) ",
+                    "parameters": {
+                        "a": times[i],
+                        "b": (means[i + 1] - means[i]) / (times[i + 1] - times[i]),
+                        "c": means[i],
+                    },
+                }
+            }
+
+    return out_dict
+
+
 class CalAoE:
     """
     Class for calibrating the A/E,
@@ -677,6 +754,8 @@ def time_correction(
                      then group and take mean otherwise when a run higher than 0.4 sigma
                      is found if it is a single run set to nan otherwise start a new block
             full : each run will be corrected individually
+            average_consecutive: average the consecutive centroids
+            interpolate_consecutive: interpolate between the consecutive centroids
         output_name: str
             Name of the output parameter for the time corrected A/E in the dataframe and to
             be added to the calibration dictionary
@@ -749,6 +828,15 @@ def time_correction(
                             np.array(self.timecorr_df["mean"]),
                             np.array(self.timecorr_df["sigma"]),
                         )
+                        final_time_dict = {
+                            tstamp: {
+                                output_name: {
+                                    "expression": f"{aoe_param}/a",
+                                    "parameters": {"a": t_dict},
+                                }
+                            }
+                            for tstamp, t_dict in time_dict.items()
+                        }
 
                     elif mode == "full":
                         time_dict = {
@@ -758,21 +846,37 @@ def time_correction(
                                 np.array(self.timecorr_df["mean"]),
                             )
                         }
+                        final_time_dict = {
+                            tstamp: {
+                                output_name: {
+                                    "expression": f"{aoe_param}/a",
+                                    "parameters": {"a": t_dict},
+                                }
+                            }
+                            for tstamp, t_dict in time_dict.items()
+                        }
 
                     elif mode == "none":
                         time_dict = {
                             time: np.nanmean(self.timecorr_df["mean"])
                             for time in np.array(self.timecorr_df.index)
                         }
+                        final_time_dict = {
+                            tstamp: {
+                                output_name: {
+                                    "expression": f"{aoe_param}/a",
+                                    "parameters": {"a": t_dict},
+                                }
+                            }
+                            for tstamp, t_dict in time_dict.items()
+                        }
 
-                    else:
-                        raise ValueError("unknown mode")
-
-                    df[output_name] = df[aoe_param] / np.array(
-                        [time_dict[tstamp] for tstamp in df["run_timestamp"]]
-                    )
-                    self.update_cal_dicts(
-                        {
+                    elif mode == "average_consecutive":
+                        time_dict = average_consecutive(
+                            np.array(self.timecorr_df.index),
+                            np.array(self.timecorr_df["mean"]),
+                        )
+                        final_time_dict = {
                             tstamp: {
                                 output_name: {
                                     "expression": f"{aoe_param}/a",
@@ -781,7 +885,44 @@ def time_correction(
                             }
                             for tstamp, t_dict in time_dict.items()
                         }
+
+                    elif mode == "interpolate_consecutive":
+                        if "timestamp" in df:
+                            times = []
+                            for tstamp in np.array(self.timecorr_df.index):
+                                times.append(
+                                    np.nanmedian(
+                                        df.query(f"run_timestamp=='{tstamp}'")[
+                                            "timestamp"
+                                        ]
+                                    )
+                                )
+                            final_time_dict = interpolate_consecutive(
+                                np.array(self.timecorr_df.index),
+                                np.array(self.timecorr_df["mean"]),
+                                times,
+                                aoe_param,
+                                output_name,
+                            )
+                            time_dict = {
+                                time: mean
+                                for time, mean in zip(
+                                    np.array(self.timecorr_df.index),
+                                    np.array(self.timecorr_df["mean"]),
+                                )
+                            }
+                        else:
+                            raise ValueError(
+                                "need timestamp column in dataframe for interpolation"
+                            )
+
+                    else:
+                        raise ValueError("unknown mode")
+
+                    df[output_name] = df[aoe_param] / np.array(
+                        [time_dict[tstamp] for tstamp in df["run_timestamp"]]
                     )
+                    self.update_cal_dicts(final_time_dict)
                 else:
                     df[output_name] = (
                         df[aoe_param] / np.array(self.timecorr_df["mean"])[0]