Merge pull request #411 from OpenCOMPES/energy_calibration_bias_shift
Energy calibration bias shift
rettigl authored Jul 16, 2024
2 parents 3eb5dea + c717114 commit cb7c97a
Showing 17 changed files with 505 additions and 263 deletions.
2 changes: 2 additions & 0 deletions .cspell/custom-dictionary.txt
@@ -38,6 +38,7 @@ caldir
calib
calibdict
caplog
capsys
cdeform
cdeformfield
cdisp
@@ -77,6 +78,7 @@ datastreams
datestring
ddir
delaxes
delayeds
Desy
dfield
dfops
2 changes: 2 additions & 0 deletions benchmarks/benchmark_sed.py
@@ -121,6 +121,7 @@ def test_workflow_1d() -> None:
system_config={},
verbose=True,
)
processor.dataframe["sampleBias"] = 16.7
processor.add_jitter()
processor.apply_momentum_correction()
processor.apply_momentum_calibration()
@@ -155,6 +156,7 @@ def test_workflow_4d() -> None:
system_config={},
verbose=True,
)
processor.dataframe["sampleBias"] = 16.7
processor.add_jitter()
processor.apply_momentum_correction()
processor.apply_momentum_calibration()
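The only change to both benchmark workflows is seeding the new sampleBias column before the calibration chain runs. Since processor.dataframe is a dask dataframe in sed, the scalar assignment becomes a constant column on every partition; a minimal standalone sketch of that behaviour (toy columns and values, not the actual benchmark data):

import dask.dataframe as dd
import pandas as pd

# Toy stand-in for processor.dataframe (a dask dataframe in sed).
pdf = pd.DataFrame({"X": [10.0, 20.0, 30.0], "t": [66000.0, 66100.0, 66200.0]})
ddf = dd.from_pandas(pdf, npartitions=2)

# The benchmark's new line: a scalar assignment yields a constant column on
# every partition, so each electron event carries the scan's sample bias.
ddf["sampleBias"] = 16.7
print(ddf.compute())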
107 changes: 53 additions & 54 deletions sed/calibrator/energy.py
@@ -4,7 +4,6 @@
from __future__ import annotations

import itertools as it
import warnings as wn
from collections.abc import Sequence
from copy import deepcopy
from datetime import datetime
@@ -505,7 +504,7 @@ def feature_extract(

def calibrate(
self,
ref_id: int = 0,
ref_energy: float = 0,
method: str = "lmfit",
energy_scale: str = "kinetic",
landmarks: np.ndarray = None,
@@ -518,8 +517,7 @@ def calibrate(
scale using optimization methods.
Args:
ref_id (int, optional): The reference trace index (an integer).
Defaults to 0.
ref_energy (float): Binding/kinetic energy of the detected feature.
method (str, optional): Method for determining the energy calibration.
- **'lmfit'**: Energy calibration using lmfit and 1/t^2 form.
@@ -574,7 +572,7 @@ def calibrate(
sign * biases,
binwidth,
binning,
ref_id=ref_id,
ref_energy=ref_energy,
t=t,
energy_scale=energy_scale,
verbose=verbose,
@@ -584,7 +582,7 @@
self.calibration = poly_energy_calibration(
landmarks,
sign * biases,
ref_id=ref_id,
ref_energy=ref_energy,
aug=self.dup,
method=method,
t=t,
@@ -652,7 +650,7 @@ def view( # pylint: disable=dangerous-default-value
for itr, trace in enumerate(traces):
if align:
ax.plot(
xaxis + sign * (self.biases[itr] - self.biases[self.calibration["refid"]]),
xaxis + sign * (self.biases[itr]),
trace,
ls="-",
linewidth=1,
@@ -715,7 +713,7 @@ def view( # pylint: disable=dangerous-default-value
trace = traces[itr, :]
if align:
fig.line(
xaxis + sign * (self.biases[itr] - self.biases[self.calibration["refid"]]),
xaxis + sign * (self.biases[itr]),
trace,
color=color,
line_dash="solid",
@@ -774,6 +772,7 @@ def append_energy_axis(
tof_column: str = None,
energy_column: str = None,
calibration: dict = None,
bias_voltage: float = None,
verbose: bool = True,
**kwds,
) -> tuple[pd.DataFrame | dask.dataframe.DataFrame, dict]:
@@ -789,6 +788,9 @@
calibration (dict, optional): Calibration dictionary. If provided,
overrides calibration from class or config.
Defaults to self.calibration or config["energy"]["calibration"].
bias_voltage (float, optional): Sample bias voltage of the scan data. If omitted,
the bias voltage is read from the dataframe. If it is not found there,
a warning is printed and the calibrated data might have an offset.
verbose (bool, optional): Option to print out diagnostic information.
Defaults to True.
**kwds: additional keyword arguments for the energy conversion. They are
@@ -838,6 +840,8 @@

elif "coeffs" in calibration and "E0" in calibration:
calibration["calib_type"] = "poly"
if "energy_scale" not in calibration:
calibration["energy_scale"] = "kinetic"
else:
raise ValueError("No valid calibration parameters provided!")

@@ -877,6 +881,20 @@
else:
raise NotImplementedError

# apply bias offset
scale_sign: Literal[-1, 1] = -1 if calibration["energy_scale"] == "binding" else 1
if bias_voltage is not None:
df[energy_column] = df[energy_column] + scale_sign * bias_voltage
elif self._config["dataframe"]["bias_column"] in df.columns:
df = dfops.offset_by_other_columns(
df=df,
target_column=energy_column,
offset_columns=self._config["dataframe"]["bias_column"],
weights=scale_sign,
)
else:
print("Sample bias data not found or provided. Calibrated energy might be incorrect.")

metadata = self.gather_calibration_metadata(calibration)

return df, metadata
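
The new block above shifts the freshly calibrated energy by the sample bias, with the sign tied to the energy scale. A self-contained pandas sketch of the same arithmetic (the dfops.offset_by_other_columns call is mimicked by a plain column addition; column names and values are invented):

import pandas as pd

df = pd.DataFrame({
    "energy": [10.0, 10.5, 11.0],        # freshly calibrated energies
    "sampleBias": [16.7, 16.7, 16.7],    # bias column, as named in the mpes config
})

energy_scale = "kinetic"                 # or "binding"
scale_sign = -1 if energy_scale == "binding" else 1

# Equivalent of the dfops.offset_by_other_columns call for a single offset column:
df["energy"] = df["energy"] + scale_sign * df["sampleBias"]
print(df["energy"].tolist())             # roughly [26.7, 27.2, 27.7]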
@@ -1527,6 +1545,9 @@ def add_offsets(
offsets["creation_date"] = datetime.now().timestamp()
# column-based offsets
if columns is not None:
if isinstance(columns, str):
columns = [columns]

if weights is None:
weights = 1
if isinstance(weights, (int, float, np.integer, np.floating)):
@@ -1538,10 +1559,13 @@
if not all(isinstance(s, (int, float, np.integer, np.floating)) for s in weights):
raise TypeError(f"Invalid type for weights: {type(weights)}")

if isinstance(columns, str):
columns = [columns]
if isinstance(preserve_mean, bool):
preserve_mean = [preserve_mean] * len(columns)
if preserve_mean is None:
preserve_mean = False
if not isinstance(preserve_mean, Sequence):
preserve_mean = [preserve_mean]
if len(preserve_mean) == 1:
preserve_mean = [preserve_mean[0]] * len(columns)

if not isinstance(reductions, Sequence):
reductions = [reductions]
if len(reductions) == 1:
@@ -1625,10 +1649,7 @@ def add_offsets(
if constant:
if not isinstance(constant, (int, float, np.integer, np.floating)):
raise TypeError(f"Invalid type for constant: {type(constant)}")
df[energy_column] = df.map_partitions(
lambda x: x[energy_column] + constant,
meta=(energy_column, np.float64),
)
df[energy_column] = df[energy_column] + constant

self.offsets = offsets
metadata["offsets"] = offsets
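
add_offsets now accepts a single column name as a plain string, broadcasts scalar weights, and applies a constant offset by simple column addition rather than map_partitions. A standalone mimic of that handling, not the sed API itself (column names and numbers are invented):

import numpy as np
import pandas as pd

def apply_offsets(df, energy_column, columns=None, weights=None, constant=None):
    """Toy mimic of the column/constant handling in EnergyCalibrator.add_offsets."""
    if columns is not None:
        if isinstance(columns, str):      # a single column name is now allowed
            columns = [columns]
        if weights is None:
            weights = 1
        if isinstance(weights, (int, float, np.integer, np.floating)):
            weights = [weights] * len(columns)
        for col, weight in zip(columns, weights):
            df[energy_column] = df[energy_column] + weight * df[col]
    if constant is not None:
        # constant offsets are now a plain column addition
        df[energy_column] = df[energy_column] + constant
    return df

df = pd.DataFrame({"energy": [1.0, 2.0], "photonEnergy": [21.2, 21.2]})
df = apply_offsets(df, "energy", columns="photonEnergy", weights=-1, constant=0.5)
print(df["energy"].tolist())  # approximately [-19.7, -18.7]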
@@ -2082,8 +2103,7 @@ def fit_energy_calibration(
vals: list[float] | np.ndarray,
binwidth: float,
binning: int,
ref_id: int = 0,
ref_energy: float = None,
ref_energy: float,
t: list[float] | np.ndarray = None,
energy_scale: str = "kinetic",
verbose: bool = True,
@@ -2100,9 +2120,8 @@
each EDC.
binwidth (float): Time width of each original TOF bin in ns.
binning (int): Binning factor of the TOF values.
ref_id (int, optional): Reference dataset index. Defaults to 0.
ref_energy (float, optional): Energy value of the feature in the reference
trace (eV). required to output the calibration. Defaults to None.
ref_energy (float): Energy value of the feature in the reference
trace (eV).
t (list[float] | np.ndarray, optional): Array of TOF values. Required
to calculate calibration trace. Defaults to None.
energy_scale (str, optional): Direction of increasing energy scale.
@@ -2127,14 +2146,6 @@
- "axis": Fitted energy axis.
"""
vals = np.asarray(vals)
nvals = vals.size

if ref_id >= nvals:
wn.warn(
"Reference index (refid) cannot be larger than the number of traces!\
Reset to the largest allowed number.",
)
ref_id = nvals - 1

def residual(pars, time, data, binwidth, binning, energy_scale):
model = tof2ev(
@@ -2200,22 +2211,20 @@ def residual(pars, time, data, binwidth, binning, energy_scale):
ecalibdict["t0"] = result.params["t0"].value
ecalibdict["E0"] = result.params["E0"].value
ecalibdict["energy_scale"] = energy_scale
energy_offset = pfunc(-1 * ref_energy, pos[0])
ecalibdict["E0"] = -(energy_offset - vals[0])

if (ref_energy is not None) and (t is not None):
energy_offset = pfunc(-1 * ref_energy, pos[ref_id])
ecalibdict["axis"] = pfunc(-energy_offset, t)
ecalibdict["E0"] = -energy_offset
ecalibdict["refid"] = ref_id
if t is not None:
ecalibdict["axis"] = pfunc(ecalibdict["E0"], t)

return ecalibdict
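
With ref_id removed, the first trace is always the reference and E0 is fixed directly from ref_energy, pos[0], and vals[0]. A toy check of that bookkeeping, assuming (as in tof2ev) that the offset E0 enters the conversion additively; the stand-in pfunc and all numbers are invented:

import numpy as np

def pfunc(E0, t):
    # Toy stand-in for the fitted tof2ev conversion: a 1/t^2-like term plus
    # the additive energy offset E0 (this additivity is the key assumption).
    return 1.0e9 / (t - 5.0e3) ** 2 + E0

pos = np.array([66000.0, 66500.0, 67000.0])  # landmark TOF positions per trace
vals = np.array([16.7, 17.7, 18.7])          # sign-weighted bias voltages, as passed by calibrate()
ref_energy = -0.5                            # desired energy of the reference feature

# E0 determination as in the updated lmfit branch:
energy_offset = pfunc(-1 * ref_energy, pos[0])
E0 = -(energy_offset - vals[0])

# The calibrated axis places the reference landmark at ref_energy + vals[0];
# the per-scan bias offset applied later in append_energy_axis is what is
# meant to compensate this shift.
axis_at_ref = pfunc(E0, pos[0])
print(np.isclose(axis_at_ref - vals[0], ref_energy))  # True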


def poly_energy_calibration(
pos: list[float] | np.ndarray,
vals: list[float] | np.ndarray,
ref_energy: float,
order: int = 3,
ref_id: int = 0,
ref_energy: float = None,
t: list[float] | np.ndarray = None,
aug: int = 1,
method: str = "lstsq",
@@ -2235,10 +2244,9 @@
(e.g. peaks) in the EDCs.
vals (list[float] | np.ndarray): Bias voltage value associated with
each EDC.
ref_energy (float): Energy value of the feature in the reference
trace (eV).
order (int, optional): Polynomial order of the fitting function. Defaults to 3.
ref_id (int, optional): Reference dataset index. Defaults to 0.
ref_energy (float, optional): Energy value of the feature in the reference
trace (eV). required to output the calibration. Defaults to None.
t (list[float] | np.ndarray, optional): Array of TOF values. Required
to calculate calibration trace. Defaults to None.
aug (int, optional): Fitting dimension augmentation
@@ -2266,21 +2274,14 @@
vals = np.asarray(vals)
nvals = vals.size

if ref_id >= nvals:
wn.warn(
"Reference index (refid) cannot be larger than the number of traces!\
Reset to the largest allowed number.",
)
ref_id = nvals - 1

# Top-to-bottom ordering of terms in the T matrix
termorder = np.delete(range(0, nvals, 1), ref_id)
termorder = np.delete(range(0, nvals, 1), 0)
termorder = np.tile(termorder, aug)
# Left-to-right ordering of polynomials in the T matrix
polyorder = np.linspace(order, 1, order, dtype="int")

# Construct the T (differential drift time) matrix, Tmat = Tmain - Tsec
t_main = np.array([pos[ref_id] ** p for p in polyorder])
t_main = np.array([pos[0] ** p for p in polyorder])
# Duplicate to the same order as the polynomials
t_main = np.tile(t_main, (aug * (nvals - 1), 1))

Expand All @@ -2292,7 +2293,7 @@ def poly_energy_calibration(
t_mat = t_main - np.asarray(t_sec)

# Construct the b vector (differential bias)
bvec = vals[ref_id] - np.delete(vals, ref_id)
bvec = vals[0] - np.delete(vals, 0)
bvec = np.tile(bvec, aug)

# Solve for the a vector (polynomial coefficients) using least squares
@@ -2312,12 +2313,10 @@
ecalibdict["Tmat"] = t_mat
ecalibdict["bvec"] = bvec
ecalibdict["energy_scale"] = energy_scale
ecalibdict["E0"] = -(pfunc(-1 * ref_energy, pos[0]) + vals[0])

if ref_energy is not None and t is not None:
energy_offset = pfunc(-1 * ref_energy, pos[ref_id])
ecalibdict["axis"] = pfunc(-energy_offset, t)
ecalibdict["E0"] = -energy_offset
ecalibdict["refid"] = ref_id
if t is not None:
ecalibdict["axis"] = pfunc(-ecalibdict["E0"], t)

return ecalibdict
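
poly_energy_calibration likewise pins the reference to the first trace: every row of the T matrix is the difference between the reference landmark's polynomial terms and another trace's, and the b vector holds the corresponding bias differences. A condensed, runnable rendering of that construction for aug=1 (made-up landmark positions and biases):

import numpy as np

pos = np.array([66000.0, 66400.0, 66800.0, 67200.0])  # landmark TOF positions per trace
vals = np.array([16.7, 17.7, 18.7, 19.7])             # sign-weighted bias voltages
order = 3
nvals = vals.size

# The reference is now always index 0.
polyorder = np.linspace(order, 1, order, dtype="int")            # [3, 2, 1]
t_main = np.tile([pos[0] ** p for p in polyorder], (nvals - 1, 1))
t_sec = np.array([[pos[i] ** p for p in polyorder] for i in range(1, nvals)])
t_mat = t_main - t_sec                                           # differential drift-time matrix
bvec = vals[0] - np.delete(vals, 0)                              # differential biases

# Solve T a = b for the polynomial coefficients (the method="lstsq" branch):
poly_a = np.linalg.lstsq(t_mat, bvec, rcond=None)[0]
print(poly_a.shape)  # (3,)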

35 changes: 26 additions & 9 deletions sed/config/mpes_example_config.yaml
@@ -15,18 +15,10 @@ core:
gid: 1001

dataframe:
# hdf5 group names to read from the h5 files (for mpes reader)
hdf5_groupnames: ["Stream_0", "Stream_1", "Stream_2", "Stream_4"]
# aliases to assign to the dataframe columns for the corresponding hdf5 streams
hdf5_aliases:
Stream_0: "X"
Stream_1: "Y"
Stream_2: "t"
Stream_4: "ADC"
# dataframe column name for the time stamp column
time_stamp_alias: "timeStamps"
# hdf5 group name containing eventIDs occurring at every millisecond (used to calculate timestamps)
ms_markers_group: "msMarkers"
ms_markers_key: "msMarkers"
# hdf5 attribute containing the timestamp of the first event in a file
first_event_time_stamp_key: "FirstEventTimeStamp"
# Time stepping in seconds of the successive events in the timed dataframe
@@ -41,6 +33,8 @@ dataframe:
tof_column: "t"
# dataframe column containing analog-to-digital data
adc_column: "ADC"
# dataframe column containing bias voltage data
bias_column: "sampleBias"
# dataframe column containing corrected x coordinates
corrected_x_column: "Xm"
# dataframe column containing corrected y coordinates
@@ -79,6 +73,29 @@ dataframe:
kx: '1/A'
ky: '1/A'

# dataframe channels and group names to read from the h5 files
channels:
# The X-channel
X:
format: per_electron
dataset_key: "Stream_0"
# The Y-channel
Y:
format: per_electron
dataset_key: "Stream_1"
# The tof-channel
t:
format: per_electron
dataset_key: "Stream_2"
# The ADC-channel
ADC:
format: per_electron
dataset_key: "Stream_4"
# The sample Bias-channel
sampleBias:
format: per_file
dataset_key: "KTOF:Lens:Sample:V"

energy:
# Number of bins to use for energy calibration traces
bins: 1000
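The sampleBias channel is declared with the per_file format, i.e. one value per raw file read from "KTOF:Lens:Sample:V". Assuming per_file channels are broadcast to every electron event of their file when the dataframe is assembled, the combined dataframe ends up with a constant bias per file, which append_energy_axis can then consume via the configured bias_column. A toy pandas illustration of that broadcast (file names and voltages are invented):

import pandas as pd

# One (invented) bias value per raw file ...
per_file_bias = {"scan_0001.h5": 16.7, "scan_0002.h5": 26.7}

# ... broadcast to every per-electron row of that file when files are combined:
frames = []
for fname, bias in per_file_bias.items():
    events = pd.DataFrame({"t": [66000.0, 66100.0, 66200.0]})  # per_electron channel
    events["sampleBias"] = bias                                 # per_file channel
    events["file"] = fname
    frames.append(events)
df = pd.concat(frames, ignore_index=True)
print(df)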