Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Hextof_workflow_steps #169

Merged
merged 61 commits into from
Nov 2, 2023
Merged
Show file tree
Hide file tree
Changes from 51 commits
Commits
Show all changes
61 commits
Select commit Hold shift + click to select a range
60053bf
unravel 8s detector to tof and sectorID in loader
steinnymir Oct 10, 2023
de0c5a2
fix loader for 8s unravelling
steinnymir Oct 10, 2023
41d5213
linting and bugfix
steinnymir Oct 10, 2023
c367637
linting and bugfix
steinnymir Oct 10, 2023
153373d
bugfix
steinnymir Oct 10, 2023
7051b8a
linting
steinnymir Oct 10, 2023
d51b01f
linting
steinnymir Oct 10, 2023
b70bee4
update default flash config
steinnymir Oct 10, 2023
9cd0d67
update test settings for flash
steinnymir Oct 11, 2023
9dbb379
fixes from suggestions and linting
steinnymir Oct 11, 2023
ccff3f8
linting and docstrings
steinnymir Oct 11, 2023
27adcc3
add rolling average and energy shift
steinnymir Oct 11, 2023
77a6ab5
local testing
steinnymir Oct 11, 2023
8f4bded
make step2ns global
steinnymir Oct 11, 2023
73d9bb5
Merge branch 'main' into hextof_workflow_steps
steinnymir Oct 11, 2023
5848a22
implement shift_energy_axis
steinnymir Oct 11, 2023
a1ccb7a
Merge branch 'hextof_workflow_steps' into hextof_energy_shifts
steinnymir Oct 11, 2023
4945d9b
Merge pull request #174 from OpenCOMPES/hextof_energy_shifts
steinnymir Oct 12, 2023
991df97
linting
steinnymir Oct 12, 2023
98c1857
moving functions to their rightful place
steinnymir Oct 12, 2023
bfd3daa
move more functions
steinnymir Oct 13, 2023
ea5c5fd
fix tof_step_to_ns
steinnymir Oct 13, 2023
c79a106
fix linting and typos
steinnymir Oct 13, 2023
b579b8f
move split_sector_id to flash loader + tests
steinnymir Oct 16, 2023
abd3de7
apply suggestions and move sector_alignment
steinnymir Oct 16, 2023
8c75446
minor fixes and tests
steinnymir Oct 18, 2023
0afe469
add smooth functions
steinnymir Oct 18, 2023
35af57c
add tutorial notebook
steinnymir Oct 18, 2023
9479ce8
fix linting and tests
steinnymir Oct 19, 2023
43776a3
add bfill and tests
steinnymir Oct 19, 2023
9edf50a
move tof_to_ns inside ec class
steinnymir Oct 19, 2023
8ba65e9
move dld_sector_correction to ec
steinnymir Oct 19, 2023
b93cd7b
fix
steinnymir Oct 19, 2023
eaeeff4
harder fix
steinnymir Oct 19, 2023
acc9c4a
fix linting
steinnymir Oct 23, 2023
a1066e5
fix linting
steinnymir Oct 23, 2023
ff7012a
Merge branch 'main' into hextof_workflow_steps
steinnymir Oct 23, 2023
b42bb3e
Merge branch 'main' into hextof_workflow_steps
steinnymir Oct 24, 2023
0d73776
add option to initialize fit params
steinnymir Oct 24, 2023
b1c158d
energy calibration notebook and config
steinnymir Oct 24, 2023
d42f2e0
partial working energy calibration
rettigl Oct 25, 2023
0433019
working energy calibration
rettigl Oct 25, 2023
4cad800
tof2ns fix units and linting
steinnymir Oct 25, 2023
5b866ca
fix energy calibration and hextof notebook
steinnymir Oct 25, 2023
fa1cd5c
refactor and move apply_energy_offset
steinnymir Oct 25, 2023
babc325
remove redundant notebooks
steinnymir Oct 25, 2023
09b2dad
linting
steinnymir Oct 25, 2023
68bf2b4
bug fix
steinnymir Oct 25, 2023
6fc47ac
linting
steinnymir Oct 25, 2023
60242a7
linting
steinnymir Oct 25, 2023
050f89c
Merge branch 'main' into hextof_workflow_steps
steinnymir Oct 27, 2023
61045d0
apply suggested changes and remove rolling avg
steinnymir Oct 30, 2023
7554c71
refactor energy offset for performance and tests
steinnymir Nov 1, 2023
3160d20
Merge branch 'main' into hextof_workflow_steps
steinnymir Nov 1, 2023
fdf12ec
linting and bugfix
steinnymir Nov 1, 2023
b8911ea
linting and more tests
steinnymir Nov 1, 2023
2538e28
test tof_ns
steinnymir Nov 1, 2023
3dafe4e
update notebook and add offset to save ecalib
steinnymir Nov 1, 2023
8308eba
test sector_alignment
steinnymir Nov 1, 2023
956de39
yet an other linting
steinnymir Nov 1, 2023
266444f
apply suggested changes
steinnymir Nov 2, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
296 changes: 281 additions & 15 deletions sed/calibrator/energy.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
from scipy.sparse.linalg import lsqr

from sed.binning import bin_dataframe
from sed.core import dfops
from sed.loader.base.loader import BaseLoader


Expand Down Expand Up @@ -95,6 +96,7 @@ def __init__(
self.calibration: Dict[Any, Any] = {}

self.tof_column = self._config["dataframe"]["tof_column"]
self.tof_ns_column = self._config["dataframe"].get("tof_ns_column", None)
self.corrected_tof_column = self._config["dataframe"]["corrected_tof_column"]
self.energy_column = self._config["dataframe"]["energy_column"]
self.x_column = self._config["dataframe"]["x_column"]
Expand All @@ -108,7 +110,9 @@ def __init__(
) / 2 ** (self.binning - 1)
self.tof_fermi = self._config["energy"]["tof_fermi"] / 2 ** (self.binning - 1)
self.color_clip = self._config["energy"]["color_clip"]

self.sector_delays = self._config["dataframe"].get("sector_delays", None)
self.sector_id_column = self._config["dataframe"].get("sector_id_column", None)
self.offset: Dict[str, Any] = self._config["energy"].get("offset", {})
self.correction: Dict[Any, Any] = {}

@property
Expand Down Expand Up @@ -769,6 +773,26 @@ def view( # pylint: disable=dangerous-default-value

pbk.show(fig)

def get_current_calibration(self) -> dict:
    """Fetch the calibration dictionary currently in effect.

    Priority order: an explicitly stored calibration on the instance wins;
    otherwise the one from the config file is used; failing both, an empty
    dictionary is returned. A deep copy is handed back so callers may mutate
    the result without affecting the stored state.

    Returns:
        dict: Deep copy of the active calibration dictionary (possibly empty).
    """
    active = self.calibration or self._config["energy"].get("calibration", {})
    return deepcopy(active)

def append_energy_axis(
self,
df: Union[pd.DataFrame, dask.dataframe.DataFrame],
Expand Down Expand Up @@ -812,17 +836,8 @@ def append_energy_axis(
binwidth = kwds.pop("binwidth", self.binwidth)
binning = kwds.pop("binning", self.binning)

# pylint: disable=duplicate-code
if calibration is None:
if self.calibration:
calibration = deepcopy(self.calibration)
else:
calibration = deepcopy(
self._config["energy"].get(
"calibration",
{},
),
)
calibration = self.get_current_calibration()

for key, value in kwds.items():
calibration[key] = value
Expand Down Expand Up @@ -879,6 +894,53 @@ def append_energy_axis(

return df, metadata

def append_tof_ns_axis(
    self,
    df: Union[pd.DataFrame, dask.dataframe.DataFrame],
    tof_column: str = None,
    tof_ns_column: str = None,
    **kwds,
) -> Tuple[Union[pd.DataFrame, dask.dataframe.DataFrame], dict]:
    """Converts the time-of-flight time from steps to time in ns.

    # TODO: needs tests

    Args:
        df (Union[pd.DataFrame, dask.dataframe.DataFrame]): Dataframe to convert.
        tof_column (str, optional): Name of the column containing the
            time-of-flight steps. Defaults to config["dataframe"]["tof_column"].
        tof_ns_column (str, optional): Name of the column to store the
            time-of-flight in nanoseconds. Defaults to config["dataframe"]["tof_ns_column"].
        **kwds: Optional overrides ``binwidth`` and ``binning`` for the values
            stored on the calibrator.

    Returns:
        dask.dataframe.DataFrame: Dataframe with the new columns.
        dict: Metadata dictionary.
    """
    width = kwds.pop("binwidth", self.binwidth)
    nbin = kwds.pop("binning", self.binning)

    # Prefer the corrected tof column when it is already present in the frame.
    if tof_column is None:
        tof_column = (
            self.corrected_tof_column
            if self.corrected_tof_column in df.columns
            else self.tof_column
        )

    if tof_ns_column is None:
        tof_ns_column = self.tof_ns_column
    if tof_ns_column is None:
        raise AttributeError("tof_ns_column not set!")

    df[tof_ns_column] = tof2ns(width, nbin, df[tof_column].astype("float64"))

    metadata: Dict[str, Any] = {
        "applied": True,
        "binwidth": width,
        "binning": nbin,
    }
    return df, metadata

def gather_calibration_metadata(self, calibration: dict = None) -> dict:
"""Collects metadata from the energy calibration

Expand Down Expand Up @@ -1358,6 +1420,170 @@ def gather_correction_metadata(self, correction: dict = None) -> dict:

return metadata

def align_dld_sectors(
    self,
    df: Union[pd.DataFrame, dask.dataframe.DataFrame],
    **kwds,
) -> Tuple[Union[pd.DataFrame, dask.dataframe.DataFrame], dict]:
    """Aligns the time-of-flight axis of the different sections of a detector.

    Subtracts, per event, the sector-specific delay from the time-of-flight
    column, so all detector sectors share a common time axis.

    Args:
        df (Union[pd.DataFrame, dask.dataframe.DataFrame]): Dataframe to use.
        **kwds: Optional keyword arguments:

            - **sector_delays**: Sequence of delays, one per sector, in tof steps.
              Defaults to the value loaded from the config.
            - **sector_id_column** (str): Name of the column holding the sector id.
              Defaults to the value loaded from the config.
            - **tof_column** (str): Name of the time-of-flight column.
              Defaults to the value loaded from the config.

    Raises:
        ValueError: If sector_delays or sector_id_column are not configured.

    Returns:
        dask.dataframe.DataFrame: Dataframe with the new columns.
        dict: Metadata dictionary.
    """
    sector_delays = kwds.pop("sector_delays", self.sector_delays)
    sector_id_column = kwds.pop("sector_id_column", self.sector_id_column)

    if sector_delays is None or sector_id_column is None:
        raise ValueError(
            "No value for sector_delays or sector_id_column found in config."
            "config file is not properly configured for dld sector correction.",
        )
    tof_column = kwds.pop("tof_column", self.tof_column)

    if isinstance(df, pd.DataFrame):
        # Fix: the signature accepts plain pandas frames, but map_partitions
        # only exists on dask dataframes -- handle the pandas case directly.
        delays = np.asarray(sector_delays)
        shifted = df[tof_column] - delays[df[sector_id_column].to_numpy().astype(int)]
        df[tof_column] = shifted.astype(np.float32)
    else:
        # align the 8s sectors
        sector_delays_arr = dask.array.from_array(sector_delays)

        def align_sector(x):
            val = x[tof_column] - sector_delays_arr[x[sector_id_column].values.astype(int)]
            return val.astype(np.float32)

        df[tof_column] = df.map_partitions(align_sector, meta=(tof_column, np.float32))

    metadata: Dict[str, Any] = {
        "applied": True,
        "sector_delays": sector_delays,
    }
    return df, metadata

def apply_energy_offset(
    self,
    df: Union[pd.DataFrame, dask.dataframe.DataFrame] = None,
    constant: float = None,
    columns: Union[str, Sequence[str]] = None,
    signs: Union[int, Sequence[int]] = None,
    subtract_mean: Union[bool, Sequence[bool]] = None,
    energy_column: str = None,
    reductions: Union[str, Sequence[str]] = None,
) -> Tuple[Union[pd.DataFrame, dask.dataframe.DataFrame], dict]:
    """Apply an energy shift to the given column(s).

    If no parameter is passed to this function, the offset is applied as defined in the
    config file. If parameters are passed, they are used to generate a new offset dictionary
    and the offset is applied using the ``dfops.apply_offset_from_columns()`` function.

    # TODO: This function can still be improved and needs testing

    Args:
        df (Union[pd.DataFrame, dask.dataframe.DataFrame]): Dataframe to use.
        constant (float, optional): The constant to shift the energy axis by.
        columns (Union[str, Sequence[str]]): Name of the column(s) to apply the shift to.
        signs (Union[int, Sequence[int]]): Sign of the shift to apply. (+1 or -1) A positive
            sign shifts the energy axis to higher kinetic energies. Defaults to +1.
        subtract_mean (Union[bool, Sequence[bool]]): Whether to subtract the mean of the
            column before applying the shift. Defaults to False.
        energy_column (str, optional): Name of the column containing the energy values.
        reductions (str): The reduction to apply to the column. If "rolled" it searches for
            columns with suffix "_rolled", e.g. "sampleBias_rolled", as those generated by the
            ``SedProcessor.smooth_columns()`` function. Otherwise should be an available method
            of dask.dataframe.Series. For example "mean". In this case the function is applied
            to the column to generate a single value for the whole dataset. If None, the shift
            is applied per-dataframe-row. Defaults to None.

    Raises:
        ValueError: If columns are passed without signs or subtract_mean, if signs and
            columns differ in length, or if the energy scale has not been set.
        RuntimeError: If a "rolled" reduction refers to a column that has not been smoothed.

    Returns:
        Union[pd.DataFrame, dask.dataframe.DataFrame]: Dataframe with the shifted energy.
        dict: Metadata dictionary.
    """
    if energy_column is None:
        energy_column = self.energy_column
    if columns is None:
        # load from config
        columns = []
        signs = []
        subtract_mean = []
        reductions = []
        for k, v in self.offset.items():
            if k == "constant":
                constant = v
                print(f"Applying constant offset of {constant} to energy axis.")
            else:
                assert k in df.columns, f"Column {k} not found in dataframe."
                columns.append(k)
                signs.append(v.get("sign", 1))
                subtract_mean.append(v.get("subtract_mean", False))
                reductions.append(v.get("reduction", None))
                s = "+" if signs[-1] > 0 else "-"
                msg = f"Shifting {energy_column} by {s} {k}"
                if subtract_mean[-1]:
                    msg += " and subtracting mean"
                print(msg)
    else:
        # use passed parameters
        if columns is not None and (signs is None or subtract_mean is None):
            raise ValueError(
                "If columns is passed, signs and subtract_mean must also be passed.",
            )
    if isinstance(columns, str):
        columns = [columns]
    if isinstance(signs, int):
        signs = [signs]
    if len(signs) != len(columns):
        raise ValueError("signs and columns must have the same length.")
    if isinstance(subtract_mean, bool):
        subtract_mean = [subtract_mean] * len(columns)
    if reductions is None:
        reductions = [None] * len(columns)
    # flip sign for binding energy scale
    energy_scale = self.get_current_calibration().get("energy_scale", None)
    if energy_scale == "binding":
        # fix: do not filter out None entries here -- that would silently
        # misalign signs with columns; the length check above guarantees
        # one sign per column already.
        signs = [-s for s in signs]
    elif energy_scale == "kinetic":
        pass
    elif energy_scale is None:
        raise ValueError("Energy scale not set. Please run `set_energy_scale` first.")
    # check if columns have been smoothed
    columns_: List[str] = []
    reductions_: List[str] = []
    to_roll: List[str] = []
    for c, r in zip(columns, reductions):
        if r == "rolled":
            cname = c + "_rolled"
            if cname not in df.columns:
                to_roll.append(cname)
            else:
                columns_.append(cname)
                reductions_.append(None)
        else:
            columns_.append(c)
            reductions_.append(r)
    if len(to_roll) > 0:
        raise RuntimeError(
            f"Columns {to_roll} have not been smoothed. please run `smooth_column`",
        )
    # apply offset
    df = dfops.apply_offset_from_columns(
        df=df,
        target_column=energy_column,
        offset_columns=columns_,
        signs=signs,
        subtract_mean=subtract_mean,
        reductions=reductions_,
        inplace=True,
    )
    # apply constant
    if constant is not None:
        df[energy_column] += constant

    metadata: Dict[str, Any] = {
        "applied": True,
        "constant": constant,
        "energy_column": energy_column,
        "column_names": columns,
        "signs": signs,
        "subtract_mean": subtract_mean,
        "reductions": reductions,
    }
    return df, metadata


def extract_bias(files: List[str], bias_key: str) -> np.ndarray:
"""Read bias values from hdf5 files
Expand Down Expand Up @@ -1868,13 +2094,33 @@ def residual(pars, time, data, binwidth, binning, energy_scale):
return model - data

pars = Parameters()
pars.add(name="d", value=kwds.pop("d_init", 1))
d_pars = kwds.pop("d", {})
pars.add(
name="d",
value=d_pars.get("value", 1),
min=d_pars.get("min", -np.inf),
max=d_pars.get("max", np.inf),
vary=d_pars.get("vary", True),
)
t0_pars = kwds.pop("t0", {})
pars.add(
name="t0",
value=kwds.pop("t0_init", 1e-6),
max=(min(pos) - 1) * binwidth * 2**binning,
value=t0_pars.get("value", 1e-6),
min=t0_pars.get("min", -np.inf),
max=t0_pars.get(
"max",
(min(pos) - 1) * binwidth * 2**binning,
),
vary=t0_pars.get("vary", True),
)
E0_pars = kwds.pop("E0", {}) # pylint: disable=invalid-name
pars.add(
name="E0",
value=E0_pars.get("value", min(vals)),
min=E0_pars.get("min", -np.inf),
max=E0_pars.get("max", np.inf),
vary=E0_pars.get("vary", True),
)
pars.add(name="E0", value=kwds.pop("E0_init", min(vals)))
fit = Minimizer(
residual,
pars,
Expand Down Expand Up @@ -2085,3 +2331,23 @@ def tof2evpoly(
energy += energy_offset

return energy


def tof2ns(
    binwidth: float,
    binning: int,
    t: Union[float, np.ndarray],
) -> Union[float, np.ndarray]:
    """Converts the time-of-flight steps to time-of-flight in nanoseconds.

    Operates elementwise, so scalars as well as numpy arrays and
    pandas/dask series are supported; designed for use with
    dask.dataframe.DataFrame.map_partitions.

    Args:
        binwidth (float): Time step size in seconds.
        binning (int): Binning of the time-of-flight steps.
        t (Union[float, np.ndarray]): TOF value(s) in bin number.

    Returns:
        Union[float, np.ndarray]: Converted time(s) in nanoseconds, with the
        same shape as ``t``.
    """
    # seconds per step = binwidth * 2**binning; 1e9 converts seconds to ns
    return t * 1e9 * binwidth * 2**binning
2 changes: 1 addition & 1 deletion sed/config/default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ dataframe:
energy_column: "energy"
# dataframe column containing delay data
delay_column: "delay"
# time length of a base time-of-flight bin in ns
# time length of a base time-of-flight bin in s
tof_binwidth: 4.125e-12
# Binning factor of the tof_column-data compared to tof_binwidth (2^(tof_binning-1))
tof_binning: 1
Expand Down
Loading