From c3676371d6050be89e585f39855bfb635181ee39 Mon Sep 17 00:00:00 2001
From: Steinn Ymir Agustsson <sagustss@uni-mainz.de>
Date: Tue, 10 Oct 2023 23:15:25 +0200
Subject: [PATCH] linting and bugfix

---
 sed/calibrator/hextof.py   | 23 +++++++++-----
 sed/core/processor.py      | 65 ++++++++++++++++++++++++++++++++++++++
 sed/loader/flash/loader.py |  6 +---
 3 files changed, 81 insertions(+), 13 deletions(-)

diff --git a/sed/calibrator/hextof.py b/sed/calibrator/hextof.py
index 83672735..884028f7 100644
--- a/sed/calibrator/hextof.py
+++ b/sed/calibrator/hextof.py
@@ -31,6 +31,8 @@ def unravel_8s_detector_time_channel(
         if config is None:
             raise ValueError("Either time_sector_column or config must be given.")
         time_sector_column = config["dataframe"]["time_sector_column"]
+        if time_sector_column not in df.columns:
+            raise ValueError(f"Column {time_sector_column} not in dataframe.")
     if tof_step_column is None:
         if config is None:
             raise ValueError("Either tof_step_column or config must be given.")
@@ -40,15 +42,16 @@ def unravel_8s_detector_time_channel(
             raise ValueError("Either sector_id_column or config must be given.")
         sector_id_column = config["dataframe"]["sector_id_column"]
 
-    # extract dld sector id information
     df[sector_id_column] = (df[time_sector_column] % 8).astype(np.int8)
     df[tof_step_column] = (df[time_sector_column] // 8).astype(np.int32)
     return df
 
 
 def align_8s_sectors(
-        dataframe: dask.dataframe.DataFrame,
+        df: dask.dataframe.DataFrame,
         sector_delays: Sequence[float] = None,
+        sector_id_column: str = "dldSectorID",
+        tof_step_column: str = "dldTimeSteps",
         config: dict = None,
 ) -> Tuple[Union[pd.DataFrame, dask.dataframe.DataFrame], dict]:
     """Aligns the 8s sectors to the first sector.
@@ -63,19 +66,19 @@ def align_8s_sectors(
             raise ValueError("Either sector_delays or config must be given.")
         sector_delays = config["dataframe"]["sector_delays"]
     # align the 8s sectors
+    sector_delays = dask.array.from_array(sector_delays)
 
     def align_sector(x):
-        return x - sector_delays[x['dldSectorID']]
-
-    dataframe['dldTimeSteps'] = dataframe.map_partitions(
-        align_sector, meta=('dldTimeSteps', np.int32)
+        return x[tof_step_column] - sector_delays[x[sector_id_column].values.astype(int)]
+    df[tof_step_column] = df.map_partitions(
+        align_sector, meta=(tof_step_column, np.float64)
     )
 
     metadata = {}
     metadata["applied"] = True
     metadata["sector_delays"] = sector_delays
 
-    return dataframe, metadata
+    return df, metadata
 
 
 def convert_8s_time_to_ns(
@@ -88,8 +91,12 @@ def convert_8s_time_to_ns(
     """Converts the 8s time in steps to time in ns.
 
     Args:
-        time_step_size (float, optional): Size of one time step in ns.
+        time_step_size (float, optional): Time step size in nanoseconds.
             Defaults to config["dataframe"]["time_step_size"].
+        tof_step_column (str, optional): Name of the column containing the
+            time-of-flight steps. Defaults to config["dataframe"]["tof_step_column"].
+        tof_column (str, optional): Name of the column containing the
+            time-of-flight. Defaults to config["dataframe"]["tof_column"].
     """
     if time_step_size is None:
         if config is None:
diff --git a/sed/core/processor.py b/sed/core/processor.py
index 9069215c..d73ad7d8 100644
--- a/sed/core/processor.py
+++ b/sed/core/processor.py
@@ -21,6 +21,7 @@
 from sed.calibrator import DelayCalibrator
 from sed.calibrator import EnergyCalibrator
 from sed.calibrator import MomentumCorrector
+from sed.calibrator import hextof
 from sed.core.config import parse_config
 from sed.core.config import save_config
 from sed.core.dfops import apply_jitter
@@ -1203,6 +1204,70 @@ def add_jitter(self, cols: Sequence[str] = None):
             metadata.append(col)
         self._attributes.add(metadata, "jittering", duplicate_policy="append")
 
+    def hextof_step_to_ns(
+            self,
+            time_step_size: float = None,
+            tof_step_column: str = None,
+            tof_column: str = None,
+    ):
+        """Convert time-of-flight channel steps to nanoseconds.
+
+        Intended for use with the HEXTOF endstation. No-op if no dataframe is loaded.
+
+        Args:
+            time_step_size (float, optional): Time step size in nanoseconds.
+                Defaults to config["dataframe"]["time_step_size"].
+            tof_step_column (str, optional): Name of the column containing the
+                time-of-flight steps. Defaults to config["dataframe"]["tof_step_column"].
+            tof_column (str, optional): Name of the column containing the
+                time-of-flight. Defaults to config["dataframe"]["tof_column"].
+        """
+        if self._dataframe is not None:
+            print("Adding time-of-flight column in nanoseconds to dataframe:")
+            # TODO assert order of execution through metadata
+            # NOTE(review): metadata is merged into "energy_calibration" - confirm this key.
+            self._dataframe, metadata = hextof.convert_8s_time_to_ns(
+                df=self._dataframe,
+                time_step_size=time_step_size or self._config["dataframe"]["time_step_size"],
+                tof_step_column=tof_step_column or self._config["dataframe"]["tof_step_column"],
+                tof_column=tof_column or self._config["dataframe"]["tof_column"],
+            )
+            self._attributes.add(
+                metadata,
+                "energy_calibration",
+                duplicate_policy="merge",
+            )
+
+    def hextof_align_8s_sectors(
+            self,
+            sector_delays: Sequence[float] = None,
+    ):
+        """Align the 8s sectors of the HEXTOF endstation.
+
+        No-op if no dataframe is loaded or if every sector delay is zero.
+
+        Args:
+            sector_delays (Sequence[float], optional): The 8 sector delays (unit: TODO
+                confirm). Defaults to config["dataframe"]["sector_delays"], else zeros.
+
+        Raises:
+            ValueError: If sector_delays does not contain exactly 8 entries.
+        """
+        if self._dataframe is not None:
+            print("Aligning 8s sectors of dataframe")
+            # TODO assert order of execution through metadata
+            if sector_delays is None:  # "or" would raise on numpy arrays
+                sector_delays = self._config["dataframe"].get("sector_delays", [0.0] * 8)
+            if len(sector_delays) != 8:
+                raise ValueError("sector_delays must be a list of 8 floats")
+            if all(delay == 0 for delay in sector_delays):  # element-wise check
+                print("All sector delays are 0, skipping alignment")
+                return  # actually skip: nothing to shift, no metadata to record
+            self._dataframe, metadata = hextof.align_8s_sectors(
+                df=self._dataframe, sector_delays=sector_delays,
+            )
+            self._attributes.add(metadata, "energy_calibration", duplicate_policy="merge")
+
     def pre_binning(
         self,
         df_partitions: int = 100,
diff --git a/sed/loader/flash/loader.py b/sed/loader/flash/loader.py
index d89c3f02..31d73e8b 100644
--- a/sed/loader/flash/loader.py
+++ b/sed/loader/flash/loader.py
@@ -619,11 +619,7 @@ def create_buffer_file(self, h5_path: Path, parquet_path: Path) -> None:
                 .reset_index(level=self.multi_index)
                 .to_parquet(parquet_path, index=False)
             )
-        # except ValueError as failed_string_error:
-        #     print(f"Conversion failed for {parquet_path}:\nValueError: {failed_string_error}")
-        #     error = f"{parquet_path}: {failed_string_error}"
-        #     self.failed_files_error.append(error)
-        except Exception as exc: # pylint: disable=broad-except
+        except Exception as exc:  # pylint: disable=broad-except
             self.failed_files_error.append(f"{parquet_path}: {type(exc)} {exc}")
             return exc
         return False