Commit

fixes from suggestions and linting
steinnymir committed Oct 11, 2023
1 parent 9cd0d67 commit 9dbb379
Showing 5 changed files with 178 additions and 89 deletions.
93 changes: 52 additions & 41 deletions sed/calibrator/hextof.py
@@ -12,9 +12,8 @@

def unravel_8s_detector_time_channel(
df: dask.dataframe.DataFrame,
-    time_sector_column: str = "dldTimeAndSector",
-    tof_step_column: str = "dldTimeSteps",
-    sector_id_column: str = "dldSectorID",
+    tof_column: str = None,
+    sector_id_column: str = None,
config: dict = None,
) -> dask.dataframe.DataFrame:
"""Converts the 8s time in steps to time in steps and sectorID.
@@ -26,32 +25,28 @@ def unravel_8s_detector_time_channel(
        sector_delays (Sequence[float], optional): Sector delays for the 8s time.
Defaults to config["dataframe"]["sector_delays"].
"""
-    df = df.dropna(subset=[time_sector_column])
-    if time_sector_column is None:
-        if config is None:
-            raise ValueError("Either time_sector_column or config must be given.")
-        time_sector_column = config["dataframe"]["time_sector_column"]
-    if time_sector_column not in df.columns:
-        raise ValueError(f"Column {time_sector_column} not in dataframe.")
-    if tof_step_column is None:
+    if tof_column is None:
        if config is None:
-            raise ValueError("Either tof_step_column or config must be given.")
-        tof_step_column = config["dataframe"]["tof_step_column"]
+            raise ValueError("Either tof_column or config must be given.")
+        tof_column = config["dataframe"]["tof_column"]
    if sector_id_column is None:
        if config is None:
            raise ValueError("Either sector_id_column or config must be given.")
        sector_id_column = config["dataframe"]["sector_id_column"]

-    df[sector_id_column] = (df[time_sector_column] % 8).astype(np.int8)
-    df[tof_step_column] = (df[time_sector_column] // 8).astype(np.int32)
+    if sector_id_column in df.columns:
+        raise ValueError(f"Column {sector_id_column} already in dataframe. "
+                         "This function is not idempotent.")
+    df[sector_id_column] = (df[tof_column] % 8).astype(np.int8)
+    df[tof_column] = (df[tof_column] // 8).astype(np.int32)
return df
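The unraveling above splits one packed channel into two quantities: the lowest 3 bits of the combined value are the detector sector, the remaining bits are the time-of-flight step count. A minimal sketch of that decomposition (editor's illustration, not part of the commit; plain pandas instead of dask, column names taken from the example config further below):

```python
import numpy as np
import pandas as pd

# combined channel: tof_steps * 8 + sector_id
df = pd.DataFrame({"dldTimeSteps": [8 * 1000 + 3, 8 * 1000 + 7, 8 * 2500 + 0]})

df["dldSectorID"] = (df["dldTimeSteps"] % 8).astype(np.int8)    # lowest 3 bits -> sector 0..7
df["dldTimeSteps"] = (df["dldTimeSteps"] // 8).astype(np.int32)  # remaining bits -> ToF steps

print(df)
#    dldTimeSteps  dldSectorID
# 0          1000            3
# 1          1000            7
# 2          2500            0
```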


-def align_8s_sectors(
+def align_dld_sectors(
    df: dask.dataframe.DataFrame,
    sector_delays: Sequence[float] = None,
-    sector_id_column: str = "dldSectorID",
-    tof_step_column: str = "dldTimeSteps",
+    sector_id_column: str = None,
+    tof_column: str = None,
config: dict = None,
) -> Tuple[Union[pd.DataFrame, dask.dataframe.DataFrame], dict]:
"""Aligns the 8s sectors to the first sector.
@@ -65,13 +60,22 @@ def align_8s_sectors(
if config is None:
raise ValueError("Either sector_delays or config must be given.")
sector_delays = config["dataframe"]["sector_delays"]
+    if sector_id_column is None:
+        if config is None:
+            raise ValueError("Either sector_id_column or config must be given.")
+        sector_id_column = config["dataframe"]["sector_id_column"]
+    if tof_column is None:
+        if config is None:
+            raise ValueError("Either tof_column or config must be given.")
+        tof_column = config["dataframe"]["tof_column"]
    # align the 8s sectors
    sector_delays_arr = dask.array.from_array(sector_delays)

    def align_sector(x):
-        return x[tof_step_column] - sector_delays_arr[x[sector_id_column].values.astype(int)]
-    df[tof_step_column] = df.map_partitions(
-        align_sector, meta=(tof_step_column, np.float64)
+        val = x[tof_column] - sector_delays_arr[x[sector_id_column].values.astype(int)]
+        return val.astype(np.float32)
+    df[tof_column] = df.map_partitions(
+        align_sector, meta=(tof_column, np.float32)
)

metadata = {}
@@ -81,43 +85,50 @@ def align_sector(x):
return df, metadata
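For context, the alignment subtracts a per-sector delay from the time-of-flight column, looked up by each event's sector ID. A self-contained sketch of the same map_partitions pattern (editor's illustration; a plain numpy array stands in for dask.array.from_array, and the delay values are made up):

```python
import dask.dataframe as dd
import numpy as np
import pandas as pd

pdf = pd.DataFrame({"dldTimeSteps": [1000.0, 1000.0, 1000.0], "dldSectorID": [0, 1, 2]})
df = dd.from_pandas(pdf, npartitions=1)

sector_delays = np.array([0.0, 0.25, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0])  # illustrative values

def align_sector(part: pd.DataFrame) -> pd.Series:
    # subtract the delay of the sector each event was detected in
    val = part["dldTimeSteps"] - sector_delays[part["dldSectorID"].to_numpy().astype(int)]
    return val.astype(np.float32)

df["dldTimeSteps"] = df.map_partitions(align_sector, meta=("dldTimeSteps", np.float32))
print(df.compute())  # 1000.0, 999.75, 999.5
```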


-def convert_8s_time_to_ns(
+def dld_time_to_ns(
    df: Union[pd.DataFrame, dask.dataframe.DataFrame],
-    time_step_size: float = None,
-    tof_step_column: str = "dldTimeSteps",
-    tof_column: str = "dldTime",
+    tof_ns_column: str = None,
+    tof_binwidth: float = None,
+    tof_column: str = None,
+    tof_binning: int = None,
config: dict = None,
) -> Tuple[Union[pd.DataFrame, dask.dataframe.DataFrame], dict]:
"""Converts the 8s time in steps to time in ns.
Args:
-        time_step_size (float, optional): Time step size in nanoseconds.
-            Defaults to config["dataframe"]["time_step_size"].
-        tof_step_column (str, optional): Name of the column containing the
-            time-of-flight steps. Defaults to config["dataframe"]["tof_step_column"].
+        tof_binwidth (float, optional): Time step size in nanoseconds.
+            Defaults to config["dataframe"]["tof_binwidth"].
+        tof_column (str, optional): Name of the column containing the
+            time-of-flight steps. Defaults to config["dataframe"]["tof_column"].
        tof_column (str, optional): Name of the column containing the
            time-of-flight. Defaults to config["dataframe"]["tof_column"].
+        tof_binning (int, optional): Binning of the time-of-flight steps.
"""
-    if time_step_size is None:
-        if config is None:
-            raise ValueError("Either time_step_size or config must be given.")
-        time_step_size: float = config["dataframe"]["time_step_size"]
-    if tof_step_column is None:
-        if config is None:
-            raise ValueError("Either tof_step_column or config must be given.")
-        tof_step_column: str = config["dataframe"]["tof_step_column"]
+    if tof_binwidth is None:
+        if config is None:
+            raise ValueError("Either tof_binwidth or config must be given.")
+        tof_binwidth: float = config["dataframe"]["tof_binwidth"]
    if tof_column is None:
        if config is None:
-            raise ValueError("Either tof_time_column or config must be given.")
+            raise ValueError("Either tof_column or config must be given.")
        tof_column: str = config["dataframe"]["tof_column"]
+    if tof_binning is None:
+        if config is None:
+            raise ValueError("Either tof_binning or config must be given.")
+        tof_binning: int = config["dataframe"]["tof_binning"]
+    if tof_ns_column is None:
+        if config is None:
+            raise ValueError("Either tof_ns_column or config must be given.")
+        tof_ns_column: str = config["dataframe"]["tof_ns_column"]

def convert_to_ns(x):
-        return x[tof_step_column] * time_step_size
-    df[tof_column] = df.map_partitions(
-        convert_to_ns, meta=(tof_column, np.float64)
+        val = x[tof_column] * tof_binwidth * 2**tof_binning
+        return val.astype(np.float32)
+    df[tof_ns_column] = df.map_partitions(
+        convert_to_ns, meta=(tof_column, np.float32)
)
metadata = {}
metadata["applied"] = True
metadata["time_step_size"] = time_step_size
metadata["tof_binwidth"] = tof_binwidth

return df, metadata
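The renamed dld_time_to_ns now derives the step length from two config values instead of a single time_step_size: one step corresponds to tof_binwidth * 2**tof_binning nanoseconds. A quick numeric check using the values from the example config in the next file (editor's illustration, not part of the commit):

```python
tof_binwidth = 0.020576131995767355  # ns per base time-of-flight bin (from the example config)
tof_binning = 3                      # 2**3 = 8 base bins per step

step_to_ns = tof_binwidth * 2**tof_binning
print(step_to_ns)          # 0.16460905596613884 ns per step (the old time_step_size value)
print(1000 * step_to_ns)   # 1000 steps ~ 164.6 ns
```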
55 changes: 48 additions & 7 deletions sed/config/flash_example_config.yaml
@@ -23,19 +23,60 @@ dataframe:
daq: fl1user3
# The offset correction to the pulseId
ubid_offset: 5

# the number of iterations to fill the pulseId forward.
forward_fill_iterations: 2
-  # if true, removes the 3 bits reserved for dldSectorID from the dldTimeandSector column
+  # if true, removes the 3 bits reserved for dldSectorID from the dldTimeSteps column
  unravel_8s_detector_time_channel: True
-  time_step_size: 0.16460905596613884 # 0.020576131995767355
-  raw_time_column: dldTimeAndSector
-  time_step_column: dldTimeSteps
-  tof_step_column: dldTimeSteps

# dataframe column containing x coordinates
x_column: dldPosX
# dataframe column containing corrected x coordinates
corrected_x_column: "X"
# dataframe column containing kx coordinates
kx_column: "kx"
# dataframe column containing y coordinates

y_column: dldPosY
# dataframe column containing corrected y coordinates
corrected_y_column: "Y"
# dataframe column containing kx coordinates
ky_column: "ky"
# dataframe column containing time-of-flight data

tof_column: dldTimeSteps
# dataframe column containing time-of-flight data in ns
tof_ns_column: dldTime
# dataframe column containing corrected time-of-flight data
corrected_tof_column: "tm"

# time length of a base time-of-flight bin in ns
tof_binwidth: 0.020576131995767355 # 0.16460905596613884
# binning parameter for time-of-flight data. 2**tof_binning bins per base bin
tof_binning: 3 # power of 2, 4 means 8 bins per step
# dataframe column containing sector ID. obtained from dldTimeSteps column
sector_id_column: dldSectorID

sector_delays: [0., 0., 0., 0., 0., 0., 0., 0.]
-  tof_column: dldTime

jitter_cols: ["dldPosX", "dldPosY", "dldTimeSteps"]

units:
dldPosX: 'step'
dldPosY: 'step'
dldTimeSteps: 'step'
tof_voltage: 'V'
extractorVoltage: 'V'
extractorCurrent: 'A'
cryoTemperature: 'K'
sampleTemperature: 'K'
dldTime: 'ns'
# delay: 'ps'
timeStamp: 's'
# energy: 'eV'
# E: 'eV'
kx: '1/A'
ky: '1/A'

# The channels to load.
# channels have the following structure:
@@ -62,7 +103,7 @@ dataframe:
slice: 0
# This channel will actually create dldTimeSteps and dldSectorID,
# if unravel_8s_detector_time_channel is set to True
-    dldTimeAndSector:
+    dldTimeSteps:
format: per_electron
group_name: "/uncategorised/FLASH.EXP/HEXTOF.DAQ/DLD1/"
slice: 3
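The functions in sed/calibrator/hextof.py resolve every unset argument from config["dataframe"], using the keys defined above. A stripped-down sketch of that resolution pattern (editor's illustration; the resolve helper is hypothetical, only the key names and values come from the YAML above):

```python
config = {
    "dataframe": {
        "tof_column": "dldTimeSteps",
        "tof_ns_column": "dldTime",
        "sector_id_column": "dldSectorID",
        "tof_binwidth": 0.020576131995767355,
        "tof_binning": 3,
        "sector_delays": [0.0] * 8,
    },
}

def resolve(value, key, config=None):
    # mirrors the "either argument or config must be given" checks in hextof.py
    if value is not None:
        return value
    if config is None:
        raise ValueError(f"Either {key} or config must be given.")
    return config["dataframe"][key]

print(resolve(None, "tof_column", config))     # 'dldTimeSteps' (taken from config)
print(resolve("myTof", "tof_column", config))  # 'myTof' (explicit argument wins)
```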
46 changes: 24 additions & 22 deletions sed/core/processor.py
@@ -1204,43 +1204,48 @@ def add_jitter(self, cols: Sequence[str] = None):
metadata.append(col)
self._attributes.add(metadata, "jittering", duplicate_policy="append")

-    def hextof_step_to_ns(
+    def dld_time_to_ns(
        self,
-        time_step_size: float = None,
-        tof_step_column: str = None,
+        tof_ns_column: str = None,
+        tof_binwidth: float = None,
+        tof_column: str = None,
+        tof_binning: int = None,
):
"""Convert time-of-flight channel steps to nanoseconds.
Intended for use with HEXTOF endstation
Args:
-            time_step_size (float, optional): Time step size in nanoseconds.
-                Defaults to config["dataframe"]["time_step_size"].
-            tof_step_column (str, optional): Name of the column containing the
-                time-of-flight steps. Defaults to config["dataframe"]["tof_step_column"].
+            tof_binwidth (float, optional): Time step size in nanoseconds.
+                Defaults to config["dataframe"]["tof_binwidth"].
+            tof_column (str, optional): Name of the column containing the
+                time-of-flight steps. Defaults to config["dataframe"]["tof_column"].
            tof_column (str, optional): Name of the column containing the
                time-of-flight. Defaults to config["dataframe"]["tof_column"].
+            tof_binning (int, optional): Binning of the time-of-flight steps.
"""
if self._dataframe is not None:
print("Adding energy column to dataframe:")
# TODO assert order of execution through metadata

-            self._dataframe, metadata = hextof.convert_8s_time_to_ns(
+            self._dataframe, metadata = hextof.dld_time_to_ns(
                df=self._dataframe,
-                time_step_size=time_step_size or self._config["dataframe"]["time_step_size"],
-                tof_step_column=tof_step_column or self._config["dataframe"]["tof_step_column"],
-                tof_column=tof_column or self._config["dataframe"]["tof_column"],
+                tof_ns_column=tof_ns_column,
+                tof_binwidth=tof_binwidth,
+                tof_column=tof_column,
+                tof_binning=tof_binning,
                config=self._config,
)
self._attributes.add(
metadata,
"energy_calibration",
duplicate_policy="merge",
)

-    def hextof_align_8s_sectors(
+    def align_dld_sectors(
        self,
        sector_delays: Sequence[float] = None,
+        sector_id_column: str = None,
+        tof_column: str = None,
):
""" Align the 8s sectors of the HEXTOF endstation.
@@ -1253,19 +1258,16 @@ def hextof_align_8s_sectors(
if self._dataframe is not None:
print("Aligning 8s sectors of dataframe")
# TODO assert order of execution through metadata
-            if sector_delays is None:
-                sector_delays = self._config["dataframe"].get("sector_delays", [0.0] * 8)
-            if len(sector_delays) != 8:
-                raise ValueError("sector_delays must be a list of 8 floats")
-            if all(delay == 0.0 for delay in sector_delays):
-                print("All sector delays are 0, skipping alignment")
-            self._dataframe, metadata = hextof.align_8s_sectors(
+            self._dataframe, metadata = hextof.align_dld_sectors(
                df=self._dataframe,
                sector_delays=sector_delays,
+                sector_id_column=sector_id_column,
+                tof_column=tof_column,
+                config=self._config,
)
self._attributes.add(
metadata,
"energy_calibration",
"sector_alignment",
duplicate_policy="merge",
)

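Taken together, the processor now exposes the two renamed methods as the user-facing entry points. A possible call sequence after this commit (editor's illustration; the SedProcessor import path and constructor arguments are assumptions, only the two method names come from the diff above):

```python
from sed import SedProcessor  # assumed import path

sp = SedProcessor(
    files=["run_001.h5"],                           # hypothetical raw file
    config="sed/config/flash_example_config.yaml",  # example config shown above
)

# subtract the per-sector delays from the dldTimeSteps column
sp.align_dld_sectors()

# convert time-of-flight steps to nanoseconds (fills the dldTime column)
sp.dld_time_to_ns()
```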
5 changes: 3 additions & 2 deletions sed/loader/flash/loader.py
@@ -593,9 +593,10 @@ def create_dataframe_per_file(
with h5py.File(file_path, "r") as h5_file:
self.reset_multi_index() # Reset MultiIndexes for next file
df = self.concatenate_channels(h5_file)
-            df = df.dropna(subset=['dldTimeAndSector'])
+            df = df.dropna(subset=self._config['dataframe'].get('tof_column', 'dldTimeSteps'))
            # correct the 3 bit shift which encodes the detector ID in the 8s time
-            df = unravel_8s_detector_time_channel(df)
+            if self._config['dataframe'].get('unravel_8s_detector_time_channel', False):
+                df = unravel_8s_detector_time_channel(df, config=self._config)
return df

def create_buffer_file(self, h5_path: Path, parquet_path: Path) -> Union[bool, Exception]: