Merge pull request #116 from OpenCOMPES/histograms_from_timed_dataframe

Histograms from timed dataframe
OpenCOMPES · Nov 5, 2023 · bb717a7 · bb717a7
2 parents 35bd293 + 3dc7c5f
commit bb717a7
Show file tree

Hide file tree

Showing 18 changed files with 853 additions and 113 deletions.
diff --git a/sed/binning/binning.py b/sed/binning/binning.py
@@ -433,6 +433,77 @@ def bin_dataframe(
     return data_array
 
 
+def normalization_histogram_from_timestamps(
+    df: dask.dataframe.DataFrame,
+    axis: str,
+    bin_centers: np.ndarray,
+    time_stamp_column: str,
+) -> xr.DataArray:
+    """Get a normalization histogram from the time stamps column in the dataframe.
+
+    Sums, for each bin of ``axis``, each row's time stamp difference to the preceding row.
+
+    Args:
+        df (dask.dataframe.DataFrame): Dataframe holding the axis and time stamp columns.
+        axis (str): Dataframe column along which the normalization histogram is binned.
+        bin_centers (np.ndarray): Bin centers used for binning of the axis.
+        time_stamp_column (str): Dataframe column containing the time stamps.
+
+    Returns:
+        xr.DataArray: Summed time differences per bin, with bin_centers as coordinate.
+    """
+    time_per_electron = df[time_stamp_column].diff()  # difference to the previous row
+    # Bin the axis values per partition, using edges from bin_centers_to_bin_edges.
+    bins = df[axis].map_partitions(
+        pd.cut,
+        bins=bin_centers_to_bin_edges(bin_centers),
+    )
+    # Sum the time differences within each bin and materialize the result as an array.
+    histogram = time_per_electron.groupby([bins]).sum().compute().values
+
+    data_array = xr.DataArray(
+        data=histogram,
+        coords={axis: bin_centers},
+    )
+
+    return data_array
+
+
+def normalization_histogram_from_timed_dataframe(
+    df: dask.dataframe.DataFrame,
+    axis: str,
+    bin_centers: np.ndarray,
+    time_unit: float,
+) -> xr.DataArray:
+    """Get a normalization histogram from a timed dataframe.
+
+    Counts the ``axis`` entries falling into each bin and scales the counts by time_unit.
+
+    Args:
+        df (dask.dataframe.DataFrame): Dataframe whose entries are based on an equal time unit.
+        axis (str): Dataframe column along which the normalization histogram is binned.
+        bin_centers (np.ndarray): Bin centers used for binning of the axis.
+        time_unit (float): Time unit the data frame entries are based on.
+
+    Returns:
+        xr.DataArray: Entry count per bin times time_unit, with bin_centers as coordinate.
+    """
+    bins = df[axis].map_partitions(
+        pd.cut,
+        bins=bin_centers_to_bin_edges(bin_centers),
+    )
+    # Count the axis entries per bin, then convert the counts to time via time_unit.
+    histogram = df[axis].groupby([bins]).count().compute().values * time_unit
+    # Alternative (disabled): bin_dataframe(df, axes=[axis], bins=[bin_centers]) * time_unit
+
+    data_array = xr.DataArray(
+        data=histogram,
+        coords={axis: bin_centers},
+    )
+
+    return data_array
+
+
 def apply_jitter_on_column(
     df: Union[dask.dataframe.core.DataFrame, pd.DataFrame],
     amp: float,

diff --git a/sed/calibrator/energy.py b/sed/calibrator/energy.py
@@ -236,7 +236,7 @@ def bin_data(
                         "Either Bias Values or a valid bias_key has to be present!",
                     ) from exc
 
-        dataframe, _ = self.loader.read_dataframe(
+        dataframe, _, _ = self.loader.read_dataframe(
             files=data_files,
             collect_metadata=False,
         )
@@ -1442,8 +1442,10 @@ def align_dld_sectors(
             dask.dataframe.DataFrame: Dataframe with the new columns.
             dict: Metadata dictionary.
         """
-        sector_delays = sector_delays or self.sector_delays
-        sector_id_column = sector_id_column or self.sector_id_column
+        if sector_delays is None:
+            sector_delays = self.sector_delays
+        if sector_id_column is None:
+            sector_id_column = self.sector_id_column
 
         if sector_delays is None or sector_id_column is None:
             raise ValueError(

diff --git a/sed/config/default.yaml b/sed/config/default.yaml
@@ -39,6 +39,8 @@ dataframe:
   jitter_cols: ["@x_column", "@y_column", "@tof_column"]
   # Jitter amplitude or list of jitter amplitudes. Should equal half the digitial step size of each jitter_column
   jitter_amps: 0.5
+  # Time stepping in seconds of the successive events in the timed dataframe
+  timed_dataframe_unit_time: 0.001
 
 energy:
   # Number of bins to use for energy calibration traces

diff --git a/sed/config/mpes_example_config.yaml b/sed/config/mpes_example_config.yaml
@@ -29,6 +29,8 @@ dataframe:
   ms_markers_group: "msMarkers"
   # hdf5 attribute containing the timestamp of the first event in a file
   first_event_time_stamp_key: "FirstEventTimeStamp"
+  # Time stepping in seconds of the successive events in the timed dataframe
+  timed_dataframe_unit_time: 0.001
   # list of columns to apply jitter to
   jitter_cols: ["X", "Y", "t", "ADC"]
   # dataframe column containing x coordinates