add rolling average and energy shift #174

Merged: 4 commits, Oct 12, 2023
142 changes: 141 additions & 1 deletion sed/calibrator/hextof.py
Member
Can you break this file up and put the respective functions into the calibrator files related to the physical axes? "hextof" is not a physical axis.
So, put energy functions into energy, momentum into momentum, etc., and for the tof-related stuff (dld calibration) make a new file tof.py.
I think there should not be anything specific to "mpes" or "hextof" in the main code, other than in the loaders; that is how I tried to design the rest of the code. Of course, one or the other function will only work in specific contexts, but those should not be limited to specific instruments.
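A rough sketch of the layout being suggested, purely illustrative (the exact module names and function homes beyond this PR are not decided here):

# sed/calibrator/energy.py    -> energy-axis functions, e.g. shift_energy_axis
# sed/calibrator/momentum.py  -> momentum-axis functions
# sed/calibrator/tof.py       -> tof/dld-related functions, e.g. dld_time_to_ns (new file)
# sed/core/dfops.py           -> generic dataframe helpers such as
#                                rolling_average_on_acquisition_time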

Member Author

I don't intend to merge the hextof folder. I just find the thousands of lines of those files scary! 😓

I'll transfer everything to its proper place, promised!

@@ -10,6 +10,7 @@
import numpy as np
import pandas as pd
import dask.dataframe
from dask.diagnostics import ProgressBar


def unravel_8s_detector_time_channel(
@@ -151,6 +152,145 @@ def dld_time_to_ns(
}
return df, metadata


def rolling_average_on_acquisition_time(
    df: Union[pd.DataFrame, dask.dataframe.DataFrame],
    rolling_group_channel: str,
    columns: Union[str, Sequence[str]] = None,
    window: float = None,
    sigma: float = 2,
    config: dict = None,
) -> Union[pd.DataFrame, dask.dataframe.DataFrame]:
    """Perform a rolling average with a gaussian weighted window.

    In order to preserve the number of points, the first and last "window"
    number of points are substituted with the original signal.
    # TODO: this is currently very slow, and could do with a remake.

    Args:
        df (Union[pd.DataFrame, dask.dataframe.DataFrame]): Dataframe to use.
        rolling_group_channel (str): Name of the column on which to group the data.
        columns (Union[str, Sequence[str]]): Name of the column(s) on which to
            perform the rolling average.
        window (float): Size of the rolling average window.
        sigma (float): Number of standard deviations for the gaussian weighting of
            the window. A value of 2 corresponds to a gaussian with sigma equal to
            half the window size. Smaller values reduce the weighting in the window frame.
        config (dict): Configuration dictionary, used to look up rolling_group_channel
            if it is not given.

    Returns:
        Union[pd.DataFrame, dask.dataframe.DataFrame]: Dataframe with the new columns.
    """
    if rolling_group_channel is None:
        if config is None:
            raise ValueError("Either rolling_group_channel or config must be given.")
        rolling_group_channel = config["dataframe"]["rolling_group_channel"]
    if isinstance(columns, str):
        columns = [columns]
    with ProgressBar():
        print(f"rolling average over {columns}...")
        # group by the acquisition-time channel and average each column per group
        df_ = df.groupby(rolling_group_channel).agg({c: 'mean' for c in columns}).compute()
        df_['dt'] = pd.to_datetime(df_.index, unit='s')
        df_['ts'] = df_.index
        for c in columns:
            df_[c + '_rolled'] = df_[c].interpolate(
                method='nearest',
            ).rolling(
                window, center=True, win_type='gaussian',
            ).mean(
                std=window / sigma,
            ).fillna(df_[c])
            df_ = df_.drop(c, axis=1)
            # drop a stale rolled column from a previous call before merging
            if c + '_rolled' in df.columns:
                df = df.drop(c + '_rolled', axis=1)
    # merge the rolled values back onto the event dataframe via the grouping channel
    return df.merge(df_, left_on=rolling_group_channel, right_on='ts').drop(['ts', 'dt'], axis=1)
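A minimal usage sketch; the 'sampleBias' column name and the numbers are made up for illustration, with 'timeStamp' playing the role of the acquisition-time channel:

from sed.calibrator.hextof import rolling_average_on_acquisition_time  # module path as in this diff
import dask.dataframe as dd
import numpy as np
import pandas as pd

# toy data: 1000 events spread over 100 seconds of acquisition time
pdf = pd.DataFrame({
    'timeStamp': np.repeat(np.arange(1.6e9, 1.6e9 + 100), 10),
    'sampleBias': np.random.normal(17.0, 0.1, 1000),
})
df = dd.from_pandas(pdf, npartitions=4)

df = rolling_average_on_acquisition_time(
    df,
    rolling_group_channel='timeStamp',
    columns='sampleBias',
    window=10,  # width of the gaussian window, in acquisition-time bins
    sigma=2,
)
# the smoothed values appear in a new column named 'sampleBias_rolled'
print(df['sampleBias_rolled'].head())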


def shift_energy_axis(
    df: Union[pd.DataFrame, dask.dataframe.DataFrame],
    columns: Union[str, Sequence[str]],
    signs: Union[int, Sequence[int]],
    energy_column: str = None,
    mode: Union[str, Sequence[str]] = "direct",
    window: float = None,
    sigma: float = 2,
    rolling_group_channel: str = None,
    config: dict = None,
) -> Union[pd.DataFrame, dask.dataframe.DataFrame]:
    """Apply an energy shift to the given column(s).

    Args:
        df (Union[pd.DataFrame, dask.dataframe.DataFrame]): Dataframe to use.
        columns (Union[str, Sequence[str]]): Name of the column(s) to apply the shift to.
        signs (Union[int, Sequence[int]]): Sign of the shift to apply (+1 or -1).
        energy_column (str): Name of the column containing the energy values.
        mode (Union[str, Sequence[str]]): The mode of the shift. One of 'direct', 'mean'
            or 'rolled'. If 'rolled', window, sigma and rolling_group_channel must be
            given or present in config.
        window (float): Size of the rolling average window (mode 'rolled').
        sigma (float): Number of standard deviations for the gaussian weighting of the
            window (mode 'rolled').
        rolling_group_channel (str): Name of the column on which to group the data
            (mode 'rolled').
        config (dict): Configuration dictionary.

    Returns:
        Union[pd.DataFrame, dask.dataframe.DataFrame]: Dataframe with the shifted
            column(s), together with a metadata dictionary.
    """
    if energy_column is None:
        if config is None:
            raise ValueError("Either energy_column or config must be given.")
        energy_column = config["dataframe"]["energy_column"]

    if isinstance(columns, str):
        columns = [columns]
    if isinstance(signs, int):
        signs = [signs]
    if isinstance(mode, str):
        mode = [mode] * len(columns)
    if len(columns) != len(signs):
        raise ValueError("columns and signs must have the same length.")
    with ProgressBar(minimum=5):
        if "rolled" in mode:
            if window is None:
                if config is None:
                    raise ValueError("Either window or config must be given.")
                window = config["dataframe"]["rolling_window"]
            if sigma is None:
                if config is None:
                    raise ValueError("Either sigma or config must be given.")
                sigma = config["dataframe"]["rolling_sigma"]
            if rolling_group_channel is None:
                if config is None:
                    raise ValueError("Either rolling_group_channel or config must be given.")
                rolling_group_channel = config["dataframe"].get("rolling_group_channel", None)
                if rolling_group_channel is None:
                    raise ValueError(
                        "If mode is 'rolled', rolling_group_channel must be "
                        "given or present in config.",
                    )
            print('rolling averages...')
            df = rolling_average_on_acquisition_time(
                df,
                rolling_group_channel=rolling_group_channel,
                columns=columns,
                window=window,
                sigma=sigma,
            )
        for col, s, m in zip(columns, signs, mode):
            s = s / np.abs(s)  # ensure s is either +1 or -1
            if m == "rolled":
                col = col + '_rolled'
            if m == "direct" or m == "rolled":
                # bind col and s as defaults so each lazy task keeps its own loop values
                df[col] = df.map_partitions(
                    lambda x, c=col, sgn=s: x[c] + sgn * x[energy_column],
                    meta=(col, np.float32),
                )
            elif m == 'mean':
                print('computing means...')
                col_mean = df[col].mean()
                df[col] = df.map_partitions(
                    lambda x, c=col, sgn=s, mean=col_mean: x[c] + sgn * (x[energy_column] - mean),
                    meta=(col, np.float32),
                )
            else:
                raise ValueError(f"mode must be one of 'direct', 'mean' or 'rolled'. Got {m}.")
    metadata: dict = {
        "applied": True,
        "energy_column": energy_column,
        "column_name": columns,
        "sign": signs,
    }
    return df, metadata
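A short sketch of the three shift modes, assuming a dask dataframe df that already carries the calibrated 'energy' column; the 'dldTime' and 'timeStamp' column names are only illustrative:

# 'direct': add sign * energy to the column, event by event
df, meta = shift_energy_axis(df, columns='dldTime', signs=-1, energy_column='energy', mode='direct')

# 'mean': shift relative to a mean value instead of by the absolute energy
df, meta = shift_energy_axis(df, columns='dldTime', signs=-1, energy_column='energy', mode='mean')

# 'rolled': smooth the column over acquisition time first, then shift the smoothed
# copy (stored as 'dldTime_rolled'); window, sigma and the grouping channel are required
df, meta = shift_energy_axis(
    df,
    columns='dldTime',
    signs=-1,
    energy_column='energy',
    mode='rolled',
    window=10,
    sigma=2,
    rolling_group_channel='timeStamp',
)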


def step2ns(
df: Union[pd.DataFrame, dask.dataframe.DataFrame],
tof_column: str,
@@ -172,4 +312,4 @@
Union[pd.DataFrame, dask.dataframe.DataFrame]: Dataframe with the new column.
"""
val = df[tof_column].astype(dtype) * tof_binwidth * 2**tof_binning
return val.astype(dtype)
return val.astype(dtype)
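For orientation, the conversion in step2ns is a plain rescaling; with made-up numbers:

tof_binwidth = 0.25   # ns per (unbinned) step, illustrative value
tof_binning = 2       # enters the formula as 2**tof_binning
steps = 1000
time_ns = steps * tof_binwidth * 2**tof_binning   # 1000 * 0.25 * 4 = 1000.0 ns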
137 changes: 137 additions & 0 deletions sed/core/dfops.py
@@ -3,6 +3,8 @@
"""
# Note: some of the functions presented here were
# inspired by https://github.com/mpes-kit/mpes
from typing import Any
from typing import Dict
from typing import Callable
from typing import Sequence
from typing import Union
@@ -193,3 +195,138 @@ def forward_fill_partition(df):
after=0,
)
return df


def rolling_average_on_acquisition_time(
    df: Union[pd.DataFrame, dask.dataframe.DataFrame],
    rolling_group_channel: str,
    columns: Union[str, Sequence[str]] = None,
    window: float = None,
    sigma: float = 2,
    config: dict = None,
) -> Union[pd.DataFrame, dask.dataframe.DataFrame]:
    """Perform a rolling average with a gaussian weighted window.

    In order to preserve the number of points, the first and last "window"
    number of points are substituted with the original signal.
    # TODO: this is currently very slow, and could do with a remake.

    Args:
        df (Union[pd.DataFrame, dask.dataframe.DataFrame]): Dataframe to use.
        rolling_group_channel (str): Name of the column on which to group the data.
        columns (Union[str, Sequence[str]]): Name of the column(s) on which to
            perform the rolling average.
        window (float): Size of the rolling average window.
        sigma (float): Number of standard deviations for the gaussian weighting of
            the window. A value of 2 corresponds to a gaussian with sigma equal to
            half the window size. Smaller values reduce the weighting in the window frame.
        config (dict): Configuration dictionary, used to look up rolling_group_channel
            if it is not given.

    Returns:
        Union[pd.DataFrame, dask.dataframe.DataFrame]: Dataframe with the new columns.
    """
    if rolling_group_channel is None:
        if config is None:
            raise ValueError("Either rolling_group_channel or config must be given.")
        rolling_group_channel = config["dataframe"]["rolling_group_channel"]
    if isinstance(columns, str):
        columns = [columns]
    with ProgressBar():
        print(f"rolling average over {columns}...")
        # group by the acquisition-time channel and average each column per group
        df_ = df.groupby(rolling_group_channel).agg({c: 'mean' for c in columns}).compute()
        df_['dt'] = pd.to_datetime(df_.index, unit='s')
        df_['ts'] = df_.index
        for c in columns:
            df_[c + '_rolled'] = df_[c].interpolate(
                method='nearest',
            ).rolling(
                window, center=True, win_type='gaussian',
            ).mean(
                std=window / sigma,
            ).fillna(df_[c])
            df_ = df_.drop(c, axis=1)
            # drop a stale rolled column from a previous call before merging
            if c + '_rolled' in df.columns:
                df = df.drop(c + '_rolled', axis=1)
    # merge the rolled values back onto the event dataframe via the grouping channel
    return df.merge(df_, left_on=rolling_group_channel, right_on='ts').drop(['ts', 'dt'], axis=1)
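The smoothing step above is pandas' gaussian-weighted rolling mean; a self-contained toy example of just that mechanism (win_type='gaussian' requires scipy):

import numpy as np
import pandas as pd

# noisy step signal
signal = pd.Series(np.concatenate([np.zeros(100), np.ones(100)]) + np.random.normal(0, 0.2, 200))

window = 20
sigma = 2  # as above: the gaussian std is window / sigma
smoothed = (
    signal.rolling(window, center=True, win_type='gaussian')
    .mean(std=window / sigma)
    .fillna(signal)  # keep the original values where the centered window is incomplete
)
print(smoothed.iloc[95:105])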


def apply_energy_shift(
    df: Union[pd.DataFrame, dask.dataframe.DataFrame],
    columns: Union[str, Sequence[str]],
    signs: Union[int, Sequence[int]],
    energy_column: str = None,
    mode: Union[str, Sequence[str]] = "direct",
    window: float = None,
    sigma: float = 2,
    rolling_group_channel: str = None,
    config: dict = None,
) -> Union[pd.DataFrame, dask.dataframe.DataFrame]:
    """Apply an energy shift to the given column(s).

    Args:
        df (Union[pd.DataFrame, dask.dataframe.DataFrame]): Dataframe to use.
        columns (Union[str, Sequence[str]]): Name of the column(s) to apply the shift to.
        signs (Union[int, Sequence[int]]): Sign of the shift to apply (+1 or -1).
        energy_column (str): Name of the column containing the energy values.
        mode (Union[str, Sequence[str]]): The mode of the shift. One of 'direct', 'mean'
            or 'rolled'. If 'rolled', window, sigma and rolling_group_channel must be
            given or present in config.
        window (float): Size of the rolling average window (mode 'rolled').
        sigma (float): Number of standard deviations for the gaussian weighting of the
            window (mode 'rolled').
        rolling_group_channel (str): Name of the column on which to group the data
            (mode 'rolled').
        config (dict): Configuration dictionary.

    Returns:
        Union[pd.DataFrame, dask.dataframe.DataFrame]: Dataframe with the shifted
            column(s), together with a metadata dictionary.
    """
    if energy_column is None:
        if config is None:
            raise ValueError("Either energy_column or config must be given.")
        energy_column = config["dataframe"]["energy_column"]
    if isinstance(columns, str):
        columns = [columns]
    if isinstance(signs, int):
        signs = [signs]
    if isinstance(mode, str):
        mode = [mode] * len(columns)
    if len(columns) != len(signs):
        raise ValueError("columns and signs must have the same length.")
    with ProgressBar(minimum=5):
        if "rolled" in mode:
            if window is None:
                if config is None:
                    raise ValueError("Either window or config must be given.")
                window = config["dataframe"]["rolling_window"]
            if sigma is None:
                if config is None:
                    raise ValueError("Either sigma or config must be given.")
                sigma = config["dataframe"]["rolling_sigma"]
            if rolling_group_channel is None:
                if config is None:
                    raise ValueError("Either rolling_group_channel or config must be given.")
                rolling_group_channel = config["dataframe"]["rolling_group_channel"]
            print('rolling averages...')
            df = rolling_average_on_acquisition_time(
                df,
                rolling_group_channel=rolling_group_channel,
                columns=columns,
                window=window,
                sigma=sigma,
            )
        for col, s, m in zip(columns, signs, mode):
            s = s / np.abs(s)  # ensure s is either +1 or -1
            if m == "rolled":
                col = col + '_rolled'
            if m == "direct" or m == "rolled":
                # bind col and s as defaults so each lazy task keeps its own loop values
                df[col] = df.map_partitions(
                    lambda x, c=col, sgn=s: x[c] + sgn * x[energy_column],
                    meta=(col, np.float32),
                )
            elif m == 'mean':
                print('computing means...')
                col_mean = df[col].mean()
                df[col] = df.map_partitions(
                    lambda x, c=col, sgn=s, mean=col_mean: x[c] + sgn * (x[energy_column] - mean),
                    meta=(col, np.float32),
                )
            else:
                raise ValueError(f"mode must be one of 'direct', 'mean' or 'rolled'. Got {m}.")
    metadata: Dict[str, Any] = {
        "applied": True,
        "energy_column": energy_column,
        "column_name": columns,
        "sign": signs,
    }
    return df, metadata
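The lambdas above bind the loop variables as default arguments because dask evaluates lazily; with a plain closure, every partition call would see only the last loop values. A minimal illustration of the pitfall:

import dask.dataframe as dd
import numpy as np
import pandas as pd

ddf = dd.from_pandas(
    pd.DataFrame({'a': [1.0, 2.0], 'b': [10.0, 20.0], 'e': [0.5, 0.5]}),
    npartitions=1,
)

late, bound = [], []
for col in ['a', 'b']:
    # closure: 'col' is looked up when the task runs, i.e. after the loop, so always 'b'
    late.append(ddf.map_partitions(lambda x: x[col] + x['e'], meta=(col, np.float64)))
    # default argument: 'col' is frozen per iteration, so each result uses its own column
    bound.append(ddf.map_partitions(lambda x, c=col: x[c] + x['e'], meta=(col, np.float64)))

print([r.compute().tolist() for r in late])   # [[10.5, 20.5], [10.5, 20.5]]
print([r.compute().tolist() for r in bound])  # [[1.5, 2.5], [10.5, 20.5]]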

31 changes: 31 additions & 0 deletions sed/core/processor.py
Original file line number Diff line number Diff line change
@@ -1181,6 +1181,37 @@ def calibrate_delay_axis(
else:
print(self._dataframe)

def shift_energy_axis(
    self,
    columns: Union[str, Sequence[str]],
    signs: Union[int, Sequence[int]],
    mode: Union[str, Sequence[str]] = "direct",
    window: float = None,
    sigma: float = 2,
    rolling_group_channel: str = None,
) -> None:
    """Shift the given dataframe column(s) by the calibrated energy axis.

    Args:
        columns (Union[str, Sequence[str]]): Name of the column(s) to apply the shift to.
        signs (Union[int, Sequence[int]]): Sign of the shift to apply (+1 or -1).
        mode (Union[str, Sequence[str]]): The mode of the shift. One of 'direct',
            'mean' or 'rolled'.
        window (float): Size of the rolling average window (mode 'rolled').
        sigma (float): Number of standard deviations for the gaussian weighting of
            the window (mode 'rolled').
        rolling_group_channel (str): Name of the column on which to group the data
            (mode 'rolled').

    Raises:
        ValueError: If the energy column is not present in the dataframe.
    """
    energy_column = self._config["dataframe"]["energy_column"]
    if energy_column not in self._dataframe.columns:
        raise ValueError(
            f"Energy column {energy_column} not found in dataframe! "
            "Run energy calibration first.",
        )
    self._dataframe, metadata = hextof.shift_energy_axis(
        df=self._dataframe,
        columns=columns,
        signs=signs,
        mode=mode,
        window=window,
        sigma=sigma,
        rolling_group_channel=rolling_group_channel,
        config=self._config,
    )
    self._attributes.add(
        metadata,
        "shift_energy_axis",
        duplicate_policy="raise",
    )
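At the processor level the call could look like this; sp stands for a processor instance with a completed energy calibration, and 'dldTime' and 'timeStamp' are illustrative column names:

# smooth the (hypothetical) 'dldTime' column over acquisition time and shift
# the smoothed copy down by the calibrated energy
sp.shift_energy_axis(
    columns='dldTime',
    signs=-1,
    mode='rolled',
    window=10,
    sigma=2,
    rolling_group_channel='timeStamp',
)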

def add_jitter(self, cols: Sequence[str] = None):
"""Add jitter to the selected dataframe columns.
