From 9f0998446b46362727c8c1bf16d1ce3f5ef2b50a Mon Sep 17 00:00:00 2001
From: rettigl <l.rettig@web.de>
Date: Tue, 31 Oct 2023 00:00:29 +0100
Subject: [PATCH 01/11] basic function for adding time-stamped data to
 dataframe

---
 sed/core/dfops.py | 39 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/sed/core/dfops.py b/sed/core/dfops.py
index 975f8c1e..93bfced2 100644
--- a/sed/core/dfops.py
+++ b/sed/core/dfops.py
@@ -112,6 +112,45 @@ def apply_filter(
     return out_df
 
 
+def add_time_stamped_data(
+    df: Union[pd.DataFrame, dask.dataframe.DataFrame],
+    time_stamps: np.ndarray,
+    data: np.ndarray,
+    dest_column: str,
+    time_stamp_column: str,
+    **kwds,
+) -> Union[pd.DataFrame, dask.dataframe.DataFrame]:
+    """Add data in form of timestamp/value pairs to the dataframe using interpolation to the
+    timestamps in the dataframe.
+
+    Args:
+        df (Union[pd.DataFrame, dask.dataframe.DataFrame]): Dataframe to use.
+        time_stamps (np.ndarray): Time stamps of the values to add
+        data (np.ndarray): Values corresponding to the time stamps in time_stamps
+        dest_column (str): destination column name
+        time_stamp_column (str): Time stamp column name
+
+    Returns:
+        Union[pd.DataFrame, dask.dataframe.DataFrame]: Dataframe with added column
+    """
+    if time_stamp_column not in df.columns:
+        raise ValueError(f"{time_stamp_column} not found in dataframe!")
+
+    if len(time_stamps) != len(data):
+        raise ValueError("time_stamps and data have to be of same length!")
+
+    def interpolate_timestamps(
+        df: Union[pd.DataFrame, dask.dataframe.DataFrame],
+    ) -> Union[pd.DataFrame, dask.dataframe.DataFrame]:
+        df_timestamps = df[time_stamp_column]
+        df[dest_column] = np.interp(df_timestamps, time_stamps, data)
+        return df
+
+    df = df.map_partitions(interpolate_timestamps, **kwds)
+
+    return df
+
+
 def map_columns_2d(
     df: Union[pd.DataFrame, dask.dataframe.DataFrame],
     map_2d: Callable,

From 5c352dd61d7cd093bfa820fb381c37bb1331453b Mon Sep 17 00:00:00 2001
From: rettigl <l.rettig@web.de>
Date: Tue, 31 Oct 2023 23:08:55 +0100
Subject: [PATCH 02/11] add tests, and limit function to only work with dask
 dataframes because it uses map_partitions

---
 sed/core/dfops.py   | 11 ++++++----
 tests/test_dfops.py | 49 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 56 insertions(+), 4 deletions(-)

diff --git a/sed/core/dfops.py b/sed/core/dfops.py
index 93bfced2..33198ef1 100644
--- a/sed/core/dfops.py
+++ b/sed/core/dfops.py
@@ -113,13 +113,13 @@ def apply_filter(
 
 
 def add_time_stamped_data(
-    df: Union[pd.DataFrame, dask.dataframe.DataFrame],
+    df: dask.dataframe.DataFrame,
     time_stamps: np.ndarray,
     data: np.ndarray,
     dest_column: str,
     time_stamp_column: str,
     **kwds,
-) -> Union[pd.DataFrame, dask.dataframe.DataFrame]:
+) -> dask.dataframe.DataFrame:
     """Add data in form of timestamp/value pairs to the dataframe using interpolation to the
     timestamps in the dataframe.
 
@@ -140,12 +140,15 @@ def add_time_stamped_data(
         raise ValueError("time_stamps and data have to be of same length!")
 
     def interpolate_timestamps(
-        df: Union[pd.DataFrame, dask.dataframe.DataFrame],
-    ) -> Union[pd.DataFrame, dask.dataframe.DataFrame]:
+        df: dask.dataframe.DataFrame,
+    ) -> dask.dataframe.DataFrame:
         df_timestamps = df[time_stamp_column]
         df[dest_column] = np.interp(df_timestamps, time_stamps, data)
         return df
 
+    if not isinstance(df, dask.dataframe.DataFrame):
+        raise ValueError("This function only works for Dask Dataframes!")
+
     df = df.map_partitions(interpolate_timestamps, **kwds)
 
     return df
diff --git a/tests/test_dfops.py b/tests/test_dfops.py
index 3425003c..b41dd8ab 100644
--- a/tests/test_dfops.py
+++ b/tests/test_dfops.py
@@ -1,10 +1,13 @@
 """This file contains code that performs several tests for the dfops functions
 """
+import datetime as dt
+
 import dask.dataframe as ddf
 import numpy as np
 import pandas as pd
 import pytest
 
+from sed.core.dfops import add_time_stamped_data
 from sed.core.dfops import apply_filter
 from sed.core.dfops import apply_jitter
 from sed.core.dfops import backward_fill_lazy
@@ -56,6 +59,52 @@ def test_apply_filter():
     assert np.all(df_filtered[colname] < upper_bound)
 
 
+def test_add_time_stamped_data():
+    """Test the addition of time-stamped data to the df."""
+    df_ts = df
+    time_stamp = dt.datetime.now().timestamp()
+    df_ts["timeStamps"] = time_stamp + np.linspace(0, 100, N_PTS)
+    data = np.linspace(0, 1, 20)
+    time_stamps = time_stamp + np.linspace(0, 100, 20)
+    with pytest.raises(ValueError):
+        add_time_stamped_data(
+            df=df_ts,
+            time_stamps=time_stamps,
+            data=data,
+            dest_column="time_stamped_data",
+            time_stamp_column="timeStamps",
+        )
+    dd_ts = ddf.from_pandas(df_ts, npartitions=N_PARTITIONS)
+    with pytest.raises(ValueError):
+        add_time_stamped_data(
+            df=dd_ts,
+            time_stamps=time_stamps,
+            data=data,
+            dest_column="time_stamped_data",
+            time_stamp_column="invalidColumn",
+        )
+    dd_ts = add_time_stamped_data(
+        df=dd_ts,
+        time_stamps=time_stamps,
+        data=data,
+        dest_column="time_stamped_data",
+        time_stamp_column="timeStamps",
+    )
+    assert "time_stamped_data" in dd_ts
+    res = dd_ts["time_stamped_data"].compute().values
+    assert res[0] == 0
+    assert res[-1] == 1
+    with pytest.raises(ValueError):
+        data = np.linspace(0, 1, 19)
+        add_time_stamped_data(
+            df=dd_ts,
+            time_stamps=time_stamps,
+            data=data,
+            dest_column="time_stamped_data",
+            time_stamp_column="timeStamps",
+        )
+
+
 def test_map_columns_2d():
     """Test mapping of a 2D-function onto the df."""
 

From 06a20158e5bf143ab43ae856ea5e639c79355bea Mon Sep 17 00:00:00 2001
From: rettigl <l.rettig@web.de>
Date: Wed, 1 Nov 2023 21:35:51 +0100
Subject: [PATCH 03/11] move determination of start and end time stamps and
 archiver data extraction into separate functions

---
 sed/loader/mpes/loader.py | 88 ++++++++++++++++++++++++++-------------
 1 file changed, 59 insertions(+), 29 deletions(-)

diff --git a/sed/loader/mpes/loader.py b/sed/loader/mpes/loader.py
index b145cb0d..23b9cf48 100644
--- a/sed/loader/mpes/loader.py
+++ b/sed/loader/mpes/loader.py
@@ -441,6 +441,34 @@ def get_elapsed_time(
     return secs
 
 
+def get_archiver_data(
+    archiver_url: str,
+    archiver_channel: str,
+    ts_from: float,
+    ts_to: float,
+) -> Tuple[np.ndarray, np.ndarray]:
+    """Extract time stamps and corresponding data from an EPICS archiver instance
+
+    Args:
+        archiver_url (str): URL of the archiver data extraction interface
+        archiver_channel (str): EPICS channel to extract data for
+        ts_from (float): starting time stamp of the range of interest
+        ts_to (float): ending time stamp of the range of interest
+
+    Returns:
+        Tuple[np.ndarray, np.ndarray]: The extracted time stamps and corresponding data
+    """
+    iso_from = datetime.datetime.utcfromtimestamp(ts_from).isoformat()
+    iso_to = datetime.datetime.utcfromtimestamp(ts_to).isoformat()
+    req_str = archiver_url + archiver_channel + "&from=" + iso_from + "Z&to=" + iso_to + "Z"
+    with urllib.request.urlopen(req_str) as req:
+        data = json.load(req)
+        secs = [x["secs"] + x["nanos"] * 1e-9 for x in data[0]["data"]]
+        vals = [x["val"] for x in data[0]["data"]]
+
+    return (np.asarray(secs), np.asarray(vals))
+
+
 class MpesLoader(BaseLoader):
     """Mpes implementation of the Loader. Reads from h5 files or folders of the
     SPECS Metis 1000 (FHI Berlin)
@@ -645,6 +673,28 @@ def get_files_from_run_id(
         # Return the list of found files
         return files
 
+    def get_start_and_end_time(self) -> Tuple[float, float]:
+        """Extract the start and end time stamps from the loaded files
+
+        Returns:
+            Tuple[float, float]: A tuple containing the start and end time stamps
+        """
+        h5file = h5py.File(self.files[0])
+        timestamps = hdf5_to_array(
+            h5file,
+            group_names=self._config["dataframe"]["hdf5_groupnames"],
+            time_stamps=True,
+        )
+        ts_from = timestamps[-1][1]
+        h5file = h5py.File(self.files[-1])
+        timestamps = hdf5_to_array(
+            h5file,
+            group_names=self._config["dataframe"]["hdf5_groupnames"],
+            time_stamps=True,
+        )
+        ts_to = timestamps[-1][-1]
+        return (ts_from, ts_to)
+
     def gather_metadata(
         self,
         files: Sequence[str],
@@ -666,21 +716,7 @@ def gather_metadata(
         print("Gathering metadata from different locations")
         # Read events in with ms time stamps
         print("Collecting time stamps...")
-
-        h5file = h5py.File(files[0])
-        timestamps = hdf5_to_array(
-            h5file,
-            group_names=self._config["dataframe"]["hdf5_groupnames"],
-            time_stamps=True,
-        )
-        ts_from = timestamps[-1][1]
-        h5file = h5py.File(files[-1])
-        timestamps = hdf5_to_array(
-            h5file,
-            group_names=self._config["dataframe"]["hdf5_groupnames"],
-            time_stamps=True,
-        )
-        ts_to = timestamps[-1][-1]
+        (ts_from, ts_to) = self.get_start_and_end_time()
 
         metadata["timing"] = {
             "acquisition_start": datetime.datetime.utcfromtimestamp(ts_from)
@@ -709,28 +745,22 @@ def gather_metadata(
 
         print("Collecting data from the EPICS archive...")
         # Get metadata from Epics archive if not present already
-        start = datetime.datetime.utcfromtimestamp(ts_from).isoformat()
-        end = datetime.datetime.utcfromtimestamp(ts_to).isoformat()
         epics_channels = self._config["metadata"]["epics_pvs"]
 
+        start = datetime.datetime.utcfromtimestamp(ts_from).isoformat()
+
         channels_missing = set(epics_channels) - set(
             metadata["file"].keys(),
         )
         for channel in channels_missing:
             try:
-                req_str = (
-                    "http://aa0.fhi-berlin.mpg.de:17668/retrieval/data/getData.json?pv="
-                    + channel
-                    + "&from="
-                    + start
-                    + "Z&to="
-                    + end
-                    + "Z"
+                _, vals = get_archiver_data(
+                    archiver_url=self._config["metadata"].get("archiver_url"),
+                    archiver_channel=channel,
+                    ts_from=ts_from,
+                    ts_to=ts_to,
                 )
-                with urllib.request.urlopen(req_str) as req:
-                    data = json.load(req)
-                    vals = [x["val"] for x in data[0]["data"]]
-                    metadata["file"][f"{channel}"] = np.mean(vals)
+                metadata["file"][f"{channel}"] = np.mean(vals)
 
             except IndexError:
                 metadata["file"][f"{channel}"] = np.nan

From 685bcbc599fa80c753d161cbcb18840aa799789b Mon Sep 17 00:00:00 2001
From: rettigl <l.rettig@web.de>
Date: Wed, 1 Nov 2023 21:50:56 +0100
Subject: [PATCH 04/11] add processor function to add time-stamped data either
 from directly provided data or from data extracted from an EPICS archiver
 instance, and add tests for it

---
 sed/config/mpes_example_config.yaml |  2 +
 sed/core/processor.py               | 62 +++++++++++++++++++++++++++++
 tests/test_processor.py             | 27 +++++++++++++
 3 files changed, 91 insertions(+)

diff --git a/sed/config/mpes_example_config.yaml b/sed/config/mpes_example_config.yaml
index 5b9eca7b..b3e47670 100644
--- a/sed/config/mpes_example_config.yaml
+++ b/sed/config/mpes_example_config.yaml
@@ -216,6 +216,8 @@ histogram:
   ranges: [[0, 1800], [0, 1800], [128000, 138000], [0, 32000]]
 
 metadata:
+  # URL of the epics archiver request engine
+  archiver_url: "http://aa0.fhi-berlin.mpg.de:17668/retrieval/data/getData.json?pv="
   # EPICS channels to collect from EPICS archiver
   epics_pvs: ["KTOF:Lens:Extr:I", "trARPES:Carving:TEMP_RBV", "trARPES:XGS600:PressureAC:P_RD", "KTOF:Lens:UDLD:V", "KTOF:Lens:Sample:V", "KTOF:Apertures:m1.RBV", "KTOF:Apertures:m2.RBV", "KTOF:Apertures:m3.RBV", "trARPES:Carving:TRX.RBV", "trARPES:Carving:TRY.RBV", "trARPES:Carving:TRZ.RBV", "trARPES:Carving:THT.RBV", "trARPES:Carving:PHI.RBV", "trARPES:Carving:OMG.RBV"]
   # hdf5 attribute containing the field aperture "in" motor position
diff --git a/sed/core/processor.py b/sed/core/processor.py
index 3b5cc7a1..0b60e323 100644
--- a/sed/core/processor.py
+++ b/sed/core/processor.py
@@ -26,6 +26,7 @@
 from sed.core.config import parse_config
 from sed.core.config import save_config
 from sed.core.dfops import apply_filter
+from sed.core.dfops import add_time_stamped_data
 from sed.core.dfops import apply_jitter
 from sed.core.metadata import MetaHandler
 from sed.diagnostics import grid_histogram
@@ -34,6 +35,8 @@
 from sed.io import to_tiff
 from sed.loader import CopyTool
 from sed.loader import get_loader
+from sed.loader.mpes.loader import get_archiver_data
+from sed.loader.mpes.loader import MpesLoader
 
 N_CPU = psutil.cpu_count()
 
@@ -1715,6 +1718,65 @@ def add_jitter(
             metadata.append(col)
         self._attributes.add(metadata, "jittering", duplicate_policy="append")
 
+    def add_time_stamped_data(
+        self,
+        dest_column: str,
+        time_stamps: np.ndarray = None,
+        data: np.ndarray = None,
+        archiver_channel: str = None,
+        **kwds,
+    ):
+        """Add data in form of timestamp/value pairs to the dataframe using interpolation to the
+        timestamps in the dataframe. The time-stamped data can either be provided, or fetched from
+        an EPICS archiver instance.
+
+        Args:
+            dest_column (str): destination column name
+            time_stamps (np.ndarray, optional): Time stamps of the values to add. If omitted,
+                time stamps are retrieved from the epics archiver
+            data (np.ndarray, optional): Values corresponding to the time stamps in time_stamps.
+                If omitted, data are retrieved from the epics archiver.
+            archiver_channel (str, optional): EPICS archiver channel from which to retrieve data.
+                Either this or data and time_stamps have to be present.
+            **kwds: additional keyword arguments passed to add_time_stamped_data
+        """
+        time_stamp_column = kwds.pop(
+            "time_stamp_column",
+            self._config["dataframe"].get("time_stamp_alias", ""),
+        )
+
+        if time_stamps is None and data is None:
+            if archiver_channel is None:
+                raise ValueError(
+                    "Either archiver_channel or both time_stamps and data have to be present!",
+                )
+            if self.loader.__name__ != "mpes":
+                raise NotImplementedError(
+                    "This function is currently only implemented for the mpes loader!",
+                )
+            ts_from, ts_to = cast(MpesLoader, self.loader).get_start_and_end_time()
+            # get channel data with +-5 seconds safety margin
+            time_stamps, data = get_archiver_data(
+                archiver_url=self._config["metadata"].get("archiver_url", ""),
+                archiver_channel=archiver_channel,
+                ts_from=ts_from - 5,
+                ts_to=ts_to + 5,
+            )
+
+        self._dataframe = add_time_stamped_data(
+            self._dataframe,
+            time_stamps=time_stamps,
+            data=data,
+            dest_column=dest_column,
+            time_stamp_column=time_stamp_column,
+            **kwds,
+        )
+        metadata: List[Any] = []
+        metadata.append(dest_column)
+        metadata.append(time_stamps)
+        metadata.append(data)
+        self._attributes.add(metadata, "time_stamped_data", duplicate_policy="append")
+
     def pre_binning(
         self,
         df_partitions: int = 100,
diff --git a/tests/test_processor.py b/tests/test_processor.py
index 01097670..701cbce3 100644
--- a/tests/test_processor.py
+++ b/tests/test_processor.py
@@ -726,6 +726,33 @@ def test_add_jitter():
     np.testing.assert_allclose(res1a, res2a)
 
 
+def test_add_time_stamped_data():
+    """Test the function to add time-stamped data"""
+    processor = SedProcessor(
+        folder=df_folder + "../mpes/",
+        config=package_dir + "/config/mpes_example_config.yaml",
+        folder_config={},
+        user_config={},
+        system_config={},
+        time_stamps=True,
+    )
+    df_ts = processor.dataframe.timeStamps.compute().values
+    data = np.linspace(0, 1, 20)
+    time_stamps = np.linspace(df_ts[0], df_ts[-1], 20)
+    processor.add_time_stamped_data(
+        time_stamps=time_stamps,
+        data=data,
+        dest_column="time_stamped_data",
+    )
+    assert "time_stamped_data" in processor.dataframe
+    res = processor.dataframe["time_stamped_data"].compute().values
+    assert res[0] == 0
+    assert res[-1] == 1
+    assert processor.attributes["time_stamped_data"][0] == "time_stamped_data"
+    np.testing.assert_array_equal(processor.attributes["time_stamped_data"][1], time_stamps)
+    np.testing.assert_array_equal(processor.attributes["time_stamped_data"][2], data)
+
+
 def test_event_histogram():
     """Test histogram plotting function"""
     config = parse_config(

From d4684dce89e5228366514e131fec2382599271ab Mon Sep 17 00:00:00 2001
From: rettigl <l.rettig@web.de>
Date: Sun, 5 Nov 2023 23:12:54 +0100
Subject: [PATCH 05/11] fix time stamps of timed data frame in mpes loader

---
 sed/loader/mpes/loader.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sed/loader/mpes/loader.py b/sed/loader/mpes/loader.py
index 23b9cf48..da10d8ef 100644
--- a/sed/loader/mpes/loader.py
+++ b/sed/loader/mpes/loader.py
@@ -369,7 +369,7 @@ def hdf5_to_timed_array(
             # need to correct for the time it took to write the file
             start_time -= len(ms_marker) / 1000
 
-        time_stamp_data = start_time + ms_marker / 1000
+        time_stamp_data = start_time + np.arange(len(ms_marker)) / 1000
 
         data_list.append(time_stamp_data)
 

From a732be61d267cc1e1b9491d114213e130610e820 Mon Sep 17 00:00:00 2001
From: rettigl <l.rettig@web.de>
Date: Sun, 5 Nov 2023 23:13:20 +0100
Subject: [PATCH 06/11] add time-stamped data to timed dataframe

---
 sed/core/processor.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/sed/core/processor.py b/sed/core/processor.py
index 0b60e323..1233dc1b 100644
--- a/sed/core/processor.py
+++ b/sed/core/processor.py
@@ -1771,6 +1771,16 @@ def add_time_stamped_data(
             time_stamp_column=time_stamp_column,
             **kwds,
         )
+        if self._timed_dataframe is not None:
+            if time_stamp_column in self._timed_dataframe:
+                self._timed_dataframe = add_time_stamped_data(
+                    self._timed_dataframe,
+                    time_stamps=time_stamps,
+                    data=data,
+                    dest_column=dest_column,
+                    time_stamp_column=time_stamp_column,
+                    **kwds,
+                )
         metadata: List[Any] = []
         metadata.append(dest_column)
         metadata.append(time_stamps)

From aa9f91293b1ef367057375a5f10b199a73ae2692 Mon Sep 17 00:00:00 2001
From: rettigl <l.rettig@web.de>
Date: Sun, 12 Nov 2023 23:19:41 +0100
Subject: [PATCH 07/11] use independent loader for energy corrector class
 correct k-distance for k calibration

---
 sed/core/processor.py                                        | 5 ++++-
 ...rsion_pipeline_for_example_time-resolved_ARPES_data.ipynb | 2 +-
 tutorial/sed_config.yaml                                     | 5 +++--
 3 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/sed/core/processor.py b/sed/core/processor.py
index 1233dc1b..dc879fcd 100644
--- a/sed/core/processor.py
+++ b/sed/core/processor.py
@@ -122,7 +122,10 @@ def __init__(
         )
 
         self.ec = EnergyCalibrator(
-            loader=self.loader,
+            loader=get_loader(
+                loader_name=loader_name,
+                config=self._config,
+                ),
             config=self._config,
         )
 
diff --git a/tutorial/2_conversion_pipeline_for_example_time-resolved_ARPES_data.ipynb b/tutorial/2_conversion_pipeline_for_example_time-resolved_ARPES_data.ipynb
index b920bd20..6c39b602 100644
--- a/tutorial/2_conversion_pipeline_for_example_time-resolved_ARPES_data.ipynb
+++ b/tutorial/2_conversion_pipeline_for_example_time-resolved_ARPES_data.ipynb
@@ -273,7 +273,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "k_distance = 4/3*np.pi/3.28\n",
+    "k_distance = 2/np.sqrt(3)*np.pi/3.28 # k-distance of the K-point in a hexagonal Brillouin zone\n",
     "#sp.calibrate_momentum_axes(k_distance = k_distance)\n",
     "point_a = [308, 345]\n",
     "sp.calibrate_momentum_axes(point_a=point_a, k_distance = k_distance, apply=True)\n",
diff --git a/tutorial/sed_config.yaml b/tutorial/sed_config.yaml
index 5577b42e..30888f19 100644
--- a/tutorial/sed_config.yaml
+++ b/tutorial/sed_config.yaml
@@ -13,12 +13,13 @@ energy:
     diameter: 3000.0
     gamma: 920.0
     sigma: 700.0
+  offset: {}
 momentum:
   calibration:
     cstart: -256.0
     cstep: 3.9921875
-    kx_scale: 0.012389400615413859
-    ky_scale: 0.012389400615413859
+    kx_scale: 0.010729535670610963
+    ky_scale: 0.010729535670610963
     rstart: -256.0
     rstep: 3.9921875
     x_center: 256.0

From b9ff427c0e66403f86359da3888273298c5c8510 Mon Sep 17 00:00:00 2001
From: rettigl <l.rettig@web.de>
Date: Sun, 12 Nov 2023 23:31:26 +0100
Subject: [PATCH 08/11] fix linting

---
 sed/core/processor.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sed/core/processor.py b/sed/core/processor.py
index dc879fcd..22c5abf1 100644
--- a/sed/core/processor.py
+++ b/sed/core/processor.py
@@ -125,7 +125,7 @@ def __init__(
             loader=get_loader(
                 loader_name=loader_name,
                 config=self._config,
-                ),
+            ),
             config=self._config,
         )
 

From bdd9e897368e3bac6e1a5b71179e6efd2be94479 Mon Sep 17 00:00:00 2001
From: rettigl <l.rettig@web.de>
Date: Mon, 20 Nov 2023 22:54:45 +0100
Subject: [PATCH 09/11] fix missing endpoint in bin_ranges in momentum
 corrector

---
 sed/calibrator/momentum.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sed/calibrator/momentum.py b/sed/calibrator/momentum.py
index 7fb585de..995d9613 100644
--- a/sed/calibrator/momentum.py
+++ b/sed/calibrator/momentum.py
@@ -173,7 +173,7 @@ def load_data(
                 self.bin_ranges.append(
                     (
                         data.coords[axis][0].values,
-                        data.coords[axis][-1].values,
+                        2 * data.coords[axis][-1].values - data.coords[axis][-2].values,  # endpoint
                     ),
                 )
         else:

From dee63664c729775e92f4cedfd0a4767a733f9cb4 Mon Sep 17 00:00:00 2001
From: rettigl <l.rettig@web.de>
Date: Mon, 20 Nov 2023 22:55:18 +0100
Subject: [PATCH 10/11] update momentum scaling

---
 sed/config/mpes_example_config.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sed/config/mpes_example_config.yaml b/sed/config/mpes_example_config.yaml
index b3e47670..95f93218 100644
--- a/sed/config/mpes_example_config.yaml
+++ b/sed/config/mpes_example_config.yaml
@@ -156,9 +156,9 @@ momentum:
   # default momentum calibration
   calibration:
     # x momentum scaleing factor
-    kx_scale: 0.012389400615413859
+    kx_scale: 0.010729535670610963
     # y momentum scaleing factor
-    ky_scale: 0.012389400615413859
+    ky_scale: 0.010729535670610963
     # x BZ center pixel
     x_center: 256.0
     # y BZ center pixel

From 5dc117a790425c0df4d0560a3f751baf0851bfa1 Mon Sep 17 00:00:00 2001
From: rettigl <l.rettig@web.de>
Date: Tue, 21 Nov 2023 10:30:39 +0100
Subject: [PATCH 11/11] add notebook and documentation

---
 .github/workflows/documentation.yml           |   3 +-
 docs/index.rst                                |   1 +
 .../6_binning_with_time-stamped_data.ipynb    | 352 ++++++++++++++++++
 tutorial/sed_config.yaml                      |  32 +-
 4 files changed, 371 insertions(+), 17 deletions(-)
 create mode 100644 tutorial/6_binning_with_time-stamped_data.ipynb

diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml
index 162abb2c..7dc25f70 100644
--- a/.github/workflows/documentation.yml
+++ b/.github/workflows/documentation.yml
@@ -74,12 +74,13 @@ jobs:
       #     path: $GITHUB_WORKSPACE/_build
       #     key: ${{ runner.os }}-docs
 
-      - name: download WSe2 data
+      - name: download RAW data
         # if: steps.cache-primes.outputs.cache-hit != 'true'
         run: |
           cd $GITHUB_WORKSPACE/docs/tutorial
           curl -L --output ./WSe2.zip https://zenodo.org/record/6369728/files/WSe2.zip
           unzip -o ./WSe2.zip -d .
+          curl -L --output ./TaS2.zip https://zenodo.org/records/10160182/files/TaS2.zip
 
       - name: build Sphinx docs
         run: poetry run sphinx-build -b html $GITHUB_WORKSPACE/docs $GITHUB_WORKSPACE/_build
diff --git a/docs/index.rst b/docs/index.rst
index 85e55254..bb7ad38b 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -9,6 +9,7 @@ Single-Event DataFrame (SED) documentation
    tutorial/1_binning_fake_data
    tutorial/2_conversion_pipeline_for_example_time-resolved_ARPES_data
    tutorial/3_metadata_collection_and_export_to_NeXus
+   tutorial/6_binning_with_time-stamped_data
 
 .. toctree::
    :maxdepth: 1
diff --git a/tutorial/6_binning_with_time-stamped_data.ipynb b/tutorial/6_binning_with_time-stamped_data.ipynb
new file mode 100644
index 00000000..930903f4
--- /dev/null
+++ b/tutorial/6_binning_with_time-stamped_data.ipynb
@@ -0,0 +1,352 @@
+{
+ "cells": [
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "8ad4167a-e4e7-498d-909a-c04da9f177ed",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "# Binning of temperature-dependent ARPES data using time-stamped external temperature data\n",
+    "In this example, we pull some temperature-dependent ARPES data from Zenodo, which was recorded as a continuous temperature ramp. We then add the respective temperature information from the respective timestamp/temperature values to the dataframe, and bin the data as a function of temperature.\n",
+    "For performance reasons, it is best to store the data on locally attached storage (no network drive). This can also be achieved transparently using the included MirrorUtil class."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fb045e17-fa89-4c11-9d51-7f06e80d96d5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%load_ext autoreload\n",
+    "%autoreload 2\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "import os\n",
+    "import time\n",
+    "import glob\n",
+    "\n",
+    "import sed\n",
+    "\n",
+    "%matplotlib widget"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "42a6afaa-17dd-4637-ba75-a28c4ead1adf",
+   "metadata": {},
+   "source": [
+    "# Load Data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "34f46d54",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data_path = './' # Put in Path to a storage of at least 20 Gbyte free space.\n",
+    "if not os.path.exists(data_path + \"/TaS2.zip\"):\n",
+    "    os.system(f\"curl -L --output {data_path}/TaS2.zip https://zenodo.org/records/10160182/files/TaS2.zip\")\n",
+    "if not os.path.isdir(data_path + \"/Scan0121_1\") or not os.path.isdir(data_path + \"/energycal_2020_07_20/\") or not os.path.isfile(data_path + \"/temperature_data.h5\"):\n",
+    "    os.system(f\"unzip -d {data_path} -o {data_path}/TaS2.zip\")\n",
+    "\n",
+    "# correct the file timestamps if the correct timezone was not set\n",
+    "tzoffset = os.path.getmtime(data_path + '/Scan0121_1/Scan0121_1.h5') - 1594998158.0\n",
+    "if tzoffset:\n",
+    "    for file in glob.glob(data_path + '/Scan0121_1/*.h5'):\n",
+    "        os.utime(file, (os.path.getmtime(file)-tzoffset, os.path.getmtime(file)-tzoffset))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f1f82054",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# The Scan directory\n",
+    "fdir = data_path + '/Scan0121_1'\n",
+    "# create sed processor using the config file with time-stamps:\n",
+    "sp = sed.SedProcessor(folder=fdir, user_config=\"../sed/config/mpes_example_config.yaml\", time_stamps=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "85ac3c83",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Apply jittering to X, Y, t, ADC columns.\n",
+    "sp.add_jitter()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "76bf8aad",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sp.bin_and_load_momentum_calibration(df_partitions=10, plane=33, width=3, apply=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "features = np.array([[337., 242.], [289., 327.], [187., 344.], [137., 258.], [189., 161.], [289., 158.], [236.0, 250.0]])\n",
+    "sp.define_features(features=features, rotation_symmetry=6, include_center=True, apply=True)\n",
+    "sp.generate_splinewarp(include_center=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "62abfa41",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Adjust pose alignment, using stored distortion correction\n",
+    "sp.pose_adjustment(xtrans=15, ytrans=8, angle=-5, apply=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "845f002d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Apply stored momentum correction\n",
+    "sp.apply_momentum_correction()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f9ae5066",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Apply stored config momentum calibration\n",
+    "sp.apply_momentum_calibration()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "eb1e2bee",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Apply stored config energy correction\n",
+    "sp.apply_energy_correction()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load energy calibration EDCs\n",
+    "energycalfolder = data_path + \"/energycal_2020_07_20/\"\n",
+    "scans = np.arange(127,136)\n",
+    "voltages = np.arange(22,13,-1)\n",
+    "files = [energycalfolder + r'Scan' + str(num).zfill(4) + '_1.h5' for num in scans]\n",
+    "sp.load_bias_series(data_files=files, normalize=True, biases=voltages, ranges=[(64000, 76000)])\n",
+    "rg = (65500, 66000)\n",
+    "sp.find_bias_peaks(ranges=rg, ref_id=5, infer_others=True, apply=True)\n",
+    "sp.calibrate_energy_axis(ref_energy=-0.5, ref_id=4, energy_scale=\"kinetic\", method=\"lmfit\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c470ffd9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Apply stored config energy calibration\n",
+    "sp.append_energy_axis()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0943d349",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# add time-stamped temperature data\n",
+    "# either retrieve the data directly from an EPICS archiver instance (within the FHI network),\n",
+    "#sp.add_time_stamped_data(dest_column=\"T_B\", archiver_channel=\"trARPES:Carving:TEMP-B\")\n",
+    "# or use externally provided timestamp/data pairs\n",
+    "import h5py\n",
+    "with h5py.File(\"temperature_data.h5\", \"r\") as file:\n",
+    "    data = file[\"temperatures\"][()]\n",
+    "    time_stamps = file[\"timestamps\"][()]\n",
+    "sp.add_time_stamped_data(dest_column=\"sample_temperature\", time_stamps=time_stamps, data=data)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c330da64",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# inspect calibrated event histogram\n",
+    "axes = ['kx', 'ky', 'energy', 'sample_temperature']\n",
+    "ranges = [[-3, 3], [-3, 3], [-6, 2], [10, 300]]\n",
+    "sp.view_event_histogram(dfpid=80, axes=axes, ranges=ranges)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "6902fd56-1456-4da6-83a4-0f3f6b831eb6",
+   "metadata": {},
+   "source": [
+    "# Define the binning ranges and compute calibrated data volume"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a7601cd7-cd51-40a9-8fc7-8b7d32ff15d0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "axes = ['kx', 'ky', 'energy', 'sample_temperature']\n",
+    "bins = [100, 100, 300, 100]\n",
+    "ranges = [[-2, 2], [-2, 2], [-6, 2], [20, 270]]\n",
+    "res = sp.compute(bins=bins, axes=axes, ranges=ranges, normalize_to_acquisition_time=\"sample_temperature\")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "523794dc",
+   "metadata": {},
+   "source": [
+    "# Some visualization:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "99d7d136-b677-4c16-bc8f-31ba8216579c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fig, axs = plt.subplots(4, 1, figsize=(4, 12), constrained_layout=True)\n",
+    "res.loc[{'energy':slice(-.1, 0)}].sum(axis=(2,3)).T.plot(ax=axs[0])\n",
+    "res.loc[{'kx':slice(-.2, .2)}].sum(axis=(0,3)).T.plot(ax=axs[1])\n",
+    "res.loc[{'ky':slice(-.2, .2)}].sum(axis=(1,3)).T.plot(ax=axs[2])\n",
+    "res.loc[{'kx':slice(-.2, .2), 'ky':slice(-.2, .2), 'energy':slice(-2, 0.2)}].sum(axis=(0,1)).plot(ax=axs[3])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "596a3217",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Inspect effect of histogram normalization\n",
+    "fig, ax = plt.subplots(1,1)\n",
+    "(sp._normalization_histogram/sp._normalization_histogram.sum()).plot(ax=ax)\n",
+    "(sp._binned.sum(axis=(0,1,2))/sp._binned.sum(axis=(0,1,2,3))).plot(ax=ax)\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "05488944",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Remaining fluctuations are an effect of the varying count rate throughout the scan\n",
+    "plt.figure()\n",
+    "rate, secs = sp.loader.get_count_rate()\n",
+    "plt.plot(secs, rate)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Normalize for intensity around the Gamma point\n",
+    "res_norm = res.copy()\n",
+    "res_norm = res_norm/res_norm.loc[{'kx':slice(-.3, .3), 'ky':slice(-.3, .3)}].sum(axis=(0,1,2))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fig, axs = plt.subplots(4, 1, figsize=(4, 12), constrained_layout=True)\n",
+    "res_norm.loc[{'energy':slice(-.1, 0)}].sum(axis=(2,3)).T.plot(ax=axs[0])\n",
+    "res_norm.loc[{'kx':slice(-.2, .2)}].sum(axis=(0,3)).T.plot(ax=axs[1])\n",
+    "res_norm.loc[{'ky':slice(-.2, .2)}].sum(axis=(1,3)).T.plot(ax=axs[2])\n",
+    "res_norm.loc[{'kx':slice(-.2, .2), 'ky':slice(-.2, .2), 'energy':slice(-2, 0.5)}].sum(axis=(0,1)).plot(ax=axs[3])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Lower Hubbard band intensity versus temperature\n",
+    "plt.figure()\n",
+    "res_norm.loc[{'kx':slice(-.2, .2), 'ky':slice(-.2, .2), 'energy':slice(-.6, 0.1)}].sum(axis=(0,1,2)).plot()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "interpreter": {
+   "hash": "728003ee06929e5fa5ff815d1b96bf487266025e4b7440930c6bf4536d02d243"
+  },
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/tutorial/sed_config.yaml b/tutorial/sed_config.yaml
index 30888f19..f402b251 100644
--- a/tutorial/sed_config.yaml
+++ b/tutorial/sed_config.yaml
@@ -17,29 +17,29 @@ energy:
 momentum:
   calibration:
     cstart: -256.0
-    cstep: 3.9921875
+    cstep: 4.0
     kx_scale: 0.010729535670610963
     ky_scale: 0.010729535670610963
     rstart: -256.0
-    rstep: 3.9921875
+    rstep: 4.0
     x_center: 256.0
     y_center: 256.0
   correction:
     feature_points:
-    - - 202.99667164649654
-      - 342.9841737181237
-    - - 299.87095669185146
-      - 346.1951264748602
-    - - 350.95080745426304
-      - 244.7908230308385
-    - - 305.6268110815786
-      - 150.20132111991873
-    - - 199.5398499983996
-      - 152.77801048162016
-    - - 153.40923361300395
-      - 243.06399842230255
-    - - 249.232157094759
-      - 249.2577242394875
+    - - 203.11575556771575
+      - 343.1023874450215
+    - - 299.9643115931048
+      - 346.2942034781325
+    - - 351.05271790029917
+      - 244.87949469676045
+    - - 305.76331680416877
+      - 150.31266296600884
+    - - 199.64692385066613
+      - 152.8942716287488
+    - - 153.52099335728917
+      - 243.17230043901452
+    - - 249.32627242026467
+      - 249.34641745326562
     include_center: true
     rotation_symmetry: 6
     use_center: true