From 9f0998446b46362727c8c1bf16d1ce3f5ef2b50a Mon Sep 17 00:00:00 2001 From: rettigl Date: Tue, 31 Oct 2023 00:00:29 +0100 Subject: [PATCH 01/11] basic function for adding time-stamped data to dataframe --- sed/core/dfops.py | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/sed/core/dfops.py b/sed/core/dfops.py index 975f8c1e..93bfced2 100644 --- a/sed/core/dfops.py +++ b/sed/core/dfops.py @@ -112,6 +112,45 @@ def apply_filter( return out_df +def add_time_stamped_data( + df: Union[pd.DataFrame, dask.dataframe.DataFrame], + time_stamps: np.ndarray, + data: np.ndarray, + dest_column: str, + time_stamp_column: str, + **kwds, +) -> Union[pd.DataFrame, dask.dataframe.DataFrame]: + """Add data in form of timestamp/value pairs to the dataframe using interpolation to the + timestamps in the dataframe. + + Args: + df (Union[pd.DataFrame, dask.dataframe.DataFrame]): Dataframe to use. + time_stamps (np.ndarray): Time stamps of the values to add + data (np.ndarray): Values corresponding at the time stamps in time_stamps + dest_column (str): destination column name + time_stamp_column (str): Time stamp column name + + Returns: + Union[pd.DataFrame, dask.dataframe.DataFrame]: Dataframe with added column + """ + if time_stamp_column not in df.columns: + raise ValueError(f"{time_stamp_column} not found in dataframe!") + + if len(time_stamps) != len(data): + raise ValueError("time_stamps and data have to be of same length!") + + def interpolate_timestamps( + df: Union[pd.DataFrame, dask.dataframe.DataFrame], + ) -> Union[pd.DataFrame, dask.dataframe.DataFrame]: + df_timestamps = df[time_stamp_column] + df[dest_column] = np.interp(df_timestamps, time_stamps, data) + return df + + df = df.map_partitions(interpolate_timestamps, **kwds) + + return df + + def map_columns_2d( df: Union[pd.DataFrame, dask.dataframe.DataFrame], map_2d: Callable, From 5c352dd61d7cd093bfa820fb381c37bb1331453b Mon Sep 17 00:00:00 2001 From: rettigl Date: Tue, 31 Oct 
2023 23:08:55 +0100 Subject: [PATCH 02/11] add tests, and limit function to only work with dask dataframes because it uses map_partitions --- sed/core/dfops.py | 11 ++++++---- tests/test_dfops.py | 49 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 4 deletions(-) diff --git a/sed/core/dfops.py b/sed/core/dfops.py index 93bfced2..33198ef1 100644 --- a/sed/core/dfops.py +++ b/sed/core/dfops.py @@ -113,13 +113,13 @@ def apply_filter( def add_time_stamped_data( - df: Union[pd.DataFrame, dask.dataframe.DataFrame], + df: dask.dataframe.DataFrame, time_stamps: np.ndarray, data: np.ndarray, dest_column: str, time_stamp_column: str, **kwds, -) -> Union[pd.DataFrame, dask.dataframe.DataFrame]: +) -> dask.dataframe.DataFrame: """Add data in form of timestamp/value pairs to the dataframe using interpolation to the timestamps in the dataframe. @@ -140,12 +140,15 @@ def add_time_stamped_data( raise ValueError("time_stamps and data have to be of same length!") def interpolate_timestamps( - df: Union[pd.DataFrame, dask.dataframe.DataFrame], - ) -> Union[pd.DataFrame, dask.dataframe.DataFrame]: + df: dask.dataframe.DataFrame, + ) -> dask.dataframe.DataFrame: df_timestamps = df[time_stamp_column] df[dest_column] = np.interp(df_timestamps, time_stamps, data) return df + if not isinstance(df, dask.dataframe.DataFrame): + raise ValueError("This function only works for Dask Dataframes!") + df = df.map_partitions(interpolate_timestamps, **kwds) return df diff --git a/tests/test_dfops.py b/tests/test_dfops.py index 3425003c..b41dd8ab 100644 --- a/tests/test_dfops.py +++ b/tests/test_dfops.py @@ -1,10 +1,13 @@ """This file contains code that performs several tests for the dfops functions """ +import datetime as dt + import dask.dataframe as ddf import numpy as np import pandas as pd import pytest +from sed.core.dfops import add_time_stamped_data from sed.core.dfops import apply_filter from sed.core.dfops import apply_jitter from sed.core.dfops import 
backward_fill_lazy @@ -56,6 +59,52 @@ def test_apply_filter(): assert np.all(df_filtered[colname] < upper_bound) +def test_add_time_stamped_data(): + """Test the addition of time-stamped data to the df.""" + df_ts = df + time_stamp = dt.datetime.now().timestamp() + df_ts["timeStamps"] = time_stamp + np.linspace(0, 100, N_PTS) + data = np.linspace(0, 1, 20) + time_stamps = time_stamp + np.linspace(0, 100, 20) + with pytest.raises(ValueError): + add_time_stamped_data( + df=df_ts, + time_stamps=time_stamps, + data=data, + dest_column="time_stamped_data", + time_stamp_column="timeStamps", + ) + dd_ts = ddf.from_pandas(df_ts, npartitions=N_PARTITIONS) + with pytest.raises(ValueError): + add_time_stamped_data( + df=dd_ts, + time_stamps=time_stamps, + data=data, + dest_column="time_stamped_data", + time_stamp_column="invalidColumn", + ) + dd_ts = add_time_stamped_data( + df=dd_ts, + time_stamps=time_stamps, + data=data, + dest_column="time_stamped_data", + time_stamp_column="timeStamps", + ) + assert "time_stamped_data" in dd_ts + res = dd_ts["time_stamped_data"].compute().values + assert res[0] == 0 + assert res[-1] == 1 + with pytest.raises(ValueError): + data = np.linspace(0, 1, 19) + add_time_stamped_data( + df=dd_ts, + time_stamps=time_stamps, + data=data, + dest_column="time_stamped_data", + time_stamp_column="timeStamps", + ) + + def test_map_columns_2d(): """Test mapping of a 2D-function onto the df.""" From 06a20158e5bf143ab43ae856ea5e639c79355bea Mon Sep 17 00:00:00 2001 From: rettigl Date: Wed, 1 Nov 2023 21:35:51 +0100 Subject: [PATCH 03/11] move determination of start and end time stamps and archiver data extraction into seperate functions --- sed/loader/mpes/loader.py | 88 ++++++++++++++++++++++++++------------- 1 file changed, 59 insertions(+), 29 deletions(-) diff --git a/sed/loader/mpes/loader.py b/sed/loader/mpes/loader.py index b145cb0d..23b9cf48 100644 --- a/sed/loader/mpes/loader.py +++ b/sed/loader/mpes/loader.py @@ -441,6 +441,34 @@ def 
get_elapsed_time( return secs +def get_archiver_data( + archiver_url: str, + archiver_channel: str, + ts_from: float, + ts_to: float, +) -> Tuple[np.ndarray, np.ndarray]: + """Extract time stamps and corresponding data from an EPICS archiver instance + + Args: + archiver_url (str): URL of the archiver data extraction interface + archiver_channel (str): EPICS channel to extract data for + ts_from (float): starting time stamp of the range of interest + ts_to (float): ending time stamp of the range of interest + + Returns: + Tuple[np.ndarray, np.ndarray]: The extracted time stamps and corresponding data + """ + iso_from = datetime.datetime.utcfromtimestamp(ts_from).isoformat() + iso_to = datetime.datetime.utcfromtimestamp(ts_to).isoformat() + req_str = archiver_url + archiver_channel + "&from=" + iso_from + "Z&to=" + iso_to + "Z" + with urllib.request.urlopen(req_str) as req: + data = json.load(req) + secs = [x["secs"] + x["nanos"] * 1e-9 for x in data[0]["data"]] + vals = [x["val"] for x in data[0]["data"]] + + return (np.asarray(secs), np.asarray(vals)) + + class MpesLoader(BaseLoader): """Mpes implementation of the Loader. 
Reads from h5 files or folders of the SPECS Metis 1000 (FHI Berlin) @@ -645,6 +673,28 @@ def get_files_from_run_id( # Return the list of found files return files + def get_start_and_end_time(self) -> Tuple[float, float]: + """Extract the start and end time stamps from the loaded files + + Returns: + Tuple[float, float]: A tuple containing the start and end time stamps + """ + h5file = h5py.File(self.files[0]) + timestamps = hdf5_to_array( + h5file, + group_names=self._config["dataframe"]["hdf5_groupnames"], + time_stamps=True, + ) + ts_from = timestamps[-1][1] + h5file = h5py.File(self.files[-1]) + timestamps = hdf5_to_array( + h5file, + group_names=self._config["dataframe"]["hdf5_groupnames"], + time_stamps=True, + ) + ts_to = timestamps[-1][-1] + return (ts_from, ts_to) + def gather_metadata( self, files: Sequence[str], @@ -666,21 +716,7 @@ def gather_metadata( print("Gathering metadata from different locations") # Read events in with ms time stamps print("Collecting time stamps...") - - h5file = h5py.File(files[0]) - timestamps = hdf5_to_array( - h5file, - group_names=self._config["dataframe"]["hdf5_groupnames"], - time_stamps=True, - ) - ts_from = timestamps[-1][1] - h5file = h5py.File(files[-1]) - timestamps = hdf5_to_array( - h5file, - group_names=self._config["dataframe"]["hdf5_groupnames"], - time_stamps=True, - ) - ts_to = timestamps[-1][-1] + (ts_from, ts_to) = self.get_start_and_end_time() metadata["timing"] = { "acquisition_start": datetime.datetime.utcfromtimestamp(ts_from) @@ -709,28 +745,22 @@ def gather_metadata( print("Collecting data from the EPICS archive...") # Get metadata from Epics archive if not present already - start = datetime.datetime.utcfromtimestamp(ts_from).isoformat() - end = datetime.datetime.utcfromtimestamp(ts_to).isoformat() epics_channels = self._config["metadata"]["epics_pvs"] + start = datetime.datetime.utcfromtimestamp(ts_from).isoformat() + channels_missing = set(epics_channels) - set( metadata["file"].keys(), ) for channel 
in channels_missing: try: - req_str = ( - "http://aa0.fhi-berlin.mpg.de:17668/retrieval/data/getData.json?pv=" - + channel - + "&from=" - + start - + "Z&to=" - + end - + "Z" + _, vals = get_archiver_data( + archiver_url=self._config["metadata"].get("archiver_url"), + archiver_channel=channel, + ts_from=ts_from, + ts_to=ts_to, ) - with urllib.request.urlopen(req_str) as req: - data = json.load(req) - vals = [x["val"] for x in data[0]["data"]] - metadata["file"][f"{channel}"] = np.mean(vals) + metadata["file"][f"{channel}"] = np.mean(vals) except IndexError: metadata["file"][f"{channel}"] = np.nan From 685bcbc599fa80c753d161cbcb18840aa799789b Mon Sep 17 00:00:00 2001 From: rettigl Date: Wed, 1 Nov 2023 21:50:56 +0100 Subject: [PATCH 04/11] add processor function to add time-stamped data either from directly provided data or from data extracted from an EPICS archiver instance, and add tests for it --- sed/config/mpes_example_config.yaml | 2 + sed/core/processor.py | 62 +++++++++++++++++++++++++++++ tests/test_processor.py | 27 +++++++++++++ 3 files changed, 91 insertions(+) diff --git a/sed/config/mpes_example_config.yaml b/sed/config/mpes_example_config.yaml index 5b9eca7b..b3e47670 100644 --- a/sed/config/mpes_example_config.yaml +++ b/sed/config/mpes_example_config.yaml @@ -216,6 +216,8 @@ histogram: ranges: [[0, 1800], [0, 1800], [128000, 138000], [0, 32000]] metadata: + # URL of the epics archiver request engine + archiver_url: "http://aa0.fhi-berlin.mpg.de:17668/retrieval/data/getData.json?pv=" # EPICS channels to collect from EPICS archiver epics_pvs: ["KTOF:Lens:Extr:I", "trARPES:Carving:TEMP_RBV", "trARPES:XGS600:PressureAC:P_RD", "KTOF:Lens:UDLD:V", "KTOF:Lens:Sample:V", "KTOF:Apertures:m1.RBV", "KTOF:Apertures:m2.RBV", "KTOF:Apertures:m3.RBV", "trARPES:Carving:TRX.RBV", "trARPES:Carving:TRY.RBV", "trARPES:Carving:TRZ.RBV", "trARPES:Carving:THT.RBV", "trARPES:Carving:PHI.RBV", "trARPES:Carving:OMG.RBV"] # hdf5 attribute containing the field aperture "in" 
motor position diff --git a/sed/core/processor.py b/sed/core/processor.py index 3b5cc7a1..0b60e323 100644 --- a/sed/core/processor.py +++ b/sed/core/processor.py @@ -26,6 +26,7 @@ from sed.core.config import parse_config from sed.core.config import save_config from sed.core.dfops import apply_filter +from sed.core.dfops import add_time_stamped_data from sed.core.dfops import apply_jitter from sed.core.metadata import MetaHandler from sed.diagnostics import grid_histogram @@ -34,6 +35,8 @@ from sed.io import to_tiff from sed.loader import CopyTool from sed.loader import get_loader +from sed.loader.mpes.loader import get_archiver_data +from sed.loader.mpes.loader import MpesLoader N_CPU = psutil.cpu_count() @@ -1715,6 +1718,65 @@ def add_jitter( metadata.append(col) self._attributes.add(metadata, "jittering", duplicate_policy="append") + def add_time_stamped_data( + self, + dest_column: str, + time_stamps: np.ndarray = None, + data: np.ndarray = None, + archiver_channel: str = None, + **kwds, + ): + """Add data in form of timestamp/value pairs to the dataframe using interpolation to the + timestamps in the dataframe. The time-stamped data can either be provided, or fetched from + an EPICS archiver instance. + + Args: + dest_column (str): destination column name + time_stamps (np.ndarray, optional): Time stamps of the values to add. If omitted, + time stamps are retrieved from the epics archiver + data (np.ndarray, optional): Values corresponding at the time stamps in time_stamps. + If omitted, data are retrieved from the epics archiver. + archiver_channel (str, optional): EPICS archiver channel from which to retrieve data. + Either this or data and time_stamps have to be present. 
+ **kwds: additional keyword arguments passed to add_time_stamped_data + """ + time_stamp_column = kwds.pop( + "time_stamp_column", + self._config["dataframe"].get("time_stamp_alias", ""), + ) + + if time_stamps is None and data is None: + if archiver_channel is None: + raise ValueError( + "Either archiver_channel or both time_stamps and data have to be present!", + ) + if self.loader.__name__ != "mpes": + raise NotImplementedError( + "This function is currently only implemented for the mpes loader!", + ) + ts_from, ts_to = cast(MpesLoader, self.loader).get_start_and_end_time() + # get channel data with +-5 seconds safety margin + time_stamps, data = get_archiver_data( + archiver_url=self._config["metadata"].get("archiver_url", ""), + archiver_channel=archiver_channel, + ts_from=ts_from - 5, + ts_to=ts_to + 5, + ) + + self._dataframe = add_time_stamped_data( + self._dataframe, + time_stamps=time_stamps, + data=data, + dest_column=dest_column, + time_stamp_column=time_stamp_column, + **kwds, + ) + metadata: List[Any] = [] + metadata.append(dest_column) + metadata.append(time_stamps) + metadata.append(data) + self._attributes.add(metadata, "time_stamped_data", duplicate_policy="append") + def pre_binning( self, df_partitions: int = 100, diff --git a/tests/test_processor.py b/tests/test_processor.py index 01097670..701cbce3 100644 --- a/tests/test_processor.py +++ b/tests/test_processor.py @@ -726,6 +726,33 @@ def test_add_jitter(): np.testing.assert_allclose(res1a, res2a) +def test_add_time_stamped_data(): + """Test the function to add time-stamped data""" + processor = SedProcessor( + folder=df_folder + "../mpes/", + config=package_dir + "/config/mpes_example_config.yaml", + folder_config={}, + user_config={}, + system_config={}, + time_stamps=True, + ) + df_ts = processor.dataframe.timeStamps.compute().values + data = np.linspace(0, 1, 20) + time_stamps = np.linspace(df_ts[0], df_ts[-1], 20) + processor.add_time_stamped_data( + time_stamps=time_stamps, + data=data, 
+ dest_column="time_stamped_data", + ) + assert "time_stamped_data" in processor.dataframe + res = processor.dataframe["time_stamped_data"].compute().values + assert res[0] == 0 + assert res[-1] == 1 + assert processor.attributes["time_stamped_data"][0] == "time_stamped_data" + np.testing.assert_array_equal(processor.attributes["time_stamped_data"][1], time_stamps) + np.testing.assert_array_equal(processor.attributes["time_stamped_data"][2], data) + + def test_event_histogram(): """Test histogram plotting function""" config = parse_config( From d4684dce89e5228366514e131fec2382599271ab Mon Sep 17 00:00:00 2001 From: rettigl Date: Sun, 5 Nov 2023 23:12:54 +0100 Subject: [PATCH 05/11] fix time stamps of timed data frame in mpes loader --- sed/loader/mpes/loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sed/loader/mpes/loader.py b/sed/loader/mpes/loader.py index 23b9cf48..da10d8ef 100644 --- a/sed/loader/mpes/loader.py +++ b/sed/loader/mpes/loader.py @@ -369,7 +369,7 @@ def hdf5_to_timed_array( # need to correct for the time it took to write the file start_time -= len(ms_marker) / 1000 - time_stamp_data = start_time + ms_marker / 1000 + time_stamp_data = start_time + np.arange(len(ms_marker)) / 1000 data_list.append(time_stamp_data) From a732be61d267cc1e1b9491d114213e130610e820 Mon Sep 17 00:00:00 2001 From: rettigl Date: Sun, 5 Nov 2023 23:13:20 +0100 Subject: [PATCH 06/11] add time-stamped data to timed dataframe --- sed/core/processor.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sed/core/processor.py b/sed/core/processor.py index 0b60e323..1233dc1b 100644 --- a/sed/core/processor.py +++ b/sed/core/processor.py @@ -1771,6 +1771,16 @@ def add_time_stamped_data( time_stamp_column=time_stamp_column, **kwds, ) + if self._timed_dataframe is not None: + if time_stamp_column in self._timed_dataframe: + self._timed_dataframe = add_time_stamped_data( + self._timed_dataframe, + time_stamps=time_stamps, + data=data, + 
dest_column=dest_column, + time_stamp_column=time_stamp_column, + **kwds, + ) metadata: List[Any] = [] metadata.append(dest_column) metadata.append(time_stamps) From aa9f91293b1ef367057375a5f10b199a73ae2692 Mon Sep 17 00:00:00 2001 From: rettigl Date: Sun, 12 Nov 2023 23:19:41 +0100 Subject: [PATCH 07/11] use independent loader for energy corrector class correct k-distance for k calibration --- sed/core/processor.py | 5 ++++- ...rsion_pipeline_for_example_time-resolved_ARPES_data.ipynb | 2 +- tutorial/sed_config.yaml | 5 +++-- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/sed/core/processor.py b/sed/core/processor.py index 1233dc1b..dc879fcd 100644 --- a/sed/core/processor.py +++ b/sed/core/processor.py @@ -122,7 +122,10 @@ def __init__( ) self.ec = EnergyCalibrator( - loader=self.loader, + loader=get_loader( + loader_name=loader_name, + config=self._config, + ), config=self._config, ) diff --git a/tutorial/2_conversion_pipeline_for_example_time-resolved_ARPES_data.ipynb b/tutorial/2_conversion_pipeline_for_example_time-resolved_ARPES_data.ipynb index b920bd20..6c39b602 100644 --- a/tutorial/2_conversion_pipeline_for_example_time-resolved_ARPES_data.ipynb +++ b/tutorial/2_conversion_pipeline_for_example_time-resolved_ARPES_data.ipynb @@ -273,7 +273,7 @@ "metadata": {}, "outputs": [], "source": [ - "k_distance = 4/3*np.pi/3.28\n", + "k_distance = 2/np.sqrt(3)*np.pi/3.28 # k-distance of the K-point in a hexagonal Brillouin zone\n", "#sp.calibrate_momentum_axes(k_distance = k_distance)\n", "point_a = [308, 345]\n", "sp.calibrate_momentum_axes(point_a=point_a, k_distance = k_distance, apply=True)\n", diff --git a/tutorial/sed_config.yaml b/tutorial/sed_config.yaml index 5577b42e..30888f19 100644 --- a/tutorial/sed_config.yaml +++ b/tutorial/sed_config.yaml @@ -13,12 +13,13 @@ energy: diameter: 3000.0 gamma: 920.0 sigma: 700.0 + offset: {} momentum: calibration: cstart: -256.0 cstep: 3.9921875 - kx_scale: 0.012389400615413859 - ky_scale: 
0.012389400615413859 + kx_scale: 0.010729535670610963 + ky_scale: 0.010729535670610963 rstart: -256.0 rstep: 3.9921875 x_center: 256.0 From b9ff427c0e66403f86359da3888273298c5c8510 Mon Sep 17 00:00:00 2001 From: rettigl Date: Sun, 12 Nov 2023 23:31:26 +0100 Subject: [PATCH 08/11] fix linting --- sed/core/processor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sed/core/processor.py b/sed/core/processor.py index dc879fcd..22c5abf1 100644 --- a/sed/core/processor.py +++ b/sed/core/processor.py @@ -125,7 +125,7 @@ def __init__( loader=get_loader( loader_name=loader_name, config=self._config, - ), + ), config=self._config, ) From bdd9e897368e3bac6e1a5b71179e6efd2be94479 Mon Sep 17 00:00:00 2001 From: rettigl Date: Mon, 20 Nov 2023 22:54:45 +0100 Subject: [PATCH 09/11] fix missing endpoint in bin_ranges in momentum corrector --- sed/calibrator/momentum.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sed/calibrator/momentum.py b/sed/calibrator/momentum.py index 7fb585de..995d9613 100644 --- a/sed/calibrator/momentum.py +++ b/sed/calibrator/momentum.py @@ -173,7 +173,7 @@ def load_data( self.bin_ranges.append( ( data.coords[axis][0].values, - data.coords[axis][-1].values, + 2 * data.coords[axis][-1].values - data.coords[axis][-2].values, # endpoint ), ) else: From dee63664c729775e92f4cedfd0a4767a733f9cb4 Mon Sep 17 00:00:00 2001 From: rettigl Date: Mon, 20 Nov 2023 22:55:18 +0100 Subject: [PATCH 10/11] update momentum scaling --- sed/config/mpes_example_config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sed/config/mpes_example_config.yaml b/sed/config/mpes_example_config.yaml index b3e47670..95f93218 100644 --- a/sed/config/mpes_example_config.yaml +++ b/sed/config/mpes_example_config.yaml @@ -156,9 +156,9 @@ momentum: # default momentum calibration calibration: # x momentum scaleing factor - kx_scale: 0.012389400615413859 + kx_scale: 0.010729535670610963 # y momentum scaleing factor - ky_scale: 
0.012389400615413859 + ky_scale: 0.010729535670610963 # x BZ center pixel x_center: 256.0 # y BZ center pixel From 5dc117a790425c0df4d0560a3f751baf0851bfa1 Mon Sep 17 00:00:00 2001 From: rettigl Date: Tue, 21 Nov 2023 10:30:39 +0100 Subject: [PATCH 11/11] add notebook and documentation --- .github/workflows/documentation.yml | 3 +- docs/index.rst | 1 + .../6_binning_with_time-stamped_data.ipynb | 352 ++++++++++++++++++ tutorial/sed_config.yaml | 32 +- 4 files changed, 371 insertions(+), 17 deletions(-) create mode 100644 tutorial/6_binning_with_time-stamped_data.ipynb diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml index 162abb2c..7dc25f70 100644 --- a/.github/workflows/documentation.yml +++ b/.github/workflows/documentation.yml @@ -74,12 +74,13 @@ jobs: # path: $GITHUB_WORKSPACE/_build # key: ${{ runner.os }}-docs - - name: download WSe2 data + - name: download RAW data # if: steps.cache-primes.outputs.cache-hit != 'true' run: | cd $GITHUB_WORKSPACE/docs/tutorial curl -L --output ./WSe2.zip https://zenodo.org/record/6369728/files/WSe2.zip unzip -o ./WSe2.zip -d . + curl -L --output ./TaS2.zip https://zenodo.org/records/10160182/files/TaS2.zip - name: build Sphinx docs run: poetry run sphinx-build -b html $GITHUB_WORKSPACE/docs $GITHUB_WORKSPACE/_build diff --git a/docs/index.rst b/docs/index.rst index 85e55254..bb7ad38b 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -9,6 +9,7 @@ Single-Event DataFrame (SED) documentation tutorial/1_binning_fake_data tutorial/2_conversion_pipeline_for_example_time-resolved_ARPES_data tutorial/3_metadata_collection_and_export_to_NeXus + tutorial/6_binning_with_time-stamped_data .. 
toctree:: :maxdepth: 1 diff --git a/tutorial/6_binning_with_time-stamped_data.ipynb b/tutorial/6_binning_with_time-stamped_data.ipynb new file mode 100644 index 00000000..930903f4 --- /dev/null +++ b/tutorial/6_binning_with_time-stamped_data.ipynb @@ -0,0 +1,352 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "8ad4167a-e4e7-498d-909a-c04da9f177ed", + "metadata": { + "tags": [] + }, + "source": [ + "# Binning of temperature-dependent ARPES data using time-stamped external temperature data\n", + "In this example, we pull some temperature-dependent ARPES data from Zenodo, which was recorded as a continuous temperature ramp. We then add the respective temperature information from the respective timestamp/temperature values to the dataframe, and bin the data as a function of temperature.\n", + "For performance reasons, best store the data on a locally attached storage (no network drive). This can also be achieved transparently using the included MirrorUtil class." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb045e17-fa89-4c11-9d51-7f06e80d96d5", + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import os\n", + "import time\n", + "import glob\n", + "\n", + "import sed\n", + "\n", + "%matplotlib widget" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "42a6afaa-17dd-4637-ba75-a28c4ead1adf", + "metadata": {}, + "source": [ + "# Load Data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "34f46d54", + "metadata": {}, + "outputs": [], + "source": [ + "data_path = './' # Put in Path to a storage of at least 20 Gbyte free space.\n", + "if not os.path.exists(data_path + \"/TaS2.zip\"):\n", + " os.system(f\"curl -L --output {data_path}/TaS2.zip https://zenodo.org/records/10160182/files/TaS2.zip\")\n", + "if not os.path.isdir(data_path + \"/Scan0121_1\") or not 
os.path.isdir(data_path + \"/energycal_2020_07_20/\") or not os.path.isfile(data_path + \"/temperature_data.h5\"):\n", + " os.system(f\"unzip -d {data_path} -o {data_path}/TaS2.zip\")\n", + "\n", + "# correct timestamps if not correct timezone set\n", + "tzoffset = os.path.getmtime(data_path + '/Scan0121_1/Scan0121_1.h5') - 1594998158.0\n", + "if tzoffset:\n", + " for file in glob.glob(data_path + '/Scan0121_1/*.h5'):\n", + " os.utime(file, (os.path.getmtime(file)-tzoffset, os.path.getmtime(file)-tzoffset))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f1f82054", + "metadata": {}, + "outputs": [], + "source": [ + "# The Scan directory\n", + "fdir = data_path + '/Scan0121_1'\n", + "# create sed processor using the config file with time-stamps:\n", + "sp = sed.SedProcessor(folder=fdir, user_config=\"../sed/config/mpes_example_config.yaml\", time_stamps=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "85ac3c83", + "metadata": {}, + "outputs": [], + "source": [ + "# Apply jittering to X, Y, t, ADC columns.\n", + "sp.add_jitter()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "76bf8aad", + "metadata": {}, + "outputs": [], + "source": [ + "sp.bin_and_load_momentum_calibration(df_partitions=10, plane=33, width=3, apply=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "features = np.array([[337., 242.], [289., 327.], [187., 344.], [137., 258.], [189., 161.], [289., 158.], [236.0, 250.0]])\n", + "sp.define_features(features=features, rotation_symmetry=6, include_center=True, apply=True)\n", + "sp.generate_splinewarp(include_center=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "62abfa41", + "metadata": {}, + "outputs": [], + "source": [ + "# Adjust pose alignment, using stored distortion correction\n", + "sp.pose_adjustment(xtrans=15, ytrans=8, angle=-5, apply=True)" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "id": "845f002d", + "metadata": {}, + "outputs": [], + "source": [ + "# Apply stored momentum correction\n", + "sp.apply_momentum_correction()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f9ae5066", + "metadata": {}, + "outputs": [], + "source": [ + "# Apply stored config momentum calibration\n", + "sp.apply_momentum_calibration()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "eb1e2bee", + "metadata": {}, + "outputs": [], + "source": [ + "# Apply stored config energy correction\n", + "sp.apply_energy_correction()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Load energy calibration EDCs\n", + "energycalfolder = data_path + \"/energycal_2020_07_20/\"\n", + "scans = np.arange(127,136)\n", + "voltages = np.arange(22,13,-1)\n", + "files = [energycalfolder + r'Scan' + str(num).zfill(4) + '_1.h5' for num in scans]\n", + "sp.load_bias_series(data_files=files, normalize=True, biases=voltages, ranges=[(64000, 76000)])\n", + "rg = (65500, 66000)\n", + "sp.find_bias_peaks(ranges=rg, ref_id=5, infer_others=True, apply=True)\n", + "sp.calibrate_energy_axis(ref_energy=-0.5, ref_id=4, energy_scale=\"kinetic\", method=\"lmfit\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c470ffd9", + "metadata": {}, + "outputs": [], + "source": [ + "# Apply stored config energy calibration\n", + "sp.append_energy_axis()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0943d349", + "metadata": {}, + "outputs": [], + "source": [ + "# add time-stamped temperature data\n", + "# either, directly retrieve data from EPICS archiver instance (within FHI network),\n", + "#sp.add_time_stamped_data(dest_column=\"T_B\", archiver_channel=\"trARPES:Carving:TEMP-B\")\n", + "# or use externally provided timestamp/data pairs\n", + "import h5py\n", + "with h5py.File(\"temperature_data.h5\", 
\"r\") as file:\n", + " data = file[\"temperatures\"][()]\n", + " time_stamps = file[\"timestamps\"][()]\n", + "sp.add_time_stamped_data(dest_column=\"sample_temperature\", time_stamps=time_stamps, data=data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c330da64", + "metadata": {}, + "outputs": [], + "source": [ + "# inspect calibrated event histogram\n", + "axes = ['kx', 'ky', 'energy', 'sample_temperature']\n", + "ranges = [[-3, 3], [-3, 3], [-6, 2], [10, 300]]\n", + "sp.view_event_histogram(dfpid=80, axes=axes, ranges=ranges)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "6902fd56-1456-4da6-83a4-0f3f6b831eb6", + "metadata": {}, + "source": [ + "# Define the binning ranges and compute calibrated data volume" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a7601cd7-cd51-40a9-8fc7-8b7d32ff15d0", + "metadata": {}, + "outputs": [], + "source": [ + "axes = ['kx', 'ky', 'energy', 'sample_temperature']\n", + "bins = [100, 100, 300, 100]\n", + "ranges = [[-2, 2], [-2, 2], [-6, 2], [20, 270]]\n", + "res = sp.compute(bins=bins, axes=axes, ranges=ranges, normalize_to_acquisition_time=\"sample_temperature\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "523794dc", + "metadata": {}, + "source": [ + "# Some visualization:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "99d7d136-b677-4c16-bc8f-31ba8216579c", + "metadata": {}, + "outputs": [], + "source": [ + "fig, axs = plt.subplots(4, 1, figsize=(4, 12), constrained_layout=True)\n", + "res.loc[{'energy':slice(-.1, 0)}].sum(axis=(2,3)).T.plot(ax=axs[0])\n", + "res.loc[{'kx':slice(-.2, .2)}].sum(axis=(0,3)).T.plot(ax=axs[1])\n", + "res.loc[{'ky':slice(-.2, .2)}].sum(axis=(1,3)).T.plot(ax=axs[2])\n", + "res.loc[{'kx':slice(-.2, .2), 'ky':slice(-.2, .2), 'energy':slice(-2, 0.2)}].sum(axis=(0,1)).plot(ax=axs[3])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "596a3217", + "metadata": 
{}, + "outputs": [], + "source": [ + "# Inspect effect of histogram normalization\n", + "fig, ax = plt.subplots(1,1)\n", + "(sp._normalization_histogram/sp._normalization_histogram.sum()).plot(ax=ax)\n", + "(sp._binned.sum(axis=(0,1,2))/sp._binned.sum(axis=(0,1,2,3))).plot(ax=ax)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "05488944", + "metadata": {}, + "outputs": [], + "source": [ + "# Remaining fluctuations are an effect of the varying count rate throughout the scan\n", + "plt.figure()\n", + "rate, secs = sp.loader.get_count_rate()\n", + "plt.plot(secs, rate)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Normalize for intensity around the Gamma point\n", + "res_norm = res.copy()\n", + "res_norm = res_norm/res_norm.loc[{'kx':slice(-.3, .3), 'ky':slice(-.3, .3)}].sum(axis=(0,1,2))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig, axs = plt.subplots(4, 1, figsize=(4, 12), constrained_layout=True)\n", + "res_norm.loc[{'energy':slice(-.1, 0)}].sum(axis=(2,3)).T.plot(ax=axs[0])\n", + "res_norm.loc[{'kx':slice(-.2, .2)}].sum(axis=(0,3)).T.plot(ax=axs[1])\n", + "res_norm.loc[{'ky':slice(-.2, .2)}].sum(axis=(1,3)).T.plot(ax=axs[2])\n", + "res_norm.loc[{'kx':slice(-.2, .2), 'ky':slice(-.2, .2), 'energy':slice(-2, 0.5)}].sum(axis=(0,1)).plot(ax=axs[3])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Lower Hubbard band intensity versus temperature\n", + "plt.figure()\n", + "res_norm.loc[{'kx':slice(-.2, .2), 'ky':slice(-.2, .2), 'energy':slice(-.6, 0.1)}].sum(axis=(0,1,2)).plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "interpreter": { + "hash": "728003ee06929e5fa5ff815d1b96bf487266025e4b7440930c6bf4536d02d243" + }, + 
"kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tutorial/sed_config.yaml b/tutorial/sed_config.yaml index 30888f19..f402b251 100644 --- a/tutorial/sed_config.yaml +++ b/tutorial/sed_config.yaml @@ -17,29 +17,29 @@ energy: momentum: calibration: cstart: -256.0 - cstep: 3.9921875 + cstep: 4.0 kx_scale: 0.010729535670610963 ky_scale: 0.010729535670610963 rstart: -256.0 - rstep: 3.9921875 + rstep: 4.0 x_center: 256.0 y_center: 256.0 correction: feature_points: - - - 202.99667164649654 - - 342.9841737181237 - - - 299.87095669185146 - - 346.1951264748602 - - - 350.95080745426304 - - 244.7908230308385 - - - 305.6268110815786 - - 150.20132111991873 - - - 199.5398499983996 - - 152.77801048162016 - - - 153.40923361300395 - - 243.06399842230255 - - - 249.232157094759 - - 249.2577242394875 + - - 203.11575556771575 + - 343.1023874450215 + - - 299.9643115931048 + - 346.2942034781325 + - - 351.05271790029917 + - 244.87949469676045 + - - 305.76331680416877 + - 150.31266296600884 + - - 199.64692385066613 + - 152.8942716287488 + - - 153.52099335728917 + - 243.17230043901452 + - - 249.32627242026467 + - 249.34641745326562 include_center: true rotation_symmetry: 6 use_center: true