diff --git a/.cspell/custom-dictionary.txt b/.cspell/custom-dictionary.txt index 74914d19..da7ec322 100644 --- a/.cspell/custom-dictionary.txt +++ b/.cspell/custom-dictionary.txt @@ -38,6 +38,7 @@ caldir calib calibdict caplog +capsys cdeform cdeformfield cdisp @@ -77,6 +78,7 @@ datastreams datestring ddir delaxes +delayeds Desy dfield dfops diff --git a/benchmarks/benchmark_sed.py b/benchmarks/benchmark_sed.py index 3b633c30..ba181f42 100644 --- a/benchmarks/benchmark_sed.py +++ b/benchmarks/benchmark_sed.py @@ -121,6 +121,7 @@ def test_workflow_1d() -> None: system_config={}, verbose=True, ) + processor.dataframe["sampleBias"] = 16.7 processor.add_jitter() processor.apply_momentum_correction() processor.apply_momentum_calibration() @@ -155,6 +156,7 @@ def test_workflow_4d() -> None: system_config={}, verbose=True, ) + processor.dataframe["sampleBias"] = 16.7 processor.add_jitter() processor.apply_momentum_correction() processor.apply_momentum_calibration() diff --git a/sed/calibrator/energy.py b/sed/calibrator/energy.py index 3eefef60..79ef59ac 100644 --- a/sed/calibrator/energy.py +++ b/sed/calibrator/energy.py @@ -4,7 +4,6 @@ from __future__ import annotations import itertools as it -import warnings as wn from collections.abc import Sequence from copy import deepcopy from datetime import datetime @@ -505,7 +504,7 @@ def feature_extract( def calibrate( self, - ref_id: int = 0, + ref_energy: float = 0, method: str = "lmfit", energy_scale: str = "kinetic", landmarks: np.ndarray = None, @@ -518,8 +517,7 @@ def calibrate( scale using optimization methods. Args: - ref_id (int, optional): The reference trace index (an integer). - Defaults to 0. + ref_energy (float): Binding/kinetic energy of the detected feature. method (str, optional): Method for determining the energy calibration. - **'lmfit'**: Energy calibration using lmfit and 1/t^2 form. @@ -574,7 +572,7 @@ def calibrate( sign * biases, binwidth, binning, - ref_id=ref_id, + ref_energy=ref_energy, t=t, energy_scale=energy_scale, verbose=verbose, @@ -584,7 +582,7 @@ def calibrate( self.calibration = poly_energy_calibration( landmarks, sign * biases, - ref_id=ref_id, + ref_energy=ref_energy, aug=self.dup, method=method, t=t, @@ -652,7 +650,7 @@ def view( # pylint: disable=dangerous-default-value for itr, trace in enumerate(traces): if align: ax.plot( - xaxis + sign * (self.biases[itr] - self.biases[self.calibration["refid"]]), + xaxis + sign * (self.biases[itr]), trace, ls="-", linewidth=1, @@ -715,7 +713,7 @@ def view( # pylint: disable=dangerous-default-value trace = traces[itr, :] if align: fig.line( - xaxis + sign * (self.biases[itr] - self.biases[self.calibration["refid"]]), + xaxis + sign * (self.biases[itr]), trace, color=color, line_dash="solid", @@ -774,6 +772,7 @@ def append_energy_axis( tof_column: str = None, energy_column: str = None, calibration: dict = None, + bias_voltage: float = None, verbose: bool = True, **kwds, ) -> tuple[pd.DataFrame | dask.dataframe.DataFrame, dict]: @@ -789,6 +788,9 @@ def append_energy_axis( calibration (dict, optional): Calibration dictionary. If provided, overrides calibration from class or config. Defaults to self.calibration or config["energy"]["calibration"]. + bias_voltage (float, optional): Sample bias voltage of the scan data. If omitted, + the bias voltage is read from the dataframe. If it is not found there, + a warning is printed and the calibrated data might have an offset. verbose (bool, optional): Option to print out diagnostic information. Defaults to True.
**kwds: additional keyword arguments for the energy conversion. They are @@ -838,6 +840,8 @@ def append_energy_axis( elif "coeffs" in calibration and "E0" in calibration: calibration["calib_type"] = "poly" + if "energy_scale" not in calibration: + calibration["energy_scale"] = "kinetic" else: raise ValueError("No valid calibration parameters provided!") @@ -877,6 +881,20 @@ def append_energy_axis( else: raise NotImplementedError + # apply bias offset + scale_sign: Literal[-1, 1] = -1 if calibration["energy_scale"] == "binding" else 1 + if bias_voltage is not None: + df[energy_column] = df[energy_column] + scale_sign * bias_voltage + elif self._config["dataframe"]["bias_column"] in df.columns: + df = dfops.offset_by_other_columns( + df=df, + target_column=energy_column, + offset_columns=self._config["dataframe"]["bias_column"], + weights=scale_sign, + ) + else: + print("Sample bias data not found or provided. Calibrated energy might be incorrect.") + metadata = self.gather_calibration_metadata(calibration) return df, metadata @@ -1527,6 +1545,9 @@ def add_offsets( offsets["creation_date"] = datetime.now().timestamp() # column-based offsets if columns is not None: + if isinstance(columns, str): + columns = [columns] + if weights is None: weights = 1 if isinstance(weights, (int, float, np.integer, np.floating)): @@ -1538,10 +1559,13 @@ def add_offsets( if not all(isinstance(s, (int, float, np.integer, np.floating)) for s in weights): raise TypeError(f"Invalid type for weights: {type(weights)}") - if isinstance(columns, str): - columns = [columns] - if isinstance(preserve_mean, bool): - preserve_mean = [preserve_mean] * len(columns) + if preserve_mean is None: + preserve_mean = False + if not isinstance(preserve_mean, Sequence): + preserve_mean = [preserve_mean] + if len(preserve_mean) == 1: + preserve_mean = [preserve_mean[0]] * len(columns) + if not isinstance(reductions, Sequence): reductions = [reductions] if len(reductions) == 1: @@ -1625,10 +1649,7 @@ def add_offsets( if constant: if not isinstance(constant, (int, float, np.integer, np.floating)): raise TypeError(f"Invalid type for constant: {type(constant)}") - df[energy_column] = df.map_partitions( - lambda x: x[energy_column] + constant, - meta=(energy_column, np.float64), - ) + df[energy_column] = df[energy_column] + constant self.offsets = offsets metadata["offsets"] = offsets @@ -2082,8 +2103,7 @@ def fit_energy_calibration( vals: list[float] | np.ndarray, binwidth: float, binning: int, - ref_id: int = 0, - ref_energy: float = None, + ref_energy: float, t: list[float] | np.ndarray = None, energy_scale: str = "kinetic", verbose: bool = True, @@ -2100,9 +2120,8 @@ def fit_energy_calibration( each EDC. binwidth (float): Time width of each original TOF bin in ns. binning (int): Binning factor of the TOF values. - ref_id (int, optional): Reference dataset index. Defaults to 0. - ref_energy (float, optional): Energy value of the feature in the reference - trace (eV). required to output the calibration. Defaults to None. + ref_energy (float): Energy value of the feature in the reference + trace (eV). t (list[float] | np.ndarray, optional): Array of TOF values. Required to calculate calibration trace. Defaults to None. energy_scale (str, optional): Direction of increasing energy scale. @@ -2127,14 +2146,6 @@ def fit_energy_calibration( - "axis": Fitted energy axis. 
""" vals = np.asarray(vals) - nvals = vals.size - - if ref_id >= nvals: - wn.warn( - "Reference index (refid) cannot be larger than the number of traces!\ - Reset to the largest allowed number.", - ) - ref_id = nvals - 1 def residual(pars, time, data, binwidth, binning, energy_scale): model = tof2ev( @@ -2200,12 +2211,11 @@ def residual(pars, time, data, binwidth, binning, energy_scale): ecalibdict["t0"] = result.params["t0"].value ecalibdict["E0"] = result.params["E0"].value ecalibdict["energy_scale"] = energy_scale + energy_offset = pfunc(-1 * ref_energy, pos[0]) + ecalibdict["E0"] = -(energy_offset - vals[0]) - if (ref_energy is not None) and (t is not None): - energy_offset = pfunc(-1 * ref_energy, pos[ref_id]) - ecalibdict["axis"] = pfunc(-energy_offset, t) - ecalibdict["E0"] = -energy_offset - ecalibdict["refid"] = ref_id + if t is not None: + ecalibdict["axis"] = pfunc(ecalibdict["E0"], t) return ecalibdict @@ -2213,9 +2223,8 @@ def residual(pars, time, data, binwidth, binning, energy_scale): def poly_energy_calibration( pos: list[float] | np.ndarray, vals: list[float] | np.ndarray, + ref_energy: float, order: int = 3, - ref_id: int = 0, - ref_energy: float = None, t: list[float] | np.ndarray = None, aug: int = 1, method: str = "lstsq", @@ -2235,10 +2244,9 @@ def poly_energy_calibration( (e.g. peaks) in the EDCs. vals (list[float] | np.ndarray): Bias voltage value associated with each EDC. + ref_energy (float): Energy value of the feature in the reference + trace (eV). order (int, optional): Polynomial order of the fitting function. Defaults to 3. - ref_id (int, optional): Reference dataset index. Defaults to 0. - ref_energy (float, optional): Energy value of the feature in the reference - trace (eV). required to output the calibration. Defaults to None. t (list[float] | np.ndarray, optional): Array of TOF values. Required to calculate calibration trace. Defaults to None. 
aug (int, optional): Fitting dimension augmentation @@ -2266,21 +2274,14 @@ def poly_energy_calibration( vals = np.asarray(vals) nvals = vals.size - if ref_id >= nvals: - wn.warn( - "Reference index (refid) cannot be larger than the number of traces!\ - Reset to the largest allowed number.", - ) - ref_id = nvals - 1 - # Top-to-bottom ordering of terms in the T matrix - termorder = np.delete(range(0, nvals, 1), ref_id) + termorder = np.delete(range(0, nvals, 1), 0) termorder = np.tile(termorder, aug) # Left-to-right ordering of polynomials in the T matrix polyorder = np.linspace(order, 1, order, dtype="int") # Construct the T (differential drift time) matrix, Tmat = Tmain - Tsec - t_main = np.array([pos[ref_id] ** p for p in polyorder]) + t_main = np.array([pos[0] ** p for p in polyorder]) # Duplicate to the same order as the polynomials t_main = np.tile(t_main, (aug * (nvals - 1), 1)) @@ -2292,7 +2293,7 @@ def poly_energy_calibration( t_mat = t_main - np.asarray(t_sec) # Construct the b vector (differential bias) - bvec = vals[ref_id] - np.delete(vals, ref_id) + bvec = vals[0] - np.delete(vals, 0) bvec = np.tile(bvec, aug) # Solve for the a vector (polynomial coefficients) using least squares @@ -2312,12 +2313,10 @@ def poly_energy_calibration( ecalibdict["Tmat"] = t_mat ecalibdict["bvec"] = bvec ecalibdict["energy_scale"] = energy_scale + ecalibdict["E0"] = -(pfunc(-1 * ref_energy, pos[0]) + vals[0]) - if ref_energy is not None and t is not None: - energy_offset = pfunc(-1 * ref_energy, pos[ref_id]) - ecalibdict["axis"] = pfunc(-energy_offset, t) - ecalibdict["E0"] = -energy_offset - ecalibdict["refid"] = ref_id + if t is not None: + ecalibdict["axis"] = pfunc(-ecalibdict["E0"], t) return ecalibdict diff --git a/sed/config/mpes_example_config.yaml b/sed/config/mpes_example_config.yaml index 5aa99157..4848945f 100644 --- a/sed/config/mpes_example_config.yaml +++ b/sed/config/mpes_example_config.yaml @@ -15,18 +15,10 @@ core: gid: 1001 dataframe: - # hdf5 group names to read from the h5 files (for mpes reader) - hdf5_groupnames: ["Stream_0", "Stream_1", "Stream_2", "Stream_4"] - # aliases to assign to the dataframe columns for the corresponding hdf5 streams - hdf5_aliases: - Stream_0: "X" - Stream_1: "Y" - Stream_2: "t" - Stream_4: "ADC" # dataframe column name for the time stamp column time_stamp_alias: "timeStamps" # hdf5 group name containing eventIDs occurring at every millisecond (used to calculate timestamps) - ms_markers_group: "msMarkers" + ms_markers_key: "msMarkers" # hdf5 attribute containing the timestamp of the first event in a file first_event_time_stamp_key: "FirstEventTimeStamp" # Time stepping in seconds of the successive events in the timed dataframe @@ -41,6 +33,8 @@ dataframe: tof_column: "t" # dataframe column containing analog-to-digital data adc_column: "ADC" + # dataframe column containing bias voltage data + bias_column: "sampleBias" # dataframe column containing corrected x coordinates corrected_x_column: "Xm" # dataframe column containing corrected y coordinates @@ -79,6 +73,29 @@ dataframe: kx: '1/A' ky: '1/A' + # dataframe channels and group names to read from the h5 files + channels: + # The X-channel + X: + format: per_electron + dataset_key: "Stream_0" + # The Y-channel + Y: + format: per_electron + dataset_key: "Stream_1" + # The tof-channel + t: + format: per_electron + dataset_key: "Stream_2" + # The ADC-channel + ADC: + format: per_electron + dataset_key: "Stream_4" + # The sample Bias-channel + sampleBias: + format: per_file + dataset_key: 
"KTOF:Lens:Sample:V" + energy: # Number of bins to use for energy calibration traces bins: 1000 diff --git a/sed/core/dfops.py b/sed/core/dfops.py index f449e481..d29bf2e0 100644 --- a/sed/core/dfops.py +++ b/sed/core/dfops.py @@ -392,53 +392,20 @@ def offset_by_other_columns( "Please open a request on GitHub if this feature is required.", ) - # calculate the mean of the columns to reduce - means = { - col: dask.delayed(df[col].mean()) - for col, red, pm in zip(offset_columns, reductions, preserve_mean) - if red or pm - } - - # define the functions to apply the offsets - def shift_by_mean(x, cols, signs, means, flip_signs=False): - """Shift the target column by the mean of the offset columns.""" - for col in cols: - s = -signs[col] if flip_signs else signs[col] - x[target_column] = x[target_column] + s * means[col] - return x[target_column] - - def shift_by_row(x, cols, signs): - """Apply the offsets to the target column.""" - for col in cols: - x[target_column] = x[target_column] + signs[col] * x[col] - return x[target_column] - # apply offset from the reduced columns - df[target_column] = df.map_partitions( - shift_by_mean, - cols=[col for col, red in zip(offset_columns, reductions) if red], - signs=signs_dict, - means=means, - meta=df[target_column].dtype, - ) + for col, red in zip(offset_columns, reductions): + if red == "mean": + df[target_column] = df[target_column] + signs_dict[col] * df[col].mean() # apply offset from the offset columns - df[target_column] = df.map_partitions( - shift_by_row, - cols=[col for col, red in zip(offset_columns, reductions) if not red], - signs=signs_dict, - meta=df[target_column].dtype, - ) + for col, red in zip(offset_columns, reductions): + if not red: + df[target_column] = df[target_column] + signs_dict[col] * df[col] # compensate shift from the preserved mean columns if any(preserve_mean): - df[target_column] = df.map_partitions( - shift_by_mean, - cols=[col for col, pmean in zip(offset_columns, preserve_mean) if pmean], - signs=signs_dict, - means=means, - flip_signs=True, - meta=df[target_column].dtype, - ) + for col, pmean in zip(offset_columns, preserve_mean): + if pmean: + df[target_column] = df[target_column] - signs_dict[col] * df[col].mean() return df diff --git a/sed/core/processor.py b/sed/core/processor.py index 1c1398e2..ae1b9082 100644 --- a/sed/core/processor.py +++ b/sed/core/processor.py @@ -1290,7 +1290,6 @@ def find_bias_peaks( # 3. Fit the energy calibration relation def calibrate_energy_axis( self, - ref_id: int, ref_energy: float, method: str = None, energy_scale: str = None, @@ -1303,10 +1302,7 @@ def calibrate_energy_axis( approximation, and a d^2/(t-t0)^2 relation. Args: - ref_id (int): id of the trace at the bias where the reference energy is - given. - ref_energy (float): Absolute energy of the detected feature at the bias - of ref_id + ref_energy (float): Binding/kinetic energy of the detected feature. method (str, optional): Method for determining the energy calibration. - **'lmfit'**: Energy calibration using lmfit and 1/t^2 form. 
@@ -1333,7 +1329,6 @@ def calibrate_energy_axis( energy_scale = self._config["energy"]["energy_scale"] self.ec.calibrate( - ref_id=ref_id, ref_energy=ref_energy, method=method, energy_scale=energy_scale, @@ -1350,23 +1345,29 @@ def calibrate_energy_axis( backend="bokeh", ) print("E/TOF relationship:") - self.ec.view( - traces=self.ec.calibration["axis"][None, :], - xaxis=self.ec.tof, - backend="matplotlib", - show_legend=False, - ) if energy_scale == "kinetic": + self.ec.view( + traces=self.ec.calibration["axis"][None, :] + self.ec.biases[0], + xaxis=self.ec.tof, + backend="matplotlib", + show_legend=False, + ) plt.scatter( self.ec.peaks[:, 0], - -(self.ec.biases - self.ec.biases[ref_id]) + ref_energy, + -(self.ec.biases - self.ec.biases[0]) + ref_energy, s=50, c="k", ) elif energy_scale == "binding": + self.ec.view( + traces=self.ec.calibration["axis"][None, :] - self.ec.biases[0], + xaxis=self.ec.tof, + backend="matplotlib", + show_legend=False, + ) plt.scatter( self.ec.peaks[:, 0], - self.ec.biases - self.ec.biases[ref_id] + ref_energy, + self.ec.biases - self.ec.biases[0] + ref_energy, s=50, c="k", ) @@ -1419,6 +1420,7 @@ def save_energy_calibration( def append_energy_axis( self, calibration: dict = None, + bias_voltage: float = None, preview: bool = False, verbose: bool = None, **kwds, @@ -1432,6 +1434,9 @@ def append_energy_axis( calibration (dict, optional): Calibration dict containing calibration parameters. Overrides calibration from class or config. Defaults to None. + bias_voltage (float, optional): Sample bias voltage of the scan data. If omitted, + the bias voltage is read from the dataframe. If it is not found there, + a warning is printed and the calibrated data might have an offset. preview (bool): Option to preview the first elements of the data frame. verbose (bool, optional): Option to print out diagnostic information. Defaults to config["core"]["verbose"]. @@ -1449,6 +1454,7 @@ def append_energy_axis( df, metadata = self.ec.append_energy_axis( df=self._dataframe, calibration=calibration, + bias_voltage=bias_voltage, verbose=verbose, **kwds, ) @@ -1456,6 +1462,7 @@ tdf, _ = self.ec.append_energy_axis( df=self._timed_dataframe, calibration=calibration, + bias_voltage=bias_voltage, verbose=False, **kwds, ) diff --git a/sed/loader/base/loader.py b/sed/loader/base/loader.py index 2b4c4c20..4a9962d3 100644 --- a/sed/loader/base/loader.py +++ b/sed/loader/base/loader.py @@ -108,7 +108,7 @@ def read_dataframe( elif files is None: raise ValueError( - "Either folder, file paths, or runs should be provided!", + "Either folders, files, or runs have to be provided!", ) if files is not None: diff --git a/sed/loader/mpes/loader.py b/sed/loader/mpes/loader.py index 5c8a793f..3fd7136a 100644 --- a/sed/loader/mpes/loader.py +++ b/sed/loader/mpes/loader.py @@ -10,6 +10,7 @@ import json import os from collections.abc import Sequence +from typing import Any from urllib.error import HTTPError from urllib.error import URLError from urllib.request import urlopen @@ -27,11 +28,10 @@ def hdf5_to_dataframe( files: Sequence[str], - group_names: Sequence[str] = None, - alias_dict: dict[str, str] = None, + channels: dict[str, Any] = None, time_stamps: bool = False, time_stamp_alias: str = "timeStamps", - ms_markers_group: str = "msMarkers", + ms_markers_key: str = "msMarkers", first_event_time_stamp_key: str = "FirstEventTimeStamp", **kwds, ) -> ddf.DataFrame: @@ -40,17 +40,14 @@ Args: files (List[str]): A list of the file paths to load.
- group_names (List[str], optional): hdf5 group names to load. Defaults to load - all groups containing "Stream" - alias_dict (dict[str, str], optional): Dictionary of aliases for the dataframe - columns. Keys are the hdf5 groupnames, and values the aliases. If an alias - is not found, its group name is used. Defaults to read the attribute - "Name" from each group. + channels (dict[str, Any], optional): hdf5 channel names to load. Each entry in the dict + should contain the keys "format" and "dataset_key". Defaults to load all groups + containing "Stream", and to read the attribute "Name" from each group. time_stamps (bool, optional): Option to calculate time stamps. Defaults to False. time_stamp_alias (str): Alias name for the timestamp column. Defaults to "timeStamps". - ms_markers_group (str): h5 column containing timestamp information. + ms_markers_key (str): hdf5 path containing timestamp information. Defaults to "msMarkers". first_event_time_stamp_key (str): h5 attribute containing the start timestamp of a file. Defaults to "FirstEventTimeStamp". @@ -58,30 +55,45 @@ Returns: ddf.DataFrame: The delayed Dask DataFrame """ - if group_names is None: - group_names = [] - if alias_dict is None: - alias_dict = {} - # Read a file to parse the file structure test_fid = kwds.pop("test_fid", 0) test_proc = h5py.File(files[test_fid]) - if group_names == []: - group_names, alias_dict = get_groups_and_aliases( + + if channels is None: + channels = get_datasets_and_aliases( h5file=test_proc, search_pattern="Stream", ) - column_names = [alias_dict.get(group, group) for group in group_names] + electron_channels = [] + column_names = [] + + for name, channel in channels.items(): + if channel["format"] == "per_electron": + if channel["dataset_key"] in test_proc: + electron_channels.append(channel) + column_names.append(name) + else: + print( + f"Entry \"{channel['dataset_key']}\" for channel \"{name}\" not found.", + "Skipping the channel.", + ) + elif channel["format"] != "per_file": + raise ValueError( + f"Invalid 'format':{channel['format']} for channel {name}.", + ) + + if not electron_channels: + raise ValueError("No valid 'per_electron' channels found.") if time_stamps: column_names.append(time_stamp_alias) test_array = hdf5_to_array( h5file=test_proc, - group_names=group_names, + channels=electron_channels, time_stamps=time_stamps, - ms_markers_group=ms_markers_group, + ms_markers_key=ms_markers_key, first_event_time_stamp_key=first_event_time_stamp_key, ) @@ -90,9 +102,9 @@ da.from_delayed( dask.delayed(hdf5_to_array)( h5file=h5py.File(f), - group_names=group_names, + channels=electron_channels, time_stamps=time_stamps, - ms_markers_group=ms_markers_group, + ms_markers_key=ms_markers_key, first_event_time_stamp_key=first_event_time_stamp_key, ), dtype=test_array.dtype, @@ -102,16 +114,33 @@ ] array_stack = da.concatenate(arrays, axis=1).T - return ddf.from_dask_array(array_stack, columns=column_names) + dataframe = ddf.from_dask_array(array_stack, columns=column_names) + + for name, channel in channels.items(): + if channel["format"] == "per_file": + if channel["dataset_key"] in test_proc.attrs: + values = [float(get_attribute(h5py.File(f), channel["dataset_key"])) for f in files] + delayeds = [ + add_value(partition, name, value) + for partition, value in zip(dataframe.partitions, values) + ] + dataframe = ddf.from_delayed(delayeds) + + else: + print( + f"Entry \"{channel['dataset_key']}\" for channel \"{name}\" not found.",
+ "Skipping the channel.", + ) + + return dataframe def hdf5_to_timed_dataframe( files: Sequence[str], - group_names: Sequence[str] = None, - alias_dict: dict[str, str] = None, + channels: dict[str, Any] = None, time_stamps: bool = False, time_stamp_alias: str = "timeStamps", - ms_markers_group: str = "msMarkers", + ms_markers_key: str = "msMarkers", first_event_time_stamp_key: str = "FirstEventTimeStamp", **kwds, ) -> ddf.DataFrame: @@ -121,17 +150,14 @@ def hdf5_to_timed_dataframe( Args: files (List[str]): A list of the file paths to load. - group_names (List[str], optional): hdf5 group names to load. Defaults to load - all groups containing "Stream" - alias_dict (dict[str, str], optional): Dictionary of aliases for the dataframe - columns. Keys are the hdf5 groupnames, and values the aliases. If an alias - is not found, its group name is used. Defaults to read the attribute - "Name" from each group. + channels (dict[str, str], optional): hdf5 channels names to load. Each entry in the dict + should contain the keys "format" and "groupName". Defaults to load all groups + containing "Stream", and to read the attribute "Name" from each group. time_stamps (bool, optional): Option to calculate time stamps. Defaults to False. time_stamp_alias (str): Alias name for the timestamp column. Defaults to "timeStamps". - ms_markers_group (str): h5 column containing timestamp information. + ms_markers_key (str): hdf5 dataset containing timestamp information. Defaults to "msMarkers". first_event_time_stamp_key (str): h5 attribute containing the start timestamp of a file. Defaults to "FirstEventTimeStamp". @@ -139,30 +165,45 @@ def hdf5_to_timed_dataframe( Returns: ddf.DataFrame: The delayed Dask DataFrame """ - if group_names is None: - group_names = [] - if alias_dict is None: - alias_dict = {} - # Read a file to parse the file structure test_fid = kwds.pop("test_fid", 0) test_proc = h5py.File(files[test_fid]) - if group_names == []: - group_names, alias_dict = get_groups_and_aliases( + + if channels is None: + channels = get_datasets_and_aliases( h5file=test_proc, search_pattern="Stream", ) - column_names = [alias_dict.get(group, group) for group in group_names] + electron_channels = [] + column_names = [] + + for name, channel in channels.items(): + if channel["format"] == "per_electron": + if channel["dataset_key"] in test_proc: + electron_channels.append(channel) + column_names.append(name) + else: + print( + f"Entry \"{channel['dataset_key']}\" for channel \"{name}\" not found.", + "Skipping the channel.", + ) + elif channel["format"] != "per_file": + raise ValueError( + f"Invalid 'format':{channel['format']} for channel {name}.", + ) + + if not electron_channels: + raise ValueError("No valid 'per_electron' channels found.") if time_stamps: column_names.append(time_stamp_alias) test_array = hdf5_to_timed_array( h5file=test_proc, - group_names=group_names, + channels=electron_channels, time_stamps=time_stamps, - ms_markers_group=ms_markers_group, + ms_markers_key=ms_markers_key, first_event_time_stamp_key=first_event_time_stamp_key, ) @@ -171,9 +212,9 @@ def hdf5_to_timed_dataframe( da.from_delayed( dask.delayed(hdf5_to_timed_array)( h5file=h5py.File(f), - group_names=group_names, + channels=electron_channels, time_stamps=time_stamps, - ms_markers_group=ms_markers_group, + ms_markers_key=ms_markers_key, first_event_time_stamp_key=first_event_time_stamp_key, ), dtype=test_array.dtype, @@ -183,15 +224,49 @@ def hdf5_to_timed_dataframe( ] array_stack = da.concatenate(arrays, axis=1).T - return 
ddf.from_dask_array(array_stack, columns=column_names) + dataframe = ddf.from_dask_array(array_stack, columns=column_names) + for name, channel in channels.items(): + if channel["format"] == "per_file": + if channel["dataset_key"] in test_proc.attrs: + values = [float(get_attribute(h5py.File(f), channel["dataset_key"])) for f in files] + delayeds = [ + add_value(partition, name, value) + for partition, value in zip(dataframe.partitions, values) + ] + dataframe = ddf.from_delayed(delayeds) -def get_groups_and_aliases( + else: + print( + f"Entry \"{channel['dataset_key']}\" for channel \"{name}\" not found.", + "Skipping the channel.", + ) + + return dataframe + + +@dask.delayed +def add_value(partition: ddf.DataFrame, name: str, value: float) -> ddf.DataFrame: + """Dask delayed helper function to add a value to each dataframe partition + + Args: + partition (ddf.DataFrame): Dask dataframe partition + name (str): Name of the column to add + value (float): value to add to this partition + + Returns: + ddf.DataFrame: Dataframe partition with added column + """ + partition[name] = value + return partition + + +def get_datasets_and_aliases( h5file: h5py.File, search_pattern: str = None, alias_key: str = "Name", -) -> tuple[list[str], dict[str, str]]: - """Read groups and aliases from a provided hdf5 file handle +) -> dict[str, Any]: + """Read datasets and aliases from a provided hdf5 file handle Args: h5file (h5py.File): @@ -202,31 +277,33 @@ Attribute key where aliases are stored. Defaults to "Name". Returns: - tuple[list[str], dict[str, str]]: - The list of groupnames and the alias dictionary parsed from the file + dict[str, Any]: + A dict of aliases and groupnames parsed from the file """ # get group names: - group_names = list(h5file) + dataset_names = list(h5file) # Filter the group names if search_pattern is None: - filtered_group_names = group_names + filtered_dataset_names = dataset_names else: - filtered_group_names = [name for name in group_names if search_pattern in name] + filtered_dataset_names = [name for name in dataset_names if search_pattern in name] alias_dict = {} - for name in filtered_group_names: + for name in filtered_dataset_names: alias_dict[name] = get_attribute(h5file[name], alias_key) - return filtered_group_names, alias_dict + return { + alias_dict[name]: {"format": "per_electron", "dataset_key": name} + for name in filtered_dataset_names + } def hdf5_to_array( h5file: h5py.File, - group_names: Sequence[str], - data_type: str = "float32", + channels: Sequence[dict[str, Any]], time_stamps=False, - ms_markers_group: str = "msMarkers", + ms_markers_key: str = "msMarkers", first_event_time_stamp_key: str = "FirstEventTimeStamp", ) -> np.ndarray: """Reads the content of the given groups in an hdf5 file, and returns a @@ -235,13 +312,11 @@ Args: h5file (h5py.File): hdf5 file handle to read from - group_names (str): - group names to read - data_type (str, optional): - Data type of the output data. Defaults to "float32". + channels (Sequence[dict[str, Any]]): + channel dicts containing group names and types to read. time_stamps (bool, optional): Option to calculate time stamps. Defaults to False. - ms_markers_group (str): h5 column containing timestamp information. + ms_markers_key (str): hdf5 dataset containing timestamp information. Defaults to "msMarkers". first_event_time_stamp_key (str): h5 attribute containing the start timestamp of a file. Defaults to "FirstEventTimeStamp".
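The `add_value` helper above implements the new `per_file` broadcast: each dataframe partition corresponds to one source file, so a per-file hdf5 attribute can be attached by wrapping every partition in a delayed call and reassembling the result with `ddf.from_delayed`. A self-contained sketch of this pattern, with made-up values standing in for the attribute read:

```python
# Sketch of the per-file column broadcast (illustrative values only).
import dask
import dask.dataframe as ddf
import pandas as pd

@dask.delayed
def add_value(partition: pd.DataFrame, name: str, value: float) -> pd.DataFrame:
    # each partition arrives as a concrete pandas DataFrame at execution time
    partition[name] = value
    return partition

# one partition per "file"; the values mimic a bias voltage read from each file
df = ddf.from_pandas(pd.DataFrame({"X": range(6)}), npartitions=2)
values = [16.7, 16.8]

delayeds = [
    add_value(part, "sampleBias", val)
    for part, val in zip(df.partitions, values)
]
df = ddf.from_delayed(delayeds)
print(df.compute())
```

Note that this assumes one partition per file, which holds here because the loader builds the dask array from one delayed chunk per input file.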
@@ -251,13 +326,19 @@ """ # Delayed array for loading an HDF5 file of reasonable size (e.g. < 1GB) - # Read out groups: data_list = [] - for group in group_names: - g_dataset = np.asarray(h5file[group]) - if bool(data_type): - g_dataset = g_dataset.astype(data_type) + for channel in channels: + if channel["format"] == "per_electron": + g_dataset = np.asarray(h5file[channel["dataset_key"]]) + else: + raise ValueError( + f"Invalid 'format':{channel['format']} for channel {channel['dataset_key']}.", + ) + if "dtype" in channel.keys(): + g_dataset = g_dataset.astype(channel["dtype"]) + else: + g_dataset = g_dataset.astype("float32") data_list.append(g_dataset) # calculate time stamps @@ -266,7 +347,7 @@ time_stamp_data = np.zeros(len(data_list[0])) # the ms marker contains a list of events that occurred at full ms intervals. # It's monotonically increasing, and can contain duplicates - ms_marker = np.asarray(h5file[ms_markers_group]) + ms_marker = np.asarray(h5file[ms_markers_key]) # try to get start timestamp from "FirstEventTimeStamp" attribute try: @@ -306,10 +387,9 @@ def hdf5_to_timed_array( h5file: h5py.File, - group_names: Sequence[str], - data_type: str = "float32", + channels: Sequence[dict[str, Any]], time_stamps=False, - ms_markers_group: str = "msMarkers", + ms_markers_key: str = "msMarkers", first_event_time_stamp_key: str = "FirstEventTimeStamp", ) -> np.ndarray: """Reads the content of the given groups in an hdf5 file, and returns a @@ -318,13 +398,11 @@ Args: h5file (h5py.File): hdf5 file handle to read from - group_names (str): - group names to read - data_type (str, optional): - Data type of the output data. Defaults to "float32". + channels (Sequence[dict[str, Any]]): + channel dicts containing group names and types to read. time_stamps (bool, optional): Option to calculate time stamps. Defaults to False. - ms_markers_group (str): h5 column containing timestamp information. + ms_markers_key (str): hdf5 dataset containing timestamp information. Defaults to "msMarkers". first_event_time_stamp_key (str): h5 attribute containing the start timestamp of a file. Defaults to "FirstEventTimeStamp". @@ -338,15 +416,21 @@ # Read out groups: data_list = [] - ms_marker = np.asarray(h5file[ms_markers_group]) - for group in group_names: - g_dataset = np.asarray(h5file[group]) - if bool(data_type): - g_dataset = g_dataset.astype(data_type) - + ms_marker = np.asarray(h5file[ms_markers_key]) + for channel in channels: timed_dataset = np.zeros_like(ms_marker) - for i, point in enumerate(ms_marker): - timed_dataset[i] = g_dataset[int(point) - 1] + if channel["format"] == "per_electron": + g_dataset = np.asarray(h5file[channel["dataset_key"]]) + for i, point in enumerate(ms_marker): + timed_dataset[i] = g_dataset[int(point) - 1] + else: + raise ValueError( + f"Invalid 'format':{channel['format']} for channel {channel['dataset_key']}.", + ) + if "dtype" in channel.keys(): + timed_dataset = timed_dataset.astype(channel["dtype"]) + else: + timed_dataset = timed_dataset.astype("float32") data_list.append(timed_dataset) @@ -398,20 +482,20 @@ def get_count_rate( h5file: h5py.File, - ms_markers_group: str = "msMarkers", + ms_markers_key: str = "msMarkers", ) -> tuple[np.ndarray, np.ndarray]: """Create count rate in the file from the msMarker column. Args: h5file (h5py.File): The h5file from which to get the count rate.
- ms_markers_group (str, optional): The hdf5 group where the millisecond markers + ms_markers_key (str, optional): The hdf5 path where the millisecond markers are stored. Defaults to "msMarkers". Returns: tuple[np.ndarray, np.ndarray]: The count rate in Hz and the seconds into the scan. """ - ms_markers = np.asarray(h5file[ms_markers_group]) + ms_markers = np.asarray(h5file[ms_markers_key]) secs = np.arange(0, len(ms_markers)) / 1000 msmarker_spline = sint.InterpolatedUnivariateSpline(secs, ms_markers, k=1) rate_spline = msmarker_spline.derivative() @@ -422,19 +506,19 @@ def get_count_rate( def get_elapsed_time( h5file: h5py.File, - ms_markers_group: str = "msMarkers", + ms_markers_key: str = "msMarkers", ) -> float: """Return the elapsed time in the file from the msMarkers wave Args: h5file (h5py.File): The h5file from which to get the count rate. - ms_markers_group (str, optional): The hdf5 group where the millisecond markers + ms_markers_key (str, optional): The hdf5 path where the millisecond markers are stored. Defaults to "msMarkers". Return: float: The acquisition time of the file in seconds. """ - secs = h5file[ms_markers_group].len() / 1000 + secs = h5file[ms_markers_key].len() / 1000 return secs @@ -528,7 +612,7 @@ def read_dataframe( - **hdf5_groupnames** : List of groupnames to look for in the file. - **hdf5_aliases**: Dictionary of aliases for the groupnames. - **time_stamp_alias**: Alias for the timestamp column - - **ms_markers_group**: Group name of the millisecond marker column. + - **ms_markers_key**: HDF5 path of the millisecond marker column. - **first_event_time_stamp_key**: Attribute name containing the start timestamp of the file. @@ -567,13 +651,9 @@ def read_dataframe( metadata=metadata, ) - hdf5_groupnames = kwds.pop( - "hdf5_groupnames", - self._config.get("dataframe", {}).get("hdf5_groupnames", []), - ) - hdf5_aliases = kwds.pop( - "hdf5_aliases", - self._config.get("dataframe", {}).get("hdf5_aliases", {}), + channels = kwds.pop( + "channels", + self._config.get("dataframe", {}).get("channels", None), ) time_stamp_alias = kwds.pop( "time_stamp_alias", @@ -582,10 +662,10 @@ def read_dataframe( "timeStamps", ), ) - ms_markers_group = kwds.pop( - "ms_markers_group", + ms_markers_key = kwds.pop( + "ms_markers_key", self._config.get("dataframe", {}).get( - "ms_markers_group", + "ms_markers_key", "msMarkers", ), ) @@ -598,21 +678,19 @@ def read_dataframe( ) df = hdf5_to_dataframe( files=self.files, - group_names=hdf5_groupnames, - alias_dict=hdf5_aliases, + channels=channels, time_stamps=time_stamps, time_stamp_alias=time_stamp_alias, - ms_markers_group=ms_markers_group, + ms_markers_key=ms_markers_key, first_event_time_stamp_key=first_event_time_stamp_key, **kwds, ) timed_df = hdf5_to_timed_dataframe( files=self.files, - group_names=hdf5_groupnames, - alias_dict=hdf5_aliases, + channels=channels, time_stamps=time_stamps, time_stamp_alias=time_stamp_alias, - ms_markers_group=ms_markers_group, + ms_markers_key=ms_markers_key, first_event_time_stamp_key=first_event_time_stamp_key, **kwds, ) @@ -678,16 +756,23 @@ def get_start_and_end_time(self) -> tuple[float, float]: tuple[float, float]: A tuple containing the start and end time stamps """ h5file = h5py.File(self.files[0]) + channels = [] + for channel in self._config["dataframe"]["channels"].values(): + if channel["format"] == "per_electron": + channels = [channel] + break + if not channels: + raise ValueError("No valid 'per_electron' channels found.") timestamps = hdf5_to_array( h5file, - 
group_names=self._config["dataframe"]["hdf5_groupnames"], + channels=channels, time_stamps=True, ) ts_from = timestamps[-1][1] h5file = h5py.File(self.files[-1]) timestamps = hdf5_to_array( h5file, - group_names=self._config["dataframe"]["hdf5_groupnames"], + channels=channels, time_stamps=True, ) ts_to = timestamps[-1][-1] @@ -891,7 +976,7 @@ def get_count_rate( include. Defaults to list of all file ids. kwds: Keyword arguments: - - **ms_markers_group**: Name of the hdf5 group containing the ms-markers + - **ms_markers_key**: HDF5 path of the ms-markers Returns: tuple[np.ndarray, np.ndarray]: Arrays containing countrate and seconds @@ -900,10 +985,10 @@ if fids is None: fids = range(0, len(self.files)) - ms_markers_group = kwds.pop( - "ms_markers_group", + ms_markers_key = kwds.pop( + "ms_markers_key", self._config.get("dataframe", {}).get( - "ms_markers_group", + "ms_markers_key", "msMarkers", ), ) @@ -914,7 +999,7 @@ for fid in fids: count_rate_, secs_ = get_count_rate( h5py.File(self.files[fid]), - ms_markers_group=ms_markers_group, + ms_markers_key=ms_markers_key, ) secs_list.append((accumulated_time + secs_).T) count_rate_list.append(count_rate_.T) @@ -934,7 +1019,7 @@ def get_elapsed_time(self, fids: Sequence[int] = None, **kwds) -> float: include. Defaults to list of all file ids. kwds: Keyword arguments: - - **ms_markers_group**: Name of the hdf5 group containing the ms-markers + - **ms_markers_key**: HDF5 path of the millisecond marker column. Return: float: The elapsed time in the files in seconds. @@ -942,10 +1027,10 @@ if fids is None: fids = range(0, len(self.files)) - ms_markers_group = kwds.pop( - "ms_markers_group", + ms_markers_key = kwds.pop( + "ms_markers_key", self._config.get("dataframe", {}).get( - "ms_markers_group", + "ms_markers_key", "msMarkers", ), ) @@ -954,7 +1039,7 @@ for fid in fids: secs += get_elapsed_time( h5py.File(self.files[fid]), - ms_markers_group=ms_markers_group, + ms_markers_key=ms_markers_key, ) return secs diff --git a/tests/calibrator/test_energy.py b/tests/calibrator/test_energy.py index 378ce095..8d02270b 100644 --- a/tests/calibrator/test_energy.py +++ b/tests/calibrator/test_energy.py @@ -195,11 +195,9 @@ def test_calibrate_append(energy_scale: str, calibration_method: str) -> None: ref_id = 5 ec.add_ranges(ranges=rng, ref_id=ref_id) ec.feature_extract() - refid = 4 e_ref = -0.5 calibdict = ec.calibrate( ref_energy=e_ref, - ref_id=refid, energy_scale=energy_scale, method=calibration_method, ) diff --git a/tests/data/loader/mpes/config.yaml b/tests/data/loader/mpes/config.yaml index 877c531a..1a411be2 100644 --- a/tests/data/loader/mpes/config.yaml +++ b/tests/data/loader/mpes/config.yaml @@ -1,3 +1,85 @@ core: paths: data_raw_dir: "tests/data/loader/mpes/" + +dataframe: + # dataframe column name for the time stamp column + time_stamp_alias: "timeStamps" + # hdf5 group name containing eventIDs occurring at every millisecond (used to calculate timestamps) + ms_markers_key: "msMarkers" + # hdf5 attribute containing the timestamp of the first event in a file + first_event_time_stamp_key: "FirstEventTimeStamp" + # Time stepping in seconds of the successive events in the timed dataframe + timed_dataframe_unit_time: 0.001 + # list of columns to apply jitter to + jitter_cols: ["X", "Y", "t", "ADC"] + # dataframe column containing x coordinates + x_column: "X" + #
dataframe column containing y coordinates + y_column: "Y" + # dataframe column containing time-of-flight data + tof_column: "t" + # dataframe column containing analog-to-digital data + adc_column: "ADC" + # dataframe column containing bias voltage data + bias_column: "sampleBias" + # dataframe column containing corrected x coordinates + corrected_x_column: "Xm" + # dataframe column containing corrected y coordinates + corrected_y_column: "Ym" + # dataframe column containing corrected time-of-flight data + corrected_tof_column: "tm" + # dataframe column containing kx coordinates + kx_column: "kx" + # dataframe column containing ky coordinates + ky_column: "ky" + # dataframe column containing energy data + energy_column: "energy" + # dataframe column containing delay data + delay_column: "delay" + # time length of a base time-of-flight bin in ns + tof_binwidth: 4.125e-12 + # Binning factor of the tof_column-data compared to tof_binwidth (2^(tof_binning-1)) + tof_binning: 2 + # binning factor used for the adc coordinate (2^(adc_binning-1)) + adc_binning: 3 + # Default units for dataframe entries + units: + X: 'step' + Y: 'step' + t: 'step' + tof_voltage: 'V' + extractor_voltage: 'V' + extractor_current: 'A' + cryo_temperature: 'K' + sample_temperature: 'K' + dld_time: 'ns' + delay: 'ps' + timeStamp: 's' + energy: 'eV' + E: 'eV' + kx: '1/A' + ky: '1/A' + + # dataframe channels and group names to read from the h5 files + channels: + # The X-channel + X: + format: per_electron + dataset_key: "Stream_0" + # The Y-channel + Y: + format: per_electron + dataset_key: "Stream_1" + # The tof-channel + t: + format: per_electron + dataset_key: "Stream_2" + # The ADC-channel + ADC: + format: per_electron + dataset_key: "Stream_4" + # The sample Bias-channel + sampleBias: + format: per_file + dataset_key: "KTOF:Lens:Sample:V" diff --git a/tests/loader/mpes/__init__.py b/tests/loader/mpes/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/loader/mpes/test_mpes_loader.py b/tests/loader/mpes/test_mpes_loader.py new file mode 100644 index 00000000..9d4513a0 --- /dev/null +++ b/tests/loader/mpes/test_mpes_loader.py @@ -0,0 +1,79 @@ +"""Tests specific for Mpes loader""" +from __future__ import annotations + +import os +from copy import deepcopy +from importlib.util import find_spec + +import pytest + +from sed.core.config import parse_config +from sed.loader.mpes.loader import MpesLoader + +package_dir = os.path.dirname(find_spec("sed").origin) + +test_data_dir = os.path.join(package_dir, "..", "tests", "data", "loader", "mpes") + +config = parse_config( + os.path.join(test_data_dir, "config.yaml"), + folder_config={}, + user_config={}, + system_config={}, +) + + +def test_channel_not_found_warning(capsys) -> None: + """Test if the mpes loader gives the correct warning if a channel cannot be found.""" + ml = MpesLoader(config=config) + + ml.read_dataframe(folders=test_data_dir) + captured = capsys.readouterr() + assert captured.out == "" + + # modify per_file channel + config_ = deepcopy(config) + config_["dataframe"]["channels"]["sampleBias"]["dataset_key"] = "invalid" + ml = MpesLoader(config=config_) + + ml.read_dataframe(folders=test_data_dir) + captured = capsys.readouterr() + assert 'Entry "invalid" for channel "sampleBias" not found.' 
in captured.out + + # modify per_electron channel + config_ = deepcopy(config) + config_["dataframe"]["channels"]["X"]["dataset_key"] = "invalid" + ml = MpesLoader(config=config_) + + ml.read_dataframe(folders=test_data_dir) + captured = capsys.readouterr() + assert 'Entry "invalid" for channel "X" not found.' in captured.out + + +def test_invalid_channel_format_raises() -> None: + """Test if the mpes loader raises an exception if an illegal channel format is provided.""" + config_ = deepcopy(config) + config_["dataframe"]["channels"]["sampleBias"]["format"] = "per_train" + ml = MpesLoader(config=config_) + + with pytest.raises(ValueError) as e: + ml.read_dataframe(folders=test_data_dir) + + expected_error = e.value.args[0] + + assert "Invalid 'format':per_train for channel sampleBias." in expected_error + + +def test_no_electron_channels_raises() -> None: + """Test if the mpes loader raises an exception if no per-electron channels are provided.""" + config_ = deepcopy(config) + config_["dataframe"]["channels"] = { + "sampleBias": {"format": "per_file", "dataset_key": "KTOF:Lens:Sample:V"}, + } + ml = MpesLoader(config=config_) + + with pytest.raises(ValueError) as e: + ml.read_dataframe(folders=test_data_dir) + + expected_error = e.value.args[0] + + assert "No valid 'per_electron' channels found." in expected_error diff --git a/tests/test_processor.py b/tests/test_processor.py index 9664ef19..a52bf959 100644 --- a/tests/test_processor.py +++ b/tests/test_processor.py @@ -578,18 +578,15 @@ def test_energy_calibration_workflow(energy_scale: str, calibration_method: str) with pytest.raises(ValueError): processor.calibrate_energy_axis( ref_energy=ref_energy, - ref_id=ref_id, energy_scale="myfantasyscale", ) with pytest.raises(NotImplementedError): processor.calibrate_energy_axis( ref_energy=ref_energy, - ref_id=ref_id, method="myfantasymethod", ) processor.calibrate_energy_axis( ref_energy=ref_energy, - ref_id=ref_id, energy_scale=energy_scale, method=calibration_method, ) diff --git a/tutorial/2_conversion_pipeline_for_example_time-resolved_ARPES_data.ipynb b/tutorial/2_conversion_pipeline_for_example_time-resolved_ARPES_data.ipynb index 7b9cb1e2..34c448e1 100644 --- a/tutorial/2_conversion_pipeline_for_example_time-resolved_ARPES_data.ipynb +++ b/tutorial/2_conversion_pipeline_for_example_time-resolved_ARPES_data.ipynb @@ -458,9 +458,9 @@ "metadata": {}, "source": [ "#### 3. Step:\n", - "Next, the detected peak positions and bias voltages are used to determine the calibration function. This can be either done by fitting the functional form d^2/(t-t0)^2 via lmfit (\"lmfit\"), or using a polynomial approximation (\"lstsq\" or \"lsqr\"). Here, one can also define a reference id, and a reference energy. Those define the absolute energy position of the feature used for calibration in the \"reference\" trace, at the bias voltage where the final measurement has been performed. The energy scale can be either \"kinetic\" (decreasing energy with increasing TOF), or \"binding\" (increasing energy with increasing TOF).\n", + "Next, the detected peak positions and bias voltages are used to determine the calibration function. Essentially, the function Energy(TOF) is determined by either least-squares fitting of the functional form d^2/(t-t0)^2 via lmfit (``method``: \"lmfit\"), or by analytically obtaining a polynomial approximation (``method``: \"lstsq\" or \"lsqr\").
The parameter ``ref_energy`` is used to define the absolute energy position of the feature used for calibration in the calibrated energy scale. ``energy_scale`` can be either \"kinetic\" (decreasing energy with increasing TOF), or \"binding\" (increasing energy with increasing TOF).\n", "\n", - "After calculating the calibration, all traces corrected with the calibration are plotted ontop of each other, the calibration function together with the extracted features is plotted." + "After calculating the calibration, all traces corrected with the calibration are plotted on top of each other, and the calibration function (Energy(TOF)) together with the extracted features is plotted." ] }, { @@ -470,14 +470,11 @@ "metadata": {}, "outputs": [], "source": [ - "# use the refid of the bias that the measurement was taken at\n", - "# Eref can be used to set the absolute energy (kinetic energy, E-EF) of the feature used for energy calibration (if known)\n", - "refid=4\n", - "Eref=-0.5\n", + "# Eref can be used to set the absolute energy (kinetic energy, E-EF, etc.) of the feature used for energy calibration (if known)\n", + "Eref=-1.3\n", "# the lmfit method uses a fit of (d/(t-t0))**2 to determine the energy calibration\n", "# limits and starting values for the fitting parameters can be provided as dictionaries\n", "sp.calibrate_energy_axis(\n", - " ref_id=refid,\n", " ref_energy=Eref,\n", " method=\"lmfit\",\n", " energy_scale='kinetic',\n", @@ -515,7 +512,7 @@ "metadata": {}, "source": [ "#### 4. Step:\n", - "Finally, the the energy axis is added to the dataframe." + "Finally, the energy axis is added to the dataframe. Here, the applied bias voltage of the measurement is taken into account to provide the correct energy offset. If the bias cannot be read from the file, it can be provided manually." ] }, { @@ -525,7 +522,7 @@ "metadata": {}, "outputs": [], "source": [ - "sp.append_energy_axis()" + "sp.append_energy_axis(bias_voltage=16.8)" ] }, { @@ -538,6 +535,16 @@ "The delay axis is calculated from the ADC input column based on the provided delay range. ALternatively, the delay scan range can also be extracted from attributes inside a source file, if present." ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "1619cbc6", + "metadata": {}, + "outputs": [], + "source": [ + "sp.dataframe.head()" + ] + }, { "cell_type": "code", "execution_count": null, @@ -661,7 +668,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.12" + "version": "3.9.19" } }, "nbformat": 4, diff --git a/tutorial/3_metadata_collection_and_export_to_NeXus.ipynb b/tutorial/3_metadata_collection_and_export_to_NeXus.ipynb index 42fede36..260e5e4d 100644 --- a/tutorial/3_metadata_collection_and_export_to_NeXus.ipynb +++ b/tutorial/3_metadata_collection_and_export_to_NeXus.ipynb @@ -219,7 +219,7 @@ "outputs": [], "source": [ "# Apply stored config energy calibration\n", - "sp.append_energy_axis()" + "sp.append_energy_axis(bias_voltage=16.8)" ] }, { diff --git a/tutorial/4_hextof_workflow.ipynb b/tutorial/4_hextof_workflow.ipynb index 4fcbaa9d..4de9c825 100644 --- a/tutorial/4_hextof_workflow.ipynb +++ b/tutorial/4_hextof_workflow.ipynb @@ -577,7 +577,7 @@ "metadata": {}, "source": [ "### correct offsets\n", - "The energy axis is now correct, but still the curves do not stack on each other as we are not compensating for the `sampleBias`.
In the same way, we can compensate the photon energy (`monochromatorPhotonEnergy`) and the `tofVoltage` " + "The energy axis is now correct, taking the sample bias of the measurement into account. Additionally, we can compensate the photon energy (`monochromatorPhotonEnergy`) and the `tofVoltage`." ] }, { @@ -587,10 +587,9 @@ "outputs": [], "source": [ "sp.add_energy_offset(\n", - " constant=-32, # Sample bias used as reference for energy calibration\n", - " columns=['sampleBias','monochromatorPhotonEnergy','tofVoltage'],\n", - " weights=[1,-1,-1],\n", - " preserve_mean=[False, True, True],\n", + " columns=['monochromatorPhotonEnergy','tofVoltage'],\n", + " weights=[-1,-1],\n", + " preserve_mean=[True, True],\n", ")" ] }, diff --git a/tutorial/6_binning_with_time-stamped_data.ipynb b/tutorial/6_binning_with_time-stamped_data.ipynb index 4a103808..93080372 100644 --- a/tutorial/6_binning_with_time-stamped_data.ipynb +++ b/tutorial/6_binning_with_time-stamped_data.ipynb @@ -157,7 +157,7 @@ "source": [ "# Load energy calibration EDCs\n", "scans = np.arange(127,136)\n", - "voltages = np.arange(22,13,-1)\n", + "voltages = np.arange(21,12,-1)\n", "files = [caldir + r'/Scan' + str(num).zfill(4) + '_1.h5' for num in scans]\n", "sp.load_bias_series(data_files=files, normalize=True, biases=voltages, ranges=[(64000, 76000)])\n", "rg = (65500, 66000)\n", @@ -173,6 +173,7 @@ "outputs": [], "source": [ "# Apply stored config energy calibration\n", + "#sp.append_energy_axis(bias_voltage=17)\n", "sp.append_energy_axis()" ] },
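Taken together, the loader changes mean that an mpes config now declares each dataframe column as a channel with a `format` and a `dataset_key`, and per-file hdf5 attributes arrive as ordinary columns. A hedged sketch of driving the reworked loader function directly; the file names are placeholders, while the keys follow the example config above:

```python
# Sketch: the reworked mpes loader API with the new channels mapping.
from sed.loader.mpes.loader import hdf5_to_dataframe

channels = {
    "X": {"format": "per_electron", "dataset_key": "Stream_0"},
    "Y": {"format": "per_electron", "dataset_key": "Stream_1"},
    "t": {"format": "per_electron", "dataset_key": "Stream_2"},
    "ADC": {"format": "per_electron", "dataset_key": "Stream_4"},
    # per_file channels are read from an hdf5 attribute and broadcast
    # to all rows originating from that file
    "sampleBias": {"format": "per_file", "dataset_key": "KTOF:Lens:Sample:V"},
}

df = hdf5_to_dataframe(
    files=["Scan0127_1.h5", "Scan0128_1.h5"],  # placeholder file names
    channels=channels,
    ms_markers_key="msMarkers",
)
print(df.head())
```

With the bias available as a `sampleBias` column, `append_energy_axis()` can pick up the offset automatically from the configured `bias_column`, and `bias_voltage=...` remains as the manual fallback shown in the notebooks.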