Merge pull request #411 from OpenCOMPES/energy_calibration_bias_shift
Energy calibration bias shift
rettigl authored Jul 16, 2024
2 parents 3eb5dea + c717114 commit cb7c97a
Showing 17 changed files with 505 additions and 263 deletions.
2 changes: 2 additions & 0 deletions .cspell/custom-dictionary.txt
@@ -38,6 +38,7 @@ caldir
calib
calibdict
caplog
capsys
cdeform
cdeformfield
cdisp
@@ -77,6 +78,7 @@ datastreams
datestring
ddir
delaxes
delayeds
Desy
dfield
dfops
2 changes: 2 additions & 0 deletions benchmarks/benchmark_sed.py
@@ -121,6 +121,7 @@ def test_workflow_1d() -> None:
system_config={},
verbose=True,
)
processor.dataframe["sampleBias"] = 16.7
processor.add_jitter()
processor.apply_momentum_correction()
processor.apply_momentum_calibration()
@@ -155,6 +156,7 @@ def test_workflow_4d() -> None:
system_config={},
verbose=True,
)
processor.dataframe["sampleBias"] = 16.7
processor.add_jitter()
processor.apply_momentum_correction()
processor.apply_momentum_calibration()
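The only change to both benchmark workflows is seeding the new sampleBias column before the calibration chain runs. Since processor.dataframe is a dask dataframe in sed, the scalar assignment becomes a constant column on every partition; a minimal standalone sketch of that behaviour (toy columns and values, not the actual benchmark data):

import dask.dataframe as dd
import pandas as pd

# Toy stand-in for processor.dataframe (a dask dataframe in sed).
pdf = pd.DataFrame({"X": [10.0, 20.0, 30.0], "t": [66000.0, 66100.0, 66200.0]})
ddf = dd.from_pandas(pdf, npartitions=2)

# The benchmark's new line: a scalar assignment yields a constant column on
# every partition, so each electron event carries the scan's sample bias.
ddf["sampleBias"] = 16.7
print(ddf.compute())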
107 changes: 53 additions & 54 deletions sed/calibrator/energy.py
@@ -4,7 +4,6 @@
from __future__ import annotations

import itertools as it
import warnings as wn
from collections.abc import Sequence
from copy import deepcopy
from datetime import datetime
@@ -505,7 +504,7 @@ def feature_extract(

def calibrate(
self,
ref_id: int = 0,
ref_energy: float = 0,
method: str = "lmfit",
energy_scale: str = "kinetic",
landmarks: np.ndarray = None,
@@ -518,8 +517,7 @@ def calibrate(
scale using optimization methods.
Args:
ref_id (int, optional): The reference trace index (an integer).
Defaults to 0.
ref_energy (float): Binding/kinetic energy of the detected feature.
method (str, optional): Method for determining the energy calibration.
- **'lmfit'**: Energy calibration using lmfit and 1/t^2 form.
@@ -574,7 +572,7 @@ def calibrate(
sign * biases,
binwidth,
binning,
ref_id=ref_id,
ref_energy=ref_energy,
t=t,
energy_scale=energy_scale,
verbose=verbose,
@@ -584,7 +582,7 @@
self.calibration = poly_energy_calibration(
landmarks,
sign * biases,
ref_id=ref_id,
ref_energy=ref_energy,
aug=self.dup,
method=method,
t=t,
@@ -652,7 +650,7 @@ def view( # pylint: disable=dangerous-default-value
for itr, trace in enumerate(traces):
if align:
ax.plot(
xaxis + sign * (self.biases[itr] - self.biases[self.calibration["refid"]]),
xaxis + sign * (self.biases[itr]),
trace,
ls="-",
linewidth=1,
@@ -715,7 +713,7 @@ def view( # pylint: disable=dangerous-default-value
trace = traces[itr, :]
if align:
fig.line(
xaxis + sign * (self.biases[itr] - self.biases[self.calibration["refid"]]),
xaxis + sign * (self.biases[itr]),
trace,
color=color,
line_dash="solid",
@@ -774,6 +772,7 @@ def append_energy_axis(
tof_column: str = None,
energy_column: str = None,
calibration: dict = None,
bias_voltage: float = None,
verbose: bool = True,
**kwds,
) -> tuple[pd.DataFrame | dask.dataframe.DataFrame, dict]:
@@ -789,6 +788,9 @@
calibration (dict, optional): Calibration dictionary. If provided,
overrides calibration from class or config.
Defaults to self.calibration or config["energy"]["calibration"].
bias_voltage (float, optional): Sample bias voltage of the scan data. If omitted,
the bias voltage is read from the dataframe. If it is not found there,
a warning is printed and the calibrated data might have an offset.
verbose (bool, optional): Option to print out diagnostic information.
Defaults to True.
**kwds: additional keyword arguments for the energy conversion. They are
@@ -838,6 +840,8 @@

elif "coeffs" in calibration and "E0" in calibration:
calibration["calib_type"] = "poly"
if "energy_scale" not in calibration:
calibration["energy_scale"] = "kinetic"
else:
raise ValueError("No valid calibration parameters provided!")

@@ -877,6 +881,20 @@
else:
raise NotImplementedError

# apply bias offset
scale_sign: Literal[-1, 1] = -1 if calibration["energy_scale"] == "binding" else 1
if bias_voltage is not None:
df[energy_column] = df[energy_column] + scale_sign * bias_voltage
elif self._config["dataframe"]["bias_column"] in df.columns:
df = dfops.offset_by_other_columns(
df=df,
target_column=energy_column,
offset_columns=self._config["dataframe"]["bias_column"],
weights=scale_sign,
)
else:
print("Sample bias data not found or provided. Calibrated energy might be incorrect.")

metadata = self.gather_calibration_metadata(calibration)

return df, metadata
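
The new block above shifts the freshly calibrated energy by the sample bias, with the sign tied to the energy scale. A self-contained pandas sketch of the same arithmetic (the dfops.offset_by_other_columns call is mimicked by a plain column addition; column names and values are invented):

import pandas as pd

df = pd.DataFrame({
    "energy": [10.0, 10.5, 11.0],        # freshly calibrated energies
    "sampleBias": [16.7, 16.7, 16.7],    # bias column, as named in the mpes config
})

energy_scale = "kinetic"                 # or "binding"
scale_sign = -1 if energy_scale == "binding" else 1

# Equivalent of the dfops.offset_by_other_columns call for a single offset column:
df["energy"] = df["energy"] + scale_sign * df["sampleBias"]
print(df["energy"].tolist())             # roughly [26.7, 27.2, 27.7]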
@@ -1527,6 +1545,9 @@ def add_offsets(
offsets["creation_date"] = datetime.now().timestamp()
# column-based offsets
if columns is not None:
if isinstance(columns, str):
columns = [columns]

if weights is None:
weights = 1
if isinstance(weights, (int, float, np.integer, np.floating)):
@@ -1538,10 +1559,13 @@
if not all(isinstance(s, (int, float, np.integer, np.floating)) for s in weights):
raise TypeError(f"Invalid type for weights: {type(weights)}")

if isinstance(columns, str):
columns = [columns]
if isinstance(preserve_mean, bool):
preserve_mean = [preserve_mean] * len(columns)
if preserve_mean is None:
preserve_mean = False
if not isinstance(preserve_mean, Sequence):
preserve_mean = [preserve_mean]
if len(preserve_mean) == 1:
preserve_mean = [preserve_mean[0]] * len(columns)

if not isinstance(reductions, Sequence):
reductions = [reductions]
if len(reductions) == 1:
@@ -1625,10 +1649,7 @@ def add_offsets(
if constant:
if not isinstance(constant, (int, float, np.integer, np.floating)):
raise TypeError(f"Invalid type for constant: {type(constant)}")
df[energy_column] = df.map_partitions(
lambda x: x[energy_column] + constant,
meta=(energy_column, np.float64),
)
df[energy_column] = df[energy_column] + constant

self.offsets = offsets
metadata["offsets"] = offsets
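
add_offsets now accepts a single column name as a plain string, broadcasts scalar weights, and applies a constant offset by simple column addition rather than map_partitions. A standalone mimic of that handling, not the sed API itself (column names and numbers are invented):

import numpy as np
import pandas as pd

def apply_offsets(df, energy_column, columns=None, weights=None, constant=None):
    """Toy mimic of the column/constant handling in EnergyCalibrator.add_offsets."""
    if columns is not None:
        if isinstance(columns, str):      # a single column name is now allowed
            columns = [columns]
        if weights is None:
            weights = 1
        if isinstance(weights, (int, float, np.integer, np.floating)):
            weights = [weights] * len(columns)
        for col, weight in zip(columns, weights):
            df[energy_column] = df[energy_column] + weight * df[col]
    if constant is not None:
        # constant offsets are now a plain column addition
        df[energy_column] = df[energy_column] + constant
    return df

df = pd.DataFrame({"energy": [1.0, 2.0], "photonEnergy": [21.2, 21.2]})
df = apply_offsets(df, "energy", columns="photonEnergy", weights=-1, constant=0.5)
print(df["energy"].tolist())  # approximately [-19.7, -18.7]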
@@ -2082,8 +2103,7 @@ def fit_energy_calibration(
vals: list[float] | np.ndarray,
binwidth: float,
binning: int,
ref_id: int = 0,
ref_energy: float = None,
ref_energy: float,
t: list[float] | np.ndarray = None,
energy_scale: str = "kinetic",
verbose: bool = True,
@@ -2100,9 +2120,8 @@
each EDC.
binwidth (float): Time width of each original TOF bin in ns.
binning (int): Binning factor of the TOF values.
ref_id (int, optional): Reference dataset index. Defaults to 0.
ref_energy (float, optional): Energy value of the feature in the reference
trace (eV). required to output the calibration. Defaults to None.
ref_energy (float): Energy value of the feature in the reference
trace (eV).
t (list[float] | np.ndarray, optional): Array of TOF values. Required
to calculate calibration trace. Defaults to None.
energy_scale (str, optional): Direction of increasing energy scale.
@@ -2127,14 +2146,6 @@
- "axis": Fitted energy axis.
"""
vals = np.asarray(vals)
nvals = vals.size

if ref_id >= nvals:
wn.warn(
"Reference index (refid) cannot be larger than the number of traces!\
Reset to the largest allowed number.",
)
ref_id = nvals - 1

def residual(pars, time, data, binwidth, binning, energy_scale):
model = tof2ev(
@@ -2200,22 +2211,20 @@ def residual(pars, time, data, binwidth, binning, energy_scale):
ecalibdict["t0"] = result.params["t0"].value
ecalibdict["E0"] = result.params["E0"].value
ecalibdict["energy_scale"] = energy_scale
energy_offset = pfunc(-1 * ref_energy, pos[0])
ecalibdict["E0"] = -(energy_offset - vals[0])

if (ref_energy is not None) and (t is not None):
energy_offset = pfunc(-1 * ref_energy, pos[ref_id])
ecalibdict["axis"] = pfunc(-energy_offset, t)
ecalibdict["E0"] = -energy_offset
ecalibdict["refid"] = ref_id
if t is not None:
ecalibdict["axis"] = pfunc(ecalibdict["E0"], t)

return ecalibdict
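
With ref_id removed, the first trace is always the reference and E0 is fixed directly from ref_energy, pos[0], and vals[0]. A toy check of that bookkeeping, assuming (as in tof2ev) that the offset E0 enters the conversion additively; the stand-in pfunc and all numbers are invented:

import numpy as np

def pfunc(E0, t):
    # Toy stand-in for the fitted tof2ev conversion: a 1/t^2-like term plus
    # the additive energy offset E0 (this additivity is the key assumption).
    return 1.0e9 / (t - 5.0e3) ** 2 + E0

pos = np.array([66000.0, 66500.0, 67000.0])  # landmark TOF positions per trace
vals = np.array([16.7, 17.7, 18.7])          # sign-weighted bias voltages, as passed by calibrate()
ref_energy = -0.5                            # desired energy of the reference feature

# E0 determination as in the updated lmfit branch:
energy_offset = pfunc(-1 * ref_energy, pos[0])
E0 = -(energy_offset - vals[0])

# The calibrated axis places the reference landmark at ref_energy + vals[0];
# the per-scan bias offset applied later in append_energy_axis is what is
# meant to compensate this shift.
axis_at_ref = pfunc(E0, pos[0])
print(np.isclose(axis_at_ref - vals[0], ref_energy))  # True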


def poly_energy_calibration(
pos: list[float] | np.ndarray,
vals: list[float] | np.ndarray,
ref_energy: float,
order: int = 3,
ref_id: int = 0,
ref_energy: float = None,
t: list[float] | np.ndarray = None,
aug: int = 1,
method: str = "lstsq",
@@ -2235,10 +2244,9 @@
(e.g. peaks) in the EDCs.
vals (list[float] | np.ndarray): Bias voltage value associated with
each EDC.
ref_energy (float): Energy value of the feature in the reference
trace (eV).
order (int, optional): Polynomial order of the fitting function. Defaults to 3.
ref_id (int, optional): Reference dataset index. Defaults to 0.
ref_energy (float, optional): Energy value of the feature in the reference
trace (eV). required to output the calibration. Defaults to None.
t (list[float] | np.ndarray, optional): Array of TOF values. Required
to calculate calibration trace. Defaults to None.
aug (int, optional): Fitting dimension augmentation
@@ -2266,21 +2274,14 @@
vals = np.asarray(vals)
nvals = vals.size

if ref_id >= nvals:
wn.warn(
"Reference index (refid) cannot be larger than the number of traces!\
Reset to the largest allowed number.",
)
ref_id = nvals - 1

# Top-to-bottom ordering of terms in the T matrix
termorder = np.delete(range(0, nvals, 1), ref_id)
termorder = np.delete(range(0, nvals, 1), 0)
termorder = np.tile(termorder, aug)
# Left-to-right ordering of polynomials in the T matrix
polyorder = np.linspace(order, 1, order, dtype="int")

# Construct the T (differential drift time) matrix, Tmat = Tmain - Tsec
t_main = np.array([pos[ref_id] ** p for p in polyorder])
t_main = np.array([pos[0] ** p for p in polyorder])
# Duplicate to the same order as the polynomials
t_main = np.tile(t_main, (aug * (nvals - 1), 1))

Expand All @@ -2292,7 +2293,7 @@ def poly_energy_calibration(
t_mat = t_main - np.asarray(t_sec)

# Construct the b vector (differential bias)
bvec = vals[ref_id] - np.delete(vals, ref_id)
bvec = vals[0] - np.delete(vals, 0)
bvec = np.tile(bvec, aug)

# Solve for the a vector (polynomial coefficients) using least squares
@@ -2312,12 +2313,10 @@
ecalibdict["Tmat"] = t_mat
ecalibdict["bvec"] = bvec
ecalibdict["energy_scale"] = energy_scale
ecalibdict["E0"] = -(pfunc(-1 * ref_energy, pos[0]) + vals[0])

if ref_energy is not None and t is not None:
energy_offset = pfunc(-1 * ref_energy, pos[ref_id])
ecalibdict["axis"] = pfunc(-energy_offset, t)
ecalibdict["E0"] = -energy_offset
ecalibdict["refid"] = ref_id
if t is not None:
ecalibdict["axis"] = pfunc(-ecalibdict["E0"], t)

return ecalibdict
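
poly_energy_calibration likewise pins the reference to the first trace: every row of the T matrix is the difference between the reference landmark's polynomial terms and another trace's, and the b vector holds the corresponding bias differences. A condensed, runnable rendering of that construction for aug=1 (made-up landmark positions and biases):

import numpy as np

pos = np.array([66000.0, 66400.0, 66800.0, 67200.0])  # landmark TOF positions per trace
vals = np.array([16.7, 17.7, 18.7, 19.7])             # sign-weighted bias voltages
order = 3
nvals = vals.size

# The reference is now always index 0.
polyorder = np.linspace(order, 1, order, dtype="int")            # [3, 2, 1]
t_main = np.tile([pos[0] ** p for p in polyorder], (nvals - 1, 1))
t_sec = np.array([[pos[i] ** p for p in polyorder] for i in range(1, nvals)])
t_mat = t_main - t_sec                                           # differential drift-time matrix
bvec = vals[0] - np.delete(vals, 0)                              # differential biases

# Solve T a = b for the polynomial coefficients (the method="lstsq" branch):
poly_a = np.linalg.lstsq(t_mat, bvec, rcond=None)[0]
print(poly_a.shape)  # (3,)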

35 changes: 26 additions & 9 deletions sed/config/mpes_example_config.yaml
@@ -15,18 +15,10 @@ core:
gid: 1001

dataframe:
# hdf5 group names to read from the h5 files (for mpes reader)
hdf5_groupnames: ["Stream_0", "Stream_1", "Stream_2", "Stream_4"]
# aliases to assign to the dataframe columns for the corresponding hdf5 streams
hdf5_aliases:
Stream_0: "X"
Stream_1: "Y"
Stream_2: "t"
Stream_4: "ADC"
# dataframe column name for the time stamp column
time_stamp_alias: "timeStamps"
# hdf5 group name containing eventIDs occurring at every millisecond (used to calculate timestamps)
ms_markers_group: "msMarkers"
ms_markers_key: "msMarkers"
# hdf5 attribute containing the timestamp of the first event in a file
first_event_time_stamp_key: "FirstEventTimeStamp"
# Time stepping in seconds of the successive events in the timed dataframe
@@ -41,6 +33,8 @@ dataframe:
tof_column: "t"
# dataframe column containing analog-to-digital data
adc_column: "ADC"
# dataframe column containing bias voltage data
bias_column: "sampleBias"
# dataframe column containing corrected x coordinates
corrected_x_column: "Xm"
# dataframe column containing corrected y coordinates
@@ -79,6 +73,29 @@ dataframe:
kx: '1/A'
ky: '1/A'

# dataframe channels and group names to read from the h5 files
channels:
# The X-channel
X:
format: per_electron
dataset_key: "Stream_0"
# The Y-channel
Y:
format: per_electron
dataset_key: "Stream_1"
# The tof-channel
t:
format: per_electron
dataset_key: "Stream_2"
# The ADC-channel
ADC:
format: per_electron
dataset_key: "Stream_4"
# The sample Bias-channel
sampleBias:
format: per_file
dataset_key: "KTOF:Lens:Sample:V"

energy:
# Number of bins to use for energy calibration traces
bins: 1000
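The sampleBias channel is declared with the per_file format, i.e. one value per raw file read from "KTOF:Lens:Sample:V". Assuming per_file channels are broadcast to every electron event of their file when the dataframe is assembled, the combined dataframe ends up with a constant bias per file, which append_energy_axis can then consume via the configured bias_column. A toy pandas illustration of that broadcast (file names and voltages are invented):

import pandas as pd

# One (invented) bias value per raw file ...
per_file_bias = {"scan_0001.h5": 16.7, "scan_0002.h5": 26.7}

# ... broadcast to every per-electron row of that file when files are combined:
frames = []
for fname, bias in per_file_bias.items():
    events = pd.DataFrame({"t": [66000.0, 66100.0, 66200.0]})  # per_electron channel
    events["sampleBias"] = bias                                 # per_file channel
    events["file"] = fname
    frames.append(events)
df = pd.concat(frames, ignore_index=True)
print(df)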