Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Hextof_workflow_steps #169

Merged
merged 61 commits into from
Nov 2, 2023
Merged
Show file tree
Hide file tree
Changes from 51 commits
Commits
Show all changes
61 commits
Select commit Hold shift + click to select a range
60053bf
unravel 8s detector to tof and sectorID in loader
steinnymir Oct 10, 2023
de0c5a2
fix loader for 8s unravelling
steinnymir Oct 10, 2023
41d5213
linting and bugfix
steinnymir Oct 10, 2023
c367637
linting and bugfix
steinnymir Oct 10, 2023
153373d
bugfix
steinnymir Oct 10, 2023
7051b8a
linting
steinnymir Oct 10, 2023
d51b01f
linting
steinnymir Oct 10, 2023
b70bee4
update default flash config
steinnymir Oct 10, 2023
9cd0d67
update test settings for flash
steinnymir Oct 11, 2023
9dbb379
fixes from suggestions and linting
steinnymir Oct 11, 2023
ccff3f8
linting and docstrings
steinnymir Oct 11, 2023
27adcc3
add rolling average and energy shift
steinnymir Oct 11, 2023
77a6ab5
local testing
steinnymir Oct 11, 2023
8f4bded
make step2ns global
steinnymir Oct 11, 2023
73d9bb5
Merge branch 'main' into hextof_workflow_steps
steinnymir Oct 11, 2023
5848a22
implement shift_energy_axis
steinnymir Oct 11, 2023
a1ccb7a
Merge branch 'hextof_workflow_steps' into hextof_energy_shifts
steinnymir Oct 11, 2023
4945d9b
Merge pull request #174 from OpenCOMPES/hextof_energy_shifts
steinnymir Oct 12, 2023
991df97
linting
steinnymir Oct 12, 2023
98c1857
moving functions to their rightful place
steinnymir Oct 12, 2023
bfd3daa
move more functions
steinnymir Oct 13, 2023
ea5c5fd
fix tof_step_to_ns
steinnymir Oct 13, 2023
c79a106
fix linting and typos
steinnymir Oct 13, 2023
b579b8f
move split_sector_id to flash loader + tests
steinnymir Oct 16, 2023
abd3de7
apply suggestions and move sector_alignment
steinnymir Oct 16, 2023
8c75446
minor fixes and tests
steinnymir Oct 18, 2023
0afe469
add smooth functions
steinnymir Oct 18, 2023
35af57c
add tutorial notebook
steinnymir Oct 18, 2023
9479ce8
fix linting and tests
steinnymir Oct 19, 2023
43776a3
add bfill and tests
steinnymir Oct 19, 2023
9edf50a
move tof_to_ns inside ec class
steinnymir Oct 19, 2023
8ba65e9
move dld_sector_correction to ec
steinnymir Oct 19, 2023
b93cd7b
fix
steinnymir Oct 19, 2023
eaeeff4
harder fix
steinnymir Oct 19, 2023
acc9c4a
fix linting
steinnymir Oct 23, 2023
a1066e5
fix linting
steinnymir Oct 23, 2023
ff7012a
Merge branch 'main' into hextof_workflow_steps
steinnymir Oct 23, 2023
b42bb3e
Merge branch 'main' into hextof_workflow_steps
steinnymir Oct 24, 2023
0d73776
add option to initialize fit params
steinnymir Oct 24, 2023
b1c158d
energy calibration notebook and config
steinnymir Oct 24, 2023
d42f2e0
partial working energy calibration
rettigl Oct 25, 2023
0433019
working energy calibration
rettigl Oct 25, 2023
4cad800
tof2ns fix units and linting
steinnymir Oct 25, 2023
5b866ca
fix energy calibration and hextof notebook
steinnymir Oct 25, 2023
fa1cd5c
refactor and move apply_energy_offset
steinnymir Oct 25, 2023
babc325
remove redundant notebooks
steinnymir Oct 25, 2023
09b2dad
linting
steinnymir Oct 25, 2023
68bf2b4
bug fix
steinnymir Oct 25, 2023
6fc47ac
linting
steinnymir Oct 25, 2023
60242a7
linting
steinnymir Oct 25, 2023
050f89c
Merge branch 'main' into hextof_workflow_steps
steinnymir Oct 27, 2023
61045d0
apply suggested changes and remove rolling avg
steinnymir Oct 30, 2023
7554c71
refactor energy offset for performance and tests
steinnymir Nov 1, 2023
3160d20
Merge branch 'main' into hextof_workflow_steps
steinnymir Nov 1, 2023
fdf12ec
linting and bugfix
steinnymir Nov 1, 2023
b8911ea
linting and more tests
steinnymir Nov 1, 2023
2538e28
test tof_ns
steinnymir Nov 1, 2023
3dafe4e
update notebook and add offset to save ecalib
steinnymir Nov 1, 2023
8308eba
test sector_alignment
steinnymir Nov 1, 2023
956de39
yet an other linting
steinnymir Nov 1, 2023
266444f
apply suggested changes
steinnymir Nov 2, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
296 changes: 281 additions & 15 deletions sed/calibrator/energy.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
from scipy.sparse.linalg import lsqr

from sed.binning import bin_dataframe
from sed.core import dfops
from sed.loader.base.loader import BaseLoader


Expand Down Expand Up @@ -95,6 +96,7 @@ def __init__(
self.calibration: Dict[Any, Any] = {}

self.tof_column = self._config["dataframe"]["tof_column"]
self.tof_ns_column = self._config["dataframe"].get("tof_ns_column", None)
self.corrected_tof_column = self._config["dataframe"]["corrected_tof_column"]
self.energy_column = self._config["dataframe"]["energy_column"]
self.x_column = self._config["dataframe"]["x_column"]
Expand All @@ -108,7 +110,9 @@ def __init__(
) / 2 ** (self.binning - 1)
self.tof_fermi = self._config["energy"]["tof_fermi"] / 2 ** (self.binning - 1)
self.color_clip = self._config["energy"]["color_clip"]

self.sector_delays = self._config["dataframe"].get("sector_delays", None)
self.sector_id_column = self._config["dataframe"].get("sector_id_column", None)
self.offset: Dict[str, Any] = self._config["energy"].get("offset", {})
self.correction: Dict[Any, Any] = {}

@property
Expand Down Expand Up @@ -769,6 +773,26 @@ def view( # pylint: disable=dangerous-default-value

pbk.show(fig)

def get_current_calibration(self) -> dict:
    """Fetch the calibration dictionary currently in effect.

    Priority order: an explicitly stored calibration on the instance wins;
    otherwise the one from the config file is used; failing both, an empty
    dictionary is returned. A deep copy is handed back so callers may mutate
    the result without affecting the stored state.

    Returns:
        dict: Deep copy of the active calibration dictionary (possibly empty).
    """
    active = self.calibration or self._config["energy"].get("calibration", {})
    return deepcopy(active)

def append_energy_axis(
self,
df: Union[pd.DataFrame, dask.dataframe.DataFrame],
Expand Down Expand Up @@ -812,17 +836,8 @@ def append_energy_axis(
binwidth = kwds.pop("binwidth", self.binwidth)
binning = kwds.pop("binning", self.binning)

# pylint: disable=duplicate-code
if calibration is None:
if self.calibration:
calibration = deepcopy(self.calibration)
else:
calibration = deepcopy(
self._config["energy"].get(
"calibration",
{},
),
)
calibration = self.get_current_calibration()

for key, value in kwds.items():
calibration[key] = value
Expand Down Expand Up @@ -879,6 +894,53 @@ def append_energy_axis(

return df, metadata

def append_tof_ns_axis(
    self,
    df: Union[pd.DataFrame, dask.dataframe.DataFrame],
    tof_column: str = None,
    tof_ns_column: str = None,
    **kwds,
) -> Tuple[Union[pd.DataFrame, dask.dataframe.DataFrame], dict]:
    """Converts the time-of-flight time from steps to time in ns.

    # TODO: needs tests

    Args:
        df (Union[pd.DataFrame, dask.dataframe.DataFrame]): Dataframe to convert.
        tof_column (str, optional): Name of the column containing the
            time-of-flight steps. Defaults to config["dataframe"]["tof_column"].
        tof_ns_column (str, optional): Name of the column to store the
            time-of-flight in nanoseconds. Defaults to config["dataframe"]["tof_ns_column"].
        **kwds: Optional overrides ``binwidth`` and ``binning`` for the values
            stored on the calibrator.

    Returns:
        dask.dataframe.DataFrame: Dataframe with the new columns.
        dict: Metadata dictionary.
    """
    width = kwds.pop("binwidth", self.binwidth)
    nbin = kwds.pop("binning", self.binning)

    # Prefer the corrected tof column when it is already present in the frame.
    if tof_column is None:
        tof_column = (
            self.corrected_tof_column
            if self.corrected_tof_column in df.columns
            else self.tof_column
        )

    if tof_ns_column is None:
        tof_ns_column = self.tof_ns_column
    if tof_ns_column is None:
        raise AttributeError("tof_ns_column not set!")

    df[tof_ns_column] = tof2ns(width, nbin, df[tof_column].astype("float64"))

    metadata: Dict[str, Any] = {
        "applied": True,
        "binwidth": width,
        "binning": nbin,
    }
    return df, metadata

def gather_calibration_metadata(self, calibration: dict = None) -> dict:
"""Collects metadata from the energy calibration

Expand Down Expand Up @@ -1358,6 +1420,170 @@ def gather_correction_metadata(self, correction: dict = None) -> dict:

return metadata

def align_dld_sectors(
    self,
    df: Union[pd.DataFrame, dask.dataframe.DataFrame],
    **kwds,
) -> Tuple[Union[pd.DataFrame, dask.dataframe.DataFrame], dict]:
    """Aligns the time-of-flight axis of the different sections of a detector.

    Subtracts, per event, the sector-specific delay from the time-of-flight
    column, so all detector sectors share a common time axis.

    Args:
        df (Union[pd.DataFrame, dask.dataframe.DataFrame]): Dataframe to use.
        **kwds: Optional keyword arguments:

            - **sector_delays**: Sequence of delays, one per sector, in tof steps.
              Defaults to the value loaded from the config.
            - **sector_id_column** (str): Name of the column holding the sector id.
              Defaults to the value loaded from the config.
            - **tof_column** (str): Name of the time-of-flight column.
              Defaults to the value loaded from the config.

    Raises:
        ValueError: If sector_delays or sector_id_column are not configured.

    Returns:
        dask.dataframe.DataFrame: Dataframe with the new columns.
        dict: Metadata dictionary.
    """
    sector_delays = kwds.pop("sector_delays", self.sector_delays)
    sector_id_column = kwds.pop("sector_id_column", self.sector_id_column)

    if sector_delays is None or sector_id_column is None:
        raise ValueError(
            "No value for sector_delays or sector_id_column found in config."
            "config file is not properly configured for dld sector correction.",
        )
    tof_column = kwds.pop("tof_column", self.tof_column)

    if isinstance(df, pd.DataFrame):
        # Fix: the signature accepts plain pandas frames, but map_partitions
        # only exists on dask dataframes -- handle the pandas case directly.
        delays = np.asarray(sector_delays)
        shifted = df[tof_column] - delays[df[sector_id_column].to_numpy().astype(int)]
        df[tof_column] = shifted.astype(np.float32)
    else:
        # align the 8s sectors
        sector_delays_arr = dask.array.from_array(sector_delays)

        def align_sector(x):
            val = x[tof_column] - sector_delays_arr[x[sector_id_column].values.astype(int)]
            return val.astype(np.float32)

        df[tof_column] = df.map_partitions(align_sector, meta=(tof_column, np.float32))

    metadata: Dict[str, Any] = {
        "applied": True,
        "sector_delays": sector_delays,
    }
    return df, metadata

def apply_energy_offset(
    self,
    df: Union[pd.DataFrame, dask.dataframe.DataFrame] = None,
    constant: float = None,
    columns: Union[str, Sequence[str]] = None,
    signs: Union[int, Sequence[int]] = None,
    subtract_mean: Union[bool, Sequence[bool]] = None,
    energy_column: str = None,
    reductions: Union[str, Sequence[str]] = None,
) -> Tuple[Union[pd.DataFrame, dask.dataframe.DataFrame], dict]:
    """Apply an energy shift to the given column(s).

    If no parameter is passed to this function, the offset is applied as defined in the
    config file. If parameters are passed, they are used to generate a new offset dictionary
    and the offset is applied using the ``dfops.apply_offset_from_columns()`` function.

    # TODO: This function can still be improved and needs testing

    Args:
        df (Union[pd.DataFrame, dask.dataframe.DataFrame]): Dataframe to use.
        constant (float, optional): The constant to shift the energy axis by.
        columns (Union[str, Sequence[str]]): Name of the column(s) to apply the shift to.
        signs (Union[int, Sequence[int]]): Sign of the shift to apply. (+1 or -1) A positive
            sign shifts the energy axis to higher kinetic energies. Defaults to +1.
        subtract_mean (Union[bool, Sequence[bool]]): Whether to subtract the mean of the
            column before applying the shift. Defaults to False.
        energy_column (str, optional): Name of the column containing the energy values.
        reductions (str): The reduction to apply to the column. If "rolled" it searches for
            columns with suffix "_rolled", e.g. "sampleBias_rolled", as those generated by the
            ``SedProcessor.smooth_columns()`` function. Otherwise should be an available method
            of dask.dataframe.Series. For example "mean". In this case the function is applied
            to the column to generate a single value for the whole dataset. If None, the shift
            is applied per-dataframe-row. Defaults to None.

    Raises:
        ValueError: If columns are passed without signs or subtract_mean, if signs and
            columns differ in length, or if the energy scale has not been set.
        RuntimeError: If a "rolled" reduction refers to a column that has not been smoothed.

    Returns:
        Union[pd.DataFrame, dask.dataframe.DataFrame]: Dataframe with the shifted energy.
        dict: Metadata dictionary.
    """
    if energy_column is None:
        energy_column = self.energy_column
    if columns is None:
        # load from config
        columns = []
        signs = []
        subtract_mean = []
        reductions = []
        for k, v in self.offset.items():
            if k == "constant":
                constant = v
                print(f"Applying constant offset of {constant} to energy axis.")
            else:
                assert k in df.columns, f"Column {k} not found in dataframe."
                columns.append(k)
                signs.append(v.get("sign", 1))
                subtract_mean.append(v.get("subtract_mean", False))
                reductions.append(v.get("reduction", None))
                s = "+" if signs[-1] > 0 else "-"
                msg = f"Shifting {energy_column} by {s} {k}"
                if subtract_mean[-1]:
                    msg += " and subtracting mean"
                print(msg)
    else:
        # use passed parameters
        if columns is not None and (signs is None or subtract_mean is None):
            raise ValueError(
                "If columns is passed, signs and subtract_mean must also be passed.",
            )
    if isinstance(columns, str):
        columns = [columns]
    if isinstance(signs, int):
        signs = [signs]
    if len(signs) != len(columns):
        raise ValueError("signs and columns must have the same length.")
    if isinstance(subtract_mean, bool):
        subtract_mean = [subtract_mean] * len(columns)
    if reductions is None:
        reductions = [None] * len(columns)
    # flip sign for binding energy scale
    energy_scale = self.get_current_calibration().get("energy_scale", None)
    if energy_scale == "binding":
        # fix: do not filter out None entries here -- that would silently
        # misalign signs with columns; the length check above guarantees
        # one sign per column already.
        signs = [-s for s in signs]
    elif energy_scale == "kinetic":
        pass
    elif energy_scale is None:
        raise ValueError("Energy scale not set. Please run `set_energy_scale` first.")
    # check if columns have been smoothed
    columns_: List[str] = []
    reductions_: List[str] = []
    to_roll: List[str] = []
    for c, r in zip(columns, reductions):
        if r == "rolled":
            cname = c + "_rolled"
            if cname not in df.columns:
                to_roll.append(cname)
            else:
                columns_.append(cname)
                reductions_.append(None)
        else:
            columns_.append(c)
            reductions_.append(r)
    if len(to_roll) > 0:
        raise RuntimeError(
            f"Columns {to_roll} have not been smoothed. please run `smooth_column`",
        )
    # apply offset
    df = dfops.apply_offset_from_columns(
        df=df,
        target_column=energy_column,
        offset_columns=columns_,
        signs=signs,
        subtract_mean=subtract_mean,
        reductions=reductions_,
        inplace=True,
    )
    # apply constant
    if constant is not None:
        df[energy_column] += constant

    metadata: Dict[str, Any] = {
        "applied": True,
        "constant": constant,
        "energy_column": energy_column,
        "column_names": columns,
        "signs": signs,
        "subtract_mean": subtract_mean,
        "reductions": reductions,
    }
    return df, metadata


def extract_bias(files: List[str], bias_key: str) -> np.ndarray:
"""Read bias values from hdf5 files
Expand Down Expand Up @@ -1868,13 +2094,33 @@ def residual(pars, time, data, binwidth, binning, energy_scale):
return model - data

pars = Parameters()
pars.add(name="d", value=kwds.pop("d_init", 1))
d_pars = kwds.pop("d", {})
pars.add(
name="d",
value=d_pars.get("value", 1),
min=d_pars.get("min", -np.inf),
max=d_pars.get("max", np.inf),
vary=d_pars.get("vary", True),
)
t0_pars = kwds.pop("t0", {})
pars.add(
name="t0",
value=kwds.pop("t0_init", 1e-6),
max=(min(pos) - 1) * binwidth * 2**binning,
value=t0_pars.get("value", 1e-6),
min=t0_pars.get("min", -np.inf),
max=t0_pars.get(
"max",
(min(pos) - 1) * binwidth * 2**binning,
),
vary=t0_pars.get("vary", True),
)
E0_pars = kwds.pop("E0", {}) # pylint: disable=invalid-name
pars.add(
name="E0",
value=E0_pars.get("value", min(vals)),
min=E0_pars.get("min", -np.inf),
max=E0_pars.get("max", np.inf),
vary=E0_pars.get("vary", True),
)
pars.add(name="E0", value=kwds.pop("E0_init", min(vals)))
fit = Minimizer(
residual,
pars,
Expand Down Expand Up @@ -2085,3 +2331,23 @@ def tof2evpoly(
energy += energy_offset

return energy


def tof2ns(
    binwidth: float,
    binning: int,
    t: Union[float, np.ndarray],
) -> Union[float, np.ndarray]:
    """Converts the time-of-flight steps to time-of-flight in nanoseconds.

    Operates elementwise, so scalars as well as numpy arrays and
    pandas/dask series are supported; designed for use with
    dask.dataframe.DataFrame.map_partitions.

    Args:
        binwidth (float): Time step size in seconds.
        binning (int): Binning of the time-of-flight steps.
        t (Union[float, np.ndarray]): TOF value(s) in bin number.

    Returns:
        Union[float, np.ndarray]: Converted time(s) in nanoseconds, with the
        same shape as ``t``.
    """
    # seconds per step = binwidth * 2**binning; 1e9 converts seconds to ns
    return t * 1e9 * binwidth * 2**binning
2 changes: 1 addition & 1 deletion sed/config/default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ dataframe:
energy_column: "energy"
# dataframe column containing delay data
delay_column: "delay"
# time length of a base time-of-flight bin in ns
# time length of a base time-of-flight bin in s
tof_binwidth: 4.125e-12
# Binning factor of the tof_column-data compared to tof_binwidth (2^(tof_binning-1))
tof_binning: 1
Expand Down
Loading