From fa1cd5cec6d7a8c63ab2f3f498eb5f976a04f5ab Mon Sep 17 00:00:00 2001 From: Steinn Ymir Agustsson Date: Wed, 25 Oct 2023 22:30:24 +0200 Subject: [PATCH] refactor and move apply_energy_offset --- sed/calibrator/energy.py | 237 ++++++++++++++++++----------- sed/core/processor.py | 25 ++- tutorial/5 - hextof workflow.ipynb | 99 +++++++++++- tutorial/hextof_config.yaml | 10 +- 4 files changed, 256 insertions(+), 115 deletions(-) diff --git a/sed/calibrator/energy.py b/sed/calibrator/energy.py index ca555a8c..d56ba5b3 100644 --- a/sed/calibrator/energy.py +++ b/sed/calibrator/energy.py @@ -112,7 +112,7 @@ def __init__( self.color_clip = self._config["energy"]["color_clip"] self.sector_delays = self._config["dataframe"].get("sector_delays", None) self.sector_id_column = self._config["dataframe"].get("sector_id_column", None) - + self.offset: Dict[str, Any] = self._config["energy"].get("offset", {}) self.correction: Dict[Any, Any] = {} @property @@ -773,6 +773,26 @@ def view( # pylint: disable=dangerous-default-value pbk.show(fig) + def get_current_calibration(self) -> dict: + """Return the current calibration dictionary. + + if none is present, return the one from the config. If none is present there, + return an empty dictionary. + + Returns: + dict: Calibration dictionary. 
+ """ + if self.calibration: + calibration = deepcopy(self.calibration) + else: + calibration = deepcopy( + self._config["energy"].get( + "calibration", + {}, + ), + ) + return calibration + def append_energy_axis( self, df: Union[pd.DataFrame, dask.dataframe.DataFrame], @@ -816,17 +836,7 @@ def append_energy_axis( binwidth = kwds.pop("binwidth", self.binwidth) binning = kwds.pop("binning", self.binning) - # pylint: disable=duplicate-code - if calibration is None: - if self.calibration: - calibration = deepcopy(self.calibration) - else: - calibration = deepcopy( - self._config["energy"].get( - "calibration", - {}, - ), - ) + calibration = self.get_current_calibration() if calibration is None else calibration for key, value in kwds.items(): calibration[key] = value @@ -1413,14 +1423,9 @@ def align_dld_sectors( self, df: Union[pd.DataFrame, dask.dataframe.DataFrame], **kwds, - # sector_delays: Sequence[float] = None, - # sector_id_column: str = None, - # tof_column: str = None, ) -> Tuple[Union[pd.DataFrame, dask.dataframe.DataFrame], dict]: """Aligns the time-of-flight axis of the different sections of a detector. - # TODO: move inside the ec class - Args: df (Union[pd.DataFrame, dask.dataframe.DataFrame]): Dataframe to use. @@ -1452,6 +1457,128 @@ def align_sector(x): } return df, metadata + def apply_energy_offset( + self, + df: Union[pd.DataFrame, dask.dataframe.DataFrame] = None, + constant: float = None, + columns: Union[str, Sequence[str]] = None, + signs: Union[int, Sequence[int]] = None, + subtract_mean: Union[bool, Sequence[bool]] = None, + energy_column: str = None, + reductions: Union[str, Sequence[str]] = None, + ) -> Tuple[Union[pd.DataFrame, dask.dataframe.DataFrame], dict]: + """Apply an energy shift to the given column(s). + + If no parameter is passed to this function, the offset is applied as defined in the + config file. If parameters are passed, they are used to generate a new offset dictionary + and the offset is applied using the ``dfops.apply_offset_from_columns()`` function. 
+ + # TODO: This function can still be improved and needs testing + + Args: + df (Union[pd.DataFrame, dask.dataframe.DataFrame]): Dataframe to use. + constant (float, optional): The constant to shift the energy axis by. + columns (Union[str, Sequence[str]]): Name of the column(s) to apply the shift to. + signs (Union[int, Sequence[int]]): Sign of the shift to apply. (+1 or -1) A positive + sign shifts the energy axis to higher kinetic energies. Defaults to +1. + energy_column (str, optional): Name of the column containing the energy values. + reductions (str): The reduction to apply to the column. If "rolled" it searches for + columns with suffix "_rolled", e.g. "sampleBias_rolled", as those generated by the + ``SedProcessor.smooth_columns()`` function. Otherwise should be an available method + of dask.dataframe.Series. For example "mean". In this case the function is applied + to the column to generate a single value for the whole dataset. If None, the shift + is applied per-dataframe-row. Defaults to None. + subtract_mean (bool): Whether to subtract the mean of the column before applying the + shift. Defaults to False. + **kwargs: Additional arguments for the rolling average function. + """ + if energy_column is None: + energy_column = self.energy_column + if columns is None: + # load from config + columns = [] + signs = [] + subtract_mean = [] + reductions = [] + for k, v in self.offset.items(): + if k == "constant": + constant = v + print(f"Applying constant offset of {constant} to energy axis.") + else: + assert k in df.columns, f"Column {k} not found in dataframe." 
+ columns.append(k) + signs.append(v.get("sign", 1)) + subtract_mean.append(v.get("subtract_mean", False)) + reductions.append(v.get("reduction", None)) + s = "+" if signs[-1] > 0 else "-" + msg = f"Shifting {energy_column} by {s} {k}" + if subtract_mean[-1]: + msg += " and subtracting mean" + print(msg) + else: + # use passed parameters + if isinstance(columns, str): + columns = [columns] + if isinstance(signs, int): + signs = [signs] + if len(signs) != len(columns): + raise ValueError("signs and columns must have the same length.") + if isinstance(subtract_mean, bool): + subtract_mean = [subtract_mean] * len(columns) + if reductions is None: + reductions = [None] * len(columns) + # flip sign for binding energy scale + energy_scale = self.get_current_calibration().get("energy_scale", None) + if energy_scale == "binding": + signs = [-s for s in signs] + elif energy_scale == "kinetic": + pass + elif energy_scale is None: + raise ValueError("Energy scale not set. Please run `set_energy_scale` first.") + # check if columns have been smoothed + columns_: List[str] = [] + reductions_: List[str] = [] + to_roll: List[str] = [] + for c, r in zip(columns, reductions): + if r == "rolled": + cname = c + "_rolled" + if cname not in df.columns: + to_roll.append(cname) + else: + columns_.append(cname) + reductions_.append(None) + else: + columns_.append(c) + reductions_.append(r) + if len(to_roll) > 0: + raise RuntimeError( + f"Columns {to_roll} have not been smoothed. 
please run `smooth_column`", + ) + # apply offset + df = dfops.apply_offset_from_columns( + df=df, + target_column=energy_column, + offset_columns=columns_, + signs=signs, + subtract_mean=subtract_mean, + reductions=reductions_, + inplace=True, + ) + # apply constant + if constant is not None: + df[energy_column] += constant + + metadata: Dict[str, Any] = { + "applied": True, + "constant": constant, + "energy_column": energy_column, + "column_names": columns, + "signs": signs, + "subtract_mean": subtract_mean, + "reductions": reductions, + } + return df, metadata + def extract_bias(files: List[str], bias_key: str) -> np.ndarray: """Read bias values from hdf5 files @@ -2219,79 +2346,3 @@ def tof2ns( """ val = t * 1e9 * binwidth * 2**binning return val - - -def apply_energy_offset( - df: Union[pd.DataFrame, dask.dataframe.DataFrame], - columns: Union[str, Sequence[str]], - signs: Union[int, Sequence[int]], - subtract_mean: Union[bool, Sequence[bool]] = True, - energy_column: str = None, - reductions: Union[str, Sequence[str]] = None, - config: dict = None, -) -> Union[pd.DataFrame, dask.dataframe.DataFrame]: - """Apply an energy shift to the given column(s). - - # TODO: This funcion can still be improved and needs testsing - # TODO: move inside the ec class - - Args: - df (Union[pd.DataFrame, dask.dataframe.DataFrame]): Dataframe to use. - columns (Union[str, Sequence[str]]): Name of the column(s) to apply the shift to. - signs (Union[int, Sequence[int]]): Sign of the shift to apply. (+1 or -1) - energy_column (str, optional): Name of the column containing the energy values. - reductions (str): The reduction to apply to the column. If "rolled" it searches for columns - with suffix "_rolled", e.g. "sampleBias_rolled", as those generated by the - ``SedProcessor.smooth_columns()`` function. Otherwise should be an available method of - dask.dataframe.Series. For example "mean". 
In this case the function is applied to the - column to generate a single value for the whole dataset. If None, the shift is applied - per-dataframe-row. Defaults to None. - **kwargs: Additional arguments for the rolling average function. - """ - if energy_column is None: - if config is None: - raise ValueError("Either energy_column or config must be given.") - energy_column = config["dataframe"]["energy_column"] - if isinstance(columns, str): - columns = [columns] - if isinstance(signs, int): - signs = [signs] - if len(signs) != len(columns): - raise ValueError("signs and columns must have the same length.") - if isinstance(subtract_mean, bool): - subtract_mean = [subtract_mean] * len(columns) - if reductions is None: - reductions = [None] * len(columns) - columns_: List[str] = [] - reductions_: List[str] = [] - to_roll: List[str] = [] - for c, r in zip(columns, reductions): - if r == "rolled": - cname = c + "_rolled" - if cname not in df.columns: - to_roll.append(cname) - else: - columns_.append(cname) - reductions_.append(None) - else: - columns_.append(c) - reductions_.append(r) - if len(to_roll) > 0: - raise RuntimeError(f"Columns {to_roll} have not been smoothed. 
please run `smooth_column`") - - df = dfops.apply_offset_from_columns( - df=df, - target_column=energy_column, - offset_columns=columns_, - signs=signs, - subtract_mean=subtract_mean, - reductions=reductions_, - inplace=True, - ) - metadata: Dict[str, Any] = { - "applied": True, - "energy_column": energy_column, - "column_names": columns, - "sign": signs, - } - return df, metadata diff --git a/sed/core/processor.py b/sed/core/processor.py index 7997b43c..ddbb01bd 100644 --- a/sed/core/processor.py +++ b/sed/core/processor.py @@ -20,7 +20,6 @@ from sed.binning import bin_dataframe from sed.calibrator import DelayCalibrator -from sed.calibrator import energy from sed.calibrator import EnergyCalibrator from sed.calibrator import MomentumCorrector from sed.core.config import parse_config @@ -1182,6 +1181,8 @@ def apply_energy_offset( of dask.dataframe.Series. For example "mean". In this case the function is applied to the column to generate a single value for the whole dataset. If None, the shift is applied per-dataframe-row. Defaults to None. + subtract_mean (bool): Whether to subtract the mean of the column before applying the + shift. Defaults to False. Raises: ValueError: If the energy column is not in the dataframe. 
""" @@ -1192,19 +1193,15 @@ def apply_energy_offset( "Run energy calibration first", ) metadata = {} - if columns is not None: - self._dataframe, metadata = energy.apply_energy_offset( - df=self._dataframe, - columns=columns, - energy_column=energy_column, - signs=signs, - reductions=reductions, - subtract_mean=subtract_mean, - config=self._config, - ) - if constant is not None: - self._dataframe[energy_column] += constant - metadata["offset"] = constant + self._dataframe, metadata = self.ec.apply_energy_offset( + df=self._dataframe, + constant=constant, + columns=columns, + energy_column=energy_column, + signs=signs, + reductions=reductions, + subtract_mean=subtract_mean, + ) if len(metadata) > 0: self._attributes.add( metadata, diff --git a/tutorial/5 - hextof workflow.ipynb b/tutorial/5 - hextof workflow.ipynb index e7265e9c..9675d266 100644 --- a/tutorial/5 - hextof workflow.ipynb +++ b/tutorial/5 - hextof workflow.ipynb @@ -3,7 +3,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "notebookRunGroups": { + "groupValue": "1" + } + }, "outputs": [], "source": [ "from pathlib import Path\n", @@ -21,7 +25,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "notebookRunGroups": { + "groupValue": "1" + } + }, "outputs": [], "source": [ "%matplotlib widget" @@ -30,7 +38,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "notebookRunGroups": { + "groupValue": "1" + } + }, "outputs": [], "source": [ "config_file = Path(sed.__file__).parent.parent/'tutorial/hextof_config.yaml'\n", @@ -415,6 +427,87 @@ " plt.plot(x, result.best_fit+i, 'r-')\n" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# load and process from config" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "notebookRunGroups": { + "groupValue": "2" + } + }, + "outputs": [], + "source": [ + "config={\"core\": {\"paths\": {\n", + " 
\"data_raw_dir\": \"/asap3/flash/gpfs/pg2/2023/data/11019101/raw/hdf/offline/fl1user3\", \n", + " \"data_parquet_dir\": \"/home/agustsss/temp/sed_parquet/\"\n", + "}}}\n", + "sp = SedProcessor(runs=[44797], config=config, user_config=config_file, system_config={}, collect_metadata=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "notebookRunGroups": { + "groupValue": "2" + } + }, + "outputs": [], + "source": [ + "sp.add_jitter()\n", + "sp.align_dld_sectors()\n", + "sp.append_tof_ns_axis()\n", + "sp.append_energy_axis()\n", + "sp.apply_energy_offset()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "notebookRunGroups": { + "groupValue": "2" + } + }, + "outputs": [], + "source": [ + "axes = ['sampleBias', 'energy']\n", + "bins = [5, 500]\n", + "ranges = [[28,33], [-10,10]]\n", + "res_fit = sp.compute(bins=bins, axes=axes, ranges=ranges)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "notebookRunGroups": { + "groupValue": "2" + } + }, + "outputs": [], + "source": [ + "plt.figure()\n", + "ax = plt.subplot(111)\n", + "res_fit.energy.attrs['unit'] = 'eV'\n", + "res_fit.mean('sampleBias').plot.line(x='energy',linewidth=3, ax=ax)\n", + "res_fit.plot.line(x='energy',linewidth=1,alpha=.5,label='all',ax=ax);" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "code", "execution_count": null, diff --git a/tutorial/hextof_config.yaml b/tutorial/hextof_config.yaml index 0a8289af..482860e8 100644 --- a/tutorial/hextof_config.yaml +++ b/tutorial/hextof_config.yaml @@ -124,17 +124,17 @@ energy: calibration: d: 2.7342492951998603 t0: 3.6049383256584405e-08 - E0: -51.289659014865784 + E0: -51.289659014865784 # flip sign if switching between kinetic and binding energy energy_scale: kinetic refid: 0 offset: - constant: 2.0 + constant: 0.0 sampleBias: sign: 1 - substract_mean: True + subtract_mean: 
True monochromatorPhotonEnergy: sign: -1 - substract_mean: True + subtract_mean: True tofVoltage: sign: -1 - substract_mean: True + subtract_mean: True