From 006921c58366858e2493fff1e89b8d477ff00473 Mon Sep 17 00:00:00 2001 From: Steinn Ymir Agustsson Date: Wed, 18 Oct 2023 14:47:18 +0200 Subject: [PATCH] add tutorial notebook --- sed/calibrator/energy.py | 10 +- sed/core/dfops.py | 2 +- sed/core/processor.py | 45 ++------- tutorial/5 - hextof workflow.ipynb | 142 +++++++---------------------- 4 files changed, 50 insertions(+), 149 deletions(-) diff --git a/sed/calibrator/energy.py b/sed/calibrator/energy.py index 1aff5b6e..9bb7201e 100644 --- a/sed/calibrator/energy.py +++ b/sed/calibrator/energy.py @@ -2183,8 +2183,8 @@ def apply_energy_offset( columns (Union[str, Sequence[str]]): Name of the column(s) to apply the shift to. signs (Union[int, Sequence[int]]): Sign of the shift to apply. (+1 or -1) energy_column (str, optional): Name of the column containing the energy values. - reduce (str): The reduction to apply to the column. If "rolled" it searches for columns with - suffix "_rolled", e.g. "sampleBias_rolled", as those generated by the + reductions (str): The reduction to apply to the column. If "rolled" it searches for columns + with suffix "_rolled", e.g. "sampleBias_rolled", as those generated by the ``SedProcessor.smooth_columns()`` function. Otherwise should be an available method of dask.dataframe.Series. For example "mean". In this case the function is applied to the column to generate a single value for the whole dataset. If None, the shift is applied @@ -2199,7 +2199,8 @@ def apply_energy_offset( columns = [columns] if isinstance(signs, int): signs = [signs] - + if reductions is None: + reductions = [None] * len(columns) columns_ = [] reductions_ = [] to_roll = [] @@ -2219,7 +2220,8 @@ def apply_energy_offset( raise RuntimeError(f"Columns {to_roll} have not been smoothed. please run `smooth_column`") df = dfops.apply_offset_from_columns( - tartget_column=energy_column, + df=df, + target_column=energy_column, offset_columns=columns_, signs=signs, reductions=reductions_, diff --git a/sed/core/dfops.py b/sed/core/dfops.py index 05ba219f..9cdce49b 100644 --- a/sed/core/dfops.py +++ b/sed/core/dfops.py @@ -289,7 +289,7 @@ def apply_offset_from_columns( if len(signs) != len(offset_columns): raise ValueError("signs and offset_columns must have the same length!") - for col, sign, red in zip(offset_columns, signs): + for col, sign, red in zip(offset_columns, signs, reductions): assert col in df.columns, f"{col} not in dataframe!" if red is not None: df[target_column] = df[target_column] + sign * df[col].agg(red) diff --git a/sed/core/processor.py b/sed/core/processor.py index d63dd35d..891c2dc1 100644 --- a/sed/core/processor.py +++ b/sed/core/processor.py @@ -1132,10 +1132,7 @@ def apply_energy_offset( constant: float = None, columns: Union[str, Sequence[str]] = None, signs: Union[int, Sequence[int]] = None, - mode: Union[str, Sequence[str]] = "direct", - window: float = None, - sigma: float = None, - rolling_group_channel: str = None, + reductions: Union[str, Sequence[str]] = None, ) -> None: """Shift the energy axis of the dataframe by a given amount. @@ -1143,37 +1140,15 @@ def apply_energy_offset( constant (float, optional): The constant to shift the energy axis by. columns (Union[str, Sequence[str]]): The columns to shift. signs (Union[int, Sequence[int]]): The sign of the shift. - mode (Union[str, Sequence[str]], optional): The mode of the shift. - Defaults to "direct". - window (float, optional): The window size for the rolling mean. - Defaults to None. - sigma (float, optional): The sigma for the rolling mean. - Defaults to 2. - rolling_group_channel (str, optional): The channel to use for the rolling - mean. Defaults to None. - + reductions (str): The reduction to apply to the column. If "rolled" it searches for + columns with suffix "_rolled", e.g. "sampleBias_rolled", as those generated by the + ``SedProcessor.smooth_columns()`` function. Otherwise should be an available method + of dask.dataframe.Series. For example "mean". In this case the function is applied + to the column to generate a single value for the whole dataset. If None, the shift + is applied per-dataframe-row. Defaults to None. Raises: ValueError: If the energy column is not in the dataframe. """ - if columns is None and constant is None: - offset_dict = self._config["energy"].get("offset", None) - if offset_dict is None: - raise ValueError( - "No offset parameters provided and no offset found in config file!", - ) - constant = offset_dict["constant"] - columns = [] - signs = [] - modes = [] - windows = [] - sigmas = [] - for k, v in offset_dict: - columns.append(k) - signs.append(v["sign"]) - modes.append(v["mode"]) - windows.append(v.get("window", None)) - sigmas.append(v.get("sigma", None)) - energy_column = self._config["dataframe"]["energy_column"] if energy_column not in self._dataframe.columns: raise ValueError( @@ -1183,11 +1158,9 @@ def apply_energy_offset( self._dataframe, metadata = energy.apply_energy_offset( df=self._dataframe, columns=columns, + energy_column=energy_column, signs=signs, - mode=mode, - window=window, - sigma=sigma, - rolling_group_channel=rolling_group_channel, + reductions=reductions, config=self._config, ) self._dataframe[energy_column] += constant diff --git a/tutorial/5 - hextof workflow.ipynb b/tutorial/5 - hextof workflow.ipynb index 8bb9f28d..a9980191 100644 --- a/tutorial/5 - hextof workflow.ipynb +++ b/tutorial/5 - hextof workflow.ipynb @@ -60,10 +60,7 @@ "metadata": {}, "outputs": [], "source": [ - "# axes = ['sampleBias', 'dldTimeSteps']\n", - "# bins = [5, 500]\n", - "# ranges = [[28,33], [4000, 6500]]\n", - "# res = sp.compute(bins=bins, axes=axes, ranges=ranges)" + "sp.append_energy_axis()\n" ] }, { @@ -72,79 +69,11 @@ "metadata": {}, "outputs": [], "source": [ - "# plt.figure()\n", - "# res.plot.line(x='dldTimeSteps');" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# posMax = np.zeros(5)\n", - "# bias = np.zeros(5)\n", - "# for i in range(5):\n", - "# posMax[i] = res['dldTimeSteps'][np.argmax(res[i,:].values)]\n", - "# bias[i] = res['sampleBias'][i]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# parameters = sed.calibrator.energy.poly_energy_calibration(\n", - "# pos=posMax, \n", - "# vals=bias, \n", - "# order=2, \n", - "# ref_id = 3, \n", - "# ref_energy=0.0,\n", - "# t = 0)\n", - "# #t=42720.0)\n", - "# print(\"parameters:\")\n", - "# print(parameters.keys())" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sp.append_energy_axis()#calibration=parameters)\n", - "# sp_enCal._dataframe['energy'] = -sp_enCal._dataframe['energy']-4.0" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# %matplotlib widget" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# axes = ['sampleBias', 'energy']\n", - "# bins = [5, 500]\n", - "# ranges = [[28,33], [-5, 10]]\n", - "# res = sp.compute(bins=bins, axes=axes, ranges=ranges)" + "sp.apply_energy_offset(\n", + " constant=31.6, \n", + " columns=['sampleBias'],\n", + " signs=[-1],\n", + ")" ] }, { @@ -153,8 +82,10 @@ "metadata": {}, "outputs": [], "source": [ - "# plt.figure()\n", - "# res.plot.line(x='energy');" + "axes = ['sampleBias', 'energy']\n", + "bins = [5, 500]\n", + "ranges = [[28,33], [-1,5]]\n", + "res = sp.compute(bins=bins, axes=axes, ranges=ranges)" ] }, { @@ -163,7 +94,7 @@ "metadata": {}, "outputs": [], "source": [ - "# sp.attributes.pop('apply_energy_offset')" + "%matplotlib widget" ] }, { @@ -172,12 +103,9 @@ "metadata": {}, "outputs": [], "source": [ - "sp.apply_energy_offset(\n", - " constant=32, \n", - " columns=['sampleBias'],# 'tofVoltage'], \n", - " signs=[-1],#, +1], \n", - " mode='direct'\n", - ")" + "plt.figure()\n", + "res.mean('sampleBias').plot.line(x='energy',linewidth=3);\n", + "res.plot.line(x='energy',linewidth=1,alpha=.5,label='all');\n" ] }, { @@ -186,8 +114,7 @@ "metadata": {}, "outputs": [], "source": [ - "h = sp.dataframe[['energy','sampleBias','tofVoltage','monochromatorPhotonEnergy']].head()\n", - "h" + "sp.dataframe['binding_energy'] = -sp.dataframe['energy']" ] }, { @@ -196,9 +123,9 @@ "metadata": {}, "outputs": [], "source": [ - "axes = ['sampleBias', 'energy']\n", + "axes = ['sampleBias', 'binding_energy']\n", "bins = [5, 500]\n", - "ranges = [[28,33], [h['energy'][0]-5,h['energy'][0]+5]]\n", + "ranges = [[28,33], [-5,1]]\n", "res = sp.compute(bins=bins, axes=axes, ranges=ranges)" ] }, @@ -209,8 +136,10 @@ "outputs": [], "source": [ "plt.figure()\n", - "res.mean('sampleBias').plot.line(x='energy',linewidth=3);\n", - "res.plot.line(x='energy',linewidth=1,alpha=.5,label='all');\n" + "ax = plt.subplot(111)\n", + "res.binding_energy.attrs['unit'] = 'eV'\n", + "res.mean('sampleBias').plot.line(x='binding_energy',linewidth=3, ax=ax);\n", + "res.plot.line(x='binding_energy',linewidth=1,alpha=.5,label='all',ax=ax);" ] }, { @@ -218,22 +147,7 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "\n", - "# sp._dataframe['energy'] = -sp._dataframe['energy'] - 9" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "axes = ['sampleBias', 'energy']\n", - "bins = [5, 500]\n", - "ranges = [[28,33], [h['energy'][0]-5,h['energy'][0]+5]]\n", - "res = sp.compute(bins=bins, axes=axes, ranges=ranges)" - ] + "source": [] } ], "metadata": { @@ -241,6 +155,18 @@ "display_name": "sed38", "language": "python", "name": "sed38" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.18" } }, "nbformat": 4,