From 0832a026ff7196161f4641083bf41d159a375c68 Mon Sep 17 00:00:00 2001 From: Steinn Ymir Agustsson Date: Tue, 14 Nov 2023 13:23:04 +0100 Subject: [PATCH] refactor signs into weights --- sed/calibrator/delay.py | 2 +- sed/calibrator/energy.py | 40 ++++++++++++++++----------------- sed/core/dfops.py | 16 ++++++------- sed/core/processor.py | 16 ++++++------- tests/calibrator/test_energy.py | 18 +++++++-------- tests/test_dfops.py | 10 ++++----- 6 files changed, 51 insertions(+), 51 deletions(-) diff --git a/sed/calibrator/delay.py b/sed/calibrator/delay.py index d8a3badc..355bbfd8 100644 --- a/sed/calibrator/delay.py +++ b/sed/calibrator/delay.py @@ -271,7 +271,7 @@ def add_offsets( df=df, target_column=delay_column, offset_columns=columns, - signs=weights, + weights=weights, preserve_mean=preserve_mean, reductions=reductions, ) diff --git a/sed/calibrator/energy.py b/sed/calibrator/energy.py index 2879a301..c710c5de 100644 --- a/sed/calibrator/energy.py +++ b/sed/calibrator/energy.py @@ -1474,7 +1474,7 @@ def add_offsets( df: Union[pd.DataFrame, dask.dataframe.DataFrame] = None, constant: float = None, columns: Union[str, Sequence[str]] = None, - signs: Union[int, Sequence[int]] = None, + weights: Union[int, Sequence[int]] = None, preserve_mean: Union[bool, Sequence[bool]] = False, reductions: Union[str, Sequence[str]] = None, energy_column: str = None, @@ -1491,8 +1491,8 @@ def add_offsets( df (Union[pd.DataFrame, dask.dataframe.DataFrame]): Dataframe to use. constant (float, optional): The constant to shift the energy axis by. columns (Union[str, Sequence[str]]): Name of the column(s) to apply the shift from. - signs (Union[int, Sequence[int]]): Sign of the shift to apply. (+1 or -1) A positive - sign shifts the energy axis to higher kinetic energies. Defaults to +1. + weights (Union[int, Sequence[int]]): weight of the shift to apply. (+1 or -1) A positive + weight shifts the energy axis to higher kinetic energies. Defaults to +1. preserve_mean (bool): Whether to subtract the mean of the column before applying the shift. Defaults to False. reductions (str): The reduction to apply to the column. Should be an available method @@ -1512,7 +1512,7 @@ def add_offsets( if columns is None and constant is None: # load from config columns = [] - signs = [] + weights = [] preserve_mean = [] reductions = [] for k, v in self.offsets.items(): @@ -1521,9 +1521,9 @@ def add_offsets( else: columns.append(k) try: - signs.append(v["sign"]) + weights.append(v["weight"]) except KeyError as exc: - raise KeyError(f"Missing sign for offset column {k} in config.") from exc + raise KeyError(f"Missing weight for offset column {k} in config.") from exc pm = v.get("preserve_mean", False) if str(pm).lower() in ["false", "0", "no"]: pm = False @@ -1549,35 +1549,35 @@ def add_offsets( # apply offset if columns is not None: # use passed parameters - if isinstance(signs, int): - signs = [signs] - elif not isinstance(signs, Sequence): - raise TypeError(f"Invalid type for signs: {type(signs)}") - if not all(isinstance(s, int) for s in signs): - raise TypeError(f"Invalid type for signs: {type(signs)}") - # flip signs if binding energy scale - signs = [s * scale_sign for s in signs] + if isinstance(weights, int): + weights = [weights] + elif not isinstance(weights, Sequence): + raise TypeError(f"Invalid type for weights: {type(weights)}") + if not all(isinstance(s, int) for s in weights): + raise TypeError(f"Invalid type for weights: {type(weights)}") + # flip weights if binding energy scale + weights = [s * scale_sign for s in weights] df = dfops.offset_by_other_columns( df=df, target_column=energy_column, offset_columns=columns, - signs=signs, + weights=weights, preserve_mean=preserve_mean, reductions=reductions, inplace=True, ) metadata["energy_column"] = energy_column metadata["columns"] = columns - metadata["signs"] = signs + metadata["weights"] = weights metadata["preserve_mean"] = preserve_mean metadata["reductions"] = reductions # overwrite the current offset dictionary with the parameters used if not isinstance(columns, Sequence): columns = [columns] - if not isinstance(signs, Sequence): - signs = [signs] + if not isinstance(weights, Sequence): + weights = [weights] if isinstance(preserve_mean, bool): preserve_mean = [preserve_mean] * len(columns) if not isinstance(reductions, Sequence): @@ -1585,9 +1585,9 @@ def add_offsets( if len(reductions) == 1: reductions = [reductions[0]] * len(columns) - for col, sign, pmean, red in zip(columns, signs, preserve_mean, reductions): + for col, weight, pmean, red in zip(columns, weights, preserve_mean, reductions): self.offsets[col] = { - "sign": sign, + "weight": weight, "preserve_mean": pmean, "reduction": red, } diff --git a/sed/core/dfops.py b/sed/core/dfops.py index aec8fed2..fd219da2 100644 --- a/sed/core/dfops.py +++ b/sed/core/dfops.py @@ -269,7 +269,7 @@ def offset_by_other_columns( df: dask.dataframe.DataFrame, target_column: str, offset_columns: Union[str, Sequence[str]], - signs: Union[int, Sequence[int]], + weights: Union[float, Sequence[float]], reductions: Union[str, Sequence[str]] = None, preserve_mean: Union[bool, Sequence[bool]] = False, inplace: bool = True, @@ -281,7 +281,7 @@ def offset_by_other_columns( df (dask.dataframe.DataFrame): Dataframe to use. Currently supports only dask dataframes. target_column (str): Name of the column to apply the offset to. offset_columns (str): Name of the column(s) to use for the offset. - signs (int): Sign of the offset. + weights (int): weights to apply on each column before adding. Used also for changing sign. reductions (str, optional): Reduction function to use for the offset. Defaults to "mean". Currently, only mean is supported. preserve_mean (bool, optional): Whether to subtract the mean of the offset column. @@ -304,13 +304,13 @@ def offset_by_other_columns( if any(c not in df.columns for c in offset_columns): raise KeyError(f"{offset_columns} not in dataframe!") - if isinstance(signs, int): - signs = [signs] - elif not isinstance(signs, Sequence): - raise TypeError(f"Invalid type for signs: {type(signs)}") - if len(signs) != len(offset_columns): + if isinstance(weights, int): + weights = [weights] + elif not isinstance(weights, Sequence): + raise TypeError(f"Invalid type for signs: {type(weights)}") + if len(weights) != len(offset_columns): raise ValueError("signs and offset_columns must have the same length!") - signs_dict = dict(zip(offset_columns, signs)) + signs_dict = dict(zip(offset_columns, weights)) if isinstance(reductions, str) or reductions is None: reductions = [reductions] * len(offset_columns) diff --git a/sed/core/processor.py b/sed/core/processor.py index 37315be4..d5463390 100644 --- a/sed/core/processor.py +++ b/sed/core/processor.py @@ -1270,7 +1270,7 @@ def add_energy_offset( self, constant: float = None, columns: Union[str, Sequence[str]] = None, - signs: Union[int, Sequence[int]] = None, + weights: Union[float, Sequence[float]] = None, reductions: Union[str, Sequence[str]] = None, preserve_mean: Union[bool, Sequence[bool]] = None, ) -> None: @@ -1279,7 +1279,7 @@ def add_energy_offset( Args: constant (float, optional): The constant to shift the energy axis by. columns (Union[str, Sequence[str]]): Name of the column(s) to apply the shift from. - signs (Union[int, Sequence[int]]): Sign of the shift to apply. (+1 or -1) A positive + weights (Union[int, Sequence[int]]): Sign of the shift to apply. (+1 or -1) A positive sign shifts the energy axis to higher kinetic energies. Defaults to +1. preserve_mean (bool): Whether to subtract the mean of the column before applying the shift. Defaults to False. @@ -1304,7 +1304,7 @@ def add_energy_offset( constant=constant, columns=columns, energy_column=energy_column, - signs=signs, + weights=weights, reductions=reductions, preserve_mean=preserve_mean, ) @@ -1315,7 +1315,7 @@ def add_energy_offset( constant=constant, columns=columns, energy_column=energy_column, - signs=signs, + weights=weights, reductions=reductions, preserve_mean=preserve_mean, ) @@ -1517,7 +1517,7 @@ def add_delay_offset( constant: float = None, flip_delay_axis: bool = None, columns: Union[str, Sequence[str]] = None, - signs: Union[int, Sequence[int]] = None, + weights: Union[float, Sequence[float]] = None, reductions: Union[str, Sequence[str]] = None, preserve_mean: Union[bool, Sequence[bool]] = None, inplace: bool = False, @@ -1528,7 +1528,7 @@ def add_delay_offset( Args: constant (float, optional): The constant to shift the delay axis by. columns (Union[str, Sequence[str]]): Name of the column(s) to apply the shift from. - signs (Union[int, Sequence[int]]): Sign of the shift to apply. (+1 or -1) A positive + weights (Union[float, Sequence[float]]): Weight of the shift to apply. A positive sign shifts the delay axis to higher values. Defaults to +1. preserve_mean (bool): Whether to subtract the mean of the column before applying the shift. Defaults to False. @@ -1557,7 +1557,7 @@ def add_delay_offset( flip_delay_axis=flip_delay_axis, columns=columns, delay_column=delay_column, - weights=signs, + weights=weights, reductions=reductions, preserve_mean=preserve_mean, inplace=inplace, @@ -1571,7 +1571,7 @@ def add_delay_offset( flip_delay_axis=flip_delay_axis, columns=columns, delay_column=delay_column, - weights=signs, + weights=weights, reductions=reductions, preserve_mean=preserve_mean, inplace=inplace, diff --git a/tests/calibrator/test_energy.py b/tests/calibrator/test_energy.py index 5f9cd56b..67409237 100644 --- a/tests/calibrator/test_energy.py +++ b/tests/calibrator/test_energy.py @@ -583,11 +583,11 @@ def test_add_offsets_functionality(): "offset": { "constant": 1, "off1": { - "sign": 1, + "weight": 1, "preserve_mean": True, }, - "off2": {"sign": -1, "preserve_mean": False}, - "off3": {"sign": 1, "preserve_mean": False, "reduction": "mean"}, + "off2": {"weight": -1, "preserve_mean": False}, + "off3": {"weight": 1, "preserve_mean": False, "reduction": "mean"}, }, }, }, @@ -599,7 +599,7 @@ def test_add_offsets_functionality(): "constant": 1, "energy_column": "energy", "columns": ["off1", "off2", "off3"], - "signs": [1, -1, 1], + "weights": [1, -1, 1], "preserve_mean": [True, False, False], "reductions": [None, None, "mean"], } @@ -648,9 +648,9 @@ def test_add_offset_raises(): }, "offset": { "constant": 1, - "off1": {"sign": -1, "preserve_mean": True}, - "off2": {"sign": -1, "preserve_mean": False}, - "off3": {"sign": 1, "preserve_mean": False, "reduction": "mean"}, + "off1": {"weight": -1, "preserve_mean": True}, + "off2": {"weight": -1, "preserve_mean": False}, + "off3": {"weight": 1, "preserve_mean": False, "reduction": "mean"}, }, }, } @@ -667,7 +667,7 @@ def test_add_offset_raises(): # no sign in config with pytest.raises(KeyError): cfg = deepcopy(cfg_dict) - cfg["energy"]["offset"]["off1"].pop("sign") + cfg["energy"]["offset"]["off1"].pop("weight") config = parse_config(config=cfg, folder_config={}, user_config={}, system_config={}) ec = EnergyCalibrator(config=cfg, loader=get_loader("flash", config=config)) _ = ec.add_offsets(t_df) @@ -691,7 +691,7 @@ def test_add_offset_raises(): # invalid sign with pytest.raises(TypeError): cfg = deepcopy(cfg_dict) - cfg["energy"]["offset"]["off1"]["sign"] = "wrong_type" + cfg["energy"]["offset"]["off1"]["weight"] = "wrong_type" config = parse_config(config=cfg, folder_config={}, user_config={}, system_config={}) ec = EnergyCalibrator(config=cfg, loader=get_loader("flash", config=config)) _ = ec.add_offsets(t_df) diff --git a/tests/test_dfops.py b/tests/test_dfops.py index 59d5cd1a..a3125bcc 100644 --- a/tests/test_dfops.py +++ b/tests/test_dfops.py @@ -263,7 +263,7 @@ def test_offset_by_other_columns_functionality(): df=t_df.copy(), target_column="target", offset_columns=["off1"], - signs=[1], + weights=[1], ) expected = [11, 22, 33, 44, 55, 66] np.testing.assert_allclose(res["target"].values, expected) @@ -272,7 +272,7 @@ def test_offset_by_other_columns_functionality(): df=t_df.copy(), target_column="target", offset_columns=["off1", "off2"], - signs=[1, -1], + weights=[1, -1], ) expected = [10.9, 21.8, 32.7, 43.6, 54.5, 65.4] np.testing.assert_allclose(res["target"].values, expected) @@ -281,7 +281,7 @@ def test_offset_by_other_columns_functionality(): df=t_df.copy(), target_column="target", offset_columns=["off3"], - signs=[1], + weights=[1], preserve_mean=True, ) expected = [9.75, 19.85, 29.95, 40.05, 50.15, 60.25] @@ -291,7 +291,7 @@ def test_offset_by_other_columns_functionality(): df=t_df.copy(), target_column="target", offset_columns=["off3"], # off3 has mean of 10 - signs=[1], + weights=[1], reductions="mean", ) expected = [20, 30, 40, 50, 60, 70] @@ -315,7 +315,7 @@ def test_offset_by_other_columns_pandas_not_working(): df=pd_df, target_column="target", offset_columns=["off1"], - signs=[1], + weights=[1], )