Skip to content

Commit

Permalink
refactor signs into weights
Browse files Browse the repository at this point in the history
  • Loading branch information
steinnymir committed Nov 14, 2023
1 parent 905b65a commit 0832a02
Show file tree
Hide file tree
Showing 6 changed files with 51 additions and 51 deletions.
2 changes: 1 addition & 1 deletion sed/calibrator/delay.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ def add_offsets(
df=df,
target_column=delay_column,
offset_columns=columns,
signs=weights,
weights=weights,
preserve_mean=preserve_mean,
reductions=reductions,
)
Expand Down
40 changes: 20 additions & 20 deletions sed/calibrator/energy.py
Original file line number Diff line number Diff line change
Expand Up @@ -1474,7 +1474,7 @@ def add_offsets(
df: Union[pd.DataFrame, dask.dataframe.DataFrame] = None,
constant: float = None,
columns: Union[str, Sequence[str]] = None,
signs: Union[int, Sequence[int]] = None,
weights: Union[int, Sequence[int]] = None,
preserve_mean: Union[bool, Sequence[bool]] = False,
reductions: Union[str, Sequence[str]] = None,
energy_column: str = None,
Expand All @@ -1491,8 +1491,8 @@ def add_offsets(
df (Union[pd.DataFrame, dask.dataframe.DataFrame]): Dataframe to use.
constant (float, optional): The constant to shift the energy axis by.
columns (Union[str, Sequence[str]]): Name of the column(s) to apply the shift from.
signs (Union[int, Sequence[int]]): Sign of the shift to apply. (+1 or -1) A positive
sign shifts the energy axis to higher kinetic energies. Defaults to +1.
weights (Union[int, Sequence[int]]): weight of the shift to apply. (+1 or -1) A positive
weight shifts the energy axis to higher kinetic energies. Defaults to +1.
preserve_mean (bool): Whether to subtract the mean of the column before applying the
shift. Defaults to False.
reductions (str): The reduction to apply to the column. Should be an available method
Expand All @@ -1512,7 +1512,7 @@ def add_offsets(
if columns is None and constant is None:
# load from config
columns = []
signs = []
weights = []
preserve_mean = []
reductions = []
for k, v in self.offsets.items():
Expand All @@ -1521,9 +1521,9 @@ def add_offsets(
else:
columns.append(k)
try:
signs.append(v["sign"])
weights.append(v["weight"])
except KeyError as exc:
raise KeyError(f"Missing sign for offset column {k} in config.") from exc
raise KeyError(f"Missing weight for offset column {k} in config.") from exc
pm = v.get("preserve_mean", False)
if str(pm).lower() in ["false", "0", "no"]:
pm = False
Expand All @@ -1549,45 +1549,45 @@ def add_offsets(
# apply offset
if columns is not None:
# use passed parameters
if isinstance(signs, int):
signs = [signs]
elif not isinstance(signs, Sequence):
raise TypeError(f"Invalid type for signs: {type(signs)}")
if not all(isinstance(s, int) for s in signs):
raise TypeError(f"Invalid type for signs: {type(signs)}")
# flip signs if binding energy scale
signs = [s * scale_sign for s in signs]
if isinstance(weights, int):
weights = [weights]
elif not isinstance(weights, Sequence):
raise TypeError(f"Invalid type for weights: {type(weights)}")
if not all(isinstance(s, int) for s in weights):
raise TypeError(f"Invalid type for weights: {type(weights)}")
# flip weights if binding energy scale
weights = [s * scale_sign for s in weights]

df = dfops.offset_by_other_columns(
df=df,
target_column=energy_column,
offset_columns=columns,
signs=signs,
weights=weights,
preserve_mean=preserve_mean,
reductions=reductions,
inplace=True,
)
metadata["energy_column"] = energy_column
metadata["columns"] = columns
metadata["signs"] = signs
metadata["weights"] = weights
metadata["preserve_mean"] = preserve_mean
metadata["reductions"] = reductions

# overwrite the current offset dictionary with the parameters used
if not isinstance(columns, Sequence):
columns = [columns]
if not isinstance(signs, Sequence):
signs = [signs]
if not isinstance(weights, Sequence):
weights = [weights]
if isinstance(preserve_mean, bool):
preserve_mean = [preserve_mean] * len(columns)
if not isinstance(reductions, Sequence):
reductions = [reductions]
if len(reductions) == 1:
reductions = [reductions[0]] * len(columns)

for col, sign, pmean, red in zip(columns, signs, preserve_mean, reductions):
for col, weight, pmean, red in zip(columns, weights, preserve_mean, reductions):
self.offsets[col] = {
"sign": sign,
"weight": weight,
"preserve_mean": pmean,
"reduction": red,
}
Expand Down
16 changes: 8 additions & 8 deletions sed/core/dfops.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,7 @@ def offset_by_other_columns(
df: dask.dataframe.DataFrame,
target_column: str,
offset_columns: Union[str, Sequence[str]],
signs: Union[int, Sequence[int]],
weights: Union[float, Sequence[float]],
reductions: Union[str, Sequence[str]] = None,
preserve_mean: Union[bool, Sequence[bool]] = False,
inplace: bool = True,
Expand All @@ -281,7 +281,7 @@ def offset_by_other_columns(
df (dask.dataframe.DataFrame): Dataframe to use. Currently supports only dask dataframes.
target_column (str): Name of the column to apply the offset to.
offset_columns (str): Name of the column(s) to use for the offset.
signs (int): Sign of the offset.
weights (int): Weights to apply to each offset column before adding. Also used to change the sign.
reductions (str, optional): Reduction function to use for the offset. Defaults to "mean".
Currently, only mean is supported.
preserve_mean (bool, optional): Whether to subtract the mean of the offset column.
Expand All @@ -304,13 +304,13 @@ def offset_by_other_columns(
if any(c not in df.columns for c in offset_columns):
raise KeyError(f"{offset_columns} not in dataframe!")

if isinstance(signs, int):
signs = [signs]
elif not isinstance(signs, Sequence):
raise TypeError(f"Invalid type for signs: {type(signs)}")
if len(signs) != len(offset_columns):
if isinstance(weights, int):
weights = [weights]
elif not isinstance(weights, Sequence):
raise TypeError(f"Invalid type for signs: {type(weights)}")
if len(weights) != len(offset_columns):
raise ValueError("signs and offset_columns must have the same length!")
signs_dict = dict(zip(offset_columns, signs))
signs_dict = dict(zip(offset_columns, weights))

if isinstance(reductions, str) or reductions is None:
reductions = [reductions] * len(offset_columns)
Expand Down
16 changes: 8 additions & 8 deletions sed/core/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -1270,7 +1270,7 @@ def add_energy_offset(
self,
constant: float = None,
columns: Union[str, Sequence[str]] = None,
signs: Union[int, Sequence[int]] = None,
weights: Union[float, Sequence[float]] = None,
reductions: Union[str, Sequence[str]] = None,
preserve_mean: Union[bool, Sequence[bool]] = None,
) -> None:
Expand All @@ -1279,7 +1279,7 @@ def add_energy_offset(
Args:
constant (float, optional): The constant to shift the energy axis by.
columns (Union[str, Sequence[str]]): Name of the column(s) to apply the shift from.
signs (Union[int, Sequence[int]]): Sign of the shift to apply. (+1 or -1) A positive
weights (Union[float, Sequence[float]]): Weight of the shift to apply. A positive
sign shifts the energy axis to higher kinetic energies. Defaults to +1.
preserve_mean (bool): Whether to subtract the mean of the column before applying the
shift. Defaults to False.
Expand All @@ -1304,7 +1304,7 @@ def add_energy_offset(
constant=constant,
columns=columns,
energy_column=energy_column,
signs=signs,
weights=weights,
reductions=reductions,
preserve_mean=preserve_mean,
)
Expand All @@ -1315,7 +1315,7 @@ def add_energy_offset(
constant=constant,
columns=columns,
energy_column=energy_column,
signs=signs,
weights=weights,
reductions=reductions,
preserve_mean=preserve_mean,
)
Expand Down Expand Up @@ -1517,7 +1517,7 @@ def add_delay_offset(
constant: float = None,
flip_delay_axis: bool = None,
columns: Union[str, Sequence[str]] = None,
signs: Union[int, Sequence[int]] = None,
weights: Union[float, Sequence[float]] = None,
reductions: Union[str, Sequence[str]] = None,
preserve_mean: Union[bool, Sequence[bool]] = None,
inplace: bool = False,
Expand All @@ -1528,7 +1528,7 @@ def add_delay_offset(
Args:
constant (float, optional): The constant to shift the delay axis by.
columns (Union[str, Sequence[str]]): Name of the column(s) to apply the shift from.
signs (Union[int, Sequence[int]]): Sign of the shift to apply. (+1 or -1) A positive
weights (Union[float, Sequence[float]]): Weight of the shift to apply. A positive
sign shifts the delay axis to higher values. Defaults to +1.
preserve_mean (bool): Whether to subtract the mean of the column before applying the
shift. Defaults to False.
Expand Down Expand Up @@ -1557,7 +1557,7 @@ def add_delay_offset(
flip_delay_axis=flip_delay_axis,
columns=columns,
delay_column=delay_column,
weights=signs,
weights=weights,
reductions=reductions,
preserve_mean=preserve_mean,
inplace=inplace,
Expand All @@ -1571,7 +1571,7 @@ def add_delay_offset(
flip_delay_axis=flip_delay_axis,
columns=columns,
delay_column=delay_column,
weights=signs,
weights=weights,
reductions=reductions,
preserve_mean=preserve_mean,
inplace=inplace,
Expand Down
18 changes: 9 additions & 9 deletions tests/calibrator/test_energy.py
Original file line number Diff line number Diff line change
Expand Up @@ -583,11 +583,11 @@ def test_add_offsets_functionality():
"offset": {
"constant": 1,
"off1": {
"sign": 1,
"weight": 1,
"preserve_mean": True,
},
"off2": {"sign": -1, "preserve_mean": False},
"off3": {"sign": 1, "preserve_mean": False, "reduction": "mean"},
"off2": {"weight": -1, "preserve_mean": False},
"off3": {"weight": 1, "preserve_mean": False, "reduction": "mean"},
},
},
},
Expand All @@ -599,7 +599,7 @@ def test_add_offsets_functionality():
"constant": 1,
"energy_column": "energy",
"columns": ["off1", "off2", "off3"],
"signs": [1, -1, 1],
"weights": [1, -1, 1],
"preserve_mean": [True, False, False],
"reductions": [None, None, "mean"],
}
Expand Down Expand Up @@ -648,9 +648,9 @@ def test_add_offset_raises():
},
"offset": {
"constant": 1,
"off1": {"sign": -1, "preserve_mean": True},
"off2": {"sign": -1, "preserve_mean": False},
"off3": {"sign": 1, "preserve_mean": False, "reduction": "mean"},
"off1": {"weight": -1, "preserve_mean": True},
"off2": {"weight": -1, "preserve_mean": False},
"off3": {"weight": 1, "preserve_mean": False, "reduction": "mean"},
},
},
}
Expand All @@ -667,7 +667,7 @@ def test_add_offset_raises():
# no weight in config
with pytest.raises(KeyError):
cfg = deepcopy(cfg_dict)
cfg["energy"]["offset"]["off1"].pop("sign")
cfg["energy"]["offset"]["off1"].pop("weight")
config = parse_config(config=cfg, folder_config={}, user_config={}, system_config={})
ec = EnergyCalibrator(config=cfg, loader=get_loader("flash", config=config))
_ = ec.add_offsets(t_df)
Expand All @@ -691,7 +691,7 @@ def test_add_offset_raises():
# invalid weight type
with pytest.raises(TypeError):
cfg = deepcopy(cfg_dict)
cfg["energy"]["offset"]["off1"]["sign"] = "wrong_type"
cfg["energy"]["offset"]["off1"]["weight"] = "wrong_type"
config = parse_config(config=cfg, folder_config={}, user_config={}, system_config={})
ec = EnergyCalibrator(config=cfg, loader=get_loader("flash", config=config))
_ = ec.add_offsets(t_df)
Expand Down
10 changes: 5 additions & 5 deletions tests/test_dfops.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@ def test_offset_by_other_columns_functionality():
df=t_df.copy(),
target_column="target",
offset_columns=["off1"],
signs=[1],
weights=[1],
)
expected = [11, 22, 33, 44, 55, 66]
np.testing.assert_allclose(res["target"].values, expected)
Expand All @@ -272,7 +272,7 @@ def test_offset_by_other_columns_functionality():
df=t_df.copy(),
target_column="target",
offset_columns=["off1", "off2"],
signs=[1, -1],
weights=[1, -1],
)
expected = [10.9, 21.8, 32.7, 43.6, 54.5, 65.4]
np.testing.assert_allclose(res["target"].values, expected)
Expand All @@ -281,7 +281,7 @@ def test_offset_by_other_columns_functionality():
df=t_df.copy(),
target_column="target",
offset_columns=["off3"],
signs=[1],
weights=[1],
preserve_mean=True,
)
expected = [9.75, 19.85, 29.95, 40.05, 50.15, 60.25]
Expand All @@ -291,7 +291,7 @@ def test_offset_by_other_columns_functionality():
df=t_df.copy(),
target_column="target",
offset_columns=["off3"], # off3 has mean of 10
signs=[1],
weights=[1],
reductions="mean",
)
expected = [20, 30, 40, 50, 60, 70]
Expand All @@ -315,7 +315,7 @@ def test_offset_by_other_columns_pandas_not_working():
df=pd_df,
target_column="target",
offset_columns=["off1"],
signs=[1],
weights=[1],
)


Expand Down

0 comments on commit 0832a02

Please sign in to comment.