From 503e9f2c93d83ea2d5b04f54aa60fba8c0111e67 Mon Sep 17 00:00:00 2001 From: rettigl Date: Tue, 10 Oct 2023 00:07:27 +0200 Subject: [PATCH 1/2] add default jitter axes to default config, and add tests for that. Also, add **kwds arguments to add_jitter --- sed/config/default.yaml | 2 ++ sed/core/processor.py | 14 ++++++++------ tests/test_processor.py | 4 ++++ 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/sed/config/default.yaml b/sed/config/default.yaml index 85066a04..1eca22cc 100644 --- a/sed/config/default.yaml +++ b/sed/config/default.yaml @@ -31,6 +31,8 @@ dataframe: tof_binning: 1 # binning factor used for the adc coordinate (2^(adc_binning-1)) adc_binning: 1 + # list of columns to apply jitter to + jitter_cols: ["@x_column", "@y_column", "@tof_column"] energy: # Number of bins to use for energy calibration traces diff --git a/sed/core/processor.py b/sed/core/processor.py index 9069215c..ef6b041d 100644 --- a/sed/core/processor.py +++ b/sed/core/processor.py @@ -1180,23 +1180,25 @@ def calibrate_delay_axis( else: print(self._dataframe) - def add_jitter(self, cols: Sequence[str] = None): + def add_jitter(self, cols: List[str] = None, **kwds): """Add jitter to the selected dataframe columns. Args: - cols (Sequence[str], optional): The colums onto which to apply jitter. + cols (List[str], optional): The colums onto which to apply jitter. Defaults to config["dataframe"]["jitter_cols"]. 
+ **kwds: keyword arguments passed to apply_jitter """ if cols is None: - cols = self._config["dataframe"].get( - "jitter_cols", - self._dataframe.columns, - ) # jitter all columns + cols = self._config["dataframe"]["jitter_cols"] + for loc, col in enumerate(cols): + if col.startswith("@"): + cols[loc] = self._config["dataframe"].get(col.strip("@")) self._dataframe = self._dataframe.map_partitions( apply_jitter, cols=cols, cols_jittered=cols, + **kwds, ) metadata = [] for col in cols: diff --git a/tests/test_processor.py b/tests/test_processor.py index fba83d77..270d1614 100644 --- a/tests/test_processor.py +++ b/tests/test_processor.py @@ -581,11 +581,15 @@ def test_add_jitter(): system_config={}, ) res1 = processor.dataframe["X"].compute() + res1a = processor.dataframe["ADC"].compute() processor.add_jitter() res2 = processor.dataframe["X"].compute() + res2a = processor.dataframe["ADC"].compute() np.testing.assert_allclose(res1, np.round(res1)) np.testing.assert_allclose(res1, np.round(res2)) assert (res1 != res2).all() + # test that jittering is not applied on ADC column + np.testing.assert_allclose(res1a, res2a) def test_event_histogram(): From cf64de25c5895f0ceb634074809f10bd5f600a83 Mon Sep 17 00:00:00 2001 From: rettigl Date: Sat, 14 Oct 2023 21:40:48 +0200 Subject: [PATCH 2/2] add amps as argument to add_jitter, and provide better description. Also add jitter_amps to default config --- sed/config/default.yaml | 4 +++- sed/core/dfops.py | 2 ++ sed/core/processor.py | 17 +++++++++++++++-- 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/sed/config/default.yaml b/sed/config/default.yaml index 1eca22cc..2972c345 100644 --- a/sed/config/default.yaml +++ b/sed/config/default.yaml @@ -31,8 +31,10 @@ dataframe: tof_binning: 1 # binning factor used for the adc coordinate (2^(adc_binning-1)) adc_binning: 1 - # list of columns to apply jitter to + # list of columns to apply jitter to. 
jitter_cols: ["@x_column", "@y_column", "@tof_column"] + # Jitter amplitude or list of jitter amplitudes. Should equal half the digital step size of each jitter_column + jitter_amps: [0.5, 0.5, 0.5] energy: # Number of bins to use for energy calibration traces diff --git a/sed/core/dfops.py b/sed/core/dfops.py index 4bc7c386..c5ef1cef 100644 --- a/sed/core/dfops.py +++ b/sed/core/dfops.py @@ -29,6 +29,8 @@ def apply_jitter( with added jitter. Defaults to None. amps (Union[float, Sequence[float]], optional): Amplitude scalings for the jittering noise. If one number is given, the same is used for all axes. + For normal noise, the added noise will have standard deviation amp, for + uniform noise it will cover the interval [-amp, +amp]. Defaults to 0.5. jitter_type (str, optional): the type of jitter to add. 'uniform' or 'normal' distributed noise. Defaults to "uniform". diff --git a/sed/core/processor.py b/sed/core/processor.py index ef6b041d..93a0413f 100644 --- a/sed/core/processor.py +++ b/sed/core/processor.py @@ -1180,13 +1180,22 @@ def calibrate_delay_axis( else: print(self._dataframe) - def add_jitter(self, cols: List[str] = None, **kwds): + def add_jitter( + self, + cols: List[str] = None, + amps: Union[float, Sequence[float]] = None, + **kwds, + ): """Add jitter to the selected dataframe columns. Args: cols (List[str], optional): The colums onto which to apply jitter. Defaults to config["dataframe"]["jitter_cols"]. - **kwds: keyword arguments passed to apply_jitter + amps (Union[float, Sequence[float]], optional): Amplitude scalings for the + jittering noise. If one number is given, the same is used for all axes. + For uniform noise (default) it will cover the interval [-amp, +amp]. + Defaults to config["dataframe"]["jitter_amps"]. 
+ **kwds: additional keyword arguments passed to apply_jitter """ if cols is None: cols = self._config["dataframe"]["jitter_cols"] @@ -1194,10 +1203,14 @@ def add_jitter(self, cols: List[str] = None, **kwds): if col.startswith("@"): cols[loc] = self._config["dataframe"].get(col.strip("@")) + if amps is None: + amps = self._config["dataframe"]["jitter_amps"] + self._dataframe = self._dataframe.map_partitions( apply_jitter, cols=cols, cols_jittered=cols, + amps=amps, **kwds, ) metadata = []