From 4f5aa00b9be156b49e14925651bc660b43318da5 Mon Sep 17 00:00:00 2001 From: Thomas Kluyver Date: Fri, 3 May 2024 13:55:44 +0200 Subject: [PATCH 1/9] Add API to combine corrected detector data with mask --- extra_data/components.py | 92 +++++++++++++++++++++++++++++++++------- 1 file changed, 77 insertions(+), 15 deletions(-) diff --git a/extra_data/components.py b/extra_data/components.py index 26220b7c..c1ffb8eb 100644 --- a/extra_data/components.py +++ b/extra_data/components.py @@ -122,6 +122,7 @@ class MultimodDetectorBase: _source_re = re.compile(r'(?P<detname>.+)/DET/(\d+)CH') # Override in subclass _main_data_key = '' # Key to use for checking data counts match + _mask_data_key = '' _frames_per_entry = 1 # Override if separate pulse dimension in files _modnos_start_at = 0 # Override if module numbers start at 1 (JUNGFRAU) module_shape = (0, 0) @@ -176,6 +177,13 @@ def __init__(self, data: DataCollection, detector_name=None, modules=None, def __getitem__(self, item): return MultimodKeyData(self, item) + def masked_data(self, key=None, *, mask_bits=None, masked_value=np.nan): + key = key or self._main_data_key + return DetectorMaskedKeyData( + self, key, mask_key=self._mask_data_key, + mask_bits=mask_bits, masked_value=masked_value + ) + @classmethod def _find_detector_name(cls, data): detector_names = set() @@ -471,6 +479,7 @@ class XtdfDetectorBase(MultimodDetectorBase): """ n_modules = 16 _main_data_key = 'image.data' + _mask_data_key = 'image.mask' def __init__(self, data: DataCollection, detector_name=None, modules=None, *, min_modules=1): @@ -481,6 +490,14 @@ def __getitem__(self, item): return XtdfImageMultimodKeyData(self, item) return super().__getitem__(item) + def masked_data(self, key=None, *, mask_bits=None, masked_value=np.nan): + key = key or self._main_data_key + assert key.startswith('image.') + return XtdfMaskedKeyData( + self, key, mask_key=self._mask_data_key, + mask_bits=mask_bits, masked_value=masked_value + ) + # Several methods below are 
overridden in LPD1M for parallel gain mode @staticmethod @@ -746,7 +763,6 @@ def zip_trains_pulses(trains, pulses): return res - class MultimodKeyData: def __init__(self, det: MultimodDetectorBase, key): self.det = det @@ -755,6 +771,9 @@ def __init__(self, det: MultimodDetectorBase, key): m: det.data[s, key] for (m, s) in det.modno_to_source.items() } + def _init_kwargs(self): # Extended in subclasses + return dict(det=self.det, key=self.key) + @property def train_ids(self): return self.det.train_ids @@ -799,9 +818,10 @@ def dimensions(self): def dtype(self): return self._eg_keydata.dtype + # For select_trains() & split_trains() to work correctly with subclasses def _with_selected_det(self, det_selected): - # Overridden for XtdfImageMultimodKeyData to preserve pulse selection - return MultimodKeyData(det_selected, self.key) + kw = self._init_kwargs() | {'det': det_selected} + return type(self)(**kw) def select_trains(self, trains): return self._with_selected_det(self.det.select_trains(trains)) @@ -831,13 +851,16 @@ def ndarray(self, *, fill_value=None, out=None, roi=(), astype=None, module_gaps ) return out - def xarray(self, *, fill_value=None, roi=(), astype=None): + def _wrap_xarray(self, arr): from xarray import DataArray - arr = self.ndarray(fill_value=fill_value, roi=roi, astype=astype) coords = {'module': self.modules, 'trainId': self.train_id_coordinates()} return DataArray(arr, dims=self.dimensions, coords=coords) + def xarray(self, *, fill_value=None, roi=(), astype=None): + arr = self.ndarray(fill_value=fill_value, roi=roi, astype=astype) + return self._wrap_xarray(arr) + def dask_array(self, *, labelled=False, fill_value=None, astype=None): from dask.delayed import delayed from dask.array import concatenate, from_delayed @@ -854,9 +877,7 @@ def dask_array(self, *, labelled=False, fill_value=None, astype=None): ) for c in split], axis=1) if labelled: - from xarray import DataArray - coords = {'module': self.modules, 'trainId': 
self.train_id_coordinates()} - return DataArray(arr, dims=self.dimensions, coords=coords) + return self._wrap_xarray(arr) return arr @@ -880,6 +901,42 @@ def data_availability(self, module_gaps=False): return out +def _load_mask(mask_kd, module_gaps, mask_bits=None): + """Load the mask & convert to boolean (True for bad pixels)""" + mask_data = mask_kd.ndarray(module_gaps=module_gaps) + if mask_bits is None: + return mask_data != 0 # Skip extra temporary array from & + else: + return (mask_data & mask_bits) != 0 + + +class DetectorMaskedKeyData(MultimodKeyData): + def __init__(self, *args, mask_key, mask_bits, masked_value, **kwargs): + super().__init__(*args, **kwargs) + self._mask_key = mask_key + self._mask_bits = mask_bits + self._masked_value = masked_value + + def __repr__(self): + return f"" + + def _init_kwargs(self): + return super()._init_kwargs() | dict( + mask_key=self._mask_key, + mask_bits=self._mask_bits, + masked_value=self._masked_value, + ) + + def ndarray(self, *, module_gaps=False, **kwargs): + # Load mask first: it shrinks from 4 bytes/px to 1, so peak memory use + # is lower than loading it after the data + mask = _load_mask(self.det[self._mask_key], module_gaps, self._mask_bits) + + data = super().ndarray(module_gaps=module_gaps, **kwargs) + data[mask] = self._masked_value + return data + + class XtdfImageMultimodKeyData(MultimodKeyData): _sel_frames_cached = None det: XtdfDetectorBase @@ -890,6 +947,9 @@ def __init__(self, det: XtdfDetectorBase, key, pulse_sel=by_index[0:MAX_PULSES:1 entry_shape = self._eg_keydata.entry_shape self._extraneous_dim = (len(entry_shape) >= 1) and (entry_shape[0] == 1) + def _init_kwargs(self): + return super()._init_kwargs() | dict(pulse_sel=self._pulse_sel) + @property def ndim(self): return super().ndim - (1 if self._extraneous_dim else 0) @@ -958,14 +1018,9 @@ def dimensions(self): entry_dims = [f'dim_{i}' for i in range(ndim_inner)] return ['module', 'train_pulse'] + entry_dims - # Used for .select_trains() 
and .split_trains() - def _with_selected_det(self, det_selected): - return XtdfImageMultimodKeyData(det_selected, self.key, self._pulse_sel) - def select_pulses(self, pulses): - pulses = _check_pulse_selection(pulses) - - return XtdfImageMultimodKeyData(self.det, self.key, pulses) + kw = self._init_kwargs() | {'pulse_sel': _check_pulse_selection(pulses)} + return type(self)(**kw) @property def _sel_frames(self): @@ -1102,6 +1157,12 @@ def dask_array(self, *, labelled=False, subtrain_index='pulseId', return arr + +class XtdfMaskedKeyData(DetectorMaskedKeyData, XtdfImageMultimodKeyData): + # Created from xtdf_det.masked_data() + pass + + class FramesFileWriter(FileWriter): """Write selected detector frames in European XFEL HDF5 format""" def __init__(self, path, data, inc_tp_ids): @@ -1625,6 +1686,7 @@ class JUNGFRAU(MultimodDetectorBase): r'(MODULE_|RECEIVER-|JNGFR)(?P<modno>\d+)' ) _main_data_key = 'data.adc' + _mask_data_key = 'data.mask' _modnos_start_at = 1 module_shape = (512, 1024) From d89c9692b26715b4f46ccf7579860a2273ad8972 Mon Sep 17 00:00:00 2001 From: Thomas Kluyver Date: Fri, 3 May 2024 14:31:56 +0200 Subject: [PATCH 2/9] Add docstrings for new masked_data methods --- docs/agipd_lpd_data.rst | 4 ++++ extra_data/components.py | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) diff --git a/docs/agipd_lpd_data.rst b/docs/agipd_lpd_data.rst index d4e5ac4f..a4fa7474 100644 --- a/docs/agipd_lpd_data.rst +++ b/docs/agipd_lpd_data.rst @@ -31,6 +31,8 @@ DSSC and JUNGFRAU, pulling together the separate modules into a single array. arranged along the first axis. So ``det['image.data'].ndarray()`` will load all the selected data as a NumPy array. + .. automethod:: masked_data + .. automethod:: get_array .. automethod:: get_dask_array @@ -58,6 +60,8 @@ DSSC and JUNGFRAU, pulling together the separate modules into a single array. arranged along the first axis. So ``jf['data.adc'].ndarray()`` will load all the selected data as a NumPy array. + .. 
automethod:: masked_data + .. automethod:: get_array .. automethod:: get_dask_array diff --git a/extra_data/components.py b/extra_data/components.py index c1ffb8eb..c289f488 100644 --- a/extra_data/components.py +++ b/extra_data/components.py @@ -178,6 +178,23 @@ def __getitem__(self, item): return MultimodKeyData(self, item) def masked_data(self, key=None, *, mask_bits=None, masked_value=np.nan): + """Combine corrected data with the mask in the files + + This provides an interface similar to ``det['data.adc']``, but masking + out pixels with the mask from the correction pipeline. + + Parameters + ---------- + + key: str + The data key to look at, by default the main data key of the detector + (e.g. 'data.adc'). + mask_bits: int + Bitmask of reasons to exclude pixels. By default, all types of bad + pixel are masked out. + masked_value: int, float + The replacement value to use for masked data. By default this is NaN. + """ key = key or self._main_data_key return DetectorMaskedKeyData( self, key, mask_key=self._mask_data_key, @@ -491,6 +508,23 @@ def __getitem__(self, item): return super().__getitem__(item) def masked_data(self, key=None, *, mask_bits=None, masked_value=np.nan): + """Combine corrected data with the mask in the files + + This provides an interface similar to ``det['image.data']``, but masking + out pixels with the mask from the correction pipeline. + + Parameters + ---------- + + key: str + The data key to look at, by default the main data key of the detector + (e.g. 'image.data'). + mask_bits: int + Bitmask of reasons to exclude pixels. By default, all types of bad + pixel are masked out. + masked_value: int, float + The replacement value to use for masked data. By default this is NaN. 
+ """ key = key or self._main_data_key assert key.startswith('image.') return XtdfMaskedKeyData( @@ -928,6 +962,7 @@ def _init_kwargs(self): ) def ndarray(self, *, module_gaps=False, **kwargs): + """Load data into a NumPy array & apply the mask""" # Load mask first: it shrinks from 4 bytes/px to 1, so peak memory use # is lower than loading it after the data mask = _load_mask(self.det[self._mask_key], module_gaps, self._mask_bits) From af0a2797ce1fea9c6bcc46c547ce0b997e38fd0a Mon Sep 17 00:00:00 2001 From: Thomas Kluyver Date: Fri, 3 May 2024 14:40:41 +0200 Subject: [PATCH 3/9] Move _load_mask() into class --- extra_data/components.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/extra_data/components.py b/extra_data/components.py index c289f488..2aedf163 100644 --- a/extra_data/components.py +++ b/extra_data/components.py @@ -935,15 +935,6 @@ def data_availability(self, module_gaps=False): return out -def _load_mask(mask_kd, module_gaps, mask_bits=None): - """Load the mask & convert to boolean (True for bad pixels)""" - mask_data = mask_kd.ndarray(module_gaps=module_gaps) - if mask_bits is None: - return mask_data != 0 # Skip extra temporary array from & - else: - return (mask_data & mask_bits) != 0 - - class DetectorMaskedKeyData(MultimodKeyData): def __init__(self, *args, mask_key, mask_bits, masked_value, **kwargs): super().__init__(*args, **kwargs) @@ -961,11 +952,19 @@ def _init_kwargs(self): masked_value=self._masked_value, ) + def _load_mask(self, module_gaps): + """Load the mask & convert to boolean (True for bad pixels)""" + mask_data = self.det[self._mask_key].ndarray(module_gaps=module_gaps) + if self._mask_bits is None: + return mask_data != 0 # Skip extra temporary array from & + else: + return (mask_data & self._mask_bits) != 0 + def ndarray(self, *, module_gaps=False, **kwargs): """Load data into a NumPy array & apply the mask""" # Load mask first: it shrinks from 4 bytes/px to 1, so peak memory use # is 
lower than loading it after the data - mask = _load_mask(self.det[self._mask_key], module_gaps, self._mask_bits) + mask = self._load_mask(module_gaps=module_gaps) data = super().ndarray(module_gaps=module_gaps, **kwargs) data[mask] = self._masked_value From 1f7fb9320f8fed00f15795abf6cc5a087c85bc5e Mon Sep 17 00:00:00 2001 From: Thomas Kluyver Date: Fri, 3 May 2024 16:09:04 +0200 Subject: [PATCH 4/9] Fix combining dicts on Python < 3.9 --- extra_data/components.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/extra_data/components.py b/extra_data/components.py index 2aedf163..00466cc0 100644 --- a/extra_data/components.py +++ b/extra_data/components.py @@ -854,7 +854,8 @@ def dtype(self): # For select_trains() & split_trains() to work correctly with subclasses def _with_selected_det(self, det_selected): - kw = self._init_kwargs() | {'det': det_selected} + kw = self._init_kwargs() + kw.update(det=det_selected) return type(self)(**kw) def select_trains(self, trains): @@ -946,11 +947,13 @@ def __repr__(self): return f"" def _init_kwargs(self): - return super()._init_kwargs() | dict( + kw = super()._init_kwargs() + kw.update( mask_key=self._mask_key, mask_bits=self._mask_bits, masked_value=self._masked_value, ) + return kw def _load_mask(self, module_gaps): """Load the mask & convert to boolean (True for bad pixels)""" @@ -982,7 +985,9 @@ def __init__(self, det: XtdfDetectorBase, key, pulse_sel=by_index[0:MAX_PULSES:1 self._extraneous_dim = (len(entry_shape) >= 1) and (entry_shape[0] == 1) def _init_kwargs(self): - return super()._init_kwargs() | dict(pulse_sel=self._pulse_sel) + kw = super()._init_kwargs() + kw.update(pulse_sel=self._pulse_sel) + return kw @property def ndim(self): @@ -1053,7 +1058,8 @@ def dimensions(self): return ['module', 'train_pulse'] + entry_dims def select_pulses(self, pulses): - kw = self._init_kwargs() | {'pulse_sel': _check_pulse_selection(pulses)} + kw = self._init_kwargs() + 
kw.update(pulse_sel=_check_pulse_selection(pulses)) return type(self)(**kw) @property From 137ed9a19b0042980749b5c73d1b64c403f946f2 Mon Sep 17 00:00:00 2001 From: Thomas Kluyver Date: Fri, 3 May 2024 16:54:06 +0200 Subject: [PATCH 5/9] Add some tests for det.masked_data() API --- extra_data/components.py | 1 + extra_data/tests/make_examples.py | 10 ++++++ extra_data/tests/test_components.py | 56 +++++++++++++++++++++++++++++ 3 files changed, 67 insertions(+) diff --git a/extra_data/components.py b/extra_data/components.py index 00466cc0..d252764c 100644 --- a/extra_data/components.py +++ b/extra_data/components.py @@ -968,6 +968,7 @@ def ndarray(self, *, module_gaps=False, **kwargs): # Load mask first: it shrinks from 4 bytes/px to 1, so peak memory use # is lower than loading it after the data mask = self._load_mask(module_gaps=module_gaps) + print(mask[0, 0, 0, :35]) data = super().ndarray(module_gaps=module_gaps, **kwargs) data[mask] = self._masked_value diff --git a/extra_data/tests/make_examples.py b/extra_data/tests/make_examples.py index 12a36b23..961bd12f 100644 --- a/extra_data/tests/make_examples.py +++ b/extra_data/tests/make_examples.py @@ -326,6 +326,12 @@ def make_reduced_spb_run(dir_path, raw=True, rng=None, format_version='0.5'): frames_per_train=frame_counts) ], ntrains=64, chunksize=32, format_version=format_version) + if modno == 9 and not raw: + # For testing masked_data + with h5py.File(path, 'a') as f: + mask_ds = f['INSTRUMENT/SPB_DET_AGIPD1M-1/DET/9CH0:xtdf/image/mask'] + mask_ds[0, 0, :32] = np.arange(32) + write_file(osp.join(dir_path, '{}-R0238-DA01-S00000.h5'.format(prefix)), [ XGM('SA1_XTD2_XGM/DOOCS/MAIN'), XGM('SPB_XTD9_XGM/DOOCS/MAIN'), @@ -408,6 +414,10 @@ def make_fxe_jungfrau_run(dir_path): write_file(path, [ JUNGFRAUModule(f'FXE_XAD_JF500K/DET/JNGFR03') ], ntrains=100, chunksize=1, format_version='1.0') + with h5py.File(path, 'a') as f: + # For testing masked_data + mask_ds = 
f['INSTRUMENT/FXE_XAD_JF500K/DET/JNGFR03:daqOutput/data/mask'] + mask_ds[0, 0, 0, :32] = np.arange(32) write_file(osp.join(dir_path, f'RAW-R0052-JNGFRCTRL00-S00000.h5'), [ JUNGFRAUControl('FXE_XAD_JF1M/DET/CONTROL'), diff --git a/extra_data/tests/test_components.py b/extra_data/tests/test_components.py index db771143..589973f5 100644 --- a/extra_data/tests/test_components.py +++ b/extra_data/tests/test_components.py @@ -252,6 +252,62 @@ def test_jungfraus_first_modno(mock_jungfrau_run, mock_fxe_jungfrau_run): assert np.all(arr['module'] == [modno]) +def test_jungfrau_masked_data(mock_fxe_jungfrau_run): + run = RunDirectory(mock_fxe_jungfrau_run) + jf = JUNGFRAU(run, 'FXE_XAD_JF500K') + + # Default options + kd = jf.masked_data().select_trains(np.s_[:1]) + arr = kd.ndarray() + assert arr.shape == (1, 1, 16, 512, 1024) + assert arr.dtype == np.float32 + line0 = np.zeros(1024, dtype=np.float32) + line0[1:32] = np.nan + np.testing.assert_array_equal(arr[0, 0, 0, 0, :], line0) + + # Xarray + xarr = kd.xarray() + assert xarr.dims[:2] == ('module', 'trainId') + np.testing.assert_array_equal(xarr.values[0, 0, 0, 0, :], line0) + + # Specify which mask bits to use, & replace masked values with 99 + kd = jf.masked_data(mask_bits=1, masked_value=99).select_trains(np.s_[:1]) + arr = kd.ndarray() + assert arr.shape == (1, 1, 16, 512, 1024) + line0 = np.zeros(1024, dtype=np.float32) + line0[1:32:2] = 99 + np.testing.assert_array_equal(arr[0, 0, 0, 0, :], line0) + + # Different field + kd = jf.masked_data('data.gain', masked_value=255).select_trains(np.s_[:1]) + arr = kd.ndarray() + assert arr.shape == (1, 1, 16, 512, 1024) + assert arr.dtype == np.uint8 + line0 = np.zeros(1024, dtype=np.uint8) + line0[1:32] = 255 + np.testing.assert_array_equal(arr[0, 0, 0, 0, :], line0) + + +def test_xtdf_masked_data(mock_reduced_spb_proc_run): + run = RunDirectory(mock_reduced_spb_proc_run) + agipd = AGIPD1M(run, modules=[8, 9]) + + kd = agipd.masked_data().select_trains(np.s_[:1]) + assert 
kd.shape == (2, kd.shape[1], 512, 128) + arr = kd.ndarray() + assert arr.shape == kd.shape + assert arr.dtype == np.float32 + line0_2mod = np.zeros((2, 128), dtype=np.float32) + line0_2mod[1, 1:32] = np.nan + np.testing.assert_array_equal(arr[:, 0, 0, :], line0_2mod) + + kd = agipd.masked_data(mask_bits=1, masked_value=-1).select_trains(np.s_[:1]) + arr = kd.ndarray() + line0_2mod = np.zeros((2, 128), dtype=np.float32) + line0_2mod[1, 1:32:2] = -1 + np.testing.assert_array_equal(arr[:, 0, 0, :], line0_2mod) + + def test_get_dask_array(mock_fxe_raw_run): run = RunDirectory(mock_fxe_raw_run) det = LPD1M(run) From f336dc4a9c5a24b987016bce14c3115037651e5c Mon Sep 17 00:00:00 2001 From: Thomas Kluyver Date: Fri, 3 May 2024 17:23:19 +0200 Subject: [PATCH 6/9] Check that the mask key exists when using .masked_data() --- extra_data/components.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/extra_data/components.py b/extra_data/components.py index d252764c..30cb287a 100644 --- a/extra_data/components.py +++ b/extra_data/components.py @@ -196,6 +196,7 @@ def masked_data(self, key=None, *, mask_bits=None, masked_value=np.nan): The replacement value to use for masked data. By default this is NaN. 
""" key = key or self._main_data_key + self[self._mask_data_key] # Check that the mask is there return DetectorMaskedKeyData( self, key, mask_key=self._mask_data_key, mask_bits=mask_bits, masked_value=masked_value @@ -527,6 +528,7 @@ def masked_data(self, key=None, *, mask_bits=None, masked_value=np.nan): """ key = key or self._main_data_key assert key.startswith('image.') + self[self._mask_data_key] # Check that the mask is there return XtdfMaskedKeyData( self, key, mask_key=self._mask_data_key, mask_bits=mask_bits, masked_value=masked_value From 3251393dbb27d48c5b92dd37f86c1bb736b1b850 Mon Sep 17 00:00:00 2001 From: Thomas Kluyver Date: Tue, 7 May 2024 12:11:14 +0100 Subject: [PATCH 7/9] Allow passing bits to mask out as an iterable --- extra_data/components.py | 24 ++++++++++++++++++------ extra_data/tests/test_components.py | 4 ++-- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/extra_data/components.py b/extra_data/components.py index 30cb287a..a6d7b2a7 100644 --- a/extra_data/components.py +++ b/extra_data/components.py @@ -2,6 +2,7 @@ """ import logging import re +from collections.abc import Iterable from copy import copy from warnings import warn @@ -189,19 +190,28 @@ def masked_data(self, key=None, *, mask_bits=None, masked_value=np.nan): key: str The data key to look at, by default the main data key of the detector (e.g. 'data.adc'). - mask_bits: int - Bitmask of reasons to exclude pixels. By default, all types of bad - pixel are masked out. + mask_bits: int or list of ints + Reasons to exclude pixels, as a bitmask or a list of integers. + By default, all types of bad pixel are masked out. masked_value: int, float The replacement value to use for masked data. By default this is NaN. 
""" key = key or self._main_data_key self[self._mask_data_key] # Check that the mask is there + if isinstance(mask_bits, Iterable): + mask_bits = self._combine_bitfield(mask_bits) return DetectorMaskedKeyData( self, key, mask_key=self._mask_data_key, mask_bits=mask_bits, masked_value=masked_value ) + @staticmethod + def _combine_bitfield(ints): + res = 0 + for i in ints: + res |= i + return res + @classmethod def _find_detector_name(cls, data): detector_names = set() @@ -520,15 +530,17 @@ def masked_data(self, key=None, *, mask_bits=None, masked_value=np.nan): key: str The data key to look at, by default the main data key of the detector (e.g. 'image.data'). - mask_bits: int - Bitmask of reasons to exclude pixels. By default, all types of bad - pixel are masked out. + mask_bits: int or list of ints + Reasons to exclude pixels, as a bitmask or a list of integers. + By default, all types of bad pixel are masked out. masked_value: int, float The replacement value to use for masked data. By default this is NaN. 
""" key = key or self._main_data_key assert key.startswith('image.') self[self._mask_data_key] # Check that the mask is there + if isinstance(mask_bits, Iterable): + mask_bits = self._combine_bitfield(mask_bits) return XtdfMaskedKeyData( self, key, mask_key=self._mask_data_key, mask_bits=mask_bits, masked_value=masked_value diff --git a/extra_data/tests/test_components.py b/extra_data/tests/test_components.py index 589973f5..f28a2814 100644 --- a/extra_data/tests/test_components.py +++ b/extra_data/tests/test_components.py @@ -301,10 +301,10 @@ def test_xtdf_masked_data(mock_reduced_spb_proc_run): line0_2mod[1, 1:32] = np.nan np.testing.assert_array_equal(arr[:, 0, 0, :], line0_2mod) - kd = agipd.masked_data(mask_bits=1, masked_value=-1).select_trains(np.s_[:1]) + kd = agipd.masked_data(mask_bits=[1, 4], masked_value=-1).select_trains(np.s_[:1]) arr = kd.ndarray() line0_2mod = np.zeros((2, 128), dtype=np.float32) - line0_2mod[1, 1:32:2] = -1 + line0_2mod[1, np.nonzero(np.arange(32) & 5)] = -1 np.testing.assert_array_equal(arr[:, 0, 0, :], line0_2mod) From 31459bc0aad38ccb03dbf9f9a7ad8ff742fe3e5d Mon Sep 17 00:00:00 2001 From: Thomas Kluyver Date: Wed, 22 May 2024 16:56:41 +0100 Subject: [PATCH 8/9] Clean up debugging print() --- extra_data/components.py | 1 - 1 file changed, 1 deletion(-) diff --git a/extra_data/components.py b/extra_data/components.py index a6d7b2a7..9238f0ac 100644 --- a/extra_data/components.py +++ b/extra_data/components.py @@ -982,7 +982,6 @@ def ndarray(self, *, module_gaps=False, **kwargs): # Load mask first: it shrinks from 4 bytes/px to 1, so peak memory use # is lower than loading it after the data mask = self._load_mask(module_gaps=module_gaps) - print(mask[0, 0, 0, :35]) data = super().ndarray(module_gaps=module_gaps, **kwargs) data[mask] = self._masked_value From 259f2c3bffd6522a64eb0ffd92694f2a506fa8be Mon Sep 17 00:00:00 2001 From: Thomas Kluyver Date: Mon, 3 Jun 2024 17:39:20 +0100 Subject: [PATCH 9/9] Link to BadPixels in EXtra 
docs, better error for missing mask --- extra_data/components.py | 18 +++++++++++++++--- extra_data/tests/test_components.py | 8 ++++++++ 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/extra_data/components.py b/extra_data/components.py index 9238f0ac..119a3f3e 100644 --- a/extra_data/components.py +++ b/extra_data/components.py @@ -178,6 +178,9 @@ def __init__(self, data: DataCollection, detector_name=None, modules=None, def __getitem__(self, item): return MultimodKeyData(self, item) + def __contains__(self, item): + return all(item in self.data[s] for s in self.source_to_modno) + def masked_data(self, key=None, *, mask_bits=None, masked_value=np.nan): """Combine corrected data with the mask in the files @@ -192,12 +195,17 @@ def masked_data(self, key=None, *, mask_bits=None, masked_value=np.nan): (e.g. 'data.adc'). mask_bits: int or list of ints Reasons to exclude pixels, as a bitmask or a list of integers. - By default, all types of bad pixel are masked out. + By default, all types of bad pixel are masked out. See the possible + values at: https://extra.readthedocs.io/en/latest/calibration/#extra.calibration.BadPixels masked_value: int, float The replacement value to use for masked data. By default this is NaN. """ key = key or self._main_data_key - self[self._mask_data_key] # Check that the mask is there + if self._mask_data_key not in self: + raise RuntimeError( + f"This data doesn't include a mask ({self._mask_data_key}). " + f"You might be using raw instead of corrected data." + ) if isinstance(mask_bits, Iterable): mask_bits = self._combine_bitfield(mask_bits) return DetectorMaskedKeyData( @@ -538,7 +546,11 @@ def masked_data(self, key=None, *, mask_bits=None, masked_value=np.nan): """ key = key or self._main_data_key assert key.startswith('image.') - self[self._mask_data_key] # Check that the mask is there + if self._mask_data_key not in self: + raise RuntimeError( + f"This data doesn't include a mask ({self._mask_data_key}). 
" + f"You might be using raw instead of corrected data." + ) if isinstance(mask_bits, Iterable): mask_bits = self._combine_bitfield(mask_bits) return XtdfMaskedKeyData( diff --git a/extra_data/tests/test_components.py b/extra_data/tests/test_components.py index f28a2814..ad0a8080 100644 --- a/extra_data/tests/test_components.py +++ b/extra_data/tests/test_components.py @@ -308,6 +308,14 @@ def test_xtdf_masked_data(mock_reduced_spb_proc_run): np.testing.assert_array_equal(arr[:, 0, 0, :], line0_2mod) +def test_masked_data_raw_error(mock_fxe_raw_run): + run = RunDirectory(mock_fxe_raw_run) + lpd = LPD1M(run) + + with pytest.raises(RuntimeError, match="image.mask"): + lpd.masked_data() + + def test_get_dask_array(mock_fxe_raw_run): run = RunDirectory(mock_fxe_raw_run) det = LPD1M(run)