From 8fe8289a160a3fee6a6c372524ade2d1afcb1afd Mon Sep 17 00:00:00 2001 From: Thomas Kluyver Date: Fri, 10 Mar 2023 15:11:51 +0000 Subject: [PATCH 1/6] First attempt at LPD Mini components class --- extra_data/components.py | 99 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 96 insertions(+), 3 deletions(-) diff --git a/extra_data/components.py b/extra_data/components.py index 1ad51fd9..1878e45b 100644 --- a/extra_data/components.py +++ b/extra_data/components.py @@ -1542,7 +1542,7 @@ class LPD1M(LPDBase, XtdfDetectorBase): @multimod_detectors -class LPDMini(LPDBase, XtdfDetectorBase): +class LPDMini(XtdfDetectorBase): """An interface to LPD-Mini data. Parameters @@ -1561,8 +1561,101 @@ class LPDMini(LPDBase, XtdfDetectorBase): repeat the pulse & cell IDs from the first 1/3 of each train, and add gain stage labels from 0 (high-gain) to 2 (low-gain). """ - _source_re = re.compile(r'(?P.+_LPD_MINI.*)/DET/(?P\d+)CH') - module_shape = (256, 256) + _source_re_raw = re.compile(r'(?P.+_LPD_MINI.*)/DET/(?P\d+)CH') + _source_re_corr = re.compile(r'(?P.+_LPD_MINI.*)/CORR/(?P\d+)CH') + module_shape = (32, 256) + + def __init__(self, data: DataCollection, detector_name=None, modules=None, + *, corrected=True): + self.corrected = corrected + self._source_re = self._source_re_corr if corrected else self._source_re_raw + super().__init__(data, detector_name, modules=[0]) + + def __getitem__(self, item): + if item == 'image.data' and not self.corrected: + return LPDMiniRawDataKey(self, item) + return super().__getitem__(item) + + +class LPDMiniImageKey(XtdfImageMultimodKeyData): + def __init__(self, det: XtdfDetectorBase, key, pulse_sel=by_index[0:MAX_PULSES:1], modules=None, corrected=True): + super().__init__(det, key, pulse_sel) + if modules is None: + eshape = self._eg_keydata.entry_shape + nmod = eshape[-3] if corrected else (eshape[-2] // 32) + modules = list(range(nmod)) + self._modules = modules + self.corrected = corrected + + @property + def _has_modules(self): + return (self._eg_keydata.ndim - self._extraneous_dim) > 1 + + @property + def ndim(self): + return super().ndim if self._has_modules else (super().ndim - 1) + + @property + def dimensions(self): + if self._has_modules: + return ['trainId', 'module'] + ['dim_%d' % i for i in range(self.ndim - 2)] + return ['trainId'] + + @property + def modules(self): + return self._modules + + def buffer_shape(self, module_gaps=False, roi=()): + """Get the array shape for this data + + If *module_gaps* is True, include space for modules which are missing + from the data. *roi* may be a tuple of slices defining a region of + interest on the inner dimensions of the data. + """ + if self._has_modules: + nframes_sel = len(self.train_id_coordinates()) + module_dim = 8 if module_gaps else len(self.modules) + + return (module_dim, nframes_sel) + roi_shape(self.det.module_shape, roi) + else: + return super().buffer_shape(module_gaps, roi)[1:] + + # Used for .select_trains() and .split_trains() + def _with_selected_det(self, det_selected): + return LPDMiniRawDataKey(det_selected, self.key, self._pulse_sel, modules=self.modules) + + def select_pulses(self, pulses): + pulses = _check_pulse_selection(pulses) + + return LPDMiniRawDataKey(self.det, self.key, pulses, modules=self.modules) + + def ndarray(self, *, fill_value=None, out=None, roi=(), astype=None, module_gaps=False): + """Get an array of per-pulse data (image.*) for xtdf detector""" + if roi or module_gaps: + raise NotImplementedError + + # trains, modules, 32, 256 + out_shape = self.buffer_shape(module_gaps=module_gaps, roi=roi) + + if out is None: + dtype = self._eg_keydata.dtype if astype is None else np.dtype(astype) + out = _out_array(out_shape, dtype, fill_value=fill_value) + elif out.shape != out_shape: + raise ValueError(f'requires output array of shape {out_shape}') + + reading_view = out.view() + if not self.corrected: + reading_view.shape = ( + out_shape[:1] + + (1,) if self._extraneous_dim else () + + (self.modules * out_shape[2], out_shape[3]) if self._has_modules else () + ) + + for _modno, kd in self.modno_to_keydata.items(): + for chunk in kd._data_chunks: + self._read_chunk(chunk, reading_view, roi) + + return out @multimod_detectors From 16dde8e196b1d594f8a95e70f1cc216550c7c514 Mon Sep 17 00:00:00 2001 From: Thomas Kluyver Date: Fri, 10 Mar 2023 15:20:35 +0000 Subject: [PATCH 2/6] Identify detector name correctly in LPDMini class --- extra_data/components.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/extra_data/components.py b/extra_data/components.py index 1878e45b..d235d2e6 100644 --- a/extra_data/components.py +++ b/extra_data/components.py @@ -178,14 +178,16 @@ def __getitem__(self, item): return MultimodKeyData(self, item) @classmethod - def _find_detector_name(cls, data): + def _find_detector_name(cls, data, source_re=None): detector_names = set() + if source_re is None: + source_re = cls._source_re for source in data.instrument_sources: - m = cls._source_re.match(source) + m = source_re.match(source) if m: detector_names.add(m.group('detname')) if not detector_names: - raise SourceNameError(cls._source_re.pattern) + raise SourceNameError(source_re.pattern) elif len(detector_names) > 1: raise ValueError( "Multiple detectors found in the data: {}. " @@ -1569,6 +1571,8 @@ def __init__(self, data: DataCollection, detector_name=None, modules=None, *, corrected=True): self.corrected = corrected self._source_re = self._source_re_corr if corrected else self._source_re_raw + if detector_name is None: + detector_name = self._find_detector_name(data=self._source_re) super().__init__(data, detector_name, modules=[0]) def __getitem__(self, item): From bc974c88de37c0bbaa050bdcc6658b2140210b9f Mon Sep 17 00:00:00 2001 From: Thomas Kluyver Date: Fri, 10 Mar 2023 17:05:21 +0100 Subject: [PATCH 3/6] Various fixes --- extra_data/components.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/extra_data/components.py b/extra_data/components.py index d235d2e6..7b7223fe 100644 --- a/extra_data/components.py +++ b/extra_data/components.py @@ -1572,19 +1572,19 @@ def __init__(self, data: DataCollection, detector_name=None, modules=None, self.corrected = corrected self._source_re = self._source_re_corr if corrected else self._source_re_raw if detector_name is None: - detector_name = self._find_detector_name(data=self._source_re) + detector_name = self._find_detector_name(data, self._source_re) super().__init__(data, detector_name, modules=[0]) def __getitem__(self, item): - if item == 'image.data' and not self.corrected: - return LPDMiniRawDataKey(self, item) + if item.startswith('image.') and not self.corrected: + return LPDMiniImageKey(self, item, corrected=self.corrected) return super().__getitem__(item) class LPDMiniImageKey(XtdfImageMultimodKeyData): def __init__(self, det: XtdfDetectorBase, key, pulse_sel=by_index[0:MAX_PULSES:1], modules=None, corrected=True): super().__init__(det, key, pulse_sel) - if modules is None: + if modules is None and self._has_modules: eshape = self._eg_keydata.entry_shape nmod = eshape[-3] if corrected else (eshape[-2] // 32) modules = list(range(nmod)) @@ -1620,18 +1620,18 @@ def buffer_shape(self, module_gaps=False, roi=()): nframes_sel = len(self.train_id_coordinates()) module_dim = 8 if module_gaps else len(self.modules) - return (module_dim, nframes_sel) + roi_shape(self.det.module_shape, roi) + return (nframes_sel, module_dim) + roi_shape(self.det.module_shape, roi) else: return super().buffer_shape(module_gaps, roi)[1:] # Used for .select_trains() and .split_trains() def _with_selected_det(self, det_selected): - return LPDMiniRawDataKey(det_selected, self.key, self._pulse_sel, modules=self.modules) + return LPDMiniImageKey(det_selected, self.key, self._pulse_sel, modules=self.modules, corrected=self.corrected) def select_pulses(self, pulses): pulses = _check_pulse_selection(pulses) - return LPDMiniRawDataKey(self.det, self.key, pulses, modules=self.modules) + return LPDMiniImageKey(self.det, self.key, pulses, modules=self.modules, corrected=self.corrected) def ndarray(self, *, fill_value=None, out=None, roi=(), astype=None, module_gaps=False): """Get an array of per-pulse data (image.*) for xtdf detector""" @@ -1651,8 +1651,8 @@ def ndarray(self, *, fill_value=None, out=None, roi=(), astype=None, module_gaps if not self.corrected: reading_view.shape = ( out_shape[:1] - + (1,) if self._extraneous_dim else () - + (self.modules * out_shape[2], out_shape[3]) if self._has_modules else () + + ((1,) if self._extraneous_dim else ()) + + ((out_shape[1] * out_shape[2], out_shape[3]) if self._has_modules else ()) ) for _modno, kd in self.modno_to_keydata.items(): From 9fd2e5fad4910c1b2e1418776031789a88eb6fa0 Mon Sep 17 00:00:00 2001 From: Thomas Kluyver Date: Tue, 21 Mar 2023 17:35:49 +0000 Subject: [PATCH 4/6] More fixes, test updates --- extra_data/components.py | 20 ++++++++++++++------ extra_data/tests/make_examples.py | 8 ++++---- extra_data/tests/mockdata/detectors.py | 9 +++++++++ extra_data/tests/test_components.py | 12 +++++------- 4 files changed, 32 insertions(+), 17 deletions(-) diff --git a/extra_data/components.py b/extra_data/components.py index 7b7223fe..582f70f1 100644 --- a/extra_data/components.py +++ b/extra_data/components.py @@ -1544,7 +1544,7 @@ class LPD1M(LPDBase, XtdfDetectorBase): @multimod_detectors -class LPDMini(XtdfDetectorBase): +class LPDMini(LPDBase, XtdfDetectorBase): """An interface to LPD-Mini data. Parameters @@ -1568,15 +1568,15 @@ class LPDMini(XtdfDetectorBase): module_shape = (32, 256) def __init__(self, data: DataCollection, detector_name=None, modules=None, - *, corrected=True): + *, corrected=True, parallel_gain=False): self.corrected = corrected self._source_re = self._source_re_corr if corrected else self._source_re_raw if detector_name is None: detector_name = self._find_detector_name(data, self._source_re) - super().__init__(data, detector_name, modules=[0]) + super().__init__(data, detector_name, modules=[0], parallel_gain=parallel_gain) def __getitem__(self, item): - if item.startswith('image.') and not self.corrected: + if item.startswith('image.'): return LPDMiniImageKey(self, item, corrected=self.corrected) return super().__getitem__(item) @@ -1601,9 +1601,17 @@ def ndim(self): @property def dimensions(self): + ndim_inner = self.ndim - 1 - self._has_modules + if ndim_inner == 2: + # 2D pixel data + entry_dims = ['slow_scan', 'fast_scan'] + else: + # Everything else seems to be 1D, but just in case + entry_dims = [f'dim_{i}' for i in range(ndim_inner)] + if self._has_modules: - return ['trainId', 'module'] + ['dim_%d' % i for i in range(self.ndim - 2)] - return ['trainId'] + return ['train_pulse', 'module'] + entry_dims + return ['train_pulse'] + entry_dims @property def modules(self): diff --git a/extra_data/tests/make_examples.py b/extra_data/tests/make_examples.py index 07092794..1b91e18a 100644 --- a/extra_data/tests/make_examples.py +++ b/extra_data/tests/make_examples.py @@ -10,7 +10,7 @@ from .mockdata.base import write_base_index from .mockdata.basler_camera import BaslerCamera as BaslerCam from .mockdata.dctrl import DCtrl -from .mockdata.detectors import AGIPDModule, DSSCModule, LPDModule +from .mockdata.detectors import AGIPDModule, DSSCModule, LPDModule, LPDMini from .mockdata.gauge import Gauge from .mockdata.gec_camera import GECCamera from .mockdata.imgfel import IMGFELCamera, IMGFELMotor @@ -243,8 +243,8 @@ def make_fxe_run(dir_path, raw=True, format_version='0.5'): path = osp.join(dir_path, f'{prefix}-R0450-LPDMINI00-S00000.h5') write_file(path, [ - LPDModule('FXE_DET_LPD_MINI/DET/0CH0', raw=raw, frames_per_train=128) - ], ntrains=480, chunksize=32, format_version=format_version) + LPDMini('FXE_DET_LPD_MINI/DET/0CH0', raw=raw, frames_per_train=128, modules=2) + ], ntrains=480, chunksize=32, format_version=format_version) if not raw: return @@ -274,7 +274,7 @@ def make_lpd_parallelgain_run(dir_path, raw=True, format_version='0.5'): path = osp.join(dir_path, f'{prefix}-R0450-LPDMINI00-S00000.h5') write_file(path, [ - LPDModule('FXE_DET_LPD_MINI/DET/0CH0', raw=raw, frames_per_train=300) + LPDMini('FXE_DET_LPD_MINI/DET/0CH0', raw=raw, frames_per_train=300, modules=2) ], ntrains=100, chunksize=32, format_version=format_version) def make_lpd_run_mini_missed_train(dir_path): diff --git a/extra_data/tests/mockdata/detectors.py b/extra_data/tests/mockdata/detectors.py index d83d5289..de18f89c 100644 --- a/extra_data/tests/mockdata/detectors.py +++ b/extra_data/tests/mockdata/detectors.py @@ -162,6 +162,15 @@ class LPDModule(DetectorModule): image_dims = (1, 256, 256) detector_data_size = 416 +class LPDMini(DetectorModule): + def __init__(self, device_id, frames_per_train=64, raw=True, modules=1): + if raw: + self.image_dims = (1, modules * 32, 256) + else: + # The 1 is removed in parent class + self.image_dims = (1, modules, 32, 256) + super().__init__(device_id, frames_per_train, raw) + class DSSCModule(DetectorModule): image_dims = (1, 128, 512) detector_data_size = 416 diff --git a/extra_data/tests/test_components.py b/extra_data/tests/test_components.py index bfa71f5f..6a3ce05c 100644 --- a/extra_data/tests/test_components.py +++ b/extra_data/tests/test_components.py @@ -179,12 +179,11 @@ def test_get_array_lpd_parallelgain(mock_lpd_parallelgain_run): np.testing.assert_array_equal(arr.coords['gain'], np.arange(3)) np.testing.assert_array_equal(arr.coords['pulse'], np.arange(100)) - run = RunDirectory(mock_lpd_parallelgain_run) - det = LPDMini(run.select_trains(by_index[:2]), parallel_gain=True) + det = LPDMini(run.select_trains(by_index[:2]), parallel_gain=True, corrected=False) assert det.detector_name == 'FXE_DET_LPD_MINI' arr = det.get_array('image.data') - assert arr.shape == (1, 2, 3, 100, 256, 256) + assert arr.shape == (2, 2, 3, 100, 32, 256) assert arr.dims == ('module', 'train', 'gain', 'pulse', 'slow_scan', 'fast_scan') np.testing.assert_array_equal(arr.coords['gain'], np.arange(3)) np.testing.assert_array_equal(arr.coords['pulse'], np.arange(100)) @@ -206,18 +205,17 @@ def test_get_array_lpd_parallelgain_select_pulses(mock_lpd_parallelgain_run): assert arr.shape == (16, 2, 3, 5, 256, 256) np.testing.assert_array_equal(arr.coords['pulse'], np.arange(5)) - run = RunDirectory(mock_lpd_parallelgain_run) - det = LPDMini(run.select_trains(by_index[:2]), parallel_gain=True) + det = LPDMini(run.select_trains(by_index[:2]), parallel_gain=True, corrected=False) assert det.detector_name == 'FXE_DET_LPD_MINI' arr = det.get_array('image.data', pulses=np.s_[:5]) - assert arr.shape == (1, 2, 3, 5, 256, 256) + assert arr.shape == (2, 2, 3, 5, 32, 256) assert arr.dims == ('module', 'train', 'gain', 'pulse', 'slow_scan', 'fast_scan') np.testing.assert_array_equal(arr.coords['gain'], np.arange(3)) np.testing.assert_array_equal(arr.coords['pulse'], np.arange(5)) arr = det.get_array('image.data', pulses=by_id[:5]) - assert arr.shape == (1, 2, 3, 5, 256, 256) + assert arr.shape == (2, 2, 3, 5, 32, 256) np.testing.assert_array_equal(arr.coords['pulse'], np.arange(5)) From 1ad29635912ecf4793c85657762564df0ce8df2c Mon Sep 17 00:00:00 2001 From: Thomas Kluyver Date: Wed, 22 Mar 2023 11:28:41 +0000 Subject: [PATCH 5/6] Fix identify_multimod_detector with LPD mini --- extra_data/components.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/extra_data/components.py b/extra_data/components.py index 582f70f1..1a638ef0 100644 --- a/extra_data/components.py +++ b/extra_data/components.py @@ -1563,6 +1563,9 @@ class LPDMini(LPDBase, XtdfDetectorBase): repeat the pulse & cell IDs from the first 1/3 of each train, and add gain stage labels from 0 (high-gain) to 2 (low-gain). """ + # Some code uses cls._source_re, but when creating an instance we replace + # this with either the raw or corrected variant. + _source_re = re.compile(r'(?P.+_LPD_MINI.*)/(DET|CORR)/(?P\d+)CH') _source_re_raw = re.compile(r'(?P.+_LPD_MINI.*)/DET/(?P\d+)CH') _source_re_corr = re.compile(r'(?P.+_LPD_MINI.*)/CORR/(?P\d+)CH') module_shape = (32, 256) From ac0f579b29ae2329b8a77fe64ad6c36a058873b8 Mon Sep 17 00:00:00 2001 From: Thomas Kluyver Date: Wed, 22 Mar 2023 11:42:07 +0000 Subject: [PATCH 6/6] Fix some tests stacking LPD-1M data --- extra_data/tests/test_stacking.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/extra_data/tests/test_stacking.py b/extra_data/tests/test_stacking.py index 6f0cf213..e6276f4e 100644 --- a/extra_data/tests/test_stacking.py +++ b/extra_data/tests/test_stacking.py @@ -22,7 +22,7 @@ def test_stack_detector_data(mock_fxe_raw_run): def test_stack_detector_data_missing(mock_fxe_raw_run): test_run = RunDirectory(mock_fxe_raw_run) - tid, data = test_run.train_from_id(10000, devices=[('*/DET/*', 'image.data')]) + tid, data = test_run.train_from_id(10000, devices=[('*_LPD1M-1/DET/*', 'image.data')]) # Three variants of missing data: # 1. Source missing @@ -52,7 +52,7 @@ def test_stack_detector_data_missing(mock_fxe_raw_run): def test_stack_detector_data_stackview(mock_fxe_raw_run): test_run = RunDirectory(mock_fxe_raw_run) - tid, data = test_run.train_from_id(10000, devices=[('*/DET/*', 'image.data')]) + tid, data = test_run.train_from_id(10000, devices=[('*_LPD1M-1/DET/*', 'image.data')]) # Three variants of missing data: # 1. Source missing @@ -126,7 +126,7 @@ def test_stack_detector_data_type_error(mock_fxe_raw_run): def test_stack_detector_data_extra_mods(mock_fxe_raw_run): test_run = RunDirectory(mock_fxe_raw_run) - tid, data = test_run.train_from_id(10000, devices=[('*/DET/*', 'image.data')]) + tid, data = test_run.train_from_id(10000, devices=[('*_LPD1M-1/DET/*', 'image.data')]) data.setdefault( 'FXE_DET_LPD1M-1/DET/16CH0:xtdf',