int-brain-lab · k1o0 · Oct 18, 2024 · Oct 17, 2024 · Oct 17, 2024 · samupicard
diff --git a/ibllib/pipes/mesoscope_tasks.py b/ibllib/pipes/mesoscope_tasks.py
@@ -486,23 +486,42 @@ def _consolidate_exptQC(exptQC):
         numpy.array
             An array of frame indices where QC code != 0.
         """
-
-        # Merge and make sure same indexes have same names across all files
-        frameQC_names_list = [e['frameQC_names'] for e in exptQC]
-        frameQC_names_list = [{k: i for i, k in enumerate(ensure_list(f))} for f in frameQC_names_list]
-        frameQC_names = {k: v for d in frameQC_names_list for k, v in d.items()}
-        for d in frameQC_names_list:
-            for k, v in d.items():
-                if frameQC_names[k] != v:
-                    raise IOError(f'exptQC.mat files have different values for name "{k}"')
-
-        frameQC_names = pd.DataFrame(sorted([(v, k) for k, v in frameQC_names.items()]),
-                                     columns=['qc_values', 'qc_labels'])
-
+        # Create a new enumeration combining all unique QC labels.
+        # 'ok' will always have an enum of 0, the rest are determined by order alone
+        qc_labels = ['ok']
+        frame_qc = []
+        for e in exptQC:
+            assert e.keys() >= set(['frameQC_names', 'frameQC_frames'])
+            # Initialize an NaN array the same size of frameQC_frames to fill with new enum values
+            frames = np.full(e['frameQC_frames'].shape, fill_value=np.nan)
+            # May be numpy array of str or a single str, in both cases we cast to list of str
+            names = list(ensure_list(e['frameQC_names']))
+            # For each label for the old enum, populate initialized array with the new one
+            for name in names:
+                i_old = names.index(name)  # old enumeration
+                name = name if len(name) else 'unknown'  # handle empty array and empty str
+                try:
+                    i_new = qc_labels.index(name)
+                except ValueError:
+                    i_new = len(qc_labels)
+                    qc_labels.append(name)
+                frames[e['frameQC_frames'] == i_old] = i_new
+            frame_qc.append(frames)
         # Concatenate frames
-        frameQC = np.concatenate([e['frameQC_frames'] for e in exptQC], axis=0)
-        bad_frames = np.where(frameQC != 0)[0]
-        return frameQC, frameQC_names, bad_frames
+        frame_qc = np.concatenate(frame_qc)
+        # If any NaNs left over, assign 'unknown' label
+        if (missing_name := np.isnan(frame_qc)).any():
+            try:
+                i = qc_labels.index('unknown')
+            except ValueError:
+                i = len(qc_labels)
+                qc_labels.append('unknown')
+            frame_qc[missing_name] = i
+        frame_qc = frame_qc.astype(np.uint32)  # case to uint
+        bad_frames, = np.where(frame_qc != 0)
 # If applicable, save as bad_frames.npy in first raw_imaging_folder for suite2p 
 # badframes.mat contains QC values that do affect ROI detection (e.g. no PMT, lens artefacts) 
 badframes = np.array([], dtype='i8') 
 total_frames = 0 
 # Ensure all indices are relative to total cumulative frames 
 for m, collection in zip(all_meta, raw_image_collections): 
     badframes_path = self.session_path.joinpath(collection, 'badframes.mat') 
     if badframes_path.exists(): 
         raw_mat = loadmat(badframes_path, squeeze_me=True, simplify_cells=True)['badframes'] 
         badframes = np.r_[badframes, raw_mat + total_frames] 
     total_frames += m['nFrames'] 
 if len(badframes) > 0 and use_badframes is True: 
     # The badframes array should always be a subset of the frameQC array 
     assert np.max(badframes) < frameQC.size and np.all(frameQC[badframes]) 
     np.save(raw_image_collections[0].joinpath('bad_frames.npy'), badframes) 
 # If applicable, save as bad_frames.npy in first raw_imaging_folder for suite2p 
 # badframes.mat contains QC values that do affect ROI detection (e.g. no PMT, lens artefacts) 
 badframes = np.array([], dtype='i8') 
 total_frames = 0 
 # Ensure all indices are relative to total cumulative frames 
 for m, collection in zip(all_meta, raw_image_collections): 
     badframes_path = self.session_path.joinpath(collection, 'badframes.mat') 
     if badframes_path.exists(): 
         raw_mat = loadmat(badframes_path, squeeze_me=True, simplify_cells=True)['badframes'] 
         badframes = np.r_[badframes, raw_mat + total_frames] 
     total_frames += m['nFrames'] 
 if len(badframes) > 0 and use_badframes is True: 
     # The badframes array should always be a subset of the frameQC array 
     assert np.max(badframes) < frameQC.size and np.all(frameQC[badframes]) 
     np.save(raw_image_collections[0].joinpath('bad_frames.npy'), badframes) 
+        # Convert labels to value -> label data frame
+        frame_qc_names = pd.DataFrame(list(enumerate(qc_labels)), columns=['qc_values', 'qc_labels'])
+        return frame_qc, frame_qc_names, bad_frames
 
     def get_default_tau(self):
         """

diff --git a/ibllib/tests/test_mesoscope.py b/ibllib/tests/test_mesoscope.py
@@ -102,30 +102,26 @@ def test_consolidate_exptQC(self):
         exptQC = [
             {'frameQC_names': np.array(['ok', 'PMT off', 'galvos fault', 'high signal'], dtype=object),
              'frameQC_frames': np.array([0, 0, 0, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4])},
-            {'frameQC_names': np.array(['ok', 'PMT off', 'galvos fault', 'high signal'], dtype=object),
-             'frameQC_frames': np.zeros(50, dtype=int)}
+            {'frameQC_names': np.array(['ok', 'PMT off', 'foo', 'galvos fault', np.array([])], dtype=object),
+             'frameQC_frames': np.array([0, 0, 1, 1, 2, 2, 2, 2, 3, 4])},
+            {'frameQC_names': 'ok',  # check with single str instead of array
+             'frameQC_frames': np.array([0, 0])}
         ]
 
         # Check concatinates frame QC arrays
-        frameQC, frameQC_names, bad_frames = self.task._consolidate_exptQC(exptQC)
-        expected_frames = np.r_[exptQC[0]['frameQC_frames'], exptQC[1]['frameQC_frames']]
-        np.testing.assert_array_equal(expected_frames, frameQC)
-        expected = [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
+        frame_qc, frame_qc_names, bad_frames = self.task._consolidate_exptQC(exptQC)
+        # Check frame_qc array
+        expected_frames = [
+            0, 0, 0, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 5, 0, 0, 1, 1, 4, 4, 4, 4, 2, 5, 0, 0]
+        np.testing.assert_array_equal(expected_frames, frame_qc)
+        # Check bad_frames array
+        expected = [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 18, 19, 20, 21, 22, 23, 24, 25]
         np.testing.assert_array_equal(expected, bad_frames)
-        self.assertCountEqual(['qc_values', 'qc_labels'], frameQC_names.columns)
-        self.assertCountEqual(range(4), frameQC_names['qc_values'])
-        expected = ['ok', 'PMT off', 'galvos fault', 'high signal']
-        self.assertCountEqual(expected, frameQC_names['qc_labels'])
-
-        # Check with single str instead of array
-        exptQC[1]['frameQC_names'] = 'ok'
-        frameQC, frameQC_names, bad_frames = self.task._consolidate_exptQC(exptQC)
-        self.assertCountEqual(expected, frameQC_names['qc_labels'])
-        np.testing.assert_array_equal(expected_frames, frameQC)
-        # Check with inconsistent enumerations
-        exptQC[0]['frameQC_names'] = expected
-        exptQC[1]['frameQC_names'] = [*expected[-2:], *expected[:-2]]
-        self.assertRaises(IOError, self.task._consolidate_exptQC, exptQC)
+        # Check frame_qc_names data frame
+        self.assertCountEqual(['qc_values', 'qc_labels'], frame_qc_names.columns)
+        self.assertEqual(list(range(6)), frame_qc_names['qc_values'].tolist())
+        expected = ['ok', 'PMT off', 'galvos fault', 'high signal', 'foo', 'unknown']
+        self.assertCountEqual(expected, frame_qc_names['qc_labels'].tolist())
 
     def test_setup_uncompressed(self):
         """Test set up behaviour when raw tifs present."""

diff --git a/ibllib/tests/test_tasks.py b/ibllib/tests/test_tasks.py
@@ -80,7 +80,7 @@
    def _run(self, overwrite=False):
        Task02_error.run_count += 1
        if Task02_error.run_count == 1:
            raise Exception('Something dumb happened')
        out_files = self.session_path.joinpath('alf', 'spikes.templates.npy')
        out_files.touch()
        return out_files
@@ -153,15 +153,9 @@
 
     def setUp(self) -> None:
         self.td = tempfile.TemporaryDirectory()
-        # ses = one.alyx.rest('sessions', 'list', subject=ses_dict['subject'],
-        #                     date_range=[ses_dict['start_time'][:10]] * 2,
-        #                     number=ses_dict['number'],
-        #                     no_cache=True)
-        # if len(ses):
-        #     one.alyx.rest('sessions', 'delete', ses[0]['url'][-36:])
-        # randomise number
-        ses_dict['number'] = np.random.randint(1, 30)
-        ses = one.alyx.rest('sessions', 'create', data=ses_dict)
+        self.ses_dict = ses_dict.copy()
+        self.ses_dict['number'] = np.random.randint(1, 999)
+        ses = one.alyx.rest('sessions', 'create', data=self.ses_dict)
         session_path = Path(self.td.name).joinpath(
             ses['subject'], ses['start_time'][:10], str(ses['number']).zfill(3))
         session_path.joinpath('alf').mkdir(exist_ok=True, parents=True)