Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Resolves #865 #868

Merged
merged 2 commits into from
Oct 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 35 additions & 16 deletions ibllib/pipes/mesoscope_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -486,23 +486,42 @@ def _consolidate_exptQC(exptQC):
numpy.array
An array of frame indices where QC code != 0.
"""

# Merge and make sure same indexes have same names across all files
frameQC_names_list = [e['frameQC_names'] for e in exptQC]
frameQC_names_list = [{k: i for i, k in enumerate(ensure_list(f))} for f in frameQC_names_list]
frameQC_names = {k: v for d in frameQC_names_list for k, v in d.items()}
for d in frameQC_names_list:
for k, v in d.items():
if frameQC_names[k] != v:
raise IOError(f'exptQC.mat files have different values for name "{k}"')

frameQC_names = pd.DataFrame(sorted([(v, k) for k, v in frameQC_names.items()]),
columns=['qc_values', 'qc_labels'])

# Create a new enumeration combining all unique QC labels.
# 'ok' will always have an enum of 0, the rest are determined by order alone
qc_labels = ['ok']
frame_qc = []
for e in exptQC:
assert e.keys() >= set(['frameQC_names', 'frameQC_frames'])
# Initialize an NaN array the same size of frameQC_frames to fill with new enum values
frames = np.full(e['frameQC_frames'].shape, fill_value=np.nan)
# May be numpy array of str or a single str, in both cases we cast to list of str
names = list(ensure_list(e['frameQC_names']))
# For each label for the old enum, populate initialized array with the new one
for name in names:
i_old = names.index(name) # old enumeration
name = name if len(name) else 'unknown' # handle empty array and empty str
try:
i_new = qc_labels.index(name)
except ValueError:
i_new = len(qc_labels)
qc_labels.append(name)
frames[e['frameQC_frames'] == i_old] = i_new
frame_qc.append(frames)
# Concatenate frames
frameQC = np.concatenate([e['frameQC_frames'] for e in exptQC], axis=0)
bad_frames = np.where(frameQC != 0)[0]
return frameQC, frameQC_names, bad_frames
frame_qc = np.concatenate(frame_qc)
# If any NaNs left over, assign 'unknown' label
if (missing_name := np.isnan(frame_qc)).any():
try:
i = qc_labels.index('unknown')
except ValueError:
i = len(qc_labels)
qc_labels.append('unknown')
frame_qc[missing_name] = i
frame_qc = frame_qc.astype(np.uint32) # case to uint
bad_frames, = np.where(frame_qc != 0)
Copy link
Member

@samupicard samupicard Oct 17, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the frames that should be excluded from suite2p cell detection are not necessarily all frame_qc != 0. E.g. 'PMT off' or 'galvos fault' will mean those frames need to be excluded, but e.g. 'adjust spout' will not. However, this doesn't need to be figured out here - the acquirer will have written a separate badframes.mat vector specifying which frames suite2p should be excluding from analysis. So probably best to not try to impute the bad_frames variable here at all, and instead load it from disk?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if loaded from disk, this badframes array will also need to be consolidated (the indexing of the badframes.mat vectors is relative to each bout)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can confirm that the bad_frames var returned by this method is not at all used. The badframes.mat files are loaded afterward, and there is an assertion that all badframes indices are a subset of the non-zero frameQC indices:

# If applicable, save as bad_frames.npy in first raw_imaging_folder for suite2p
# badframes.mat contains QC values that do affect ROI detection (e.g. no PMT, lens artefacts)
badframes = np.array([], dtype='i8')
total_frames = 0
# Ensure all indices are relative to total cumulative frames
for m, collection in zip(all_meta, raw_image_collections):
badframes_path = self.session_path.joinpath(collection, 'badframes.mat')
if badframes_path.exists():
raw_mat = loadmat(badframes_path, squeeze_me=True, simplify_cells=True)['badframes']
badframes = np.r_[badframes, raw_mat + total_frames]
total_frames += m['nFrames']
if len(badframes) > 0 and use_badframes is True:
# The badframes array should always be a subset of the frameQC array
assert np.max(badframes) < frameQC.size and np.all(frameQC[badframes])
np.save(raw_image_collections[0].joinpath('bad_frames.npy'), badframes)

# Convert labels to value -> label data frame
frame_qc_names = pd.DataFrame(list(enumerate(qc_labels)), columns=['qc_values', 'qc_labels'])
return frame_qc, frame_qc_names, bad_frames

def get_default_tau(self):
"""
Expand Down
36 changes: 16 additions & 20 deletions ibllib/tests/test_mesoscope.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,30 +102,26 @@ def test_consolidate_exptQC(self):
exptQC = [
{'frameQC_names': np.array(['ok', 'PMT off', 'galvos fault', 'high signal'], dtype=object),
'frameQC_frames': np.array([0, 0, 0, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4])},
{'frameQC_names': np.array(['ok', 'PMT off', 'galvos fault', 'high signal'], dtype=object),
'frameQC_frames': np.zeros(50, dtype=int)}
{'frameQC_names': np.array(['ok', 'PMT off', 'foo', 'galvos fault', np.array([])], dtype=object),
'frameQC_frames': np.array([0, 0, 1, 1, 2, 2, 2, 2, 3, 4])},
{'frameQC_names': 'ok', # check with single str instead of array
'frameQC_frames': np.array([0, 0])}
]

# Check concatinates frame QC arrays
frameQC, frameQC_names, bad_frames = self.task._consolidate_exptQC(exptQC)
expected_frames = np.r_[exptQC[0]['frameQC_frames'], exptQC[1]['frameQC_frames']]
np.testing.assert_array_equal(expected_frames, frameQC)
expected = [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
frame_qc, frame_qc_names, bad_frames = self.task._consolidate_exptQC(exptQC)
# Check frame_qc array
expected_frames = [
0, 0, 0, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 5, 0, 0, 1, 1, 4, 4, 4, 4, 2, 5, 0, 0]
np.testing.assert_array_equal(expected_frames, frame_qc)
# Check bad_frames array
expected = [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 18, 19, 20, 21, 22, 23, 24, 25]
np.testing.assert_array_equal(expected, bad_frames)
self.assertCountEqual(['qc_values', 'qc_labels'], frameQC_names.columns)
self.assertCountEqual(range(4), frameQC_names['qc_values'])
expected = ['ok', 'PMT off', 'galvos fault', 'high signal']
self.assertCountEqual(expected, frameQC_names['qc_labels'])

# Check with single str instead of array
exptQC[1]['frameQC_names'] = 'ok'
frameQC, frameQC_names, bad_frames = self.task._consolidate_exptQC(exptQC)
self.assertCountEqual(expected, frameQC_names['qc_labels'])
np.testing.assert_array_equal(expected_frames, frameQC)
# Check with inconsistent enumerations
exptQC[0]['frameQC_names'] = expected
exptQC[1]['frameQC_names'] = [*expected[-2:], *expected[:-2]]
self.assertRaises(IOError, self.task._consolidate_exptQC, exptQC)
# Check frame_qc_names data frame
self.assertCountEqual(['qc_values', 'qc_labels'], frame_qc_names.columns)
self.assertEqual(list(range(6)), frame_qc_names['qc_values'].tolist())
expected = ['ok', 'PMT off', 'galvos fault', 'high signal', 'foo', 'unknown']
self.assertCountEqual(expected, frame_qc_names['qc_labels'].tolist())

def test_setup_uncompressed(self):
"""Test set up behaviour when raw tifs present."""
Expand Down
12 changes: 3 additions & 9 deletions ibllib/tests/test_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@
def _run(self, overwrite=False):
Task02_error.run_count += 1
if Task02_error.run_count == 1:
raise Exception('Something dumb happened')

Check failure on line 83 in ibllib/tests/test_tasks.py

View workflow job for this annotation

GitHub Actions / build (3.11, windows-latest)

Something dumb happened

Check failure on line 83 in ibllib/tests/test_tasks.py

View workflow job for this annotation

GitHub Actions / build (3.8, ubuntu-latest)

Something dumb happened
out_files = self.session_path.joinpath('alf', 'spikes.templates.npy')
out_files.touch()
return out_files
Expand Down Expand Up @@ -153,15 +153,9 @@

def setUp(self) -> None:
self.td = tempfile.TemporaryDirectory()
# ses = one.alyx.rest('sessions', 'list', subject=ses_dict['subject'],
# date_range=[ses_dict['start_time'][:10]] * 2,
# number=ses_dict['number'],
# no_cache=True)
# if len(ses):
# one.alyx.rest('sessions', 'delete', ses[0]['url'][-36:])
# randomise number
ses_dict['number'] = np.random.randint(1, 30)
ses = one.alyx.rest('sessions', 'create', data=ses_dict)
self.ses_dict = ses_dict.copy()
self.ses_dict['number'] = np.random.randint(1, 999)
ses = one.alyx.rest('sessions', 'create', data=self.ses_dict)
session_path = Path(self.td.name).joinpath(
ses['subject'], ses['start_time'][:10], str(ses['number']).zfill(3))
session_path.joinpath('alf').mkdir(exist_ok=True, parents=True)
Expand Down
Loading