Skip to content

Commit

Permalink
BUG: handle temporal discontinuities in Neuralynx .ncs files (mne-t…
Browse files Browse the repository at this point in the history
…ools#12279)

Co-authored-by: Eric Larson <[email protected]>
  • Loading branch information
KristijanArmeni and larsoner authored Dec 20, 2023
1 parent 0f59894 commit 97512a1
Show file tree
Hide file tree
Showing 7 changed files with 289 additions and 42 deletions.
1 change: 1 addition & 0 deletions doc/changes/devel.rst
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ Bugs
- Remove incorrect type hints in :func:`mne.io.read_raw_neuralynx` (:gh:`12236` by `Richard Höchenberger`_)
- Fix bug where parent directory existence was not checked properly in :meth:`mne.io.Raw.save` (:gh:`12282` by `Eric Larson`_)
- ``defusedxml`` is now an optional (rather than required) dependency and needed when reading EGI-MFF data, NEDF data, and BrainVision montages (:gh:`12264` by `Eric Larson`_)
- Correctly handle temporal gaps in Neuralynx .ncs files via :func:`mne.io.read_raw_neuralynx` (:gh:`12279` by `Kristijan Armeni`_ and `Eric Larson`_)

API changes
~~~~~~~~~~~
Expand Down
1 change: 1 addition & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -61,3 +61,4 @@ dependencies:
- mamba
- lazy_loader
- defusedxml
- python-neo
4 changes: 2 additions & 2 deletions mne/datasets/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@
# respective repos, and make a new release of the dataset on GitHub. Then
# update the checksum in the MNE_DATASETS dict below, and change version
# here: ↓↓↓↓↓ ↓↓↓
RELEASES = dict(testing="0.150", misc="0.27")
RELEASES = dict(testing="0.151", misc="0.27")
TESTING_VERSIONED = f'mne-testing-data-{RELEASES["testing"]}'
MISC_VERSIONED = f'mne-misc-data-{RELEASES["misc"]}'

Expand All @@ -112,7 +112,7 @@
# Testing and misc are at the top as they're updated most often
MNE_DATASETS["testing"] = dict(
archive_name=f"{TESTING_VERSIONED}.tar.gz",
hash="md5:0b7452daef4d19132505b5639d695628",
hash="md5:5832b4d44f0423d22305fa61cb75bc25",
url=(
"https://codeload.github.com/mne-tools/mne-testing-data/"
f'tar.gz/{RELEASES["testing"]}'
Expand Down
214 changes: 187 additions & 27 deletions mne/io/neuralynx/neuralynx.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,51 @@

from ..._fiff.meas_info import create_info
from ..._fiff.utils import _mult_cal_one
from ...annotations import Annotations
from ...utils import _check_fname, _soft_import, fill_doc, logger, verbose
from ..base import BaseRaw


class AnalogSignalGap(object):
"""Dummy object to represent gaps in Neuralynx data.
Creates a AnalogSignalProxy-like object.
Propagate `signal`, `units`, and `sampling_rate` attributes
to the `AnalogSignal` init returned by `load()`.
Parameters
----------
signal : array-like
Array of shape (n_channels, n_samples) containing the data.
units : str
Units of the data. (e.g., 'uV')
sampling_rate : quantity
Sampling rate of the data. (e.g., 4000 * pq.Hz)
Returns
-------
sig : instance of AnalogSignal
A AnalogSignal object representing a gap in Neuralynx data.
"""

def __init__(self, signal, units, sampling_rate):
self.signal = signal
self.units = units
self.sampling_rate = sampling_rate

def load(self, channel_indexes):
"""Return AnalogSignal object."""
_soft_import("neo", "Reading NeuralynxIO files", strict=True)
from neo import AnalogSignal

sig = AnalogSignal(
signal=self.signal[:, channel_indexes],
units=self.units,
sampling_rate=self.sampling_rate,
)
return sig


@fill_doc
def read_raw_neuralynx(
fname, *, preload=False, exclude_fname_patterns=None, verbose=None
Expand Down Expand Up @@ -59,11 +100,11 @@ def __init__(
exclude_fname_patterns=None,
verbose=None,
):
fname = _check_fname(fname, "read", True, "fname", need_dir=True)

_soft_import("neo", "Reading NeuralynxIO files", strict=True)
from neo.io import NeuralynxIO

fname = _check_fname(fname, "read", True, "fname", need_dir=True)

logger.info(f"Checking files in {fname}")

# construct a list of filenames to ignore
Expand All @@ -81,12 +122,18 @@ def __init__(
try:
nlx_reader = NeuralynxIO(dirname=fname, exclude_filename=exclude_fnames)
except ValueError as e:
raise ValueError(
"It seems some .ncs channels might have different number of samples. "
+ "This is likely due to different sampling rates. "
+ "Try excluding them with `exclude_fname_patterns` input arg."
+ f"\nOriginal neo.NeuralynxIO.parse_header() ValueError:\n{e}"
)
# give a more informative error message and what the user can do about it
if "Incompatible section structures across streams" in str(e):
raise ValueError(
"It seems .ncs channels have different numbers of samples. "
+ "This is likely due to different sampling rates. "
+ "Try reading in only channels with uniform sampling rate "
+ "by excluding other channels with `exclude_fname_patterns` "
+ "input argument."
+ f"\nOriginal neo.NeuralynxRawIO ValueError:\n{e}"
) from None
else:
raise

info = create_info(
ch_types="seeg",
Expand All @@ -98,32 +145,122 @@ def __init__(
# the sample sizes of all segments
n_segments = nlx_reader.header["nb_segment"][0]
block_id = 0 # assumes there's only one block of recording
n_total_samples = sum(
nlx_reader.get_signal_size(block_id, segment)
for segment in range(n_segments)

# get segment start/stop times
start_times = np.array(
[nlx_reader.segment_t_start(block_id, i) for i in range(n_segments)]
)
stop_times = np.array(
[nlx_reader.segment_t_stop(block_id, i) for i in range(n_segments)]
)

# construct an array of shape (n_total_samples,) indicating
# segment membership for each sample
sample2segment = np.concatenate(
# find discontinuous boundaries (of length n-1)
next_start_times = start_times[1::]
previous_stop_times = stop_times[:-1]
seg_diffs = next_start_times - previous_stop_times

# mark as discontinuous any two segments that have
# start/stop delta larger than sampling period (1/sampling_rate)
logger.info("Checking for temporal discontinuities in Neo data segments.")
delta = 1.5 / info["sfreq"]
gaps = seg_diffs > delta

seg_gap_dict = {}

logger.info(
f"N = {gaps.sum()} discontinuous Neo segments detected "
+ f"with delta > {delta} sec. "
+ "Annotating gaps as BAD_ACQ_SKIP."
if gaps.any()
else "No discontinuities detected."
)

gap_starts = stop_times[:-1][gaps] # gap starts at segment offset
gap_stops = start_times[1::][gaps] # gap stops at segment onset

# (n_gaps,) array of ints giving number of samples per inferred gap
gap_n_samps = np.array(
[
int(round(stop * info["sfreq"])) - int(round(start * info["sfreq"]))
for start, stop in zip(gap_starts, gap_stops)
]
).astype(int) # force an int array (if no gaps, empty array is a float)

# get sort indices for all segments (valid and gap) in ascending order
all_starts_ids = np.argsort(np.concatenate([start_times, gap_starts]))

# variable indicating whether each segment is a gap or not
gap_indicator = np.concatenate(
[
np.full(shape=(nlx_reader.get_signal_size(block_id, i),), fill_value=i)
for i in range(n_segments)
np.full(len(start_times), fill_value=0),
np.full(len(gap_starts), fill_value=1),
]
)
gap_indicator = gap_indicator[all_starts_ids].astype(bool)

# store this in a dict to be passed to _raw_extras
seg_gap_dict = {
"gap_n_samps": gap_n_samps,
"isgap": gap_indicator, # False (data segment) or True (gap segment)
}

valid_segment_sizes = [
nlx_reader.get_signal_size(block_id, i) for i in range(n_segments)
]

sizes_sorted = np.concatenate([valid_segment_sizes, gap_n_samps])[
all_starts_ids
]

# now construct an (n_samples,) indicator variable
sample2segment = np.concatenate(
[np.full(shape=(n,), fill_value=i) for i, n in enumerate(sizes_sorted)]
)

# construct Annotations()
gap_seg_ids = np.unique(sample2segment)[gap_indicator]
gap_start_ids = np.array(
[np.where(sample2segment == seg_id)[0][0] for seg_id in gap_seg_ids]
)

# recreate time axis for gap annotations
mne_times = np.arange(0, len(sample2segment)) / info["sfreq"]

assert len(gap_start_ids) == len(gap_n_samps)
annotations = Annotations(
onset=[mne_times[onset_id] for onset_id in gap_start_ids],
duration=[
mne_times[onset_id + (n - 1)] - mne_times[onset_id]
for onset_id, n in zip(gap_start_ids, gap_n_samps)
],
description=["BAD_ACQ_SKIP"] * len(gap_start_ids),
)

super(RawNeuralynx, self).__init__(
info=info,
last_samps=[n_total_samples - 1],
last_samps=[sizes_sorted.sum() - 1],
filenames=[fname],
preload=preload,
raw_extras=[dict(smp2seg=sample2segment, exclude_fnames=exclude_fnames)],
raw_extras=[
dict(
smp2seg=sample2segment,
exclude_fnames=exclude_fnames,
segment_sizes=sizes_sorted,
seg_gap_dict=seg_gap_dict,
)
],
)

self.set_annotations(annotations)

def _read_segment_file(self, data, idx, fi, start, stop, cals, mult):
"""Read a chunk of raw data."""
from neo import Segment
from neo.io import NeuralynxIO

# quantities is a dependency of neo so we are guaranteed it exists
from quantities import Hz

nlx_reader = NeuralynxIO(
dirname=self._filenames[fi],
exclude_filename=self._raw_extras[0]["exclude_fnames"],
Expand All @@ -136,13 +273,7 @@ def _read_segment_file(self, data, idx, fi, start, stop, cals, mult):
[len(segment.analogsignals) for segment in neo_block[0].segments]
) == len(neo_block[0].segments)

# collect sizes of each segment
segment_sizes = np.array(
[
nlx_reader.get_signal_size(0, segment_id)
for segment_id in range(len(neo_block[0].segments))
]
)
segment_sizes = self._raw_extras[fi]["segment_sizes"]

# construct a (n_segments, 2) array of the first and last
# sample index for each segment relative to the start of the recording
Expand Down Expand Up @@ -188,15 +319,44 @@ def _read_segment_file(self, data, idx, fi, start, stop, cals, mult):
-1, 0
] # express stop sample relative to segment onset

# array containing Segments
segments_arr = np.array(neo_block[0].segments, dtype=object)

# if gaps were detected, correctly insert gap Segments in between valid Segments
gap_samples = self._raw_extras[fi]["seg_gap_dict"]["gap_n_samps"]
gap_segments = [Segment(f"gap-{i}") for i in range(len(gap_samples))]

# create AnalogSignal objects representing gap data filled with 0's
sfreq = nlx_reader.get_signal_sampling_rate()
n_chans = (
np.arange(idx.start, idx.stop, idx.step).size
if type(idx) is slice
else len(idx) # idx can be a slice or an np.array so check both
)

for seg, n in zip(gap_segments, gap_samples):
asig = AnalogSignalGap(
signal=np.zeros((n, n_chans)), units="uV", sampling_rate=sfreq * Hz
)
seg.analogsignals.append(asig)

n_total_segments = len(neo_block[0].segments + gap_segments)
segments_arr = np.zeros((n_total_segments,), dtype=object)

# insert inferred gap segments at the right place in between valid segments
isgap = self._raw_extras[0]["seg_gap_dict"]["isgap"]
segments_arr[~isgap] = neo_block[0].segments
segments_arr[isgap] = gap_segments

# now load data from selected segments/channels via
# neo.Segment.AnalogSignal.load()
# neo.Segment.AnalogSignal.load() or AnalogSignalGap.load()
all_data = np.concatenate(
[
signal.load(channel_indexes=idx).magnitude[
samples[0] : samples[-1] + 1, :
]
for seg, samples in zip(
neo_block[0].segments[first_seg : last_seg + 1], sel_samples_local
segments_arr[first_seg : last_seg + 1], sel_samples_local
)
for signal in seg.analogsignals
]
Expand Down
Loading

0 comments on commit 97512a1

Please sign in to comment.