BUG: handle temporal discontinuities in Neuralynx .ncs files (mne-t…

…ools#12279) Co-authored-by: Eric Larson <[email protected]>
larsoner · Dec 20, 2023 · 97512a1 · 97512a1
1 parent 0f59894
commit 97512a1
Show file tree

Hide file tree

Showing 7 changed files with 289 additions and 42 deletions.
diff --git a/doc/changes/devel.rst b/doc/changes/devel.rst
@@ -49,6 +49,7 @@ Bugs
 - Remove incorrect type hints in :func:`mne.io.read_raw_neuralynx` (:gh:`12236` by `Richard Höchenberger`_)
 - Fix bug where parent directory existence was not checked properly in :meth:`mne.io.Raw.save` (:gh:`12282` by `Eric Larson`_)
 - ``defusedxml`` is now an optional (rather than required) dependency and needed when reading EGI-MFF data, NEDF data, and BrainVision montages (:gh:`12264` by `Eric Larson`_)
+- Correctly handle temporal gaps in Neuralynx .ncs files via :func:`mne.io.read_raw_neuralynx` (:gh:`12279` by `Kristijan Armeni`_ and `Eric Larson`_)
 
 API changes
 ~~~~~~~~~~~

diff --git a/environment.yml b/environment.yml
@@ -61,3 +61,4 @@ dependencies:
   - mamba
   - lazy_loader
   - defusedxml
+  - python-neo
diff --git a/mne/datasets/config.py b/mne/datasets/config.py
@@ -88,7 +88,7 @@
 # respective repos, and make a new release of the dataset on GitHub. Then
 # update the checksum in the MNE_DATASETS dict below, and change version
 # here:                  ↓↓↓↓↓         ↓↓↓
-RELEASES = dict(testing="0.150", misc="0.27")
+RELEASES = dict(testing="0.151", misc="0.27")
 TESTING_VERSIONED = f'mne-testing-data-{RELEASES["testing"]}'
 MISC_VERSIONED = f'mne-misc-data-{RELEASES["misc"]}'
 
@@ -112,7 +112,7 @@
 # Testing and misc are at the top as they're updated most often
 MNE_DATASETS["testing"] = dict(
     archive_name=f"{TESTING_VERSIONED}.tar.gz",
-    hash="md5:0b7452daef4d19132505b5639d695628",
+    hash="md5:5832b4d44f0423d22305fa61cb75bc25",
     url=(
         "https://codeload.github.com/mne-tools/mne-testing-data/"
         f'tar.gz/{RELEASES["testing"]}'

diff --git a/mne/io/neuralynx/neuralynx.py b/mne/io/neuralynx/neuralynx.py
@@ -7,10 +7,51 @@
 
 from ..._fiff.meas_info import create_info
 from ..._fiff.utils import _mult_cal_one
+from ...annotations import Annotations
 from ...utils import _check_fname, _soft_import, fill_doc, logger, verbose
 from ..base import BaseRaw
 
 
+class AnalogSignalGap(object):
+    """Dummy object to represent gaps in Neuralynx data.
+
+    Creates a AnalogSignalProxy-like object.
+    Propagate `signal`, `units`, and `sampling_rate` attributes
+    to the `AnalogSignal` init returned by `load()`.
+
+    Parameters
+    ----------
+    signal : array-like
+        Array of shape (n_channels, n_samples) containing the data.
+    units : str
+        Units of the data. (e.g., 'uV')
+    sampling_rate : quantity
+        Sampling rate of the data. (e.g., 4000 * pq.Hz)
+
+    Returns
+    -------
+    sig : instance of AnalogSignal
+        A AnalogSignal object representing a gap in Neuralynx data.
+    """
+
+    def __init__(self, signal, units, sampling_rate):
+        self.signal = signal
+        self.units = units
+        self.sampling_rate = sampling_rate
+
+    def load(self, channel_indexes):
+        """Return AnalogSignal object."""
+        _soft_import("neo", "Reading NeuralynxIO files", strict=True)
+        from neo import AnalogSignal
+
+        sig = AnalogSignal(
+            signal=self.signal[:, channel_indexes],
+            units=self.units,
+            sampling_rate=self.sampling_rate,
+        )
+        return sig
+
+
 @fill_doc
 def read_raw_neuralynx(
     fname, *, preload=False, exclude_fname_patterns=None, verbose=None
@@ -59,11 +100,11 @@ def __init__(
         exclude_fname_patterns=None,
         verbose=None,
     ):
+        fname = _check_fname(fname, "read", True, "fname", need_dir=True)
+
         _soft_import("neo", "Reading NeuralynxIO files", strict=True)
         from neo.io import NeuralynxIO
 
-        fname = _check_fname(fname, "read", True, "fname", need_dir=True)
-
         logger.info(f"Checking files in {fname}")
 
         # construct a list of filenames to ignore
@@ -81,12 +122,18 @@ def __init__(
         try:
             nlx_reader = NeuralynxIO(dirname=fname, exclude_filename=exclude_fnames)
         except ValueError as e:
-            raise ValueError(
-                "It seems some .ncs channels might have different number of samples. "
-                + "This is likely due to different sampling rates. "
-                + "Try excluding them with `exclude_fname_patterns` input arg."
-                + f"\nOriginal neo.NeuralynxIO.parse_header() ValueError:\n{e}"
-            )
+            # give a more informative error message and what the user can do about it
+            if "Incompatible section structures across streams" in str(e):
+                raise ValueError(
+                    "It seems .ncs channels have different numbers of samples. "
+                    + "This is likely due to different sampling rates. "
+                    + "Try reading in only channels with uniform sampling rate "
+                    + "by excluding other channels with `exclude_fname_patterns` "
+                    + "input argument."
+                    + f"\nOriginal neo.NeuralynxRawIO ValueError:\n{e}"
+                ) from None
+            else:
+                raise
 
         info = create_info(
             ch_types="seeg",
@@ -98,32 +145,122 @@ def __init__(
         # the sample sizes of all segments
         n_segments = nlx_reader.header["nb_segment"][0]
         block_id = 0  # assumes there's only one block of recording
-        n_total_samples = sum(
-            nlx_reader.get_signal_size(block_id, segment)
-            for segment in range(n_segments)
+
+        # get segment start/stop times
+        start_times = np.array(
+            [nlx_reader.segment_t_start(block_id, i) for i in range(n_segments)]
+        )
+        stop_times = np.array(
+            [nlx_reader.segment_t_stop(block_id, i) for i in range(n_segments)]
         )
 
-        # construct an array of shape (n_total_samples,) indicating
-        # segment membership for each sample
-        sample2segment = np.concatenate(
+        # find discontinuous boundaries (of length n-1)
+        next_start_times = start_times[1::]
+        previous_stop_times = stop_times[:-1]
+        seg_diffs = next_start_times - previous_stop_times
+
+        # mark as discontinuous any two segments that have
+        # start/stop delta larger than sampling period (1/sampling_rate)
+        logger.info("Checking for temporal discontinuities in Neo data segments.")
+        delta = 1.5 / info["sfreq"]
+        gaps = seg_diffs > delta
+
+        seg_gap_dict = {}
+
+        logger.info(
+            f"N = {gaps.sum()} discontinuous Neo segments detected "
+            + f"with delta > {delta} sec. "
+            + "Annotating gaps as BAD_ACQ_SKIP."
+            if gaps.any()
+            else "No discontinuities detected."
+        )
+
+        gap_starts = stop_times[:-1][gaps]  # gap starts at segment offset
+        gap_stops = start_times[1::][gaps]  # gap stops at segment onset
+
+        # (n_gaps,) array of ints giving number of samples per inferred gap
+        gap_n_samps = np.array(
+            [
+                int(round(stop * info["sfreq"])) - int(round(start * info["sfreq"]))
+                for start, stop in zip(gap_starts, gap_stops)
+            ]
+        ).astype(int)  # force an int array (if no gaps, empty array is a float)
+
+        # get sort indices for all segments (valid and gap) in ascending order
+        all_starts_ids = np.argsort(np.concatenate([start_times, gap_starts]))
+
+        # variable indicating whether each segment is a gap or not
+        gap_indicator = np.concatenate(
             [
-                np.full(shape=(nlx_reader.get_signal_size(block_id, i),), fill_value=i)
-                for i in range(n_segments)
+                np.full(len(start_times), fill_value=0),
+                np.full(len(gap_starts), fill_value=1),
             ]
         )
+        gap_indicator = gap_indicator[all_starts_ids].astype(bool)
+
+        # store this in a dict to be passed to _raw_extras
+        seg_gap_dict = {
+            "gap_n_samps": gap_n_samps,
+            "isgap": gap_indicator,  # False (data segment) or True (gap segment)
+        }
+
+        valid_segment_sizes = [
+            nlx_reader.get_signal_size(block_id, i) for i in range(n_segments)
+        ]
+
+        sizes_sorted = np.concatenate([valid_segment_sizes, gap_n_samps])[
+            all_starts_ids
+        ]
+
+        # now construct an (n_samples,) indicator variable
+        sample2segment = np.concatenate(
+            [np.full(shape=(n,), fill_value=i) for i, n in enumerate(sizes_sorted)]
+        )
+
+        # construct Annotations()
+        gap_seg_ids = np.unique(sample2segment)[gap_indicator]
+        gap_start_ids = np.array(
+            [np.where(sample2segment == seg_id)[0][0] for seg_id in gap_seg_ids]
+        )
+
+        # recreate time axis for gap annotations
+        mne_times = np.arange(0, len(sample2segment)) / info["sfreq"]
+
+        assert len(gap_start_ids) == len(gap_n_samps)
+        annotations = Annotations(
+            onset=[mne_times[onset_id] for onset_id in gap_start_ids],
+            duration=[
+                mne_times[onset_id + (n - 1)] - mne_times[onset_id]
+                for onset_id, n in zip(gap_start_ids, gap_n_samps)
+            ],
+            description=["BAD_ACQ_SKIP"] * len(gap_start_ids),
+        )
 
         super(RawNeuralynx, self).__init__(
             info=info,
-            last_samps=[n_total_samples - 1],
+            last_samps=[sizes_sorted.sum() - 1],
             filenames=[fname],
             preload=preload,
-            raw_extras=[dict(smp2seg=sample2segment, exclude_fnames=exclude_fnames)],
+            raw_extras=[
+                dict(
+                    smp2seg=sample2segment,
+                    exclude_fnames=exclude_fnames,
+                    segment_sizes=sizes_sorted,
+                    seg_gap_dict=seg_gap_dict,
+                )
+            ],
         )
 
+        self.set_annotations(annotations)
+
     def _read_segment_file(self, data, idx, fi, start, stop, cals, mult):
         """Read a chunk of raw data."""
+        from neo import Segment
         from neo.io import NeuralynxIO
 
+        # quantities is a dependency of neo so we are guaranteed it exists
+        from quantities import Hz
+
         nlx_reader = NeuralynxIO(
             dirname=self._filenames[fi],
             exclude_filename=self._raw_extras[0]["exclude_fnames"],
@@ -136,13 +273,7 @@ def _read_segment_file(self, data, idx, fi, start, stop, cals, mult):
             [len(segment.analogsignals) for segment in neo_block[0].segments]
         ) == len(neo_block[0].segments)
 
-        # collect sizes of each segment
-        segment_sizes = np.array(
-            [
-                nlx_reader.get_signal_size(0, segment_id)
-                for segment_id in range(len(neo_block[0].segments))
-            ]
-        )
+        segment_sizes = self._raw_extras[fi]["segment_sizes"]
 
         # construct a (n_segments, 2) array of the first and last
         # sample index for each segment relative to the start of the recording
@@ -188,15 +319,44 @@ def _read_segment_file(self, data, idx, fi, start, stop, cals, mult):
             -1, 0
         ]  # express stop sample relative to segment onset
 
+        # array containing Segments
+        segments_arr = np.array(neo_block[0].segments, dtype=object)
+
+        # if gaps were detected, correctly insert gap Segments in between valid Segments
+        gap_samples = self._raw_extras[fi]["seg_gap_dict"]["gap_n_samps"]
+        gap_segments = [Segment(f"gap-{i}") for i in range(len(gap_samples))]
+
+        # create AnalogSignal objects representing gap data filled with 0's
+        sfreq = nlx_reader.get_signal_sampling_rate()
+        n_chans = (
+            np.arange(idx.start, idx.stop, idx.step).size
+            if type(idx) is slice
+            else len(idx)  # idx can be a slice or an np.array so check both
+        )
+
+        for seg, n in zip(gap_segments, gap_samples):
+            asig = AnalogSignalGap(
+                signal=np.zeros((n, n_chans)), units="uV", sampling_rate=sfreq * Hz
+            )
+            seg.analogsignals.append(asig)
+
+        n_total_segments = len(neo_block[0].segments + gap_segments)
+        segments_arr = np.zeros((n_total_segments,), dtype=object)
+
+        # insert inferred gap segments at the right place in between valid segments
+        isgap = self._raw_extras[0]["seg_gap_dict"]["isgap"]
+        segments_arr[~isgap] = neo_block[0].segments
+        segments_arr[isgap] = gap_segments
+
         # now load data from selected segments/channels via
-        # neo.Segment.AnalogSignal.load()
+        # neo.Segment.AnalogSignal.load() or AnalogSignalGap.load()
         all_data = np.concatenate(
             [
                 signal.load(channel_indexes=idx).magnitude[
                     samples[0] : samples[-1] + 1, :
                 ]
                 for seg, samples in zip(
-                    neo_block[0].segments[first_seg : last_seg + 1], sel_samples_local
+                    segments_arr[first_seg : last_seg + 1], sel_samples_local
                 )
                 for signal in seg.analogsignals
             ]