From 533425bbd16afa3ff9d001acc58693af62f42e59 Mon Sep 17 00:00:00 2001 From: KatharineShapcott <65502584+KatharineShapcott@users.noreply.github.com> Date: Fri, 30 Dec 2022 10:13:26 +0100 Subject: [PATCH 01/10] FIX: Replace very inefficient discrete _get_trial On my data x20 speedup --- syncopy/datatype/discrete_data.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/syncopy/datatype/discrete_data.py b/syncopy/datatype/discrete_data.py index 376c0fbb5..83c88b98b 100644 --- a/syncopy/datatype/discrete_data.py +++ b/syncopy/datatype/discrete_data.py @@ -168,7 +168,8 @@ def trialid(self, trlid): def trials(self): """list-like([sample x (>=2)] :class:`numpy.ndarray`) : trial slices of :attr:`data` property""" if self.trialid is not None: - valid_trls = np.unique(self.trialid[self.trialid >= 0]) + valid_trls = np.unique(self.trialid) + valid_trls = valid_trls[valid_trls >= 0] return Indexer(map(self._get_trial, valid_trls), valid_trls.size) else: @@ -184,7 +185,12 @@ def trialtime(self): # Helper function that grabs a single trial def _get_trial(self, trialno): - return self._data[self.trialid == trialno, :] + this_trl = self.trialid == trialno + if not np.any(this_trl): + return self._data[None, :] + st = this_trl.argmax() + end = len(this_trl) - this_trl[st:][::-1].argmax() - 1 + return self._data[st:end, :][this_trl[st:end],:] # Helper function that spawns a `FauxTrial` object given actual trial information def _preview_trial(self, trialno): From 8c1db01e94c7f23c5abf5091c763498ed51c95cb Mon Sep 17 00:00:00 2001 From: KatharineShapcott <65502584+KatharineShapcott@users.noreply.github.com> Date: Fri, 30 Dec 2022 10:26:54 +0100 Subject: [PATCH 02/10] FIX: end of slice incremented by 1 --- syncopy/datatype/discrete_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/syncopy/datatype/discrete_data.py b/syncopy/datatype/discrete_data.py index 83c88b98b..65b600df5 100644 --- a/syncopy/datatype/discrete_data.py +++ b/syncopy/datatype/discrete_data.py @@ -189,7 +189,7 @@ def _get_trial(self, trialno): if not np.any(this_trl): return self._data[None, :] st = this_trl.argmax() - end = len(this_trl) - this_trl[st:][::-1].argmax() - 1 + end = len(this_trl) - this_trl[st:][::-1].argmax() return self._data[st:end, :][this_trl[st:end],:] # Helper function that spawns a `FauxTrial` object given actual trial information From f28eead144b5096805899daeb2c6118796135b87 Mon Sep 17 00:00:00 2001 From: KatharineShapcott <65502584+KatharineShapcott@users.noreply.github.com> Date: Mon, 2 Jan 2023 15:05:41 +0100 Subject: [PATCH 03/10] FIX: return empty array NOT all data --- syncopy/datatype/discrete_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/syncopy/datatype/discrete_data.py b/syncopy/datatype/discrete_data.py index 65b600df5..a8c6c4c7c 100644 --- a/syncopy/datatype/discrete_data.py +++ b/syncopy/datatype/discrete_data.py @@ -187,7 +187,7 @@ def trialtime(self): def _get_trial(self, trialno): this_trl = self.trialid == trialno if not np.any(this_trl): - return self._data[None, :] + return self._data[0:0, :] st = this_trl.argmax() end = len(this_trl) - this_trl[st:][::-1].argmax() return self._data[st:end, :][this_trl[st:end],:] From f2c280c37bfc3150d35c408a04886ef734e4f7a7 Mon Sep 17 00:00:00 2001 From: KatharineShapcott <65502584+KatharineShapcott@users.noreply.github.com> Date: Mon, 2 Jan 2023 17:22:05 +0100 Subject: [PATCH 04/10] CHG: remove unique from sample --- syncopy/datatype/discrete_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/syncopy/datatype/discrete_data.py b/syncopy/datatype/discrete_data.py index a8c6c4c7c..79afc29aa 100644 --- a/syncopy/datatype/discrete_data.py +++ b/syncopy/datatype/discrete_data.py @@ -115,7 +115,7 @@ def sample(self): """Indices of all recorded samples""" if self.data is None: return None - return np.unique(self.data[:, self.dimord.index("sample")]) + return self.data[:, self.dimord.index("sample")] @property def samplerate(self): From 1cd9b03df783ce0bbf24cebd96cab327ed4f6b9d Mon Sep 17 00:00:00 2001 From: KatharineShapcott <65502584+KatharineShapcott@users.noreply.github.com> Date: Wed, 4 Jan 2023 12:55:25 +0100 Subject: [PATCH 05/10] CHG: new _trialslice property for DiscreteData --- syncopy/datatype/methods/definetrial.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/syncopy/datatype/methods/definetrial.py b/syncopy/datatype/methods/definetrial.py index a2a23c27d..c268a46e1 100644 --- a/syncopy/datatype/methods/definetrial.py +++ b/syncopy/datatype/methods/definetrial.py @@ -336,18 +336,18 @@ def definetrial(obj, trialdefinition=None, pre=None, post=None, start=None, # Compute trial-IDs by matching data samples with provided trial-bounds samples = tgt.data[:, tgt.dimord.index("sample")] if np.size(samples) > 0: - starts = tgt.sampleinfo[:, 0] - ends = tgt.sampleinfo[:, 1] - startids = np.searchsorted(starts, samples, side="right") - endids = np.searchsorted(ends, samples, side="left") - mask = startids == endids - startids -= 1 - # Samples not belonging into any trial get a trial-ID of -1 - startids[mask] = int(startids.min() <= 0) * (-1) - tgt.trialid = startids + idx = np.searchsorted(samples, tgt.sampleinfo.ravel()) + idx = idx.reshape(tgt.sampleinfo.shape) + + tgt._trialslice = [slice(st,end) for st,end in idx] + tgt.trialid = np.full((samples.shape), -1, dtype=int) + for itrl, itrl_slice in enumerate(tgt._trialslice): + tgt.trialid[itrl_slice] = itrl + # no data - empty object, can happen due to a selection else: tgt.trialid = None + tgt._trialslice = None tgt._trialdefinition = None # Write log entry From 321c2d3c917351a77d4b822dd244a9fe39d972b2 Mon Sep 17 00:00:00 2001 From: KatharineShapcott <65502584+KatharineShapcott@users.noreply.github.com> Date: Wed, 4 Jan 2023 13:07:31 +0100 Subject: [PATCH 06/10] CHG: Update DiscreteData to use _trialslice Removed custom .trials property --- syncopy/datatype/discrete_data.py | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/syncopy/datatype/discrete_data.py b/syncopy/datatype/discrete_data.py index 79afc29aa..73e307363 100644 --- a/syncopy/datatype/discrete_data.py +++ b/syncopy/datatype/discrete_data.py @@ -164,17 +164,6 @@ def trialid(self, trlid): raise exc self._trialid = np.array(trlid, dtype=int) - @property - def trials(self): - """list-like([sample x (>=2)] :class:`numpy.ndarray`) : trial slices of :attr:`data` property""" - if self.trialid is not None: - valid_trls = np.unique(self.trialid) - valid_trls = valid_trls[valid_trls >= 0] - return Indexer(map(self._get_trial, valid_trls), - valid_trls.size) - else: - return None - @property def trialtime(self): """list(:class:`numpy.ndarray`): trigger-relative sample times in s""" @@ -185,12 +174,7 @@ def trialtime(self): # Helper function that grabs a single trial def _get_trial(self, trialno): - this_trl = self.trialid == trialno - if not np.any(this_trl): - return self._data[0:0, :] - st = this_trl.argmax() - end = len(this_trl) - this_trl[st:][::-1].argmax() - return self._data[st:end, :][this_trl[st:end],:] + return self._data[self._trialslice[trialno], :] # Helper function that spawns a `FauxTrial` object given actual trial information def _preview_trial(self, trialno): From ccbff5c7795a76ab8dcd48f12b2b0a9a34f85a84 Mon Sep 17 00:00:00 2001 From: Katharine Shapcott Date: Thu, 5 Jan 2023 17:54:59 +0100 Subject: [PATCH 07/10] FIX: no data returns empty array --- syncopy/datatype/discrete_data.py | 3 +++ syncopy/datatype/methods/definetrial.py | 21 +++++++-------------- 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/syncopy/datatype/discrete_data.py b/syncopy/datatype/discrete_data.py index 73e307363..641241e2c 100644 --- a/syncopy/datatype/discrete_data.py +++ b/syncopy/datatype/discrete_data.py @@ -156,6 +156,9 @@ def trialid(self, trlid): print("SyNCoPy core - trialid: Cannot assign `trialid` without data. " + "Please assing data first") return + if (self.data.shape[0] == 0) and (trlid.shape[0] == 0): + self._trialid = np.array(trlid, dtype=int) + return scount = np.nanmax(self.data[:, self.dimord.index("sample")]) try: array_parser(trlid, varname="trialid", dims=(self.data.shape[0],), diff --git a/syncopy/datatype/methods/definetrial.py b/syncopy/datatype/methods/definetrial.py index c268a46e1..5ba449294 100644 --- a/syncopy/datatype/methods/definetrial.py +++ b/syncopy/datatype/methods/definetrial.py @@ -335,20 +335,13 @@ def definetrial(obj, trialdefinition=None, pre=None, post=None, start=None, # Compute trial-IDs by matching data samples with provided trial-bounds samples = tgt.data[:, tgt.dimord.index("sample")] - if np.size(samples) > 0: - idx = np.searchsorted(samples, tgt.sampleinfo.ravel()) - idx = idx.reshape(tgt.sampleinfo.shape) - - tgt._trialslice = [slice(st,end) for st,end in idx] - tgt.trialid = np.full((samples.shape), -1, dtype=int) - for itrl, itrl_slice in enumerate(tgt._trialslice): - tgt.trialid[itrl_slice] = itrl - - # no data - empty object, can happen due to a selection - else: - tgt.trialid = None - tgt._trialslice = None - tgt._trialdefinition = None + idx = np.searchsorted(samples, tgt.sampleinfo.ravel()) + idx = idx.reshape(tgt.sampleinfo.shape) + + tgt._trialslice = [slice(st,end) for st,end in idx] + tgt.trialid = np.full((samples.shape), -1, dtype=int) + for itrl, itrl_slice in enumerate(tgt._trialslice): + tgt.trialid[itrl_slice] = itrl # Write log entry if ref == tgt: From b730e92a83846a2822ced860461d84fc1e0e26be Mon Sep 17 00:00:00 2001 From: tensionhead Date: Thu, 5 Jan 2023 20:50:43 +0100 Subject: [PATCH 08/10] CHG: revert sample property Changes to be committed: modified: syncopy/datatype/discrete_data.py --- syncopy/datatype/discrete_data.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/syncopy/datatype/discrete_data.py b/syncopy/datatype/discrete_data.py index 641241e2c..fc71a4871 100644 --- a/syncopy/datatype/discrete_data.py +++ b/syncopy/datatype/discrete_data.py @@ -115,7 +115,9 @@ def sample(self): """Indices of all recorded samples""" if self.data is None: return None - return self.data[:, self.dimord.index("sample")] + # return self.data[:, self.dimord.index("sample")] + # there should be only one event per sample number?! + return np.unique(self.data[:, self.dimord.index("sample")]) @property def samplerate(self): From ff23719da54d2aa2c06a4f8792e5c4aee9bce40f Mon Sep 17 00:00:00 2001 From: KatharineShapcott <65502584+KatharineShapcott@users.noreply.github.com> Date: Wed, 18 Jan 2023 07:48:32 +0100 Subject: [PATCH 09/10] FIX: Remove incorrect test --- syncopy/tests/test_discretedata.py | 63 ------------------------------ 1 file changed, 63 deletions(-) diff --git a/syncopy/tests/test_discretedata.py b/syncopy/tests/test_discretedata.py index 9a668f464..9a32eee6b 100644 --- a/syncopy/tests/test_discretedata.py +++ b/syncopy/tests/test_discretedata.py @@ -506,69 +506,6 @@ def test_ed_trialsetting(self): with pytest.raises(SPYValueError): ang_dummy.definetrial(evt_dummy, pre=pre, post=post, trigger=1) - # test data-selection via class method - def test_ed_dataselection(self): - - # Create testing objects (regular and swapped dimords) - dummy = EventData(data=np.hstack([self.data, self.data]), - dimord=self.customDimord, - trialdefinition=self.trl, - samplerate=2.0) - ymmud = EventData(data=np.hstack([self.data[:, ::-1], self.data[:, ::-1]]), - trialdefinition=self.trl, - samplerate=2.0, - dimord=dummy.dimord[::-1]) - - # selections are chosen so that result is not empty - trialSelections = [ - "all", # enforce below selections in all trials of `dummy` - [3, 1] # minimally unordered - ] - - eventidSelections = [ - [0, 0, 1], # preserve repetition, don't convert to slice - range(0, 2), # narrow range - ] - - latencySelections = [ - [0.5, 2.5], # regular range - [0.7, 2.] # reduce range - ] - - timeSelections = list(zip(["latency"] * len(latencySelections), latencySelections)) - - trialSels = [random.choice(trialSelections)] - eventidSels = [random.choice(eventidSelections)] - timeSels = [random.choice(timeSelections)] - - for obj in [dummy, ymmud]: - eventidIdx = obj.dimord.index("eventid") - for trialSel in trialSels: - for eventidSel in eventidSels: - for timeSel in timeSels: - kwdict = {} - kwdict["trials"] = trialSel - kwdict["eventid"] = eventidSel - kwdict[timeSel[0]] = timeSel[1] - cfg = StructDict(kwdict) - # data selection via class-method + `Selector` instance for indexing - selected = obj.selectdata(**kwdict) - obj.selectdata(**kwdict, inplace=True) - selector = obj.selection - tk = 0 - for trialno in selector.trial_ids: - if selector.time[tk]: - assert np.array_equal(obj.trials[trialno][selector.time[tk], :], - selected.trials[tk]) - tk += 1 - assert np.array_equal(selected.eventid, - obj.eventid[np.unique(selected.data[:, eventidIdx]).astype(np.intp)]) - cfg.data = obj - # data selection via package function and `cfg`: ensure equality - out = selectdata(cfg) - assert np.array_equal(out.eventid, selected.eventid) - assert np.array_equal(out.data, selected.data) - def test_ed_parallel(self, testcluster): # repeat selected test w/parallel processing engine client = dd.Client(testcluster) From 6d6dce73d793525ffa1b8a98b5040a53ba95a406 Mon Sep 17 00:00:00 2001 From: KatharineShapcott <65502584+KatharineShapcott@users.noreply.github.com> Date: Wed, 18 Jan 2023 11:30:33 +0100 Subject: [PATCH 10/10] FIX: Remove unique from sample --- syncopy/datatype/discrete_data.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/syncopy/datatype/discrete_data.py b/syncopy/datatype/discrete_data.py index 78162a958..9c8e4b53b 100644 --- a/syncopy/datatype/discrete_data.py +++ b/syncopy/datatype/discrete_data.py @@ -115,9 +115,7 @@ def sample(self): """Indices of all recorded samples""" if self.data is None: return None - # return self.data[:, self.dimord.index("sample")] - # there should be only one event per sample number?! - return np.unique(self.data[:, self.dimord.index("sample")]) + return self.data[:, self.dimord.index("sample")] @property def samplerate(self):