Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FIX: Replace very inefficient discrete _get_trial #403

Merged
12 commits merged on
Jan 18, 2023
7 changes: 6 additions & 1 deletion syncopy/datatype/discrete_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,8 @@ def sample(self):
"""Indices of all recorded samples"""
if self.data is None:
return None
# return self.data[:, self.dimord.index("sample")]
# there should be only one event per sample number?!
tensionhead marked this conversation as resolved.
Show resolved Hide resolved
return np.unique(self.data[:, self.dimord.index("sample")])
tensionhead marked this conversation as resolved.
Show resolved Hide resolved

@property
Expand Down Expand Up @@ -156,6 +158,9 @@ def trialid(self, trlid):
print("SyNCoPy core - trialid: Cannot assign `trialid` without data. " +
"Please assing data first")
return
if (self.data.shape[0] == 0) and (trlid.shape[0] == 0):
self._trialid = np.array(trlid, dtype=int)
tensionhead marked this conversation as resolved.
Show resolved Hide resolved
return
scount = np.nanmax(self.data[:, self.dimord.index("sample")])
try:
array_parser(trlid, varname="trialid", dims=(self.data.shape[0],),
Expand All @@ -174,7 +179,7 @@ def trialtime(self):

# Helper function that grabs a single trial
def _get_trial(self, trialno):
return self._data[self.trialid == trialno, :]
return self._data[self._trialslice[trialno], :]

# Helper function that spawns a `FauxTrial` object given actual trial information
def _preview_trial(self, trialno):
Expand Down
21 changes: 7 additions & 14 deletions syncopy/datatype/methods/definetrial.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,20 +335,13 @@ def definetrial(obj, trialdefinition=None, pre=None, post=None, start=None,

# Compute trial-IDs by matching data samples with provided trial-bounds
samples = tgt.data[:, tgt.dimord.index("sample")]
if np.size(samples) > 0:
starts = tgt.sampleinfo[:, 0]
ends = tgt.sampleinfo[:, 1]
startids = np.searchsorted(starts, samples, side="right")
endids = np.searchsorted(ends, samples, side="left")
mask = startids == endids
startids -= 1
# Samples not belonging into any trial get a trial-ID of -1
startids[mask] = int(startids.min() <= 0) * (-1)
tgt.trialid = startids
# no data - empty object, can happen due to a selection
else:
tgt.trialid = None
tgt._trialdefinition = None
idx = np.searchsorted(samples, tgt.sampleinfo.ravel())
idx = idx.reshape(tgt.sampleinfo.shape)

tgt._trialslice = [slice(st,end) for st,end in idx]
tgt.trialid = np.full((samples.shape), -1, dtype=int)
for itrl, itrl_slice in enumerate(tgt._trialslice):
KatharineShapcott marked this conversation as resolved.
Show resolved Hide resolved
tgt.trialid[itrl_slice] = itrl

# Write log entry
if ref == tgt:
Expand Down
63 changes: 0 additions & 63 deletions syncopy/tests/test_discretedata.py
Original file line number Diff line number Diff line change
Expand Up @@ -506,69 +506,6 @@ def test_ed_trialsetting(self):
with pytest.raises(SPYValueError):
ang_dummy.definetrial(evt_dummy, pre=pre, post=post, trigger=1)

# test data-selection via class method
def test_ed_dataselection(self):

# Create testing objects (regular and swapped dimords)
dummy = EventData(data=np.hstack([self.data, self.data]),
dimord=self.customDimord,
trialdefinition=self.trl,
samplerate=2.0)
ymmud = EventData(data=np.hstack([self.data[:, ::-1], self.data[:, ::-1]]),
trialdefinition=self.trl,
samplerate=2.0,
dimord=dummy.dimord[::-1])

# selections are chosen so that result is not empty
trialSelections = [
"all", # enforce below selections in all trials of `dummy`
[3, 1] # minimally unordered
]

eventidSelections = [
[0, 0, 1], # preserve repetition, don't convert to slice
range(0, 2), # narrow range
]

latencySelections = [
[0.5, 2.5], # regular range
[0.7, 2.] # reduce range
]

timeSelections = list(zip(["latency"] * len(latencySelections), latencySelections))

trialSels = [random.choice(trialSelections)]
eventidSels = [random.choice(eventidSelections)]
timeSels = [random.choice(timeSelections)]

for obj in [dummy, ymmud]:
eventidIdx = obj.dimord.index("eventid")
for trialSel in trialSels:
for eventidSel in eventidSels:
for timeSel in timeSels:
kwdict = {}
kwdict["trials"] = trialSel
kwdict["eventid"] = eventidSel
kwdict[timeSel[0]] = timeSel[1]
cfg = StructDict(kwdict)
# data selection via class-method + `Selector` instance for indexing
selected = obj.selectdata(**kwdict)
obj.selectdata(**kwdict, inplace=True)
selector = obj.selection
tk = 0
for trialno in selector.trial_ids:
if selector.time[tk]:
assert np.array_equal(obj.trials[trialno][selector.time[tk], :],
selected.trials[tk])
tk += 1
assert np.array_equal(selected.eventid,
obj.eventid[np.unique(selected.data[:, eventidIdx]).astype(np.intp)])
cfg.data = obj
# data selection via package function and `cfg`: ensure equality
out = selectdata(cfg)
assert np.array_equal(out.eventid, selected.eventid)
assert np.array_equal(out.data, selected.data)

def test_ed_parallel(self, testcluster):
# repeat selected test w/parallel processing engine
client = dd.Client(testcluster)
Expand Down