diff --git a/elephant/__init__.py b/elephant/__init__.py index d64a3bee8..15be84997 100644 --- a/elephant/__init__.py +++ b/elephant/__init__.py @@ -22,13 +22,13 @@ sta, conversion, neo_tools, - spade, cell_assembly_detection, waveform_features) try: from . import pandas_bridge from . import asset + from . import spade except ImportError: # requirements-extras are missing pass diff --git a/elephant/spade.py b/elephant/spade.py index 2d4e3e7b9..1f8dab36c 100644 --- a/elephant/spade.py +++ b/elephant/spade.py @@ -13,63 +13,64 @@ If the fim.so module is not present in the correct location or cannot be imported (only available for linux OS) SPADE will make use of a python implementation of the fast fca algorithm contained in -elephant/spade_src/fast_fca.py, which is about 10 times slower. +`elephant/spade_src/fast_fca.py`, which is about 10 times slower. +Examples +-------- +>>> from elephant.spade import spade +>>> import elephant.spike_train_generation +>>> import quantities as pq -import elephant.spade -import elephant.spike_train_generation -import quantities as pq +Generate correlated spiketrains. -# Generate correlated data -sts = elephant.spike_train_generation.cpp( - rate=5*pq.Hz, A=[0]+[0.99]+[0]*9+[0.01], t_stop=10*pq.s) - -# Mining patterns with SPADE using a binsize of 1 ms and a window length of 1 -# bin (i.e., detecting only synchronous patterns). -patterns = spade.spade( - data=sts, binsize=1*pq.ms, winlen=1, dither=5*pq.ms, - min_spikes=10, n_surr=10, psr_param=[0,0,3], - output_format='patterns')['patterns'][0] - -# Plotting -plt.figure() -for neu in patterns['neurons']: - if neu == 0: - plt.plot( - patterns['times'], [neu]*len(patterns['times']), 'ro', - label='pattern') - else: - plt.plot( - patterns['times'], [neu] * len(patterns['times']), 'ro') -# Raster plot of the data -for st_idx, st in enumerate(sts): - if st_idx == 0: - plt.plot(st.rescale(pq.ms), [st_idx] * len(st), 'k.', label='spikes') - else: - plt.plot(st.rescale(pq.ms), [st_idx] * len(st), 'k.') -plt.ylim([-1, len(sts)]) -plt.xlabel('time (ms)') -plt.ylabel('neurons ids') -plt.legend() -plt.show() +>>> spiketrains = elephant.spike_train_generation.cpp( +... rate=5*pq.Hz, A=[0]+[0.99]+[0]*9+[0.01], t_stop=10*pq.s) + +Mining patterns with SPADE using a `binsize` of 1 ms and a window length of 1 +bin (i.e., detecting only synchronous patterns). + +>>> patterns = spade( +... spiketrains=spiketrains, binsize=1*pq.ms, winlen=1, dither=5*pq.ms, +... min_spikes=10, n_surr=10, psr_param=[0,0,3], +... output_format='patterns')['patterns'][0] + +>>> import matplotlib.pyplot as plt +>>> for neu in patterns['neurons']: +... label = 'pattern' if neu == 0 else None +... plt.plot(patterns['times'], [neu]*len(patterns['times']), 'ro', +... label=label) + +Raster plot of the spiketrains. + +>>> for st_idx, spiketrain in enumerate(spiketrains): +... label = 'spikes' if st_idx == 0 else None +... plt.plot(spiketrain.rescale(pq.ms), [st_idx] * len(spiketrain), +... 'k.', label=label) +>>> plt.ylim([-1, len(spiketrains)]) +>>> plt.xlabel('time (ms)') +>>> plt.ylabel('neuron ids') +>>> plt.legend() +>>> plt.show() :copyright: Copyright 2017 by the Elephant team, see `doc/authors.rst`. :license: BSD, see LICENSE.txt for details.
""" +from __future__ import division, print_function, unicode_literals + +import operator import time import warnings -import operator -from itertools import chain, combinations -from functools import reduce from collections import defaultdict +from functools import reduce +from itertools import chain, combinations -import numpy as np import neo +import numpy as np import quantities as pq from scipy import sparse -import elephant.spike_train_surrogates as surr import elephant.conversion as conv +import elephant.spike_train_surrogates as surr from elephant.spade_src import fast_fca warnings.simplefilter('once', UserWarning) @@ -89,193 +90,164 @@ HAVE_FIM = False -def spade(data, binsize, winlen, min_spikes=2, min_occ=2, max_spikes=None, - max_occ=None, min_neu=1, n_subsets=0, delta=0, epsilon=0, - stability_thresh=None, n_surr=0, dither=15 * pq.ms, spectrum='#', - alpha=1, stat_corr='fdr', psr_param=None, output_format='concepts'): +def spade(spiketrains, binsize, winlen, min_spikes=2, min_occ=2, + max_spikes=None, max_occ=None, min_neu=1, approx_stab_pars=None, + n_surr=0, dither=15 * pq.ms, spectrum='#', + alpha=None, stat_corr='fdr_bh', surr_method='dither_spikes', + psr_param=None, output_format='patterns'): r""" - Perform the SPADE [1,2] analysis for the parallel spike trains given in the - input. The data are discretized with a temporal resolution equal binsize - in a sliding window of winlen*binsize milliseconds. - - First, spike patterns are mined from the data using a technique termed - frequent itemset mining (FIM) or formal concept analysis (FCA). In this - framework, a particular spatio-temporal spike pattern is termed a - "concept". It is then possible to compute the stability and the signature - significance of all pattern candidates. In a final step, it is possible to - select a stability threshold and the significance level to select only - stable/significant concepts. + Perform the SPADE [1-3] analysis for the parallel input `spiketrains`. + They are discretized with a temporal resolution equal to + `binsize` in a sliding window of `winlen*binsize`. + + First, spike patterns are mined from the `spiketrains` using a technique + called frequent itemset mining (FIM) or formal concept analysis (FCA). In + this framework, a particular spatio-temporal spike pattern is called a + "concept". It is then possible to compute the stability and the p-value + of all pattern candidates. In a final step, concepts are filtered + according to a stability threshold and a significance level `alpha`. Parameters ---------- - data: list of neo.SpikeTrains + spiketrains: list of neo.SpikeTrain List containing the parallel spike trains to analyze - binsize: Quantity - The time precision used to discretize the data (binning). - winlen: int (positive) + binsize: pq.Quantity + The time precision used to discretize the spiketrains (binning). + winlen: int The size (number of bins) of the sliding window used for the analysis. The maximal length of a pattern (delay between first and last spike) is then given by winlen*binsize - min_spikes: int (positive) + min_spikes: int, optional Minimum number of spikes of a sequence to be considered a pattern. Default: 2 - min_occ: int (positive) + min_occ: int, optional Minimum number of occurrences of a sequence to be considered as a pattern. Default: 2 - max_spikes: int (positive) + max_spikes: int, optional Maximum number of spikes of a sequence to be considered a pattern. If None no maximal number of spikes is considered. 
Default: None - max_occ: int (positive) + max_occ: int, optional Maximum number of occurrences of a sequence to be considered as a pattern. If None, no maximal number of occurrences is considered. Default: None - min_neu: int (positive) + min_neu: int, optional Minimum number of neurons in a sequence to considered a pattern. Default: 1 - n_subsets: int - Number of subsets of a concept used to approximate its stability. If - n_subset is set to 0 the stability is not computed. If, however, - for parameters delta and epsilon (see below) delta + epsilon == 0, - then an optimal n_subsets is calculated according to the formula given - in Babin, Kuznetsov (2012), proposition 6: - - ..math:: - n_subset = frac{1}{2\eps^2} \ln(frac{2}{\delta}) +1 - - Default:0 - delta: float - delta: probability with at least ..math:$1-\delta$ - Default: 0 - epsilon: float - epsilon: absolute error - Default: 0 - stability_thresh: None or list of float - List containing the stability thresholds used to filter the concepts. - If stab_thr is None, then the concepts are not filtered. Otherwise, - only concepts with intensional stability > stab_thr[0] or extensional - stability > stab_thr[1] are returned and used for further analysis - within SPADE. - Default: None - n_surr: int + approx_stab_pars: dict or None, optional + Parameter values for approximate stability computation. + + 'n_subsets': int + Number of subsets of a concept used to approximate its stability. + If `n_subsets` is 0, it is calculated according to the formula + given in Babin, Kuznetsov (2012), proposition 6: + + .. math:: + n_{\text{subset}} = \frac{1}{2 \cdot \epsilon^2} + \ln{\left( \frac{2}{\delta} \right)} +1 + + 'delta': float, optional + Probability of failure: the stability estimate holds with a + probability of at least :math:`1-\delta` + 'epsilon': float, optional + Absolute error of the stability estimate + 'stability_thresh': None or list of float + List containing the stability thresholds used to filter the + concepts. + If `stability_thresh` is None, then the concepts are not filtered. + Otherwise, only concepts whose + + intensional stability is greater than `stability_thresh[0]` or + + extensional stability is greater than `stability_thresh[1]` + + are further analyzed. + n_surr: int, optional Number of surrogates to generate to compute the p-value spectrum. - This number should be large (n_surr>=1000 is recommended for 100 - spike trains in *sts*). If n_surr is 0, then the p-value spectrum is - not computed. + This number should be large (`n_surr >= 1000` is recommended for 100 + spike trains in `spiketrains`). If `n_surr == 0`, then the p-value + spectrum is not computed. Default: 0 - dither: Quantity + dither: pq.Quantity, optional Amount of spike time dithering for creating the surrogates for - filtering the pattern spectrum. A spike at time t is placed randomly - within ]t-dither, t+dither[ (see also - elephant.spike_train_surrogates.dither_spikes). + filtering the pattern spectrum. A spike at time `t` is placed randomly + within `[t-dither, t+dither]` (see also + :func:`elephant.spike_train_surrogates.dither_spikes`). Default: 15*pq.ms - spectrum: str - Define the signature of the patterns, it can assume values: + spectrum: {'#', '3d#'}, optional + Defines the signature of the patterns.
+ '#': pattern spectrum using as signature the pair: (number of spikes, number of occurrences) '3d#': pattern spectrum using the as signature the triplets: (number of spikes, number of occurrence, difference between last and first spike of the pattern) Default: '#' - alpha: float - The significance level of the hypothesis tests performed. If alpha=1 - all the concepts are returned. If 00: - (spikes in the pattern, occurrences of the patterns, - (intensional stability, extensional stability)) - corresponding pvalue - - The patterns are filtered depending on the parameters in input: - If stability_thresh==None and alpha==1: - output['patterns'] contains all the candidates patterns - (all concepts mined with the fca algorithm) - If stability_thresh!=None and alpha==1: - output contains only patterns candidates with: - intensional stability>stability_thresh[0] or - extensional stability>stability_thresh[1] - If stability_thresh==None and alpha!=1: - output contains only pattern candidates with a signature - significant in respect the significance level alpha corrected - If stability_thresh!=None and alpha!=1: - output['patterns'] contains only pattern candidates with a - signature significant in respect the significance level alpha - corrected and such that: - intensional stability>stability_thresh[0] or - extensional stability>stability_thresh[1] - In addition, output['non_sgnf_sgnt'] contains the list of - non-significant signature for the significance level alpha. - If n_surr>0: - output['pvalue_spectrum'] contains a tuple of signatures and - the corresponding p-value. - - If output_format is 'patterns': - output: list - List of dictionaries. Each dictionary corresponds to a patterns and - has the following keys: - neurons: array containing the indices of the neurons of the - pattern. - lags: array containing the lags (integers corresponding to the - number of bins) between the spikes of the patterns. The - first lag is always assumed to be 0 and corresponds to the - first spike ['times'] array containing the times. - (integers corresponding to the bin idx) of the occurrences of the - patterns - signature: tuple containing two integers: - (number of spikes of the patterns, - number of occurrences of the pattern) - pvalue: the p-value corresponding to the pattern. If n_surr==0 the - p-values are set to 0.0. + output : dict + 'patterns': + If `output_format` is 'patterns', see the return of + :func:`concept_output_to_patterns` - Notes - ----- - If detected, this function will utilize MPI to parallelize the analysis. + If `output_format` is 'concepts', then `output['patterns']` is a + list of patterns, each of which is a tuple of + 1. spikes in the pattern - Example - ------- - The following applies SPADE to a list of spike trains in data. These calls - do not include the statistical testing (for details see the documentation - of spade.spade()) + 2. occurrences of the pattern - >>> import elephant.spade - >>> import quantities as pq - >>> binsize = 3 * pq.ms # time resolution used to discretize the data - >>> winlen = 10 # maximal pattern length in bins (i.e., sliding window) - >>> result_spade = spade.spade(data, binsize, winlen) + For details see :func:`concepts_mining`. + + If stability is computed, there are also: + + 3. intensional stability + + 4. extensional stability + + For details see :func:`approximate_stability`.
+ + 'pvalue_spectrum' (only if `n_surr > 0`): + A list of signatures in tuple format: * size + + * number of occurrences + + * duration (only if `spectrum=='3d#'`) + + * p-value + + 'non_sgnf_sgnt': list + Non-significant signatures of 'pvalue_spectrum'. + + Notes + ----- + If detected, this function will use MPI to parallelize the analysis. References ---------- @@ -285,7 +257,22 @@ def spade(data, binsize, winlen, min_spikes=2, min_occ=2, max_spikes=None, [2] Quaglio, P., Yegenoglu, A., Torre, E., Endres, D. M., & Gruen, S.(2017) Detection and Evaluation of Spatio-Temporal Spike Patterns in Massively Parallel Spike Train Data with SPADE. - Frontiers in Computational Neuroscience, 11. + Frontiers in Computational Neuroscience, 11. + [3] Stella, A., Quaglio, P., Torre, E., & Gruen, S. (2019). + 3d-SPADE: Significance evaluation of spatio-temporal patterns of various + temporal extents. Biosystems, 185, 104022. + + Examples + -------- + The following example applies SPADE to `spiketrains` (a list of + `neo.SpikeTrain`, generated here as correlated Poisson processes). + + >>> from elephant.spade import spade + >>> import elephant.spike_train_generation + >>> import quantities as pq + >>> spiketrains = elephant.spike_train_generation.cpp( + ... rate=5*pq.Hz, A=[0]+[0.99]+[0]*9+[0.01], t_stop=10*pq.s) + >>> binsize = 3 * pq.ms # time resolution to discretize the spiketrains + >>> winlen = 10 # maximal pattern length in bins (i.e., sliding window) + >>> result_spade = spade(spiketrains, binsize, winlen) + """ if HAVE_MPI: # pragma: no cover comm = MPI.COMM_WORLD # create MPI communicator @@ -293,38 +280,41 @@ def spade(data, binsize, winlen, min_spikes=2, min_occ=2, max_spikes=None, else: rank = 0 - if output_format not in ['concepts', 'patterns']: - raise ValueError("The output_format value has to be" - "'patterns' or 'concepts'") + compute_stability = _check_input( + spiketrains=spiketrains, binsize=binsize, winlen=winlen, + min_spikes=min_spikes, min_occ=min_occ, + max_spikes=max_spikes, max_occ=max_occ, min_neu=min_neu, + approx_stab_pars=approx_stab_pars, + n_surr=n_surr, dither=dither, spectrum=spectrum, + alpha=alpha, stat_corr=stat_corr, surr_method=surr_method, + psr_param=psr_param, output_format=output_format) time_mining = time.time() - if rank == 0 or n_subsets > 0: - # Mine the data for extraction of concepts - concepts, rel_matrix = concepts_mining(data, binsize, winlen, - min_spikes=min_spikes, - min_occ=min_occ, - max_spikes=max_spikes, - max_occ=max_occ, - min_neu=min_neu, - report='a') + if rank == 0 or compute_stability: + # Mine the spiketrains for extraction of concepts + concepts, rel_matrix = concepts_mining( + spiketrains, binsize, winlen, min_spikes=min_spikes, + min_occ=min_occ, max_spikes=max_spikes, max_occ=max_occ, + min_neu=min_neu, report='a') time_mining = time.time() - time_mining print("Time for data mining: {}".format(time_mining)) # Decide if compute the approximated stability - if n_subsets > 0: + if compute_stability: + if 'stability_thresh' in approx_stab_pars.keys(): + stability_thresh = approx_stab_pars.pop('stability_thresh') + else: + stability_thresh = None # Computing the approximated stability of all the concepts time_stability = time.time() - concepts = approximate_stability(concepts, rel_matrix, n_subsets, - delta=delta, epsilon=epsilon) + concepts = approximate_stability( + concepts, rel_matrix, **approx_stab_pars) time_stability = time.time() - time_stability print("Time for stability computation: {}".format(time_stability)) # Filtering the concepts using stability thresholds if stability_thresh is not None: - concepts = list(filter( - lambda c: _stability_filter(c, stability_thresh), concepts)) - elif stability_thresh is not None: -
warnings.warn('Stability_thresh not None but stability has not been ' - 'computed (n_subsets==0)') + concepts = [concept for concept in concepts + if _stability_filter(concept, stability_thresh)] output = {} pv_spec = None # initialize pv_spec to None @@ -332,19 +322,16 @@ def spade(data, binsize, winlen, min_spikes=2, min_occ=2, max_spikes=None, if n_surr > 0: # Compute pvalue spectrum time_pvalue_spectrum = time.time() - pv_spec = pvalue_spectrum(data, binsize, winlen, dither=dither, - n_surr=n_surr, min_spikes=min_spikes, - min_occ=min_occ, max_spikes=max_spikes, - max_occ=max_occ, min_neu=min_neu, - spectrum=spectrum) + pv_spec = pvalue_spectrum( + spiketrains, binsize, winlen, dither=dither, n_surr=n_surr, + min_spikes=min_spikes, min_occ=min_occ, max_spikes=max_spikes, + max_occ=max_occ, min_neu=min_neu, spectrum=spectrum, + surr_method=surr_method) time_pvalue_spectrum = time.time() - time_pvalue_spectrum print("Time for pvalue spectrum computation: {}".format( time_pvalue_spectrum)) # Storing pvalue spectrum output['pvalue_spectrum'] = pv_spec - elif 0 < alpha < 1: - warnings.warn('0<alpha<1 but p-value spectrum has not been ' - 'computed (n_surr==0)') if n_surr > 0: - if len(pv_spec) > 0: + if len(pv_spec) > 0 and alpha is not None: # Computing non-significant entries of the spectrum applying # the statistical correction - ns_sgnt = test_signature_significance(pv_spec, alpha, - corr=stat_corr, - report='non_significant', - spectrum=spectrum) + ns_signatures = test_signature_significance( + pv_spec, concepts, alpha, winlen, corr=stat_corr, + report='non_significant', spectrum=spectrum) # Storing non-significant entries of the pvalue spectrum - output['non_sgnf_sgnt'] = ns_sgnt + output['non_sgnf_sgnt'] = ns_signatures # Filter concepts with pvalue spectrum (psf) - if len(ns_sgnt) > 0: - concepts = list(filter( - lambda c: _pattern_spectrum_filter( - c, ns_sgnt, spectrum, winlen), concepts)) + if len(ns_signatures) > 0: + concepts = [concept for concept in concepts + if _pattern_spectrum_filter(concept, ns_signatures, + spectrum, winlen)] # Decide whether to filter concepts using psr - if psr_param is not None: - # Filter using conditional tests (psr) - concepts = pattern_set_reduction(concepts, ns_sgnt, - winlen=winlen, - h_subset_filtering=psr_param[0], - k_superset_filtering=psr_param[1], - l_covered_spikes=psr_param[2], - min_spikes=min_spikes, - min_occ=min_occ) # nopep8 - # Storing patterns for ouput format concepts + if psr_param is not None: + # Filter using conditional tests (psr) + concepts = pattern_set_reduction( + concepts, ns_signatures, winlen=winlen, spectrum=spectrum, + h_subset_filtering=psr_param[0], k_superset_filtering=psr_param[1], + l_covered_spikes=psr_param[2], min_spikes=min_spikes, + min_occ=min_occ) + # Storing patterns for output format concepts if output_format == 'concepts': output['patterns'] = concepts - return output + else: # output_format == 'patterns': + # Transforming concepts to dictionary containing pattern's infos + output['patterns'] = concept_output_to_patterns( + concepts, winlen, binsize, pv_spec, spectrum, + spiketrains[0].t_start) - # Transforming concepts to dictionary containing pattern's infos - output['patterns'] = concept_output_to_patterns(concepts, - winlen, binsize, - pv_spec, spectrum, - data[0].t_start) return output -def concepts_mining(data, binsize, winlen, min_spikes=2, min_occ=2, +def _check_input( + spiketrains, binsize, winlen, min_spikes=2, min_occ=2, + max_spikes=None, max_occ=None, min_neu=1, approx_stab_pars=None, + n_surr=0, dither=15 * pq.ms, spectrum='#', + alpha=None, stat_corr='fdr_bh',
surr_method='dither_spikes', + psr_param=None, output_format='patterns'): + """ + Checks all input given to SPADE + Parameters + ---------- + see :func:`spade` + + Returns + ------- + compute_stability: bool + Whether the stability calculation is used + """ + + # Check spiketrains + if not all([isinstance(elem, neo.SpikeTrain) for elem in spiketrains]): + raise TypeError( + 'spiketrains must be a list of SpikeTrains') + # Check that all spiketrains have same t_start and same t_stop + if not all([spiketrain.t_start == spiketrains[0].t_start + for spiketrain in spiketrains]) or \ + not all([spiketrain.t_stop == spiketrains[0].t_stop + for spiketrain in spiketrains]): + raise ValueError( + 'All spiketrains must have the same t_start and t_stop') + + # Check binsize + if not isinstance(binsize, pq.Quantity): + raise ValueError('binsize must be a pq.Quantity') + + # Check winlen + if not isinstance(winlen, int): + raise ValueError('winlen must be an integer') + + # Check min_spikes + if not isinstance(min_spikes, int): + raise ValueError('min_spikes must be an integer') + + # Check min_occ + if not isinstance(min_occ, int): + raise ValueError('min_occ must be an integer') + + # Check max_spikes + if not (isinstance(max_spikes, int) or max_spikes is None): + raise ValueError('max_spikes must be an integer or None') + + # Check max_occ + if not (isinstance(max_occ, int) or max_occ is None): + raise ValueError('max_occ must be an integer or None') + + # Check min_neu + if not isinstance(min_neu, int): + raise ValueError('min_neu must be an integer') + + # Check approx_stab_pars + compute_stability = False + if isinstance(approx_stab_pars, dict): + if 'n_subsets' in approx_stab_pars.keys() or\ + ('epsilon' in approx_stab_pars.keys() and + 'delta' in approx_stab_pars.keys()): + compute_stability = True + else: + raise ValueError( + 'for approximate stability computation you need to ' + 'pass n_subsets or epsilon and delta.') + + # Check n_surr + if not isinstance(n_surr, int): + raise ValueError('n_surr must be an integer') + + # Check dither + if not isinstance(dither, pq.Quantity): + raise ValueError('dither must be a pq.Quantity') + + # Check spectrum + if spectrum not in ('#', '3d#'): + raise ValueError("spectrum must be '#' or '3d#'") + + # Check alpha + if isinstance(alpha, (int, float)): + # Check redundant use of alpha + if 0. < alpha < 1.
and n_surr == 0: + warnings.warn('0.<alpha<1 but p-value spectrum has not been ' + 'computed (n_surr==0)') if min_neu < 1: raise ValueError('min_neu must be an integer >=1') # By default, set the maximum pattern size to the number of spiketrains if max_z is None: - max_z = np.max((np.max([len(tr) for tr in transactions]), min_z + 1)) + max_z = max(max(map(len, transactions)), min_z + 1) # By default set maximum number of data to number of bins if max_c is None: max_c = len(transactions) @@ -724,23 +835,30 @@ def _fpgrowth(transactions, min_c=2, min_z=2, max_z=None, if report == '3d#': spec_matrix = np.zeros((max_z + 1, max_c + 1, winlen)) spectrum = [] - # Mining the data with fpgrowth algorithm - if np.unique(transactions, return_counts=True)[1][0] == len( - transactions): - fpgrowth_output = [(tuple(transactions[0]), len(transactions))] + # check whether all transactions are identical + # in that case FIM would not find anything, + # so we need to create the output manually + # for optimal performance, + # we do the check sequentially and immediately break + # once we find a second unique transaction + first_transaction = transactions[0] + for transaction in transactions[1:]: + if transaction != first_transaction: + # Mining the spiketrains with fpgrowth algorithm + fpgrowth_output = fim.fpgrowth( + tracts=transactions, + target=target, + supp=-min_c, + zmin=min_z, + zmax=max_z, + report='a', + algo='s') + break else: - fpgrowth_output = fim.fpgrowth( - tracts=transactions, - target=target, - supp=-min_c, - zmin=min_z, - zmax=max_z, - report='a', - algo='s') + fpgrowth_output = [(tuple(transactions[0]), len(transactions))] # Applying min/max conditions and computing extent (window positions) - fpgrowth_output = list(filter( - lambda c: _fpgrowth_filter( - c, winlen, max_c, min_neu), fpgrowth_output)) + fpgrowth_output = [concept for concept in fpgrowth_output + if _fpgrowth_filter(concept, winlen, max_c, min_neu)] # filter out subsets of patterns that are found as a side-effect # of using the moving window strategy fpgrowth_output = _filter_for_moving_window_subsets( @@ -751,9 +869,11 @@ def _fpgrowth(transactions, min_c=2, min_z=2, max_z=None, # Computing the extent of the concept (patterns # occurrences), checking in rel_matrix in which windows # the intent occurred - extent = tuple(np.where( - np.all(rel_matrix[:, intent], axis=1) == 1)[0]) - concepts.append((intent, extent)) + extent = tuple( + np.nonzero( + np.all(rel_matrix[:, intent], axis=1) + )[0]) + concepts.append((intent, extent)) # Computing 2d spectrum elif report == '#': spec_matrix[len(intent) - 1, supp - 1] += 1 @@ -766,13 +886,23 @@ def _fpgrowth(transactions, min_c=2, min_z=2, max_z=None, return concepts if report == '#': - for (z, c) in np.transpose(np.where(spec_matrix != 0)): - spectrum.append((z + 1, c + 1, int(spec_matrix[z, c]))) + for (size, occurrences) in np.transpose(np.where(spec_matrix != 0)): + spectrum.append( + (size + 1, occurrences + 1, + int(spec_matrix[size, occurrences]))) elif report == '3d#': - for (z, c, l) in np.transpose(np.where(spec_matrix != 0)): + for (size, occurrences, duration) in\ + np.transpose(np.where(spec_matrix != 0)): spectrum.append( - (z + 1, c + 1, l, int(spec_matrix[z, c, l]))) + (size + 1, occurrences + 1, duration, + int(spec_matrix[size, occurrences, duration]))) del spec_matrix + if len(spectrum) > 0: + spectrum = np.array(spectrum) + elif report == '#': + spectrum = np.zeros(shape=(0, 3)) + elif report == '3d#': + spectrum = np.zeros(shape=(0, 4)) return spectrum @@ -782,11 +912,12 @@ def _fpgrowth_filter(concept, winlen, max_c, min_neu): neurons and
a maximum number of occurrences and first spike in the first bin position """ - keep_concepts = len( - np.unique(np.array( - concept[0]) // winlen)) >= min_neu and concept[1] <= max_c and min( - np.array(concept[0]) % winlen) == 0 - return keep_concepts + intent = np.array(concept[0]) + keep_concept = (min(intent % winlen) == 0 + and concept[1] <= max_c + and np.unique(intent // winlen).shape[0] >= min_neu + ) + return keep_concept def _rereference_to_last_spike(transactions, winlen): @@ -823,7 +954,7 @@ def _rereference_to_last_spike(transactions, winlen): Uses a reverse map with a set representation. """ # don't do anything if the input list is empty - if not len(concepts): + if len(concepts) == 0: return concepts if hasattr(concepts[0], 'intent'): @@ -834,20 +965,20 @@ support = np.array([len(c.extent) for c in concepts]) # convert transactions relative to last pattern spike - converted_transactions = [_rereference_to_last_spike(c.intent, + converted_transactions = [_rereference_to_last_spike(concept.intent, winlen=winlen) - for c in concepts] + for concept in concepts] else: # fim.fpgrowth format # sort the concepts by (decreasing) support - concepts.sort(key=lambda c: -c[1]) + concepts.sort(key=lambda concept: -concept[1]) - support = np.array([c[1] for c in concepts]) + support = np.array([concept[1] for concept in concepts]) # convert transactions relative to last pattern spike - converted_transactions = [_rereference_to_last_spike(c[0], + converted_transactions = [_rereference_to_last_spike(concept[0], winlen=winlen) - for c in concepts] + for concept in concepts] output = [] @@ -904,24 +1035,24 @@ def _fast_fca(context, min_c=2, min_z=2, max_z=None, times of the last and the first spike of the pattern) Default: 'a' The following parameters are specific to Massive parallel SpikeTrains - winlen: int (positive) + winlen: int The size (number of bins) of the sliding window used for the analysis. The maximal length of a pattern (delay between first and last spike) is then given by winlen*binsize Default: 1 - min_neu: int (positive) + min_neu: int Minimum number of neurons in a sequence to considered a potential pattern. Default: 1 Returns - -------- + ------- If report == 'a': - All the pattern candidates (concepts) found in the data. Each + All the pattern candidates (concepts) found in the spiketrains. Each pattern is represented as a tuple containing (spike IDs, discrete times (window position) of the occurrences of the pattern). The spike IDs are defined as: - spike_id=neuron_id*bin_id; with neuron_id in [0, len(data)] and + spike_id=neuron_id*winlen+bin_id; with neuron_id in [0, len(spiketrains)] and bin_id in [0, winlen].
If report == '#': The pattern spectrum is represented as a list of triplets each @@ -937,7 +1068,7 @@ def _fast_fca(context, min_c=2, min_z=2, max_z=None, concepts = [] # Check parameters if min_neu < 1: - raise AttributeError('min_neu must be an integer >=1') + raise ValueError('min_neu must be an integer >=1') # By default set maximum number of attributes if max_z is None: max_z = len(context) @@ -946,16 +1077,16 @@ def _fast_fca(context, min_c=2, min_z=2, max_z=None, max_c = len(context) if report == '#': spec_matrix = np.zeros((max_z, max_c)) - if report == '3d#': + elif report == '3d#': spec_matrix = np.zeros((max_z, max_c, winlen)) spectrum = [] - # Mining the data with fast fca algorithm + # Mining the spiketrains with fast fca algorithm fca_out = fast_fca.FormalConcepts(context) fca_out.computeLattice() fca_concepts = fca_out.concepts - fca_concepts = list(filter( - lambda c: _fca_filter( - c, winlen, min_c, min_z, max_c, max_z, min_neu), fca_concepts)) + fca_concepts = [concept for concept in fca_concepts + if _fca_filter(concept, winlen, min_c, min_z, max_c, max_z, + min_neu)] fca_concepts = _filter_for_moving_window_subsets(fca_concepts, winlen) # Applying min/max conditions for fca_concept in fca_concepts: @@ -965,7 +1096,7 @@ def _fast_fca(context, min_c=2, min_z=2, max_z=None, # computing spectrum if report == '#': spec_matrix[len(intent) - 1, len(extent) - 1] += 1 - if report == '3d#': + elif report == '3d#': spec_matrix[len(intent) - 1, len(extent) - 1, max( np.array(intent) % winlen)] += 1 if report == 'a': @@ -974,14 +1105,23 @@ def _fast_fca(context, min_c=2, min_z=2, max_z=None, del concepts # returning spectrum if report == '#': - for (z, c) in np.transpose(np.where(spec_matrix != 0)): - spectrum.append((z + 1, c + 1, int(spec_matrix[z, c]))) + for (size, occurrence) in np.transpose(np.where(spec_matrix != 0)): + spectrum.append( + (size + 1, occurrence + 1, int(spec_matrix[size, occurrence]))) if report == '3d#': - for (z, c, l) in np.transpose(np.where(spec_matrix != 0)): + for (size, occurrence, duration) in\ + np.transpose(np.where(spec_matrix != 0)): spectrum.append( - (z + 1, c + 1, l, int(spec_matrix[z, c, l]))) + (size + 1, occurrence + 1, duration, + int(spec_matrix[size, occurrence, duration]))) del spec_matrix + if len(spectrum) > 0: + spectrum = np.array(spectrum) + elif report == '#': + spectrum = np.zeros(shape=(0, 3)) + elif report == '3d#': + spectrum = np.zeros(shape=(0, 4)) return spectrum @@ -992,16 +1132,17 @@ def _fca_filter(concept, winlen, min_c, min_z, max_c, max_z, min_neu): """ intent = tuple(concept.intent) extent = tuple(concept.extent) - keep_concepts = len(intent) >= min_z and len(extent) >= min_c and len( - intent) <= max_z and len(extent) <= max_c and len( - np.unique(np.array(intent) // winlen)) >= min_neu and min( - np.array(intent) % winlen) == 0 + keep_concepts = \ + min_z <= len(intent) <= max_z and \ + min_c <= len(extent) <= max_c and \ + len(np.unique(np.array(intent) // winlen)) >= min_neu and \ + min(np.array(intent) % winlen) == 0 return keep_concepts -def pvalue_spectrum(data, binsize, winlen, dither, n_surr, min_spikes=2, +def pvalue_spectrum(spiketrains, binsize, winlen, dither, n_surr, min_spikes=2, min_occ=2, max_spikes=None, max_occ=None, min_neu=1, - spectrum='#'): + spectrum='#', surr_method='dither_spikes'): """ Compute the p-value spectrum of pattern signatures extracted from surrogates of parallel spike trains, under the null hypothesis of @@ -1013,59 +1154,64 @@ def pvalue_spectrum(data, binsize, winlen, dither, 
n_surr, min_spikes=2, are computed, and their occurrence probability estimated by their occurrence frequency (p-value spectrum) - Parameters ---------- - data: list of neo.SpikeTrains + spiketrains: list of neo.SpikeTrain List containing the parallel spike trains to analyze - binsize: Quantity - The time precision used to discretize the data (binning). - winlen: int (positive) + binsize: pq.Quantity + The time precision used to discretize the `spiketrains` (binning). + winlen: int The size (number of bins) of the sliding window used for the analysis. The maximal length of a pattern (delay between first and last spike) is - then given by winlen*binsize - dither: Quantity + then given by `winlen*binsize` + dither: pq.Quantity Amount of spike time dithering for creating the surrogates for filtering the pattern spectrum. A spike at time t is placed randomly - within ]t-dither, t+dither[ (see also - elephant.spike_train_surrogates.dither_spikes). + within `[t-dither, t+dither]` (see also + :func:`elephant.spike_train_surrogates.dither_spikes`). Default: 15*pq.s n_surr: int Number of surrogates to generate to compute the p-value spectrum. - This number should be large (n_surr>=1000 is recommended for 100 - spike trains in *sts*). If n_surr is 0, then the p-value spectrum is - not computed. + This number should be large (`n_surr>=1000` is recommended for 100 + spike trains in spiketrains). If `n_surr` is 0, then the p-value + spectrum is not computed. Default: 0 - min_spikes: int (positive) + min_spikes: int, optional Minimum number of spikes of a sequence to be considered a pattern. Default: 2 - min_occ: int (positive) + min_occ: int, optional + Minimum number of occurrences of a sequence to be considered as a + pattern. + Default: 2 - max_spikes: int (positive) + max_spikes: int, optional Maximum number of spikes of a sequence to be considered a pattern. If None no maximal number of spikes is considered. Default: None - max_occ: int (positive) + max_occ: int, optional Maximum number of occurrences of a sequence to be considered as a pattern. If None, no maximal number of occurrences is considered. Default: None - min_neu: int (positive) + min_neu: int, optional Minimum number of neurons in a sequence to considered a pattern. Default: 1 - spectrum: str - Defines the signature of the patterns, it can assume values: + spectrum: {'#', '3d#'}, optional + Defines the signature of the patterns. + '#': pattern spectrum using as signature the pair: (number of spikes, number of occurrence) '3d#': pattern spectrum using the as signature the triplets: (number of spikes, number of occurrence, difference between last and first spike of the pattern) Default: '#' + surr_method: str + Method that is used to generate the surrogates. You can use every + method defined in + :func:`elephant.spike_train_surrogates.surrogates`.
+ Default: 'dither_spikes' Returns - ------ - pv_spec: list + ------- + pv_spec : list if spectrum == '#': A list of triplets (z,c,p), where (z,c) is a pattern signature and p is the corresponding p-value (fraction of surrogates @@ -1087,174 +1233,279 @@ def pvalue_spectrum(data, binsize, winlen, dither, n_surr, min_spikes=2, size = 1 # Check on number of surrogates if n_surr <= 0: - raise AttributeError('n_surr has to be >0') + raise ValueError('n_surr has to be >0') + if surr_method not in surr.SURR_METHODS: + raise ValueError( + 'specified surr_method (=%s) not valid' % surr_method) + len_partition = n_surr // size # length of each MPI task len_remainder = n_surr % size - # For each surrogate collect the signatures (z,c) such that (z*,c*)>=(z,c) - # exists in that surrogate. Group such signatures (with repetition) - # list of all signatures found in surrogates, initialized to [] - surr_sgnts = [] - add_remainder = rank < len_remainder + + if max_spikes is None: + # if max_spikes not defined, set it to the number of spiketrains times + # number of bins per window. + max_spikes = len(spiketrains) * winlen + + if spectrum == '#': + max_occs = np.empty(shape=(len_partition + add_remainder, + max_spikes - min_spikes + 1), + dtype=np.uint16) + else: # spectrum == '3d#': + max_occs = np.empty(shape=(len_partition + add_remainder, + max_spikes - min_spikes + 1, winlen), + dtype=np.uint16) + + if surr_method == 'joint_isi_dithering': + joint_isi_instances = [surr.JointISI(spiketrain, dither=dither, + method='window') + for spiketrain in spiketrains] for i in range(len_partition + add_remainder): - surrs = [surr.dither_spikes( - xx, dither=dither, n=1)[0] for xx in data] + if surr_method == 'joint_isi_dithering': + surrs = [instance.dithering()[0] for + instance in joint_isi_instances] + else: + surrs = [surr.surrogates( + spiketrain, n=1, surr_method=surr_method, + dt=dither)[0] for spiketrain in spiketrains] + # Find all pattern signatures in the current surrogate data set - surr_sgnt = concepts_mining( + surr_concepts = concepts_mining( surrs, binsize, winlen, min_spikes=min_spikes, max_spikes=max_spikes, min_occ=min_occ, max_occ=max_occ, min_neu=min_neu, report=spectrum)[0] - filled_sgnt = [] - # List all signatures (z,c) <= (z*, c*), for each (z*,c*) in the - # current surrogate, and add it to the list of all signatures - if spectrum == '#': - for sgnt in surr_sgnt: - for j in range(min_spikes, sgnt[0] + 1): - for k in range(min_occ, sgnt[1] + 1): - filled_sgnt.append((j, k)) - # List all signatures (z,c,l) <= (z*, c*, l*), for each (z*,c*,l*) - # in the current surrogate, and add it to the list of - # all signatures - if spectrum == '3d#': - for sgnt in surr_sgnt: - for j in range(min_spikes, sgnt[0] + 1): - for k in range(min_occ, sgnt[1] + 1): - filled_sgnt.append((j, k, sgnt[2])) - surr_sgnts.extend(list(set(filled_sgnt))) + # The last entry of the signature is the number of times the + # signature appeared. This entry is not needed here. 
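+ # (after dropping it, each row of surr_concepts is a (size, occ) + # pair for spectrum '#', or a (size, occ, duration) triplet for '3d#')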
+ surr_concepts = surr_concepts[:, :-1] + + max_occs[i] = _get_max_occ(surr_concepts, min_spikes, max_spikes, + winlen, spectrum) + # Collecting results on the first PCU - if rank != 0: # pragma: no cover - comm.send(surr_sgnts, dest=0) - del surr_sgnts - return [] - if rank == 0: - for i in range(1, size): - recv_list = comm.recv(source=i) - surr_sgnts.extend(recv_list) + if size != 1: + max_occs = comm.gather(max_occs, root=0) - # Compute the p-value spectrum, and return it - pv_spec = [] - for sgnt in set(surr_sgnts): - sgnt = list(sgnt) - sgnt.append((sum(np.all(np.array(surr_sgnts) == sgnt, axis=1) - ) / float(n_surr))) - pv_spec.append(sgnt) - return pv_spec + if rank != 0: # pragma: no cover + return [] -def _stability_filter(c, stab_thr): - """Criteria by which to filter concepts from the lattice""" - # stabilities larger then min_st - keep_concept = c[2] > stab_thr[0] or c[3] > stab_thr[1] - return keep_concept + # The gather operator returns a list. This is rearranged into a 2- or + # 3-dimensional numpy array. + max_occs = np.vstack(max_occs) -def _fdr(pvalues, alpha): + # Compute the p-value spectrum, and return it + return _get_pvalue_spec(max_occs, min_spikes, max_spikes, min_occ, + n_surr, winlen, spectrum) + + +def _get_pvalue_spec(max_occs, min_spikes, max_spikes, min_occ, n_surr, winlen, + spectrum): """ - performs False Discovery Rate (FDR) statistical correction on a list of - p-values, and assesses accordingly which of the associated statistical - tests is significant at the desired level *alpha* + This function converts the list of maximal occurrences into the + corresponding p-value spectrum. Parameters ---------- - pvalues: numpy.ndarray - array of p-values, each corresponding to a statistical test - alpha: float - significance level (desired FDR-ratio) + max_occs: np.ndarray + min_spikes: int + max_spikes: int + min_occ: int + n_surr: int + winlen: int + spectrum: {'#', '3d#'} Returns - ------ - Returns a triplet containing: - * an array of bool, indicating for each p-value whether it was - significantly low or not - * the largest p-value that was below the FDR linear threshold - (effective confidence level). That and each lower p-value are - considered significant. - * the rank of the largest significant p-value + ------- + if spectrum == '#': + List[List]: + each entry has the form: [pattern_size, pattern_occ, p_value] + if spectrum == '3d#': + List[List]: + each entry has the form: + [pattern_size, pattern_occ, pattern_dur, p_value] + """ + if spectrum not in ('#', '3d#'): + raise ValueError("Invalid spectrum: '{}'".format(spectrum)) + pv_spec = [] + if spectrum == '#': + max_occs = np.expand_dims(max_occs, axis=2) + winlen = 1 + for size_id, pt_size in enumerate(range(min_spikes, max_spikes + 1)): + for dur in range(winlen): + max_occs_size_dur = max_occs[:, size_id, dur] + counts, occs = np.histogram( + max_occs_size_dur, + bins=np.arange(min_occ, + np.max(max_occs_size_dur) + 2)) + occs = occs[:-1].astype(np.uint16) + pvalues = np.cumsum(counts[::-1])[::-1] / n_surr + for occ_id, occ in enumerate(occs): + if spectrum == '#': + pv_spec.append([pt_size, occ, pvalues[occ_id]]) + else: # spectrum == '3d#': + pv_spec.append([pt_size, occ, dur, pvalues[occ_id]]) + return pv_spec + + +def _get_max_occ(surr_concepts, min_spikes, max_spikes, winlen, spectrum): """ + This function extracts from a list of surrogate concepts the maximal + occurrence for each pattern size and duration.
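+ + For example (illustrative values; with `spectrum='#'` each row of + `surr_concepts` is a (size, occurrences) pair): + + >>> import numpy as np + >>> _get_max_occ(np.array([[2, 5], [2, 7], [3, 4]]), min_spikes=2, + ... max_spikes=3, winlen=1, spectrum='#') + array([7., 4.])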
- # Sort the p-values from largest to smallest - pvs_sorted = np.sort(pvalues)[::-1] # Sort PVs in decreasing order + Parameters + ---------- + surr_concepts: List[List] + min_spikes: int + max_spikes: int + winlen: int + spectrum: {'#', '3d#'} - # Perform FDR on the sorted p-values - m = len(pvalues) + Returns + ------- + np.ndarray + Two-dimensional array. Each element corresponds to the highest + occurrence for a specific pattern size (ranging from min_spikes to + max_spikes) and pattern duration (ranging from 0 to winlen-1). + The first axis corresponds to the pattern size, the second to the + duration. + """ + if spectrum not in ('#', '3d#'): + raise ValueError("Invalid spectrum: '{}'".format(spectrum)) - for i, pv in enumerate(pvs_sorted): # For each PV, from the largest on - k = m - i - if pv <= alpha * (k * 1. / m): # continue if PV > fdr-threshold - break # otherwise stop - # this applies, when loop is not stopped due to significant pvalue - else: - k = 0 - thresh = alpha * (k * 1. / m) + if spectrum == '#': + winlen = 1 + max_occ = np.zeros(shape=(max_spikes - min_spikes + 1, winlen)) + for size_id, pt_size in enumerate(range(min_spikes, max_spikes + 1)): + concepts_for_size = surr_concepts[ + surr_concepts[:, 0] == pt_size][:, 1:] + for dur in range(winlen): + if spectrum == '#': + occs = concepts_for_size[:, 0] + else: # spectrum == '3d#': + occs = concepts_for_size[concepts_for_size[:, 1] == dur][:, 0] + max_occ[size_id, dur] = np.max(occs, initial=0) + + for pt_size in range(max_spikes - 1, min_spikes - 1, -1): + size_id = pt_size - min_spikes + max_occ[size_id] = np.max(max_occ[size_id:size_id + 2], axis=0) + if spectrum == '#': + max_occ = np.squeeze(max_occ, axis=1) - # Return outcome of the test, critical p-value and its order - return pvalues <= thresh, thresh, k + return max_occ -def _holm_bonferroni(pvalues, alpha): - """ - performs Holm Bonferroni statistical correction on a list of - p-values, and assesses accordingly which of the associated statistical - tests is significant at the desired level *alpha* +def _stability_filter(concept, stability_thresh): + """Criteria by which to filter concepts from the lattice""" + # stabilities larger than stability_thresh + keep_concept = \ + concept[2] > stability_thresh[0]\ + or concept[3] > stability_thresh[1] + return keep_concept + +def _mask_pvalue_spectrum(pv_spec, concepts, spectrum, winlen): + """ + The function filters the pvalue spectrum to reduce the number of + statistical tests to be performed. Only the entries of the pvalue spectrum + that coincide with concepts found in the original data are kept. + Moreover, entries of the pvalue spectrum with a p-value of 1 (all + surrogate datasets contain at least one mined pattern with that signature) + are considered trivial and discarded as well.
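+ + For example (illustrative values; `spectrum='#'` and a single mined + concept with 3 spikes and 2 occurrences): + + >>> concepts = [((1, 2, 3), (10, 20))] + >>> _mask_pvalue_spectrum([[3, 2, 0.05], [3, 2, 1.0], [2, 2, 0.05]], + ... concepts, spectrum='#', winlen=1) + array([ True, False, False])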
Parameters ---------- - pvalues: list - list of p-values, each corresponding to a statistical test - alpha: float - significance level + pv_spec: List[List] + concepts: List[Tuple] + spectrum: {'#', '3d#'} + winlen: int Returns ------- - tests : list - A list of boolean values, indicating for each p-value whether it was - significantly low or not - """ - id_sorted = np.argsort(pvalues) - tests = [pval <= alpha / float( - len(pvalues) - id_sorted[pval_idx]) for pval_idx, pval in enumerate( - pvalues)] - return tests - - -def test_signature_significance(pvalue_spectrum, alpha, corr='', - report='spectrum', spectrum='#'): + mask : np.array + An array of boolean values, indicating whether a signature of the + p-value spectrum is also among the mined concepts of the original data. + """ + if spectrum == '#': + signatures = {(len(concept[0]), len(concept[1])) + for concept in concepts} + else: # spectrum == '3d#': + # third entry of signatures is the duration, fixed as the maximum lag + signatures = {(len(concept[0]), len(concept[1]), + max(np.array(concept[0]) % winlen)) + for concept in concepts} + mask = np.array([tuple(pvs[:-1]) in signatures + and not np.isclose(pvs[-1], [1]) + for pvs in pv_spec]) + return mask + + +def test_signature_significance(pv_spec, concepts, alpha, winlen, + corr='fdr_bh', report='spectrum', + spectrum='#'): """ Compute the significance spectrum of a pattern spectrum. - Given pvalue_spectrum as a list of triplets (z,c,p), where z is pattern - size, c is pattern support and p is the p-value of the signature (z,c), - this routine assesses the significance of (z,c) using the confidence level - alpha. + Given pvalue_spectrum `pv_spec` as a list of triplets (z,c,p), where z is + pattern size, c is pattern support and p is the p-value of the signature + (z,c), this routine assesses the significance of (z,c) using the + confidence level alpha. Bonferroni or FDR statistical corrections can be applied. Parameters ---------- - pvalue_spectrum: list + pv_spec: list A list of triplets (z,c,p), where z is pattern size, c is pattern support and p is the p-value of signature (z,c) + concepts: list of tuple + Output of :func:`concepts_mining` for the original data. alpha: float Significance level of the statistical test - corr: str - Statistical correction to be applied: - '', 'no' : no statistical correction - 'f', 'fdr' : false discovery rate - 'b', 'bonf': Bonferroni correction - 'hb', 'holm_bonf': Holm-Bonferroni correction - Default: '' - report: str + winlen: int + Size (number of bins) of the sliding window used for the analysis + corr: str, optional + Method used for testing and adjustment of pvalues. + Can be either the full name or initial letters.
+ Available methods are: + 'bonferroni' : one-step correction + + 'sidak' : one-step correction + + 'holm-sidak' : step down method using Sidak adjustments + + 'holm' : step-down method using Bonferroni adjustments + + 'simes-hochberg' : step-up method (independent) + + 'hommel' : closed method based on Simes tests (non-negative) + + 'fdr_bh' : Benjamini/Hochberg (non-negative) + + 'fdr_by' : Benjamini/Yekutieli (negative) + + 'fdr_tsbh' : two stage fdr correction (non-negative) + + 'fdr_tsbky' : two stage fdr correction (non-negative) + + '' or 'no': no statistical correction + For further description see: + https://www.statsmodels.org/stable/generated/statsmodels.stats.multitest.multipletests.html + Default: 'fdr_bh' + + report: {'spectrum', 'significant', 'non_significant'}, optional Format to be returned for the significance spectrum: + 'spectrum': list of triplets (z,c,b), where b is a boolean specifying - whether signature (z,c) is significant (True) or not (False) + whether signature (z,c) is significant (True) or not + (False) + 'significant': list containing only the significant signatures (z,c) of - pvalue_spectrum + pvalue_spectrum + 'non_significant': list containing only the non-significant signatures - Default: '#' - spectrum: str - Defines the signature of the patterns, it can assume values: + spectrum: {'#', '3d#'}, optional + Defines the signature of the patterns. + '#': pattern spectrum using as signature the pair: (number of spikes, number of occurrence) '3d#': pattern spectrum using the as signature the triplets: (number of spikes, number of occurrence, difference between last and @@ -1263,82 +1514,100 @@ def test_signature_significance(pvalue_spectrum, alpha, corr='', Default: '#' Returns - ------ + ------- sig_spectrum : list Significant signatures of pvalue_spectrum, in the format specified - by report + by `report` """ # If alpha == 1 all signatures are significant if alpha == 1: return [] if spectrum not in ['#', '3d#']: - raise AttributeError("spectrum must be either '#' or '3d#', " - "got {} instead".format(spectrum)) + raise ValueError("spectrum must be either '#' or '3d#', " + "got {} instead".format(spectrum)) if report not in ['spectrum', 'significant', 'non_significant']: - raise AttributeError("report must be either 'spectrum'," + - " 'significant' or 'non_significant'," + - "got {} instead".format(report)) - - x_array = np.array(pvalue_spectrum) - # Compute significance... - if corr in ['', 'no']: # ...without statistical correction - tests = x_array[:, -1] <= alpha - elif corr in ['b', 'bonf']: # or with Bonferroni correction - tests = x_array[:, -1] <= alpha * 1.
/ len(pvalue_spectrum) - elif corr in ['f', 'fdr']: # or with FDR correction - tests = _fdr(x_array[:, -1], alpha=alpha)[0] - elif corr in ['hb', 'holm_bonf']: - tests = _holm_bonferroni(x_array[:, -1], alpha=alpha) - else: - raise AttributeError( - "Parameter corr must be either ''('no'), 'b'('bonf'), 'f'('fdr')" + - " or 'hb'('holm_bonf'), but not '" + str(corr) + "'.") + raise ValueError("report must be either 'spectrum'," + + " 'significant' or 'non_significant'," + + "got {} instead".format(report)) + if corr not in ['bonferroni', 'sidak', 'holm-sidak', 'holm', + 'simes-hochberg', 'hommel', 'fdr_bh', 'fdr_by', + 'fdr_tsbh', 'fdr_tsbky', '', 'no']: + raise ValueError("Parameter corr not recognized") + + pv_spec = np.array(pv_spec) + mask = _mask_pvalue_spectrum(pv_spec, concepts, spectrum, winlen) + pvalues = pv_spec[:, -1] + pvalues_totest = pvalues[mask] + + # Initialize test array to False + tests = [False] * len(pvalues) + + if len(pvalues_totest) > 0: + + # Compute significance for only the non trivial tests + if corr in ['', 'no']: # ...without statistical correction + tests_selected = pvalues_totest <= alpha + else: + try: + import statsmodels.stats.multitest as sm + except ModuleNotFoundError: + raise ModuleNotFoundError( + "Please run 'pip install statsmodels' if you " + "want to use multiple testing correction") + + tests_selected = sm.multipletests(pvalues_totest, alpha=alpha, + method=corr)[0] + + # assign each corrected pvalue to its corresponding entry + for index, value in zip(mask.nonzero()[0], tests_selected): + tests[index] = value + # Return the specified results: if spectrum == '#': if report == 'spectrum': - return [(size, supp, test) - for (size, supp, pv), test in zip(pvalue_spectrum, tests)] - if report == 'significant': - return [(size, supp) for ((size, supp, pv), test) - in zip(pvalue_spectrum, tests) if test] - # report == 'non_significant' - return [(size, supp) - for ((size, supp, pv), test) in zip(pvalue_spectrum, tests) - if not test] - - # spectrum == '3d#' - if report == 'spectrum': - return [(size, supp, l, test) - for (size, supp, l, pv), test in zip(pvalue_spectrum, tests)] - if report == 'significant': - return [(size, supp, l) for ((size, supp, l, pv), test) - in zip(pvalue_spectrum, tests) if test] - # report == 'non_significant' - return [(size, supp, l) - for ((size, supp, l, pv), test) in zip(pvalue_spectrum, tests) - if not test] - - -def _pattern_spectrum_filter(concept, ns_signature, spectrum, winlen): + sig_spectrum = [(size, supp, test) + for (size, supp, pv), test in zip(pv_spec, tests)] + elif report == 'significant': + sig_spectrum = [(size, supp) for ((size, supp, pv), test) + in zip(pv_spec, tests) if test] + else: # report == 'non_significant' + sig_spectrum = [(size, supp) + for ((size, supp, pv), test) in zip(pv_spec, tests) + if not test] + + else: # spectrum == '3d#' + if report == 'spectrum': + sig_spectrum =\ + [(size, supp, l, test) + for (size, supp, l, pv), test in zip(pv_spec, tests)] + elif report == 'significant': + sig_spectrum = [(size, supp, l) for ((size, supp, l, pv), test) + in zip(pv_spec, tests) if test] + else: # report == 'non_significant' + sig_spectrum =\ + [(size, supp, l) + for ((size, supp, l, pv), test) in zip(pv_spec, tests) + if not test] + return sig_spectrum + + +def _pattern_spectrum_filter(concept, ns_signatures, spectrum, winlen): """ - Filter to select concept which signature is significant + Filter for significant concepts """ if spectrum == '#': - keep_concept = (len(concept[0]), 
len(concept[1])) not in ns_signature - if spectrum == '3d#': - bin_ids = sorted(np.array(concept[0]) % winlen) - # The duration is effectively the delay between the last neuron and - # the first one, measured in bins. Since we only allow the first spike - # to be in the first bin (see concepts_mining, _build_context and - # _fpgrowth), it is the the position of the latest spike. - duration = bin_ids[-1] + keep_concept = (len(concept[0]), len(concept[1])) not in ns_signatures + else: # spectrum == '3d#': + # duration is fixed as the maximum lag + duration = max(np.array(concept[0]) % winlen) keep_concept = (len(concept[0]), len(concept[1]), - duration) not in ns_signature + duration) not in ns_signatures return keep_concept -def approximate_stability(concepts, rel_matrix, n_subsets, delta=0, epsilon=0): +def approximate_stability(concepts, rel_matrix, n_subsets=0, + delta=0., epsilon=0.): r""" Approximate the stability of concepts. Uses the algorithm described in Babin, Kuznetsov (2012): Approximating Concept Stability @@ -1346,60 +1615,58 @@ Parameters ---------- concepts: list - All the pattern candidates (concepts) found in the data. Each + All the pattern candidates (concepts) found in the spiketrains. Each pattern is represented as a tuple containing (spike IDs, discrete times (window position) of the occurrences of the pattern). The spike IDs are defined as: - spike_id=neuron_id*bin_id; with neuron_id in [0, len(data)] and - bin_id in [0, winlen]. + `spike_id=neuron_id*winlen+bin_id` with `neuron_id` in + `[0, len(spiketrains)]` and `bin_id` in `[0, winlen]`. rel_matrix: sparse.coo_matrix - A binary matrix with shape (number of windows, winlen*len(data)). Each - row corresponds to a window (order according to their position in - time). Each column corresponds to one bin and one neuron and it is 0 if + A binary matrix with shape (number of windows, + winlen*len(spiketrains)). Each row corresponds to a window (order + according to their position in time). + Each column corresponds to one bin and one neuron and it is 0 if no spikes or 1 if one or more spikes occurred in that bin for that particular neuron. For example, the entry [0,0] of this matrix corresponds to the first bin of the first window position for the first - neuron, the entry [0,winlen] to the first bin of the first window + neuron, the entry `[0, winlen]` to the first bin of the first window position for the second neuron. n_subsets: int - Number of subsets of a concept used to approximate its stability. If - n_subset is set to 0 the stability is not computed. If, however, - for parameters delta and epsilon (see below) delta + epsilon > 0, - then an optimal n_subsets is calculated according to the formula given - in Babin, Kuznetsov (2012), proposition 6: - - ..math:: - n_subset = frac{1}{2\eps^2} \ln(frac{2}{\delta}) +1 - - Default:0 - delta: float - delta: probability with at least ..math:$1-\delta$ + Number of subsets of a concept used to approximate its stability. + If `n_subsets` is 0, it is calculated according to the formula + given in Babin, Kuznetsov (2012), proposition 6: + + .. math:: + n_{\text{subset}} = \frac{1}{2 \cdot \epsilon^2} + \ln{\left( \frac{2}{\delta} \right)} +1 Default: 0 - epsilon: float + delta: float, optional + Probability of failure: the stability estimate holds with a + probability of at least :math:`1-\delta` + Default: 0. + epsilon: float, optional epsilon: absolute error - Default: 0 + Default: 0.
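+ + For example, an illustrative choice of `delta = epsilon = 0.05` gives: + + >>> import numpy as np + >>> int(round(np.log(2. / 0.05) / (2 * 0.05 ** 2) + 1)) + 739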
-def approximate_stability(concepts, rel_matrix, n_subsets, delta=0, epsilon=0):
+def approximate_stability(concepts, rel_matrix, n_subsets=0,
+                          delta=0., epsilon=0.):
     r"""
     Approximate the stability of concepts. Uses the algorithm described
     in Babin, Kuznetsov (2012): Approximating Concept Stability

@@ -1346,60 +1615,58 @@ def approximate_stability(concepts, rel_matrix, n_subsets, delta=0, epsilon=0):
     Parameters
     ----------
     concepts: list
-        All the pattern candidates (concepts) found in the data. Each
+        All the pattern candidates (concepts) found in the spiketrains. Each
         pattern is represented as a tuple containing
         (spike IDs, discrete times (window position)
         of the occurrences of the pattern). The spike IDs are defined as:
-        spike_id=neuron_id*bin_id; with neuron_id in [0, len(data)] and
-        bin_id in [0, winlen].
+        `spike_id=neuron_id*bin_id` with `neuron_id` in `[0, len(spiketrains)]`
+        and `bin_id` in `[0, winlen]`.
     rel_matrix: sparse.coo_matrix
-        A binary matrix with shape (number of windows, winlen*len(data)). Each
-        row corresponds to a window (order according to their position in
-        time). Each column corresponds to one bin and one neuron and it is 0 if
+        A binary matrix with shape (number of windows,
+        winlen*len(spiketrains)). Each row corresponds to a window (order
+        according to their position in time).
+        Each column corresponds to one bin and one neuron and it is 0 if
         no spikes or 1 if one or more spikes occurred in that bin for that
         particular neuron. For example, the entry [0,0] of this matrix
         corresponds to the first bin of the first window position for the first
-        neuron, the entry [0,winlen] to the first bin of the first window
+        neuron, the entry `[0, winlen]` to the first bin of the first window
         position for the second neuron.
     n_subsets: int
-        Number of subsets of a concept used to approximate its stability. If
-        n_subset is set to 0 the stability is not computed. If, however,
-        for parameters delta and epsilon (see below) delta + epsilon > 0,
-        then an optimal n_subsets is calculated according to the formula given
-        in Babin, Kuznetsov (2012), proposition 6:
-
-        ..math::
-                n_subset = frac{1}{2\eps^2} \ln(frac{2}{\delta}) +1
-
-        Default:0
-    delta: float
-        delta: probability with at least ..math:$1-\delta$
+        Number of subsets of a concept used to approximate its stability.
+        If `n_subsets` is 0, it is calculated according to the formula
+        given in Babin, Kuznetsov (2012), proposition 6:

+        .. math::
+            n_{\text{subset}} = \frac{1}{2 \cdot \epsilon^2}
+                \ln{\left( \frac{2}{\delta} \right)} +1

         Default: 0
-    epsilon: float
+    delta: float, optional
+        delta: the probability of staying below the absolute error
+        `epsilon` is at least :math:`1-\delta`
+        Default: 0.
+    epsilon: float, optional
         epsilon: absolute error
-        Default: 0
+        Default: 0.

     Returns
     -------
-    output: list
+    output : list
         List of all the pattern candidates (concepts) given in input, each
         with the correspondent intensional and extensional stability. Each
-        pattern is represented as a tuple containing:
-        (spike IDs,
+        pattern is represented as a tuple (spike IDs,
         discrete times of the occurrences of the pattern, intensional
         stability of the pattern, extensional stability of the pattern).
         The spike IDs are defined as:
-        spike_id=neuron_id*bin_id; with neuron_id in [0, len(data)] and
-        bin_id in [0, winlen].
+        `spike_id=neuron_id*bin_id` with `neuron_id` in `[0, len(spiketrains)]`
+        and `bin_id` in `[0, winlen]`.

     Notes
     -----
-        If n_subset is larger than the extent all subsets are directly
-        calculated, otherwise for small extent size an infinite
-        loop can be created while doing the recursion,
-        since the random generation will always contain the same
-        numbers and the algorithm will be stuck searching for
-        other (random) numbers
+    If `n_subsets` is larger than the number of subsets of the extent, all
+    subsets are calculated directly. Otherwise, for a small extent, the
+    recursive random generation could enter an infinite loop, because the
+    random draws would keep producing the same subsets while the algorithm
+    searches for new ones.
     """
     if HAVE_MPI:  # pragma: no cover
@@ -1409,20 +1676,24 @@ def approximate_stability(concepts, rel_matrix, n_subsets, delta=0, epsilon=0):
     else:
         rank = 0
         size = 1
-    if n_subsets <= 0 and delta + epsilon <= 0:
-        raise AttributeError('n_subsets has to be >=0 or delta + epsilon > 0')
+    if not (isinstance(n_subsets, int) and n_subsets >= 0):
+        raise ValueError('n_subsets must be an integer >=0')
+    if n_subsets == 0 and not (isinstance(delta, float) and delta > 0. and
+                               isinstance(epsilon, float) and epsilon > 0.):
+        raise ValueError('delta and epsilon must be floats > 0., '
+                         'given that n_subsets = 0')
+
     if len(concepts) == 0:
         return []
     if len(concepts) <= size:
         rank_idx = [0] * (size + 1) + [len(concepts)]
     else:
         rank_idx = list(
-            np.arange(
-                0, len(concepts) - len(concepts) % size + 1,
-                len(concepts) // size)) + [len(concepts)]
+            range(0, len(concepts) - len(concepts) % size + 1,
+                  len(concepts) // size)) + [len(concepts)]
     # Calculate optimal n
-    if delta + epsilon > 0 and n_subsets == 0:
-        n_subsets = np.log(2. / delta) / (2 * epsilon ** 2) + 1
+    if n_subsets == 0:
+        n_subsets = int(round(np.log(2. / delta) / (2 * epsilon ** 2) + 1))
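A quick numerical check of this formula (illustration only, not part of the patch): asking for an absolute error below `epsilon = 0.05` with probability at least 0.95 (`delta = 0.05`) yields

>>> import numpy as np
>>> delta, epsilon = 0.05, 0.05
>>> int(round(np.log(2. / delta) / (2 * epsilon ** 2) + 1))
739

random subsets per concept.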
     if rank == 0:
         concepts_on_partition = concepts[rank_idx[rank]:rank_idx[rank + 1]] + \
@@ -1432,70 +1703,11 @@ def approximate_stability(concepts, rel_matrix, n_subsets, delta=0, epsilon=0):

     output = []
     for concept in concepts_on_partition:
-        stab_ext = 0.0
-        stab_int = 0.0
-        intent = np.array(list(concept[0]))
-        extent = np.array(list(concept[1]))
-        r_unique_ext = set()
-        r_unique_int = set()
-        excluded_subset = []
-        # Calculate all subsets if n is larger than the power set of
-        # the extent
-        if n_subsets > 2 ** len(extent):
-            subsets_ext = chain.from_iterable(
-                combinations(extent, r) for r in range(
-                    len(extent) + 1))
-            for s in subsets_ext:
-                if any(
-                        [set(s).issubset(se) for se in excluded_subset]):
-                    continue
-                if _closure_probability_extensional(
-                        intent, s, rel_matrix):
-                    stab_ext += 1
-                else:
-                    excluded_subset.append(s)
-        else:
-            for _ in range(n_subsets):
-                subset_ext = extent[
-                    _give_random_idx(r_unique_ext, len(extent))]
-                if any([set(subset_ext).issubset(se)
-                        for se in excluded_subset]):
-                    continue
-                if _closure_probability_extensional(
-                        intent, subset_ext, rel_matrix):
-                    stab_ext += 1
-                else:
-                    excluded_subset.append(subset_ext)
-        stab_ext /= min(n_subsets, 2 ** len(extent))
-        excluded_subset = []
-        # Calculate all subsets if n is larger than the power set of
-        # the extent
-        if n_subsets > 2 ** len(intent):
-            subsets_int = chain.from_iterable(
-                combinations(intent, r) for r in range(
-                    len(intent) + 1))
-            for s in subsets_int:
-                if any(
-                        [set(s).issubset(se) for se in excluded_subset]):
-                    continue
-                if _closure_probability_intensional(
-                        extent, s, rel_matrix):
-                    stab_int += 1
-                else:
-                    excluded_subset.append(s)
-        else:
-            for _ in range(n_subsets):
-                subset_int = intent[
-                    _give_random_idx(r_unique_int, len(intent))]
-                if any([set(subset_int).issubset(se) for
-                        se in excluded_subset]):
-                    continue
-                if _closure_probability_intensional(
-                        extent, subset_int, rel_matrix):
-                    stab_int += 1
-                else:
-                    excluded_subset.append(subset_int)
-        stab_int /= min(n_subsets, 2 ** len(intent))
+        intent, extent = np.array(concept[0]), np.array(concept[1])
+        stab_int = _calculate_single_stability_parameter(
+            intent, extent, n_subsets, rel_matrix, look_at='intent')
+        stab_ext = _calculate_single_stability_parameter(
+            intent, extent, n_subsets, rel_matrix, look_at='extent')
         output.append((intent, extent, stab_int, stab_ext))

     if size != 1:
@@ -1507,73 +1719,106 @@ def approximate_stability(concepts, rel_matrix, n_subsets, delta=0, epsilon=0):
     return output


-def _closure_probability_extensional(intent, subset, rel_matrix):
+def _calculate_single_stability_parameter(intent, extent,
+                                          n_subsets, rel_matrix,
+                                          look_at='intent'):
     """
-    Return True if the closure of the subset of the extent given in input is
-    equal to the intent given in input
+    Calculates the stability parameter for the extent or the intent.
+
+    For a detailed description see `approximate_stability`.

     Parameters
     ----------
-    intent : array
-        Set of the attributes of the concept
-    subset : list
-        List of objects that form the subset of the extent to be evaluated
-    rel_matrix: ndarray
-        Binary matrix that specify the relation that defines the context
+    intent: np.array
+        1st element of the concept
+    extent: np.array
+        2nd element of the concept
+    n_subsets: int
+        See `approximate_stability`.
+    rel_matrix: sparse.coo_matrix
+        See `approximate_stability`.
+    look_at: {'extent', 'intent'}
+        Whether to determine the stability of the extent or of the intent.
+        Default: 'intent'

     Returns
     -------
-    1 if (subset)' == intent
-    0 else
+    stability: float
+        The approximated stability of the intent or the extent,
+        depending on `look_at`
     """
-    # computation of the ' operator for the subset
-    subset_prime = np.where(np.all(rel_matrix[subset, :], axis=0) == 1)[0]
-    if set(subset_prime) == set(list(intent)):
-        return 1
-    return 0
+    if look_at == 'intent':
+        element_1, element_2 = intent, extent
+    else:  # look_at == 'extent'
+        element_1, element_2 = extent, intent
+
+    if n_subsets > 2 ** len(element_1):
+        subsets = chain.from_iterable(
+            combinations(element_1, subset_index)
+            for subset_index in range(len(element_1) + 1))
+    else:
+        subsets = _select_random_subsets(element_1, n_subsets)
+
+    stability = 0
+    excluded_subsets = []
+    for subset in subsets:
+        if any([set(subset).issubset(excluded_subset)
+                for excluded_subset in excluded_subsets]):
+            continue
+
+        # computation of the ' operator for the subset
+        if look_at == 'intent':
+            subset_prime = \
+                np.where(np.all(rel_matrix[:, subset], axis=1) == 1)[0]
+        else:  # look_at == 'extent'
+            subset_prime = \
+                np.where(np.all(rel_matrix[subset, :], axis=0) == 1)[0]
+
+        # the condition holds if the closure of the subset of element_1
+        # equals element_2
+        if set(subset_prime) == set(element_2):
+            stability += 1
+        else:
+            excluded_subsets.append(subset)
+    stability /= min(n_subsets, 2 ** len(element_1))
+    return stability


-def _closure_probability_intensional(extent, subset, rel_matrix):
+def _select_random_subsets(element_1, n_subsets):
     """
-    Return True if the closure of the subset of the intent given in input is
-    equal to the extent given in input
+    Creates a list of random subsets of `element_1`.

     Parameters
     ----------
-    extent : list
-        Set of the objects of the concept
-    subset : list
-        List of attributes that form the subset of the intent to be evaluated
-    rel_matrix: ndarray
-        Binary matrix that specify the relation that defines the context
+    element_1: np.array
+        The intent or the extent of a concept
+    n_subsets: int
+        See `approximate_stability`.

     Returns
     -------
-    1 if (subset)' == extent
-    0 else
+    subsets : list
+        Each element is a subset of `element_1`.
     """
-    # computation of the ' operator for the subset
-    subset_prime = np.where(np.all(rel_matrix[:, subset], axis=1) == 1)[0]
-    if set(subset_prime) == set(list(extent)):
-        return 1
-    return 0
+    # one bucket of already-drawn index tuples per subset size
+    subsets_indices = [set() for _ in range(len(element_1) + 1)]
+    subsets = []
+    while len(subsets) < n_subsets:
+        num_indices = np.random.binomial(n=len(element_1), p=1/2)
+        random_indices = np.random.choice(
+            len(element_1), size=num_indices, replace=False)
+        random_indices.sort()

-def _give_random_idx(r_unique, n):
-    """ asd """
+        random_tuple = tuple(random_indices)
+        if random_tuple not in subsets_indices[num_indices]:
+            subsets_indices[num_indices].add(random_tuple)
+            subsets.append(element_1[random_indices])

-    r = np.random.randint(n,
-                          size=np.random.randint(low=1, high=n))
-    r_tuple = tuple(r)
-    if r_tuple not in r_unique:
-        r_unique.add(r_tuple)
-        return np.unique(r)
-    return _give_random_idx(r_unique, n)
+    return subsets
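A short sketch of this helper once the patch is applied (illustration only; `_select_random_subsets` is a private function of `elephant.spade`). The drawn subsets are random, but their number and their containment in `element_1` are deterministic:

>>> import numpy as np
>>> from elephant.spade import _select_random_subsets
>>> np.random.seed(0)
>>> subsets = _select_random_subsets(np.array([0, 1, 2, 3]), n_subsets=3)
>>> len(subsets)
3
>>> all(set(s).issubset({0, 1, 2, 3}) for s in subsets)
True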


-def pattern_set_reduction(concepts, excluded, winlen, h_subset_filtering=0,
-                          k_superset_filtering=0, l_covered_spikes=0,
-                          min_spikes=2, min_occ=2):
+def pattern_set_reduction(concepts, ns_signatures, winlen, spectrum,
+                          h_subset_filtering=0, k_superset_filtering=0,
+                          l_covered_spikes=0, min_spikes=2, min_occ=2):
    r"""
    Takes a list of concepts and performs pattern set reduction (PSR).

@@ -1581,251 +1826,314 @@
    given any other pattern, on the basis of the pattern size and
    occurrence count ("support"). Only significant patterns are retained.
    The significance of a pattern A is evaluated through its signature
-    (|A|,c_A), where |A| is the size and c_A the support of A, by either of:
-    * subset filtering: any pattern B is discarded if *cfis* contains a
-      superset A of B such that (z_B, c_B-c_A+*h*) \in *excluded*
-    * superset filtering: any pattern A is discarded if *cfis* contains a
-      subset B of A such that (z_A-z_B+*k*, c_A) \in *excluded*
-    * covered-spikes criterion: for any two patterns A, B with A \subset B, B
-      is discarded if (z_B-l)*c_B <= c_A*(z_A-*l*), A is discarded otherwise.
-    * combined filtering: combines the three procedures above
-    takes a list concepts (see output psf function) and performs
-    combined filtering based on the signature (z, c) of each pattern, where
-    z is the pattern size and c the pattern support.
-
-    For any two patterns A and B in concepts_psf such that B \subset A, check:
-    1) (z_B, c_B-c_A+*h*) \in *excluded*, and
-    2) (z_A-z_B+*k*, c_A) \in *excluded*.
+    :math:`(z_A, c_A)`, where :math:`z_A = |A|` is the size and :math:`c_A`
+    the support of A, by either of:
+
+    * subset filtering: any pattern B is discarded if *concepts* contains a
+      superset A of B such that
+      :math:`(z_B, c_B - c_A + h) \in \text{ns}_{\text{signatures}}`
+    * superset filtering: any pattern A is discarded if *concepts* contains a
+      subset B of A such that
+      :math:`(z_A - z_B + k, c_A) \in \text{ns}_{\text{signatures}}`
+    * covered-spikes criterion: for any two patterns A, B with
+      :math:`A \subset B`, B is discarded if
+      :math:`(z_B-l) \cdot c_B \le c_A \cdot (z_A - l)`, A is discarded
+      otherwise;
+    * combined filtering: combines the three procedures above:
+      takes a list of concepts (the output of the psf function) and performs
+      combined filtering based on the signature (z, c) of each pattern, where
+      z is the pattern size and c the pattern support.
+
+    For any two patterns A and B in concepts_psf such that
+    :math:`B \subset A`, check:
+
+    1) :math:`(z_B, c_B - c_A + h) \in \text{ns}_{\text{signatures}}`, and
+
+    2) :math:`(z_A - z_B + k, c_A) \in \text{ns}_{\text{signatures}}`.
+
    Then:
+
    * if 1) and not 2): discard B
    * if 2) and not 1): discard A
-    * if 1) and 2): discard B if c_B*(z_B-*l*) <= c_A*(z_A-*l*), A otherwise;
-    * if neither 1) nor 2): keep both patterns.
+    * if 1) and 2): discard B if
+      :math:`c_B \cdot (z_B - l) \le c_A \cdot (z_A - l)`,
+      otherwise discard A (see the worked example below)
+    * if neither 1) nor 2): keep both patterns
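A worked example of the tie-break in the third rule (illustration only, with made-up signatures): let the subset pattern A have size :math:`z_A = 3` and support :math:`c_A = 10`, let the superset pattern B have :math:`z_B = 5`, :math:`c_B = 4`, and take `l = 0`:

>>> z_A, c_A = 3, 10  # subset pattern: 3 spikes, 10 occurrences
>>> z_B, c_B = 5, 4   # superset pattern: 5 spikes, 4 occurrences
>>> l = 0
>>> (z_B - l) * c_B <= c_A * (z_A - l)  # True -> discard B, keep A
True

B covers 20 spikes in total against 30 for A, so the smaller but more frequent pattern wins.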
+
+    Assumptions/Approximations:
+
-    Parameters:
-    -----------
-    concept: list
+        * a pair of concepts cannot cause one another to be rejected
+        * if two concepts overlap more than min_occ times, one of them can
+          account for all occurrences of the other one if it passes the
+          filtering
+
+    Parameters
+    ----------
+    concepts: list
         List of concepts, each consisting of its intent and extent
-    excluded: list
-        A list of non-significant pattern signatures (z, c) (see above).
-    h_subset_filtering: int
-        Correction parameter for subset filtering (see above).
-        Defaults: 0
-    k_superset_filtering: int
-        Correction parameter for superset filtering (see above).
+    ns_signatures: list
+        A list of non-significant pattern signatures (z, c)
+    winlen: int
+        The size (number of bins) of the sliding window used for the analysis.
+        The maximal length of a pattern (delay between first and last spike)
+        is then given by `winlen*binsize`.
+    spectrum: {'#', '3d#'}
+        Define the signature of the patterns.
+
+        '#': pattern spectrum using as signature the pair:
+            (number of spikes, number of occurrences)
+        '3d#': pattern spectrum using as signature the triplets:
+            (number of spikes, number of occurrences, difference between last
+            and first spike of the pattern)
+    h_subset_filtering: int, optional
+        Correction parameter for subset filtering
+        Default: 0
+    k_superset_filtering: int, optional
+        Correction parameter for superset filtering
         Default: 0
-    l_covered_spikes: int
-        Correction parameter for covered-spikes criterion (see above).
+    l_covered_spikes: int, optional
+        Correction parameter for covered-spikes criterion
         Default: 0
-    min_size: int
-        Minimum pattern size.
+    min_spikes: int, optional
+        Minimum pattern size
         Default: 2
-    min_supp: int
-        Minimum pattern support.
+    min_occ: int, optional
+        Minimum number of pattern occurrences
         Default: 2

-    Returns:
+    Returns
     -------
-    returns a tuple containing the elements of the input argument
-    that are significant according to combined filtering.
+    list
+        A list containing the elements of the input argument
+        that are significant according to combined filtering.
    """
-    conc = []
+    additional_measures = []
    # Extracting from the extent and intent the spike and window times
    for concept in concepts:
        intent = concept[0]
        extent = concept[1]
-        spike_times = np.array([st % winlen for st in intent])
-        conc.append((intent, spike_times, extent, len(extent)))
+        additional_measures.append((len(extent), len(intent)))
    # by default, select all elements in conc to be returned in the output
-    selected = [True for _ in conc]
+    selected = [True] * len(concepts)
    # scan all conc and their subsets
-    for id1, (conc1, s_times1, winds1, count1) in enumerate(conc):
-        for id2, (conc2, s_times2, winds2, count2) in enumerate(conc):
-            if not selected[id1]:
-                break
-            if id1 == id2:
+    for id1, id2 in combinations(range(len(concepts)), r=2):
+        # immediately continue if both concepts have already been rejected
+        if not selected[id1] and not selected[id2]:
+            continue
+
+        intent1, extent1 = concepts[id1][:2]
+        intent2, extent2 = concepts[id2][:2]
+        occ1, size1 = additional_measures[id1]
+        occ2, size2 = additional_measures[id2]
+        dur1 = max(np.array(intent1) % winlen)
+        dur2 = max(np.array(intent2) % winlen)
+        intent2 = set(intent2)
+
+        # Collecting all the possible distances between the windows
+        # of the two concepts
+        time_diff_all = np.array(
+            [w2 - w1 for w2 in extent2 for w1 in extent1])
+        # sort time differences by ascending absolute value
+        time_diff_sorting = np.argsort(np.abs(time_diff_all))
+        sorted_time_diff, sorted_time_diff_occ = np.unique(
+            time_diff_all[time_diff_sorting],
+            return_counts=True)
+        # only consider time differences that are smaller than winlen
+        # and that correspond to intersections that occur at least min_occ
+        # times
+        time_diff_mask = np.logical_and(
+            np.abs(sorted_time_diff) < winlen,
+            sorted_time_diff_occ >= min_occ)
+        # Rescaling the spike times to realign to real time
+        for time_diff in sorted_time_diff[time_diff_mask]:
+            intent1_new = [t_old - time_diff for t_old in intent1]
+            # from here on we will only need the intents as sets
+            intent1_new = set(intent1_new)
+            # if intent1 and intent2 are disjoint, skip this step
+            if len(intent1_new & intent2) == 0:
                continue
-            # Collecting all the possible distances between the windows
-            # of the two concepts
-            time_diff_all =
np.array( - [w2 - w1 for w2 in winds2 for w1 in winds1]) - sorted_time_diff = np.unique( - time_diff_all[np.argsort(np.abs(time_diff_all))]) - # Rescaling the spike times to realign to real time - for time_diff in sorted_time_diff[ - np.abs(sorted_time_diff) < winlen]: - conc1_new = [ - t_old - time_diff for t_old in conc1] - # if conc1 is of conc2 are disjoint or they have both been - # already de-selected, skip the step - if set(conc1_new) == set( - conc2) and selected[id1] and selected[id2]: - selected[id2] = False - break - if not set(conc1_new) & set(conc2): - continue - if not selected[id1]: - continue - if not selected[id2]: - continue - if set(conc1_new).issuperset(conc2) and count2 \ - - count1 + h_subset_filtering < min_occ: - selected[id2] = False - break - if set(conc2).issuperset(conc1_new) and count1 \ - - count2 + h_subset_filtering < min_occ: - selected[id1] = False - break - if len(excluded) == 0: - break - # Test the case conc1 is a superset of conc2 - if set(conc1_new).issuperset(conc2): - supp_diff = count2 - count1 + h_subset_filtering - size1, size2 = len(conc1_new), len(conc2) - size_diff = size1 - size2 + k_superset_filtering - # 2d spectrum case - if len(excluded[0]) == 2: - # Determine whether the subset (conc2) - # should be rejected - # according to the test for excess occurrences - reject_sub = (size2, supp_diff) in excluded \ - or supp_diff < min_occ - # Determine whether the superset (conc1_new) should be - # rejected according to the test for excess items - reject_sup = (size_diff, count1) in excluded \ - or size_diff < min_spikes - # 3d spectrum case - if len(excluded[0]) == 3: - # Determine whether the subset (conc2) - # should be rejected - # according to the test for excess occurrences - len_sub = max( - np.abs(np.diff(np.array(conc2) % winlen))) - reject_sub = (size2, supp_diff, len_sub) in excluded \ - or supp_diff < min_occ - # Determine whether the superset (conc1_new) should be - # rejected according to the test for excess items - len_sup = max( - np.abs(np.diff(np.array(conc1_new) % winlen))) - reject_sup = (size_diff, count1, len_sup) in excluded \ - or size_diff < min_spikes - # Reject the superset and/or the subset accordingly: - if reject_sub and not reject_sup: - selected[id2] = False - break - if reject_sup and not reject_sub: - selected[id1] = False - break - if reject_sub and reject_sup: - if (size1 - l_covered_spikes) * \ - count1 >= (size2 - l_covered_spikes) * count2: - selected[id2] = False - break - selected[id1] = False - break - # if both sets are significant given the other, keep both - continue - elif set(conc2).issuperset(conc1_new): - supp_diff = count1 - count2 + h_subset_filtering - size1, size2 = len(conc1_new), len(conc2) - size_diff = size2 - size1 + k_superset_filtering - # 2d spectrum case - if len(excluded[0]) == 2: - # Determine whether the subset (conc2) should be - # rejected according to the test for excess occurrences - reject_sub = (size2, supp_diff) in excluded \ - or supp_diff < min_occ - # Determine whether the superset (conc1_new) should be - # rejected according to the test for excess items - reject_sup = (size_diff, count1) in excluded \ - or size_diff < min_spikes - # 3d spectrum case - elif len(excluded[0]) == 3: - # Determine whether the subset (conc2) should be - # rejected according to the test for excess occurrences - len_sub = max( - np.abs(np.diff(np.array(conc1) % winlen))) - reject_sub = (size2, supp_diff, len_sub) in excluded \ - or supp_diff < min_occ - # Determine whether the superset (conc1_new) 
should be - # rejected according to the test for excess items - len_sup = max( - np.abs(np.diff(np.array(conc2) % winlen))) - - reject_sup = (size_diff, count1, len_sup) in excluded \ - or size_diff < min_spikes - # Reject the superset and/or the subset accordingly: - if reject_sub and not reject_sup: - selected[id1] = False - break - if reject_sup and not reject_sub: - selected[id2] = False - break - if reject_sub and reject_sup: - if (size1 - l_covered_spikes) * \ - count1 >= (size2 - l_covered_spikes) * count2: - selected[id2] = False - break - selected[id1] = False - break - # if both sets are significant given the other, keep both - continue - else: - size1, size2 = len(conc1_new), len(conc2) - inter_size = len(set(conc1_new) & set(conc2)) - # 2d spectrum case - if len(excluded[0]) == 2: - reject_1 = (size1 - inter_size + k_superset_filtering, - count1) in excluded - reject_1 = reject_1 or \ - (size1 - inter_size + k_superset_filtering)\ - < min_spikes - reject_2 = (size2 - inter_size + k_superset_filtering, - count2) in excluded - reject_2 = reject_2 or \ - (size1 - inter_size + k_superset_filtering) \ - < min_spikes - # 3d spectrum case - if len(excluded[0]) == 3: - len_1 = max( - np.abs(np.diff(np.array(conc1_new) % winlen))) - len_2 = max( - np.abs(np.diff(np.array(conc2) % winlen))) - reject_1 = (size1 - inter_size + k_superset_filtering, - count1, len_1) in excluded - reject_1 = reject_1 or \ - (size1 - inter_size + k_superset_filtering) \ - < min_spikes - reject_2 = (size2 - inter_size + k_superset_filtering, - count2, len_2) in excluded - reject_2 = reject_2 or \ - (size1 - inter_size + k_superset_filtering) \ - < min_spikes - - # Reject accordingly: - if reject_2 and not reject_1: - selected[id2] = False - break - if reject_1 and not reject_2: - selected[id1] = False - break - if reject_1 and reject_2: - if (size1 - l_covered_spikes) * \ - count1 >= (size2 - l_covered_spikes) * count2: - selected[id2] = False - break - selected[id1] = False - break - # if both sets are significant given the other, keep both - continue + # Test the case intent1 is a superset of intent2 + if intent1_new.issuperset(intent2): + reject1, reject2 = _perform_combined_filtering( + occ_superset=occ1, + size_superset=size1, + dur_superset=dur1, + occ_subset=occ2, + size_subset=size2, + dur_subset=dur2, + spectrum=spectrum, + ns_signatures=ns_signatures, + h_subset_filtering=h_subset_filtering, + k_superset_filtering=k_superset_filtering, + l_covered_spikes=l_covered_spikes, + min_spikes=min_spikes, + min_occ=min_occ) + + elif intent2.issuperset(intent1_new): + reject2, reject1 = _perform_combined_filtering( + occ_superset=occ2, + size_superset=size2, + dur_superset=dur2, + occ_subset=occ1, + size_subset=size1, + dur_subset=dur1, + spectrum=spectrum, + ns_signatures=ns_signatures, + h_subset_filtering=h_subset_filtering, + k_superset_filtering=k_superset_filtering, + l_covered_spikes=l_covered_spikes, + min_spikes=min_spikes, + min_occ=min_occ) + + else: + # none of the intents is a superset of the other one + # we compare both concepts to the intersection + # if one of them is not significant given the + # intersection, it is rejected + inter_size = len(intent1_new & intent2) + reject1 = _superset_filter( + occ_superset=occ1, + size_superset=size1, + dur_superset=dur1, + size_subset=inter_size, + spectrum=spectrum, + ns_signatures=ns_signatures, + k_superset_filtering=k_superset_filtering, + min_spikes=min_spikes) + reject2 = _superset_filter( + occ_superset=occ2, + size_superset=size2, + 
dur_superset=dur2, + size_subset=inter_size, + spectrum=spectrum, + ns_signatures=ns_signatures, + k_superset_filtering=k_superset_filtering, + min_spikes=min_spikes) + # Reject accordingly: + if reject1 and reject2: + reject1, reject2 = _covered_spikes_criterion( + occ_superset=occ1, + size_superset=size1, + occ_subset=occ2, + size_subset=size2, + l_covered_spikes=l_covered_spikes) + + selected[id1] &= not reject1 + selected[id2] &= not reject2 + + # skip remaining time-shifts if both concepts have been rejected + if (not selected[id1]) and (not selected[id2]): + break # Return the selected concepts return [p for i, p in enumerate(concepts) if selected[i]] -def concept_output_to_patterns(concepts, winlen, binsize, pvalue_spectrum=None, - spectrum=None, t_start=0 * pq.ms): +def _perform_combined_filtering(occ_superset, + size_superset, + dur_superset, + occ_subset, + size_subset, + dur_subset, + spectrum, + ns_signatures, + h_subset_filtering, + k_superset_filtering, + l_covered_spikes, + min_spikes, + min_occ): + """ + perform combined filtering + (see pattern_set_reduction) + """ + reject_subset = _subset_filter( + occ_superset=occ_superset, + occ_subset=occ_subset, + size_subset=size_subset, + dur_subset=dur_subset, + spectrum=spectrum, + ns_signatures=ns_signatures, + h_subset_filtering=h_subset_filtering, + min_occ=min_occ) + reject_superset = _superset_filter( + occ_superset=occ_superset, + size_superset=size_superset, + dur_superset=dur_superset, + size_subset=size_subset, + spectrum=spectrum, + ns_signatures=ns_signatures, + k_superset_filtering=k_superset_filtering, + min_spikes=min_spikes) + # Reject the superset and/or the subset accordingly: + if reject_superset and reject_subset: + reject_superset, reject_subset = _covered_spikes_criterion( + occ_superset=occ_superset, + size_superset=size_superset, + occ_subset=occ_subset, + size_subset=size_subset, + l_covered_spikes=l_covered_spikes) + return reject_superset, reject_subset + + +def _subset_filter(occ_superset, occ_subset, size_subset, dur_subset, spectrum, + ns_signatures=None, h_subset_filtering=0, min_occ=2): + """ + perform subset filtering + (see pattern_set_reduction) + """ + if ns_signatures is None: + ns_signatures = [] + occ_diff = occ_subset - occ_superset + h_subset_filtering + if spectrum == '#': + signature_to_test = (size_subset, occ_diff) + else: # spectrum == '3d#': + signature_to_test = (size_subset, occ_diff, dur_subset) + reject_subset = occ_diff < min_occ or signature_to_test in ns_signatures + return reject_subset + + +def _superset_filter(occ_superset, size_superset, dur_superset, size_subset, + spectrum, ns_signatures=None, k_superset_filtering=0, + min_spikes=2): + """ + perform superset filtering + (see pattern_set_reduction) + """ + if ns_signatures is None: + ns_signatures = [] + size_diff = size_superset - size_subset + k_superset_filtering + if spectrum == '#': + signature_to_test = (size_diff, occ_superset) + else: # spectrum == '3d#': + signature_to_test = (size_diff, occ_superset, dur_superset) + reject_superset = \ + size_diff < min_spikes or signature_to_test in ns_signatures + return reject_superset + + +def _covered_spikes_criterion(occ_superset, + size_superset, + occ_subset, + size_subset, + l_covered_spikes): + """ + evaluate covered spikes criterion + (see pattern_set_reduction) + """ + reject_superset = True + reject_subset = True + score_superset = (size_superset - l_covered_spikes) * occ_superset + score_subset = (size_subset - l_covered_spikes) * occ_subset + if score_superset >= 
score_subset:
+        reject_superset = False
+    else:
+        reject_subset = False
+    return reject_superset, reject_subset
+
+
+def concept_output_to_patterns(concepts, winlen, binsize, pv_spec=None,
+                               spectrum='#', t_start=0 * pq.ms):
    """
    Construction of dictionaries containing all the information about a
    pattern starting from a list of concepts and its associated
    pvalue_spectrum.
@@ -1833,117 +2141,102 @@ def concept_output_to_patterns(concepts, winlen, binsize, pvalue_spectrum=None,
    Parameters
    ----------
    concepts: tuple
-        Each element of the tuple corresponds to a pattern and it is itself a
-        tuple consisting of:
-        ((spikes in the pattern), (occurrences of the patterns))
+        Each element of the tuple corresponds to a pattern, which in turn is
+        a tuple of (spikes in the pattern, occurrences of the patterns)
    winlen: int
-        Length (in bins) of the sliding window used for the analysis
-    binsize: Quantity
-        The time precision used to discretize the data (binning).
-    pvalue_spectrum: None or tuple
+        Length (in bins) of the sliding window used for the analysis.
+    binsize: pq.Quantity
+        The time precision used to discretize the `spiketrains` (binning).
+    pv_spec: None or tuple
        Contains a tuple of signatures and the corresponding p-value. If equal
-        to None all pvalues are set to -1
-    spectrum: None or str
-        The signature of the given concepts, it can assume values:
-        None: the signature is determined from the pvalue_spectrum
+        to None all p-values are set to -1.
+    spectrum: {'#', '3d#'}
        '#': pattern spectrum using as signature the pair:
            (number of spikes, number of occurrences)
        '3d#': pattern spectrum using as signature the triplets:
            (number of spikes, number of occurrences, difference between last
            and first spike of the pattern)
-        Default: None
-    t_start: Quantity
+        Default: '#'
+    t_start: pq.Quantity
        t_start of the analyzed spike trains

    Returns
-    --------
-    output: list
+    -------
+    output : list
        List of dictionaries. Each dictionary corresponds to a pattern and
        has the following entries:
-            ['itemset'] list of the spikes in the pattern
-                expressed in the form of itemset, each spike is encoded by:
-                spiketrain_id * winlen + bin_id
-            [windows_ids'] the ids of the windows in which the pattern occurred
-                in discretized time (given byt the binning)
-            ['neurons'] array containing the idx of the neurons of the pattern
-            ['lags'] array containing the lags (integers corresponding to the
+        'itemset':
+            A list of the spikes in the pattern, expressed in the form of
+            an itemset; each spike is encoded by
+            `spiketrain_id * winlen + bin_id`.
+        'windows_ids':
+            The ids of the windows in which the pattern occurred
+            in discretized time (given by the binning).
+        'neurons':
+            An array containing the idx of the neurons of the pattern.
+        'lags':
+            An array containing the lags (integers corresponding to the
            number of bins) between the spikes of the patterns. The first
            lag is always assumed to be 0 and corresponds to the first
            spike.
-            ['times'] array containing the times (integers corresponding to the
+        'times':
+            An array containing the times (integers corresponding to the
            bin idx) of the occurrences of the patterns.
-            ['signature'] tuple containing two integers
-                (number of spikes of the patterns,
-                number of occurrences of the pattern)
-            ['pvalue'] the pvalue corresponding to the pattern. If n_surr==0
-                then all pvalues are set to -1.
+        'signature':
+            A tuple containing two integers (number of spikes of the
+            patterns, number of occurrences of the pattern).
+        'pvalue':
+            The p-value corresponding to the pattern. If `n_surr==0`,
If `n_surr==0`, + all p-values are set to -1. """ - if pvalue_spectrum is None: - if spectrum is None: - spectrum = '#' - else: - if spectrum is None: - if len(pvalue_spectrum) == 0: - spectrum = '#' - elif len(pvalue_spectrum[0]) == 4: - spectrum = '3d#' - elif len(pvalue_spectrum[0]) == 3: - spectrum = '#' - pvalue_dict = {} + if pv_spec is not None: + pvalue_dict = defaultdict(float) # Creating a dictionary for the pvalue spectrum - for entry in pvalue_spectrum: - if len(entry) == 4: + for entry in pv_spec: + if spectrum == '3d#': pvalue_dict[(entry[0], entry[1], entry[2])] = entry[-1] - if len(entry) == 3: + if spectrum == '#': pvalue_dict[(entry[0], entry[1])] = entry[-1] # Initializing list containing all the patterns + t_start = t_start.rescale(binsize.units) output = [] - for conc in concepts: + for concept in concepts: + itemset, window_ids = concept[:2] # Vocabulary for each of the patterns, containing: # - The pattern expressed in form of Itemset, each spike in the pattern # is represented as spiketrain_id * winlen + bin_id # - The ids of the windows in which the pattern occurred in discretized - # time (binning) - output_dict = {'itemset': conc[0], 'windows_ids': conc[1]} + # time (clipping) + output_dict = {'itemset': itemset, 'windows_ids': window_ids} # Bins relative to the sliding window in which the spikes of patt fall - bin_ids_unsort = np.array(conc[0]) % winlen + itemset = np.array(itemset) + bin_ids_unsort = itemset % winlen order_bin_ids = np.argsort(bin_ids_unsort) bin_ids = bin_ids_unsort[order_bin_ids] # id of the neurons forming the pattern - output_dict['neurons'] = list(np.array( - conc[0])[order_bin_ids] // winlen) + output_dict['neurons'] = list(itemset[order_bin_ids] // winlen) # Lags (in binsizes units) of the pattern - output_dict['lags'] = (bin_ids - bin_ids[0])[1:] * binsize + output_dict['lags'] = bin_ids[1:] * binsize # Times (in binsize units) in which the pattern occurs - output_dict['times'] = sorted(conc[1]) * binsize + bin_ids[0] * \ - binsize + t_start + output_dict['times'] = sorted(window_ids) * binsize + t_start + + # pattern dictionary appended to the output + if spectrum == '#': + # Signature (size, n occ) of the pattern + signature = (len(itemset), len(window_ids)) + else: # spectrum == '3d#': + # Signature (size, n occ, duration) of the pattern + # duration is position of the last bin + signature = (len(itemset), len(window_ids), bin_ids[-1]) + + output_dict['signature'] = signature # If None is given in input to the pval spectrum the pvalue # is set to -1 (pvalue spectrum not available) - # pattern dictionary appended to the output - if pvalue_spectrum is None: + if pv_spec is None: output_dict['pvalue'] = -1 - # Signature (size, n occ) of the pattern - elif spectrum == '3d#': - # The duration is effectively the delay between the last neuron and - # the first one, measured in bins. - # Since we only allow the first spike - # to be in the first bin (see concepts_mining, _build_context and - # _fpgrowth), it is the the position of the latest spike. 
- duration = bin_ids[-1] - sgnt = (len(conc[0]), len(conc[1]), duration) - output_dict['signature'] = sgnt - # p-value assigned to the pattern from the pvalue spectrum - try: - output_dict['pvalue'] = pvalue_dict[sgnt] - except KeyError: - output_dict['pvalue'] = 0.0 - elif spectrum == '#': - sgnt = (len(conc[0]), len(conc[1])) - output_dict['signature'] = sgnt + else: # p-value assigned to the pattern from the pvalue spectrum - try: - output_dict['pvalue'] = pvalue_dict[sgnt] - except KeyError: - output_dict['pvalue'] = 0.0 + output_dict['pvalue'] = pvalue_dict[signature] + output.append(output_dict) return output diff --git a/elephant/spike_train_surrogates.py b/elephant/spike_train_surrogates.py index a8e19dc71..55310590f 100644 --- a/elephant/spike_train_surrogates.py +++ b/elephant/spike_train_surrogates.py @@ -52,6 +52,10 @@ except ImportError: from .statistics import isi # Convenience when in elephant working dir. +# List of all available surrogate methods +SURR_METHODS = ['dither_spike_train', 'dither_spikes', 'jitter_spikes', + 'randomise_spikes', 'shuffle_isis', 'joint_isi_dithering'] + def dither_spikes(spiketrain, dither, n=1, decimals=None, edges=True): """ @@ -394,7 +398,7 @@ def jitter_spikes(spiketrain, binsize, n=1): Size of the time bins within which to randomise the spike times. Note: the last bin arrives until `spiketrain.t_stop` and might have width different from `binsize`. - n : int (optional) + n : int, optional Number of surrogates to be generated. Default: 1 @@ -967,13 +971,17 @@ def surrogates( 'joint_isi_dithering': None} if surr_method not in surrogate_types.keys(): - raise ValueError('specified surr_method (=%s) not valid' % surr_method) + raise AttributeError( + 'specified surr_method (=%s) not valid' % surr_method) - if surr_method in ['dither_spike_train', 'dither_spikes', 'jitter_spikes']: + if surr_method in ['dither_spike_train', 'dither_spikes']: return surrogate_types[surr_method]( spiketrain, dt, n=n, decimals=decimals, edges=edges) elif surr_method in ['randomise_spikes', 'shuffle_isis']: return surrogate_types[surr_method]( spiketrain, n=n, decimals=decimals) + elif surr_method == 'jitter_spikes': + return surrogate_types[surr_method]( + spiketrain, dt, n=n) elif surr_method == 'joint_isi_dithering': - return JointISI(spiketrain).dithering(n) + return JointISI(spiketrain).dithering(n) \ No newline at end of file diff --git a/elephant/test/test_spade.py b/elephant/test/test_spade.py index 6e4017e6a..a1451f613 100644 --- a/elephant/test/test_spade.py +++ b/elephant/test/test_spade.py @@ -17,10 +17,17 @@ import elephant.conversion as conv import elephant.spade as spade import elephant.spike_train_generation as stg +import elephant.spike_train_surrogates as surr from elephant.spade import HAVE_FIM python_version_major = sys.version_info.major +try: + import statsmodels + HAVE_STATSMODELS = True +except ImportError: + HAVE_STATSMODELS = False + class SpadeTestCase(unittest.TestCase): def setUp(self): @@ -113,10 +120,12 @@ def setUp(self): @unittest.skipUnless(HAVE_FIM, "Time consuming with pythonic FIM") def test_spade_cpp(self): output_cpp = spade.spade(self.cpp, self.binsize, 1, - n_subsets=self.n_subset, - stability_thresh=self.stability_thresh, + approx_stab_pars=dict( + n_subsets=self.n_subset, + stability_thresh=self.stability_thresh), n_surr=self.n_surr, alpha=self.alpha, psr_param=self.psr_param, + stat_corr='no', output_format='patterns')['patterns'] elements_cpp = [] lags_cpp = [] @@ -145,10 +154,12 @@ def test_spade_spectrum_cpp(self): # 
Testing with multiple patterns input def test_spade_msip(self): output_msip = spade.spade(self.msip, self.binsize, self.winlen, - n_subsets=self.n_subset, - stability_thresh=self.stability_thresh, + approx_stab_pars=dict( + n_subsets=self.n_subset, + stability_thresh=self.stability_thresh), n_surr=self.n_surr, alpha=self.alpha, psr_param=self.psr_param, + stat_corr='no', output_format='patterns')['patterns'] elements_msip = [] occ_msip = [] @@ -181,10 +192,11 @@ def test_parameters(self): self.binsize, self.winlen, min_spikes=self.min_spikes, - n_subsets=self.n_subset, + approx_stab_pars=dict(n_subsets=self.n_subset), n_surr=0, alpha=self.alpha, psr_param=self.psr_param, + stat_corr='no', output_format='patterns')['patterns'] # collecting spade output elements_msip_min_spikes = [] @@ -211,9 +223,11 @@ def test_parameters(self): # test min_occ parameter output_msip_min_occ = spade.spade(self.msip, self.binsize, self.winlen, min_occ=self.min_occ, - n_subsets=self.n_subset, + approx_stab_pars=dict( + n_subsets=self.n_subset), n_surr=self.n_surr, alpha=self.alpha, psr_param=self.psr_param, + stat_corr='no', output_format='patterns')['patterns'] # collect spade output occ_msip_min_occ = [] @@ -230,10 +244,12 @@ def test_parameters(self): self.binsize, self.winlen, max_spikes=self.max_spikes, - n_subsets=self.n_subset, + approx_stab_pars=dict( + n_subsets=self.n_subset), n_surr=self.n_surr, alpha=self.alpha, psr_param=self.psr_param, + stat_corr='no', output_format='patterns')['patterns'] # collecting spade output elements_msip_max_spikes = [] @@ -254,9 +270,11 @@ def test_parameters(self): # test max_occ parameter output_msip_max_occ = spade.spade(self.msip, self.binsize, self.winlen, max_occ=self.max_occ, - n_subsets=self.n_subset, + approx_stab_pars=dict( + n_subsets=self.n_subset), n_surr=self.n_surr, alpha=self.alpha, psr_param=self.psr_param, + stat_corr='no', output_format='patterns')['patterns'] # collect spade output occ_msip_max_occ = [] @@ -293,10 +311,12 @@ def test_fpgrowth_fca(self): # Testing with multiple patterns input def test_spade_msip_3d(self): output_msip = spade.spade(self.msip, self.binsize, self.winlen, - n_subsets=self.n_subset, - stability_thresh=self.stability_thresh, + approx_stab_pars=dict( + n_subsets=self.n_subset, + stability_thresh=self.stability_thresh), n_surr=self.n_surr, spectrum='3d#', alpha=self.alpha, psr_param=self.psr_param, + stat_corr='no', output_format='patterns')['patterns'] elements_msip = [] occ_msip = [] @@ -324,11 +344,13 @@ def test_parameters_3d(self): self.binsize, self.winlen, min_spikes=self.min_spikes, - n_subsets=self.n_subset, + approx_stab_pars=dict( + n_subsets=self.n_subset), n_surr=self.n_surr, spectrum='3d#', alpha=self.alpha, psr_param=self.psr_param, + stat_corr='no', output_format='patterns')['patterns'] # collecting spade output elements_msip_min_spikes = [] @@ -352,10 +374,13 @@ def test_parameters_3d(self): # test min_occ parameter output_msip_min_occ = spade.spade(self.msip, self.binsize, self.winlen, min_occ=self.min_occ, - n_subsets=self.n_subset, - n_surr=self.n_surr, spectrum='3d#', + approx_stab_pars=dict( + n_subsets=self.n_subset), + n_surr=self.n_surr, + spectrum='3d#', alpha=self.alpha, psr_param=self.psr_param, + stat_corr='no', output_format='patterns')['patterns'] # collect spade output occ_msip_min_occ = [] @@ -381,54 +406,84 @@ def test_spectrum(self): def test_spade_raise_error(self): # Test list not using neo.Spiketrain self.assertRaises(TypeError, spade.spade, [ - [1, 2, 3], [3, 4, 5]], 1 * pq.ms, 4) + [1, 
2, 3], [3, 4, 5]], 1 * pq.ms, 4, stat_corr='no') # Test neo.Spiketrain with different t_stop - self.assertRaises(AttributeError, spade.spade, [neo.SpikeTrain( + self.assertRaises(ValueError, spade.spade, [neo.SpikeTrain( [1, 2, 3] * pq.s, t_stop=5 * pq.s), neo.SpikeTrain( - [3, 4, 5] * pq.s, t_stop=6 * pq.s)], 1 * pq.ms, 4) + [3, 4, 5] * pq.s, t_stop=6 * pq.s)], 1 * pq.ms, 4, + stat_corr='no') # Test wrong spectrum parameter self.assertRaises(ValueError, spade.spade, [neo.SpikeTrain( [1, 2, 3] * pq.s, t_stop=6 * pq.s), neo.SpikeTrain( [3, 4, 5] * pq.s, t_stop=6 * pq.s)], 1 * pq.ms, 4, n_surr=1, - spectrum='try') + stat_corr='no', spectrum='try') # Test negative minimum number of spikes - self.assertRaises(AttributeError, spade.spade, [neo.SpikeTrain( + self.assertRaises(ValueError, spade.spade, [neo.SpikeTrain( [1, 2, 3] * pq.s, t_stop=5 * pq.s), neo.SpikeTrain( - [3, 4, 5] * pq.s, t_stop=5 * pq.s)], 1 * pq.ms, 4, min_neu=-3) + [3, 4, 5] * pq.s, t_stop=5 * pq.s)], 1 * pq.ms, 4, min_neu=-3, + stat_corr='no') + # Test wrong dither method + self.assertRaises(ValueError, spade.spade, [neo.SpikeTrain( + [1, 2, 3] * pq.s, t_stop=5 * pq.s), neo.SpikeTrain( + [3, 4, 5] * pq.s, t_stop=5 * pq.s)], 1 * pq.ms, 4, + surr_method='try', stat_corr='no') # Test negative number of surrogates - self.assertRaises(AttributeError, spade.pvalue_spectrum, [ + self.assertRaises(ValueError, spade.pvalue_spectrum, [ neo.SpikeTrain([1, 2, 3] * pq.s, t_stop=5 * pq.s), neo.SpikeTrain( [3, 4, 5] * pq.s, t_stop=5 * pq.s)], 1 * pq.ms, 4, 3 * pq.ms, n_surr=-3) # Test wrong correction parameter - self.assertRaises(AttributeError, spade.test_signature_significance, ( - (2, 3, 0.2), (2, 4, 0.1)), 0.01, corr='try') + self.assertRaises(ValueError, spade.test_signature_significance, + pv_spec=((2, 3, 0.2), (2, 4, 0.1)), + concepts=([[(2, 3), (1, 2, 3)]]), + alpha=0.01, + winlen=1, + corr='invalid_correction_key') # Test negative number of subset for stability - self.assertRaises(AttributeError, spade.approximate_stability, (), + self.assertRaises(ValueError, spade.approximate_stability, (), np.array([]), n_subsets=-3) def test_pattern_set_reduction(self): - output_msip = spade.spade(self.patt_psr, self.binsize, self.winlen, - n_subsets=self.n_subset, - stability_thresh=self.stability_thresh, - n_surr=self.n_surr, spectrum='3d#', - alpha=self.alpha, psr_param=self.psr_param, - output_format='patterns')['patterns'] - elements_msip = [] - occ_msip = [] - lags_msip = [] - # collecting spade output - for out in output_msip: - elements_msip.append(sorted(out['neurons'])) - occ_msip.append(list(out['times'].magnitude)) - lags_msip.append(list(out['lags'].magnitude)) - elements_msip = sorted(elements_msip, key=lambda d: len(d)) - occ_msip = sorted(occ_msip, key=lambda d: len(d)) - lags_msip = sorted(lags_msip, key=lambda d: len(d)) - # check neurons in the patterns - assert_array_equal(elements_msip, [range(len(self.lags3) + 1)]) - # check the occurrences time of the patters - assert_array_equal(len(occ_msip[0]), self.n_occ3) + surr_methods = surr.SURR_METHODS + for surr_method in surr_methods: + output_msip = spade.spade( + self.patt_psr, self.binsize, self.winlen, + approx_stab_pars=dict( + n_subsets=self.n_subset, + stability_thresh=self.stability_thresh), + n_surr=self.n_surr, spectrum='3d#', + alpha=self.alpha, + psr_param=self.psr_param, + output_format='patterns', + stat_corr='no', + surr_method=surr_method)['patterns'] + elements_msip = [] + occ_msip = [] + lags_msip = [] + # collecting spade output + for out in output_msip: + 
elements_msip.append(sorted(out['neurons']))
+                occ_msip.append(list(out['times'].magnitude))
+                lags_msip.append(list(out['lags'].magnitude))
+            elements_msip = sorted(elements_msip, key=len)
+            occ_msip = sorted(occ_msip, key=len)
+            # check neurons in the patterns
+            assert_array_equal(elements_msip, [range(len(self.lags3) + 1)])
+            # check the occurrence times of the patterns
+            assert_array_equal(len(occ_msip[0]), self.n_occ3)
+
+    @unittest.skipUnless(HAVE_STATSMODELS,
+                         "'fdr_bh' stat corr requires statsmodels")
+    def test_signature_significance_fdr_bh_corr(self):
+        """
+        A typical corr='fdr_bh' scenario that requires statsmodels.
+        """
+        sig_spectrum = spade.test_signature_significance(
+            pv_spec=((2, 3, 0.2), (2, 4, 0.05)),
+            concepts=([[(2, 3), (1, 2, 3)],
+                       [(2, 4), (1, 2, 3, 4)]]),
+            alpha=0.15, winlen=1, corr='fdr_bh')
+        self.assertEqual(sig_spectrum, [(2., 3., False), (2., 4., True)])


 def suite():
diff --git a/elephant/test/test_spike_train_surrogates.py b/elephant/test/test_spike_train_surrogates.py
index 7cf436a89..4558af6c8 100644
--- a/elephant/test/test_spike_train_surrogates.py
+++ b/elephant/test/test_spike_train_surrogates.py
@@ -289,7 +289,7 @@ def test_surr_method(self):
         surrs = surr.surrogates(st, dt=3 * pq.ms, n=nr_surr,
                                 surr_method='shuffle_isis',
                                 edges=False)
-        self.assertRaises(ValueError, surr.surrogates, st, n=1,
+        self.assertRaises(AttributeError, surr.surrogates, st, n=1,
                           surr_method='spike_shifting',
                           dt=None, decimals=None, edges=True)
         self.assertTrue(len(surrs) == nr_surr)
diff --git a/requirements-extras.txt b/requirements-extras.txt
index 029f47711..2834b0f19 100644
--- a/requirements-extras.txt
+++ b/requirements-extras.txt
@@ -1,3 +1,4 @@
 pandas>=0.18.0
 scikit-learn>=0.19.0
 mpi4py>=3.0.1
+statsmodels>=0.8.0
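For completeness, a minimal sketch of the surrogate dispatch exercised by the tests above (illustration only, not part of the patch):

>>> import neo
>>> import quantities as pq
>>> import elephant.spike_train_surrogates as surr
>>> st = neo.SpikeTrain([10, 20, 30] * pq.ms, t_stop=50 * pq.ms)
>>> [surrogate] = surr.surrogates(st, n=1, surr_method='shuffle_isis')
>>> len(surrogate) == len(st)  # ISI shuffling preserves the spike count
True

Any entry of `SURR_METHODS` is a valid `surr_method`; the dither- and jitter-based methods additionally require the `dt` argument.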