From 99888cc643b4530f733beafd76ff1dfb9e2798c4 Mon Sep 17 00:00:00 2001 From: Ryan Harvey Date: Thu, 17 Oct 2024 10:11:41 -0400 Subject: [PATCH] docs, typing, stats module --- neuro_py/stats/circ_stats.py | 384 ++++++++++++++++++++-------- neuro_py/stats/regression.py | 173 ++++++++----- neuro_py/stats/stats.py | 111 ++++---- neuro_py/stats/system_identifier.py | 80 +++++- 4 files changed, 527 insertions(+), 221 deletions(-) diff --git a/neuro_py/stats/circ_stats.py b/neuro_py/stats/circ_stats.py index fd86b4a..7032404 100644 --- a/neuro_py/stats/circ_stats.py +++ b/neuro_py/stats/circ_stats.py @@ -1,3 +1,4 @@ +from typing import Any, Callable, Dict, Generator, Iterable, List, Optional, Tuple, Union import warnings from collections import namedtuple @@ -10,13 +11,30 @@ CI = namedtuple("confidence_interval", ["lower", "upper"]) -def nd_bootstrap(data, iterations, axis=None, strip_tuple_if_one=True): +def nd_bootstrap( + data: Iterable[np.ndarray], + iterations: int, + axis: Union[int, None] = None, + strip_tuple_if_one: bool = True +) -> Generator[Union[np.ndarray, Tuple[np.ndarray, ...]], None, None]: """ Bootstrap iterator for several n-dimensional data arrays. - :param data: Iterable containing the data arrays - :param iterations: Number of bootstrap iterations. - :param axis: Bootstrapping is performed along this axis. + Parameters + ---------- + data : Iterable[np.ndarray] + Iterable containing the data arrays. + iterations : int + Number of bootstrap iterations. + axis : Union[int, None], optional + Bootstrapping is performed along this axis. If None, the data is flattened. + strip_tuple_if_one : bool, optional + If True, return a single array without tuple if only one data array is provided. + + Yields + ------ + Tuple[np.ndarray, ...] + Bootstrapped data arrays for each iteration. """ shape0 = data[0].shape if axis is None: @@ -47,12 +65,22 @@ def nd_bootstrap(data, iterations, axis=None, strip_tuple_if_one=True): ) -def mod2pi(f): +def mod2pi(f: Callable) -> Callable: """ Decorator to apply modulo 2*pi on the output of the function. The decorated function must either return a tuple of numpy.ndarrays or a numpy.ndarray itself. + + Parameters + ---------- + f : Callable + The function to be decorated. + + Returns + ------- + Callable + A wrapper function that applies modulo 2*pi on the output. """ def wrapper(f, *args, **kwargs): @@ -83,10 +111,13 @@ class bootstrap: The argument scale determines whether the percentile is taken on a circular scale or on a linear scale. - :param no_bootstrap: the number of arguments that are bootstrapped - (e.g. for correlation it would be two, for median it - would be one) - :param scale: linear or ciruclar scale (default is 'linear') + Parameters + ---------- + no_bootstrap : int + The number of arguments that are bootstrapped + (e.g., for correlation it would be two, for median it would be one). + scale : str, optional + Linear or circular scale (default is 'linear'). """ def __init__(self, no_bootstrap, scale="linear"): @@ -165,19 +196,28 @@ def wrapper(f, *args, **kwargs): @bootstrap(1, "circular") def percentile(alpha, q, q0, axis=None, ci=None, bootstrap_iter=None): """ - Computes circular percentiles - - :param alpha: array with circular samples - :param q: percentiles in [0,100] (single number or iterable) - :param q0: value of the 0 percentile - :param axis: percentiles will be computed along this axis. - If None percentiles will be computed over the entire array - :param ci: if not None, confidence level is bootstrapped - :param bootstrap_iter: number of bootstrap iterations - (number of samples if None) - - :return: percentiles - + Computes circular percentiles. + + Parameters + ---------- + alpha : np.ndarray + Array with circular samples. + q : float or iterable of float + Percentiles in [0, 100] (single number or iterable). + q0 : float + Value of the 0 percentile. + axis : int, optional + Percentiles will be computed along this axis. + If None, percentiles will be computed over the entire array. + ci : float, optional + If not None, confidence level is bootstrapped. + bootstrap_iter : int, optional + Number of bootstrap iterations (number of samples if None). + + Returns + ------- + np.ndarray + Computed percentiles. """ if axis is None: alpha = (alpha.ravel() - q0) % (2 * np.pi) @@ -209,7 +249,26 @@ def percentile(alpha, q, q0, axis=None, ci=None, bootstrap_iter=None): return np.asarray(ret) -def _complex_mean(alpha, w=None, axis=None, axial_correction=1): +def _complex_mean(alpha: np.ndarray, w: Optional[np.ndarray] = None, axis: Optional[int] = None, axial_correction: float = 1) -> np.ndarray: + """ + Compute the weighted mean of complex values. + + Parameters + ---------- + alpha : np.ndarray + Array of angles (in radians) representing complex values. + w : np.ndarray, optional + Array of weights corresponding to the alpha values. If None, uniform weights are used. + axis : int, optional + Axis along which the mean is computed. If None, the mean is computed over the entire array. + axial_correction : float, optional + Correction factor for the angles (default is 1). + + Returns + ------- + np.ndarray + Weighted mean of the complex values. + """ if w is None: w = np.ones_like(alpha) alpha = np.asarray(alpha) @@ -229,28 +288,47 @@ def _complex_mean(alpha, w=None, axis=None, axial_correction=1): @bootstrap(1, "linear") def resultant_vector_length( - alpha, w=None, d=None, axis=None, axial_correction=1, ci=None, bootstrap_iter=None -): + alpha: np.ndarray, + w: Optional[np.ndarray] = None, + d: Optional[float] = None, + axis: Optional[int] = None, + axial_correction: int = 1, + ci: Optional[float] = None, + bootstrap_iter: Optional[int] = None +) -> float: """ - Computes mean resultant vector length for circular data. + Computes the mean resultant vector length for circular data. This statistic is sometimes also called vector strength. - :param alpha: sample of angles in radians - :param w: number of incidences in case of binned angle data - :param ci: ci-confidence limits are computed via bootstrapping, - default None. - :param d: spacing of bin centers for binned data, if supplied - correction factor is used to correct for bias in - estimation of r, in radians (!) - :param axis: compute along this dimension, default is None - (across all dimensions) - :param axial_correction: axial correction (2,3,4,...), default is 1 - :param bootstrap_iter: number of bootstrap iterations - (number of samples if None) - :return: mean resultant length - - References: [Fisher1995]_, [Jammalamadaka2001]_, [Zar2009]_ + Parameters + ---------- + alpha : np.ndarray + Sample of angles in radians. + w : np.ndarray, optional + Number of incidences in case of binned angle data. + ci : float, optional + Confidence limits computed via bootstrapping. Default is None. + d : float, optional + Spacing of bin centers for binned data. If supplied, + correction factor is used to correct for bias in + estimation of r, in radians. + axis : int, optional + Dimension along which to compute the result. Default is None + (across all dimensions). + axial_correction : int, optional + Axial correction factor (2, 3, 4,...). Default is 1. + bootstrap_iter : int, optional + Number of bootstrap iterations (number of samples if None). + + Returns + ------- + float + Mean resultant vector length. + + References + ---------- + [Fisher1995]_, [Jammalamadaka2001]_, [Zar2009]_ """ if axis is None: axis = 0 @@ -276,23 +354,41 @@ def resultant_vector_length( vector_strength = resultant_vector_length -def mean_ci_limits(alpha, ci=0.95, w=None, d=None, axis=None): +def mean_ci_limits( + alpha: np.ndarray, + ci: float = 0.95, + w: Optional[np.ndarray] = None, + d: Optional[float] = None, + axis: Optional[int] = None +) -> np.ndarray: """ Computes the confidence limits on the mean for circular data. - :param alpha: sample of angles in radians - :param ci: ci-confidence limits are computed, default 0.95 - :param w: number of incidences in case of binned angle data - :param d: spacing of bin centers for binned data, if supplied - correction factor is used to correct for bias in - estimation of r, in radians (!) - :param axis: compute along this dimension, default is None - (across all dimensions) - - :return: confidence limit width d; mean +- d yields upper/lower - (1-xi)% confidence limit - - References: [Fisher1995]_, [Jammalamadaka2001]_, [Zar2009]_ + Parameters + ---------- + alpha : np.ndarray + Sample of angles in radians. + ci : float, optional + Confidence interval limits are computed. Default is 0.95. + w : np.ndarray, optional + Number of incidences in case of binned angle data. + d : float, optional + Spacing of bin centers for binned data. If supplied, + correction factor is used to correct for bias in + estimation of r, in radians. + axis : int, optional + Dimension along which to compute the result. Default is None + (across all dimensions). + + Returns + ------- + np.ndarray + Confidence limit width d; mean ± d yields upper/lower + (1 - xi)% confidence limit. + + References + ---------- + [Fisher1995]_, [Jammalamadaka2001]_, [Zar2009]_ """ if w is None: @@ -328,28 +424,47 @@ def mean_ci_limits(alpha, ci=0.95, w=None, d=None, axis=None): @mod2pi -def mean(alpha, w=None, ci=None, d=None, axis=None, axial_correction=1): +def mean( + alpha: np.ndarray, + w: Optional[np.ndarray] = None, + ci: Optional[float] = None, + d: Optional[float] = None, + axis: Optional[int] = None, + axial_correction: int = 1 +) -> Union[float, Tuple[float, CI]]: """ Compute mean direction of circular data. - :param alpha: circular data - :param w: weightings in case of binned angle data - :param ci: if not None, the upper and lower 100*ci% confidence - interval is returned as well - :param d: spacing of bin centers for binned data, if supplied - correction factor is used to correct for bias in - estimation of r, in radians (!) - :param axis: compute along this dimension, default is None - (across all dimensions) - :param axial_correction: axial correction (2,3,4,...), default is 1 - :return: circular mean if ci=None, or circular mean as well as lower and - upper confidence interval limits - - Example + Parameters + ---------- + alpha : np.ndarray + Circular data. + w : np.ndarray, optional + Weightings in case of binned angle data. + ci : float, optional + If not None, the upper and lower 100*ci% confidence + interval is returned as well. + d : float, optional + Spacing of bin centers for binned data. If supplied, + correction factor is used to correct for bias in + estimation of r, in radians. + axis : int, optional + Compute along this dimension. Default is None + (across all dimensions). + axial_correction : int, optional + Axial correction (2,3,4,...). Default is 1. + + Returns + ------- + float or Tuple[float, CI] + Circular mean if ci is None, or circular mean as well as lower and + upper confidence interval limits. + + Examples + -------- >>> import numpy as np - >>> data = 2*np.pi*np.random.rand(10) + >>> data = 2 * np.pi * np.random.rand(10) >>> mu, (ci_l, ci_u) = mean(data, ci=0.95) - """ cmean = _complex_mean(alpha, w=w, axis=axis, axial_correction=axial_correction) @@ -366,16 +481,22 @@ def mean(alpha, w=None, ci=None, d=None, axis=None, axial_correction=1): @mod2pi -def center(*args, **kwargs): +def center(*args: np.ndarray, **kwargs: Optional[dict]) -> Tuple[np.ndarray, ...]: """ Centers the data on its circular mean. Each non-keyword argument is another data array that is centered. - :param axis: the mean is computed along this dimension (default axis=None). - **Must be used as a keyword argument!** - :return: tuple of centered data arrays + Parameters + ---------- + axis : int, optional + The mean is computed along this dimension (default is None). + **Must be used as a keyword argument!** + Returns + ------- + tuple of np.ndarray + Tuple of centered data arrays. """ axis = kwargs.pop("axis", None) @@ -399,7 +520,33 @@ def center(*args, **kwargs): ) -def get_var(f, varnames, args, kwargs): +def get_var(f: Callable, varnames: List[str], args: List[Any], kwargs: Dict[str, Any]) -> Tuple[List[int], List[str]]: + """ + Retrieve indices of specified variables from a function's argument list. + + Parameters + ---------- + f : Callable + The function from which to retrieve variable information. + varnames : list of str + The names of the variables to retrieve. + args : list + Positional arguments passed to the function. + kwargs : dict + Keyword arguments passed to the function. + + Returns + ------- + tuple of (list of int, list of str) + A tuple containing two elements: + - A list of indices of the specified variables in the function's argument list. + - A list of keys for the keyword arguments that correspond to the specified variables. + + Raises + ------ + ValueError + If a specified variable is not found in the function's argument list. + """ fvarnames = f.__code__.co_varnames var_idx = [] @@ -423,28 +570,42 @@ def get_var(f, varnames, args, kwargs): class swap2zeroaxis: """ - This decorator is best explained by an example:: - - @swap2zeroaxis(['x','y'], [0, 1]) - def dummy(x,y,z, axis=None): - return np.mean(x[::2,...], axis=0), np.mean(y[::2, ...], axis=0), z - - This creates a new function that - - - either swaps the axes axis to zero for the arguments x and y if axis - is specified in dummy or ravels x and y - - swaps back the axes from the output arguments 0 and 1. Here it is - assumed that the outputs lost one dimension during the function - (e.g. like numpy.mean(x, axis=1) looses one axis). + Decorator to swap specified axes of input arguments to zero and swap them back in output. + + Parameters + ---------- + inputs : list of str + The names of the input arguments for which the axes are swapped. + out_idx : list of int + The indices of the output arguments whose axes will be swapped back. + + Raises + ------ + ValueError + If a specified output index is inconsistent with a single output argument. + + Examples + -------- + + >>> @swap2zeroaxis(['x', 'y'], [0, 1]) + >>> def dummy(x, y, z, axis=None): + >>> return np.mean(x[::2, ...], axis=0), np.mean(y[::2, ...], axis=0), z + + This creates a new function that: + + - Either swaps the specified axes to zero for the arguments `x` and `y` + if `axis` is specified in the wrapped function, or flattens `x` and `y`. + - Swaps back the axes from the output arguments, assuming the outputs lost + one dimension during the function (e.g., like `numpy.mean(x, axis=1)`). """ - def __init__(self, inputs, out_idx): + def __init__(self, inputs: list[str], out_idx: list[int]): self.inputs = inputs self.out_idx = out_idx - def __call__(self, f): + def __call__(self, f: callable) -> callable: - def _deco(f, *args, **kwargs): + def _deco(f: callable, *args: tuple, **kwargs: dict) -> tuple: to_swap_idx, to_swap_keys = get_var(f, self.inputs, args, kwargs) args = list(args) @@ -505,27 +666,38 @@ def _deco(f, *args, **kwargs): @swap2zeroaxis(["alpha", "w"], [0, 1]) -def rayleigh(alpha, w=None, d=None, axis=None): +def rayleigh(alpha: np.ndarray, w: np.ndarray = None, d: float = None, axis: int = None) -> Tuple[float, float]: """ Computes Rayleigh test for non-uniformity of circular data. H0: the population is uniformly distributed around the circle - HA: the populatoin is not distributed uniformly around the circle + HA: the population is not distributed uniformly around the circle Assumption: the distribution has maximally one mode and the data is sampled from a von Mises distribution! - :param alpha: sample of angles in radian - :param w: number of incidences in case of binned angle data - :param d: spacing of bin centers for binned data, if supplied - correction factor is used to correct for bias in - estimation of r - :param axis: compute along this dimension, default is None - if axis=None, array is raveled - :return pval: two-tailed p-value - :return z: value of the z-statistic - - References: [Fisher1995]_, [Jammalamadaka2001]_, [Zar2009]_ + Parameters + ---------- + alpha : ndarray + Sample of angles in radians. + w : ndarray, optional + Number of incidences in case of binned angle data. + d : float, optional + Spacing of bin centers for binned data, if supplied. + Correction factor is used to correct for bias in estimation of r. + axis : int, optional + Compute along this dimension, default is None; if None, the array is raveled. + + Returns + ------- + pval : float + Two-tailed p-value. + z : float + Value of the z-statistic. + + References + ---------- + [Fisher1995]_, [Jammalamadaka2001]_, [Zar2009]_ """ if w is None: diff --git a/neuro_py/stats/regression.py b/neuro_py/stats/regression.py index 5a54262..2bbc2dd 100644 --- a/neuro_py/stats/regression.py +++ b/neuro_py/stats/regression.py @@ -1,3 +1,5 @@ +from typing import Optional, Tuple + import numpy as np import scipy from scipy import sparse @@ -5,36 +7,64 @@ from sklearn.metrics import r2_score +def ideal_data( + num: int, dimX: int, dimY: int, rrank: int, noise: float = 1 +) -> Tuple[np.ndarray, np.ndarray]: + """Generate low-rank data. + + Parameters + ---------- + num : int + Number of samples. + dimX : int + Dimensionality of the input data. + dimY : int + Dimensionality of the output data. + rrank : int + Rank of the low-rank structure. + noise : float, optional + Standard deviation of the noise added to the output data (default is 1). + + Returns + ------- + tuple[np.ndarray, np.ndarray] + A tuple containing: + - X : np.ndarray + The generated input data of shape (num, dimX). + - Y : np.ndarray + The generated output data of shape (num, dimY). -def ideal_data(num, dimX, dimY, rrank, noise=1): - """Low rank data""" + """ X = np.random.randn(num, dimX) W = np.random.randn(dimX, rrank) @ np.random.randn(rrank, dimY) Y = X @ W + np.random.randn(num, dimY) * noise return X, Y -""" -Reduced rank regression class. -Requires scipy to be installed. - -Implemented by Chris Rayner (2015) -dchrisrayner AT gmail DOT com - -Optimal linear 'bottlenecking' or 'multitask learning'. -""" - - class ReducedRankRegressor(object): """ - Reduced Rank Regressor (linear 'bottlenecking' or 'multitask learning') - - X is an n-by-d matrix of features. - - Y is an n-by-D matrix of targets. - - rrank is a rank constraint. - - reg is a regularization parameter (optional). + Reduced Rank Regressor (linear 'bottlenecking' or 'multitask learning'). + + Parameters + ---------- + X : np.ndarray + An n-by-d matrix of features. + Y : np.ndarray + An n-by-D matrix of targets. + rank : int + A rank constraint. + reg : Optional[float], optional + A regularization parameter (default is None). + + References + ---- + Implemented by Chris Rayner (2015). + dchrisrayner AT gmail DOT com """ - def __init__(self, X, Y, rank, reg=None): + def __init__( + self, X: np.ndarray, Y: np.ndarray, rank: int, reg: Optional[float] = None + ): if np.size(np.shape(X)) == 1: X = np.reshape(X, (-1, 1)) if np.size(np.shape(Y)) == 1: @@ -49,16 +79,16 @@ def __init__(self, X, Y, rank, reg=None): self.W = V[0:rank, :].T self.A = (np.linalg.pinv(CXX) @ (CXY @ self.W)).T - def __str__(self): + def __str__(self) -> str: return "Reduced Rank Regressor (rank = {})".format(self.rank) - def predict(self, X): + def predict(self, X: np.ndarray) -> np.ndarray: """Predict Y from X.""" if np.size(np.shape(X)) == 1: X = np.reshape(X, (-1, 1)) return X @ (self.A.T @ self.W.T) - def score(self, X, Y): + def score(self, X: np.ndarray, Y: np.ndarray) -> float: """Score the model.""" if np.size(np.shape(X)) == 1: X = np.reshape(X, (-1, 1)) @@ -69,26 +99,21 @@ def score(self, X, Y): return r2_score(Y, y_pred) -""" -Multivariate linear regression -Requires scipy to be installed. - -Implemented by Chris Rayner (2015) -dchrisrayner AT gmail DOT com - -Just simple linear regression with regularization - nothing new here -""" - - class MultivariateRegressor(object): """ Multivariate Linear Regressor. - - X is an n-by-d matrix of features. - - Y is an n-by-D matrix of targets. - - reg is a regularization parameter (optional). + + Parameters + ---------- + X : np.ndarray + An n-by-d matrix of features. + Y : np.ndarray + An n-by-D matrix of targets. + reg : Optional[float], optional + A regularization parameter (default is None). """ - def __init__(self, X, Y, reg=None): + def __init__(self, X: np.ndarray, Y: np.ndarray, reg: Optional[float] = None): if np.size(np.shape(X)) == 1: X = np.reshape(X, (-1, 1)) if np.size(np.shape(Y)) == 1: @@ -100,52 +125,64 @@ def __init__(self, X, Y, reg=None): W2 = np.dot(X, W1) self.W = np.dot(Y.T, W2) - def __str__(self): + def __str__(self) -> str: return "Multivariate Linear Regression" - def predict(self, X): + def predict(self, X: np.ndarray) -> np.ndarray: """Return the predicted Y for input X.""" if np.size(np.shape(X)) == 1: X = np.reshape(X, (-1, 1)) return np.array(np.dot(X, self.W.T)) - def score(self, X, Y): + def score(self, X: np.ndarray, Y: np.ndarray) -> float: """Return the coefficient of determination R^2 of the prediction.""" y_pred = self.predict(X) return r2_score(Y, y_pred) -""" -kernel Reduced Rank Ridge Regression by Mukherjee - DOI:10.1002/sam.10138 - -Code by Michele Svanera (2017-June) - -""" - - class kernelReducedRankRegressor(BaseEstimator): """ - kernel Reduced Rank Ridge Regression - - X is an n-by-P matrix of features (n-time points). - - Y is an n-by-Q matrix of targets (n-time points). - - rank is a rank constraint. - - reg is a regularization parameter. + Kernel Reduced Rank Ridge Regression. + + Parameters + ---------- + rank : int, optional + The rank constraint (default is 10). + reg : float, optional + The regularization parameter (default is 1). + P_rr : Optional[np.ndarray], optional + The P matrix for reduced rank (default is None). + Q_fr : Optional[np.ndarray], optional + The Q matrix for fitted values (default is None). + trainX : Optional[np.ndarray], optional + The training features (default is None). + + References + ---------- + Mukherjee, S. (DOI:10.1002/sam.10138) + Code by Michele Svanera (2017-June). """ - def __init__(self, rank=10, reg=1, P_rr=None, Q_fr=None, trainX=None): + def __init__( + self, + rank: int = 10, + reg: float = 1, + P_rr: Optional[np.ndarray] = None, + Q_fr: Optional[np.ndarray] = None, + trainX: Optional[np.ndarray] = None, + ): self.rank = rank self.reg = reg self.P_rr = P_rr self.Q_fr = Q_fr self.trainX = trainX - def __str__(self): + def __str__(self) -> str: return "kernel Reduced Rank Ridge Regression by Mukherjee (rank = {})".format( self.rank ) - def fit(self, X, Y): + def fit(self, X: np.ndarray, Y: np.ndarray) -> None: # use try/except blog with exceptions! self.rank = int(self.rank) @@ -159,7 +196,7 @@ def fit(self, X, Y): self.P_rr = P_rr self.trainX = X - def predict(self, testX): + def predict(self, testX: np.ndarray) -> np.ndarray: # use try/except blog with exceptions! K_Xx = scipy.dot(testX, self.trainX.T) @@ -167,12 +204,12 @@ def predict(self, testX): return Yhat - def rrr_scorer(self, Yhat, Ytest): + def rrr_scorer(self, Yhat: np.ndarray, Ytest: np.ndarray) -> float: diag_corr = (np.diag(np.corrcoef(Ytest, Yhat))).mean() return diag_corr ## Optional - def get_params(self, deep=True): + def get_params(self, deep: bool = True) -> dict: return {"rank": self.rank, "reg": self.reg} # @@ -181,15 +218,27 @@ def get_params(self, deep=True): # self.setattr(parameter, value) # return self - def mse(self, X, y_true): + def mse(self, X: np.ndarray, y_true: np.ndarray) -> float: """ Score the model on test data. + + Parameters + ---------- + X : np.ndarray + The test data features. + y_true : np.ndarray + The true target values. + + Returns + ------- + float + The mean squared error of the predictions. """ Yhat = self.predict(X).real MSE = (np.power((y_true - Yhat), 2) / np.prod(y_true.shape)).mean() return MSE - def score(self, X, Y): + def score(self, X: np.ndarray, Y: np.ndarray) -> float: """Score the model.""" y_pred = self.predict(X) diff --git a/neuro_py/stats/stats.py b/neuro_py/stats/stats.py index 8cca4ca..6e24d82 100644 --- a/neuro_py/stats/stats.py +++ b/neuro_py/stats/stats.py @@ -1,31 +1,41 @@ import warnings +from typing import Tuple, Union import numpy as np import pandas as pd import scipy.stats as stats -def get_significant_events(scores, shuffled_scores, q=95, tail="both"): +def get_significant_events( + scores: Union[list, np.ndarray], + shuffled_scores: np.ndarray, + q: float = 95, + tail: str = "both", +) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: """ Return the significant events based on percentiles, - the p-values and the standard deviation of the scores + the p-values, and the standard deviation of the scores in terms of the shuffled scores. + Parameters ---------- - scores : array of shape (n_events,) - The array of scores for which to calculate significant events - shuffled_scores : array of shape (n_shuffles, n_events) - The array of scores obtained from randomized data - q : float in range of [0,100] - Percentile to compute, which must be between 0 and 100 inclusive. + scores : Union[list, np.ndarray] + The array of scores for which to calculate significant events. + shuffled_scores : np.ndarray + The array of scores obtained from randomized data (shape: (n_shuffles, n_events)). + q : float, optional + Percentile to compute, which must be between 0 and 100 inclusive (default is 95). + tail : str, optional + Tail for the test, which can be 'left', 'right', or 'both' (default is 'both'). + Returns ------- - sig_event_idx : array of shape (n_sig_events,) + sig_event_idx : np.ndarray Indices (from 0 to n_events-1) of significant events. - pvalues : array of shape (n_events,) - The p-values - stddev : array of shape (n_events,) - The standard deviation of the scores in terms of the shuffled scores + pvalues : np.ndarray + The p-values. + stddev : np.ndarray + The standard deviation of the scores in terms of the shuffled scores. """ # check shape and correct if needed if isinstance(scores, list) | isinstance(scores, np.ndarray): @@ -59,16 +69,25 @@ def get_significant_events(scores, shuffled_scores, q=95, tail="both"): return np.atleast_1d(sig_event_idx), np.atleast_1d(pvalues), np.atleast_1d(stddev) -def confidence_intervals(X: np.ndarray, conf: float = 0.95): +def confidence_intervals( + X: np.ndarray, conf: float = 0.95 +) -> Tuple[np.ndarray, np.ndarray]: """ - confidence_intervals: calculates upper and lower .95 confidence intervals on matrix - - Input: - X - numpy ndarray, (n signals, n samples) - conf - float, confidence level value (default: .95) - Output: - lower - numpy ndarray, (n signals, 1) - upper - numpy ndarray, (n signals, 1) + Calculate upper and lower confidence intervals on a matrix. + + Parameters + ---------- + X : np.ndarray + A numpy ndarray of shape (n_signals, n_samples). + conf : float, optional + Confidence level value (default is 0.95). + + Returns + ------- + lower : np.ndarray + Lower bounds of the confidence intervals (shape: (n_signals,)). + upper : np.ndarray + Upper bounds of the confidence intervals (shape: (n_signals,)). """ # compute interval for each column with warnings.catch_warnings(): @@ -91,19 +110,25 @@ def confidence_intervals(X: np.ndarray, conf: float = 0.95): return lower, upper -def reindex_df(df: pd.core.frame.DataFrame, weight_col: str) -> pd.core.frame.DataFrame: +def reindex_df(df: pd.DataFrame, weight_col: str) -> pd.DataFrame: """ - reindex_df: expands dataframe by weights + Expand the dataframe by weights. - expand the dataframe to prepare for resampling result is 1 row per count per sample + This function expands the dataframe to prepare for resampling, + resulting in 1 row per count per sample, which is helpful + when making weighted proportion plots. - Helpful when making weighted proportion plots + Parameters + ---------- + df : pd.DataFrame + The pandas dataframe to be expanded. + weight_col : str + The column name that contains weights (should be int). - Input: - df - pandas dataframe - weight_col - column name that contains weights (should be int) - Output: - df - new pandas dataframe with resampling + Returns + ------- + pd.DataFrame + A new pandas dataframe with resampling based on the weights. """ df = df.reindex(df.index.repeat(df[weight_col])).copy() @@ -115,28 +140,28 @@ def reindex_df(df: pd.core.frame.DataFrame, weight_col: str) -> pd.core.frame.Da def regress_out(a: np.ndarray, b: np.ndarray) -> np.ndarray: """ - Regress b from a keeping a's original mean. + Regress b from a while keeping a's original mean. - - a_prime = regress_out(a, b) performs regression of variable b from variable a - while preserving the original mean of a. The function calculates the residual - component of a that remains after removing the effect of b. The regression is - performed using the ordinary least squares method. - - adapted from the seaborn function of the same name - https://github.com/mwaskom/seaborn/blob/824c102525e6a29cde9bca1ce0096d50588fda6b/seaborn/regression.py#L337 + This function performs regression of variable b from variable a while + preserving the original mean of a. It calculates the residual component + of a that remains after removing the effect of b using ordinary least squares. Parameters ---------- - a : array-like + a : np.ndarray The variable to be regressed. Must be 1-dimensional. - b : array-like + b : np.ndarray The variable to regress on a. Must be 1-dimensional. Returns ------- - a_prime : array-like + np.ndarray The residual of a after regressing out b. Has the same shape as a. + + Notes + ----- + Adapted from the seaborn function of the same name: + https://github.com/mwaskom/seaborn/blob/824c102525e6a29cde9bca1ce0096d50588fda6b/seaborn/regression.py#L337 """ # remove nans and infs from a and b, and make a_result vector for output a_result = np.full_like(a, np.nan) diff --git a/neuro_py/stats/system_identifier.py b/neuro_py/stats/system_identifier.py index 96fb8d7..8b7af0e 100644 --- a/neuro_py/stats/system_identifier.py +++ b/neuro_py/stats/system_identifier.py @@ -15,14 +15,41 @@ This is a linear dynamical system. """ +from typing import Tuple, Union import numpy as np import scipy as sp from scipy import sparse from scipy.sparse import linalg as sparse_linalg -def ideal_data(num, dimU, dimY, dimX, noise=1): - """Linear system data""" +def ideal_data(num: int, dimU: int, dimY: int, dimX: int, noise: float = 1.0) -> Tuple[np.ndarray, np.ndarray]: + """ + Generate linear system data. + + This function creates randomized linear system matrices and simulates a linear + system with specified dimensions. The resulting output data includes added noise. + + Parameters + ---------- + num : int + Number of time points (samples). + dimU : int + Dimensionality of the input (control inputs). + dimY : int + Dimensionality of the output. + dimX : int + Dimensionality of the state. + noise : float + Standard deviation of the noise added to the output (default: 1.0). + + Returns + ------- + Tuple[np.ndarray, np.ndarray] + U : np.ndarray + Random input data of shape (num, dimU). + Y : np.ndarray + Output data of shape (num, dimY) with added noise. + """ # generate randomized linear system matrices A = np.random.randn(dimX, dimX) B = np.random.randn(dimX, dimU) @@ -59,14 +86,34 @@ def ideal_data(num, dimU, dimY, dimX, noise=1): class SystemIdentifier(object): """ - Simple Subspace System Identifier - - U is an n-by-d matrix of "control inputs". - - Y is an n-by-D matrix of output observations. - - statedim is the dimension of the internal state variable. - - reg is a regularization parameter (optional). + Simple Subspace System Identifier. + + This class identifies a linear dynamical system based on given input and output data using subspace methods. + + Parameters + ---------- + U : np.ndarray + An n-by-d matrix of control inputs. + Y : np.ndarray + An n-by-D matrix of output observations. + statedim : int + The dimension of the internal state variable. + reg : float, optional + Regularization parameter (default is None, which is set to 0). + + Attributes + ---------- + A : np.ndarray + State transition matrix. + B : np.ndarray + Control input matrix. + C : np.ndarray + Output matrix. + D : np.ndarray + Feedforward matrix. """ - def __init__(self, U, Y, statedim, reg=None): + def __init__(self, U: np.ndarray, Y: np.ndarray, statedim: int, reg: Union[float, None] = None): if np.size(np.shape(U)) == 1: U = np.reshape(U, (-1, 1)) if np.size(np.shape(Y)) == 1: @@ -134,10 +181,23 @@ def __init__(self, U, Y, statedim, reg=None): self.C = Sys[statedim:, :statedim] self.D = Sys[statedim:, statedim:] - def __str__(self): + def __str__(self) -> str: return "Linear Dynamical System" - def predict(self, U): + def predict(self, U: np.ndarray) -> np.ndarray: + """ + Predict output given the control inputs. + + Parameters + ---------- + U : np.ndarray + Control inputs, shape (n_samples, n_controls). + + Returns + ------- + np.ndarray + Predicted outputs, shape (n_samples, n_outputs). + """ # If U is a vector, reshape it if np.size(np.shape(U)) == 1: U = np.reshape(U, (-1, 1))