diff --git a/CMakeLists.txt b/CMakeLists.txt index 26cecc9b4..49e92827c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -9,7 +9,7 @@ if(DEFINED PROJECT_NAME) endif() project(deeptime LANGUAGES C CXX VERSION 0.0.0) -find_package(OpenMP) +find_package(OpenMP 4) if(OpenMP_FOUND) add_definitions(-DUSE_OPENMP) endif() diff --git a/deeptime/__init__.py b/deeptime/__init__.py index 59d35869b..735c45a25 100644 --- a/deeptime/__init__.py +++ b/deeptime/__init__.py @@ -14,6 +14,11 @@ from . import decomposition from . import markov +from .util.platform import module_available +if module_available("matplotlib"): + from . import plots +del module_available + def capi_includes(inc_clustering: bool = False, inc_markov: bool = False, inc_markov_hmm: bool = False, inc_data: bool = False): diff --git a/deeptime/data/_drunkards_walk_simulator.py b/deeptime/data/_drunkards_walk_simulator.py index d68ec00aa..fc181efe9 100644 --- a/deeptime/data/_drunkards_walk_simulator.py +++ b/deeptime/data/_drunkards_walk_simulator.py @@ -3,7 +3,7 @@ import numpy as np -from deeptime.util.decorators import plotting_function +from deeptime.util.decorators import plotting_function_with_networkx Coordinate = Tuple[int, int] @@ -245,7 +245,7 @@ def walk(self, start: Coordinate, n_steps: int, stop: bool = True, return_states return states @staticmethod - @plotting_function + @plotting_function_with_networkx def plot_path(ax, path, intermediates: bool = True, color_lerp: bool = True, **plot_kw): # pragma: no cover r""" Plots a path onto a drunkard's walk map. The path is interpolated with splines and, if desired, has a linearly interpolated color along its path. @@ -286,7 +286,7 @@ def plot_path(ax, path, intermediates: bool = True, color_lerp: bool = True, **p else: ax.plot(xint, yint, **plot_kw) - @plotting_function + @plotting_function_with_networkx def plot_network(self, ax, F, cmap=None, connection_threshold: float = 0.): # pragma: no cover r""" Plots a flux network onto a matplotlib axis assuming that the states are ordered according to a iota range (i.e., {0, 1, ..., n_states-1}) on the grid. @@ -335,7 +335,7 @@ def plot_network(self, ax, F, cmap=None, connection_threshold: float = 0.): # p connectionstyle='arc3, rad=0.1') return edge_colors - @plotting_function + @plotting_function_with_networkx def plot_2d_map(self, ax, barriers: bool = True, barrier_mode: str = 'filled'): # pragma: no cover r""" Plots the drunkard's map onto an already existing matplotlib axis. One can select whether to draw barriers and if barriers should be drawn, whether they have a 'filled' face or be 'hollow' with just the border diff --git a/deeptime/decomposition/_koopman.py b/deeptime/decomposition/_koopman.py index e4ccb776c..6b69eaa09 100644 --- a/deeptime/decomposition/_koopman.py +++ b/deeptime/decomposition/_koopman.py @@ -642,6 +642,12 @@ def timescales(self, k=None, lagtime: Optional[int] = None) -> np.ndarray: lagtime = self._cov.lagtime return - lagtime / np.log(np.abs(self.singular_values[:k])) + @property + def lagtime(self): + r""" The lagtime corresponding to this model. See also + :meth:`CovarianceModel.lagtime `. """ + return self._cov.lagtime + @property def feature_component_correlation(self): r"""Instantaneous correlation matrix between mean-free input features and projection components. diff --git a/deeptime/decomposition/_vamp.py b/deeptime/decomposition/_vamp.py index 11ad2bcb9..8298af448 100644 --- a/deeptime/decomposition/_vamp.py +++ b/deeptime/decomposition/_vamp.py @@ -513,7 +513,7 @@ def chapman_kolmogorov_validator(self, mlags, test_model: CovarianceKoopmanModel The Champan-Kolmogorov test is to compare the predictions to the estimates. """ test_model = self.fetch_model() if test_model is None else test_model - assert test_model is not None, "We need a test model via argument or an estimator which was already" \ + assert test_model is not None, "We need a test model via argument or an estimator which was already " \ "fit to data." lagtime = self.lagtime if n_observables is not None: diff --git a/deeptime/markov/_base.py b/deeptime/markov/_base.py index 482a83c79..81ec84822 100644 --- a/deeptime/markov/_base.py +++ b/deeptime/markov/_base.py @@ -168,6 +168,26 @@ def evaluate_samples(self, quantity, delimiter='/', *args, **kwargs): from deeptime.util.stats import evaluate_samples as _eval return _eval(self.samples, quantity=quantity, delimiter=delimiter, *args, **kwargs) + def timescales(self, k=None): + r""" Relaxation timescales corresponding to the eigenvalues. + + Parameters + ---------- + k : int, optional, default=None + The number of timescales (excluding the stationary process). + + Returns + ------- + timescales : tuple(iterable, iterable) + Timescales of the prior and timescales of the samples. + """ + return self.prior.timescales(k=k), self.evaluate_samples('timescales', k=k) + + @property + def lagtime(self): + r"""Lagtime of the models.""" + return self.prior.lagtime + class MembershipsChapmanKolmogorovValidator(LaggedModelValidator): r""" Validates a model estimated at lag time tau by testing its predictions for longer lag times. diff --git a/deeptime/markov/hmm/_hidden_markov_model.py b/deeptime/markov/hmm/_hidden_markov_model.py index 322386a0a..f872a751f 100644 --- a/deeptime/markov/hmm/_hidden_markov_model.py +++ b/deeptime/markov/hmm/_hidden_markov_model.py @@ -303,6 +303,12 @@ def transition_counts(self) -> Optional[np.ndarray]: """ return self.transition_model.count_model.count_matrix if self.transition_model.count_model is not None else None + def timescales(self, k=None): + r""" Yields the timescales of the hidden transition model. + See :meth:`MarkovStateModel.timescales `. + """ + return self.transition_model.timescales(k=k) + @property def hidden_state_trajectories(self) -> Optional[List[np.ndarray]]: r""" diff --git a/deeptime/markov/msm/_bayesian_msm.py b/deeptime/markov/msm/_bayesian_msm.py index de2ca8d80..fb0d289fb 100644 --- a/deeptime/markov/msm/_bayesian_msm.py +++ b/deeptime/markov/msm/_bayesian_msm.py @@ -173,7 +173,7 @@ def fetch_model(self) -> Optional[BayesianPosterior]: """ return self._model - def fit(self, data, callback: Callable = None): + def fit(self, data, callback: Callable = None, **kw): """ Performs the estimation on either a count matrix or a previously estimated TransitionCountModel. @@ -185,23 +185,23 @@ def fit(self, data, callback: Callable = None): callback: callable, optional, default=None Function to be called to indicate progress of sampling. + Other Parameters + ---------------- + ignore_counting_mode : bool, optional, default=False + Method does not raise if counting mode isn't of the "effective" family. Use with caution. + Returns ------- self : BayesianMSM Reference to self. """ - from deeptime.markov import TransitionCountModel - if isinstance(data, TransitionCountModel) and data.counting_mode is not None \ - and "effective" not in data.counting_mode: - raise ValueError("The transition count model was not estimated using an effective counting method, " - "therefore counts are likely to be strongly correlated yielding wrong confidences.") - if isinstance(data, Estimator): if data.has_model: data = data.fetch_model() else: raise ValueError("Can only use estimators as input if they have been fit previously.") + from deeptime.markov import TransitionCountModel if isinstance(data, TransitionCountModel) or is_square_matrix(data): msm = MaximumLikelihoodMSM( reversible=self.reversible, stationary_distribution_constraint=self.stationary_distribution_constraint, @@ -214,7 +214,7 @@ def fit(self, data, callback: Callable = None): "TransitionCountEstimator) or a MarkovStateModel instance or an estimator producing " "Markov state models.") - return self.fit_from_msm(msm, callback=callback) + return self.fit_from_msm(msm, callback=callback, **kw) def sample(self, prior: MarkovStateModel, n_samples: int, n_steps: Optional[int] = None, callback=None): r""" Performs sampling based on a prior. @@ -275,7 +275,7 @@ def sample(self, prior: MarkovStateModel, n_samples: int, n_steps: Optional[int] ] return samples - def fit_from_msm(self, msm: MarkovStateModel, callback=None): + def fit_from_msm(self, msm: MarkovStateModel, callback=None, **kw): r""" Fits a bayesian posterior from a given Markov state model. The MSM must contain a count model to be able to produce confidences. Note that the count model should be produced using effective counting, otherwise counts are correlated and computed confidences are wrong. @@ -287,6 +287,11 @@ def fit_from_msm(self, msm: MarkovStateModel, callback=None): callback : callable, optional, default=None Function to be called to indicate progress of sampling. + Other Parameters + ---------------- + ignore_counting_mode : bool, optional, default=False + Method does not raise if counting mode isn't of the "effective" family. Use with caution. + Returns ------- self : BayesianMSM @@ -295,6 +300,11 @@ def fit_from_msm(self, msm: MarkovStateModel, callback=None): if not msm.has_count_model: raise ValueError("Can only sample confidences with a count model. The counting mode should be 'effective'" " to avoid correlations between counts and therefore wrong confidences.") + if not kw.get("ignore_counting_mode", False) \ + and msm.count_model.counting_mode is not None and "effective" not in msm.count_model.counting_mode: + raise ValueError("The transition count model was not estimated using an effective counting method, " + "therefore counts are likely to be strongly correlated yielding wrong confidences. " + "To ignore this, set `ignore_counting_mode` to True in the call to `fit`.") # use the same count matrix as the MLE. This is why we have effective as a default samples = self.sample(msm, self.n_samples, self.n_steps, callback) self._model = BayesianPosterior(prior=msm, samples=samples) @@ -336,4 +346,4 @@ def _ck_estimate_model_for_lag(estimator, model, data, lagtime): class BayesianMSMChapmanKolmogorovValidator(MembershipsChapmanKolmogorovValidator): def fit(self, data, n_jobs=None, progress=None, **kw): - return super().fit(data, n_jobs, progress, estimate_model_for_lag=_ck_estimate_model_for_lag, **kw) \ No newline at end of file + return super().fit(data, n_jobs, progress, estimate_model_for_lag=_ck_estimate_model_for_lag, **kw) diff --git a/deeptime/markov/msm/_markov_state_model.py b/deeptime/markov/msm/_markov_state_model.py index e8a29ede8..237be35e1 100644 --- a/deeptime/markov/msm/_markov_state_model.py +++ b/deeptime/markov/msm/_markov_state_model.py @@ -48,6 +48,10 @@ class MarkovStateModel(Model): transition_matrix_tolerance : float, default=1e-8 The tolerance under which a matrix is still considered a transition matrix (only non-negative elements and row sums of 1). + lagtime : int, optional, default=None + The lagtime of this MSM. If there is a count model, the MSM assumes the lagtime of the count model, otherwise + falls back to the lagtime set via this constructor argument or a lagtime of `1` if no lagtime is provided + at all. See Also -------- @@ -64,11 +68,12 @@ class MarkovStateModel(Model): """ def __init__(self, transition_matrix, stationary_distribution=None, reversible=None, - n_eigenvalues=None, ncv=None, count_model=None, transition_matrix_tolerance=1e-8): + n_eigenvalues=None, ncv=None, count_model=None, transition_matrix_tolerance=1e-8, lagtime=None): super().__init__() self._is_reversible = reversible self._ncv = ncv self._transition_matrix_tolerance = transition_matrix_tolerance + self._lagtime = lagtime self.update_transition_matrix(transition_matrix) @@ -204,7 +209,7 @@ def to_koopman_model(self, empirical: bool = True, epsilon: float = 1e-10): @property def lagtime(self) -> int: r""" The lagtime this model was estimated at. In case no count model was provided, this property defaults - to a lagtime of `1`. + to the lagtime set in the constructor or a lagtime of :math:`\tau = 1` if it was left `None`. Returns ------- @@ -213,7 +218,7 @@ def lagtime(self) -> int: """ if self.count_model is not None: return self.count_model.lagtime - return 1 + return 1 if self._lagtime is None else self._lagtime @property def transition_matrix(self): @@ -474,20 +479,19 @@ def eigenvectors_right(self, k=None): def timescales(self, k=None): r""" - The relaxation timescales corresponding to the eigenvalues + Relaxation timescales corresponding to the eigenvalues. Parameters ---------- - k : int - number of timescales to be returned. By default all available - eigenvalues, minus 1. + k : int, optional, default=None + Number of timescales to be returned. By default, this uses all available eigenvalues except for the + first (stationary) eigenvalue. Returns ------- ts : ndarray(m) - relaxation timescales in units of the input trajectory time step, - defined by :math:`-\tau / ln | \lambda_i |, i = 2,...,k+1`. - + Relaxation timescales in units of the input trajectory time step, + defined by :math:`-\tau / \mathrm{ln} | \lambda_i |, i = 2,...,k+1`. """ if k is None: self._ensure_eigenvalues() diff --git a/deeptime/markov/tools/estimation/dense/tmat_sampling/tmatrix_sampler.py b/deeptime/markov/tools/estimation/dense/tmat_sampling/tmatrix_sampler.py index cb47df323..f2eeb51c0 100644 --- a/deeptime/markov/tools/estimation/dense/tmat_sampling/tmatrix_sampler.py +++ b/deeptime/markov/tools/estimation/dense/tmat_sampling/tmatrix_sampler.py @@ -55,16 +55,13 @@ def sample(self, nsamples=1, return_statdist=False, callback=None): else: n = self.count_matrix.shape[0] P_samples = np.zeros((nsamples, n, n)) - if return_statdist: - pi_samples = np.zeros((nsamples, n)) - for i in range(nsamples): - P_samples[i, :, :], pi_samples[i, :] = self.sampler.sample(N=self.n_steps, return_statdist=True) - if callback is not None: - callback() - return P_samples, pi_samples - else: - for i in range(nsamples): - P_samples[i, :, :] = self.sampler.sample(N=self.n_steps, return_statdist=False) - if callback is not None: - callback() - return P_samples + pi_samples = np.zeros((nsamples, n)) if return_statdist else None + for i in range(nsamples): + out = self.sampler.sample(N=self.n_steps, return_statdist=return_statdist) + if return_statdist: + P_samples[i, :, :], pi_samples[i, :] = out + else: + P_samples[i, :, :] = out + if callback is not None: + callback() + return P_samples if not return_statdist else (P_samples, pi_samples) diff --git a/deeptime/numeric/_utils.py b/deeptime/numeric/_utils.py index 527ff44c0..02793a028 100644 --- a/deeptime/numeric/_utils.py +++ b/deeptime/numeric/_utils.py @@ -46,13 +46,13 @@ def is_diagonal_matrix(matrix: _np.ndarray) -> bool: return _np.all(matrix == _np.diag(_np.diagonal(matrix))) -def is_square_matrix(arr: _np.ndarray) -> bool: +def is_square_matrix(arr) -> bool: r""" Determines whether an array is a square matrix. This means that ndim must be 2 and shape[0] must be equal to shape[1]. Parameters ---------- - arr : ndarray or sparse array + arr : object The array to check. Returns diff --git a/deeptime/plots/__init__.py b/deeptime/plots/__init__.py new file mode 100644 index 000000000..14b17191a --- /dev/null +++ b/deeptime/plots/__init__.py @@ -0,0 +1,12 @@ +r""" +.. currentmodule: deeptime.plots + +.. autosummary:: + :toctree: generated/ + :template: class_nomodule.rst + + ImpliedTimescalesData + plot_implied_timescales +""" + +from .implied_timescales import plot_implied_timescales, ImpliedTimescalesData diff --git a/deeptime/plots/chapman_kolmogorov.py b/deeptime/plots/chapman_kolmogorov.py new file mode 100644 index 000000000..669ddd62f --- /dev/null +++ b/deeptime/plots/chapman_kolmogorov.py @@ -0,0 +1,8 @@ + + +class ChapmanKolmogorovData: + pass + + +def plot_chapman_kolmogorov_test(ax, data: ChapmanKolmogorovData): + pass diff --git a/deeptime/plots/implied_timescales.py b/deeptime/plots/implied_timescales.py new file mode 100644 index 000000000..cd199ad80 --- /dev/null +++ b/deeptime/plots/implied_timescales.py @@ -0,0 +1,255 @@ +from typing import Optional + +import numpy as np + +from deeptime.util import confidence_interval +from deeptime.util.decorators import plotting_function + + +class ImpliedTimescalesData: + r""" Instances of this class hold a sequence of lagtimes and corresponding process timescales (potentially + with process timescales of sampled models in a Bayesian setting). Objects can be + used with :meth:`plot_implied_timescales`. + + In case models over a range of lagtimes are available, the static method :meth:`from_models` can be used. + + Parameters + ---------- + lagtimes : iterable of int + Lagtimes corresponding to processes and their timescales. + its : iterable of ndarray + The timescales for each process, shape (n_lagtimes, n_processes). + its_stats : list of list of ndarray, optional, default=None + Sampled timescales of shape (n_lagtimes, n_processes, n_samples). + + See Also + -------- + plot_implied_timescales + """ + def __init__(self, lagtimes, its, its_stats=None): + self._lagtimes = np.asarray(lagtimes, dtype=int) + assert len(its) == self.n_lagtimes, f"The length of its should correspond to the number of " \ + f"lagtimes ({self.n_lagtimes}), got {len(its)} instead." + self._max_n_processes = max(len(x) for x in its) + self._max_n_samples = 0 if its_stats is None else max(len(x) if x is not None else 0 for x in its_stats) + self._its = np.full((self.n_lagtimes, self._max_n_processes), fill_value=np.nan) + assert self._its.ndim == 2 and self._its.shape[0] == self.n_lagtimes, \ + "its should be of shape (lagtimes, processes)." + for i, processes in enumerate(its): + self._its[i, :len(processes)] = processes + + if self.has_samples: + assert len(its_stats) == self.n_lagtimes, f"The length of its stats should correspond to the number of " \ + f"lagtimes ({self.n_lagtimes}), got {len(its_stats)} instead." + self._its_stats = np.full((self.n_lagtimes, self.max_n_processes, self.max_n_samples), fill_value=np.nan) + for lag_ix in range(len(its_stats)): + + samples = its_stats[lag_ix] + if samples is not None: + for sample_ix in range(len(samples)): + arr = np.asarray(its_stats[lag_ix][sample_ix]) + n = min(len(arr), self.max_n_processes) + self._its_stats[lag_ix, :n, sample_ix] = arr[:n] + if not (self._its_stats.ndim == 3 and self._its_stats.shape[0] == self.n_lagtimes and + self._its_stats.shape[1] == self.max_n_processes): + raise ValueError(f"its_stats should be of shape (lagtimes={self.n_lagtimes}, " + f"processes={self.max_n_processes}, samples={self.max_n_samples}) but was " + f"{self._its_stats.shape}") + else: + self._its_stats = None + ix = np.argsort(self.lagtimes) + self._lagtimes = self._lagtimes[ix] + self._its = self._its[ix] + self._its_stats = None if self._its_stats is None else self._its_stats[ix] + + @property + def lagtimes(self) -> np.ndarray: + r""" Yields the lagtimes corresponding to an instance of this class. """ + return self._lagtimes + + @property + def n_lagtimes(self) -> int: + r""" Number of lagtimes. """ + return len(self.lagtimes) + + @property + def max_n_processes(self) -> int: + r""" Maximum number of processes. """ + return self._max_n_processes + + @property + def max_n_samples(self) -> int: + r""" Maximum number of samples. """ + return self._max_n_samples + + @property + def has_samples(self) -> bool: + r""" Whether the data contains samples. """ + return self.max_n_samples > 0 + + def timescales_for_process(self, process_index: int) -> np.ndarray: + r""" Yields maximum-likelihood timescales for a particular process. + + Parameters + ---------- + process_index : int + The process. + + Returns + ------- + timescales : ndarray (lagtimes,) + The timescales for the particular process. Might contain NaN. + """ + assert process_index < self.max_n_processes, \ + f"The process ({process_index}) should be contained in data ({self.max_n_processes})." + return self._its[:, process_index] + + def samples_for_process(self, process_index: int) -> np.ndarray: + r"""Yields timescales samples for a particular process. + + Parameters + ---------- + process_index : int + The process. + + Returns + ------- + timescales_samples : ndarray(lagtimes, max_n_samples) + The sampled timescales for a particular process. Might contain NaN. + """ + assert self.has_samples, "This timescales data object contains no samples." + assert process_index < self.max_n_processes, "The process should be contained in data." + return self._its_stats[:, process_index] + + def n_samples(self, lagtime_index: int, process_index: int) -> int: + r""" Yields the number of samples for a particular lagtime and a particular process. + + Parameters + ---------- + lagtime_index : int + The lagtime index corresponding to :attr:`lagtimes`. + process_index : int + The process index. + + Returns + ------- + n_samples : int + The number of samples. + """ + data = self.samples_for_process(process_index)[lagtime_index] + return np.count_nonzero(~np.isnan(data)) + + @staticmethod + def from_models(models, n_its=None): + r""" Converts a list of models to a :class:`ImpliedTimescalesData` object. + + Parameters + ---------- + models : list + The input data. Models with and without samples to compute confidences should not be mixed. + n_its : int or None, optional + Number of timescales to compute. + + Returns + ------- + its_data : ImpliedTimescalesData + The data object. + """ + if not isinstance(models, (list, tuple)): + models = [models] + + if len(models) == 0: + raise ValueError("Data cannot be empty.") + assert all(callable(getattr(model, 'timescales', None)) for model in models), \ + "all models need to have a timescales method" + assert all(hasattr(model, 'lagtime') for model in models), "all models need a lagtime attribute or property" + + lagtimes = [] + its = [] + its_stats = [] + + for model in models: + is_bayesian = hasattr(model, 'prior') and hasattr(model, 'samples') + lagtimes.append(model.lagtime) + if is_bayesian: + result = model.timescales(k=n_its) + its.append(result[0]) + its_stats.append(result[1]) + else: + its.append(model.timescales(k=n_its)) + its_stats.append(None) + return ImpliedTimescalesData(lagtimes, its, its_stats) + + +@plotting_function +def plot_implied_timescales(ax, data: ImpliedTimescalesData, n_its: Optional[int] = None, process: Optional[int] = None, + show_mle: bool = True, show_samples: bool = True, show_sample_mean: bool = True, + show_sample_confidence: bool = True, show_cutoff: bool = True, + sample_confidence: float = .95, + colors=None, **kwargs): + r"""Creates an implied timescales plot inside exising matplotlib axes. + + .. plot:: examples/plot_implied_timescales.py + + Parameters + ---------- + ax : matplotlib.axes.Axes + The matplotlib axes to use for plotting. + data : ImpliedTimescalesData + A timescales data container object, can be obtained, e.g., via :meth:`ImpliedTimescalesData.from_models`. + n_its : int, optional, default=None + Maximum number of timescales to plot. + process : int, optional, default=None + A particular process to plot. This is mutually exclusive with n_its. + show_mle : bool, default=True + Whether to show the timescale of the maximum-likelihood estimate. + show_samples : bool, default=True + Whether to show sample means and/or confidences. + show_sample_mean : bool, default=True + Whether to show the sample mean. Only has an effect if show_samples is True and there are samples in the data. + show_sample_confidence : bool, default=True + Whether to show the sample confidence. Only has an effect if show_samples is True and there are samples + in the data. + show_cutoff : bool, default=True + Whether to show the model resolution cutoff as grey filled area. + sample_confidence : float, default=0.95 + The confidence to plot. The default amounts to a shaded area containing 95% of the sampled values. + colors : list of colors, optional, default=None + The colors that should be used for timescales. By default uses the matplotlib default colors as per + rc-config value "axes.prop_cycle". + **kwargs + Keyword arguments which are forwarded into the matplotlib plotting function for timescales. + + See Also + -------- + ImpliedTimescalesData + """ + + if n_its is not None and process is not None: + raise ValueError("n_its and process are mutually exclusive.") + if process is not None and process >= data.max_n_processes: + raise ValueError(f"Requested process {process} when only {data.max_n_processes} are available.") + + if process is None and n_its is None: + n_its = data.max_n_processes + it_indices = [process] if process is not None else np.arange(n_its) + if colors is None: + from matplotlib import rcParams + colors = rcParams['axes.prop_cycle'].by_key()['color'] + for it_index in it_indices: + color = colors[it_index % len(colors)] + if show_mle: + ax.plot(data.lagtimes, data.timescales_for_process(it_index), color=color, **kwargs) + if data.has_samples and show_samples: + its_samples = data.samples_for_process(it_index) + if show_sample_mean: + sample_mean = np.nanmean(its_samples, axis=1) + ax.plot(data.lagtimes, sample_mean, marker='o', linestyle='dashed', color=color) + if show_sample_confidence: + l_conf, r_conf = confidence_interval(its_samples.T, conf=sample_confidence, remove_nans=True) + ax.fill_between(data.lagtimes, l_conf, r_conf, alpha=0.2, color=color) + + if show_cutoff: + ax.plot(data.lagtimes, data.lagtimes, linewidth=2, color='black') + ax.fill_between(data.lagtimes, np.full((data.n_lagtimes,), fill_value=ax.get_ylim()[0]), data.lagtimes, + alpha=0.5, color='grey') diff --git a/deeptime/setup.py b/deeptime/setup.py index 62212fbd1..1de4304c6 100644 --- a/deeptime/setup.py +++ b/deeptime/setup.py @@ -13,6 +13,7 @@ def configuration(parent_package='', top_path=None): config.add_subpackage('basis') config.add_subpackage('util') config.add_subpackage('sindy') + config.add_subpackage('plots') from Cython.Build import cythonize config.ext_modules = cythonize( diff --git a/deeptime/src/include/deeptime/markov/msm/tram/tram.h b/deeptime/src/include/deeptime/markov/msm/tram/tram.h index 9d76c9349..9f15929ab 100644 --- a/deeptime/src/include/deeptime/markov/msm/tram/tram.h +++ b/deeptime/src/include/deeptime/markov/msm/tram/tram.h @@ -288,6 +288,7 @@ struct TRAM { double iterationError{0}; + py::gil_scoped_release gil; for (decltype(maxIter) iterationCount = 0; iterationCount < maxIter; ++iterationCount) { // Self-consistent update of the TRAM equations. diff --git a/deeptime/util/_validation.py b/deeptime/util/_validation.py index ea055edc2..f5c5fdf80 100644 --- a/deeptime/util/_validation.py +++ b/deeptime/util/_validation.py @@ -122,8 +122,7 @@ class LaggedModelValidator(Estimator): mlags : int or int-array, default=10 multiples of lag times for testing the Model, e.g. range(10). A single int will trigger a range, i.e. mlags=10 maps to - mlags=range(1, 10). The setting None will choose mlags automatically - according to the longest available trajectory + mlags=range(1, 10). conf : float, default = 0.95 confidence interval for errors diff --git a/deeptime/util/callbacks.py b/deeptime/util/callbacks.py index 7c15df98d..b14205cd6 100644 --- a/deeptime/util/callbacks.py +++ b/deeptime/util/callbacks.py @@ -63,8 +63,6 @@ def __enter__(self): return self def __exit__(self, exc_type, exc_val, exc_tb): - if self.total and not exc_type: - inc = self.total - self.progress_bar.n - if inc > 0: - self.progress_bar.update(inc) + if exc_type is None: + self.progress_bar.total = self.progress_bar.n # force finish self.progress_bar.close() diff --git a/deeptime/util/decorators.py b/deeptime/util/decorators.py index cdaed088c..e2b9a4511 100644 --- a/deeptime/util/decorators.py +++ b/deeptime/util/decorators.py @@ -43,14 +43,19 @@ def invalidate(self): self.cache.clear() -def plotting_function(fn): # pragma: no cover +def _plotting_function(fn, requires_networkx): # pragma: no cover r""" Decorator marking a function that is a plotting utility. This will exclude it from coverage and test whether dependencies are installed. """ @functools.wraps(fn) def wrapper(*args, **kw): - if not module_available("matplotlib") or not module_available("networkx"): - raise RuntimeError("Plotting functions require matplotlib and networkx to be installed.") + if not module_available("matplotlib") or (requires_networkx and not module_available("networkx")): + raise RuntimeError(f"Plotting function requires matplotlib {'and networkx ' if requires_networkx else ''}" + f"to be installed.") return fn(*args, **kw) return wrapper + + +plotting_function = functools.partial(_plotting_function, requires_networkx=False) +plotting_function_with_networkx = functools.partial(_plotting_function, requires_networkx=True) diff --git a/deeptime/util/stats.py b/deeptime/util/stats.py index d5297fbf1..3f84afc61 100644 --- a/deeptime/util/stats.py +++ b/deeptime/util/stats.py @@ -6,7 +6,7 @@ from deeptime.util.types import ensure_array -def confidence_interval(data, conf=0.95): +def confidence_interval(data, conf=0.95, remove_nans=False): r""" Computes element-wise confidence intervals from a sample of ndarrays Given a sample of arbitrarily shaped ndarrays, computes element-wise @@ -19,6 +19,9 @@ def confidence_interval(data, conf=0.95): index, the remaining indexes are specific to the array of interest conf : float, optional, default = 0.95 confidence interval + remove_nans : bool, optional, default=False + The default leads to a `np.nan` result if there are any `nan` values in `data`. If set to `True`, the + `np.nan` values are ignored. Return ------ @@ -49,8 +52,14 @@ def _confidence_interval_1d(x): """ assert x.ndim == 1, x.ndim - if np.any(np.isnan(x)): - return np.nan, np.nan, np.nan + nan = np.isnan(x) + if remove_nans: + x = x[np.where(~nan)] + if len(x) == 0: + return np.nan, np.nan, np.nan + else: + if np.any(nan): + return np.nan, np.nan, np.nan d_min, d_max = np.min(x), np.max(x) diff --git a/docs/source/api/index_plots.rst b/docs/source/api/index_plots.rst new file mode 100644 index 000000000..389a2b1c1 --- /dev/null +++ b/docs/source/api/index_plots.rst @@ -0,0 +1,11 @@ +.. _ref-plots: + +deeptime.plots +============== + +The *plots* package contains some plotting convenience utilities. + +.. automodule:: deeptime.plots + +.. toctree:: + :maxdepth: 1 diff --git a/docs/source/contents.rst b/docs/source/contents.rst index a49f51e37..d06f8ba02 100644 --- a/docs/source/contents.rst +++ b/docs/source/contents.rst @@ -34,6 +34,7 @@ Table Of Contents api/index_basis api/index_kernels api/index_data + api/index_plots api/index_numeric api/index_util diff --git a/examples/methods/plot_implied_timescales.py b/examples/methods/plot_implied_timescales.py new file mode 100644 index 000000000..e322cef57 --- /dev/null +++ b/examples/methods/plot_implied_timescales.py @@ -0,0 +1,35 @@ +""" +Implied timescales +================== + +This example demonstrates how to obtain an implied timescales (ITS) plot for a Bayesian Markov state model. +""" + +import matplotlib.pyplot as plt +import numpy as np + +from deeptime.clustering import KMeans +from deeptime.data import double_well_2d +from deeptime.markov import TransitionCountEstimator +from deeptime.markov.msm import BayesianMSM +from deeptime.plots import ImpliedTimescalesData, plot_implied_timescales + +system = double_well_2d() +data = system.trajectory(x0=np.random.normal(scale=.2, size=(10, 2)), length=1000) +clustering = KMeans(n_clusters=50).fit_fetch(np.concatenate(data)) +dtrajs = [clustering.transform(traj) for traj in data] + +models = [] +lagtimes = np.arange(1, 10) +for lagtime in lagtimes: + counts = TransitionCountEstimator(lagtime=lagtime, count_mode='effective').fit_fetch(dtrajs) + models.append(BayesianMSM(n_samples=50).fit_fetch(counts)) + +its_data = ImpliedTimescalesData.from_models(models) + +fig, ax = plt.subplots(1, 1) +plot_implied_timescales(ax, its_data, n_its=2) +ax.set_yscale('log') +ax.set_title('Implied timescales') +ax.set_xlabel('lag time (steps)') +ax.set_ylabel('timescale (steps)') diff --git a/noxfile.py b/noxfile.py index e8c3b5d7b..de8e58c1f 100644 --- a/noxfile.py +++ b/noxfile.py @@ -53,11 +53,12 @@ def tests(session: nox.Session) -> None: @nox.session(reuse_venv=True) def make_docs(session: nox.Session) -> None: - session.install("-r", "tests/requirements.txt") - session.install("-r", "docs/requirements.txt") - session.install("-e", ".", '-v', silent=False) + if not session.posargs or 'noinstall' not in session.posargs: + session.install("-r", "tests/requirements.txt") + session.install("-r", "docs/requirements.txt") + session.install("-e", ".", '-v', silent=False) session.chdir("docs") - if session.posargs and session.posargs[0] == 'clean': + if session.posargs and 'clean' in session.posargs: session.log("First run clean") shutil.rmtree('source/api/generated') shutil.rmtree('source/examples') diff --git a/tests/clustering/test_kmeans.py b/tests/clustering/test_kmeans.py index 4bdd34963..d9c8d8a75 100644 --- a/tests/clustering/test_kmeans.py +++ b/tests/clustering/test_kmeans.py @@ -411,7 +411,5 @@ def loop_callback(*_): assert_equal(progress_instances[0].n, 2) assert_equal(progress_instances[0].n_update_calls, 2) assert_equal(progress_instances[0].n_close_calls, 1) - assert_equal(progress_instances[1].n_update_calls, - n_loop_callbacks if n_loop_callbacks == 5 else n_loop_callbacks + 1) - assert_equal(progress_instances[1].n, 5) + assert_equal(progress_instances[1].n, progress_instances[1].total) assert_equal(progress_instances[1].n_close_calls, 1) diff --git a/tests/plots/__init__.py b/tests/plots/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/plots/test_implied_timescales.py b/tests/plots/test_implied_timescales.py new file mode 100644 index 000000000..606c181b0 --- /dev/null +++ b/tests/plots/test_implied_timescales.py @@ -0,0 +1,150 @@ +import matplotlib.pyplot as plt +import pytest +import numpy as np +from numpy.testing import assert_raises, assert_equal, assert_array_equal, assert_ + +from deeptime.data import double_well_discrete, double_well_2d +from deeptime.decomposition import TICA +from deeptime.markov.hmm import HiddenMarkovModel, GaussianOutputModel, MaximumLikelihoodHMM, init, BayesianHMM +from deeptime.markov.msm import MarkovStateModel, MaximumLikelihoodMSM, BayesianMSM +from deeptime.markov import BayesianPosterior +from deeptime.plots import plot_implied_timescales, ImpliedTimescalesData + + +@pytest.fixture +def figure(): + f, ax = plt.subplots(1, 1) + yield f, ax + + +def test_to_its_data_wrong_args(): + with assert_raises(ValueError): + ImpliedTimescalesData.from_models([]) + + with assert_raises(AssertionError): + ImpliedTimescalesData.from_models([object]) + + +@pytest.mark.parametrize("model", [MarkovStateModel([[.9, .1], [.1, .9]]), + HiddenMarkovModel(MarkovStateModel([[.9, .1], [.1, .9]]), + GaussianOutputModel(2, [0, 1], [1, 1]))], + ids=lambda x: x.__class__.__name__) +def test_to_its_data(model): + data = ImpliedTimescalesData.from_models([model, model]) + assert_equal(data.lagtimes, [1, 1]) + assert_equal(data.n_lagtimes, 2) + assert_equal(data.max_n_samples, 0) + assert_equal(data.max_n_processes, 1) + timescales = data.timescales_for_process(0) + assert_equal(len(timescales), 2) + assert_array_equal(timescales[0], timescales[1]) + with assert_raises(AssertionError): + data.timescales_for_process(1) + + +def doublewell_mlmsm(lagtime, n_samples, count_mode='sliding'): + return MaximumLikelihoodMSM(lagtime=lagtime).fit_fetch(double_well_discrete().dtraj_n6good, count_mode=count_mode) + + +def doublewell_bmsm(lagtime, n_samples): + return BayesianMSM(n_samples=n_samples)\ + .fit_fetch(doublewell_mlmsm(lagtime, 0, count_mode='effective')) + + +def doublewell_hmm(lagtime, n_samples): + return MaximumLikelihoodHMM( + init.discrete.metastable_from_data(double_well_discrete().dtraj_n6good, n_hidden_states=4, lagtime=lagtime), + lagtime=lagtime, maxit=10, maxit_reversible=100 + ).fit_fetch(double_well_discrete().dtraj_n6good) + + +def doublewell_bhmm(lagtime, n_samples): + return BayesianHMM.default(double_well_discrete().dtraj_n6good, n_hidden_states=4, lagtime=lagtime, + n_samples=n_samples) \ + .fit_fetch(double_well_discrete().dtraj_n6good) + + +def doublewell_tica(lagtime, n_samples): + return TICA(lagtime=lagtime).fit_fetch(double_well_2d().trajectory([[0, 0]], length=200)) + + +doublewell_models = [ + doublewell_mlmsm, + doublewell_bmsm, + doublewell_bhmm, + doublewell_hmm, + doublewell_tica +] + + +@pytest.mark.parametrize("dw_model", doublewell_models, ids=lambda x: x.__name__) +def test_plot_its(figure, dw_model): + f, ax = figure + lagtimes = [1, 2, 5, 10, 15, 100] + n_samples = [5, 10, 2, 30, 100, 20] + + models = [] + for lagtime, n in zip(lagtimes, n_samples): + models.append(dw_model(lagtime, n)) + data = ImpliedTimescalesData.from_models(models) + assert_(data.max_n_processes >= 2) + assert_equal(data.has_samples, isinstance(models[0], BayesianPosterior)) + assert_equal(data.max_n_samples, 100 if data.has_samples else 0) + if data.has_samples: + assert_equal(data.n_samples(0, 0), n_samples[0]) + assert_equal(data.n_samples(1, 0), n_samples[1]) + assert_equal(data.n_samples(2, 0), n_samples[2]) + assert_equal(data.n_samples(3, 0), n_samples[3]) + assert_equal(data.n_samples(4, 0), n_samples[4]) + assert_equal(data.n_samples(5, 0), n_samples[5]) + plot_implied_timescales(ax, data) + + +def test_its_mixed_est_sort(figure): + f, ax = figure + + mlmsm5 = MaximumLikelihoodMSM(lagtime=5).fit_fetch(double_well_discrete().dtraj_n6good, count_mode='effective') + mlmsm6 = MaximumLikelihoodMSM(lagtime=6).fit_fetch(double_well_discrete().dtraj_n6good, count_mode='effective') + mlmsm7 = MaximumLikelihoodMSM(lagtime=7).fit_fetch(double_well_discrete().dtraj_n6good, count_mode='effective') + mlmsm8 = MaximumLikelihoodMSM(lagtime=8).fit_fetch(double_well_discrete().dtraj_n6good, count_mode='effective') + mlmsm9 = MaximumLikelihoodMSM(lagtime=9).fit_fetch(double_well_discrete().dtraj_n6good, count_mode='effective') + mlmsm10 = MaximumLikelihoodMSM(lagtime=10).fit_fetch(double_well_discrete().dtraj_n6good, count_mode='effective') + models = [ + BayesianMSM(n_samples=25).fit_fetch(mlmsm8), + mlmsm5, + BayesianMSM(n_samples=15).fit_fetch(mlmsm6), + mlmsm10, + mlmsm7, + BayesianMSM(n_samples=13).fit_fetch(mlmsm9) + ] + data = ImpliedTimescalesData.from_models(models) + assert_equal(data.n_lagtimes, len(models)) + assert_equal(data.lagtimes, [5, 6, 7, 8, 9, 10]) + assert_equal(data.max_n_samples, 25) + assert_equal(data.n_samples(0, 0), 0) + assert_equal(data.n_samples(1, 0), 15) + assert_equal(data.n_samples(2, 0), 0) + assert_equal(data.n_samples(3, 0), 25) + assert_equal(data.n_samples(4, 0), 13) + assert_equal(data.n_samples(5, 0), 0) + plot_implied_timescales(ax, data) + + +@pytest.mark.parametrize("bayesian", [False, True]) +def test_decayed_process(bayesian): + dtraj = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 2, 1]) # state "2" is only visible with lagtime 1 + msm1 = MaximumLikelihoodMSM(lagtime=1).fit_fetch(dtraj) + msm2 = MaximumLikelihoodMSM(lagtime=2).fit_fetch(dtraj) + msm3 = MaximumLikelihoodMSM(lagtime=3).fit_fetch(dtraj) + models = [msm1, msm2, msm3] + if bayesian: + models = [BayesianMSM(n_samples=15).fit_fetch(msm, ignore_counting_mode=True) for msm in models] + data = ImpliedTimescalesData.from_models(models) + assert_equal(data.max_n_processes, 2) + assert_equal(data.max_n_samples, 0 if not bayesian else 15) + assert_equal(data.n_lagtimes, 3) + assert_equal(np.count_nonzero(np.isnan(data.timescales_for_process(0))), 0) # 0 nans + assert_equal(np.count_nonzero(np.isnan(data.timescales_for_process(1))), 2) # 2 nans + if bayesian: + assert_equal(np.count_nonzero(np.isnan(data.samples_for_process(0))), 0) # 0 nans + assert_equal(np.count_nonzero(np.isnan(data.samples_for_process(1))), 2 * 15) # 0 nans diff --git a/tests/requirements.txt b/tests/requirements.txt index 040532a72..e04ab9ad8 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -7,6 +7,7 @@ pybind11==2.8.1 scikit-learn==1.0.1 threadpoolctl==3.0.0 torch>=1.10.0; python_version<"3.10" +matplotlib pytest==6.2.5 pytest-cov