diff --git a/CMakeLists.txt b/CMakeLists.txt
index 26cecc9b4..49e92827c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -9,7 +9,7 @@ if(DEFINED PROJECT_NAME)
 endif()
 project(deeptime LANGUAGES C CXX VERSION 0.0.0)
 
-find_package(OpenMP)
+find_package(OpenMP 4)
 if(OpenMP_FOUND)
     add_definitions(-DUSE_OPENMP)
 endif()
diff --git a/deeptime/__init__.py b/deeptime/__init__.py
index 59d35869b..735c45a25 100644
--- a/deeptime/__init__.py
+++ b/deeptime/__init__.py
@@ -14,6 +14,11 @@
 from . import decomposition
 from . import markov
 
+from .util.platform import module_available
+if module_available("matplotlib"):
+    from . import plots
+del module_available
+
 
 def capi_includes(inc_clustering: bool = False, inc_markov: bool = False, inc_markov_hmm: bool = False,
                   inc_data: bool = False):
diff --git a/deeptime/data/_drunkards_walk_simulator.py b/deeptime/data/_drunkards_walk_simulator.py
index d68ec00aa..fc181efe9 100644
--- a/deeptime/data/_drunkards_walk_simulator.py
+++ b/deeptime/data/_drunkards_walk_simulator.py
@@ -3,7 +3,7 @@
 
 import numpy as np
 
-from deeptime.util.decorators import plotting_function
+from deeptime.util.decorators import plotting_function_with_networkx
 
 Coordinate = Tuple[int, int]
 
@@ -245,7 +245,7 @@ def walk(self, start: Coordinate, n_steps: int, stop: bool = True, return_states
             return states
 
     @staticmethod
-    @plotting_function
+    @plotting_function_with_networkx
     def plot_path(ax, path, intermediates: bool = True, color_lerp: bool = True, **plot_kw):  # pragma: no cover
         r""" Plots a path onto a drunkard's walk map. The path is interpolated with splines and, if desired, has a
         linearly interpolated color along its path.
@@ -286,7 +286,7 @@ def plot_path(ax, path, intermediates: bool = True, color_lerp: bool = True, **p
         else:
             ax.plot(xint, yint, **plot_kw)
 
-    @plotting_function
+    @plotting_function_with_networkx
     def plot_network(self, ax, F, cmap=None, connection_threshold: float = 0.):  # pragma: no cover
         r""" Plots a flux network onto a matplotlib axis assuming that the states are ordered according to a iota
         range (i.e., {0, 1, ..., n_states-1}) on the grid.
@@ -335,7 +335,7 @@ def plot_network(self, ax, F, cmap=None, connection_threshold: float = 0.):  # p
                                connectionstyle='arc3, rad=0.1')
         return edge_colors
 
-    @plotting_function
+    @plotting_function_with_networkx
     def plot_2d_map(self, ax, barriers: bool = True, barrier_mode: str = 'filled'):  # pragma: no cover
         r""" Plots the drunkard's map onto an already existing matplotlib axis. One can select whether to draw barriers
         and if barriers should be drawn, whether they have a 'filled' face or be 'hollow' with just the border
diff --git a/deeptime/decomposition/_koopman.py b/deeptime/decomposition/_koopman.py
index e4ccb776c..6b69eaa09 100644
--- a/deeptime/decomposition/_koopman.py
+++ b/deeptime/decomposition/_koopman.py
@@ -642,6 +642,12 @@ def timescales(self, k=None, lagtime: Optional[int] = None) -> np.ndarray:
             lagtime = self._cov.lagtime
         return - lagtime / np.log(np.abs(self.singular_values[:k]))
 
+    @property
+    def lagtime(self):
+        r""" The lagtime corresponding to this model. See also
+        :meth:`CovarianceModel.lagtime <deeptime.covariance.CovarianceModel.lagtime>`. """
+        return self._cov.lagtime
+
     @property
     def feature_component_correlation(self):
         r"""Instantaneous correlation matrix between mean-free input features and projection components.
diff --git a/deeptime/decomposition/_vamp.py b/deeptime/decomposition/_vamp.py
index 11ad2bcb9..8298af448 100644
--- a/deeptime/decomposition/_vamp.py
+++ b/deeptime/decomposition/_vamp.py
@@ -513,7 +513,7 @@ def chapman_kolmogorov_validator(self, mlags, test_model: CovarianceKoopmanModel
         The Champan-Kolmogorov test is to compare the predictions to the estimates.
         """
         test_model = self.fetch_model() if test_model is None else test_model
-        assert test_model is not None, "We need a test model via argument or an estimator which was already" \
+        assert test_model is not None, "We need a test model via argument or an estimator which was already " \
                                        "fit to data."
         lagtime = self.lagtime
         if n_observables is not None:
diff --git a/deeptime/markov/_base.py b/deeptime/markov/_base.py
index 482a83c79..81ec84822 100644
--- a/deeptime/markov/_base.py
+++ b/deeptime/markov/_base.py
@@ -168,6 +168,26 @@ def evaluate_samples(self, quantity, delimiter='/', *args, **kwargs):
         from deeptime.util.stats import evaluate_samples as _eval
         return _eval(self.samples, quantity=quantity, delimiter=delimiter, *args, **kwargs)
 
+    def timescales(self, k=None):
+        r""" Relaxation timescales corresponding to the eigenvalues.
+
+        Parameters
+        ----------
+        k : int, optional, default=None
+            The number of timescales (excluding the stationary process).
+
+        Returns
+        -------
+        timescales : tuple(iterable, iterable)
+            Timescales of the prior and timescales of the samples.
+        """
+        return self.prior.timescales(k=k), self.evaluate_samples('timescales', k=k)
+
+    @property
+    def lagtime(self):
+        r"""Lagtime of the models."""
+        return self.prior.lagtime
+
 
 class MembershipsChapmanKolmogorovValidator(LaggedModelValidator):
     r""" Validates a model estimated at lag time tau by testing its predictions for longer lag times.
diff --git a/deeptime/markov/hmm/_hidden_markov_model.py b/deeptime/markov/hmm/_hidden_markov_model.py
index 322386a0a..f872a751f 100644
--- a/deeptime/markov/hmm/_hidden_markov_model.py
+++ b/deeptime/markov/hmm/_hidden_markov_model.py
@@ -303,6 +303,12 @@ def transition_counts(self) -> Optional[np.ndarray]:
         """
         return self.transition_model.count_model.count_matrix if self.transition_model.count_model is not None else None
 
+    def timescales(self, k=None):
+        r""" Yields the timescales of the hidden transition model.
+        See :meth:`MarkovStateModel.timescales <deeptime.markov.msm.MarkovStateModel.timescales>`.
+        """
+        return self.transition_model.timescales(k=k)
+
     @property
     def hidden_state_trajectories(self) -> Optional[List[np.ndarray]]:
         r"""
diff --git a/deeptime/markov/msm/_bayesian_msm.py b/deeptime/markov/msm/_bayesian_msm.py
index de2ca8d80..fb0d289fb 100644
--- a/deeptime/markov/msm/_bayesian_msm.py
+++ b/deeptime/markov/msm/_bayesian_msm.py
@@ -173,7 +173,7 @@ def fetch_model(self) -> Optional[BayesianPosterior]:
         """
         return self._model
 
-    def fit(self, data, callback: Callable = None):
+    def fit(self, data, callback: Callable = None, **kw):
         """
         Performs the estimation on either a count matrix or a previously estimated TransitionCountModel.
 
@@ -185,23 +185,23 @@ def fit(self, data, callback: Callable = None):
         callback: callable, optional, default=None
             Function to be called to indicate progress of sampling.
 
+        Other Parameters
+        ----------------
+        ignore_counting_mode : bool, optional, default=False
+            Method does not raise if counting mode isn't of the "effective" family. Use with caution.
+
         Returns
         -------
         self : BayesianMSM
             Reference to self.
         """
-        from deeptime.markov import TransitionCountModel
-        if isinstance(data, TransitionCountModel) and data.counting_mode is not None \
-                and "effective" not in data.counting_mode:
-            raise ValueError("The transition count model was not estimated using an effective counting method, "
-                             "therefore counts are likely to be strongly correlated yielding wrong confidences.")
-
         if isinstance(data, Estimator):
             if data.has_model:
                 data = data.fetch_model()
             else:
                 raise ValueError("Can only use estimators as input if they have been fit previously.")
 
+        from deeptime.markov import TransitionCountModel
         if isinstance(data, TransitionCountModel) or is_square_matrix(data):
             msm = MaximumLikelihoodMSM(
                 reversible=self.reversible, stationary_distribution_constraint=self.stationary_distribution_constraint,
@@ -214,7 +214,7 @@ def fit(self, data, callback: Callable = None):
                              "TransitionCountEstimator) or a MarkovStateModel instance or an estimator producing "
                              "Markov state models.")
 
-        return self.fit_from_msm(msm, callback=callback)
+        return self.fit_from_msm(msm, callback=callback, **kw)
 
     def sample(self, prior: MarkovStateModel, n_samples: int, n_steps: Optional[int] = None, callback=None):
         r""" Performs sampling based on a prior.
@@ -275,7 +275,7 @@ def sample(self, prior: MarkovStateModel, n_samples: int, n_steps: Optional[int]
         ]
         return samples
 
-    def fit_from_msm(self, msm: MarkovStateModel, callback=None):
+    def fit_from_msm(self, msm: MarkovStateModel, callback=None, **kw):
         r""" Fits a bayesian posterior from a given Markov state model. The MSM must contain a count model to be able
         to produce confidences. Note that the count model should be produced using effective counting, otherwise
         counts are correlated and computed confidences are wrong.
@@ -287,6 +287,11 @@ def fit_from_msm(self, msm: MarkovStateModel, callback=None):
         callback : callable, optional, default=None
             Function to be called to indicate progress of sampling.
 
+        Other Parameters
+        ----------------
+        ignore_counting_mode : bool, optional, default=False
+            Method does not raise if counting mode isn't of the "effective" family. Use with caution.
+
         Returns
         -------
         self : BayesianMSM
@@ -295,6 +300,11 @@ def fit_from_msm(self, msm: MarkovStateModel, callback=None):
         if not msm.has_count_model:
             raise ValueError("Can only sample confidences with a count model. The counting mode should be 'effective'"
                              " to avoid correlations between counts and therefore wrong confidences.")
+        if not kw.get("ignore_counting_mode", False) \
+                and msm.count_model.counting_mode is not None and "effective" not in msm.count_model.counting_mode:
+            raise ValueError("The transition count model was not estimated using an effective counting method, "
+                             "therefore counts are likely to be strongly correlated yielding wrong confidences. "
+                             "To ignore this, set `ignore_counting_mode` to True in the call to `fit`.")
         # use the same count matrix as the MLE. This is why we have effective as a default
         samples = self.sample(msm, self.n_samples, self.n_steps, callback)
         self._model = BayesianPosterior(prior=msm, samples=samples)
@@ -336,4 +346,4 @@ def _ck_estimate_model_for_lag(estimator, model, data, lagtime):
 class BayesianMSMChapmanKolmogorovValidator(MembershipsChapmanKolmogorovValidator):
 
     def fit(self, data, n_jobs=None, progress=None, **kw):
-        return super().fit(data, n_jobs, progress, estimate_model_for_lag=_ck_estimate_model_for_lag, **kw)
\ No newline at end of file
+        return super().fit(data, n_jobs, progress, estimate_model_for_lag=_ck_estimate_model_for_lag, **kw)
diff --git a/deeptime/markov/msm/_markov_state_model.py b/deeptime/markov/msm/_markov_state_model.py
index e8a29ede8..237be35e1 100644
--- a/deeptime/markov/msm/_markov_state_model.py
+++ b/deeptime/markov/msm/_markov_state_model.py
@@ -48,6 +48,10 @@ class MarkovStateModel(Model):
     transition_matrix_tolerance : float, default=1e-8
         The tolerance under which a matrix is still considered a transition matrix (only non-negative elements and
         row sums of 1).
+    lagtime : int, optional, default=None
+        The lagtime of this MSM. If there is a count model, the MSM assumes the lagtime of the count model, otherwise
+        falls back to the lagtime set via this constructor argument or a lagtime of `1` if no lagtime is provided
+        at all.
 
     See Also
     --------
@@ -64,11 +68,12 @@ class MarkovStateModel(Model):
     """
 
     def __init__(self, transition_matrix, stationary_distribution=None, reversible=None,
-                 n_eigenvalues=None, ncv=None, count_model=None, transition_matrix_tolerance=1e-8):
+                 n_eigenvalues=None, ncv=None, count_model=None, transition_matrix_tolerance=1e-8, lagtime=None):
         super().__init__()
         self._is_reversible = reversible
         self._ncv = ncv
         self._transition_matrix_tolerance = transition_matrix_tolerance
+        self._lagtime = lagtime
 
         self.update_transition_matrix(transition_matrix)
 
@@ -204,7 +209,7 @@ def to_koopman_model(self, empirical: bool = True, epsilon: float = 1e-10):
     @property
     def lagtime(self) -> int:
         r""" The lagtime this model was estimated at. In case no count model was provided, this property defaults
-        to a lagtime of `1`.
+        to the lagtime set in the constructor or a lagtime of :math:`\tau = 1` if it was left `None`.
 
         Returns
         -------
@@ -213,7 +218,7 @@ def lagtime(self) -> int:
         """
         if self.count_model is not None:
             return self.count_model.lagtime
-        return 1
+        return 1 if self._lagtime is None else self._lagtime
 
     @property
     def transition_matrix(self):
@@ -474,20 +479,19 @@ def eigenvectors_right(self, k=None):
 
     def timescales(self, k=None):
         r"""
-        The relaxation timescales corresponding to the eigenvalues
+        Relaxation timescales corresponding to the eigenvalues.
 
         Parameters
         ----------
-        k : int
-            number of timescales to be returned. By default all available
-            eigenvalues, minus 1.
+        k : int, optional, default=None
+            Number of timescales to be returned. By default, this uses all available eigenvalues except for the
+            first (stationary) eigenvalue.
 
         Returns
         -------
         ts : ndarray(m)
-            relaxation timescales in units of the input trajectory time step,
-            defined by :math:`-\tau / ln | \lambda_i |, i = 2,...,k+1`.
-
+            Relaxation timescales in units of the input trajectory time step,
+            defined by :math:`-\tau / \mathrm{ln} | \lambda_i |, i = 2,...,k+1`.
         """
         if k is None:
             self._ensure_eigenvalues()
diff --git a/deeptime/markov/tools/estimation/dense/tmat_sampling/tmatrix_sampler.py b/deeptime/markov/tools/estimation/dense/tmat_sampling/tmatrix_sampler.py
index cb47df323..f2eeb51c0 100644
--- a/deeptime/markov/tools/estimation/dense/tmat_sampling/tmatrix_sampler.py
+++ b/deeptime/markov/tools/estimation/dense/tmat_sampling/tmatrix_sampler.py
@@ -55,16 +55,13 @@ def sample(self, nsamples=1, return_statdist=False, callback=None):
         else:
             n = self.count_matrix.shape[0]
             P_samples = np.zeros((nsamples, n, n))
-            if return_statdist:
-                pi_samples = np.zeros((nsamples, n))
-                for i in range(nsamples):
-                    P_samples[i, :, :], pi_samples[i, :] = self.sampler.sample(N=self.n_steps, return_statdist=True)
-                    if callback is not None:
-                        callback()
-                return P_samples, pi_samples
-            else:
-                for i in range(nsamples):
-                    P_samples[i, :, :] = self.sampler.sample(N=self.n_steps, return_statdist=False)
-                    if callback is not None:
-                        callback()
-                return P_samples
+            pi_samples = np.zeros((nsamples, n)) if return_statdist else None
+            for i in range(nsamples):
+                out = self.sampler.sample(N=self.n_steps, return_statdist=return_statdist)
+                if return_statdist:
+                    P_samples[i, :, :], pi_samples[i, :] = out
+                else:
+                    P_samples[i, :, :] = out
+                if callback is not None:
+                    callback()
+            return P_samples if not return_statdist else (P_samples, pi_samples)
diff --git a/deeptime/numeric/_utils.py b/deeptime/numeric/_utils.py
index 527ff44c0..02793a028 100644
--- a/deeptime/numeric/_utils.py
+++ b/deeptime/numeric/_utils.py
@@ -46,13 +46,13 @@ def is_diagonal_matrix(matrix: _np.ndarray) -> bool:
     return _np.all(matrix == _np.diag(_np.diagonal(matrix)))
 
 
-def is_square_matrix(arr: _np.ndarray) -> bool:
+def is_square_matrix(arr) -> bool:
     r""" Determines whether an array is a square matrix. This means that ndim must be 2 and shape[0] must be equal
     to shape[1].
 
     Parameters
     ----------
-    arr : ndarray or sparse array
+    arr : object
         The array to check.
 
     Returns
diff --git a/deeptime/plots/__init__.py b/deeptime/plots/__init__.py
new file mode 100644
index 000000000..14b17191a
--- /dev/null
+++ b/deeptime/plots/__init__.py
@@ -0,0 +1,12 @@
+r"""
+.. currentmodule: deeptime.plots
+
+.. autosummary::
+    :toctree: generated/
+    :template: class_nomodule.rst
+
+    ImpliedTimescalesData
+    plot_implied_timescales
+"""
+
+from .implied_timescales import plot_implied_timescales, ImpliedTimescalesData
diff --git a/deeptime/plots/chapman_kolmogorov.py b/deeptime/plots/chapman_kolmogorov.py
new file mode 100644
index 000000000..669ddd62f
--- /dev/null
+++ b/deeptime/plots/chapman_kolmogorov.py
@@ -0,0 +1,8 @@
+
+
+class ChapmanKolmogorovData:
+    pass
+
+
+def plot_chapman_kolmogorov_test(ax, data: ChapmanKolmogorovData):
+    pass
diff --git a/deeptime/plots/implied_timescales.py b/deeptime/plots/implied_timescales.py
new file mode 100644
index 000000000..cd199ad80
--- /dev/null
+++ b/deeptime/plots/implied_timescales.py
@@ -0,0 +1,255 @@
+from typing import Optional
+
+import numpy as np
+
+from deeptime.util import confidence_interval
+from deeptime.util.decorators import plotting_function
+
+
+class ImpliedTimescalesData:
+    r""" Instances of this class hold a sequence of lagtimes and corresponding process timescales (potentially
+    with process timescales of sampled models in a Bayesian setting). Objects can be
+    used with :meth:`plot_implied_timescales`.
+
+    In case models over a range of lagtimes are available, the static method :meth:`from_models` can be used.
+
+    Parameters
+    ----------
+    lagtimes : iterable of int
+        Lagtimes corresponding to processes and their timescales.
+    its : iterable of ndarray
+        The timescales for each process, shape (n_lagtimes, n_processes).
+    its_stats : list of list of ndarray, optional, default=None
+        Sampled timescales of shape (n_lagtimes, n_processes, n_samples).
+
+    See Also
+    --------
+    plot_implied_timescales
+    """
+    def __init__(self, lagtimes, its, its_stats=None):
+        self._lagtimes = np.asarray(lagtimes, dtype=int)
+        assert len(its) == self.n_lagtimes, f"The length of its should correspond to the number of " \
+                                            f"lagtimes ({self.n_lagtimes}), got {len(its)} instead."
+        self._max_n_processes = max(len(x) for x in its)
+        self._max_n_samples = 0 if its_stats is None else max(len(x) if x is not None else 0 for x in its_stats)
+        self._its = np.full((self.n_lagtimes, self._max_n_processes), fill_value=np.nan)
+        assert self._its.ndim == 2 and self._its.shape[0] == self.n_lagtimes, \
+            "its should be of shape (lagtimes, processes)."
+        for i, processes in enumerate(its):
+            self._its[i, :len(processes)] = processes
+
+        if self.has_samples:
+            assert len(its_stats) == self.n_lagtimes, f"The length of its stats should correspond to the number of " \
+                                                      f"lagtimes ({self.n_lagtimes}), got {len(its_stats)} instead."
+            self._its_stats = np.full((self.n_lagtimes, self.max_n_processes, self.max_n_samples), fill_value=np.nan)
+            for lag_ix in range(len(its_stats)):
+
+                samples = its_stats[lag_ix]
+                if samples is not None:
+                    for sample_ix in range(len(samples)):
+                        arr = np.asarray(its_stats[lag_ix][sample_ix])
+                        n = min(len(arr), self.max_n_processes)
+                        self._its_stats[lag_ix, :n, sample_ix] = arr[:n]
+            if not (self._its_stats.ndim == 3 and self._its_stats.shape[0] == self.n_lagtimes and
+                    self._its_stats.shape[1] == self.max_n_processes):
+                raise ValueError(f"its_stats should be of shape (lagtimes={self.n_lagtimes}, "
+                                 f"processes={self.max_n_processes}, samples={self.max_n_samples}) but was "
+                                 f"{self._its_stats.shape}")
+        else:
+            self._its_stats = None
+        ix = np.argsort(self.lagtimes)
+        self._lagtimes = self._lagtimes[ix]
+        self._its = self._its[ix]
+        self._its_stats = None if self._its_stats is None else self._its_stats[ix]
+
+    @property
+    def lagtimes(self) -> np.ndarray:
+        r""" Yields the lagtimes corresponding to an instance of this class. """
+        return self._lagtimes
+
+    @property
+    def n_lagtimes(self) -> int:
+        r""" Number of lagtimes. """
+        return len(self.lagtimes)
+
+    @property
+    def max_n_processes(self) -> int:
+        r""" Maximum number of processes. """
+        return self._max_n_processes
+
+    @property
+    def max_n_samples(self) -> int:
+        r""" Maximum number of samples. """
+        return self._max_n_samples
+
+    @property
+    def has_samples(self) -> bool:
+        r""" Whether the data contains samples. """
+        return self.max_n_samples > 0
+
+    def timescales_for_process(self, process_index: int) -> np.ndarray:
+        r""" Yields maximum-likelihood timescales for a particular process.
+
+        Parameters
+        ----------
+        process_index : int
+            The process.
+
+        Returns
+        -------
+        timescales : ndarray (lagtimes,)
+            The timescales for the particular process. Might contain NaN.
+        """
+        assert process_index < self.max_n_processes, \
+            f"The process ({process_index}) should be contained in data ({self.max_n_processes})."
+        return self._its[:, process_index]
+
+    def samples_for_process(self, process_index: int) -> np.ndarray:
+        r"""Yields timescales samples for a particular process.
+
+        Parameters
+        ----------
+        process_index : int
+            The process.
+
+        Returns
+        -------
+        timescales_samples : ndarray(lagtimes, max_n_samples)
+            The sampled timescales for a particular process. Might contain NaN.
+        """
+        assert self.has_samples, "This timescales data object contains no samples."
+        assert process_index < self.max_n_processes, "The process should be contained in data."
+        return self._its_stats[:, process_index]
+
+    def n_samples(self, lagtime_index: int, process_index: int) -> int:
+        r""" Yields the number of samples for a particular lagtime and a particular process.
+
+        Parameters
+        ----------
+        lagtime_index : int
+            The lagtime index corresponding to :attr:`lagtimes`.
+        process_index : int
+            The process index.
+
+        Returns
+        -------
+        n_samples : int
+            The number of samples.
+        """
+        data = self.samples_for_process(process_index)[lagtime_index]
+        return np.count_nonzero(~np.isnan(data))
+
+    @staticmethod
+    def from_models(models, n_its=None):
+        r""" Converts a list of models to a :class:`ImpliedTimescalesData` object.
+
+        Parameters
+        ----------
+        models : list
+            The input data. Models with and without samples to compute confidences should not be mixed.
+        n_its : int or None, optional
+            Number of timescales to compute.
+
+        Returns
+        -------
+        its_data : ImpliedTimescalesData
+            The data object.
+        """
+        if not isinstance(models, (list, tuple)):
+            models = [models]
+
+        if len(models) == 0:
+            raise ValueError("Data cannot be empty.")
+        assert all(callable(getattr(model, 'timescales', None)) for model in models), \
+            "all models need to have a timescales method"
+        assert all(hasattr(model, 'lagtime') for model in models), "all models need a lagtime attribute or property"
+
+        lagtimes = []
+        its = []
+        its_stats = []
+
+        for model in models:
+            is_bayesian = hasattr(model, 'prior') and hasattr(model, 'samples')
+            lagtimes.append(model.lagtime)
+            if is_bayesian:
+                result = model.timescales(k=n_its)
+                its.append(result[0])
+                its_stats.append(result[1])
+            else:
+                its.append(model.timescales(k=n_its))
+                its_stats.append(None)
+        return ImpliedTimescalesData(lagtimes, its, its_stats)
+
+
+@plotting_function
+def plot_implied_timescales(ax, data: ImpliedTimescalesData, n_its: Optional[int] = None, process: Optional[int] = None,
+                            show_mle: bool = True, show_samples: bool = True, show_sample_mean: bool = True,
+                            show_sample_confidence: bool = True, show_cutoff: bool = True,
+                            sample_confidence: float = .95,
+                            colors=None, **kwargs):
+    r"""Creates an implied timescales plot inside exising matplotlib axes.
+
+    .. plot:: examples/plot_implied_timescales.py
+
+    Parameters
+    ----------
+    ax : matplotlib.axes.Axes
+        The matplotlib axes to use for plotting.
+    data : ImpliedTimescalesData
+        A timescales data container object, can be obtained, e.g., via :meth:`ImpliedTimescalesData.from_models`.
+    n_its : int, optional, default=None
+        Maximum number of timescales to plot.
+    process : int, optional, default=None
+        A particular process to plot. This is mutually exclusive with n_its.
+    show_mle : bool, default=True
+        Whether to show the timescale of the maximum-likelihood estimate.
+    show_samples : bool, default=True
+        Whether to show sample means and/or confidences.
+    show_sample_mean : bool, default=True
+        Whether to show the sample mean. Only has an effect if show_samples is True and there are samples in the data.
+    show_sample_confidence : bool, default=True
+        Whether to show the sample confidence. Only has an effect if show_samples is True and there are samples
+        in the data.
+    show_cutoff : bool, default=True
+        Whether to show the model resolution cutoff as grey filled area.
+    sample_confidence : float, default=0.95
+        The confidence to plot. The default amounts to a shaded area containing 95% of the sampled values.
+    colors : list of colors, optional, default=None
+        The colors that should be used for timescales. By default uses the matplotlib default colors as per
+        rc-config value "axes.prop_cycle".
+    **kwargs
+        Keyword arguments which are forwarded into the matplotlib plotting function for timescales.
+
+    See Also
+    --------
+    ImpliedTimescalesData
+    """
+
+    if n_its is not None and process is not None:
+        raise ValueError("n_its and process are mutually exclusive.")
+    if process is not None and process >= data.max_n_processes:
+        raise ValueError(f"Requested process {process} when only {data.max_n_processes} are available.")
+
+    if process is None and n_its is None:
+        n_its = data.max_n_processes
+    it_indices = [process] if process is not None else np.arange(n_its)
+    if colors is None:
+        from matplotlib import rcParams
+        colors = rcParams['axes.prop_cycle'].by_key()['color']
+    for it_index in it_indices:
+        color = colors[it_index % len(colors)]
+        if show_mle:
+            ax.plot(data.lagtimes, data.timescales_for_process(it_index), color=color, **kwargs)
+        if data.has_samples and show_samples:
+            its_samples = data.samples_for_process(it_index)
+            if show_sample_mean:
+                sample_mean = np.nanmean(its_samples, axis=1)
+                ax.plot(data.lagtimes, sample_mean, marker='o', linestyle='dashed', color=color)
+            if show_sample_confidence:
+                l_conf, r_conf = confidence_interval(its_samples.T, conf=sample_confidence, remove_nans=True)
+                ax.fill_between(data.lagtimes, l_conf, r_conf, alpha=0.2, color=color)
+
+    if show_cutoff:
+        ax.plot(data.lagtimes, data.lagtimes, linewidth=2, color='black')
+        ax.fill_between(data.lagtimes, np.full((data.n_lagtimes,), fill_value=ax.get_ylim()[0]), data.lagtimes,
+                        alpha=0.5, color='grey')
diff --git a/deeptime/setup.py b/deeptime/setup.py
index 62212fbd1..1de4304c6 100644
--- a/deeptime/setup.py
+++ b/deeptime/setup.py
@@ -13,6 +13,7 @@ def configuration(parent_package='', top_path=None):
     config.add_subpackage('basis')
     config.add_subpackage('util')
     config.add_subpackage('sindy')
+    config.add_subpackage('plots')
 
     from Cython.Build import cythonize
     config.ext_modules = cythonize(
diff --git a/deeptime/src/include/deeptime/markov/msm/tram/tram.h b/deeptime/src/include/deeptime/markov/msm/tram/tram.h
index 9d76c9349..9f15929ab 100644
--- a/deeptime/src/include/deeptime/markov/msm/tram/tram.h
+++ b/deeptime/src/include/deeptime/markov/msm/tram/tram.h
@@ -288,6 +288,7 @@ struct TRAM {
 
         double iterationError{0};
 
+        py::gil_scoped_release gil;
         for (decltype(maxIter) iterationCount = 0; iterationCount < maxIter; ++iterationCount) {
 
             // Self-consistent update of the TRAM equations.
diff --git a/deeptime/util/_validation.py b/deeptime/util/_validation.py
index ea055edc2..f5c5fdf80 100644
--- a/deeptime/util/_validation.py
+++ b/deeptime/util/_validation.py
@@ -122,8 +122,7 @@ class LaggedModelValidator(Estimator):
     mlags : int or int-array, default=10
         multiples of lag times for testing the Model, e.g. range(10).
         A single int will trigger a range, i.e. mlags=10 maps to
-        mlags=range(1, 10). The setting None will choose mlags automatically
-        according to the longest available trajectory
+        mlags=range(1, 10).
 
     conf : float, default = 0.95
         confidence interval for errors
diff --git a/deeptime/util/callbacks.py b/deeptime/util/callbacks.py
index 7c15df98d..b14205cd6 100644
--- a/deeptime/util/callbacks.py
+++ b/deeptime/util/callbacks.py
@@ -63,8 +63,6 @@ def __enter__(self):
         return self
 
     def __exit__(self, exc_type, exc_val, exc_tb):
-        if self.total and not exc_type:
-            inc = self.total - self.progress_bar.n
-            if inc > 0:
-                self.progress_bar.update(inc)
+        if exc_type is None:
+            self.progress_bar.total = self.progress_bar.n  # force finish
         self.progress_bar.close()
diff --git a/deeptime/util/decorators.py b/deeptime/util/decorators.py
index cdaed088c..e2b9a4511 100644
--- a/deeptime/util/decorators.py
+++ b/deeptime/util/decorators.py
@@ -43,14 +43,19 @@ def invalidate(self):
         self.cache.clear()
 
 
-def plotting_function(fn):  # pragma: no cover
+def _plotting_function(fn, requires_networkx):  # pragma: no cover
     r""" Decorator marking a function that is a plotting utility. This will exclude it from coverage and test
     whether dependencies are installed. """
 
     @functools.wraps(fn)
     def wrapper(*args, **kw):
-        if not module_available("matplotlib") or not module_available("networkx"):
-            raise RuntimeError("Plotting functions require matplotlib and networkx to be installed.")
+        if not module_available("matplotlib") or (requires_networkx and not module_available("networkx")):
+            raise RuntimeError(f"Plotting function requires matplotlib {'and networkx ' if requires_networkx else ''}"
+                               f"to be installed.")
         return fn(*args, **kw)
 
     return wrapper
+
+
+plotting_function = functools.partial(_plotting_function, requires_networkx=False)
+plotting_function_with_networkx = functools.partial(_plotting_function, requires_networkx=True)
diff --git a/deeptime/util/stats.py b/deeptime/util/stats.py
index d5297fbf1..3f84afc61 100644
--- a/deeptime/util/stats.py
+++ b/deeptime/util/stats.py
@@ -6,7 +6,7 @@
 from deeptime.util.types import ensure_array
 
 
-def confidence_interval(data, conf=0.95):
+def confidence_interval(data, conf=0.95, remove_nans=False):
     r""" Computes element-wise confidence intervals from a sample of ndarrays
 
     Given a sample of arbitrarily shaped ndarrays, computes element-wise
@@ -19,6 +19,9 @@ def confidence_interval(data, conf=0.95):
         index, the remaining indexes are specific to the array of interest
     conf : float, optional, default = 0.95
         confidence interval
+    remove_nans : bool, optional, default=False
+        The default leads to a `np.nan` result if there are any `nan` values in `data`. If set to `True`, the
+        `np.nan` values are ignored.
 
     Return
     ------
@@ -49,8 +52,14 @@ def _confidence_interval_1d(x):
         """
         assert x.ndim == 1, x.ndim
 
-        if np.any(np.isnan(x)):
-            return np.nan, np.nan, np.nan
+        nan = np.isnan(x)
+        if remove_nans:
+            x = x[np.where(~nan)]
+            if len(x) == 0:
+                return np.nan, np.nan, np.nan
+        else:
+            if np.any(nan):
+                return np.nan, np.nan, np.nan
 
         d_min, d_max = np.min(x), np.max(x)
 
diff --git a/docs/source/api/index_plots.rst b/docs/source/api/index_plots.rst
new file mode 100644
index 000000000..389a2b1c1
--- /dev/null
+++ b/docs/source/api/index_plots.rst
@@ -0,0 +1,11 @@
+.. _ref-plots:
+
+deeptime.plots
+==============
+
+The *plots* package contains some plotting convenience utilities.
+
+.. automodule:: deeptime.plots
+
+.. toctree::
+   :maxdepth: 1
diff --git a/docs/source/contents.rst b/docs/source/contents.rst
index a49f51e37..d06f8ba02 100644
--- a/docs/source/contents.rst
+++ b/docs/source/contents.rst
@@ -34,6 +34,7 @@ Table Of Contents
    api/index_basis
    api/index_kernels
    api/index_data
+   api/index_plots
    api/index_numeric
    api/index_util
 
diff --git a/examples/methods/plot_implied_timescales.py b/examples/methods/plot_implied_timescales.py
new file mode 100644
index 000000000..e322cef57
--- /dev/null
+++ b/examples/methods/plot_implied_timescales.py
@@ -0,0 +1,35 @@
+"""
+Implied timescales
+==================
+
+This example demonstrates how to obtain an implied timescales (ITS) plot for a Bayesian Markov state model.
+"""
+
+import matplotlib.pyplot as plt
+import numpy as np
+
+from deeptime.clustering import KMeans
+from deeptime.data import double_well_2d
+from deeptime.markov import TransitionCountEstimator
+from deeptime.markov.msm import BayesianMSM
+from deeptime.plots import ImpliedTimescalesData, plot_implied_timescales
+
+system = double_well_2d()
+data = system.trajectory(x0=np.random.normal(scale=.2, size=(10, 2)), length=1000)
+clustering = KMeans(n_clusters=50).fit_fetch(np.concatenate(data))
+dtrajs = [clustering.transform(traj) for traj in data]
+
+models = []
+lagtimes = np.arange(1, 10)
+for lagtime in lagtimes:
+    counts = TransitionCountEstimator(lagtime=lagtime, count_mode='effective').fit_fetch(dtrajs)
+    models.append(BayesianMSM(n_samples=50).fit_fetch(counts))
+
+its_data = ImpliedTimescalesData.from_models(models)
+
+fig, ax = plt.subplots(1, 1)
+plot_implied_timescales(ax, its_data, n_its=2)
+ax.set_yscale('log')
+ax.set_title('Implied timescales')
+ax.set_xlabel('lag time (steps)')
+ax.set_ylabel('timescale (steps)')
diff --git a/noxfile.py b/noxfile.py
index e8c3b5d7b..de8e58c1f 100644
--- a/noxfile.py
+++ b/noxfile.py
@@ -53,11 +53,12 @@ def tests(session: nox.Session) -> None:
 
 @nox.session(reuse_venv=True)
 def make_docs(session: nox.Session) -> None:
-    session.install("-r", "tests/requirements.txt")
-    session.install("-r", "docs/requirements.txt")
-    session.install("-e", ".", '-v', silent=False)
+    if not session.posargs or 'noinstall' not in session.posargs:
+        session.install("-r", "tests/requirements.txt")
+        session.install("-r", "docs/requirements.txt")
+        session.install("-e", ".", '-v', silent=False)
     session.chdir("docs")
-    if session.posargs and session.posargs[0] == 'clean':
+    if session.posargs and 'clean' in session.posargs:
         session.log("First run clean")
         shutil.rmtree('source/api/generated')
         shutil.rmtree('source/examples')
diff --git a/tests/clustering/test_kmeans.py b/tests/clustering/test_kmeans.py
index 4bdd34963..d9c8d8a75 100644
--- a/tests/clustering/test_kmeans.py
+++ b/tests/clustering/test_kmeans.py
@@ -411,7 +411,5 @@ def loop_callback(*_):
         assert_equal(progress_instances[0].n, 2)
         assert_equal(progress_instances[0].n_update_calls, 2)
         assert_equal(progress_instances[0].n_close_calls, 1)
-        assert_equal(progress_instances[1].n_update_calls,
-                     n_loop_callbacks if n_loop_callbacks == 5 else n_loop_callbacks + 1)
-        assert_equal(progress_instances[1].n, 5)
+        assert_equal(progress_instances[1].n, progress_instances[1].total)
         assert_equal(progress_instances[1].n_close_calls, 1)
diff --git a/tests/plots/__init__.py b/tests/plots/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/plots/test_implied_timescales.py b/tests/plots/test_implied_timescales.py
new file mode 100644
index 000000000..606c181b0
--- /dev/null
+++ b/tests/plots/test_implied_timescales.py
@@ -0,0 +1,150 @@
+import matplotlib.pyplot as plt
+import pytest
+import numpy as np
+from numpy.testing import assert_raises, assert_equal, assert_array_equal, assert_
+
+from deeptime.data import double_well_discrete, double_well_2d
+from deeptime.decomposition import TICA
+from deeptime.markov.hmm import HiddenMarkovModel, GaussianOutputModel, MaximumLikelihoodHMM, init, BayesianHMM
+from deeptime.markov.msm import MarkovStateModel, MaximumLikelihoodMSM, BayesianMSM
+from deeptime.markov import BayesianPosterior
+from deeptime.plots import plot_implied_timescales, ImpliedTimescalesData
+
+
+@pytest.fixture
+def figure():
+    f, ax = plt.subplots(1, 1)
+    yield f, ax
+
+
+def test_to_its_data_wrong_args():
+    with assert_raises(ValueError):
+        ImpliedTimescalesData.from_models([])
+
+    with assert_raises(AssertionError):
+        ImpliedTimescalesData.from_models([object])
+
+
+@pytest.mark.parametrize("model", [MarkovStateModel([[.9, .1], [.1, .9]]),
+                                   HiddenMarkovModel(MarkovStateModel([[.9, .1], [.1, .9]]),
+                                                     GaussianOutputModel(2, [0, 1], [1, 1]))],
+                         ids=lambda x: x.__class__.__name__)
+def test_to_its_data(model):
+    data = ImpliedTimescalesData.from_models([model, model])
+    assert_equal(data.lagtimes, [1, 1])
+    assert_equal(data.n_lagtimes, 2)
+    assert_equal(data.max_n_samples, 0)
+    assert_equal(data.max_n_processes, 1)
+    timescales = data.timescales_for_process(0)
+    assert_equal(len(timescales), 2)
+    assert_array_equal(timescales[0], timescales[1])
+    with assert_raises(AssertionError):
+        data.timescales_for_process(1)
+
+
+def doublewell_mlmsm(lagtime, n_samples, count_mode='sliding'):
+    return MaximumLikelihoodMSM(lagtime=lagtime).fit_fetch(double_well_discrete().dtraj_n6good, count_mode=count_mode)
+
+
+def doublewell_bmsm(lagtime, n_samples):
+    return BayesianMSM(n_samples=n_samples)\
+        .fit_fetch(doublewell_mlmsm(lagtime, 0, count_mode='effective'))
+
+
+def doublewell_hmm(lagtime, n_samples):
+    return MaximumLikelihoodHMM(
+        init.discrete.metastable_from_data(double_well_discrete().dtraj_n6good, n_hidden_states=4, lagtime=lagtime),
+        lagtime=lagtime, maxit=10, maxit_reversible=100
+    ).fit_fetch(double_well_discrete().dtraj_n6good)
+
+
+def doublewell_bhmm(lagtime, n_samples):
+    return BayesianHMM.default(double_well_discrete().dtraj_n6good, n_hidden_states=4, lagtime=lagtime,
+                               n_samples=n_samples) \
+        .fit_fetch(double_well_discrete().dtraj_n6good)
+
+
+def doublewell_tica(lagtime, n_samples):
+    return TICA(lagtime=lagtime).fit_fetch(double_well_2d().trajectory([[0, 0]], length=200))
+
+
+doublewell_models = [
+    doublewell_mlmsm,
+    doublewell_bmsm,
+    doublewell_bhmm,
+    doublewell_hmm,
+    doublewell_tica
+]
+
+
+@pytest.mark.parametrize("dw_model", doublewell_models, ids=lambda x: x.__name__)
+def test_plot_its(figure, dw_model):
+    f, ax = figure
+    lagtimes = [1, 2, 5, 10, 15, 100]
+    n_samples = [5, 10, 2, 30, 100, 20]
+
+    models = []
+    for lagtime, n in zip(lagtimes, n_samples):
+        models.append(dw_model(lagtime, n))
+    data = ImpliedTimescalesData.from_models(models)
+    assert_(data.max_n_processes >= 2)
+    assert_equal(data.has_samples, isinstance(models[0], BayesianPosterior))
+    assert_equal(data.max_n_samples, 100 if data.has_samples else 0)
+    if data.has_samples:
+        assert_equal(data.n_samples(0, 0), n_samples[0])
+        assert_equal(data.n_samples(1, 0), n_samples[1])
+        assert_equal(data.n_samples(2, 0), n_samples[2])
+        assert_equal(data.n_samples(3, 0), n_samples[3])
+        assert_equal(data.n_samples(4, 0), n_samples[4])
+        assert_equal(data.n_samples(5, 0), n_samples[5])
+    plot_implied_timescales(ax, data)
+
+
+def test_its_mixed_est_sort(figure):
+    f, ax = figure
+
+    mlmsm5 = MaximumLikelihoodMSM(lagtime=5).fit_fetch(double_well_discrete().dtraj_n6good, count_mode='effective')
+    mlmsm6 = MaximumLikelihoodMSM(lagtime=6).fit_fetch(double_well_discrete().dtraj_n6good, count_mode='effective')
+    mlmsm7 = MaximumLikelihoodMSM(lagtime=7).fit_fetch(double_well_discrete().dtraj_n6good, count_mode='effective')
+    mlmsm8 = MaximumLikelihoodMSM(lagtime=8).fit_fetch(double_well_discrete().dtraj_n6good, count_mode='effective')
+    mlmsm9 = MaximumLikelihoodMSM(lagtime=9).fit_fetch(double_well_discrete().dtraj_n6good, count_mode='effective')
+    mlmsm10 = MaximumLikelihoodMSM(lagtime=10).fit_fetch(double_well_discrete().dtraj_n6good, count_mode='effective')
+    models = [
+        BayesianMSM(n_samples=25).fit_fetch(mlmsm8),
+        mlmsm5,
+        BayesianMSM(n_samples=15).fit_fetch(mlmsm6),
+        mlmsm10,
+        mlmsm7,
+        BayesianMSM(n_samples=13).fit_fetch(mlmsm9)
+    ]
+    data = ImpliedTimescalesData.from_models(models)
+    assert_equal(data.n_lagtimes, len(models))
+    assert_equal(data.lagtimes, [5, 6, 7, 8, 9, 10])
+    assert_equal(data.max_n_samples, 25)
+    assert_equal(data.n_samples(0, 0), 0)
+    assert_equal(data.n_samples(1, 0), 15)
+    assert_equal(data.n_samples(2, 0), 0)
+    assert_equal(data.n_samples(3, 0), 25)
+    assert_equal(data.n_samples(4, 0), 13)
+    assert_equal(data.n_samples(5, 0), 0)
+    plot_implied_timescales(ax, data)
+
+
+@pytest.mark.parametrize("bayesian", [False, True])
+def test_decayed_process(bayesian):
+    dtraj = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 2, 1])  # state "2" is only visible with lagtime 1
+    msm1 = MaximumLikelihoodMSM(lagtime=1).fit_fetch(dtraj)
+    msm2 = MaximumLikelihoodMSM(lagtime=2).fit_fetch(dtraj)
+    msm3 = MaximumLikelihoodMSM(lagtime=3).fit_fetch(dtraj)
+    models = [msm1, msm2, msm3]
+    if bayesian:
+        models = [BayesianMSM(n_samples=15).fit_fetch(msm, ignore_counting_mode=True) for msm in models]
+    data = ImpliedTimescalesData.from_models(models)
+    assert_equal(data.max_n_processes, 2)
+    assert_equal(data.max_n_samples, 0 if not bayesian else 15)
+    assert_equal(data.n_lagtimes, 3)
+    assert_equal(np.count_nonzero(np.isnan(data.timescales_for_process(0))), 0)  # 0 nans
+    assert_equal(np.count_nonzero(np.isnan(data.timescales_for_process(1))), 2)  # 2 nans
+    if bayesian:
+        assert_equal(np.count_nonzero(np.isnan(data.samples_for_process(0))), 0)  # 0 nans
+        assert_equal(np.count_nonzero(np.isnan(data.samples_for_process(1))), 2 * 15)  # 0 nans
diff --git a/tests/requirements.txt b/tests/requirements.txt
index 040532a72..e04ab9ad8 100644
--- a/tests/requirements.txt
+++ b/tests/requirements.txt
@@ -7,6 +7,7 @@ pybind11==2.8.1
 scikit-learn==1.0.1
 threadpoolctl==3.0.0
 torch>=1.10.0; python_version<"3.10"
+matplotlib
 
 pytest==6.2.5
 pytest-cov