Adds Monte Carlo Samplers #340

Merged 44 commits (Sep 2, 2024)
Changes from 1 commit of 44

Commits
89ae3e6
feat: initial Monte Carlo classes
BradyPlanden May 24, 2024
3c48f88
feat: updt __init__.py, add LogPosterior
BradyPlanden May 24, 2024
a62a126
tests: add unit tests for MCMC samplers
BradyPlanden Jun 3, 2024
90bb173
Merge branch 'develop' into monte-carlo-methods
BradyPlanden Jun 3, 2024
fbe56a4
fix parallel for windows
BradyPlanden Jun 3, 2024
8f74b6d
tests: additional unit tests, refactors priors class
BradyPlanden Jun 3, 2024
2d83315
tests: increase coverage, adds monte carlo integration test
BradyPlanden Jun 5, 2024
5ed3d23
tests: increase coverage, bugfix multi_log_pdf logic
BradyPlanden Jun 5, 2024
ca961a2
tests: increase coverage, update priors on intesampling integration t…
BradyPlanden Jun 5, 2024
da21506
tests: increment coverage, refactor prior np.inf catch
BradyPlanden Jun 5, 2024
ce1cb54
refactor: removes redundant code
BradyPlanden Jun 5, 2024
c86531b
Merge branch 'develop', updts for Parameters class
BradyPlanden Jun 7, 2024
3e4c01e
refactor: adds improvements from parameters class
BradyPlanden Jun 7, 2024
b5ec8fe
feat: Adds burn-in functionality for sampling class
BradyPlanden Jun 15, 2024
f71bf6a
Merge branch 'develop' into monte-carlo-methods
BradyPlanden Jun 17, 2024
1f7c6cb
Merge branch 'develop' into monte-carlo-methods
BradyPlanden Jun 18, 2024
c8db9f5
fix: correct sigma0 to cov0
BradyPlanden Jun 18, 2024
eaaebb2
Merge branch 'develop' into monte-carlo-methods
BradyPlanden Jul 3, 2024
fb97b5d
refactor: move general methods into parent class, replace burn_in wit…
BradyPlanden Jul 3, 2024
942dc5e
Merge branch 'develop' into monte-carlo-methods
BradyPlanden Jul 4, 2024
990c590
Apply suggestions from code review
BradyPlanden Jul 4, 2024
5f89231
Merge branch 'develop' into monte-carlo-methods
BradyPlanden Jul 16, 2024
74b1f30
Merge branch 'develop' into monte-carlo-methods
BradyPlanden Jul 16, 2024
13aa83f
refactor: log_pdf to base class, update docstrings.
BradyPlanden Jul 21, 2024
0b32889
Adds catches and initialisation for x0, update tests
BradyPlanden Jul 21, 2024
0117066
Merge branch 'refs/heads/develop' into monte-carlo-methods
BradyPlanden Aug 6, 2024
251f86f
feat: updates for transformation class, cleanup
BradyPlanden Aug 7, 2024
5bb4d94
fix: uniformly apply bound transformations, update LogPosterior
BradyPlanden Aug 7, 2024
a5244a4
Merge branch 'refs/heads/develop' into monte-carlo-methods
BradyPlanden Aug 7, 2024
255aa5d
Merge branch 'refs/heads/develop' into monte-carlo-methods
BradyPlanden Aug 7, 2024
c9946da
Merge branch 'refs/heads/develop' into monte-carlo-methods
BradyPlanden Aug 13, 2024
cd07072
refactor: ComposedLogPrior -> JointLogPrior, prior.evaluateS1 -> logp…
BradyPlanden Aug 13, 2024
08dc407
fix: update tests for low convergence sampler
BradyPlanden Aug 13, 2024
c225065
refactor: update priors, refactor JointLogPrior
BradyPlanden Aug 14, 2024
4df0885
tests: update unit tests and increase coverage.
BradyPlanden Aug 14, 2024
e50812a
refactor: base_sampler init, update docstrings, update tests, remove …
BradyPlanden Aug 14, 2024
7a000cf
tests: increase coverage, remove redundant ValueError, sampler.chains…
BradyPlanden Aug 14, 2024
711dcc8
tests: restore parallel optimisation with thread limit to 1
BradyPlanden Aug 14, 2024
df1cc73
Merge branch 'refs/heads/develop' into monte-carlo-methods
BradyPlanden Aug 22, 2024
bca3bbb
Merge branch 'refs/heads/develop' into monte-carlo-methods
BradyPlanden Aug 22, 2024
248b161
Merge branch 'refs/heads/develop' into monte-carlo-methods
BradyPlanden Aug 29, 2024
503af19
Refactor and bugfixes. Adds gradient-based integration sampling tests…
BradyPlanden Aug 29, 2024
85e1ce1
Remainder review suggestions, update assert tolerances, small array d…
BradyPlanden Aug 30, 2024
8a928af
tests: increment iterations from scheduled test run
BradyPlanden Sep 2, 2024
feat: initial Monte Carlo classes
BradyPlanden committed May 24, 2024
commit 89ae3e6b11df9e0eb1395b4159a25fe7670878a8
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -2,6 +2,7 @@

## Features

- [#6](https://github.com/pybop-team/PyBOP/issues/6) - Adds Monte Carlo functionality, with methods based on Pints' algorithms. Adds a base class, `BaseSampler`, in addition to `PintsBaseSampler`.
- [#321](https://github.com/pybop-team/PyBOP/pull/321) - Updates Prior classes with BaseClass, adds a `problem.sample_initial_conditions` method to improve stability of SciPy.Minimize optimiser.
- [#249](https://github.com/pybop-team/PyBOP/pull/249) - Add WeppnerHuggins model and GITT example.
- [#304](https://github.com/pybop-team/PyBOP/pull/304) - Decreases the testing suite completion time.
91 changes: 91 additions & 0 deletions examples/scripts/mcmc_example.py
@@ -0,0 +1,91 @@
import numpy as np
import plotly.graph_objects as go

import pybop

# Parameter set and model definition
parameter_set = pybop.ParameterSet.pybamm("Chen2020")
model = pybop.lithium_ion.SPM(parameter_set=parameter_set)

# Fitting parameters
parameters = [
pybop.Parameter(
"Negative electrode active material volume fraction",
prior=pybop.GaussianLogPrior(0.68, 0.05),
),
pybop.Parameter(
"Positive electrode active material volume fraction",
prior=pybop.GaussianLogPrior(0.58, 0.05),
),
]

# Generate data
init_soc = 0.5
sigma = 0.001
experiment = pybop.Experiment(
[
(
"Discharge at 0.5C for 3 minutes (2 second period)",
"Charge at 0.5C for 3 minutes (2 second period)",
),
]
* 2
)
values = model.predict(init_soc=init_soc, experiment=experiment)


def noise(sigma):
return np.random.normal(0, sigma, len(values["Voltage [V]"].data))


# Form dataset
dataset = pybop.Dataset(
{
"Time [s]": values["Time [s]"].data,
"Current function [A]": values["Current [A]"].data,
"Voltage [V]": values["Voltage [V]"].data + noise(sigma),
"Bulk open-circuit voltage [V]": values["Bulk open-circuit voltage [V]"].data
+ noise(sigma),
}
)

signal = ["Voltage [V]", "Bulk open-circuit voltage [V]"]

# Generate problem, cost function, and optimisation class
problem = pybop.FittingProblem(
model, parameters, dataset, signal=signal, init_soc=init_soc
)
likelihood = pybop.GaussianLogLikelihoodKnownSigma(problem, sigma=[0.02, 0.02])
prior1 = pybop.GaussianLogPrior(0.7, 0.02)
prior2 = pybop.GaussianLogPrior(0.6, 0.02)
composed_prior = pybop.ComposedLogPrior(prior1, prior2)
posterior = pybop.LogPosterior(likelihood, composed_prior)
x0 = [[0.68, 0.58], [0.68, 0.58], [0.68, 0.58]]

optim = pybop.DREAM(
posterior,
chains=3,
x0=x0,
max_iterations=400,
initial_phase_iterations=250,
parallel=True,
)
result = optim.run()


# Create a histogram
fig = go.Figure()
for i, data in enumerate(result):
fig.add_trace(go.Histogram(x=data[:, 0], name="Neg", opacity=0.75))
fig.add_trace(go.Histogram(x=data[:, 1], name="Pos", opacity=0.75))

# Update layout for better visualization
fig.update_layout(
title="Posterior distribution of volume fractions",
xaxis_title="Value",
yaxis_title="Count",
barmode="overlay",
)

# Show the plot
fig.show()
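The script above returns one chain per entry of `x0`, and the histogram gives a visual check only; a quick per-chain numeric summary is often useful alongside it. A minimal standalone sketch — the helper name `summarise_chains` is illustrative, and the chains here are synthetic stand-ins rather than output of the script:

```python
import random
import statistics

def summarise_chains(chains):
    """Return (mean, stdev) for each chain of a single parameter."""
    return [(statistics.fmean(c), statistics.stdev(c)) for c in chains]

# Synthetic stand-in for sampler output: 3 chains of 1000 draws each,
# centred near the prior mean used in the example (0.68).
rng = random.Random(0)
chains = [[rng.gauss(0.68, 0.05) for _ in range(1000)] for _ in range(3)]

for i, (m, s) in enumerate(summarise_chains(chains)):
    print(f"chain {i}: mean={m:.3f}, stdev={s:.3f}")
```

Comparing these summaries across chains is a cheap first check that the chains agree before trusting the pooled histogram.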
161 changes: 161 additions & 0 deletions pybop/parameters/priors.py
@@ -120,6 +120,10 @@ def sigma(self):
"""
return self.scale

@property
def n_parameters(self):
return self._n_parameters


class Gaussian(BasePrior):
"""
@@ -199,3 +203,160 @@ def __init__(self, scale, loc=0, random_state=None):
self.loc = loc
self.scale = scale
self.prior = stats.expon


class GaussianLogPrior(BasePrior):
"""
Represents a Gaussian (normal) distribution with a given mean and standard
deviation, used as a log-prior.

This class provides methods to evaluate the logarithm of the probability
density function, its first derivative, and the cumulative distribution
function and its inverse.

Parameters
----------
mean : float
The mean of the Gaussian distribution.
sigma : float
The standard deviation of the Gaussian distribution.
"""

def __init__(self, mean, sigma, random_state=None):
self.name = "Gaussian Log Prior"
self.loc = mean
self.scale = sigma
self.prior = stats.norm
self._offset = -0.5 * np.log(2 * np.pi * self.scale**2)
self.sigma2 = self.scale**2
self._multip = -1 / (2.0 * self.sigma2)
self._n_parameters = 1

def __call__(self, x):
"""
Evaluates the Gaussian log-prior at x.

Parameters
----------
x : float
The point(s) at which to evaluate the distribution.

Returns
-------
float
The value(s) of the distribution at x.
"""
x = np.atleast_1d(np.asarray(x))
return self._offset + self._multip * (x[0] - self.loc) ** 2

def evaluateS1(self, x):
"""
Evaluates the first derivative of the Gaussian log-prior at x.

Parameters
----------
x : float
The point(s) at which to evaluate the first derivative.

Returns
-------
float
The value(s) of the first derivative at x.
"""
if not isinstance(x, np.ndarray):
x = np.asarray(x)
return self(x), 2 * self._multip * (x - self.loc)

def icdf(self, q):
"""
Calculates the inverse cumulative distribution function (CDF) of the distribution at q.

Parameters
----------
q : float
The point(s) at which to evaluate the inverse CDF.

Returns
-------
float
The inverse cumulative distribution function value at q.
"""
return self.prior.ppf(q, loc=self.loc, scale=self.scale)

def cdf(self, x):
"""
Calculates the cumulative distribution function (CDF) of the distribution at x.

Parameters
----------
x : float
The point(s) at which to evaluate the CDF.

Returns
-------
float
The cumulative distribution function value at x.
"""
return self.prior.cdf(x, loc=self.loc, scale=self.scale)


class ComposedLogPrior(BasePrior):
"""
Represents a composition of multiple prior distributions.
"""

def __init__(self, *priors):
self._priors = priors
for prior in priors:
if not isinstance(prior, BasePrior):
raise ValueError("All priors must be instances of BasePrior")

self._n_parameters = sum(prior.n_parameters for prior in priors)

def __call__(self, x):
"""
Evaluates the composed prior distribution at x.

Parameters
----------
x : float
The point(s) at which to evaluate the distribution.

Returns
-------
float
The value(s) of the distribution at x.
"""
if not isinstance(x, np.ndarray):
x = np.asarray(x)
return sum(prior(x) for prior in self._priors)

def evaluateS1(self, x):
"""
Evaluates the first derivative of the composed prior distribution at x.
Inspired by PINTS implementation.

*This method only works if all of the underlying priors implement the
optional method ``evaluateS1``.*

Parameters
----------
x : float
The point(s) at which to evaluate the first derivative.

Returns
-------
float
The value(s) of the first derivative at x.
"""
output = 0
doutput = np.zeros(self.n_parameters)
index = 0

for prior in self._priors:
num_params = prior.n_parameters
x_subset = x[index : index + num_params]
p, dp = prior.evaluateS1(x_subset)
output += p
doutput[index : index + num_params] = dp
index += num_params

return output, doutput
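The `GaussianLogPrior` above reduces to simple closed-form arithmetic: a constant offset `-0.5 * log(2πσ²)`, a quadratic term scaled by `-1/(2σ²)`, and the analytic derivative `-(x - mean)/σ²`. A standalone sketch of that arithmetic, independent of pybop (function names here are illustrative), useful for sanity-checking the `_offset` and `_multip` constants:

```python
import math

def gaussian_logpdf(x, mean, sigma):
    """Log-density of a Gaussian: offset + multip * (x - mean)**2."""
    offset = -0.5 * math.log(2 * math.pi * sigma**2)
    multip = -1.0 / (2.0 * sigma**2)
    return offset + multip * (x - mean) ** 2

def gaussian_logpdf_grad(x, mean, sigma):
    """Analytic derivative of the log-density: -(x - mean) / sigma**2."""
    return -(x - mean) / sigma**2

# Standard normal at x = 0: log(1/sqrt(2*pi)) ≈ -0.9189
print(gaussian_logpdf(0.0, 0.0, 1.0))
print(gaussian_logpdf_grad(1.0, 0.0, 1.0))  # -1.0
```

A central finite difference of `gaussian_logpdf` should agree with `gaussian_logpdf_grad` to a few decimal places, which is a quick way to catch a sign or factor-of-two slip in the gradient.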
72 changes: 72 additions & 0 deletions pybop/samplers/__init__.py
@@ -0,0 +1,72 @@
import numpy as np
from pints import ParallelEvaluator
import warnings

class BaseSampler:
"""
Base class for Monte Carlo samplers.
"""
def __init__(self, x0, sigma0):
"""
Initialise the base sampler.

Args:
x0 (array-like): Initial position(s) for the sampler chains.
sigma0 (float or array-like): Initial standard deviation(s) of the proposal distribution.
"""
self._x0 = x0
self._sigma0 = sigma0

def run(self) -> np.ndarray:
"""
Sample from the posterior distribution.

Returns:
np.ndarray: Samples from the posterior distribution.
"""
raise NotImplementedError

def set_initial_phase_iterations(self, iterations=250):
"""
Set the number of iterations for the initial phase of the sampler.

Args:
iterations (int): Number of iterations for the initial phase.
"""
self._initial_phase_iterations = iterations

def set_max_iterations(self, iterations=500):
"""
Set the maximum number of iterations for the sampler.

Args:
iterations (int): Maximum number of iterations.
"""
iterations = int(iterations)
if iterations < 1:
raise ValueError("Number of iterations must be greater than 0")

self._max_iterations = iterations

def set_parallel(self, parallel=False):
"""
Enable or disable parallel evaluation.
Credit: PINTS

Parameters
----------
parallel : bool or int, optional
If True, use as many worker processes as there are CPU cores. If an integer, use that many workers.
If False or 0, disable parallelism (default: False).
"""
if parallel is True:
self._parallel = True
self._n_workers = ParallelEvaluator.cpu_count()
elif parallel >= 1:
self._parallel = True
self._n_workers = int(parallel)
else:
self._parallel = False
self._n_workers = 1
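`BaseSampler` above leaves `run` abstract for subclasses to implement. As a sketch of what a concrete sampler with this shape of interface does, here is a toy one-dimensional random-walk Metropolis sampler in plain Python — the class name and its details are illustrative assumptions, not one of the Pints-based samplers this PR adds:

```python
import math
import random

class ToyMetropolisSampler:
    """Minimal random-walk Metropolis, illustrating a BaseSampler-style interface."""

    def __init__(self, log_pdf, x0, sigma0=1.0, max_iterations=500, seed=0):
        self._log_pdf = log_pdf          # target log-density (up to a constant)
        self._x0 = x0                    # initial position
        self._sigma0 = sigma0            # proposal standard deviation
        self._max_iterations = max_iterations
        self._rng = random.Random(seed)

    def run(self):
        x = self._x0
        fx = self._log_pdf(x)
        samples = []
        for _ in range(self._max_iterations):
            # Gaussian random-walk proposal
            y = x + self._rng.gauss(0.0, self._sigma0)
            fy = self._log_pdf(y)
            # Accept with probability min(1, p(y)/p(x)), in log space
            if math.log(self._rng.random()) < fy - fx:
                x, fx = y, fy
            samples.append(x)
        return samples

# Target: standard normal log-density (normalising constant omitted)
samples = ToyMetropolisSampler(lambda x: -0.5 * x * x, x0=0.0,
                               max_iterations=5000).run()
```

Working in log space (`math.log(u) < fy - fx`) avoids overflow for very negative log-densities, which is why sampler interfaces like the one above are built around log-pdfs rather than raw densities.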