From 20dd5a67657024efe24f521ad1070bfe97c6d22c Mon Sep 17 00:00:00 2001
From: Yannick Kuhn
Date: Thu, 19 Dec 2024 17:25:00 +0100
Subject: [PATCH] drafted interface to EP-BOLFI

---
 pybop/experimental/_ep_bolfi.py             | 178 +++++++++++++++++++
 .../_expectation_propagation_cost.py        |  33 +++
 pybop/experimental/base_bayes_optimiser.py  |  70 ++++++
 3 files changed, 281 insertions(+)
 create mode 100644 pybop/experimental/_ep_bolfi.py
 create mode 100644 pybop/experimental/_expectation_propagation_cost.py
 create mode 100644 pybop/experimental/base_bayes_optimiser.py

diff --git a/pybop/experimental/_ep_bolfi.py b/pybop/experimental/_ep_bolfi.py
new file mode 100644
index 000000000..47c45ad9c
--- /dev/null
+++ b/pybop/experimental/_ep_bolfi.py
@@ -0,0 +1,178 @@
+import json
+
+import ep_bolfi
+import numpy as np
+
+from pybop import BaseOptimiser
+from pybop.experimental.base_bayes_optimiser import BayesianOptimisationResult
+from pybop.experimental.multivariate_priors import MultivariateGaussian
+
+
+class EP_BOLFI(BaseOptimiser):
+    """
+    Wraps the Bayesian Optimization algorithm EP-BOLFI.
+
+    For implementation details and background information, consult the
+    relevant publication at https://doi.org/10.1002/batt.202200374 and
+    visit https://github.com/YannickNoelStephanKuhn/EP-BOLFI.
+
+    Note that all properties may and should be given here as PyBOP
+    objects; they will be converted into an ep_bolfi.EP_BOLFI instance
+    upon instantiation of this class. To change attributes,
+    re-initialise the class.
+
+    Only compatible with MultivariateParameters that carry a
+    MultivariateGaussian prior, and with an ExpectationPropagationCost.
+    """
+
+    def model_wrapper(self, parameter_set):
+        """Adapts the PyBOP model interface to an EP-BOLFI simulator."""
+        return self.cost.problem.model.predict(
+            self.inputs, self.t_eval, parameter_set, self.experiment, self.initial_state
+        )[self.output_variable]
+
+    def _set_up_optimiser(self):
+        # Read in the simulation settings used by model_wrapper.
+        self.inputs = self.unset_options.pop("inputs", None)
+        self.t_eval = self.unset_options.pop("t_eval", None)
+        self.experiment = self.unset_options.pop("experiment", None)
+        self.initial_state = self.unset_options.pop("initial_state", None)
+        self.output_variable = self.unset_options.pop("output_variable", None)
+        # Read in EP-BOLFI-specific settings.
+        self.boundaries_in_deviations = self.unset_options.pop(
+            "boundaries_in_deviations", 0
+        )
+        self.bolfi_initial_evidence = self.unset_options.pop(
+            "bolfi_initial_evidence", None
+        )
+        self.bolfi_total_evidence = self.unset_options.pop("bolfi_total_evidence", None)
+        self.bolfi_posterior_samples = self.unset_options.pop(
+            "bolfi_posterior_samples", None
+        )
+        self.ep_iterations = self.unset_options.pop("ep_iterations", 3)
+        self.ep_dampener = self.unset_options.pop("ep_dampener", None)
+        self.final_dampening = self.unset_options.pop("final_dampening", None)
+        self.ep_dampener_reduction_steps = self.unset_options.pop(
+            "ep_dampener_reduction_steps", -1
+        )
+        self.scramble_ep_feature_order = self.unset_options.pop(
+            "scramble_ep_feature_order", False
+        )
+        self.ess_ratio_resample = self.unset_options.pop("ess_ratio_resample", 5)
+        self.ess_ratio_sampling_from_zero = self.unset_options.pop(
+            "ess_ratio_sampling_from_zero", -1
+        )
+        self.ess_ratio_abort = self.unset_options.pop("ess_ratio_abort", 20)
+        # Copy the state of a previous EP-BOLFI call, if given.
+        self.Q = self.unset_options.pop("Q", None)
+        self.r = self.unset_options.pop("r", None)
+        self.Q_features = self.unset_options.pop("Q_features", None)
+        self.r_features = self.unset_options.pop("r_features", None)
+        # Read in live feedback options.
+        self.show_trials = self.unset_options.pop("show_trials", None)
+        self.verbose = self.unset_options.pop("verbose", None)
+        # Read in auxiliary EP-BOLFI settings.
+        self.gelman_rubin_threshold = self.unset_options.pop(
+            "gelman_rubin_threshold", None
+        )
+        self.max_heuristic_steps = self.unset_options.pop("max_heuristic_steps", 10)
+        self.posterior_sampling_increase = self.unset_options.pop(
+            "posterior_sampling_increase", 1.2
+        )
+        self.model_resampling_increase = self.unset_options.pop(
+            "model_resampling_increase", 1.1
+        )
+        self.independent_mcmc_chains = self.unset_options.pop(
+            "independent_mcmc_chains", 4
+        )
+        self.seed = self.unset_options.pop("seed", -1)
+        # EP-BOLFI expects the boundaries as {name: [lower, upper]}.
+        transposed_boundaries = {}
+        for i, name in enumerate(self.parameters.param.keys()):
+            transposed_boundaries[name] = [
+                self.bounds["lower"][i],
+                self.bounds["upper"][i],
+            ]
+        # EP-BOLFI can handle multiple simulators at once, hence the
+        # lists. ToDo: mediate this between EP-BOLFI and PyBOP.
+        self.optimiser = ep_bolfi.EP_BOLFI(
+            [self.model_wrapper],
+            [self.cost.problem.dataset],
+            self.cost.costs,
+            fixed_parameters={},  # probably baked into self.problem.model
+            free_parameters={k: v.initial_value for k, v in self.parameters.items()},
+            initial_covariance=self.parameters.prior.properties["cov"],
+            free_parameters_boundaries=transposed_boundaries,
+            boundaries_in_deviations=self.boundaries_in_deviations,
+            Q=self.Q,
+            r=self.r,
+            Q_features=self.Q_features,
+            r_features=self.r_features,
+            transform_parameters={},  # might be handled within PyBOP
+            weights=None,  # only applicable within vector-valued features
+            display_current_feature=None,  # ToDo: costs with names
+            fixed_parameter_order=list(self.parameters.param.keys()),
+        )
+
+    def _run(self):
+        # bolfi_posterior is the full GPy object containing the state
+        # at the end of the last feature iteration, while the
+        # MultivariateGaussian below is a slight approximation of it.
+        self.bolfi_posterior = self.optimiser.run(
+            self.bolfi_initial_evidence,
+            self.bolfi_total_evidence,
+            self.bolfi_posterior_samples,
+            self.ep_iterations,
+            self.ep_dampener,
+            self.final_dampening,
+            self.ep_dampener_reduction_steps,
+            self.gelman_rubin_threshold,
+            self.ess_ratio_resample,
+            self.ess_ratio_sampling_from_zero,
+            self.ess_ratio_abort,
+            self.max_heuristic_steps,
+            self.posterior_sampling_increase,
+            self.model_resampling_increase,
+            self.independent_mcmc_chains,
+            self.scramble_ep_feature_order,
+            self.show_trials,
+            self.verbose,
+            self.seed,
+        )
+        ep_bolfi_result = json.loads(self.optimiser.result_to_json(seed=self.seed))
+        # The trial log is keyed by parameter name; every entry has
+        # the same length.
+        len_log = len(
+            self.optimiser.log_of_tried_parameters[
+                list(self.parameters.param.keys())[0]
+            ]
+        )
+        # Transpose {name: [values]} into a list of parameter sets.
+        transposed_log = [[] for _ in range(len_log)]
+        for log in self.optimiser.log_of_tried_parameters.values():
+            for j, entry in enumerate(log):
+                transposed_log[j].append(entry)
+        # Reset the PyBOP log, then replay the EP-BOLFI trials into it.
+        for key in self.log.keys():
+            self.log[key] = []
+        self.log_update(x=transposed_log)
+        mean = np.array(list(ep_bolfi_result["inferred_parameters"].values()))
+        return BayesianOptimisationResult(
+            x=mean,
+            posterior=MultivariateGaussian(
+                mean, np.array(ep_bolfi_result["covariance"])
+            ),
+            cost=self.cost,
+            n_iterations={
+                "model evaluations": len_log,
+                "EP iterations": self.ep_iterations,
+                "total feature iterations": self.ep_iterations * len(self.cost.costs),
+            },
+            optim=self.optimiser,
+        )
+
+    def name(self):
+        return (
+            "Expectation Propagation with Bayesian Optimization for "
+            "Likelihood-Free Inference"
+        )

diff --git a/pybop/experimental/_expectation_propagation_cost.py b/pybop/experimental/_expectation_propagation_cost.py
new file mode 100644
index 000000000..a88b472bd
--- /dev/null
+++ b/pybop/experimental/_expectation_propagation_cost.py
@@ -0,0 +1,33 @@
+from pybop import BaseCost
+
+
+class ExpectationPropagationCost(BaseCost):
+    """
+    A subclass for managing a cost that consists of several features.
+
+    Parameters
+    ----------
+    costs : pybop.BaseCost
+        The individual PyBOP cost objects.
+
+    Attributes
+    ----------
+    has_identical_problems : bool
+        If True, the shared problem will be evaluated once and saved
+        before the self.compute() method of each cost is called
+        (default: False).
+    has_separable_problem : bool
+        This attribute must be set to False for
+        ExpectationPropagationCost objects. If the corresponding
+        attribute of an individual cost is True, the problem is
+        separable from the cost function and will be evaluated before
+        the individual cost evaluation is called.
+    """
+
+    def __init__(self, *costs):
+        if not all(isinstance(cost, BaseCost) for cost in costs):
+            raise TypeError("All costs must be instances of BaseCost.")
+        self.costs = list(costs)
+        # All costs are assumed to share one underlying problem; see
+        # has_identical_problems above.
+        self.problem = self.costs[0].problem

diff --git a/pybop/experimental/base_bayes_optimiser.py b/pybop/experimental/base_bayes_optimiser.py
new file mode 100644
index 000000000..9d7985cf5
--- /dev/null
+++ b/pybop/experimental/base_bayes_optimiser.py
@@ -0,0 +1,70 @@
+from typing import Optional, Union
+
+import numpy as np
+
+from pybop import (
+    BaseCost,
+    BaseOptimiser,
+    BasePrior,
+    Inputs,
+)
+
+
+class BaseBayesOptimiser(BaseOptimiser):
+    """
+    Base class for Bayesian optimisers, which return a probability
+    distribution over the parameters instead of a point estimate.
+    """
+
+    def __init__(
+        self,
+        cost,
+        **optimiser_kwargs,
+    ):
+        super().__init__(cost, **optimiser_kwargs)
+
+
+class BayesianOptimisationResult:
+    """
+    Stores the result of a Bayesian optimisation.
+
+    Attributes
+    ----------
+    x : ndarray
+        The MAP (maximum a posteriori) estimate of the optimisation.
+    posterior : pybop.BasePrior
+        The probability distribution of the optimisation. (PyBOP
+        currently handles all probability distributions as "Priors".)
+    cost : pybop.BaseCost
+        The cost object used in the optimisation.
+    final_cost : float
+        The cost associated with the MAP ``x``.
+    n_iterations : int or dict
+        Number of iterations performed by the optimiser. Since Bayesian
+        optimisers tend to layer several optimisation algorithms, the
+        individual iteration counts may be given as a dict.
+    optim : pybop.BaseOptimiser
+        The instance of the utilised optimisation algorithm.
+    time : float or dict
+        The wall-clock time of the optimiser in seconds. This may be
+        given as a dict to also store the processing-unit time.
+    """
+
+    def __init__(
+        self,
+        x: Union[Inputs, np.ndarray] = None,
+        posterior: BasePrior = None,
+        cost: Union[BaseCost, None] = None,
+        final_cost: Optional[float] = None,
+        n_iterations: Union[int, dict, None] = None,
+        optim: Optional[BaseOptimiser] = None,
+        time: Union[float, dict, None] = None,
+    ):
+        # There is no base class, so store the attributes directly.
+        self.x = x
+        self.posterior = posterior
+        self.cost = cost
+        self.final_cost = final_cost
+        self.n_iterations = n_iterations
+        self.optim = optim
+        self.time = time
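
Usage sketch (reviewer note, not part of the patch): the snippet below shows how the drafted interface is meant to be driven end to end. It assumes an already-built pybop.FittingProblem named `problem` whose MultivariateParameters carry a MultivariateGaussian prior; the per-feature cost classes, the option values, and the output variable name are illustrative placeholders rather than fixed API.

    import pybop

    from pybop.experimental._ep_bolfi import EP_BOLFI
    from pybop.experimental._expectation_propagation_cost import (
        ExpectationPropagationCost,
    )

    # One PyBOP cost per EP-BOLFI feature; EP refines its Gaussian
    # approximation one feature at a time.
    cost = ExpectationPropagationCost(
        pybop.SumSquaredError(problem),
        pybop.RootMeanSquaredError(problem),
    )

    # All EP-BOLFI settings travel through the optimiser kwargs and
    # are popped from unset_options in _set_up_optimiser().
    optim = EP_BOLFI(
        cost,
        output_variable="Voltage [V]",
        ep_iterations=3,
        bolfi_initial_evidence=20,
        bolfi_total_evidence=100,
        bolfi_posterior_samples=1000,
        seed=0,
    )

    result = optim.run()
    print(result.x)          # posterior mean, reported as the MAP
    print(result.posterior)  # MultivariateGaussian approximation

Keeping the features as separate cost objects is what lets EP-BOLFI treat each one as an expectation-propagation site, so the Gaussian posterior is updated feature by feature instead of against one lumped cost.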