From 1e6118f00155192508aedf0e12cf0df126cff5d4 Mon Sep 17 00:00:00 2001
From: Prateek Bhustali <p.bhustali@tu-braunschweig.de>
Date: Mon, 9 May 2022 03:23:33 +0200
Subject: [PATCH] Added generalised sobol sensitivity

---
 src/UQpy/sensitivity/__init__.py          |   2 +
 src/UQpy/sensitivity/generalised_sobol.py | 378 ++++++++++++++++++++++
 2 files changed, 380 insertions(+)
 create mode 100644 src/UQpy/sensitivity/generalised_sobol.py

diff --git a/src/UQpy/sensitivity/__init__.py b/src/UQpy/sensitivity/__init__.py
index 73c4166d..e25335a3 100644
--- a/src/UQpy/sensitivity/__init__.py
+++ b/src/UQpy/sensitivity/__init__.py
@@ -3,9 +3,11 @@
 from UQpy.sensitivity.sobol import Sobol
 from UQpy.sensitivity.cramer_von_mises import CramervonMises
 from UQpy.sensitivity.chatterjee import Chatterjee
+from UQpy.sensitivity.generalised_sobol import GeneralisedSobol
 
 from . import MorrisSensitivity
 from . import PceSensitivity
 from . import Sobol
 from . import CramervonMises
 from . import Chatterjee
+from . import GeneralisedSobol
diff --git a/src/UQpy/sensitivity/generalised_sobol.py b/src/UQpy/sensitivity/generalised_sobol.py
new file mode 100644
index 00000000..e5cf2f65
--- /dev/null
+++ b/src/UQpy/sensitivity/generalised_sobol.py
@@ -0,0 +1,378 @@
+"""
+
+The GeneralisedSobol class computes the generalised Sobol indices for a given
+multi-ouput model. The class is based on the work of [1]_ and [2]_.
+
+Additionally, we can compute the confidence intervals for the Sobol indices 
+using bootstrapping [3]_.
+
+References
+----------
+
+ .. [1] Gamboa F, Janon A, Klein T, Lagnoux A, others.
+        Sensitivity analysis for multidimensional and functional outputs.
+        Electronic journal of statistics 2014; 8(1): 575-603.
+
+ .. [2] Alexanderian A, Gremaud PA, Smith RC. Variance-based sensitivity 
+        analysis for time-dependent processes. Reliability engineering 
+        & system safety 2020; 196: 106722.
+
+.. [3] Jeremy Orloff and Jonathan Bloom (2014), Bootstrap confidence intervals, 
+       Introduction to Probability and Statistics, MIT OCW. 
+
+"""
+
+import logging
+
+import numpy as np
+
+from UQpy.sensitivity.baseclass.sensitivity import Sensitivity
+from UQpy.sensitivity.baseclass.pickfreeze import generate_pick_freeze_samples
+from UQpy.utilities.UQpyLoggingFormatter import UQpyLoggingFormatter
+
+
+class GeneralisedSobol(Sensitivity):
+    """
+    Compute the generalised Sobol indices for models with multiple outputs
+    (vector-valued response) using the Pick-and-Freeze method.
+
+    :param runmodel_object: The computational model. It should be of type :class:`.RunModel`. \
+        The output QoI can be a scalar or vector of length :code:`ny`, then the sensitivity \
+        indices of all :code:`ny` outputs are computed independently.
+
+    :param distributions: List of :class:`.Distribution` objects corresponding to each \
+        random variable, or :class:`.JointIndependent` object \
+        (multivariate RV with independent marginals).
+
+    :param random_state: Random seed used to initialize the pseudo-random number \
+        generator. Default is :any:`None`.
+
+    **Methods:**
+    """
+
+    def __init__(
+        self, runmodel_object, dist_object, random_state=None, **kwargs
+    ) -> None:
+
+        super().__init__(runmodel_object, dist_object, random_state, **kwargs)
+
+        # Create logger with the same name as the class
+        self.logger = logging.getLogger(__name__)
+        self.logger.setLevel(logging.ERROR)
+        frmt = UQpyLoggingFormatter()
+
+        # create console handler with a higher log level
+        ch = logging.StreamHandler()
+        ch.setFormatter(frmt)
+
+        # add the handler to the logger
+        self.logger.addHandler(ch)
+
+        self.gen_sobol_i = None
+        "Generalised first order Sobol indices, :class:`ndarray` of shape (num_vars, 1)"
+
+        self.gen_sobol_total_i = None
+        "Generalised total order Sobol indices, :class:`ndarray` of shape (num_vars, 1)"
+
+        self.n_samples = None
+        "Number of samples used to compute the sensitivity indices, :class:`int`"
+
+        self.num_vars = None
+        "Number of model input variables, :class:`int`"
+
+    def run(
+        self,
+        n_samples=1_000,
+        num_bootstrap_samples=None,
+        confidence_level=0.95,
+    ):
+
+        """
+        Compute the generalised Sobol indices for models with multiple outputs
+        (vector-valued response) using the Pick-and-Freeze method.
+
+        :param n_samples: Number of samples used to compute the sensitivity indices. \
+            Default is 1,000.
+
+        :param num_bootstrap_samples: Number of bootstrap samples used to compute the \
+            confidence intervals. Default is :any:`None`.
+
+        :param confidence_level: Confidence level used to compute the confidence \
+            intervals. Default is 0.95.
+
+        :return: A :class:`dict` with the following keys: \
+            :code:`gen_sobol_i` of shape :code:`(num_vars, 1)`, \
+            :code:`gen_sobol_total_i` of shape :code:`(num_vars, 1)`, \
+            :code:`CI_gen_sobol_i` of shape :code:`(num_vars, 2)`, \
+            :code:`CI_gen_sobol_total_i` of shape :code:`(num_vars, 2)`.
+
+        """
+
+        # Check n_samples data type
+        self.n_samples = n_samples
+        if not isinstance(self.n_samples, int):
+            raise TypeError("UQpy: n_samples should be an integer")
+
+        # Check num_bootstrap_samples data type
+        if num_bootstrap_samples is not None:
+            if not isinstance(num_bootstrap_samples, int):
+                raise TypeError("UQpy: num_bootstrap_samples should be an integer.\n")
+        elif num_bootstrap_samples is None:
+            self.logger.info(
+                "UQpy: num_bootstrap_samples is set to None, confidence intervals will not be computed.\n"
+            )
+
+        ################## GENERATE SAMPLES ##################
+
+        (A_samples, B_samples, C_i_generator, _,) = generate_pick_freeze_samples(
+            self.dist_object, self.n_samples, self.random_state
+        )
+
+        self.logger.info("UQpy: Generated samples using the pick-freeze scheme.\n")
+
+        self.num_vars = A_samples.shape[1]  # Number of variables
+
+        ################# MODEL EVALUATIONS ####################
+
+        A_model_evals = self._run_model(A_samples)  # shape: (n_samples, n_outputs)
+
+        # if model output is vectorised,
+        # shape retured by model is (n_samples, n_outputs, 1)
+        # we need to reshape it to (n_samples, n_outputs)
+        if A_model_evals.ndim == 3:
+            A_model_evals = A_model_evals[:, :, 0]  # shape: (n_samples, n_outputs)
+
+        self.logger.info("UQpy: Model evaluations A completed.\n")
+
+        B_model_evals = self._run_model(B_samples)  # shape: (n_samples, n_outputs)
+
+        # if model output is vectorised,
+        # shape retured by model is (n_samples, n_outputs, 1)
+        # we need to reshape it to (n_samples, n_outputs)
+        if B_model_evals.ndim == 3:
+            B_model_evals = B_model_evals[:, :, 0]  # shape: (n_samples, n_outputs)
+
+        self.logger.info("UQpy: Model evaluations B completed.\n")
+
+        self.n_outputs = A_model_evals.shape[1]
+
+        # shape: (n_outputs, n_samples, num_vars)
+        C_i_model_evals = np.zeros((self.n_outputs, self.n_samples, self.num_vars))
+
+        for i, C_i in enumerate(C_i_generator):
+
+            # if model output is vectorised,
+            # shape retured by model is (n_samples, n_outputs, 1)
+            # we need to reshape it to (n_samples, n_outputs)
+            model_evals = self._run_model(C_i)
+
+            if model_evals.ndim == 3:
+                C_i_model_evals[:, :, i] = self._run_model(C_i)[:, :, 0].T
+            else:
+                C_i_model_evals[:, :, i] = model_evals.T
+
+        self.logger.info("UQpy: Model evaluations C completed.\n")
+
+        self.logger.info("UQpy: All model evaluations computed successfully.\n")
+
+        ######################### STORAGE ########################
+
+        # Create dictionary to store the sensitivity indices
+        computed_indices = {}
+
+        ################## COMPUTE GENERALISED SOBOL INDICES ##################
+
+        self.gen_sobol_i = self.compute_first_order_generalised_sobol_indices(
+            A_model_evals, B_model_evals, C_i_model_evals
+        )
+
+        self.logger.info(
+            "UQpy: First order Generalised Sobol indices computed successfully.\n"
+        )
+
+        self.gen_sobol_total_i = self.compute_total_order_generalised_sobol_indices(
+            A_model_evals, B_model_evals, C_i_model_evals
+        )
+
+        self.logger.info(
+            "UQpy: Total order Generalised Sobol indices computed successfully.\n"
+        )
+
+        # Store the indices in the dictionary
+        computed_indices["gen_sobol_i"] = self.gen_sobol_i
+        computed_indices["gen_sobol_total_i"] = self.gen_sobol_total_i
+
+        ################## CONFIDENCE INTERVALS ####################
+
+        if num_bootstrap_samples is not None:
+
+            self.logger.info("UQpy: Computing confidence intervals ...\n")
+
+            estimator_inputs = [
+                A_model_evals,
+                B_model_evals,
+                C_i_model_evals,
+            ]
+
+            # First order generalised Sobol indices
+            self.CI_gen_sobol_i = self.bootstrapping(
+                self.compute_first_order_generalised_sobol_indices,
+                estimator_inputs,
+                computed_indices["gen_sobol_i"],
+                num_bootstrap_samples,
+                confidence_level,
+            )
+
+            self.logger.info(
+                "UQpy: Confidence intervals for First order Generalised Sobol indices computed successfully.\n"
+            )
+
+            # Total order generalised Sobol indices
+            self.CI_gen_sobol_total_i = self.bootstrapping(
+                self.compute_total_order_generalised_sobol_indices,
+                estimator_inputs,
+                computed_indices["gen_sobol_total_i"],
+                num_bootstrap_samples,
+                confidence_level,
+            )
+
+            self.logger.info(
+                "UQpy: Confidence intervals for Total order Sobol Generalised indices computed successfully.\n"
+            )
+
+            # Store the indices in the dictionary
+            computed_indices["CI_gen_sobol_i"] = self.CI_gen_sobol_i
+            computed_indices["CI_gen_sobol_total_i"] = self.CI_gen_sobol_total_i
+
+        return computed_indices
+
+    @staticmethod
+    def compute_first_order_generalised_sobol_indices(
+        A_model_evals, B_model_evals, C_i_model_evals
+    ):
+
+        """
+        Compute the generalised Sobol indices for models with multiple outputs.
+
+        :param A_model_evals: Model evaluations, :class:`numpy.ndarray` of shape :code:`(n_samples, n_outputs)`.
+        :param B_model_evals: Model evaluations, :class:`numpy.ndarray` of shape :code:`(n_samples, n_outputs)`.
+        :param C_i_model_evals: Model evaluations, :class:`numpy.ndarray` of shape :code:`(n_outputs, n_samples, num_vars)`.
+
+        :return: First order generalised Sobol indices, :class:`numpy.ndarray` of shape :code:`(n_outputs, num_vars)`.
+
+        """
+
+        num_vars = C_i_model_evals.shape[2]
+        n_outputs = A_model_evals.shape[1]
+
+        # store generalised Sobol indices
+        gen_sobol_i = np.zeros((num_vars, 1))
+
+        for i in range(num_vars):
+
+            all_Y_i = A_model_evals.T  # shape: (n_outputs, n_samples)
+            all_Y_i_tilde = B_model_evals.T  # shape: (n_outputs, n_samples)
+            all_Y_i_u = C_i_model_evals[:, :, i]  # shape: (n_outputs, n_samples)
+
+            # compute the mean using all model evaluations
+            # shape: (n_outputs, 1)
+            mean = (
+                np.mean(all_Y_i, axis=1, keepdims=1)
+                + np.mean(all_Y_i_u, axis=1, keepdims=1)
+                + np.mean(all_Y_i_tilde, axis=1, keepdims=1)
+            ) / 3
+
+            # center the evaluations since mean is available
+            all_Y_i = all_Y_i - mean
+            all_Y_i_tilde = all_Y_i_tilde - mean
+            all_Y_i_u = all_Y_i_u - mean
+
+            # compute the variance matrix using all available model evaluations
+            # shape: (n_outputs, n_outputs)
+            C = (np.cov(all_Y_i) + np.cov(all_Y_i_u) + np.cov(all_Y_i_tilde)) / 3
+
+            # compute covariance btw. RVs 'X' and 'Y'
+            # shape: (2*n_outputs, 2*n_outputs)
+            # It contains the following 4 block matrices:
+            #   (1, 1) variance of 'X'
+            #  *(1, 2) covariance between 'X' and 'Y' (a.k.a. cross-covariance)
+            #   (2, 1) covariance between 'Y' and 'X' (a.k.a. cross-covariance)
+            #   (2, 2) variance of 'Y'
+            _cov_1 = np.cov(all_Y_i_u, all_Y_i)  # for first order indices
+
+            # We need the cross-covariance between 'X' and 'Y'
+            # Extract *(1, 2) (upper right block)
+            # shape: (n_outputs, n_outputs)
+            C_u = _cov_1[0:n_outputs, n_outputs : 2 * n_outputs]
+
+            denominator = np.trace(C)
+
+            # Generalised Sobol indices
+            gen_sobol_i[i] = np.trace(C_u) / denominator
+
+        return gen_sobol_i
+
+    @staticmethod
+    def compute_total_order_generalised_sobol_indices(
+        A_model_evals, B_model_evals, C_i_model_evals
+    ):
+
+        """
+        Compute the generalised Sobol indices for models with multiple outputs.
+
+        :param A_model_evals: Model evaluations, :class:`numpy.ndarray` of shape :code:`(n_samples, n_outputs)`.
+        :param B_model_evals: Model evaluations, :class:`numpy.ndarray` of shape :code:`(n_samples, n_outputs)`.
+        :param C_i_model_evals: Model evaluations, :class:`numpy.ndarray` of shape :code:`(n_outputs, n_samples, num_vars)`.
+
+        :return: Total order generalised Sobol indices, :class:`numpy.ndarray` of shape :code:`(n_outputs, num_vars)`.
+
+        """
+
+        num_vars = C_i_model_evals.shape[2]
+        n_outputs = A_model_evals.shape[1]
+
+        # store generalised Sobol indices
+        gen_sobol_total_i = np.zeros((num_vars, 1))
+
+        for i in range(num_vars):
+
+            all_Y_i = A_model_evals.T  # shape: (n_outputs, n_samples)
+            all_Y_i_tilde = B_model_evals.T  # shape: (n_outputs, n_samples)
+            all_Y_i_u = C_i_model_evals[:, :, i]  # shape: (n_outputs, n_samples)
+
+            # compute the mean using all model evaluations
+            # shape: (n_outputs, 1)
+            mean = (
+                np.mean(all_Y_i, axis=1, keepdims=1)
+                + np.mean(all_Y_i_u, axis=1, keepdims=1)
+                + np.mean(all_Y_i_tilde, axis=1, keepdims=1)
+            ) / 3
+
+            # center the evaluations since mean is available
+            all_Y_i = all_Y_i - mean
+            all_Y_i_tilde = all_Y_i_tilde - mean
+            all_Y_i_u = all_Y_i_u - mean
+
+            # compute the variance matrix using all available model evaluations
+            # shape: (n_outputs, n_outputs)
+            C = (np.cov(all_Y_i) + np.cov(all_Y_i_u) + np.cov(all_Y_i_tilde)) / 3
+
+            # compute covariance btw. RVs 'X' and 'Y'
+            # shape: (2*n_outputs, 2*n_outputs)
+            # It contains the following 4 block matrices:
+            #   (1, 1) variance of 'X'
+            #  *(1, 2) covariance between 'X' and 'Y' (a.k.a. cross-covariance)
+            #   (2, 1) covariance between 'Y' and 'X' (a.k.a. cross-covariance)
+            #   (2, 2) variance of 'Y'
+            _cov_2 = np.cov(all_Y_i_u, all_Y_i_tilde)  # for total order indices
+
+            # We need the cross-covariance between 'X' and 'Y'
+            # Extract *(1, 2) (upper right block)
+            # shape: (n_outputs, n_outputs)
+            C_u_tilde = _cov_2[0:n_outputs, n_outputs : 2 * n_outputs]
+            denominator = np.trace(C)
+
+            # Generalised Sobol indices
+            gen_sobol_total_i[i] = 1 - np.trace(C_u_tilde) / denominator
+
+        return gen_sobol_total_i