From 1e6118f00155192508aedf0e12cf0df126cff5d4 Mon Sep 17 00:00:00 2001
From: Prateek Bhustali
Date: Mon, 9 May 2022 03:23:33 +0200
Subject: [PATCH] Added generalised sobol sensitivity
---
src/UQpy/sensitivity/__init__.py | 2 +
src/UQpy/sensitivity/generalised_sobol.py | 378 ++++++++++++++++++++++
2 files changed, 380 insertions(+)
create mode 100644 src/UQpy/sensitivity/generalised_sobol.py
diff --git a/src/UQpy/sensitivity/__init__.py b/src/UQpy/sensitivity/__init__.py
index 73c4166d..e25335a3 100644
--- a/src/UQpy/sensitivity/__init__.py
+++ b/src/UQpy/sensitivity/__init__.py
@@ -3,9 +3,11 @@
from UQpy.sensitivity.sobol import Sobol
from UQpy.sensitivity.cramer_von_mises import CramervonMises
from UQpy.sensitivity.chatterjee import Chatterjee
+from UQpy.sensitivity.generalised_sobol import GeneralisedSobol
from . import MorrisSensitivity
from . import PceSensitivity
from . import Sobol
from . import CramervonMises
from . import Chatterjee
+from . import GeneralisedSobol
diff --git a/src/UQpy/sensitivity/generalised_sobol.py b/src/UQpy/sensitivity/generalised_sobol.py
new file mode 100644
index 00000000..e5cf2f65
--- /dev/null
+++ b/src/UQpy/sensitivity/generalised_sobol.py
@@ -0,0 +1,378 @@
+"""
+
+The GeneralisedSobol class computes the generalised Sobol indices for a given
+multi-ouput model. The class is based on the work of [1]_ and [2]_.
+
+Additionally, we can compute the confidence intervals for the Sobol indices
+using bootstrapping [3]_.
+
+References
+----------
+
+ .. [1] Gamboa F, Janon A, Klein T, Lagnoux A, others.
+ Sensitivity analysis for multidimensional and functional outputs.
+ Electronic journal of statistics 2014; 8(1): 575-603.
+
+ .. [2] Alexanderian A, Gremaud PA, Smith RC. Variance-based sensitivity
+ analysis for time-dependent processes. Reliability engineering
+ & system safety 2020; 196: 106722.
+
+.. [3] Jeremy Orloff and Jonathan Bloom (2014), Bootstrap confidence intervals,
+ Introduction to Probability and Statistics, MIT OCW.
+
+"""
+
+import logging
+
+import numpy as np
+
+from UQpy.sensitivity.baseclass.sensitivity import Sensitivity
+from UQpy.sensitivity.baseclass.pickfreeze import generate_pick_freeze_samples
+from UQpy.utilities.UQpyLoggingFormatter import UQpyLoggingFormatter
+
+
+class GeneralisedSobol(Sensitivity):
+ """
+ Compute the generalised Sobol indices for models with multiple outputs
+ (vector-valued response) using the Pick-and-Freeze method.
+
+ :param runmodel_object: The computational model. It should be of type :class:`.RunModel`. \
+ The output QoI can be a scalar or vector of length :code:`ny`, then the sensitivity \
+ indices of all :code:`ny` outputs are computed independently.
+
+ :param distributions: List of :class:`.Distribution` objects corresponding to each \
+ random variable, or :class:`.JointIndependent` object \
+ (multivariate RV with independent marginals).
+
+ :param random_state: Random seed used to initialize the pseudo-random number \
+ generator. Default is :any:`None`.
+
+ **Methods:**
+ """
+
+ def __init__(
+ self, runmodel_object, dist_object, random_state=None, **kwargs
+ ) -> None:
+
+ super().__init__(runmodel_object, dist_object, random_state, **kwargs)
+
+ # Create logger with the same name as the class
+ self.logger = logging.getLogger(__name__)
+ self.logger.setLevel(logging.ERROR)
+ frmt = UQpyLoggingFormatter()
+
+ # create console handler with a higher log level
+ ch = logging.StreamHandler()
+ ch.setFormatter(frmt)
+
+ # add the handler to the logger
+ self.logger.addHandler(ch)
+
+ self.gen_sobol_i = None
+ "Generalised first order Sobol indices, :class:`ndarray` of shape (num_vars, 1)"
+
+ self.gen_sobol_total_i = None
+ "Generalised total order Sobol indices, :class:`ndarray` of shape (num_vars, 1)"
+
+ self.n_samples = None
+ "Number of samples used to compute the sensitivity indices, :class:`int`"
+
+ self.num_vars = None
+ "Number of model input variables, :class:`int`"
+
+ def run(
+ self,
+ n_samples=1_000,
+ num_bootstrap_samples=None,
+ confidence_level=0.95,
+ ):
+
+ """
+ Compute the generalised Sobol indices for models with multiple outputs
+ (vector-valued response) using the Pick-and-Freeze method.
+
+ :param n_samples: Number of samples used to compute the sensitivity indices. \
+ Default is 1,000.
+
+ :param num_bootstrap_samples: Number of bootstrap samples used to compute the \
+ confidence intervals. Default is :any:`None`.
+
+ :param confidence_level: Confidence level used to compute the confidence \
+ intervals. Default is 0.95.
+
+ :return: A :class:`dict` with the following keys: \
+ :code:`gen_sobol_i` of shape :code:`(num_vars, 1)`, \
+ :code:`gen_sobol_total_i` of shape :code:`(num_vars, 1)`, \
+ :code:`CI_gen_sobol_i` of shape :code:`(num_vars, 2)`, \
+ :code:`CI_gen_sobol_total_i` of shape :code:`(num_vars, 2)`.
+
+ """
+
+ # Check n_samples data type
+ self.n_samples = n_samples
+ if not isinstance(self.n_samples, int):
+ raise TypeError("UQpy: n_samples should be an integer")
+
+ # Check num_bootstrap_samples data type
+ if num_bootstrap_samples is not None:
+ if not isinstance(num_bootstrap_samples, int):
+ raise TypeError("UQpy: num_bootstrap_samples should be an integer.\n")
+ elif num_bootstrap_samples is None:
+ self.logger.info(
+ "UQpy: num_bootstrap_samples is set to None, confidence intervals will not be computed.\n"
+ )
+
+ ################## GENERATE SAMPLES ##################
+
+ (A_samples, B_samples, C_i_generator, _,) = generate_pick_freeze_samples(
+ self.dist_object, self.n_samples, self.random_state
+ )
+
+ self.logger.info("UQpy: Generated samples using the pick-freeze scheme.\n")
+
+ self.num_vars = A_samples.shape[1] # Number of variables
+
+ ################# MODEL EVALUATIONS ####################
+
+ A_model_evals = self._run_model(A_samples) # shape: (n_samples, n_outputs)
+
+ # if model output is vectorised,
+ # shape retured by model is (n_samples, n_outputs, 1)
+ # we need to reshape it to (n_samples, n_outputs)
+ if A_model_evals.ndim == 3:
+ A_model_evals = A_model_evals[:, :, 0] # shape: (n_samples, n_outputs)
+
+ self.logger.info("UQpy: Model evaluations A completed.\n")
+
+ B_model_evals = self._run_model(B_samples) # shape: (n_samples, n_outputs)
+
+ # if model output is vectorised,
+ # shape retured by model is (n_samples, n_outputs, 1)
+ # we need to reshape it to (n_samples, n_outputs)
+ if B_model_evals.ndim == 3:
+ B_model_evals = B_model_evals[:, :, 0] # shape: (n_samples, n_outputs)
+
+ self.logger.info("UQpy: Model evaluations B completed.\n")
+
+ self.n_outputs = A_model_evals.shape[1]
+
+ # shape: (n_outputs, n_samples, num_vars)
+ C_i_model_evals = np.zeros((self.n_outputs, self.n_samples, self.num_vars))
+
+ for i, C_i in enumerate(C_i_generator):
+
+ # if model output is vectorised,
+ # shape retured by model is (n_samples, n_outputs, 1)
+ # we need to reshape it to (n_samples, n_outputs)
+ model_evals = self._run_model(C_i)
+
+ if model_evals.ndim == 3:
+ C_i_model_evals[:, :, i] = self._run_model(C_i)[:, :, 0].T
+ else:
+ C_i_model_evals[:, :, i] = model_evals.T
+
+ self.logger.info("UQpy: Model evaluations C completed.\n")
+
+ self.logger.info("UQpy: All model evaluations computed successfully.\n")
+
+ ######################### STORAGE ########################
+
+ # Create dictionary to store the sensitivity indices
+ computed_indices = {}
+
+ ################## COMPUTE GENERALISED SOBOL INDICES ##################
+
+ self.gen_sobol_i = self.compute_first_order_generalised_sobol_indices(
+ A_model_evals, B_model_evals, C_i_model_evals
+ )
+
+ self.logger.info(
+ "UQpy: First order Generalised Sobol indices computed successfully.\n"
+ )
+
+ self.gen_sobol_total_i = self.compute_total_order_generalised_sobol_indices(
+ A_model_evals, B_model_evals, C_i_model_evals
+ )
+
+ self.logger.info(
+ "UQpy: Total order Generalised Sobol indices computed successfully.\n"
+ )
+
+ # Store the indices in the dictionary
+ computed_indices["gen_sobol_i"] = self.gen_sobol_i
+ computed_indices["gen_sobol_total_i"] = self.gen_sobol_total_i
+
+ ################## CONFIDENCE INTERVALS ####################
+
+ if num_bootstrap_samples is not None:
+
+ self.logger.info("UQpy: Computing confidence intervals ...\n")
+
+ estimator_inputs = [
+ A_model_evals,
+ B_model_evals,
+ C_i_model_evals,
+ ]
+
+ # First order generalised Sobol indices
+ self.CI_gen_sobol_i = self.bootstrapping(
+ self.compute_first_order_generalised_sobol_indices,
+ estimator_inputs,
+ computed_indices["gen_sobol_i"],
+ num_bootstrap_samples,
+ confidence_level,
+ )
+
+ self.logger.info(
+ "UQpy: Confidence intervals for First order Generalised Sobol indices computed successfully.\n"
+ )
+
+ # Total order generalised Sobol indices
+ self.CI_gen_sobol_total_i = self.bootstrapping(
+ self.compute_total_order_generalised_sobol_indices,
+ estimator_inputs,
+ computed_indices["gen_sobol_total_i"],
+ num_bootstrap_samples,
+ confidence_level,
+ )
+
+ self.logger.info(
+ "UQpy: Confidence intervals for Total order Sobol Generalised indices computed successfully.\n"
+ )
+
+ # Store the indices in the dictionary
+ computed_indices["CI_gen_sobol_i"] = self.CI_gen_sobol_i
+ computed_indices["CI_gen_sobol_total_i"] = self.CI_gen_sobol_total_i
+
+ return computed_indices
+
+ @staticmethod
+ def compute_first_order_generalised_sobol_indices(
+ A_model_evals, B_model_evals, C_i_model_evals
+ ):
+
+ """
+ Compute the generalised Sobol indices for models with multiple outputs.
+
+ :param A_model_evals: Model evaluations, :class:`numpy.ndarray` of shape :code:`(n_samples, n_outputs)`.
+ :param B_model_evals: Model evaluations, :class:`numpy.ndarray` of shape :code:`(n_samples, n_outputs)`.
+ :param C_i_model_evals: Model evaluations, :class:`numpy.ndarray` of shape :code:`(n_outputs, n_samples, num_vars)`.
+
+ :return: First order generalised Sobol indices, :class:`numpy.ndarray` of shape :code:`(n_outputs, num_vars)`.
+
+ """
+
+ num_vars = C_i_model_evals.shape[2]
+ n_outputs = A_model_evals.shape[1]
+
+ # store generalised Sobol indices
+ gen_sobol_i = np.zeros((num_vars, 1))
+
+ for i in range(num_vars):
+
+ all_Y_i = A_model_evals.T # shape: (n_outputs, n_samples)
+ all_Y_i_tilde = B_model_evals.T # shape: (n_outputs, n_samples)
+ all_Y_i_u = C_i_model_evals[:, :, i] # shape: (n_outputs, n_samples)
+
+ # compute the mean using all model evaluations
+ # shape: (n_outputs, 1)
+ mean = (
+ np.mean(all_Y_i, axis=1, keepdims=1)
+ + np.mean(all_Y_i_u, axis=1, keepdims=1)
+ + np.mean(all_Y_i_tilde, axis=1, keepdims=1)
+ ) / 3
+
+ # center the evaluations since mean is available
+ all_Y_i = all_Y_i - mean
+ all_Y_i_tilde = all_Y_i_tilde - mean
+ all_Y_i_u = all_Y_i_u - mean
+
+ # compute the variance matrix using all available model evaluations
+ # shape: (n_outputs, n_outputs)
+ C = (np.cov(all_Y_i) + np.cov(all_Y_i_u) + np.cov(all_Y_i_tilde)) / 3
+
+ # compute covariance btw. RVs 'X' and 'Y'
+ # shape: (2*n_outputs, 2*n_outputs)
+ # It contains the following 4 block matrices:
+ # (1, 1) variance of 'X'
+ # *(1, 2) covariance between 'X' and 'Y' (a.k.a. cross-covariance)
+ # (2, 1) covariance between 'Y' and 'X' (a.k.a. cross-covariance)
+ # (2, 2) variance of 'Y'
+ _cov_1 = np.cov(all_Y_i_u, all_Y_i) # for first order indices
+
+ # We need the cross-covariance between 'X' and 'Y'
+ # Extract *(1, 2) (upper right block)
+ # shape: (n_outputs, n_outputs)
+ C_u = _cov_1[0:n_outputs, n_outputs : 2 * n_outputs]
+
+ denominator = np.trace(C)
+
+ # Generalised Sobol indices
+ gen_sobol_i[i] = np.trace(C_u) / denominator
+
+ return gen_sobol_i
+
+ @staticmethod
+ def compute_total_order_generalised_sobol_indices(
+ A_model_evals, B_model_evals, C_i_model_evals
+ ):
+
+ """
+ Compute the generalised Sobol indices for models with multiple outputs.
+
+ :param A_model_evals: Model evaluations, :class:`numpy.ndarray` of shape :code:`(n_samples, n_outputs)`.
+ :param B_model_evals: Model evaluations, :class:`numpy.ndarray` of shape :code:`(n_samples, n_outputs)`.
+ :param C_i_model_evals: Model evaluations, :class:`numpy.ndarray` of shape :code:`(n_outputs, n_samples, num_vars)`.
+
+ :return: Total order generalised Sobol indices, :class:`numpy.ndarray` of shape :code:`(n_outputs, num_vars)`.
+
+ """
+
+ num_vars = C_i_model_evals.shape[2]
+ n_outputs = A_model_evals.shape[1]
+
+ # store generalised Sobol indices
+ gen_sobol_total_i = np.zeros((num_vars, 1))
+
+ for i in range(num_vars):
+
+ all_Y_i = A_model_evals.T # shape: (n_outputs, n_samples)
+ all_Y_i_tilde = B_model_evals.T # shape: (n_outputs, n_samples)
+ all_Y_i_u = C_i_model_evals[:, :, i] # shape: (n_outputs, n_samples)
+
+ # compute the mean using all model evaluations
+ # shape: (n_outputs, 1)
+ mean = (
+ np.mean(all_Y_i, axis=1, keepdims=1)
+ + np.mean(all_Y_i_u, axis=1, keepdims=1)
+ + np.mean(all_Y_i_tilde, axis=1, keepdims=1)
+ ) / 3
+
+ # center the evaluations since mean is available
+ all_Y_i = all_Y_i - mean
+ all_Y_i_tilde = all_Y_i_tilde - mean
+ all_Y_i_u = all_Y_i_u - mean
+
+ # compute the variance matrix using all available model evaluations
+ # shape: (n_outputs, n_outputs)
+ C = (np.cov(all_Y_i) + np.cov(all_Y_i_u) + np.cov(all_Y_i_tilde)) / 3
+
+ # compute covariance btw. RVs 'X' and 'Y'
+ # shape: (2*n_outputs, 2*n_outputs)
+ # It contains the following 4 block matrices:
+ # (1, 1) variance of 'X'
+ # *(1, 2) covariance between 'X' and 'Y' (a.k.a. cross-covariance)
+ # (2, 1) covariance between 'Y' and 'X' (a.k.a. cross-covariance)
+ # (2, 2) variance of 'Y'
+ _cov_2 = np.cov(all_Y_i_u, all_Y_i_tilde) # for total order indices
+
+ # We need the cross-covariance between 'X' and 'Y'
+ # Extract *(1, 2) (upper right block)
+ # shape: (n_outputs, n_outputs)
+ C_u_tilde = _cov_2[0:n_outputs, n_outputs : 2 * n_outputs]
+ denominator = np.trace(C)
+
+ # Generalised Sobol indices
+ gen_sobol_total_i[i] = 1 - np.trace(C_u_tilde) / denominator
+
+ return gen_sobol_total_i