From 1e6118f00155192508aedf0e12cf0df126cff5d4 Mon Sep 17 00:00:00 2001 From: Prateek Bhustali Date: Mon, 9 May 2022 03:23:33 +0200 Subject: [PATCH] Added generalised sobol sensitivity --- src/UQpy/sensitivity/__init__.py | 2 + src/UQpy/sensitivity/generalised_sobol.py | 378 ++++++++++++++++++++++ 2 files changed, 380 insertions(+) create mode 100644 src/UQpy/sensitivity/generalised_sobol.py diff --git a/src/UQpy/sensitivity/__init__.py b/src/UQpy/sensitivity/__init__.py index 73c4166d..e25335a3 100644 --- a/src/UQpy/sensitivity/__init__.py +++ b/src/UQpy/sensitivity/__init__.py @@ -3,9 +3,11 @@ from UQpy.sensitivity.sobol import Sobol from UQpy.sensitivity.cramer_von_mises import CramervonMises from UQpy.sensitivity.chatterjee import Chatterjee +from UQpy.sensitivity.generalised_sobol import GeneralisedSobol from . import MorrisSensitivity from . import PceSensitivity from . import Sobol from . import CramervonMises from . import Chatterjee +from . import GeneralisedSobol diff --git a/src/UQpy/sensitivity/generalised_sobol.py b/src/UQpy/sensitivity/generalised_sobol.py new file mode 100644 index 00000000..e5cf2f65 --- /dev/null +++ b/src/UQpy/sensitivity/generalised_sobol.py @@ -0,0 +1,378 @@ +""" + +The GeneralisedSobol class computes the generalised Sobol indices for a given +multi-ouput model. The class is based on the work of [1]_ and [2]_. + +Additionally, we can compute the confidence intervals for the Sobol indices +using bootstrapping [3]_. + +References +---------- + + .. [1] Gamboa F, Janon A, Klein T, Lagnoux A, others. + Sensitivity analysis for multidimensional and functional outputs. + Electronic journal of statistics 2014; 8(1): 575-603. + + .. [2] Alexanderian A, Gremaud PA, Smith RC. Variance-based sensitivity + analysis for time-dependent processes. Reliability engineering + & system safety 2020; 196: 106722. + +.. [3] Jeremy Orloff and Jonathan Bloom (2014), Bootstrap confidence intervals, + Introduction to Probability and Statistics, MIT OCW. + +""" + +import logging + +import numpy as np + +from UQpy.sensitivity.baseclass.sensitivity import Sensitivity +from UQpy.sensitivity.baseclass.pickfreeze import generate_pick_freeze_samples +from UQpy.utilities.UQpyLoggingFormatter import UQpyLoggingFormatter + + +class GeneralisedSobol(Sensitivity): + """ + Compute the generalised Sobol indices for models with multiple outputs + (vector-valued response) using the Pick-and-Freeze method. + + :param runmodel_object: The computational model. It should be of type :class:`.RunModel`. \ + The output QoI can be a scalar or vector of length :code:`ny`, then the sensitivity \ + indices of all :code:`ny` outputs are computed independently. + + :param distributions: List of :class:`.Distribution` objects corresponding to each \ + random variable, or :class:`.JointIndependent` object \ + (multivariate RV with independent marginals). + + :param random_state: Random seed used to initialize the pseudo-random number \ + generator. Default is :any:`None`. + + **Methods:** + """ + + def __init__( + self, runmodel_object, dist_object, random_state=None, **kwargs + ) -> None: + + super().__init__(runmodel_object, dist_object, random_state, **kwargs) + + # Create logger with the same name as the class + self.logger = logging.getLogger(__name__) + self.logger.setLevel(logging.ERROR) + frmt = UQpyLoggingFormatter() + + # create console handler with a higher log level + ch = logging.StreamHandler() + ch.setFormatter(frmt) + + # add the handler to the logger + self.logger.addHandler(ch) + + self.gen_sobol_i = None + "Generalised first order Sobol indices, :class:`ndarray` of shape (num_vars, 1)" + + self.gen_sobol_total_i = None + "Generalised total order Sobol indices, :class:`ndarray` of shape (num_vars, 1)" + + self.n_samples = None + "Number of samples used to compute the sensitivity indices, :class:`int`" + + self.num_vars = None + "Number of model input variables, :class:`int`" + + def run( + self, + n_samples=1_000, + num_bootstrap_samples=None, + confidence_level=0.95, + ): + + """ + Compute the generalised Sobol indices for models with multiple outputs + (vector-valued response) using the Pick-and-Freeze method. + + :param n_samples: Number of samples used to compute the sensitivity indices. \ + Default is 1,000. + + :param num_bootstrap_samples: Number of bootstrap samples used to compute the \ + confidence intervals. Default is :any:`None`. + + :param confidence_level: Confidence level used to compute the confidence \ + intervals. Default is 0.95. + + :return: A :class:`dict` with the following keys: \ + :code:`gen_sobol_i` of shape :code:`(num_vars, 1)`, \ + :code:`gen_sobol_total_i` of shape :code:`(num_vars, 1)`, \ + :code:`CI_gen_sobol_i` of shape :code:`(num_vars, 2)`, \ + :code:`CI_gen_sobol_total_i` of shape :code:`(num_vars, 2)`. + + """ + + # Check n_samples data type + self.n_samples = n_samples + if not isinstance(self.n_samples, int): + raise TypeError("UQpy: n_samples should be an integer") + + # Check num_bootstrap_samples data type + if num_bootstrap_samples is not None: + if not isinstance(num_bootstrap_samples, int): + raise TypeError("UQpy: num_bootstrap_samples should be an integer.\n") + elif num_bootstrap_samples is None: + self.logger.info( + "UQpy: num_bootstrap_samples is set to None, confidence intervals will not be computed.\n" + ) + + ################## GENERATE SAMPLES ################## + + (A_samples, B_samples, C_i_generator, _,) = generate_pick_freeze_samples( + self.dist_object, self.n_samples, self.random_state + ) + + self.logger.info("UQpy: Generated samples using the pick-freeze scheme.\n") + + self.num_vars = A_samples.shape[1] # Number of variables + + ################# MODEL EVALUATIONS #################### + + A_model_evals = self._run_model(A_samples) # shape: (n_samples, n_outputs) + + # if model output is vectorised, + # shape retured by model is (n_samples, n_outputs, 1) + # we need to reshape it to (n_samples, n_outputs) + if A_model_evals.ndim == 3: + A_model_evals = A_model_evals[:, :, 0] # shape: (n_samples, n_outputs) + + self.logger.info("UQpy: Model evaluations A completed.\n") + + B_model_evals = self._run_model(B_samples) # shape: (n_samples, n_outputs) + + # if model output is vectorised, + # shape retured by model is (n_samples, n_outputs, 1) + # we need to reshape it to (n_samples, n_outputs) + if B_model_evals.ndim == 3: + B_model_evals = B_model_evals[:, :, 0] # shape: (n_samples, n_outputs) + + self.logger.info("UQpy: Model evaluations B completed.\n") + + self.n_outputs = A_model_evals.shape[1] + + # shape: (n_outputs, n_samples, num_vars) + C_i_model_evals = np.zeros((self.n_outputs, self.n_samples, self.num_vars)) + + for i, C_i in enumerate(C_i_generator): + + # if model output is vectorised, + # shape retured by model is (n_samples, n_outputs, 1) + # we need to reshape it to (n_samples, n_outputs) + model_evals = self._run_model(C_i) + + if model_evals.ndim == 3: + C_i_model_evals[:, :, i] = self._run_model(C_i)[:, :, 0].T + else: + C_i_model_evals[:, :, i] = model_evals.T + + self.logger.info("UQpy: Model evaluations C completed.\n") + + self.logger.info("UQpy: All model evaluations computed successfully.\n") + + ######################### STORAGE ######################## + + # Create dictionary to store the sensitivity indices + computed_indices = {} + + ################## COMPUTE GENERALISED SOBOL INDICES ################## + + self.gen_sobol_i = self.compute_first_order_generalised_sobol_indices( + A_model_evals, B_model_evals, C_i_model_evals + ) + + self.logger.info( + "UQpy: First order Generalised Sobol indices computed successfully.\n" + ) + + self.gen_sobol_total_i = self.compute_total_order_generalised_sobol_indices( + A_model_evals, B_model_evals, C_i_model_evals + ) + + self.logger.info( + "UQpy: Total order Generalised Sobol indices computed successfully.\n" + ) + + # Store the indices in the dictionary + computed_indices["gen_sobol_i"] = self.gen_sobol_i + computed_indices["gen_sobol_total_i"] = self.gen_sobol_total_i + + ################## CONFIDENCE INTERVALS #################### + + if num_bootstrap_samples is not None: + + self.logger.info("UQpy: Computing confidence intervals ...\n") + + estimator_inputs = [ + A_model_evals, + B_model_evals, + C_i_model_evals, + ] + + # First order generalised Sobol indices + self.CI_gen_sobol_i = self.bootstrapping( + self.compute_first_order_generalised_sobol_indices, + estimator_inputs, + computed_indices["gen_sobol_i"], + num_bootstrap_samples, + confidence_level, + ) + + self.logger.info( + "UQpy: Confidence intervals for First order Generalised Sobol indices computed successfully.\n" + ) + + # Total order generalised Sobol indices + self.CI_gen_sobol_total_i = self.bootstrapping( + self.compute_total_order_generalised_sobol_indices, + estimator_inputs, + computed_indices["gen_sobol_total_i"], + num_bootstrap_samples, + confidence_level, + ) + + self.logger.info( + "UQpy: Confidence intervals for Total order Sobol Generalised indices computed successfully.\n" + ) + + # Store the indices in the dictionary + computed_indices["CI_gen_sobol_i"] = self.CI_gen_sobol_i + computed_indices["CI_gen_sobol_total_i"] = self.CI_gen_sobol_total_i + + return computed_indices + + @staticmethod + def compute_first_order_generalised_sobol_indices( + A_model_evals, B_model_evals, C_i_model_evals + ): + + """ + Compute the generalised Sobol indices for models with multiple outputs. + + :param A_model_evals: Model evaluations, :class:`numpy.ndarray` of shape :code:`(n_samples, n_outputs)`. + :param B_model_evals: Model evaluations, :class:`numpy.ndarray` of shape :code:`(n_samples, n_outputs)`. + :param C_i_model_evals: Model evaluations, :class:`numpy.ndarray` of shape :code:`(n_outputs, n_samples, num_vars)`. + + :return: First order generalised Sobol indices, :class:`numpy.ndarray` of shape :code:`(n_outputs, num_vars)`. + + """ + + num_vars = C_i_model_evals.shape[2] + n_outputs = A_model_evals.shape[1] + + # store generalised Sobol indices + gen_sobol_i = np.zeros((num_vars, 1)) + + for i in range(num_vars): + + all_Y_i = A_model_evals.T # shape: (n_outputs, n_samples) + all_Y_i_tilde = B_model_evals.T # shape: (n_outputs, n_samples) + all_Y_i_u = C_i_model_evals[:, :, i] # shape: (n_outputs, n_samples) + + # compute the mean using all model evaluations + # shape: (n_outputs, 1) + mean = ( + np.mean(all_Y_i, axis=1, keepdims=1) + + np.mean(all_Y_i_u, axis=1, keepdims=1) + + np.mean(all_Y_i_tilde, axis=1, keepdims=1) + ) / 3 + + # center the evaluations since mean is available + all_Y_i = all_Y_i - mean + all_Y_i_tilde = all_Y_i_tilde - mean + all_Y_i_u = all_Y_i_u - mean + + # compute the variance matrix using all available model evaluations + # shape: (n_outputs, n_outputs) + C = (np.cov(all_Y_i) + np.cov(all_Y_i_u) + np.cov(all_Y_i_tilde)) / 3 + + # compute covariance btw. RVs 'X' and 'Y' + # shape: (2*n_outputs, 2*n_outputs) + # It contains the following 4 block matrices: + # (1, 1) variance of 'X' + # *(1, 2) covariance between 'X' and 'Y' (a.k.a. cross-covariance) + # (2, 1) covariance between 'Y' and 'X' (a.k.a. cross-covariance) + # (2, 2) variance of 'Y' + _cov_1 = np.cov(all_Y_i_u, all_Y_i) # for first order indices + + # We need the cross-covariance between 'X' and 'Y' + # Extract *(1, 2) (upper right block) + # shape: (n_outputs, n_outputs) + C_u = _cov_1[0:n_outputs, n_outputs : 2 * n_outputs] + + denominator = np.trace(C) + + # Generalised Sobol indices + gen_sobol_i[i] = np.trace(C_u) / denominator + + return gen_sobol_i + + @staticmethod + def compute_total_order_generalised_sobol_indices( + A_model_evals, B_model_evals, C_i_model_evals + ): + + """ + Compute the generalised Sobol indices for models with multiple outputs. + + :param A_model_evals: Model evaluations, :class:`numpy.ndarray` of shape :code:`(n_samples, n_outputs)`. + :param B_model_evals: Model evaluations, :class:`numpy.ndarray` of shape :code:`(n_samples, n_outputs)`. + :param C_i_model_evals: Model evaluations, :class:`numpy.ndarray` of shape :code:`(n_outputs, n_samples, num_vars)`. + + :return: Total order generalised Sobol indices, :class:`numpy.ndarray` of shape :code:`(n_outputs, num_vars)`. + + """ + + num_vars = C_i_model_evals.shape[2] + n_outputs = A_model_evals.shape[1] + + # store generalised Sobol indices + gen_sobol_total_i = np.zeros((num_vars, 1)) + + for i in range(num_vars): + + all_Y_i = A_model_evals.T # shape: (n_outputs, n_samples) + all_Y_i_tilde = B_model_evals.T # shape: (n_outputs, n_samples) + all_Y_i_u = C_i_model_evals[:, :, i] # shape: (n_outputs, n_samples) + + # compute the mean using all model evaluations + # shape: (n_outputs, 1) + mean = ( + np.mean(all_Y_i, axis=1, keepdims=1) + + np.mean(all_Y_i_u, axis=1, keepdims=1) + + np.mean(all_Y_i_tilde, axis=1, keepdims=1) + ) / 3 + + # center the evaluations since mean is available + all_Y_i = all_Y_i - mean + all_Y_i_tilde = all_Y_i_tilde - mean + all_Y_i_u = all_Y_i_u - mean + + # compute the variance matrix using all available model evaluations + # shape: (n_outputs, n_outputs) + C = (np.cov(all_Y_i) + np.cov(all_Y_i_u) + np.cov(all_Y_i_tilde)) / 3 + + # compute covariance btw. RVs 'X' and 'Y' + # shape: (2*n_outputs, 2*n_outputs) + # It contains the following 4 block matrices: + # (1, 1) variance of 'X' + # *(1, 2) covariance between 'X' and 'Y' (a.k.a. cross-covariance) + # (2, 1) covariance between 'Y' and 'X' (a.k.a. cross-covariance) + # (2, 2) variance of 'Y' + _cov_2 = np.cov(all_Y_i_u, all_Y_i_tilde) # for total order indices + + # We need the cross-covariance between 'X' and 'Y' + # Extract *(1, 2) (upper right block) + # shape: (n_outputs, n_outputs) + C_u_tilde = _cov_2[0:n_outputs, n_outputs : 2 * n_outputs] + denominator = np.trace(C) + + # Generalised Sobol indices + gen_sobol_total_i[i] = 1 - np.trace(C_u_tilde) / denominator + + return gen_sobol_total_i