CLIMADA-project · luseverin · Jun 11, 2024 · Jan 23, 2024 · Jan 24, 2024 · Jan 24, 2024
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -12,6 +12,8 @@ Code freeze date: YYYY-MM-DD
 
 ### Changed
 
+- Update SALib sensitivity and sampling methods to the newest version (SALib 1.4.7) [#828](https://github.com/CLIMADA-project/climada_python/issues/828)
+- Allow for computation of relative and absolute delta impacts in `CalcDeltaClimate`
 - Remove content tables and make minor improvements (fix typos and readability) in
 CLIMADA tutorials. [#872](https://github.com/CLIMADA-project/climada_python/pull/872)
 - Centroids complete overhaul. Most function should be backward compatible. Internal data is stored in a geodataframe attribute. Raster are now stored as points, and the meta attribute is removed. Several methds were deprecated or removed. [#787](https://github.com/CLIMADA-project/climada_python/pull/787)
@@ -152,6 +154,7 @@ Changed:
 
 - `geopandas` >=0.13 &rarr; >=0.14
 - `pandas` >=1.5,<2.0 &rarr; >=2.1
+- `salib` >=1.3.0 &rarr; >=1.4.7
 
 Removed:
 

diff --git a/climada/engine/unsequa/calc_base.py b/climada/engine/unsequa/calc_base.py
@@ -203,8 +203,8 @@
             Number of samples as used in the sampling method from SALib
         sampling_method : str, optional
             The sampling method as defined in SALib. Possible choices:
-            'saltelli', 'fast_sampler', 'latin', 'morris', 'dgsm', 'ff'
-            https://salib.readthedocs.io/en/latest/api.html
+            'saltelli', 'latin', 'morris', 'dgsm', 'fast_sampler', 'ff', 'finite_diff'
+            https://salib.readthedocs.io/en/latest/api.html
             The default is 'saltelli'.
         sampling_kwargs : kwargs, optional
             Optional keyword arguments passed on to the SALib sampling_method.
@@ -215,6 +215,17 @@
         unc_output : climada.engine.uncertainty.unc_output.UncOutput()
             Uncertainty data object with the samples
 
+        Notes
+        -----
+        The 'ff' sampling method does not require a value for the N parameter.
+        The N value provided is hence ignored in the sampling process when this
+        method is used.
+        The 'ff' sampling method requires the number of uncertainty parameters to be
+        a power of 2. Users can add dummy variables to meet this
+        requirement. Please refer to https://salib.readthedocs.io/en/latest/api.html
+        for more details.
+
+
         See Also
         --------
         SALib.sample: sampling methods from SALib SALib.sample
@@ -231,11 +242,17 @@
             'names' : param_labels,
             'bounds' : [[0, 1]]*len(param_labels)
             }
-
+        #for the ff sampler, no value of N is needed. For API consistency the user
+        #must input a value that is ignored and a warning is given.
+        if sampling_method == 'ff':
+            LOGGER.warning("You are using the 'ff' sampler which does not require "
+                           "a value for N. The entered N value will be ignored"
+                           "in the sampling process.")
         uniform_base_sample = self._make_uniform_base_sample(N, problem_sa,
                                                              sampling_method,
                                                              sampling_kwargs)
         df_samples = pd.DataFrame(uniform_base_sample, columns=param_labels)
+
         for param in list(df_samples):
             df_samples[param] = df_samples[param].apply(
                 self.distr_dict[param].ppf
@@ -271,7 +288,7 @@
             SALib sampling method.
         sampling_method: string
             The sampling method as defined in SALib. Possible choices:
-            'saltelli', 'fast_sampler', 'latin', 'morris', 'dgsm', 'ff'
+            'saltelli', 'latin', 'morris', 'dgsm', 'fast_sampler', 'ff', 'finite_diff'
             https://salib.readthedocs.io/en/latest/api.html
         sampling_kwargs: dict()
             Optional keyword arguments passed on to the SALib sampling method.
@@ -292,8 +309,20 @@
         #c.f. https://stackoverflow.com/questions/2724260/why-does-pythons-import-require-fromlist
         import importlib # pylint: disable=import-outside-toplevel
         salib_sampling_method = importlib.import_module(f'SALib.sample.{sampling_method}')
-        sample_uniform = salib_sampling_method.sample(
-            problem = problem_sa, N = N, **sampling_kwargs)
+
+        if sampling_method == 'ff': #the ff sampling has a fixed sample size and
+                                    #does not require the N parameter
+            if problem_sa['num_vars'] & (problem_sa['num_vars'] - 1) != 0:
+                raise ValueError("The number of parameters must be a power of 2. "
+                                 "To use the ff sampling method, you can generate "
+                                 "dummy parameters to overcome this limitation."
+                                 " See https://salib.readthedocs.io/en/latest/api.html")
+
+            sample_uniform = salib_sampling_method.sample(
+            problem = problem_sa, **sampling_kwargs)
+        else:
+            sample_uniform = salib_sampling_method.sample(
+                problem = problem_sa, N = N, **sampling_kwargs)
         return sample_uniform
 
     def sensitivity(self, unc_output, sensitivity_method = 'sobol',
@@ -323,17 +352,21 @@
         unc_output : climada.engine.unsequa.UncOutput
             Uncertainty data object in which to store the sensitivity indices
         sensitivity_method : str, optional
-            sensitivity analysis method from SALib.analyse
-            Possible choices:
-                'fast', 'rbd_fact', 'morris', 'sobol', 'delta', 'ff'
-            The default is 'sobol'.
-            Note that in Salib, sampling methods and sensitivity analysis
-            methods should be used in specific pairs.
+            Sensitivity analysis method from SALib.analyse. Possible choices: 'sobol', 'fast',
+            'rbd_fast', 'morris', 'dgsm', 'ff', 'pawn', 'rhdm', 'rsa', 'discrepancy', 'hdmr'.
+            Note that in Salib, sampling methods and sensitivity
+            analysis methods should be used in specific pairs:
             https://salib.readthedocs.io/en/latest/api.html
         sensitivity_kwargs: dict, optional
             Keyword arguments of the chosen SALib analyse method.
             The default is to use SALib's default arguments.
 
+        Notes
+        -----
+        The variables 'Em', 'Term', 'select', 'RT', 'Y_em', 'idx', 'X' and 'Y' are
+        removed from the output of the 'hdmr' method to ensure compatibility with
+        unsequa. The 'delta' method is currently not supported.
+
         Returns
         -------
         sens_output : climada.engine.unsequa.UncOutput
@@ -360,10 +393,10 @@
 
         sens_output = copy.deepcopy(unc_output)
 
-        #Certaint Salib method required model input (X) and output (Y), others
+        #Certain SALib methods require model input (X) and output (Y), others
         #need only ouput (Y)
         salib_kwargs = method.analyze.__code__.co_varnames  # obtain all kwargs of the salib method
         X = unc_output.samples_df.to_numpy() if 'X' in salib_kwargs else None

        for metric_name in self._metric_names:
            unc_df = unc_output.get_unc_df(metric_name)
@@ -467,7 +500,7 @@
        )


 def _calc_sens_df(method, problem_sa, sensitivity_kwargs, param_labels, X, unc_df):
    """Compute the sensitifity indices

    Parameters
@@ -500,10 +533,47 @@
         else:
             sens_indices = method.analyze(problem_sa, Y,
                                                     **sensitivity_kwargs)
+        #refactor incoherent SALib output
+        nparams = len(param_labels)
+        if method.__name__[-3:] == '.ff': #ff method
+            if sensitivity_kwargs['second_order']:
+                #parse interaction terms of sens_indices to a square matrix
+                #to ensure consistency with unsequa
+                interaction_names = sens_indices.pop('interaction_names')
+                interactions = np.full((nparams, nparams), np.nan)
+                #loop over interaction names and extract each param pair,
+                #then match to the corresponding param from param_labels
+                for i,interaction_name in enumerate(interaction_names):
+                    interactions[param_labels.index(interaction_name[0]),
+                                 param_labels.index(interaction_name[1])] = sens_indices['IE'][i]
+                sens_indices['IE'] = interactions
+
+        if method.__name__[-5:] == '.hdmr': #hdmr method
+            #first, remove variables that are incompatible with unsequa output
+            keys_to_remove = ['Em','Term','select', 'RT', 'Y_em', 'idx', 'X', 'Y']
+            sens_indices = {k: v for k, v in sens_indices.items()
+                            if k not in keys_to_remove}
+            names = sens_indices.pop('names') #names of terms
+
+            #second, refactor to 2D
+            for si, si_val_array in sens_indices.items():
+                if (np.array(si_val_array).ndim == 1 and    #for everything that is 1d and has
+                    np.array(si_val_array).size > nparams): #lentgh > n params, refactor to 2D
+                    si_new_array = np.full((nparams, nparams), np.nan)
+                    np.fill_diagonal(si_new_array, si_val_array[0:nparams]) #simple terms go on diag
+                    for i,interaction_name in enumerate(names[nparams:]):
+                        t1, t2 = interaction_name.split('/') #interaction terms
+                        si_new_array[param_labels.index(t1),
+                                      param_labels.index(t2)] = si_val_array[nparams+i]
+                    sens_indices[si] = si_new_array
+
+
         sens_first_order = np.array([
             np.array(si_val_array)
             for si, si_val_array in sens_indices.items()
-            if (np.array(si_val_array).ndim == 1 and si!='names')  # dirty trick due to Salib incoherent output
+            if (np.array(si_val_array).ndim == 1 # dirty trick due to Salib incoherent output
+                and si!='names'
+                and np.array(si_val_array).size == len(param_labels))
             ]).ravel()
         sens_first_order_dict[submetric_name] = sens_first_order
 
@@ -515,6 +585,11 @@
         sens_second_order_dict[submetric_name] = sens_second_order
 
     sens_first_order_df = pd.DataFrame(sens_first_order_dict, dtype=np.number)
+    # NOTE: if sens_first_order_dict holds lists/arrays of varying lengths, the frame could be
+    # built from one pd.Series per key (disabled code below), but for some reason this makes the plotting methods fail.
+    #sens_first_order_df = pd.DataFrame({k: pd.Series(v, dtype=object)
+    #                                    for k, v in sens_first_order_dict.items()})
+
     if not sens_first_order_df.empty:
         si_names_first_order, param_names_first_order = _si_param_first(param_labels, sens_indices)
         sens_first_order_df.insert(0, 'si', si_names_first_order)

diff --git a/climada/engine/unsequa/calc_delta_climate.py b/climada/engine/unsequa/calc_delta_climate.py
@@ -148,12 +148,13 @@
        self.value_unit = self.exp_initial_input_var.evaluate().value_unit
        self.check_distr()

    def uncertainty(
        self,
        unc_sample,
         rp=None,
         calc_eai_exp=False,
         calc_at_event=False,
+        relative_delta=True,
         processes=1,
         chunksize=None,
     ):
@@ -195,6 +196,9 @@
         calc_at_event : boolean, optional
             Toggle computation of the impact for each event.
             The default is False.
+        relative_delta : bool, optional
+            If True, return delta impacts relative to the initial impacts; if False,
+            return absolute differences (final - initial). The default is True.
         processes : int, optional
             Number of CPUs to use for parralel computations.
             The default is 1 (not parallel)
@@ -245,9 +249,10 @@
        if rp is None:
            rp = [5, 10, 20, 50, 100, 250]

         self.rp = rp
         self.calc_eai_exp = calc_eai_exp
         self.calc_at_event = calc_at_event
+        self.relative_delta = relative_delta
 
         one_sample = samples_df.iloc[0:1]
         start = time.time()
@@ -319,6 +324,7 @@
                 rp=self.rp,
                 calc_eai_exp=self.calc_eai_exp,
                 calc_at_event=self.calc_at_event,
+                relative_delta=self.relative_delta,
             )
             if processes > 1:
                 with mp.Pool(processes=processes) as pool:
@@ -343,6 +349,7 @@
     rp,
     calc_eai_exp,
     calc_at_event,
+    relative_delta,
 ):
     """
     Map to compute impact for all parameter samples in parallel
@@ -363,6 +370,8 @@
         Compute eai_exp or not
     calc_at_event : bool
         Compute at_event or not
+    relative_delta : bool
+        If True, divide delta impacts by the initial impacts; if False, keep absolute differences
 
     Returns
     -------
@@ -416,22 +425,27 @@
             at_event_initial = np.array([])
             at_event_final = np.array([])
 
-        delta_aai_agg = safe_divide(
-            imp_final.aai_agg - imp_initial.aai_agg, imp_initial.aai_agg
+        if relative_delta:
+            delta_func = lambda x, y: safe_divide(x - y, y)
+        else:
+            delta_func = lambda x, y: x - y
+
+        delta_aai_agg = delta_func(
+            imp_final.aai_agg, imp_initial.aai_agg
         )
 
-        delta_freq_curve = safe_divide(
-            freq_curve_final - freq_curve_initial, freq_curve_initial
+        delta_freq_curve = delta_func(
+            freq_curve_final, freq_curve_initial
         )
 
         delta_eai_exp = (
-            safe_divide(eai_exp_final - eai_exp_initial, eai_exp_initial)
+            delta_func(eai_exp_final, eai_exp_initial)
             if calc_eai_exp
             else np.array([])
         )
 
         delta_at_event = (
-            safe_divide(at_event_final - at_event_initial, at_event_initial)
+            delta_func(at_event_final, at_event_initial)
             if calc_at_event
             else np.array([])
         )