tsikes · tsikes · Jan 24, 2021 · May 21, 2020 · May 21, 2020 · May 21, 2020
diff --git a/src/optimize/CheKiPEUQ_from_Frhodo.py b/src/optimize/CheKiPEUQ_from_Frhodo.py
@@ -38,10 +38,11 @@
 #CiteSoft.import_cite(unique_id=software_unique_id, software_name=software_name, write_immediately=True, **software_kwargs)
 #decorator CiteSoft.after_call_compile_consolidated_log()
 #decorator CiteSoft.module_call_cite(unique_id=software_unique_id, software_name=software_name, **software_kwargs)
-def load_into_CheKiPUEQ(simulation_function, observed_data, pars_initial_guess = [], pars_lower_bnds=[], pars_upper_bnds =[],observed_data_lower_bounds=[], observed_data_upper_bounds=[], weights_data=[], pars_uncertainty_distribution='automatic', sigma_multiple = 3.0, num_rate_constants_and_rate_constant_parameters=[]):
+def load_into_CheKiPUEQ(simulation_function, observed_data, pars_initial_guess = [], pars_lower_bnds=[], pars_upper_bnds =[], pars_bnds_exist = [], observed_data_lower_bounds=[], observed_data_upper_bounds=[], weights_data=[], pars_uncertainty_distribution='automatic', sigma_multiple = 3.0, num_rate_constants_and_rate_constant_parameters=[]):
     #observed_data is an array of values of observed data (can be nested if there is more than one observable)
     #pars_lower_bnds and pars_upper_bnds are the bounds of the parameters ('coefficents') in absolute values.
     #  for 'uniform' distribution the bounds are taken directly. For Gaussian, the larger of the 2 deltas is taken and divided by 3 for sigma.
+    # rate_constants_parameters_bnds_exist is an array-like with values like [True False] where the Booleans are about whether the parameter has a lower bound and upper bound, respectively. So there is one pair of booleans per parameter.
     #pars_initial_guess is the initial guess for the parameters ('coefficients')
     #weights_data is an optional array of values that matches observed data in length.
     #sigma_multiple is how many sigma the bounds are equal to (relative to mean).
@@ -73,13 +74,30 @@ def load_into_CheKiPUEQ(simulation_function, observed_data, pars_initial_guess =
     if pars_uncertainty_distribution.lower() == 'gaussian': 
         UserInput.model['InputParametersPriorValuesUncertainties'] = extract_larger_delta_and_make_sigma_values(pars_initial_guess, pars_lower_bnds, pars_upper_bnds, sigma_multiple)
     if pars_uncertainty_distribution.lower() == 'automatic' or pars_uncertainty_distribution.lower() == 'auto': 
-        num_rate_constants = num_rate_constants_and_rate_constant_parameters[0]
+        num_rate_constants = num_rate_constants_and_rate_constant_parameters[0] 
         num_rate_constants_parameters = num_rate_constants_and_rate_constant_parameters[1]
-        rate_constant_uncertainties = extract_larger_delta_and_make_sigma_values(pars_initial_guess[0:num_rate_constants], pars_lower_bnds[0:num_rate_constants], pars_upper_bnds[0:num_rate_constants], sigma_multiple)
-        rate_constant_parameters_uncertainties = -1*np.ones(num_rate_constants_parameters)
+        rate_constant_uncertainties = -1*np.ones(num_rate_constants_parameters) #by default, use uniform for the rate_constant_uncertainties (signified by -1).
+        rate_constant_parameters_uncertainties = extract_larger_delta_and_make_sigma_values(pars_initial_guess[num_rate_constants+0:], pars_lower_bnds[num_rate_constants+0:], pars_upper_bnds[num_rate_constants+0:], sigma_multiple)  #this returns a 1 sigma value for a gaussian, assuming that the range indicates a certain sigma_multiple in each direction. The "+0" is to start at next value with array indexing. Kind of like "-1 +1".
         UserInput.model['InputParametersPriorValuesUncertainties'] = np.concatenate( (rate_constant_uncertainties, rate_constant_parameters_uncertainties) )
-    UserInput.model['InputParameterPriorValues_upperBounds'] = pars_upper_bnds
-    UserInput.model['InputParameterPriorValues_lowerBounds'] = pars_lower_bnds
+    if len(pars_bnds_exist)> 1: #If this is not a blank list, we're going to check each entry. For anything which has a "False", we are going to set the InputParametersPriorValuesUncertainties value to "-1" to indicate uniform since that means it can't be a Gaussian.
+        for exist_index, lower_upper_booleans in enumerate(pars_bnds_exist):
+            #print("line 85", exist_index, lower_upper_booleans, np.sum(lower_upper_booleans))
+            if np.sum(lower_upper_booleans) < 2: #True True will add to 2, anything else does not pass.
+                UserInput.model['InputParametersPriorValuesUncertainties'][exist_index] = -1
+    #print("line 86", UserInput.model['InputParametersPriorValuesUncertainties']) 
+
+    #CheKiPEUQ cannot handle much larger than 1E100 for upper bounds.
+    for upper_bound_index, upper_bound in enumerate(pars_upper_bnds):
+        if upper_bound > 1.0E100:
+            pars_upper_bnds[upper_bound_index] = 1.0E100
+
+    #CheKiPEUQ cannot handle much more negative than -1E100 for lower bounds.
+    for lower_bound_index, lower_bound in enumerate(pars_lower_bnds):
+        if lower_bound < -1.0E100:
+            pars_lower_bnds[lower_bound_index] = -1.0E100
+
+    UserInput.model['InputParameterPriorValues_upperBounds'] = np.array(pars_upper_bnds)
+    UserInput.model['InputParameterPriorValues_lowerBounds'] = np.array(pars_lower_bnds)
     UserInput.model['simulateByInputParametersOnlyFunction'] = simulation_function
     print("line 61", CKPQ.frog)
     PE_object = CKPQ.parameter_estimation(UserInput)
@@ -91,7 +109,7 @@ def get_log_posterior_density(PE_object, parameters):
 #calculates delta between initial guess and bounds, takes the larger delta, and divides by sigma_multiple to return sigma.
 def extract_larger_delta_and_make_sigma_values(initial_guess, lower_bound, upper_bound, sigma_multiple):
     #can probably be done faster with some kind of arrays and zipping, but for simple algorithm will use for loop and if statements.
-    sigma_values = np.zeros(len(initial_guess))
+    sigma_values = np.zeros(len(initial_guess)) #just initializing.
     for index in range(len(initial_guess)):
         upper_delta = np.abs(upper_bound[index]-initial_guess[index])
         lower_delta = np.abs(lower_bound[index]-initial_guess[index])
@@ -115,15 +133,35 @@ def get_varying_rate_vals_and_bnds(rate_vals, rate_bnds):
             varying_rate_vals_upper_bnds.append(rate_bnds[bounds_index][1]) #append current upper bound
     return varying_rate_vals_indices, varying_rate_vals_initial_guess, varying_rate_vals_lower_bnds, varying_rate_vals_upper_bnds
 
-def get_consolidated_parameters_arrays(rate_constants_initial_guess, rate_constants_lower_bnds, rate_constants_upper_bnds, rate_constants_parameters_initial_guess, rate_constants_parameters_lower_bnds, rate_constants_parameters_upper_bnds):
-    #A. Savara recommends 'uniform' for rate constants and 'gaussian' for things like "log(A)" and "Ea"
+def get_consolidated_parameters_arrays(rate_constants_values, rate_constants_lower_bnds, rate_constants_upper_bnds, rate_constants_bnds_exist, rate_constants_parameters_values, rate_constants_parameters_lower_bnds, rate_constants_parameters_upper_bnds, rate_constants_parameters_bnds_exist, return_unbounded_indices=True):
+    #A. Savara recommends 'uniform' for rate constants and 'gaussian' for things like "log(A)" and "Ea"    
+    #note that we use "pars_values" as the variable name because it can be pars_initial_guess or pars_current_guess and this makes the function more general.
+
     #we first start the arrays using the rate_constants arrays.
-    pars_initial_guess = np.array(rate_constants_initial_guess).flatten()
+    pars_values = np.array(rate_constants_values).flatten()
     pars_lower_bnds = np.array(rate_constants_lower_bnds).flatten()
     pars_upper_bnds = np.array(rate_constants_upper_bnds).flatten()
+    rate_constants_bnds_exist = np.array(rate_constants_bnds_exist, dtype = bool) #Can't flatten() because these have to be retained as pairs;
 
     #Now we concatenate those with the rate_constant_parameters arrays.
-    pars_initial_guess = np.concatenate( (pars_initial_guess , np.array(rate_constants_parameters_initial_guess).flatten()) ) 
+    pars_values = np.concatenate( (pars_values , np.array(rate_constants_parameters_values).flatten()) ) 
     pars_lower_bnds = np.concatenate( (pars_lower_bnds, np.array(rate_constants_parameters_lower_bnds).flatten()) )
     pars_upper_bnds = np.concatenate( (pars_upper_bnds, np.array(rate_constants_parameters_upper_bnds).flatten()) ) 
-    return pars_initial_guess, pars_lower_bnds, pars_upper_bnds
+    pars_bnds_exist = np.concatenate( (rate_constants_bnds_exist, np.array(rate_constants_parameters_bnds_exist, dtype = bool) )) #Can't flatten() because these have to be retained as pairs; 
+
+    unbounded_indices = []  #need to make this even if it will not be populated.
+    if return_unbounded_indices==True:
+        if len(pars_bnds_exist)> 1: #If this is not a blank list, we're going to check each entry. For anything which has a "False", we are going to set the InputParametersPriorValuesUncertainties value to "-1" to indicate uniform since that means it can't be a Gaussian.
+            #pars_bnds_exist has values like [True False] for each parameter. [True False] would mean it has a lower bound but no upper bound.
+            for exist_index, lower_upper_booleans in enumerate(pars_bnds_exist):
+                if np.sum(lower_upper_booleans) < 2: #True True will add to 2, anything else does not pass.
+                    unbounded_indices.append(exist_index)
+        unbounded_indices = np.atleast_1d(np.array(unbounded_indices))
+
+    return pars_values, pars_lower_bnds, pars_upper_bnds, pars_bnds_exist, unbounded_indices
+
+def remove_unbounded_values(array_to_truncate, unbounded_indices):
+    print("line 164", unbounded_indices, array_to_truncate)
+    truncated_array = np.delete(array_to_truncate, unbounded_indices, axis=0)
+    print("line 164", unbounded_indices, truncated_array)
+    return truncated_array
diff --git a/src/optimize/CheKiPEUQ_integration_notes.txt b/src/optimize/CheKiPEUQ_integration_notes.txt
@@ -1,3 +1,21 @@
+Notes on Jan 23rd 2021
+
+* The temperature exponent (n) was coming in as "True True" for bounds because of an error in the bounds checking. I have changed the lines to as follows:
+
+            lb_exist = [x != min_neg_system_value for x in rxn_coef['coef_bnds']['lower']]
+            ub_exist = [x != max_pos_system_value for x in rxn_coef['coef_bnds']['upper']]
+
+* I have found that the "Ea" is having a first guess of 34 million, not matching the table.
+
+* It looks like the "global" optimization is not working very well with Bayesian right now, at least for the example I am playing with. Additionally, the global optimization seems to go on "forever" if there is no abort. It will be interesting to know what happens once the local optimization happens. Trying "no abort" criteria. 
+
+* I am concerned that a bad initial guess combined with a loss function might prevent a good optimization, unless a regular optimization was done before the Bayesian one. I do think there should be a facile way for the user to turn off the loss function when using Bayesian.
+
+* I have checked and the code seems to work correctly if I use several rate_constants' parameters at the same time. It might be worth testing this even more thoroughly.
+
+******
+Notes before Jan 23rd 2021
+
 General Notes: 
 1) I was mistaken about only a numpy dependence: we need scipy as well. But that is also standard for Anaconda so should not be hard to add.
 2) IMPORTANT: There is a flag at top top of fit_fcn.py to forceBayesian = True until the GUI has it. FOR NOW, SET THIS TO FALSE TO TURN BAYESIAN OFF, AND TO TRUE IF YOU WANT BAYESIAN ON.
@@ -25,3 +43,5 @@ Bayesian Notes: I have put comments labeling the below steps in the code.  To si
 NEXT STEPS AFTER THE ABOVE ITEMS ARE FIXED.
 
 #ATTENTION: Right now, the Bayesian parameter estimation is taking in the rate_val parameters and their bounds.  It is actually even better if we feed in the "logA" "n" and "Ea" values and their bounds.  I suggest that we fix the above issues first, and then do that after.  However, we should probably discuss what happens in fit_coeffs so I can implement the Bayesian part in the most logical way.
+
+
diff --git a/src/optimize/CheKiPEUQ_local/InverseProblem.py b/src/optimize/CheKiPEUQ_local/InverseProblem.py
@@ -109,8 +109,8 @@ def __init__(self, UserInput = None):
                     #We will also fill the model['InputParameterPriorValues'] to have the mean of the two bounds. This can matter for some of the scaling that occurs later.
                     self.UserInput.mu_prior[parameterIndex] = (UserInput.model['InputParameterPriorValues_upperBounds'][parameterIndex] + UserInput.model['InputParameterPriorValues_lowerBounds'][parameterIndex])/2
 
+        print("line 112 of CheKiPEUQ", UserInput.InputParametersPriorValuesUncertainties)
         #Now to make covmat. Leaving the original dictionary object intact, but making a new object to make covmat_prior.
-        print("line 114 of CheKiPEUQ", UserInput.InputParametersPriorValuesUncertainties)
         if len(np.shape(UserInput.InputParametersPriorValuesUncertainties)) == 1 and (len(UserInput.InputParametersPriorValuesUncertainties) > 0): #If it's a 1D array/list that is filled, we'll diagonalize it.
             UserInput.std_prior = np.array(UserInput.InputParametersPriorValuesUncertainties, dtype='float') #using 32 since not everyone has 64.
             UserInput.var_prior = np.power(UserInput.InputParametersPriorValuesUncertainties,2)