diff --git a/benchmarks/basic_tests.py b/benchmarks/basic_tests.py index 0a58dcc54..e1cf0f09f 100644 --- a/benchmarks/basic_tests.py +++ b/benchmarks/basic_tests.py @@ -354,6 +354,31 @@ def unc_file_test(): print(diff.sum()) assert diff.sum().max() < 1.0e-10 + cov.to_uncfile(os.path.join(m_d, "pest.unc"), covmat_file=None) + pst.control_data.noptmax = -2 + pst.pestpp_options["ies_num_reals"] = 100000 + pst.pestpp_options["ies_enforce_bounds"] = False + pst.write(os.path.join(m_d, "pest_unc.pst")) + pyemu.os_utils.run("{0} {1}".format(exe_path, "pest_unc.pst"), cwd=m_d) + pe_3 = pd.read_csv(os.path.join(m_d, "pest_unc.0.par.csv"), index_col=0).apply(np.log10) + print(pe_3.std(ddof=0)) + pe_std = pe_3.std(ddof=0) + for r,v in zip(cov.row_names,cov.x): + d = np.abs(pe_std.loc[r] - np.sqrt(v)) + + print(r,v,np.sqrt(v),d) + assert d < 0.01 + pst.control_data.noptmax = -1 + pst.write(os.path.join(m_d, "pest_unc.pst")) + pyemu.os_utils.run("{0} {1}".format(exe_path.replace("-ies","-glm"), "pest_unc.pst"), cwd=m_d) + fosm_df = pd.read_csv(os.path.join(m_d,"pest_unc.par.usum.csv"),index_col=0) + cov_df = cov.to_dataframe() + for pname,prior_std in zip(fosm_df.index,fosm_df.prior_stdev): + d = np.abs(prior_std - np.sqrt(cov_df.loc[pname,pname])) + print(pname,d) + assert d < 1.0e-4 + + def parchglim_test(): model_d = "ies_10par_xsec" @@ -1191,13 +1216,13 @@ def ins_missing_e_test(): #shutil.copy2(os.path.join("..","exe","windows","x64","Debug","pestpp-glm.exe"),os.path.join("..","bin","win","pestpp-glm.exe")) #shutil.copy2(os.path.join("..", "exe", "windows", "x64", "Debug", "pestpp-ies.exe"), # os.path.join("..", "bin", "win", "pestpp-ies.exe")) - ins_missing_e_test() + #ins_missing_e_test() #basic_test() #agnostic_path_test() #glm_long_name_test() #sen_plusplus_test() #parchglim_test() - #unc_file_test() + unc_file_test() #secondary_marker_test() #basic_test("ies_10par_xsec") #glm_save_binary_test() @@ -1223,7 +1248,7 @@ def ins_missing_e_test(): #da_mf6_freyberg_test_2() 
#shutil.copy2(os.path.join("..","exe","windows","x64","Debug","pestpp-ies.exe"),os.path.join("..","bin","win","pestpp-ies.exe")) #tplins1_test() - mf6_v5_ies_test() + #mf6_v5_ies_test() #mf6_v5_sen_test() #shutil.copy2(os.path.join("..","exe","windows","x64","Debug","pestpp-opt.exe"),os.path.join("..","bin","win","pestpp-opt.exe")) diff --git a/documentation/pestpp_users_guide_v5.1.6.docx b/documentation/pestpp_users_guide_v5.1.7.docx similarity index 69% rename from documentation/pestpp_users_guide_v5.1.6.docx rename to documentation/pestpp_users_guide_v5.1.7.docx index 02df653cf..50db6d4ae 100644 Binary files a/documentation/pestpp_users_guide_v5.1.6.docx and b/documentation/pestpp_users_guide_v5.1.7.docx differ diff --git a/documentation/pestpp_users_manual.md b/documentation/pestpp_users_manual.md index 807d44050..b340f5289 100644 --- a/documentation/pestpp_users_manual.md +++ b/documentation/pestpp_users_manual.md @@ -1,9 +1,9 @@ - A close up of a purple sign Description automatically generated + A close up of a purple sign Description automatically generated -# Version 5.1.6 +# Version 5.1.7 - + PEST++ Development Team @@ -70,7 +70,7 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI # Table of Contents -- [Version 5.1.6](#s1) +- [Version 5.1.7](#s1) - [Acknowledgements](#s2) - [Preface](#s3) - [License](#s4) @@ -153,162 +153,168 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI - [4.15 Prior Information Section](#s8-15) - [4.16 Regularization Section](#s8-16) - [4.17 Control Variables for PEST++ Programs ](#s8-17) -- [](#s9) - - [4.18 Keyword and External File Control File Format](#s9-1) - - [4.18.1 Keyword and Consolidated Algorithmic Variables](#s9-2) - - [4.18.2 External file support](#s9-3) -- [](#s10) -- [5. 
Running PEST++ Programs](#s11) - - [5.1 General](#s11-1) - - [5.2 Model Runs in Serial](#s11-2) - - [5.2.1 Concepts](#s11-2-1) - - [5.2.2 Running PESTPP-XXX](#s11-2-2) - - [5.3 Model Runs in Parallel](#s11-3) - - [5.3.1 Concepts](#s11-3-1) - - [5.3.2 Manager to Agent Communication](#s11-3-2) - - [5.3.3 Running PESTPP-XXX as Manager and Agent](#s11-3-3) - - [5.3.4 Run Management Record File](#s11-3-4) - - [5.3.5 Run Management Control Variables ](#s11-3-5) - - [5.4 Run Book-Keeping Files](#s11-4) -- [6. PESTPP-GLM](#s12) - - [6.1 Introduction](#s12-1) - - [6.2.1 Basic Equations](#s12-1-1) - - [6.2.2 Choosing the Regularization Weight Factor](#s12-1-2) - - [6.2.3 Inter-Regularization Group Weighting](#s12-1-3) - - [6.2.4 Choosing Values for the Marquardt Lambda](#s12-1-4) - - [6.2.5 Singular Value Decomposition](#s12-1-5) - - [6.2.6 SVD-Assist ](#s12-1-6) - - [6.2.7 Expediting the First Iteration](#s12-1-7) - - [6.2.8 First Order, Second Moment Uncertainty Analysis and Monte Carlo](#s12-1-8) - - [6.2.9 Model Run Failure](#s12-1-9) - - [6.2.10 Composite Parameter Sensitivities](#s12-1-10) - - [6.2.11 Other Controls](#s12-1-11) - - [6.2.12 Running PESTPP-GLM](#s12-1-12) - - [6.2.13 PESTPP-GLM Output Files](#s12-1-13) - - [6.3.4 Running PESTPP](#s12-1-14) - - [6.3.5 PESTPP-GLM Output Files](#s12-1-15) - - [6.4 Summary of PESTPP-GLM Control Variables](#s12-2) - - [6.4.1 General](#s12-2-1) - - [6.4.2 Control Variables in the PEST Control File ](#s12-2-2) - - [6.4.3 PEST++ Control Variables](#s12-2-3) -- [7. PESTPP-SEN](#s13) - - [7.1 Introduction](#s13-1) - - [7.1.1 General](#s13-1-1) - - [7.1.2 Grouped Parameters](#s13-1-2) - - [7.2 Method of Morris](#s13-2) - - [7.2.1 Elementary Effects](#s13-2-1) - - [7.2.2 Sampling Scheme](#s13-2-2) - - [7.2.3 Control Variables](#s13-2-3) - - [7.3 Method of Sobol](#s13-3) - - [7.3.1 Sensitivity Indices](#s13-3-1) - - [7.3.2 Control Variables](#s13-3-2) - - [7.4 PESTPP-SEN Output Files](#s13-4) -- [8. 
PESTPP-OPT](#s14) - - [8.1 Introduction](#s14-1) - - [8.1.1 A Publication](#s14-1-1) - - [8.1.2 Overview](#s14-1-2) - - [8.1.3 Calculation of Uncertainty](#s14-1-3) - - [8.1.4 Optimization](#s14-1-4) - - [8.1.5 Chance Constraints](#s14-1-5) - - [8.2 Using PESTPP-OPT](#s14-2) - - [8.2.1The PEST Control File ](#s14-2-1) - - [8.2.2 Decision Variables and Parameters](#s14-2-2) - - [8.2.3 Defining the Objective Function](#s14-2-3) - - [8.2.4 Constraints](#s14-2-4) - - [8.2.5 Observations](#s14-2-5) - - [8.2.6 Regularization ](#s14-2-6) - - [8.2.7 Prior Covariance Matrix](#s14-2-7) - - [8.2.8 Risk](#s14-2-8) - - [8.2.9 Jacobian and Response Matrices](#s14-2-9) - - [8.2.10 Solution Convergence](#s14-2-10) - - [8.2.11 Other Control Variables](#s14-2-11) - - [8.2.12 Final Model Run](#s14-2-12) - - [8.2.13 Restarts](#s14-2-13) - - [8.2.14 Zero Run Solution](#s14-2-14) - - [8.3 PESTPP-OPT Output Files](#s14-3) - - [8.4 Summary of Control Variables](#s14-4) -- [9. PESTPP-IES](#s15) - - [9.1 Introduction](#s15-1) - - [9.1.1 Publications](#s15-1-1) - - [9.1.2 Overview](#s15-1-2) - - [9.1.3 Ensemble Kalman Filters and Ensemble Smoothers](#s15-1-3) - - [9.1.4 Some Repercussions of Using Ensembles](#s15-1-4) - - [9.1.5 Iterations](#s15-1-5) - - [9.1.6 Measurement Noise](#s15-1-6) - - [9.1.7 Regularization](#s15-1-7) - - [9.1.8 Base Realization](#s15-1-8) - - [9.1.9 Parameter Transformation Status](#s15-1-9) - - [9.1.10 Inequality Observations](#s15-1-10) - - [9.1.11 Localization](#s15-1-11) - - [9.1.12 Use of observation noise covariance matrices](#s15-1-12) - - [9.1.13 Detecting and resolving prior-data conflict](#s15-1-13) - - [9.1.14 Multi-modal solution process](#s15-1-14) - - [9.2 Using PESTPP-IES](#s15-2) - - [9.2.1 General](#s15-2-1) - - [9.2.2 Initial Realizations](#s15-2-2) - - [9.2.3 “Regularization”](#s15-2-3) - - [9.2.4 Prior Parameter Scaling](#s15-2-4) - - [9.2.5 The Marquardt Lambda](#s15-2-5) - - [9.2.6 Restarting](#s15-2-6) - - [9.2.7 Failed Model Runs](#s15-2-7) - 
- [9.2.8 Reporting ](#s15-2-8) - - [9.2.9 Termination Criteria, Objective Functions, and Upgrade Acceptance ](#s15-2-9) - - [9.3 PESTPP-IES Output Files](#s15-3) - - [9.3.1 CSV Output Files](#s15-3-1) - - [9.3.2 Non-CSV Output Files](#s15-3-2) - - [9.4 Summary of Control Variables](#s15-4) -- [10. PESTPP-SWP](#s16) - - [10.1 Introduction](#s16-1) - - [10.2 Using PESTPP-SWP](#s16-2) - - [10.3 Summary of Control Variables](#s16-3) - - [11.1 Introduction](#s16-4) - - [11.1.2 Multi-Objective Particle Swarm optimization](#s16-4-1) - - [11.1.2 Decision Variable Transformations](#s16-4-2) - - [11.1 Using PESTPP-PSO](#s16-5) - - [11.1.1 General](#s16-5-1) - - [11.1.2 Estimation Mode](#s16-5-2) - - [11.2.3. Pareto mode](#s16-5-3) - - [](#s16-6) - - [11.2 PESTPP-PSO Output Files](#s16-7) -- [](#s17) -- [12. PESTPP-DA](#s18) - - [12.1 Introduction](#s18-1) - - [12.2 Theory](#s18-2) - - [12.2.1 Background and Basic Equations](#s18-2-1) - - [12.2.2 Schemes for Assimilating Temporal Data](#s18-2-2) - - [12.2.2.1 Batch Data Assimilation with PESTPP-DA](#s18-2-3) - - [12.2.2.2 Sequential Data Assimilation with PESTPP-DA](#s18-2-4) - - [12.2.4 State estimation, parameter estimation and joint state-parameter estimation](#s18-2-5) - - [12.2.4 Parameter, Observation and Weight Cycle Tables](#s18-2-6) - - [12.2.5 Steps for Data Assimilation implementation](#s18-2-7) - - [12.2.12 Running PESTPP-DA](#s18-2-8) - - [12.2.13 Other uses for PESTPP-DA](#s18-2-9) - - [12.2.14 PESTPP-DA Output Files](#s18-2-10) - - [12.4 Summary of PESTPP-DA Control Variables](#s18-3) - - [12.4.1 General](#s18-3-1) - - [12.4.2 Control Variables in the PEST Control File ](#s18-3-2) - - [12.4.3 PEST++ Control Variables](#s18-3-3) -- [13. 
PESTPP-MOU](#s19) - - [13.1 Introduction](#s19-1) - - [13.2 Theory](#s19-2) - - [13.2.1 Background and Basic Equations](#s19-2-1) - - [13.2.2 Evaluating chances in a population-based algorithm](#s19-2-2) - - [](#s19-2-3) - - [13.2.3 PESTPP-MOU workflow](#s19-2-4) - - [13.2.4 Advanced functionality](#s19-2-5) - - [13.2.5 Running PESTPP-MOU](#s19-2-6) - - [13.2.6 PESTPP-DA Output Files](#s19-2-7) - - [](#s19-3) - - [13.4 Summary of PESTPP-MOU Control Variables](#s19-4) - - [13.4.1 General](#s19-4-1) - - [13.4.2 Control Variables in the PEST Control File ](#s19-4-2) - - [13.4.3 PEST++ Control Variables](#s19-4-3) -- [14. References](#s20) + - [4.18 Keyword and External File Control File Format](#s8-18) + - [4.18.1 Keyword and Consolidated Algorithmic Variables](#s8-19) + - [4.18.2 External file support](#s8-20) +- [5. Running PEST++ Programs](#s9) + - [5.1 General](#s9-1) + - [5.2 Model Runs in Serial](#s9-2) + - [5.2.1 Concepts](#s9-2-1) + - [5.2.2 Running PESTPP-XXX](#s9-2-2) + - [5.3 Model Runs in Parallel](#s9-3) + - [5.3.1 Concepts](#s9-3-1) + - [5.3.2 Manager to Agent Communication](#s9-3-2) + - [5.3.3 Running PESTPP-XXX as Manager and Agent](#s9-3-3) + - [5.3.4 Run Management Record File](#s9-3-4) + - [5.3.5 Run Management Control Variables ](#s9-3-5) + - [5.4 Run Book-Keeping Files](#s9-4) +- [6. 
PESTPP-GLM](#s10) + - [6.1 Introduction](#s10-1) + - [6.2 Highly Parameterized Inversion](#s10-2) + - [6.2.1 Basic Equations](#s10-2-1) + - [6.2.2 Choosing the Regularization Weight Factor](#s10-2-2) + - [6.2.3 Inter-Regularization Group Weighting](#s10-2-3) + - [6.2.4 Choosing Values for the Marquardt Lambda](#s10-2-4) + - [6.2.5 Singular Value Decomposition](#s10-2-5) + - [6.2.6 SVD-Assist ](#s10-2-6) + - [6.2.7 Expediting the First Iteration](#s10-2-7) + - [6.2.8 First Order, Second Moment Uncertainty Analysis and Monte Carlo](#s10-2-8) + - [6.2.9 Model Run Failure](#s10-2-9) + - [6.2.10 Composite Parameter Sensitivities](#s10-2-10) + - [6.2.11 Other Controls](#s10-2-11) + - [6.2.12 Running PESTPP-GLM](#s10-2-12) + - [6.2.13 PESTPP-GLM Output Files](#s10-2-13) + - [6.3.4 Running PESTPP](#s10-2-14) + - [6.3.5 PESTPP-GLM Output Files](#s10-2-15) + - [6.4 Summary of PESTPP-GLM Control Variables](#s10-3) + - [6.4.1 General](#s10-3-1) + - [6.4.2 Control Variables in the PEST Control File ](#s10-3-2) + - [6.4.3 PEST++ Control Variables](#s10-3-3) +- [7. PESTPP-SEN](#s11) + - [7.1 Introduction](#s11-1) + - [7.1.1 General](#s11-1-1) + - [7.1.2 Grouped Parameters](#s11-1-2) + - [7.2 Method of Morris](#s11-2) + - [7.2.1 Elementary Effects](#s11-2-1) + - [7.2.2 Sampling Scheme](#s11-2-2) + - [7.2.3 Control Variables](#s11-2-3) + - [7.3 Method of Sobol](#s11-3) + - [7.3.1 Sensitivity Indices](#s11-3-1) + - [7.3.2 Control Variables](#s11-3-2) + - [7.4 PESTPP-SEN Output Files](#s11-4) +- [8. 
PESTPP-OPT](#s12) + - [8.1 Introduction](#s12-1) + - [8.1.1 A Publication](#s12-1-1) + - [8.1.2 Overview](#s12-1-2) + - [8.1.3 Calculation of Uncertainty](#s12-1-3) + - [8.1.4 Optimization](#s12-1-4) + - [8.1.5 Chance Constraints](#s12-1-5) + - [8.2 Using PESTPP-OPT](#s12-2) + - [8.2.1The PEST Control File ](#s12-2-1) + - [8.2.2 Decision Variables and Parameters](#s12-2-2) + - [8.2.3 Defining the Objective Function](#s12-2-3) + - [8.2.4 Constraints](#s12-2-4) + - [8.2.5 Observations](#s12-2-5) + - [8.2.6 Regularization ](#s12-2-6) + - [8.2.7 Prior Covariance Matrix](#s12-2-7) + - [8.2.8 Risk](#s12-2-8) + - [8.2.9 Jacobian and Response Matrices](#s12-2-9) + - [8.2.10 Solution Convergence](#s12-2-10) + - [8.2.11 Other Control Variables](#s12-2-11) + - [8.2.12 Final Model Run](#s12-2-12) + - [8.2.13 Restarts](#s12-2-13) + - [8.2.14 Zero Run Solution](#s12-2-14) + - [8.3 PESTPP-OPT Output Files](#s12-3) + - [8.4 Summary of Control Variables](#s12-4) +- [9. PESTPP-IES](#s13) + - [9.1 Introduction](#s13-1) + - [9.1.1 Publications](#s13-1-1) + - [9.1.2 Overview](#s13-1-2) + - [9.1.3 Ensemble Kalman Filters and Ensemble Smoothers](#s13-1-3) + - [9.1.4 Some Repercussions of Using Ensembles](#s13-1-4) + - [9.1.5 Iterations](#s13-1-5) + - [9.1.6 Measurement Noise](#s13-1-6) + - [9.1.7 Regularization](#s13-1-7) + - [9.1.8 Base Realization](#s13-1-8) + - [9.1.9 Parameter Transformation Status](#s13-1-9) + - [9.1.10 Inequality Observations](#s13-1-10) + - [9.1.11 Localization](#s13-1-11) + - [9.1.12 Use of observation noise covariance matrices](#s13-1-12) + - [9.1.13 Detecting and resolving prior-data conflict](#s13-1-13) + - [9.1.14 Multi-modal solution process](#s13-1-14) + - [9.2 Using PESTPP-IES](#s13-2) + - [9.2.1 General](#s13-2-1) + - [9.2.2 Initial Realizations](#s13-2-2) + - [9.2.3 “Regularization”](#s13-2-3) + - [9.2.4 Prior Parameter Scaling](#s13-2-4) + - [9.2.5 The Marquardt Lambda](#s13-2-5) + - [9.2.6 Restarting](#s13-2-6) + - [9.2.7 Failed Model Runs](#s13-2-7) + 
- [9.2.8 Reporting ](#s13-2-8) + - [9.2.9 Termination Criteria, Objective Functions, and Upgrade Acceptance ](#s13-2-9) + - [9.3 PESTPP-IES Output Files](#s13-3) + - [9.3.1 CSV Output Files](#s13-3-1) + - [9.3.2 Non-CSV Output Files](#s13-3-2) + - [9.4 Summary of Control Variables](#s13-4) +- [10. PESTPP-SWP](#s14) + - [10.1 Introduction](#s14-1) + - [10.2 Using PESTPP-SWP](#s14-2) + - [10.3 Summary of Control Variables](#s14-3) +- [PESTPP-PSO](#s15) + - [11.1 Introduction](#s15-1) + - [11.1.2 Multi-Objective Particle Swarm optimization](#s15-1-1) + - [11.1.2 Decision Variable Transformations](#s15-1-2) + - [11.1 Using PESTPP-PSO](#s15-2) + - [11.1.1 General](#s15-2-1) + - [11.1.2 Estimation Mode](#s15-2-2) + - [11.2.3. Pareto mode](#s15-2-3) + - [11.2 PESTPP-PSO Output Files](#s15-3) +- [12. PESTPP-DA](#s16) + - [12.1 Introduction](#s16-1) + - [12.2 Theory](#s16-2) + - [12.2.1 Background and Basic Equations](#s16-2-1) + - [12.2.2 Schemes for Assimilating Temporal Data](#s16-2-2) + - [12.2.2.1 Batch Data Assimilation with PESTPP-DA](#s16-2-3) + - [12.2.2.2 Sequential Data Assimilation with PESTPP-DA](#s16-2-4) + - [12.2.4 State estimation, parameter estimation and joint state-parameter estimation](#s16-2-5) + - [12.2.4 Parameter, Observation and Weight Cycle Tables](#s16-2-6) + - [12.2.5 Steps for Data Assimilation implementation](#s16-2-7) + - [12.2.12 Running PESTPP-DA](#s16-2-8) + - [12.2.13 Other uses for PESTPP-DA](#s16-2-9) + - [12.2.14 PESTPP-DA Output Files](#s16-2-10) + - [12.4 Summary of PESTPP-DA Control Variables](#s16-3) + - [12.4.1 General](#s16-3-1) + - [12.4.2 Control Variables in the PEST Control File ](#s16-3-2) + - [12.4.3 PEST++ Control Variables](#s16-3-3) +- [13. 
PESTPP-MOU](#s17) + - [13.1 Introduction](#s17-1) + - [13.2 Theory](#s17-2) + - [13.2.1 Background and Basic Equations](#s17-2-1) + - [13.2.2 Evaluating chances in a population-based algorithm](#s17-2-2) + - [](#s17-2-3) + - [13.2.3 PESTPP-MOU workflow](#s17-2-4) + - [13.2.4 Advanced functionality](#s17-2-5) + - [13.2.5 Running PESTPP-MOU](#s17-2-6) + - [13.2.6 PESTPP-DA Output Files](#s17-2-7) + - [13.4 Summary of PESTPP-MOU Control Variables](#s17-3) + - [13.4.1 General](#s17-3-1) + - [13.4.2 Control Variables in the PEST Control File ](#s17-3-2) + - [13.4.3 PEST++ Control Variables](#s17-3-3) +- [14. References](#s18) +- [Appendix A. PEST Control File Specifications](#s19) +- [Appendix B. Some File Formats](#s20) + - [B.1 Introduction](#s20-1) + - [B.2 Matrix File](#s20-2) + - [B.3 Uncertainty Files](#s20-3) + - [B.4 JCO File](#s20-4) + - [B.5 JCB File](#s20-5) # 1. Introduction + ## 1.1 PEST++ and PEST + The name “PEST++” refers to a suite of programs which have some things in common but which are also very different from each other. The names of all programs which comprise the suite begin with “PESTPP”. PEST stands for “Parameter ESTimation”. PEST was released in 1995; it has been continually improved since then. It undertakes highly parameterized inversion of environmental models. In doing so, it runs a model many times, either sequentially or in parallel. It does this in a non-intrusive manner. Before it runs a model, it records parameter values that it wishes the model to use on that particular run via input files required by the model. User-prepared template files of model input files guide it in this task. After the model run is complete, PEST reads numbers from model output files which it then compares with field measurements. User-prepared instruction files guide it in this task. All other information which PEST requires is recorded in a PEST control file. 
@@ -341,6 +347,7 @@ If salient to the tasks which they implement, members of the PEST++ suite write ## 1.2 Software Installation + Copy PEST++ executable programs to a suitable folder on your hard disk. Then make sure that this folder is cited in the PATH environment variable, so that your computer can find them regardless of your current working folder. Source code, Visual Studio solution files and UNIX makefiles for PEST++ suite programs are also freely available. @@ -349,6 +356,7 @@ With the exception of PESTPP-PSO, the PEST++ tools are C++. Users who want to co ## 1.3 This Document + This document constitutes a manual for programs of the PEST++ suite. As such, it records their capabilities and lists variables that control their operation. However, while it discusses the algorithms which they implement to some extent, it does not describe them in detail. Instead, reference is made to publications where these explanations are made. These texts thus constitute recommended reading for those who wish to take full advantage of PEST++ software. Another relevant publication is the “PEST Book” (Doherty, 2015). This provides an extensive theoretical overview of model calibration and linear/nonlinear predictive uncertainty analysis. It also discusses the role that modelling can play in environmental decision support. To avoid excessive cross-referencing, parts of the PEST manual are reproduced in this document. This allows a user of the PEST++ suite to dispense with the need to read the manual for PEST in addition to the present text. However, the reader’s attention is drawn to Part 2 of the PEST manual (Doherty, 2018b) which describes PEST utility support software. Attention should also be given to documentation of the PEST Groundwater Data Utilities. At the time of writing, both of these utility suites are undergoing expansion in order to provide better support for members of the PEST++ suite. 
@@ -357,6 +365,7 @@ PEST++ users should also be aware of setup and processing assistance provided by ## 1.4 A Model: Some Considerations + ### 1.4.1 Running a Model In the course of estimating its parameters, optimizing its decision variables, and/or computing sensitivities of model outputs to its parameters, members of the PEST++ suite must run a model many times. They can do this in “serial” (one at a time), or they can run the model parallel, distributed across several cores and/or multiple machines. In either case, the model is run by submitting its command line to the operating system through a so-called “system call”. This call has the same effect as typing the name of the model (and any arguments required by the model which follow its name) in a command line window. Hence the model must be accessible to a user (and therefore to the pertinent PEST++ program) through the command line. Ideally, the folder (i.e., the directory) in which the model executable resides should be featured in the PATH environment variable so that the operating system knows where to find it. @@ -375,7 +384,7 @@ at the command line prompt for “start” command details. If, on commencement of execution, a model prompts the user for keyboard input, this situation can be easily accommodated through input redirection in the batch or shell script environment. The keyboard responses to a model’s prompts can be placed in a text file. Suppose that this file is named *model.inp*. Suppose also that the name of the model executable file is *model.exe*, or simply *model* on a UNIX platform. Then if the model is run using the command -model \< model.inp +model < model.inp the model will look to file *model.inp*, rather than to the keyboard, for its input. A PEST++ program can therefore run the model without the need for any user involvement. 
On UNIX-based platforms, if the forward run command is a binary or script located in the directory where PEST++ is running, it may be necessary to add a preceding “./” (dot forward slash). @@ -409,12 +418,15 @@ In PEST and PEST++ parlance, any number that is read from a model output file is # 2. The PEST(++) Model Interface + ## 2.1 Introduction + This chapter reproduces material from the seventh edition of the PEST manual. This reflects the fact that programs of both the PEST and PEST++ suites employ template and instruction files as the basis for their non-intrusive model interface. ## 2.2 PEST++ Input Files + Programs of both the PEST and PEST++ suites require three types of input file. These are: - template files, one for each model input file in which parameters or decision variables reside; @@ -429,6 +441,7 @@ In order to make the remainder of this chapter easier to read, the word “param ## 2.3 Template Files + ### 2.3.1 Model Input Files Whenever a PEST++ program runs a model, as it must do many times in the course of carrying out the parameter estimation, uncertainty analysis, or optimization process for which it was designed, it must first write the values of parameters to the model input files which hold them. The model can thus access these values as it would on any other occasion that it is run. @@ -445,11 +458,11 @@ A template file receives its name from the fact that it is simply a replica of a Consider the model input file shown in figure 2.1; this file supplies data to a program which computes the “apparent resistivity” on the surface of a layered half-space for different surface electrode configurations. Suppose that we wish to use this program (i.e., model) in an inversion process through which properties of each of three half-space layers are estimated from apparent resistivity data collected on the surface of the half-space. 
The parameters for which we want estimates are the resistivity and thickness of the upper two layers and the resistivity of the third (its thickness is infinite). A suitable template file appears in figure 2.2. -
MODEL INPUT FILE
3, 19 no. of layers, no. of spacings
1.0, 1.0 resistivity, thickness: layer 1
40.0, 20.0 resistivity, thickness: layer 2
5.0 resistivity: layer 3
1.0 electrode spacings
1.47
2.15
3.16
4.64
6.81
10.0
14.9
21.5
31.6
46.4
68.1
100
149
215
316
464
681
1000
+
MODEL INPUT FILE
3, 19 no. of layers, no. of spacings
1.0, 1.0 resistivity, thickness: layer 1
40.0, 20.0 resistivity, thickness: layer 2
5.0 resistivity: layer 3
1.0 electrode spacings
1.47
2.15
3.16
4.64
6.81
10.0
14.7
21.5
31.6
46.4
68.1
100
147
215
316
464
681
1000
Figure 2.1 A model input file. -
ptf ~
MODEL INPUT FILE
3, 19 no. of layers, no. of spacings
~res1 ~,~t1 ~ resistivity, thickness: layer 1
~res2 ~,~t2 ~ resistivity, thickness: layer 2
~res3 ~ resistivity: layer 3
1.0 electrode spacings
1.47
2.15
3.16
4.64
6.81
10.0
14.9
21.5
31.6
46.4
68.1
100
149
215
316
464
681
1000
+
ptf ~
MODEL INPUT FILE
3, 19 no. of layers, no. of spacings
~res1 ~,~t1 ~ resistivity, thickness: layer 1
~res2 ~,~t2 ~ resistivity, thickness: layer 2
~res3 ~ resistivity: layer 3
1.0 electrode spacings
1.47
2.15
3.16
4.64
6.81
10.0
14.7
21.5
31.6
46.4
68.1
100
147
215
316
464
681
1000
Figure 2.2 A template file. @@ -475,13 +488,13 @@ Generally, a model reads numbers from an input file in either of two ways, namel The FORTRAN code of figure 2.3 directs a program to read five real numbers. The first three are read using a format specifier, whereas the last two are read in free field fashion. -
READ(20,100) A,B,C
100 FORMAT(3F10.0)
READ(20,*) D,E
+
READ(20,100) A,B,C
100 FORMAT(3F10.0)
READ(20,*) D,E
Figure 2.3 Formatted and free field input. The relevant part of the model input file may be as illustrated in figure 2.4. -
6.32 1.42E-05123.456789
34.567, 1.2E17
+
6.32 1.42E-05123.456789
34.567, 1.2E17
Figure 2.4 Numbers read using the code of figure 2.3. @@ -489,7 +502,7 @@ Notice how no whitespace or comma is needed between numbers which are read using Suppose all of variables *A* to *E* are model parameters, and that a PEST++ program has been assigned the task of estimating them. For convenience we provide the same names for these parameters as those that are used by the model code (this, of course, will not normally be the case). The template fragment corresponding to figure 2.4 may then be as set out in figure 2.5. Notice how the parameter space for each of parameters *A*, *B* and *C* is 10 characters wide, and that the parameter spaces abut each other in accordance with the expectations of the model as defined through the format specifier of figure 2.3. If the parameter space for any of these parameters is greater than 10 characters in width, then the PEST++ program, when it replaces each parameter space by the current parameter value, would construct a model input file which would be incorrectly read by the model. (You could have designed parameter spaces to be less than 10 characters wide if you wished, as long as you placed enough whitespace between each parameter space in order that the number which will replace each such space when the PEST++ program writes the model input file falls within the field expected by the model. However, defining the parameter spaces in this way would achieve nothing, as there would be no advantage in using less than the full 10 characters allowed by the model.) -
~ A ~~ B ~~ C ~
~ D ~, ~ E ~
+
~ A ~~ B ~~ C ~
~ D ~, ~ E ~
Figure 2.5 Fragment of a template file corresponding to parameters represented in figure 2.4. @@ -503,7 +516,7 @@ Similarly, numbers read through field-specifying format statements may not occup Programs of the PEST++ suite write as many significant figures to a parameter space as they can. They do this so that even if a parameter space must be small in order to satisfy the input field requirements of a model, there is still every chance that a parameter value can be distinguished from its incrementally varied counterpart so as to allow proper derivatives calculation with respect to that parameter. Also, as has already been discussed, even though PEST++ programs adjust their internal representation of a parameter value to the precision with which the model can read it so that the PEST++ program and the model are using the same number, in general more precision is better. -PEST provides two control variables, PRECIS and DPOINT, that affect the manner in which numbers fill a parameter space. These can optimize representation of numbers that are written to parameter spaces that are very broad or very narrow. Programs of the PEST++ suite do not use these control variables. Instead, the PEST++ suite declares PRECIS to be exclusively “double”. Furthermore, where parameter spaces are greater than about 23 characters in width (which can arise often because of PEST++ support for unlimited length parameter names), numbers are padded to the left with zeroes to fill the space (for example, “000001.2345678901234e+001”). Testing has indicated that most numerical models, as well as pre- and post-processing codes (written in languages such as Python) tolerate this padding. This padding is off by default and is controlled with the *fill_tpl_zeros* option. +PEST provides two control variables, PRECIS and DPOINT, that affect the manner in which numbers fill a parameter space. 
These can optimize representation of numbers that are written to parameter spaces that are very broad or very narrow. Programs of the PEST++ suite do not use these control variables. Instead, the PEST++ suite declares PRECIS to be exclusively “double”. Furthermore, where parameter spaces are greater than about 23 characters in width (which can arise often because of PEST++ support for unlimited length parameter names), numbers can be padded to the left with zeroes to fill the space (for example, “000001.2345678901234e+001”). Testing has indicated that most numerical models, as well as pre- and post-processing codes (written in languages such as Python) tolerate this padding. This padding is off by default and is controlled with the *fill\_tpl\_zeros* option. ###
2.3.7 Multiple Occurrences of the Same Parameter @@ -521,11 +534,12 @@ Note that TEMPCHEK, like PEST, sets a 12-character limit on the length of parame ## 2.4 Instruction Files + Of the possibly voluminous amounts of information that a model may write to its output file(s), PEST++ programs are interested in only a few numbers. As has already been discussed, these can be numbers for which corresponding field or laboratory data are available. Alternatively, they can be model predictions of particular interest, model-calculated values to which optimization constraints must be applied, model outputs for which sensitivities with respect to parameters are required, or simply model outputs whose values you need to know. These particular model-generated numbers are referred to as “observations” or “model-generated observations” in the discussion which follows. For every model output file containing observations, you must provide an instruction file containing the directions which PEST++ programs must follow in order to read that file. -Some models write some or all of their output data to the terminal. You can redirect this screen output to a file using the “>” symbol. You can teach a PEST++ program how to read this file using a matching instruction file in the usual manner. +Some models write some or all of their output data to the terminal. You can redirect this screen output to a file using the “>” symbol. You can teach a PEST++ program how to read this file using a matching instruction file in the usual manner. It is suggested that instruction files be provided with the extension *.ins* in order to distinguish them from other types of files. @@ -549,11 +563,11 @@ Markers can be of either primary or secondary type. PEST++ programs use a primar Figure 2.6 shows an output file written by the model whose input file appears in figure 2.1. 
Suppose that we wish to estimate the parameters appearing in the template file of figure 2.2 (i.e., the resistivities of the three half-space layers and the thicknesses of the upper two) by comparing apparent resistivities generated by the model with a set of apparent resistivities provided by field measurements. Then we need to provide instructions which teach PEST++ programs how to read each of the apparent resistivities appearing in figure 2.6. An appropriate instruction file is shown in figure 2.7. -
SCHLUMBERGER ELECTRIC SOUNDING
Apparent resistivities calculated using the linear filter method
electrode spacing apparent resistivity
1.00 1.21072
1.47 1.51313
2.15 2.07536
3.16 2.95097
4.64 4.19023
6.81 5.87513
10.0 8.08115
14.7 10.8029
21.5 13.8229
31.6 16.5158
46.4 17.7689
68.1 16.4943
100. 12.8532
147. 8.79979
215. 6.30746
316. 5.40524
464. 5.15234
681. 5.06595
1000. 5.02980
+
SCHLUMBERGER ELECTRIC SOUNDING
Apparent resistivities calculated using the linear filter method
electrode spacing apparent resistivity
1.00 1.21072
1.47 1.51313
2.15 2.07536
3.16 2.95097
4.64 4.19023
6.81 5.87513
10.0 8.08115
14.7 10.8029
21.5 13.8229
31.6 16.5158
46.4 17.7689
68.1 16.4943
100. 12.8532
147. 8.79979
215. 6.30746
316. 5.40524
464. 5.15234
681. 5.06595
1000. 5.02980
Figure 2.6 A model output file. -
pif @
@electrode@
l1 [ar1]21:27
l1 [ar2]21:27
l1 [ar3]21:27
l1 [ar4]21:27
l1 [ar5]21:27
l1 [ar6]21:27
l1 [ar7]21:27
l1 [ar8]21:27
l1 [ar9]21:27
l1 [ar10]21:27
l1 [ar11]21:27
l1 [ar12]21:27
l1 [ar13]21:27
l1 [ar14]21:27
l1 [ar15]21:27
l1 [ar16]21:27
l1 [ar17]21:27
l1 [ar18]21:27
l1 [ar19]21:27
+
pif @
@electrode@
l1 [ar1]21:27
l1 [ar2]21:27
l1 [ar3]21:27
l1 [ar4]21:27
l1 [ar5]21:27
l1 [ar6]21:27
l1 [ar7]21:27
l1 [ar8]21:27
l1 [ar9]21:27
l1 [ar10]21:27
l1 [ar11]21:27
l1 [ar12]21:27
l1 [ar13]21:27
l1 [ar14]21:27
l1 [ar15]21:27
l1 [ar16]21:27
l1 [ar17]21:27
l1 [ar18]21:27
l1 [ar19]21:27
Figure 2.7 A PEST instruction file. @@ -590,11 +604,11 @@ A primary marker may be the only item on an instruction line, or it may precede Primary markers can provide a useful means of navigating a model output file. Consider the extract from a model output file shown in figure 2.8 (the dots replace one or a number of lines not shown in the example in order to conserve space). The instruction file extract shown in figure 2.9 provides a means to read the numbers comprising the third solution vector. Notice how the “SOLUTION VECTOR” primary marker is preceded by the “PERIOD NO. 3” primary marker. The latter marker is used purely to establish a reference point from which a search can be made for the “SOLUTION VECTOR” marker; if this reference point were not established (using either a primary marker or line advance item) the program which is perusing the file would read the solution vector pertaining to a previous time period. -
TIME PERIOD NO. 1 --->
.
.
SOLUTION VECTOR:
1.43253 6.43235 7.44532 4.23443 91.3425 3.39872
.
.
TIME PERIOD NO. 2 --->
.
.
SOLUTION VECTOR
1.34356 7.59892 8.54195 5.32094 80.9443 5.49399
.
.
TIME PERIOD NO. 3 --->
.
.
SOLUTION VECTOR
2.09485 8.49021 9.39382 6.39920 79.9482 6.20983
+
TIME PERIOD NO. 1 --->
.
.
SOLUTION VECTOR:
1.43253 6.43235 7.44532 4.23443 91.3425 3.39872
.
.
TIME PERIOD NO. 2 --->
.
.
SOLUTION VECTOR
1.34356 7.59892 8.54195 5.32094 80.9443 5.49399
.
.
TIME PERIOD NO. 3 --->
.
.
SOLUTION VECTOR
2.09485 8.49021 9.39382 6.39920 79.9482 6.20983
Figure 2.8 Extract from a model output file. -
pif *
.
.
*PERIOD NO. 3*
*SOLUTION VECTOR*
l1 (obs1)5:10 (obs2)12:17 (obs3)21:28 (obs4)32:37 (obs5)41:45
& (obs6)50:55
.
.
+
pif *
.
.
*PERIOD NO. 3*
*SOLUTION VECTOR*
l1 (obs1)5:10 (obs2)12:17 (obs3)21:28 (obs4)32:37 (obs5)41:45
& (obs6)50:55
.
.
Figure 2.9 Extract from an instruction file. @@ -612,23 +626,23 @@ A secondary marker is a marker which does not occupy the first position of a PES Figure 2.10 shows an extract from a model output file while figure 2.11 shows the instructions necessary to read the potassium concentration from this output file. A primary marker is used to place the cursor on the line above that on which the calculated concentrations are recorded for the distance in which we are interested. Then the program which reads the file is directed to advance one line and read the number following the “K:” string in order to find an observation named “kc”; the exclamation marks surrounding “kc” will be discussed shortly. -
.
.
DISTANCE = 20.0: CATION CONCENTRATIONS:-
Na: 3.49868E-2 Mg: 5.987638E-2 K: 9.987362E-3
.
.
+
.
.
DISTANCE = 20.0: CATION CONCENTRATIONS:-
Na: 3.49868E-2 Mg: 5.987638E-2 K: 9.987362E-3
.
.
Figure 2.10 Extract from a model output file. -
pif ~
.
.
~DISTANCE = 20.0~
l1 ~K:~ !kc!
.
.
+
pif ~
.
.
~DISTANCE = 20.0~
l1 ~K:~ !kc!
.
.
Figure 2.11 Extract from an instruction file. A useful feature of secondary marker functionality is illustrated in figures 2.12 and 2.13 which represent a model output file extract and a corresponding instruction file extract, respectively. If a particular secondary marker is preceded only by other markers (including, perhaps, one or a number of secondary markers and certainly a primary marker), and the text string corresponding to that secondary marker is not found on a model output file line on which the previous markers’ strings have been located, a PEST++ program will assume that it has not yet found the correct model output line and resume its search for a line which holds the text pertaining to all three markers. Thus, the instruction “%TIME STEP 10%” will cause this program to pause on its downward journey through the model output file at the first line illustrated in figure 2.12. However, when it does not find the string “STRAIN” on the same line, it re-commences its perusal of the model output file, looking for the string “TIME STEP 10” again. Eventually it finds a line containing both the primary and secondary markers and, having done so, commences execution of the next instruction line. -
.
.
TIME STEP 10 (13 ITERATIONS REQUIRED) STRESS --->
X = 1.05 STRESS = 4.35678E+03
X = 1.10 STRESS = 4.39532E+03
.
.
TIME STEP 10 (BACK SUBSTITUTION) STRAIN --->
X = 1.05 STRAIN = 2.56785E-03
X = 1.10 STRAIN = 2.34564E-03
.
.
+
.
.
TIME STEP 10 (13 ITERATIONS REQUIRED) STRESS --->
X = 1.05 STRESS = 4.35678E+03
X = 1.10 STRESS = 4.39532E+03
.
.
TIME STEP 10 (BACK SUBSTITUTION) STRAIN --->
X = 1.05 STRAIN = 2.56785E-03
X = 1.10 STRAIN = 2.34564E-03
.
.
Figure 2.12 Extract from a model output file. It is important to note that if any instruction items other than markers precede an unmatched secondary marker, it will be assumed that the mismatch is an error condition; an appropriate error message will then be generated. Note also that secondary markers may be used sequentially. For example, if the STRAIN variable is always in position 2, then the pertinent line in the instruction file of figure 2.13 could be replaced by "l1 %=% %=% !str1!".  This is handy for comma-delimited output files. -
pif %
.
.
%TIME STEP 10% %STRAIN%
l1 %STRAIN =% !str1!
l1 %STRAIN =% !str2!
.
.
+
pif %
.
.
%TIME STEP 10% %STRAIN%
l1 %STRAIN =% !str1!
l1 %STRAIN =% !str2!
.
.
Figure 2.13 Extract from an instruction file. @@ -643,7 +657,7 @@ The following instruction line directs a PEST++ program to read the fourth numbe %MODEL OUTPUTS:% w w w !obs1! -The instruction line begins with a primary marker, allowing this program to locate the above line on the model output file. After this marker is processed, the cursor rests on the “:” character of “OUTPUTS:”, i.e., on the last character of the marker string. In response to the first whitespace instruction, a PEST++ program finds the next whitespace and then moves its cursor to the end of this whitespace, i.e., just before the “2” of the first number on the above model output file line. The second whitespace instruction moves the cursor to the blank character preceding the first “4” of the second number on the above line; processing of the third whitespace instruction results in the cursor being moved to the blank character just before the negative sign. After the fourth whitespace instruction is implemented, the cursor rests on the blank character preceding the last number; the latter can then be read as a non-fixed observation (see below). Note, unlike PEST, PEST++ treats the comma (“,”) as a whitespace-type character, given its very-common use. Additionally, PEST++ also supports an optional “++” style argument *additional_ins_delimiters* that users can employ to include additional characters to treat as delimiters (in addition to space, tab, and comma). Using these additional delimiters makes instruction file processing much faster than using repeating secondary markers, especially for reading large CSV-format files. +The instruction line begins with a primary marker, allowing this program to locate the above line on the model output file. After this marker is processed, the cursor rests on the “:” character of “OUTPUTS:”, i.e., on the last character of the marker string. 
In response to the first whitespace instruction, a PEST++ program finds the next whitespace and then moves its cursor to the end of this whitespace, i.e., just before the “2” of the first number on the above model output file line. The second whitespace instruction moves the cursor to the blank character preceding the first “4” of the second number on the above line; processing of the third whitespace instruction results in the cursor being moved to the blank character just before the negative sign. After the fourth whitespace instruction is implemented, the cursor rests on the blank character preceding the last number; the latter can then be read as a non-fixed observation (see below). Note that, unlike PEST, PEST++ treats the comma (“,”) as a whitespace-type character, because it is so commonly used as a delimiter. PEST++ also supports an optional “++” style argument *additional\_ins\_delimiters* that users can employ to include additional characters to treat as delimiters (in addition to space, tab, and comma). Using these additional delimiters makes instruction file processing much faster than using repeating secondary markers, especially for reading large CSV-format files. **Tab** The tab instruction places the cursor at a user-specified character position (i.e., column number) on the model output file line which is currently being processed. The instruction syntax is “tn” where *n* is the column number. The column number is obtained by counting character positions (including blank characters) from the left side of any line, starting at 1. Like the whitespace instruction, the tab instruction can be useful in navigating through a model output file line prior to locating and reading a non-fixed observation. For example, consider the following line from a model output file: @@ -663,7 +677,7 @@ Observations can be identified in one of three ways. 
The first way is to tell th Figure 2.14 shows how the numbers listed in the third solution vector of figure 2.8 can be read as fixed observations. The instruction item informing the PEST++ program how to read a fixed observation consists of two parts. The first part consists of the observation name enclosed in square brackets, while the second part consists of the first and last columns from which to read the observation. Note that no space must separate these two parts of the observation instruction; a space in an instruction file is always construed as marking the end of one instruction item and the beginning of another (unless the space lies between marker delimiters). -
pif *
.
.
*PERIOD NO. 3*
*SOLUTION VECTOR*
l1 [obs1]1:9 [obs2]10:18 [obs3]19:27 [obs4]28:36 [obs5]37:45
& [obs6]46:54
.
.
+
pif *
.
.
*PERIOD NO. 3*
*SOLUTION VECTOR*
l1 [obs1]1:9 [obs2]10:18 [obs3]19:27 [obs4]28:36 [obs5]37:45
& [obs6]46:54
.
.
Figure 2.14 Extract from an instruction file. @@ -671,7 +685,7 @@ Reading numbers as fixed observations is useful when the model writes its output Where a model writes its results as an array of numbers, it is not an uncommon occurrence for these numbers to abut each other. Consider, for example, the following FORTRAN code fragment. -
A=1236.567
B=8495.0
C=-900.0
WRITE(10,20) A,B,C
20 FORMAT(3(F8.3))
+
A=1236.567
B=8495.0
C=-900.0
WRITE(10,20) A,B,C
20 FORMAT(3(F8.3))
The result is 1236.5678495.000-900.000. In this case there is no choice but to read these numbers as fixed observations. (Both of the alternative ways to read an observation require that the observation be surrounded by either whitespace or a string that is invariant from model run to model run and can thus be used as a marker.) Hence to read the above three numbers as observations *A*, *B* and *C* the following instruction line may be used. @@ -701,11 +715,11 @@ When a PEST++ program encounters a non-fixed observation instruction it first se Consider the output file fragment shown in figure 2.15. The species populations at different times cannot be read as either fixed or semi-fixed observations because the numbers representing these populations cannot be guaranteed to fall within a certain range of column numbers on the model output file because “iterative adjustment” may be required in the calculation of any such population. Hence we must find our way to the number using another method; one such method is illustrated in figure 2.16. -
.
.
SPECIES POPULATION AFTER 1 YEAR = 1.23498E5
SPECIES POPULATION AFTER 2 YEARS = 1.58374E5
SPECIES POPULATION AFTER 3 YEARS (ITERATIVE ADJUSTMENT REQUIRED)= 1.78434E5
SPECIES POPULATION AFTER 4 YEARS = 2.34563E5
.
.
+
.
.
SPECIES POPULATION AFTER 1 YEAR = 1.23498E5
SPECIES POPULATION AFTER 2 YEARS = 1.58374E5
SPECIES POPULATION AFTER 3 YEARS (ITERATIVE ADJUSTMENT REQUIRED)= 1.78434E5
SPECIES POPULATION AFTER 4 YEARS = 2.34563E5
.
.
Figure 2.15 Extract from a model output file. -
pif *
.
.
*SPECIES* *=* !sp1!
l1 *=* !sp2!
l1 *=* !sp3!
l1 *=* !sp4!
.
.
+
pif *
.
.
*SPECIES* *=* !sp1!
l1 *=* !sp2!
l1 *=* !sp3!
l1 *=* !sp4!
.
.
Figure 2.16 Extract from an instruction file. @@ -717,37 +731,43 @@ Successful identification of a non-fixed observation depends on the instructions Consider the model output file line shown below as a further illustration of the use of non-fixed observations. -| 4.33 -20.3 23.392093 3.394382 | +| | |-------------------------------| +| 4.33 -20.3 23.392093 3.394382 | If we are interested in the fourth of these numbers but we are unsure as to whether the numbers preceding it might not be written with greater precision in some model runs (hence pushing the number in which we are interested to the right), then we have no alternative but to read the number as a non-fixed observation. However, if the previous numbers vary from model run to model run, we cannot use a secondary marker either; nor can a tab be used. Fortunately, whitespace comes to the rescue, with the following instruction line taking the program reading the file to the fourth number: -| l10 w w w !obs1! | +| | |------------------| +| l10 w w w !obs1! | Here it is assumed that, prior to reading this instruction, the processing cursor was located on the 10th preceding line of the model output file. As long as we can be sure that no whitespace will ever precede the first number, there will always be three incidences of whitespace preceding the number in which we are interested. However, if it happens that whitespace may precede the first number on some occasions, while on other occasions it may not, then we can read the first number as a dummy observation as shown below: -| l10 !dum! w w w !obs1! | +| | |------------------------| +| l10 !dum! w w w !obs1! | As was explained previously, the number on the model output file corresponding to an observation named “dum” is not actually used; nor can the name “dum” appear in the “observation data” section of a PEST control file. 
The use of this name is reserved for instances like the present case where a number must be read in order to facilitate navigation along a particular line of the model output file. The number is read according to the non-fixed observation protocol, for only observations of this type can be dummy observations. An alternative to the use of whitespace in locating the observation “obs1” in the above example could involve using the dummy observation more than once. Hence the instruction line below would also enable the number representing “obs1” to be located and read. -| l10 !dum! !dum! !dum! !obs1! | +| | |------------------------------| +| l10 !dum! !dum! !dum! !obs1! | If the numbers in the above example had been separated by commas instead of whitespace, PEST++ would have read this line in the same way. A number not surrounded by whitespace can still be read as a non-fixed observation with the proper choice of secondary markers. Consider the model output file line shown below. -| SOIL WATER CONTENT (NO CORRECTION)=21.345634% | +| | |-----------------------------------------------| +| SOIL WATER CONTENT (NO CORRECTION)=21.345634% | It may not be possible to read the soil water content as a fixed observation because the “(NO CORRECTION)” string may or may not be present after any particular model run. Reading it as a non-fixed observation appears troublesome as the number is neither preceded nor followed by whitespace. However, a suitable instruction line is -| l5 \*=\* !sws! \*%\* | +| | |----------------------| +| l5 \*=\* !sws! \*%\* | Notice how a secondary marker (i.e., \*%\*) is referenced even though it occurs after the observation we wish to read. If this marker were not present, a run-time error would occur when a PEST++ program tries to read the soil water content because it would define the observation string to include the “%” character and, naturally, would be unable to read a number from a string which includes non-numeric characters. 
However, by including the “%” character as a secondary marker after the number representing the observation “sws”, the program reading the file is instructed to separate the character from the string before trying to read the number. But note that if a post-observation secondary marker of this type begins with a numerical character, a PEST++ program will include this character with the number representing the value of the observation if there is no whitespace separating it from the observation. The observation will then be read incorrectly. @@ -762,20 +782,23 @@ As line lengths in PEST++ are unlimited, PEST++ does not support the line contin An instruction file can be built using a text editor. Alternatively, it can be written by software dedicated to this purpose such as the graphical user interface of a model which supports PEST and/or PEST++. Alternatively, it can be built by members of the PEST Groundwater and Surface Water Utility suites, both of which are downloadable from the PEST web pages. Note also that the Python module pyEMU contains several approaches to automate the construction of instruction files. -Caution must always be exercised in building an instruction set to read a model output file, especially if navigational instructions such as markers, whitespace, tabs and dummy observations are used. A PEST++ program which reads a model output file will always follow your instructions to the letter, but it may not read the number that you intend it to read if you get an instruction wrong. If this program tries to read an observation but does not find a number where it expects to find one, a run-time error will occur. The program will inform you of where it encountered the error, and of the instruction it was implementing when the error occurred; this should allow you to find the problem. 
However, if the program actually reads the wrong number from the model output file, this may only become apparent if an unusually high objective function results, or if the objective function cannot be lowered. Alternatively, if the number which the PEST++ program is instructed to read is a model prediction, or if this program is being asked purely to compute sensitivities of this number to model parameters, the error may never become apparent. If in doubt, check run record and/or other files that are written by the pertinent PEST++ program for numbers that it reads from model output files to ensure that they meet expectations. Note that by default, PEST++ programs will check the consistency between observations listed in the control file and the instruction file(s). This functionality can be disable with the *check_tplins* option. +Caution must always be exercised in building an instruction set to read a model output file, especially if navigational instructions such as markers, whitespace, tabs and dummy observations are used. A PEST++ program which reads a model output file will always follow your instructions to the letter, but it may not read the number that you intend it to read if you get an instruction wrong. If this program tries to read an observation but does not find a number where it expects to find one, a run-time error will occur. The program will inform you of where it encountered the error, and of the instruction it was implementing when the error occurred; this should allow you to find the problem. However, if the program actually reads the wrong number from the model output file, this may only become apparent if an unusually high objective function results, or if the objective function cannot be lowered. Alternatively, if the number which the PEST++ program is instructed to read is a model prediction, or if this program is being asked purely to compute sensitivities of this number to model parameters, the error may never become apparent. 
If in doubt, check run record and/or other files that are written by the pertinent PEST++ program for numbers that it reads from model output files to ensure that they meet expectations. Note that by default, PEST++ programs will check the consistency between observations listed in the control file and the instruction file(s). This functionality can be disabled with the *check\_tplins* option. Included in the PEST suite are two programs which can be used to verify that instruction files have been built correctly. Program PESTCHEK reads all the instruction files cited in a PEST control file, ensuring that no syntax errors are present in any of these files. Program INSCHEK, on the other hand, checks a single instruction file for syntax errors. If an instruction file is error-free, INSCHEK can then use that instruction file to read a model output file, recording a list of observation values read from that file to another file. In this way you can be sure that your instruction set “works” before it is actually used by a program from the PEST++ suite. (Note that INSCHEK and PESTCHEK, like PEST, set a 20-character limit on the length of observation names.) #
3. Some Important PEST++ Features + ## 3.1 General + Before describing the PEST control file, certain features that programs of the PEST++ suite have in common are discussed. This will make the task of explaining the settings of PEST and PEST++ control variables somewhat easier. As for the previous and ensuing chapters of this manual, parts of the present chapter are taken from version 7 of the PEST manual. ## 3.2 Parameter Adjustment + ### 3.2.1 Parameter Transformation All of the programs of the PEST++ suite adjust values ascribed to a model’s parameters (and sometimes its decision variables, though they are given the umbrella name “parameters” in the present chapter). However, behind the scenes, these programs can be asked to adjust the logarithms of parameter values instead of the parameter values themselves. This adjustment strategy often renders an inversion or optimization process more numerically stable, and faster, than it would otherwise be. It also removes large inequalities in sensitivities between parameters which may be an artefact of the units employed for their representation. @@ -792,9 +815,9 @@ A parameter can be referenced in a template file yet take no part in the paramet Programs of the PEST++ suite allow one or more parameters to be tied (i.e., linked) to a “parent” parameter. A value is not estimated for a tied parameter. Instead, a value is estimated for the parent parameter; the ratio of tied parameter value to parent parameter value is then maintained through the inversion or optimization process. Thus, tied parameters “piggyback” on their parents. Note that a parameter cannot be tied to a parameter which is either fixed or tied to another parameter itself. Parameters are tied through setting PARTRANS to “tied”. As is described in the following chapter, parent parameters of tied parameters are specified in the second part of the “parameter data” section of the PEST control file. 
-Programs of the PEST++ suite allow groups of parameters to be adjusted as a single parameter through use of a PEST++-specific control variable named *tie_by_group()*. This has a similar effect to multiple PARTRANS settings of “tied”, but can be easier to implement in many contexts. +Programs of the PEST++ suite allow groups of parameters to be adjusted as a single parameter through use of a PEST++-specific control variable named *tie\_by\_group()*. This has a similar effect to multiple PARTRANS settings of “tied”, but can be easier to implement in many contexts. -In PEST, the bounds of tied parameters are not enforced. The same is true in PEST++ unless users activate tied parameter bounds enforcement via the *enforce_tied_bounds* option. This option should be used with caution because it can effectively limit the bounds range of the adjustable parameters–this has a number of implications across the various codes of the PEST++ suite. However, under some circumstances, it can be important to maintain all parameters within their bounds for stability reasons. +In PEST, the bounds of tied parameters are not enforced. The same is true in PEST++ unless users activate tied parameter bounds enforcement via the *enforce\_tied\_bounds* option. This option should be used with caution because it can effectively limit the bounds range of the adjustable parameters–this has a number of implications across the various codes of the PEST++ suite. However, under some circumstances, it can be important to maintain all parameters within their bounds for stability reasons. ### 3.2.3 Upper and Lower Parameter Bounds @@ -802,7 +825,7 @@ Upper and lower bounds, defining the maximum and minimum values that a parameter It is important that upper and lower parameter bounds be chosen wisely. For many models, parameters can lie only within certain well-defined domains determined by the theory on which the model is based. 
In such cases model-generated floating-point errors may result if a PEST++ program is not prevented from adjusting a parameter to a value outside its allowed domain. For example if, at some stage during a model run, the logarithm or square root of a particular parameter is taken, then that parameter must be prevented from ever becoming negative (or zero if the model takes the log of the parameter). If the reciprocal is taken of a parameter, that parameter must never be zero. -For some programs belonging to the PEST++ suite, parameter bounds have a significance beyond that of parameter range confinement. Bounds can be used to provide an estimate of prior parameter uncertainty. By default, this estimate is based on the assumption that the difference between a parameter’s upper and lower bound is equal to four standard deviations of its prior probability distribution. (This assumption can be altered using the PEST++ *par_sigma_range()* control variable, discussed in detail elsewhere in this document). If a parameter is log-transformed, the difference is calculated between the logarithms of its bounds, and the standard deviation is applied to the logarithm of parameter values. +For some programs belonging to the PEST++ suite, parameter bounds have a significance beyond that of parameter range confinement. Bounds can be used to provide an estimate of prior parameter uncertainty. By default, this estimate is based on the assumption that the difference between a parameter’s upper and lower bound is equal to four standard deviations of its prior probability distribution. (This assumption can be altered using the PEST++ *par\_sigma\_range()* control variable, discussed in detail elsewhere in this document). If a parameter is log-transformed, the difference is calculated between the logarithms of its bounds, and the standard deviation is applied to the logarithm of parameter values. 
### 3.2.4 Scale and Offset @@ -836,6 +859,7 @@ Problems associated with imposing change limits on relative- or factor-limited p ## 3.3 Calculation of Derivatives + ### 3.3.1 General The ability to calculate partial derivatives of model outputs with respect to adjustable parameters and/or decision variables is fundamental to implementation of the inversion and linear uncertainty analysis methodologies implemented by PESTPP-GLM, and to management optimization under chance constraints implemented by PESTPP-OPT. Because programs of the PEST++ suite interact with a model non-intrusively, they must evaluate these derivatives themselves, using model outputs calculated on the basis of incrementally varied parameters and/or decision variables. @@ -862,9 +886,9 @@ Because of the importance of reliable finite-difference derivatives calculation, As stated above, variables which control derivatives calculation are assigned to parameter groups, rather than to parameters themselves. These variables reside in the “parameter groups” section of the PEST control file. -Three control variables, namely INCTYP, DERINC and DERINCLB are of primary relevance to the setting of parameter increments. INCTYP determines the type of increment to use, for which there are three options, namely “absolute”, “relative” and “rel_to_max”. If the increment type for a parameter group is “absolute”, the increment used for all parameters in the group is supplied as the input variable DERINC; this increment is added (and subtracted for central derivatives calculation) directly to the current value of a particular group member when calculating derivatives with respect to that parameter. However, if the increment type is “relative”, DERINC is multiplied by the current absolute value of a parameter in order to determine the increment for that parameter. 
In this way the parameter increment is adjusted upwards and downwards as the parameter itself is adjusted upwards and downwards; this may have the effect of maintaining significance in the difference between model outcomes calculated on the basis of the incrementally varied parameter. If the increment type for a group is “rel_to_max”, the increment for all members of that group is calculated as DERINC times the absolute value of the group member of currently greatest absolute value. This can be a useful means by which to calculate increments for parameters whose values can vary widely, including down to zero. The “relative” aspect of the “rel_to_max” option may maintain model outcome difference significance as described above; however, because the increment is calculated as a fraction of the maximum absolute value occurring within a group, rather than as a fraction of the value of each parameter, an individual parameter can attain near-zero values without its increment simultaneously dropping to zero. +Three control variables, namely INCTYP, DERINC and DERINCLB are of primary relevance to the setting of parameter increments. INCTYP determines the type of increment to use, for which there are three options, namely “absolute”, “relative” and “rel\_to\_max”. If the increment type for a parameter group is “absolute”, the increment used for all parameters in the group is supplied as the input variable DERINC; this increment is added (and subtracted for central derivatives calculation) directly to the current value of a particular group member when calculating derivatives with respect to that parameter. However, if the increment type is “relative”, DERINC is multiplied by the current absolute value of a parameter in order to determine the increment for that parameter. 
In this way the parameter increment is adjusted upwards and downwards as the parameter itself is adjusted upwards and downwards; this may have the effect of maintaining significance in the difference between model outcomes calculated on the basis of the incrementally varied parameter. If the increment type for a group is “rel\_to\_max”, the increment for all members of that group is calculated as DERINC times the absolute value of the group member of currently greatest absolute value. This can be a useful means by which to calculate increments for parameters whose values can vary widely, including down to zero. The “relative” aspect of the “rel\_to\_max” option may maintain model outcome difference significance as described above; however, because the increment is calculated as a fraction of the maximum absolute value occurring within a group, rather than as a fraction of the value of each parameter, an individual parameter can attain near-zero values without its increment simultaneously dropping to zero. -Further protection against the occurrence of near-zero increments for “relative” and “rel_to_max” increment types is provided through the variable DERINCLB. This variable provides a standby absolute increment which can be used in place of the “relative” or “rel_to_max” increment if the increment calculated for a particular parameter using either of these latter methods falls below the absolute increment value provided by DERINCLB. +Further protection against the occurrence of near-zero increments for “relative” and “rel\_to\_max” increment types is provided through the variable DERINCLB. This variable provides a standby absolute increment which can be used in place of the “relative” or “rel\_to\_max” increment if the increment calculated for a particular parameter using either of these latter methods falls below the absolute increment value provided by DERINCLB. 
If a parameter is log-transformed then it is wise that its increment be calculated using the “relative” method, though programs of the PEST++ suite do not insist on this. @@ -874,13 +898,13 @@ It will be recalled that programs of the PEST++ suite write parameter values to ### 3.3.4 Settings for Three-Point Derivatives -The FORCEN control variable, featured in the “parameter groups” section of the PEST control file, determines whether derivatives for the parameters belonging to a particular group are calculated using the forward-difference method, the central-difference method, or whether the method should change as the inversion process progresses. FORCEN can be designated as “always_2”, “always_3” or “switch”. If it is supplied as “always_2”, derivatives calculation is through forward differences for all parameters within the group for the duration of the inversion or optimization process; if it is set to “always_3”, central (i.e., three-point) derivatives are used for the entirety of the inversion or optimization process. However, if FORCEN is supplied as “switch”, a PEST++ program will commence the inversion process using forward differences for all members of the group, and switch to using central differences on the first occasion that the relative reduction in the objective function between iterations is less than the value of the variable PHIREDSWH (which resides in the “control data” section of the PEST control file). +The FORCEN control variable, featured in the “parameter groups” section of the PEST control file, determines whether derivatives for the parameters belonging to a particular group are calculated using the forward-difference method, the central-difference method, or whether the method should change as the inversion process progresses. FORCEN can be designated as “always\_2”, “always\_3” or “switch”. 
If it is supplied as “always\_2”, derivatives calculation is through forward differences for all parameters within the group for the duration of the inversion or optimization process; if it is set to “always\_3”, central (i.e., three-point) derivatives are used for the entirety of the inversion or optimization process. However, if FORCEN is supplied as “switch”, a PEST++ program will commence the inversion process using forward differences for all members of the group, and switch to using central differences on the first occasion that the relative reduction in the objective function between iterations is less than the value of the variable PHIREDSWH (which resides in the “control data” section of the PEST control file). -Two parameter group variables pertain specifically to the calculation of derivatives using higher order methods. These are DERINCMUL and DERMTHD. If FORCEN is set to “always_3” or “switch”, DERMTHD must be set to one of “outside_pts”, “parabolic” or “best_fit”; this determines the method of central derivatives calculation to be used by programs of the PEST++ suite. These three options are discussed above. +Two parameter group variables pertain specifically to the calculation of derivatives using higher order methods. These are DERINCMUL and DERMTHD. If FORCEN is set to “always\_3” or “switch”, DERMTHD must be set to one of “outside\_pts”, “parabolic” or “best\_fit”; this determines the method of central derivatives calculation to be used by programs of the PEST++ suite. These three options are discussed above. The variable DERINCMUL is the parameter increment multiplier; this is the value by which DERINC is multiplied when it is used to evaluate increments for any of the three central derivatives methods. Sometimes it is useful to employ larger increments for central derivatives calculation than for forward derivatives calculation, especially where the model output dependence on parameter values is “bumpy” (see the next section). 
However, if the increment is raised too high, derivative precision must ultimately fall. Note that through DERINCMUL you can also reduce the increment used for central derivatives calculation if you wish. -For increments calculated using the “relative” and “rel_to_max” methods, the variable DERINCLB has the same role in central derivatives calculation as it does in forward derivatives calculation, namely to place a lower limit on the absolute increment value. Note, however, that DERINCLB is not multiplied by DERINCMUL when central derivatives are calculated. +For increments calculated using the “relative” and “rel\_to\_max” methods, the variable DERINCLB has the same role in central derivatives calculation as it does in forward derivatives calculation, namely to place a lower limit on the absolute increment value. Note, however, that DERINCLB is not multiplied by DERINCMUL when central derivatives are calculated. ### 3.3.5 How to Obtain Derivatives You Can Trust @@ -902,6 +926,7 @@ If PESTPP-GLM or PESTPP-OPT does not perform as well as you think it should, the ## 3.4 The Jacobian Matrix File + Elements of a Jacobian matrix represent partial derivatives of model outputs with respect to model parameters. In the present context, “model outputs” are model-calculated numbers that are named in the “observation data” section of a PEST control file; these are read from model output files using instruction files. “Parameters” are those which are declared as adjustable in the “parameter data” section of a PEST control file. Each column of the Jacobian matrix contains partial derivatives of model outputs with respect to a particular parameter. Each row of a Jacobian matrix contains partial derivatives of a particular model output with respect to all adjustable parameters. If a parameter is denoted as log-transformed in a PEST control file, then partial derivatives contained in a Jacobian matrix file are with respect to the log of that parameter. 
@@ -916,7 +941,7 @@ Uses to which a JCO file may be put include the following. - Examination of local sensitivities of model outputs to parameters and/or decision variables. -- Giving PEST or PESTPP-GLM a “head start” in calibrating a model by providing it with a pre-calculated Jacobian matrix to use in its first iteration. PEST uses this matrix if started with the “/i” switch. For PESTPP-GLM this is achieved through use of the *base_jacobian()* control variable. +- Giving PEST or PESTPP-GLM a “head start” in calibrating a model by providing it with a pre-calculated Jacobian matrix to use in its first iteration. PEST uses this matrix if started with the “/i” switch. For PESTPP-GLM this is achieved through use of the *base\_jacobian()* control variable. - To support the many types of linear analysis implemented by utility programs supplied with PEST, and functions provided by pyEMU; these calculate @@ -932,9 +957,10 @@ Uses to which a JCO file may be put include the following. ## 3.5 The Objective Function + Most programs of the PEST and PEST++ suites minimize a least-squares objective function. This is calculated as the sum of squared weighted differences between measurements (or prior information equations) and corresponding model outputs. The difference between a measurement and a model output to which it corresponds is referred to as a residual. Let the *i*th residual be designated as $r_i$. Let the weight associated with the *i*th observation (which may be a prior information equation) be designated as $w_i$. Then the objective function Φ is calculated as -![](c6e8be34c728676210e0fa7bc01f3102b8843c1f.wmf) (3.3) +$\Phi = \sum_{i} (w_i r_i)^2$ (3.3) Obviously, if an observation is ascribed a weight of zero, then the residual associated with that observation makes no contribution to the total objective function. An observation can therefore be removed from an inversion process by assigning it a weight of zero. 
This provides a far easier mechanism for removal of an observation from the calibration dataset than that of re-building the PEST control file (and the pertinent instruction file) with this observation absent. @@ -950,24 +976,28 @@ Unlike PEST, programs of the PEST++ suite do not (at the time of writing) suppor # 4. The PEST Control File + ## 4.1 General + Programs of the PEST++ suite, like PEST itself, require three types of input file. Two of these were discussed in chapter 2 of this manual, namely template and instruction files. On any occasion that a PEST++ program is run, as many of each of these must be provided as there are model input files in which parameters and/or decision variables reside, and model output files from which numbers must be read, respectively. However, there is only one PEST control file. This chapter describes the PEST control file. However, it will not be discussed in the same detail as in part I of the PEST manual. This is because programs of the PEST++ suite do not read all of the variables that reside in this file. Furthermore, some entire sections of a PEST control file have no relevance for programs of the PEST++ suite because they pertain to functionality which the PEST++ suite does not offer. Programs of the PEST++ suite do require their own specific control variables, however. As will be discussed later in this chapter, these can be provided anywhere in a PEST control file following a line that begins with the “++” character string. Conversely, lines that begin with the “++” string are ignored by programs of the PEST suite. With the exception of section headers and lines that begin with the “++” string, each line of a PEST control file is comprised of the values of variables listed in sequence. These values may be integers, real numbers or text strings, the latter often providing the names of files. Each such item must be separated from its neighbour by whitespace, i.e., spaces or tabs. 
Multiple consecutive spaces or tabs are permitted. To avoid confusion, a text variable which contains a space must be enclosed in quotes. The variable to which each number or text string pertains is identified by the line on which it resides, and by its position on that line. -The first line of a PEST control file must begin with the string “pcf”; this stands for “PEST control file”. No other variables must appear on this first line. Versions of PEST from 15 onwards, and versions of PEST++ from 4 onwards, allow blank lines to be interspersed with data lines in a PEST control file. They also allow for the presence of comments. Comments are preceded by the “#” character. Any text to the right of that character is treated as a comment, and hence ignored by a PEST++ program (including the entirety of a line if the “#” character leads the line). However, unless the “#” character is the first character on a line, it must be preceded by a space; also, it must not be part of a string that is enclosed in quotes. These precautions prevent the occurrence of the “#” character within a filename from being misinterpreted as introducing a comment. +The first line of a PEST control file must begin with the string “pcf”; this stands for “PEST control file”. No other variables must appear on this first line. Versions of PEST from 15 onwards, and versions of PEST++ from 4 onwards, allow blank lines to be interspersed with data lines in a PEST control file. They also allow for the presence of comments. Comments are preceded by the “\#” character. Any text to the right of that character is treated as a comment, and hence ignored by a PEST++ program (including the entirety of a line if the “\#” character leads the line). However, unless the “\#” character is the first character on a line, it must be preceded by a space; also, it must not be part of a string that is enclosed in quotes. 
These precautions prevent the occurrence of the “\#” character within a filename from being misinterpreted as introducing a comment. ## 4.2 Naming Conventions + A PEST control file must have an extension of *.pst*. Suppose that its filename base is *case*. Then the PEST control file must be named *case.pst*. As they run, programs of the PEST++ suite produce many files. The number and type of files that are written depend on the program. All of these files have the same filename base as that of the PEST control file on which their run is based (*case* in the present example). ## 4.3 Sections + The PEST control file is subdivided into sections. Each section begins with a header. A header is easily recognized because it begins with the “\*” character followed by a space. Table 4.1 shows sections that can appear in a PEST control file. For PEST, some of these sections are optional and can be omitted. Those that are present must be provided in the order shown in this table. Programs of the PEST++ suite ignore many of these sections. | Section | Status for PEST programs | Status for PEST++ programs | Contents | @@ -997,24 +1027,26 @@ Note that in table 4.1, separate sections are required for model input files and ## 4.4 Control Variables + Figure 4.1 names variables which appear in a PEST control file. They are listed according to the position that they occupy in the file. This figure omits sections of the PEST control file that programs of the PEST++ suite ignore. It also omits control variables which are optional for PEST but are ignored by PEST++ programs. It does include some variables, however, which must be present within a PEST control file even though their values are not used by programs of the PEST++ suite. The presence of these variables defines line numbers and/or the locations of variables that follow them on a certain line. A control file provided to a PEST++ suite program should include all of these variables. 
In figure 4.1, variables whose values are actually used by a program of the PEST++ suite to control its operations are shaded; these are described below. Of these variables, those which are optional are enclosed in square brackets. The roles of all other variables which appear in figure 4.1 are discussed in PEST documentation. Nevertheless, a number of these variables are discussed herein as PEST-acceptable values for these variables will preclude objections being raised by the PESTCHEK checking utility (see below). It is also important to note that not all programs of the PEST++ suite use all of the variables that are shaded in figure 4.1. Furthermore, any particular PEST++ program may use a particular PEST control variable under certain circumstances. -
pcf
* control data
RSTFLE PESTMODE
NPAR NOBS NPARGP NPRIOR NOBSGP
NTPLFLE NINSFLE PRECIS DPOINT [NUMCOM]
RLAMBDA1 RLAMFAC PHIRATSUF PHIREDLAM NUMLAM
RELPARMAX FACPARMAX FACORIG
PHIREDSWH
NOPTMAX PHIREDSTP NPHISTP NPHINORED RELPARSTP NRELPAR
ICOV ICOR IEIG
* singular value decomposition
SVDMODE
MAXSING EIGTHRESH
EIGWRITE
* parameter groups
PARGPNME INCTYP DERINC DERINCLB FORCEN DERINCMUL DERMTHD
(one such line for each parameter group)
* parameter data
PARNME PARTRANS PARCHGLIM PARVAL1 PARLBND PARUBND PARGP SCALE OFFSET DERCOM
(one such line for each parameter)
PARNME PARTIED
(one such line for each tied parameter)
* observation groups
OBGNME
(one such line for each observation group)
* observation data
OBSNME OBSVAL WEIGHT OBGNME
(one such line for each observation)
* model command line
COMLINE
(one such line for each model command line)
* model input
TEMPFLE INFLE
(one such line for each template file)
* model output
INSFLE OUTFLE
(one such line for each instruction file)
* prior information
PILBL PIFAC * PARNME + PIFAC * log(PARNME) ... = PIVAL WEIGHT OBGNME
(one such line for each article of prior information)
* regularization
PHIMLIM PHIMACCEPT [FRACPHIM]
WFINIT WFMIN WFMAX
WFFAC WFTOL [IREGADJ]
+
pcf
* control data
RSTFLE PESTMODE
NPAR NOBS NPARGP NPRIOR NOBSGP
NTPLFLE NINSFLE PRECIS DPOINT [NUMCOM]
RLAMBDA1 RLAMFAC PHIRATSUF PHIREDLAM NUMLAM
RELPARMAX FACPARMAX FACORIG
PHIREDSWH
NOPTMAX PHIREDSTP NPHISTP NPHINORED RELPARSTP NRELPAR
ICOV ICOR IEIG
* singular value decomposition
SVDMODE
MAXSING EIGTHRESH
EIGWRITE
* parameter groups
PARGPNME INCTYP DERINC DERINCLB FORCEN DERINCMUL DERMTHD
(one such line for each parameter group)
* parameter data
PARNME PARTRANS PARCHGLIM PARVAL1 PARLBND PARUBND PARGP SCALE OFFSET DERCOM
(one such line for each parameter)
PARNME PARTIED
(one such line for each tied parameter)
* observation groups
OBGNME
(one such line for each observation group)
* observation data
OBSNME OBSVAL WEIGHT OBGNME
(one such line for each observation)
* model command line
COMLINE
(one such line for each model command line)
* model input
TEMPFLE INFLE
(one such line for each template file)
* model output
INSFLE OUTFLE
(one such line for each instruction file)
* prior information
PILBL PIFAC * PARNME + PIFAC * log(PARNME) ... = PIVAL WEIGHT OBGNME
(one such line for each article of prior information)
* regularization
PHIMLIM PHIMACCEPT [FRACPHIM]
WFINIT WFMIN WFMAX
WFFAC WFTOL [IREGADJ]
Figure 4.1 Variables comprising a minimalist PEST control file. Figure 4.2 provides an example of a simple PEST control file. -
pcf
* control data
restart regularization
5 19 2 2 3
2 3 single point
10.0 -3.0 0.3 0.03 10
10.0 10.0 0.001
0.1
50 0.005 4 4 0.005 4
1 1 1
* parameter groups
ro relative 0.01 0.0 switch 2.0 parabolic
h relative 0.01 0.0 switch 2.0 parabolic
* parameter data
ro1 fixed factor 0.5 .1 10 ro 1.0 0.0
ro2 log factor 5.0 .1 10 ro 1.0 0.0
ro3 tied factor 0.5 .1 10 ro 1.0 0.0
h1 none factor 2.0 .05 100 h 1.0 0.0
h2 log factor 5.0 .05 100 h 1.0 0.0
ro3 ro2
* observation groups
obsgp1
obsgp2
prgp1
* observation data
ar1 1.21038 1.0 obsgp1
ar2 1.51208 1.0 obsgp1
ar3 2.07204 1.0 obsgp1
ar4 2.94056 1.0 obsgp1
ar5 4.15787 1.0 obsgp1
ar6 5.7762 1.0 obsgp1
ar7 7.7894 1.0 obsgp1
ar8 9.99743 1.0 obsgp1
ar9 11.8307 1.0 obsgp2
ar10 12.3194 1.0 obsgp2
ar11 10.6003 1.0 obsgp2
ar12 7.00419 1.0 obsgp2
ar13 3.44391 1.0 obsgp2
ar14 1.58279 1.0 obsgp2
ar15 1.1038 1.0 obsgp2
ar16 1.03086 1.0 obsgp2
ar17 1.01318 1.0 obsgp2
ar18 1.00593 0.0 obsgp2
ar19 1.00272 0.0 obsgp2
* model command line
model.bat
* model input
ves1.tpl a_model.in1
ves2.tpl a_model.in2
* model output
ves1.ins a_model.ot1
ves2.ins a_model.ot2
ves3.ins a_model.ot3
* prior information
pi1 1.0 * h1 = 1.0 3.0 prgp1
pi2 1.0 * log(ro2) + 1.0 * log(h2) = 2.6026 2.0 prgp1
* regularization
125.0 130.0 0.1000000
1.0 1.0e-10 1.0e10
1.3 1.0e-2 1
+
pcf
* control data
restart regularization
5 19 2 2 3
2 3 single point
10.0 -3.0 0.3 0.03 10
10.0 10.0 0.001
0.1
50 0.005 4 4 0.005 4
1 1 1
* parameter groups
ro relative 0.01 0.0 switch 2.0 parabolic
h relative 0.01 0.0 switch 2.0 parabolic
* parameter data
ro1 fixed factor 0.5 .1 10 ro 1.0 0.0
ro2 log factor 5.0 .1 10 ro 1.0 0.0
ro3 tied factor 0.5 .1 10 ro 1.0 0.0
h1 none factor 2.0 .05 100 h 1.0 0.0
h2 log factor 5.0 .05 100 h 1.0 0.0
ro3 ro2
* observation groups
obsgp1
obsgp2
prgp1
* observation data
ar1 1.21038 1.0 obsgp1
ar2 1.51208 1.0 obsgp1
ar3 2.07204 1.0 obsgp1
ar4 2.94056 1.0 obsgp1
ar5 4.15787 1.0 obsgp1
ar6 5.7762 1.0 obsgp1
ar7 7.7894 1.0 obsgp1
ar8 9.99743 1.0 obsgp1
ar9 11.8307 1.0 obsgp2
ar10 12.3194 1.0 obsgp2
ar11 10.6003 1.0 obsgp2
ar12 7.00419 1.0 obsgp2
ar13 3.44391 1.0 obsgp2
ar14 1.58279 1.0 obsgp2
ar15 1.1038 1.0 obsgp2
ar16 1.03086 1.0 obsgp2
ar17 1.01318 1.0 obsgp2
ar18 1.00593 0.0 obsgp2
ar19 1.00272 0.0 obsgp2
* model command line
model.bat
* model input
ves1.tpl a_model.in1
ves2.tpl a_model.in2
* model output
ves1.ins a_model.ot1
ves2.ins a_model.ot2
ves3.ins a_model.ot3
* prior information
pi1 1.0 * h1 = 1.0 3.0 prgp1
pi2 1.0 * log(ro2) + 1.0 * log(h2) = 2.6026 2.0 prgp1
* regularization
125.0 130.0 0.1000000
1.0 1.0e-10 1.0e10
1.3 1.0e-2 1
Figure 4.2 Example of a PEST control file. ##
4.5 The PESTCHEK Utility + PESTCHEK is a utility program that is supplied with the PEST suite. It reads an entire PEST input dataset, this being comprised of a PEST control file and all template and instruction files cited therein. It checks all of these files for correctness and consistency. In doing so, it performs all of the tasks performed by the TEMPCHEK and INSCHEK utilities that were mentioned in chapter 2 of this manual. However, it goes further than this. It ensures that any parameter that is cited in a template file is also cited in a PEST control file (and vice versa), and that any observation that is cited in an instruction file is also cited in the PEST control file (and vice versa). The benefits of such checking are obvious. PESTCHEK also subjects a PEST control file to thorough error and consistency checking. For example, it informs a user if a parameter is not between its upper and lower bounds, or if a parameter which is denoted as being log-transformed is endowed with a zero or negative initial value. It checks that the number of tied-parent parameter relationships that are provided in the second half of the “parameter data” section of a PEST control file are in accordance with the number of parameters that are denoted as being tied in the first half of this section. Consistency checks are made between equations comprising the “prior information” section of a PEST control file and the transformation status of parameters in the “parameter data” section of the PEST control file. @@ -1027,15 +1059,16 @@ PESTCHEK ignores lines in a PEST control file that begin with the “++” strin Variables appearing in a PEST control file which are used by members of the PEST++ suite are now described. At the same time, sensible, PESTCHEK-safe placeholder values are provided for all variables whose presence is required in a PEST control file, but which are not actually used by members of the PEST++ suite. 
The interested reader is referred to part I of the PEST manual for further details. -Note that all of the PEST++ tools will check that the parameters and observations between the control file and template/instruction files are aligned. This checking can be disabled by setting *CHECK_TPLINS* to false. +Note that all of the PEST++ tools will check that the parameters and observations between the control file and template/instruction files are aligned. This checking can be disabled by setting *CHECK\_TPLINS* to false. ## 4.6 Control Data Section + ### 4.6.1 General Variables appearing in the “control data” section of a minimalist PEST control file are shown in figure 4.3 (which is reproduced from figure 4.1). -
* control data
RSTFLE PESTMODE
NPAR NOBS NPARGP NPRIOR NOBSGP
NTPLFLE NINSFLE PRECIS DPOINT [NUMCOM]
RLAMBDA1 RLAMFAC PHIRATSUF PHIREDLAM NUMLAM
RELPARMAX FACPARMAX FACORIG
PHIREDSWH
NOPTMAX PHIREDSTP NPHISTP NPHINORED RELPARSTP NRELPAR
ICOV ICOR IEIG
+
* control data
RSTFLE PESTMODE
NPAR NOBS NPARGP NPRIOR NOBSGP
NTPLFLE NINSFLE PRECIS DPOINT [NUMCOM]
RLAMBDA1 RLAMFAC PHIRATSUF PHIREDLAM NUMLAM
RELPARMAX FACPARMAX FACORIG
PHIREDSWH
NOPTMAX PHIREDSTP NPHISTP NPHINORED RELPARSTP NRELPAR
ICOV ICOR IEIG
Figure 4.3 Variables appearing in the “control data” section of a PEST control file. @@ -1105,6 +1138,7 @@ The integer ICOV, ICOR and IEIG variables recorded on the 9th line of ##
4.7 Singular Value Decomposition Section + Singular value decomposition (i.e., SVD) as a solution device, and as a mechanism for ensuring numerical stability in solution of an ill-posed inverse problem, is discussed at length by Doherty (2015). PESTPP-GLM always uses singular value decomposition, or a closely related numerical device, to solve an inverse problem. It uses default settings to govern the operation of this process. However, these can be over-ridden by a user if he/she includes a “singular value decomposition” section in a PEST control file. @@ -1113,12 +1147,13 @@ If a PEST control file includes a “singular value decomposition” section, th Unless you wish to over-ride internal PESTPP-GLM settings for implementation of singular value decomposition, it is best to omit the “singular value decomposition” section from the PEST control file. If you do decide to include it, PESTCHEK-friendly values for control variables are those shown in figure 4.2. However, set MAXSING to a number equal to, or greater than, the number of parameters featured in the PEST control file unless you specifically wish to reduce the dimensionality of the inverse problem solution space. -An issue that sometimes causes confusion is the different roles played by the PEST MAXSING and EIGTHRESH variables on the one hand, and the PEST++ *max_n\_super()* and *super_eigthresh()* control variables on the other hand. As is described later in this manual, the latter two variables are used to determine how many super parameters are used in SVD-assisted inversion. MAXSING and EIGTHRESH, on the other hand, control the operation of the singular value decomposition solution process, regardless of whether this is being used to estimate base parameters or super parameters. +An issue that sometimes causes confusion is the different roles played by the PEST MAXSING and EIGTHRESH variables on the one hand, and the PEST++ *max\_n\_super()* and *super\_eigthresh()* control variables on the other hand. 
As is described later in this manual, the latter two variables are used to determine how many super parameters are used in SVD-assisted inversion. MAXSING and EIGTHRESH, on the other hand, control the operation of the singular value decomposition solution process, regardless of whether this is being used to estimate base parameters or super parameters. Note that PESTPP-GLM, PESTPP-IES, and PESTPP-DA support only SVD-based inversion and contain internal default values for the SVD truncation arguments. If this section exists, then MAXSING and EIGTHRESH override internal defaults, even if SVDMODE is set to 0. ## 4.8 Parameter Groups Section + ### 4.8.1 General Every parameter must belong to a parameter group; the group to which each parameter belongs is denoted by the parameter-specific PARGP variable supplied in the “parameter data” section of the PEST control file. @@ -1127,13 +1162,13 @@ Each parameter group must possess a unique name. For PEST this name must be 12 c Notwithstanding the role of the PEST ADDREG1 utility, the primary purpose of parameter groups is to provide a basis for assignment of variables which govern calculation of finite-difference derivatives. These variables are assigned to parameter groups, rather than to individual parameters, because the number of the latter may be very large in many inversion contexts. At the time of writing, programs of the PEST++ suite which calculate derivatives using finite parameter differences are PESTPP-GLM (unless it is implementing global optimization) and PESTPP‑OPT. Note that PESTPP-OPT calculates derivatives with respect to both model parameters and decision variables; the word “parameter” is used to refer to both of these in the discussion that follows. 
-Tied and fixed parameters must also be assigned to groups; however, as derivatives are not calculated with respect to these parameters, the groups to which they belong are of no significance (except in calculating the derivative increment for adjustable members belonging to the same group if the increment type is “rel_to_max”). +Tied and fixed parameters must also be assigned to groups; however, as derivatives are not calculated with respect to these parameters, the groups to which they belong are of no significance (except in calculating the derivative increment for adjustable members belonging to the same group if the increment type is “rel\_to\_max”). The many options available for finite-difference derivatives calculation are discussed in section 3.3 of this manual, as are the variables which control these options. That discussion is not repeated here; only a short description of the role of each variable is presented below. Minimalist specifications (i.e., specifications which pertain only to functionality offered by members of the PEST++ suite) of the “parameter groups” section of a PEST control file are provided in figure 4.4. -
* parameter groups
PARGPNME INCTYP DERINC DERINCLB FORCEN DERINCMUL DERMTHD
(one such line for each parameter group)
+
* parameter groups
PARGPNME INCTYP DERINC DERINCLB FORCEN DERINCMUL DERMTHD
(one such line for each parameter group)
Figure 4.4 Minimalist specifications for the “parameter groups” section of a PEST control file. @@ -1145,16 +1180,16 @@ PARGPNME is the parameter group name. This must be a maximum of 200 characters i If a group is featured in the “parameter groups” section of a PEST control file it is not essential that any parameters belong to that group. However, if, in the “parameter data” section of a PEST control file, a parameter is declared as belonging to a group that is not featured in the “parameter groups” section of the same PEST control file, an error condition will arise. **INCTYP** -INCTYP is a character variable which can assume the values “relative”, “absolute” or “rel_to_max”. If it is “relative”, the increment used for forward-difference calculation of derivatives with respect to any parameter belonging to the group is calculated as a fraction of the current value of that parameter; that fraction is provided as the real variable DERINC. However, if INCTYP is “absolute” the parameter increment for parameters belonging to the group is fixed, being again provided as the variable DERINC. Alternatively, if INCTYP is “rel_to_max”, the increment for any group member is calculated as a fraction of the group member with highest absolute value, that fraction again being DERINC. +INCTYP is a character variable which can assume the values “relative”, “absolute” or “rel\_to\_max”. If it is “relative”, the increment used for forward-difference calculation of derivatives with respect to any parameter belonging to the group is calculated as a fraction of the current value of that parameter; that fraction is provided as the real variable DERINC. However, if INCTYP is “absolute” the parameter increment for parameters belonging to the group is fixed, being again provided as the variable DERINC. Alternatively, if INCTYP is “rel\_to\_max”, the increment for any group member is calculated as a fraction of the group member with highest absolute value, that fraction again being DERINC. 
-Thus, for example, if INCTYP is “relative” and DERINC is 0.01 (a suitable value in many cases), the parameter increment for each group member is calculated as 0.01 times the current value of that parameter; this applies in all iterations of the inversion or optimization process. However, if INCTYP is “absolute” and DERINC is 0.01, the parameter increment is the same for all members of the group over all iterations, this being 0.01. If INCTYP is “rel_to_max” and DERINC is again 0.01, the parameter increment for all group members is the same for any one iteration, this being 0.01 times the absolute value of the group member of highest current magnitude; however, the increment may vary from iteration to iteration. +Thus, for example, if INCTYP is “relative” and DERINC is 0.01 (a suitable value in many cases), the parameter increment for each group member is calculated as 0.01 times the current value of that parameter; this applies in all iterations of the inversion or optimization process. However, if INCTYP is “absolute” and DERINC is 0.01, the parameter increment is the same for all members of the group over all iterations, this being 0.01. If INCTYP is “rel\_to\_max” and DERINC is again 0.01, the parameter increment for all group members is the same for any one iteration, this being 0.01 times the absolute value of the group member of highest current magnitude; however, the increment may vary from iteration to iteration. -If a group contains members which are fixed and/or tied, it is important to note that the values of these parameters are taken into account when calculating parameter increments using the “rel_to_max” option. +If a group contains members which are fixed and/or tied, it is important to note that the values of these parameters are taken into account when calculating parameter increments using the “rel\_to\_max” option. -For the “relative” and “rel_to_max” options, a DERINC value of between 0.01 and 0.02 is often appropriate. 
However, no suggestion for an appropriate DERINC value can be provided for the “absolute” increment option; the most appropriate increment will depend on parameter magnitudes and types. +For the “relative” and “rel\_to\_max” options, a DERINC value of between 0.01 and 0.02 is often appropriate. However, no suggestion for an appropriate DERINC value can be provided for the “absolute” increment option; the most appropriate increment will depend on parameter magnitudes and types. **DERINCLB** -If a parameter increment is calculated as “relative” or “rel_to_max”, it is possible that it may become too low if the parameter’s value becomes very small or, in the case of the “rel_to_max” option, if the magnitude of the largest parameter in the group becomes very small. A parameter increment becomes “too low” if it does not allow reliable derivatives to be calculated with respect to that parameter because of round-off errors incurred in the subtraction of nearly equal model outputs. +If a parameter increment is calculated as “relative” or “rel\_to\_max”, it is possible that it may become too low if the parameter’s value becomes very small or, in the case of the “rel\_to\_max” option, if the magnitude of the largest parameter in the group becomes very small. A parameter increment becomes “too low” if it does not allow reliable derivatives to be calculated with respect to that parameter because of round-off errors incurred in the subtraction of nearly equal model outputs. To circumvent this possibility, an absolute lower bound can be placed on parameter increments; this lower bound is the same for all group members, and is provided as the value of the DERINCLB control variable. Thus, if a parameter value is currently 1000.0 and it belongs to a group for which INCTYP is “relative”, DERINC is 0.01, and DERINCLB is 15.0, the parameter increment will be 15.0 instead of 10.0 calculated on the basis of DERINC alone. 
If you do not wish to place a lower bound on parameter increments in this fashion, you should provide DERINCLB with a value of 0.0. @@ -1163,19 +1198,19 @@ Note that if INCTYP is “absolute”, DERINCLB is ignored. **FORCEN** The character variable FORCEN (an abbreviation of “forward/central”) determines whether derivatives for group members are calculated using forward differences, or using one of the variants of the central difference method. -If FORCEN for a particular group is “always_2”, derivatives for all parameters belonging to that group will always be calculated using the forward difference method; filling of the columns of the Jacobian matrix corresponding to members of the group will require as many model runs as there are adjustable parameters in the group. If FORCEN is provided as “always_3”, the filling of these same columns will require twice as many model runs as there are parameters within the group. However, the derivatives will be calculated with greater numerical precision. +If FORCEN for a particular group is “always\_2”, derivatives for all parameters belonging to that group will always be calculated using the forward difference method; filling of the columns of the Jacobian matrix corresponding to members of the group will require as many model runs as there are adjustable parameters in the group. If FORCEN is provided as “always\_3”, the filling of these same columns will require twice as many model runs as there are parameters within the group. However, the derivatives will be calculated with greater numerical precision. If FORCEN is set to “switch”, derivatives calculation for all adjustable group members begins using the forward difference method. However, it switches to the central difference method for the remainder of the inversion process on the iteration after which the relative objective function reduction between successive iterations is less than PHIREDSWH. 
A value for PHIREDSWH is supplied in the “control data” section of the PEST control file. Experience has shown that in most instances, the most appropriate value for FORCEN is “switch”. This allows speed to take precedence over accuracy in the early stages of an inversion or optimization process when accuracy is not critical to objective function improvement, and accuracy to take precedence over speed later in the process when achievement of an objective function improvement requires that derivatives be calculated with as much precision as possible. **DERINCMUL** -If derivatives are calculated using one of the three-point methods, the parameter increment is added to the current parameter value prior to a model run, and then subtracted prior to another model run. In some cases it may be desirable to increase the value of the increment when calculating derivatives using a three-point method. The real variable DERINCMUL allows you to do this. If three-point derivatives calculation is employed, the value of DERINC is multiplied by DERINCMUL; this applies whether DERINC holds the increment factor, as it does for the “relative” or “rel_to_max” increment types, or holds the parameter increment itself, as it does for the “absolute” increment type. +If derivatives are calculated using one of the three-point methods, the parameter increment is added to the current parameter value prior to a model run, and then subtracted prior to another model run. In some cases it may be desirable to increase the value of the increment when calculating derivatives using a three-point method. The real variable DERINCMUL allows you to do this. If three-point derivatives calculation is employed, the value of DERINC is multiplied by DERINCMUL; this applies whether DERINC holds the increment factor, as it does for the “relative” or “rel\_to\_max” increment types, or holds the parameter increment itself, as it does for the “absolute” increment type. 
A DERINCMUL value of between 1.0 and 2.0 is usually satisfactory. **DERMTHD** -As is described in section 3.3 of this manual, the three-point method of derivatives calculation implemented by programs of the PEST++ suite has three variants. If FORCEN for a particular parameter group is set to “always_3” or “switch”, the program must be informed of which alternative to employ. This is accomplished through the character variable DERMTHD, which must be supplied as “parabolic”, “best_fit” or “outside_pts”. If FORCEN is set to “always_2” for a particular group, you must still provide one of these three legal values for DERMTHD; however, for such a parameter group, the value of DERMTHD has no bearing on derivatives calculation for its member parameters. +As is described in section 3.3 of this manual, the three-point method of derivatives calculation implemented by programs of the PEST++ suite has three variants. If FORCEN for a particular parameter group is set to “always\_3” or “switch”, the program must be informed of which alternative to employ. This is accomplished through the character variable DERMTHD, which must be supplied as “parabolic”, “best\_fit” or “outside\_pts”. If FORCEN is set to “always\_2” for a particular group, you must still provide one of these three legal values for DERMTHD; however, for such a parameter group, the value of DERMTHD has no bearing on derivatives calculation for its member parameters. On most occasions, FORCEN should be set to “switch” while DERMTHD should be set to “parabolic”. @@ -1184,6 +1219,7 @@ Programs of the PEST++ suite which do not calculate finite difference derivative ##
4.9 Parameter Data Section + ### 4.9.1 General It is the task of programs of the PEST++ suite to adjust parameters or decision variables in order to achieve some goal. This goal varies from program to program. They include @@ -1204,7 +1240,7 @@ The “parameter data” section of a PEST control file is divided into two part Each item of parameter data is now discussed in detail. Specifications of the “parameter data” section of a PEST control file are provided in figure 4.5. -
* parameter data
PARNME PARTRANS PARCHGLIM PARVAL1 PARLBND PARUBND PARGP SCALE OFFSET DERCOM
(one such line for each parameter)
PARNME PARTIED
(one such line for each tied parameter)
+
* parameter data
PARNME PARTRANS PARCHGLIM PARVAL1 PARLBND PARUBND PARGP SCALE OFFSET DERCOM
(one such line for each parameter)
PARNME PARTIED
(one such line for each tied parameter)
Figure 4.5 Specifications of the “parameter data” section of a PEST control file. @@ -1226,14 +1262,14 @@ If a parameter is fixed, it takes no part in an inversion, uncertainty analysis If a parameter is neither fixed nor tied, and is not log-transformed, the parameter transformation variable PARTRANS must be supplied as “none”. -If a particular inversion, uncertainty analysis or optimization problem would benefit from a more complex parameter transformation type than logarithmic, and/or if more complex relationships between parameters than ratio maintenance are required, these can be accomplished through use of the parameter pre-processor PAR2PAR supplied with the PEST suite; see part II of the PEST manual for details. It can also be accommodated using “secondary parameter” functionality available through PEST_HP; see documentation of PEST_HP for details. +If a particular inversion, uncertainty analysis or optimization problem would benefit from a more complex parameter transformation type than logarithmic, and/or if more complex relationships between parameters than ratio maintenance are required, these can be accomplished through use of the parameter pre-processor PAR2PAR supplied with the PEST suite; see part II of the PEST manual for details. It can also be accommodated using “secondary parameter” functionality available through PEST\_HP; see documentation of PEST\_HP for details. **PARCHGLIM** This character variable is used to designate whether an adjustable parameter is relative-limited, factor-limited or absolute-limited; see section 3.2.5 of this manual, and the discussion of the RELPARMAX and FACPARMAX control variables. PARCHGLIM must be provided with a value of “relative” or “factor”. The former designates that alterations to a parameter’s value are relative-limited whereas the latter designates that alterations to its value are factor-limited. The following aspects of change limit specifications should be noted. 
-- The only members of the PEST++ suite which imposes limits on parameter changes in this way are PESTPP-GLM and (optionally) PESTPP-IES (through the *ies_enforce_chglim* option). +- The only members of the PEST++ suite which imposes limits on parameter changes in this way are PESTPP-GLM and (optionally) PESTPP-IES (through the *ies\_enforce\_chglim* option). - If a parameter is tied or fixed, its change limit is ignored. @@ -1276,13 +1312,14 @@ Each line within the second part of the “parameter data” section of a PEST c Note that multiple tied parameters can be linked to a single parent parameter. However, a tied parameter can, naturally, be linked to only one parent parameter. -Programs of the PEST++ suite support a protocol for tying parameters together that may be convenient in some contexts. This is activated using the *tie_by_groups()* control variable. Denoting this variable as *true* effectively ties all parameters in a group to a single member of that group so that initial, intra-group parameter ratios are maintained as parameters are adjusted. However, it is important to note that this action does not affect any parameters that a user designates as tied to another parameter, or that has another parameter tied to it. +Programs of the PEST++ suite support a protocol for tying parameters together that may be convenient in some contexts. This is activated using the *tie\_by\_groups()* control variable. Denoting this variable as *true* effectively ties all parameters in a group to a single member of that group so that initial, intra-group parameter ratios are maintained as parameters are adjusted. However, it is important to note that this action does not affect any parameters that a user designates as tied to another parameter, or that has another parameter tied to it. ##
4.10 Observation Groups Section + Specifications for the “observation groups” section of a PEST control file are provided in figure 4.6. -
* observation groups
OBGNME
(one such line for each observation group)
+
* observation groups
OBGNME
(one such line for each observation group)
Figure 4.6 Specifications of the “observation groups” section of a PEST control file. @@ -1296,9 +1333,10 @@ Observation groups whose name begins with “regul” are special. Observations ##
4.11 Observation Data Section + The “observation data” section of a PEST control file is particularly simple. Its specifications are provided in figure 4.7. -
* observation data
OBSNME OBSVAL WEIGHT OBGNME
(one such line for each observation)
+
* observation data
OBSNME OBSVAL WEIGHT OBGNME
(one such line for each observation)
Figure 4.7 Specifications of the “observation data” section of a PEST control file. @@ -1322,13 +1360,14 @@ The observation group name supplied through OBGNME must also be listed in the ##
4.12 Model Command Line Section + The “model command line” section of a PEST control file supplies the command which programs of the PEST++ suite must use to run the model. Optionally, this section can contain multiple commands; note, however, that at the time of writing, not all programs of the PEST++ suite support multiple command line functionality. PESTPP-GLM (like PEST) supports the use of different commands for running a model to compute derivatives with respect to different parameters. The model command number associated with each parameter is provided by the parameter-specific DERCOM variable which appears in the “parameter data” section of a PEST control file. Figure 4.8 shows specifications for the “model command line” section of a PEST control file. -
* model command line
COMLINE
(one such line for each model command line)
+
* model command line
COMLINE
(one such line for each model command line)
Figure 4.8 Specifications of the “model command line” section of a PEST control file. @@ -1336,9 +1375,10 @@ The model command line may be simply the name of an executable file, or it may b ##
4.13 Model Input Section + The “model input” section of a PEST control file relates PEST template files to model input files. Its specifications are provided in figure 4.9. -
* model input
TEMPFLE INFLE
(one such line for each template file)
+
* model input
TEMPFLE INFLE
(one such line for each template file)
Figure 4.9 Specifications of the “model input” section of a PEST control file. @@ -1348,11 +1388,12 @@ It is possible for a single template file to be linked to more than one model in ##
4.14 Model Output Section + The “model output” section of a PEST control file relates PEST instruction files to model output files. Specifications of the “model output” section of a PEST control file are provided in figure 4.10. -
* model output
INSFLE OUTFLE
(one such line for each instruction file)
+
* model output
INSFLE OUTFLE
(one such line for each instruction file)
Figure 4.10 Specifications of the “model output” section of a PEST control file. @@ -1362,6 +1403,7 @@ A single model output file may be read by more than one instruction file. Howeve ##
4.15 Prior Information Section + Prior information can be thought of as observations which pertain directly to parameters themselves. As such, they comprise part of a calibration dataset which, together with the observations themselves, assists in the estimation of parameters. An “observation” which comprises a prior information equation can involve more than one parameter. Relationships between parameters that are encapsulated in prior information equations must be linear. If parameter-constraining “observations” pertain to nonlinear relationships between parameters, these relationships must be calculated by the model itself. The PEST PAR2PAR utility can accomplish this task; see part II of the PEST manual. The manner in which prior information equations are recorded in the “prior information” section of a PEST control file is not unlike that in which you would write an equation on paper yourself; however, certain strict protocols must be observed. Refer to figure 4.2 for an instance of a PEST control file containing prior information. (Note that PEST utilities such as ADDREG1 and ADDREG2, as well as some of the programs comprising the Groundwater Data Utility suite, add prior information to a PEST control file automatically, this saving you the trouble of having to add it yourself. Similar functionality is available through the PyEMU library.) @@ -1370,7 +1412,7 @@ Each item on a prior information line must be separated from its neighbouring it Prior information lines must adhere to the syntax set out in figure 4.11. -
PILBL PIFAC * PARNME + PIFAC * log(PARNME) ... = PIVAL WEIGHT OBGNME
(one such line for each article of prior information)
+
PILBL PIFAC * PARNME + PIFAC * log(PARNME) ... = PIVAL WEIGHT OBGNME
(one such line for each article of prior information)
Figure 4.11. The syntax of a prior information line. @@ -1380,32 +1422,37 @@ Following the prior information label is the prior information equation. To the The parameter factor must never be omitted. Suppose, for example, that a prior information equation consists of only a single term, namely that an untransformed, adjustable parameter named “par1” has a preferred value of 2.305, and that you would like this information included in the inversion process with a weight of 1.0. If this article of prior information is given the label “pi1”, the pertinent prior information line can be written as -| pi1 1.0 \* par1 = 2.305 1.0 pr_info | -|-------------------------------------| +| | +|--------------------------------------| +| pi1 1.0 \* par1 = 2.305 1.0 pr\_info | If you had simply written -| pi1 par1 = 2.305 1.0 pr_info | -|------------------------------| +| | +|-------------------------------| +| pi1 par1 = 2.305 1.0 pr\_info | programs of the PEST++ suite would have objected, complaining of a syntax error. If a parameter is log-transformed, you must provide prior information pertinent to the log of that parameter, rather than to the parameter itself. Furthermore, the parameter name must be placed in brackets and preceded by “log” (note that there is no space between “log” and the following opening bracket). Thus, in the above example, if parameter “par1” is log-transformed, the prior information equation should be rewritten as -| pi1 1.0 \* log(par1) = .362671 1.0 pr_info | -|--------------------------------------------| +| | +|---------------------------------------------| +| pi1 1.0 \* log(par1) = .362671 1.0 pr\_info | Note that logs are taken to base 10. The left side of a prior information equation can be comprised of the sum and/or difference of a number of factor-parameter pairs of the type already illustrated; these pairs must be separated from each other by a “+” or “-” sign, with a space to either side of the sign. 
For example -| pi2 1.0 \* par2 + 3.43435 \* par4 - 2.389834 \* par3 = 1.09e3 3.00 group_pr | -|-----------------------------------------------------------------------------| +| | +|------------------------------------------------------------------------------| +| pi2 1.0 \* par2 + 3.43435 \* par4 - 2.389834 \* par3 = 1.09e3 3.00 group\_pr | Prior information equations which include log-transformed parameters must express a relationship between the logs of those parameters. For example, if you would like the ratio between the estimated values of parameters “par1” and “par2” to be about 40.0, the prior information equation may be written as -| pi3 1.0 \* log(par1) - 1.0 \* log(par2) = 1.60206 2.0 group_pr | -|----------------------------------------------------------------| +| | +|-----------------------------------------------------------------| +| pi3 1.0 \* log(par1) - 1.0 \* log(par2) = 1.60206 2.0 group\_pr | To the right of the “=” sign of each article of prior information are two real variables and a character variable, namely PIVAL, WEIGHT and OBGNME. The first of these is the “observed value” of the prior information equation. The second is the weight assigned to the article of prior information in the parameter estimation process. This can be zero if you wish (thereby removing the prior information equation from consideration); however, it must not be negative. @@ -1413,18 +1460,19 @@ The final item associated with each article of prior information must be the obs When adding prior information to a PEST control file, you should note that no two prior information equations should say the same thing. Thus, the following pair of prior information lines is illegal. -
pi1 2.0 * log(par1) + 2.5 * log(par2) - 3.5 * log(par3) = 1.342 1.00 obgp1
pi2 4.0 * log(par1) + 5.0 * log(par2) - 7.0 * log(par3) = 2.684 1.00 obgp2
+
pi1 2.0 * log(par1) + 2.5 * log(par2) - 3.5 * log(par3) = 1.342 1.00 obgp1
pi2 4.0 * log(par1) + 5.0 * log(par2) - 7.0 * log(par3) = 2.684 1.00 obgp2
If you wish to break a single prior information equation into more than one line, use the continuation character “&”. This must be placed at the beginning of each continuation line, separated from the item which follows it by a space. The line break must be placed between individual items of a prior information equation, not within an item. Thus, the following lines convey the same information as does the first of the above pair of prior information lines. -
pi1
& 2.0
& *
& log(par1)
& +
& 2.5
& *
& log(par2)
& -
& 3.5
& *
& log(par3)
& =
& 1.342
& 1.00
& obgp1
+
pi1
& 2.0
& *
& log(par1)
& +
& 2.5
& *
& log(par2)
& -
& 3.5
& *
& log(par3)
& =
& 1.342
& 1.00
& obgp1
However, the following article of prior information is illegal because of the break between “log” and “par2”: -
pi1 2.0 * log(par1) + 2.5 * log
& (par2) - 3.5 * log(par3) = 1.342 1.00 obgp1
+
pi1 2.0 * log(par1) + 2.5 * log
& (par2) - 3.5 * log(par3) = 1.342 1.00 obgp1
##
4.16 Regularization Section + The regularization section of a PEST control file is optional. If PESTMODE is not set to “regularization”, it is redundant. If it is set to “regularization” and a “regularization” section is not provided, the PESTPP-GLM program (the only program of the PEST++ suite which uses this section) provides default values for the control variables that are featured in it. These variables are now described. To clarify the meanings of some of the terms that appear in the following explanation, see the description of PESTPP-GLM in chapter 6 of this manual. For more general information on regularization, see Doherty (2015). **PHIMLIM** @@ -1462,31 +1510,32 @@ PESTPP-GLM does not support the same range of values for IREGADJ as does PEST. W ## 4.17 Control Variables for PEST++ Programs + Each of the programs comprising the PEST++ suite requires its own control variables. Unlike variables that control the operation of PEST, variables that are specific to PEST++ programs are not identified by their position in the PEST control file. Instead, they are introduced through a variable-specific keyword. Any line in a PEST control file that begins with the character string “++” is ignored by PEST-suite programs, and by PESTCHEK. However, programs of the PEST++ suite read these lines, expecting to find one or more keywords. Figure 4.12 shows a PEST control file that includes the values of some PEST++ control variables (and a comment line). Wherever a PEST++ keyword is supplied, one or more values for the control variable that is associated with that keyword must follow it in brackets. Where more than one value is associated with a keyword, these values must be comma-delimited within the brackets. More than one keyword can be supplied on a “++” line. If so, they must be separated by one or more whitespace characters. -
pcf
* control data
restart estimation
5 19 2 2 3
2 3 single point
10.0 -3.0 0.3 0.03 10
10.0 10.0 0.001
0.1
50 0.005 4 4 0.005 4
1 1 1
* parameter groups
ro relative 0.01 0.0 switch 2.0 parabolic
h relative 0.01 0.0 switch 2.0 parabolic
* parameter data
ro1 fixed factor 0.5 .1 10 ro 1.0 0.0
ro2 log factor 5.0 .1 10 ro 1.0 0.0
ro3 tied factor 0.5 .1 10 ro 1.0 0.0
h1 none factor 2.0 .05 100 h 1.0 0.0
h2 log factor 5.0 .05 100 h 1.0 0.0
ro3 ro2
* observation groups
obsgp1
obsgp2
prgp1
* observation data
ar1 1.21038 1.0 obsgp1
ar2 1.51208 1.0 obsgp1
ar3 2.07204 1.0 obsgp1
ar4 2.94056 1.0 obsgp1
ar5 4.15787 1.0 obsgp1
ar6 5.7762 1.0 obsgp1
ar7 7.7894 1.0 obsgp1
ar8 9.99743 1.0 obsgp1
ar9 11.8307 1.0 obsgp2
ar10 12.3194 1.0 obsgp2
ar11 10.6003 1.0 obsgp2
ar12 7.00419 1.0 obsgp2
ar13 3.44391 1.0 obsgp2
ar14 1.58279 1.0 obsgp2
ar15 1.1038 1.0 obsgp2
ar16 1.03086 1.0 obsgp2
ar17 1.01318 1.0 obsgp2
ar18 1.00593 0.0 obsgp2
ar19 1.00272 0.0 obsgp2
* model command line
model.bat
* model inputoutput
ves1.tpl a_model.in1
ves2.tpl a_model.in2
* model output
ves1.ins a_model.ot1
ves2.ins a_model.ot2
ves3.ins a_model.ot3
* prior information
pi1 1.0 * h1 = 1.0 3.0 prgp1
pi2 1.0 * log(ro2) + 1.0 * log(h2) = 2.6026 2.0 prgp1
~ This is a comment line
++ forecasts(ar18,ar19) parcov(param.unc)
++ lambdas(0.1, 1.0, 10,100)
++ n_iter_base(-1)
++ n_iter_super(4)
++ base_jacobian(pest.jco)
++ par_sigma_range(6)
+
pcf
* control data
restart estimation
5 19 2 2 3
2 3 single point
10.0 -3.0 0.3 0.03 10
10.0 10.0 0.001
0.1
50 0.005 4 4 0.005 4
1 1 1
* parameter groups
ro relative 0.01 0.0 switch 2.0 parabolic
h relative 0.01 0.0 switch 2.0 parabolic
* parameter data
ro1 fixed factor 0.5 .1 10 ro 1.0 0.0
ro2 log factor 5.0 .1 10 ro 1.0 0.0
ro3 tied factor 0.5 .1 10 ro 1.0 0.0
h1 none factor 2.0 .05 100 h 1.0 0.0
h2 log factor 5.0 .05 100 h 1.0 0.0
ro3 ro2
* observation groups
obsgp1
obsgp2
prgp1
* observation data
ar1 1.21038 1.0 obsgp1
ar2 1.51208 1.0 obsgp1
ar3 2.07204 1.0 obsgp1
ar4 2.94056 1.0 obsgp1
ar5 4.15787 1.0 obsgp1
ar6 5.7762 1.0 obsgp1
ar7 7.7894 1.0 obsgp1
ar8 9.99743 1.0 obsgp1
ar9 11.8307 1.0 obsgp2
ar10 12.3194 1.0 obsgp2
ar11 10.6003 1.0 obsgp2
ar12 7.00419 1.0 obsgp2
ar13 3.44391 1.0 obsgp2
ar14 1.58279 1.0 obsgp2
ar15 1.1038 1.0 obsgp2
ar16 1.03086 1.0 obsgp2
ar17 1.01318 1.0 obsgp2
ar18 1.00593 0.0 obsgp2
ar19 1.00272 0.0 obsgp2
* model command line
model.bat
* model input
ves1.tpl a_model.in1
ves2.tpl a_model.in2
* model output
ves1.ins a_model.ot1
ves2.ins a_model.ot2
ves3.ins a_model.ot3
* prior information
pi1 1.0 * h1 = 1.0 3.0 prgp1
pi2 1.0 * log(ro2) + 1.0 * log(h2) = 2.6026 2.0 prgp1
~ This is a comment line
++ forecasts(ar18,ar19) parcov(param.unc)
++ lambdas(0.1, 1.0, 10,100)
++ n_iter_base(-1)
++ n_iter_super(4)
++ base_jacobian(pest.jco)
++ par_sigma_range(6)
Figure 4.12 A PEST control file which includes PEST++ control variables. Values that are supplied with a keyword can be integer, real or text (for example filenames), this depending on the keyword. Text can be optionally surrounded by single or double quotes; this option becomes a necessity if a filename provided with a keyword includes blanks. -If a program of the PEST++ suite does not use a keyword, it simply ignores it. Hence a PEST control file can contain keywords that are pertinent to a number of members of the suite. The PEST++ program that is currently using the PEST control file only reads values for control variables that it recognizes. If a ++ argument is not recognized, this will raise an exception; if users want to “forgive” unrecognized ++ args, the “++forgive_unknown_args(true)” should be supplied. +If a program of the PEST++ suite does not use a keyword, it simply ignores it. Hence a PEST control file can contain keywords that are pertinent to a number of members of the suite. The PEST++ program that is currently using the PEST control file only reads values for control variables that it recognizes. If a ++ argument is not recognized, this will raise an exception; if users want to “forgive” unrecognized ++ args, the “++forgive\_unknown\_args(true)” should be supplied. -#
+## 4.18 Keyword and External File Control File Format -## 4.18 Keyword and External File Control File Format As of version 4.3.0, the programs in the PEST++ suite support an enhanced control file format, which has been designed to support an increasingly diverse set of tools. This new format requires significantly less “in depth” knowledge of the algorithmic controls over the PEST++ tools from the user as all of these control variables now have internal default values, so arguments that are not specified in the control file simply use these internal defaults. Additionally, the sections of the control file with lists of data (e.g., “\* parameter data”, “\* observation data”, among others) can now be stored in external files; only the name of this file and some optional parsing information is needed in the control file. -Empty blanks lines are tolerated in the enhanced format as are lines that start with “#” as comment lines. Note once a “#” is encountered on a line, the remaining characters to the right are ignored. This allows users to have both full comment lines and partial comment lines. +Empty or blank lines are tolerated in the enhanced format, as are lines that start with “\#”, which are treated as comment lines. Note that once a “\#” is encountered on a line, the remaining characters to the right are ignored. This allows users to have both full comment lines and partial comment lines. Below is a more detailed description of this enhanced format. -## 4.18.1 Keyword and Consolidated Algorithmic Variables +## 4.18.1 Keyword and Consolidated Algorithmic Variables + The enhanced control file format now accepts a “\* control data keyword” section. This section replaces the following sections in the standard control file format: @@ -1498,9 +1547,10 @@ The enhanced control file format now accepts a “\* control data keyword” sec - ++ arguments -Therefore, if users construct a “\* control data keyword” section, these cannot also be listed–an error message will be issued if you try. 
The format of the “\* control data keyword”, as expected is by keywords. An example section is shown on Figure 4.13. Keyword-value pairs should be separated by one or more whitespace (tabs acceptable as well). For values that have multiple entries (like the PESTPP-GLM control variable “lambdas”), users should comma separate each separate value (as shown on Figure 4.13). If a ++ argument is not recognized, this will raise an exception; if users want to “forgive” unrecognized ++ args, the “forgive_unknown_args true” should be supplied. +Therefore, if users construct a “\* control data keyword” section, these cannot also be listed–an error message will be issued if you try. The format of the “\* control data keyword”, as expected is by keywords. An example section is shown on Figure 4.13. Keyword-value pairs should be separated by one or more whitespace (tabs acceptable as well). For values that have multiple entries (like the PESTPP-GLM control variable “lambdas”), users should comma separate each separate value (as shown on Figure 4.13). If a ++ argument is not recognized, this will raise an exception; if users want to “forgive” unrecognized ++ args, the “forgive\_unknown\_args true” should be supplied. + +## 4.18.2 External file support -## 4.18.2 External file support As shown in Figure 4.13, the enhanced control file format allows users to store list-directed input in external files. These files must have the same number of entries on each line and the location of these files in the user’s directory structure must be the path from where the control file is located to where the external file is located. For example, if the control file is saved in the directory “model” and parameter data is stored in the file “parameters.csv”, the entry in the “\* parameter data external” must be “parameters.csv”, regardless of where the calling program is instantiated. 
@@ -1512,7 +1562,7 @@ Each line in the external sections is required to have one entry and may have ad - “sep” (for separator). Default is “,” (comma). For whitespace-delimited (one or more whitespaces) use “w” -- “missing_values”. Default is empty/whitespace (for comma-separated). Users are strongly encouraged to supply this option for whitespace-delimited files. +- “missing\_values”. Default is empty/whitespace (for comma-separated). Users are strongly encouraged to supply this option for whitespace-delimited files. These options should be supplied as whitespace-separated options on the same line as the filename (see Figure 4.13 for an example) @@ -1526,9 +1576,9 @@ Using the “sep”, each line of the external file must have the same number en - parval1(value) – initial parameter value - - parubnd(upper_bound) – parameter upper bound + - parubnd(upper\_bound) – parameter upper bound - - parlbnd(lower_bound) – parameter lower bound + - parlbnd(lower\_bound) – parameter lower bound - pargp(group) – parameter group @@ -1544,15 +1594,15 @@ Using the “sep”, each line of the external file must have the same number en - \* model input external - - pest_file – template file name + - pest\_file – template file name - - model_file – corresponding model input file name + - model\_file – corresponding model input file name - \* model output external - - pest_file – instruction file name + - pest\_file – instruction file name - - model_file – corresponding model output file name + - model\_file – corresponding model output file name - \* prior information external @@ -1568,27 +1618,28 @@ Each of the listed formal names (or its alias) must be found in the header row o Through the use of these external files, PEST++ programs support the use of following optional quantities: -- *standard_deviation*: If this column appears in any external file for either \* *parameter data external* or *\* observation data external* section, then the rows of that external file with values in the 
*standard_deviation* column that can be parsed to double-precision floating point values will be used as the prior parameter standard deviation and observation noise standard deviation, respectively. This is to allow the parameter bounds to serve solely an algorithmic function and the weights to be used solely to define a composite objective function. In this way, users can define a prior uncertainty for parameters separate from the parameter bounds and an observation noise separate from observation weights +- *standard\_deviation*: If this column appears in any external file for either \* *parameter data external* or *\* observation data external* section, then the rows of that external file with values in the *standard\_deviation* column that can be parsed to double-precision floating point values will be used as the prior parameter standard deviation and observation noise standard deviation, respectively. This is to allow the parameter bounds to serve solely an algorithmic function and the weights to be used solely to define a composite objective function. In this way, users can define a prior uncertainty for parameters separate from the parameter bounds and an observation noise separate from observation weights -- *upper_bound, lower_bound*: if this column appears in any external file for *\* observation data external* section, then the rows of that external file with values in the *upper_bound* and/or *lower_bound* column that can be parsed to double-precision floating point values will be used to limit the values of realized observation ensemble values. 
This is primarily used in the ensemble tools where realizations of noise are drawn and added to the observation values in the control file; In some cases, extreme noise values may be drawn and the use of *upper_bound* and *lower_bound* can reduce the effect of these extreme values by replacing realized observation values greater than *upper_bound* with the value of ­*upper_bound* and visa versa from *lower_bound*. +- *upper\_bound, lower\_bound*: if this column appears in any external file for *\* observation data external* section, then the rows of that external file with values in the *upper\_bound* and/or *lower\_bound* column that can be parsed to double-precision floating point values will be used to limit the values of realized observation ensemble values. This is primarily used in the ensemble tools where realizations of noise are drawn and added to the observation values in the control file; in some cases, extreme noise values may be drawn and the use of *upper\_bound* and *lower\_bound* can reduce the effect of these extreme values by replacing realized observation values greater than *upper\_bound* with the value of *upper\_bound* and vice versa for *lower\_bound*. Note that not all external files or even every row in an external file must have a valid value for these optional quantities. For rows where the values are “missing”, the standard operating procedure is applied. -
~ comment line
pcf
* control data keyword ~ more comments here:
pestmode estimation
maxsing 100
~Phimlim 1234 variable commented out
forecasts ar18,ar19
Parcov param.unc ~ the prior cov matrix in unc file format
lambdas 0.1, 1.0, 10,100 ~some lambda values
n_iter_base -1
n_iter_super 4
base_jacobian pest.jcb
par_sigma_range 6
ies_par_en par.jcb
* parameter data external
Par_hk.csv
Par_rech.dat sep w missing_values nan
~ observation data split into separate file for each type
* observation data external
head_obs.dat sep w missing_values missing
flux_obs.csv
additional_valuable_obs.csv
* model command line
model.bat
* model input external
Model_input.csv
* model output external
Model_output.csv
* prior information external
Pi.csv
+
~ comment line
pcf
* control data keyword ~ more comments here:
pestmode estimation
maxsing 100
~Phimlim 1234 variable commented out
forecasts ar18,ar19
Parcov param.unc ~ the prior cov matrix in unc file format
lambdas 0.1, 1.0, 10,100 ~some lambda values
n_iter_base -1
n_iter_super 4
base_jacobian pest.jcb
par_sigma_range 6
ies_par_en par.jcb
* parameter data external
Par_hk.csv
Par_rech.dat sep w missing_values nan
~ observation data split into separate file for each type
* observation data external
head_obs.dat sep w missing_values missing
flux_obs.csv
additional_valuable_obs.csv
* model command line
model.bat
* model input external
Model_input.csv
* model output external
Model_output.csv
* prior information external
Pi.csv
Figure 4.13 An enhanced PEST control file. -#
+# 5. Running PEST++ Programs -# 5. Running PEST++ Programs -## 5.1 General +## 5.1 General + To simplify the following discussion, let PESTPP-XXX signify the name of a program belonging to the PEST++ suite. This can be any of the programs listed in table 1.1. -## 5.2 Model Runs in Serial +## 5.2 Model Runs in Serial + -### 5.2.1 Concepts +### 5.2.1 Concepts Programs of the PEST++ suite have in common the fact that they run a model many times. These runs can be undertaken in serial or in parallel. Where a PEST++ program undertakes model runs in serial, it issues a command directly to the operating system whenever it requires that a model run be undertaken. The command which it issues is provided in the “model command line” section of the PEST control file. @@ -1596,20 +1647,22 @@ On most occasions of its use, execution of PESTPP-XXX should be initiated from t If the model program resides in the folder from which PESTPP-XXX is run, or resides in a folder that is cited in the PATH environment variable, there is no need to prefix the model command with an absolute or relative pathname in the PEST control file. If the model is a batch or shell script, the same applies to executable programs which are cited in this script. However, if this is not the case, then pathnames are required. -### 5.2.2 Running PESTPP-XXX +### 5.2.2 Running PESTPP-XXX Where model runs are undertaken in serial, PESTPP-XXX is run using the command -| pestpp-xxx *case* | +| | |-------------------| +| pestpp-xxx *case* | where *case* is the filename base of the PEST control file. If you wish, you can include the *.pst* extension at the end of the name of the PEST control file. -Note that all members of the PEST++ suite support multithreaded template and instruction file processing, which can be an important consideration for very high-dimensional problems where the number of template and instruction files may number in the hundreds. 
The number of threads to use for processing template and instruction files is controlled by the *num_tpl_ins_threads* option, which is set to 1 by default. Note the parallel agents also use this argument. Also note that using multiple threads to process template and instruction files can consume significantly more memory and clock cycles than a single thread. +Note that all members of the PEST++ suite support multithreaded template and instruction file processing, which can be an important consideration for very high-dimensional problems where the number of template and instruction files may number in the hundreds. The number of threads to use for processing template and instruction files is controlled by the *num\_tpl\_ins\_threads* option, which is set to 1 by default. Note the parallel agents also use this argument. Also note that using multiple threads to process template and instruction files can consume significantly more memory and clock cycles than a single thread. -## 5.3 Model Runs in Parallel +## 5.3 Model Runs in Parallel -### 5.3.1 Concepts + +### 5.3.1 Concepts Tasks that are carried out by programs of the PEST++ suite require that a model be run many times. By undertaking these model runs in parallel, the time required for completion of an inversion or optimization task can be reduced considerably. Nowadays, most modellers have ready access to parallel computing facilities. Modern-day personal computers have multiple CPUs. Most offices have multiple computers connected to each other through an office network. Many modellers have access to a high-performance computing cluster which may provide hundreds of cores. All modellers have access to the computing cloud. @@ -1623,18 +1676,19 @@ When the PESTPP-XXX manager wishes that a model run be carried out, it chooses o In order to write model input files and read model output files, each instance of the PESTPP-XXX agent must have access to template files and instruction files. 
Normally copies of all template and instruction files that are listed in the PEST control file used by the PESTPP-XXX manager are placed in the working folder of each PESTPP-XXX agent, together with files required by the model. As will be discussed shortly, the PESTPP-XXX agent knows of the existence of these template and instruction files because it reads the PEST control pertaining to the current problem as it commences execution. -### 5.3.2 Manager to Agent Communication +### 5.3.2 Manager to Agent Communication The PESTPP-XXX manager and the PESTPP-XXX agent communicate with each other using the TCP/IP communications protocol. Where a agent resides on a different machine from that of the manager, network management must permit this kind of communication between them. If the manager’s machine can be “pinged” from the agent’s machine, and if the agent’s machine can be “pinged” from the manager’s machine, then you have this permission. When the PESTPP-XXX manager commences execution, it opens a so-called “port”. Agents must be informed of the IP address or hostname of the machine on which the manager is operating, and of the number of this port (see below). In contrast, the PESTPP-XXX manager does not need to know the locations of its agents. It knows that a agent exists through the TCP/IP connection which the agent initiates when it commences execution. Acceptance of this connection is sufficient for the communications pathway to exist. Then, whenever the manager requires that a agent carry out a model run, it sends parameter values to that agent through this connection. When the model run that is supervised by the agent is complete, the manager receives the values of model-calculated observations read from model output files by the agent through the same connection. It is oblivious to the location of the agent, and hence of the computer and folder in which these model runs are being carried out. 
-### 5.3.3 Running PESTPP-XXX as Manager and Agent +### 5.3.3 Running PESTPP-XXX as Manager and Agent When model runs are parallelized, execution of the PESTPP-XXX manager must be initiated using the following command: -| pestpp-xxx case /h :port | +| | |--------------------------| +| pestpp-xxx case /h :port | As usual, *case* is the filename base of the PEST control file. *port* in the above command signifies a TCP/IP port number that has not already been opened by another program on the machine on which the PESTPP-XXX manager is running. Choosing a high number such as 4000 or above generally forestalls conflicts. “/h” is case-insensitive; “/H” is also acceptable. @@ -1642,8 +1696,9 @@ When it is run using the above command, the PESTPP-XXX manager opens the nominat Execution of each instance of the PESTPP-XXX agent must be initiated as follows: -| pestpp-xxx case /h textstring:port | +| | |------------------------------------| +| pestpp-xxx case /h textstring:port | *case* is the filename base of the PEST control file. *textstring* can be any of the following: @@ -1655,18 +1710,19 @@ Execution of each instance of the PESTPP-XXX agent must be initiated as follows: If you are running the manager together with multiple agent instances on the same machine, and if that machine is not connected to the internet, then the last of the above three options is the only one available to you. Type -| hostname | +| | |----------| +| hostname | in a command line window to ascertain the hostname of a machine. If, for some reason, a agent ceases execution, or the computer on which it resides loses its connection with the manager, it should be re-started using the above command. A agent does not have to be restarted using the “/r” switch as its tasks are repetitive and simple, namely to receive parameters, run the model, and then send model outputs to the PESTPP-XXX manager. 
-### 5.3.4 Run Management Record File +### 5.3.4 Run Management Record File -The PESTPP-XXX manager records all communications between it and its agents in a run management record file. This file is named *case.rmr* where *case* is the filename base of the PEST control file. The agent that execute runs write all information related to communications with the master to the *panther_agent.rec* file, which is written in the local agent directory. +The PESTPP-XXX manager records all communications between it and its agents in a run management record file. This file is named *case.rmr* where *case* is the filename base of the PEST control file. The agent that execute runs write all information related to communications with the master to the *panther\_agent.rec* file, which is written in the local agent directory. -### 5.3.5 Run Management Control Variables +### 5.3.5 Run Management Control Variables A number of PESTPP-XXX control variables are used to control parallel run management. These are now described. @@ -1674,55 +1730,58 @@ As has already been discussed, on commencement of execution PESTPP-XXX reads a P As was discussed in section 4.17, the value of a PEST++ control variable must be supplied as an argument to a keyword. It must be surrounded by brackets immediately following the keyword that announces its presence. -**overdue_resched_fac()** -Suppose that the value of *overdue_resched_fac()* is supplied as *r*, where *r* is a real number. If the run manager is still awaiting completion of a simulation that has already been running for more than *r* times the average model run time (calculated on the basis of previous successful runs), then it will ask another agent to start the same run if one is available. The maximum number of concurrent runs that employ the same set of parameters is set by the *max_run_fail()* control variable (this variable is further discussed below). The re-running of a simulation using another agent is done as a precautionary measure. 
It guards against the possibility that the computer on which the delayed model run is executing has become busy with other tasks. A competition is started between agents. The first of the concurrent model runs to finish successfully is accepted; meanwhile, the remaining concurrent model run (or runs) is terminated by PESTPP-XXX. Note, however, that if *max_run_fail()* is set to 1, concurrent model run scheduling is not undertaken. +**overdue\_resched\_fac()** +Suppose that the value of *overdue\_resched\_fac()* is supplied as *r*, where *r* is a real number. If the run manager is still awaiting completion of a simulation that has already been running for more than *r* times the average model run time (calculated on the basis of previous successful runs), then it will ask another agent to start the same run if one is available. The maximum number of concurrent runs that employ the same set of parameters is set by the *max\_run\_fail()* control variable (this variable is further discussed below). The re-running of a simulation using another agent is done as a precautionary measure. It guards against the possibility that the computer on which the delayed model run is executing has become busy with other tasks. A competition is started between agents. The first of the concurrent model runs to finish successfully is accepted; meanwhile, the remaining concurrent model run (or runs) is terminated by PESTPP-XXX. Note, however, that if *max\_run\_fail()* is set to 1, concurrent model run scheduling is not undertaken. -The value supplied for the *overdue_resched_fac()* variable must be a real number that is 1.0 or greater. Its default value is 1.15; this is the value which the run manager uses if you do not supply a value for this control variable yourself. A value greater than 1.15 may be desirable in circumstances where the time required for completion of a simulation is sensitive to parameter values. 
+The value supplied for the *overdue\_resched\_fac()* variable must be a real number that is 1.0 or greater. Its default value is 1.15; this is the value which the run manager uses if you do not supply a value for this control variable yourself. A value greater than 1.15 may be desirable in circumstances where the time required for completion of a simulation is sensitive to parameter values. -**overdue_giveup_fac()** -The value of *overdue_giveup_fac()* is also a real number. Suppose that it is *r*. If an overdue simulation has already taken *r* times the average model run time but is still not complete, then the run manager declares the run to have failed. It instructs the agent that is supervising the run to kill it. The agent is then free to supervise another model run based on another set of parameters. Handling of run failure by programs of the PEST++ suite is discussed in more detail below. The default value for *overdue_giveup_fac()* is 100.0 for all programs in the suite except for PESTPP-IES, which uses a default value of 2.0. +**overdue\_giveup\_fac()** +The value of *overdue\_giveup\_fac()* is also a real number. Suppose that it is *r*. If an overdue simulation has already taken *r* times the average model run time but is still not complete, then the run manager declares the run to have failed. It instructs the agent that is supervising the run to kill it. The agent is then free to supervise another model run based on another set of parameters. Handling of run failure by programs of the PEST++ suite is discussed in more detail below. The default value for *overdue\_giveup\_fac()* is 100.0 for all programs in the suite except for PESTPP-IES, which uses a default value of 2.0. -**overdue_giveup_minutes()** -This control variable can be used to place a “hard” upper bound on the acceptable model run time. The value supplied for *overdue_giveup_minutes()*must be a real number. Suppose that it is *r*. 
Then a model run is deemed to have failed if it has been executing for more than *r* minutes. This model run time criterion works as an “or” condition with *overdue_giveup_fac()*; if the duration of a given model run exceeds *overdue_giveup_minutes()* or *overdue_giveup_fac()* times the average model run time, it is marked as a failure. The default value for *overdue_giveup_fac()* is 1.0E+30, this effectively disabling it. +**overdue\_giveup\_minutes()** +This control variable can be used to place a “hard” upper bound on the acceptable model run time. The value supplied for *overdue\_giveup\_minutes()* must be a real number. Suppose that it is *r*. Then a model run is deemed to have failed if it has been executing for more than *r* minutes. This model run time criterion works as an “or” condition with *overdue\_giveup\_fac()*; if the duration of a given model run exceeds *overdue\_giveup\_minutes()* or *overdue\_giveup\_fac()* times the average model run time, it is marked as a failure. The default value for *overdue\_giveup\_minutes()* is 1.0E+30, which effectively disables it. -**max_run_fail()** -A model run is declared to have failed if PESTPP-WRK cannot read one or more of the model’s output files. Excessive execution time can also be deemed as model run failure, as discussed above. Run failure is normally an outcome of model hostility to the set of parameters with which it was provided. However, sometimes it can be caused by external problems that are not the model’s fault, for example operating system quirks or network failure. Hence, the parallel run manager can be instructed to attempt a failed model run again using another agent. However, only a certain number of model run failures can be tolerated. This “certain number” (an integer) must be supplied as the value of the *max_run_fail()* control variable. A value of 1 is used to request no repetition of model runs.
The default value of 3 is used for all programs except PESTPP-IES, which uses a default value of 1. +**max\_run\_fail()** +A model run is declared to have failed if PESTPP-WRK cannot read one or more of the model’s output files. Excessive execution time can also be deemed as model run failure, as discussed above. Run failure is normally an outcome of model hostility to the set of parameters with which it was provided. However, sometimes it can be caused by external problems that are not the model’s fault, for example operating system quirks or network failure. Hence, the parallel run manager can be instructed to attempt a failed model run again using another agent. However, only a certain number of model run failures can be tolerated. This “certain number” (an integer) must be supplied as the value of the *max\_run\_fail()* control variable. A value of 1 is used to request no repetition of model runs. The default value of 3 is used for all programs except PESTPP-IES, which uses a default value of 1. -As discussed above, *max_run_fail()* is also used to control the number of concurrent runs that are allowed when the *overdue_resched_fac()* criterion is exceeded. +As discussed above, *max\_run\_fail()* is also used to control the number of concurrent runs that are allowed when the *overdue\_resched\_fac()* criterion is exceeded. -**panther_agent_restart_on_error()** -In certain Wide Area Network (WAN) environments, manager-agent communications can become “broken”, or “half-open”, where one side does not know that the other has closed the connection. This can happen with TCP/IP connections when there are long periods of no communication, such as during extended periods when the PEST++ manager is busy calculating parameter upgrades or undertaking localization calculations in PESTPP-IES. 
To help alleviate this, a discrete run manager ping thread is invoked which pings agents at least once every two minutes when the run manager is idle (i.e., when the manager is not communicating with agents as they undertake model runs but is busy doing other intensive calculations). In addition, agents can optionally be restarted in the case that communication errors still occur, instead of terminating or remaining in a half-open state. By default, this option is activated (i.e., it is set to *true)* but can be inactivated using the ++*panther_agent_restart_on_error(false)* control variable. +**panther\_agent\_restart\_on\_error()** +In certain Wide Area Network (WAN) environments, manager-agent communications can become “broken”, or “half-open”, where one side does not know that the other has closed the connection. This can happen with TCP/IP connections when there are long periods of no communication, such as during extended periods when the PEST++ manager is busy calculating parameter upgrades or undertaking localization calculations in PESTPP-IES. To help alleviate this, a discrete run manager ping thread is invoked which pings agents at least once every two minutes when the run manager is idle (i.e., when the manager is not communicating with agents as they undertake model runs but is busy doing other intensive calculations). In addition, agents can optionally be restarted in the case that communication errors still occur, instead of terminating or remaining in a half-open state. By default, this option is activated (i.e., it is set to *true*) but can be deactivated using the ++*panther\_agent\_restart\_on\_error(false)* control variable.
-**panther_agent_no_ping_timeout_secs()** -Related to the above agent restart option, agent can be instructed to terminate (if *panther_agent_restart_on_error()* is set to *false*) or restart (if *panther_agent_restart_on_error()* is set to *true*), if no ping message has been received from the run manager in more than a specified time interval. This interval is configurable in seconds via the *panther_agent_no_ping_timeout_secs()* control variable, with a default value of 300 (i.e., 5 minutes). +**panther\_agent\_no\_ping\_timeout\_secs()** +Related to the above agent restart option, agent can be instructed to terminate (if *panther\_agent\_restart\_on\_error()* is set to *false*) or restart (if *panther\_agent\_restart\_on\_error()* is set to *true*), if no ping message has been received from the run manager in more than a specified time interval. This interval is configurable in seconds via the *panther\_agent\_no\_ping\_timeout\_secs()* control variable, with a default value of 300 (i.e., 5 minutes). -**panther_agent_freeze_on_fail()** -In some settings, when starting to use PEST++, it can be difficult to debug why runs may be failing in a parallel run environment. This is especially true when agents are on separate physical hosts, which can make monitoring agent progress difficult and when a run fails, the panther run manager will immediate try to schedule another run on that same agent, which will cause the template files to be rewritten (nearly immediately) and any temporary files to be erased, making it nearly impossible to investigate the cause of the run failure. If users want to “slow down the process” so they debug run failures, adding *panther_agent_freeze_on_fail* as *true* to a (agent) control file will cause a agent to “freeze” on the occurrence of a run failure. 
This freeze can only be undone by forcing the agent to exit and restarting it, but, nevertheless, freezing a agent when a run failure occurs can be very useful to diagnosing issues related to parallelization of the PEST++ process because it allows direct inspection of all (temporary) files related to the failed run. +**panther\_agent\_freeze\_on\_fail()** +In some settings, when starting to use PEST++, it can be difficult to debug why runs may be failing in a parallel run environment. This is especially true when agents are on separate physical hosts, which can make monitoring agent progress difficult; when a run fails, the panther run manager will immediately try to schedule another run on that same agent, which will cause the template files to be rewritten (nearly immediately) and any temporary files to be erased, making it nearly impossible to investigate the cause of the run failure. If users want to “slow down the process” so they can debug run failures, adding *panther\_agent\_freeze\_on\_fail* as *true* to an (agent) control file will cause an agent to “freeze” on the occurrence of a run failure. This freeze can only be undone by forcing the agent to exit and restarting it, but, nevertheless, freezing an agent when a run failure occurs can be very useful for diagnosing issues related to parallelization of the PEST++ process because it allows direct inspection of all (temporary) files related to the failed run. -**panther_echo()** -If users are piping the master instance stdout and stderr to a file (through a redirect), then the panther master run summary, which echoes to the file with a line return to overwrite the output (in place using a carriage return), can fill up these file because the carriage return character is ignored or converted to a line return. In this case, suppling *panther_echo(false)* will turn off this stdout updating during the run sequence. Users can still inspect the run management process through the run management record.
+**panther\_echo()** +If users are piping the master instance stdout and stderr to a file (through a redirect), then the panther master run summary, which echoes to the file with a line return to overwrite the output (in place using a carriage return), can fill up this file because the carriage return character is ignored or converted to a line return. In this case, supplying *panther\_echo(false)* will turn off this stdout updating during the run sequence. Users can still inspect the run management process through the run management record. -**num_tpl_ins_threads** -When using the PEST++ tools for very high-dimensional problems, the time required to process template and/or instruction files can be considerable. To speed things up, the parallel agents can multithread these input and output processing operations. By default, only one thread is used and the number of threads to use is controlled by the *num_tpl_ins_threads* arg. Note that the number of threads used to process template and/or instruction files is set to the minimum of the number files and the value of *num_tpl_ins_threads*. Also note that using multithreading to process template and/or instruction files can consume significantly more memory and clock cycles. +**num\_tpl\_ins\_threads** +When using the PEST++ tools for very high-dimensional problems, the time required to process template and/or instruction files can be considerable. To speed things up, the parallel agents can multithread these input and output processing operations. By default, only one thread is used and the number of threads to use is controlled by the *num\_tpl\_ins\_threads* arg. Note that the number of threads used to process template and/or instruction files is set to the minimum of the number of files and the value of *num\_tpl\_ins\_threads*. Also note that using multithreading to process template and/or instruction files can consume significantly more memory and clock cycles.
**pest.stp** Sometimes, users may want to stop a pest++ tool at a certain stage of the algorithm. Of course, you can use the ctrl+c, but this option will quit (nearly) immediately without recording any results regarding the current stage of the algorithm. For example, during an ensemble evaluation for pestpp-ies or pestpp-da, users may not want to wait for one or more (really) slow model runs to finish, but still want the results of the completed runs to be written to files. In this case, users can place a file named “pest.stp” (case sensitive – use lower case!) in the directory where PESTPP-XXX master is running and the first line of “pest.stp” should have a “1” or a “2” as the first whitespace-delimited token on the first line. If this file is found and meets the first-token requirement, then the PESTPP-XXX master instance will exit gracefully. If the file’s presence is detected during run management, all remaining runs (queued or being run) are marked as fails (supported by both parallel and serial run manager). Then the algorithm records any relevant results are recorded and the pestpp-xxx exits gracefully. Alternatively, users may want to the run manager to stop waiting on one or more runs that are progressing slowly but don’t want to quit running the pestpp-xxx tool. In this case, users can create the “pest.stp” file and put a “4” as the first whitespace-delimited token on the first line. In the case, the run manager will mark any remaining runs as fails and return control to the calling program. This program will then continue its execution and remove the pest.stp file. In this way, the next time the run manager is called, the pest.stp file is gone and execution will continue. -**panther_transfer_on_finish/panther_transfer_on_fail** -In some cases, users may want to retrieve one or more model output files from the agent working directories and collect those files in the master directory. 
For example, users may want an entire model output binary file for further processing after a successful model run. Or, if a model run fails to complete, users may wish to see certain model input/output files to diagnose issues. In a parallel run setting, both of these tasks can be difficult to complete. To support these use cases, the PEST++ tools allow transferring files from the agent directories to the master directory through the *panther_transfer_on_finish* and *panther_transfer_on_fail* options. Both of these options can be supplied as comma-separated lists of files or single file names. After successful completion or run failure, respectively, the panther run manager will transfer the nominated files found in the agent control file to the master working directory. This is worth saying again – the values of *panther_transfer_on_finish* and *panther_transfer_on_fail* listed in the agent’s control file are transferred to the master. This approach allows users to potentially transfer different files from each agent. To avoid naming conflicts in the master directory, the name of the file saved in the master directory is prepended with additional metadata information including agent hostname, agent working directory, run manager run id value, run manager group id value and run manager information text (this information text usually includes information like realization name from pestpp-ies/pestpp-da and parameter name for Jacobian filling and global sensitivity analysis). Users are encouraged to study the .rmr file because it lists several valuable pieces of information regarding any file transfers. +**panther\_transfer\_on\_finish/panther\_transfer\_on\_fail** +In some cases, users may want to retrieve one or more model output files from the agent working directories and collect those files in the master directory. For example, users may want an entire model output binary file for further processing after a successful model run. 
Or, if a model run fails to complete, users may wish to see certain model input/output files to diagnose issues. In a parallel run setting, both of these tasks can be difficult to complete. To support these use cases, the PEST++ tools allow transferring files from the agent directories to the master directory through the *panther\_transfer\_on\_finish* and *panther\_transfer\_on\_fail* options. Both of these options can be supplied as comma-separated lists of files or single file names. After successful completion or run failure, respectively, the panther run manager will transfer the nominated files found in the agent control file to the master working directory. This is worth saying again – it is the values of *panther\_transfer\_on\_finish* and *panther\_transfer\_on\_fail* listed in the agent’s control file that determine which files are transferred to the master. This approach allows users to potentially transfer different files from each agent. To avoid naming conflicts in the master directory, the name of the file saved in the master directory is prepended with additional metadata information including agent hostname, agent working directory, run manager run id value, run manager group id value and run manager information text (this information text usually includes information like realization name from pestpp-ies/pestpp-da and parameter name for Jacobian filling and global sensitivity analysis). Users are encouraged to study the .rmr file because it lists several valuable pieces of information regarding any file transfers. -**panther_poll_interval** -Once a panther agent is initialized, it will start to try to connect to the master instance. On some operating systems, this act of trying connect actually results in a OS-level “file handle” being opened, which, if substantial time passes, can accumulate to a large number of open file handles. To prevent this, the panther agents will “sleep” for a given number of seconds before trying to connect to the master again. 
The length of time the agent sleeps is controlled by the *panther_poll_interval*, which an interger value of seconds to sleep. By default, this value is 1 second. +**panther\_poll\_interval** +Once a panther agent is initialized, it will start to try to connect to the master instance. On some operating systems, this act of trying to connect actually results in an OS-level “file handle” being opened, which, if substantial time passes, can accumulate to a large number of open file handles. To prevent this, the panther agents will “sleep” for a given number of seconds before trying to connect to the master again. The length of time the agent sleeps is controlled by the *panther\_poll\_interval*, which is an integer value of seconds to sleep. By default, this value is 1 second. + +## 5.4 Run Book-Keeping Files -## 5.4 Run Book-Keeping Files After running a program of the PEST++ suite, you may notice a number of (possibly large) files in the folder from which it was run. These are *case.rns*, *case.rnu* and *case.rnj*, where *case* is the filename base of the PEST control file. These are binary files that are used for temporary storage of “raw” run results. They contain information that assists in parallel run management, and that facilitates restart of an interrupted PEST++ run – if PESTPP-XXX exits gracefully, these files are removed. These run storage files can be read and processed using pyEMU. -# 6. PESTPP-GLM +# 6. PESTPP-GLM + + +## 6.1 Introduction -## 6.1 Introduction PESTPP-GLM was the original member of the PEST++ suite; its original name was “PESTPP”. The intention behind its creation was to reproduce much of the functionality of PEST in code that is modular, object oriented and supportive of collaborative programming. At the same time, it was hoped that certain aspects of PEST’s performance could be improved by taking advantage of the “new slate” that was offered by PESTPP-GLM. 
@@ -1730,9 +1789,10 @@ Like all versions of the PEST++ suite, PESTPP-GLM is written in C++. PESTPP-GLM Like PEST, PESTPP-GLM undertakes highly parameterized inversion. However, if requested, it can also undertake global optimization using the differential evolution (DE) method. The task that it implements on any given run is determined by the values that are supplied for pertinent control variables. - 6.2 Highly Parameterized Inversion +## 6.2 Highly Parameterized Inversion + -### 6.2.1 Basic Equations +### 6.2.1 Basic Equations When used to undertake highly parameterized inversion, PESTPP-GLM implements theory and methodologies that are programmed into PEST. However, many implementation details have been improved. In order to explain these details, it is necessary to present some theory. This theory employs matrices and vectors. These are used to describe the linearized inverse problem on which so-called “gradient methods” are based. Through repeated linearization of the inverse problem over successive iterations, these methods achieve their purpose of model calibration, notwithstanding the nonlinear relationship that exists between model outputs and model parameters. Full details of this theory are presented in Doherty (2015). @@ -1797,9 +1857,9 @@ where Փm is referred to as the “measurement objective function” If PESTPP-GLM is not run in “regularization” mode then, of course, regularization terms in the above equations are omitted. This does not preclude the use of prior information in the inversion process; however, the ability of PESTPP-GLM to adjust the importance that it gives to prior information through the regularization weight factor *μ*2 is lost. -PESTPP-GLM also offers users the option of using “regularized Gauss Levenburg Marquardt” of Hanke (1996), where prior parameter covariance matrix based regularization is “baked in” to the upgrade calculation process. This form of upgrade calculations is activated with the *glm_normal_form(prior)* option.
Users can specify a prior parameter covariance matrix via the *parcov* option; if a covariance matrix is not supplied, then one is constructed on the fly using the parameter bounds and the optional *par_sigma_range* argument. In this case, MAXSING and EIGTHRESH become the “knobs” to control regularization – “regularization” mode and the associated variables in the “\* regularization” section are not allowed in this mode of operation. +PESTPP-GLM also offers users the option of using the “regularized Gauss-Levenberg-Marquardt” method of Hanke (1996), where prior parameter covariance matrix based regularization is “baked in” to the upgrade calculation process. This form of upgrade calculation is activated with the *glm\_normal\_form(prior)* option. Users can specify a prior parameter covariance matrix via the *parcov* option; if a covariance matrix is not supplied, then one is constructed on the fly using the parameter bounds and the optional *par\_sigma\_range* argument. In this case, MAXSING and EIGTHRESH become the “knobs” to control regularization – “regularization” mode and the associated variables in the “\* regularization” section are not allowed in this mode of operation. -### 6.2.2 Choosing the Regularization Weight Factor +### 6.2.2 Choosing the Regularization Weight Factor The value that is assigned to *μ*2 strongly influences the inversion process. If it is too small, then overfitting may occur. If it is too large, then the fit that is attained with the calibration dataset may not be satisfactory. @@ -1807,7 +1867,7 @@ When using PESTPP-GLM (and PEST), a user does not have to choose a value for *μ As was discussed in section 4.16, the level of fit that can be achieved with a calibration dataset is often difficult to predict. To accommodate this, it may be useful to endow PHIMLIM with a very low value (for example 1.0E-10). This allows you to find out just how good a fit you can get with the calibration dataset, possibly at the cost of over-fitting that dataset.
At the same time, the value of the FRACPHIM control variable should be set to 0.1. If FRACPHIM is set to a value greater than zero, PESTPP-GLM adjusts the target measurement objective function internally to be FRACPHIM times the current value of the actual measurement objective function. Hence regularization is operative, notwithstanding the pursuit of what may turn out to be too good a fit with the calibration dataset. Then, on a subsequent PESTPP-GLM run, PHIMLIM should be set about five percent greater than the best measurement objective function attained through the preceding “range finder” run. -### 6.2.3 Inter-Regularization Group Weighting +### 6.2.3 Inter-Regularization Group Weighting Experience gained in using PEST suggests that a Tikhonov-regularised inversion process can benefit from automatic balancing of weights between different regularization groups (i.e., observation groups that contain regularisation observations). Ideally, a higher weight factor should be applied to regularization groups that feature parameters that are mildly influenced by the calibration dataset than to groups which feature parameters whose values are heavily influenced by the calibration dataset. Not only can this ensure that the former set of parameters do not depart much from their preferred values (which should also be their initial values); it can contribute to numerical stability of the inversion process. @@ -1821,19 +1881,19 @@ where *m* is the number of adjustable parameters featured in the PEST control fi If IREGADJ is set to 1, PESTPP-GLM multiplies the weights pertaining to all members of each regularization group by a group-specific factor. This factor is chosen so that, after this operation has been performed, the total composite sensitivities of all regularization groups are the same. 
-### 6.2.4 Choosing Values for the Marquardt Lambda +### 6.2.4 Choosing Values for the Marquardt Lambda -In contrast to PEST, PESTPP-GLM does not use the control variables specified on the fifth line of the “control data” section of the PEST control file to govern how it chooses Marquardt lambdas. Instead, it receives Marquardt lambda control information from PEST++ control variables. Two of these are *lambdas()* and *lambda_scale_fac()*. Default settings for these variables are as follows: +In contrast to PEST, PESTPP-GLM does not use the control variables specified on the fifth line of the “control data” section of the PEST control file to govern how it chooses Marquardt lambdas. Instead, it receives Marquardt lambda control information from PEST++ control variables. Two of these are *lambdas()* and *lambda\_scale\_fac()*. Default settings for these variables are as follows: *lambdas(0.1, 1, 10,100,100)* -*lambda_scale_fac(0.9, 0.8, 0.7, 0.5)* +*lambda\_scale\_fac(0.9, 0.8, 0.7, 0.5)* -As is apparent, more than one value can be associated with each of these control variables. Values supplied for *lambdas()* specify the Marquardt lambdas that PESTPP-GLM must use during each iteration of the inversion process. Then, if values are supplied for *lambda_scale_fac()*, PESTPP-GLM calculates multiple upgraded parameter sets for each Marquardt lambda. These sets are those that correspond to various fractional lengths along the upgrade vector that is calculated on the basis of the Marquardt lambda alone, the latter corresponding to a *lambda_scale_fac()* value of 1.0. Note that parameter change limits and parameter bounds are not enforced on scale factors greater than 1.0. +As is apparent, more than one value can be associated with each of these control variables. Values supplied for *lambdas()* specify the Marquardt lambdas that PESTPP-GLM must use during each iteration of the inversion process. 
Then, if values are supplied for *lambda\_scale\_fac()*, PESTPP-GLM calculates multiple upgraded parameter sets for each Marquardt lambda. These sets are those that correspond to various fractional lengths along the upgrade vector that is calculated on the basis of the Marquardt lambda alone, the latter corresponding to a *lambda\_scale\_fac()* value of 1.0. Note that parameter change limits and parameter bounds are not enforced on scale factors greater than 1.0. If only a single value is supplied for *lambdas()*, then this value of lambda is used in all iterations of the inversion process. In contrast, if more than one value is supplied for *lambdas()*, then PESTPP-GLM expands the user-supplied lambda list over the course of the inversion process. The expanded list includes other lambda values that are above and/or below those already comprising the list. A lambda is added to the list if the best parameters obtained during any particular iteration of the inversion process were calculated using an end member of the current list. The value chosen for a new lambda ensures that the previous best lambda is bracketed by members of the expanded list. -### 6.2.5 Singular Value Decomposition +### 6.2.5 Singular Value Decomposition In order to simplify the following equations, it is assumed that regularization observations and/or prior information equations are assimilated into the rows of Z using a weight factor that has already been evaluated. @@ -1877,17 +1937,17 @@ Like the solution calculated using equation 6.14, the solution to the inverse pr As stated above, PESTPP-GLM uses singular value decomposition, or variants thereof, to solve the inverse problem that is defined through the PEST control file with which it is provided. However, only one of the above options are available - ZtQZ. That is, singular value decomposition can only be undertaken on ZtQZ. 
(Actually, although not discussed in the present sub-section, the Marquardt lambda is included in these matrices before they are decomposed in order to enhance inversion performance in nonlinear contexts.) -A PEST++ control variable governs the way in which singular value decomposition is undertaken by PESTPP-GLM - *svd_pack()*. Two choices are available for the *svd_pack()* control variable. These select the numerical library that is employed for implementing singular value decomposition. Options are as follows. +A PEST++ control variable governs the way in which singular value decomposition is undertaken by PESTPP-GLM - *svd\_pack()*. Two choices are available for the *svd\_pack()* control variable. These select the numerical library that is employed for implementing singular value decomposition. Options are as follows. - “eigen”. This uses a Jacobi method supplied with the Eigen library. Eigen is a C++ template library for linear algebra; see - “redsvd”. This uses the redsvd library available from . Randomized methods are employed to undertake singular value decomposition. These are very fast; where matrices are large and sparse, their performance is outstanding. Furthermore, randomized methods only solve the factorization problem to the number of singular components needed. This means the variable MAXSING can be used to increase the efficiency of large SVD factorizations. -The default values for *svd_pack()* is “redsvd”. This is the method which PESTPP-GLM uses if you do not supply values for this control variable. +The default value for *svd\_pack()* is “redsvd”. This is the method which PESTPP-GLM uses if you do not supply a value for this control variable. -Two variables that are recorded in the PEST control file can also affect PESTPP’s deployment of singular value decomposition to solve an inverse problem. These are the MAXSING and EIGTHRESH variables featured in the “singular value decomposition” section of that file.
If a “singular value decomposition” section is not provided in the PEST control file that is read by PESTPP-GLM, then PESTPP-GLM assigns MAXSING a value that is equal to the number of adjustable parameters that define the current inverse problem; at the same time, EIGTHRESH is assigned a value of 1.0E-7. (Recall from section 4.7 that MAXSING and EIGTHRESH are used to define the singular value truncation point.) However, if the PEST control file includes a “singular value decomposition” section, then PESTPP-GLM employs values for MAXSING and EIGTHRESH that it reads from this file. Note also that if *glm_normal_form(prior)* is specified, then MAXSING and EIGTHRESH also function as the consolidated regularization controls. +Two variables that are recorded in the PEST control file can also affect PESTPP’s deployment of singular value decomposition to solve an inverse problem. These are the MAXSING and EIGTHRESH variables featured in the “singular value decomposition” section of that file. If a “singular value decomposition” section is not provided in the PEST control file that is read by PESTPP-GLM, then PESTPP-GLM assigns MAXSING a value that is equal to the number of adjustable parameters that define the current inverse problem; at the same time, EIGTHRESH is assigned a value of 1.0E-7. (Recall from section 4.7 that MAXSING and EIGTHRESH are used to define the singular value truncation point.) However, if the PEST control file includes a “singular value decomposition” section, then PESTPP-GLM employs values for MAXSING and EIGTHRESH that it reads from this file. Note also that if *glm\_normal\_form(prior)* is specified, then MAXSING and EIGTHRESH also function as the consolidated regularization controls. -### 6.2.6 SVD-Assist +### 6.2.6 SVD-Assist Use of PEST’s “SVD-assist” methodology can promulgate significant increases in the numerical efficiency of highly parameterized inversion. 
In implementing this methodology, PEST estimates the values of so-called “super parameters” in place of the parameters themselves. @@ -1907,15 +1967,15 @@ PESTPP-GLM eradicates the second of these problems completely. There is no need Four PEST++ control variables govern the operation of SVD-assisted inversion as undertaken by PESTPP-GLM. If any of these are present, SVD-assisted inversion is implemented; default values are supplied for any of these control variables that a user fails to supply. -The number of super parameters to estimate can be set using either or both of the *max_n\_super()* and ­*super_eigthresh()* control variables. The value supplied for *max_n\_super* must be an integer greater than zero. This sets an upper limit on the number of super parameters to employ. *super_eigthresh()* performs a similar role to the PEST EIGTHRESH variable. The number of estimated super parameters is set by the singular value index at which the ratio of the corresponding singular value of JtQJ to the maximum singular value of this matrix is equal to the user-supplied value of *super_eigthresh()*. The default value for *super_eigthresh()* is 1.0E‑8. The default value for *max_n\_super()* is the number of adjustable parameters so that the number of super parameters is determined by the value of *super_eigthresh()*. +The number of super parameters to estimate can be set using either or both of the *max\_n\_super()* and ­*super\_eigthresh()* control variables. The value supplied for *max\_n\_super* must be an integer greater than zero. This sets an upper limit on the number of super parameters to employ. *super\_eigthresh()* performs a similar role to the PEST EIGTHRESH variable. The number of estimated super parameters is set by the singular value index at which the ratio of the corresponding singular value of JtQJ to the maximum singular value of this matrix is equal to the user-supplied value of *super\_eigthresh()*. 
The default value for *super\_eigthresh()* is 1.0E‑8. The default value for *max\_n\_super()* is the number of adjustable parameters so that the number of super parameters is determined by the value of *super\_eigthresh()*. -If *glm_normal_form(prior)* is supplied, activating the regularized GLM solution process, the super parameters are formed from the normal matrix JtQJ + Cp where Cp is the prior parameter covariance matrix (which is optionally supplied via the *parcov* argument). This effectively builds some prior parameter covariance matrix eigen components into the super parameter vectors. +If *glm\_normal\_form(prior)* is supplied, activating the regularized GLM solution process, the super parameters are formed from the normal matrix JtQJ + Cp where Cp is the prior parameter covariance matrix (which is optionally supplied via the *parcov* argument). This effectively builds some prior parameter covariance matrix eigen components into the super parameter vectors. -As was mentioned above, PESTPP-GLM allows mixing of inversion iterations based on base parameters and super parameters. Where an iteration is used to estimate base parameters the Jacobian matrix that is computed during that iteration is used to define super parameters for ensuing iterations in which the latter are estimated; however, base parameters are adjusted in any iteration in which a base Jacobian matrix is calculated before the ensuing super parameter iterations are commenced. The *n_iter_base()* control variable sets the number of base parameter iterations that are done in succession. These are followed by *n_iter_super()* super parameter iterations. The cycle is then repeated. +As was mentioned above, PESTPP-GLM allows mixing of inversion iterations based on base parameters and super parameters. 
Where an iteration is used to estimate base parameters the Jacobian matrix that is computed during that iteration is used to define super parameters for ensuing iterations in which the latter are estimated; however, base parameters are adjusted in any iteration in which a base Jacobian matrix is calculated before the ensuing super parameter iterations are commenced. The *n\_iter\_base()* control variable sets the number of base parameter iterations that are done in succession. These are followed by *n\_iter\_super()* super parameter iterations. The cycle is then repeated. -A special setting for *n_iter_base()* instructs PESTPP-GLM to vary from this behavior. If *n_iter_base()* is set to -1, then PESTPP-GLM carries out only one base parameter iteration. This comprises the first iteration of the inversion process. Furthermore, it does not upgrade base parameters using this Jacobian matrix before proceeding to the next iteration (which is a super parameter iteration). Instead the Jacobian matrix is used only for definition of super parameters; parameter upgrades are restricted to super parameter iterations. (This is the same behavior as that undertaken by PEST when it implements SVD-assisted inversion using a super parameter PEST control file constructed by SVDAPREP). +A special setting for *n\_iter\_base()* instructs PESTPP-GLM to vary from this behavior. If *n\_iter\_base()* is set to -1, then PESTPP-GLM carries out only one base parameter iteration. This comprises the first iteration of the inversion process. Furthermore, it does not upgrade base parameters using this Jacobian matrix before proceeding to the next iteration (which is a super parameter iteration). Instead the Jacobian matrix is used only for definition of super parameters; parameter upgrades are restricted to super parameter iterations. (This is the same behavior as that undertaken by PEST when it implements SVD-assisted inversion using a super parameter PEST control file constructed by SVDAPREP). 
-The number of super parameters to form is controlled by *max_n\_super* variable. When used judiciously and combined with the RedSVD package, the formation of the super parameter problem can be very efficient since the RedSVD solver only factorize the normal matrix to the number of specified components. +The number of super parameters to form is controlled by the *max\_n\_super* variable. When used judiciously and combined with the RedSVD package, the formation of the super parameter problem can be very efficient since the RedSVD solver only factorizes the normal matrix to the number of specified components. Two other aspects of PESTPP’s behavior in undertaking SVD-assisted inversion are worth mentioning. @@ -1925,17 +1985,17 @@ The handling of Jacobian matrix files (i.e., JCO files) is also somewhat differe Note, however, that if PESTPP-GLM is not undertaking SVD-assisted inversion, then the JCO file that is recorded at the end of each iteration of the inversion process is named *case.jco*. Under these circumstances, this file contains sensitivities with respect to base model parameters. -### 6.2.7 Expediting the First Iteration +### 6.2.7 Expediting the First Iteration In the normal course of events, PESTPP-GLM commences an inversion process by running the model in order to determine the value of the objective function based on initial parameter values. In doing this, it also determines the reference values of all model outputs for use in finite difference derivatives calculation. It then commences the long process of filling the Jacobian matrix. As has been explained, this requires at least as many model runs as there are adjustable parameters. -If model runs are parallelized (see section 5.3), then the first batch of parallelized model runs commissioned by PESTPP-GLM actually includes the initial model run, as well as those required for filling of the initial Jacobian matrix.
However, if this initial model run has already been completed, then PESTPP-GLM can use the outcomes of this already-completed run rather than having to repeat it at the start of the inversion process. This action is instigated through use of the *hotstart_resfile()* control variable. The argument for this variable is the name of the residuals file (see below) which PESTPP-GLM recorded when it completed the initial model run. As is explained elsewhere in this manual, PESTPP-GLM can be run with the NOPTMAX termination control variable set to 0 specifically to undertake a single model run for the purpose of writing this (and other) files. A residuals file is named *case.res* where *case* is the filename base of the PEST control file. +If model runs are parallelized (see section 5.3), then the first batch of parallelized model runs commissioned by PESTPP-GLM actually includes the initial model run, as well as those required for filling of the initial Jacobian matrix. However, if this initial model run has already been completed, then PESTPP-GLM can use the outcomes of this already-completed run rather than having to repeat it at the start of the inversion process. This action is instigated through use of the *hotstart\_resfile()* control variable. The argument for this variable is the name of the residuals file (see below) which PESTPP-GLM recorded when it completed the initial model run. As is explained elsewhere in this manual, PESTPP-GLM can be run with the NOPTMAX termination control variable set to 0 specifically to undertake a single model run for the purpose of writing this (and other) files. A residuals file is named *case.res* where *case* is the filename base of the PEST control file. -Significant savings can also be made by employing an already-calculated Jacobian matrix for the first iteration of an inversion process. The *base_jacobian()* control variable instructs PESTPP-GLM to take this action. 
The value of this variable is the name of a Jacobian matrix (i.e., JCO) file. This file is named *case.jco* or *case.jcb* where *case* is the filename base of the PEST control file on which its calculation was based. This file can be produced by running PESTPP-GLM with the NOPTMAX control variable set to -1 or -2. (NOPTMAX is the first variable appearing on the eighth line of the “control data” section of a PEST control file.) +Significant savings can also be made by employing an already-calculated Jacobian matrix for the first iteration of an inversion process. The *base\_jacobian()* control variable instructs PESTPP-GLM to take this action. The value of this variable is the name of a Jacobian matrix (i.e., JCO) file. This file is named *case.jco* or *case.jcb* where *case* is the filename base of the PEST control file on which its calculation was based. This file can be produced by running PESTPP-GLM with the NOPTMAX control variable set to -1 or -2. (NOPTMAX is the first variable appearing on the eighth line of the “control data” section of a PEST control file.) -If both of the *hotstart_resfile()* and *base_jacobian()* options are selected at the same time, PESTPP-GLM does not need to run the model at all prior to calculating and testing parameter upgrades. This can sometimes be useful when fine-tuning PESTPP-GLM settings for optimal inversion performance. +If both of the *hotstart\_resfile()* and *base\_jacobian()* options are selected at the same time, PESTPP-GLM does not need to run the model at all prior to calculating and testing parameter upgrades. This can sometimes be useful when fine-tuning PESTPP-GLM settings for optimal inversion performance. 
-### 6.2.8 First Order, Second Moment Uncertainty Analysis and Monte Carlo +### 6.2.8 First Order, Second Moment Uncertainty Analysis and Monte Carlo A Jacobian matrix calculated by PESTPP-GLM can be used as a basis for first-order, second-moment (FOSM) parameter and predictive uncertainty analysis as well as FOSM-based Monte Carlo. The equations used by PESTPP-GLM for implementation of FOSM analysis are derived from Bayes equation. They are outlined by Fienen et al (2010) and Doherty (2015). These same equations form the basis for analyses undertaken by the PEST PREDUNC suite of utility programs and by the PyEMU library. @@ -1953,9 +2013,9 @@ FOSM analysis is implemented by PESTPP-GLM if the *uncertainty()* control variab If parameters are log-transformed in a PEST control file, then FOSM-based analyses pertains to the log (to base 10) of these parameters. Calculations that involve log-transformation are invisible to the user. However, if a user elects to provide prior parameter uncertainties (through a prior parameter covariance matrix or uncertainty file), these must pertain to the logs of log-transformed parameters. Similarly variances that appear in the parameter covariance matrix that is produced by PESTPP-GLM pertain to the logs of log-transformed parameters. -FOSM-based uncertainty analysis requires a Jacobian matrix. PESTPP-GLM uses the last base parameter Jacobian matrix that it has calculated (or read using the *base_jacobian()* control variable) as a basis for this analysis. It also requires that prior parameter uncertainties be provided. PESTPP-GLM offers a number of options for this. +FOSM-based uncertainty analysis requires a Jacobian matrix. PESTPP-GLM uses the last base parameter Jacobian matrix that it has calculated (or read using the *base\_jacobian()* control variable) as a basis for this analysis. It also requires that prior parameter uncertainties be provided. PESTPP-GLM offers a number of options for this. 
-Unless a *parcov()* control variable is provided in the PEST control file, PESTPP-GLM assumes that all adjustable parameters are statistically independent. In this case, by default, the prior standard deviation of each parameter is calculated as a quarter of the difference between its upper and lower bounds as provided in the PEST control file. However, the *par_sigma_range()* control variable (the default value for which is 4.0) can be used to specify that the difference between parameter bounds is equivalent to a different number of standard deviations from this. If a parameter is log-transformed, the prior standard deviation of its log is calculated from the difference between the logs of its bounds. +Unless a *parcov()* control variable is provided in the PEST control file, PESTPP-GLM assumes that all adjustable parameters are statistically independent. In this case, by default, the prior standard deviation of each parameter is calculated as a quarter of the difference between its upper and lower bounds as provided in the PEST control file. However, the *par\_sigma\_range()* control variable (the default value for which is 4.0) can be used to specify that the difference between parameter bounds is equivalent to a different number of standard deviations from this. If a parameter is log-transformed, the prior standard deviation of its log is calculated from the difference between the logs of its bounds. Alternatively, the name of a file can be supplied as the value of the *parcov()* control variable. If so, PESTPP-GLM reads prior parameter uncertainties from this file. Options for this file are as follows: @@ -1967,25 +2027,25 @@ Alternatively, the name of a file can be supplied as the value of the *parcov()* When asked to undertake FOSM analysis, PESTPP-GLM calculates a current-iteration posterior parameter covariance matrix. 
This is stored in a file named *case.N.post.cov* where *case* is the filename base of the PEST control file and *N* is the current iteration. As already stated, posterior variances and covariances pertaining to parameters that are log-transformed in the PEST control file, pertain to the logs of these respective parameters. Note that this is the same posterior covariance matrix as that calculated by the PEST PREDUNC7 utility, and by the PyEMU Schur object. -The PESTPP-GLM posterior parameter (and optional forecast) uncertainty analyses require an observation noise covariance matrix. As presently coded, this matrix is formed from the observation weights (e.g., it is diagonal). However, these weights assume that the final measurement objective function is equal to the number of non-zero weighted observations – this almost never happens, largely as a result of model error. This is a problem for posterior FOSM-based uncertainty analyses because the weights in the control imply a more complete transfer of information from observations to parameters than was actually achieved. To rectify this issue, PESTPP-GLM will scale the weights used for posterior FOSM analysis to account for the final residuals, adopting Morozov’s discrepancy principal. The scaled weights are written to separate residuals file for inspection named *case.fosm_reweight.rei.* +The PESTPP-GLM posterior parameter (and optional forecast) uncertainty analyses require an observation noise covariance matrix. As presently coded, this matrix is formed from the observation weights (e.g., it is diagonal). However, these weights assume that the final measurement objective function is equal to the number of non-zero weighted observations – this almost never happens, largely as a result of model error. This is a problem for posterior FOSM-based uncertainty analyses because the weights in the control imply a more complete transfer of information from observations to parameters than was actually achieved. 
To rectify this issue, PESTPP-GLM will scale the weights used for posterior FOSM analysis to account for the final residuals, adopting Morozov’s discrepancy principle. The scaled weights are written to a separate residuals file for inspection, named *case.fosm\_reweight.rei*. -PESTPP-GLM-calculated prior and posterior parameter uncertainties are recorded in the run record file. They are also recorded in a comma delimited file named *case.par.usum.csv* (“usum” stands for “uncertainty summary”). Prior and posterior means, standard deviations and bounds in the latter file pertain to the logs (to base 10) of parameters which are log-transformed in the PEST control file. These upper and lower posterior parameter bounds are calculated as the parameter’s current estimated or initial value (depending on whether or not inversion has been carried out) plus and minus *par_sigma_range* standard deviations. +PESTPP-GLM-calculated prior and posterior parameter uncertainties are recorded in the run record file. They are also recorded in a comma delimited file named *case.par.usum.csv* (“usum” stands for “uncertainty summary”). Prior and posterior means, standard deviations and bounds in the latter file pertain to the logs (to base 10) of parameters which are log-transformed in the PEST control file. These upper and lower posterior parameter bounds are calculated as the parameter’s current estimated or initial value (depending on whether or not inversion has been carried out) plus and minus *par\_sigma\_range* standard deviations. -If the glm\_*num_reals(N)* argument is supplied, then PESTPP-GLM will also generate *N* stochastic realizations from the FOSM-approximated posterior parameter covariance matrix (these realizations comprising a posterior parameter ensemble). It will then calculate model outputs using these realizations by running the model once for each of them (in parallel if PESTPP-GLM is being run in parallel mode).
The parameter ensemble is saved to a CSV file named *case.N.paren.csv*, while the resulting model output ensemble is saved to a CSV file named *case.N.obsen.csv*. Alternatively, if the *save_binary()* variable is set to “true”, then the ensembles are saved in binary “enhanced Jacobian” (i.e., JCB) files named *case.N.paren.jcb* and *case.N.obsen.jcb*. This posterior Monte Carlo is undertaken at the completion of PESTPP-GLM iterations, and, also, if the *glm_iter_mc(true)* argument is set, then also, during each iteration­–if the posterior Monte Carlo is being undertaken during an iteration, the realizations are queued up and run with the lambda upgrade vectors. +If the glm\_*num\_reals(N)* argument is supplied, then PESTPP-GLM will also generate *N* stochastic realizations from the FOSM-approximated posterior parameter covariance matrix (these realizations comprising a posterior parameter ensemble). It will then calculate model outputs using these realizations by running the model once for each of them (in parallel if PESTPP-GLM is being run in parallel mode). The parameter ensemble is saved to a CSV file named *case.N.paren.csv*, while the resulting model output ensemble is saved to a CSV file named *case.N.obsen.csv*. Alternatively, if the *save\_binary()* variable is set to “true”, then the ensembles are saved in binary “enhanced Jacobian” (i.e., JCB) files named *case.N.paren.jcb* and *case.N.obsen.jcb*. This posterior Monte Carlo is undertaken at the completion of PESTPP-GLM iterations, and, also, if the *glm\_iter\_mc(true)* argument is set, then also, during each iteration­–if the posterior Monte Carlo is being undertaken during an iteration, the realizations are queued up and run with the lambda upgrade vectors. -Through the *glm_accept_mc_phi* argument, PESTPP-GLM will accept the lowest-phi FOSM-based parameter realization if the realization yields a lower phi than the lambda-testing lowest phi. 
In this way, PESTPP-GLM can be used as a hybrid global-local algorithm which combines efficient local search via subspace (and optionally super parameter) GLM solution process with the randomized search from the FOSM-based Monte Carlo realizations. Note the *glm_accept_mc_phi* option is only applicable to base parameter iterations, although FOSM analytical and Monte Carlo operations are still undertaken during super parameter iterations. +Through the *glm\_accept\_mc\_phi* argument, PESTPP-GLM will accept the lowest-phi FOSM-based parameter realization if the realization yields a lower phi than the lambda-testing lowest phi. In this way, PESTPP-GLM can be used as a hybrid global-local algorithm which combines efficient local search via subspace (and optionally super parameter) GLM solution process with the randomized search from the FOSM-based Monte Carlo realizations. Note the *glm\_accept\_mc\_phi* option is only applicable to base parameter iterations, although FOSM analytical and Monte Carlo operations are still undertaken during super parameter iterations. As well as calculating parameter uncertainties, PESTPP-GLM can also be asked to calculate the prior and posterior uncertainties of some predictions. This functionality is activated through use of the *forecasts()* control variable. The values which must be supplied for this variable are the names of predictions whose uncertainties are sought, or, optionally, the name of a file that stores multiple entries. For example, *forecasts(ar10,ar11)* requests that prior and predictive uncertainties be evaluated for model outputs named “ar10” and “ar11” in the PEST control file on which PESTPP-GLM’s operations are based. Despite the fact that these model outputs are predictions, they must be listed in the “observation data” section of the PEST control file; hence sensitivities of these model outputs to parameters are available as rows of the Jacobian matrix which is calculated by PESTPP-GLM. 
Model predictions should be endowed with weights of zero in a PEST control file; this is because predictions are not used to constrain parameters, and hence do not form part of a calibration dataset. (PESTPP-GLM issues a warning message if this is not the case.). If the *forecasts* argument is not supplied and the *uncertainty* flag is true, then PESTPP-GLM will treat all zero-weighted observations as forecasts. The uncertainties and lower/upper bounds of forecasts that are specified in this way are listed in the PESTPP-GLM run record file, and in a comma-delimited file named *case.N.pred.usum.csv*. Posterior predictive lower and upper bounds are calculated by subtracting and adding two standard deviations from/to the value of the prediction as calculated by the model using initial or estimated parameter values. -### 6.2.9 Model Run Failure +### 6.2.9 Model Run Failure The inversion process implemented by PESTPP-GLM is an iterative procedure. Each iteration is subdivided into two parts. Finite-difference derivatives are calculated in the first part of each iteration; parameter upgrades are calculated and tested in the second part. Model run failure is much more likely to occur during the second of these parts than in the first of these parts as parameter values may vary significantly from model run to model run in the latter case. Where an updated parameter set precipitates model run failure, PESTPP-GLM deems the objective function to be very high; the offending parameter set is therefore judged to be far from optimal. Model run failure during finite difference derivatives calculation is more worrisome, for a single parameter undergoes only an incremental change from its current value for each model run. A strategy for handling failed model runs under these circumstances is to prevent adjustment of that parameter for the remainder of the current iteration, effectively freezing it at its current value. 
On some occasions, this may be a shortsighted strategy, for if incremental variation of one parameter instigates model run failure, incremental variation of another parameter may do the same. An alternative strategy is to abandon the inversion process as the likelihood of further parameter improvement has been caste into doubt. -The *der_forgive()* control variable can be used to govern PESTPP’s behavior under these circumstances. It must be supplied as either *true* or *false*. If it is supplied as *true* (its default value) then model run failure when finite difference derivatives are being calculated is accommodated using the first of the above alternatives, that is through temporary freezing of the parameter at its current value. However, if it supplied as *false*, then model run failure during calculation of finite-difference derivatives precipitates cessation of PESTPP-GLM execution. +The *der\_forgive()* control variable can be used to govern PESTPP’s behavior under these circumstances. It must be supplied as either *true* or *false*. If it is supplied as *true* (its default value) then model run failure when finite difference derivatives are being calculated is accommodated using the first of the above alternatives, that is through temporary freezing of the parameter at its current value. However, if it is supplied as *false*, then model run failure during calculation of finite-difference derivatives precipitates cessation of PESTPP-GLM execution. -### 6.2.10 Composite Parameter Sensitivities +### 6.2.10 Composite Parameter Sensitivities PESTPP-GLM records composite parameter sensitivities in a file named *case.sen* where *case* is the filename base of the PEST control file. These are recorded during each iteration of the inversion process. Two composite parameter sensitivities are recorded.
The first is the *csp* statistic of Doherty (2015), calculated using the equation @@ -1993,17 +2053,17 @@ cspj= ((JTQJ)0.5)j/n (6.18) where J is the Jacobian matrix, Q is the weight matrix and *n* is the number of non-zero-weighted observations. PESTPP-GLM also records the composite scaled sensitivity of Hill and Tiedeman (2007) in this same file; see that text for details of its computation. Where regularization is employed in the inversion process, two sets of these two composite sensitivities are calculated. Regularization observations and prior information equations are included in one of them, while these are excluded from the other. Where they are included, the weights applied to regularization are multiplied by the current regularization weight factor. -### 6.2.11 Other Controls +### 6.2.11 Other Controls -If the control variable *iteration_summary()* is set to *true*, then PESTPP-GLM records (and continually updates) a comma-delimited file named *case.upg.csv* (where “upg” stands for “upgrade”). This file lists the values of parameters used for every model run in which parameter upgrades are tested. Iteration numbers, lambda values and fractional lengths along parameter upgrade vectors are also recorded in this file. PESTPP-GLM also records files named *case.ipar*, *case.iobj*, *case.isen* and *case.rid*. The first three of these are comma-delimited files; they list iteration-specific values of parameters, objective function components and composite parameter sensitivities respectively. *case.rid* links model runs undertaken for derivatives calculation to run identifiers in the parallel run management file; a user can thus be informed of the agent that undertook a particular finite-difference model run. +If the control variable *iteration\_summary()* is set to *true*, then PESTPP-GLM records (and continually updates) a comma-delimited file named *case.upg.csv* (where “upg” stands for “upgrade”). 
This file lists the values of parameters used for every model run in which parameter upgrades are tested. Iteration numbers, lambda values and fractional lengths along parameter upgrade vectors are also recorded in this file. PESTPP-GLM also records files named *case.ipar*, *case.iobj*, *case.isen* and *case.rid*. The first three of these are comma-delimited files; they list iteration-specific values of parameters, objective function components and composite parameter sensitivities respectively. *case.rid* links model runs undertaken for derivatives calculation to run identifiers in the parallel run management file; a user can thus be informed of the agent that undertook a particular finite-difference model run. -If the *jac_scale()* control variable is set to *true*, the equations that are used for calculating parameter upgrades are slightly modified from those presented in section 6.2.1. Prior to estimation, parameters are scaled by their sensitivities. Estimated scaled parameters then undergo post-estimation back-transformation; see equation 5.4.6 of Doherty (2015). This strategy can reduce numerical errors in some instances; however, it can also increase computation times. The default value for *jac_scale()* is *true*. +If the *jac\_scale()* control variable is set to *true*, the equations that are used for calculating parameter upgrades are slightly modified from those presented in section 6.2.1. Prior to estimation, parameters are scaled by their sensitivities. Estimated scaled parameters then undergo post-estimation back-transformation; see equation 5.4.6 of Doherty (2015). This strategy can reduce numerical errors in some instances; however, it can also increase computation times. The default value for *jac\_scale()* is *true*. -### 6.2.12 Running PESTPP-GLM +### 6.2.12 Running PESTPP-GLM See section 5 of this manual for how to run PESTPP-GLM. As is described in that section, model runs can be undertaken in series or in parallel. 
In either case, a prematurely terminated PESTPP-GLM run can be restarted by commencing PESTPP-GLM execution using the “/r” command line switch. -### 6.2.13 PESTPP-GLM Output Files +### 6.2.13 PESTPP-GLM Output Files The following table summarizes the contents of files that are recorded by PESTPP-GLM when it is asked to undertake highly-parameterized inversion. Most of these have been discussed above. It is assumed that the PEST control file on which the inversion process is based is named *case.pst*. @@ -2019,10 +2079,10 @@ The following table summarizes the contents of files that are recorded by PESTPP | *case.reiN* | An intermediate residuals file. This file lists model outputs, observations, residuals and related data calculated using best parameters achieved during iteration *N* of the inversion process. The latter are listed in file *case.parN*. | | *case.rei* | The latest incidence of *case.reiN*. | | *case.sen* | Parameter sensitivity file. This file contains composite parameter sensitivities; it is updated during every iteration of the inversion process. | -| *case.ipar* | This file is recorded unless the *iteration_summary()* control variable is set to *false*. It is a CSV file listing best parameters attained at all iterations of the inversion process. | -| *case.iobj* | This file is recorded unless the *iteration_summary()* control variable is set to *false*. It is a CSV file listing objective function components attained at the end of all iterations of the inversion process. | -| *case.isen* | This file is recorded unless the *iteration_summary()* control variable is set to *false*. It is a CSV file containing composite parameter sensitivities calculated during all iterations of the inversion process. | -| *case.upg.csv* | This file is recorded unless the *iteration_summary()* control variable is set to *false*. 
It is a CSV file containing upgraded parameter values calculated for each Marquardt lambda and each scale factor employed for all iterations of the inversion process. | +| *case.ipar* | This file is recorded unless the *iteration\_summary()* control variable is set to *false*. It is a CSV file listing best parameters attained at all iterations of the inversion process. | +| *case.iobj* | This file is recorded unless the *iteration\_summary()* control variable is set to *false*. It is a CSV file listing objective function components attained at the end of all iterations of the inversion process. | +| *case.isen* | This file is recorded unless the *iteration\_summary()* control variable is set to *false*. It is a CSV file containing composite parameter sensitivities calculated during all iterations of the inversion process. | +| *case.upg.csv* | This file is recorded unless the *iteration\_summary()* control variable is set to *false*. It is a CSV file containing upgraded parameter values calculated for each Marquardt lambda and each scale factor employed for all iterations of the inversion process. | | *case.rid* | Links model runs undertaken for calculating finite-difference derivatives with respect to each parameter to model run indices recorded in file *case.rmr*. | | *case.svd* | Information is appended to this file every time a parameter upgrade direction is calculated using a new Marquardt lambda. It lists the singular values of the (JtQJ + λI) matrix. | | *case.log* | PESTPP-GLM performance record. This file records the times at which PESTPP-GLM commenced and completed various processing tasks. | @@ -2036,17 +2096,17 @@ The following table summarizes the contents of files that are recorded by PESTPP | *case.N.post.obsen.csv* | (Optional) posterior model output ensemble CSV file. | | *case.N.post.paren.jcb* | (Optional) posterior parameter ensemble CSV file. | | *case.N.post.obsen.jcb* | (Optional) posterior model output ensemble CSV file. 
| -| *caseN.fosm_reweight.rei* | Scaled final weights used in FOSM calculations. | +| *caseN.fosm\_reweight.rei* | Scaled final weights used in FOSM calculations. | | *case.rst* | A binary file containing restart information. | | *case.rns, case.rnj, case.rnu* | Binary files used by the run manager. | Table 6.1. Files recorded by PESTPP-GLM. -### 6.3.4 Running PESTPP +### 6.3.4 Running PESTPP See chapter 5 of this manual for how to run PESTPP-GLM, with model runs undertaken in serial and with model runs undertaken in parallel. At the time of writing, a prematurely terminated PESTPP-GLM run cannot be restarted when implementing differential evolution. If started using the “/r” switch, it re-commences the DE process. -### 6.3.5 PESTPP-GLM Output Files +### 6.3.5 PESTPP-GLM Output Files When run in order to implement differential evolution optimization, a number of the output files recorded by PESTPP-GLM are actually empty as they pertain to gradient-based inversion. Other output files pertain to run management; these are discussed in section 6.2.13. The only output files that are relevant to DE-based optimization are those listed in the following table. In this table it is assumed that the PEST control file on which DE optimization is based is named *case.pst*. @@ -2058,13 +2118,14 @@ When run in order to implement differential evolution optimization, a number of Table 6.2 PESTPP-GLM output files that are pertinent to DE optimization. It is assumed that the name of the PEST control file is *case.pst*. -## 6.4 Summary of PESTPP-GLM Control Variables +## 6.4 Summary of PESTPP-GLM Control Variables -### 6.4.1 General + +### 6.4.1 General This section summarizes variables that control the operation of PESTPP-GLM. First those that feature in the PEST control file are discussed; see chapter 4 of this manual for a full description of the functions that they perform. The roles of PEST++ variables which control the operation of PESTPP-GLM are listed in table 6.3. 
-### 6.4.2 Control Variables in the PEST Control File +### 6.4.2 Control Variables in the PEST Control File The PESTMODE variable determines whether PESTPP-GLM runs in “regularization” or “estimation” modes. @@ -2072,7 +2133,7 @@ When PESTPP-GLM undertakes gradient based inversion, the NOPTMAX, PHIREDSTP, NPH If the PEST control file on which the inversion process is based contains a “singular value decomposition” section, then the variables NUMSING and EIGTHRESH that appear in this section determine the singular value truncation point. If no “singular value decomposition” section is present in the PEST control file, then the default value for NUMSING is the number of adjustable parameters featured in the PEST control file; the default value for EIGTHRESH is 1.0E-7. -### 6.4.3 PEST++ Control Variables +### 6.4.3 PEST++ Control Variables Table 6.3 lists PEST++ control variables. All of these are optional. If a variable is not supplied, a default value is employed. The value of the default is presented along with the name of each variable in the table below. Variables are grouped in approximate accordance with their roles. @@ -2080,42 +2141,44 @@ Variables discussed in section 5.3.6 that control parallel run management are no Note also that the number of control variables may change with time. Refer to the PEST++ web site for variables used by the latest version of PESTPP-GLM. 
-| Variable | Type | Role | -|---------------------------------|------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| *max_n\_super(100000)* | integer | The maximum number of super parameters to use when conducting SVD-assisted inversion. The default is the number of adjustable parameters, in which case the number of super parameters is effectively set by *super_eigthresh()*. | -| *super_eigthresh(1.0E-6)* | real | The ratio to maximum singular value of JtQJ at which truncation takes place to form super parameters. Note, however, that if the number of super parameters calculated in this way exceeds *max_n\_super()* then the value of the latter variable takes precedence. | -| *n_iter_base(100000)* | integer | Where super parameters are estimated in some iterations and base parameters are estimated in other iterations, this variable sets the number of sequential base parameter iterations to undertake before commencing an iteration in which super parameters are adjusted. If *n_iter_base()* is set to -1, this instructs PESTPP-GLM to emulate PEST behaviour; a base parameter Jacobian matrix is calculated; then super parameters are estimated as soon as they are defined on the basis of this matrix. Super parameters are estimated in all succeeding iterations. 
| -| *n_iter_super(0)* | integer | Where super parameters are estimated in some iterations and base parameters are estimated in other iterations, this variable sets the number of sequential super parameter iterations to undertake before commencing an iteration in which a base parameter Jacobian matrix is recalculated and base parameters are adjusted. | -| *jac_scale(true)* | Boolean | Scale parameters by their sensitivities when calculating parameter upgrades. This can increase numerical precision; however, it may incur a numerical cost. | -| *svd_pack(redsvd)* | text | This informs PESTPP-GLM of the package that it must employ to undertake singular value decomposition of the JtQJ matrix (appropriately modified to include the Marquardt lambda and regularization). Options are “eigen” and “redsvd”. | -| *lambdas(0.1,1,10,100,1000)* | set of real numbers | Values for the Marquardt lambda used in calculation of parameter upgrades. Note that this base list is augmented with values bracketing the previous iteration’s best lambda. However, if a single value is specified, only that lambda (and no other lambda) is used in all iterations. | -| *lambda_scale_fac(.75,1.0,1.1)* | set of real numbers | These values are used to scale each parameter upgrade vector calculated using different values of lambda. This results in a line search along each upgrade vector direction. The number of tested parameter upgrades (and hence model runs) is equal to the number of lambdas times the number of scaling factors. Set *lambda_scale_fac()* to 1.0 to disable an upgrade direction line search. | -| *base_jacobian()* | text | Provide the name of a JCO file. The Jacobian matrix contained in this file will be used for the first iteration of the inversion process. | -| *hotstart_resfile()* | text | Specify the name of a residuals file from a previous PESTPP-GLM run. PESTPP-GLM will assume that these are model outputs corresponding to initial parameter values. 
It will use these instead of undertaking the initial model run. | -| *uncertainty(true)* | Boolean | Flag to active or deactivate FOSM-based parameter and (optionally) forecast uncertainty estimation. | -| *parcov()* | text | Provide the name of a JCO, JCB, UNC or COV file from which the prior parameter covariance matrix used in FOSM analysis is read. | -| *par_sigma_range(4.0)* | real | The difference between a parameter’s upper and lower bounds expressed as standard deviations. | -| *forecasts()* | series of text strings | Provide the names of one or more observations featured in the “observation data” section of the PEST control file; these are treated as predictions in FOSM predictive uncertainty analysis. | -| *glm_num_reals(100)* | integer | Number of parameter realizations to draw from the posterior parameter distribution (using final, estimated parameter values as the parameter mean vector, and the FOSM-based posterior covariance matrix). Following generation of the realizations, the model is run once for each realization. The resulting observation ensemble is saved in a CSV file named *case.obs.csv*; the parameter ensemble is saved in a CSV file named *case.par.csv.* | -| *save_binary(false)* | Boolean | A flag to save parameter and observation ensembles in binary format. If this is set to *true*, parameter and observation ensembles are saved in files named *case.par.jcb* and *case.obs.jcb*. | -| *tie_by_group(false)* | Boolean | Flag to tie all adjustable parameters by group designation; however, all user-supplied parameter tied-parent relationships are preserved. The effective number of adjustable parameters thus becomes the number of parameter groups (which contain at least one adjustable parameter) plus the number of parameters that are listed as having others tied to it. 
| -| *iteration_summary(true)* | Boolean | This flag activates or deactivates the writing of CSV files summarizing parameters (*case.ipar*), objective functions (*case.iobj*), sensitivities (*case.isen*), trial parameter upgrades (*case.upg.csv*) and parameter-to-run-id mapping (*case.rid*). | -| *der_forgive(true)* | Boolean | If set to *true*, then if model run failure occurs when calculating finite-difference derivatives with respect to a certain parameter, that parameter is frozen at its current value for the remainder of the iteration. If set to *false*, PESTPP-GLM terminates execution with an appropriate message if this occurs. | -| *Enforce_tied_bounds(false)* | Boolean | Flag to enforce parameter bounds on any tied parameters | -| *glm_accept_mc_phi(false)* | Boolean | Flag to accept FOSM-based realization phi each base iteration if the phi is lower than the lambda-testing phi. Default is false. | -| *rand_seed(358183147)* | unsigned integer | Seed for the random number generator. Used for FOSM-basd Monte Carlo | -| *glm_rebase_super(true)* | boolean | A flag to run the super-parameter truncated values once at the start of the first super parameter iteration to provide a more accurate “base” run for calculating sensitivity numerators. Only applies if *n_iter_base* = -1 and *base_jacobian* is supplied. Default is False, which indicates use either the *hotstart_resfile* residuals or use the base run previously completed | -| *glm_iter_mc(false)* | Boolean | Flag to undertake FOSM-based posterior Monte Carlo during each iteration of PESTPP-GLM. Default is False, which will result in Monte Carlo only after iterations are done (depending on the *glm_num_reals* and *uncertainty* flags) | -| *ensemble_output_precision* | int | Number of significant digits to use in ASCII format ensemble files. Default is 6 | -| *glm_norm_form(diag)* | string | The form of the normal matrix to use. 
Can be “ident” (identity matrix lambda scaling), “diag” (use the diagonal of XtQX for lambda scaling) or “prior” (scale with the inverse of the prior parameter covariance matrix. Default is diag.s | +| Variable | Type | Role | +|-----------------------------------|------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| *max\_n\_super(100000)* | integer | The maximum number of super parameters to use when conducting SVD-assisted inversion. The default is the number of adjustable parameters, in which case the number of super parameters is effectively set by *super\_eigthresh()*. | +| *super\_eigthresh(1.0E-6)* | real | The ratio to maximum singular value of JtQJ at which truncation takes place to form super parameters. Note, however, that if the number of super parameters calculated in this way exceeds *max\_n\_super()* then the value of the latter variable takes precedence. | +| *n\_iter\_base(100000)* | integer | Where super parameters are estimated in some iterations and base parameters are estimated in other iterations, this variable sets the number of sequential base parameter iterations to undertake before commencing an iteration in which super parameters are adjusted. If *n\_iter\_base()* is set to -1, this instructs PESTPP-GLM to emulate PEST behaviour; a base parameter Jacobian matrix is calculated; then super parameters are estimated as soon as they are defined on the basis of this matrix. 
Super parameters are estimated in all succeeding iterations. | +| *n\_iter\_super(0)* | integer | Where super parameters are estimated in some iterations and base parameters are estimated in other iterations, this variable sets the number of sequential super parameter iterations to undertake before commencing an iteration in which a base parameter Jacobian matrix is recalculated and base parameters are adjusted. | +| *jac\_scale(true)* | Boolean | Scale parameters by their sensitivities when calculating parameter upgrades. This can increase numerical precision; however, it may incur a numerical cost. | +| *svd\_pack(redsvd)* | text | This informs PESTPP-GLM of the package that it must employ to undertake singular value decomposition of the JtQJ matrix (appropriately modified to include the Marquardt lambda and regularization). Options are “eigen” and “redsvd”. | +| *lambdas(0.1,1,10,100,1000)* | set of real numbers | Values for the Marquardt lambda used in calculation of parameter upgrades. Note that this base list is augmented with values bracketing the previous iteration’s best lambda. However, if a single value is specified, only that lambda (and no other lambda) is used in all iterations. | +| *lambda\_scale\_fac(.75,1.0,1.1)* | set of real numbers | These values are used to scale each parameter upgrade vector calculated using different values of lambda. This results in a line search along each upgrade vector direction. The number of tested parameter upgrades (and hence model runs) is equal to the number of lambdas times the number of scaling factors. Set *lambda\_scale\_fac()* to 1.0 to disable an upgrade direction line search. | +| *base\_jacobian()* | text | Provide the name of a JCO file. The Jacobian matrix contained in this file will be used for the first iteration of the inversion process. | +| *hotstart\_resfile()* | text | Specify the name of a residuals file from a previous PESTPP-GLM run. 
PESTPP-GLM will assume that these are model outputs corresponding to initial parameter values. It will use these instead of undertaking the initial model run. | +| *uncertainty(true)* | Boolean | Flag to activate or deactivate FOSM-based parameter and (optionally) forecast uncertainty estimation. | +| *parcov()* | text | Provide the name of a JCO, JCB, UNC or COV file from which the prior parameter covariance matrix used in FOSM analysis is read. | +| *par\_sigma\_range(4.0)* | real | The difference between a parameter’s upper and lower bounds expressed as standard deviations. | +| *forecasts()* | series of text strings | Provide the names of one or more observations featured in the “observation data” section of the PEST control file; these are treated as predictions in FOSM predictive uncertainty analysis. | +| *glm\_num\_reals(100)* | integer | Number of parameter realizations to draw from the posterior parameter distribution (using final, estimated parameter values as the parameter mean vector, and the FOSM-based posterior covariance matrix). Following generation of the realizations, the model is run once for each realization. The resulting observation ensemble is saved in a CSV file named *case.obs.csv*; the parameter ensemble is saved in a CSV file named *case.par.csv*. | +| *save\_binary(false)* | Boolean | A flag to save parameter and observation ensembles in binary format. If this is set to *true*, parameter and observation ensembles are saved in files named *case.par.jcb* and *case.obs.jcb*. | +| *tie\_by\_group(false)* | Boolean | Flag to tie all adjustable parameters by group designation; however, all user-supplied parameter tied-parent relationships are preserved. The effective number of adjustable parameters thus becomes the number of parameter groups (which contain at least one adjustable parameter) plus the number of parameters that are listed as having others tied to them.
| +| *iteration\_summary(true)* | Boolean | This flag activates or deactivates the writing of CSV files summarizing parameters (*case.ipar*), objective functions (*case.iobj*), sensitivities (*case.isen*), trial parameter upgrades (*case.upg.csv*) and parameter-to-run-id mapping (*case.rid*). | +| *der\_forgive(true)* | Boolean | If set to *true*, then if model run failure occurs when calculating finite-difference derivatives with respect to a certain parameter, that parameter is frozen at its current value for the remainder of the iteration. If set to *false*, PESTPP-GLM terminates execution with an appropriate message if this occurs. | +| *Enforce\_tied\_bounds(false)* | Boolean | Flag to enforce parameter bounds on any tied parameters | +| *glm\_accept\_mc\_phi(false)* | Boolean | Flag to accept FOSM-based realization phi each base iteration if the phi is lower than the lambda-testing phi. Default is false. | +| *rand\_seed(358183147)* | unsigned integer | Seed for the random number generator. Used for FOSM-based Monte Carlo. | +| *glm\_rebase\_super(true)* | boolean | A flag to run the super-parameter truncated values once at the start of the first super parameter iteration to provide a more accurate “base” run for calculating sensitivity numerators. Only applies if *n\_iter\_base* = -1 and *base\_jacobian* is supplied. Default is False, which indicates use of either the *hotstart\_resfile* residuals or the base run previously completed | +| *glm\_iter\_mc(false)* | Boolean | Flag to undertake FOSM-based posterior Monte Carlo during each iteration of PESTPP-GLM. Default is False, which will result in Monte Carlo only after iterations are done (depending on the *glm\_num\_reals* and *uncertainty* flags) | +| *ensemble\_output\_precision* | int | Number of significant digits to use in ASCII format ensemble files. Default is 6 | +| *glm\_norm\_form(diag)* | string | The form of the normal matrix to use. 
Can be “ident” (identity matrix lambda scaling), “diag” (use the diagonal of XtQX for lambda scaling) or “prior” (scale with the inverse of the prior parameter covariance matrix). Default is diag. | Table 6.3 PESTPP-GLM control variables. Variables which control parallel run management can be supplied in addition to these. See section 5.3.6. -# 7. PESTPP-SEN +# 7. PESTPP-SEN + -## 7.1 Introduction +## 7.1 Introduction -### 7.1.1 General + +### 7.1.1 General The purpose of global sensitivity analysis (GSA) is to characterize how model parameters affect model outputs (or a function of model outputs such as an objective function) over a wide range of acceptable parameter values. In doing this, it strives for greater robustness, and for the provision of more information, than local sensitivity analysis based on partial derivatives of model outputs with respect to model parameters. Because local sensitivity analysis pertains to a single point in parameter space, the information that it yields is often insufficient to support an understanding of the behavior of nonlinear models whose outputs depend on combinations of model parameters in complicated and parameter-value-dependent ways. @@ -2137,15 +2200,16 @@ PESTPP-SEN currently supports two GSA methods. These are The Method of Morris is a “one-at-a-time” method (Saltelli et al, 2004). It is computationally efficient and is therefore suitable for use with models whose run times are high. It provides estimates of the first two moments (mean and variance) of the effect that each parameter has on a model output of interest. These statistics acknowledge that a parameter’s sensitivity may be a function not just of its own value, but of the values of other parameters. In doing so, they reveal those parameters that have the most influence on model outputs of interest, and the consistency of these influences over parameter space. 
The information that it provides may justify the omission of some parameters from a calibration exercise; and/or it may support the design of a simple, fast-running, surrogate model. In contrast, the Method of Sobol has the potential to provide much more detailed information than the Method of Morris. Because it is based on decomposition of variance (Saltelli et al, 2004), it can reveal details of parameter nonlinearity that are beyond the reach of other methods. It can also reveal complex parameter interactions and, by inference, interaction of the processes to which these parameters pertain. Unfortunately, this information comes with a high computational cost. Hence unless Sobol-based global sensitivity analysis is restricted to only a few parameters and a relatively fast-running model, it is generally computationally unaffordable. -### 7.1.2 Grouped Parameters +### 7.1.2 Grouped Parameters There are many occasions where a modeler wishes to explore sensitivities of one or more model outputs to groups of parameters rather than to a single parameter. For example, in the groundwater modeling context, a modeler may wish to explore the sensitivity of a particular model output, or of the calibration objective function, to all of the pilot point parameters which collectively describe the vertical hydraulic conductivity of an aquitard. Analysis of the sensitivity of a model output to a group of parameters is easily accomplished by tying all but one of the members of that group to the remaining parameter; the latter then represents the group. In the above example, a single pilot point parameter would be selected as parent to all other pilot point parameters that collectively represent the vertical hydraulic conductivity of the aquitard. Where the sensitivity of the parent parameter is assessed using a global sensitivity analysis methodology, the joint sensitivity of it, and all of the parameters that are tied to it, are thereby assessed. 
In implementing this strategy, a modeler must ensure that the values assigned to tied parameters in relation to the parent parameter are realistic, because the ratio of these values will be preserved during the entire sensitivity analysis process. -Note that all members of the PEST++ suite (including PESTPP-SEN) implement an alternative means by which all members of a group of parameters can be tied together as a single parameter. Use of the *tie_by_group()* control variable dispenses with the need to specify individual parameter linkages in the manner described above. It is important to note that tying parameters together can change the effect upper and lower bounds of the adjustable parameters – this is necessary to keep the tied parameters within their bounds. The effective bounds for each adjustable parameter are calculated at the start of PESTPP-SEN using the distance that each parameter is from its bounds; this information is reported to the run record file. +Note that all members of the PEST++ suite (including PESTPP-SEN) implement an alternative means by which all members of a group of parameters can be tied together as a single parameter. Use of the *tie\_by\_group()* control variable dispenses with the need to specify individual parameter linkages in the manner described above. It is important to note that tying parameters together can change the effective upper and lower bounds of the adjustable parameters – this is necessary to keep the tied parameters within their bounds. The effective bounds for each adjustable parameter are calculated at the start of PESTPP-SEN using the distance that each parameter is from its bounds; this information is reported to the run record file. + +## 7.2 Method of Morris -## 7.2 Method of Morris -### 7.2.1 Elementary Effects +### 7.2.1 Elementary Effects The Method of Morris focuses on quantities called “elementary effects”. In describing this methodology, notation used by Morris (1991) and by Saltelli (2008) is adopted. 
The latter text explains the method particularly well (see chapter 3 of that text); hence the description provided herein is brief. @@ -2167,48 +2231,49 @@ The elementary effect of each parameter has a probability distribution. Using st For each adjustable parameter featured in the “parameter data” section of a PEST control file, PESTPP-SEN computes all of *μ*, *μ*\* and *σ* for the objective function (calculated using equation 3.3) and, optionally, for each model output (i.e., observation) featured in the “observation data” section of the PEST control file. -### 7.2.2 Sampling Scheme +### 7.2.2 Sampling Scheme While the Method of Morris does not have the same theoretical foundations as variance-based sensitivity analysis, its major attraction is that it can provide reasonably robust indications of parameter influence, and parameter influence variability, in a relatively few number of model runs. Its efficiency is an outcome of the way in which it chooses parameters on which to base these model runs. By varying a single, randomly-chosen parameter at a time by the amount Δ described above, the cost of each new addition to the pool through which *μ*, *μ*\* and *σ* are calculated for each parameter is only one model run. At the same time, the sequencing of model runs makes the effects of model nonlinearity and parameter interactions visible in these statistics. Model runs are performed in sequences of *m* runs (where *m* is the number of parameters used by the model), in which each parameter is varied in random order while all other parameters retain their values from the previous model run. At the end of this sequence a revised estimate of statistics pertaining to all EE*i*’s is available. A new model run sequence is then initiated, starting at another random point within the gridded parameter domain. See Morris (1991) and chapter 3 of Saltelli (2008) for full details. The number of model run sequences (denoted as *r* by Morris, 1991) is provided by the user. 
For a highly nonlinear model, a greater value of *r* leads to more robust estimates of *μ*, *μ*\* and *σ*. -### 7.2.3 Control Variables +### 7.2.3 Control Variables In common with other programs comprising the PEST++ suite, PESTPP-SEN obtains case-defining information from a PEST control file. PEST++ control variables which govern the operation of the Method of Morris can be placed in this file on lines that begin with the “++” character string. PEST++ variables which control the operation of the Method of Morris are listed in the following table. Default values are provided with each variable in this table. Note that if no GSA-pertinent variables are provided in a PEST control file used by PESTPP-SEN, it implements the Method of Morris. -| Control variable | Type | Role | -|------------------------------|------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| *gsa_method(morris)* | text | Methods are “morris” and “sobol”. | -| *rand_seed(358183147)* | unsigned integer | Seed for the random number generator. | -| *gsa_morris_r(4)* | integer | Sample size. The number of times that an elementary effect is computed for each parameter. That is, the number of sequences of *m* model runs undertaken by PESTPP-SEN, where *m* is the number of adjustable parameters featured in the PEST control file. | -| *gsa_morris_p(4)* | integer | The number of levels employed to grid the interval \[0,1\] associated with each transformed parameter. The number of intervals into which \[0, 1\] is therefore subdivided is *p*-1. | -| *gsa_morris_delta()* | real | The default value for *morris_delta()* is *p*/2\[(*p*-1)\]. The value supplied for this variable must be a multiple of 1/2\[(*p*-1)\]. 
No check is made to assure this is the case so users must take if specifying this argument | -| *gsa_morris_obs_sen(true)* | Boolean | If supplied as *false*, PESTPP-SEN computes parameter sensitivities for the objective function only. If supplied as *true*, PESTPP-SEN computes parameter sensitivities for the objective function, as well as for each model output corresponding to observations featured in the “observation data” section of the PEST control file. | -| *tie_by_group(false)* | Boolean | Flag to tie all adjustable parameters together within each parameter group. Initial parameter ratios are maintained as parameters are adjusted. Parameters that are designated as already tied, or that have parameters tied to them, are not affected. | -| *enforce_tied_bounds(false)* | Boolean | Flag to enforce parameter bounds on any tied parameters | +| Control variable | Type | Role | +|--------------------------------|------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| *gsa\_method(morris)* | text | Methods are “morris” and “sobol”. | +| *rand\_seed(358183147)* | unsigned integer | Seed for the random number generator. | +| *gsa\_morris\_r(4)* | integer | Sample size. The number of times that an elementary effect is computed for each parameter. That is, the number of sequences of *m* model runs undertaken by PESTPP-SEN, where *m* is the number of adjustable parameters featured in the PEST control file. | +| *gsa\_morris\_p(4)* | integer | The number of levels employed to grid the interval \[0,1\] associated with each transformed parameter. The number of intervals into which \[0, 1\] is therefore subdivided is *p*-1. 
| +| *gsa\_morris\_delta()* | real | The default value for *gsa\_morris\_delta()* is *p*/2\[(*p*-1)\]. The value supplied for this variable must be a multiple of 1/2\[(*p*-1)\]. No check is made to ensure that this is the case, so users must take care when specifying this argument. | +| *gsa\_morris\_obs\_sen(true)* | Boolean | If supplied as *false*, PESTPP-SEN computes parameter sensitivities for the objective function only. If supplied as *true*, PESTPP-SEN computes parameter sensitivities for the objective function, as well as for each model output corresponding to observations featured in the “observation data” section of the PEST control file. | +| *tie\_by\_group(false)* | Boolean | Flag to tie all adjustable parameters together within each parameter group. Initial parameter ratios are maintained as parameters are adjusted. Parameters that are designated as already tied, or that have parameters tied to them, are not affected. | +| *enforce\_tied\_bounds(false)* | Boolean | Flag to enforce parameter bounds on any tied parameters | Table 7.1 Variables used by PESTPP-SEN to control the operation of the Method of Morris. Saltelli et al. (2004) suggest the following values for Method of Morris control variables: -- 4 for *morris_p()* +- 4 for *gsa\_morris\_p()* -- 0.667 for *morris_delta()* +- 0.667 for *gsa\_morris\_delta()* -- 4 to 10 for *morris_r()* +- 4 to 10 for *gsa\_morris\_r()* -## 7.3 Method of Sobol +## 7.3 Method of Sobol -### 7.3.1 Sensitivity Indices + +### 7.3.1 Sensitivity Indices The Method of Sobol is based on the decomposition of variance. It employs theory derived by Sobol (2001) through which it can be shown that any function of an arbitrary number of parameters can be decomposed into summed functions of individual parameters, pairs of parameters, triplets of parameters, etc. 
From this it follows that the variance of any model output (or function of a model output) can be expressed as separate variances arising from individual parameters, pairs of parameters, triplets of parameters, etc. By discovering and separating these variances, the importance of each parameter to the model output can be revealed. So too can the extent to which the influence of any particular parameter on a model’s output results from its interaction with other parameters rather than being a direct outcome of its own influence. See Sobol (2001) and Homma and Saltelli (1996) for further details. See Saltelli et al. (2004, 2008) for a comprehensive and easily understood discussion of the method, and for further details of concepts that are presented in summary form below. The total variance *VT*(*y*) of the output *y* of a model with *m* parameters can be decomposed as follows: -VT = VT(y) = sumi(Vi) + sumi(sumj>iVij) + sumi(sumj>1(sumk>jVijk)) ….. + Vi,2…m (7.3) +$V_T = V_T(y) = \sum_i V_i + \sum_i \sum_{j>i} V_{ij} + \sum_i \sum_{j>i} \sum_{k>j} V_{ijk} + \dots + V_{1,2,\dots,m}$ (7.3) where @@ -2244,51 +2309,54 @@ where the symbol x\~i signifies all parameters but *xi* be Notwithstanding its ability to provide a comprehensive characterization of the relationship between a model output and all parameters employed by a model, use of Sobol’s method is compromised by its high model run requirements. Hence the Method of Morris is normally a more practical alternative unless model run times are minimal. -### 7.3.2 Control Variables +### 7.3.2 Control Variables PEST++ variables which control the operation of the Method of Sobol are listed in the following table. As for variables which control PESTPP-SEN’s implementation of the Method of Morris, these must be provided on “++” lines within a PEST control file. 
-| Control variable | Type | Role | -|------------------------|------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| *gsa_method(sobol)* | text | Select “sobol” to implement Method of Sobol. | -| *rand_seed(358183147)* | unsigned integer | Seed for the random number generator. | -| *gsa_sobol_samples()* | integer | The number of samples to use in computing variances. The number of model runs is actually twice this number because of the need to employ two series of parameter samples. See Saltelli et al (2008) for details. | -| *gsa_sobol_par_dist()* | text | Specifies whether parameter samples should be drawn from a uniform or normal distribution. Values are “unif” or “norm” respectively. In the latter case, samples are centered on parameter values provided in the PEST control file, while the standard deviation is a quarter of the difference between a parameter’s upper and lower bounds. Log-uniform and log-normal distributions are employed for parameters which are denoted as log-transformed in the PEST control file. | -| *tie_by_group(false)* | Boolean | Flag to tie all adjustable parameters together within each parameter group. Initial parameter ratios are maintained as parameters are adjusted. Parameters that are designated as already tied, or that have parameters tied to them, are not affected. 
| +| Control variable | Type | Role | +|---------------------------|------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| *gsa\_method(sobol)* | text | Select “sobol” to implement Method of Sobol. | +| *rand\_seed(358183147)* | unsigned integer | Seed for the random number generator. | +| *gsa\_sobol\_samples()* | integer | The number of samples to use in computing variances. The number of model runs is actually twice this number because of the need to employ two series of parameter samples. See Saltelli et al (2008) for details. | +| *gsa\_sobol\_par\_dist()* | text | Specifies whether parameter samples should be drawn from a uniform or normal distribution. Values are “unif” or “norm” respectively. In the latter case, samples are centered on parameter values provided in the PEST control file, while the standard deviation is a quarter of the difference between a parameter’s upper and lower bounds. Log-uniform and log-normal distributions are employed for parameters which are denoted as log-transformed in the PEST control file. | +| *tie\_by\_group(false)* | Boolean | Flag to tie all adjustable parameters together within each parameter group. Initial parameter ratios are maintained as parameters are adjusted. Parameters that are designated as already tied, or that have parameters tied to them, are not affected. | Table 7.2 Variables used by PESTPP-SEN to control the operation of the Method of Sobol. -## 7.4 PESTPP-SEN Output Files +## 7.4 PESTPP-SEN Output Files + PESTPP-SEN writes the following output files. 
It is assumed that the filename base of the PEST control file on which global sensitivity analysis is based is named *case.pst*. -| File | Contents | -|----------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| *case.msn* | Method of Morris only. Lists method of Morris outputs (*μ*, *μ*\* and *σ*) for each adjustable parameter. The model-generated quantity for which these provide sensitivity measures is the objective function. This is calculated using equation 3.3. | -| *case.group.msn* | Method of Morris only. Lists method of Morris outputs (*μ*, *μ*\* and *σ*) for each adjustable parameter to each observation group’s objective function contribution. . | -| *case.mio* | Method of Morris only. This CSV-format file is recorded if the *morris_obs_sen()* control variable is set to *true* (the default). It records *μ*, *μ*\* and *σ* for all model outputs (i.e., observations) featured in the “observation data” section of the PEST control file. | -| *case.raw.csv* | Method of Morris only. Lists parameter values and objective functions for all model runs. | -| *case.group.raw.csv* | Method of Morris only. Lists parameter values and observation group objective function contributions for all model runs. | -| *case.sobol.si,csv* | Sobol only. Listing of first-order sensitivity indices for each observation-parameter pair | -| *case.sobol.sti.csv* | Sobol only. Listing of total sensitivity indices for each observation-parameter pair | -| *Case.sobol.obs.csv* | Sobol only. Listing of all observations from running the model through the sobol sequence | -| *Case.sobol.par.csv* | Sobol only. Listing of all parameter sets used to run the sobol sequence | -| *case.sbl* | Method of Sobol only. 
Lists first order and total sensitivity indices for all model outputs listed in the “observation data” section of the PEST control file with respect to all adjustable parameters listed in the “parameter data” section of the PEST control file. | -| *case.rmr* | Parallel run management record. This file is written if model runs are conducted in parallel. | -| *Case.sen.par.csv* | Morris only. Lists the parameter sets used to run the model. | -| *case.rns* | Binary file used for model run management. | +| File | Contents | +|----------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| *case.msn* | Method of Morris only. Lists method of Morris outputs (*μ*, *μ*\* and *σ*) for each adjustable parameter. The model-generated quantity for which these provide sensitivity measures is the objective function. This is calculated using equation 3.3. | +| *case.group.msn* | Method of Morris only. Lists method of Morris outputs (*μ*, *μ*\* and *σ*) for each adjustable parameter to each observation group’s objective function contribution. | +| *case.mio* | Method of Morris only. This CSV-format file is recorded if the *gsa\_morris\_obs\_sen()* control variable is set to *true* (the default). It records *μ*, *μ*\* and *σ* for all model outputs (i.e., observations) featured in the “observation data” section of the PEST control file. | +| *case.raw.csv* | Method of Morris only. Lists parameter values and objective functions for all model runs. | +| *case.group.raw.csv* | Method of Morris only. Lists parameter values and observation group objective function contributions for all model runs. | +| *case.sobol.si.csv* | Sobol only. Listing of first-order sensitivity indices for each observation-parameter pair | +| *case.sobol.sti.csv* | Sobol only. 
Listing of total sensitivity indices for each observation-parameter pair | +| *Case.sobol.obs.csv* | Sobol only. Listing of all observations from running the model through the sobol sequence | +| *Case.sobol.par.csv* | Sobol only. Listing of all parameter sets used to run the sobol sequence | +| *case.sbl* | Method of Sobol only. Lists first order and total sensitivity indices for all model outputs listed in the “observation data” section of the PEST control file with respect to all adjustable parameters listed in the “parameter data” section of the PEST control file. | +| *case.rmr* | Parallel run management record. This file is written if model runs are conducted in parallel. | +| *Case.sen.par.csv* | Morris only. Lists the parameter sets used to run the model. | +| *case.rns* | Binary file used for model run management. | Table 7.3 Files written by PESTPP-SEN. It is assumed that the name of the PEST control file is *case.pst*. Data elements in all of the above files are comma delimited. -# 8. PESTPP-OPT +# 8. PESTPP-OPT + -## 8.1 Introduction +## 8.1 Introduction -### 8.1.1 A Publication + +### 8.1.1 A Publication PESTPP-OPT is described by White et al (2018), where examples of its use are also provided. The following description summarizes information available from this source. See also Wagner and Gorelick (1987) where a similar methodology is described. -### 8.1.2 Overview +### 8.1.2 Overview Sustainable management of a natural system often requires that an optimization problem be solved. Something must be maximized or minimized through adjustment of so-called “decision variables”, subject to certain constraints. For example, it may be desirable to maximize the amount of water extracted from a number of wells (where pumping rates are the decision variables), subject to the constraints that flow in an adjacent stream does not fall below a specified rate, and that groundwater levels in certain observation wells are maintained above certain levels. 
Design of a contaminant remediation system may attempt to ensure that the cost of water extraction and treatment is minimized subject to the constraint that the contaminant is captured; pumping and injection rates, and the locations of pumping and injection wells, comprise the decision variables in this example. @@ -2300,13 +2368,13 @@ If uncertainty is to be taken into account in imposition of an optimization cons PESTPP-OPT not only solves a constrained optimization problem. It solves a constrained optimization problem that accommodates uncertainties in model outputs to which constraints are applied. These are often referred to as “chance constraints”. In applying chance constraints, PESTPP-OPT assumes that model predictive uncertainty is an outcome of model parameter uncertainty. The latter is, in turn, an outcome of prior parameter uncertainty (i.e., the uncertainty range that emerges from the stochastic nature of expert knowledge), and the extent to which this uncertainty is reduced through the model calibration process. Parameter uncertainty reduction is a function of the information content of the calibration dataset, and the extent to which flow of this information is hampered by the presence of noise within that dataset. -### 8.1.3 Calculation of Uncertainty +### 8.1.3 Calculation of Uncertainty PESTPP-OPT offers three options incorporate model-based constraint uncertainties: supplying model-based constraint “weights” in the pest control file as standard deviations, through the use of “stack-based” constraint uncertainty and through using linear methods. -The simplest way to activate and use chance constraints is through supply weights for model-based constraints standard deviations. These standard deviations could have been calculated through the use of pyEMU or the PREDUNC suite and then transferred to the control file. Or these weights could have been calculated empirically from a Monte Carlo analysis. 
This chance constraint option is activated via the *opt_std_weights(true)* option. +The simplest way to activate and use chance constraints is to supply weights for model-based constraints as standard deviations. These standard deviations could have been calculated through the use of pyEMU or the PREDUNC suite and then transferred to the control file. Or these weights could have been calculated empirically from a Monte Carlo analysis. This chance constraint option is activated via the *opt\_std\_weights(true)* option. -The linear-analysis form of chance constraints use the same methods that are available through PESTPP-GLM (if the *uncertainty()* control variable is set to *true*), PyEMU and the PEST PREDUNC utilities. These are also referred to as “first order second moment”, or FOSM, methods. FOSM methods require user-characterization of prior parameter uncertainty, user-characterization of measurement noise associated with the calibration dataset, a Jacobian matrix that provides sensitivities of model outputs used in the calibration process to adjustable parameters, and the sensitivities of pertinent model predictions (i.e., the predictions to which chance constraints will be applied during the optimization process) to parameters used by the model. If asked to do so, PESTPP-OPT can calculate these sensitivities itself. Optionally, it can deduce prior parameter uncertainties from parameter bounds supplied in a PEST control file and also through the use of a \* *parameter data external* section with *standard_deviation* column specified in the external parameter data file(s) . It calculates measurement noise from the values of observation weights that appear in this same PEST control file, or, optionally, from an *\* observation data external* section with a *standard_deviation* column specified in the external observation data file(s). 
+The linear-analysis form of chance constraints use the same methods that are available through PESTPP-GLM (if the *uncertainty()* control variable is set to *true*), PyEMU and the PEST PREDUNC utilities. These are also referred to as “first order second moment”, or FOSM, methods. FOSM methods require user-characterization of prior parameter uncertainty, user-characterization of measurement noise associated with the calibration dataset, a Jacobian matrix that provides sensitivities of model outputs used in the calibration process to adjustable parameters, and the sensitivities of pertinent model predictions (i.e., the predictions to which chance constraints will be applied during the optimization process) to parameters used by the model. If asked to do so, PESTPP-OPT can calculate these sensitivities itself. Optionally, it can deduce prior parameter uncertainties from parameter bounds supplied in a PEST control file and also through the use of a \* *parameter data external* section with *standard\_deviation* column specified in the external parameter data file(s) . It calculates measurement noise from the values of observation weights that appear in this same PEST control file, or, optionally, from an *\* observation data external* section with a *standard\_deviation* column specified in the external observation data file(s). The variance (square of standard deviation) of the post-calibration uncertainty of a model prediction *s* can be calculated using either of the following equations; see Doherty (2015) for details. @@ -2346,11 +2414,11 @@ The posterior uncertainty of a model output can then be calculated directly from σ2*s* = ytCʹ(k)y (8.3) -The stack-based form of chance constraint usage is the most rigorous in that it removes the assumed linear relation between parameters and model-based constraints. 
The “stack” is essentially a parameter ensemble (possibly resulting from a PESTPP-IES run) that is evaluated during the PESTPP-OPT – the frequency of this evaluation is controlled with the *opt_recalc_chance_every* integer option. In essence, the stack represents parameter uncertainty. When the stack is updated, the columns of the stack that correspond to decision variables are filled with the current decision variable values. When the stack is run, the resulting observation stack or “ensemble” contains columns that represent a stochastic sample of model-based constraints (among the other observations listed in the pest control file). Compared to the risk-shift process from the FOSM-based chance constraints, the risk-shifting for stack-based constraints is simpler. For each model-based constraints, we simple sort the column of the observation stack corresponding to the constraint and select the value at the index that corresponds to the risk value. For example, if 100 realizations are being used in the stack, we select the 95th value of the sorted column for a risk of 0.95. In this way, we also relax the assumption of a Gaussian distribution for the model-based constraints. +The stack-based form of chance constraint usage is the most rigorous in that it removes the assumed linear relation between parameters and model-based constraints. The “stack” is essentially a parameter ensemble (possibly resulting from a PESTPP-IES run) that is evaluated during the PESTPP-OPT – the frequency of this evaluation is controlled with the *opt\_recalc\_chance\_every* integer option. In essence, the stack represents parameter uncertainty. When the stack is updated, the columns of the stack that correspond to decision variables are filled with the current decision variable values. 
When the stack is run, the resulting observation stack or “ensemble” contains columns that represent a stochastic sample of model-based constraints (among the other observations listed in the PEST control file). Compared to the risk-shift process from the FOSM-based chance constraints, the risk-shifting for stack-based constraints is simpler. For each model-based constraint, we simply sort the column of the observation stack corresponding to the constraint and select the value at the index that corresponds to the risk value. For example, if 100 realizations are being used in the stack, we select the 95th value of the sorted column for a risk of 0.95. In this way, we also relax the assumption of a Gaussian distribution for the model-based constraints. If a stack is “reused” for multiple iterations, the same process is used except that we subtract the mean value from each column and add the remaining “anomalies” to the current constraint value. This effectively transfers the stack-based probability distribution (represented by each column of the observation stack) to the current constraint value. -### 8.1.4 Optimization +### 8.1.4 Optimization An optimization problem can be formulated in many ways. For the moment it will be characterized as minimizing an objective function. (A maximization problem can be turned into a minimization problem simply through reversing the sign of the objective function.) The objective function which PESTPP-OPT minimizes must be distinguished from that which is minimized through model calibration. Use of PESTPP-OPT assumes that the model has already been calibrated (or, if not, it assumes that it does not need to be calibrated). The set of parameters that it uses must therefore be those that emerge from the calibration process or (if the model has not been calibrated) those that are of minimized error variance from an expert knowledge point of view.
Hence the objective function that is the focus of model calibration is not considered when using PESTPP-OPT. Nevertheless, as will be discussed below, it is implicitly taken into account through the weights that are assigned to observations comprising the calibration dataset that are featured in the PEST control file on which PESTPP-OPT’s operations are based. @@ -2380,7 +2448,7 @@ where *aij* is the element of A that occupies its *i*th ro The optimization algorithm employed by PESTPP-OPT employs a so-called “linear programming” or “simplex” methodology that is accessed through the open-source CLP optimization library (Forrest et al., 2016), developed through the Computational Infrastructure for Operations Research (COIN-OR) project; see Lougee-Heimer (2003). This algorithm is fast and efficient; it can handle hundreds of thousands of decision-variables. The assumption of a linear relationship between model outputs and decision variables is accommodated by repeating the linear optimization process in a series of iterations in which the decision variable response matrix (i.e., A of equation 8.5) is re-computed on each occasion. Where decision-variables are many, this can be a time-consuming process. The iterative nature of the optimization process earns it the name “sequential linear programming”, or simply SLP for short. See Ahlfield and Mulligan (2000) for further details. -### 8.1.5 Chance Constraints +### 8.1.5 Chance Constraints A user of PESTPP-OPT can inform it whether he/she would like the optimization process which it implements to be risk neutral, risk averse, or risk tolerant. In the latter two cases he/she can specify the degree of aversion or tolerance that should characterize that process. Tolerance or aversion is introduced through the way in which model output uncertainty affects the imposition of optimization constraints. 
@@ -2388,9 +2456,10 @@ Suppose that a user specifies that a model output *o* shall have a value no grea A PESTPP-OPT user must provide one number to characterize his/her approach to risk. This number must be between zero and one. A model-output-specific number, representing the uncertainty of that output, is then added to or subtracted from it prior to imposition of optimization constraints on that output. Provision of a value of 0.5 for this variable (signifying risk neutrality) is equivalent to ignoring parameter, and hence predictive, uncertainty. Under these circumstances, PESTPP-OPT does not calculate model output uncertainties at all. This reduces input requirements, at the same time as it accelerates the optimization process by foregoing the need to (re)calculate the J matrix and/or y vectors of equations 8.1 to 8.3. On the other hand, a value of 0.95 specifies that constraints are applied to model outputs which are corrected to represent the upper end of the 95% one-sided confidence level of that prediction. -## 8.2 Using PESTPP-OPT +## 8.2 Using PESTPP-OPT + -### 8.2.1The PEST Control File +### 8.2.1 The PEST Control File Like other members of the PEST++ suite, execution of PESTPP-OPT is initiated using a command line that references a PEST control file. See chapter 5 of this manual for details. The PEST control file supplied to PESTPP-OPT must define the optimization problem that it must solve. In particular, this PEST control file must inform it of the following: @@ -2414,39 +2483,39 @@ As is the normal protocol for members of the PEST++ suite, variables which contr Each of the above issues is now discussed in detail. -### 8.2.2 Decision Variables and Parameters +### 8.2.2 Decision Variables and Parameters In a PEST control file, each parameter is assigned to a parameter group. PEST control variables which govern the operation of finite difference derivatives are assigned to these groups.
These variables are just as important for PESTPP-OPT as they are for other members of the PEST++ suite (and indeed PEST), as PESTPP-OPT may be required to calculate multiple Jacobian and response matrices using finite parameter differences. However, for PESTPP-OPT, parameter groups play another important role. They provide the means through which entities defined in the “parameter data” section of a PEST control file are separated into decision variables on the one hand and model parameters on the other hand. The former are adjusted through the constrained optimization process that is implemented by PESTPP-OPT. The latter are used in calculation of the uncertainties of model outputs to which optimization constraints are applied; however, they are not actually adjusted. -The PEST++ control variable *opt_dec_var_groups()* informs PESTPP-OPT of the names of parameter groups whose members comprise decision variables. A comma-delimited list of parameter group names must be supplied as the value of this control variable. If this variable is omitted from the PEST control file, then PESTPP-OPT assumes that all parameter groups are comprised of decision variables (and hence that there is no uncertainty associated with any model output). +The PEST++ control variable *opt\_dec\_var\_groups()* informs PESTPP-OPT of the names of parameter groups whose members comprise decision variables. A comma-delimited list of parameter group names must be supplied as the value of this control variable. If this variable is omitted from the PEST control file, then PESTPP-OPT assumes that all parameter groups are comprised of decision variables (and hence that there is no uncertainty associated with any model output). It is by no means essential that every decision-variable be a model input. As will be discussed below, PESTPP-OPT allows arbitrary designation of the objective function defined by equation 8.4. 
All that is required is that all “parameters” that are used in calculation of the objective function belong to parameter groups that are nominated as containing decision variables. It is good practice to place decision variables which are not used by the model into different groups from those which influence model behaviour. The former are denoted herein as “external” decision variables. Then, when it fills the management response matrix (i.e., the A matrix of equation 8.5), PESTPP-OPT knows that it does not need to run the model to calculate finite-difference derivatives with respect to these decision variables. -The names of parameter groups which house external decision variables can be supplied through the *opt_external_dev_var_groups()* control variable. The value of this variable is a comma-delimited list of the names of parameter groups which hold external decision variables. Parameter groups whose names are supplied through the *opt_external_dev_var_groups()* keyword must also be supplied through the *opt_dec_var_groups()* keyword; the former are a subset of the latter. It is also important to note that, for the sake of PEST++ protocol consistency, all external parameters must appear in a template file, despite the fact that they are not used by the model. This can be easily accomplished by including their names in a single template file which can be matched (in the “model input” section of the PEST control file) to a model input file which the model never actually reads. +The names of parameter groups which house external decision variables can be supplied through the *opt\_external\_dec\_var\_groups()* control variable. The value of this variable is a comma-delimited list of the names of parameter groups which hold external decision variables. Parameter groups whose names are supplied through the *opt\_external\_dec\_var\_groups()* keyword must also be supplied through the *opt\_dec\_var\_groups()* keyword; the former are a subset of the latter.
It is also important to note that, for the sake of PEST++ protocol consistency, all external parameters must appear in a template file, despite the fact that they are not used by the model. This can be easily accomplished by including their names in a single template file which can be matched (in the “model input” section of the PEST control file) to a model input file which the model never actually reads. The simplex algorithm that PESTPP-OPT employs to minimize the objective function defined by equation 8.4 is quite different from that used by PEST and PESTPP-GLM to minimize the type of objective function that quantifies model-to-measurement misfit. Hence some of the control variables that are pertinent to the latter optimization process are not pertinent to the former process. In particular, decision variables that are adjusted by PESTPP-OPT cannot be log-transformed; however, they can be tied or fixed. As they are altered in order to minimize the management objective function, they are not subject to limits imposed by the FACPARMAX and RELPARMAX variables that are featured in the “control data” section of the PEST control file. -### 8.2.3 Defining the Objective Function +### 8.2.3 Defining the Objective Function -The coefficients which are employed in formulating the objective function of equation 8.4 are supplied through the *opt_obj_func()* control variable. PESTPP-OPT is informed whether this function must be maximized or minimized through the *opt_direction()* control variable. The latter must be supplied as either “min” or “max”. +The coefficients which are employed in formulating the objective function of equation 8.4 are supplied through the *opt\_obj\_func()* control variable. PESTPP-OPT is informed whether this function must be maximized or minimized through the *opt\_direction()* control variable. The latter must be supplied as either “min” or “max”. -The *opt_obj_func()* control variable provides three options for defining the objective function. 
The first is to supply the name of a prior information equation. This equation must feature all decision variables; the coefficients of these variables in the prior information equation become the coefficients *ci* of equation 8.4b. Alternatively, the name of a file can be provided. In this case PESTPP-OPT reads the coefficient associated with each decision variable from that file. The file must have two columns; entries on each line must be space, tab or comma-delimited. The first entry on each line must be the name of a decision variable while the second must be the coefficient associated with that variable. All decision variables featured in the PEST control file must appear in this external file. Any line that begins with the “#” character is treated as a comment and is therefore ignored. Figure 8.1 exemplifies such a file. The third option for the *opt_obj_func* input is to name an observation. This results in the objective function coefficients for the each of the decision variables to be taken from a row in the Jacobian matrix – that is, the objective coefficients are the sensitivities of each of the decision variables to the named observation. +The *opt\_obj\_func()* control variable provides three options for defining the objective function. The first is to supply the name of a prior information equation. This equation must feature all decision variables; the coefficients of these variables in the prior information equation become the coefficients *ci* of equation 8.4b. Alternatively, the name of a file can be provided. In this case PESTPP-OPT reads the coefficient associated with each decision variable from that file. The file must have two columns; entries on each line must be space, tab or comma-delimited. The first entry on each line must be the name of a decision variable while the second must be the coefficient associated with that variable. All decision variables featured in the PEST control file must appear in this external file. 
Any line that begins with the “\#” character is treated as a comment and is therefore ignored. Figure 8.1 exemplifies such a file. The third option for the *opt\_obj\_func()* input is to name an observation. This results in the objective function coefficients for each of the decision variables being taken from a row in the Jacobian matrix – that is, the objective coefficients are the sensitivities of the named observation to each of the decision variables. -
~ decision_variable coefficient
pump_rate1 3.345
pump_rate2 3.034
inj_rate1 4.321
inj_rate2 4.287
etc.
+
~ decision_variable coefficient
pump_rate1 3.345
pump_rate2 3.034
inj_rate1 4.321
inj_rate2 4.287
etc.
-Figure 8.1 An external file whose name is supplied with the *opt_obj_func()* variable. +Figure 8.1 An external file whose name is supplied with the *opt\_obj\_func()* variable. -If the *opt_obj_func()* control variable is not provided in the PEST control file that is featured on the PESTPP-OPT command line, PESTPP-OPT assigns coefficients to decision variables itself; each is assigned a coefficient of 1.0. +If the *opt\_obj\_func()* control variable is not provided in the PEST control file that is featured on the PESTPP-OPT command line, PESTPP-OPT assigns coefficients to decision variables itself; each is assigned a coefficient of 1.0. -###
8.2.4 Constraints +### 8.2.4 Constraints Constraints can be applied on model outputs (read from model output files using instruction files) or on prior information equations. PESTPP-OPT allows constraints to be either “less than” or “greater than” constraints; the latter are internally reformulated as “less than” constraints to meet the demands of the linear programming algorithm that it implements. Constraints are identified using observation groups. If an observation group contains “less than” constraints, then its name must begin with “l\_” (that is, the letter “el” followed by an underscore) or “less\_”; if an observation group contains “greater than” constraints then its name must begin with “g\_” or “greater\_”. Do not forget that prior information equations are also assigned to observation groups. -While the naming of observation groups in this way is sufficient to denote them as containing constraints, you can also nominate groups which contain constraints using the *opt_constraint_groups()* control variable. The value of this control variable is a comma-delimited set of observation groups. The name of each such group must begin with “l\_” or “g\_” (or “less\_” or “greater\_”) in accordance with the protocol described above so that PESTPP-OPT knows what type of constraints they are. If an *opt_constraint_groups()* keyword is supplied, and if a group name that begins with “l\_” or “g\_” (or “less\_” or “greater”) is omitted from the list of groups supplied through this variable, then members of that group do not comprise optimization constraints. They are therefore observations which comprise part of the calibration dataset (unless the pertinent observations are assigned weights of zero in the PEST control file). +While the naming of observation groups in this way is sufficient to denote them as containing constraints, you can also nominate groups which contain constraints using the *opt\_constraint\_groups()* control variable. 
The value of this control variable is a comma-delimited list of observation group names. The name of each such group must begin with “l\_” or “g\_” (or “less\_” or “greater\_”) in accordance with the protocol described above so that PESTPP-OPT knows what type of constraints they are. If an *opt\_constraint\_groups()* keyword is supplied, and if a group name that begins with “l\_” or “g\_” (or “less\_” or “greater\_”) is omitted from the list of groups supplied through this variable, then members of that group do not comprise optimization constraints. They are therefore observations which comprise part of the calibration dataset (unless the pertinent observations are assigned weights of zero in the PEST control file). -It is important to note that weights assigned in the PEST control file to model outputs to which constraints are applied have no bearing on their operation, unless they are set to zero. If the weight associated with a constraint is set to zero, then the constraint is disabled. However, if the weight is positive, its value does not affect the strength with which the constraint is applied, with one exception: if the *opt_std_weights()* option is used, then the weight values for the non-zero weight constraints are treated as standard deviations (uncertainty) for use in the chance constraint process. This option can help speed up the repeated application of PESTPP-OPT by allowing users to pre-calculate constraint uncertainty through application of PREDUNC, pyEMU or through empirical constraint uncertainty estimation via ensembles. In this way, PESTPP-OPT does not need to solve any linear analysis equations and instead simply uses these standard deviations directly. +It is important to note that weights assigned in the PEST control file to model outputs to which constraints are applied have no bearing on their operation, unless they are set to zero. If the weight associated with a constraint is set to zero, then the constraint is disabled.
However, if the weight is positive, its value does not affect the strength with which the constraint is applied, with one exception: if the *opt\_std\_weights()* option is used, then the weight values for the non-zero weight constraints are treated as standard deviations (uncertainty) for use in the chance constraint process. This option can help speed up the repeated application of PESTPP-OPT by allowing users to pre-calculate constraint uncertainty through application of PREDUNC, pyEMU or through empirical constraint uncertainty estimation via ensembles. In this way, PESTPP-OPT does not need to solve any linear analysis equations and instead simply uses these standard deviations directly. In the simplex algorithm, all constraints are “hard”. It is the nature of most constrained optimization problems that the solution to that problem lies at a “corner” of decision variable space that is defined by the intersection of constraint surfaces. In that corner, at least one constraint specification is exactly met. @@ -2454,134 +2523,138 @@ In addition to constraints applied to model outputs and prior information equati As presently programmed, PESTPP-OPT requires that at least one constraint be applied to a model output. Hence if the only constraints that are specified in a PEST control file that is read by PESTPP-OPT are those on individual decision variables through their bounds, and/or on prior information equations that feature those decision variables, PESTPP-OPT will cease execution with an appropriate message. -### 8.2.5 Observations +### 8.2.5 Observations -If an observation that is featured in a PEST control file is not denoted as a constraint, then it is used in the notional calibration process through which posterior parameter uncertainties are calculated. (It is important to note that the same does not apply to prior information equations; if a prior information equation does not feature a decision variable, then PESTPP_OPT simply ignores it.) 
These posterior parameter uncertainties are used to calculate the uncertainties of model outputs to which constraints are applied; see equations 8.1 to 8.3. The calibration process is described as “notional” because model parameters are not actually adjusted to fit the calibration dataset. Instead, PESTPP-OPT assumes that parameter values have already been adjusted. The posterior uncertainties associated with these parameters are then calculated using equation 8.2. Observations that are employed in the notional calibration process must be featured in the PEST control file that is provided to PESTPP-OPT so that it can calculate the terms of this equation. However, it is important to note that, in calculating posterior parameter and model output uncertainties, PESTPP-OPT takes no notice of the values of observations that must be supplied in the “observation data” section of the PEST control file, for these do not appear in equations 8.1 to 8.3; only sensitivities are featured in these equations. The same applies to parameters; hence their values do not need to change in implementing the notional calibration exercise that is embodied in these equations. +If an observation that is featured in a PEST control file is not denoted as a constraint, then it is used in the notional calibration process through which posterior parameter uncertainties are calculated. (It is important to note that the same does not apply to prior information equations; if a prior information equation does not feature a decision variable, then PESTPP-OPT simply ignores it.) These posterior parameter uncertainties are used to calculate the uncertainties of model outputs to which constraints are applied; see equations 8.1 to 8.3. The calibration process is described as “notional” because model parameters are not actually adjusted to fit the calibration dataset. Instead, PESTPP-OPT assumes that parameter values have already been adjusted.
The posterior uncertainties associated with these parameters are then calculated using equation 8.2. Observations that are employed in the notional calibration process must be featured in the PEST control file that is provided to PESTPP-OPT so that it can calculate the terms of this equation. However, it is important to note that, in calculating posterior parameter and model output uncertainties, PESTPP-OPT takes no notice of the values of observations that must be supplied in the “observation data” section of the PEST control file, for these do not appear in equations 8.1 to 8.3; only sensitivities are featured in these equations. The same applies to parameters; hence their values do not need to change in implementing the notional calibration exercise that is embodied in these equations. The weights assigned to observations that are cited in the “observation data” section of the PEST control file are important. These are used in calculating the C(ε) matrix that appears in equations 8.1 and 8.2. PESTPP-OPT assumes that each such weight is equal to the inverse of the standard deviation of noise associated with the corresponding observation. Hence each weight is squared, inverted, and then assigned to the appropriate diagonal element of C(ε) in these equations. In many instances, the weights that are associated with observations in a PESTPP-OPT control file may therefore be different from those that were used in a preceding model calibration process in which parameters were actually adjusted. Utilities such as PWTADJ2 from the PEST suite can be used to adjust calibration weights so that their magnitudes are commensurate with the level of model-to-measurement fit attained through the calibration process. A good rule of thumb is that weights should be such that the (measurement) objective function attained through the calibration process is roughly equal to the number of non-zero weighted observations featured in that process (collectively or on a group-by-group basis).
To remove an observation from the notional calibration process, set its weight to zero. In some optimization contexts, it may be desirable to set all observation weights to zero. The notional calibration exercise through which a posterior covariance matrix is calculated from a prior covariance matrix is then foregone. In this case, PESTPP-OPT uses the prior covariance matrix to calculate the uncertainties associated with model outputs to which constraints are applied. (This covariance matrix is supplied to PESTPP-OPT in the manner described below.) Alternatively, if all weights are set to zero, a user may provide PESTPP-OPT with a self-calculated posterior parameter covariance matrix in place of the prior parameter covariance matrix for use in calculating parameter and model output uncertainty. For reasons which will be discussed below, this strategy may enhance the efficiency of PESTPP-OPT usage. -### 8.2.6 Regularization +### 8.2.6 Regularization It is recommended practice when calibrating a model to include Tikhonov regularization in the inversion process (often through a suite of prior information equations). Tikhonov regularization expresses expert knowledge as it pertains to parameters. When using PESTPP-OPT, this same expert knowledge is expressed through the prior covariance matrix featured in equations 8.1 and 8.2. All regularization that was employed in a previous calibration exercise should be removed from a PEST control file before using PESTPP-OPT. -### 8.2.7 Prior Covariance Matrix +### 8.2.7 Prior Covariance Matrix -A prior covariance matrix can be supplied to PESTPP-OPT using the PEST++ *parcov()* control variable; this variable is also used by PESTPP-GLM and PESTPP-IES. The name of a covariance matrix file (with extension *.cov*), parameter uncertainty file (with extension *.unc*), or binary file containing a covariance matrix (with extension *.jco* or *.jcb*) can be supplied as the value of this keyword. 
(See appendix B of this manual for specifications of these file types.) Variances and covariances featured in this file must pertain to the logs (to base 10) of parameters that are declared as log-transformed in the PEST control file. There is no need to feature any decision variables in this file, as these do not appear in equations 8.1 and 8.2. The user is reminded that parameter covariance matrices can be calculated using PyEMU, as well as the PPCOV, PPCOV3D, PPCOV_SVA and PPCOV3D_SVA utilities available from the PEST Groundwater Data Utilities suite. +A prior covariance matrix can be supplied to PESTPP-OPT using the PEST++ *parcov()* control variable; this variable is also used by PESTPP-GLM and PESTPP-IES. The name of a covariance matrix file (with extension *.cov*), parameter uncertainty file (with extension *.unc*), or binary file containing a covariance matrix (with extension *.jco* or *.jcb*) can be supplied as the value of this keyword. (See appendix B of this manual for specifications of these file types.) Variances and covariances featured in this file must pertain to the logs (to base 10) of parameters that are declared as log-transformed in the PEST control file. There is no need to feature any decision variables in this file, as these do not appear in equations 8.1 and 8.2. The user is reminded that parameter covariance matrices can be calculated using PyEMU, as well as the PPCOV, PPCOV3D, PPCOV\_SVA and PPCOV3D\_SVA utilities available from the PEST Groundwater Data Utilities suite. -If a covariance matrix is not supplied, then PESTPP-OPT calculates a prior covariance matrix itself. In doing so, it assumes that parameters are statistically independent, and that the difference between the upper and lower bounds of a parameter (with log transformation taken into account) is equal to 4 standard deviations of its prior probability distribution. 
(An alternative number of standard deviations can be provided through the *par_sigma_range()* control variable.) +If a covariance matrix is not supplied, then PESTPP-OPT calculates a prior covariance matrix itself. In doing so, it assumes that parameters are statistically independent, and that the difference between the upper and lower bounds of a parameter (with log transformation taken into account) is equal to 4 standard deviations of its prior probability distribution. (An alternative number of standard deviations can be provided through the *par\_sigma\_range()* control variable.) -### 8.2.8 Risk +### 8.2.8 Risk -Using the *opt_risk()* control variable, a user specifies his/her disposition with respect to risk. The setting of this variable determines the value of δ*o* discussed in section 8.1. This is the value that is added/subtracted to/from a model output before a constraint is applied to that output. +Using the *opt\_risk()* control variable, a user specifies his/her disposition with respect to risk. The setting of this variable determines the value of δ*o* discussed in section 8.1. This is the value that is added/subtracted to/from a model output before a constraint is applied to that output. -The value supplied for *opt_risk()* should be greater than zero and less than one. If the value supplied for *opt_risk()* is 0.5, then operation of PESTPP-OPT is risk neutral. In this case δ*o* is zero. Under these circumstances PESTPP-OPT does not calculate model output uncertainty at all. It therefore does not need to read (or calculate for itself) a prior parameter covariance matrix. Also, it does not need to read (or calculate for itself) derivatives of model outputs with respect to model parameters which are not decision-variables. +The value supplied for *opt\_risk()* should be greater than zero and less than one. If the value supplied for *opt\_risk()* is 0.5, then operation of PESTPP-OPT is risk neutral. In this case δ*o* is zero. 
Under these circumstances PESTPP-OPT does not calculate model output uncertainty at all. It therefore does not need to read (or calculate for itself) a prior parameter covariance matrix. Also, it does not need to read (or calculate for itself) derivatives of model outputs with respect to model parameters which are not decision-variables. -An *opt_risk()* setting of greater than 0.5 indicates risk aversion. For “less than” constraints (i.e., constraints for which a system state or flux must be less than a certain value), δ*o* is added to the model-calculated system state or flux (i.e., *o*) so that the constraint is applied to *o* + δ*o*. The opposite applies for “greater than” constraints. Suppose, for example, that *opt_risk()* is supplied as 0.95. Then, for a “less than” constraint, δ*o* is calculated to be such that there is a 95% chance that the real system state or flux is less than *o* + δ*o*. Similarly, for a “greater than” constraint, δ*o* is calculated to be such that there is a 95% chance that the real system state or flux is greater than *o* – δ*o*. In both of these cases *o* is the value that the model calculates for this quantity based on the current values of decision variables. +An *opt\_risk()* setting of greater than 0.5 indicates risk aversion. For “less than” constraints (i.e., constraints for which a system state or flux must be less than a certain value), δ*o* is added to the model-calculated system state or flux (i.e., *o*) so that the constraint is applied to *o* + δ*o*. The opposite applies for “greater than” constraints. Suppose, for example, that *opt\_risk()* is supplied as 0.95. Then, for a “less than” constraint, δ*o* is calculated to be such that there is a 95% chance that the real system state or flux is less than *o* + δ*o*. Similarly, for a “greater than” constraint, δ*o* is calculated to be such that there is a 95% chance that the real system state or flux is greater than *o* – δ*o*. 
In both of these cases *o* is the value that the model calculates for this quantity based on the current values of decision variables. -A setting for *opt_risk()* that is less than 0.5 indicates risk tolerance. For “less than” constraints, the constraint is applied to *o* - δ*o*. The opposite applies for “greater than” constraints. Suppose that *opt_risk()* is supplied as 0.05. Then, for a “less than” constraint, δ*o* is calculated to be such that there is a 5% chance that the real system state or flux corresponding to a certain set of decision variables is less than *o* - δ*o*. Similarly, for a “greater than” constraint, δ*o* is calculated to be such that there is a 5% chance that the real system state or flux corresponding to a certain set of decision variables is greater than *o* + δ*o*. +A setting for *opt\_risk()* that is less than 0.5 indicates risk tolerance. For “less than” constraints, the constraint is applied to *o* - δ*o*. The opposite applies for “greater than” constraints. Suppose that *opt\_risk()* is supplied as 0.05. Then, for a “less than” constraint, δ*o* is calculated to be such that there is a 5% chance that the real system state or flux corresponding to a certain set of decision variables is less than *o* - δ*o*. Similarly, for a “greater than” constraint, δ*o* is calculated to be such that there is a 5% chance that the real system state or flux corresponding to a certain set of decision variables is greater than *o* + δ*o*. -### 8.2.9 Jacobian and Response Matrices +### 8.2.9 Jacobian and Response Matrices During every iteration of the constrained optimization process, PESTPP-OPT calculates derivatives of model outputs to which constraints are applied to decision variables whose values are optimized. In accordance with the normal PEST/PEST++ protocol, control variables which govern calculation of finite-difference derivatives are read from the “parameter data” section of the PEST control file. 
-Unless *opt_risk()* is set to 0.5, PESTPP-OPT must obtain partial derivatives which comprise the J and y vectors appearing in equations 8.1 to 8.3. The J matrix contains partial derivatives of calibration-relevant model outputs to parameters featured in the PEST control file which are not decision variables. The y vector contains partial derivatives of model outputs to which constraints are applied to these same parameters. These partial derivatives are also calculated using finite parameter differences. +Unless *opt\_risk()* is set to 0.5, PESTPP-OPT must obtain partial derivatives which comprise the J and y vectors appearing in equations 8.1 to 8.3. The J matrix contains partial derivatives of calibration-relevant model outputs to parameters featured in the PEST control file which are not decision variables. The y vector contains partial derivatives of model outputs to which constraints are applied to these same parameters. These partial derivatives are also calculated using finite parameter differences. -Unlike derivatives of model outputs with respect to decision variables, derivatives of model outputs with respect to model parameters do not necessarily need to be re-calculated during every iteration of the PESTPP-OPT optimization process. In fact, PESTPP-OPT can calculate values for δ*o* using a set of derivatives that are supplied externally, at the start of the optimization process (see below). It is possible, however, that sensitivities of model outputs to parameters will change with the values of decision variables. If a user is worried about this, he/she can inform PESTPP-OPT that it should update the J matrix and y vector every now and then. The iteration update interval is set through the *opt_recalc_fosm_every()* control variable. This must be provided as an integer. If it is set to 1, then J and y are updated at the beginning of every iteration of the constrained optimization process. 
If it is set to 2, then J and y are calculated at the beginning of the constrained optimization process (if they are not supplied externally), and then at the beginning of every second iteration of that process. A similar protocol applies for higher values of this variable. On the other hand, if *opt_recalc_fosm_every()*is set to an exceedingly high value, then J and y are not updated at all. Nor, therefore, is the value for δ*o* updated for all model outputs to which chance constraints are applied. +Unlike derivatives of model outputs with respect to decision variables, derivatives of model outputs with respect to model parameters do not necessarily need to be re-calculated during every iteration of the PESTPP-OPT optimization process. In fact, PESTPP-OPT can calculate values for δ*o* using a set of derivatives that are supplied externally, at the start of the optimization process (see below). It is possible, however, that sensitivities of model outputs to parameters will change with the values of decision variables. If a user is worried about this, he/she can inform PESTPP-OPT that it should update the J matrix and y vector every now and then. The iteration update interval is set through the *opt\_recalc\_fosm\_every()* control variable. This must be provided as an integer. If it is set to 1, then J and y are updated at the beginning of every iteration of the constrained optimization process. If it is set to 2, then J and y are calculated at the beginning of the constrained optimization process (if they are not supplied externally), and then at the beginning of every second iteration of that process. A similar protocol applies for higher values of this variable. On the other hand, if *opt\_recalc\_fosm\_every()* is set to an exceedingly high value, then J and y are not updated at all. Nor, therefore, is the value for δ*o* updated for all model outputs to which chance constraints are applied. 
-It may be possible to avoid calculation of at least some partial derivatives through use of the *base_jacobian()* control variable that is described in documentation for PESTPP-GLM (see section 6.2.7). Recall that the value of this variable is the name of a binary Jacobian matrix file (with extension *.jco* or *.jcb*). If this file provides sensitivities for all calibration-relevant model outputs with respect to all model parameters, then PESTPP-OPT does not need to undertake any model runs for calculation of J during the first iteration of the optimization process. If it also includes sensitivities of constraint-relevant model outputs to all model parameters, then calculation of y during the first iteration of the optimization process is also avoided. If it also includes sensitivities of constraint-relevant model outputs to all decision variables, then calculation of optimization sensitivities during the first iteration of the optimization process is precluded. After reading an external Jacobian matrix, PESTPP-OPT works out for itself what model runs must be undertaken during the first iteration of the optimization process to calculate partial derivatives that are missing from this matrix. +It may be possible to avoid calculation of at least some partial derivatives through use of the *base\_jacobian()* control variable that is described in documentation for PESTPP-GLM (see section 6.2.7). Recall that the value of this variable is the name of a binary Jacobian matrix file (with extension *.jco* or *.jcb*). If this file provides sensitivities for all calibration-relevant model outputs with respect to all model parameters, then PESTPP-OPT does not need to undertake any model runs for calculation of J during the first iteration of the optimization process. If it also includes sensitivities of constraint-relevant model outputs to all model parameters, then calculation of y during the first iteration of the optimization process is also avoided. 
If it also includes sensitivities of constraint-relevant model outputs to all decision variables, then calculation of optimization sensitivities during the first iteration of the optimization process is precluded. After reading an external Jacobian matrix, PESTPP-OPT works out for itself what model runs must be undertaken during the first iteration of the optimization process to calculate partial derivatives that are missing from this matrix. -Filling of the J matrix of equations 8.1 and 8.2 can also be avoided if weights assigned to all calibration-relevant observations in the “observation data” section of the PEST control file are set to zero, or if no calibration-relevant observations are included in this section at all. This signifies to PESTPP-OPT that the model is uncalibrated. PESTPP-OPT then uses prior parameter uncertainties, rather than posterior parameter uncertainties, for calculation of δ*o* values for constraint-relevant model outputs. Using the *parcov()* control variable, a user may wish to supply a covariance matrix to PESTPP-OPT instead of letting PESTPP-OPT calculate prior parameter uncertainties itself from parameter bounds (and/or optional *standard_deviation* in external files). Under these circumstances he/she may wish to provide PESTPP-OPT with a posterior parameter covariance matrix instead of a prior covariance matrix. Because PESTPP-OPT “thinks” that this is a prior parameter covariance matrix, and because it has been informed that this matrix does not need modification in accordance with the notional calibration exercise that is embedded in equations 8.1 and 8.2, it simply uses this matrix for calculation of δ*o*; it does not expend model runs to calculate J. This strategy can speed up the optimization process considerably, at the same time as it ensures that δ*o* is calculated using post-calibration uncertainties. 
+Filling of the J matrix of equations 8.1 and 8.2 can also be avoided if weights assigned to all calibration-relevant observations in the “observation data” section of the PEST control file are set to zero, or if no calibration-relevant observations are included in this section at all. This signifies to PESTPP-OPT that the model is uncalibrated. PESTPP-OPT then uses prior parameter uncertainties, rather than posterior parameter uncertainties, for calculation of δ*o* values for constraint-relevant model outputs. Using the *parcov()* control variable, a user may wish to supply a covariance matrix to PESTPP-OPT instead of letting PESTPP-OPT calculate prior parameter uncertainties itself from parameter bounds (and/or optional *standard\_deviation* in external files). Under these circumstances he/she may wish to provide PESTPP-OPT with a posterior parameter covariance matrix instead of a prior covariance matrix. Because PESTPP-OPT “thinks” that this is a prior parameter covariance matrix, and because it has been informed that this matrix does not need modification in accordance with the notional calibration exercise that is embedded in equations 8.1 and 8.2, it simply uses this matrix for calculation of δ*o*; it does not expend model runs to calculate J. This strategy can speed up the optimization process considerably, at the same time as it ensures that δ*o* is calculated using post-calibration uncertainties. -### 8.2.10 Solution Convergence +### 8.2.10 Solution Convergence -Notwithstanding the nonlinear nature of most models, the constrained optimization problem that is solved by PESTPP-OPT is formulated as a linear problem. Model nonlinearities are accommodated by solving this problem in a progressive fashion through a series of iterations in which sensitivities to decision variables are re-calculated during every iteration. 
This sequential linear programming (SLP) process is deemed to be complete when neither the objective function, nor any decision variable, changes by more than a certain (small) amount from one iteration to the next. This amount is supplied by the user as the value of the *opt_iter_tol()* control variable. PESTPP-OPT provides a default value of 0.001 for this variable. +Notwithstanding the nonlinear nature of most models, the constrained optimization problem that is solved by PESTPP-OPT is formulated as a linear problem. Model nonlinearities are accommodated by solving this problem in a progressive fashion through a series of iterations in which sensitivities to decision variables are re-calculated during every iteration. This sequential linear programming (SLP) process is deemed to be complete when neither the objective function, nor any decision variable, changes by more than a certain (small) amount from one iteration to the next. This amount is supplied by the user as the value of the *opt\_iter\_tol()* control variable. PESTPP-OPT provides a default value of 0.001 for this variable. -As was stated above, PESTPP-OPT uses the open source CLP library supplied by the Computational Infrastructure for Operational Research (COIN-OR) project. This algorithm provides a history of the SLP solution process, with a level of detail that is set by the calling program. A PESTPP-OPT user can gain access to this history using the *opt_coin_log()* control variable. An integer in the range 1 to 4 must be supplied for its value, with a higher value requesting greater verbosity. The record is written to a file named *case.coin_log* where *case.pst* is the PEST control file whose name is supplied on the PESTPP-OPT command line. +As was stated above, PESTPP-OPT uses the open source CLP library supplied by the Computational Infrastructure for Operational Research (COIN-OR) project. 
This algorithm provides a history of the SLP solution process, with a level of detail that is set by the calling program. A PESTPP-OPT user can gain access to this history using the *opt\_coin\_log()* control variable. An integer in the range 1 to 4 must be supplied for its value, with a higher value requesting greater verbosity. The record is written to a file named *case.coin\_log* where *case.pst* is the PEST control file whose name is supplied on the PESTPP-OPT command line. Nonlinearities of constraint-relevant model outputs with respect to parameters which are not decision variables can be accommodated through intermittent re-calculation of J and y during the SLP process. However, it is important to keep in mind that this strategy constitutes only partial accommodation of this type of nonlinearity, as model parameters which are not decision-variables are not actually varied from iteration to iteration of the SLP process. Re-calculation of J and y accommodates only the effect that changes in the values of decision variables have on these sensitivities. It does not accommodate changes in J and y that may be incurred by variability of model parameters over ranges denoted by their posterior uncertainties. Nor does it accommodate the fact that equations 8.1 to 8.3 assume model linearity with respect to these parameters. -### 8.2.11 Other Control Variables +### 8.2.11 Other Control Variables In common with all other members of the PEST++ suite, a PEST control file used by PESTPP-OPT can include variables that govern parallel run management. See section 5.3 of this manual. -### 8.2.12 Final Model Run +### 8.2.12 Final Model Run -Once it has completed the constrained optimization process, PESTPP-OPT undertakes one final model run in which it employs optimized values of all decision variables. The optimized values of these variables thus remain on model input files when PESTPP-OPT ceases execution; model output files reflect these inputs. 
Undertaking of the final model run can be foregone through the use of the aptly named *opt_skip_final()* option. +Once it has completed the constrained optimization process, PESTPP-OPT undertakes one final model run in which it employs optimized values of all decision variables. The optimized values of these variables thus remain on model input files when PESTPP-OPT ceases execution; model output files reflect these inputs. Undertaking of the final model run can be foregone through the use of the aptly named *opt\_skip\_final()* option. -### 8.2.13 Restarts +### 8.2.13 Restarts -As presently programmed, a prematurely-terminated PESTPP-OPT run cannot be restarted. However, PESTPP-OPT does support use of the *hotstart_resfile()* and *base_jacobian()* control variables. Strategic use of functionality provided by these variables effectively constitutes a restart. +As presently programmed, a prematurely-terminated PESTPP-OPT run cannot be restarted. However, PESTPP-OPT does support use of the *hotstart\_resfile()* and *base\_jacobian()* control variables. Strategic use of functionality provided by these variables effectively constitutes a restart. -### 8.2.14 Zero Run Solution +### 8.2.14 Zero Run Solution -PESTPP-OPT offers functionality for solving the chance-constrained SLP problem without the requirement for any model runs. If a user activates the *base_jacobian()*, *hotstart_resfile()* and *opt_skip_final()* options while setting the NOPTMAX control variable to 1, then PESTPP-OPT will not undertake any model runs at all. Instead, it will solve the chance-constrained linear programming problem specified in the control file, report optimal decision variable values and the final objective function, and then cease execution. This can be a useful strategy for exploring the implications of changing decision variable bounds, constraints, risk and/or any of the factors affecting chance constraints. 
The latter can include prior parameter uncertainties, and the number of observations (and their weights) used to condition parameters. +PESTPP-OPT offers functionality for solving the chance-constrained SLP problem without the requirement for any model runs. If a user activates the *base\_jacobian()*, *hotstart\_resfile()* and *opt\_skip\_final()* options while setting the NOPTMAX control variable to 1, then PESTPP-OPT will not undertake any model runs at all. Instead, it will solve the chance-constrained linear programming problem specified in the control file, report optimal decision variable values and the final objective function, and then cease execution. This can be a useful strategy for exploring the implications of changing decision variable bounds, constraints, risk and/or any of the factors affecting chance constraints. The latter can include prior parameter uncertainties, and the number of observations (and their weights) used to condition parameters. + +## 8.3 PESTPP-OPT Output Files -## 8.3 PESTPP-OPT Output Files Files recorded by PESTPP-OPT are listed in the following table. The contents of this table are based on the assumption that the PEST control file on which constrained optimization is based is named *case.pst*. -| File | Contents | -|------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| *case.rec* | The run record file. This file begins by echoing information provided in the PEST control file. 
It then records the history of the constrained optimization process undertaken by PESTPP-OPT. Optimization results are recorded at the end of this file. | -| *case.par* | A parameter value file. This file records best parameters that have been calculated up to any stage in the constrained optimization process. Note that only the values of parameters which are identified as decision variables change from iteration to iteration. | -| *case.N.par* | A parameter value file listing parameters calculated at the end of iteration *N* of the constrained optimization process. | -| *case.res* | A residuals file, recorded at the end of the PESTPP-OPT constrained optimization process. This is identical to the file of the same named recorded by PESTPP-GLM at the end of an inversion process. Note that calibration-pertinent model outputs do not change from the beginning to the end of the constrained optimization process, as parameters that are not decision variables are not altered by PESTPP-OPT. However, model outputs to which constraints are applied change as decision variables are updated. | -| *case.N.jcb.rei* | A residuals file calculated using the values for decision variables used when evaluating the response/Jacobian matrix at the start of iteration N. | -| *case.N.sim.rei* | A residuals file containing the modelled constraint values calculated using the optimal decision variables at the end of iteration N. Not written if solution is infeasible. | -| *case.N.est.rei* | A residuals file containing the estimated constraint (from the linear program) using the optimal decision variables at the end of iteration N . Not written if solution is infeasible. | -| *case.N.sim+fosm.rei* | A residuals value containing the modelled constraint values plus the chance constraint (FOSM) offset values at the end of iteration N. Only written if chance constraints are used. Not written if solution is infeasible. 
| -| *case.N.est+fosm.rei* | A residuals value containing estimated constraint values plus the chance constraint (FOSM) offset values at the end of iteration N. Only written if the chance constraints are used. Not written if solution is infeasible. | -| *case.log* | Performance log. This file is similar to those recorded by other members of the PEST++ suite. It records the times at which PESTPP-OPT began and ended various processing tasks. | -| *case.coin_log* | Log of operations performed by the public domain SLP optimization algorithm. | -| *case.rmr* | Parallel run management record file. This is recorded if PESTPP-OPT undertakes model runs in parallel. | -| *case.rnj* | A binary file used for run management. | -| *case.N.par_stack.csv* | Optional parameter stack saved each iteration | -| *case.N.obs_stack.csv* | Optional observation stack saved each iteration | +| File | Contents | +|-------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| *case.rec* | The run record file. This file begins by echoing information provided in the PEST control file. It then records the history of the constrained optimization process undertaken by PESTPP-OPT. Optimization results are recorded at the end of this file. | +| *case.par* | A parameter value file. This file records best parameters that have been calculated up to any stage in the constrained optimization process. Note that only the values of parameters which are identified as decision variables change from iteration to iteration. 
| +| *case.N.par* | A parameter value file listing parameters calculated at the end of iteration *N* of the constrained optimization process. | +| *case.res* | A residuals file, recorded at the end of the PESTPP-OPT constrained optimization process. This is identical to the file of the same name recorded by PESTPP-GLM at the end of an inversion process. Note that calibration-pertinent model outputs do not change from the beginning to the end of the constrained optimization process, as parameters that are not decision variables are not altered by PESTPP-OPT. However, model outputs to which constraints are applied change as decision variables are updated. | +| *case.N.jcb.rei* | A residuals file calculated using the values for decision variables used when evaluating the response/Jacobian matrix at the start of iteration N. | +| *case.N.sim.rei* | A residuals file containing the modelled constraint values calculated using the optimal decision variables at the end of iteration N. Not written if solution is infeasible. | +| *case.N.est.rei* | A residuals file containing the estimated constraint (from the linear program) using the optimal decision variables at the end of iteration N. Not written if solution is infeasible. | +| *case.N.sim+fosm.rei* | A residuals file containing the modelled constraint values plus the chance constraint (FOSM) offset values at the end of iteration N. Only written if chance constraints are used. Not written if solution is infeasible. | +| *case.N.est+fosm.rei* | A residuals file containing estimated constraint values plus the chance constraint (FOSM) offset values at the end of iteration N. Only written if the chance constraints are used. Not written if solution is infeasible. | +| *case.log* | Performance log. This file is similar to those recorded by other members of the PEST++ suite. It records the times at which PESTPP-OPT began and ended various processing tasks. 
| +| *case.coin\_log* | Log of operations performed by the public domain SLP optimization algorithm. | +| *case.rmr* | Parallel run management record file. This is recorded if PESTPP-OPT undertakes model runs in parallel. | +| *case.rnj* | A binary file used for run management. | +| *case.N.par\_stack.csv* | Optional parameter stack saved each iteration | +| *case.N.obs\_stack.csv* | Optional observation stack saved each iteration | Table 8.1 PESTPP-OPT output files. It is assumed that the name of the PEST control file is *case.pst*. -## 8.4 Summary of Control Variables +## 8.4 Summary of Control Variables + Table 8.2 tabulates PEST++ control variables used by PESTPP-OPT. All of these are optional. If a particular control variable is not supplied, then PESTPP-OPT provides a default value. Where appropriate, the value of the default is presented with the name of the variable in the table below. Variables discussed in section 5.3.6 that control parallel run management are not listed in the following table. Note also that the number of control variables may change with time. Refer to the PEST++ web site for variables used by the latest version of PESTPP-OPT. -| Variable | Type | Role | -|---------------------------------|----------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| *opt_dec_var_groups()* | text | Comma-delimited string identifying which parameter groups are to be treated as decision variables. If not supplied, all adjustable parameters are treated as decision variables. 
| -| *opt_external_dec_var_groups()* | text | Comma-delimited string identifying which parameter groups are to be treated as "external" decision variables, that is decision variables that do not influence model outputs and that therefore do not require a finite-difference run of the model to fill the pertinent column of the response matrix. | -| *opt_constraint_groups()* | text | Comma- delimited string identifying which observation and prior information groups are to be treated as constraints. Group names for "less than" constraints must start with "l\_" or “less\_”; group names for "greater than" constraints must start with "g\_" or “greater\_”. If this control variable is omitted, all observation and prior information groups that meet these naming conventions are treated as constraints. | -| *opt_obj_func()* | text | String identifying the prior information equation or two-column ASCII file that contains coefficients used in formulation of the objective function (see equation 8.4). If this control variable is not supplied, then each decision variable is given a coefficient of 1.0 in formulation of the objective function. | -| *opt_direction(min)* | text | Either "min" or "max". “min” specifies that the objective function be minimized, while “max” specifies that it be maximized. | -| *opt_risk(0.5)* | real | A number between 0.0 and 1.0. A value of 0.5 signifies risk neutrality. A value of 0.95 seeks a 95% risk averse application of optimization constraints, while a value of 0.05 seeks a 5% risk tolerant application of optimization constraints. | -| *opt_recalc_chance_every(1)* | integer | Number of iterations of the SLP process over which chance constraints are re-used. 
If set to 1, a calibration Jacobian matrix is calculated during every iteration of the SLP constrained optimization process if fosm-based chance constraints are used or the stack is re-evaluated if stack-based chance constraints are being used | -| *parcov()* | text | Provide the name of a JCO, JCB, UNC or COV file from which the prior covariance matrix used in FOSM analysis is read. | -| *par_sigma_range(4.0)* | real | The difference between a parameter’s upper and lower bounds expressed as standard deviations. | -| *opt_iter_toll(0.001)* | real | Solution closure criterion applied to objective function and decision variables. | -| *base_jacobian()* | text | Provide the name of a Jacobian matrix file (with extension *.jco* or *.jcb*). Sensitivities read from this file are used for the first iteration of the constrained optimization process. | -| *hotstart_resfile()* | text | The name of a residuals file produced by PESTPP-GLM or PESTPP-OPT. PESTPP-OPT assumes that model output values contained in this file correspond to the values of parameters (including decision variables) listed in the PEST control file. Hence it does not carry out the initial model run. | -| *opt_coin_log(1)* | integer | Level of verbosity of solution information recorded by optimization library functions. | -| *opt_std_weights(false)* | Boolean | Flag that identifies constraint weights as standard deviations. If set to *true*, PESTPP-OPT skips FOSM-based constraint uncertainty calculation and uses observation weights directly as standard deviations in the calculation of risk. These standard deviations can be calculated externally via PREDUNC or pyEMU, or can be derived empirically from an ensemble. Setting this flag to true will override all other chance constraint flags and options. | -| *opt_skip_final(false)* | Boolean | Flag to skip the final model run. | -| *tie_by_group(false)* | Boolean | Flag to tie all adjustable parameters together within each parameter group. 
Initial parameter ratios are maintained as parameters are adjusted. Parameters that are designated as already tied, or that have parameters tied to them, are not affected. | -| *enforce_tied_bounds(false)* | Boolean | Flag to enforce parameter bounds on any tied parameters | -| *opt_stack_size(0)* | integer | Number of realizations to use in the stack. If positive, stack-based chance constraints are used. If *opt_par_stack* is not supplied, *opt_stack_size* realizations are drawn from the Prior. If *opt_par_stack* is supplied and the stack in that file is larger than *opt_stack_size*, the stack is truncated to *opt_stack_size*. | -| *opt_par_stack()* | string | File containing a parameter stack. The file extension is used to determining CSV for binary (JCB) format. The stack in this file must constrain all adjustable parameters. | -| *opt_obs_stack()* | string | File containing an observation stack. The file extension is used to determining CSV for binary (JCB) format. Supplying this file will forego evaluating the stack for the first iteration and possibly subsequent iterations depending on the value if *opt_recalc_chance_every* | +| Variable | Type | Role | +|-------------------------------------|----------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| *opt\_dec\_var\_groups()* | text | Comma-delimited string identifying which parameter groups are to be treated as decision variables. If not supplied, all adjustable parameters are treated as decision variables. 
| +| *opt\_external\_dec\_var\_groups()* | text | Comma-delimited string identifying which parameter groups are to be treated as "external" decision variables, that is, decision variables that do not influence model outputs and that therefore do not require a finite-difference run of the model to fill the pertinent column of the response matrix. | +| *opt\_constraint\_groups()* | text | Comma-delimited string identifying which observation and prior information groups are to be treated as constraints. Group names for "less than" constraints must start with "l\_" or “less\_”; group names for "greater than" constraints must start with "g\_" or “greater\_”. If this control variable is omitted, all observation and prior information groups that meet these naming conventions are treated as constraints. | +| *opt\_obj\_func()* | text | String identifying the prior information equation or two-column ASCII file that contains coefficients used in formulation of the objective function (see equation 8.4). If this control variable is not supplied, then each decision variable is given a coefficient of 1.0 in formulation of the objective function. | +| *opt\_direction(min)* | text | Either "min" or "max". “min” specifies that the objective function be minimized, while “max” specifies that it be maximized. | +| *opt\_risk(0.5)* | real | A number between 0.0 and 1.0. A value of 0.5 signifies risk neutrality. A value of 0.95 seeks a 95% risk averse application of optimization constraints, while a value of 0.05 seeks a 5% risk tolerant application of optimization constraints. | +| *opt\_recalc\_chance\_every(1)* | integer | Number of iterations of the SLP process over which chance constraints are re-used. 
If set to 1, a calibration Jacobian matrix is calculated during every iteration of the SLP constrained optimization process if fosm-based chance constraints are used or the stack is re-evaluated if stack-based chance constraints are being used | +| *parcov()* | text | Provide the name of a JCO, JCB, UNC or COV file from which the prior covariance matrix used in FOSM analysis is read. | +| *par\_sigma\_range(4.0)* | real | The difference between a parameter’s upper and lower bounds expressed as standard deviations. | +| *opt\_iter\_toll(0.001)* | real | Solution closure criterion applied to objective function and decision variables. | +| *base\_jacobian()* | text | Provide the name of a Jacobian matrix file (with extension *.jco* or *.jcb*). Sensitivities read from this file are used for the first iteration of the constrained optimization process. | +| *hotstart\_resfile()* | text | The name of a residuals file produced by PESTPP-GLM or PESTPP-OPT. PESTPP-OPT assumes that model output values contained in this file correspond to the values of parameters (including decision variables) listed in the PEST control file. Hence it does not carry out the initial model run. | +| *opt\_coin\_log(1)* | integer | Level of verbosity of solution information recorded by optimization library functions. | +| *opt\_std\_weights(false)* | Boolean | Flag that identifies constraint weights as standard deviations. If set to *true*, PESTPP-OPT skips FOSM-based constraint uncertainty calculation and uses observation weights directly as standard deviations in the calculation of risk. These standard deviations can be calculated externally via PREDUNC or pyEMU, or can be derived empirically from an ensemble. Setting this flag to true will override all other chance constraint flags and options. | +| *opt\_skip\_final(false)* | Boolean | Flag to skip the final model run. | +| *tie\_by\_group(false)* | Boolean | Flag to tie all adjustable parameters together within each parameter group. 
Initial parameter ratios are maintained as parameters are adjusted. Parameters that are designated as already tied, or that have parameters tied to them, are not affected. | +| *enforce\_tied\_bounds(false)* | Boolean | Flag to enforce parameter bounds on any tied parameters. | +| *opt\_stack\_size(0)* | integer | Number of realizations to use in the stack. If positive, stack-based chance constraints are used. If *opt\_par\_stack* is not supplied, *opt\_stack\_size* realizations are drawn from the Prior. If *opt\_par\_stack* is supplied and the stack in that file is larger than *opt\_stack\_size*, the stack is truncated to *opt\_stack\_size*. | +| *opt\_par\_stack()* | string | File containing a parameter stack. The file extension is used to determine whether the file is in CSV or binary (JCB) format. The stack in this file must contain all adjustable parameters. | +| *opt\_obs\_stack()* | string | File containing an observation stack. The file extension is used to determine whether the file is in CSV or binary (JCB) format. Supplying this file will forego evaluating the stack for the first iteration and possibly subsequent iterations depending on the value of *opt\_recalc\_chance\_every*. | Table 8.2 PESTPP-OPT control variables. Parallel run management variables can be supplied in addition to these. See section 5.3.6. -# 9. PESTPP-IES +# 9. PESTPP-IES -## 9.1 Introduction -### 9.1.1 Publications +## 9.1 Introduction + + +### 9.1.1 Publications PESTPP-IES is described by White (2018). The reader is referred to that paper for a description of what it does and how it works, together with an example of its use. Chen and Oliver (2013) describe in detail the theory on which the iterative ensemble smoother methodology implemented by PESTPP-IES rests; they also provide deployment examples. -### 9.1.2 Overview +### 9.1.2 Overview Predictions made by environmental models are accompanied by uncertainty. 
This applies particularly to models of fluid flow and mass transport through the subsurface, where hydraulic properties and system stresses are often only poorly known. Where models are used to support management decisions, the uncertainties associated with their predictions should be quantified. Unless this is done, it is not possible to assess the range of possible outcomes of a particular management action. Without knowledge of the risks associated with an envisioned management strategy, the basis for a decision to adopt it is flawed. @@ -2599,7 +2672,7 @@ PESTPP-IES calculates samples of the posterior parameter probability distributio The numerical algorithm that is implemented by PESTPP-IES is referred to as an “iterative ensemble smoother”. The job of an ensemble smoother is to assimilate data in a “batch” sense. (This is in contrast to recursive data assimilation that is implemented using the familiar ensemble Kalman filter methodology.) For PESTPP-IES this data comprises a calibration dataset. PESTPP-IES implements an approximate form of Bayes equation wherein efficiencies are gained by combining Bayesian methods with subspace methods. These efficiencies reduce the computational burden of giving effect to Bayesian principles enormously, especially where parameter numbers are high. In fact, beyond a certain threshold set by the information content of measurements comprising a calibration dataset, its numerical burden is almost insensitive to the number of parameters that are employed by a model. This number can thus rise to the hundreds of thousands, or even millions, with little extra computational cost. Hence hydraulic property heterogeneity can be represented at the grid-cell level in a groundwater or subsurface reservoir model. -A PESTPP-IES user commences the model parameterization process with a suite of random parameter fields sampled from the prior parameter probability distribution. 
These can be generated internally by PESTPP-IES as it begins execution; if parameters exhibit prior spatial correlation this can be expressed using the *parcov()* control variable, or, if no prior parameter correlation is expected, PESTPP-IES can generate the prior parameter covariance matrix on-the-fly from the parameter bounds, or, optionally, from external parameter data files with a *standard_deviation* column. Alternatively, if a user generates random parameter fields him/herself, PESTPP-IES can be asked to read these. Where parameters are based on pilot points, the PEST RANDPAR3 utility can be used to generate initial parameter fields; PyEMU provides similar functionality. Alternatively, parameter fields expressing cell-by-cell heterogeneity may be generated with the help of geostatistical software such as SGEMS (Remy et al, 2011). In accordance with normal PEST and PEST++ protocols, all parameters must be given a name, regardless of their number; these names must be recorded in the “parameter data” section of a PEST control file, and in template files that are cited in the PEST control file. +A PESTPP-IES user commences the model parameterization process with a suite of random parameter fields sampled from the prior parameter probability distribution. These can be generated internally by PESTPP-IES as it begins execution; if parameters exhibit prior spatial correlation this can be expressed using the *parcov()* control variable, or, if no prior parameter correlation is expected, PESTPP-IES can generate the prior parameter covariance matrix on-the-fly from the parameter bounds, or, optionally, from external parameter data files with a *standard\_deviation* column. Alternatively, if a user generates random parameter fields him/herself, PESTPP-IES can be asked to read these. Where parameters are based on pilot points, the PEST RANDPAR3 utility can be used to generate initial parameter fields; PyEMU provides similar functionality. 
Alternatively, parameter fields expressing cell-by-cell heterogeneity may be generated with the help of geostatistical software such as SGEMS (Remy et al, 2011). In accordance with normal PEST and PEST++ protocols, all parameters must be given a name, regardless of their number; these names must be recorded in the “parameter data” section of a PEST control file, and in template files that are cited in the PEST control file. The suite of initial parameter fields comprises an “ensemble”. In general, the greater the number of parameter fields that comprise this ensemble, the better is the history-matching performance of PESTPP-IES. In practice however, the number of realizations is limited by computing resources. A good rule of thumb is to ensure that the number of parameter fields comprising the ensemble exceeds the dimensionality of the solution space of the inverse problem that is posed by the history-matching process. This ensures that the iterative ensemble smoother has access to the directions in parameter space that it requires in order to provide a good fit with the calibration dataset. Normally, this number can only be guessed. However, if a Jacobian matrix is available, the SUPCALC utility from the PEST suite, or functionality available through PyEMU, can be used to assess solution space dimensionality. @@ -2607,7 +2680,7 @@ Through a series of successive iterations, PESTPP-IES modifies parameter realiza It is apparent that use of PESTPP-IES constitutes a significant departure from the “calibrate first and do uncertainty analysis later” approach that underpins much environmental model history-matching. PESTPP-IES does not seek a parameter field that is deemed to “calibrate” a model. Instead, it seeks a suite of parameter fields which collectively express posterior parameter uncertainty. 
Astonishingly, as will be discussed below, the numerical cost of obtaining this suite is often low compared with that required to obtain a single “calibrated” parameter field of post-calibration minimum error variance, particularly where parameter numbers are high (which is often required to avoid underestimation of predictive uncertainty). -### 9.1.3 Ensemble Kalman Filters and Ensemble Smoothers +### 9.1.3 Ensemble Kalman Filters and Ensemble Smoothers The Kalman filter is widely used in time series processing and in engineering control. It constitutes an efficient means of processing noisy measurements in order to provide increasingly better estimates of system state, and of parameters that govern processes which determine system state. @@ -2627,7 +2700,7 @@ It is apparent from the above brief description that, unlike PESTPP-GLM and PEST PESTPP-IES includes two different solution techniques: the standard Kalman update (Evensen, 2003) (including the iterative Multiple Data Assimilation scheme of Emerick and Reynolds (2013)) and the ensemble form of the Gauss-Levenberg-Marquardt equation (Chen and Oliver, 2013). These two solution techniques differ in how they calculate parameter updates and, by default, the GLM form of Chen and Oliver (2013) is used. Conceptually, if the model is a perfect simulator of the natural system and linear, then the standard Kalman update is optimal in that it is a linearized Bayesian update. However, rarely are natural system models perfect representations and rarely are they linear. In this situation, iteration is needed to resolve nonlinearity. The MDA scheme of Emerick and Reynolds (2013) uses a series of covariance inflation factors chosen such that the inverses of these factors sum to 1, so that the cumulative effect of the assimilation iterations is theoretically the same as the standard Kalman update. 
The GLM iterative ensemble smoother update scheme of Chen and Oliver takes a different approach by using the GLM trust region formulation that penalizes parameter changes so that it too can be derived from a linear Bayesian formulation. Some preliminary testing indicates that the GLM IES solution has stronger capabilities to resolve nonlinearities but both the MDA and GLM solution schemes are widely cited in the literature. -### 9.1.4 Some Repercussions of Using Ensembles +### 9.1.4 Some Repercussions of Using Ensembles The most obvious advantage of using ensembles is their ability to sample the posterior parameter probability distribution. This is critical to a model’s support for environmental decision-making. @@ -2639,13 +2712,13 @@ The use of an ensemble to calculate partial parameter derivatives does not come Another issue associated with the use of random parameter fields is that of model stability. If the model (or models) that is run by PESTPP-IES experiences numerical difficulties when provided with some parameter sets, then use of an ensemble of random parameter fields is likely to trigger occasional model run failures, or occasional simulations whose run times are excessively long. As will be discussed below, PESTPP-IES includes functionality to accommodate this problem. -### 9.1.5 Iterations +### 9.1.5 Iterations The parameter adjustment algorithm that is implemented by PESTPP-IES is referred to as an “iterative ensemble smoother”. This reflects the fact that the process through which a prior ensemble becomes a posterior ensemble requires a number of iterations of parameter field adjustment, these iterations being necessary to deal with nonlinearity of the parameter-to-model output relationship. During each of these iterations, a new approximation to the Jacobian matrix is calculated based on the latest ensemble. Upgrades to parameter sets are then calculated using this matrix. The process is then repeated. 
An important prerequisite to success of the parameter upgrade process is use of an appropriately valued Marquardt lambda. The Marquardt lambda plays the same role in ensemble parameter field adjustment as it does in single parameter field adjustment such as that undertaken by PESTPP-GLM and PEST. A high value of lambda results in calculation of a damped (i.e., shortened) parameter upgrade vector that is aligned with the gradient of the objective function. This promulgates rapid improvement in the objective function where the latter is far above its minimum. A low value of lambda promulgates better navigation of an objective function surface in which the minimum lies at the bottom of a narrow valley, the existence of which is an outcome of a high degree of post-calibration parameter correlation. Ideally the Marquardt lambda should fall from a high value to a low value as the iterative parameter adjustment process progresses. Like PEST and PESTPP-GLM, PESTPP-IES employs a trial-and-error procedure to find the best value of the Marquardt lambda to employ at any iteration of the ensemble-based inversion process. However, numerical efficiency precludes testing of multiple lambdas when adjusting each parameter field in the entire ensemble, as this would incur an unduly large computation burden. Instead, lambda values are tested using only a few parameter fields. Once the best lambda is identified through this process, the remaining members of the ensemble are adjusted using this lambda. As was stated above, these same runs that are used for testing parameter upgrades are also used for calculation of a new Jacobian matrix. -### 9.1.6 Measurement Noise +### 9.1.6 Measurement Noise Variability between parameter fields that sample the posterior parameter probability distribution (and hence quantify the uncertainty of this distribution) arises from three sources. The first is prior parameter uncertainty. 
The second is a lack of information in the calibration dataset by which to reduce this uncertainty. The third arises from the fact that the calibration dataset is contaminated by measurement noise. @@ -2657,39 +2730,39 @@ Because of the role that weights play in the ensemble smoother parameter adjustm If weights are “correct” in an absolute sense, the objective function achieved through the calibration process should be roughly equal to the number of observations comprising the calibration dataset. This is because each weighted residual squared will, on average, be equal to 1.0. On many occasions of PESTPP-IES usage, the final objective function is not known in advance of the parameter adjustment process. Hence it must be guessed in order to support calculation of a suitable set of measurement weights. Fortunately, however, an incorrect guess rarely matters; this is especially the case where parameter uncertainty arises predominantly from lack of information in the calibration dataset, and hence is null-space dominated. -Despite the above considerations, there may be occasions when a modeller wishes to use one set of weights in the parameter adjustment process, and another set of weights for generation of observation realizations. For example, he/she may wish that contributions to the initial objective function by different observation groups be approximately balanced at the start of a parameter adjustment process. This may be achieved using, for example, the PWTADJ1 utility from the PEST suite or with the general adjustment options available in pyEMU, to implement a weighting strategy in the PESTPP-control file which is then used in the parameter upgrade process. 
In parallel with this, observation ensembles may be generated using an entirely different set of weights – pyEMU offers several methods to generate measurement noise realizations and to save them to file formats that PESTPP-IES can read directly (i.e., using the *ies_observation_ensemble()* control variable - see section 9.2.2). +Despite the above considerations, there may be occasions when a modeller wishes to use one set of weights in the parameter adjustment process, and another set of weights for generation of observation realizations. For example, he/she may wish that contributions to the initial objective function by different observation groups be approximately balanced at the start of a parameter adjustment process. This may be achieved using, for example, the PWTADJ1 utility from the PEST suite or with the general adjustment options available in pyEMU, to implement a weighting strategy in the PESTPP-control file which is then used in the parameter upgrade process. In parallel with this, observation ensembles may be generated using an entirely different set of weights – pyEMU offers several methods to generate measurement noise realizations and to save them to file formats that PESTPP-IES can read directly (i.e., using the *ies\_observation\_ensemble()* control variable - see section 9.2.2). -Optionally, users can forego the use of measurement noise realizations through the *ies_no_noise* option. In this case, realizations of measurement noise are not needed and users can set the weights in the pest control file according to the desired, subjective weighting strategy as in PEST. +Optionally, users can forego the use of measurement noise realizations through the *ies\_no\_noise* option. In this case, realizations of measurement noise are not needed and users can set the weights in the pest control file according to the desired, subjective weighting strategy as in PEST. 
-Still another option to cope with the need to define weights and noise that are non-commensurate is to employ the *external* control file section format for observation data (e.g., *\* observation data external*) and in the external observation data file(s), supply the column *standard_deviation* as values of expected noise and supply the *weight* column in the external files as the values necessary to form a balanced composite objective function. In this way, PESTPP-IES will generate realizations of observation noise using the covariance matrix implied by the *standard_deviation* entries and will use the weights during the upgrade calculation process and during objective function calculations. +Still another option to cope with the need to define weights and noise that are non-commensurate is to employ the *external* control file section format for observation data (e.g., *\* observation data external*) and in the external observation data file(s), supply the column *standard\_deviation* as values of expected noise and supply the *weight* column in the external files as the values necessary to form a balanced composite objective function. In this way, PESTPP-IES will generate realizations of observation noise using the covariance matrix implied by the *standard\_deviation* entries and will use the weights during the upgrade calculation process and during objective function calculations. -### 9.1.7 Regularization +### 9.1.7 Regularization The term “regularization” generally describes numerical devices that are employed to achieve uniqueness of ill-posed inverse problems. Regularization is thus fundamental to the notion of calibration. If regularization is properly implemented, the calibrated parameter field is one of minimized error variance. In most calibration contexts, regularization eliminates from the calibrated parameter field any heterogeneity that is not supported by direct measurements of hydraulic properties or by the calibration dataset. 
-In the iterative ensemble smoother process implemented by PESTPP-IES, regularization has a related, but different purpose; it is used to constrain parameter realizations comprising an ensemble to remain as close as possible to their initial (purely stochastic) values as they are adjusted in order for model outputs calculated using these realizations to achieve a good fit with the calibration dataset. Regularization as implemented by PESTPP-IES thus attempts to ensure that parameters comprising each realization are changed from their initial values by the smallest amount required for model outputs to reproduce field observations “acceptably” well. PESTPP-IES supports both subspace and Tikhonov regularization to help limit parameter changes. Use of the former is implicit in the numerical method that it employs to calculate parameter upgrades. Use of the latter is activated through use of the *ies_reg_factor()* control variable; see below. Regularisation, as applied to a parameter field ensemble in this fashion, thus attempts to ensure that the ensemble retains as much of its stochastic character as possible. The resulting algorithm resembles the randomized maximum likelihood methodology described by Oliver et al (2008). +In the iterative ensemble smoother process implemented by PESTPP-IES, regularization has a related, but different purpose; it is used to constrain parameter realizations comprising an ensemble to remain as close as possible to their initial (purely stochastic) values as they are adjusted in order for model outputs calculated using these realizations to achieve a good fit with the calibration dataset. Regularization as implemented by PESTPP-IES thus attempts to ensure that parameters comprising each realization are changed from their initial values by the smallest amount required for model outputs to reproduce field observations “acceptably” well. PESTPP-IES supports both subspace and Tikhonov regularization to help limit parameter changes. 
Use of the former is implicit in the numerical method that it employs to calculate parameter upgrades. Use of the latter is activated through use of the *ies\_reg\_factor()* control variable; see below. Regularisation, as applied to a parameter field ensemble in this fashion, thus attempts to ensure that the ensemble retains as much of its stochastic character as possible. The resulting algorithm resembles the randomized maximum likelihood methodology described by Oliver et al (2008). As is further discussed below, implementation of this type of regularization can only be approximate. At the same time, its use may require more iterations of the iterative ensemble smoother process than would otherwise be required to attain a good fit with the calibration dataset. It thus carries a numerical cost. It is important to note that if PESTPP-IES is provided with a PEST control file that instructs PEST to run in “regularization” mode (through provision of a “regularization” setting for the PESTMODE control variable), it ignores this setting. Furthermore, whether or not the PEST control file instructs PEST to run in “regularization” mode, PESTPP-IES ignores any prior information that it finds in this file. -As with PEST(\_HP) and PESTPP-GLM, the SVD truncation controls (i.e., MAXSING and EIGTHRESH) can also be used to enforce regularization in PESTPP-IES since these inputs control the number of singular components used in the upgrade calculation process. Setting MAXSING \< the number of realizations will result in fewer singular components be used in the upgrade process, which effectively limits the parameter adjustments. Additional regularization can also be implemented by using large lambda values and small *lambda_scale_fac* (i.e., line search) values. The optimal regularization strategy to use for any given PESTPP-IES analysis depends heavily on the problem, but generally, MAXSING seems to be the most effective and efficient means of controlling the level of fit. 
+As with PEST(\_HP) and PESTPP-GLM, the SVD truncation controls (i.e., MAXSING and EIGTHRESH) can also be used to enforce regularization in PESTPP-IES since these inputs control the number of singular components used in the upgrade calculation process. Setting MAXSING to a value less than the number of realizations will result in fewer singular components being used in the upgrade process, which effectively limits the parameter adjustments. Additional regularization can also be implemented by using large lambda values and small *lambda\_scale\_fac* (i.e., line search) values. The optimal regularization strategy to use for any given PESTPP-IES analysis depends heavily on the problem, but generally, MAXSING seems to be the most effective and efficient means of controlling the level of fit. -### 9.1.8 Base Realization +### 9.1.8 Base Realization Optionally (and by default), the parameter ensemble used by PESTPP-IES can include a “base realization”. Parameter values that comprise this realization are those which are listed in the “parameter data” section of the PEST control file which PESTPP-IES reads on commencement of execution. Ideally, these parameter values are those of minimum pre-calibration error variance; that is, they comprise the expected values of parameters from an expert knowledge point of view. PESTPP-IES pairs this realization with an observation dataset that has no “manufactured” measurement noise associated with it; this dataset is comprised of measurements that appear in the “observation data” section of the PEST control file. By monitoring the fate of the parameter set comprising this base realization, a user can witness the effect that the ensemble smoother process has on a non-random parameter field. Any bias that is introduced to this parameter field, or any implausible heterogeneity that is introduced to this parameter field, is also presumably introduced to the other random parameter fields which comprise the ensemble. 
Inspection of this field can aid a modeller in assessing whether, in his/her opinion, the parameter field ensemble that emerges from the ensemble smoother process comprises a legitimate sample of the posterior parameter probability distribution. -### 9.1.9 Parameter Transformation Status +### 9.1.9 Parameter Transformation Status -The number of individual parameters which comprise a realization is equal to the number of parameters that are listed in the “parameter data” section of a PEST control file. However, parameters that are fixed in the “parameter data” section of the PEST control file are never varied from their initial values. Tied parameters maintain a fixed ratio with parent parameters, this ratio being determined by values provided for respective parameters in the “parameter data” section of the PEST control file. Note however, that if maintenance of this ratio takes a tied parameter beyond its bound (as provided in the PEST control file), then the bound is ignored, regardless of the setting of the *ies_enforce_bounds()* control variable; see below. (PEST and PESTPP-GLM treat tied parameters in the same way.). +The number of individual parameters which comprise a realization is equal to the number of parameters that are listed in the “parameter data” section of a PEST control file. However, parameters that are fixed in the “parameter data” section of the PEST control file are never varied from their initial values. Tied parameters maintain a fixed ratio with parent parameters, this ratio being determined by values provided for respective parameters in the “parameter data” section of the PEST control file. Note however, that if maintenance of this ratio takes a tied parameter beyond its bound (as provided in the PEST control file), then the bound is ignored, regardless of the setting of the *ies\_enforce\_bounds()* control variable; see below. (PEST and PESTPP-GLM treat tied parameters in the same way.). 
Where a user provides his/her own parameter ensembles, there are circumstances where he/she may wish to provide values for fixed parameters which differ from realization to realization within the ensemble, and which differ from those supplied in the PEST control file. PESTPP-IES allows this. Within each member of the ensemble, PESTPP-IES maintains all fixed parameters at their initial values as it adjusts the values of all other parameters. Note also that if a user-supplied realization omits values for fixed or tied parameters, then PESTPP-IES provides them with values from the PEST control file. As is usual practice in the PEST++ suite, if a parameter is designated as log-transformed in the “parameter data” section of a PEST control file, then adjustments that are made to the value of the parameter are actually made to the log of the parameter’s value. This takes place behind the scenes, and is invisible to the user. Strategic log-transformation of parameters can render a nonlinear problem much more linear; this can enhance the speed with which the objective function is reduced as parameter realizations comprising an ensemble are adjusted. In fact, most parameters are better log transformed than untransformed; nevertheless, sometimes log-transformation is inappropriate, for example if a parameter can adopt both positive and negative values, or if it represents the value of a quantity whose datum is arbitrary. -### 9.1.10 Inequality Observations +### 9.1.10 Inequality Observations PESTPP-IES introduces a special observation type that is not available in PESTPP-GLM or PEST, but resembles constraints supported by PESTPP-OPT. This is the “one way observation” type, which is synonymous with the inequality constraints used in PESTPP-OPT. For observations of this type, a residual is zero unless model outputs are either greater than or less than its “measured” value; the user specifies which of these apply. This reflects the nature of some types of measurements. 
However, their use is broader than this. “Greater than” and “less than” observations can comprise a powerful mechanism for inserting “soft knowledge” into the history-matching process. @@ -2697,37 +2770,37 @@ If an observation belongs to an observation group whose name begins with the str Similarly, if an observation belongs to an observation group whose name begins with the string “l\_” or “less\_”, then this observation is a “less than” observation. No objective function penalty is incurred if the modelled value of the pertinent quantity is less than the measured value listed in the “observation data” section of the PEST control file. However, if the model-calculated value is greater than the measured value, the objective function penalty is calculated in the usual manner, that is as the squared residual times the squared weight. -### 9.1.11 Localization +### 9.1.11 Localization Calculating an empirical cross-covariance between large numbers of parameters and observations from a limited number of realizations is likely to result in spurious cross-correlations. Because of this, some parameters will be adjusted when they should not be adjusted. Furthermore, when large numbers of independent observations comprise a calibration dataset, a small ensemble size will almost certainly not provide enough degrees of freedom to reproduce these data. To combat these problems, users can employ localization. The term “localization” comes from ensemble Kalman filter parlance. It refers to a strategy whereby only “local” covariances inform unmeasured states in a spatially distributed filtering problem. -PESTPP-IES supports localization through use of a localization matrix. This matrix has rows that are observation names and/or observation group names, and columns that are parameter names and/or parameter group names. Elements of the matrix should range between 0.0 and 1.0. Figure 9.1 shows an example localization matrix. 
In this example, a mixture of observation names and an observation group (“flx_river”) are used for row names while parameter group names are used for column names. Parameter groups “r1” and “w1” represent future recharge and pumping, respectively. In this example, the localization matrix “zeros out” any spurious sensitivity between historical observations and future recharge and pumping. If a localization matrix is specified, PESTPP-IES builds up the upgrade matrices for each lambda value sequentially by each row of the localization matrix. +PESTPP-IES supports localization through use of a localization matrix. This matrix has rows that are observation names and/or observation group names, and columns that are parameter names and/or parameter group names. Elements of the matrix should range between 0.0 and 1.0. Figure 9.1 shows an example localization matrix. In this example, a mixture of observation names and an observation group (“flx\_river”) are used for row names while parameter group names are used for column names. Parameter groups “r1” and “w1” represent future recharge and pumping, respectively. In this example, the localization matrix “zeros out” any spurious sensitivity between historical observations and future recharge and pumping. If a localization matrix is specified, PESTPP-IES builds up the upgrade matrices for each lambda value sequentially by each row of the localization matrix. -
4 5 2
1.0 1.0 0.0 1.0 0.0
1.0 1.0 0.0 1.0 0.0
1.0 1.0 0.0 1.0 0.0
1.0 1.0 0.0 1.0 0.0
* row names
c001cr03c10_19700102
c001cr03c16_19700102
c001cr04c09_19700102
flx_river
* column names
hk
r0
r1
w0
w1
+
4 5 2
1.0 1.0 0.0 1.0 0.0
1.0 1.0 0.0 1.0 0.0
1.0 1.0 0.0 1.0 0.0
1.0 1.0 0.0 1.0 0.0
* row names
c001cr03c10_19700102
c001cr03c16_19700102
c001cr04c09_19700102
flx_river
* column names
hk
r0
r1
w0
w1
Figure 9.1. An example localization matrix. When applying localization in a history-matching problem involving large numbers of parameters and observations, a user may wish to define a “local” neighbourhood around each observation location wherein parameters are expected to influence the simulated counterparts to observations. This, in effect, creates a series of “local” history-matching problems using subsets of adjustable parameters and observations. The number of degrees of freedom featured in each local problem can be relatively high, this allowing a small ensemble size to better reproduce large numbers of independent observations. Localization also provides protection against “spurious” (non-plausible) correlations between parameters and observations arising from the limited size of parameter ensembles. For example, standard methods of covariance calculation may suggest a correlation between a pumping rate parameter and a head that precedes it in time. Spurious correlations of this type can lead to parameter compensation and predictive bias. See Chen and Oliver (2016) for a good description of the theory and practice of localization. -Through localization, a complex parameter estimation problem can be turned into a series of independent parameter estimation problems. Suppose, for example, that localization is employed in the most granular manner, so that the localization matrix contains one column for each adjustable parameter and that each column contains a single non-zero value, this pertaining to a single observation which that parameter is presumed to influence. If large numbers of parameters are being adjusted, the parameter upgrade calculation process for a given lambda will require as many truncated SVD solves as there are adjustable parameters. This can require considerable numerical effort. 
To overcome this problem, the localized upgrade solution process in PESTPP-IES has been multithreaded; this is possible in circumstances such as these where each local solve is independent of every other local solve. The use of multiple threads is invoked through the *ies_num_threads()* control variable. It should be noted that the optimal number of threads to use is problem-specific. Furthermore, it should not exceed the number of physical cores of the host machine on which the PESTPP-IES master instance is running. +Through localization, a complex parameter estimation problem can be turned into a series of independent parameter estimation problems. Suppose, for example, that localization is employed in the most granular manner, so that the localization matrix contains one column for each adjustable parameter and that each column contains a single non-zero value, this pertaining to a single observation which that parameter is presumed to influence. If large numbers of parameters are being adjusted, the parameter upgrade calculation process for a given lambda will require as many truncated SVD solves as there are adjustable parameters. This can require considerable numerical effort. To overcome this problem, the localized upgrade solution process in PESTPP-IES has been multithreaded; this is possible in circumstances such as these where each local solve is independent of every other local solve. The use of multiple threads is invoked through the *ies\_num\_threads()* control variable. It should be noted that the optimal number of threads to use is problem-specific. Furthermore, it should not exceed the number of physical cores of the host machine on which the PESTPP-IES master instance is running. -PESTPP-IES also supports correlation-based, automatic adaptive localization. Its algorithm is based on that of Luo et al (2018). This functionality is activated by setting the *ies_autoadaloc()* control variable to *true*. 
If this localization scheme is employed, a user does not need to provide a localization matrix to define which parameters can be informed by which observations. Instead, PESTPP-IES uses the parameter ensemble and resulting model-output observation ensemble to calculate a Pearson correlation coefficient between each adjustable parameter and each non-zero-weighted observation. Because the normal usage context of PESTPP-IES is that of numerically efficient parameter estimation and uncertainty quantification, it is expected that the number of realizations that constitute an ensemble will normally be considerably smaller than the number of adjustable parameters. The estimated correlation coefficient between any given parameter and any observation is therefore likely to be somewhat in error. This error decreases (eventually to zero) as the size of an ensemble rises. +PESTPP-IES also supports correlation-based, automatic adaptive localization. Its algorithm is based on that of Luo et al (2018). This functionality is activated by setting the *ies\_autoadaloc()* control variable to *true*. If this localization scheme is employed, a user does not need to provide a localization matrix to define which parameters can be informed by which observations. Instead, PESTPP-IES uses the parameter ensemble and resulting model-output observation ensemble to calculate a Pearson correlation coefficient between each adjustable parameter and each non-zero-weighted observation. Because the normal usage context of PESTPP-IES is that of numerically efficient parameter estimation and uncertainty quantification, it is expected that the number of realizations that constitute an ensemble will normally be considerably smaller than the number of adjustable parameters. The estimated correlation coefficient between any given parameter and any observation is therefore likely to be somewhat in error. This error decreases (eventually to zero) as the size of an ensemble rises. 
Automatic adaptive localization attempts to estimate and compensate for these errors. It assesses the statistical significance of each correlation coefficient against background “noise” induced by use of a small ensemble size. This is accomplished through repeated calculation of “background” correlation coefficients between all parameters and all observations. Background correlation coefficients are estimated by applying a circular shifting operation to observation ensemble realization names with respect to their respective parameter ensembles, thus effectively “scrambling” these two ensembles. PESTPP-IES then recalculates each correlation coefficient. Note that a circular shift is simply the advancement of realization names by one place while the last realization name moves to the front of the realization name list (while holding the realization values constant). Exactly *N-*1 circular-shifts of the observation ensemble can take place through this operation, this yielding *N*-1 background correlation coefficients, where *N* is the number of realizations comprising the ensemble. If N is large enough, these background correlation coefficients would be approximately zero because the shifting operation should yield two statistically independent random variables which, with enough samples (realizations), should exhibit zero correlation. However, if a small ensemble size is employed, these background correlation coefficients may appear to be significantly nonzero. -Using this collection of background correlation coefficients and the optional *ies_autoadaloc_sigma_dist()* control variable (whose default value of 1.0) PESTPP-IES determines if the estimated correlation coefficient between any parameter and any observation is significantly (in a statistical sense) different from the background correlation coefficient. 
If this is the case, the calculated covariance between the parameter and the observation is retained and the absolute value of estimated correlation coefficient between the parameter and observation is used as the localizing constant since it naturally ranges from 0 to 1. If estimated correlation coefficient is not significantly different from the background estimates, the covariance is declared to be zero; hence the observation cannot inform that parameter during upgrade calculations. The value for *ies_autoadaloc_sigma_dist()* must be supplied as a real number; it represents the number of standard deviations from the background mean that the value of an estimated correlation coefficient must be for it to be considered significant. +Using this collection of background correlation coefficients and the optional *ies\_autoadaloc\_sigma\_dist()* control variable (whose default value of 1.0) PESTPP-IES determines if the estimated correlation coefficient between any parameter and any observation is significantly (in a statistical sense) different from the background correlation coefficient. If this is the case, the calculated covariance between the parameter and the observation is retained and the absolute value of estimated correlation coefficient between the parameter and observation is used as the localizing constant since it naturally ranges from 0 to 1. If estimated correlation coefficient is not significantly different from the background estimates, the covariance is declared to be zero; hence the observation cannot inform that parameter during upgrade calculations. The value for *ies\_autoadaloc\_sigma\_dist()* must be supplied as a real number; it represents the number of standard deviations from the background mean that the value of an estimated correlation coefficient must be for it to be considered significant. The automatic adaptive localization process can be used in conjunction with a localization matrix. 
In this case, the nonzero entries in the localization matrix are used to constrain the number of parameter-observation pairs to search for statistically significant correlations. In this mode of operation, the actual values of non-zero entries in the localization matrix are not important. The localization matrix is only used to identify plausible and possible parameter-observation correlations and the automatic adaptive localization process then calculates estimated and background correlation coefficients as described previously. -If the *ies_verbose_level()* flag is set to greater than 1, the automatic adaptive localization process implemented by PESTPP-IES will record the resulting localization matrix in a file named *case.N.autoadaloc.mat*, where case is the filename base of the PEST control file. It will also record a CSV file containing results of the adaptive localization process as *case.N.autodaloc.csv*. Both of these are recorded at the end of each iteration; *N* is the iteration number. The automatic adaptive localization process can be computationally demanding. However, it can be multi-threaded. This option is activated using the *ies_num_threads()* control variable. +If the *ies\_verbose\_level()* flag is set to greater than 1, the automatic adaptive localization process implemented by PESTPP-IES will record the resulting localization matrix in a file named *case.N.autoadaloc.mat*, where case is the filename base of the PEST control file. It will also record a CSV file containing results of the adaptive localization process as *case.N.autodaloc.csv*. Both of these are recorded at the end of each iteration; *N* is the iteration number. The automatic adaptive localization process can be computationally demanding. However, it can be multi-threaded. This option is activated using the *ies\_num\_threads()* control variable. -###
9.1.12 Use of observation noise covariance matrices +### 9.1.12 Use of observation noise covariance matrices -In standard operation model, PESTPP-IES will generate the observation noise covariance matrix (required in the PESTPP-IES parameter adjustment equation) as a diagonal matrix with diagonal entries equal to one over the squared weights listed in the control file. Using this matrix assumes there is no correlation between observations, an assumption that is not always valid, especially in the presence of model error (Doherty and Welter, 2010). Note also that with the version 2 control file, users can specify a “standard_deviation” column in an “observation data external” file – this standard deviation will be used in place of weights for generating the observation noise covariance matrix. +In standard operation mode, PESTPP-IES will generate the observation noise covariance matrix (required in the PESTPP-IES parameter adjustment equation) as a diagonal matrix with diagonal entries equal to one over the squared weights listed in the control file. Using this matrix assumes there is no correlation between observations, an assumption that is not always valid, especially in the presence of model error (Doherty and Welter, 2010). Note also that with the version 2 control file, users can specify a “standard\_deviation” column in an “observation data external” file – this standard deviation will be used in place of weights for generating the observation noise covariance matrix. Optionally, users can specify a full observation noise covariance matrix via the *obscov* option (the format of this matrix is determined by its extension - .cov for PEST-style ASCII format or .jcb for extended Jacobian binary format). In the case where *obscov* is specified, some specialized functionality is activated with PESTPP-IES.
First, if the specified noise matrix has fewer entries than there are non-zero weighted observations in the control file, PESTPP-IES resets the weight of the missing observations to 0.0, effectively reducing the number of non-zero weighted observations. Second, PESTPP-IES resets the weights of all remaining non-zero weighted observations to the square root of the inverse of the diagonal entries of the specified noise matrix. Note that PESTPP-IES will not increase the weight of any observation; weights will only be decreased. -Together, these two specialized routines enable seamless implementation of the total-error covariance approach described in Alfonso and Oliver (2019) when paired with the *ies_save_rescov* option, which tells PESTPP-IES to record the residual covariance matrix each iteration. So, a general approach to implementing the total-error covariance analysis would be to run PESTPP-IES once with the *ies_save_rescov* option active. Then, after selecting the iteration from this run that the users prefers, supply the residuals covariance matrix from the selected realization (named case.N.res.cov/.jcb, where the format depends on the *ies_save_binary* option) as the *obscov* option for the next set of PESTPP-IES iterations. Note it is likely this residual covariance will be singular due to the limited number of realizations used in PESTPP-IES compared to the number of non-zero weighted observations and also due to the strong correlations usually present in final residuals–this singularity means the residual covariance matrix cannot be eigen factored for generating realizations of measurement noise. To help users overcome this issue, PESTPP-IES also writes a “shrunk” version of the residual covariance matrix, where this shrinking is implemented according to Target D of Schafer and Strimmer (2005). This shrinking is optimal in the sense that it preserves variances and also reduces the off-diagonal elements just enough to make the matrix non-singular.
+Together, these two specialized routines enable seamless implementation of the total-error covariance approach described in Alfonso and Oliver (2019) when paired with the *ies\_save\_rescov* option, which tells PESTPP-IES to record the residual covariance matrix each iteration. So, a general approach to implementing the total-error covariance analysis would be to run PESTPP-IES once with the *ies\_save\_rescov* option active. Then, after selecting the iteration from this run that the users prefers, supply the residuals covariance matrix from the selected realization (named case.N.res.cov/.jcb, where the format depends on the *ies\_save\_binary* option) as the *obscov* option for the next set of PESTPP-IES iterations. Note it is likely this residual covariance will be singular due to the limited number of realizations used in PESTPP-IES compared to the number of non-zero weighted observations and also due to the strong correlations usually present in final residuals–this singularity means the residual covariance matrix cannot be eigen factored for generating realizations of measurement noise. To help users overcome this issue, PESTPP-IES also writes a “shrunk” version of the residual covariance matrix, where this shrinking is implemented according to Target D of Schafer and Strimmer (2005). This shrinking is optimal in the sense that it preserves variances and also reduces the off-diagonal elements just enough to make the matrix non-singular. Conceptually, this brings some rigor to the usually ad hoc process of trying to decide how to adjust PEST(++) settings between sets of iterations. 
By using the posterior residual covariance matrix as the observation noise covariance matrix for the next PESTPP-IES analysis, users treat any bias and correlation present in the posterior residuals of the previous analysis as (correlated) expected noise for the next set of PESTPP-IES iterations–PESTPP-IES will generate the requisite noise realizations from the matrix and also adjust the weights to express the variance implied by this matrix so that the next series of PESTPP-IES iterations will not try to fit the components of the observations that it was not able to fit in the previous series of iterations. @@ -2735,35 +2808,36 @@ In this way, one effectively treats the observation noise covariance matrix as a Note that as presently coded, the residual covariance matrix is a dense matrix and it is formed for all non-zero-weighted observations simultaneously. This means that if the number of non-zero-weighted observations is greater than about 20,000, this matrix will likely not fit in memory. -### 9.1.13 Detecting and resolving prior-data conflict +### 9.1.13 Detecting and resolving prior-data conflict Closely related to the concept of measurement noise and the associated ensemble (described previously) is the concept of prior-data conflict (Evans and Moshonov (2006), Alfonso and Oliver (2019)). In the most general sense, prior-data conflict is a situation where the simulated outputs from the prior parameter ensemble do not “agree” with the observed values (plus, optionally, measurement noise), where “agree” is measured by the statistical distance between the ensemble of simulated outputs vs the ensemble of observed values plus noise realizations. If these two ensembles do not “agree”, then that implies that extreme parameter values and/or extreme parameter combinations will be needed to ultimately reproduce these conflicted observation values. In this case, the term “extreme” can be used interchangeably with the term “biased”.
It is easy to see that continuing with parameter adjustments in the presence of prior-data conflict is a sure way to generate parameter bias, and ultimately, forecast bias. -While detecting prior-data conflict is relatively simple (and PESTPP-IES will do this after evaluating the prior parameter ensemble or during a restart), resolving this issue is more problematic. If time and budget permit, users should investigate the cause of these disagreements through detailed investigation of data sources, as well as through paired complex-simple analyses around model discretization and process representation. Another, simpler, but likely more controversial approach, is to remove (from the parameter adjustment process) the observations that are in conflict. In this way, users are willing to accept higher posterior variance to avoid bias–a concrete example of the bias-variance trade-off. PESTPP-IES will implement this draconian prior-data conflict resolution with use of the *ies_drop_conflicts* option (and the associated *ies_pdc_sigma_distance*). By only specifying the *ies_drop_conflicts* option, and observation whose prior realizations do not overlap or “cover” the corresponding observation values plus noise realizations will be marked as “conflicted”. Optionally, the *ies_pdc_sigma\_*distance can be passed as a positive real number this being the number of standard deviations from the mean for both the prior and observations plus noise realizations that will be treated as point where these two distributions must overlap or “cover” each. If the prior realizations mean value minus *ies_pdc_sigma_distance* times the corresponding standard deviation is greater than the observations plus noise realizations mean plus *ies_pdc_sigma\_*distance times the corresponding standard deviation, then the given observation is treated as conflicted (the converse of this situation is also checked). 
+While detecting prior-data conflict is relatively simple (and PESTPP-IES will do this after evaluating the prior parameter ensemble or during a restart), resolving this issue is more problematic. If time and budget permit, users should investigate the cause of these disagreements through detailed investigation of data sources, as well as through paired complex-simple analyses around model discretization and process representation. Another, simpler, but likely more controversial approach, is to remove (from the parameter adjustment process) the observations that are in conflict. In this way, users are willing to accept higher posterior variance to avoid bias–a concrete example of the bias-variance trade-off. PESTPP-IES will implement this draconian prior-data conflict resolution with use of the *ies\_drop\_conflicts* option (and the associated *ies\_pdc\_sigma\_distance*). By only specifying the *ies\_drop\_conflicts* option, any observation whose prior realizations do not overlap or “cover” the corresponding observation values plus noise realizations will be marked as “conflicted”. Optionally, the *ies\_pdc\_sigma\_distance* can be passed as a positive real number, this being the number of standard deviations from the mean for both the prior and observations plus noise realizations that will be treated as the point where these two distributions must overlap or “cover” each other. If the prior realizations mean value minus *ies\_pdc\_sigma\_distance* times the corresponding standard deviation is greater than the observations plus noise realizations mean plus *ies\_pdc\_sigma\_distance* times the corresponding standard deviation, then the given observation is treated as conflicted (the converse of this situation is also checked). With these options active, PESTPP-IES will remove observations in a prior-data conflict state from the parameter adjustment process, that is, these observations will not feature in the residual matrix used for upgrade calculations.
-### 9.1.14 Multi-modal solution process +### 9.1.14 Multi-modal solution process The theory that underpins PEST, PESTPP-GLM and PESTPP-IES is designed to seek a single minimum of the objective function (e.g., a single peak of the likelihood function). For many data assimilation problems however, the objective function surface is not convex with a single minimum located neatly at the bottom. Instead, it may be pitted with local minima and/or contain a curving high-dimensional trough of nearly equal objective function minima. In the context of deterministic parameter estimation, the goal is to seek a unique minimum of the objective function, that is, seeking a minimum error variance solution. However, in the context of uncertainty quantification, and especially in the context of evaluating the likelihood of an unwanted outcome, exploring local minima and/or this high-dimensional trough is paramount. To that end, and following the work of Zhang and others (2018), PESTPP-IES implements a multi-modal solution process. In essence, this solution process calculates the upgrade vector for each realization sequentially, using only realizations that are in the neighborhood of the current realization. The neighborhood is defined by two metrics: relative Euclidean distance in parameter space and relative objective function value. In this way, each realization uses an upgrade direction based on the approximate Jacobian matrix informed only by nearby realizations that have a relatively low objective function value. This allows each realization to move in a different upgrade direction, compared to the standard ies solution process that calculates upgrade magnitude and direction from the approximate Jacobian using all realizations simultaneously. This is depicted in Figure 9.1, where the posterior is a circle yielding an infinite number of posterior modes.
-Chart, scatter chart Description automatically generated +Chart, scatter chart Description automatically generated Figure 9.1 – A demonstration of the multi-modal upgrade process (B) using the example problem from Zhang and others (2018) compared to the standard solution (A). During the first iteration upgrade process, the upgrade for red-dot realization uses only realizations in the local parameter space/low objective function neighborhood (cyan dots) out of the entire ensemble (grey dots). This upgrade yields the magenta location, very near the target circle, compared to the nearly no movement from the standard solution (A). -It is important to note that more realizations will be required in the PESTPP-IES solution process when using multi-modal upgrades. This is so an effectively local group of realizations can be found for each realizations upgrade that a) capture the local objective function behavior and b) the local group has enough realizations to resolve the important relations between pars and obs. The size of the local group of realizations is controlled by the *ies_multimodal_alpha* argument, which ranges between 0 and 1 and is the fraction of the total ensemble to use for the local group of realizations. Smaller values of *ies_multimodal_alpha* will result in more local groups of realizations but at the expense of these groups being smaller in number. A value between 0.1 and0.25 seems to work well for a limited number of test cases. +It is important to note that more realizations will be required in the PESTPP-IES solution process when using multi-modal upgrades. This is so an effectively local group of realizations can be found for each realizations upgrade that a) capture the local objective function behavior and b) the local group has enough realizations to resolve the important relations between pars and obs. 
The size of the local group of realizations is controlled by the *ies\_multimodal\_alpha* argument, which ranges between 0 and 1 and is the fraction of the total ensemble to use for the local group of realizations. Smaller values of *ies\_multimodal\_alpha* will result in more local groups of realizations but at the expense of these groups being smaller in number. A value between 0.1 and 0.25 seems to work well for a limited number of test cases. -Closely related to the multimodal solution process is the use of a “weights” ensemble with PESTPP-IES. Through the *ies_weight_ensemble* argument, users can specify unique weight vectors for each realization. This argument can only be used with the multimodal solution process and allows the upgrade of each realization use a unique weighting scheme. In this way, PESTP-IES can be used to explore how different weighting scheme impact the posterior results. This functionality is demonstrated on the ZDT1 bi-objective optimization benchmark in Figure 9.2 +Closely related to the multimodal solution process is the use of a “weights” ensemble with PESTPP-IES. Through the *ies\_weight\_ensemble* argument, users can specify unique weight vectors for each realization. This argument can only be used with the multimodal solution process and allows the upgrade of each realization to use a unique weighting scheme. In this way, PESTPP-IES can be used to explore how different weighting schemes impact the posterior results. This functionality is demonstrated on the ZDT1 bi-objective optimization benchmark in Figure 9.2. -Chart, scatter chart Description automatically generated +Chart, scatter chart Description automatically generated Figure 9.2 – A demonstration of the multi-modal solution process using a weight ensemble on the ZDT1 benchmark problem. 
The standard solution process using single weight vector drives the posterior towards a single point, while the multi-modal upgrade process uses unique weights on each of the two objectives (observations in the control file) such that each realization targets a different point on the trade-off between the two objectives. -## 9.2 Using PESTPP-IES +## 9.2 Using PESTPP-IES + -### 9.2.1 General +### 9.2.1 General The parameter adjust algorithm implemented by PESTPP-IES is described in detail by Chen and Oliver (2013). The reader is referred to that publication for a complete mathematical description of that algorithm; see also the description presented by White (2018). We now focus on PESTPP-IES implementation details, and on control variables that are available for tuning the Chen and Oliver algorithm implemented by PESTPP-IES to particular history-matching contexts. @@ -2771,10 +2845,10 @@ As is the usual protocol for members of the PEST++ suite, values of control vari Like most programs of the PEST++ suite, PESTPP-IES can be run without any PEST++ control variables. PESTPP-IES then provides default values for all of these variables. For example, the default position of PESTPP-IES is to assume that the prior parameter covariance matrix is diagonal, and that parameter bounds span four standard deviations of the prior probability distribution of each parameter. Nevertheless, users are encouraged to use PESTPP-IES control variables in order to ensure that its performance is optimized for their particular modelling context. -### 9.2.2 Initial Realizations +### 9.2.2 Initial Realizations **Realizations** -Initial parameter realizations can be generated by PESTPP-IES (the default), or they can be supplied by the user. The number of realizations that PESTPP-IES generates is set by the *ies_num_reals()* control variable; the default value is 50. +Initial parameter realizations can be generated by PESTPP-IES (the default), or they can be supplied by the user. 
The number of realizations that PESTPP-IES generates is set by the *ies\_num\_reals()* control variable; the default value is 50. If you do not provide specifications for prior parameter uncertainty, then PESTPP-IES will make the following assumptions when generating initial parameter realizations. @@ -2782,40 +2856,40 @@ If you do not provide specifications for prior parameter uncertainty, then PESTP 2. Every parameter is statistically independent of every other parameter. -3. The difference between the upper and lower bound of each parameter is equal to *par_sigma_range()* standard deviations of its prior probability distribution. If *par_sigma_range(4)* is passed to PESTPP-IES, this indicates that the parameter bounds are set to approximately the 95% confidence interval of the parameter if it is normally distributed (i.e., four standard deviations). PESTPP-IES adopts this protocol even if the initial value of a parameter is not centrally located with respect to these bounds. Where a parameter is log transformed, the difference between the logarithms of the parameter’s upper and lower bounds is equated to *par_sigma_range()* standard deviations of the prior probability distribution of the log of the parameter. +3. The difference between the upper and lower bound of each parameter is equal to *par\_sigma\_range()* standard deviations of its prior probability distribution. If *par\_sigma\_range(4)* is passed to PESTPP-IES, this indicates that the parameter bounds are set to approximately the 95% confidence interval of the parameter if it is normally distributed (i.e., four standard deviations). PESTPP-IES adopts this protocol even if the initial value of a parameter is not centrally located with respect to these bounds. Where a parameter is log transformed, the difference between the logarithms of the parameter’s upper and lower bounds is equated to *par\_sigma\_range()* standard deviations of the prior probability distribution of the log of the parameter. 
-In generating prior parameter realizations, and in adjusting these realizations through the ensemble smoother process, PESTPP-IES ensures that a parameter’s bounds are not violated if the *ies_enforce_bounds()* control variable is set to *true* (it is *true* by default). The parameter’s probability density function is therefore effectively truncated by these bounds. In some situations, truncation may affect the performance of PESTPP-IES. Incidences of this occurrence may be reduced by ensuring that bounds are symmetrical with respect to initial parameter values cited in the “parameter data” section of the PEST control file (taking log transformation into account). Alternatively, the *par_sigma_range()* control variable may be used to increase the number of standard deviations implied by the parameter bounds so that bounds are less likely to be encountered. +In generating prior parameter realizations, and in adjusting these realizations through the ensemble smoother process, PESTPP-IES ensures that a parameter’s bounds are not violated if the *ies\_enforce\_bounds()* control variable is set to *true* (it is *true* by default). The parameter’s probability density function is therefore effectively truncated by these bounds. In some situations, truncation may affect the performance of PESTPP-IES. Incidences of this occurrence may be reduced by ensuring that bounds are symmetrical with respect to initial parameter values cited in the “parameter data” section of the PEST control file (taking log transformation into account). Alternatively, the *par\_sigma\_range()* control variable may be used to increase the number of standard deviations implied by the parameter bounds so that bounds are less likely to be encountered. -PESTPP-IES allows you to specify the properties of an assumed multi-Gaussian prior parameter probability distribution yourself; this distribution can include correlation between parameters. 
This is done using the *parcov()* control variable – the same variable that is available to users of PESTPP-GLM and PESTPP-OPT. As for other programs of the PEST++ suite, the filename that is supplied as the argument for this control variable can be a parameter uncertainty file (recognized by an extension of *.unc*), a covariance matrix file (recognized by an extension of *.cov*), or a binary, matrix-holding file (recognized by an extension of *.jco* or *.jcb*); see appendix B of this manual for specifications of these file types. In generating random parameter realizations comprising the initial ensemble, parameter values listed in the “parameter data” section of the PEST control file are taken as prior mean parameter values. If the *ies_enforce_bounds()* control variable is set to *true*, then parameter realizations are truncated at their bounds; nor are these bounds transgressed during subsequent parameter adjustment. +PESTPP-IES allows you to specify the properties of an assumed multi-Gaussian prior parameter probability distribution yourself; this distribution can include correlation between parameters. This is done using the *parcov()* control variable – the same variable that is available to users of PESTPP-GLM and PESTPP-OPT. As for other programs of the PEST++ suite, the filename that is supplied as the argument for this control variable can be a parameter uncertainty file (recognized by an extension of *.unc*), a covariance matrix file (recognized by an extension of *.cov*), or a binary, matrix-holding file (recognized by an extension of *.jco* or *.jcb*); see appendix B of this manual for specifications of these file types. In generating random parameter realizations comprising the initial ensemble, parameter values listed in the “parameter data” section of the PEST control file are taken as prior mean parameter values. 
If the *ies\_enforce\_bounds()* control variable is set to *true*, then parameter realizations are truncated at their bounds; nor are these bounds transgressed during subsequent parameter adjustment. -Unless PESTPP-IES is advised to the contrary (through the *ies_no_noise* option), realizations of measurement noise that are added to the values of observations provided in the “observation data” section of a PEST control file are generated in the manner already stated. That is, the noise associated with any observation is assumed to be statistically independent of the noise associated with all other observations. The standard deviation of noise associated with each observation is assumed to be the inverse of the weight associated with that observation (unless the optional “standard_deviation” column is found in the version 2 control file external observation data files). However, if an observation is given a weight of zero, it plays no part in the history-matching process; nevertheless, these observations are “carried” through the PESTPP-IES reporting process. +Unless PESTPP-IES is advised to the contrary (through the *ies\_no\_noise* option), realizations of measurement noise that are added to the values of observations provided in the “observation data” section of a PEST control file are generated in the manner already stated. That is, the noise associated with any observation is assumed to be statistically independent of the noise associated with all other observations. The standard deviation of noise associated with each observation is assumed to be the inverse of the weight associated with that observation (unless the optional “standard\_deviation” column is found in the version 2 control file external observation data files). However, if an observation is given a weight of zero, it plays no part in the history-matching process; nevertheless, these observations are “carried” through the PESTPP-IES reporting process. 
“Greater than” and “less than” observations are treated differently from ordinary observations. PESTPP-IES adds no noise to values for these observations provided in the “observation data” section of the PEST control file when formulating observation realizations. This reflects the fact that, on most occasions of their use, observations of this type reflect something that is known about a system rather than something that is measured. At the same time, the penalty for transgressing an observation of this type that is encapsulated in its associated weight should reflect a modeller’s capacity to tolerate this transgression in pursuit of fitting other data. If you are unhappy with this approach, you are free to generate your own observation realizations in which noise is added to these one-way observations; see below. -If the ­*ies_add_base()* control variable is suppled as *true* (which is the default), then PESTPP-IES adds an extra realization to each of the parameter and observation ensembles. For parameters, this “realization” is comprised of parameter values listed in the “parameter data” section of the PEST control file. For observations, this “realization” is comprised of measurements read from the “observation data” section of the PEST control file. Given the importance of the “base” realization, users can also give this realization added importance during parameter upgrade calculations through the use of the *ies_center_on()* option. This option instructs PESTPP-IES to use a specific realization as the “center” of the ensemble. In the standard form of the upgrade equations, the mean vector is used as the “center” of the ensemble. By setting *ies_center_on(base)*, PESTPP-IES will treat the base realization (comprised on control file parameter values) as the center of the ensemble. Limited testing has shown this can improve the resulting phi associated with the base realization. 
+If the *ies\_add\_base()* control variable is supplied as *true* (which is the default), then PESTPP-IES adds an extra realization to each of the parameter and observation ensembles. For parameters, this “realization” is comprised of parameter values listed in the “parameter data” section of the PEST control file. For observations, this “realization” is comprised of measurements read from the “observation data” section of the PEST control file. Given the importance of the “base” realization, users can also give this realization added importance during parameter upgrade calculations through the use of the *ies\_center\_on()* option. This option instructs PESTPP-IES to use a specific realization as the “center” of the ensemble. In the standard form of the upgrade equations, the mean vector is used as the “center” of the ensemble. By setting *ies\_center\_on(base)*, PESTPP-IES will treat the base realization (comprised of control file parameter values) as the center of the ensemble. Limited testing has shown this can improve the resulting phi associated with the base realization. **User-Supplied** Realizations of parameters and/or observations can be prepared by the user and provided to PESTPP-IES in comma-delimited files (i.e., CSV files), and in binary files which adopt the PEST Jacobian matrix file protocol or the PEST++ enhanced Jacobian matrix file protocol. PESTPP-IES recognizes the former through an extension of *.csv* and the latter through extensions of *.jco* and *.jcb*. Actually, as CSV and JCB files containing parameter and observation ensembles comprise part of PESTPP-IES’s output dataset, initial realizations can also be prepared by PESTPP-IES itself. -The value of the optional *ies_parameter_ensemble()* control variable is the name of a file containing a suite of parameter realizations which collectively comprise an ensemble. 
If this is a CSV file, it should contain *m*+1 columns, where *m* is the number of parameters featured in the “parameter data” section of the PEST control file. The first column must be comprised of text strings which endow each realization of the ensemble with a unique identifier. Each of the subsequent *m* columns must pertain to a single parameter; parameter names must occupy the first row of the CSV file as column headers. It is not necessary that these names be supplied in the same order as in the “parameter data” section of the PEST control file. +The value of the optional *ies\_parameter\_ensemble()* control variable is the name of a file containing a suite of parameter realizations which collectively comprise an ensemble. If this is a CSV file, it should contain *m*+1 columns, where *m* is the number of parameters featured in the “parameter data” section of the PEST control file. The first column must be comprised of text strings which endow each realization of the ensemble with a unique identifier. Each of the subsequent *m* columns must pertain to a single parameter; parameter names must occupy the first row of the CSV file as column headers. It is not necessary that these names be supplied in the same order as in the “parameter data” section of the PEST control file. -PESTPP-IES reads as many rows of the CSV as this file contains, unless a value has been provided for the *ies_num_reals()* control variable in the PEST control file. If this is the case, PESTPP-IES reads no more than *ies_num_reals()* rows (after the header row) of the CSV file. If *ies_num_reals()* exceeds the number of data rows that are present in the CSV file, then the number of realizations comprising the PESTPP-IES parameter ensemble is reduced in accordance with the contents of this file. +PESTPP-IES reads as many rows of the CSV as this file contains, unless a value has been provided for the *ies\_num\_reals()* control variable in the PEST control file. 
If this is the case, PESTPP-IES reads no more than *ies\_num\_reals()* rows (after the header row) of the CSV file. If *ies\_num\_reals()* exceeds the number of data rows that are present in the CSV file, then the number of realizations comprising the PESTPP-IES parameter ensemble is reduced in accordance with the contents of this file. -Through the *ies_csv_by_reals()* control variable a user can transpose rows and columns in CSV files that PESTPP-IES reads and writes. If *ies_csv_by_reals()* is set to *false*, then in all CSV files provided to PESTPP-IES or written by PESTPP-IES, parameters/observations are assigned to rows while realizations are assigned to columns. This option can be useful where CSV files are pre/postprocessed in Microsoft EXCEL and parameter/observation numbers are large. +Through the *ies\_csv\_by\_reals()* control variable a user can transpose rows and columns in CSV files that PESTPP-IES reads and writes. If *ies\_csv\_by\_reals()* is set to *false*, then in all CSV files provided to PESTPP-IES or written by PESTPP-IES, parameters/observations are assigned to rows while realizations are assigned to columns. This option can be useful where CSV files are pre/postprocessed in Microsoft EXCEL and parameter/observation numbers are large. Parameters that are declared as tied and fixed in a PEST control file can be included in a user-prepared parameter ensemble CSV or JCO/JCB file. As has already been discussed, a user can decide for him/herself whether the values of fixed parameters are invariant from ensemble to ensemble, or whether they are the same as the values provided for these parameters in the PEST control file. However, it is the user’s responsibility to ensure that tied parameters maintain the correct ratios to their parent parameters in all realizations. 
If the user fails in his/her responsibility, PESTPP-IES will detect this; values attributed to tied parameters will then be altered so that correct tied-to-parent parameter ratios are maintained. Furthermore, if omitted from user-supplied parameter realizations, values for fixed and tied parameters will be provided by PESTPP-IES in accordance with ratios supplied in the “parameter data” section of the PEST control file. -Optionally, one of the realizations provided in a user-supplied parameter ensemble can be named “base”. The significance of the base realization has already been explained. If a realization named “base” is provided in a CSV file, and if the *ies_add_base()* control variable has been set to *true* (which is the default) then PESTPP-IES assumes that the base realization has been provided by the user; it will not create one itself. However, if a base realization is not provided in a CSV file, and *ies_add_base()* has been set to *true*, then PESTPP-IES creates the base realization. As has already been discussed, parameter values comprising a PESTPP-IES-generated base realization are read from the “parameter data” section of the PEST control file. +Optionally, one of the realizations provided in a user-supplied parameter ensemble can be named “base”. The significance of the base realization has already been explained. If a realization named “base” is provided in a CSV file, and if the *ies\_add\_base()* control variable has been set to *true* (which is the default) then PESTPP-IES assumes that the base realization has been provided by the user; it will not create one itself. However, if a base realization is not provided in a CSV file, and *ies\_add\_base()* has been set to *true*, then PESTPP-IES creates the base realization. As has already been discussed, parameter values comprising a PESTPP-IES-generated base realization are read from the “parameter data” section of the PEST control file. 
-If the *ies_enforce_bounds()* control variable has been set to *true*, and if the values of any parameters supplied in a CSV file violate bounds provided in a PEST control file, PESTPP-IES alters parameter values to respect them. Note this form of parameter bound enforcement does not scale the entire realization to bring it within bounds, instead, individually bound-offending parameters values are simply changed to bring them to their respective bound values. In high dimensions with stochastic realizations, this “shrinking” type of enforcement almost guarantees that all realizations will be shrunk by varying degrees. Therefore, this type bound enforcement is not used in PESTPP-IES. However, PESTPP-IES does implement parameter change limit enforcement (in the same style as PEST and PESTPP-GLM with the variables FACPARMAX and RELPARMAX) via the *ies_enforce_chglim* option. Also note that if a restart observation ensemble is passed to PESTPP-IES, bounds enforcement on the initial parameter ensemble is foregone to avoid corrupting the first iteration upgrade calculation process. +If the *ies\_enforce\_bounds()* control variable has been set to *true*, and if the values of any parameters supplied in a CSV file violate bounds provided in a PEST control file, PESTPP-IES alters parameter values to respect them. Note that this form of parameter bound enforcement does not scale the entire realization to bring it within bounds; instead, individual bound-offending parameter values are simply changed to bring them to their respective bound values. In high dimensions with stochastic realizations, the “shrinking” (whole-realization scaling) type of enforcement almost guarantees that all realizations will be shrunk by varying degrees. Therefore, that type of bound enforcement is not used in PESTPP-IES. However, PESTPP-IES does implement parameter change limit enforcement (in the same style as PEST and PESTPP-GLM with the variables FACPARMAX and RELPARMAX) via the *ies\_enforce\_chglim* option. 
Also note that if a restart observation ensemble is passed to PESTPP-IES, bounds enforcement on the initial parameter ensemble is foregone to avoid corrupting the first iteration upgrade calculation process. -The optional *ies_observation_ensemble()* keyword provides the name of a CSV or JCO/JCB file containing realizations which comprise an observation plus noise ensemble. Similar protocols apply to this file as those that apply to user-supplied parameter ensemble files. If a user-provides both parameter and observation ensemble input files, then PESTPP-IES links realizations in these files according to the order in which they are supplied, regardless of realization names. If a base realization is supplied in each of these files, it is the user’s responsibility to ensure that these occupy the same row of their respective files. +The optional *ies\_observation\_ensemble()* keyword provides the name of a CSV or JCO/JCB file containing realizations which comprise an observation plus noise ensemble. Similar protocols apply to this file as those that apply to user-supplied parameter ensemble files. If a user-provides both parameter and observation ensemble input files, then PESTPP-IES links realizations in these files according to the order in which they are supplied, regardless of realization names. If a base realization is supplied in each of these files, it is the user’s responsibility to ensure that these occupy the same row of their respective files. That is worth saying again: it is important to note that PESTPP-IES does not require that user-supplied parameter and observation ensembles share realization names. If a user supplies either a parameter or observation ensemble, PESTPP-IES will check for realization name commonality between the initial parameter and observation ensemble, and if they share all realization names but are not aligned, PESTPP-IES will reorder the observation ensemble. 
If the realization names are not common between these two ensembles, but there are some shared names, PESTPP-IES will warn the user and continue. -### 9.2.3 “Regularization” +### 9.2.3 “Regularization” -Chen and Oliver (2013) provide two equations through which parameters comprising a particular realization within an ensemble are adjusted to provide a better fit with the calibration dataset. These are equations 18 and 19 in their paper. Equation 18 is expensive to compute. It incorporates a term that penalizes adjusted parameter fields whose statistical properties depart too far from those which characterize the prior parameter distribution. Equation 19 is simpler, and less expensive to compute. It adjusts parameters on the basis of current model-to-measurement misfit only. A user can inform PESTPP-IES to use the simpler and less expensive equation by providing the value *true* to the *ies_use_approx()* control variable. Despite the fact that the default value for this variable is *false*, *true* works well on most occasions. When *ies_use_approx* is set to *false*, a penalty for changing parameter values is enforced within the upgrade calculation process. +Chen and Oliver (2013) provide two equations through which parameters comprising a particular realization within an ensemble are adjusted to provide a better fit with the calibration dataset. These are equations 18 and 19 in their paper. Equation 18 is expensive to compute. It incorporates a term that penalizes adjusted parameter fields whose statistical properties depart too far from those which characterize the prior parameter distribution. Equation 19 is simpler, and less expensive to compute. It adjusts parameters on the basis of current model-to-measurement misfit only. A user can inform PESTPP-IES to use the simpler and less expensive equation by providing the value *true* to the *ies\_use\_approx()* control variable. 
Despite the fact that the default value for this variable is *false*, *true* works well on most occasions. When *ies\_use\_approx* is set to *false*, a penalty for changing parameter values is enforced within the upgrade calculation process. A phenomenon that is sometimes encountered in using an ensemble smoother is a collapse in diversity of parameter realizations as the iterative adjustment process progresses. Sometimes this collapse can invalidate the integrity of posterior parameter and predictive probability distributions that the ensemble attempts to characterize. @@ -2825,107 +2899,108 @@ PESTPP-IES also supports preferred-value Tikhonov regularization in which it is PEST and PESTPP-GLM calculate regularization weights using a numerical procedure that depends on the value of a user-supplied “measurement objective function”. This defines the level of model-to-measurement fit that a modeller is not prepared to exceed. Unfortunately, numerical calculation of a regularization weight factor through this means is an expensive undertaking. PESTPP-IES uses a simpler option, but with less intuitive appeal. -Optionally, a user can supply a value for the *ies_reg_factor()* control variable. If this variable is given a value greater than zero, then preferred value regularization is implemented. That is, an objective function penalty is incurred to the extent that the values of individual parameters that comprise a particular parameter realization depart from their initial values that were generated on the basis of the prior parameter probability distribution. A “regularization” objective function is computed which quantifies this departure. The weight assigned to each individual parameter departure is proportional to the inverse of the prior standard deviation of the respective parameter. 
(At the time of writing, a covariance matrix cannot be employed in formulation of the regularization objective function because of the computational requirements to invert a covariance matrix of greater than 30,000 entries per dimension, so that each parameter departure is assumed to be statistically independent of every other parameter departure). During every iteration of the parameter adjustment process, PESTPP-IES calculates a penalty function for each realization that is a function of the deviations from the initial value for each realization and the variance of each parameter. The value assigned to the *ies_reg_factor()* control variable is then a scaling factor to increase or decrease the penalty functions presence in the composite objective function . +Optionally, a user can supply a value for the *ies\_reg\_factor()* control variable. If this variable is given a value greater than zero, then preferred value regularization is implemented. That is, an objective function penalty is incurred to the extent that the values of individual parameters that comprise a particular parameter realization depart from their initial values that were generated on the basis of the prior parameter probability distribution. A “regularization” objective function is computed which quantifies this departure. The weight assigned to each individual parameter departure is proportional to the inverse of the prior standard deviation of the respective parameter. (At the time of writing, a covariance matrix cannot be employed in formulation of the regularization objective function because of the computational requirements to invert a covariance matrix of greater than 30,000 entries per dimension, so that each parameter departure is assumed to be statistically independent of every other parameter departure). 
During every iteration of the parameter adjustment process, PESTPP-IES calculates a penalty function for each realization that is a function of the deviations from the initial value for each realization and the variance of each parameter. The value assigned to the *ies\_reg\_factor()* control variable is then a scaling factor to increase or decrease the penalty function’s presence in the composite objective function. -The PESTPP-IES default value for *ies_reg_factor()* is zero; that is, no “after the fact regularization” is applied (if *ies_use_approx* is false, then regularization penalties are baked into the upgrade calculation process in Bayesian proportions). Under these circumstances, PESTPP-IES seeks to minimize model-to-measurement misfit for all parameter realizations. Where regularization is applied, determination of a suitable value for *ies_reg_factor()* can be difficult. Its value depends on the nature of the inverse problem, on the number of parameters comprising a realization, and on the desired level of model-to-measurement misfit. Despite its dependence on all of these variables, a value in the vicinity of 0.25 often works well. Just as for specification of the PHIMILIM control variable in PEST and PESTPP-GLM, a little trial and error may be warranted in choosing a suitable value for *ies_reg_factor()*. Assignment of too high a value slows the progress of PESTPP-IES in reducing the objective function. Assignment of too low a value may provide insufficient insurance against parameter field collapse. +The PESTPP-IES default value for *ies\_reg\_factor()* is zero; that is, no “after the fact regularization” is applied (if *ies\_use\_approx* is false, then regularization penalties are baked into the upgrade calculation process in Bayesian proportions). Under these circumstances, PESTPP-IES seeks to minimize model-to-measurement misfit for all parameter realizations. 
Where regularization is applied, determination of a suitable value for *ies\_reg\_factor()* can be difficult. Its value depends on the nature of the inverse problem, on the number of parameters comprising a realization, and on the desired level of model-to-measurement misfit. Despite its dependence on all of these variables, a value in the vicinity of 0.25 often works well. Just as for specification of the PHIMILIM control variable in PEST and PESTPP-GLM, a little trial and error may be warranted in choosing a suitable value for *ies\_reg\_factor()*. Assignment of too high a value slows the progress of PESTPP-IES in reducing the objective function. Assignment of too low a value may provide insufficient insurance against parameter field collapse. -In some contexts, a more heuristic approach may be taken to balancing goodness of fit against ensemble diversity. A user may ascribe a low value to *ies_reg_factor()* so that, after a number of iterations have elapsed, a very good fit is attained between model outputs and the calibration dataset. He/she can then select an ensemble from a previous iteration that, in his/her opinion, best balances fit with the calibration dataset against parameter field diversity. +In some contexts, a more heuristic approach may be taken to balancing goodness of fit against ensemble diversity. A user may ascribe a low value to *ies\_reg\_factor()* so that, after a number of iterations have elapsed, a very good fit is attained between model outputs and the calibration dataset. He/she can then select an ensemble from a previous iteration that, in his/her opinion, best balances fit with the calibration dataset against parameter field diversity. In addition to its role in generating initial parameter realizations, the prior parameter probability distribution supports the following aspects of PESTPP-IES calculations. -1. It features in equation 18 of Chen and Oliver (2013). 
This term of the equation is omitted if equation 19 is used for parameter field adjustment in its stead; as stated above, this occurs if *ies_use_approx()* is set to *true*. +1. It features in equation 18 of Chen and Oliver (2013). This term of the equation is omitted if equation 19 is used for parameter field adjustment in its stead; as stated above, this occurs if *ies\_use\_approx()* is set to *true*. -2. It provides weights to individual “regularization observations” that measure departures of adjusted parameter fields from initial parameter fields. This occurs if *ies_reg_factor()* is set to a value greater than zero. +2. It provides weights to individual “regularization observations” that measure departures of adjusted parameter fields from initial parameter fields. This occurs if *ies\_reg\_factor()* is set to a value greater than zero. 3. It is used in prior parameter scaling; see below. -Where a user-supplied CSV or JCO/JCB file provides initial parameter realizations, the prior covariance matrix that is used for the above purposes can be calculated empirically from these realizations. Alternatively, a prior parameter covariance matrix can be read from a file whose name is supplied with the *parcov()* control variable. Regardless of whether the *parcov()* control variable has been supplied, PESTPP-IES calculates prior parameter variances empirically for the second and third of the above tasks from user-supplied initial parameter realizations (if the latter are available) if a value of *true* is supplied for the *ies_use_empirical_prior()* control variable. If this is done, off-diagonal elements of the prior parameter covariance matrix are assigned values of zero for use in the above calculations, this expediting their implementation. +Where a user-supplied CSV or JCO/JCB file provides initial parameter realizations, the prior covariance matrix that is used for the above purposes can be calculated empirically from these realizations. 
Alternatively, a prior parameter covariance matrix can be read from a file whose name is supplied with the *parcov()* control variable. Regardless of whether the *parcov()* control variable has been supplied, PESTPP-IES calculates prior parameter variances empirically for the second and third of the above tasks from user-supplied initial parameter realizations (if the latter are available) if a value of *true* is supplied for the *ies\_use\_empirical\_prior()* control variable. If this is done, off-diagonal elements of the prior parameter covariance matrix are assigned values of zero for use in the above calculations, this expediting their implementation. -Note that the default value for *ies_use_empirical_prior()* is *false*. Hence PESTPP-IES employs a user-supplied covariance matrix for the above roles if one is available. Note also that, even if *ies_use_empirical_prior()* is set to *true*, a user-supplied covariance matrix is always used in the first of the above roles if it can be accessed through a *parcov()* file. If this file is not available so that an empirical covariance matrix is used for this task, off-diagonal elements of this matrix are not automatically set to zero. +Note that the default value for *ies\_use\_empirical\_prior()* is *false*. Hence PESTPP-IES employs a user-supplied covariance matrix for the above roles if one is available. Note also that, even if *ies\_use\_empirical\_prior()* is set to *true*, a user-supplied covariance matrix is always used in the first of the above roles if it can be accessed through a *parcov()* file. If this file is not available so that an empirical covariance matrix is used for this task, off-diagonal elements of this matrix are not automatically set to zero. -### 9.2.4 Prior Parameter Scaling +### 9.2.4 Prior Parameter Scaling Like PESTPP-GLM and PEST, PESTPP-IES uses a Jacobian matrix as a basis for parameter adjustment. 
The mathematics of parameter adjustment which all of these programs implement is very similar, differing only in some of the details of how to handle problem ill-posedness. The major difference between PESTPP-IES on the one hand and PESTPP/PEST on the other hand is in how the Jacobian matrix is calculated. PESTPP-IES does not use finite parameter differences. Instead, it runs the model using the suite of random parameter fields that comprise an ensemble. It then inspects model outputs that correspond to members of the calibration dataset and calculates cross-covariances between these and individual parameters. From these covariances, with some matrix manipulation, it calculates an approximation to the Jacobian matrix. Where the number of realizations that comprise an ensemble is less than the number of adjustable parameters featured in the PEST control file, this Jacobian matrix is column-rank-deficient. Nevertheless, provided its rank is higher than the dimensionality of the calibration solution space, it can support attainment of parameter values which provide a good fit between model outputs and members of the calibration dataset. -In calculating model-output-to-parameter cross-covariances, certain numerical advantages can be gained if differences between individual parameter realizations and the mean parameter field are scaled. This is done by dividing the difference between each parameter and its mean by the prior standard deviation of that parameter. PESTPP-IES performs this scaling if the *ies_use_prior_scaling()* control variable is set to *true*. Experience has demonstrated that prior scaling can be beneficial for problems that involve a very high number of parameters (over three hundred thousand), but that it is not so effective for problems that involve fewer parameters. The default value for *ies_use_prior_scaling()* is *false*. 
+In calculating model-output-to-parameter cross-covariances, certain numerical advantages can be gained if differences between individual parameter realizations and the mean parameter field are scaled. This is done by dividing the difference between each parameter and its mean by the prior standard deviation of that parameter. PESTPP-IES performs this scaling if the *ies\_use\_prior\_scaling()* control variable is set to *true*. Experience has demonstrated that prior scaling can be beneficial for problems that involve a very high number of parameters (over three hundred thousand), but that it is not so effective for problems that involve fewer parameters. The default value for *ies\_use\_prior\_scaling()* is *false*. -### 9.2.5 The Marquardt Lambda +### 9.2.5 The Marquardt Lambda The Marquardt lambda plays a pivotal role in gradient based inversion. Use of a high lambda value in early iterations of an inversion process, and a low lambda value later in that process, can have a large impact on the rate at which a good fit with the calibration dataset is attained. However, the best value of the Marquardt lambda to use during any particular iteration must often be determined by trial and error. PESTPP-GLM and PEST test a number of parameter upgrades, calculated using different values of the Marquardt lambda; the cost is one model run for each tested lambda. Optionally, upgrades calculated using fractional lengths along these parameter upgrade vectors can also be tested. The parameter set that leads to the lowest objective function is selected as the upgraded parameter set. Where a single parameter field is being estimated in a standard model calibration process, this procedure constitutes a good investment in model runs. (In parallel computing environments the cost may actually be very small, as computing cores that would otherwise be idle while waiting to fill the next Jacobian matrix are given something to do.) 
This investment in finding the best parameter upgrade vector provides the best return on the previously much larger investment of filling a Jacobian matrix. -The situation is different, however, when many parameter fields comprising an ensemble are adjusted, such as is done by PESTPP-IES. For an ensemble smoother, the testing of parameter upgrades and the filling of a Jacobian matrix are the same operation. The cost of this operation would become prohibitive if a trial-and-error lambda search procedure accompanied the adjustment of each parameter realization comprising an ensemble. Hence a different strategy must be adopted. This strategy is to undertake lambda testing for only a limited number of parameter fields, and to then use the best lambda that emerges from that testing process when upgrading the rest of them. This limited number of realizations used to evaluate objective function values during upgrade testing is referred to as the “subset”. Options for how to pick the subset are available through the *ies_subset_how()* argument; valid choices for *ies_subset_how()* are “first” (the first *ies_subset_size()* realizations), “last” (the last *ies_subset_size()* realizations), “random” (randomly select *ies_subset_size()* realizations for each iteration), or “phi_based” (select *ies_subset_size()* realizations across the previous composite objective function distribution). Note that if the “base” parameter realization is present, it is always included in the selected subset as the objective function calculated for this realization has special significance. +The situation is different, however, when many parameter fields comprising an ensemble are adjusted, such as is done by PESTPP-IES. For an ensemble smoother, the testing of parameter upgrades and the filling of a Jacobian matrix are the same operation. 
The cost of this operation would become prohibitive if a trial-and-error lambda search procedure accompanied the adjustment of each parameter realization comprising an ensemble. Hence a different strategy must be adopted. This strategy is to undertake lambda testing for only a limited number of parameter fields, and to then use the best lambda that emerges from that testing process when upgrading the rest of them. This limited number of realizations used to evaluate objective function values during upgrade testing is referred to as the “subset”. Options for how to pick the subset are available through the *ies\_subset\_how()* argument; valid choices for *ies\_subset\_how()* are “first” (the first *ies\_subset\_size()* realizations), “last” (the last *ies\_subset\_size()* realizations), “random” (randomly select *ies\_subset\_size()* realizations for each iteration), or “phi\_based” (select *ies\_subset\_size()* realizations across the previous composite objective function distribution). Note that if the “base” parameter realization is present, it is always included in the selected subset as the objective function calculated for this realization has special significance. -The number of realizations that comprise the ensemble subset used for lambda testing is set by the value of the *ies_subset_size()* control variable. During each iteration of the ensemble smoother process, values of the Marquardt lambda used for testing realization upgrades are determined by applying a set of multipliers to the best lambda found during the previous iteration. These multipliers are provided through the *ies_lambda_mults()* control variable. A comma separated list of multipliers should be supplied by the user as arguments to this keyword; at least one of these multipliers should be less than 1.0 while, or course, one of them should be greater than 1.0. Line search factors (otherwise known as scale factors) that are applied to each of these lambdas can also be supplied. 
If so, this is done through the *lambda_scale_fac()* control variable, the same variable that is used by PESTPP-GLM. As for *ies_lambda_mults()*, scale factors should be supplied as a comma-separated list of numbers spanning a range from below 1.0 to greater than 1.0. The total number of model runs required to test parameter upgrades during a given iteration is thus *ies_subset_size()* times the number of multipliers supplied with the *ies_lambda_mults()* control variable times the number of factors supplied with the *lambda_scale_fac()* control variable. +The number of realizations that comprise the ensemble subset used for lambda testing is set by the value of the *ies\_subset\_size()* control variable. During each iteration of the ensemble smoother process, values of the Marquardt lambda used for testing realization upgrades are determined by applying a set of multipliers to the best lambda found during the previous iteration. These multipliers are provided through the *ies\_lambda\_mults()* control variable. A comma-separated list of multipliers should be supplied by the user as arguments to this keyword; at least one of these multipliers should be less than 1.0 while, of course, one of them should be greater than 1.0. Line search factors (otherwise known as scale factors) that are applied to each of these lambdas can also be supplied. If so, this is done through the *lambda\_scale\_fac()* control variable, the same variable that is used by PESTPP-GLM. As for *ies\_lambda\_mults()*, scale factors should be supplied as a comma-separated list of numbers spanning a range from below 1.0 to greater than 1.0. The total number of model runs required to test parameter upgrades during a given iteration is thus *ies\_subset\_size()* times the number of multipliers supplied with the *ies\_lambda\_mults()* control variable times the number of factors supplied with the *lambda\_scale\_fac()* control variable.
-The value of the Marquardt lambda to use during the first iteration of the ensemble smoother process can be supplied through the *ies_initial_lambda()* control variable. Lambda multipliers supplied through *ies_lambda_mults()* are applied to this value during the first iteration of this process. The PESTPP-IES default value for *ies_initial_lambda()* is $10^{\\text{floor}\\left( \\log\_{10}\\frac{\\mu\_{Փ}}{2n} \\right)}$ where *μ*Փ is the mean of objective functions achieved using realizations comprising the initial ensemble, and *n* is the number of non-zero-weighted observations featured in the “observation data” section of the PEST control file. +The value of the Marquardt lambda to use during the first iteration of the ensemble smoother process can be supplied through the *ies\_initial\_lambda()* control variable. Lambda multipliers supplied through *ies\_lambda\_mults()* are applied to this value during the first iteration of this process. The PESTPP-IES default value for *ies\_initial\_lambda()* is $10^{\text{floor}\left( \log_{10}\frac{\mu_{\Phi}}{2n} \right)}$ where $\mu_{\Phi}$ is the mean of objective functions achieved using realizations comprising the initial ensemble, and *n* is the number of non-zero-weighted observations featured in the “observation data” section of the PEST control file. Suppose for example that the following lines appear in a PESTPP-IES control file. -
++ ies_initial_lambda(100)
++ ies_subset_size(4)
++ ies_lambda_mults(0.1,1.0,10.0)
++ ies_lambda_scale_fac(0.9,1.0,1.1)
+
++ ies_initial_lambda(100)
++ ies_subset_size(4)
++ ies_lambda_mults(0.1,1.0,10.0)
++ ies_lambda_scale_fac(0.9,1.0,1.1)
Figure 9.2 Part of a PESTPP-IES control file. From figure 9.2, the initial value of the Marquardt lambda is 100.0. During each iteration of the ensemble smoother process, PESTPP-IES employs three values of the Marquardt lambda, these being equal to 0.1, 1.0 and 10 times the value of the best Marquardt lambda from the previous iteration (or the initial Marquardt lambda in the first iteration). PESTPP-IES selects the first 4 realizations from the parameter ensemble and calculates updated parameter fields using these 3 Marquardt lambdas. It also calculates parameter upgrades corresponding to lengths along these lambda upgrade directions of 0.9 and 1.1 times that which is calculated using the Marquardt lambda alone (this corresponding to a line search factor of 1.0). Hence PESTPP‑IES commits a total of 36 model runs to establishing the best value of lambda and the best line search factor. -After carrying out these model runs, PESTPP-IES chooses the lambda and scale factor combination that yields the lowest mean objective function for the subset of the ensemble that is dedicated to testing these variables. It then compares these with the prevailing mean objective function calculated for the same subset. If the ratio between the new and prevailing mean is greater than the value of the *ies_accept_phi_fac()* control variable (whose default value is 1.05), PESTPP-IES does not accept the lambda and scale factor that it has just determined. Instead, it multiplies lambda by a factor that is equal to the *lambda_inc_fac()* control variable (the default value for which is 10.0), and repeats the lambda and scale factor testing procedure described above on the same ensemble subset. Note however, that PESTPP-IES will update any individual realization in the subset that has a new phi that is less than the previous phi times the value of the *ies_accept_phi_fac()* control variable. Hence the mean phi value for the entire ensemble may still improve.
+After carrying out these model runs, PESTPP-IES chooses the lambda and scale factor combination that yields the lowest mean objective function for the subset of the ensemble that is dedicated to testing these variables. It then compares these with the prevailing mean objective function calculated for the same subset. If the ratio between the new and prevailing mean is greater than the value of the *ies\_accept\_phi\_fac()* control variable (whose default value is 1.05), PESTPP-IES does not accept the lambda and scale factor that it has just determined. Instead, it multiplies lambda by a factor that is equal to the *lambda\_inc\_fac()* control variable (the default value for which is 10.0), and repeats the lambda and scale factor testing procedure described above on the same ensemble subset. Note however, that PESTPP-IES will update any individual realization in the subset that has a new phi that is less than the previous phi times the value of the *ies\_accept\_phi\_fac()* control variable. Hence the mean phi value for the entire ensemble may still improve. -If, after NPHINORED attempts, PESTPP-IES is not able to find a lambda and line search factor for which the objective function is less than *ies_accept_phi()* times the prevailing objective function for the ensemble subset, it declares the iterative smoothing process to be over and ceases execution. NPHINORED is a termination criterion supplied on the eighth line of the “control data” section of the PEST control file. +If, after NPHINORED attempts, PESTPP-IES is not able to find a lambda and line search factor for which the objective function is less than *ies\_accept\_phi\_fac()* times the prevailing objective function for the ensemble subset, it declares the iterative smoothing process to be over and ceases execution. NPHINORED is a termination criterion supplied on the eighth line of the “control data” section of the PEST control file.
-Alternatively, if the mean objective function attained through the lambda and line search factor process described above is less than *ies_accept_phi_fac()* times the prevailing mean objective function for the ensemble subset, PESTPP-IES applies the best lambda and line search factor to the remainder of the ensemble. If the mean objective function for the entire ensemble was reduced from its prevailing mean then, on the next iteration of the smoother process, PESTPP-IES lowers the Marquardt lambda by applying a factor of *lambda_dec_fac()* to its current value. The default value of *lambda_dec_fac()* is 0.75. +Alternatively, if the mean objective function attained through the lambda and line search factor process described above is less than *ies\_accept\_phi\_fac()* times the prevailing mean objective function for the ensemble subset, PESTPP-IES applies the best lambda and line search factor to the remainder of the ensemble. If the mean objective function for the entire ensemble was reduced from its prevailing mean then, on the next iteration of the smoother process, PESTPP-IES lowers the Marquardt lambda by applying a factor of *lambda\_dec\_fac()* to its current value. The default value of *lambda\_dec\_fac()* is 0.75. -###
9.2.6 Restarting +### 9.2.6 Restarting -As has already been discussed, if either you or PESTPP-IES have generated a set of random parameter fields (or PESTPP-IES has improved them from a set of previous parameter fields), these can be provided to a newly-restarted PESTPP-IES parameter adjustment process using the *ies_parameter_ensembles()* control variable. If a model run has been undertaken for each of these fields (either by you or by PESTPP-IES), then the iterative ensemble smoother can be initiated from these parameter fields in conjunction with the model outputs which correspond to them. PESTPP-IES is instructed to do this using the *ies_restart_obs_ensemble()* control variable. The value for this variable is the name of a CSV or JCO/JCB file containing model outputs corresponding to the set of parameter fields which PESTPP-IES already has in its possession. +As has already been discussed, if either you or PESTPP-IES have generated a set of random parameter fields (or PESTPP-IES has improved them from a set of previous parameter fields), these can be provided to a newly-restarted PESTPP-IES parameter adjustment process using the *ies\_parameter\_ensemble()* control variable. If a model run has been undertaken for each of these fields (either by you or by PESTPP-IES), then the iterative ensemble smoother can be initiated from these parameter fields in conjunction with the model outputs which correspond to them. PESTPP-IES is instructed to do this using the *ies\_restart\_obs\_ensemble()* control variable. The value for this variable is the name of a CSV or JCO/JCB file containing model outputs corresponding to the set of parameter fields which PESTPP-IES already has in its possession.
-If a user wishes to track (and optionally enforce) parameter regularization against the initial parameter ensemble, the *ies_restart_parameter_ensemble()* variable should be used in conjunction with the *ies_parameter_ensemble()* and *ies_restart_observation_ensemble()* options if restarting from an iteration other than the first. In this case, the *ies_restart_parameter_ensemble()* should be the ensemble of parameter values that corresponds with the *ies_restart_observation_ensemble()* ensemble of observation values, while *ies_parameter_ensemble()* should denote the initial parameter ensemble. Note that use of the *ies_restart_parameter_ensemble()* control variable requires use of the *ies_restart_observation_ensemble()* variable. Note also that the two ensembles cited by these variables must have identical numbers of realizations. +If a user wishes to track (and optionally enforce) parameter regularization against the initial parameter ensemble, the *ies\_restart\_parameter\_ensemble()* variable should be used in conjunction with the *ies\_parameter\_ensemble()* and *ies\_restart\_observation\_ensemble()* options if restarting from an iteration other than the first. In this case, the *ies\_restart\_parameter\_ensemble()* should be the ensemble of parameter values that corresponds with the *ies\_restart\_observation\_ensemble()* ensemble of observation values, while *ies\_parameter\_ensemble()* should denote the initial parameter ensemble. Note that use of the *ies\_restart\_parameter\_ensemble()* control variable requires use of the *ies\_restart\_observation\_ensemble()* variable. Note also that the two ensembles cited by these variables must have identical numbers of realizations. -As is the PESTPP-IES convention, the number of columns in the user-supplied, run-results CSV file must be equal to the number of non-zero weighted observations featured in the PEST control file plus one. 
Alternatively, results of a previous PESTPP-IES run can be supplied as a matrix in a JCO/JCB file; this matrix must possess as many columns as there are non-zero-weighted observations. (Actually PESTPP-IES will forgive you if model outputs corresponding to zero-weighted observations are also provided in these files.) If a CSV file is supplied, then its first row must name these model outputs; names must be in accordance with those listed in the “observation data” section of the PEST control file. The first column must contain unique observation set (i.e., realization) identifiers. There must be as many of these as there are members of the model output ensemble. Members of the model output ensemble are linked to members of the parameter ensemble by row number. However, if an ensemble of measured values is provided through the *ies_observation_ensemble()* keyword, the names of observation ensemble members in the filename supplied through this keyword must agree with those provided in the model output ensemble filename supplied with the *ies_restart_obs_ensemble()* keyword. The same protocol applies for an ensemble supplied in a JCO/JCB file. +As is the PESTPP-IES convention, the number of columns in the user-supplied, run-results CSV file must be equal to the number of non-zero weighted observations featured in the PEST control file plus one. Alternatively, results of a previous PESTPP-IES run can be supplied as a matrix in a JCO/JCB file; this matrix must possess as many columns as there are non-zero-weighted observations. (Actually PESTPP-IES will forgive you if model outputs corresponding to zero-weighted observations are also provided in these files.) If a CSV file is supplied, then its first row must name these model outputs; names must be in accordance with those listed in the “observation data” section of the PEST control file. The first column must contain unique observation set (i.e., realization) identifiers. 
There must be as many of these as there are members of the model output ensemble. Members of the model output ensemble are linked to members of the parameter ensemble by row number. However, if an ensemble of measured values is provided through the *ies\_observation\_ensemble()* keyword, the names of observation ensemble members in the filename supplied through this keyword must agree with those provided in the model output ensemble filename supplied with the *ies\_restart\_obs\_ensemble()* keyword. The same protocol applies for an ensemble supplied in a JCO/JCB file. -In the event of model run failure for certain realizations, the parameter and simulated observation ensemble CSV or JCB files produced by PESTPP-IES will have fewer rows than when the PESTPP-IES process commenced (or fewer columns if the *ies_csv_by_reals()* option is supplied as *false*). This is because they now possess fewer realizations than the “measured observation” ensemble, this being comprised of measurements plus realizations of measurement noise. Lack of coherence between these ensembles can make a PESTPP-IES restart difficult. On restart, PESTPP-IES does not require that the names of user-supplied parameter and observation ensembles provided in files whose names are cited with the *ies_parameter_ensemble()* and *ies_restart_obs_ensemble()* keywords be the same. Nor do these names need to be the same as realization names associated with the measurement observation ensemble supplied with the *ies_observation_ensemble()* keyword. Ensembles are simply read sequentially and matched to each other according to their order of appearance in respective files. +In the event of model run failure for certain realizations, the parameter and simulated observation ensemble CSV or JCB files produced by PESTPP-IES will have fewer rows than when the PESTPP-IES process commenced (or fewer columns if the *ies\_csv\_by\_reals()* option is supplied as *false*). 
This is because they now possess fewer realizations than the “measured observation” ensemble, this being comprised of measurements plus realizations of measurement noise. Lack of coherence between these ensembles can make a PESTPP-IES restart difficult. On restart, PESTPP-IES does not require that the names of user-supplied parameter and observation ensembles provided in files whose names are cited with the *ies\_parameter\_ensemble()* and *ies\_restart\_obs\_ensemble()* keywords be the same. Nor do these names need to be the same as realization names associated with the measurement observation ensemble supplied with the *ies\_observation\_ensemble()* keyword. Ensembles are simply read sequentially and matched to each other according to their order of appearance in respective files. -PESTPP-IES makes an exception to this protocol, however, if realization names are the same in filenames supplied with the *ies_parameter_ensemble()* and *ies_restart_obs_ensemble()* keywords. (This happens automatically if these files were written by PESTPP_IES.) In this case PESTPP-IES links these names to realization names appearing in the *ies_observation_ensemble()* CSV file, ignoring “lost” realizations from this file in the process. Alternatively, a user can remove lost realizations from the *ies_observation_ensemble()* file him/herself. This is easily accomplished using the Python Pandas library. The easiest and safest way to restart PESTPP-IES is by supplying *ies_observation_ensemble()* and *ies_parameter_ensemble()* as the “base” observation ensemble (that is observation values plus noise realizations) and the initial parameter ensemble, respectively. Meanwhile *ies_restart_observation_ensemble()* and *ies_restart_parameter_ensemble()* should cite ensembles produced from the same iteration of a previous PESTPP-IES run. This ensures that failed runs are handled correctly and that any regularization enforcement is with respect to the initial (prior) parameter ensemble. 
+PESTPP-IES makes an exception to this protocol, however, if realization names are the same in filenames supplied with the *ies\_parameter\_ensemble()* and *ies\_restart\_obs\_ensemble()* keywords. (This happens automatically if these files were written by PESTPP-IES.) In this case PESTPP-IES links these names to realization names appearing in the *ies\_observation\_ensemble()* CSV file, ignoring “lost” realizations from this file in the process. Alternatively, a user can remove lost realizations from the *ies\_observation\_ensemble()* file him/herself. This is easily accomplished using the Python Pandas library. The easiest and safest way to restart PESTPP-IES is by supplying *ies\_observation\_ensemble()* and *ies\_parameter\_ensemble()* as the “base” observation ensemble (that is observation values plus noise realizations) and the initial parameter ensemble, respectively. Meanwhile *ies\_restart\_observation\_ensemble()* and *ies\_restart\_parameter\_ensemble()* should cite ensembles produced from the same iteration of a previous PESTPP-IES run. This ensures that failed runs are handled correctly and that any regularization enforcement is with respect to the initial (prior) parameter ensemble. -### 9.2.7 Failed Model Runs +### 9.2.7 Failed Model Runs Where model runs are based on random parameter realizations, the risk of occasional model run failure is high for some models. The parallel run manager used by programs of the PEST++ suite is able to accommodate model run failure in ways described in section 5.3 of this manual. When model run failure is encountered, PESTPP-IES drops the parameter set that precipitated this failure from the ensemble. The ensemble thus loses a member. -PESTPP-IES provides a mechanism for detection of model run failure that extends those provided by its run manager. If the objective function associated with a particular model run is calculated to be greater than a certain threshold, PESTPP-IES deems the model run to have failed.
This threshold is supplied as the value of *ies_bad_phi()* control variable. +PESTPP-IES provides a mechanism for detection of model run failure that extends those provided by its run manager. If the objective function associated with a particular model run is calculated to be greater than a certain threshold, PESTPP-IES deems the model run to have failed. This threshold is supplied as the value of *ies\_bad\_phi()* control variable. -To forestall excessive PESTPP-IES run times incurred by occasional model failure, it is a good idea to set the *max_run_fail()* model run control variable to 1 (the default value for PESTPP‑IES), and to choose values for the *overdue_giveup_fac()* and/or *overdue_giveup_minutes()* control variables judiciously; see section 5.3. Note also that model run failure does not hurt PESTPP-IES as much as it hurts PESTPP-GLM or PEST. This is because the value of any model run undertaken by PESTPP-IES is lower than that undertaken by PEST or PESTPP-GLM. For the latter programs a failed model run during finite-difference derivatives calculation may lead to an empty column of the Jacobian matrix. In contrast, because PESTPP-IES uses an entire ensemble to fill a Jacobian matrix, a single failed model run does not result in an empty Jacobian matrix column. The outcome of model run failure is that the number of model runs employed in the averaging process through which this column is calculated is reduced by one. +To forestall excessive PESTPP-IES run times incurred by occasional model failure, it is a good idea to set the *max\_run\_fail()* model run control variable to 1 (the default value for PESTPP‑IES), and to choose values for the *overdue\_giveup\_fac()* and/or *overdue\_giveup\_minutes()* control variables judiciously; see section 5.3. Note also that model run failure does not hurt PESTPP-IES as much as it hurts PESTPP-GLM or PEST. 
This is because the value of any model run undertaken by PESTPP-IES is lower than that undertaken by PEST or PESTPP-GLM. For the latter programs a failed model run during finite-difference derivatives calculation may lead to an empty column of the Jacobian matrix. In contrast, because PESTPP-IES uses an entire ensemble to fill a Jacobian matrix, a single failed model run does not result in an empty Jacobian matrix column. The outcome of model run failure is that the number of model runs employed in the averaging process through which this column is calculated is reduced by one. -### 9.2.8 Reporting +### 9.2.8 Reporting -PESTPP-IES records its progress to the screen and to its run record file. In addition to this, it records a plethora of output files–this is intentional. In the ensemble setting the cost of evaluating new model outputs is high, a rerun of an ensemble. It is therefore easier for PESTPP-IES to write as much information as possible to avoid these additional costs. The output are discussed in the next section. These output files can be supplemented by additional files that record, in ASCII format, matrices that PESTPP-IES formulates in the course of upgrading parameter realizations. The extent of its output file production can be controlled using the *ies_verbose_level()* variable. This can be awarded values of 0,1 or 2. The default is 1. +PESTPP-IES records its progress to the screen and to its run record file. In addition to this, it records a plethora of output files–this is intentional. In the ensemble setting the cost of evaluating new model outputs is high, a rerun of an ensemble. It is therefore easier for PESTPP-IES to write as much information as possible to avoid these additional costs. The output are discussed in the next section. These output files can be supplemented by additional files that record, in ASCII format, matrices that PESTPP-IES formulates in the course of upgrading parameter realizations. 
The extent of its output file production can be controlled using the *ies\_verbose\_level()* variable. This can be awarded values of 0,1 or 2. The default is 1. -If a model is numerically unstable, a user may wish to be informed of parameter values that precipitate run failure. As is discussed below, PESTPP-IES records the values of all parameters in all realizations comprising an ensemble, together with model run results, in iteration-specific CSV or JCB files. Parameter sets used in lambda testing can also be recorded if the *save_lambda_ensembles()* control variable is set to *true*. +If a model is numerically unstable, a user may wish to be informed of parameter values that precipitate run failure. As is discussed below, PESTPP-IES records the values of all parameters in all realizations comprising an ensemble, together with model run results, in iteration-specific CSV or JCB files. Parameter sets used in lambda testing can also be recorded if the *save\_lambda\_ensembles()* control variable is set to *true*. -### 9.2.9 Termination Criteria, Objective Functions, and Upgrade Acceptance +### 9.2.9 Termination Criteria, Objective Functions, and Upgrade Acceptance Like PEST and PESTPP-GLM, PESTPP-IES reads termination criteria from the eighth line of the “control data” section of a PEST control file. PESTPP-IES ceases execution after NOPTMAX iterations have elapsed. However, during these NOPTMAX iterations it applies the PHIREDSTP, NPHISTP and NPHINORED termination criteria to the mean objective function calculated using all realizations of the ensemble over successive smoother iterations. If the relative reduction in the mean objective function is less than PHIREDSTP over NPHISTP iterations, or if NPHINORED iterations have elapsed since the last reduction in the mean objective function has occurred, PESTPP-IES ceases execution. 
-Note that, as described above, PESTPP-IES also ceases execution if, during any particular iteration of the smoother process, it cannot find in successive NPHINORED iterations, a lambda and scale factor that allows it to calculate parameter upgrades for which the objective function is less than *ies_accept_phi_fac()*. If the results of a particular iteration’s solution process do not meet the acceptable phi criteria (the previous iteration’s mean composite phi times *ies_accept_phi_fac*), then a “partial upgrade” is performed, where only realizations that yield a phi meeting this criteria are updated. After this partial upgrade, the full ensemble phi statistics are recalculated to determine the lambda value for the next iteration. If after a partial upgrade, the acceptable phi criteria is still not met, PESTPP-IES will return to upgrade calculations with an increased lambda. In this situation, PESTPP-IES will save “rejected” parameter and observation ensembles in case users want to inspect these quantities. +Note that, as described above, PESTPP-IES also ceases execution if, during any particular iteration of the smoother process, it cannot find in successive NPHINORED iterations, a lambda and scale factor that allows it to calculate parameter upgrades for which the objective function is less than *ies\_accept\_phi\_fac()*. If the results of a particular iteration’s solution process do not meet the acceptable phi criteria (the previous iteration’s mean composite phi times *ies\_accept\_phi\_fac*), then a “partial upgrade” is performed, where only realizations that yield a phi meeting this criteria are updated. After this partial upgrade, the full ensemble phi statistics are recalculated to determine the lambda value for the next iteration. If after a partial upgrade, the acceptable phi criteria is still not met, PESTPP-IES will return to upgrade calculations with an increased lambda. 
In this situation, PESTPP-IES will save “rejected” parameter and observation ensembles in case users want to inspect these quantities. Special values of NOPTMAX can instigate special PESTPP-IES behaviour. If NOPTMAX is set to -1, PESTPP-IES does not upgrade random parameter sets which comprise an ensemble. It simply runs the model once for each parameter set, records model output values, and then ceases execution, thereby effectively undertaking unconstrained Monte Carlo analysis. If NOPTMAX is set to zero, execution of PESTPP-IES is even shorter. It evaluates only the parameter values listed in the control file – replicating the behaviour of PESTPP-GLM and PEST. If NOPTMAX is supplied as -2, then PESTPP-IES will calculate the mean value of the initial parameter ensemble, evaluate it (by running the model once) and record the results. -PESTPP-IES reports several different objective functions, namely “composite”, “measurement”, “regularization”, and “actual”, depending on the mode of operation. The “measurement” objective function is calculated using the current simulated outputs and the observations values in the pest control combined with realizations of additive measurement noise (described elsewhere in this manual). The measurement objective function is calculated using the weights in the pest control file (unless an *obscov* is supplied, described elsewhere, or unless a user-generated observations plus noise ensemble is supplied, described elsewhere). Note that if the *ies_no_noise* option is activated, the “measurement” and “actual” objective functions are the same and only the “actual” is reported to the screen and record file. +PESTPP-IES reports several different objective functions, namely “composite”, “measurement”, “regularization”, and “actual”, depending on the mode of operation. 
The “measurement” objective function is calculated using the current simulated outputs and the observations values in the pest control combined with realizations of additive measurement noise (described elsewhere in this manual). The measurement objective function is calculated using the weights in the pest control file (unless an *obscov* is supplied, described elsewhere, or unless a user-generated observations plus noise ensemble is supplied, described elsewhere). Note that if the *ies\_no\_noise* option is activated, the “measurement” and “actual” objective functions are the same and only the “actual” is reported to the screen and record file. -The “regularization” objective function is calculated as parameter value deviations from the initial (stochastic) realized values–this objective function is scaled by the diagonal of the prior parameter covariance matrix. Note the “regularization” objective function is only used and reported if the value of *ies_reg_factor* is supplied with a value greater than 0.0. +The “regularization” objective function is calculated as parameter value deviations from the initial (stochastic) realized values–this objective function is scaled by the diagonal of the prior parameter covariance matrix. Note the “regularization” objective function is only used and reported if the value of *ies\_reg\_factor* is supplied with a value greater than 0.0. -The “composite” objective function is simply the combination of the measurement and regularization objective functions. Note the regularization objective function is reported in the phi summary but the contribution to the composite objective function is controlled by *ies_reg_factor,* which is 0.0 by default. Note the composite objective function is used to select lambda-scale factor pairs and to control termination criteria. Similar to the “regularization” objective function, the “composite” objective function is only used and reported if *ies_reg_factor* is greater than 0.0. 
+The “composite” objective function is simply the combination of the measurement and regularization objective functions. Note the regularization objective function is reported in the phi summary but the contribution to the composite objective function is controlled by *ies\_reg\_factor,* which is 0.0 by default. Note the composite objective function is used to select lambda-scale factor pairs and to control termination criteria. Similar to the “regularization” objective function, the “composite” objective function is only used and reported if *ies\_reg\_factor* is greater than 0.0. -The “actual” objective function is calculated using the current simulated outputs and the observation values in the control file (that is, without measurement noise realizations). Through the *ies_no_noise* option, users can make the “measurement” and “actual” objective functions one in the same. This is an important consideration when subjective weighting is used to balance the contribution of several types of observations to the objective function–a process that can result in very small weights, which implies very large measurement noise. +The “actual” objective function is calculated using the current simulated outputs and the observation values in the control file (that is, without measurement noise realizations). Through the *ies\_no\_noise* option, users can make the “measurement” and “actual” objective functions one in the same. This is an important consideration when subjective weighting is used to balance the contribution of several types of observations to the objective function–a process that can result in very small weights, which implies very large measurement noise. -## 9.3 PESTPP-IES Output Files +## 9.3 PESTPP-IES Output Files -### 9.3.1 CSV Output Files -PESTPP-IES writes a suite of output files. Many of these are comma-delimited files (i.e., CSV files). 
Alternatively, the contents of some of these files (those which hold parameter and observation ensembles) can be recorded in binary JCB files, this option being activated if the *ies_save_binary()* control variable is supplied as *true*. CSV and JCB files that are written by PESTPP-IES are discussed in the current sub-section. Other files that are written by PESTPP-IES are discussed in the following sub-section. +### 9.3.1 CSV Output Files -The *ies_csv_by_reals()* flag can be used to control how parameter and observation CSV files are written by PESTPP-IES. If *ies_csv_by_reals()* is set to *true* (its default value) then each row of a CSV file records a single realization. Alternatively, if this flag is set to *false*, then each column of parameter and observation ensemble CSV files records a realization, this effectively transposing the ensemble stored in CSV format. +PESTPP-IES writes a suite of output files. Many of these are comma-delimited files (i.e., CSV files). Alternatively, the contents of some of these files (those which hold parameter and observation ensembles) can be recorded in binary JCB files, this option being activated if the *ies\_save\_binary()* control variable is supplied as *true*. CSV and JCB files that are written by PESTPP-IES are discussed in the current sub-section. Other files that are written by PESTPP-IES are discussed in the following sub-section. + +The *ies\_csv\_by\_reals()* flag can be used to control how parameter and observation CSV files are written by PESTPP-IES. If *ies\_csv\_by\_reals()* is set to *true* (its default value) then each row of a CSV file records a single realization. Alternatively, if this flag is set to *false*, then each column of parameter and observation ensemble CSV files records a realization, this effectively transposing the ensemble stored in CSV format. As always, it is assumed that the filename base of the PEST control file on which history-matching is based is named *case.pst*. 
*N* is used to signify iteration number in the following filenames. An iteration number of 0 corresponds to initial parameter fields and corresponding model outputs. @@ -2934,12 +3009,12 @@ As always, it is assumed that the filename base of the PEST control file on whic | *case.N.par.csv/jcb* | Values of all parameters in all realizations comprising an ensemble. For N=0, this is the initial ensemble, sampled from the prior parameter probability distribution. Otherwise, it is the parameter realizations at the end of iteration N | | *case.N.obs.csv/jcb* | The ensemble of model outputs values which correspond to observations listed in the “observation data” section of the PEST control file. These are calculated using all realizations comprising the parameter ensemble. | | *case.obs+noise.csv/jcb* | Base observation values. These are calculated by generating realizations of measurement noise and adding this noise to measured values listed in the “observation data” section of the PEST control file. | -| *case.N.L.lambda.F.scale.csv/jcb* | These files are produced if the *save_lambda_ensembles()* control variable is set to *true*. They record parameter values used in testing the effects of different Marquardt lambdas and line search factors. *L* is the value of the Marquardt lambda; *F* is the value of the line search factor. | +| *case.N.L.lambda.F.scale.csv/jcb* | These files are produced if the *save\_lambda\_ensembles()* control variable is set to *true*. They record parameter values used in testing the effects of different Marquardt lambdas and line search factors. *L* is the value of the Marquardt lambda; *F* is the value of the line search factor. | | *case.phi.actual.csv* | Objective functions calculated during all iterations of the ensemble smoother process for all members of the ensemble. 
Objective functions are computed from differences between model outputs and measurements recorded in the “observation data” section of the PEST control file using weights that are also provided in this section. | | *case.phi.group.csv* | Objective function components pertaining to different observation groups calculated for all iterations of the ensemble smoother process. Progression of the regularization objective function is also recorded; this measures discrepancies between current and initial parameter values. Contributions by different parameter groups to the regularization objective function are also listed. | | *case.phi.meas.csv* | Objective functions calculated during all iterations of the ensemble smoother process for all members of the ensemble. Objective functions recorded in this file are computed from differences between model outputs calculated using different parameter realizations and corresponding observation realizations, i.e., measurements recorded in the “observation data” section of the PEST control file supplemented with realizations of measurement noise. | | *case.phi.regul.csv* | Regularization objective functions calculated during each iteration of the ensemble smoother process for all members of the ensemble. For a particular realization this is calculated using differences between current and initial parameter values. The weight applied to a particular difference is the inverse of the prior standard deviation of the parameter. | -| *case.phi.composite.csv* | The composite objective function is the measurement objective function plus the regularization objective function multiplied by the value of the *regul_frac()* control variable. | +| *case.phi.composite.csv* | The composite objective function is the measurement objective function plus the regularization objective function multiplied by the value of the *regul\_frac()* control variable. 
| | *case.N.autoadaloc.csv* | The (optional) automatic adaptive localization summary for each iteration | | *case.pdc.csv* | A summary of prior-data conflict information | | *case.N.pcs.csv* | A summary of parameter changes by group compared to the initial parameter ensemble. | @@ -2948,52 +3023,56 @@ As always, it is assumed that the filename base of the PEST control file on whic Table 9.2 CSV and JCB files written by PESTPP-IES. It is assumed that the name of the PEST control file is *case.pst*. -### 9.3.2 Non-CSV Output Files +### 9.3.2 Non-CSV Output Files Non-CSV output files written by PESTPP-IES are listed in the following table. -| File | Contents | -|-------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| *case.rec* | The run record file. This echoes information in the PEST control file, and then records a history of the ensemble smoother process. Note that parallel agents write “panther_worker.rec”. | -| *case.rmr* | Parallel run management record. This file is written if model runs are conducted in parallel. | -| *case.log* | Performance log. This file records the times at which various processing steps begin and end. | -| *case.rns* | Binary file used for model run management. This file is typically removed after a successful PESTPP-IES run. | -| *case.N.res.cov/.jcb* | (optional) residual covariance matrix saved each iteration. | -| *case.N.autoadaloc.mat* | The (optional) localization matrix yielded by the automatic adaptive localization process | -| *case.N.base.par* | The “base” realization parameter values for the Nth iteration in a PEST-style par file. Only written if the “base” realization is available. | -| *case.N.base.rei* | The “base” realization simulated values for the Nth iteration in a PEST-style residuals file. Only written if the “base” realization is available. 
| +| File | Contents | +|-------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| *case.rec* | The run record file. This echoes information in the PEST control file, and then records a history of the ensemble smoother process. Note that parallel agents write “panther\_worker.rec”. | +| *case.rmr* | Parallel run management record. This file is written if model runs are conducted in parallel. | +| *case.log* | Performance log. This file records the times at which various processing steps begin and end. | +| *case.rns* | Binary file used for model run management. This file is typically removed after a successful PESTPP-IES run. | +| *case.N.res.cov/.jcb* | (optional) residual covariance matrix saved each iteration. | +| *case.N.autoadaloc.mat* | The (optional) localization matrix yielded by the automatic adaptive localization process | +| *case.N.base.par* | The “base” realization parameter values for the Nth iteration in a PEST-style par file. Only written if the “base” realization is available. | +| *case.N.base.rei* | The “base” realization simulated values for the Nth iteration in a PEST-style residuals file. Only written if the “base” realization is available. | Table 9.3 Non-CSV/JCB files written by PESTPP-IES. It is assumed that the name of the PEST control file is *case.pst*. -## 9.4 Summary of Control Variables +## 9.4 Summary of Control Variables + Table 9.4 lists PESTPP-IES control variables. All of these are optional. If a variable is not supplied, then a default is assumed for its value. Where appropriate, the value of the default is presented along with the name of the variable in the table below. Variables discussed in section 5.3.6 of this manual that control parallel run management are not listed in the following table. Note also that the number of control variables may change with time. 
Refer to the PEST++ web site for variables used by the latest version of PESTPP-IES. -
VariableTypeRole
ies_num_reals(50)integerThe number of realizations to draw in order to form parameter and observation ensembles.
parcov()textThe name of a file containing the prior parameter covariance matrix. This can be a parameter uncertainty file (extension .unc), a covariance matrix file (extension .cov) or a binary JCO or JCB file (extension .jco or .jcb).
par_sigma_range(4.0)realThe difference between a parameter’s upper and lower bounds expressed as standard deviations.
ies_parameter_ensemble()textThe name of a CSV or JCO/JCB file (recognized by its extension) containing user-supplied parameter realizations comprising the initial (prior) parameter ensemble. If this keyword is omitted, PESTPP-IES generates the initial parameter ensemble itself.
ies_observation_ensemble()textThe name of a CSV or JCO/JCB file (recognized by its extension) containing user-supplied observation plus noise realizations comprising the observation plus noise ensemble. If this keyword is omitted, PESTPP-IES generates the observation plus noise ensemble itself.
ies_add_base(true)BooleanIf set to true, instructs PESTPP-IES to include a “realization” in the initial parameter ensemble comprised of parameter values read from the “parameter data” section of the PEST control file. The corresponding observation ensemble is comprised of measurements read from the “observation data” section of the PEST control file.
ies_restart_observation_ensemble()textThe name of a CSV or JCO/JCB file (recognized by its extension) containing model outputs calculated using a parameter ensemble. If it reads this file, PESTPP-IES does not calculate these itself, proceeding to upgrade calculations instead.
ies_restart_parameter_ensemble()textThe name of a CSV or JCO/JCB file (recognized by its extension) containing a parameter ensemble that corresponds to the ies_restart_observation_ensemble(). This option requires that the ies_restart_observation_ensemble() control variable also be supplied. This ensemble is only used in the calculation of the regularization component of the objective function for a restarted PESTPP-IES analysis.
ies_enforce_bounds(true)BooleanIf set to true PESTPP-IES will not transgress bounds supplied in the PEST control file when generating or accepting parameter realizations, and when adjusting these realizations.
ies_initial_lambda()realThe initial Marquardt lambda. The default value is \(10^{\text{floor}\left( \log_{10}\frac{\mu_{\Phi}}{2n} \right)}\). If supplied as a negative value, then abs(ies_initial_lambda) is used as a multiplier of the default initial-phi-based value.
ies_lambda_mults(0.1,1.0,10.0)comma-separated realsFactors by which to multiply the best lambda from the previous iteration to yield values for testing parameter upgrades during the current iteration.
lambda_scale_fac(0.75,1.0,1.1)comma-separated realsLine search factors along parameter upgrade directions computed using different Marquardt lambdas.
ies_subset_size(4)integerNumber of realizations used in testing and evaluation of different Marquardt lambdas. If supplied as a negative value, then abs(ies_subset_size) is treated as a percentage of the current ensemble size – this allows the subset size to fluctuate with the size of the ensemble
ies_use_approx(true)BooleanUse complex or simple formula provided by Chen and Oliver (2013) for calculation of parameter upgrades. The more complex formula includes a function which constrains parameter realizations to respect prior means and probabilities.
ies_reg_factor(0.0)realRegularization objective function as a fraction of measurement objective function when constraining parameter realizations to respect initial values.
ies_bad_phi(1.0E300)realIf the objective function calculated as an outcome of a model run is greater than this value, the model run is deemed to have failed.
ies_bad_phi_sigma(1.0E300)realIf the objective function calculated for a given realization is greater than the current mean objective function of the ensemble plus the objective function standard deviation of the ensemble times ies_bad_phi_sigma(), that realization is treated as failed.
ies_use_prior_scaling(false)BooleanUse a scaling factor based on the prior parameter distribution when evaluating parameter-to-model-output covariance used in calculation of the randomized Jacobian matrix.
ies_use_empirical_prior(false)BooleanUse an empirical, diagonal parameter covariance matrix for certain calculations. This matrix is contained in a file whose name is provided with the ies_parameter_ensemble() keyword.
Ies_save_lambda_ensembles(false)BooleanSave a set of CSV or JCB files that record parameter realizations used when testing different Marquardt lambdas.
ies_verbose_level(1)0, 1 or 2The level of diagnostic output provided by PESTPP-IES. If set to 2, all intermediate matrices are saved to ASCII files. This can require a considerable amount of storage.
ies_accept_phi_fac(1.05)real > 1.0The factor applied to the previous best mean objective function to determine if the current mean objective function is acceptable.
ies_lambda_dec_fac(0.75)real < 1.0The factor by which to decrease the value of the Marquardt lambda during the next IES iteration if the current iteration of the ensemble smoother process was successful in lowering the mean objective function.
ies_lambda_inc_fac(10.0)real > 1.0The factor by which to increase the current value of the Marquardt lambda for further lambda testing if the current lambda testing cycle was unsuccessful.
ies_subset_how(random)“first”,“last”,
“random”,
“phi_based”
How to select the subset of realizations for objective function evaluation during upgrade testing. Default is “random”.
ies_num_threads(-1)integer > 1The number of threads to use during the localized upgrade solution process and the automatic adaptive localization process. If the localizer contains many (>10K) rows, then multithreading can substantially speed up the upgrade calculation process. ies_num_threads() should not be greater than the number of physical cores on the host machine.
ies_localizer()textThe name of a matrix to use for localization. The extension of the file is used to determine the type: .mat is an ASCII matrix file, .jcb/.jco signifies use of (enhanced) Jacobian matrix format (a binary format), while .csv signifies a comma-delimited file. Note that adjustable parameters not listed in localization matrix columns are implicitly treated as “fixed” while non-zero weighted observations not listed in rows of this matrix are implicitly treated as zero-weighted.
ies_group_draws(true)BooleanA flag to draw from the (multivariate) Gaussian prior by parameter/observation groups. This is usually a good idea since groups of parameters/observations are likely to have prior correlation.
ies_save_binary(false)BooleanA flag to save parameter and observation ensembles in binary (i.e., JCB) format instead of CSV format.
ies_csv_by_reals(true)BooleanA flag to save parameter and observation ensemble CSV files by realization instead of by variable name. If true, each row of the CSV file is a realization. If false, each column of the CSV file is a realization.
ies_autoadaloc(false)BooleanFlag to activate automatic adaptive localization.
ies_autoadaloc_sigma_dist(1.0)RealReal number representing the factor by which a correlation coefficient must exceed the standard deviation of background correlation coefficients to be considered significant. Default is 1.0
tie_by_group(false)BooleanFlag to tie all adjustable parameters together within each parameter group. Initial parameter ratios are maintained as parameters are adjusted. Parameters that are designated as already tied, or that have parameters tied to them, are not affected.
ies_enforce_chglim(false)BooleanFlag to enforce parameter change limits (via FACPARMAX and RELPARMAX) in a way similar to PEST and PESTPP-GLM (by scaling the entire realization). Default is false.
ies_center_on()StringA realization name that should be used for the ensemble center in calculating the approximate Jacobian matrix. The realization name must be in both the parameter and observation ensembles. If not passed, the mean vector is used as the center. The value “_MEDIAN_” can also be used, which instructs PESTPP-IES to use the median vector for calculating anomalies.
enforce_tied_bounds(false)BooleanFlag to enforce parameter bounds on any tied parameters. Depending on the ratio between the tied and free parameters, this option can greatly limit parameter changes.
ies_no_noise(false)BooleanFlag to not generate and use realizations of measurement noise. Default is False (that is, to use measurement noise).
ies_drop_conflicts(false)BooleanFlag to remove non-zero weighted observations that are in a prior-data conflict state from the upgrade calculations. Default is False.
ies_pdc_sigma_distance()Real > 0.0The number of standard deviations from the mean used in checking for prior-data conflict.
ies_save_rescov(False)BooleanFlag to save the iteration-level residual covariance matrix. If ies_save_binary is True, then a binary format file is written, otherwise an ASCII format (.cov) file is written. The file name is case.N.res.cov/.jcb. Note that this functionality does not scale beyond about 20,000 non-zero-weighted observations
obscov()textThe name of a file containing the observation noise covariance matrix. This can be a parameter uncertainty file (extension .unc), a covariance matrix file (extension .cov) or a binary JCO or JCB file (extension .jco or .jcb). Please see the section on this matrix above to understand the implications of using this matrix
rand_seed(358183147)unsigned integerSeed for the random number generator.
Ies_use_mda(false)BooleanFlag to use the (optionally iterative) Kalman update equation – the number of data assimilation iterations is controlled by NOPTMAX; NOPTMAX = 1 and ies_use_mda(true) results in the standard ensemble smoother Kalman update. If False, the GLM iterative ensemble smoother equation is used. Default is False
Ies_mda_init_fac(10.0)doubleThe initial MDA covariance inflation factor. Only used if ies_use_mda is true. Default is 10.0
Ies_mda_decl_fac(0.5)doubleThe final MDA covariance inflation factor. Only used if ies_use_mda is true. Default is 0.5
Ies_localization_type(local)textCan be either “local” for local analysis or “covariance” for covariance-only localization. Default is “local”
Ies_upgrades_in_memory(true)BooleanFlag to hold parameter upgrade ensembles in memory during testing. If False, parameter ensembles are saved to disk during testing and the best-phi ensemble is loaded from disk after testing – this can reduce memory pressure for very high dimensional problems. Default is True but is only activated if number of parameters > 100K.
Ies_ordered_binary(true)BooleanFlag to write control-file-ordered binary ensemble files. Only used if save_binary is true. If false, hash-ordered binary files are written – for very high dimensional problems, writing unordered binary can save lots of time. If not passed and number of parameters > 100K, then ies_ordered_binary is set to false.
ensemble_output_precision(6)intNumber of significant digits to use in ASCII format ensemble files. Default is 6
ies_multimodal_alpha(1.0)doubleThe fraction of the total ensemble size to use as the local neighborhood realizations in the multimodal solution process. Must be greater than zero and no greater than 1; values less than 1 activate the multimodal solution process. Values of 0.1 to 0.25 seem to work well. Default is 1.0 (disable multi-modal solution process)
ies_weight_ensemble()textThe name of a CSV or JCO/JCB file (recognized by its extension) containing user-supplied weight vectors for each realization. If this keyword is omitted, PESTPP-IES uses the weight vector in the control file for all realizations. Only used with ies_multimodal_alpha
+
VariableTypeRole
ies_num_reals(50)integerThe number of realizations to draw in order to form parameter and observation ensembles.
parcov()textThe name of a file containing the prior parameter covariance matrix. This can be a parameter uncertainty file (extension .unc), a covariance matrix file (extension .cov) or a binary JCO or JCB file (extension .jco or .jcb).
par_sigma_range(4.0)realThe difference between a parameter’s upper and lower bounds expressed as standard deviations.
ies_parameter_ensemble()textThe name of a CSV or JCO/JCB file (recognized by its extension) containing user-supplied parameter realizations comprising the initial (prior) parameter ensemble. If this keyword is omitted, PESTPP-IES generates the initial parameter ensemble itself.
ies_observation_ensemble()textThe name of a CSV or JCO/JCB file (recognized by its extension) containing user-supplied observation plus noise realizations comprising the observation plus noise ensemble. If this keyword is omitted, PESTPP-IES generates the observation plus noise ensemble itself.
ies_add_base(true)BooleanIf set to true, instructs PESTPP-IES to include a “realization” in the initial parameter ensemble comprised of parameter values read from the “parameter data” section of the PEST control file. The corresponding observation ensemble is comprised of measurements read from the “observation data” section of the PEST control file.
ies_restart_observation_ensemble()textThe name of a CSV or JCO/JCB file (recognized by its extension) containing model outputs calculated using a parameter ensemble. If it reads this file, PESTPP-IES does not calculate these itself, proceeding to upgrade calculations instead.
ies_restart_parameter_ensemble()textThe name of a CSV or JCO/JCB file (recognized by its extension) containing a parameter ensemble that corresponds to the ies_restart_observation_ensemble(). This option requires that the ies_restart_observation_ensemble() control variable also be supplied. This ensemble is only used in the calculation of the regularization component of the objective function for a restarted PESTPP-IES analysis.
ies_enforce_bounds(true)BooleanIf set to true PESTPP-IES will not transgress bounds supplied in the PEST control file when generating or accepting parameter realizations, and when adjusting these realizations.
ies_initial_lambda()realThe initial Marquardt lambda. The default value is \(10^{\text{floor}\left( \log_{10}\frac{\mu_{\Phi}}{2n} \right)}\). If supplied as a negative value, then abs(ies_initial_lambda) is used as a multiplier of the default initial-phi-based value.
ies_lambda_mults(0.1,1.0,10.0)comma-separated realsFactors by which to multiply the best lambda from the previous iteration to yield values for testing parameter upgrades during the current iteration.
lambda_scale_fac(0.75,1.0,1.1)comma-separated realsLine search factors along parameter upgrade directions computed using different Marquardt lambdas.
ies_subset_size(4)integerNumber of realizations used in testing and evaluation of different Marquardt lambdas. If supplied as a negative value, then abs(ies_subset_size) is treated as a percentage of the current ensemble size – this allows the subset size to fluctuate with the size of the ensemble
ies_use_approx(true)BooleanUse complex or simple formula provided by Chen and Oliver (2013) for calculation of parameter upgrades. The more complex formula includes a function which constrains parameter realizations to respect prior means and probabilities.
ies_reg_factor(0.0)realRegularization objective function as a fraction of measurement objective function when constraining parameter realizations to respect initial values.
ies_bad_phi(1.0E300)realIf the objective function calculated as an outcome of a model run is greater than this value, the model run is deemed to have failed.
ies_bad_phi_sigma(1.0E300)realIf the objective function calculated for a given realization is greater than the current mean objective function of the ensemble plus the objective function standard deviation of the ensemble times ies_bad_phi_sigma(), that realization is treated as failed.
ies_use_prior_scaling(false)BooleanUse a scaling factor based on the prior parameter distribution when evaluating parameter-to-model-output covariance used in calculation of the randomized Jacobian matrix.
ies_use_empirical_prior(false)BooleanUse an empirical, diagonal parameter covariance matrix for certain calculations. This matrix is contained in a file whose name is provided with the ies_parameter_ensemble() keyword.
Ies_save_lambda_ensembles(false)BooleanSave a set of CSV or JCB files that record parameter realizations used when testing different Marquardt lambdas.
ies_verbose_level(1)0, 1 or 2The level of diagnostic output provided by PESTPP-IES. If set to 2, all intermediate matrices are saved to ASCII files. This can require a considerable amount of storage.
ies_accept_phi_fac(1.05)real > 1.0The factor applied to the previous best mean objective function to determine if the current mean objective function is acceptable.
ies_lambda_dec_fac(0.75)real < 1.0The factor by which to decrease the value of the Marquardt lambda during the next IES iteration if the current iteration of the ensemble smoother process was successful in lowering the mean objective function.
ies_lambda_inc_fac(10.0)real > 1.0The factor by which to increase the current value of the Marquardt lambda for further lambda testing if the current lambda testing cycle was unsuccessful.
ies_subset_how(random)“first”, “last”,
“random”,
“phi_based”
How to select the subset of realizations for objective function evaluation during upgrade testing. Default is “random”.
ies_num_threads(-1)integer > 1The number of threads to use during the localized upgrade solution process and the automatic adaptive localization process. If the localizer contains many (>10K) rows, then multithreading can substantially speed up the upgrade calculation process. ies_num_threads() should not be greater than the number of physical cores on the host machine.
ies_localizer()textThe name of a matrix to use for localization. The extension of the file is used to determine the type: .mat is an ASCII matrix file, .jcb/.jco signifies use of (enhanced) Jacobian matrix format (a binary format), while .csv signifies a comma-delimited file. Note that adjustable parameters not listed in localization matrix columns are implicitly treated as “fixed” while non-zero weighted observations not listed in rows of this matrix are implicitly treated as zero-weighted.
ies_group_draws(true)BooleanA flag to draw from the (multivariate) Gaussian prior by parameter/observation groups. This is usually a good idea since groups of parameters/observations are likely to have prior correlation.
ies_save_binary(false)BooleanA flag to save parameter and observation ensembles in binary (i.e., JCB) format instead of CSV format.
ies_csv_by_reals(true)BooleanA flag to save parameter and observation ensemble CSV files by realization instead of by variable name. If true, each row of the CSV file is a realization. If false, each column of the CSV file is a realization.
ies_autoadaloc(false)BooleanFlag to activate automatic adaptive localization.
ies_autoadaloc_sigma_dist(1.0)RealReal number representing the factor by which a correlation coefficient must exceed the standard deviation of background correlation coefficients to be considered significant. Default is 1.0
tie_by_group(false)BooleanFlag to tie all adjustable parameters together within each parameter group. Initial parameter ratios are maintained as parameters are adjusted. Parameters that are designated as already tied, or that have parameters tied to them, are not affected.
ies_enforce_chglim(false)BooleanFlag to enforce parameter change limits (via FACPARMAX and RELPARMAX) in a way similar to PEST and PESTPP-GLM (by scaling the entire realization). Default is false.
ies_center_on()StringA realization name that should be used for the ensemble center in calculating the approximate Jacobian matrix. The realization name must be in both the parameter and observation ensembles. If not passed, the mean vector is used as the center. The value “_MEDIAN_” can also be used, which instructs PESTPP-IES to use the median vector for calculating anomalies.
enforce_tied_bounds(false)BooleanFlag to enforce parameter bounds on any tied parameters. Depending on the ratio between the tied and free parameters, this option can greatly limit parameter changes.
ies_no_noise(false)BooleanFlag to not generate and use realizations of measurement noise. Default is False (that is, to use measurement noise).
ies_drop_conflicts(false)BooleanFlag to remove non-zero weighted observations that are in a prior-data conflict state from the upgrade calculations. Default is False.
ies_pdc_sigma_distance()Real > 0.0The number of standard deviations from the mean used in checking for prior-data conflict.
ies_save_rescov(False)BooleanFlag to save the iteration-level residual covariance matrix. If ies_save_binary is True, then a binary format file is written, otherwise an ASCII format (.cov) file is written. The file name is case.N.res.cov/.jcb. Note that this functionality does not scale beyond about 20,000 non-zero-weighted observations
obscov()textThe name of a file containing the observation noise covariance matrix. This can be a parameter uncertainty file (extension .unc), a covariance matrix file (extension .cov) or a binary JCO or JCB file (extension .jco or .jcb). Please see the section on this matrix above to understand the implications of using this matrix
rand_seed(358183147)unsigned integerSeed for the random number generator.
Ies_use_mda(false)BooleanFlag to use the (optionally iterative) Kalman update equation – the number of data assimilation iterations is controlled by NOPTMAX; NOPTMAX = 1 and ies_use_mda(true) results in the standard ensemble smoother Kalman update. If False, the GLM iterative ensemble smoother equation is used. Default is False
Ies_mda_init_fac(10.0)doubleThe initial MDA covariance inflation factor. Only used if ies_use_mda is true. Default is 10.0
Ies_mda_decl_fac(0.5)doubleThe final MDA covariance inflation factor. Only used if ies_use_mda is true. Default is 0.5
Ies_localization_type(local)textCan be either “local” for local analysis or “covariance” for covariance-only localization. Default is “local”
Ies_upgrades_in_memory(true)BooleanFlag to hold parameter upgrade ensembles in memory during testing. If False, parameter ensembles are saved to disk during testing and the best-phi ensemble is loaded from disk after testing – this can reduce memory pressure for very high dimensional problems. Default is True but is only activated if number of parameters > 100K.
Ies_ordered_binary(true)BooleanFlag to write control-file-ordered binary ensemble files. Only used if save_binary is true. If false, hash-ordered binary files are written – for very high dimensional problems, writing unordered binary can save lots of time. If not passed and number of parameters > 100K, then ies_ordered_binary is set to false.
ensemble_output_precision(6)intNumber of significant digits to use in ASCII format ensemble files. Default is 6
ies_multimodal_alpha(1.0)doubleThe fraction of the total ensemble size to use as the local neighborhood realizations in the multimodal solution process. Must be greater than zero and no greater than 1; values less than 1 activate the multimodal solution process. Values of 0.1 to 0.25 seem to work well. Default is 1.0 (disable multi-modal solution process)
ies_weight_ensemble()textThe name of a CSV or JCO/JCB file (recognized by its extension) containing user-supplied weight vectors for each realization. If this keyword is omitted, PESTPP-IES uses the weight vector in the control file for all realizations. Only used with ies_multimodal_alpha
Table 9.4 PESTPP-IES control variables with default values. Parallel run management variables can be supplied in addition to these. See section 5.3.6. -#
10. PESTPP-SWP +# 10. PESTPP-SWP + + +## 10.1 Introduction -## 10.1 Introduction PESTPP-SWP runs a model using a suite of parameter fields. Parameter values that comprise these fields are supplied in a comma-delimited file (i.e., a CSV file) or in a binary (enhanced) Jacobian matrix file (i.e., a JCO or JCB file). PESTPP-SWP records the values of model outputs calculated using these parameter fields in another CSV file, together with objective function components calculated from these outputs. The PEST control file which PESTPP-SWP reads informs it of observation values, observation weights, observation groups, and the setting of the PESTMODE control variable. If PESTMODE is set to “regularization”, then PESTPP-SWP calculates a regularization objective function in addition to the measurement objective function. On most occasions of PESTPP-SWP usage, model runs are conducted in parallel. Use of PESTPP-SWP gives a modeller easy access to model run parallelization for the completion of model runs undertaken for any purpose whatsoever. A significant amount of functionality available through PyEMU makes use of PESTPP-SWP to undertake parallelized model runs; it bases its calculations on the outcomes of these runs. -## 10.2 Using PESTPP-SWP +## 10.2 Using PESTPP-SWP + As usual, variables which control how PESTPP-SWP operates must be placed in a PEST control file whose name is supplied on its command line; these variables should appear on lines that begin with the “++” character string. -PESTPP-SWP is directed to a CSV or JCO/JCB input file through the value supplied for its *sweep_parameter_csv_file()* control variable; the type of file is recognized by its extension. A CSV file must have as many columns as there are parameters featured in the PEST control file, plus one extra column on the left. The first column is reserved for the user-supplied realization name. 
Parameter and realizations names are provided in a JCB or JCO file according to the respective protocols of these files. +PESTPP-SWP is directed to a CSV or JCO/JCB input file through the value supplied for its *sweep\_parameter\_csv\_file()* control variable; the type of file is recognized by its extension. A CSV file must have as many columns as there are parameters featured in the PEST control file, plus one extra column on the left. The first column is reserved for the user-supplied realization name. Parameter and realizations names are provided in a JCB or JCO file according to the respective protocols of these files. The number of realizations contained in a user-prepared PESTPP-SWP input file depends on the number of parameter sets for which model runs are required. These realizations can be named according to the user’s taste. PESTPP-SWP carries out one model run for each realization. Parameter names provided in a CSV of JCO/JCB file must correspond to those that are featured in a PEST control file. -If the *sweep_parameter_csv_file()* control variable does not appear in the PEST control file that is cited on the PESTPP-SWP command line, PESTPP-SWP assumes an input filename of *sweep_in.csv*. +If the *sweep\_parameter\_csv\_file()* control variable does not appear in the PEST control file that is cited on the PESTPP-SWP command line, PESTPP-SWP assumes an input filename of *sweep\_in.csv*. Note the following. @@ -3001,37 +3080,41 @@ Note the following. - If a parameter is fixed in the PEST control file, and the value provided for that parameter in the CSV or JCO/JCB file differs from that in the PEST control file, the value in the PEST control file overrides that in the CSV or JCO/JCB file. -- If the value of the *ies_csv_by_reals()* control variable is supplied as *true*, then the roles of rows and columns can be reversed in a CSV input file. That is, columns pertain to realizations while rows pertain to parameter values. 
+- If the value of the *ies\_csv\_by\_reals()* control variable is supplied as *true*, then the roles of rows and columns can be reversed in a CSV input file. That is, columns pertain to realizations while rows pertain to parameter values. -PESTPP-SWP can fill in values for fixed and tied parameters if these are missing from its input file. Actually, it can provide values for other missing parameters as well if the *sweep_forgive()* control variable is set to *true*. These missing values are taken from the PEST control file which is read by PESTPP-SWP. +PESTPP-SWP can fill in values for fixed and tied parameters if these are missing from its input file. Actually, it can provide values for other missing parameters as well if the *sweep\_forgive()* control variable is set to *true*. These missing values are taken from the PEST control file which is read by PESTPP-SWP. -PESTPP-SWP writes a model output file whose name is provided through the *sweep_output_csv_file()* control variable. If this variable is not provided, PESTPP-SWP employs the name *sweep_out.csv* for its output file. +PESTPP-SWP writes a model output file whose name is provided through the *sweep\_output\_csv\_file()* control variable. If this variable is not provided, PESTPP-SWP employs the name *sweep\_out.csv* for its output file. -The control variable *sweep_chunk()* pertains to parallelization of model runs. Runs are done in bundles of size *N*, where *N* is the value supplied for this variable. (A chunk of 500 is the default). This number should be chosen wisely. It should be a multiple of the number of agents that PESTPP-SWP can use for carrying out model runs. +The control variable *sweep\_chunk()* pertains to parallelization of model runs. Runs are done in bundles of size *N*, where *N* is the value supplied for this variable. (A chunk of 500 is the default). This number should be chosen wisely. It should be a multiple of the number of agents that PESTPP-SWP can use for carrying out model runs. 
Also note that PESTPP-SWP can be particularly useful if users need complete model output files for a given set of runs. In this case, the file transfer capabilities of the parallel run manager can be used with PESTPP-SWP to run a sweep of parameter values and model output files can be transferred back to the master directory. -## 10.3 Summary of Control Variables +## 10.3 Summary of Control Variables + Table 10.1 tabulates PESTPP-SWP control variables. As usual, all of these variables are optional. If a variable is not supplied, then a default is assumed. Default values are presented along with the names of variables in the table below. Variables discussed in section 5.3.6 that control parallel run management are not listed in this table. The number of control variables may change with time. Refer to the PEST++ web site for variables used by the latest version of PESTPP-SWP. -| Variable | Type | Role | -|------------------------------------------|----------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| *sweep_parameter_csv_file(sweep_in.csv)* | text | A user-provided CSV file that lists the parameter values employed for model runs. If this filename ends with *.jcb* or *.jco*, then the ensemble is read from a binary JCB or JCO file instead. | -| *Ies_csv_by_reals()* | Boolean | A flag to save parameter and observation ensemble CSV files by realization instead of by variable name. If true, each row of the CSV file is a realization. If false, each column of the CSV file is a realization. | -| *sweep_forgive(false)* | Boolean | If set to *true*, PESTPP-SWP provides values for missing variables in the user-supplied *sweep_parameter_csv_file()* file. | -| *sweep_output_csv_file(sweep_out.csv)* | text | The CSV file written by PESTPP-SWP. 
| -| *sweep_chunk(500)* | text | The number of runs to batch queue for the run manager. Each chunk is read, run and written as a single batch. | -| *enforce_tied_bounds(false)* | Boolean | Flag to enforce parameter bounds on any tied parameters | -| *tie_by_group(false)* | Boolean | Flag to tie all adjustable parameters together within each parameter group. Initial parameter ratios are maintained as parameters are adjusted. Parameters that are designated as already tied, or that have parameters tied to them, are not affected. | -| *ensemble_output_precision* | int | Number of significant digits to use in ASCII format ensemble files. Default is 6 | +| Variable | Type | Role | +|----------------------------------------------|----------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| *sweep\_parameter\_csv\_file(sweep\_in.csv)* | text | A user-provided CSV file that lists the parameter values employed for model runs. If this filename ends with *.jcb* or *.jco*, then the ensemble is read from a binary JCB or JCO file instead. | +| *Ies\_csv\_by\_reals()* | Boolean | A flag to save parameter and observation ensemble CSV files by realization instead of by variable name. If true, each row of the CSV file is a realization. If false, each column of the CSV file is a realization. | +| *sweep\_forgive(false)* | Boolean | If set to *true*, PESTPP-SWP provides values for missing variables in the user-supplied *sweep\_parameter\_csv\_file()* file. | +| *sweep\_output\_csv\_file(sweep\_out.csv)* | text | The CSV file written by PESTPP-SWP. | +| *sweep\_chunk(500)* | text | The number of runs to batch queue for the run manager. Each chunk is read, run and written as a single batch. 
| +| *enforce\_tied\_bounds(false)* | Boolean | Flag to enforce parameter bounds on any tied parameters | +| *tie\_by\_group(false)* | Boolean | Flag to tie all adjustable parameters together within each parameter group. Initial parameter ratios are maintained as parameters are adjusted. Parameters that are designated as already tied, or that have parameters tied to them, are not affected. | +| *ensemble\_output\_precision* | int | Number of significant digits to use in ASCII format ensemble files. Default is 6 | Table 10.1 PESTPP-SWP control variables. Parallel run management variables can be supplied in addition to these; see section 5.3.6. -**PESTPP-PSO** -## 11.1 Introduction +# PESTPP-PSO + + +## 11.1 Introduction + **Publications** A complete description of the background on PESTPP-PSO (Particle Swarm Optimization within PEST++) and its basic operation can be found in *Siade et al*, (2019), along with three benchmark problems and two real-world case studies. Therefore, this manual will instead provide a more detailed description on how to implement the software. The reader is also referred to the work by *Coello et al*, (2004) for an additional detailed description on the multi-objective (Pareto) optimization framework that formed the basis of the corresponding method employed in this software. @@ -3059,7 +3142,7 @@ The use of PSO for integer programming can be seen throughout the clinical trial **Basic** PSO is an evolutionary algorithm that operates on the socio-cognitive behavior of individuals within a swarm. 
Individuals, or particles, “move” within decision space based on three components, (1) the momentum of the movement from the previous iteration, (2) the location in decision space that has had the best performance for that particle so far, in terms of the objective function as defined by Equation (11.1) (cognitive component), and (3) the location in decision space associated with the best performance observed by the entire swarm thus far (social component). A particle’s position in decision space is simply defined as the vector of values for the decision variables currently assigned to that particle. The term “decision variables” is used here to indicate that PSO is applicable to any form of optimization problem, e.g., calibration, management, design, etc., and therefore “parameters” are considered decision variables as well. The movement (or velocity) of a particle at a particular iteration, *t* + 1, is defined as, -*v*ij(*t*+1) = *ω*(*t*)*v*ij(*t*) + *c*1*r*1(*z*ij(*t*)−*p*ij(*t*)) + *c*2*r*2(*z*ig(*t*)−*p*ij(*t*)) (X.3) +*v*ij(*t* + 1) = *ω*(*t*)*v*ij(*t*) + *c*1*r*1(*z*ij(*t*)−*p*ij(*t*)) + *c*2*r*2(*z*ig(*t*)−*p*ij(*t*)) (X.3) where, subscript *i* denotes the decision variable index, subscript *j* denotes the particle index, *v* is the velocity, *ω* is the inertia, *c*1 is the cognitive constant, *c*2 is the social constant, *r* is a random value taken from the interval \[0,1\], *z*ij is the best position observed by particle *j* for parameter *i* (often referred to as the personal best or “*p*-best” position for particle *j*), *g* is the index of the best *p*-best position in the swarm or neighbourhood (often referred to as the global best, or “*g*-best” position), and *p* represents the current position of the particle in decision space. 
@@ -3067,7 +3150,7 @@ The basic single-objective PSO algorithm proceeds by updating each particle’s While basic PSO can approach such a problem, like all other optimization methods, if the problem is nonconvex it cannot guarantee a globally optimal solution. However, its global search approach to optimization makes it very effective at avoiding local minima. It is also important to point out that, like other evolutionary algorithms, the number of iterations required for convergence can be relatively high. This can be mitigated somewhat through the choice of values for inertia and the social and cognitive constants. It is recommended that one begin with a relatively high value for inertia (e.g., 0.7) and gradually lower the inertia over successive iterations, perhaps as low as 0.4. Another factor affecting convergence is the swarm size; the larger the swarm the faster the convergence. However, there comes a point where the speed-up in this trade-off diminishes; from the author’s experience this occurs somewhere around a swarm size of 50, but may still be problem-specific. See the sections regarding the use of this software for more details on how to manage these control variables. -### 11.1.2 Multi-Objective Particle Swarm optimization +### 11.1.2 Multi-Objective Particle Swarm optimization Multi-objective optimization studies often have numerous factors to consider, and some of these factors may be considered objectives (a Pareto front is desired for their trade-offs), or they may be considered as constraints (they are given a limit for which they cannot exceed). Generally, one could consider constraints as objectives in this context, as they can be mixed and matched depending on the perspective of the optimization problem (Equation 11.2). Additionally, the upper limit of the constraints may be perturbed slightly to examine its effects on the Pareto front; such constraints are often referred to as epsilon (*ε*) constraints. 
@@ -3077,18 +3160,20 @@ MOPSO, like most multi-objective optimization algorithms in use today, approxima The MOPSO algorithm employed in this software determines the Pareto optimal set iteratively, beginning with an initial swarm population. The initial swarm is executed through the simulation model and the set of non-dominated decision vectors amongst the initial swarm is stored in a *repository*. Then, MOPSO will update the swarm, according to a modified PSO method, and check the dominance relationships between the swarm and the repository (*Siade et al*, 2019). If new decision vectors are obtained that are non-dominated (thus far), they will be added to the repository, and conversely, if decision vectors in the repository become dominated by those in the swarm, they will be discarded. This repeats for a desired number of iterations. At each iteration, the repository objectives and decision vectors are stored to their associated output files (see Section 11.3). -### 11.1.2 Decision Variable Transformations +### 11.1.2 Decision Variable Transformations Currently, the decision variables (which could consist of parameter values, for example) have a pre-defined transformation status. This status is referred to as *eqlog*, which allows for logarithmic transformation, but with different logarithmic bases for each of the decision variables. The decision variable with the greatest difference between upper and lower bounds (in terms of magnitude) is assigned a logarithm base of 10 during transformation. This is equivalent to the *log* option employed in much of the PEST and PEST++ suite for the variable PARTRANS. The logarithm base for the remaining decision variables are set such that the transformed range for those variables is equivalent to that of the widest one, whose aforementioned base is 10. This ensures that the variability of all transformed decision variables appears exactly the same to the PSO procedure, which enhances overall performance. 
This could result in some decision variables essentially having no transformation (equivalent to *none* for PARTRANS) or even some variables experiencing an expansion effect, where their transformed range is wider than the original one. Please see *Siade et al*, (2019) for more details. -## 11.1 Using PESTPP-PSO +## 11.1 Using PESTPP-PSO + -### 11.1.1 General +### 11.1.1 General PESTPP-PSO was developed using the FORTRAN interface provided within the PEST++ source code. Currently, PESTPP-PSO is only designed to operate in parallel, and the command to execute the “manager” is as follows (which differs slightly from the other PEST++ calling programs), -| pestpp-pso case*.*pst port | +| | |----------------------------| +| pestpp-pso case*.*pst port | where, *case* represents the base name for the modeling study and *port* is the port number over which communications occur (please see previous documentation in this manual on the general usage of PEST++, e.g., Chapter 5). The main control file follows the format of a standard PEST control file (see Chapter 4). The calling program, PESTPP-PSO, will obtain most of the data regarding the optimization problem from this PEST control file. That is, it will collect some control, parameter and observation data; Figure 11.1 displays which data is actually used by PESTPP-PSO (shaded in grey). Some of this differs from most other PEST++ programs. It is important to note that PESTPP-PSO is designed to be compatible with PEST and its utilities (e.g., PESTCHEK). So, even if PESTPP-PSO is not using some of the variables listed in Figure 11.1, a dummy value must be entered in their place; this can be anything the user wants so long as it’s consistent with the format of the variable as defined by PEST (i.e., the dummy value for an integer variable should still be an integer, a character string should be a character string, etc.). @@ -3096,15 +3181,15 @@ PESTPP-PSO must use another PEST++ calling program to initiate the “agents”. 
++PSO(*case*.pso) -
pcf
* control data
RSTFLE PESTMODE
NPAR NOBS NPARGP NPRIOR NOBSGP
NTPLFLE NINSFLE PRECIS DPOINT
RLAMBDA1 RLAMFAC PHIRATSUF PHIREDLAM NUMLAM
RELPARMAX FACPARMAX FACORIG
PHIREDSWH
NOPTMAX PHIREDSTP NPHISTP NPHINORED RELPARSTP NRELPAR
ICOV ICOR IEIG
* singular value decomposition
SVDMODE
MAXSING EIGTHRESH
EIGWRITE
* parameter groups
PARGPNME INCTYP DERINC DERINCLB FORCEN DERINCMUL DERMTHD
(one such line for each parameter group)
* parameter data
PARNME PARTRANS PARCHGLIM PARVAL1 PARLBND PARUBND PARGP SCALE OFFSET DERCOM
(one such line for each parameter)
PARNME PARTIED
(one such line for each tied parameter)
* observation groups
OBGNME
(one such line for each observation group)
* observation data
OBSNME OBSVAL WEIGHT OBGNME
(one such line for each observation)
* model command line
COMLINE
(one such line for each model command line)
* model input
TEMPFLE INFLE
(one such line for each template file)
* model output
INSFLE OUTFLE
(one such line for each instruction file)
* prior information
PILBL PIFAC * PARNME + PIFAC * log(PARNME) ... = PIVAL WEIGHT OBGNME
(one such line for each article of prior information)
* regularization
PHIMLIM PHIMACCEPT [FRACPHIM]
WFINIT WFMIN WFMAX
WFFAC WFTOL [IREGADJ]
++
++PSO(case.pst)
+
pcf
* control data
RSTFLE PESTMODE
NPAR NOBS NPARGP NPRIOR NOBSGP
NTPLFLE NINSFLE PRECIS DPOINT
RLAMBDA1 RLAMFAC PHIRATSUF PHIREDLAM NUMLAM
RELPARMAX FACPARMAX FACORIG
PHIREDSWH
NOPTMAX PHIREDSTP NPHISTP NPHINORED RELPARSTP NRELPAR
ICOV ICOR IEIG
* singular value decomposition
SVDMODE
MAXSING EIGTHRESH
EIGWRITE
* parameter groups
PARGPNME INCTYP DERINC DERINCLB FORCEN DERINCMUL DERMTHD
(one such line for each parameter group)
* parameter data
PARNME PARTRANS PARCHGLIM PARVAL1 PARLBND PARUBND PARGP SCALE OFFSET DERCOM
(one such line for each parameter)
PARNME PARTIED
(one such line for each tied parameter)
* observation groups
OBGNME
(one such line for each observation group)
* observation data
OBSNME OBSVAL WEIGHT OBGNME
(one such line for each observation)
* model command line
COMLINE
(one such line for each model command line)
* model input
TEMPFLE INFLE
(one such line for each template file)
* model output
INSFLE OUTFLE
(one such line for each instruction file)
* prior information
PILBL PIFAC * PARNME + PIFAC * log(PARNME) ... = PIVAL WEIGHT OBGNME
(one such line for each article of prior information)
* regularization
PHIMLIM PHIMACCEPT [FRACPHIM]
WFINIT WFMIN WFMAX
WFFAC WFTOL [IREGADJ]
++
++PSO(case.pso)
Figure 11.1. Variables comprising a minimalist PEST control file (see Figure 4.1), where the control variables used by PESTPP-PSO are shaded in grey. Note that the very last line designates the PSO control file. -###
11.1.2 Estimation Mode +### 11.2.2 Estimation Mode The algorithm employed in *estimation* mode is equivalent to the very basic form of PSO originally introduced by *Eberhart and Kennedy* (1995). Much of the basic mechanics of the algorithm can be summarized by Equation (11.3). The PSO control file for estimation mode will have a format as follows (“\*” sections can be in any order), -
* control data
RSTPSO NOBJGP NCON NFORG VERBOSE
NPOP C1 C2 ISEED
INITP VMAX IINERT FINERT INITER
NEIBR NNEIBR
* objective data
OBJNME OBJMETH
* constraint data
CONNME CONMETH UPLIM
(one such line for each constraint function)
+
* control data
RSTPSO NOBJGP NCON NFORG VERBOSE
NPOP C1 C2 ISEED
INITP VMAX IINERT FINERT INITER
NEIBR NNEIBR
* objective data
OBJNME OBJMETH
* constraint data
CONNME CONMETH UPLIM
(one such line for each constraint function)
Figure 11.2. Variables comprising the PESTPP-PSO control file containing PSO-specific control variables (in estimation mode). @@ -3170,11 +3255,11 @@ OBJNME is a character string and the name of the objective function being minimi CONNME is a character variable that defines the names of the constraints that are to be maintained during optimization (*f**i* in Equation 11.1). Each CONNME must correspond with an observation group in the PEST control file. CONMETH is similar to OBJMETH and determines if a constraint is comprised of a sum of squared residuals (enter a 1), or a general constraint that is treated as is (enter a 2). UPLIM is simply the upper limit applied to that constraint (*b**i* in Equation 11.1). Constraints with a lower limit can be converted to ones with an upper limit by simply multiplying the constraint value and its associated lower limit value by a -1. -###
11.2.3. Pareto mode +### 11.2.3 Pareto Mode The algorithm employed in *pareto* mode (i.e., multi-objective optimization) is fundamentally based upon the basic form of PSO (Equation 11.3); however, the conceptualization and logical aspects of its operation are relatively complex, and the reader is referred to *Siade et al.* (2019) for these technical details. The PESTPP-PSO control file for MOPSO is the same as that for standard PSO, with some minor modifications, -
* control data
RSTPSO NOBJGP NCON NFORG VERBOSE
NPOP C1 C2 ISEED
INITP VMAX IINERT FINERT INITER
NREP REPMODE RFIT RRAMP
* pareto groups
PTONME PTOLIM
(one such line for each pareto group)
* objective data
OBJNME OBJMETH PTOGPNME PTOW
(one such line for each objective function)
* constraint data
CONNME CONMETH UPLIM
(one such line for each constraint function)
+
* control data
RSTPSO NOBJGP NCON NFORG VERBOSE
NPOP C1 C2 ISEED
INITP VMAX IINERT FINERT INITER
NREP REPMODE RFIT RRAMP
* pareto groups
PTONME PTOLIM
(one such line for each pareto group)
* objective data
OBJNME OBJMETH PTOGPNME PTOW
(one such line for each objective function)
* constraint data
CONNME CONMETH UPLIM
(one such line for each constraint function)
Figure 11.3. Variables comprising the PESTPP-PSO control file containing PSO-specific control variables (in pareto mode). @@ -3204,7 +3289,7 @@ where, *f**j*adj is the adjusted fitness for the *j*-th re This real variable affects how *α* is adjusted at each iteration based on repository size, - (11.6)
+ (11.6)
where, *p*full is the percentage of the repository that is full. When the repository only has three positions, all fitness values are 1, so the value of *α* has no effect. Once the repository size becomes four or greater, the value for *α* begins to increase as a function of how full the repository is. The base value for *α* is 1.0, and then increases toward RFIT until the repository is full, in which case *α* equals RFIT. The value for RRAMP affects how quickly RFIT is reached. RRAMP cannot be 0.0; however, values close to 0.0 will yield an approximately linear increase in *α*. Negative values for RRAMP will cause *α* to approach RFIT more quickly, and the opposite applies to positive values. If you wish to have a constant value for *α* simply set RRAMP to a very large negative number, such as -5.0E+02; this will cause RFIT to be reached immediately. The converse is true for large positive values, i.e., *α* will remain at 1.0 and then suddenly jump to RFIT when the repository is full. The absolute value for RRAMP should not exceed 5.0E+02. *PTONME* and *PTOLIM* @@ -3220,41 +3305,40 @@ When using PESTPP-PSO, in either *estimation* or *pareto* mode, the initial swar To supply PESTPP-PSO with user-defined set of initial swarm positions, the user must supply a value of 2 for the control variable INITP (see Section 11.2.2), along with the path to an external text file containing these initial values. An example of a PSO control file for doing this is shown in Figure 11.4 (this is taken from the *Kursawe* (1991) benchmark problem), -
* control data
0 2 0 10 2
100 1.00E+00 1.00E+00 171
2 9.00E-01 4.00E-01 4.00E-01 1
lhs-initial-swarm.txt
100 2 5.0 3.0
0
2 -1
* pareto groups
obj01 0.00E+00
obj02 1.00E+03
* objective data
objfun01 2 obj01 1.00
objfun02 2 obj02 1.00
+
* control data
0 2 0 10 2
100 1.00E+00 1.00E+00 171
2 9.00E-01 4.00E-01 4.00E-01 1
lhs-initial-swarm.txt
100 2 5.0 3.0
0
2 -1
* pareto groups
obj01 0.00E+00
obj02 1.00E+03
* objective data
objfun01 2 obj01 1.00
objfun02 2 obj02 1.00
Figure 11.4. An example PSO control file where the initial swarm is set with an external file named *lhs-initial-swarm.txt*. The number of parameter values listed in the external initial-swarm file must be the same as the swarm size defined through the control variable NPOP. The format for this file is described in Figure 11.5; however, this format is likely to be extended to more flexible formats in the future, e.g., comma-separated-value files. -
NPOP
PARNME PARVAL-1 PARVAL-2 … PARVAL-NPOP
(one such line for each decision variable (or parameter))
+
NPOP
PARNME PARVAL-1 PARVAL-2 … PARVAL-NPOP
(one such line for each decision variable (or parameter))
Figure 11.5. Format of the (optional) initial-swarm external file that the user can use to define the initial swarm of the PSO algorithm, either in estimation or in Pareto modes. The external initial-swarm file can also be used in other ways. For example, if the user simply wishes to execute a large number of model-runs, e.g., from the output of a Monte Carlo algorithm, the user could develop an external initial-swarm file with these realizations listed. Then the user would set NPOP accordingly, along with NOPTMAX set to 0. Another example could be the case where the user wishes to restart the PSO algorithm from some iteration of a previous PSO run. In this case, the user could use the *case.pbs* (*estimation* mode) or the *case.par* (*pareto* mode) output file from a previous PSO run as the external initial-swarm file, as these output files use the same format as described in Figure 11.5. -##
+## 11.2 PESTPP-PSO Output Files -## 11.2 PESTPP-PSO Output Files Output files produced by PESTPP-PSO are listed in the following table. The contents of this table assume that the PEST control file for which PESTPP-PSO is executed is named *case.pst*. Table X.1 PESTPP-PSO output files. Note that each of these output files are updated after each iteration of the respective PSO algorithm. Therefore, if the user wishes to force-quit a simulation, much of the current status of the simulation will be contained in these output files. It is assumed that the name of the PEST control file is *case.pst*. -| File | Contents | -|------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| *case.rec* | The run record file. This file begins by echoing information provided in the PEST and PSO control files. It then records the history of the optimization process undertaken by PESTPP-PSO. The level of verbose-ness of this file can be controlled with the VERBOSE control variable in the PSO control file. | -| *case.pbs (estimation mode)* | A decision variable (parameter) value file. This file records the *p*-best particle positions at the end of the simulation. This file is updated after each iteration of the respective PSO algorithm and uses the same format as the external initial-swarm file described in Figure 11.5. | -| *case.gbs (estimation mode)* | A decision variable (parameter) file containing the single *g*-best particle position of the swarm at the end of the simulation. 
This file is updated after each iteration of the PSO algorithm and uses the same format as the external initial-swarm file described in Figure 11.5. | -| *case.obs* | This file contains the simulated observations of the *g*-best particle position (*estimation* mode) or the repository positions (*pareto* mode) at the end of the PSO or MOPSO simulation, respectively. This file is updated after each iteration of the respective PSO algorithm. | -| *case.rst* | The restart file. If PESTPP-PSO is restarted from a previous simulation (i.e., the control variable RSTPSO is set to 1), PESTPP-PSO will look for a file named *case.rst* to gain all the information necessary to restart the respective PSO simulation. | -| *case.rep (Pareto mode)* | This file contains the objective function values corresponding to the repository positions at the end of the simulation; that is, this file contains the Pareto front. This file will be updated at the end of each iteration. Furthermore, additional *case_x.rep* files will also be created at each iteration, where x is the iteration number. This helps the user visualise the convergence of the MOPSO algorithm. | -| *case.par (Pareto mode)* | This file contains the decision variable (or parameter) values associated with the objective function values that comprise the Pareto front (i.e., the weakly Pareto optimal set), at the end of the simulation. This file is also updated after each iteration and uses the same format as the external initial-swarm file described in Figure 11.5. 
| +| File | Contents | +|------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| *case.rec* | The run record file. This file begins by echoing information provided in the PEST and PSO control files. It then records the history of the optimization process undertaken by PESTPP-PSO. The level of verbose-ness of this file can be controlled with the VERBOSE control variable in the PSO control file. | +| *case.pbs (estimation mode)* | A decision variable (parameter) value file. This file records the *p*-best particle positions at the end of the simulation. This file is updated after each iteration of the respective PSO algorithm and uses the same format as the external initial-swarm file described in Figure 11.5. | +| *case.gbs (estimation mode)* | A decision variable (parameter) file containing the single *g*-best particle position of the swarm at the end of the simulation. This file is updated after each iteration of the PSO algorithm and uses the same format as the external initial-swarm file described in Figure 11.5. | +| *case.obs* | This file contains the simulated observations of the *g*-best particle position (*estimation* mode) or the repository positions (*pareto* mode) at the end of the PSO or MOPSO simulation, respectively. This file is updated after each iteration of the respective PSO algorithm. | +| *case.rst* | The restart file. If PESTPP-PSO is restarted from a previous simulation (i.e., the control variable RSTPSO is set to 1), PESTPP-PSO will look for a file named *case.rst* to gain all the information necessary to restart the respective PSO simulation. 
| +| *case.rep (Pareto mode)* | This file contains the objective function values corresponding to the repository positions at the end of the simulation; that is, this file contains the Pareto front. This file will be updated at the end of each iteration. Furthermore, additional *case\_x.rep* files will also be created at each iteration, where x is the iteration number. This helps the user visualise the convergence of the MOPSO algorithm. | +| *case.par (Pareto mode)* | This file contains the decision variable (or parameter) values associated with the objective function values that comprise the Pareto front (i.e., the weakly Pareto optimal set), at the end of the simulation. This file is also updated after each iteration and uses the same format as the external initial-swarm file described in Figure 11.5. | + +# 12. PESTPP-DA -# -# 12. PESTPP-DA +## 12.1 Introduction -## 12.1 Introduction PESTPP-DA is a generic data assimilation tool. It supports both batch and sequential assimilation. The former being the standard protocol that all tools in the PEST and PEST++ suites support: running the model forward for the entire period of simulation each time a model run is requested. However, sequential estimation is a different beast all together. Usually, sequential estimation requires running the model for a specific period of time, extract observed simulated equivalent, implement data assimilation to update parameters or dynamic states, and restart the forward model to simulate the next period of time. To allow PESTPP-DA to handle a wide range of assimilation schemes, the “assimilation cycle” concept is introduced. A “cycle” is usually a period of time during which specific forcings are applied and/or specific discrete-time observations are available to be assimilated. When data assimilation is employed, the simulation period can be divided into as many cycles as required for the problem. 
Cycles can be used to represent one-time step; in this case the forward model will be run (advanced in time) for one time step to implement an Ensemble Kalman Filter. In other cases, the entire simulation period can be represented using one cycle to implement an Ensemble Smoother. The archetypal sequential data assimilation algorithm is the ensemble Kalman filter (Evensen 2003). “Assimilation cycles” and assimilation schemes are discussed in detail throughout this chapter. @@ -3262,9 +3346,10 @@ To facilitate PESTPP-DA’s ability to advance cycles, it will normally be neces It is important to note that the solution equations implemented in PESTPP-DA are identical to those implemented in PESTPP-IES: the standard and MDA iterative form (Emerick and Reynolds, 2013) of the Kalman update equations (Evensen, 2003), as well as the iterative ensemble smoother equations of Chen and Oliver (2013). However, PESTPP-DA facilitates arbitrary cycles of assimilation, while PESTPP-IES is batch estimation only. The arbitrary cycle definitions support in PESTPP-DA allow users to define any form of sequential data assimilation from hourly to daily to arbitrary mixtures of hours, days, months, seasons, decades, etc. This flexibility is unique to PESTPP-DA and makes PESTPP-DA a very flexible, thus powerful, data-assimilation tool. In the following section, the theory of data assimilation and terminology used is introduced. -## 12.2 Theory +## 12.2 Theory -### 12.2.1 Background and Basic Equations + +### 12.2.1 Background and Basic Equations Data assimilation is the process of optimally combining uncertain model inputs (parameters and system states) and uncertain observations to estimate model parameters and states as the modelled system evolves in time. 
The following concepts are used to describe a data assimilation problem: @@ -3288,7 +3373,7 @@ Whereas the solution mechanism and approach are similar, the primary difference As with PESTPP-IES, PESTPP-DA uses the *NOPTMAX* control variable to define the number of iterations to apply the solution equation. And, as with PESTPP-IES, PESTPP-DA uses the *NOPTMAX* values of 0 and -1 to define a “control file parameter value” run (a single model run) and a prior Monte Carlo run, respectively. For the control file parameter value run, PESTPP-DA uses the values of parameters listed in the control file, along with the cycle information to advance through each cycle, evaluating the control file parameters, recording simulated outputs and updating dynamic states. As the name implies, the prior Monte Carlo analysis with PESTPP-DA evaluates the prior parameter ensemble for each cycle, recording the simulated outputs and updating the dynamic states. -### 12.2.2 Schemes for Assimilating Temporal Data +### 12.2.2 Schemes for Assimilating Temporal Data The frequency of assimilating observations depends on the problem and on the need of the practitioner. For example, for weather forecasting atmospheric observations are typically assimilated at high frequency (ref) (in the order of minutes), while groundwater systems, which evolve slowly, might need assimilation frequency in the order of months or years. Sometimes, practitioner might be interested in sequentially assimilating observations for every model simulated time period, or, in other settings, all available historic observations may be assimilated simultaneously. @@ -3298,11 +3383,11 @@ The time cycle might consist of a single model time step, multiple model time st Although EnKF and EnKS schemes assimilate data on multiple time cycles, they differ in the way they restart the model (Ref). 
EnKF (Figure \*\*\*) restarts the model using the most updated state as initial conditions, while EnKS (Figure \*\*\*) restarts the model from a user-defined time point (typically from the beginning of the simulation). PESTPP-DA implements a highly flexible data assimilation approach using standard template files and instruction files associated with each time cycle. For example, to implement EnKF (Fig \*\*\*), the user designs the structure of template files and instruction files to update model inputs using the most updated dynamic state and restart the model starting from the end of the last time cycle. Instruction files and template files are used to define how to restart the model, which allows the user to choose a rich combination of EnKF, EnKS, and ES. -### 12.2.2.1 Batch Data Assimilation with PESTPP-DA +### 12.2.2.1 Batch Data Assimilation with PESTPP-DA Batch data assimilation (“Ensemble Smoother”) with PESTPP-DA is conceptually the same process as used in the (optionally iterative) solution process in PESTPP-IES. This method is widely used in model calibration. If no cycle information is found in the control file, PESTPP-DA will resort to a batch assimilation process by assigning all parameters, template and instruction files a cycle value of -1, and all observations a cycle value of 0; alternatively, the user can explicitly assign all observations, states, and parameters to the same cycle (cycle = 0). See PESTPP-IES for more information on ensemble-based batch data assimilation. -### 12.2.2.2 Sequential Data Assimilation with PESTPP-DA +### 12.2.2.2 Sequential Data Assimilation with PESTPP-DA The concept of sequential data assimilation can be thought of as applying the solution scheme discretely for each cycle, then advancing to the next cycle. This process is repeated for all cycles. 
@@ -3314,7 +3399,7 @@ The use of sequential data assimilation has several important implications when Another implication of sequential assimilation within the PESTPP-DA framework is that some template and/or instruction files may only apply to a given cycle or group of cycles. This is in contrast to the standard batch assimilation, where all template and instruction files are used for every model run. This means users need to define cycle information not only for parameter and observation data control file sections, but also for the template and instruction file sections. The need to define cycle information was one of the driving factors behind the development of the version 2 pest control file format. -### 12.2.4 State estimation, parameter estimation and joint state-parameter estimation +### 12.2.4 State estimation, parameter estimation and joint state-parameter estimation In the standard batch assimilation (Ensemble Smoother) scheme (without dynamic states and with a single assimilation cycle), there are no dynamic states to be estimated, only static and dynamic parameters (recall dynamic parameters include quantities like forcings like stress period recharge rates which can still be estimated in a smoother/batch formulation). @@ -3322,36 +3407,36 @@ However, in sequential estimation, where dynamic states are used to advance the Users will have to decided which formulation best suits their problem and setup the control file appropriately. The recommended practice is to include both parameters and dynamic states in the control file and use PEST’s parameter transformation to enable/disable estimation and/or states depending on the desired formulation – marking dynamic states as “fixed” results in treating them as “known” and simply transferring the final simulated conditions to the specified initial conditions when advancing the cycle. Or, alternatively, marking the non-state parameters as “fixed” results in a “state estimation” formulation. 
-Dynamic states, which are central to the operation of PESTPP-DA, are identified in two ways within the pest interface. First, any parameters and observations in the control file that share the same name are treated as dynamic states. The other method to define dynamic states is to add a column to the external observation data CSV file named “state_par_link”. For each observation in the control file that is a dynamic state, the “state_par_link” column should list which parameter the observation “links” to so that a dynamic state can be handled. +Dynamic states, which are central to the operation of PESTPP-DA, are identified in two ways within the pest interface. First, any parameters and observations in the control file that share the same name are treated as dynamic states. The other method to define dynamic states is to add a column to the external observation data CSV file named “state\_par\_link”. For each observation in the control file that is a dynamic state, the “state\_par\_link” column should list which parameter the observation “links” to so that a dynamic state can be handled. For example, in a groundwater modelling context, let’s assume we want to treat the simulated groundwater levels as dynamic states so that we can apply PESTPP-DA in a sequential data assimilation analysis using annual cycles for a very simple groundwater model – a cross section model with 10 model cells. To allow PESTPP-DA to advance the model over a series of annual assimilation cycles, we need to read the final simulated groundwater levels at the end of each year and also write initial groundwater levels as inputs to the model. First, we would construct an instruction file to read the final simulated groundwater levels for all 10 active model nodes. Second, we will need to construct a template file to write initial groundwaters for all ten active model nodes. 
We now need to identify the linkage between model output quantities (named in the instruction files) and the model input quantities (named in the template files) so that PESTPP-DA recognizes these as dynamic states, not as more typical “observations” and -“parameters”. To do this, we could take care to use the same naming scheme in both the template and instruction files so that the resulting parameter and observation quantities in the control file have the same names (e.g., “head_cell_1”,”head_cell_2”, etc). Or we could add a column to the external observation data CSV file called “state_par_link” and populate this column with the control file parameter name that corresponds to the current control file observation name. For example, if we named the simulated groundwater level observation for model cell 1 “simulated_gwlev_1” and named the initial groundwater level parameter for model cell 1 “init_gwlev_1”, then on the row of the observation data csv file that corresponds to “simulated_gwlev_1”, you should enter the value “init_gwlev_1” in the “state_par_link” column. Note that PESTPP-DA also allows you to use a mixture of the common-name and “state_par_link” approaches, and for observations that are not dynamic states, you should just leave the “par_state_link” column empty. +“parameters”. To do this, we could take care to use the same naming scheme in both the template and instruction files so that the resulting parameter and observation quantities in the control file have the same names (e.g., “head\_cell\_1”,”head\_cell\_2”, etc). Or we could add a column to the external observation data CSV file called “state\_par\_link” and populate this column with the control file parameter name that corresponds to the current control file observation name. 
For example, if we named the simulated groundwater level observation for model cell 1 “simulated\_gwlev\_1” and named the initial groundwater level parameter for model cell 1 “init\_gwlev\_1”, then on the row of the observation data csv file that corresponds to “simulated\_gwlev\_1”, you should enter the value “init\_gwlev\_1” in the “state\_par\_link” column. Note that PESTPP-DA also allows you to use a mixture of the common-name and “state\_par\_link” approaches, and for observations that are not dynamic states, you should just leave the “state\_par\_link” column empty. -It is important to note the dynamic states are subject to PEST’s “parameter transformation” and the “parameter bounds” information listed in the parameter data section of the control file. For example, if a dynamic state is assigned a “fixed” transformation status in the parameter data section of the control file, then that dynamic state is not adjusted during the solution process. Rather, it is simply transferred from final simulated value to specified initial value between cycles. Additionally, for dynamic states that are adjusted during the solution process (e.g., transformation status of “none” or “log”), the estimated values for these dynamic states are subject to the parameter upper and lower bounds listed in the control file if the *da_enforce_bounds* argument is true. This is an important consideration and users should carefully consider appropriate and physically reasonable bounds for dynamic states. +It is important to note the dynamic states are subject to PEST’s “parameter transformation” and the “parameter bounds” information listed in the parameter data section of the control file. For example, if a dynamic state is assigned a “fixed” transformation status in the parameter data section of the control file, then that dynamic state is not adjusted during the solution process. Rather, it is simply transferred from final simulated value to specified initial value between cycles.
Additionally, for dynamic states that are adjusted during the solution process (e.g., transformation status of “none” or “log”), the estimated values for these dynamic states are subject to the parameter upper and lower bounds listed in the control file if the *da\_enforce\_bounds* argument is true. This is an important consideration and users should carefully consider appropriate and physically reasonable bounds for dynamic states. Below are several example sequential-estimation use cases that PESTPP-DA can be applied to. -Sequential Parameter Estimation – dynamic states are not estimated but must be tracked by pestpp-da so that the model can be evolved forward in time. Users should include parameters for the quantities that are to be estimated as well as parameters for the initial conditions that the model uses initialize its solution process. Observations should be added to record all simulated states and these state observations should be linked to the state parameter quantities above either through sharing the same names and/or through the optional “state_par_link” entry in the observation data section of the control file. The state parameter quantities should be marked as “fixed” in the control file. +Sequential Parameter Estimation – dynamic states are not estimated but must be tracked by pestpp-da so that the model can be evolved forward in time. Users should include parameters for the quantities that are to be estimated as well as parameters for the initial conditions that the model uses initialize its solution process. Observations should be added to record all simulated states and these state observations should be linked to the state parameter quantities above either through sharing the same names and/or through the optional “state\_par\_link” entry in the observation data section of the control file. The state parameter quantities should be marked as “fixed” in the control file. 
Sequential (joint) Initial-State-Parameter Estimation – the initial dynamic states are estimated along with any parameters (the presence of non-state parameters does not change this formulation). This formulation is identical to the Sequential Parameter Estimation use case except the initial state parameters should not be “fixed”. -Sequential (joint) Final-State-Parameter Estimation – the final dynamic states of each cycle are estimated along with any parameters (the presence of non-state parameters does not change this formulation). Users should add parameters for the initial states of each cycle as well as parameters to represent the final states of each cycle. These final-state parameters are never used in the model directly, they are only included in the control file as parameters to provide a mechanism to estimate the final states of each cycle. The initial state parameters should be marked as fixed, meaning the initial conditions of each cycle are not being estimated. At the completion of each cycle, PESTPP-DA transfers the estimated values of the final-state parameters to the (fixed) initial-state parameters, essentially updating the initial conditions of the next cycle. Users must tell PESTPP-DA how the final-state parameters relate to the initial-state parameters. This is done by supplying a “state_par_link” column in the parameter data section of the control file. For each final-state parameter, the value of “state_par_link” should be the name of the corresponding initial-state parameter. +Sequential (joint) Final-State-Parameter Estimation – the final dynamic states of each cycle are estimated along with any parameters (the presence of non-state parameters does not change this formulation). Users should add parameters for the initial states of each cycle as well as parameters to represent the final states of each cycle. 
These final-state parameters are never used in the model directly, they are only included in the control file as parameters to provide a mechanism to estimate the final states of each cycle. The initial state parameters should be marked as fixed, meaning the initial conditions of each cycle are not being estimated. At the completion of each cycle, PESTPP-DA transfers the estimated values of the final-state parameters to the (fixed) initial-state parameters, essentially updating the initial conditions of the next cycle. Users must tell PESTPP-DA how the final-state parameters relate to the initial-state parameters. This is done by supplying a “state\_par\_link” column in the parameter data section of the control file. For each final-state parameter, the value of “state\_par\_link” should be the name of the corresponding initial-state parameter. Users are cautioned against using final-state estimation because this formulation may violate the physical processes of the underlying model. For example, the estimated final-state parameter values in a groundwater model would be the groundwater level in every active model cell at the end of each cycle. These estimated water levels, derived from the PESTPP-DA solution process, may not be in harmony with the forcings, properties, or transient character of the simulation. However, in some situations, using estimated final states might be advantageous, depending on the forecast(s) of interest to be made with the model. -You can switch between using estimated and simulated final states by changing the value of *da_use_simulated_states*. If final-state parameters have been included in the control file and you are switching to simulated states, it will be useful to mark the final-states parameters as fixed to reduce the number of quantities to be estimated. +You can switch between using estimated and simulated final states by changing the value of *da\_use\_simulated\_states*. 
If final-state parameters have been included in the control file and you are switching to simulated states, it will be useful to mark the final-states parameters as fixed to reduce the number of quantities to be estimated. Sequential (joint) Final-and-Initial-State-Parameter Estimation – Estimating the initial and final states of each cycle, along with any parameters. This formulation is setup the same as the Sequential (joint) Final-State-Parameter Estimation except the initial-state parameters are not fixed. Like the Sequential (joint) Final-State-Parameter Estimation, users are cautioned against the formulation because it also may violate the physics of the underlying simulation. -### 12.2.4 Parameter, Observation and Weight Cycle Tables +### 12.2.4 Parameter, Observation and Weight Cycle Tables In sequential assimilation, PESTPP-DA needs to have access to the dynamic states. Invariably, one or more of these states may also correspond to a historic observation. For example, in a groundwater model, at different times and spatial locations during the historic period, groundwater levels may have been measured. Since PESTPP-DA is tracking the final simulated conditions, it, by default, is tracking the simulated equivalent to these observations (if you ignore the need for spatial and temporal interpolation from model nodes to observations and model output times to observations). In this case, users need to supply an observation value and weight for these observations so that they can be used in the solution scheme, in words, so that the information in these historic observations can be assimilated. But *what if the same model node also has observations during other cycles and what if these other observations have differencing quality or should be assigned a unique weight for other reasons?* Users could accommodate this situation by making cycle specific instruction files with unique observation names so that they are listed as unique entries in the control file. 
However, *this may result in a large number* of nearly identical instruction files to read the same model output locations for multiple cycles, which can make the problem setup overly complicated. -A more straightforward option is to specify the *da_observation_cycle_table* and, optionally, *da_weight_cycle_table*. These two csv files list cycles as rows and observation names as columns. Any observation listed in these tables must be listed in the control file and must be assigned a non-zero weight in the control file. Additionally, any observation listed in either of these two tables must have a cycle of -1 in the control file – the cycle(s) that a table-listed observation quantity applies to is determined from the tables. Not all non-zero weighted observations need to be listed. The observation cycle table allows users to specify the value of an observation for a specific cycle, overriding the observation value in the control, while the weight cycle table allows users to specify the weight value pertaining to the observation for a given cycle, overriding the weight value in the control file. In this way, spatial locations where repeated state measurements have been or are being collected can be represented in the control file as a single observation listed in a single instruction file, but the observed value and associated weighted can be provided in a more transparent and compact way. For cycles where observations values are not recorded, the observation cycle table should have an empty/null entry. \[add a figure that shows this table\]. +A more straightforward option is to specify the *da\_observation\_cycle\_table* and, optionally, *da\_weight\_cycle\_table*. These two csv files list cycles as rows and observation names as columns. Any observation listed in these tables must be listed in the control file and must be assigned a non-zero weight in the control file. 
Additionally, any observation listed in either of these two tables must have a cycle of -1 in the control file – the cycle(s) that a table-listed observation quantity applies to is determined from the tables. Not all non-zero weighted observations need to be listed. The observation cycle table allows users to specify the value of an observation for a specific cycle, overriding the observation value in the control, while the weight cycle table allows users to specify the weight value pertaining to the observation for a given cycle, overriding the weight value in the control file. In this way, spatial locations where repeated state measurements have been or are being collected can be represented in the control file as a single observation listed in a single instruction file, but the observed value and associated weighted can be provided in a more transparent and compact way. For cycles where observations values are not recorded, the observation cycle table should have an empty/null entry. \[add a figure that shows this table\]. In a similar way, when “fixed” parameters have values that may change across cycles, a “parameter cycle table” can be used. For example, one of the fixed (non-adjustable) parameter quantities listed in the control file may represent the length of the time simulated for each cycle. Rather than constructing a unique template file for each cycle and having a unique time-length parameter for each cycle, users can supply a parameter cycle table that lists the cycle-specific values for any/all fixed parameters – not all fixed parameters need to be listed in the parameter cycle table, only those fixed parameters that have values that change across cycles. The row index of this table should list parameter names and the column header lists the cycle values. 
Just as with the observation and weight cycle tables, any parameter listed in the parameter cycle table should have a cycle value of -1 because the cycles to which each parameter applies are determined from the column header. If a table-listed parameter does not apply to a given cycle, then that row-column entry in the table should be left blank or empty. Only “fixed” parameters should be listed in the parameter cycle table. -### 12.2.5 Steps for Data Assimilation implementation +### 12.2.5 Steps for Data Assimilation implementation Given a model with a set of ASCII input files, executable file (or a chain of executable files) that represent a forward model run simulating the physical system, a resulting set of output files, and a set of transient observations. The following steps would implement data assimilation: @@ -3371,17 +3456,17 @@ Given a model with a set of ASCII input files, executable file (or a chain of ex 8. Generate an ensemble of error perturbations that will be added to observations. PESTPP-DA can generate this ensemble based on the weight of the observation (see\*\*\*). -9. In sequential data assimilation is desired, identify the model output names for dynamic states. Dynamic states must exist in the parameter data and observation data. Use “state_par_link” to link states in the observation data and state in the parameter data. You may use the same name in both parameter and observation section to flag dynamic states. +9. If sequential data assimilation is desired, identify the model output names for dynamic states. Dynamic states must exist in the parameter data and observation data. Use “state\_par\_link” to link states in the observation data and state in the parameter data. You may use the same name in both parameter and observation section to flag dynamic states. 10. Consider using localization when the number of states/parameters is large and the ensemble size is small (see \*\* for more details). 11.
Choose either iterative or MDA solution method. -### 12.2.12 Running PESTPP-DA +### 12.2.12 Running PESTPP-DA PESTPP-DA is run exactly like all other tools in the PEST++ suite – See section 5 of this manual for how to run the tools in the PEST++ suite. As is described in that section, model runs can be undertaken in series or in parallel. In either case, a prematurely terminated PESTPP-DA run can be restarted by supplying the requisite global parameter and observation ensemble files (described below). -To restart at a given cycle number, users can supply the optional *da_hotstart_cycle* argument along with the requisite parameter, observation and noise ensemble files. To ensure a proper restart (especially if realizations have failed during evaluation), users should supply the corresponding files written by PESTPP-DA. +To restart at a given cycle number, users can supply the optional *da\_hotstart\_cycle* argument along with the requisite parameter, observation and noise ensemble files. To ensure a proper restart (especially if realizations have failed during evaluation), users should supply the corresponding files written by PESTPP-DA. The version 2 pest control file format is required to specify cycle numbers. If no cycle numbers are found, then PESTPP-DA proceeds with a batch assimilation (e.g., smoother) assuming all parameters and observations are in cycle 0. @@ -3389,9 +3474,9 @@ As previously discussed, if NOPTMAX is set to zero, PESTPP-DA will run the model One important note related to the PEST control and parallel run management: because the template and/or instruction files may vary depending on the cycle number, the PESTPP-DA master instance passes the cycle number to the agents at run time so that the agents know what parameter and observation control file quantities are being used and which template and instruction files are in use. 
So, it is important that the agents are populated with a control file that lists the same cycles information as the control file that the master is using. -Given the complexity of cycle-dependent interface (parameters, observations, template files, instruction files), users may wish to “debug” or test this interface by setting *debug_parse_only* equal to “true” in the control file. With this option, PESTPP-DA will process each listed cycle, checking the parameters and observations against the templates and instruction files to ensure they agree. After this checking, PESTPP-DA will exit. +Given the complexity of cycle-dependent interface (parameters, observations, template files, instruction files), users may wish to “debug” or test this interface by setting *debug\_parse\_only* equal to “true” in the control file. With this option, PESTPP-DA will process each listed cycle, checking the parameters and observations against the templates and instruction files to ensure they agree. After this checking, PESTPP-DA will exit. -Note that comprehensive interface checking is also made during the initialization of each PESTPP-DA analysis if NOPTMAX is not equal to zero because it can be quickest to check the cycles first, rather than start the PESTPP-DA process only to discover a cycle is not defined properly. However, for high dimensional problems, this checking can take significant time. Therefore, users can disable this interface checking by setting *check_tplins* to “false” in the control file. +Note that comprehensive interface checking is also made during the initialization of each PESTPP-DA analysis if NOPTMAX is not equal to zero because it can be quickest to check the cycles first, rather than start the PESTPP-DA process only to discover a cycle is not defined properly. However, for high dimensional problems, this checking can take significant time. Therefore, users can disable this interface checking by setting *check\_tplins* to “false” in the control file. 
The “cycle” values assigned to the various components in the control file can be assigned as integers or as a string. The string is colon-delimited, zero-based, start-stop-stride quantity. For example, if users want to have the same recharge multiplier parameter applied every March of a monthly-based simulation that simulates several years of time, the cycle value for the march recharge multiplier could be specified as “2::12”, which reads “starting with the 3rd cycle and every 12th cycle through the end of the cycles”. If users wanted a parameter to apply every cycle from the 4th to the 15th, the cycle string value would be “3:16”. A few more examples of string-based cycle values: @@ -3403,64 +3488,68 @@ The “cycle” values assigned to the various components in the control file ca In this way, the string-based cycle values allow users to apply sophisticated rules about how parameters and/or observations are used across multiple cycles. -### 12.2.13 Other uses for PESTPP-DA +### 12.2.13 Other uses for PESTPP-DA Although PESTPP-DA is a tool designed for flexible sequential and batch data assimilation, the generalized nature of the cycle concept, in concert with the observation and weight cycle tables, also provides a range of other functionality. In this way, the cycle concept can be thought of as an outer iteration process. For example, users can undertake the advanced “direct predictive hypothesis testing” analysis (e.g., Moore et al., 2010) with PESTPP-DA by constructing a generic weight cycle table where each cycle includes increasing weight on a control file observation quantity that represents a simulated outcome of interest. For example, assume a model has been constructed to simulate surface-water/groundwater exchange (SGE) along an important river reach. Further assume that the simulated SGE along this reach is included in the control file as an observation. 
To test the hypothesis that the SGE for this reach could be zero, users should set the observation value quantity in the control file to 0.0 and set the weight to 1.0 (this weight will not be used but simply activates this quantity in the PESTPP-DA cycle process). Now users can construct a weight cycle table. Let’s use 10 cycles. For the historic observations that are being assimilated, the entries for all cycles in the weight cycle table for these observations should be identical to the weights in the control file. The entries for the SGE “observation” in the weight cycle table should slowly increase from 0.0 in the first cycle to a value large enough to dominate the objective function in the last cycle. Conceptually, during each PESTPP-DA “cycle”, an (iterative) ensemble smoother formulation will be used to minimize the objective function, but as cycles progress, the desire to force the SGE towards zero increasingly features in the objective function. In this way, the compatibility between fitting the historic observations and the ability to make SGE be zero is directly tested. If the ability to fit the past observations is maintained while also making the simulated SGE zero, then one cannot reject the hypothesis that the SGE could be zero on the basis of compatibility with historic observations. This technique is very similar to “pareto mode” in PEST(\_HP), except here, we can take advantage of the computational efficiency of the iterative ensemble solver in PESTPP-DA. Figure 12.XXX depicts the results of such an analysis. -Chart, scatter chart Description automatically generated +Chart, scatter chart Description automatically generated Figure 12.XXX. Results of a direct predictive hypothesis testing analysis where the relation between fitting historic observations and a desire to make surface-water/groundwater exchange (SGE) zero is evaluated.
The ensemble-based pareto trade-off between these two quantities shows that simulating an SGE of zero is not compatible with the historic observations. -### 12.2.14 PESTPP-DA Output Files +### 12.2.14 PESTPP-DA Output Files The following table summarizes the contents of files that are recorded by PESTPP-DA when it is asked to undertake highly-parameterized inversion. Most of these have been discussed above. It is assumed that the PEST control file on which the inversion process is based is named *case.pst*. Since the parameters and observations being used can change across cycles, the PESTPP-DA output files for a given cycle may not contain all of the parameters and observations listed in the control file. However, any file tagged with “global” in the name will contain all parameters and observations listed in the control file. -
FileContents
case.recRun record file. This file records a complete history of the inversion process. It is available for user-inspection at any time during that process.
case.rmrParallel run management record file.
case.logperformance record. This file records the times commenced and completed various processing tasks.
case.global.<cycle>.pe.csv
case.global.<cycle>.pe.jcb
The “global” parameter ensemble at the end of cycle <cycle>. Depending on the value of SAVE_BINARY, the file may be stored in csv format or binary format
case.global.<cycle>.oe.csv
case.global.<cycle>.oe.jcb
The “global” simulated output (e.g., observation) ensemble at the end of cycle <cycle>. Depending on the value of SAVE_BINARY, the file may be stored in csv format or binary format
case.global.<cycle>.obs+noise..csv
case.global.<cycle>.obs+noise.jcb
The “global” observations plus noise ensemble at the end of cycle <cycle>. Depending on the value of SAVE_BINARY, the file may be stored in csv format or binary format
case.<cycle>.<iter>.par.csv
case.<cycle>.<iter>.par.jcb
The parameter ensemble at the end of cycle <cycle> and iteration <iter>. Depending on the value of SAVE_BINARY, the file may be stored in csv format or binary format
case.<cycle>.<iter>.obs.csv
case.<cycle>.<iter>.obs.jcb
The simulated output (e.g., observation) ensemble at the end of cycle <cycle> and iteration <iter>. Depending on the value of SAVE_BINARY, the file may be stored in csv format or binary format
case.<cycle>.obs+noise.csv
case.<cycle>.obs+noise.jcb
The observations plus noise ensemble at the start of cycle <cycle>. Depending on the value of SAVE_BINARY, the file may be stored in csv format or binary format.
case.<cycle>.<iter>.base.parA pest parameter value file for the “base” realization if present in the ensemble
case.<cycle>.<iter>.base.reiA pest residual value file for the “base” realization if present in the ensemble
case.global.prior.pe.csv
case.gobal.prior.pe.jcb
The global prior parameter ensemble. Depending on the value of SAVE_BINARY, the file may be stored in csv format or binary format
case.global.phi.actual.csvThe global actual objective function (phi) ensemble csv record. “actual” refers to fact that these objective function values do not rely on the noise realizations.
Case.global.<cycle>.< iter>.pcs.csvThe global parameter change summary for cycle <cycle> after iteration <iter>
+
File
Contents
case.rec
Run record file. This file records a complete history of the inversion process. It is available for user-inspection at any time during that process.
case.rmr
Parallel run management record file.
case.log
Performance record. This file records the times at which various processing tasks commenced and completed.
case.global.<cycle>.pe.csv
case.global.<cycle>.pe.jcb
The “global” parameter ensemble at the end of cycle <cycle>. Depending on the value of SAVE_BINARY, the file may be stored in csv format or binary format
case.global.<cycle>.oe.csv
case.global.<cycle>.oe.jcb
The “global” simulated output (e.g., observation) ensemble at the end of cycle <cycle>. Depending on the value of SAVE_BINARY, the file may be stored in csv format or binary format
case.global.<cycle>.obs+noise.csv
case.global.<cycle>.obs+noise.jcb
The “global” observations plus noise ensemble at the end of cycle <cycle>. Depending on the value of SAVE_BINARY, the file may be stored in csv format or binary format
case.<cycle>.<iter>.par.csv
case.<cycle>.<iter>.par.jcb
The parameter ensemble at the end of cycle <cycle> and iteration <iter>. Depending on the value of SAVE_BINARY, the file may be stored in csv format or binary format
case.<cycle>.<iter>.obs.csv
case.<cycle>.<iter>.obs.jcb
The simulated output (e.g., observation) ensemble at the end of cycle <cycle> and iteration <iter>. Depending on the value of SAVE_BINARY, the file may be stored in csv format or binary format
case.<cycle>.obs+noise.csv
case.<cycle>.obs+noise.jcb
The observations plus noise ensemble at the start of cycle <cycle>. Depending on the value of SAVE_BINARY, the file may be stored in csv format or binary format.
case.<cycle>.<iter>.base.par
A PEST parameter value file for the “base” realization if present in the ensemble
case.<cycle>.<iter>.base.rei
A PEST residual value file for the “base” realization if present in the ensemble
case.global.prior.pe.csv
case.global.prior.pe.jcb
The global prior parameter ensemble. Depending on the value of SAVE_BINARY, the file may be stored in csv format or binary format
case.global.phi.actual.csv
The global actual objective function (phi) ensemble csv record. “actual” refers to the fact that these objective function values do not rely on the noise realizations.
case.global.<cycle>.<iter>.pcs.csv
The global parameter change summary for cycle <cycle> after iteration <iter>
Table 12.1. Files recorded by PESTPP-DA. -##
12.4 Summary of PESTPP-DA Control Variables +## 12.4 Summary of PESTPP-DA Control Variables + -### 12.4.1 General +### 12.4.1 General Like all the tools in the PEST++ suite, PESTPP-DA uses a control file. However, because the sequential assimilation process that is unique to PESTPP-DA, it requires the use of the version 2 control file, and this control file should use external csv files for all sections. These external csv files have a column labelled “cycle” for parameter data, observation data, model input and model output information. TODO: Add use case table images here -### 12.4.2 Control Variables in the PEST Control File +### 12.4.2 Control Variables in the PEST Control File -As has been discussed, PESTPP-DA shares its solution techniques with PESTPP-IES, so, it stands to reason that PESTPP-DA would use many of the same optional control file arguments as PESTPP-IES. For example, both PESTPP-DA and PESTPP-IES use a prior parameter ensemble and users can supply these ensembles through existing files. In PESTPP-IES, this ensemble can optionally be supplied as *ies_parameter_ensemble.* In PESTPP-DA, the corresponding argument is *da_parameter_ensemble*. In fact, all PESTPP-IES arguments are also supported by PESTPP-DA – every single one! And users can have both *ies_parameter_ensemble* and *da_parameter_ensemble* listed and PESTPP-DA will use the “da” argument. However, if only the “ies” argument is suppled, PESTPP-DA will use that value. In this way, users can use the same arguments for both PESTPP-DA and PESTPP-IES. +As has been discussed, PESTPP-DA shares its solution techniques with PESTPP-IES, so, it stands to reason that PESTPP-DA would use many of the same optional control file arguments as PESTPP-IES. For example, both PESTPP-DA and PESTPP-IES use a prior parameter ensemble and users can supply these ensembles through existing files. 
In PESTPP-IES, this ensemble can optionally be supplied as *ies\_parameter\_ensemble.* In PESTPP-DA, the corresponding argument is *da\_parameter\_ensemble*. In fact, all PESTPP-IES arguments are also supported by PESTPP-DA – every single one! And users can have both *ies\_parameter\_ensemble* and *da\_parameter\_ensemble* listed and PESTPP-DA will use the “da” argument. However, if only the “ies” argument is suppled, PESTPP-DA will use that value. In this way, users can use the same arguments for both PESTPP-DA and PESTPP-IES. There are however, a few PESTPP-DA arguments that only apply to PESTPP-DA, these being the arguments that apply the cycle control process. -### 12.4.3 PEST++ Control Variables +### 12.4.3 PEST++ Control Variables Table 12.XXX lists PEST++ control variables that are specific to only PESTPP-DA; many, many, many other optional control variables that can be used with PESTPP-DA are listed in section 9.4 . All of these are optional. If a variable is not supplied, a default value is employed. The value of the default is presented along with the name of each variable in the table below. Variables are grouped in approximate accordance with their roles. Variables discussed in section 5.3.6 that control parallel run management are not listed in table 12.XXX. -| Variable | Type | Role | -|---------------------------------|----------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| *da_observation_cycle_table()* | text | The name of a CSV file representing the cycle-specific observation values. 
The row index should be integer cycle numbers and the column labels (header) should be observation names from the control file – these observation names in the control must be assigned a non-zero weight to activate their use in this table. Missing values should be represented by an empty entry. | -| *da_weight_cycle_table()* | text | The name of a CSV file representing the cycle-specific weight values. The row index should be integer cycle numbers and the column labels (header) should be observation names from the control file – these observation names in the control must be assigned a non-zero weight to activate their use in this table. Missing values should be represented by an empty entry. | -| *da_hostart_cycle()* | integer | The cycle number to start PESTPP-DA assimilation/simulation process. If no parameter and restart observation ensemble files are provided, a prior parameter ensemble is generated and used. If this argument is not supplied, the minimum cycle value found in control file quantities is used. | -| *Da_stop_cycle()* | integer | The cycle number to stop PESTPP-DA on. If not supplied, PESTPP-DA will process all cycles found. | -| *Da_use_simulated_states(true)* | bool | Use the simulated states at the end of each cycle as the initial states for the next cycle. This option should only be set to False for cases that include both initial and final state parameters quantities. As mentioned above, users are caution against setting this option to False. | -| *Da_noptmax_schedule* | string | A two column ascii filename. The columns in this file should be integers can be space, tab, comma delimited. The integers correspond to the cycle number and noptmax value to use. For cycles that are not listed in the is file, the value of noptmax in the control file is used. In this way, users can control how many iterations should be used for each assimilation cycle. 
| +| Variable | Type | Role | +|------------------------------------|----------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| *da\_observation\_cycle\_table()* | text | The name of a CSV file representing the cycle-specific observation values. The row index should be integer cycle numbers and the column labels (header) should be observation names from the control file – these observation names in the control must be assigned a non-zero weight to activate their use in this table. Missing values should be represented by an empty entry. | +| *da\_weight\_cycle\_table()* | text | The name of a CSV file representing the cycle-specific weight values. The row index should be integer cycle numbers and the column labels (header) should be observation names from the control file – these observation names in the control must be assigned a non-zero weight to activate their use in this table. Missing values should be represented by an empty entry. | +| *da\_hostart\_cycle()* | integer | The cycle number to start PESTPP-DA assimilation/simulation process. If no parameter and restart observation ensemble files are provided, a prior parameter ensemble is generated and used. If this argument is not supplied, the minimum cycle value found in control file quantities is used. | +| *Da\_stop\_cycle()* | integer | The cycle number to stop PESTPP-DA on. If not supplied, PESTPP-DA will process all cycles found. | +| *Da\_use\_simulated\_states(true)* | bool | Use the simulated states at the end of each cycle as the initial states for the next cycle. This option should only be set to False for cases that include both initial and final state parameters quantities. 
As mentioned above, users are caution against setting this option to False. | +| *Da\_noptmax\_schedule* | string | A two column ascii filename. The columns in this file should be integers can be space, tab, comma delimited. The integers correspond to the cycle number and noptmax value to use. For cycles that are not listed in the is file, the value of noptmax in the control file is used. In this way, users can control how many iterations should be used for each assimilation cycle. | Table 12.2. PESTPP-DA specific control arguments. PESTPP-DA shares all other control arguments with PESTPP-IES -# 13. PESTPP-MOU +# 13. PESTPP-MOU + + +## 13.1 Introduction -## 13.1 Introduction PESTPP-MOU is a tool for constrained single and multiple objective optimization under uncertainty (CMOU) with evolutionary heuristics. It implements several popular “global” evolutionary optimization algorithms including simulated binary cross over, differential evolution (including self-adaptive differential evolution), and particle swarm optimization. PESTPP-MOU uses the pareto dominance concepts and processes available in NSGA-II and SPEA-2 to seek multidimensional pareto frontiers. More importantly, PESTPP-MOU implements the same “chance” concepts and mechanics as PESTPP-OPT for model output quantities used in the optimization problem, which includes both model-based constraints and model-based objectives. 
-## 13.2 Theory +## 13.2 Theory -### 13.2.1 Background and Basic Equations + +### 13.2.1 Background and Basic Equations The core concepts related to the use of PESTPP-MOU are: @@ -3498,93 +3587,93 @@ Available generators are: Users are encouraged to google these to find out more about their behavior -### 13.2.2 Evaluating chances in a population-based algorithm +### 13.2.2 Evaluating chances in a population-based algorithm In contrast to PESTPP-OPT, which operates on a single solution, PESTPP-MOU uses a population of decision variable sets, which means there is no single point in decision variable space to evaluate chances, so some decisions must be made about how to evaluate model-based constraints and/or objective uncertainty (and ultimately risk). Similar to PESTPP-OPT, PESTPP-MOU support both FOSM and stack-based chance processes. However, given that the expected use of PESTPP-MOU is in more nonlinear settings that PESTPP-OPT, it is expected that the stack-based chance formulation will be more appropriate. -Stack-based chances can be evaluated in two ways with PESTPP-MOU: at each population individual (*mou_chance_point(all)*) or at a single representative point in decision variable space and then broadcast to each population individual (*mou_chance_point(single*)). Additionally, chances can be reused across generations or evaluated anew each generation via the *opt_reuse_chance* integer argument. Each of these options has implications for rigorousness and computational burden. For example, evaluating the population stack at a single representative point during evaluation of the initial population and then reusing this chance for all remaining generations is the computationally cheapest option: you only have to evaluate the parameter stack once for the entire analysis. On the other extreme, you can evaluate the stack at each population individual for each generation. This results in running the full parameter stacks hundreds if not thousands of times. 
The choice of how and when to evaluate chances in very problem specific and depends on many factors like nonlinearity of the problem, model run time, and available computational resources. Preliminary testing has indicated that evaluating the stack at each population individual of the initial population and then reusing the stack or subsequent generations might be an appropriate compromise in some settings (how’s that for vague…). +Stack-based chances can be evaluated in two ways with PESTPP-MOU: at each population individual (*mou\_chance\_point(all)*) or at a single representative point in decision variable space and then broadcast to each population individual (*mou\_chance\_point(single*)). Additionally, chances can be reused across generations or evaluated anew each generation via the *opt\_reuse\_chance* integer argument. Each of these options has implications for rigorousness and computational burden. For example, evaluating the population stack at a single representative point during evaluation of the initial population and then reusing this chance for all remaining generations is the computationally cheapest option: you only have to evaluate the parameter stack once for the entire analysis. On the other extreme, you can evaluate the stack at each population individual for each generation. This results in running the full parameter stacks hundreds if not thousands of times. The choice of how and when to evaluate chances in very problem specific and depends on many factors like nonlinearity of the problem, model run time, and available computational resources. Preliminary testing has indicated that evaluating the stack at each population individual of the initial population and then reusing the stack or subsequent generations might be an appropriate compromise in some settings (how’s that for vague…). 
When chances are reused for generations, the PDFs/CDFs of the constraints/objectives are translated from the points in decision variable space where they were evaluated to the new population individuals in a minimum-Euclidean-distance sense. This assumes that points near each other in decision-variable space yield more similar chance results than points that are distant. The translation of PDFs/CDFs is done by differencing the simulated constraint/objective values between two points, assuming these values represent the mean of the PDFs. -### +### -### 13.2.3 PESTPP-MOU workflow +### 13.2.3 PESTPP-MOU workflow -PESTPP-MOU commences execution by either generating or loading a decision variable population, depending on whether an existing population was supplied. It then evaluates this initial population by running the model once for each population individual. PESTPP-MOU then generates a new population of offspring from the initial population using the specified *mou_generator*. This new population is then evaluated by running it through the model. +PESTPP-MOU commences execution by either generating or loading a decision variable population, depending on whether an existing population was supplied. It then evaluates this initial population by running the model once for each population individual. PESTPP-MOU then generates a new population of offspring from the initial population using the specified *mou\_generator*. This new population is then evaluated by running it through the model. -If chances processes are active (via the *opt_risk* argument) and chances should be evaluated this generation (as indicated by the *opt_reuse_chance* argument), then any chance-related model runs are also evaluated at the same time as the offspring population. 
+If chances processes are active (via the *opt\_risk* argument) and chances should be evaluated this generation (as indicated by the *opt\_reuse\_chance* argument), then any chance-related model runs are also evaluated at the same time as the offspring population. -Once all requested runs have finished, PESTPP-MOU uses the designated environmental selector (via *mou_env_selector* argument) to keep only the best individual s from the parent and offspring populations, combining these best individuals into the new population to use as the parent population for the next generation. +Once all requested runs have finished, PESTPP-MOU uses the designated environmental selector (via *mou\_env\_selector* argument) to keep only the best individual s from the parent and offspring populations, combining these best individuals into the new population to use as the parent population for the next generation. The selection of the “best” individuals depends on the problem formulation. For single objective problems, the “best” individuals are simply those that have objective function values closer to the requested extrema (minimum or maximum). For multiple objective problems, things are more complicated and the concept of “pareto dominance” is used to sort/rank the population according to each individual’s location in objective function space relative to all other individuals of the population. If constraints are included, things get even more complicated because now we don’t want to include infeasible individuals (individuals who violate constraints) in the population for the next generation unless there are not enough feasible individuals. PESTPP-MOU uses the constrained fast nondominated sorting process of NSGA-II and a variant of the SPEA-II ranking process for constrained multi-objective environmental sorting. 
-### 13.2.4 Advanced functionality +### 13.2.4 Advanced functionality -PESTPP-MOU implements several advanced functionality elements to increase its capacity as a decision support tool. The first of these is the option to treat “risk” as an objective to be maximized simultaneously with the other objective(s). That is, this option transforms the *OPT_RISK* argument into an objective to be maximized (drive risk towards a value of 1.0). In this way, PESTPP-MOU seeks to map the trade-off between objectives and also risk at the same time. To activate this option, an adjustable parameter named “\_risk\_” (leading and trailing underscores required) must be included in the pest interface and *mou_risk_objective* should be passed as “true”. This functionality can be useful in setting where the risk stance is not known a priori or if the use of the desired risk stance results in largely infeasible solutions. However, it is important to note the treating risk as an objective can increase the complexity and nonlinearity of the optimization solution process, requiring more generations, and in some cases, degrading the quality of the solution. +PESTPP-MOU implements several advanced functionality elements to increase its capacity as a decision support tool. The first of these is the option to treat “risk” as an objective to be maximized simultaneously with the other objective(s). That is, this option transforms the *OPT\_RISK* argument into an objective to be maximized (drive risk towards a value of 1.0). In this way, PESTPP-MOU seeks to map the trade-off between objectives and also risk at the same time. To activate this option, an adjustable parameter named “\_risk\_” (leading and trailing underscores required) must be included in the pest interface and *mou\_risk\_objective* should be passed as “true”. This functionality can be useful in setting where the risk stance is not known a priori or if the use of the desired risk stance results in largely infeasible solutions. 
However, it is important to note the treating risk as an objective can increase the complexity and nonlinearity of the optimization solution process, requiring more generations, and in some cases, degrading the quality of the solution. -PESTPP-MOU also supports self-adaptive differential evolution, where the differential evolution algorithmic controls (“f” value, cross over rate, and mutation rate) are treated as decision variables. This functionality is activated automatically when decision variables named “\_DE_F”, “\_CR\_”, and/or “\_MR\_” are found in the decision variable set. Users must take care to ensure these algorithmic decision variables are given reasonable ranges. +PESTPP-MOU also supports self-adaptive differential evolution, where the differential evolution algorithmic controls (“f” value, cross over rate, and mutation rate) are treated as decision variables. This functionality is activated automatically when decision variables named “\_DE\_F”, “\_CR\_”, and/or “\_MR\_” are found in the decision variable set. Users must take care to ensure these algorithmic decision variables are given reasonable ranges. -### 13.2.5 Running PESTPP-MOU +### 13.2.5 Running PESTPP-MOU PESTPP-MOU is run exactly like all other tools in the PEST++ suite – See section 5 of this manual for how to run the tools in the PEST++ suite. As is described in that section, model runs can be undertaken in series or in parallel. In either case, a prematurely terminated PESTPP-MOU run can be restarted by supplying the requisite decision variable population file. As previously discussed, if NOPTMAX is set to zero, PESTPP-MOU will run the model once using the values listed in the parameter data section of the control file. Furthermore, if NOPTMAX is set to -1, PESTPP-MOU will evaluate the initial decision variable population, and, optionally and chance runs, record outputs and then quit. 
-Constraints/objectives are identified in exactly the same way as PESTPP-OPT: via the observation/prior information equation group names. Group names that start with “less_than” are identified as less-than constraints/minimization objectives; group names that start with “greater_than” are identified as greater-than constraints/maximization objectives. The distinction between constraints and objectives is made via the *mou_objectives* argument. Any observation and prior information equation names (not group names) passed via *MOU_OBJECTIVES* are treated as objectives, not constraints. While it may seem tempting to make all constraints objectives, in practice, the algorithm elements encoded in PESTPP-MOU can tolerate up to 5 objectives, and, more realistically, 2-3 objectives may be a better choice. +Constraints/objectives are identified in exactly the same way as PESTPP-OPT: via the observation/prior information equation group names. Group names that start with “less\_than” are identified as less-than constraints/minimization objectives; group names that start with “greater\_than” are identified as greater-than constraints/maximization objectives. The distinction between constraints and objectives is made via the *mou\_objectives* argument. Any observation and prior information equation names (not group names) passed via *MOU\_OBJECTIVES* are treated as objectives, not constraints. While it may seem tempting to make all constraints objectives, in practice, the algorithm elements encoded in PESTPP-MOU can tolerate up to 5 objectives, and, more realistically, 2-3 objectives may be a better choice. -Decision variables are distinguished from parameters through the *opt_dec_var_groups* option which lists parameter groups whose members should be treated as decision variables. If this option is not specified, then all adjustable parameters as treated as decision variables. 
As with the number of objectives, it is important to point out the global evolutionary optimization methods do not scale to high dimensions; a maximum realistic number of decision variables is likely hundreds. +Decision variables are distinguished from parameters through the *opt\_dec\_var\_groups* option which lists parameter groups whose members should be treated as decision variables. If this option is not specified, then all adjustable parameters as treated as decision variables. As with the number of objectives, it is important to point out the global evolutionary optimization methods do not scale to high dimensions; a maximum realistic number of decision variables is likely hundreds. -### 13.2.6 PESTPP-DA Output Files +### 13.2.6 PESTPP-DA Output Files The following table summarizes the contents of files that are recorded by PESTPP-DA. Most of these have been discussed above. It is assumed that the PEST control file on which the inversion process is based is named *case.pst*. -
File
Contents
case.rec
Run record file. This file records a complete history of the inversion process. It is available for user-inspection at any time during that process.
case.rmr
Parallel run management record file.
case.log
Performance record. This file records the times at which various processing tasks commenced and completed.
case.pareto.summary.csv
A summary of pareto dominant solutions for each generation.
case.chance.obs_pop.csv
case.chance.obs_pop.jcb
The current generation chance-shifted simulated outputs. Depending on the value of SAVE_BINARY, the file may be stored in csv format or binary format
case.chance.dv_pop.csv
case.chance.dv_pop.jcb
The current generation shifted decision-variable population that corresponds with the chance-shifted simulated outputs. Depending on the value of SAVE_BINARY, the file may be stored in csv format or binary format
case.obs_pop.csv
case.obs_pop.jcb
The current generation raw (unshifted) simulated outputs. Depending on the value of SAVE_BINARY, the file may be stored in csv format or binary format
case.dv_pop.csv
case.dv_pop.jcb
The current generation decision-variable population that corresponds with the raw simulated outputs. Depending on the value of SAVE_BINARY, the file may be stored in csv format or binary format
case.<iter>.obs_pop.csv
case.<iter>.obs_pop.jcb
The <iter> generation raw (unshifted) simulated outputs. Depending on the value of SAVE_BINARY, the file may be stored in csv format or binary format
case.<iter>.dv_pop.csv
case.<iter>.dv_pop.jcb
The <iter> generation decision-variable population that corresponds with the raw simulated outputs. Depending on the value of SAVE_BINARY, the file may be stored in csv format or binary format
case.<iter>.chance.obs_pop.csv
case.<iter>.chance.obs_pop.jcb
The <iter> generation chance-shifted simulated outputs. Depending on the value of SAVE_BINARY, the file may be stored in csv format or binary format
case.<iter>.chance.dv_pop.csv
case.<iter>.chance.dv_pop.jcb
The <iter> generation decision-variable population that corresponds with the chance-shifted simulated outputs. Depending on the value of SAVE_BINARY, the file may be stored in csv format or binary format
case.lineage.csv
The listing of parents used to generate each offspring for each generation
+
File
Contents
case.rec
Run record file. This file records a complete history of the inversion process. It is available for user-inspection at any time during that process.
case.rmr
Parallel run management record file.
case.log
Performance record. This file records the times at which various processing tasks commenced and completed.
case.pareto.summary.csv
A summary of pareto dominant solutions for each generation.
case.chance.obs_pop.csv
case.chance.obs_pop.jcb
The current generation chance-shifted simulated outputs. Depending on the value of SAVE_BINARY, the file may be stored in csv format or binary format
case.chance.dv_pop.csv
case.chance.dv_pop.jcb
The current generation shifted decision-variable population that corresponds with the chance-shifted simulated outputs. Depending on the value of SAVE_BINARY, the file may be stored in csv format or binary format
case.obs_pop.csv
case.obs_pop.jcb
The current generation raw (unshifted) simulated outputs. Depending on the value of SAVE_BINARY, the file may be stored in csv format or binary format
case.dv_pop.csv
case.dv_pop.jcb
The current generation decision-variable population that corresponds with the raw simulated outputs. Depending on the value of SAVE_BINARY, the file may be stored in csv format or binary format
case.<iter>.obs_pop.csv
case.<iter>.obs_pop.jcb
The <iter> generation raw (unshifted) simulated outputs. Depending on the value of SAVE_BINARY, the file may be stored in csv format or binary format
case.<iter>.dv_pop.csv
case.<iter>.dv_pop.jcb
The <iter> generation decision-variable population that corresponds with the raw simulated outputs. Depending on the value of SAVE_BINARY, the file may be stored in csv format or binary format
case.<iter>.chance.obs_pop.csv
case.<iter>.chance.obs_pop.jcb
The <iter> generation chance-shifted simulated outputs. Depending on the value of SAVE_BINARY, the file may be stored in csv format or binary format
case.<iter>.chance.dv_pop.csv
case.<iter>.chance.dv_pop.jcb
The <iter> generation decision-variable population that corresponds with the chance-shifted simulated outputs. Depending on the value of SAVE_BINARY, the file may be stored in csv format or binary format
case.lineage.csv
The listing of parents used to generate each offspring for each generation
Table 13.1. Files recorded by PESTPP-MOU. -##
+## 13.4 Summary of PESTPP-MOU Control Variables -## 13.4 Summary of PESTPP-MOU Control Variables -### 13.4.1 General +### 13.4.1 General Like all the tools in the PEST++ suite, PESTPP-MOU uses a control file, template files, and instruction files. -### 13.4.2 Control Variables in the PEST Control File +### 13.4.2 Control Variables in the PEST Control File -### 13.4.3 PEST++ Control Variables +### 13.4.3 PEST++ Control Variables Table 12.XXX lists PEST++ control variables that are specific to only PESTPP-MOU; many other optional control variables that can be used with PESTPP-MOU are listed in the PESTPP-OPT section of the manual. All of these are optional. If a variable is not supplied, a default value is employed. The value of the default is presented along with the name of each variable in the table below. Variables discussed in section 5.3.6 that control parallel run management are not listed in table 13.2. -| Variable | Type | Role | -|-------------------------------------|----------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| *Mou_population_size(100)* | int | The size of the population to use in PESTPP-MOU. Default is 100 | -| *Mou_generator(DE)* | text | The generator to use. Valid values are “de” (differential evolution), “pso” (particle swarm optimization), “sbx” (simulated binary cross over), and “pm” (polynomial mutation), “simplex” (gradient simplex with increased parallelism). Default is “de” | -| *Mou_population_file()* | text | A file ending in .csv or .jco/.jcb that contains the initial population to use. If not passed, the initial population is generated from a uniform distribution defined by decision variable upper and lower bounds. 
| -| *Mou_obs_population_restart_file()* | text | File ending in .csv or .jcb/.jco that contains the simulation results to restart PESTPP-MOU with. Requires *mou_population_file* and it is up to the user to make sure these two files are coherent (e.g., from the same generation of a previous PESTPP-MOU analysis) | -| *Mou_objectives()* | text | Comma-separated list of observations and/or prior information equations to use as objectives. These quantities must have a group name that defines the desired extrema to seek (minimize or maximize) using the same rules used to name constraints. | -| *Mou_max_archive_size(500)* | Int | The maximum number of solutions to track in the archive. default is 500. | -| *Mou_risk_obj(false)* | Boolean | Flag to use risk-as-an-objective. Requires an adjustable parameter named “\_risk\_” and chance processes to be active. | -| *Mou_verbose_level(1)* | Int | Level of verbosity for PESTPP-MOU. 1 is minimal, 4 is everything. | -| *Mou_env_selector(NSGA)* | Text | Which environmental selector to use. Values are “nsga” or “spea”. Default is “nsga” | -| *Mou_crossover_prob(0.75)* | Double | The crossover probability used in several generators. Default is 0.75 | -| *Mou_mutation_prob()* | Double | The mutation probability used in several generators. If not passed, then 1.0 over number of decision variables is used. | -| *Mou_mating_selector(tournament)* | text | The mating selector to use. Values can be “tournament” or “random”. Default is “tournament” | -| *Mou_de_f(0.8)* | Double | The differential evolution “f” factor. Default is 0.8 | -| *Mou_save_population_every(-1)* | Int | How often, in generations, to save the population files to disk. Default is -1 – don’t save any generation-specific populations. | -| *Mou_pso_cognitive_const(2.0)* | double | The particle swarm cognitive constant. The default is 2.0 | -| *Mou_pso_omega(0.7)* | Double | The particle swarm omega value. 
Default is 0.7 | -| *Mou_pso_social_const(2.0)* | Double | The particle swarm social constant. Default is 2.0 | -| *Mou_population_schedule()* | Text | A two column ascii file that defines the size of the population to use for each generation. Generations not listed use *mou_population_size*. This can be useful for finding a group of feasible initial population individuals. | -| *Mou_simplex_reflections(10)* | int | Number of poor performing individuals to reflect. Must be less than the population size minus 1. Default is 10. | -| *Mou_simplex_factors(0.5,.0.7,0.8)* | double | Backtracking points to test along each reflected simplex individual. | -| *Mou_simplex_mutation(false)* | boolean | Flag to add guassian mutation to the reflected simplex individuals. Default is false | +| Variable | Type | Role | +|-----------------------------------------|----------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| *Mou\_population\_size(100)* | int | The size of the population to use in PESTPP-MOU. Default is 100 | +| *Mou\_generator(DE)* | text | The generator to use. Valid values are “de” (differential evolution), “pso” (particle swarm optimization), “sbx” (simulated binary cross over), and “pm” (polynomial mutation), “simplex” (gradient simplex with increased parallelism). Default is “de” | +| *Mou\_population\_file()* | text | A file ending in .csv or .jco/.jcb that contains the initial population to use. If not passed, the initial population is generated from a uniform distribution defined by decision variable upper and lower bounds. | +| *Mou\_obs\_population\_restart\_file()* | text | File ending in .csv or .jcb/.jco that contains the simulation results to restart PESTPP-MOU with. 
Requires *mou\_population\_file* and it is up to the user to make sure these two files are coherent (e.g., from the same generation of a previous PESTPP-MOU analysis) | +| *Mou\_objectives()* | text | Comma-separated list of observations and/or prior information equations to use as objectives. These quantities must have a group name that defines the desired extrema to seek (minimize or maximize) using the same rules used to name constraints. | +| *Mou\_max\_archive\_size(500)* | Int | The maximum number of solutions to track in the archive. default is 500. | +| *Mou\_risk\_obj(false)* | Boolean | Flag to use risk-as-an-objective. Requires an adjustable parameter named “\_risk\_” and chance processes to be active. | +| *Mou\_verbose\_level(1)* | Int | Level of verbosity for PESTPP-MOU. 1 is minimal, 4 is everything. | +| *Mou\_env\_selector(NSGA)* | Text | Which environmental selector to use. Values are “nsga” or “spea”. Default is “nsga” | +| *Mou\_crossover\_prob(0.75)* | Double | The crossover probability used in several generators. Default is 0.75 | +| *Mou\_mutation\_prob()* | Double | The mutation probability used in several generators. If not passed, then 1.0 over number of decision variables is used. | +| *Mou\_mating\_selector(tournament)* | text | The mating selector to use. Values can be “tournament” or “random”. Default is “tournament” | +| *Mou\_de\_f(0.8)* | Double | The differential evolution “f” factor. Default is 0.8 | +| *Mou\_save\_population\_every(-1)* | Int | How often, in generations, to save the population files to disk. Default is -1 – don’t save any generation-specific populations. | +| *Mou\_pso\_cognitive\_const(2.0)* | double | The particle swarm cognitive constant. The default is 2.0 | +| *Mou\_pso\_omega(0.7)* | Double | The particle swarm omega value. Default is 0.7 | +| *Mou\_pso\_social\_const(2.0)* | Double | The particle swarm social constant. 
Default is 2.0 | +| *Mou\_population\_schedule()* | Text | A two column ascii file that defines the size of the population to use for each generation. Generations not listed use *mou\_population\_size*. This can be useful for finding a group of feasible initial population individuals. | +| *Mou\_simplex\_reflections(10)* | int | Number of poor performing individuals to reflect. Must be less than the population size minus 1. Default is 10. | +| *Mou\_simplex\_factors(0.5,.0.7,0.8)* | double | Backtracking points to test along each reflected simplex individual. | +| *Mou\_simplex\_mutation(false)* | boolean | Flag to add Gaussian mutation to the reflected simplex individuals. Default is false | Table 13.2. PESTPP-MOU specific control arguments. PESTPP-MOU shares many other control arguments with PESTPP-OPT -# 14. References +# 14. References + Ahlfeld, D.P. and Mulligan, A.E., 2000. Optimal Management of Flow in Groundwater Systems. Vol 1. Academic Press. @@ -3682,7 +3771,9 @@ White, J.T., Doherty, J.E. and Hughes, J.D., 2014. Quantifying the predictive co Zhang, J., Lin, G., Li, W., Wu, L., & Zeng, L. (2018). An iterative local updating ensemble smoother for estimation and uncertainty assessment of hydrologic model parameters with multimodal distributions. *Water Resources Research*, 54, 1716– 1733. -**Appendix** +# Appendix A. PEST Control File Specifications + + This appendix provides specifications for a PEST control file. Variables are recognized by their position in the file. They must be placed on the correct line of this file and separated from their neighbours by at least one space. @@ -3697,13 +3788,13 @@ PEST, BEOPEST and many of the PEST-support utility programs which are documented Lines that begin with “++” are used for the insertion of variables which control the operation of the PEST++ suite of programs. -Comments can be placed on their own line. Alternatively, they can be placed at the end of a line which provides PEST control data. 
In either case, a comment follows a “#” character. Note, however, that this character is not construed as denoting the presence of an ensuing comment under any of the following circumstances: +Comments can be placed on their own line. Alternatively, they can be placed at the end of a line which provides PEST control data. In either case, a comment follows a “\#” character. Note, however, that this character is not construed as denoting the presence of an ensuing comment under any of the following circumstances: - it is not preceded by a space, tab or the beginning of a line; - it is part of a string that is enclosed in quotes. -These exceptions preclude mis-construing the presence of the “#” character in a filename as signifying the start of a comment. +These exceptions preclude mis-construing the presence of the “\#” character in a filename as signifying the start of a comment. Some of the older utilities that are documented in part II of the PEST manual do not tolerate the presence of blank lines, “++” lines or comments. All of these items can be removed from a PEST control file using the PSTCLEAN utility supplied with the PEST suite. 
@@ -3747,7 +3838,7 @@ EIGWRITE LSQRMODE -LSQR_ATOL LSQR_BTOL LSQR_CONLIM LSQR_ITNLIM +LSQR\_ATOL LSQR\_BTOL LSQR\_CONLIM LSQR\_ITNLIM LSQRWRITE @@ -3765,7 +3856,7 @@ BASEPESTFILE BASEJACFILE -SVDA_MULBPA SVDA_SCALADJ SVDA_EXTSUPER SVDA_SUPDERCALC SVDA_PAR_EXCL +SVDA\_MULBPA SVDA\_SCALADJ SVDA\_EXTSUPER SVDA\_SUPDERCALC SVDA\_PAR\_EXCL \* parameter groups @@ -3845,19 +3936,19 @@ WFFAC WFTOL IREGADJ \[NOPTREGADJ REGWEIGHTRAT \[REGSINGTHRESH\]\] \* pareto -PARETO_OBSGROUP +PARETO\_OBSGROUP -PARETO_WTFAC_START PARETO_WTFAC_FIN NUM_WTFAC_INC +PARETO\_WTFAC\_START PARETO\_WTFAC\_FIN NUM\_WTFAC\_INC -NUM_ITER_START NUM_ITER_GEN NUM_ITER_FIN +NUM\_ITER\_START NUM\_ITER\_GEN NUM\_ITER\_FIN -ALT_TERM +ALT\_TERM -OBS_TERM ABOVE_OR_BELOW OBS_THRESH NUM_ITER_THRESH (*only if ALT_TERM is non-zero*) +OBS\_TERM ABOVE\_OR\_BELOW OBS\_THRESH NUM\_ITER\_THRESH (*only if ALT\_TERM is non-zero*) -NOBS_REPORT +NOBS\_REPORT -OBS_REPORT_1 OBS_REPORT_2 OBS_REPORT_3.. (*NOBS_REPORT items*) +OBS\_REPORT\_1 OBS\_REPORT\_2 OBS\_REPORT\_3.. (*NOBS\_REPORT items*) Table A1.1 Variables in the “control data” section of the PEST control file. @@ -3878,7 +3969,7 @@ Table A1.1 Variables in the “control data” section of the PEST control file. 
| NUMCOM | integer | greater than zero | number of command lines used to run model | | JACFILE | integer | 0, 1 or -1 | indicates whether model provides external derivatives file | | MESSFILE | integer | zero or one | indicates whether PEST writes PEST-to-model message file | -| OBSREREF | text | “obsreref”, “obsreref_N” or “noobsreref” | activates or de-activates observation re-referencing (with an optional pause after re-referencing runs) | +| OBSREREF | text | “obsreref”, “obsreref\_N” or “noobsreref” | activates or de-activates observation re-referencing (with an optional pause after re-referencing runs) | | RLAMBDA1 | real | zero or greater | initial Marquardt lambda | | RLAMFAC | real | positive or negative, but not zero | dictates Marquardt lambda adjustment process | | PHIRATSUF | real | between zero and one | fractional objective function sufficient for end of current iteration | @@ -3958,38 +4049,38 @@ Table A1.5 Variables in the optional “LSQR” section of the PEST control file | Variable | Type | Values | Description | |--------------|----------|-------------------|--------------------------------------------| | LSQRMODE | integer | zero or one | activates LSQR solution of inverse problem | -| LSQR_ATOL | real | zero or greater | LSQR algorithm *atol* variable | -| LSQR_BTOL | real | zero or greater | LSQR algorithm *btol* variable | -| LSQR_CONLIM | real | zero or greater | LSQR algorithm *conlim* variable | -| LSQR_ITNLIM | integer | greater than zero | LSQR algorithm *itnlim* variable | -| LSQR_WRITE | integer | zero or one | instructs PEST to write LSQR file | +| LSQR\_ATOL | real | zero or greater | LSQR algorithm *atol* variable | +| LSQR\_BTOL | real | zero or greater | LSQR algorithm *btol* variable | +| LSQR\_CONLIM | real | zero or greater | LSQR algorithm *conlim* variable | +| LSQR\_ITNLIM | integer | greater than zero | LSQR algorithm *itnlim* variable | +| LSQR\_WRITE | integer | zero or one | instructs PEST to write LSQR file | Table A1.6 
Variables in the optional “SVD-assist” section of the PEST control file. -| Variable | Type | Values | Description | -|-----------------|----------|----------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| BASEPESTFILE | text | a filename | name of base PEST control file | -| BASEJACFILE | text | a filename | name of base PEST Jacobian matrix file | -| SVDA_MULBPA | integer | zero or one | instructs PEST to record multiple BPA files | -| SVDA_SCALADJ | integer | -4 to 4 | sets type of parameter scaling undertaken in super parameter definition | -| SVDA_EXTSUPER | integer | 0, 1, 2, -2, 3 | sets means used to calculate super parameters | -| SVDA_SUPDERCALC | integer | zero or one | instructs PEST to compute super parameter sensitivities from base parameter sensitivities | -| SVDA_PAR_EXCL | integer | 0, 1 or -1 | if set to 1, instructs PEST to compute super parameters on basis only of observation group in base parameter PEST control file to which pareto-adjustable weighting is assigned in super parameter PEST control file. 
If set to -1 all groups other than this form basis for super parameter definition | +| Variable | Type | Values | Description | +|------------------|----------|----------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| BASEPESTFILE | text | a filename | name of base PEST control file | +| BASEJACFILE | text | a filename | name of base PEST Jacobian matrix file | +| SVDA\_MULBPA | integer | zero or one | instructs PEST to record multiple BPA files | +| SVDA\_SCALADJ | integer | -4 to 4 | sets type of parameter scaling undertaken in super parameter definition | +| SVDA\_EXTSUPER | integer | 0, 1, 2, -2, 3 | sets means used to calculate super parameters | +| SVDA\_SUPDERCALC | integer | zero or one | instructs PEST to compute super parameter sensitivities from base parameter sensitivities | +| SVDA\_PAR\_EXCL | integer | 0, 1 or -1 | if set to 1, instructs PEST to compute super parameters on basis only of observation group in base parameter PEST control file to which pareto-adjustable weighting is assigned in super parameter PEST control file. If set to -1 all groups other than this form basis for super parameter definition | Table A1.7 Variables required for each parameter group in the “parameter groups” section of the PEST control file. 
-| Variable | Type | Values | Description | -|--------------|----------|-------------------------------------------------------------|---------------------------------------------------------------------------------------| -| PARGPNME | text | 12 characters or less | parameter group name | -| INCTYP | text | “relative”, “absolute”, “rel_to_max” | method by which parameter increments are calculated | -| DERINC | real | greater than zero | absolute or relative parameter increment | -| DERINCLB | real | zero or greater | absolute lower bound of relative parameter increment | -| FORCEN | text | “switch”, “always_2”, “always_3”, “switch_5”, “always_5” | determines whether higher order derivatives calculation is undertaken | -| DERINCMUL | real | greater than zero | derivative increment multiplier when undertaking higher order derivatives calculation | -| DERMTHD | text | “parabolic”, “outside_pts”, “best_fit”, “minvar”, “maxprec” | method of higher order derivatives calculation | -| SPLITTHRESH | real | greater than zero (or zero to deactivate) | slope threshold for split slope analysis | -| SPLITRELDIFF | real | greater than zero | relative slope difference threshold for action | -| SPLITACTION | text | text | “smaller”, “zero” or “previous” | +| Variable | Type | Values | Description | +|--------------|----------|---------------------------------------------------------------|---------------------------------------------------------------------------------------| +| PARGPNME | text | 12 characters or less | parameter group name | +| INCTYP | text | “relative”, “absolute”, “rel\_to\_max” | method by which parameter increments are calculated | +| DERINC | real | greater than zero | absolute or relative parameter increment | +| DERINCLB | real | zero or greater | absolute lower bound of relative parameter increment | +| FORCEN | text | “switch”, “always\_2”, “always\_3”, “switch\_5”, “always\_5” | determines whether higher order derivatives calculation is undertaken | +| 
DERINCMUL | real | greater than zero | derivative increment multiplier when undertaking higher order derivatives calculation | +| DERMTHD | text | “parabolic”, “outside\_pts”, “best\_fit”, “minvar”, “maxprec” | method of higher order derivatives calculation | +| SPLITTHRESH | real | greater than zero (or zero to deactivate) | slope threshold for split slope analysis | +| SPLITRELDIFF | real | greater than zero | relative slope difference threshold for action | +| SPLITACTION | text | text | “smaller”, “zero” or “previous” | Table A1.8 Variables required for each parameter in the “parameter data” section of the PEST control file. @@ -4105,35 +4196,41 @@ Table A1.17 Variables in the optional “regularization” section of the PEST c Table A1.18 Variables in the optional “pareto” section of the PEST control file. -| Variable | Type | Values | Description | -|--------------------|----------|---------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------| -| PARETO_OBSGROUP | text | 12 characters or less | name of observation group whose weights are subject to multiplication by a variable weight factor | -| PARETO_WTFAC_START | real | zero or greater | initial weight factor for user-specified observation group | -| PARETO_WTFAC_FIN | real | greater than PARETO_WTFAC_START | final weight factor for user-specified observation group | -| NUM_WTFAC_INT | integer | greater than zero | number of weight factor increments to employ in traversing Pareto front | -| NUM_ITER_START | integer | zero or greater | number of optimization iterations to employ when using initial weight factor | -| NUM_ITER_GEN | integer | greater than zero | number of optimization iterations to employ when using any weight factor other than PARETO_WTFAC_START or PARETO_WTFAC_FIN | -| NUM_ITER_FIN | integer | zero or greater | number of optimization iterations to employ when using 
final weight factor | -| ALT_TERM | integer | zero or one | set to one in order to activate PEST termination determined by value of a specified model output | -| OBS_TERM | text | 20 characters or less | the name of an observation cited in the “observation data” section of the PEST control file whose value will be monitored for possible PEST run termination | -| ABOVE_OR_BELOW | text | “above” or “below” | determines whether the monitored model output must be above or below the threshold to precipitate run termination | -| OBS_THRESH | real | any number | value that monitored model output must exceed or undercut to precipitate model run termination | -| ITER_THRESH | integer | zero or greater | the number of optimization iterations for which the model output threshold must be exceeded or undercut to precipitate run termination | -| NOBS_REPORT | integer | zero or greater | number of model outputs whose values to report | -| OBS_REPORT_N | text | 20 characters or less | the name of the *N*’th observation whose value is reported in the POD and PPD files written by PEST when run in “pareto” mode | - -**Appendix** -**B.1** +| Variable | Type | Values | Description | +|----------------------|----------|-----------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------| +| PARETO\_OBSGROUP | text | 12 characters or less | name of observation group whose weights are subject to multiplication by a variable weight factor | +| PARETO\_WTFAC\_START | real | zero or greater | initial weight factor for user-specified observation group | +| PARETO\_WTFAC\_FIN | real | greater than PARETO\_WTFAC\_START | final weight factor for user-specified observation group | +| NUM\_WTFAC\_INT | integer | greater than zero | number of weight factor increments to employ in traversing Pareto front | +| NUM\_ITER\_START | integer | zero or greater | number of 
optimization iterations to employ when using initial weight factor | +| NUM\_ITER\_GEN | integer | greater than zero | number of optimization iterations to employ when using any weight factor other than PARETO\_WTFAC\_START or PARETO\_WTFAC\_FIN | +| NUM\_ITER\_FIN | integer | zero or greater | number of optimization iterations to employ when using final weight factor | +| ALT\_TERM | integer | zero or one | set to one in order to activate PEST termination determined by value of a specified model output | +| OBS\_TERM | text | 20 characters or less | the name of an observation cited in the “observation data” section of the PEST control file whose value will be monitored for possible PEST run termination | +| ABOVE\_OR\_BELOW | text | “above” or “below” | determines whether the monitored model output must be above or below the threshold to precipitate run termination | +| OBS\_THRESH | real | any number | value that monitored model output must exceed or undercut to precipitate model run termination | +| ITER\_THRESH | integer | zero or greater | the number of optimization iterations for which the model output threshold must be exceeded or undercut to precipitate run termination | +| NOBS\_REPORT | integer | zero or greater | number of model outputs whose values to report | +| OBS\_REPORT\_N | text | 20 characters or less | the name of the *N*’th observation whose value is reported in the POD and PPD files written by PEST when run in “pareto” mode | + +# Appendix B. Some File Formats + + +## B.1 Introduction + + This appendix provides formats for two types of files that are used by a number of programs belonging to the PEST++ suite. These file types are also used by members of the PEST suite. Most of this appendix is reproduced from Part II of the PEST manual. -**B.2** +## B.2 Matrix File + + **B.2.1** A number of programs of the PEST++ suite read and/or write a parameter covariance matrix. 
Optionally, this matrix can reside in a file that adopts “PEST matrix file” specifications. Programs of the PEST++ suite identify this type of file by an extension of *.cov*; however, this is not part of its specifications. **B.2.2** The specifications of a matrix file are illustrated by example. A PEST-compatible matrix file holding a matrix with three rows and four columns is illustrated in figure B.1. -
3 4 2
3.4423 23.323 2.3232 1.3232
5.4231 3.3124 4.4331 3.4442
7.4233 5.4432 7.5362 8.4232
* row names
apar1
apar2
apar3
* column names
aobs1
aobs2
aobs3
aobs4
+
3 4 2
3.4423 23.323 2.3232 1.3232
5.4231 3.3124 4.4331 3.4442
7.4233 5.4432 7.5362 8.4232
* row names
apar1
apar2
apar3
* column names
aobs1
aobs2
aobs3
aobs4
Figure B.1 An example of a matrix file. @@ -4147,11 +4244,13 @@ For a square matrix ICODE can be set to “1”. This indicates that rows and co A special ICODE value is reserved for diagonal matrices. If NCOL is equal to NROW, then ICODE may be set to “-1”. In this case only the diagonal elements of the matrix need to be presented following the integer header line; these should be listed one to a line as illustrated in figure B.2. Following that should be the string “\* row and column names” (for if ICODE is set to “-1” it is assumed that these are the same), followed by the names themselves. -
5 5 -1
4.5
4.5
2.4
7.53
5.32
* row and column names
par1
par2
par3
par4
par5
+
5 5 -1
4.5
4.5
2.4
7.53
5.32
* row and column names
par1
par2
par3
par4
par5
Figure B.2 A matrix file containing a diagonal matrix. -**B.3** +##
B.3 Uncertainty Files + + **B.3.1** The PEST++ *parcov()* control variable can accept the name of a parameter uncertainty file as its argument. Programs of the PEST++ suite recognize this type of file by an extension of *.unc*. A parameter uncertainty file gives these programs access to a number of covariance matrices that specify prior uncertainties for subsets of parameters. Individual uncertainties can also be provided for parameters that have no statistical correlation with other parameters. @@ -4160,7 +4259,7 @@ Note that specifications provided herein for a parameter uncertainty file differ **B.3.2** Figure B.3 illustrates an uncertainty file. -
~ An example of an uncertainty file
START STANDARD_DEVIATION
std_multiplier 3.0
ro9 1.0
ro10 1.0
ro4 1.0
END STANDARD_DEVIATION
START COVARIANCE_MATRIX
file "mat.dat"
variance_multiplier 1e-2
END COVARIANCE_MATRIX
START COVARIANCE_MATRIX
file "cov.mat"
variance_multiplier 1.0
parameter_list_file “list.dat”
END COVARIANCE_MATRIX
START COVARIANCE_MATRIX
file "cov1.mat"
first_parameter kpp1
last_parameter kpp129
END COVARIANCE_MATRIX
+
~ An example of an uncertainty file
START STANDARD_DEVIATION
std_multiplier 3.0
ro9 1.0
ro10 1.0
ro4 1.0
END STANDARD_DEVIATION
START COVARIANCE_MATRIX
file "mat.dat"
variance_multiplier 1e-2
END COVARIANCE_MATRIX
START COVARIANCE_MATRIX
file "cov.mat"
variance_multiplier 1.0
parameter_list_file "list.dat"
END COVARIANCE_MATRIX
START COVARIANCE_MATRIX
file "cov1.mat"
first_parameter kpp1
last_parameter kpp129
END COVARIANCE_MATRIX
Figure B.3 Example of an uncertainty file. @@ -4178,29 +4277,29 @@ An uncertainty file is subdivided into blocks. Each block implements one of the 1. Parameters cited in an uncertainty file, and the files cited therein, are matched by name to those featured in a PEST control file which defines a particular inverse problem. -2. The uncertainty of an individual element of an overall k vector can be specified only once. Thus, for example, any particular element of a k vector cannot be cited in a STANDARD_DEVIATION block of an uncertainty file if it is also cited in a matrix provided through a COVARIANCE_MATRIX block of the same uncertainty file. +2. The uncertainty of an individual element of an overall k vector can be specified only once. Thus, for example, any particular element of a k vector cannot be cited in a STANDARD\_DEVIATION block of an uncertainty file if it is also cited in a matrix provided through a COVARIANCE\_MATRIX block of the same uncertainty file. 3. If a parameter is log-transformed in the current inverse or uncertainty analysis problem (as specified in the PEST control file which governs that problem), then specifications of variance, covariance or standard deviation provided in an uncertainty file must pertain to the log (to base 10) of the parameter. -Each block of an uncertainty file must begin with a START line and finish with an END line as illustrated in figure B.3; in both cases the type of block must be correctly characterised following the START and END designators. Within each block, data entry must follow the keyword protocol. Thus, each line must comprise a keyword, followed by the value (numerical or text) associated with that keyword. Filenames must be surrounded by quotes if they contain spaces. With one exception (the *std_multiplier* keyword in the STANDARD_DEVIATION block), keywords within a block can be supplied in any order; some can be omitted if desired. Keywords and block names are case insensitive. 
+Each block of an uncertainty file must begin with a START line and finish with an END line as illustrated in figure B.3; in both cases the type of block must be correctly characterised following the START and END designators. Within each block, data entry must follow the keyword protocol. Thus, each line must comprise a keyword, followed by the value (numerical or text) associated with that keyword. Filenames must be surrounded by quotes if they contain spaces. With one exception (the *std\_multiplier* keyword in the STANDARD\_DEVIATION block), keywords within a block can be supplied in any order; some can be omitted if desired. Keywords and block names are case insensitive. -Blank lines can appear anywhere within an uncertainty file. So too can comment lines; these are recognised through the fact that their first character is “#”. +Blank lines can appear anywhere within an uncertainty file. So too can comment lines; these are recognised through the fact that their first character is “\#”. Each of the blocks appearing in an uncertainty file is now discussed in detail. **B.3.2.1** -In a STANDARD_DEVIATION block, entity names (i.e., individual parameters) are listed one to a line followed by their standard deviations. As stated above, if a parameter is log-transformed in a parameter estimation or uncertainty analysis process, then this standard deviation should pertain to the log (to base 10) of the parameter. Parameters can be supplied in any order. Optionally a *std_multiplier* keyword can be supplied in the STANDARD_DEVIATION block; if so, it must be the first item in the block. All standard deviations supplied on ensuing lines are multiplied by this factor (the default value of which is 1.0). +In a STANDARD\_DEVIATION block, entity names (i.e., individual parameters) are listed one to a line followed by their standard deviations. 
As stated above, if a parameter is log-transformed in a parameter estimation or uncertainty analysis process, then this standard deviation should pertain to the log (to base 10) of the parameter. Parameters can be supplied in any order. Optionally a *std\_multiplier* keyword can be supplied in the STANDARD\_DEVIATION block; if so, it must be the first item in the block. All standard deviations supplied on ensuing lines are multiplied by this factor (the default value of which is 1.0). -Parameters cited in a STANDARD_DEVIATION block are assumed to be uncorrelated with other parameters/observations. Thus, off-diagonal elements of the covariance matrix corresponding to these items are zero. Pertinent diagonal elements of the covariance matrix are calculated by squaring standard deviations (after multiplication by the *std_multiplier*). +Parameters cited in a STANDARD\_DEVIATION block are assumed to be uncorrelated with other parameters/observations. Thus, off-diagonal elements of the covariance matrix corresponding to these items are zero. Pertinent diagonal elements of the covariance matrix are calculated by squaring standard deviations (after multiplication by the *std\_multiplier*). -If a parameter is featured in a STANDARD_DEVIATION block but is not featured in the PEST control file which defines an inverse, optimization or uncertainty analysis problem, programs of the PEST++ suite ignore it. +If a parameter is featured in a STANDARD\_DEVIATION block but is not featured in the PEST control file which defines an inverse, optimization or uncertainty analysis problem, programs of the PEST++ suite ignore it. 
**B.3.2.2** -Where a parameter uncertainty file provides one or more covariance matrices, each for a subgroup of k which shows within-group parameter correlation, all of these matrices are collectively included in the larger C(k) covariance matrix, together with variances calculated from parameter standard deviations supplied in one or more STANDARD_DEVIATION blocks which may also be featured in the parameter uncertainty file. Optionally, all elements of a user-supplied covariance matrix provided through a COVARIANCE_MATRIX block can be multiplied by a factor. This factor (for which the default value is 1.0) is supplied following the *variance_multiplier* keyword. +Where a parameter uncertainty file provides one or more covariance matrices, each for a subgroup of k which shows within-group parameter correlation, all of these matrices are collectively included in the larger C(k) covariance matrix, together with variances calculated from parameter standard deviations supplied in one or more STANDARD\_DEVIATION blocks which may also be featured in the parameter uncertainty file. Optionally, all elements of a user-supplied covariance matrix provided through a COVARIANCE\_MATRIX block can be multiplied by a factor. This factor (for which the default value is 1.0) is supplied following the *variance\_multiplier* keyword. -A number of options are available for storage of the matrix housed in the covariance matrix file cited in a COVARIANCE_MATRIX block. Matrix storage may follow the PEST matrix file protocol described in section B.2. If this is the case, then the first line of this file must include 3 integers, the first two of which (specifying the number of rows and columns in the matrix) must be identical. The third integer must be “1” or “-1”. The matrix itself must follow this integer header line. Elements within this matrix must be space-delimited; rows can be wrapped onto consecutive lines, but each new matrix row must start on a new line. 
This matrix must be followed by a line containing the “\* row and column names” string. Following this must be the names of the parameters to which the matrix pertains. If the matrix file follows this protocol then the COVARIANCE_MATRIX block provided in the parameter uncertainty file must be identical in format to the first of the COVARIANCE_MATRIX blocks shown in figure B.3; it can only feature a *file* keyword and an optional *variance_multiplier* keyword. +A number of options are available for storage of the matrix housed in the covariance matrix file cited in a COVARIANCE\_MATRIX block. Matrix storage may follow the PEST matrix file protocol described in section B.2. If this is the case, then the first line of this file must include 3 integers, the first two of which (specifying the number of rows and columns in the matrix) must be identical. The third integer must be “1” or “-1”. The matrix itself must follow this integer header line. Elements within this matrix must be space-delimited; rows can be wrapped onto consecutive lines, but each new matrix row must start on a new line. This matrix must be followed by a line containing the “\* row and column names” string. Following this must be the names of the parameters to which the matrix pertains. If the matrix file follows this protocol then the COVARIANCE\_MATRIX block provided in the parameter uncertainty file must be identical in format to the first of the COVARIANCE\_MATRIX blocks shown in figure B.3; it can only feature a *file* keyword and an optional *variance\_multiplier* keyword. -The PLPROC parameterization utility supplied with PEST writes matrix files whose format is slightly different from that described in section B.2 of this manual. This format retains a three-integer header. The first two numbers in this header must specify the number of rows and number of columns in the matrix. The third number must be 1 or -1; 1 indicates that the matrix is non-diagonal while -1 specifies a diagonal matrix. 
The matrix itself follows this header line. However, the matrix is not followed by a list of row and column names; instead, the end of the file coincides with the end of the matrix. In this case the COVARIANCE_MATRIX block must adopt either the second or third protocols illustrated in figure B.3 for linking matrix rows and columns to the names of parameters cited in the “parameter data” section of a PEST control file. For the second option the user provides a file in which parameters are listed one to a line. There must be as many such lines are there are rows/columns in the covariance matrix. Alternatively, the third protocol can be followed. In this case the COVARIANCE_MATRIX block must contain both of the *first parameter* and *last_parameter* keywords. These refer back to the PEST control file which defines the current problem. Parameters within this PEST control file between and including the user-nominated *first parameter* and *last_parameter* parameters are then associated with rows and columns of the covariance matrix, with the ordering of parameters in the matrix file being the same as that in the PEST control file. Naturally, the number of parameters in this implied parameter list must be the same as the number of rows and columns in the covariance matrix. +The PLPROC parameterization utility supplied with PEST writes matrix files whose format is slightly different from that described in section B.2 of this manual. This format retains a three-integer header. The first two numbers in this header must specify the number of rows and number of columns in the matrix. The third number must be 1 or -1; 1 indicates that the matrix is non-diagonal while -1 specifies a diagonal matrix. The matrix itself follows this header line. However, the matrix is not followed by a list of row and column names; instead, the end of the file coincides with the end of the matrix. 
In this case the COVARIANCE\_MATRIX block must adopt either the second or third protocols illustrated in figure B.3 for linking matrix rows and columns to the names of parameters cited in the “parameter data” section of a PEST control file. For the second option the user provides a file in which parameters are listed one to a line. There must be as many such lines as there are rows/columns in the covariance matrix. Alternatively, the third protocol can be followed. In this case the COVARIANCE\_MATRIX block must contain both of the *first\_parameter* and *last\_parameter* keywords. These refer back to the PEST control file which defines the current problem. Parameters within this PEST control file between and including the user-nominated *first\_parameter* and *last\_parameter* parameters are then associated with rows and columns of the covariance matrix, with the ordering of parameters in the matrix file being the same as that in the PEST control file. Naturally, the number of parameters in this implied parameter list must be the same as the number of rows and columns in the covariance matrix. In all of the above cases the *variance multiplier* keyword is optional. If omitted, it is assumed to be 1.0. @@ -4208,20 +4307,22 @@ The following should be noted. 1. A covariance matrix must be positive definite. -2. If the first or second of the above COVARIANCE_MATRIX block protocols is adopted, then the order of rows and columns of the covariance matrix (which corresponds to the order of parameters listed either within the matrix file itself or in a user-supplied parameter list file) is arbitrary. The PEST++ program which reads a parameter uncertainty file will re-arrange matrix rows and columns so that they correspond to the order of adjustable parameters supplied in the PEST control file on which the current inverse or uncertainty analysis problem is based. +2. 
If the first or second of the above COVARIANCE\_MATRIX block protocols is adopted, then the order of rows and columns of the covariance matrix (which corresponds to the order of parameters listed either within the matrix file itself or in a user-supplied parameter list file) is arbitrary. The PEST++ program which reads a parameter uncertainty file will re-arrange matrix rows and columns so that they correspond to the order of adjustable parameters supplied in the PEST control file on which the current inverse or uncertainty analysis problem is based. -3. A covariance matrix that is cited in a parameter uncertainty file need not describe all of the parameters associated with the current inverse problem, for it need only pertain to a subset of these. Other parameters can be associated with other COVARIANCE_MATRIX blocks and/or can be cited in one or more STANDARD_DEVIATION blocks supplied in the same uncertainty file. However, a covariance matrix must not be associated with any parameters which do NOT pertain to the current inverse or uncertainty analysis problem. +3. A covariance matrix that is cited in a parameter uncertainty file need not describe all of the parameters associated with the current inverse problem, for it need only pertain to a subset of these. Other parameters can be associated with other COVARIANCE\_MATRIX blocks and/or can be cited in one or more STANDARD\_DEVIATION blocks supplied in the same uncertainty file. However, a covariance matrix must not be associated with any parameters which do NOT pertain to the current inverse or uncertainty analysis problem. 4. If a parameter is log-transformed in the PEST control file, the variance and covariances pertaining to that parameter as supplied in a covariance matrix file must in fact pertain to the log of that parameter. -**B.4** +##
B.4 JCO File + + **B.4.1** A JCO file is a binary file. It is used by members of the PEST and PEST++ suites to hold a Jacobian matrix. However, it can hold any matrix in which rows and columns are labelled. When holding a Jacobian matrix, columns pertain to parameters. In accordance with the old PEST protocol, parameter names are at most 12 characters in length. Rows pertain to observations; in accordance with the old PEST protocol, observation names are at most 20 characters in length. **B.4.2** Specifications are shown in Figure B.4.1 -
negncol, nrow 32 bit integers
ncount 32 bit integer
index, value 32 bit integer, 64 bit real
repeat the above ncount times
parname 12 bit character
repeat the above ncol times
obsname 20 bit character
repeat the above nrow times
+
negncol, nrow 32 bit integers
ncount 32 bit integer
index, value 32 bit integer, 64 bit real
repeat the above ncount times
parname 12 byte character
repeat the above ncol times
obsname 20 byte character
repeat the above nrow times
Figure B.4.1. Protocol of a JCO file. @@ -4238,14 +4339,16 @@ Variables cited in figure B.4.1 are as follows: If a value is not provided for a particular matrix element in a JCO file, its value is assumed to be zero. -**B.5** +##
B.5 JCB File + + **B.5.1** A JCB file is a binary file. It is used by members of the PEST++ suite to hold a matrix for which rows and columns are labelled (often members of an ensemble). Row and column names can be up to 200 characters in length. **B.5.2** Specifications are shown in Figure B.5.2. -
ncol, nrow 32 bit integers
ncount 32 bit integer
irow, icol, value 32 bit integer, 32 bit integer, 64 bit real
repeat the above noount times
colname 200 bit character
repeat the above ncol times
rowname 200 bit character
repeat the above nrow times
+
ncol, nrow 32 bit integers
ncount 32 bit integer
irow, icol, value 32 bit integer, 32 bit integer, 64 bit real
repeat the above ncount times
colname 200 byte character
repeat the above ncol times
rowname 200 byte character
repeat the above nrow times
Figure B.5.2. Protocol of a JCB file. diff --git a/documentation/tocgen.py b/documentation/tocgen.py index 47dec1b98..b9488dc70 100644 --- a/documentation/tocgen.py +++ b/documentation/tocgen.py @@ -1,3 +1,4 @@ +import sys import os def processFile(inFile, outFile): @@ -156,7 +157,7 @@ def clean_4_toc(docx_file,inFile,outFile,run_pandoc=True): if __name__ == "__main__": - clean_4_toc("pestpp_users_guide_v5.1.6.docx","file.md","temp.md",True) + clean_4_toc(sys.argv[1],"file.md","temp.md",True) processFile("temp.md","pestpp_users_manual.md") diff --git a/src/libs/common/config_os.h b/src/libs/common/config_os.h index aefe46682..4f30f6895 100644 --- a/src/libs/common/config_os.h +++ b/src/libs/common/config_os.h @@ -2,7 +2,7 @@ #define CONFIG_OS_H_ -#define PESTPP_VERSION "5.1.6"; +#define PESTPP_VERSION "5.1.9"; #if defined(_WIN32) || defined(_WIN64) #define OS_WIN diff --git a/src/libs/pestpp_common/Pest.cpp b/src/libs/pestpp_common/Pest.cpp index 216ede397..f21242379 100644 --- a/src/libs/pestpp_common/Pest.cpp +++ b/src/libs/pestpp_common/Pest.cpp @@ -38,6 +38,7 @@ #include "QSqrtMatrix.h" #include #include "network_package.h" +#include using namespace::std; @@ -1329,11 +1330,16 @@ int Pest::process_ctl_file(ifstream& fin, string _pst_filename, ofstream& f_rec) } - string tcol; + string tcol,pcol; if (cnames.find("PARTRANS") != cnames.end()) tcol = "PARTRANS"; else tcol = "TRANSFORM"; + if (cnames.find("PARNME") != cnames.end()) + pcol = "PARNME"; + else + pcol = "NAME"; + vector partrans = efile.get_col_string_vector(tcol); set s_partrans(partrans.begin(), partrans.end()); if (s_partrans.find("TIED") != s_partrans.end()) @@ -1353,8 +1359,8 @@ int Pest::process_ctl_file(ifstream& fin, string _pst_filename, ofstream& f_rec) //save any tied pars for processing later bc the par its tied to //might not have been processed yet. 
row_map = efile.get_row_map(ro); - if (row_map["PARTRANS"] == "TIED") - temp_tied_map[row_map["PARNME"]] = row_map["PARTIED"]; + if (row_map.at(tcol) == "TIED") + temp_tied_map[row_map.at(pcol)] = row_map.at("PARTIED"); } efile.keep_cols(efile_keep_cols); if (efiles_map.find(section) == efiles_map.end()) @@ -1689,11 +1695,17 @@ int Pest::process_ctl_file(ifstream& fin, string _pst_filename, ofstream& f_rec) // handle any tied pars found in external files double numer, demon, ratio; + vector missing; for (auto p: temp_tied_map) { name = p.first; string name_tied = p.second; numer = ctl_parameters[name]; +// if (ctl_parameters.find(name_tied) == ctl_parameters.end()) +// { +// missing.push_back(name_tied); +// continue; +// } demon = ctl_parameters[name_tied]; if (demon == 0.0) { @@ -1713,6 +1725,16 @@ int Pest::process_ctl_file(ifstream& fin, string _pst_filename, ofstream& f_rec) tied_names.insert(name_tied); } + if (missing.size() > 0) + { + ss.str(""); + ss << "Error: the following `partied` parameters were not found in the control file:"; + for (auto& m: missing) + ss << m << ","; + f_rec << ss.str() << endl; + throw runtime_error(ss.str()); + } + //process pestpp options map arg_map, line_arg_map; vector dup; @@ -3155,7 +3177,7 @@ void Pest::tokens_to_par_rec(ofstream &f_rec, const vector& tokens, Tran { float f; convert_ip(tokens[9],f); - pi.dercom = std::round(f); + pi.dercom = (int)std::floor(f + 0.5); } else pi.dercom = 1; diff --git a/src/libs/pestpp_common/SVDSolver.cpp b/src/libs/pestpp_common/SVDSolver.cpp index 2458da6e7..88ae66d01 100644 --- a/src/libs/pestpp_common/SVDSolver.cpp +++ b/src/libs/pestpp_common/SVDSolver.cpp @@ -238,9 +238,19 @@ ModelRun SVDSolver::solve(RunManagerAbstract &run_manager, TerminationController ModelRun prev_run(best_upgrade_run); bool upgrade_start = (restart_controller.get_restart_option() == RestartController::RestartOption::RESUME_UPGRADE_RUNS); best_upgrade_run = iteration_upgrd(run_manager, termination_ctl, prev_run, 
upgrade_start); + if ((global_iter_num == 2) && (pest_scenario.get_pestpp_options().get_glm_debug_high_2nd_iter_phi())) + { + Observations fake_obs(optimum_run.get_obs()); + for (auto& oname : fake_obs.get_keys()) + { + fake_obs.update_rec(oname,1.0e+10); + } + best_upgrade_run.set_observations(fake_obs); + } // reload best parameters and set flag to switch to central derivatives next iteration double prev_phi = prev_run.get_phi(*regul_scheme_ptr); double best_new_phi = best_upgrade_run.get_phi(*regul_scheme_ptr); + double phi_ratio = best_new_phi / prev_phi; cout << endl << " ...Lambda testing complete for iteration " << termination_ctl.get_iteration_number() + 1 << endl; @@ -288,11 +298,6 @@ ModelRun SVDSolver::solve(RunManagerAbstract &run_manager, TerminationController best_upgrade_run.get_obs(), *(best_upgrade_run.get_obj_func_ptr()), best_upgrade_run.get_ctl_pars()); file_manager.close_file(filename.str()); - // par file for this iteration - output_file_writer.write_par(file_manager.open_ofile_ext("par"), best_upgrade_run.get_ctl_pars(), *(par_transform.get_offset_ptr()), - *(par_transform.get_scale_ptr())); - file_manager.close_file("par"); - filename.str(""); // reset the stringstream filename << global_iter_num << ".par"; output_file_writer.write_par(file_manager.open_ofile_ext(filename.str()), best_upgrade_run.get_ctl_pars(), *(par_transform.get_offset_ptr()), @@ -328,6 +333,10 @@ ModelRun SVDSolver::solve(RunManagerAbstract &run_manager, TerminationController // jacobian calculated next iteration will be at the current parameters and // will be more accurate than the one caluculated at the begining of this iteration save_nextjac = true; + // par file for this iteration + output_file_writer.write_par(file_manager.open_ofile_ext("par"), best_upgrade_run.get_ctl_pars(), *(par_transform.get_offset_ptr()), + *(par_transform.get_scale_ptr())); + file_manager.close_file("par"); } os << endl; iteration_update_and_report(os, prev_run, best_upgrade_run, 
termination_ctl, run_manager); @@ -1140,21 +1149,24 @@ ModelRun SVDSolver::iteration_upgrd(RunManagerAbstract &run_manager, Termination RestartController::write_upgrade_runs_built(fout_restart); } //instance of a Mat for the jco + Mat j; + LinearAnalysis la(j, pest_scenario, file_manager, *performance_log, parcov, rand_gen_ptr); + pair> fosm_real_info; - Mat j(jacobian.get_sim_obs_names(), jacobian.get_base_numeric_par_names(), - jacobian.get_matrix_ptr()); - if (pest_scenario.get_prior_info_ptr()->get_nnz_pi() > 0) - { - vector pi_names = pest_scenario.get_ctl_ordered_pi_names(); - j.drop_rows(pi_names); - } - LinearAnalysis la(j, pest_scenario, file_manager, *performance_log, parcov, rand_gen_ptr); + if (pest_scenario.get_pestpp_options().get_uncert_flag()) { cout << "-->starting iteration FOSM process..." << endl; performance_log->log_event("LinearAnalysis::glm_iter_fosm"); - + Mat j(jacobian.get_sim_obs_names(), jacobian.get_base_numeric_par_names(), + jacobian.get_matrix_ptr()); + if (pest_scenario.get_prior_info_ptr()->get_nnz_pi() > 0) + { + vector pi_names = pest_scenario.get_ctl_ordered_pi_names(); + j.drop_rows(pi_names); + } + LinearAnalysis la(j, pest_scenario, file_manager, *performance_log, parcov, rand_gen_ptr); try { la.glm_iter_fosm(base_run, output_file_writer, termination_ctl.get_iteration_number(), &run_manager); diff --git a/src/libs/pestpp_common/covariance.cpp b/src/libs/pestpp_common/covariance.cpp index ab3236b44..6b84630b8 100644 --- a/src/libs/pestpp_common/covariance.cpp +++ b/src/libs/pestpp_common/covariance.cpp @@ -1334,7 +1334,7 @@ void Covariance::from_uncertainty_file(const string &filename, vector &o if (find(names.begin(), names.end(), name) != names.end()) throw runtime_error(name + " listed more than once in uncertainty file:" + filename); names.push_back(tokens[0]); - triplet_list.push_back(Eigen::Triplet(irow, jcol, val * std_mlt)); + triplet_list.push_back(Eigen::Triplet(irow, jcol, (val * std_mlt)*(val * std_mlt))); irow++, 
jcol++; } diff --git a/src/libs/pestpp_common/linear_analysis.cpp b/src/libs/pestpp_common/linear_analysis.cpp index a73f0c08d..cada6608e 100644 --- a/src/libs/pestpp_common/linear_analysis.cpp +++ b/src/libs/pestpp_common/linear_analysis.cpp @@ -1140,7 +1140,8 @@ void LinearAnalysis::write_par_credible_range(ofstream &fout, string sum_filenam pair range; for (auto &pname : ordered_names) { - if (find(jacobian.cn_ptr()->begin(), jacobian.cn_ptr()->end(), pname) == jacobian.cn_ptr()->end()) + //if (find(jacobian.cn_ptr()->begin(), jacobian.cn_ptr()->end(), pname) == jacobian.cn_ptr()->end()) + if (prior_vars.find(pname) == prior_vars.end()) missing.push_back(pname); else { @@ -1149,7 +1150,7 @@ void LinearAnalysis::write_par_credible_range(ofstream &fout, string sum_filenam //if (parinfo.get_parameter_rec_ptr(pname)->tranform_type == ParameterRec::TRAN_TYPE::LOG) // value = log10(value); //range = get_range(value, prior_vars[pname], parinfo.get_parameter_rec_ptr(pname)->tranform_type); - stdev = sqrt(prior_vars[pname]); + stdev = sqrt(prior_vars.at(pname)); fout << setw(20) << pest_utils::lower_cp(pname) << setw(20) << value << setw(20) << stdev << setw(20) << value - (2.0*stdev) << setw(20) << value + (2.0*stdev); @@ -1158,7 +1159,7 @@ void LinearAnalysis::write_par_credible_range(ofstream &fout, string sum_filenam //posterior value = opt_pars.get_rec(pname); - stdev = sqrt(post_vars[pname]); + stdev = sqrt(post_vars.at(pname)); //range = get_range(value, post_vars[pname], parinfo.get_parameter_rec_ptr(pname)->tranform_type); fout << setw(20) << value << setw(20) << stdev << setw(20) << diff --git a/src/libs/pestpp_common/pest_data_structs.cpp b/src/libs/pestpp_common/pest_data_structs.cpp index f33356db4..09cd688a5 100644 --- a/src/libs/pestpp_common/pest_data_structs.cpp +++ b/src/libs/pestpp_common/pest_data_structs.cpp @@ -477,7 +477,7 @@ PestppOptions::ARG_STATUS PestppOptions::assign_value_by_key(string key, const s passed_args.insert("BASE_JACOBIAN"); 
passed_args.insert("BASE_JACOBIAN_FILENAME"); - //convert_ip(org_value, basejac_filename); + //convert_ip(org_value, basejac_filename);f basejac_filename = org_value; } @@ -579,6 +579,10 @@ PestppOptions::ARG_STATUS PestppOptions::assign_value_by_key(string key, const s { glm_debug_real_fail = pest_utils::parse_string_arg_to_bool(value); } + else if (key == "GLM_DEBUG_HIGH_2ND_ITER_PHI") + { + glm_debug_high_2nd_iter_phi = pest_utils::parse_string_arg_to_bool(value); + } else if (key == "UPGRADE_AUGMENT") { cout << "++UPGRADE_AUGMENT is deprecated and no longer supported...ignoring" << endl; @@ -1565,6 +1569,7 @@ void PestppOptions::summary(ostream& os) const os << "glm_accept_mc_phi: " << glm_accept_mc_phi << endl; os << "glm_rebase_super: " << glm_rebase_super << endl; os << "glm_iter_mc: " << glm_iter_mc << endl; + os << "glm_high_2nd_iter_phi: " << glm_debug_high_2nd_iter_phi << endl; // if (global_opt == OPT_DE) // { @@ -1774,6 +1779,7 @@ void PestppOptions::set_defaults() set_glm_accept_mc_phi(false); set_glm_rebase_super(false); set_glm_iter_mc(false); + set_glm_debug_high_2nd_iter_phi(false); set_prediction_names(vector()); set_parcov_filename(string()); set_obscov_filename(string()); diff --git a/src/libs/pestpp_common/pest_data_structs.h b/src/libs/pestpp_common/pest_data_structs.h index 707d6b270..b1f965d8d 100644 --- a/src/libs/pestpp_common/pest_data_structs.h +++ b/src/libs/pestpp_common/pest_data_structs.h @@ -295,6 +295,9 @@ class PestppOptions { void set_glm_rebase_super(bool _flag) { glm_rebase_super = _flag; } bool get_glm_iter_mc() const { return glm_iter_mc; } void set_glm_iter_mc(bool _flag) { glm_iter_mc = _flag; } + bool get_glm_debug_high_2nd_iter_phi() const {return glm_debug_high_2nd_iter_phi;} + void set_glm_debug_high_2nd_iter_phi(bool _flag) {glm_debug_high_2nd_iter_phi = _flag;} + @@ -638,6 +641,7 @@ class PestppOptions { bool glm_accept_mc_phi; bool glm_rebase_super; bool glm_iter_mc; + bool glm_debug_high_2nd_iter_phi; vector 
base_lambda_vec; vector lambda_scale_vec; diff --git a/src/programs/pestpp-ies/pestpp-ies.cpp b/src/programs/pestpp-ies/pestpp-ies.cpp index 31f009ad8..c2634b4b6 100644 --- a/src/programs/pestpp-ies/pestpp-ies.cpp +++ b/src/programs/pestpp-ies/pestpp-ies.cpp @@ -35,9 +35,9 @@ using namespace pest_utils; int main(int argc, char* argv[]) { -#ifndef _DEBUG - try { -#endif +//#ifndef _DEBUG +// try { +//#endif string version = PESTPP_VERSION; cout << endl << endl; cout << " pestpp-ies: a GLM iterative ensemble smoother" << endl << endl; @@ -301,20 +301,20 @@ int main(int argc, char* argv[]) fout_rec.close(); return 0; -#ifndef _DEBUG - } - catch (exception &e) - { - cout << "Error condition prevents further execution: " << endl << e.what() << endl; - //cout << "press enter to continue" << endl; - //char buf[256]; - //OperSys::gets_s(buf, sizeof(buf)); - return 1; - } - catch (...) - { - cout << "Error condition prevents further execution" << endl; - return 1; - } -#endif +//#ifndef _DEBUG +// } +// catch (exception &e) +// { +// cout << "Error condition prevents further execution: " << endl << e.what() << endl; +// //cout << "press enter to continue" << endl; +// //char buf[256]; +// //OperSys::gets_s(buf, sizeof(buf)); +// return 1; +// } +// catch (...) +// { +// cout << "Error condition prevents further execution" << endl; +// return 1; +// } +//#endif }