diff --git a/benchmarks/basic_tests.py b/benchmarks/basic_tests.py index fb10f86b..1be7eb11 100644 --- a/benchmarks/basic_tests.py +++ b/benchmarks/basic_tests.py @@ -1485,14 +1485,323 @@ def sweep_bin_test(): #def fail_test(): # raise Exception("fail please") -def invest(): - pst = pyemu.Pst(os.path.join("PESTPPTest","PestPilotPointTest.pst")) + +def tenpar_collapse_invest(): + model_d = "ies_10par_xsec" + pyemu.Ensemble.reseed() + base_d = os.path.join(model_d, "template") + new_d = os.path.join(model_d, "test_template") + if os.path.exists(new_d): + shutil.rmtree(new_d) + shutil.copytree(base_d, new_d) + print(platform.platform().lower()) + pst = pyemu.Pst(os.path.join(new_d, "pest.pst")) + print(pst.model_command) + + obs = pst.observation_data + obs.loc[:,"weight"] = 0.0 + + nzoname = obs.obsnme.str.startswith("h01").index[3] + obs.loc[nzoname,"weight"] = 10.0 + + obs.loc[nzoname,"standard_deviation"] = 0.1 + + pst.parameter_data.loc[:,"partrans"] = "log" + pst.pestpp_options["ies_multimodal_alpha"] = 0.99 + + # set noptmax + num_reals = [10,30,50,100,400,1000] + pst.control_data.noptmax = 10 + # for num_real in num_reals: + + # # wipe all pestpp options + # pst.pestpp_options = {} + # pst.pestpp_options["ies_num_reals"] = num_real + # pst.write(os.path.join(new_d, "pest.pst"),version=2) + + # m_d = os.path.join(model_d,"master_ies_base_{0}reals".format(pst.pestpp_options["ies_num_reals"])) + # if os.path.exists(m_d): + # shutil.rmtree(m_d) + # num_workers = 50 + # if num_real > 500: + # num_workers = 200 + # pyemu.os_utils.start_workers(new_d, exe_path, "pest.pst", num_workers, master_dir=m_d, + # worker_root=model_d,port=port,verbose=True) + + # pst.pestpp_options = {} + # pst.pestpp_options["ies_num_reals"] = 100000 + # pst.control_data.noptmax = -1 + # pst.write(os.path.join(new_d, "pest.pst")) + + # m_d = os.path.join(model_d,"master_ies_base_{0}reals".format(pst.pestpp_options["ies_num_reals"])) + # if os.path.exists(m_d): + # shutil.rmtree(m_d) + # pyemu.os_utils.start_workers(new_d, exe_path, "pest.pst", 200, master_dir=m_d, + # worker_root=model_d,port=port,verbose=True) + + + #pst.observation_data.loc[nzoname,"obsval"] += 8 + pst.observation_data.loc[nzoname,"weight"] = 100.0 + pst.observation_data.loc[nzoname,"obsval"] -= 1.5 + + # for num_real in num_reals: + + # # wipe all pestpp options + # pst.pestpp_options = {} + # pst.pestpp_options["ies_num_reals"] = num_real + # pst.write(os.path.join(new_d, "pest.pst"),version=2) + + # m_d = os.path.join(model_d,"master_ies_corrupt_{0}reals".format(pst.pestpp_options["ies_num_reals"])) + # if os.path.exists(m_d): + # shutil.rmtree(m_d) + # num_workers = 50 + # if num_real > 500: + # num_workers = 200 + # pyemu.os_utils.start_workers(new_d, exe_path, "pest.pst", num_workers, master_dir=m_d, + # worker_root=model_d,port=port,verbose=True) + + pst.pestpp_options = {} + pst.pestpp_options["ies_num_reals"] = 100000 + pst.control_data.noptmax = -1 + pst.write(os.path.join(new_d, "pest.pst"),version=2) + + m_d = os.path.join(model_d,"master_ies_corrupt_{0}reals".format(pst.pestpp_options["ies_num_reals"])) + if os.path.exists(m_d): + shutil.rmtree(m_d) + pyemu.os_utils.start_workers(new_d, exe_path, "pest.pst", 200, master_dir=m_d, + worker_root=model_d,port=port,verbose=True) + +def plot_collapse_invest(): + b_d = "ies_10par_xsec" + m_ds = [os.path.join(b_d,d) for d in os.listdir(b_d) if os.path.isdir(os.path.join(b_d,d)) and d.startswith("master") and d.endswith("reals")] + + pes, oes, phidfs = {},{},{} + names = [] + name_dict = {} + min_phi = 1e300 + for m_d in m_ds: + phidf = pd.read_csv(os.path.join(m_d,"pest.phi.actual.csv")) + if "corrupt" in m_d: + min_phi = min(min_phi,phidf.iloc[:,6:].values.min()) + pst = pyemu.Pst(os.path.join(m_d,"pest.pst")) + p,o = [],[] + for i in [0,phidf.iteration.max()]: + oe = pd.read_csv(os.path.join(m_d,"pest.{0}.obs.csv".format(i)),index_col=0) + oe.index = oe.index.map(str) + pe = pd.read_csv(os.path.join(m_d,"pest.{0}.par.csv".format(i)),index_col=0) + pe.index = pe.index.map(str) + o.append(oe) + p.append(pe) + #if phidf.iteration.max() == 0: + #realphis = phidf.iloc[0,6:] + #realkeep = realphis.loc[realphis < pst.nnz_obs * 1.1] + #print(realkeep.shape) + #print(o[0].index) + #print(p[0].index) + + # p.append(p[0])#.loc[realkeep.index,:].copy()) + + # o.append(o[0])#.loc[realkeep.index,:].copy()) + + if phidf.iteration.max() == 0: + nreals = oe.shape[0] + name = "{0}, {1} realizations".format(m_d.split("_")[-2],nreals) + else: + + name = "{0}, {1} realizations".format(m_d.split("_")[-2],m_d.split("_")[-1].replace("reals","")) + nreals = int(m_d.split("_")[-1].replace("reals","")) + if nreals not in name_dict: + name_dict[nreals] = [] + name_dict[nreals].append(name) + names.append(name) + pes[name] = p + oes[name] = o + phidfs[name] = phidf + + #print(min_phi) + reals = list(name_dict.keys()) + reals.sort() + + # now filter + #thresh = min_phi * 5 + thresh = 10 + corrupt_thresh = min_phi * 1.1 + names = [] + for nreals in reals: + m_ds = name_dict[nreals] + m_ds.sort() + for m_d in m_ds: + p = pes[m_d] + o = oes[m_d] + phidf = phidfs[m_d] + #print(m_d,len(p)) + assert len(p) == 2 + assert len(o) == 2 + ppost = p[-1] + opost = o[-1] + phipost = phidf.iloc[-1,:].copy() + phipost = phipost.iloc[6:] + #print(phipost) + if "corrupt" in m_d: + phipost = phipost.loc[phipost<=corrupt_thresh] + else: + phipost = phipost.loc[phipost<=thresh] + print(m_d,phipost.shape) + pes[m_d][-1] = ppost.loc[phipost.index.values,:].copy() + oes[m_d][-1] = opost.loc[phipost.index.values,:].copy() + names.append(m_d) + + + import matplotlib.pyplot as plt + from matplotlib.backends.backend_pdf import PdfPages + + with PdfPages("collapse_compare_2ax.pdf") as pdf: + for par in pst.adj_par_names: + fig,axes = plt.subplots(2,1,figsize=(8.5,8.5)) + mn,mx = 1e30,-1e30 + for im_d,m_d in enumerate(names): + ax = axes[0] + if "corrupt" in m_d: + ax = axes[1] + + p = pes[m_d] + o = oes[m_d] + #print(m_d,len(p)) + color = plt.cm.jet(np.linspace(0, 1, len(names))) + pp = p[-1] + if "stage" not in par: + pp.loc[:,par] = np.log10(pp.loc[:,par].values) + pp.loc[:,par].plot(ax=ax,kind="hist",color=color[im_d],alpha=0.5,label=m_d,density=True) + mn = min(mn,ax.get_xlim()[0]) + mx = max(mn,ax.get_xlim()[1]) + axes[0].set_title("pure",loc="left") + axes[1].set_title("corrupt",loc="left") + + for ax in axes: + ax.set_xlim(mn,mx) + ax.set_yticks([]) + ax.grid() + ax.legend(loc="upper left") + + plt.tight_layout() + pdf.savefig() + plt.close(fig) + + + with PdfPages("collapse_compare.pdf") as pdf: + for par in pst.adj_par_names: + fig,axes = plt.subplots(len(names),1,figsize=(8.5,8.5)) + mn,mx = 1e30,-1e30 + for m_d,ax in zip(names,axes): + p = pes[m_d] + o = oes[m_d] + #print(m_d,len(p)) + + colors = ["0.5","b"] + labels = ["prior","posterior"] + for pp,oo,c,l in zip(p,o,colors,labels): + if "stage" not in par: + pp.loc[:,par] = np.log10(pp.loc[:,par].values) + pp.loc[:,par].plot(ax=ax,kind="hist",color=c,alpha=0.5,label=l,density=True) + ax.set_title("{0}, {1} {2} posterior realizations".format(par,m_d,pp.shape[0]),loc="left") + ax.set_yticks([]) + mn = min(mn,ax.get_xlim()[0]) + mx = max(mn,ax.get_xlim()[1]) + print(m_d) + + for ax in axes: + ax.set_xlim(mn,mx) + plt.tight_layout() + pdf.savefig() + plt.close(fig) + + for obs in pst.obs_names: + fig,axes = plt.subplots(len(names),1,figsize=(8.5,8.5)) + mn,mx = 1e30,-1e30 + for m_d,ax in zip(names,axes): + p = pes[m_d] + o = oes[m_d] + colors = ["0.5","b"] + labels = ["prior","posterior"] + for pp,oo,c,l in zip(p,o,colors,labels): + oo.loc[:,obs].plot(kind="hist",color=c,alpha=0.5,label=l,ax=ax,density=True) + ax.set_title("{0}, {1}, {2} posterior realizations".format(obs,m_d,oo.shape[0]),loc="left") + mn = min(mn,ax.get_xlim()[0]) + mx = max(mn,ax.get_xlim()[1]) + for ax in axes: + ax.set_xlim(mn,mx) + + plt.tight_layout() + pdf.savefig() + plt.close(fig) + + + + + print(m_ds) + + +def tenpar_uniform_invest(): + model_d = "ies_10par_xsec" + pyemu.Ensemble.reseed() + base_d = os.path.join(model_d, "template") + new_d = os.path.join(model_d, "test_template_geo") + if os.path.exists(new_d): + shutil.rmtree(new_d) + shutil.copytree(base_d, new_d) + print(platform.platform().lower()) + pst = pyemu.Pst(os.path.join(new_d, "pest.pst")) + print(pst.model_command) + obs = pst.observation_data + obs.loc[:,"weight"] = 0.0 + obs.loc["h01_03","weight"] = 1.0 + par = pst.parameter_data + par.loc[:,"partrans"] = "log" + par.loc["stage","partrans"] = "fixed" + v = pyemu.geostats.ExpVario(contribution=1.0,a=10) + gs = pyemu.geostats.GeoStruct(variograms=v) + x = np.cumsum(np.ones(10)) + y = np.ones(10) + print(pst.adj_par_names) + cov = gs.covariance_matrix(x,y,names = pst.adj_par_names).to_dataframe() + cov.loc["stage",:] = 0.0 + cov.loc[:,"stage"] = 0.0 + + cov.loc["stage","stage"] = 0.1 + #import matplotlib.pyplot as plt + #plt.imshow(cov.values) + #plt.show() + par.loc["stage","partrans"] = "none" + + pe = pyemu.ParameterEnsemble.from_gaussian_draw(pst=pst,cov=pyemu.Cov.from_dataframe(cov),num_reals=20) + pe.enforce() + pe.to_csv(os.path.join(new_d,"geoprior.csv")) + pst.pestpp_options["ies_par_en"] = "geoprior.csv" + pst.control_data.noptmax = 10 + pst.write(os.path.join(new_d,"pest.pst")) + pyemu.os_utils.run("pestpp-ies pest.pst",cwd=new_d) + + for i,val in enumerate(np.linspace(1,5,pe.shape[0])): + print(val) + pe.iloc[i,:5] = val + new_d = os.path.join(model_d, "test_template_uniform") + if os.path.exists(new_d): + shutil.rmtree(new_d) + shutil.copytree(base_d, new_d) + pe.to_csv(os.path.join(new_d,"uniprior.csv")) + pst.pestpp_options["ies_par_en"] = "uniprior.csv" + pst.control_data.noptmax = 10 + pst.write(os.path.join(new_d,"pest.pst")) + pyemu.os_utils.run("pestpp-ies pest.pst",cwd=new_d) if __name__ == "__main__": - invest() + tenpar_uniform_invest() + #tenpar_collapse_invest() + #plot_collapse_invest() + #run() #mf6_v5_ies_test() #prep_ends() diff --git a/documentation/pestpp_users_guide_v5.2.13.docx b/documentation/pestpp_users_guide_v5.2.13.docx deleted file mode 100644 index e49b24cd..00000000 Binary files a/documentation/pestpp_users_guide_v5.2.13.docx and /dev/null differ diff --git a/documentation/pestpp_users_manual.md b/documentation/pestpp_users_manual.md index c09da4f4..47fe7dd4 100644 --- a/documentation/pestpp_users_manual.md +++ b/documentation/pestpp_users_manual.md @@ -1,13 +1,13 @@ A close up of a purple sign Description automatically generated -# Version 5.2.13 +# Version 5.2.14 PEST++ Development Team -October 2024 +November 2024 # Acknowledgements @@ -70,7 +70,7 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI # Table of Contents -- [Version 5.2.13](#s1) +- [Version 5.2.14](#s1) - [Acknowledgements](#s2) - [Preface](#s3) - [License](#s4) @@ -241,7 +241,7 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI - [9.1.12 Use of observation noise covariance matrices](#s13-1-12) - [9.1.13 Detecting and resolving prior-data conflict](#s13-1-13) - [9.1.14 Multi-modal solution process](#s13-1-14) - - [9.1.15 Mean-Update Iterations](#s13-1-15) + - [9.1.15 Covariance Reinflation](#s13-1-15) - [9.2 Using PESTPP-IES](#s13-2) - [9.2.1 General](#s13-2-1) - [9.2.2 Initial Realizations](#s13-2-2) @@ -3610,15 +3610,15 @@ Closely related to the multimodal solution process is the use of a “weights” Figure 9.2 – A demonstration of the multi-modal solution process using a weight ensemble on the ZDT1 benchmark problem. The standard solution process using single weight vector drives the posterior towards a single point, while the multi-modal upgrade process uses unique weights on each of the two objectives (observations in the control file) such that each realization targets a different point on the trade-off between the two objectives. -### 9.1.15 Mean-Update Iterations +### 9.1.15 Covariance Reinflation In highly nonlinear problems, the gradient between parameters and simulated equivalents to observations can change (drastically) between iterations of PESTPP-IES. This can drive the need for several iterations in order to fully assimilate data. However, during each iteration, due to the solution equations, both the mean and (co)variance of the parameter ensemble can change, with the latter usually decreasing substantially each iteration, meaning that across multiple iterations, the variance of the parameter ensemble reduces, sometime dramatically, this is especially true in highly nonlinear problems where changing gradients can condition different parameters (and/or parameter combinations) each iteration, and in cases where the prior simulated ensemble has a consistent bias compared to observed counterparts. Spurious correlations exacerbate this problem. -To combat the variance reduction that occurs across multiple iterations, it might be desirable in some settings to “reset” the variance of the current parameter ensemble to the prior state; this can also be labelled “reinflation. Mechanically, this is done by subtracting the mean from the prior ensemble (forming the so-called deviations) and then adding the mean from the current parameter ensemble, thereby translating the prior ensemble across parameter space but (more or less) preserving the variance of the prior ensemble. +To combat the variance reduction that occurs across multiple iterations, it might be desirable in some settings to “reset” the variance of the current parameter ensemble to the prior state; this can also be labelled “reinflation. Mechanically, this is done by subtracting the mean from the prior ensemble (forming the so-called deviations or “anomalies”) and then adding the mean from the current parameter ensemble, thereby translating the prior ensemble across parameter space but (more or less) preserving the variance of the prior ensemble. -This option is implemented in PESTPP-IES via the *ies_n_iter_mean* option. As the name implies, the argument is the number of iterations to undertake before resetting the variance to the prior ensemble state. Some other behavioural characteristics of using mean-update iterations: +This option is implemented in PESTPP-IES via the *ies_n_iter_reinflation* option. As the name implies, the argument is the number of iterations to undertake before resetting the variance to the prior ensemble state. Some other behavioural characteristics of using parameter covariance reinflation: -- The phi reduction each iteration may not satisfy the requirements for a “successful” iteration. However, PESTPP-IES will continue iterating anyway and will also undertake partial upgrades of realizations that do show phi improvement; essentially PESTPP-IES will use all of it regular tricks to seek a good fit for iterations less than *ies_n_iter_mean*. +- The phi reduction each iteration may not satisfy the requirements for a “successful” iteration. However, PESTPP-IES will continue iterating anyway and will also undertake partial upgrades of realizations that do show phi improvement; essentially PESTPP-IES will use all of it regular tricks to seek a good fit for iterations less than *ies_n_iter_reinflation*. - The iteration count in PESTPP-IES will be incremented during the reinflation. This is so the results of the reinflation can be tracked in the output files. @@ -3824,7 +3824,7 @@ If users want to have more fine-grained control of the weight adjustment, option ### 9.2.11 Selective Updates -In highly nonlinear settings, some realizations may show an increase in phi across iterations, while the majority of realizations shows decreases – see Figure 9.4 for an example. This indicates that, because of nonlinearity, the optimal parameter ensemble update that is effective at reducing phi for the ensemble as a whole is not ideal for all realizations. To combat this problem, PESTPP-IES will, by default (as of version 5.2.13), only update realizations that meet the phi reduction criteria. This is identical to the partial upgrade process that is triggered automatically of the mean phi of the entire ensemble does not meet the phi reduction criteria. This behaviour is controlled with the *ies_update_by_reals* option, which is true by default. +In highly nonlinear settings, some realizations may show an increase in phi across iterations, while the majority of realizations shows decreases – see Figure 9.4 for an example. This indicates that, because of nonlinearity, the optimal parameter ensemble update that is effective at reducing phi for the ensemble as a whole is not ideal for all realizations. To combat this problem, PESTPP-IES will, by default (as of version 5.2.13), only update realizations that meet the phi reduction criteria. This is identical to the partial upgrade process that is triggered automatically of the mean phi of the entire ensemble does not meet the phi reduction criteria. This behaviour is controlled with the *ies_update_by_reals* option, which is false by default. A graph of a graph Description automatically generated with medium confidence @@ -4159,14 +4159,14 @@ Note also that the number of control variables may change with time. Refer to th A flag to use internal weight balancing for each realization. This option should be used in conjunction with the multi-modal solution process. -ies_n_iter_mean +ies_n_iter_reinflation int -The number of mean-shift iterations to use. Default is 0, which is indicates not to do the prior-mean-shifting process +The number of between covariance re-inflation. Default is 0, which is indicates not re-inflate parameter covariance ies_update_by_reals bool -Flag to indicate whether or not to update each realization according to its phi reduction. +Flag to indicate whether or not to update each realization according to its phi reduction. Default is False. diff --git a/src/libs/common/config_os.h b/src/libs/common/config_os.h index 353586c3..c55ffe16 100644 --- a/src/libs/common/config_os.h +++ b/src/libs/common/config_os.h @@ -2,7 +2,7 @@ #define CONFIG_OS_H_ -#define PESTPP_VERSION "5.2.13"; +#define PESTPP_VERSION "5.2.14"; #if defined(_WIN32) || defined(_WIN64) #define OS_WIN diff --git a/src/libs/pestpp_common/DataAssimilator.cpp b/src/libs/pestpp_common/DataAssimilator.cpp index 23c909cf..bbbf2dd1 100644 --- a/src/libs/pestpp_common/DataAssimilator.cpp +++ b/src/libs/pestpp_common/DataAssimilator.cpp @@ -152,7 +152,7 @@ void DataAssimilator::da_update(int cycle) accept = solve_glm(cycle); report_and_save(cycle); ph.update(oe, pe); - last_best_mean = ph.get_mean(L2PhiHandler::phiType::COMPOSITE); + last_best_mean = ph.get_representative_phi(L2PhiHandler::phiType::COMPOSITE); last_best_std = ph.get_std(L2PhiHandler::phiType::COMPOSITE); ph.report(true); ph.write(iter, run_mgr_ptr->get_total_runs()); diff --git a/src/libs/pestpp_common/EnsembleMethodUtils.cpp b/src/libs/pestpp_common/EnsembleMethodUtils.cpp index 727af4e0..106e87ca 100644 --- a/src/libs/pestpp_common/EnsembleMethodUtils.cpp +++ b/src/libs/pestpp_common/EnsembleMethodUtils.cpp @@ -500,6 +500,37 @@ void EnsembleSolver::update_multimodal_components(const double mm_alpha) { } performance_log->log_event("...sorting composite score"); sortedset fitness_sorted(composite_score.begin(), composite_score.end(), compFunctor); + set dups; + if (fitness_sorted.size() != composite_score.size()) + { + performance_log->log_event("duplicates in composite score...dealing"); + map counts; + for (auto& score : composite_score) + { + if (counts.find(score.second) == counts.end()) + counts[score.second] = 0; + counts[score.second]++; + } + for (auto& score : composite_score) + { + if (counts[score.second] > 1) + { + bool isin = false; + for (auto &ii: fitness_sorted) { + if (ii.first == score.first) { + isin = true; + break; + } + } + + if (!isin) { + dups.insert(score.first); + } + } + + } + + } real_idxs.clear(); oe_real_names_case.clear(); pe_real_names_case.clear(); @@ -518,6 +549,10 @@ void EnsembleSolver::update_multimodal_components(const double mm_alpha) { //cout << real_name << "," << ii->first << "," << ii->second << endl; } + for (auto& dup: dups) + { + real_idxs.push_back(real_map.at(dup)); + } if (real_idxs.size() != subset_size+1) { @@ -2313,10 +2348,23 @@ map L2PhiHandler::get_par_group_contrib(Eigen::VectorXd &phi_vec void L2PhiHandler::update(ObservationEnsemble & oe, ParameterEnsemble & pe) { + ObservationInfo oinfo = pest_scenario->get_ctl_observation_info(); + num_conflict_group.clear(); + for (auto& group : pest_scenario->get_ctl_ordered_obs_group_names()) + { + num_conflict_group[group] = 0; + } + vector in_conflict = detect_simulation_data_conflict(oe,""); + string group; + for (auto& ic : in_conflict) + { + group = oinfo.get_group(ic); + num_conflict_group[group]++; + } //build up obs group and par group idx maps for group reporting obs_group_idx_map.clear(); vector nnz_obs = oe_base->get_var_names(); - ObservationInfo oinfo = pest_scenario->get_ctl_observation_info(); + vector idx; for (auto& og : pest_scenario->get_ctl_ordered_obs_group_names()) obs_group_idx_map[og] = vector(); @@ -2386,10 +2434,22 @@ void L2PhiHandler::update(ObservationEnsemble & oe, ParameterEnsemble & pe) void L2PhiHandler::update(ObservationEnsemble & oe, ParameterEnsemble & pe, ObservationEnsemble& weights) { + ObservationInfo oinfo = pest_scenario->get_ctl_observation_info(); + num_conflict_group.clear(); + for (auto& group : pest_scenario->get_ctl_ordered_obs_group_names()) + { + num_conflict_group[group] = 0; + } + vector in_conflict = detect_simulation_data_conflict(oe,""); + string group; + for (auto& ic : in_conflict) + { + group = oinfo.get_group(ic); + num_conflict_group[group]++; + } //build up obs group and par group idx maps for group reporting obs_group_idx_map.clear(); vector nnz_obs = oe_base->get_var_names(); - ObservationInfo oinfo = pest_scenario->get_ctl_observation_info(); vector idx; for (auto& og : pest_scenario->get_ctl_ordered_obs_group_names()) obs_group_idx_map[og] = vector(); @@ -2551,6 +2611,17 @@ double L2PhiHandler::calc_std(map *phi_map) return sqrt(var / (phi_map->size() - 1)); } +double L2PhiHandler::get_representative_phi(phiType pt) +{ + if (pest_scenario->get_pestpp_options().get_ies_n_iter_mean() < 0) { + return get_min(pt); + } + + else { + return get_mean(pt); + } +} + double L2PhiHandler::get_mean(phiType pt) { //double mean = 0.0; @@ -2645,6 +2716,104 @@ bool cmp_pair(pair& first, pair& second) return first.second > second.second; } +vector L2PhiHandler::detect_simulation_data_conflict(ObservationEnsemble& _oe, string csv_tag) { + vector in_conflict; + + ofstream pdccsv; + if (csv_tag.size() > 0) + pdccsv.open(file_manager->get_base_filename() + csv_tag); + + double smin, smax, omin, omax, smin_stat, smax_stat, omin_stat, omax_stat; + map smap, omap; + vector snames = _oe.get_var_names(); + vector onames = oe_base->get_var_names(); + vector temp = get_lt_obs_names(); + set ineq_lt(temp.begin(), temp.end()); + //set::iterator end = ineq.end(); + temp = get_gt_obs_names(); + set ineq_gt(temp.begin(), temp.end()); + temp.resize(0); + + for (int i = 0; i < snames.size(); i++) { + smap[snames[i]] = i; + } + for (int i = 0; i < onames.size(); i++) { + omap[onames[i]] = i; + } + int sidx, oidx; + bool use_stat_dist = true; + if (pest_scenario->get_pestpp_options().get_ies_pdc_sigma_distance() <= 0.0) + use_stat_dist = false; + + double smn, sstd, omn, ostd, dist; + double sd = abs(pest_scenario->get_pestpp_options().get_ies_pdc_sigma_distance()); + int oe_nr = _oe.shape().first; + int oe_base_nr = oe_base->shape().first; + Eigen::VectorXd t; + + if (csv_tag.size() > 0) + { + pdccsv << "name,obs_mean,obs_std,obs_min,obs_max,obs_stat_min,obs_stat_max,sim_mean,sim_std,sim_min,sim_max,sim_stat_min,sim_stat_max,distance"; + pdccsv << endl; + } + + for (auto oname: pest_scenario->get_ctl_ordered_nz_obs_names()) { + //if (ineq.find(oname) != end) + // continue; + sidx = smap[oname]; + oidx = omap[oname]; + smin = _oe.get_eigen_ptr()->col(sidx).minCoeff(); + omin = oe_base->get_eigen_ptr()->col(oidx).minCoeff(); + smax = _oe.get_eigen_ptr()->col(sidx).maxCoeff(); + omax = oe_base->get_eigen_ptr()->col(oidx).maxCoeff(); + t = _oe.get_eigen_ptr()->col(sidx); + smn = t.mean(); + sstd = std::sqrt((t.array() - smn).square().sum() / (oe_nr - 1)); + smin_stat = smn - (sd * sstd); + smax_stat = smn + (sd * sstd); + t = oe_base->get_eigen_ptr()->col(oidx); + omn = t.mean(); + ostd = std::sqrt((t.array() - omn).square().sum() / (oe_base_nr - 1)); + omin_stat = omn - (sd * ostd); + omax_stat = omn + (sd * ostd); + bool conflicted = false; + if (use_stat_dist) { + if (ineq_lt.find(oname) != ineq_lt.end()) { + if (smin_stat > omax_stat) + conflicted = true; + } else if (ineq_gt.find(oname) != ineq_gt.end()) { + if (smax_stat < omin_stat) + conflicted = true; + } else if ((smin_stat > omax_stat) || (smax_stat < omin_stat)) { + conflicted = true; + } + } else { + if (ineq_lt.find(oname) != ineq_lt.end()) { + if (smin > omax) + conflicted = true; + } else if (ineq_gt.find(oname) != ineq_gt.end()) { + if (smax < omin) + conflicted = true; + } else if ((smin > omax) || (smax < omin)) { + conflicted = true; + } + } + if (conflicted) { + in_conflict.push_back(oname); + if (csv_tag.size() > 0) { + dist = max((smin - omax), (omin - smax)); + + pdccsv << pest_utils::lower_cp(oname) << "," << omn << "," << ostd << "," << omin << "," << omax << "," + << omin_stat << "," + << omax_stat; + pdccsv << "," << smn << "," << sstd << "," << smin << "," << smax << "," << smin_stat << "," + << smax_stat << "," << dist << endl; + } + } + } + pdccsv.close(); + return in_conflict; +} void L2PhiHandler::report_group(bool echo) { @@ -2665,6 +2834,8 @@ void L2PhiHandler::report_group(bool echo) { snzgroups.emplace(oi_ptr->get_group(o)); } + if (snzgroups.size() == 0) + return; double tot = 0, ptot = 0; double v = 0,pv = 0; @@ -2738,7 +2909,7 @@ void L2PhiHandler::report_group(bool echo) { ss << " --- observation group phi summary --- " << endl; ss << " (computed using 'actual' phi)" << endl; ss << " (sorted by mean phi)" << endl; - ss << left << setw(len) << "group" << right << setw(6) << "count" << setw(10) << "mean" << setw(10) << "std"; + ss << left << setw(len) << "group" << right << setw(6) << "count" << setw(11) << "nconflict" << setw(10) << "mean" << setw(10) << "std"; ss << setw(10) << "min" << setw(10) << "max"; ss << setw(10) << "percent" << setw(10) << "std" << endl; //<< setw(10) << "min " << setw(10) << "max " << endl; f << ss.str(); @@ -2771,6 +2942,8 @@ void L2PhiHandler::report_group(bool echo) { ss.str(""); ss << left << setw(len) << pest_utils::lower_cp(g) << " "; ss << right << setw(5) << nzc << " "; + ss << right << setw(10) << num_conflict_group[g] << " "; + ss << right << setw(9) << setprecision(3) << mn_map[g] << " "; ss << setw(9) << setprecision(3) << std_map[g] << " "; ss << setw(9) << setprecision(3) << mmn_map[g] << " "; @@ -3411,6 +3584,7 @@ map L2PhiHandler::calc_regul(ParameterEnsemble & pe) return phi_map; } + vector L2PhiHandler::get_violating_realizations(ObservationEnsemble& oe, const vector& viol_obs_names) { stringstream ss; @@ -3661,6 +3835,7 @@ ParChangeSummarizer::ParChangeSummarizer(ParameterEnsemble *_base_pe_ptr, FileMa void ParChangeSummarizer::summarize(ParameterEnsemble &pe, string filename) { update(pe); + map excess_std_reduction = get_npar_per_group_with_excess_std_reduction(pe); vector grp_names;// = base_pe_ptr->get_pest_scenario().get_ctl_ordered_par_group_names(); vector> pairs; //for (auto m : mean_change) @@ -3675,7 +3850,7 @@ void ParChangeSummarizer::summarize(ParameterEnsemble &pe, string filename) { grp_names.push_back(pairs[i].second); } - int mxlen = 15; + int mxlen = 7; for (auto& g : grp_names) mxlen = max(mxlen, (int)g.size()); @@ -3686,10 +3861,10 @@ void ParChangeSummarizer::summarize(ParameterEnsemble &pe, string filename) cout << ss.str(); frec << ss.str(); ss.str(""); - ss << setw(mxlen) << left << "group" << setw(10) << right << "mean chg" << setw(10) << "std chg"; - ss << setw(10) << "n at ubnd" << setw(10) << "% at ubnd"; - ss << setw(10) << "n at lbnd" << setw(10) << "% at lbnd"; - ss << setw(10) << "init CV" << setw(10) << "curr CV" << setw(10) << endl; + ss << setw(mxlen) << left << "group" << right << setw(6) << "count" << setw(10) << right << "mean chg" << setw(9) << "std chg"; + ss << setw(11) << "n at ubnd" << setw(11) << "% at ubnd"; + ss << setw(11) << "n at lbnd" << setw(11) << "% at lbnd"; + ss << setw(12) << "n std decr" << endl; cout << ss.str(); frec << ss.str(); @@ -3703,16 +3878,18 @@ void ParChangeSummarizer::summarize(ParameterEnsemble &pe, string filename) double std_diff = std_change[grp_name]; ss.str(""); - ss << setw(mxlen) << left << pest_utils::lower_cp(grp_name) << setw(10) << setprecision(4) << right << mean_diff * 100.0; - ss << setw(10) << setprecision(4) << std_diff * 100.0; + ss << setw(mxlen) << left << pest_utils::lower_cp(grp_name) << right << setw(6) << pargp2par_map[grp_name].size(); + ss << setw(10) << setprecision(4) << right << mean_diff * 100.0; + ss << setw(9) << setprecision(4) << std_diff * 100.0; num_out = num_at_ubound[grp_name]; percent_out = percent_at_ubound[grp_name]; - ss << setw(10) << num_out << setw(10) << setprecision(4) << percent_out; + ss << setw(11) << num_out << setw(11) << setprecision(4) << percent_out; num_out = num_at_lbound[grp_name]; percent_out = percent_at_lbound[grp_name]; - ss << setw(10) << num_out << setw(10) << setprecision(4) << percent_out; - ss << setw(10) << setprecision(4) << init_cv[grp_name] << setw(10) << curr_cv[grp_name] << setw(10) << setprecision(4) << endl; - if (i < 15) + ss << setw(11) << num_out << setw(11) << setprecision(4) << percent_out; + //ss << setw(10) << setprecision(4) << init_cv[grp_name] << setw(10) << curr_cv[grp_name] << setw(10) << setprecision(4) << endl; + ss << setw(12) << excess_std_reduction[grp_name] << endl; + if (i < 15) cout << ss.str(); frec << ss.str(); i++; @@ -3723,6 +3900,9 @@ void ParChangeSummarizer::summarize(ParameterEnsemble &pe, string filename) //ss << " 'n CV decr' is the number of parameters with current CV less " << cv_dec_threshold*100.0 << "% of the initial CV" << endl; ss << " Note: the parameter change statistics implicitly include the effect of " << endl; ss << " realizations that have failed or have been dropped." << endl; + ss << " Note: the 'n std decr' is the number of parameters with current" << endl; + ss << " std less 5% of their initial std." << endl; + cout << ss.str(); frec << ss.str(); if (grp_names.size() > 15) @@ -3761,6 +3941,64 @@ void ParChangeSummarizer::write_to_csv(string& filename) } +map ParChangeSummarizer::get_npar_per_group_with_excess_std_reduction(ParameterEnsemble& _pe, double thresh) +{ + stringstream ss; + map pr_mn,pr_std; + base_pe_ptr->fill_moment_maps(pr_mn,pr_std); + map pt_mn,pt_std; + _pe.fill_moment_maps(pt_mn,pt_std); + + double ratio; + map results; + + ParameterInfo* pi = _pe.get_pest_scenario_ptr()->get_ctl_parameter_info_ptr_4_mod(); + string group; + for (auto& pname : _pe.get_pest_scenario_ptr()->get_ctl_parameters()) + { + group = pi->get_parameter_rec_ptr(pname.first)->group; + if (results.find(group) == results.end()) + { + results[group] = 0; + } + } + + double demon = 0.0; + for (auto& std : pt_std) + { + demon = pr_std.at(std.first); + if (demon <= 0.0) + { + /*ss.str(""); + ss << "L2PhiHandler: prior std <=0 for par '" << std.first << "': " << std.second; + throw runtime_error(ss.str());*/ + ratio = 1.0; + } + else { + ratio = std.second / demon; + } + if (!isnormal(ratio) && (ratio != 0.0)) + { + ss.str(""); + ss << "L2PhiHandler: posterior-prior std ratio not normal for par '" << std.first << "': " << ratio; + throw runtime_error(ss.str()); + } + ratio = 1.0 - ratio; + group = pi->get_parameter_rec_ptr(std.first)->group; +// if (results.find(group) == results.end()) { +// results[group] = 0; +// } + if (ratio >= thresh) + { + results[group]++; + } + + } + return results; + + +} + void ParChangeSummarizer:: update(ParameterEnsemble& pe) { @@ -4138,14 +4376,14 @@ void EnsembleMethod::sanity_checks() } - if (pest_scenario.get_control_info().noptmax > 10) - { - warnings.push_back("noptmax > 10, don't expect anything meaningful from the results!"); - } +// if (pest_scenario.get_control_info().noptmax > 10) +// { +// warnings.push_back("noptmax > 10, don't expect anything meaningful from the results!"); +// } else if (pest_scenario.get_control_info().noptmax > 3) { - warnings.push_back("noptmax > 3, this is a lot of iterations for an ensemble method, please consider using fewer iterations for better outcomes"); + warnings.push_back("noptmax > 3, this is a lot of iterations for an ensemble method..."); } if (pest_scenario.get_control_info().pestmode == ControlInfo::PestMode::REGUL) @@ -4274,10 +4512,7 @@ void EnsembleMethod::sanity_checks() { errors.push_back("multimodal alpha < 0.001"); } - if (ppo->get_ies_n_iter_mean() > 0) - { - warnings.push_back("mean-shifting iterations is a new concept and subject to change - experimental at best"); - } + if (warnings.size() > 0) { @@ -4330,8 +4565,8 @@ bool EnsembleMethod::should_terminate() message(1, "nphinored (also used for consecutive bad lambda cycles): ", nphinored); int n_mean_iter = pest_scenario.get_pestpp_options().get_ies_n_iter_mean(); vector::iterator begin_idx = best_mean_phis.begin(); - if (best_mean_phis.size() > n_mean_iter) - begin_idx += (n_mean_iter+1); //bc of prior phi and then adding the mean shift to the list + if ((n_mean_iter > 0) && (best_mean_phis.size() > n_mean_iter)) + begin_idx = best_mean_phis.end() - (n_mean_iter+1); //bc of prior phi and then adding the mean shift to the list if (best_mean_phis.size() > 0) { @@ -4352,7 +4587,7 @@ bool EnsembleMethod::should_terminate() for (auto& phi : best_mean_phis) { ratio = (phi - best_phi_yet) / phi; - if ((i> n_mean_iter) && (ratio <= phiredstp)) + if ((i>=(iter - n_mean_iter)) && (ratio <= phiredstp)) count++; i++; } @@ -4844,6 +5079,7 @@ void EnsembleMethod::initialize(int cycle, bool run, bool use_existing) //transfer_dynamic_state_from_oe_to_initial_pe(_pe, _oe); pe = _pe; oe = _oe; + return; } @@ -4931,7 +5167,13 @@ void EnsembleMethod::initialize(int cycle, bool run, bool use_existing) lambda_min = 1.0E-30; consec_bad_lambda_cycles = 0; - + reinflate_to_minphi_real = false; + if (pest_scenario.get_pestpp_options().get_ies_n_iter_mean() < 0) + { + message(2,"n_iter_mean < 0, using min-phi real for re-inflation, resetting n_iter_reinflate to positive"); + reinflate_to_minphi_real = true; + pest_scenario.get_pestpp_options_ptr()->set_ies_n_iter_mean(-1 * pest_scenario.get_pestpp_options().get_ies_n_iter_mean()); + } lam_mults = pest_scenario.get_pestpp_options().get_ies_lam_mults(); if (lam_mults.size() == 0) lam_mults.push_back(1.0); @@ -5412,6 +5654,8 @@ void EnsembleMethod::initialize(int cycle, bool run, bool use_existing) //the hard way to restart if (obs_restart_csv.size() > 0) initialize_restart(); + //we need this for the prior mean shifting + weights_base = weights; if (!run) return; @@ -5570,7 +5814,8 @@ void EnsembleMethod::initialize(int cycle, bool run, bool use_existing) ph.report(true); pcs = ParChangeSummarizer(&pe_base, &file_manager, &output_file_writer); - vector in_conflict = detect_prior_data_conflict(); + message(1,"checking for prior-data conflict"); + vector in_conflict = ph.detect_simulation_data_conflict(oe,"pdc.csv"); if (in_conflict.size() > 0) { ss.str(""); @@ -5716,13 +5961,16 @@ void EnsembleMethod::initialize(int cycle, bool run, bool use_existing) ph.write(0, run_mgr_ptr->get_total_runs()); if (ppo->get_ies_save_rescov()) ph.save_residual_cov(oe, 0); - best_mean_phis.push_back(ph.get_mean(L2PhiHandler::phiType::COMPOSITE)); - if (!pest_scenario.get_pestpp_options().get_ies_use_approx()) + + best_mean_phis.push_back(ph.get_representative_phi(L2PhiHandler::phiType::COMPOSITE)); + last_best_mean = ph.get_representative_phi(L2PhiHandler::phiType::COMPOSITE); + + if (!pest_scenario.get_pestpp_options().get_ies_use_approx()) { message(1, "using full (MAP) update solution"); } - last_best_mean = ph.get_mean(L2PhiHandler::phiType::COMPOSITE); + last_best_std = ph.get_std(L2PhiHandler::phiType::COMPOSITE); last_best_lam = pest_scenario.get_pestpp_options().get_ies_init_lam(); bool continue_anyway = false; @@ -7007,7 +7255,7 @@ bool EnsembleMethod::solve(bool use_mda, vector inflation_factors, vecto //move the estimated states to the oe, which will then later be transferred back to the pe //transfer_dynamic_state_from_pe_to_oe(pe, oe); ph.update(oe, pe, weights); - double best_mean = ph.get_mean(L2PhiHandler::phiType::COMPOSITE); + double best_mean = ph.get_representative_phi(L2PhiHandler::phiType::COMPOSITE); double best_std = ph.get_std(L2PhiHandler::phiType::COMPOSITE); best_mean_phis.push_back(best_mean); @@ -7089,18 +7337,19 @@ bool EnsembleMethod::solve(bool use_mda, vector inflation_factors, vecto continue; } - if (pest_scenario.get_pestpp_options().get_ies_updatebyreals()) - { - message(1, "updating realizations with reduced phi"); - update_reals_by_phi(pe_lams[i], oe_lams[i],subset_idxs); - } +// if (pest_scenario.get_pestpp_options().get_ies_updatebyreals()) +// { +// message(1, "updating realizations with reduced phi"); +// update_reals_by_phi(pe_lams[i], oe_lams[i],subset_idxs); +// } ph.update(oe_lams[i], pe_lams[i], weights); message(0, "phi summary for lambda, scale fac:", vals, echo); ph.report(echo); - mean = ph.get_mean(L2PhiHandler::phiType::COMPOSITE); + mean = ph.get_representative_phi(L2PhiHandler::phiType::COMPOSITE); + std = ph.get_std(L2PhiHandler::phiType::COMPOSITE); ph.write_lambda(iter,oe_lams[i].shape().first,last_best_lam,last_best_mean, last_best_std, @@ -7121,9 +7370,6 @@ bool EnsembleMethod::solve(bool use_mda, vector inflation_factors, vecto } - - - if ((best_idx != -1) && (use_subset) && (local_subset_size < pe.shape().first)) { double acc_phi = last_best_mean * acc_fac; @@ -7144,8 +7390,7 @@ bool EnsembleMethod::solve(bool use_mda, vector inflation_factors, vecto string m = ss.str(); message(0, m); ph.update(oe, pe,weights); - best_mean_phis.push_back(ph.get_mean(L2PhiHandler::phiType::COMPOSITE)); - + best_mean_phis.push_back(ph.get_representative_phi(L2PhiHandler::phiType::COMPOSITE)); if (!use_mda) { message(1, "updating realizations with reduced phi"); @@ -7154,7 +7399,8 @@ bool EnsembleMethod::solve(bool use_mda, vector inflation_factors, vecto ph.update(oe, pe,weights); //re-check phi - double new_best_mean = ph.get_mean(L2PhiHandler::phiType::COMPOSITE); + double new_best_mean; + new_best_mean = ph.get_representative_phi(L2PhiHandler::phiType::COMPOSITE); if (new_best_mean < best_mean) { best_mean = new_best_mean; @@ -7319,7 +7565,8 @@ bool EnsembleMethod::solve(bool use_mda, vector inflation_factors, vecto } performance_log->log_event("updating phi"); ph.update(oe_lam_best, pe_lams[best_idx], weights); - best_mean = ph.get_mean(L2PhiHandler::phiType::COMPOSITE); + + best_mean = ph.get_representative_phi(L2PhiHandler::phiType::COMPOSITE); best_std = ph.get_std(L2PhiHandler::phiType::COMPOSITE); message(1, "phi summary for entire ensemble using lambda,scale_fac ", vector({ lam_vals[best_idx],scale_vals[best_idx] })); ph.report(true, false); @@ -7327,12 +7574,12 @@ bool EnsembleMethod::solve(bool use_mda, vector inflation_factors, vecto else { ph.update(oe_lam_best, pe_lams[best_idx], weights); - best_mean = ph.get_mean(L2PhiHandler::phiType::COMPOSITE); + best_mean = ph.get_representative_phi(L2PhiHandler::phiType::COMPOSITE); best_std = ph.get_std(L2PhiHandler::phiType::COMPOSITE); } ph.update(oe_lam_best, pe_lams[best_idx], weights); - best_mean = ph.get_mean(L2PhiHandler::phiType::COMPOSITE); + best_mean = ph.get_representative_phi(L2PhiHandler::phiType::COMPOSITE); best_std = ph.get_std(L2PhiHandler::phiType::COMPOSITE); message(1, "last best mean phi * acceptable phi factor: ", last_best_mean * acc_fac); message(1, "current best mean phi: ", best_mean); @@ -7354,6 +7601,16 @@ bool EnsembleMethod::solve(bool use_mda, vector inflation_factors, vecto last_best_mean = best_mean; if (pest_scenario.get_pestpp_options().get_ies_updatebyreals()) { + + if (pe.shape().first > pe_lams[best_idx].shape().first) + { + performance_log->log_event("aligning rows of pe with pe_lams[best_idx]"); + pe.keep_rows(pe_lams[best_idx].get_real_names()); + } + if (oe.shape().first > oe_lam_best.shape().first) { + performance_log->log_event("aligning rows of oe with oe_lam_best"); + oe.keep_rows(oe_lam_best.get_real_names()); + } message(0, "only updating realizations with reduced phi"); update_reals_by_phi(pe_lams[best_idx], oe_lam_best); } @@ -7385,7 +7642,8 @@ bool EnsembleMethod::solve(bool use_mda, vector inflation_factors, vecto } ph.update(oe, pe, weights); //re-check phi - double new_best_mean = ph.get_mean(L2PhiHandler::phiType::COMPOSITE); + double new_best_mean; + new_best_mean = ph.get_representative_phi(L2PhiHandler::phiType::COMPOSITE); if (new_best_mean < best_mean) { best_mean = new_best_mean; @@ -7430,21 +7688,53 @@ bool EnsembleMethod::solve(bool use_mda, vector inflation_factors, vecto } void EnsembleMethod::reset_par_ensemble_to_prior_mean(){ + string min_phi_name = ""; + if (true) + { + map pmap = ph.get_phi_map(L2PhiHandler::phiType::ACTUAL); + double min_phi = numeric_limits::max(); + + for (auto& item : pmap) + { + if (item.second < min_phi) + { + min_phi = item.second; + min_phi_name = item.first; + } + } + vector real_names = oe.get_real_names(); + int idx = distance(real_names.begin(),find(real_names.begin(),real_names.end(),min_phi_name)); + if (idx == real_names.size()) + { + throw_em_error("min phi realization name not found in obs ensemble"); + } + real_names = pe.get_real_names(); + min_phi_name = real_names[idx]; + } message(0,"resetting current parameter ensemble to prior ensemble with current ensemble mean"); performance_log->log_event("getting prior parameter ensemble mean-centered anomalies"); - Eigen::MatrixXd anoms = pe_base.get_eigen_anomalies(pe.get_real_names(), pe.get_var_names()); + Eigen::MatrixXd anoms = pe_base.get_eigen_anomalies(pe_base.get_real_names(), pe.get_var_names(), pest_scenario.get_pestpp_options().get_ies_center_on()); performance_log->log_event("getting current parameter ensemble mean vector"); vector mean_vec = pe.get_mean_stl_var_vector(); + Eigen::VectorXd offset(mean_vec.size()); + for (int i=0;ilog_event("adding mean to anomalies"); + + performance_log->log_event("adding offset to anomalies"); for (int i = 0; i < mean_vec.size(); i++) { - anoms.col(i) = anoms.col(i).array() + mean_vec[i]; + anoms.col(i) = anoms.col(i).array() + offset[i]; } performance_log->log_event("forming new parameter ensemble of mean-shifted prior realizations"); - ParameterEnsemble new_pe = ParameterEnsemble(&pest_scenario,&rand_gen,anoms,pe.get_real_names(),pe.get_var_names()); + ParameterEnsemble new_pe = ParameterEnsemble(&pest_scenario,&rand_gen,anoms,pe_base.get_real_names(),pe.get_var_names()); new_pe.set_trans_status(pe.get_trans_status()); new_pe.set_fixed_info(pe.get_fixed_info()); @@ -7458,16 +7748,19 @@ void EnsembleMethod::reset_par_ensemble_to_prior_mean(){ ss << "iteration:" << iter; vector temp; ofstream& frec = file_manager.rec_ofstream(); - + oe = oe_base; + weights = weights_base; run_ensemble_util(performance_log,frec,new_pe,oe,run_mgr_ptr,false,temp,NetPackage::NULL_DA_CYCLE, ss.str()); pe = new_pe; new_pe = ParameterEnsemble(); report_and_save(NetPackage::NULL_DA_CYCLE); - ph.update(oe,pe,weights);\ + ph.update(oe,pe,weights); message(0,"mean-shifted prior phi report:"); - best_mean_phis.push_back(ph.get_mean(L2PhiHandler::phiType::COMPOSITE)); - last_best_mean = ph.get_mean(L2PhiHandler::phiType::COMPOSITE); + + best_mean_phis.push_back(ph.get_representative_phi(L2PhiHandler::phiType::COMPOSITE)); + last_best_mean = ph.get_representative_phi(L2PhiHandler::phiType::COMPOSITE); + last_best_std = ph.get_std(L2PhiHandler::phiType::COMPOSITE); ph.report(true,true); @@ -7475,9 +7768,10 @@ void EnsembleMethod::reset_par_ensemble_to_prior_mean(){ ss.str(""); ss << file_manager.get_base_filename() << "." << iter << ".meanshift.pcs.csv"; pcs.summarize(pe, ss.str()); + last_best_lam = pest_scenario.get_pestpp_options().get_ies_init_lam(); double phi_lam = get_lambda(); last_best_lam = phi_lam; - message(1,"iter = ies_n_iter_mean, resetting lambda to ",last_best_lam); + message(1,"iter = ies_n_iter_reinflate, resetting lambda to ",last_best_lam); consec_bad_lambda_cycles = 0; } @@ -7893,7 +8187,7 @@ void EnsembleMethod::add_bases() Parameters pars = pest_scenario.get_ctl_parameters(); pe.get_par_transform().active_ctl2numeric_ip(pars); vector drop{ pe.shape().first - 1 }; - pe.drop_rows(drop); + pe.drop_rows(drop,true); pe.append(BASE_REAL_NAME, pars); } @@ -7945,7 +8239,7 @@ void EnsembleMethod::add_bases() message(1, mess); vector drop; drop.push_back(oreal); - oe_base.drop_rows(drop); + oe_base.drop_rows(drop,true); oe_base.append(BASE_REAL_NAME, obs); //rnames.insert(rnames.begin() + idx, string(base_name)); rnames[idx] = BASE_REAL_NAME; @@ -7954,13 +8248,13 @@ void EnsembleMethod::add_bases() { string oreal = wrnames[idx]; ss.str(""); - ss << "warning: 'base' realization in par ensenmble but not in weight ensemble," << endl; + ss << "warning: 'base' realization in par ensemble but not in weight ensemble," << endl; ss << " replacing weight realization '" << oreal << "' with 'base' weights"; string mess = ss.str(); message(1, mess); vector drop; drop.push_back(oreal); - weights.drop_rows(drop); + weights.drop_rows(drop,true); weights.append(BASE_REAL_NAME, wobs); //rnames.insert(rnames.begin() + idx, string(base_name)); @@ -8575,6 +8869,7 @@ void EnsembleMethod::initialize_restart() ss << "restart oe has too many rows: " << oe.shape().first << " compared to oe_base: " << oe_base.shape().first; throw_em_error(ss.str()); } + } @@ -8689,115 +8984,6 @@ void EnsembleMethod::zero_weight_obs(vector& obs_to_zero_weight, bool up message(1, ss.str()); } -vector EnsembleMethod::detect_prior_data_conflict(bool save) -{ - message(1, "checking for prior-data conflict..."); - //for now, just really simple metric - checking for overlap - // write out conflicted obs and some related info to a csv file - ofstream pdccsv(file_manager.get_base_filename() + ".pdc.csv"); - - vector in_conflict; - double smin, smax, omin, omax, smin_stat, smax_stat, omin_stat, omax_stat; - map smap, omap; - vector snames = oe.get_var_names(); - vector onames = oe_base.get_var_names(); - vector temp = ph.get_lt_obs_names(); - set ineq_lt(temp.begin(), temp.end()); - //set::iterator end = ineq.end(); - temp = ph.get_gt_obs_names(); - set ineq_gt(temp.begin(), temp.end()); - temp.resize(0); - - for (int i = 0; i < snames.size(); i++) - { - smap[snames[i]] = i; - } - for (int i = 0; i < onames.size(); i++) - { - omap[onames[i]] = i; - } - int sidx, oidx; - bool use_stat_dist = true; - if (pest_scenario.get_pestpp_options().get_ies_pdc_sigma_distance() <= 0.0) - use_stat_dist = false; - - double smn, sstd, omn, ostd, dist; - double sd = abs(pest_scenario.get_pestpp_options().get_ies_pdc_sigma_distance()); - int oe_nr = oe.shape().first; - int oe_base_nr = oe_base.shape().first; - Eigen::VectorXd t; - - pdccsv << "name,obs_mean,obs_std,obs_min,obs_max,obs_stat_min,obs_stat_max,sim_mean,sim_std,sim_min,sim_max,sim_stat_min,sim_stat_max,distance" << endl; - for (auto oname : pest_scenario.get_ctl_ordered_nz_obs_names()) - { - //if (ineq.find(oname) != end) - // continue; - sidx = smap[oname]; - oidx = omap[oname]; - smin = oe.get_eigen_ptr()->col(sidx).minCoeff(); - omin = oe_base.get_eigen_ptr()->col(oidx).minCoeff(); - smax = oe.get_eigen_ptr()->col(sidx).maxCoeff(); - omax = oe_base.get_eigen_ptr()->col(oidx).maxCoeff(); - t = oe.get_eigen_ptr()->col(sidx); - smn = t.mean(); - sstd = std::sqrt((t.array() - smn).square().sum() / (oe_nr - 1)); - smin_stat = smn - (sd * sstd); - smax_stat = smn + (sd * sstd); - t = oe_base.get_eigen_ptr()->col(oidx); - omn = t.mean(); - ostd = std::sqrt((t.array() - omn).square().sum() / (oe_base_nr - 1)); - omin_stat = omn - (sd * ostd); - omax_stat = omn + (sd * ostd); - bool conflicted = false; - if (use_stat_dist) - { - if (ineq_lt.find(oname) != ineq_lt.end()) - { - if (smin_stat > omax_stat) - conflicted = true; - } - else if (ineq_gt.find(oname) != ineq_gt.end()) - { - if (smax_stat < omin_stat) - conflicted = true; - } - else if ((smin_stat > omax_stat) || (smax_stat < omin_stat)) - { - conflicted = true; - } - } - else - { - if (ineq_lt.find(oname) != ineq_lt.end()) - { - if (smin > omax) - conflicted = true; - } - else if (ineq_gt.find(oname) != ineq_gt.end()) - { - if (smax < omin) - conflicted = true; - } - else if ((smin > omax) || (smax < omin)) - { - conflicted = true; - } - } - if (conflicted) - { - in_conflict.push_back(oname); - dist = max((smin - omax), (omin - smax)); - - pdccsv << pest_utils::lower_cp(oname) << "," << omn << "," << ostd << "," << omin << "," << omax << "," << omin_stat << "," - << omax_stat; - pdccsv << "," << smn << "," << sstd << "," << smin << "," << smax << "," << smin_stat << "," - << smax_stat << "," << dist << endl; - - } - } - - return in_conflict; -} Eigen::MatrixXd EnsembleMethod::get_Am(const vector& real_names, const vector& par_names) { diff --git a/src/libs/pestpp_common/EnsembleMethodUtils.h b/src/libs/pestpp_common/EnsembleMethodUtils.h index 93c06b17..5f947c65 100644 --- a/src/libs/pestpp_common/EnsembleMethodUtils.h +++ b/src/libs/pestpp_common/EnsembleMethodUtils.h @@ -77,6 +77,7 @@ class L2PhiHandler double get_std(phiType pt); double get_max(phiType pt); double get_min(phiType pt); + double get_representative_phi(phiType pt); double calc_mean(map *phi_map); double calc_std(map *phi_map); @@ -119,6 +120,7 @@ class L2PhiHandler map> get_meas_phi_weight_ensemble(ObservationEnsemble& oe, ObservationEnsemble& weights); vector get_violating_realizations(ObservationEnsemble& oe, const vector& viol_obs_names); + vector detect_simulation_data_conflict(ObservationEnsemble& _oe, string csv_tag); private: string tag; @@ -157,6 +159,7 @@ class L2PhiHandler map composite; map actual; map noise; + map num_conflict_group; vector lt_obs_names; vector gt_obs_names; @@ -197,6 +200,8 @@ class ParChangeSummarizer void update(ParameterEnsemble& pe); void write_to_csv(string& filename); + map get_npar_per_group_with_excess_std_reduction(ParameterEnsemble& _pe, double thresh=0.95); + }; @@ -462,9 +467,10 @@ class EnsembleMethod vector act_obs_names, act_par_names; vector violation_obs; ParameterEnsemble pe, pe_base; - ObservationEnsemble oe, oe_base, weights; + ObservationEnsemble oe, oe_base, weights, weights_base; Eigen::DiagonalMatrix obscov_inv_sqrt, parcov_inv_sqrt; bool oe_drawn, pe_drawn; + bool reinflate_to_minphi_real; bool solve_glm(int cycle = NetPackage::NULL_DA_CYCLE); @@ -493,7 +499,6 @@ class EnsembleMethod Eigen::MatrixXd get_Am(const vector& real_names, const vector& par_names); - vector detect_prior_data_conflict(bool save=true); void zero_weight_obs(vector& obs_to_zero_weight, bool update_obscov = true, bool update_oe_base = true); diff --git a/src/libs/pestpp_common/EnsembleSmoother.cpp b/src/libs/pestpp_common/EnsembleSmoother.cpp index a3f21f45..c19ceb75 100644 --- a/src/libs/pestpp_common/EnsembleSmoother.cpp +++ b/src/libs/pestpp_common/EnsembleSmoother.cpp @@ -37,10 +37,11 @@ void IterEnsembleSmoother::iterate_2_solution() bool accept; int n_iter_mean = pest_scenario.get_pestpp_options().get_ies_n_iter_mean(); - + int solution_iter = 0; for (int i = 0; i < pest_scenario.get_control_info().noptmax; i++) { iter++; + solution_iter++; message(0, "starting solve for iteration:", iter); ss.str(""); ss << "starting solve for iteration: " << iter; @@ -54,9 +55,12 @@ void IterEnsembleSmoother::iterate_2_solution() } report_and_save(NetPackage::NULL_DA_CYCLE); ph.update(oe,pe, weights); - last_best_mean = ph.get_mean(L2PhiHandler::phiType::COMPOSITE); + last_best_mean = ph.get_representative_phi(L2PhiHandler::phiType::COMPOSITE); last_best_std = ph.get_std(L2PhiHandler::phiType::COMPOSITE); ph.report(true); + ss.str(""); + ss << "." << iter << ".pdc.csv"; + ph.detect_simulation_data_conflict(oe,ss.str()); ph.write(iter, run_mgr_ptr->get_total_runs()); if (pest_scenario.get_pestpp_options().get_ies_save_rescov()) ph.save_residual_cov(oe,iter); @@ -68,7 +72,7 @@ void IterEnsembleSmoother::iterate_2_solution() else consec_bad_lambda_cycles++; - if (iter == n_iter_mean) + if ((n_iter_mean> 0) && (solution_iter % n_iter_mean == 0)) { iter++; reset_par_ensemble_to_prior_mean(); diff --git a/src/libs/pestpp_common/pest_data_structs.cpp b/src/libs/pestpp_common/pest_data_structs.cpp index cfad3779..e6ec529f 100644 --- a/src/libs/pestpp_common/pest_data_structs.cpp +++ b/src/libs/pestpp_common/pest_data_structs.cpp @@ -1181,8 +1181,10 @@ bool PestppOptions::assign_ies_value_by_key(const string& key, const string& val ies_phi_factors_by_real = pest_utils::parse_string_arg_to_bool(value); return true; } - else if (key == "IES_N_ITER_MEAN") + else if ((key == "IES_N_ITER_MEAN") || (key == "IES_N_ITER_REINFLATE")) { + passed_args.insert("IES_N_ITER_MEAN"); + passed_args.insert("IES_N_ITER_REINFLATE"); convert_ip(value,ies_n_iter_mean); return true; } @@ -1828,7 +1830,7 @@ void PestppOptions::summary(ostream& os) const os << "ies_localizer_forgive_extra: " << ies_localizer_forgive_missing << endl; os << "ies_phi_factors_file: " << ies_phi_fractions_file << endl; os << "ies_phi_factors_by_real: " << ies_phi_factors_by_real << endl; - os << "ies_n_iter_mean: " << ies_n_iter_mean << endl; + os << "ies_n_iter_reinflate: " << ies_n_iter_mean << endl; os << "ies_updatebyreals: " << ies_updatebyreals << endl; @@ -2017,7 +2019,7 @@ void PestppOptions::set_defaults() set_ies_phi_fractions_files(""); set_ies_phi_factors_by_real(false); set_ies_n_iter_mean(0); - set_ies_updatebyreals(true); + set_ies_updatebyreals(false); // DA parameters