From 054429cb3129e8bb779a805ebba6944b3b333f02 Mon Sep 17 00:00:00 2001 From: Raffaele Montuoro Date: Fri, 12 May 2017 23:03:31 -0600 Subject: [PATCH 01/51] Add support for multiple coupler instances --- scripts/create_newcase | 18 +++- scripts/lib/CIME/XML/env_mach_pes.py | 1 + scripts/lib/CIME/build.py | 3 +- scripts/lib/CIME/case.py | 39 +++++--- scripts/lib/CIME/case_run.py | 25 +++-- scripts/lib/CIME/case_setup.py | 4 +- scripts/lib/CIME/case_st_archive.py | 10 +- scripts/lib/CIME/get_timing.py | 25 ++++- scripts/lib/CIME/preview_namelists.py | 14 +++ src/drivers/mct/cime_config/buildnml | 26 ++--- .../mct/cime_config/config_archive.xml | 4 +- .../mct/cime_config/config_component.xml | 1 + .../cime_config/namelist_definition_drv.xml | 16 +++ src/drivers/mct/main/cime_comp_mod.F90 | 99 +++++++++++++++++-- src/drivers/mct/main/seq_rest_mod.F90 | 12 ++- src/drivers/mct/shr/seq_comm_mct.F90 | 3 +- src/drivers/mct/shr/seq_infodata_mod.F90 | 18 +++- 17 files changed, 247 insertions(+), 71 deletions(-) diff --git a/scripts/create_newcase b/scripts/create_newcase index f2568f9b2b0..d663954bb68 100755 --- a/scripts/create_newcase +++ b/scripts/create_newcase @@ -46,6 +46,10 @@ OR help="Specify a compiler. " "To see list of supported compilers for each machine, use the utility query_config in this directory") + parser.add_argument("--ncouplers",default=1, + help="Specify number of coupler instances. 
" + "Set the number of coupler instances in the case.") + parser.add_argument("--ninst",default=1, help="Specify number of component instances" "Set the number of component instances in the case.") @@ -142,6 +146,9 @@ OR expect(args.gridfile is not None, "User grid specification file must be set if the user grid is requested") + expect(not (int(args.ncouplers) > 1 and int(args.ninst) > 1), + "Only one component instance per coupler is allowed when using multiple couplers") + run_unsupported = False if model == "cesm": run_unsupported = args.run_unsupported @@ -155,7 +162,8 @@ OR return args.case, args.compset, args.res, args.machine, args.compiler,\ args.mpilib, args.project, args.pecount, \ args.user_mods_dir, args.pesfile, \ - args.user_grid, args.gridfile, args.srcroot, args.test, args.ninst, \ + args.user_grid, args.gridfile, args.srcroot, args.test, args.ncouplers, \ + args.ninst, \ args.walltime, args.queue, args.output_root, args.script_root, \ run_unsupported, args.answer, args.input_dir @@ -167,7 +175,7 @@ def _main_func(description): casename, compset, grid, machine, compiler, \ mpilib, project, pecount, \ user_mods_dir, pesfile, \ - user_grid, gridfile, srcroot, test, ninst, walltime, queue, \ + user_grid, gridfile, srcroot, test, ncouplers, ninst, walltime, queue, \ output_root, script_root, run_unsupported, \ answer, input_dir = parse_command_line(sys.argv, cimeroot, description) @@ -187,9 +195,11 @@ def _main_func(description): with Case(caseroot, read_only=False) as case: # Configure the Case - case.create(casename, srcroot, compset, grid, user_mods_dir=user_mods_dir, machine_name=machine, project=project, + case.create(casename, srcroot, compset, grid, user_mods_dir=user_mods_dir, + machine_name=machine, project=project, pecount=pecount, compiler=compiler, mpilib=mpilib, - pesfile=pesfile,user_grid=user_grid, gridfile=gridfile, ninst=ninst, test=test, + pesfile=pesfile,user_grid=user_grid, gridfile=gridfile, + ncouplers=ncouplers, ninst=ninst, 
test=test, walltime=walltime, queue=queue, output_root=output_root, run_unsupported=run_unsupported, answer=answer, input_dir=input_dir) diff --git a/scripts/lib/CIME/XML/env_mach_pes.py b/scripts/lib/CIME/XML/env_mach_pes.py index 1a4ab613dd1..f69d73a76f8 100644 --- a/scripts/lib/CIME/XML/env_mach_pes.py +++ b/scripts/lib/CIME/XML/env_mach_pes.py @@ -63,6 +63,7 @@ def get_total_tasks(self, comp_classes): pstrid = self.get_value("PSTRID", attribute={"component":comp}) tt = rootpe + (ntasks - 1) * pstrid + 1 total_tasks = max(tt, total_tasks) + total_tasks *= self.get_value("NINST_CPL") return total_tasks def get_tasks_per_node(self, total_tasks, max_thread_count): diff --git a/scripts/lib/CIME/build.py b/scripts/lib/CIME/build.py index 1442545336f..eec9593bf85 100644 --- a/scripts/lib/CIME/build.py +++ b/scripts/lib/CIME/build.py @@ -398,11 +398,10 @@ def _case_build_impl(caseroot, case, sharedlib_only, model_only, buildlist): complist = [] for comp_class in comp_classes: + ninst = case.get_value("NINST_{}".format(comp_class)) if comp_class == "CPL": - ninst = 1 config_dir = None else: - ninst = case.get_value("NINST_{}".format(comp_class)) config_dir = os.path.dirname(case.get_value("CONFIG_{}_FILE".format(comp_class))) comp = case.get_value("COMP_{}".format(comp_class)) thrds = case.get_value("NTHRDS_{}".format(comp_class)) diff --git a/scripts/lib/CIME/case.py b/scripts/lib/CIME/case.py index 97b0c56ce4e..7fdb926b9e3 100644 --- a/scripts/lib/CIME/case.py +++ b/scripts/lib/CIME/case.py @@ -635,7 +635,8 @@ def _get_component_config_data(self, files): self.clean_up_lookups() - def _setup_mach_pes(self, pecount, ninst, machine_name, mpilib): + + def _setup_mach_pes(self, pecount, ncouplers, ninst, machine_name, mpilib): #-------------------------------------------- # pe layout #-------------------------------------------- @@ -719,13 +720,15 @@ def _setup_mach_pes(self, pecount, ninst, machine_name, mpilib): val = -1*val*pes_per_node if val > pesize: pesize = val + 
pesize *= int(ncouplers) # Make sure that every component has been accounted for # set, nthrds and ntasks to 1 otherwise. Also set the ninst values here. for compclass in self._component_classes: + key = "NINST_{}".format(compclass) if compclass == "CPL": + mach_pes_obj.set_value(key, ncouplers) continue - key = "NINST_{}".format(compclass) # ESP models are currently limited to 1 instance if compclass == "ESP": mach_pes_obj.set_value(key, 1) @@ -744,8 +747,10 @@ def _setup_mach_pes(self, pecount, ninst, machine_name, mpilib): def configure(self, compset_name, grid_name, machine_name=None, project=None, pecount=None, compiler=None, mpilib=None, - pesfile=None,user_grid=False, gridfile=None, ninst=1, test=False, - walltime=None, queue=None, output_root=None, run_unsupported=False, answer=None, + pesfile=None,user_grid=False, gridfile=None, + ncouplers=1, ninst=1, test=False, + walltime=None, queue=None, output_root=None, + run_unsupported=False, answer=None, input_dir=None): expect(check_name(compset_name, additional_chars='.'), "Invalid compset name {}".format(compset_name)) @@ -834,7 +839,10 @@ def configure(self, compset_name, grid_name, machine_name=None, env_mach_specific_obj.populate(machobj) self.schedule_rewrite(env_mach_specific_obj) - pesize = self._setup_mach_pes(pecount, ninst, machine_name, mpilib) + pesize = self._setup_mach_pes(pecount, ncouplers, ninst, machine_name, mpilib) + + if ncouplers > 1: + logger.info(" Coupler has %s instances" % ncouplers) #-------------------------------------------- # batch system @@ -1077,6 +1085,9 @@ def create_caseroot(self, clone=False): len(self._component_description[component_class])>0: append_status("Component {} is {}".format(component_class, self._component_description[component_class]),"README.case", caseroot=self._caseroot) if component_class == "CPL": + append_status("Using %s coupler instances" % + (self.get_value("NINST_CPL")), + "README.case", caseroot=self._caseroot) continue comp_grid = 
"{}_GRID".format(component_class) @@ -1446,10 +1457,13 @@ def get_latest_cpl_log(self, coupler_log_path=None): else: return None - def create(self, casename, srcroot, compset_name, grid_name, user_mods_dir=None, machine_name=None, + def create(self, casename, srcroot, compset_name, grid_name, + user_mods_dir=None, machine_name=None, project=None, pecount=None, compiler=None, mpilib=None, - pesfile=None,user_grid=False, gridfile=None, ninst=1, test=False, - walltime=None, queue=None, output_root=None, run_unsupported=False, answer=None, + pesfile=None,user_grid=False, gridfile=None, + ncouplers=1, ninst=1, test=False, + walltime=None, queue=None, output_root=None, + run_unsupported=False, answer=None, input_dir=None): try: # Set values for env_case.xml @@ -1458,10 +1472,13 @@ def create(self, casename, srcroot, compset_name, grid_name, user_mods_dir=None, self.set_lookup_value("SRCROOT", srcroot) # Configure the Case - self.configure(compset_name, grid_name, machine_name=machine_name, project=project, + self.configure(compset_name, grid_name, machine_name=machine_name, + project=project, pecount=pecount, compiler=compiler, mpilib=mpilib, - pesfile=pesfile,user_grid=user_grid, gridfile=gridfile, ninst=ninst, test=test, - walltime=walltime, queue=queue, output_root=output_root, + pesfile=pesfile,user_grid=user_grid, gridfile=gridfile, + ncouplers=ncouplers, ninst=ninst, test=test, + walltime=walltime, queue=queue, + output_root=output_root, run_unsupported=run_unsupported, answer=answer, input_dir=input_dir) diff --git a/scripts/lib/CIME/case_run.py b/scripts/lib/CIME/case_run.py index 51b66340c74..096b4c77135 100644 --- a/scripts/lib/CIME/case_run.py +++ b/scripts/lib/CIME/case_run.py @@ -1,6 +1,6 @@ from CIME.XML.standard_module_setup import * from CIME.case_submit import submit -from CIME.utils import gzip_existing_file, new_lid, run_and_log_case_status +from CIME.utils import gzip_existing_file, new_lid, run_and_log_case_status, append_status from 
CIME.check_lockedfiles import check_lockedfiles from CIME.get_timing import get_timing from CIME.provenance import save_prerun_provenance, save_postrun_provenance @@ -152,21 +152,32 @@ def post_run_check(case, lid): rundir = case.get_value("RUNDIR") model = case.get_value("MODEL") + cpl_ninst = case.get_value("NINST_CPL") + cpl_logs = [] + if cpl_ninst > 1: + for inst in range(cpl_ninst): + cpl_logs.append(os.path.join(rundir, "cpl_%04d.log." % (inst+1) + lid)) + else: + cpl_logs = [os.path.join(rundir, "cpl" + ".log." + lid)] + # find the last model.log and cpl.log model_logfile = os.path.join(rundir, model + ".log." + lid) - cpl_logfile = os.path.join(rundir, "cpl" + ".log." + lid) if not os.path.isfile(model_logfile): expect(False, "Model did not complete, no {} log file ".format(model_logfile)) - elif not os.path.isfile(cpl_logfile): - expect(False, "Model did not complete, no cpl log file '{}'".format(cpl_logfile)) elif os.stat(model_logfile).st_size == 0: expect(False, "Run FAILED") else: - with open(cpl_logfile, 'r') as fd: - if 'SUCCESSFUL TERMINATION' not in fd.read(): - expect(False, "Model did not complete - see {} \n ".format(cpl_logfile)) + count_ok = 0 + for cpl_logfile in cpl_logs: + if not os.path.isfile(cpl_logfile): + break + with open(cpl_logfile, 'r') as fd: + if 'SUCCESSFUL TERMINATION' in fd.read(): + count_ok += 1 + if count_ok != cpl_ninst: + expect(False, "Model did not complete - see {} \n " .format(cpl_logfile)) ############################################################################### def save_logs(case, lid): diff --git a/scripts/lib/CIME/case_setup.py b/scripts/lib/CIME/case_setup.py index e6195967f3f..e4aebfdf924 100644 --- a/scripts/lib/CIME/case_setup.py +++ b/scripts/lib/CIME/case_setup.py @@ -34,11 +34,13 @@ def _build_usernl_files(case, model, comp): expect(os.path.isdir(model_dir), "cannot find cime_config directory {} for component {}".format(model_dir, comp)) + ninst = case.get_value("NINST_CPL") if comp == "cpl": if not 
os.path.exists("user_nl_cpl"): shutil.copy(os.path.join(model_dir, "user_nl_cpl"), ".") else: - ninst = case.get_value("NINST_{}".format(model)) + if ninst == 1: + ninst = case.get_value("NINST_{}".format(model)) nlfile = "user_nl_{}".format(comp) model_nl = os.path.join(model_dir, nlfile) if ninst > 1: diff --git a/scripts/lib/CIME/case_st_archive.py b/scripts/lib/CIME/case_st_archive.py index 30fab631282..c997cbeb107 100644 --- a/scripts/lib/CIME/case_st_archive.py +++ b/scripts/lib/CIME/case_st_archive.py @@ -39,8 +39,9 @@ def _get_datenames(case, last_date=None): rundir = case.get_value('RUNDIR') expect(isdir(rundir), 'Cannot open directory {} '.format(rundir)) casename = case.get_value("CASE") - files = sorted(glob.glob(os.path.join(rundir, casename + '.cpl.r*.nc'))) - + files = sorted(glob.glob(os.path.join(rundir, casename + '.cpl*.r*.nc'))) + if not files: + expect(False, 'Cannot find a {}.cpl*.r.*.nc file in directory {} '.format(casename, rundir)) datenames = [] for filename in files: names = filename.split('.') @@ -59,10 +60,7 @@ def _get_datenames(case, last_date=None): def _get_ninst_info(case, compclass): ############################################################################### - if compclass != 'cpl': - ninst = case.get_value('NINST_' + compclass.upper()) - else: - ninst = 1 + ninst = case.get_value('NINST_' + compclass.upper()) ninst_strings = [] if ninst is None: ninst = 1 diff --git a/scripts/lib/CIME/get_timing.py b/scripts/lib/CIME/get_timing.py index 0a4805f2f88..a00cd26705f 100644 --- a/scripts/lib/CIME/get_timing.py +++ b/scripts/lib/CIME/get_timing.py @@ -93,6 +93,14 @@ def gettime(self, heading_padded): return (0, 0, False) def getTiming(self): + ninst = self.case.get_value("NINST_CPL") + if ninst > 1: + for inst in range(ninst): + self._getTiming(inst+1) + else: + self._getTiming() + + def _getTiming(self, inst=0): components=self.case.get_values("COMP_CLASSES") for s in components: self.models[s] = _GetTimingInfo(s) @@ -150,11 
+158,16 @@ def getTiming(self): not continue_run: inittype = "TRUE" - binfilename = os.path.join(rundir, "timing", "model_timing_stats") + if inst > 0: + inst_label = '_%04d' % inst + else: + inst_label = '' + + binfilename = os.path.join(rundir, "timing", "model_timing%s_stats" % inst_label) finfilename = os.path.join(self.caseroot, "timing", - "{}_timing_stats.{}".format(cime_model, self.lid)) + "{}_timing{}_stats.{}".format(cime_model, inst_label, self.lid)) foutfilename = os.path.join(self.caseroot, "timing", - "{}_timing.{}.{}".format(cime_model, caseid, self.lid)) + "{}_timing{}.{}.{}".format(cime_model, inst_label, caseid, self.lid)) timingDir = os.path.join(self.caseroot, "timing") if not os.path.isdir(timingDir): @@ -239,7 +252,11 @@ def getTiming(self): maxthrds = 0 for k in self.case.get_values("COMP_CLASSES"): m = self.models[k] - self.write(" {} = {:<8s} {:<6d} {:<6d} {:<6d} x {:<6d} {:<6d} ({:<6d}) \n".format(m.name.lower(), m.comp, (m.ntasks*m.nthrds *smt_factor), m.rootpe, m.ntasks, m.nthrds, m.ninst, m.pstrid)) + if m.comp == "cpl": + comp_label = m.comp + inst_label + else: + comp_label = m.comp + self.write(" {} = {:<8s} {:<6d} {:<6d} {:<6d} x {:<6d} {:<6d} ({:<6d}) \n".format(m.name.lower(), comp_label, (m.ntasks*m.nthrds *smt_factor), m.rootpe, m.ntasks, m.nthrds, m.ninst, m.pstrid)) if m.nthrds > maxthrds: maxthrds = m.nthrds nmax = self.gettime(' CPL:INIT ')[1] diff --git a/scripts/lib/CIME/preview_namelists.py b/scripts/lib/CIME/preview_namelists.py index 2a4462f3954..8e6763abe89 100644 --- a/scripts/lib/CIME/preview_namelists.py +++ b/scripts/lib/CIME/preview_namelists.py @@ -66,6 +66,8 @@ def create_namelists(case, component=None): # Create namelists - must have cpl last in the list below # Note - cpl must be last in the loop below so that in generating its namelist, # it can use xml vars potentially set by other component's buildnml scripts + xmlfac = {} + cpl_ninst = case.get_value("NINST_CPL") models = case.get_values("COMP_CLASSES") 
models += [models.pop(0)] for model in models: @@ -74,8 +76,20 @@ def create_namelists(case, component=None): config_dir = os.path.dirname(config_file) if model_str == "cpl": compname = "drv" + complist = [m for m in models if m.upper() != "CPL"] + if cpl_ninst > 1: + xmlfac = {"NINST" : cpl_ninst, "NTASKS" : 1} else: compname = case.get_value("COMP_{}".format(model_str.upper())) + complist = [model_str.upper()] + if cpl_ninst > 1: + xmlfac = {"NINST" : cpl_ninst, "NTASKS" : cpl_ninst} + + xmlsave = {} + for k in xmlfac.keys(): + for m in complist: + key = "{}_{}" .format(k, m.upper()) + xmlsave[key] = case.get_value(key) if component is None or component == model_str: # first look in the case SourceMods directory diff --git a/src/drivers/mct/cime_config/buildnml b/src/drivers/mct/cime_config/buildnml index 322e558c67f..fc06d7f33ed 100755 --- a/src/drivers/mct/cime_config/buildnml +++ b/src/drivers/mct/cime_config/buildnml @@ -255,20 +255,14 @@ def _create_component_modelio_namelists(case, files): config['component'] = model entries = nmlgen.init_defaults(infiles, config, skip_entry_loop=True) - if model == 'cpl': - inst_count = 1 - else: - inst_count = case.get_value("NINST_" + model.upper()) + inst_count = case.get_value("NINST_" + model.upper()) + inst_string = "" inst_index = 1 while inst_index <= inst_count: - inst_string = inst_index - if inst_index <= 999: - inst_string = "0" + str(inst_string) - if inst_index <= 99: - inst_string = "0" + str(inst_string) - if inst_index <= 9: - inst_string = "0" + str(inst_string) + # determine instance string + if inst_count > 1: + inst_string = '_%04d' % inst_index # set default values for entry in entries: @@ -281,17 +275,11 @@ def _create_component_modelio_namelists(case, files): moddiro = case.get_value('RUNDIR') nmlgen.set_value('diro', moddiro) - if inst_count > 1: - logfile = model + "_" + inst_string + ".log." + str(lid) - else: - logfile = model + ".log." + str(lid) + logfile = model + inst_string + ".log." 
+ str(lid) nmlgen.set_value('logfile', logfile) # Write output file - if inst_count > 1: - modelio_file = model + "_modelio.nml_" + str(inst_string) - else: - modelio_file = model + "_modelio.nml" + modelio_file = model + "_modelio.nml" + inst_string nmlgen.write_modelio_file(os.path.join(confdir, modelio_file)) inst_index = inst_index + 1 diff --git a/src/drivers/mct/cime_config/config_archive.xml b/src/drivers/mct/cime_config/config_archive.xml index 5f59d56a82d..e2aaeb96db7 100644 --- a/src/drivers/mct/cime_config/config_archive.xml +++ b/src/drivers/mct/cime_config/config_archive.xml @@ -4,8 +4,8 @@ \.h.*.nc$ unset - rpointer.drv - $CASE.cpl.r.$DATENAME.nc + rpointer$NINST_STRING.drv + $CASE.cpl$NINST_STRING.r.$DATENAME.nc diff --git a/src/drivers/mct/cime_config/config_component.xml b/src/drivers/mct/cime_config/config_component.xml index f338d0f7521..7bb13ca4118 100644 --- a/src/drivers/mct/cime_config/config_component.xml +++ b/src/drivers/mct/cime_config/config_component.xml @@ -1952,6 +1952,7 @@ integer + 1 1 1 1 diff --git a/src/drivers/mct/cime_config/namelist_definition_drv.xml b/src/drivers/mct/cime_config/namelist_definition_drv.xml index b9336e100bc..81c8efb6068 100644 --- a/src/drivers/mct/cime_config/namelist_definition_drv.xml +++ b/src/drivers/mct/cime_config/namelist_definition_drv.xml @@ -40,6 +40,22 @@ that have only a small number of allowed values. --> + + + + + + integer + cesm_cpl + cesm_cpl + + Number of CESM coupler instances. 
+ + + $NINST_CPL + + + diff --git a/src/drivers/mct/main/cime_comp_mod.F90 b/src/drivers/mct/main/cime_comp_mod.F90 index 70bd88e5faa..cf0ff5c5433 100644 --- a/src/drivers/mct/main/cime_comp_mod.F90 +++ b/src/drivers/mct/main/cime_comp_mod.F90 @@ -26,10 +26,11 @@ module cime_comp_mod use shr_sys_mod, only: shr_sys_abort, shr_sys_flush use shr_const_mod, only: shr_const_cday use shr_file_mod, only: shr_file_setLogLevel, shr_file_setLogUnit - use shr_file_mod, only: shr_file_setIO, shr_file_getUnit + use shr_file_mod, only: shr_file_setIO, shr_file_getUnit, shr_file_freeUnit use shr_scam_mod, only: shr_scam_checkSurface use shr_map_mod, only: shr_map_setDopole use shr_mpi_mod, only: shr_mpi_min, shr_mpi_max + use shr_mpi_mod, only: shr_mpi_bcast, shr_mpi_commrank, shr_mpi_commsize use shr_mem_mod, only: shr_mem_init, shr_mem_getusage use shr_cal_mod, only: shr_cal_date2ymd, shr_cal_ymd2date, shr_cal_advdateInt use shr_orb_mod, only: shr_orb_params @@ -533,6 +534,9 @@ module cime_comp_mod logical :: iamin_CPLALLROFID ! pe associated with CPLALLROFID logical :: iamin_CPLALLWAVID ! pe associated with CPLALLWAVID + ! suffix for log and timing files if multi coupler driver + character(len=seq_comm_namelen) :: cpl_inst_tag + !---------------------------------------------------------------------------- ! complist: list of comps on this pe !---------------------------------------------------------------------------- @@ -594,6 +598,7 @@ subroutine cime_pre_init1() logical :: comp_iamin(num_inst_total) character(len=seq_comm_namelen) :: comp_name(num_inst_total) integer :: i, it + integer :: num_inst_cpl, cpl_id call mpi_init(ierr) call shr_mpi_chkerr(ierr,subname//' mpi_init') @@ -602,6 +607,9 @@ subroutine cime_pre_init1() comp_comm = MPI_COMM_NULL time_brun = mpi_wtime() + !--- Initialize multiple coupler instances, if requested --- + call cesm_cpl_init(Global_Comm, num_inst_cpl, cpl_id) + call shr_pio_init1(num_inst_total,NLFileName, Global_Comm) ! ! 
If pio_async_interface is true Global_Comm is MPI_COMM_NULL on the servernodes @@ -609,7 +617,13 @@ subroutine cime_pre_init1() ! ! if (Global_Comm /= MPI_COMM_NULL) then - call seq_comm_init(Global_Comm, NLFileName) + if (num_inst_cpl > 1) then + call seq_comm_init(Global_Comm, NLFileName, Comm_ID=cpl_id) + write(cpl_inst_tag,'("_",i4.4)') cpl_id + else + call seq_comm_init(Global_Comm, NLFileName) + cpl_inst_tag = '' + end if !--- set task based threading counts --- call seq_comm_getinfo(GLOID,pethreads=pethreads_GLOID,iam=iam_GLOID) @@ -756,10 +770,10 @@ subroutine cime_pre_init1() !---------------------------------------------------------- if (iamroot_CPLID) then - inquire(file='cpl_modelio.nml',exist=exists) + inquire(file='cpl_modelio.nml'//trim(cpl_inst_tag),exist=exists) if (exists) then logunit = shr_file_getUnit() - call shr_file_setIO('cpl_modelio.nml',logunit) + call shr_file_setIO('cpl_modelio.nml'//trim(cpl_inst_tag),logunit) call shr_file_setLogUnit(logunit) loglevel = 1 call shr_file_setLogLevel(loglevel) @@ -780,6 +794,8 @@ subroutine cime_pre_init1() write(logunit,'(2A)') subname,' USE_ESMF_LIB is NOT set, using esmf_wrf_timemgr' #endif write(logunit,'(2A)') subname,' MCT_INTERFACE is set' + if (num_inst_cpl > 1) & + write(logunit,'(2A,I0,A)') subname,' Driver is running with',num_inst_cpl,'instances' endif ! @@ -843,7 +859,12 @@ subroutine cime_pre_init2() !| Initialize infodata !---------------------------------------------------------- - call seq_infodata_init(infodata,nlfilename, GLOID, pioid) + if (len(cpl_inst_tag) > 0) then + call seq_infodata_init(infodata,nlfilename, GLOID, pioid, & + cpl_tag=cpl_inst_tag) + else + call seq_infodata_init(infodata,nlfilename, GLOID, pioid) + end if !---------------------------------------------------------- ! 
Print Model heading and copyright message @@ -3573,7 +3594,7 @@ subroutine cime_run() call seq_rest_write(EClock_d, seq_SyncClock, infodata, & atm, lnd, ice, ocn, rof, glc, wav, esp, & fractions_ax, fractions_lx, fractions_ix, fractions_ox, & - fractions_rx, fractions_gx, fractions_wx) + fractions_rx, fractions_gx, fractions_wx, tag=trim(cpl_inst_tag)) if (drv_threading) call seq_comm_setnthreads(nthreads_GLOID) call t_drvstopf ('CPL:RESTART',cplrun=.true.) @@ -3852,7 +3873,8 @@ subroutine cime_run() call mpi_barrier(mpicom_GLOID,ierr) call t_stopf("sync1_tprof") - write(timing_file,'(a,i8.8,a1,i5.5)') trim(tchkpt_dir)//"/model_timing_",ymd,"_",tod + write(timing_file,'(a,i8.8,a1,i5.5)') & + trim(tchkpt_dir)//"/cesm_timing"//trim(cpl_inst_tag)//"_",ymd,"_",tod if (output_perf) then call t_prf(filename=trim(timing_file), mpicom=mpicom_GLOID, & num_outpe=0, output_thispe=output_perf) @@ -3965,10 +3987,11 @@ subroutine cime_final() call t_set_prefixf("final:") if (output_perf) then - call t_prf(trim(timing_dir)//'/model_timing', mpicom=mpicom_GLOID, & - output_thispe=output_perf) + call t_prf(trim(timing_dir)//'/model_timing'//trim(cpl_inst_tag), & + mpicom=mpicom_GLOID, output_thispe=output_perf) else - call t_prf(trim(timing_dir)//'/model_timing', mpicom=mpicom_GLOID) + call t_prf(trim(timing_dir)//'/model_timing'//trim(cpl_inst_tag), & + mpicom=mpicom_GLOID) endif call t_unset_prefixf() @@ -4042,4 +4065,60 @@ subroutine cime_comp_barriers(mpicom, timer) endif end subroutine cime_comp_barriers +subroutine cime_cpl_init(comm, ninst, id) + + !----------------------------------------------------------------------- + ! + ! Initialize multiple coupler instances, if requested + ! + !----------------------------------------------------------------------- + + implicit none + + integer , intent(inout) :: comm + integer , intent(out) :: ninst + integer , intent(out) :: id ! instance ID, starts from 1 + ! + ! Local variables + ! 
+ integer :: ierr, inst_comm, mype, nu, numpes !, pes + integer :: cpl_ninst + + namelist /cime_cpl/ cpl_ninst + + call shr_mpi_commrank(comm, mype , ' cime_cpl_init') + call shr_mpi_commsize(comm, numpes, ' cime_cpl_init') + + ninst = 1 + id = 0 + + if (mype == 0) then + ! Read coupler namelist if it exists + cpl_ninst = 1 + nu = shr_file_getUnit() + open(unit = nu, file = NLFileName, status = 'old', iostat = ierr) + rewind(unit = nu) + read(unit = nu, nml = cime_cpl, iostat = ierr) + close(unit = nu) + call shr_file_freeUnit(nu) + ninst = max(cpl_ninst, 1) + end if + + call shr_mpi_bcast(ninst, comm, 'cpl_ninst') + + if (mod(numpes, ninst) /= 0) then + call shr_sys_abort(subname // & + ' : Total PE number must be a multiple of coupler instance number') + end if + + if (ninst > 1) then + id = mype * ninst / numpes + 1 + call mpi_comm_split(comm, id, 0, inst_comm, ierr) + if (ierr /= 0) & + call shr_sys_abort(subname // ' : Error in generating coupler instances') + comm = inst_comm + end if + +end subroutine cime_cpl_init + end module cime_comp_mod diff --git a/src/drivers/mct/main/seq_rest_mod.F90 b/src/drivers/mct/main/seq_rest_mod.F90 index 4c364534b65..c3764128787 100644 --- a/src/drivers/mct/main/seq_rest_mod.F90 +++ b/src/drivers/mct/main/seq_rest_mod.F90 @@ -285,7 +285,7 @@ end subroutine seq_rest_read subroutine seq_rest_write(EClock_d, seq_SyncClock, infodata, & atm, lnd, ice, ocn, rof, glc, wav, esp, & fractions_ax, fractions_lx, fractions_ix, fractions_ox, & - fractions_rx, fractions_gx, fractions_wx) + fractions_rx, fractions_gx, fractions_wx, tag) implicit none @@ -307,6 +307,7 @@ subroutine seq_rest_write(EClock_d, seq_SyncClock, infodata, & type(mct_aVect) , intent(inout) :: fractions_rx(:) ! Fractions on rof grid/decomp type(mct_aVect) , intent(inout) :: fractions_gx(:) ! Fractions on glc grid/decomp type(mct_aVect) , intent(inout) :: fractions_wx(:) ! 
Fractions on wav grid/decomp + character(len=*), optional, intent(in) :: tag integer(IN) :: n,n1,n2,n3,fk integer(IN) :: curr_ymd ! Current date YYYYMMDD @@ -367,8 +368,13 @@ subroutine seq_rest_write(EClock_d, seq_SyncClock, infodata, & call seq_timemgr_EClockGetData( EClock_d, curr_ymd=curr_ymd, curr_tod=curr_tod) call shr_cal_date2ymd(curr_ymd,yy,mm,dd) - write(rest_file,"(2a,i4.4,a,i2.2,a,i2.2,a,i5.5,a)") & - trim(case_name), '.cpl.r.', yy,'-',mm,'-',dd,'-',curr_tod,'.nc' + if (present(tag)) then + write(rest_file,"(2a,i4.4,a,i2.2,a,i2.2,a,i5.5,a)") & + trim(case_name), '.cpl'//trim(tag)//'.r.',yy,'-',mm,'-',dd,'-',curr_tod,'.nc' + else + write(rest_file,"(2a,i4.4,a,i2.2,a,i2.2,a,i5.5,a)") & + trim(case_name), '.cpl.r.', yy,'-',mm,'-',dd,'-',curr_tod,'.nc' + end if ! Write driver data to restart file diff --git a/src/drivers/mct/shr/seq_comm_mct.F90 b/src/drivers/mct/shr/seq_comm_mct.F90 index 226526adc38..118b1e770bd 100644 --- a/src/drivers/mct/shr/seq_comm_mct.F90 +++ b/src/drivers/mct/shr/seq_comm_mct.F90 @@ -198,7 +198,7 @@ integer function seq_comm_get_ncomps() end function seq_comm_get_ncomps - subroutine seq_comm_init(Comm_in, nmlfile) + subroutine seq_comm_init(Comm_in, nmlfile, Comm_ID) !---------------------------------------------------------- ! @@ -206,6 +206,7 @@ subroutine seq_comm_init(Comm_in, nmlfile) implicit none integer, intent(in) :: Comm_in character(len=*), intent(IN) :: nmlfile + integer, optional, intent(in) :: Comm_ID ! ! Local variables ! diff --git a/src/drivers/mct/shr/seq_infodata_mod.F90 b/src/drivers/mct/shr/seq_infodata_mod.F90 index d83d29dcf10..0136e3df253 100644 --- a/src/drivers/mct/shr/seq_infodata_mod.F90 +++ b/src/drivers/mct/shr/seq_infodata_mod.F90 @@ -18,6 +18,7 @@ ! !REVISION HISTORY: ! 2005-Nov-11 - E. Kluzek - creation of shr_inputinfo_mod ! 2007-Nov-15 - T. Craig - refactor for ccsm4 system and move to seq_infodata_mod +! 2016-Dec-08 - R. Montuoro - updated for multiple coupler instances ! ! 
!INTERFACE: ------------------------------------------------------------------ @@ -285,7 +286,7 @@ MODULE seq_infodata_mod ! ! !INTERFACE: ------------------------------------------------------------------ -SUBROUTINE seq_infodata_Init( infodata, nmlfile, ID, pioid) +SUBROUTINE seq_infodata_Init( infodata, nmlfile, ID, pioid, cpl_tag) ! !USES: @@ -302,6 +303,7 @@ SUBROUTINE seq_infodata_Init( infodata, nmlfile, ID, pioid) character(len=*), intent(IN) :: nmlfile ! Name-list filename integer(SHR_KIND_IN), intent(IN) :: ID ! seq_comm ID type(file_desc_T) :: pioid + character(len=*), optional, intent(IN) :: cpl_tag ! cpl instance suffix !EOP !----- local ----- @@ -588,6 +590,20 @@ SUBROUTINE seq_infodata_Init( infodata, nmlfile, ID, pioid) infodata%brnch_retain_casename = brnch_retain_casename infodata%restart_pfile = restart_pfile infodata%restart_file = restart_file + if (present(cpl_tag)) then + if (len(cpl_tag) > 0) then + if (trim(restart_file) /= trim(sp_str)) then + write(logunit,*) trim(subname),' ERROR: restart_file can '//& + 'only be read from restart pointer files when using multiple couplers ' + call shr_sys_abort(subname//' ERROR: invalid settings for restart_file ') + end if + end if + infodata%restart_file = restart_file + infodata%restart_pfile = trim(restart_pfile) // trim(cpl_tag) + else + infodata%restart_pfile = restart_pfile + infodata%restart_file = restart_file + end if infodata%single_column = single_column infodata%scmlat = scmlat infodata%scmlon = scmlon From bf1acfbd7c8395a87053719c6a08cf48a197697a Mon Sep 17 00:00:00 2001 From: Raffaele Montuoro Date: Fri, 12 May 2017 23:03:31 -0600 Subject: [PATCH 02/51] Add support for multiple coupler instances --- scripts/lib/CIME/case.py | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/lib/CIME/case.py b/scripts/lib/CIME/case.py index 7fdb926b9e3..9d0cdf706f6 100644 --- a/scripts/lib/CIME/case.py +++ b/scripts/lib/CIME/case.py @@ -635,7 +635,6 @@ def _get_component_config_data(self, files): 
self.clean_up_lookups() - def _setup_mach_pes(self, pecount, ncouplers, ninst, machine_name, mpilib): #-------------------------------------------- # pe layout From 427740c716be74e7cb99c218b1d09f72b3051894 Mon Sep 17 00:00:00 2001 From: Jim Edwards Date: Mon, 15 May 2017 15:51:45 -0600 Subject: [PATCH 03/51] pylint and doc fixes --- scripts/create_newcase | 8 +++++--- scripts/lib/CIME/case_run.py | 8 ++++---- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/scripts/create_newcase b/scripts/create_newcase index d663954bb68..3b70a47c2b9 100755 --- a/scripts/create_newcase +++ b/scripts/create_newcase @@ -48,11 +48,13 @@ OR parser.add_argument("--ncouplers",default=1, help="Specify number of coupler instances. " - "Set the number of coupler instances in the case.") + "Set the number of coupler instances in the case. " + "If ncouplers is > 1 then ninst should = 1.") parser.add_argument("--ninst",default=1, - help="Specify number of component instances" - "Set the number of component instances in the case.") + help="Specify number of component instances. " + "Set the number of component instances in the case. " + "If ninst > 1 then ncouplers should = 1") parser.add_argument("--mpilib", "-mpilib", help="Specify the mpilib. 
" diff --git a/scripts/lib/CIME/case_run.py b/scripts/lib/CIME/case_run.py index 096b4c77135..32543b470b5 100644 --- a/scripts/lib/CIME/case_run.py +++ b/scripts/lib/CIME/case_run.py @@ -1,6 +1,6 @@ from CIME.XML.standard_module_setup import * from CIME.case_submit import submit -from CIME.utils import gzip_existing_file, new_lid, run_and_log_case_status, append_status +from CIME.utils import gzip_existing_file, new_lid, run_and_log_case_status from CIME.check_lockedfiles import check_lockedfiles from CIME.get_timing import get_timing from CIME.provenance import save_prerun_provenance, save_postrun_provenance @@ -153,14 +153,14 @@ def post_run_check(case, lid): rundir = case.get_value("RUNDIR") model = case.get_value("MODEL") cpl_ninst = case.get_value("NINST_CPL") - cpl_logs = [] if cpl_ninst > 1: for inst in range(cpl_ninst): cpl_logs.append(os.path.join(rundir, "cpl_%04d.log." % (inst+1) + lid)) else: cpl_logs = [os.path.join(rundir, "cpl" + ".log." + lid)] - + cpl_logfile = cpl_logs[0] + # find the last model.log and cpl.log model_logfile = os.path.join(rundir, model + ".log." 
+ lid) @@ -176,7 +176,7 @@ def post_run_check(case, lid): with open(cpl_logfile, 'r') as fd: if 'SUCCESSFUL TERMINATION' in fd.read(): count_ok += 1 - if count_ok != cpl_ninst: + if count_ok != cpl_ninst: expect(False, "Model did not complete - see {} \n " .format(cpl_logfile)) ############################################################################### From 45ef6f10881c553b58014e34c86f38e76420bec1 Mon Sep 17 00:00:00 2001 From: Raffaele Montuoro Date: Fri, 12 May 2017 23:03:31 -0600 Subject: [PATCH 04/51] Add support for multiple coupler instances --- scripts/lib/CIME/case.py | 1 + scripts/lib/CIME/case_run.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/lib/CIME/case.py b/scripts/lib/CIME/case.py index 9d0cdf706f6..7fdb926b9e3 100644 --- a/scripts/lib/CIME/case.py +++ b/scripts/lib/CIME/case.py @@ -635,6 +635,7 @@ def _get_component_config_data(self, files): self.clean_up_lookups() + def _setup_mach_pes(self, pecount, ncouplers, ninst, machine_name, mpilib): #-------------------------------------------- # pe layout diff --git a/scripts/lib/CIME/case_run.py b/scripts/lib/CIME/case_run.py index 32543b470b5..9a010bff7cb 100644 --- a/scripts/lib/CIME/case_run.py +++ b/scripts/lib/CIME/case_run.py @@ -1,6 +1,6 @@ from CIME.XML.standard_module_setup import * from CIME.case_submit import submit -from CIME.utils import gzip_existing_file, new_lid, run_and_log_case_status +from CIME.utils import gzip_existing_file, new_lid, run_and_log_case_status, append_status from CIME.check_lockedfiles import check_lockedfiles from CIME.get_timing import get_timing from CIME.provenance import save_prerun_provenance, save_postrun_provenance From 01ca9185b322289aaf95734bf79017d24aab5808 Mon Sep 17 00:00:00 2001 From: Jim Edwards Date: Mon, 15 May 2017 15:51:45 -0600 Subject: [PATCH 05/51] pylint and doc fixes --- scripts/lib/CIME/case_run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/lib/CIME/case_run.py 
b/scripts/lib/CIME/case_run.py index 9a010bff7cb..32543b470b5 100644 --- a/scripts/lib/CIME/case_run.py +++ b/scripts/lib/CIME/case_run.py @@ -1,6 +1,6 @@ from CIME.XML.standard_module_setup import * from CIME.case_submit import submit -from CIME.utils import gzip_existing_file, new_lid, run_and_log_case_status, append_status +from CIME.utils import gzip_existing_file, new_lid, run_and_log_case_status from CIME.check_lockedfiles import check_lockedfiles from CIME.get_timing import get_timing from CIME.provenance import save_prerun_provenance, save_postrun_provenance From 98468665fb3b56e3670df2f195d9ab4c64166841 Mon Sep 17 00:00:00 2001 From: Jim Edwards Date: Fri, 19 May 2017 08:06:52 -0600 Subject: [PATCH 06/51] add support for multiple coupler history --- src/drivers/mct/main/cime_comp_mod.F90 | 7 ++++--- src/drivers/mct/main/seq_hist_mod.F90 | 16 +++++++++------- src/drivers/mct/main/seq_rest_mod.F90 | 11 +++-------- 3 files changed, 16 insertions(+), 18 deletions(-) diff --git a/src/drivers/mct/main/cime_comp_mod.F90 b/src/drivers/mct/main/cime_comp_mod.F90 index cf0ff5c5433..aaacbfcf7cf 100644 --- a/src/drivers/mct/main/cime_comp_mod.F90 +++ b/src/drivers/mct/main/cime_comp_mod.F90 @@ -2059,7 +2059,7 @@ subroutine cime_init() call seq_hist_write(infodata, EClock_d, & atm, lnd, ice, ocn, rof, glc, wav, & fractions_ax, fractions_lx, fractions_ix, fractions_ox, & - fractions_rx, fractions_gx, fractions_wx) + fractions_rx, fractions_gx, fractions_wx, trim(cpl_inst_tag)) if (drv_threading) call seq_comm_setnthreads(nthreads_GLOID) call t_adj_detailf(-2) @@ -3618,14 +3618,15 @@ subroutine cime_run() call seq_hist_write(infodata, EClock_d, & atm, lnd, ice, ocn, rof, glc, wav, & fractions_ax, fractions_lx, fractions_ix, fractions_ox, & - fractions_rx, fractions_gx, fractions_wx) + fractions_rx, fractions_gx, fractions_wx, trim(cpl_inst_tag)) if (drv_threading) call seq_comm_setnthreads(nthreads_GLOID) endif if (do_histavg) then call seq_hist_writeavg(infodata, 
EClock_d, & - atm, lnd, ice, ocn, rof, glc, wav, histavg_alarm) + atm, lnd, ice, ocn, rof, glc, wav, histavg_alarm, & + trim(cpl_inst_tag)) endif if (do_hist_a2x) then diff --git a/src/drivers/mct/main/seq_hist_mod.F90 b/src/drivers/mct/main/seq_hist_mod.F90 index 24c7a56a033..ca9d9c70488 100644 --- a/src/drivers/mct/main/seq_hist_mod.F90 +++ b/src/drivers/mct/main/seq_hist_mod.F90 @@ -130,7 +130,7 @@ module seq_hist_mod subroutine seq_hist_write(infodata, EClock_d, & atm, lnd, ice, ocn, rof, glc, wav, & fractions_ax, fractions_lx, fractions_ix, fractions_ox, fractions_rx, & - fractions_gx, fractions_wx) + fractions_gx, fractions_wx, tag) implicit none ! @@ -151,6 +151,7 @@ subroutine seq_hist_write(infodata, EClock_d, & type(mct_aVect) , intent(inout) :: fractions_rx(:) ! Fractions on rof grid/decomp type(mct_aVect) , intent(inout) :: fractions_gx(:) ! Fractions on glc grid/decomp type(mct_aVect) , intent(inout) :: fractions_wx(:) ! Fractions on wav grid/decomp + character(len=*) , intent(in) :: tag ! ! Local Variables integer(IN) :: curr_ymd ! 
Current date YYYYMMDD @@ -215,7 +216,7 @@ subroutine seq_hist_write(infodata, EClock_d, & calendar=calendar) call shr_cal_date2ymd(curr_ymd,yy,mm,dd) write(hist_file,"(2a,i4.4,a,i2.2,a,i2.2,a,i5.5,a)") & - trim(case_name), '.cpl.hi.', yy,'-',mm,'-',dd,'-',curr_tod,'.nc' + trim(case_name), '.cpl'//tag//'.hi.', yy,'-',mm,'-',dd,'-',curr_tod,'.nc' time_units = 'days since ' & // seq_io_date2yyyymmdd(start_ymd) // ' ' // seq_io_sec2hms(start_tod) @@ -389,7 +390,7 @@ end subroutine seq_hist_write !=============================================================================== subroutine seq_hist_writeavg(infodata, EClock_d, & - atm, lnd, ice, ocn, rof, glc, wav, write_now) + atm, lnd, ice, ocn, rof, glc, wav, write_now, tag) implicit none @@ -403,6 +404,7 @@ subroutine seq_hist_writeavg(infodata, EClock_d, & type (component_type) , intent(in) :: glc(:) type (component_type) , intent(in) :: wav(:) logical , intent(in) :: write_now ! write or accumulate + character(len=*) , intent(in) :: tag integer(IN) :: curr_ymd ! Current date YYYYMMDD integer(IN) :: curr_tod ! 
Current time-of-day (s) @@ -764,19 +766,19 @@ subroutine seq_hist_writeavg(infodata, EClock_d, & if (seq_timemgr_histavg_type == seq_timemgr_type_nyear) then call shr_cal_date2ymd(prev_ymd, yy, mm, dd) write(hist_file, "(2a, i4.4, a)") & - trim(case_name), '.cpl.ha.', yy, '.nc' + trim(case_name), '.cpl'//tag//'.ha.', yy, '.nc' elseif (seq_timemgr_histavg_type == seq_timemgr_type_nmonth) then call shr_cal_date2ymd(prev_ymd, yy, mm, dd) write(hist_file, "(2a, i4.4, a, i2.2, a)") & - trim(case_name), '.cpl.ha.', yy, '-', mm, '.nc' + trim(case_name), '.cpl'//tag//'.ha.', yy, '-', mm, '.nc' elseif (seq_timemgr_histavg_type == seq_timemgr_type_nday) then call shr_cal_date2ymd(prev_ymd, yy, mm, dd) write(hist_file, "(2a, i4.4, a, i2.2, a, i2.2, a)") & - trim(case_name), '.cpl.ha.', yy, '-', mm, '-', dd, '.nc' + trim(case_name), '.cpl'//tag//'.ha.', yy, '-', mm, '-', dd, '.nc' else call shr_cal_date2ymd(curr_ymd, yy, mm, dd) write(hist_file, "(2a, i4.4, a, i2.2, a, i2.2, a, i5.5, a)") & - trim(case_name), '.cpl.ha.', yy, '-', mm, '-', dd, '-', curr_tod, '.nc' + trim(case_name), '.cpl'//tag//'.ha.', yy, '-', mm, '-', dd, '-', curr_tod, '.nc' endif time_units = 'days since ' & diff --git a/src/drivers/mct/main/seq_rest_mod.F90 b/src/drivers/mct/main/seq_rest_mod.F90 index c3764128787..50025aa6b63 100644 --- a/src/drivers/mct/main/seq_rest_mod.F90 +++ b/src/drivers/mct/main/seq_rest_mod.F90 @@ -307,7 +307,7 @@ subroutine seq_rest_write(EClock_d, seq_SyncClock, infodata, & type(mct_aVect) , intent(inout) :: fractions_rx(:) ! Fractions on rof grid/decomp type(mct_aVect) , intent(inout) :: fractions_gx(:) ! Fractions on glc grid/decomp type(mct_aVect) , intent(inout) :: fractions_wx(:) ! Fractions on wav grid/decomp - character(len=*), optional, intent(in) :: tag + character(len=*) , intent(in) :: tag integer(IN) :: n,n1,n2,n3,fk integer(IN) :: curr_ymd ! 
Current date YYYYMMDD @@ -368,13 +368,8 @@ subroutine seq_rest_write(EClock_d, seq_SyncClock, infodata, & call seq_timemgr_EClockGetData( EClock_d, curr_ymd=curr_ymd, curr_tod=curr_tod) call shr_cal_date2ymd(curr_ymd,yy,mm,dd) - if (present(tag)) then - write(rest_file,"(2a,i4.4,a,i2.2,a,i2.2,a,i5.5,a)") & - trim(case_name), '.cpl'//trim(tag)//'.r.',yy,'-',mm,'-',dd,'-',curr_tod,'.nc' - else - write(rest_file,"(2a,i4.4,a,i2.2,a,i2.2,a,i5.5,a)") & - trim(case_name), '.cpl.r.', yy,'-',mm,'-',dd,'-',curr_tod,'.nc' - end if + write(rest_file,"(2a,i4.4,a,i2.2,a,i2.2,a,i5.5,a)") & + trim(case_name), '.cpl'//trim(tag)//'.r.',yy,'-',mm,'-',dd,'-',curr_tod,'.nc' ! Write driver data to restart file From f4ec1781ffbb9ae5edbfd091fc24055d77df8bf8 Mon Sep 17 00:00:00 2001 From: Jim Edwards Date: Fri, 19 May 2017 11:54:54 -0600 Subject: [PATCH 07/51] toward a working ERS_C4 test --- scripts/Tools/check_case | 3 +-- scripts/lib/CIME/XML/entry_id.py | 33 ++++++++++++++++----------- scripts/lib/CIME/build.py | 2 +- scripts/lib/CIME/case_run.py | 2 +- scripts/lib/CIME/case_setup.py | 2 +- scripts/lib/CIME/case_submit.py | 2 +- scripts/lib/CIME/check_lockedfiles.py | 23 +++++++++---------- scripts/lib/CIME/test_scheduler.py | 7 ++++++ 8 files changed, 43 insertions(+), 31 deletions(-) diff --git a/scripts/Tools/check_case b/scripts/Tools/check_case index 47279a2cd4a..07968a75ecb 100755 --- a/scripts/Tools/check_case +++ b/scripts/Tools/check_case @@ -50,9 +50,8 @@ def _main_func(description): parse_command_line(sys.argv, description) - check_lockedfiles() - with Case(read_only=False) as case: + check_lockedfiles(case) create_namelists(case) build_complete = case.get_value("BUILD_COMPLETE") diff --git a/scripts/lib/CIME/XML/entry_id.py b/scripts/lib/CIME/XML/entry_id.py index aa46d4cee79..88ee46143bc 100644 --- a/scripts/lib/CIME/XML/entry_id.py +++ b/scripts/lib/CIME/XML/entry_id.py @@ -121,7 +121,7 @@ def _get_value_match(self, node, attributes=None, exact_match=False): max_score = 
score mnode = node else: - expect(False, + expect(False, "match attribute can only have a value of 'last' or 'first', value is %s" %match_type) return mnode.text @@ -400,20 +400,27 @@ def compare_xml(self, other): if f1val is not None: f2val = other.get_value(vid, resolved=False) if f1val != f2val: + logger.info("HERE %s %s "%(f1val, f2val)) xmldiffs[vid] = [f1val, f2val] else: - f1val = ET.tostring(node, method="text") - f2val = ET.tostring(f2match, method="text") - if f2val != f1val: - f1value_nodes = self.get_nodes("value", root=node) - for valnode in f1value_nodes: - f2valnodes = other.get_nodes("value", root=f2match, attributes=valnode.attrib) - for f2valnode in f2valnodes: - if valnode.attrib is None and f2valnode.attrib is None or \ - f2valnode.attrib == valnode.attrib: - if other.get_resolved_value(f2valnode.text) != self.get_resolved_value(valnode.text): - xmldiffs["{}:{}".format(vid, valnode.attrib)] = [valnode.text, f2valnode.text] - + for comp in self.get_values("COMP_CLASSES"): + f1val = self.get_value("{}_{}".format(vid,comp), resolved=False) + if f1val is not None: + f2val = other.get_value("{}_{}".format(vid,comp), resolved=False) + if f1val != f2val: + xmldiffs[vid] = [f1val, f2val] + else: + f1val = ET.tostring(node, method="text") + f2val = ET.tostring(f2match, method="text") + if f2val != f1val: + f1value_nodes = self.get_nodes("value", root=node) + for valnode in f1value_nodes: + f2valnodes = other.get_nodes("value", root=f2match, attributes=valnode.attrib) + for f2valnode in f2valnodes: + if valnode.attrib is None and f2valnode.attrib is None or \ + f2valnode.attrib == valnode.attrib: + if other.get_resolved_value(f2valnode.text) != self.get_resolved_value(valnode.text): + xmldiffs["{}:{}".format(vid, valnode.attrib)] = [valnode.text, f2valnode.text] return xmldiffs def __iter__(self): diff --git a/scripts/lib/CIME/build.py b/scripts/lib/CIME/build.py index eec9593bf85..d472d37326c 100644 --- a/scripts/lib/CIME/build.py +++ 
b/scripts/lib/CIME/build.py @@ -382,7 +382,7 @@ def _case_build_impl(caseroot, case, sharedlib_only, model_only, buildlist): comp_classes = case.get_values("COMP_CLASSES") - check_lockedfiles(caseroot) + check_lockedfiles(case) # Retrieve relevant case data # This environment variable gets set for cesm Make and diff --git a/scripts/lib/CIME/case_run.py b/scripts/lib/CIME/case_run.py index 32543b470b5..95e8eb294d0 100644 --- a/scripts/lib/CIME/case_run.py +++ b/scripts/lib/CIME/case_run.py @@ -28,7 +28,7 @@ def pre_run_check(case, lid, skip_pnl=False): shutil.copy(env_mach_pes,"{}.{}".format(env_mach_pes, lid)) # check for locked files. - check_lockedfiles(case.get_value("CASEROOT")) + check_lockedfiles(case) logger.debug("check_lockedfiles OK") # check that build is done diff --git a/scripts/lib/CIME/case_setup.py b/scripts/lib/CIME/case_setup.py index e4aebfdf924..a31db04dae5 100644 --- a/scripts/lib/CIME/case_setup.py +++ b/scripts/lib/CIME/case_setup.py @@ -146,7 +146,7 @@ def _case_setup_impl(case, caseroot, clean=False, test_mode=False, reset=False): unlock_file("env_build.xml") case.flush() - check_lockedfiles() + check_lockedfiles(case) env_mach_pes = case.get_env("mach_pes") pestot = env_mach_pes.get_total_tasks(models) logger.debug("at update TOTALPES = {}".format(pestot)) diff --git a/scripts/lib/CIME/case_submit.py b/scripts/lib/CIME/case_submit.py index 675a56ad0cc..915239eaa7f 100644 --- a/scripts/lib/CIME/case_submit.py +++ b/scripts/lib/CIME/case_submit.py @@ -108,7 +108,7 @@ def submit(case, job=None, resubmit=False, no_batch=False, skip_pnl=False, raise def check_case(case, caseroot): - check_lockedfiles(caseroot) + check_lockedfiles(case) create_namelists(case) # Must be called before check_all_input_data logger.info("Checking that inputdata is available as part of case submission") check_all_input_data(case) diff --git a/scripts/lib/CIME/check_lockedfiles.py b/scripts/lib/CIME/check_lockedfiles.py index dffb588bad2..f2bc049385d 100644 --- 
a/scripts/lib/CIME/check_lockedfiles.py +++ b/scripts/lib/CIME/check_lockedfiles.py @@ -77,13 +77,13 @@ def check_pelayouts_require_rebuild(case, models): unlock_file("env_mach_pes.xml", case.get_value("CASEROOT")) -def check_lockedfiles(caseroot=None): +def check_lockedfiles(case): """ Check that all lockedfiles match what's in case If caseroot is not specified, it is set to the current working directory """ - caseroot = os.getcwd() if caseroot is None else caseroot + caseroot = case.get_value("CASEROOT") lockedfiles = glob.glob(os.path.join(caseroot, "LockedFiles", "*.xml")) for lfile in lockedfiles: fpart = os.path.basename(lfile) @@ -91,19 +91,20 @@ def check_lockedfiles(caseroot=None): if fpart.count('.') > 1: continue cfile = os.path.join(caseroot, fpart) + components = case.get_values("COMP_CLASSES") if os.path.isfile(cfile): objname = fpart.split('.')[0] if objname == "env_build": - f1obj = EnvBuild(caseroot, cfile) + f1obj = case.get_env('build') f2obj = EnvBuild(caseroot, lfile) elif objname == "env_mach_pes": - f1obj = EnvMachPes(caseroot, cfile) - f2obj = EnvMachPes(caseroot, lfile) + f1obj = case.get_env('mach_pes') + f2obj = EnvMachPes(caseroot, lfile, components=components) elif objname == "env_case": - f1obj = EnvCase(caseroot, cfile) + f1obj = case.get_env('case') f2obj = EnvCase(caseroot, lfile) elif objname == "env_batch": - f1obj = EnvBatch(caseroot, cfile) + f1obj = case.get_env('batch') f2obj = EnvBatch(caseroot, lfile) else: logging.warn("Locked XML file '{}' is not current being handled".format(fpart)) @@ -122,15 +123,13 @@ def check_lockedfiles(caseroot=None): " recover the original copy from LockedFiles") elif objname == "env_build": logging.warn("Setting build complete to False") - f1obj.set_value("BUILD_COMPLETE", False) + case.set_value("BUILD_COMPLETE", False) if "PIO_VERSION" in diffs.keys(): - f1obj.set_value("BUILD_STATUS", 2) - f1obj.write() + case.set_value("BUILD_STATUS", 2) logging.critical("Changing PIO_VERSION requires running 
" "case.build --clean-all and rebuilding") else: - f1obj.set_value("BUILD_STATUS", 1) - f1obj.write() + case.set_value("BUILD_STATUS", 1) elif objname == "env_batch": expect(False, "Batch configuration has changed, please run case.setup --reset") else: diff --git a/scripts/lib/CIME/test_scheduler.py b/scripts/lib/CIME/test_scheduler.py index f8494068a85..8f8fded042e 100644 --- a/scripts/lib/CIME/test_scheduler.py +++ b/scripts/lib/CIME/test_scheduler.py @@ -405,6 +405,10 @@ def _create_newcase_phase(self, test): ninst = case_opt[1:] create_newcase_cmd += " --ninst {}".format(ninst) logger.debug (" NINST set to {}".format(ninst)) + if case_opt.startswith('C'): + ncpl = case_opt[1:] + create_newcase_cmd += " --ncouplers {}" .format(ncpl) + logger.debug (" NCPL set to {}" .format(ncpl)) if case_opt.startswith('P'): pesize = case_opt[1:] create_newcase_cmd += " --pecount {}".format(pesize) @@ -524,6 +528,9 @@ def _xml_phase(self, test): elif opt.startswith('N'): # handled in create_newcase continue + elif opt.startswith('C'): + # handled in create_newcase + continue elif opt.startswith('IOP'): logger.warn("IOP test option not yet implemented") else: From b78ecb72ff35ac5a33c1dbd7a4b5889e2fb913f3 Mon Sep 17 00:00:00 2001 From: Jim Edwards Date: Fri, 19 May 2017 15:51:54 -0600 Subject: [PATCH 08/51] ERS_C4.f19_g16_rx1.A is working --- .../CIME/SystemTests/system_tests_common.py | 153 ++++++++++-------- 1 file changed, 90 insertions(+), 63 deletions(-) diff --git a/scripts/lib/CIME/SystemTests/system_tests_common.py b/scripts/lib/CIME/SystemTests/system_tests_common.py index 6a6b07c5b4b..5af8ec7df39 100644 --- a/scripts/lib/CIME/SystemTests/system_tests_common.py +++ b/scripts/lib/CIME/SystemTests/system_tests_common.py @@ -234,15 +234,18 @@ def run_indv(self, suffix="base", st_archive=False): case_st_archive(self._case) def _coupler_log_indicates_run_complete(self): - newestcpllogfile = self._case.get_latest_cpl_log() - logger.debug("Latest Coupler log file is 
{}".format(newestcpllogfile)) + newestcpllogfiles = self._get_latest_cpl_logs() + logger.debug("Latest Coupler log file(s) {}" .format(newestcpllogfiles)) # Exception is raised if the file is not compressed - try: - if "SUCCESSFUL TERMINATION" in gzip.open(newestcpllogfile, 'rb').read(): - return True - except: - logger.info("{} is not compressed, assuming run failed".format(newestcpllogfile)) - return False + allgood = len(newestcpllogfiles) + for cpllog in newestcpllogfiles: + try: + if "SUCCESSFUL TERMINATION" in gzip.open(cpllog, 'rb').read(): + allgood = allgood - 1 + except: + logger.info("{} is not compressed, assuming run failed".format(cpllog)) + + return allgood==0 def _component_compare_copy(self, suffix): comments = copy(self._case, suffix) @@ -304,33 +307,33 @@ def _check_for_memleak(self): Examine memory usage as recorded in the cpl log file and look for unexpected increases. """ - cpllog = self._case.get_latest_cpl_log() - - memlist = self._get_mem_usage(cpllog) + latestcpllogs = self._get_latest_cpl_logs() + for cpllog in latestcpllogs: + memlist = self._get_mem_usage(cpllog) - with self._test_status: - if len(memlist)<3: - self._test_status.set_status(MEMLEAK_PHASE, TEST_PASS_STATUS, comments="insuffiencient data for memleak test") - else: - finaldate = int(memlist[-1][0]) - originaldate = int(memlist[0][0]) - finalmem = float(memlist[-1][1]) - originalmem = float(memlist[0][1]) - memdiff = -1 - if originalmem > 0: - memdiff = (finalmem - originalmem)/originalmem - tolerance = self._case.get_value("TEST_MEMLEAK_TOLERANCE") - if tolerance is None: - tolerance = 0.1 - expect(tolerance > 0.0, "Bad value for memleak tolerance in test") - if memdiff < 0: + with self._test_status: + if len(memlist)<3: self._test_status.set_status(MEMLEAK_PHASE, TEST_PASS_STATUS, comments="insuffiencient data for memleak test") - elif memdiff < tolerance: - self._test_status.set_status(MEMLEAK_PHASE, TEST_PASS_STATUS) else: - comment = "memleak detected, memory went from 
{:f} to {:f} in {:d} days".format(originalmem, finalmem, finaldate-originaldate) - append_testlog(comment) - self._test_status.set_status(MEMLEAK_PHASE, TEST_FAIL_STATUS, comments=comment) + finaldate = int(memlist[-1][0]) + originaldate = int(memlist[0][0]) + finalmem = float(memlist[-1][1]) + originalmem = float(memlist[0][1]) + memdiff = -1 + if originalmem > 0: + memdiff = (finalmem - originalmem)/originalmem + tolerance = self._case.get_value("TEST_MEMLEAK_TOLERANCE") + if tolerance is None: + tolerance = 0.1 + expect(tolerance > 0.0, "Bad value for memleak tolerance in test") + if memdiff < 0: + self._test_status.set_status(MEMLEAK_PHASE, TEST_PASS_STATUS, comments="insuffiencient data for memleak test") + elif memdiff < tolerance: + self._test_status.set_status(MEMLEAK_PHASE, TEST_PASS_STATUS) + else: + comment = "memleak detected, memory went from {:f} to {:f} in {:d} days".format(originalmem, finalmem, finaldate-originaldate) + append_testlog(comment) + self._test_status.set_status(MEMLEAK_PHASE, TEST_FAIL_STATUS, comments=comment) def compare_env_run(self, expected=None): """ @@ -349,6 +352,25 @@ def compare_env_run(self, expected=None): return False return True + def _get_latest_cpl_logs(self): + """ + find and return the latest cpl log file in the run directory + """ + coupler_log_path = self._case.get_value("RUNDIR") + cpllogs = glob.glob(os.path.join(coupler_log_path, 'cpl*.log.*')) + lastcpllogs = [] + if cpllogs: + lastcpllogs.append(max(cpllogs, key=os.path.getctime)) + basename = os.path.basename(lastcpllogs[0]) + suffix = basename.split('.',1)[1] + for log in cpllogs: + if log in lastcpllogs: + continue + if log.endswith(suffix): + lastcpllogs.append(log) + + return lastcpllogs + def _compare_baseline(self): """ compare the current test output to a baseline result @@ -364,42 +386,37 @@ def _compare_baseline(self): basecmp_dir = os.path.join(self._case.get_value("BASELINE_ROOT"), baseline_name) # compare memory usage to baseline - newestcpllogfile 
= self._case.get_latest_cpl_log() - memlist = self._get_mem_usage(newestcpllogfile) - baselog = os.path.join(basecmp_dir, "cpl.log.gz") - if not os.path.isfile(baselog): - # for backward compatibility - baselog = os.path.join(basecmp_dir, "cpl.log") - - if os.path.isfile(baselog) and len(memlist) > 3: - blmem = self._get_mem_usage(baselog) - blmem = 0 if blmem == [] else blmem[-1][1] - curmem = memlist[-1][1] - if blmem != 0: - diff = (curmem - blmem) / blmem - if diff < 0.1: + newestcpllogfiles = self._get_latest_cpl_logs() + memlist = self._get_mem_usage(newestcpllogfiles[0]) + for cpllog in newestcpllogfiles: + m = re.search(r"(cpl.*.log).*.gz",cpllog) + if m is not None: + baselog = os.path.join(basecmp_dir, m.group(1))+".gz" + if baselog is None or not os.path.isfile(baselog): + # for backward compatibility + baselog = os.path.join(basecmp_dir, "cpl.log") + if os.path.isfile(baselog) and len(memlist) > 3: + blmem = self._get_mem_usage(baselog)[-1][1] + curmem = memlist[-1][1] + diff = (curmem-blmem)/blmem + if(diff < 0.1): self._test_status.set_status(MEMCOMP_PHASE, TEST_PASS_STATUS) else: comment = "Error: Memory usage increase > 10% from baseline" self._test_status.set_status(MEMCOMP_PHASE, TEST_FAIL_STATUS, comments=comment) append_testlog(comment) - else: - comment = "Error: Could not determine baseline memory usage" - self._test_status.set_status(MEMCOMP_PHASE, TEST_FAIL_STATUS, comments=comment) - append_testlog(comment) - - # compare throughput to baseline - current = self._get_throughput(newestcpllogfile) - baseline = self._get_throughput(baselog) - #comparing ypd so bigger is better - if baseline is not None and current is not None: - diff = (baseline - current)/baseline - if(diff < 0.25): - self._test_status.set_status(THROUGHPUT_PHASE, TEST_PASS_STATUS) - else: - comment = "Error: Computation time increase > 25% from baseline" - self._test_status.set_status(THROUGHPUT_PHASE, TEST_FAIL_STATUS, comments=comment) - append_testlog(comment) + # compare 
throughput to baseline + current = self._get_throughput(cpllog) + baseline = self._get_throughput(baselog) + #comparing ypd so bigger is better + if baseline is not None and current is not None: + diff = (baseline - current)/baseline + if(diff < 0.25): + self._test_status.set_status(THROUGHPUT_PHASE, TEST_PASS_STATUS) + else: + comment = "Error: Computation time increase > 25% from baseline" + self._test_status.set_status(THROUGHPUT_PHASE, TEST_FAIL_STATUS, comments=comment) + append_testlog(comment) def _generate_baseline(self): """ @@ -412,6 +429,16 @@ def _generate_baseline(self): status = TEST_PASS_STATUS if success else TEST_FAIL_STATUS baseline_name = self._case.get_value("BASEGEN_CASE") self._test_status.set_status("{}".format(GENERATE_PHASE), status, comments=os.path.dirname(baseline_name)) + basegen_dir = os.path.join(self._case.get_value("BASELINE_ROOT"), self._case.get_value("BASEGEN_CASE")) + # copy latest cpl log to baseline + # drop the date so that the name is generic + newestcpllogfiles = self._get_latest_cpl_logs() + for cpllog in newestcpllogfiles: + m = re.search(r"(cpl.*.log).*.gz",cpllog) + if m is not None: + baselog = os.path.join(basegen_dir, m.group(1))+".gz" + shutil.copyfile(cpllog, + os.path.join(basegen_dir,baselog)) class FakeTest(SystemTestsCommon): """ From 0cfd7caadaf473c54cf257f1bca450d50c3c7e9f Mon Sep 17 00:00:00 2001 From: Jim Edwards Date: Fri, 9 Jun 2017 16:40:23 -0600 Subject: [PATCH 09/51] fix issues in rebase --- scripts/lib/CIME/XML/entry_id.py | 10 +++++----- scripts/lib/CIME/XML/env_mach_pes.py | 2 ++ scripts/lib/CIME/case_submit.py | 5 ++--- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/scripts/lib/CIME/XML/entry_id.py b/scripts/lib/CIME/XML/entry_id.py index 88ee46143bc..35d386f1c79 100644 --- a/scripts/lib/CIME/XML/entry_id.py +++ b/scripts/lib/CIME/XML/entry_id.py @@ -416,11 +416,11 @@ def compare_xml(self, other): f1value_nodes = self.get_nodes("value", root=node) for valnode in f1value_nodes: 
f2valnodes = other.get_nodes("value", root=f2match, attributes=valnode.attrib) - for f2valnode in f2valnodes: - if valnode.attrib is None and f2valnode.attrib is None or \ - f2valnode.attrib == valnode.attrib: - if other.get_resolved_value(f2valnode.text) != self.get_resolved_value(valnode.text): - xmldiffs["{}:{}".format(vid, valnode.attrib)] = [valnode.text, f2valnode.text] + for f2valnode in f2valnodes: + if valnode.attrib is None and f2valnode.attrib is None or \ + f2valnode.attrib == valnode.attrib: + if other.get_resolved_value(f2valnode.text) != self.get_resolved_value(valnode.text): + xmldiffs["{}:{}".format(vid, valnode.attrib)] = [valnode.text, f2valnode.text] return xmldiffs def __iter__(self): diff --git a/scripts/lib/CIME/XML/env_mach_pes.py b/scripts/lib/CIME/XML/env_mach_pes.py index f69d73a76f8..ed627f87a15 100644 --- a/scripts/lib/CIME/XML/env_mach_pes.py +++ b/scripts/lib/CIME/XML/env_mach_pes.py @@ -63,6 +63,8 @@ def get_total_tasks(self, comp_classes): pstrid = self.get_value("PSTRID", attribute={"component":comp}) tt = rootpe + (ntasks - 1) * pstrid + 1 total_tasks = max(tt, total_tasks) + import pdb + pdb.set_trace() total_tasks *= self.get_value("NINST_CPL") return total_tasks diff --git a/scripts/lib/CIME/case_submit.py b/scripts/lib/CIME/case_submit.py index 915239eaa7f..778ccf391ec 100644 --- a/scripts/lib/CIME/case_submit.py +++ b/scripts/lib/CIME/case_submit.py @@ -18,7 +18,6 @@ def _submit(case, job=None, resubmit=False, no_batch=False, skip_pnl=False, mail_user=None, mail_type='never', batch_args=None): caseroot = case.get_value("CASEROOT") - if job is None: if case.get_value("TEST"): job = "case.test" @@ -33,7 +32,7 @@ def _submit(case, job=None, resubmit=False, no_batch=False, skip_pnl=False, case.set_value("CONTINUE_RUN", True) else: if job in ("case.test","case.run"): - check_case(case, caseroot) + check_case(case) check_DA_settings(case) if case.get_value("MACH") == "mira": with open(".original_host", "w") as fd: @@ -107,7 +106,7 
@@ def submit(case, job=None, resubmit=False, no_batch=False, skip_pnl=False, raise -def check_case(case, caseroot): +def check_case(case): check_lockedfiles(case) create_namelists(case) # Must be called before check_all_input_data logger.info("Checking that inputdata is available as part of case submission") From 65026bae0099d7edb13fed84f8d60a69d4b3a7b2 Mon Sep 17 00:00:00 2001 From: Jim Edwards Date: Fri, 16 Jun 2017 13:36:57 -0600 Subject: [PATCH 10/51] add mcc test compare multi coupler to single instance run --- config/config_tests.xml | 18 ++++++++++ scripts/lib/CIME/SystemTests/mcc.py | 34 +++++++++++++++++++ scripts/lib/CIME/XML/env_mach_pes.py | 2 -- scripts/lib/update_acme_tests.py | 3 +- .../mct/cime_config/testdefs/testlist_drv.xml | 9 +++++ 5 files changed, 63 insertions(+), 3 deletions(-) create mode 100644 scripts/lib/CIME/SystemTests/mcc.py diff --git a/config/config_tests.xml b/config/config_tests.xml index a24577b1ef0..bba453bc98a 100644 --- a/config/config_tests.xml +++ b/config/config_tests.xml @@ -517,6 +517,24 @@ NODEFAIL Tests restart upon detected node failure. Generates fake failu FALSE + + multi-instance validation vs multi-coupler (default length) + 1 + FALSE + FALSE + none + $STOP_OPTION + $STOP_N + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + + multi-instance validation vs single instance (default length) 1 diff --git a/scripts/lib/CIME/SystemTests/mcc.py b/scripts/lib/CIME/SystemTests/mcc.py new file mode 100644 index 00000000000..872fb1eb09c --- /dev/null +++ b/scripts/lib/CIME/SystemTests/mcc.py @@ -0,0 +1,34 @@ +""" +Implementation of CIME MCC test: Compares ensemble methods + +This does two runs: In the first we run a three member ensemble using the +original multi component single coupler method and in the second we use +the new multi coupler method. 
We then compare results with the expectation that they are bfb +""" +from CIME.XML.standard_module_setup import * +from CIME.SystemTests.system_tests_compare_two import SystemTestsCompareTwo +from CIME.case_setup import case_setup + +logger = logging.getLogger(__name__) + + +class MCC(SystemTestsCompareTwo): + + def __init__(self, case): + self._comp_classes = [] + self._test_instances = 3 + SystemTestsCompareTwo.__init__(self, case, + separate_builds = False, + run_two_suffix = 'multicoupler', + run_one_description = 'single instance', + run_two_description = 'multi coupler') + + def _case_one_setup(self): + # The multicoupler case will increase the number of tasks by the + # number of requested couplers. + self._case.set_value("NINST_CPL", self._test_instances) + case_setup(self._case, test_mode=False, reset=True) + + def _case_two_setup(self): + self._case.set_value("NINST_CPL", 1) + case_setup(self._case, test_mode=True, reset=True) diff --git a/scripts/lib/CIME/XML/env_mach_pes.py b/scripts/lib/CIME/XML/env_mach_pes.py index ed627f87a15..f69d73a76f8 100644 --- a/scripts/lib/CIME/XML/env_mach_pes.py +++ b/scripts/lib/CIME/XML/env_mach_pes.py @@ -63,8 +63,6 @@ def get_total_tasks(self, comp_classes): pstrid = self.get_value("PSTRID", attribute={"component":comp}) tt = rootpe + (ntasks - 1) * pstrid + 1 total_tasks = max(tt, total_tasks) - import pdb - pdb.set_trace() total_tasks *= self.get_value("NINST_CPL") return total_tasks diff --git a/scripts/lib/update_acme_tests.py b/scripts/lib/update_acme_tests.py index bc1671ffe81..115b87dfcf5 100644 --- a/scripts/lib/update_acme_tests.py +++ b/scripts/lib/update_acme_tests.py @@ -51,7 +51,8 @@ "PET_P32.f19_f19.A", "SMS.T42_T42.S", "PRE.f19_f19.ADESP", - "PRE.f19_f19.ADESP_TEST") + "PRE.f19_f19.ADESP_TEST", + "MCC_P12.f19_g16_rx1.A") ), # diff --git a/src/drivers/mct/cime_config/testdefs/testlist_drv.xml b/src/drivers/mct/cime_config/testdefs/testlist_drv.xml index d3a1d3eb8df..7cd1cb792ab 100644 --- 
a/src/drivers/mct/cime_config/testdefs/testlist_drv.xml +++ b/src/drivers/mct/cime_config/testdefs/testlist_drv.xml @@ -23,6 +23,15 @@ + + + + + + + + + From 7aa8688e728ed76fbc4ab9aba6260bd1b0c43e20 Mon Sep 17 00:00:00 2001 From: Jim Edwards Date: Fri, 16 Jun 2017 13:37:57 -0600 Subject: [PATCH 11/51] update test description --- config/config_tests.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/config_tests.xml b/config/config_tests.xml index bba453bc98a..bcd33a90285 100644 --- a/config/config_tests.xml +++ b/config/config_tests.xml @@ -518,7 +518,7 @@ NODEFAIL Tests restart upon detected node failure. Generates fake failu - multi-instance validation vs multi-coupler (default length) + multi-coupler validation vs single-instance (default length) 1 FALSE FALSE From fa070e140dd55f0d4ab6bfff9da12e3ca7ddd921 Mon Sep 17 00:00:00 2001 From: Jim Edwards Date: Mon, 19 Jun 2017 08:11:47 -0600 Subject: [PATCH 12/51] fix interface, remove debug print --- scripts/Tools/check_lockedfiles | 4 +++- scripts/lib/CIME/XML/entry_id.py | 1 - 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/Tools/check_lockedfiles b/scripts/Tools/check_lockedfiles index 50cf745f439..3864f7d9cb8 100755 --- a/scripts/Tools/check_lockedfiles +++ b/scripts/Tools/check_lockedfiles @@ -5,6 +5,7 @@ This script compares xml files from standard_script_setup import * from CIME.check_lockedfiles import check_lockedfiles +from CIME.case import Case def parse_command_line(args, description): parser = argparse.ArgumentParser( @@ -40,7 +41,8 @@ def _main_func(description): caseroot = parse_command_line(sys.argv, description) - check_lockedfiles(caseroot) + with Case(read_only=True) as case: + check_lockedfiles(case) if __name__ == "__main__": _main_func(__doc__) diff --git a/scripts/lib/CIME/XML/entry_id.py b/scripts/lib/CIME/XML/entry_id.py index 35d386f1c79..94b2aca8423 100644 --- a/scripts/lib/CIME/XML/entry_id.py +++ b/scripts/lib/CIME/XML/entry_id.py @@ -400,7 
+400,6 @@ def compare_xml(self, other): if f1val is not None: f2val = other.get_value(vid, resolved=False) if f1val != f2val: - logger.info("HERE %s %s "%(f1val, f2val)) xmldiffs[vid] = [f1val, f2val] else: for comp in self.get_values("COMP_CLASSES"): From 4a443f30bb5dfb9979314475e94887c5528b7d2c Mon Sep 17 00:00:00 2001 From: Jim Edwards Date: Mon, 19 Jun 2017 10:12:27 -0600 Subject: [PATCH 13/51] change names in response to code review --- .../cime_config/namelist_definition_drv.xml | 8 +++--- src/drivers/mct/main/cime_comp_mod.F90 | 28 +++++++++---------- src/drivers/mct/main/seq_hist_mod.F90 | 18 ++++++------ src/drivers/mct/shr/seq_comm_mct.F90 | 7 +++-- 4 files changed, 31 insertions(+), 30 deletions(-) diff --git a/src/drivers/mct/cime_config/namelist_definition_drv.xml b/src/drivers/mct/cime_config/namelist_definition_drv.xml index 81c8efb6068..78237b864cf 100644 --- a/src/drivers/mct/cime_config/namelist_definition_drv.xml +++ b/src/drivers/mct/cime_config/namelist_definition_drv.xml @@ -41,13 +41,13 @@ --> - + - + integer - cesm_cpl - cesm_cpl + cime_cpl_inst + cime_cpl_inst Number of CESM coupler instances. diff --git a/src/drivers/mct/main/cime_comp_mod.F90 b/src/drivers/mct/main/cime_comp_mod.F90 index aaacbfcf7cf..d23eb310060 100644 --- a/src/drivers/mct/main/cime_comp_mod.F90 +++ b/src/drivers/mct/main/cime_comp_mod.F90 @@ -608,7 +608,7 @@ subroutine cime_pre_init1() time_brun = mpi_wtime() !--- Initialize multiple coupler instances, if requested --- - call cesm_cpl_init(Global_Comm, num_inst_cpl, cpl_id) + call cime_cpl_init(Global_Comm, num_inst_cpl, cpl_id) call shr_pio_init1(num_inst_total,NLFileName, Global_Comm) ! @@ -618,7 +618,7 @@ subroutine cime_pre_init1() ! 
if (Global_Comm /= MPI_COMM_NULL) then if (num_inst_cpl > 1) then - call seq_comm_init(Global_Comm, NLFileName, Comm_ID=cpl_id) + call seq_comm_init(Global_Comm, NLFileName, cpl_comm_ID=cpl_id) write(cpl_inst_tag,'("_",i4.4)') cpl_id else call seq_comm_init(Global_Comm, NLFileName) @@ -4066,7 +4066,7 @@ subroutine cime_comp_barriers(mpicom, timer) endif end subroutine cime_comp_barriers -subroutine cime_cpl_init(comm, ninst, id) +subroutine cime_cpl_init(comm, num_inst_cpl, id) !----------------------------------------------------------------------- ! @@ -4077,43 +4077,43 @@ subroutine cime_cpl_init(comm, ninst, id) implicit none integer , intent(inout) :: comm - integer , intent(out) :: ninst + integer , intent(out) :: num_inst_cpl integer , intent(out) :: id ! instance ID, starts from 1 ! ! Local variables ! integer :: ierr, inst_comm, mype, nu, numpes !, pes - integer :: cpl_ninst + integer :: ninst_cpl - namelist /cime_cpl/ cpl_ninst + namelist /cime_cpl_inst/ ninst_cpl call shr_mpi_commrank(comm, mype , ' cime_cpl_init') call shr_mpi_commsize(comm, numpes, ' cime_cpl_init') - ninst = 1 + num_inst_cpl = 1 id = 0 if (mype == 0) then ! 
Read coupler namelist if it exists - cpl_ninst = 1 + ninst_cpl = 1 nu = shr_file_getUnit() open(unit = nu, file = NLFileName, status = 'old', iostat = ierr) rewind(unit = nu) - read(unit = nu, nml = cime_cpl, iostat = ierr) + read(unit = nu, nml = cime_cpl_inst, iostat = ierr) close(unit = nu) call shr_file_freeUnit(nu) - ninst = max(cpl_ninst, 1) + num_inst_cpl = max(ninst_cpl, 1) end if - call shr_mpi_bcast(ninst, comm, 'cpl_ninst') + call shr_mpi_bcast(num_inst_cpl, comm, 'ninst_cpl') - if (mod(numpes, ninst) /= 0) then + if (mod(numpes, num_inst_cpl) /= 0) then call shr_sys_abort(subname // & ' : Total PE number must be a multiple of coupler instance number') end if - if (ninst > 1) then - id = mype * ninst / numpes + 1 + if (num_inst_cpl > 1) then + id = mype * num_inst_cpl / numpes + 1 call mpi_comm_split(comm, id, 0, inst_comm, ierr) if (ierr /= 0) & call shr_sys_abort(subname // ' : Error in generating coupler instances') diff --git a/src/drivers/mct/main/seq_hist_mod.F90 b/src/drivers/mct/main/seq_hist_mod.F90 index ca9d9c70488..e5fe1143d99 100644 --- a/src/drivers/mct/main/seq_hist_mod.F90 +++ b/src/drivers/mct/main/seq_hist_mod.F90 @@ -130,7 +130,7 @@ module seq_hist_mod subroutine seq_hist_write(infodata, EClock_d, & atm, lnd, ice, ocn, rof, glc, wav, & fractions_ax, fractions_lx, fractions_ix, fractions_ox, fractions_rx, & - fractions_gx, fractions_wx, tag) + fractions_gx, fractions_wx, cpl_inst_tag) implicit none ! @@ -151,7 +151,7 @@ subroutine seq_hist_write(infodata, EClock_d, & type(mct_aVect) , intent(inout) :: fractions_rx(:) ! Fractions on rof grid/decomp type(mct_aVect) , intent(inout) :: fractions_gx(:) ! Fractions on glc grid/decomp type(mct_aVect) , intent(inout) :: fractions_wx(:) ! Fractions on wav grid/decomp - character(len=*) , intent(in) :: tag + character(len=*) , intent(in) :: cpl_inst_tag ! ! Local Variables integer(IN) :: curr_ymd ! 
Current date YYYYMMDD @@ -216,7 +216,7 @@ subroutine seq_hist_write(infodata, EClock_d, & calendar=calendar) call shr_cal_date2ymd(curr_ymd,yy,mm,dd) write(hist_file,"(2a,i4.4,a,i2.2,a,i2.2,a,i5.5,a)") & - trim(case_name), '.cpl'//tag//'.hi.', yy,'-',mm,'-',dd,'-',curr_tod,'.nc' + trim(case_name), '.cpl'//cpl_inst_tag//'.hi.', yy,'-',mm,'-',dd,'-',curr_tod,'.nc' time_units = 'days since ' & // seq_io_date2yyyymmdd(start_ymd) // ' ' // seq_io_sec2hms(start_tod) @@ -390,7 +390,7 @@ end subroutine seq_hist_write !=============================================================================== subroutine seq_hist_writeavg(infodata, EClock_d, & - atm, lnd, ice, ocn, rof, glc, wav, write_now, tag) + atm, lnd, ice, ocn, rof, glc, wav, write_now, cpl_inst_tag) implicit none @@ -404,7 +404,7 @@ subroutine seq_hist_writeavg(infodata, EClock_d, & type (component_type) , intent(in) :: glc(:) type (component_type) , intent(in) :: wav(:) logical , intent(in) :: write_now ! write or accumulate - character(len=*) , intent(in) :: tag + character(len=*) , intent(in) :: cpl_inst_tag integer(IN) :: curr_ymd ! Current date YYYYMMDD integer(IN) :: curr_tod ! 
Current time-of-day (s) @@ -766,19 +766,19 @@ subroutine seq_hist_writeavg(infodata, EClock_d, & if (seq_timemgr_histavg_type == seq_timemgr_type_nyear) then call shr_cal_date2ymd(prev_ymd, yy, mm, dd) write(hist_file, "(2a, i4.4, a)") & - trim(case_name), '.cpl'//tag//'.ha.', yy, '.nc' + trim(case_name), '.cpl'//cpl_inst_tag//'.ha.', yy, '.nc' elseif (seq_timemgr_histavg_type == seq_timemgr_type_nmonth) then call shr_cal_date2ymd(prev_ymd, yy, mm, dd) write(hist_file, "(2a, i4.4, a, i2.2, a)") & - trim(case_name), '.cpl'//tag//'.ha.', yy, '-', mm, '.nc' + trim(case_name), '.cpl'//cpl_inst_tag//'.ha.', yy, '-', mm, '.nc' elseif (seq_timemgr_histavg_type == seq_timemgr_type_nday) then call shr_cal_date2ymd(prev_ymd, yy, mm, dd) write(hist_file, "(2a, i4.4, a, i2.2, a, i2.2, a)") & - trim(case_name), '.cpl'//tag//'.ha.', yy, '-', mm, '-', dd, '.nc' + trim(case_name), '.cpl'//cpl_inst_tag//'.ha.', yy, '-', mm, '-', dd, '.nc' else call shr_cal_date2ymd(curr_ymd, yy, mm, dd) write(hist_file, "(2a, i4.4, a, i2.2, a, i2.2, a, i5.5, a)") & - trim(case_name), '.cpl'//tag//'.ha.', yy, '-', mm, '-', dd, '-', curr_tod, '.nc' + trim(case_name), '.cpl'//cpl_inst_tag//'.ha.', yy, '-', mm, '-', dd, '-', curr_tod, '.nc' endif time_units = 'days since ' & diff --git a/src/drivers/mct/shr/seq_comm_mct.F90 b/src/drivers/mct/shr/seq_comm_mct.F90 index 118b1e770bd..ecd8dd3bdb6 100644 --- a/src/drivers/mct/shr/seq_comm_mct.F90 +++ b/src/drivers/mct/shr/seq_comm_mct.F90 @@ -198,15 +198,16 @@ integer function seq_comm_get_ncomps() end function seq_comm_get_ncomps - subroutine seq_comm_init(Comm_in, nmlfile, Comm_ID) - + subroutine seq_comm_init(Comm_in, nmlfile, Cpl_comm_id) !---------------------------------------------------------- ! ! Arguments implicit none integer, intent(in) :: Comm_in character(len=*), intent(IN) :: nmlfile - integer, optional, intent(in) :: Comm_ID + ! Optional argument cpl_comm_id is used to identify the particular + ! 
coupler instance used by each component instance in a multi-coupler case. + integer, optional, intent(in) :: Cpl_comm_id ! ! Local variables ! From 0e717c7e717fcf02e0b6a36b326e051e9824ea56 Mon Sep 17 00:00:00 2001 From: Jim Edwards Date: Wed, 21 Jun 2017 10:05:27 -0600 Subject: [PATCH 14/51] remove --ncouplers replace with --ninst-coupler --- scripts/create_newcase | 29 ++++++++++++++++------------- scripts/lib/CIME/test_scheduler.py | 2 +- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/scripts/create_newcase b/scripts/create_newcase index 3b70a47c2b9..32719fa568f 100755 --- a/scripts/create_newcase +++ b/scripts/create_newcase @@ -46,15 +46,14 @@ OR help="Specify a compiler. " "To see list of supported compilers for each machine, use the utility query_config in this directory") - parser.add_argument("--ncouplers",default=1, - help="Specify number of coupler instances. " - "Set the number of coupler instances in the case. " - "If ncouplers is > 1 then ninst should = 1.") + parser.add_argument("--ninst-couplers",action="store_true", + help="Specify that ninst should modify number of coupler instances " + "default is to have one coupler supporting multiple component instances.") parser.add_argument("--ninst",default=1, - help="Specify number of component instances. " - "Set the number of component instances in the case. " - "If ninst > 1 then ncouplers should = 1") + help="Specify number of model ensemble instances. " + "Default is multiple components and one coupler. Use --ninst-couplers to " + "run multiple couplers in the ensemble.") parser.add_argument("--mpilib", "-mpilib", help="Specify the mpilib. 
" @@ -148,9 +147,6 @@ OR expect(args.gridfile is not None, "User grid specification file must be set if the user grid is requested") - expect(not (int(args.ncouplers) > 1 and int(args.ninst) > 1), - "Only one component instance per coupler is allowed when using multiple couplers") - run_unsupported = False if model == "cesm": run_unsupported = args.run_unsupported @@ -161,12 +157,19 @@ OR if args.input_dir is not None: args.input_dir = os.path.abspath(args.input_dir) + if args.ninst_couplers: + ncouplers = args.ninst + ninst = 1 + else: + ncouplers = 1 + ninst = args.ninst + + return args.case, args.compset, args.res, args.machine, args.compiler,\ args.mpilib, args.project, args.pecount, \ args.user_mods_dir, args.pesfile, \ - args.user_grid, args.gridfile, args.srcroot, args.test, args.ncouplers, \ - args.ninst, \ - args.walltime, args.queue, args.output_root, args.script_root, \ + args.user_grid, args.gridfile, args.srcroot, args.test, ncouplers, \ + ninst, args.walltime, args.queue, args.output_root, args.script_root, \ run_unsupported, args.answer, args.input_dir ############################################################################### diff --git a/scripts/lib/CIME/test_scheduler.py b/scripts/lib/CIME/test_scheduler.py index 8f8fded042e..a8679e27c33 100644 --- a/scripts/lib/CIME/test_scheduler.py +++ b/scripts/lib/CIME/test_scheduler.py @@ -407,7 +407,7 @@ def _create_newcase_phase(self, test): logger.debug (" NINST set to {}".format(ninst)) if case_opt.startswith('C'): ncpl = case_opt[1:] - create_newcase_cmd += " --ncouplers {}" .format(ncpl) + create_newcase_cmd += " --ninst {} --ninst-coupler" .format(ncpl) logger.debug (" NCPL set to {}" .format(ncpl)) if case_opt.startswith('P'): pesize = case_opt[1:] From a30d850af9d7c4641c693cb51bbd90151f1de685 Mon Sep 17 00:00:00 2001 From: Jim Edwards Date: Wed, 21 Jun 2017 10:48:22 -0600 Subject: [PATCH 15/51] fix typo --- scripts/lib/CIME/test_scheduler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
diff --git a/scripts/lib/CIME/test_scheduler.py b/scripts/lib/CIME/test_scheduler.py index a8679e27c33..d839abf5a1d 100644 --- a/scripts/lib/CIME/test_scheduler.py +++ b/scripts/lib/CIME/test_scheduler.py @@ -407,7 +407,7 @@ def _create_newcase_phase(self, test): logger.debug (" NINST set to {}".format(ninst)) if case_opt.startswith('C'): ncpl = case_opt[1:] - create_newcase_cmd += " --ninst {} --ninst-coupler" .format(ncpl) + create_newcase_cmd += " --ninst {} --ninst-couplers" .format(ncpl) logger.debug (" NCPL set to {}" .format(ncpl)) if case_opt.startswith('P'): pesize = case_opt[1:] From 3b2f63d9e728aa556f7a9f5d7c13861092a7e8f6 Mon Sep 17 00:00:00 2001 From: Jim Edwards Date: Thu, 22 Jun 2017 08:10:30 -0600 Subject: [PATCH 16/51] update doc --- doc/source/users_guide/multi-instance.rst | 70 ++++++++++++----------- 1 file changed, 36 insertions(+), 34 deletions(-) diff --git a/doc/source/users_guide/multi-instance.rst b/doc/source/users_guide/multi-instance.rst index a2caca38e05..48440429ad7 100644 --- a/doc/source/users_guide/multi-instance.rst +++ b/doc/source/users_guide/multi-instance.rst @@ -1,44 +1,42 @@ .. _multi-instance: -**TODO: Need to update PE elements and explain + and - values** - - Multi-instance component functionality ====================================== -The CIME coupling infrastructure is capable of running multiple component instances under one model executable. -One caveat: If N multiple instances of any one active component are used, the same number of multiple instances of ALL active components are required. -More details are discussed below. +The CIME coupling infrastructure is capable of running multiple component instances (ensembles) under one model executable. There are two modes of ensemble capability, single coupler in which all component instances are handled by a single coupler component or multi-coupler in which each instance includes a separate coupler component. 
In the multi-coupler mode the entire model is duplicated for each instance while in the single coupler mode only active components need be duplicated. In most cases the multi-coupler mode will give better performance and should be used. -The primary motivation for this development was to be able to run an ensemble Kalman-Filter for data assimilation and parameter estimation (UQ, for example). -However, it also provides the ability to run a set of experiments within a single model executable where each instance can have a different namelist, and to have all the output go to one directory. +The primary motivation for this development was to be able to run an ensemble Kalman-Filter for data assimilation and parameter estimation (UQ, for example). +However, it also provides the ability to run a set of experiments within a single model executable where each instance can have a different namelist, and to have all the output go to one directory. An F compset is used in the following example. Using the multiple-instance code involves the following steps: 1. Create the case. :: - > create_newcase --case Fmulti --compset F --res ne30_g16 + > create_newcase --case Fmulti --compset F --res ne30_ne30_mg17 > cd Fmulti -2. Assume this is the out-of-the-box pe-layout: +2. Assume this is the out-of-the-box pe-layout: :: - NTASKS(ATM)=128, NTHRDS(ATM)=1, ROOTPE(ATM)=0, NINST(ATM)=1 - NTASKS(LND)=128, NTHRDS(LND)=1, ROOTPE(LND)=0, NINST(LND)=1 - NTASKS(ICE)=128, NTHRDS(ICE)=1, ROOTPE(ICE)=0, NINST(ICE)=1 - NTASKS(OCN)=128, NTHRDS(OCN)=1, ROOTPE(OCN)=0, NINST(OCN)=1 - NTASKS(GLC)=128, NTHRDS(GLC)=1, ROOTPE(GLC)=0, NINST(GLC)=1 - NTASKS(WAV)=128, NTHRDS(WAV)=1, ROOTPE(WAV)=0, NINST(WAV)=1 - NTASKS(CPL)=128, NTHRDS(CPL)=1, ROOTPE(CPL)=0 - -The atm, lnd and rof are active components in this compset. The ocn is a prescribed data component, cice is a mixed prescribed/active component (ice-coverage is prescribed), and glc and wav are stub components. 
- -Let's say we want to run two instances of CAM in this experiment. -We will also have to run two instances of CLM, CICE and RTM. -However, we can run either one or two instances of DOCN, and we can ignore glc and wav since they do not do anything in this compset as stub components. - -To run two instances of CAM, CLM, CICE, RTM and DOCN, invoke the following commands in your **$CASEROOT** directory: + Comp NTASKS NTHRDS ROOTPE + CPL : 144/ 1; 0 + ATM : 144/ 1; 0 + LND : 144/ 1; 0 + ICE : 144/ 1; 0 + OCN : 144/ 1; 0 + ROF : 144/ 1; 0 + GLC : 144/ 1; 0 + WAV : 144/ 1; 0 + ESP : 1/ 1; 0 + +The atm, lnd and rof are active components in this compset. The ocn is a prescribed data component, cice is a mixed prescribed/active component (ice-coverage is prescribed), and glc, wav and esp are stub components. + +Let's say we want to run two instances of CAM in this experiment. +We will also have to run two instances of CLM, CICE and RTM. +However, we can run either one or two instances of DOCN, and we can ignore the stub components since they do not do anything in this compset. + +To run two instances of CAM, CLM, CICE, RTM and DOCN, invoke the following :ref:`xmlchange` commands in your **$CASEROOT** directory: :: > ./xmlchange NINST_ATM=2 @@ -47,16 +45,22 @@ To run two instances of CAM, CLM, CICE, RTM and DOCN, invoke the following comma > ./xmlchange NINST_ROF=2 > ./xmlchange NINST_OCN=2 -As a result, you will have two instances of CAM, CLM and CICE (prescribed), RTM, and DOCN, each running concurrently on 64 MPI tasks. +As a result, you will have two instances of CAM, CLM and CICE (prescribed), RTM, and DOCN, each running concurrently on 72 MPI tasks and all using the same coupler component. In this single coupler mode the number of tasks for each component instance is NTASKS_COMPONENT/NINST_COMPONENT and the total number of tasks is the same as for the single instance case. + +Now consider the multi coupler mode. 
+To use this mode change the NINST values for the individual components back to 1 and the NINST_CPL to 2. +:: + > ./xmlchange NINST=1 + > ./xmlchange NINST_CPL=2 -**TODO: put in reference to xmlchange".** +This configuration will run each component instance on the original 144 tasks but will generate two copies of the model (in the same executable) for a total of 288 tasks. 3. Set up the case :: > ./case.setup -A new **user_nl_xxx_NNNN** file (where NNNN is the number of the component instances) is generated when **case.setup** is called. +A new **user_nl_xxx_NNNN** file (where NNNN is the number of the component instances) is generated when **case.setup** is called. When calling **case.setup** with the **env_mach_pes.xml** file specifically, these files are created in **$CASEROOT**: :: @@ -79,17 +83,15 @@ Also, **case.setup** creates the following ``*_in_*`` files and ``*txt*`` files lnd_in_0001, lnd_in_0002 rof_in_0001, rof_in_0002 -The namelist for each component instance can be modified by changing the corresponding **user_nl_xxx_NNNN** file. -Modifying **user_nl_cam_0002** will result in your namelist changes being active ONLY for the second instance of CAM. +The namelist for each component instance can be modified by changing the corresponding **user_nl_xxx_NNNN** file. +Modifying **user_nl_cam_0002** will result in your namelist changes being active ONLY for the second instance of CAM. To change the DOCN stream txt file instance 0002, copy **docn.streams.txt.prescribed_0002** to your **$CASEROOT** directory with the name **user_docn.streams.txt.prescribed_0002** and modify it accordlingly. Also keep these important points in mind: #. **Multiple component instances can differ ONLY in namelist settings; they ALL use the same model executable.** -#. Multiple-instance implementation supports only one coupler component. - #. Calling **case.setup** with ``--clean`` *DOES NOT* remove the **user_nl_xxx_NN** files created by **case.setup**. -#. 
Multiple instances generally should un concurrently, which is the default setting in **env_mach_pes.xml**. - The serial setting is only for EXPERT USERS in upcoming development code implementations. +#. A special variable NINST_LAYOUT is provided for some experimental compsets, its value should be + 'concurrent' for all but a few special cases. From 8657b2ffe8de79a8da66093d89718b3915b5603e Mon Sep 17 00:00:00 2001 From: Jim Edwards Date: Thu, 22 Jun 2017 08:15:59 -0600 Subject: [PATCH 17/51] update doc --- doc/source/users_guide/multi-instance.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/doc/source/users_guide/multi-instance.rst b/doc/source/users_guide/multi-instance.rst index 48440429ad7..0c088f2ba04 100644 --- a/doc/source/users_guide/multi-instance.rst +++ b/doc/source/users_guide/multi-instance.rst @@ -89,9 +89,14 @@ To change the DOCN stream txt file instance 0002, copy **docn.streams.txt.prescr Also keep these important points in mind: +#. Note that these changes can be made at create_newcase time with option --ninst #, use the additional option --ninst-couplers to invoke the multi-coupler mode. + #. **Multiple component instances can differ ONLY in namelist settings; they ALL use the same model executable.** #. Calling **case.setup** with ``--clean`` *DOES NOT* remove the **user_nl_xxx_NN** files created by **case.setup**. #. A special variable NINST_LAYOUT is provided for some experimental compsets, its value should be 'concurrent' for all but a few special cases. + +#. In create test these options can be invoked with testname modifiers _N# for the single coupler mode and + _C# for the multi-coupler mode. 
From aeba6546a96541cd2bb5acab8421f2da45e2c2d2 Mon Sep 17 00:00:00 2001 From: Jim Edwards Date: Thu, 22 Jun 2017 09:58:24 -0600 Subject: [PATCH 18/51] clarify use of _N and _C options to create_test and enforce --- doc/source/users_guide/multi-instance.rst | 7 +++---- scripts/lib/CIME/test_scheduler.py | 4 ++++ 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/doc/source/users_guide/multi-instance.rst b/doc/source/users_guide/multi-instance.rst index 0c088f2ba04..ce038897805 100644 --- a/doc/source/users_guide/multi-instance.rst +++ b/doc/source/users_guide/multi-instance.rst @@ -89,14 +89,13 @@ To change the DOCN stream txt file instance 0002, copy **docn.streams.txt.prescr Also keep these important points in mind: -#. Note that these changes can be made at create_newcase time with option --ninst #, use the additional option --ninst-couplers to invoke the multi-coupler mode. +#. Note that these changes can be made at create_newcase time with option --ninst # where # is a positive integer, use the additional logical option --ninst-couplers to invoke the multi-coupler mode. #. **Multiple component instances can differ ONLY in namelist settings; they ALL use the same model executable.** -#. Calling **case.setup** with ``--clean`` *DOES NOT* remove the **user_nl_xxx_NN** files created by **case.setup**. +#. Calling **case.setup** with ``--clean`` *DOES NOT* remove the **user_nl_xxx_NN** (where xxx is the component name) files created by **case.setup**. #. A special variable NINST_LAYOUT is provided for some experimental compsets, its value should be 'concurrent' for all but a few special cases. -#. In create test these options can be invoked with testname modifiers _N# for the single coupler mode and - _C# for the multi-coupler mode. +#. In **create_test** these options can be invoked with testname modifiers _N# for the single coupler mode and _C# for the multi-coupler mode. These are mutually exclusive options, they cannot be combined. 
diff --git a/scripts/lib/CIME/test_scheduler.py b/scripts/lib/CIME/test_scheduler.py index d839abf5a1d..adad7690899 100644 --- a/scripts/lib/CIME/test_scheduler.py +++ b/scripts/lib/CIME/test_scheduler.py @@ -395,6 +395,8 @@ def _create_newcase_phase(self, test): create_newcase_cmd += " --user-mods-dir {}".format(test_mod_file) mpilib = None + ninst = 1 + ncpl = 1 if case_opts is not None: for case_opt in case_opts: # pylint: disable=not-an-iterable if case_opt.startswith('M'): @@ -402,10 +404,12 @@ def _create_newcase_phase(self, test): create_newcase_cmd += " --mpilib {}".format(mpilib) logger.debug (" MPILIB set to {}".format(mpilib)) if case_opt.startswith('N'): + expect(ncpl == 1,"Cannot combine _C and _N options") ninst = case_opt[1:] create_newcase_cmd += " --ninst {}".format(ninst) logger.debug (" NINST set to {}".format(ninst)) if case_opt.startswith('C'): + expect(ninst == 1,"Cannot combine _C and _N options") ncpl = case_opt[1:] create_newcase_cmd += " --ninst {} --ninst-couplers" .format(ncpl) logger.debug (" NCPL set to {}" .format(ncpl)) From cc35348ef0ef974bc38adc5b5dac69203e4bbc09 Mon Sep 17 00:00:00 2001 From: Jim Edwards Date: Thu, 22 Jun 2017 19:15:45 -0600 Subject: [PATCH 19/51] refactor based on suggestions --- config/config_tests.xml | 2 +- scripts/lib/CIME/SystemTests/mcc.py | 4 +-- scripts/lib/CIME/XML/env_mach_pes.py | 28 ++++++++++++++++++- scripts/lib/CIME/build.py | 6 ++-- scripts/lib/CIME/case.py | 4 +-- scripts/lib/CIME/case_run.py | 8 +++--- scripts/lib/CIME/case_setup.py | 9 +++--- scripts/lib/CIME/case_st_archive.py | 7 +++-- scripts/lib/CIME/get_timing.py | 2 +- scripts/lib/CIME/preview_namelists.py | 10 +++---- src/drivers/mct/cime_config/buildnml | 6 ++-- .../mct/cime_config/config_component.xml | 11 ++++++-- .../cime_config/namelist_definition_drv.xml | 6 ++-- 13 files changed, 72 insertions(+), 31 deletions(-) diff --git a/config/config_tests.xml b/config/config_tests.xml index bcd33a90285..9fa7ca1fa5e 100644 --- 
a/config/config_tests.xml +++ b/config/config_tests.xml @@ -525,7 +525,7 @@ NODEFAIL Tests restart upon detected node failure. Generates fake failu none $STOP_OPTION $STOP_N - 1 + 1 1 1 1 diff --git a/scripts/lib/CIME/SystemTests/mcc.py b/scripts/lib/CIME/SystemTests/mcc.py index 872fb1eb09c..2db5706de5b 100644 --- a/scripts/lib/CIME/SystemTests/mcc.py +++ b/scripts/lib/CIME/SystemTests/mcc.py @@ -26,9 +26,9 @@ def __init__(self, case): def _case_one_setup(self): # The multicoupler case will increase the number of tasks by the # number of requested couplers. - self._case.set_value("NINST_CPL", self._test_instances) + self._case.set_value("COUPLER_COUNT", self._test_instances) case_setup(self._case, test_mode=False, reset=True) def _case_two_setup(self): - self._case.set_value("NINST_CPL", 1) + self._case.set_value("COUPLER_COUNT", 1) case_setup(self._case, test_mode=True, reset=True) diff --git a/scripts/lib/CIME/XML/env_mach_pes.py b/scripts/lib/CIME/XML/env_mach_pes.py index f69d73a76f8..213f304cbf9 100644 --- a/scripts/lib/CIME/XML/env_mach_pes.py +++ b/scripts/lib/CIME/XML/env_mach_pes.py @@ -19,6 +19,32 @@ def __init__(self, case_root=None, infile="env_mach_pes.xml", components=None): schema = os.path.join(get_cime_root(), "config", "xml_schemas", "env_mach_pes.xsd") EnvBase.__init__(self, case_root, infile, schema=schema) + def set_value(self, vid, value, subgroup=None, ignore_type=False): + """ + Set the value of an entry-id field to value + Returns the value or None if not found + subgroup is ignored in the general routine and applied in specific methods + """ + vid, comp, iscompvar = self.check_if_comp_var(vid, None) + if vid == "COUPLER_COUNT": + if value > 1: + for othercomp in self._components: + if othercomp != "CPL": + ninst_string = "NINST_{}".format(othercomp) + expect(self.get_value(ninst_string)==1, + "Cannot change COUPLER_COUNT value if {} > 1".format(ninst_string)) + elif value < 0: + # negative value effectively overrides safety check + value 
= -value + elif vid == "NINST": + if value > 1: + coupler_count = self.get_value("COUPLER_COUNT") + expect(coupler_count == 1,"Cannot change NINST value if COUPLER_COUNT > 1") + elif value < 0: + # negative value effectively overrides safety check + value = -value + return EnvBase.set_value(self, vid, value,subgroup=subgroup, ignore_type=ignore_type) + def get_value(self, vid, attribute=None, resolved=True, subgroup=None, pes_per_node=None): # pylint: disable=arguments-differ value = EnvBase.get_value(self, vid, attribute, resolved, subgroup) @@ -63,7 +89,7 @@ def get_total_tasks(self, comp_classes): pstrid = self.get_value("PSTRID", attribute={"component":comp}) tt = rootpe + (ntasks - 1) * pstrid + 1 total_tasks = max(tt, total_tasks) - total_tasks *= self.get_value("NINST_CPL") + total_tasks *= self.get_value("COUPLER_COUNT") return total_tasks def get_tasks_per_node(self, total_tasks, max_thread_count): diff --git a/scripts/lib/CIME/build.py b/scripts/lib/CIME/build.py index d472d37326c..01eb184f8de 100644 --- a/scripts/lib/CIME/build.py +++ b/scripts/lib/CIME/build.py @@ -398,14 +398,16 @@ def _case_build_impl(caseroot, case, sharedlib_only, model_only, buildlist): complist = [] for comp_class in comp_classes: - ninst = case.get_value("NINST_{}".format(comp_class)) if comp_class == "CPL": + ninst = case.get_value("COUPLER_COUNT") config_dir = None + expect(ninst is not None,"Failed to get COUPLER_COUNT value") else: + ninst = case.get_value("NINST_{}".format(comp_class)) config_dir = os.path.dirname(case.get_value("CONFIG_{}_FILE".format(comp_class))) + expect(ninst is not None,"Failed to get ninst for comp_class {}".format(comp_class)) comp = case.get_value("COMP_{}".format(comp_class)) thrds = case.get_value("NTHRDS_{}".format(comp_class)) - expect(ninst is not None,"Failed to get ninst for comp_class {}".format(comp_class)) complist.append((comp_class.lower(), comp, thrds, ninst, config_dir )) os.environ["COMP_{}".format(comp_class)] = comp diff --git 
a/scripts/lib/CIME/case.py b/scripts/lib/CIME/case.py index 7fdb926b9e3..742b03c23cc 100644 --- a/scripts/lib/CIME/case.py +++ b/scripts/lib/CIME/case.py @@ -727,7 +727,7 @@ def _setup_mach_pes(self, pecount, ncouplers, ninst, machine_name, mpilib): for compclass in self._component_classes: key = "NINST_{}".format(compclass) if compclass == "CPL": - mach_pes_obj.set_value(key, ncouplers) + mach_pes_obj.set_value("COUPLER_COUNT", ncouplers) continue # ESP models are currently limited to 1 instance if compclass == "ESP": @@ -1086,7 +1086,7 @@ def create_caseroot(self, clone=False): append_status("Component {} is {}".format(component_class, self._component_description[component_class]),"README.case", caseroot=self._caseroot) if component_class == "CPL": append_status("Using %s coupler instances" % - (self.get_value("NINST_CPL")), + (self.get_value("COUPLER_COUNT")), "README.case", caseroot=self._caseroot) continue comp_grid = "{}_GRID".format(component_class) diff --git a/scripts/lib/CIME/case_run.py b/scripts/lib/CIME/case_run.py index 95e8eb294d0..d929554b6a8 100644 --- a/scripts/lib/CIME/case_run.py +++ b/scripts/lib/CIME/case_run.py @@ -152,10 +152,10 @@ def post_run_check(case, lid): rundir = case.get_value("RUNDIR") model = case.get_value("MODEL") - cpl_ninst = case.get_value("NINST_CPL") + coupler_count = case.get_value("COUPLER_COUNT") cpl_logs = [] - if cpl_ninst > 1: - for inst in range(cpl_ninst): + if coupler_count > 1: + for inst in range(coupler_count): cpl_logs.append(os.path.join(rundir, "cpl_%04d.log." % (inst+1) + lid)) else: cpl_logs = [os.path.join(rundir, "cpl" + ".log." 
+ lid)] @@ -176,7 +176,7 @@ def post_run_check(case, lid): with open(cpl_logfile, 'r') as fd: if 'SUCCESSFUL TERMINATION' in fd.read(): count_ok += 1 - if count_ok != cpl_ninst: + if count_ok != coupler_count: expect(False, "Model did not complete - see {} \n " .format(cpl_logfile)) ############################################################################### diff --git a/scripts/lib/CIME/case_setup.py b/scripts/lib/CIME/case_setup.py index a31db04dae5..2db74f0ebf6 100644 --- a/scripts/lib/CIME/case_setup.py +++ b/scripts/lib/CIME/case_setup.py @@ -34,7 +34,7 @@ def _build_usernl_files(case, model, comp): expect(os.path.isdir(model_dir), "cannot find cime_config directory {} for component {}".format(model_dir, comp)) - ninst = case.get_value("NINST_CPL") + ninst = case.get_value("COUPLER_COUNT") if comp == "cpl": if not os.path.exists("user_nl_cpl"): shutil.copy(os.path.join(model_dir, "user_nl_cpl"), ".") @@ -124,13 +124,14 @@ def _case_setup_impl(case, caseroot, clean=False, test_mode=False, reset=False): # In CIME there can be multiple instances of each component model (an ensemble) NINST is the instance of that component. 
for comp in models: if comp == "CPL": - continue - ninst = case.get_value("NINST_{}".format(comp)) + ninst = case.get_value("COUPLER_COUNT") + else: + ninst = case.get_value("NINST_{}".format(comp)) ntasks = case.get_value("NTASKS_{}".format(comp)) # ESP models are currently limited to 1 instance expect((comp != "ESP") or (ninst == 1), "ESP components may only have one instance") - if ninst > ntasks: + if ninst > ntasks and comp != "CPL": if ntasks == 1: case.set_value("NTASKS_{}".format(comp), ninst) else: diff --git a/scripts/lib/CIME/case_st_archive.py b/scripts/lib/CIME/case_st_archive.py index c997cbeb107..0e9c0b7b197 100644 --- a/scripts/lib/CIME/case_st_archive.py +++ b/scripts/lib/CIME/case_st_archive.py @@ -59,8 +59,11 @@ def _get_datenames(case, last_date=None): ############################################################################### def _get_ninst_info(case, compclass): ############################################################################### - - ninst = case.get_value('NINST_' + compclass.upper()) + comp = compclass.upper() + if comp == "CPL": + ninst = case.get_value("COUPLER_COUNT") + else: + ninst = case.get_value('NINST_' + compclass.upper()) ninst_strings = [] if ninst is None: ninst = 1 diff --git a/scripts/lib/CIME/get_timing.py b/scripts/lib/CIME/get_timing.py index a00cd26705f..156526ef148 100644 --- a/scripts/lib/CIME/get_timing.py +++ b/scripts/lib/CIME/get_timing.py @@ -93,7 +93,7 @@ def gettime(self, heading_padded): return (0, 0, False) def getTiming(self): - ninst = self.case.get_value("NINST_CPL") + ninst = self.case.get_value("COUPLER_COUNT") if ninst > 1: for inst in range(ninst): self._getTiming(inst+1) diff --git a/scripts/lib/CIME/preview_namelists.py b/scripts/lib/CIME/preview_namelists.py index 8e6763abe89..5f5e82d62b0 100644 --- a/scripts/lib/CIME/preview_namelists.py +++ b/scripts/lib/CIME/preview_namelists.py @@ -67,7 +67,7 @@ def create_namelists(case, component=None): # Note - cpl must be last in the loop below 
so that in generating its namelist, # it can use xml vars potentially set by other component's buildnml scripts xmlfac = {} - cpl_ninst = case.get_value("NINST_CPL") + coupler_count = case.get_value("COUPLER_COUNT") models = case.get_values("COMP_CLASSES") models += [models.pop(0)] for model in models: @@ -77,13 +77,13 @@ def create_namelists(case, component=None): if model_str == "cpl": compname = "drv" complist = [m for m in models if m.upper() != "CPL"] - if cpl_ninst > 1: - xmlfac = {"NINST" : cpl_ninst, "NTASKS" : 1} + if coupler_count > 1: + xmlfac = {"NINST" : -(coupler_count), "NTASKS" : 1} else: compname = case.get_value("COMP_{}".format(model_str.upper())) complist = [model_str.upper()] - if cpl_ninst > 1: - xmlfac = {"NINST" : cpl_ninst, "NTASKS" : cpl_ninst} + if coupler_count > 1: + xmlfac = {"NINST" : -(coupler_count), "NTASKS" : coupler_count} xmlsave = {} for k in xmlfac.keys(): diff --git a/src/drivers/mct/cime_config/buildnml b/src/drivers/mct/cime_config/buildnml index fc06d7f33ed..954507d0268 100755 --- a/src/drivers/mct/cime_config/buildnml +++ b/src/drivers/mct/cime_config/buildnml @@ -254,8 +254,10 @@ def _create_component_modelio_namelists(case, files): config = {} config['component'] = model entries = nmlgen.init_defaults(infiles, config, skip_entry_loop=True) - - inst_count = case.get_value("NINST_" + model.upper()) + if model == "cpl": + inst_count = case.get_value("COUPLER_COUNT") + else: + inst_count = case.get_value("NINST_" + model.upper()) inst_string = "" inst_index = 1 diff --git a/src/drivers/mct/cime_config/config_component.xml b/src/drivers/mct/cime_config/config_component.xml index 7bb13ca4118..55454a6e359 100644 --- a/src/drivers/mct/cime_config/config_component.xml +++ b/src/drivers/mct/cime_config/config_component.xml @@ -1949,10 +1949,17 @@ ROOTPE (mpi task in MPI_COMM_WORLD) for each component + + integer + 1 + mach_pes + env_mach_pes.xml + Number of couplers in ensemble: if COUPLER_COUNT > 1 then NINST must be 1 for all 
components. + + integer - 1 1 1 1 @@ -1964,7 +1971,7 @@ mach_pes env_mach_pes.xml - Number of instances for each component + Number of instances for each component in single coupler mode diff --git a/src/drivers/mct/cime_config/namelist_definition_drv.xml b/src/drivers/mct/cime_config/namelist_definition_drv.xml index 78237b864cf..cae879d267e 100644 --- a/src/drivers/mct/cime_config/namelist_definition_drv.xml +++ b/src/drivers/mct/cime_config/namelist_definition_drv.xml @@ -44,15 +44,15 @@ - + integer cime_cpl_inst cime_cpl_inst - Number of CESM coupler instances. + Number of CESM coupler instances. If > 1 then all component instances must equal 1. - $NINST_CPL + $COUPLER_COUNT From 1b543d9255dbacaaba11c3be0117c5f901073e4f Mon Sep 17 00:00:00 2001 From: Jim Edwards Date: Fri, 23 Jun 2017 07:03:38 -0600 Subject: [PATCH 20/51] fix parsing of log files --- .../CIME/SystemTests/system_tests_common.py | 5 ++- scripts/lib/CIME/preview_namelists.py | 32 ++++++++++++------- 2 files changed, 22 insertions(+), 15 deletions(-) diff --git a/scripts/lib/CIME/SystemTests/system_tests_common.py b/scripts/lib/CIME/SystemTests/system_tests_common.py index 5af8ec7df39..3ccc31772bc 100644 --- a/scripts/lib/CIME/SystemTests/system_tests_common.py +++ b/scripts/lib/CIME/SystemTests/system_tests_common.py @@ -368,7 +368,6 @@ def _get_latest_cpl_logs(self): continue if log.endswith(suffix): lastcpllogs.append(log) - return lastcpllogs def _compare_baseline(self): @@ -389,7 +388,7 @@ def _compare_baseline(self): newestcpllogfiles = self._get_latest_cpl_logs() memlist = self._get_mem_usage(newestcpllogfiles[0]) for cpllog in newestcpllogfiles: - m = re.search(r"(cpl.*.log).*.gz",cpllog) + m = re.search(r"/(cpl.*.log).*.gz",cpllog) if m is not None: baselog = os.path.join(basecmp_dir, m.group(1))+".gz" if baselog is None or not os.path.isfile(baselog): @@ -434,7 +433,7 @@ def _generate_baseline(self): # drop the date so that the name is generic newestcpllogfiles = 
self._get_latest_cpl_logs() for cpllog in newestcpllogfiles: - m = re.search(r"(cpl.*.log).*.gz",cpllog) + m = re.search(r"/(cpl.*.log).*.gz",cpllog) if m is not None: baselog = os.path.join(basegen_dir, m.group(1))+".gz" shutil.copyfile(cpllog, diff --git a/scripts/lib/CIME/preview_namelists.py b/scripts/lib/CIME/preview_namelists.py index 5f5e82d62b0..841875f0bae 100644 --- a/scripts/lib/CIME/preview_namelists.py +++ b/scripts/lib/CIME/preview_namelists.py @@ -74,22 +74,30 @@ def create_namelists(case, component=None): model_str = model.lower() config_file = case.get_value("CONFIG_{}_FILE".format(model_str.upper())) config_dir = os.path.dirname(config_file) + # Multicoupler mode (coupler_count > 1) must temporarily change + # NINST and NTASKS settings so that the component buildnml scripts + # will work correctly. After the call to buildnml the original values + # are restored. if model_str == "cpl": compname = "drv" - complist = [m for m in models if m.upper() != "CPL"] - if coupler_count > 1: - xmlfac = {"NINST" : -(coupler_count), "NTASKS" : 1} else: compname = case.get_value("COMP_{}".format(model_str.upper())) - complist = [model_str.upper()] - if coupler_count > 1: - xmlfac = {"NINST" : -(coupler_count), "NTASKS" : coupler_count} - - xmlsave = {} - for k in xmlfac.keys(): - for m in complist: - key = "{}_{}" .format(k, m.upper()) - xmlsave[key] = case.get_value(key) + + if coupler_count > 1: + if model_str == "cpl": + complist = [m for m in models if m.upper() != "CPL"] + if coupler_count > 1: + xmlfac = {"NINST" : -(coupler_count), "NTASKS" : 1} + else: + complist = [model_str.upper()] + if coupler_count > 1: + xmlfac = {"NINST" : -(coupler_count), "NTASKS" : coupler_count} + + xmlsave = {} + for k in xmlfac.keys(): + for m in complist: + key = "{}_{}" .format(k, m.upper()) + xmlsave[key] = case.get_value(key) if component is None or component == model_str: # first look in the case SourceMods directory From 198ee768e7b9cf2c35ccf930538eb9dbc5a43af0 Mon Sep 
17 00:00:00 2001 From: Jim Edwards Date: Fri, 23 Jun 2017 07:24:24 -0600 Subject: [PATCH 21/51] update documentation --- src/drivers/mct/cime_config/config_component.xml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/drivers/mct/cime_config/config_component.xml b/src/drivers/mct/cime_config/config_component.xml index 55454a6e359..cd29abc59fb 100644 --- a/src/drivers/mct/cime_config/config_component.xml +++ b/src/drivers/mct/cime_config/config_component.xml @@ -1954,7 +1954,7 @@ 1 mach_pes env_mach_pes.xml - Number of couplers in ensemble: if COUPLER_COUNT > 1 then NINST must be 1 for all components. + Number of couplers in ensemble: if COUPLER_COUNT > 1 then NINST must be 1 for all components. Each coupler instance is an instance of the entier model. @@ -1971,7 +1971,9 @@ mach_pes env_mach_pes.xml - Number of instances for each component in single coupler mode + Number of instances for each component in single coupler mode. + In multiple coupler mode (COUPLER_COUNT > 1) this must be 1 and + there are COUPLER_COUNT instances of the entire model. From 47a19c7779eeb32aedb0c69ae942b4b04696ecbd Mon Sep 17 00:00:00 2001 From: Jim Edwards Date: Fri, 23 Jun 2017 08:08:39 -0600 Subject: [PATCH 22/51] fix typo --- src/drivers/mct/cime_config/config_component.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/drivers/mct/cime_config/config_component.xml b/src/drivers/mct/cime_config/config_component.xml index cd29abc59fb..6f56d0b1adc 100644 --- a/src/drivers/mct/cime_config/config_component.xml +++ b/src/drivers/mct/cime_config/config_component.xml @@ -1954,7 +1954,7 @@ 1 mach_pes env_mach_pes.xml - Number of couplers in ensemble: if COUPLER_COUNT > 1 then NINST must be 1 for all components. Each coupler instance is an instance of the entier model. + Number of couplers in ensemble: if COUPLER_COUNT > 1 then NINST must be 1 for all components. Each coupler instance is an instance of the entire model. 
From 2aa42c042802089045af73cd5c7ce9a2d42661b2 Mon Sep 17 00:00:00 2001 From: Jim Edwards Date: Fri, 23 Jun 2017 11:55:18 -0600 Subject: [PATCH 23/51] fix issue with assigning ninst --- scripts/lib/CIME/XML/env_mach_pes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/lib/CIME/XML/env_mach_pes.py b/scripts/lib/CIME/XML/env_mach_pes.py index 213f304cbf9..35910052629 100644 --- a/scripts/lib/CIME/XML/env_mach_pes.py +++ b/scripts/lib/CIME/XML/env_mach_pes.py @@ -25,7 +25,7 @@ def set_value(self, vid, value, subgroup=None, ignore_type=False): Returns the value or None if not found subgroup is ignored in the general routine and applied in specific methods """ - vid, comp, iscompvar = self.check_if_comp_var(vid, None) + nvid, comp, iscompvar = self.check_if_comp_var(vid, None) if vid == "COUPLER_COUNT": if value > 1: for othercomp in self._components: @@ -36,7 +36,7 @@ def set_value(self, vid, value, subgroup=None, ignore_type=False): elif value < 0: # negative value effectively overrides safety check value = -value - elif vid == "NINST": + elif nvid == "NINST": if value > 1: coupler_count = self.get_value("COUPLER_COUNT") expect(coupler_count == 1,"Cannot change NINST value if COUPLER_COUNT > 1") From f6bd631a8927b06a24e61133f11ab64e03fb0b1f Mon Sep 17 00:00:00 2001 From: Jim Edwards Date: Mon, 26 Jun 2017 07:47:35 -0600 Subject: [PATCH 24/51] Revert "refactor based on suggestions" This reverts commit fbc5bbaf980c4bd3ccccba9937936c3f5e9b1d82. 
Conflicts: scripts/lib/CIME/XML/env_mach_pes.py scripts/lib/CIME/preview_namelists.py src/drivers/mct/cime_config/config_component.xml --- config/config_tests.xml | 2 +- scripts/lib/CIME/SystemTests/mcc.py | 4 +- scripts/lib/CIME/XML/env_mach_pes.py | 48 +++++++++++-------- scripts/lib/CIME/build.py | 6 +-- scripts/lib/CIME/case.py | 4 +- scripts/lib/CIME/case_run.py | 8 ++-- scripts/lib/CIME/case_setup.py | 9 ++-- scripts/lib/CIME/case_st_archive.py | 7 +-- scripts/lib/CIME/get_timing.py | 2 +- scripts/lib/CIME/preview_namelists.py | 31 ++++++------ src/drivers/mct/cime_config/buildnml | 6 +-- .../mct/cime_config/config_component.xml | 17 ++++--- .../cime_config/namelist_definition_drv.xml | 6 +-- 13 files changed, 77 insertions(+), 73 deletions(-) diff --git a/config/config_tests.xml b/config/config_tests.xml index 9fa7ca1fa5e..bcd33a90285 100644 --- a/config/config_tests.xml +++ b/config/config_tests.xml @@ -525,7 +525,7 @@ NODEFAIL Tests restart upon detected node failure. Generates fake failu none $STOP_OPTION $STOP_N - 1 + 1 1 1 1 diff --git a/scripts/lib/CIME/SystemTests/mcc.py b/scripts/lib/CIME/SystemTests/mcc.py index 2db5706de5b..872fb1eb09c 100644 --- a/scripts/lib/CIME/SystemTests/mcc.py +++ b/scripts/lib/CIME/SystemTests/mcc.py @@ -26,9 +26,9 @@ def __init__(self, case): def _case_one_setup(self): # The multicoupler case will increase the number of tasks by the # number of requested couplers. 
- self._case.set_value("COUPLER_COUNT", self._test_instances) + self._case.set_value("NINST_CPL", self._test_instances) case_setup(self._case, test_mode=False, reset=True) def _case_two_setup(self): - self._case.set_value("COUPLER_COUNT", 1) + self._case.set_value("NINST_CPL", 1) case_setup(self._case, test_mode=True, reset=True) diff --git a/scripts/lib/CIME/XML/env_mach_pes.py b/scripts/lib/CIME/XML/env_mach_pes.py index 35910052629..d7914b12058 100644 --- a/scripts/lib/CIME/XML/env_mach_pes.py +++ b/scripts/lib/CIME/XML/env_mach_pes.py @@ -25,25 +25,35 @@ def set_value(self, vid, value, subgroup=None, ignore_type=False): Returns the value or None if not found subgroup is ignored in the general routine and applied in specific methods """ + oldcomps = self._components nvid, comp, iscompvar = self.check_if_comp_var(vid, None) - if vid == "COUPLER_COUNT": - if value > 1: - for othercomp in self._components: - if othercomp != "CPL": - ninst_string = "NINST_{}".format(othercomp) - expect(self.get_value(ninst_string)==1, - "Cannot change COUPLER_COUNT value if {} > 1".format(ninst_string)) - elif value < 0: - # negative value effectively overrides safety check - value = -value - elif nvid == "NINST": - if value > 1: - coupler_count = self.get_value("COUPLER_COUNT") - expect(coupler_count == 1,"Cannot change NINST value if COUPLER_COUNT > 1") - elif value < 0: - # negative value effectively overrides safety check - value = -value - return EnvBase.set_value(self, vid, value,subgroup=subgroup, ignore_type=ignore_type) + if nvid == "NINST": + if self.get_value("MULTI_COUPLER"): + self._components = "CPL" + if comp is None: + comp = "CPL" + expect(comp == "CPL","Cannot change {} when MULTI_COUPLER flag is True".format(vid)) + else: + expect(comp is None or comp != "CPL","Cannot change NINST_CPL if MULTI_COUPLER flag is False") + if vid == "MULTI_COUPLER": + oldval = self.get_value("MULTI_COUPLER") + if oldval != value: + newval = EnvBase.set_value(self, vid, 
value,subgroup=subgroup, ignore_type=ignore_type) + if value: + maxinst = 1 + for tcomp in self._components: + tcomp_inst = "NINST_{}".format(tcomp) + maxinst = max(1, self.get_value(tcomp)) + EnvBase.set_value(self,tcomp_inst, 1) + self.set_value("NINST_CPL", maxinst) + else: + ninst = self.get_value("NINST_CPL") + newval = EnvBase.set_value(self, vid, value,subgroup=subgroup, ignore_type=ignore_type) + EnvBase.set_value(self, "NINST", ninst) + else: + newval = EnvBase.set_value(self, vid, value,subgroup=subgroup, ignore_type=ignore_type) + self._components = oldcomps + return newval def get_value(self, vid, attribute=None, resolved=True, subgroup=None, pes_per_node=None): # pylint: disable=arguments-differ value = EnvBase.get_value(self, vid, attribute, resolved, subgroup) @@ -89,7 +99,7 @@ def get_total_tasks(self, comp_classes): pstrid = self.get_value("PSTRID", attribute={"component":comp}) tt = rootpe + (ntasks - 1) * pstrid + 1 total_tasks = max(tt, total_tasks) - total_tasks *= self.get_value("COUPLER_COUNT") + total_tasks *= self.get_value("NINST_CPL") return total_tasks def get_tasks_per_node(self, total_tasks, max_thread_count): diff --git a/scripts/lib/CIME/build.py b/scripts/lib/CIME/build.py index 01eb184f8de..d472d37326c 100644 --- a/scripts/lib/CIME/build.py +++ b/scripts/lib/CIME/build.py @@ -398,16 +398,14 @@ def _case_build_impl(caseroot, case, sharedlib_only, model_only, buildlist): complist = [] for comp_class in comp_classes: + ninst = case.get_value("NINST_{}".format(comp_class)) if comp_class == "CPL": - ninst = case.get_value("COUPLER_COUNT") config_dir = None - expect(ninst is not None,"Failed to get COUPLER_COUNT value") else: - ninst = case.get_value("NINST_{}".format(comp_class)) config_dir = os.path.dirname(case.get_value("CONFIG_{}_FILE".format(comp_class))) - expect(ninst is not None,"Failed to get ninst for comp_class {}".format(comp_class)) comp = case.get_value("COMP_{}".format(comp_class)) thrds = 
case.get_value("NTHRDS_{}".format(comp_class)) + expect(ninst is not None,"Failed to get ninst for comp_class {}".format(comp_class)) complist.append((comp_class.lower(), comp, thrds, ninst, config_dir )) os.environ["COMP_{}".format(comp_class)] = comp diff --git a/scripts/lib/CIME/case.py b/scripts/lib/CIME/case.py index 742b03c23cc..7fdb926b9e3 100644 --- a/scripts/lib/CIME/case.py +++ b/scripts/lib/CIME/case.py @@ -727,7 +727,7 @@ def _setup_mach_pes(self, pecount, ncouplers, ninst, machine_name, mpilib): for compclass in self._component_classes: key = "NINST_{}".format(compclass) if compclass == "CPL": - mach_pes_obj.set_value("COUPLER_COUNT", ncouplers) + mach_pes_obj.set_value(key, ncouplers) continue # ESP models are currently limited to 1 instance if compclass == "ESP": @@ -1086,7 +1086,7 @@ def create_caseroot(self, clone=False): append_status("Component {} is {}".format(component_class, self._component_description[component_class]),"README.case", caseroot=self._caseroot) if component_class == "CPL": append_status("Using %s coupler instances" % - (self.get_value("COUPLER_COUNT")), + (self.get_value("NINST_CPL")), "README.case", caseroot=self._caseroot) continue comp_grid = "{}_GRID".format(component_class) diff --git a/scripts/lib/CIME/case_run.py b/scripts/lib/CIME/case_run.py index d929554b6a8..95e8eb294d0 100644 --- a/scripts/lib/CIME/case_run.py +++ b/scripts/lib/CIME/case_run.py @@ -152,10 +152,10 @@ def post_run_check(case, lid): rundir = case.get_value("RUNDIR") model = case.get_value("MODEL") - coupler_count = case.get_value("COUPLER_COUNT") + cpl_ninst = case.get_value("NINST_CPL") cpl_logs = [] - if coupler_count > 1: - for inst in range(coupler_count): + if cpl_ninst > 1: + for inst in range(cpl_ninst): cpl_logs.append(os.path.join(rundir, "cpl_%04d.log." % (inst+1) + lid)) else: cpl_logs = [os.path.join(rundir, "cpl" + ".log." 
+ lid)] @@ -176,7 +176,7 @@ def post_run_check(case, lid): with open(cpl_logfile, 'r') as fd: if 'SUCCESSFUL TERMINATION' in fd.read(): count_ok += 1 - if count_ok != coupler_count: + if count_ok != cpl_ninst: expect(False, "Model did not complete - see {} \n " .format(cpl_logfile)) ############################################################################### diff --git a/scripts/lib/CIME/case_setup.py b/scripts/lib/CIME/case_setup.py index 2db74f0ebf6..a31db04dae5 100644 --- a/scripts/lib/CIME/case_setup.py +++ b/scripts/lib/CIME/case_setup.py @@ -34,7 +34,7 @@ def _build_usernl_files(case, model, comp): expect(os.path.isdir(model_dir), "cannot find cime_config directory {} for component {}".format(model_dir, comp)) - ninst = case.get_value("COUPLER_COUNT") + ninst = case.get_value("NINST_CPL") if comp == "cpl": if not os.path.exists("user_nl_cpl"): shutil.copy(os.path.join(model_dir, "user_nl_cpl"), ".") @@ -124,14 +124,13 @@ def _case_setup_impl(case, caseroot, clean=False, test_mode=False, reset=False): # In CIME there can be multiple instances of each component model (an ensemble) NINST is the instance of that component. 
for comp in models: if comp == "CPL": - ninst = case.get_value("COUPLER_COUNT") - else: - ninst = case.get_value("NINST_{}".format(comp)) + continue + ninst = case.get_value("NINST_{}".format(comp)) ntasks = case.get_value("NTASKS_{}".format(comp)) # ESP models are currently limited to 1 instance expect((comp != "ESP") or (ninst == 1), "ESP components may only have one instance") - if ninst > ntasks and comp != "CPL": + if ninst > ntasks: if ntasks == 1: case.set_value("NTASKS_{}".format(comp), ninst) else: diff --git a/scripts/lib/CIME/case_st_archive.py b/scripts/lib/CIME/case_st_archive.py index 0e9c0b7b197..c997cbeb107 100644 --- a/scripts/lib/CIME/case_st_archive.py +++ b/scripts/lib/CIME/case_st_archive.py @@ -59,11 +59,8 @@ def _get_datenames(case, last_date=None): ############################################################################### def _get_ninst_info(case, compclass): ############################################################################### - comp = compclass.upper() - if comp == "CPL": - ninst = case.get_value("COUPLER_COUNT") - else: - ninst = case.get_value('NINST_' + compclass.upper()) + + ninst = case.get_value('NINST_' + compclass.upper()) ninst_strings = [] if ninst is None: ninst = 1 diff --git a/scripts/lib/CIME/get_timing.py b/scripts/lib/CIME/get_timing.py index 156526ef148..a00cd26705f 100644 --- a/scripts/lib/CIME/get_timing.py +++ b/scripts/lib/CIME/get_timing.py @@ -93,7 +93,7 @@ def gettime(self, heading_padded): return (0, 0, False) def getTiming(self): - ninst = self.case.get_value("COUPLER_COUNT") + ninst = self.case.get_value("NINST_CPL") if ninst > 1: for inst in range(ninst): self._getTiming(inst+1) diff --git a/scripts/lib/CIME/preview_namelists.py b/scripts/lib/CIME/preview_namelists.py index 841875f0bae..2fa5d1196bc 100644 --- a/scripts/lib/CIME/preview_namelists.py +++ b/scripts/lib/CIME/preview_namelists.py @@ -67,7 +67,7 @@ def create_namelists(case, component=None): # Note - cpl must be last in the loop below 
so that in generating its namelist, # it can use xml vars potentially set by other component's buildnml scripts xmlfac = {} - coupler_count = case.get_value("COUPLER_COUNT") + cpl_ninst = case.get_value("NINST_CPL") models = case.get_values("COMP_CLASSES") models += [models.pop(0)] for model in models: @@ -83,21 +83,20 @@ def create_namelists(case, component=None): else: compname = case.get_value("COMP_{}".format(model_str.upper())) - if coupler_count > 1: - if model_str == "cpl": - complist = [m for m in models if m.upper() != "CPL"] - if coupler_count > 1: - xmlfac = {"NINST" : -(coupler_count), "NTASKS" : 1} - else: - complist = [model_str.upper()] - if coupler_count > 1: - xmlfac = {"NINST" : -(coupler_count), "NTASKS" : coupler_count} - - xmlsave = {} - for k in xmlfac.keys(): - for m in complist: - key = "{}_{}" .format(k, m.upper()) - xmlsave[key] = case.get_value(key) + complist = [m for m in models if m.upper() != "CPL"] + if cpl_ninst > 1: + xmlfac = {"NINST" : cpl_ninst, "NTASKS" : 1} + else: + compname = case.get_value("COMP_{}".format(model_str.upper())) + complist = [model_str.upper()] + if cpl_ninst > 1: + xmlfac = {"NINST" : cpl_ninst, "NTASKS" : cpl_ninst} + + xmlsave = {} + for k in xmlfac.keys(): + for m in complist: + key = "{}_{}" .format(k, m.upper()) + xmlsave[key] = case.get_value(key) if component is None or component == model_str: # first look in the case SourceMods directory diff --git a/src/drivers/mct/cime_config/buildnml b/src/drivers/mct/cime_config/buildnml index 954507d0268..fc06d7f33ed 100755 --- a/src/drivers/mct/cime_config/buildnml +++ b/src/drivers/mct/cime_config/buildnml @@ -254,10 +254,8 @@ def _create_component_modelio_namelists(case, files): config = {} config['component'] = model entries = nmlgen.init_defaults(infiles, config, skip_entry_loop=True) - if model == "cpl": - inst_count = case.get_value("COUPLER_COUNT") - else: - inst_count = case.get_value("NINST_" + model.upper()) + + inst_count = case.get_value("NINST_" + 
model.upper()) inst_string = "" inst_index = 1 diff --git a/src/drivers/mct/cime_config/config_component.xml b/src/drivers/mct/cime_config/config_component.xml index 6f56d0b1adc..ab89ce2f859 100644 --- a/src/drivers/mct/cime_config/config_component.xml +++ b/src/drivers/mct/cime_config/config_component.xml @@ -1949,17 +1949,21 @@ ROOTPE (mpi task in MPI_COMM_WORLD) for each component - - integer - 1 + + logical + False mach_pes env_mach_pes.xml - Number of couplers in ensemble: if COUPLER_COUNT > 1 then NINST must be 1 for all components. Each coupler instance is an instance of the entire model. + MULTI_COUPLER mode provides a separate coupler component for each + ensemble member all components must have an equal number of members. If + MULTI_COUPLER mode is False prognostic components must have the same number + of members but data or stub components may also have 1 member. integer + 1 1 1 1 @@ -1971,9 +1975,8 @@ mach_pes env_mach_pes.xml - Number of instances for each component in single coupler mode. - In multiple coupler mode (COUPLER_COUNT > 1) this must be 1 and - there are COUPLER_COUNT instances of the entire model. + Number of instances for each component. If MULTI_COUPLER is True + only NINST_CPL is used, if MULTI_COUPLER is False NINST_CPL is 1. diff --git a/src/drivers/mct/cime_config/namelist_definition_drv.xml b/src/drivers/mct/cime_config/namelist_definition_drv.xml index cae879d267e..78237b864cf 100644 --- a/src/drivers/mct/cime_config/namelist_definition_drv.xml +++ b/src/drivers/mct/cime_config/namelist_definition_drv.xml @@ -44,15 +44,15 @@ - + integer cime_cpl_inst cime_cpl_inst - Number of CESM coupler instances. If > 1 then all component instances must equal 1. + Number of CESM coupler instances. 
- $COUPLER_COUNT + $NINST_CPL From 998f2820540aaf8a903ea0e030fdbfebf6ef482b Mon Sep 17 00:00:00 2001 From: Jim Edwards Date: Mon, 26 Jun 2017 15:53:29 -0600 Subject: [PATCH 25/51] refactor based on code review --- scripts/create_newcase | 6 +- scripts/lib/CIME/SystemTests/mcc.py | 10 ++-- scripts/lib/CIME/XML/env_mach_pes.py | 57 +++++++++++++------ scripts/lib/CIME/case.py | 6 +- scripts/lib/CIME/preview_namelists.py | 29 +++------- scripts/lib/CIME/test_scheduler.py | 2 +- src/drivers/mct/cime_config/buildnml | 3 +- .../mct/cime_config/config_component.xml | 3 +- 8 files changed, 65 insertions(+), 51 deletions(-) diff --git a/scripts/create_newcase b/scripts/create_newcase index 32719fa568f..b8d421647b5 100755 --- a/scripts/create_newcase +++ b/scripts/create_newcase @@ -46,13 +46,13 @@ OR help="Specify a compiler. " "To see list of supported compilers for each machine, use the utility query_config in this directory") - parser.add_argument("--ninst-couplers",action="store_true", + parser.add_argument("--multi-coupler",action="store_true", help="Specify that ninst should modify number of coupler instances " "default is to have one coupler supporting multiple component instances.") parser.add_argument("--ninst",default=1, help="Specify number of model ensemble instances. " - "Default is multiple components and one coupler. Use --ninst-couplers to " + "Default is multiple components and one coupler. 
Use --multi-coupler to " "run multiple couplers in the ensemble.") parser.add_argument("--mpilib", "-mpilib", @@ -157,7 +157,7 @@ OR if args.input_dir is not None: args.input_dir = os.path.abspath(args.input_dir) - if args.ninst_couplers: + if args.multi_coupler: ncouplers = args.ninst ninst = 1 else: diff --git a/scripts/lib/CIME/SystemTests/mcc.py b/scripts/lib/CIME/SystemTests/mcc.py index 872fb1eb09c..b6220231584 100644 --- a/scripts/lib/CIME/SystemTests/mcc.py +++ b/scripts/lib/CIME/SystemTests/mcc.py @@ -2,8 +2,7 @@ Implemetation of CIME MCC test: Compares ensemble methods This does two runs: In the first we run a three member ensemble using the -original multi component single coupler method and in the second we use -the new multi coupler method. We then compare results with the expectation that they are bfb + MULTI_COUPLER capability, then we run a second single instance case and compare """ from CIME.XML.standard_module_setup import * from CIME.SystemTests.system_tests_compare_two import SystemTestsCompareTwo @@ -19,13 +18,14 @@ def __init__(self, case): self._test_instances = 3 SystemTestsCompareTwo.__init__(self, case, separate_builds = False, - run_two_suffix = 'multicoupler', - run_one_description = 'single instance', - run_two_description = 'multi coupler') + run_two_suffix = 'single_instance', + run_two_description = 'single instance', + run_one_description = 'multi coupler') def _case_one_setup(self): # The multicoupler case will increase the number of tasks by the # number of requested couplers. 
+ self._case.set_value("MULTI_COUPLER",True) self._case.set_value("NINST_CPL", self._test_instances) case_setup(self._case, test_mode=False, reset=True) diff --git a/scripts/lib/CIME/XML/env_mach_pes.py b/scripts/lib/CIME/XML/env_mach_pes.py index d7914b12058..9ba313821c3 100644 --- a/scripts/lib/CIME/XML/env_mach_pes.py +++ b/scripts/lib/CIME/XML/env_mach_pes.py @@ -25,31 +25,51 @@ def set_value(self, vid, value, subgroup=None, ignore_type=False): Returns the value or None if not found subgroup is ignored in the general routine and applied in specific methods """ - oldcomps = self._components + oldcomps = self._components[:] nvid, comp, iscompvar = self.check_if_comp_var(vid, None) if nvid == "NINST": if self.get_value("MULTI_COUPLER"): - self._components = "CPL" + self._components = ["CPL"] if comp is None: comp = "CPL" - expect(comp == "CPL","Cannot change {} when MULTI_COUPLER flag is True".format(vid)) - else: - expect(comp is None or comp != "CPL","Cannot change NINST_CPL if MULTI_COUPLER flag is False") + expect(comp == "CPL" or value == 1,"Cannot change {} when MULTI_COUPLER flag is TRUE".format(vid)) + elif value != 1: + if 'CPL' in self._components: + self._components.remove('CPL') + if 'ESP' in self._components and value != 1: + self._components.remove('ESP') + expect(comp is None or comp != "CPL","Cannot change NINST_CPL if MULTI_COUPLER flag is FALSE") + # Toggling the if vid == "MULTI_COUPLER": oldval = self.get_value("MULTI_COUPLER") - if oldval != value: - newval = EnvBase.set_value(self, vid, value,subgroup=subgroup, ignore_type=ignore_type) - if value: - maxinst = 1 - for tcomp in self._components: - tcomp_inst = "NINST_{}".format(tcomp) - maxinst = max(1, self.get_value(tcomp)) - EnvBase.set_value(self,tcomp_inst, 1) + newval = EnvBase.set_value(self, vid, value,subgroup=subgroup, ignore_type=ignore_type) + if value: + maxinst = 1 + for tcomp in self._components: + if tcomp in ["CPL"]: + continue + tcomp_inst = "NINST_{}".format(tcomp) + maxinst 
= max(maxinst, self.get_value(tcomp_inst)) + EnvBase.set_value(self,tcomp_inst, 1) self.set_value("NINST_CPL", maxinst) - else: - ninst = self.get_value("NINST_CPL") - newval = EnvBase.set_value(self, vid, value,subgroup=subgroup, ignore_type=ignore_type) - EnvBase.set_value(self, "NINST", ninst) + else: + ninst = self.get_value("NINST_CPL") + newval = EnvBase.set_value(self, vid, value,subgroup=subgroup, ignore_type=ignore_type) + cpl_index = None + esp_index = None + if 'CPL' in self._components: + cpl_index = self._components.index('CPL') + del self._components[cpl_index] + if 'ESP' in self._components: + esp_index = self._components.index('ESP') + del self._components[esp_index] + + EnvBase.set_value(self, "NINST", ninst) + if cpl_index is not None: + self._components.insert(cpl_index,'CPL') + if esp_index is not None: + self._components.insert(esp_index,'ESP') + EnvBase.set_value(self, "NINST_CPL", 1) else: newval = EnvBase.set_value(self, vid, value,subgroup=subgroup, ignore_type=ignore_type) self._components = oldcomps @@ -99,7 +119,8 @@ def get_total_tasks(self, comp_classes): pstrid = self.get_value("PSTRID", attribute={"component":comp}) tt = rootpe + (ntasks - 1) * pstrid + 1 total_tasks = max(tt, total_tasks) - total_tasks *= self.get_value("NINST_CPL") + if self.get_value("MULTI_COUPLER"): + total_tasks *= self.get_value("NINST_CPL") return total_tasks def get_tasks_per_node(self, total_tasks, max_thread_count): diff --git a/scripts/lib/CIME/case.py b/scripts/lib/CIME/case.py index 7fdb926b9e3..3122f27df70 100644 --- a/scripts/lib/CIME/case.py +++ b/scripts/lib/CIME/case.py @@ -727,12 +727,14 @@ def _setup_mach_pes(self, pecount, ncouplers, ninst, machine_name, mpilib): for compclass in self._component_classes: key = "NINST_{}".format(compclass) if compclass == "CPL": - mach_pes_obj.set_value(key, ncouplers) + if ncouplers > 1: + mach_pes_obj.set_value("MULTI_COUPLER", True) + mach_pes_obj.set_value(key, ncouplers) continue # ESP models are currently 
limited to 1 instance if compclass == "ESP": mach_pes_obj.set_value(key, 1) - else: + elif ncouplers == 1: mach_pes_obj.set_value(key, ninst) key = "NTASKS_{}".format(compclass) diff --git a/scripts/lib/CIME/preview_namelists.py b/scripts/lib/CIME/preview_namelists.py index 2fa5d1196bc..81071e50649 100644 --- a/scripts/lib/CIME/preview_namelists.py +++ b/scripts/lib/CIME/preview_namelists.py @@ -74,30 +74,17 @@ def create_namelists(case, component=None): model_str = model.lower() config_file = case.get_value("CONFIG_{}_FILE".format(model_str.upper())) config_dir = os.path.dirname(config_file) - # Multicoupler mode (coupler_count > 1) must temporarily change - # NINST and NTASKS settings so that the component buildnml scripts - # will work correctly. After the call to buildnml the original values - # are restored. + multicoupler = case.get_value("MULTI_COUPLER") if model_str == "cpl": compname = "drv" else: compname = case.get_value("COMP_{}".format(model_str.upper())) - - complist = [m for m in models if m.upper() != "CPL"] - if cpl_ninst > 1: - xmlfac = {"NINST" : cpl_ninst, "NTASKS" : 1} - else: - compname = case.get_value("COMP_{}".format(model_str.upper())) - complist = [model_str.upper()] - if cpl_ninst > 1: - xmlfac = {"NINST" : cpl_ninst, "NTASKS" : cpl_ninst} - - xmlsave = {} - for k in xmlfac.keys(): - for m in complist: - key = "{}_{}" .format(k, m.upper()) - xmlsave[key] = case.get_value(key) - + # Multicoupler mode (MULTI_COUPLER ) must temporarily change + # NINST and NTASKS settings so that the component buildnml scripts + # will work correctly. After the call to buildnml the original values + # are restored. 
+ if multicoupler and model_str != "cpl": + case.set_value("MULTI_COUPLER",False) if component is None or component == model_str: # first look in the case SourceMods directory cmd = os.path.join(caseroot, "SourceMods", "src."+compname, "buildnml") @@ -109,6 +96,8 @@ def create_namelists(case, component=None): expect(os.path.isfile(cmd), "Could not find buildnml file for component {}".format(compname)) run_sub_or_cmd(cmd, (caseroot), "buildnml", (case, caseroot, compname), case=case) + if multicoupler and model_str != "cpl": + case.set_value("MULTI_COUPLER",True) logger.info("Finished creating component namelists") # Save namelists to docdir diff --git a/scripts/lib/CIME/test_scheduler.py b/scripts/lib/CIME/test_scheduler.py index adad7690899..5514ae6e90d 100644 --- a/scripts/lib/CIME/test_scheduler.py +++ b/scripts/lib/CIME/test_scheduler.py @@ -411,7 +411,7 @@ def _create_newcase_phase(self, test): if case_opt.startswith('C'): expect(ninst == 1,"Cannot combine _C and _N options") ncpl = case_opt[1:] - create_newcase_cmd += " --ninst {} --ninst-couplers" .format(ncpl) + create_newcase_cmd += " --ninst {} --multi-coupler" .format(ncpl) logger.debug (" NCPL set to {}" .format(ncpl)) if case_opt.startswith('P'): pesize = case_opt[1:] diff --git a/src/drivers/mct/cime_config/buildnml b/src/drivers/mct/cime_config/buildnml index fc06d7f33ed..c5b5f35e479 100755 --- a/src/drivers/mct/cime_config/buildnml +++ b/src/drivers/mct/cime_config/buildnml @@ -248,8 +248,9 @@ def _create_component_modelio_namelists(case, files): confdir = os.path.join(case.get_value("CASEBUILD"), "cplconf") lid = os.environ["LID"] if "LID" in os.environ else get_timestamp("%y%m%d-%H%M%S") - models = ["cpl", "atm", "lnd", "ice", "ocn", "glc", "rof", "wav", "esp"] + models = case.get_values("COMP_CLASSES") for model in models: + model = model.lower() with NamelistGenerator(case, definition_file) as nmlgen: config = {} config['component'] = model diff --git 
a/src/drivers/mct/cime_config/config_component.xml b/src/drivers/mct/cime_config/config_component.xml index ab89ce2f859..d63299fa74b 100644 --- a/src/drivers/mct/cime_config/config_component.xml +++ b/src/drivers/mct/cime_config/config_component.xml @@ -1951,7 +1951,8 @@ logical - False + FALSE + TRUE,FALSE mach_pes env_mach_pes.xml MULTI_COUPLER mode provides a separate coupler component for each From 217d0f29143e99913397cdc28525f2e1a9572f85 Mon Sep 17 00:00:00 2001 From: Jim Edwards Date: Mon, 26 Jun 2017 16:03:49 -0600 Subject: [PATCH 26/51] fix pylint issues --- scripts/Tools/check_lockedfiles | 2 +- scripts/lib/CIME/XML/env_mach_pes.py | 3 +-- scripts/lib/CIME/preview_namelists.py | 2 -- 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/scripts/Tools/check_lockedfiles b/scripts/Tools/check_lockedfiles index 3864f7d9cb8..852ec72c20e 100755 --- a/scripts/Tools/check_lockedfiles +++ b/scripts/Tools/check_lockedfiles @@ -41,7 +41,7 @@ def _main_func(description): caseroot = parse_command_line(sys.argv, description) - with Case(read_only=True) as case: + with Case(case_root=caseroot, read_only=True) as case: check_lockedfiles(case) if __name__ == "__main__": diff --git a/scripts/lib/CIME/XML/env_mach_pes.py b/scripts/lib/CIME/XML/env_mach_pes.py index 9ba313821c3..ac26295d013 100644 --- a/scripts/lib/CIME/XML/env_mach_pes.py +++ b/scripts/lib/CIME/XML/env_mach_pes.py @@ -26,7 +26,7 @@ def set_value(self, vid, value, subgroup=None, ignore_type=False): subgroup is ignored in the general routine and applied in specific methods """ oldcomps = self._components[:] - nvid, comp, iscompvar = self.check_if_comp_var(vid, None) + nvid, comp, _ = self.check_if_comp_var(vid, None) if nvid == "NINST": if self.get_value("MULTI_COUPLER"): self._components = ["CPL"] @@ -41,7 +41,6 @@ def set_value(self, vid, value, subgroup=None, ignore_type=False): expect(comp is None or comp != "CPL","Cannot change NINST_CPL if MULTI_COUPLER flag is FALSE") # Toggling the if vid == 
"MULTI_COUPLER": - oldval = self.get_value("MULTI_COUPLER") newval = EnvBase.set_value(self, vid, value,subgroup=subgroup, ignore_type=ignore_type) if value: maxinst = 1 diff --git a/scripts/lib/CIME/preview_namelists.py b/scripts/lib/CIME/preview_namelists.py index 81071e50649..5efc021ce64 100644 --- a/scripts/lib/CIME/preview_namelists.py +++ b/scripts/lib/CIME/preview_namelists.py @@ -66,8 +66,6 @@ def create_namelists(case, component=None): # Create namelists - must have cpl last in the list below # Note - cpl must be last in the loop below so that in generating its namelist, # it can use xml vars potentially set by other component's buildnml scripts - xmlfac = {} - cpl_ninst = case.get_value("NINST_CPL") models = case.get_values("COMP_CLASSES") models += [models.pop(0)] for model in models: From c0a8aa4cd9026f58d12c9c02a117376cef5fa945 Mon Sep 17 00:00:00 2001 From: Jim Edwards Date: Tue, 27 Jun 2017 10:07:44 -0600 Subject: [PATCH 27/51] fix issue in MCC test --- config/config_tests.xml | 10 ++-------- scripts/lib/CIME/preview_namelists.py | 1 + src/drivers/mct/cime_config/buildnml | 10 +++++++--- 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/config/config_tests.xml b/config/config_tests.xml index bcd33a90285..46eb4ef26f2 100644 --- a/config/config_tests.xml +++ b/config/config_tests.xml @@ -525,14 +525,8 @@ NODEFAIL Tests restart upon detected node failure. Generates fake failu none $STOP_OPTION $STOP_N - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 + TRUE + 3 diff --git a/scripts/lib/CIME/preview_namelists.py b/scripts/lib/CIME/preview_namelists.py index 5efc021ce64..fff57c5e3ea 100644 --- a/scripts/lib/CIME/preview_namelists.py +++ b/scripts/lib/CIME/preview_namelists.py @@ -83,6 +83,7 @@ def create_namelists(case, component=None): # are restored. 
if multicoupler and model_str != "cpl": case.set_value("MULTI_COUPLER",False) + print "HERE NINST_{} = {}".format(model_str.upper(), case.get_value("NINST_{}".format(model_str.upper()))) if component is None or component == model_str: # first look in the case SourceMods directory cmd = os.path.join(caseroot, "SourceMods", "src."+compname, "buildnml") diff --git a/src/drivers/mct/cime_config/buildnml b/src/drivers/mct/cime_config/buildnml index c5b5f35e479..4ada65835bc 100755 --- a/src/drivers/mct/cime_config/buildnml +++ b/src/drivers/mct/cime_config/buildnml @@ -247,7 +247,7 @@ def _create_component_modelio_namelists(case, files): confdir = os.path.join(case.get_value("CASEBUILD"), "cplconf") lid = os.environ["LID"] if "LID" in os.environ else get_timestamp("%y%m%d-%H%M%S") - + inst_cpl = case.get_value("NINST_CPL") models = case.get_values("COMP_CLASSES") for model in models: model = model.lower() @@ -255,8 +255,12 @@ def _create_component_modelio_namelists(case, files): config = {} config['component'] = model entries = nmlgen.init_defaults(infiles, config, skip_entry_loop=True) - - inst_count = case.get_value("NINST_" + model.upper()) + if inst_cpl == 1: + inst_count = case.get_value("NINST_" + model.upper()) + elif model != 'ESP': + inst_count = inst_cpl + else: + inst_count = 1 inst_string = "" inst_index = 1 From b30e386a6776d8c13c7697978026014013769911 Mon Sep 17 00:00:00 2001 From: Jim Edwards Date: Tue, 27 Jun 2017 10:12:45 -0600 Subject: [PATCH 28/51] remove debug print statement --- scripts/lib/CIME/preview_namelists.py | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/lib/CIME/preview_namelists.py b/scripts/lib/CIME/preview_namelists.py index fff57c5e3ea..5efc021ce64 100644 --- a/scripts/lib/CIME/preview_namelists.py +++ b/scripts/lib/CIME/preview_namelists.py @@ -83,7 +83,6 @@ def create_namelists(case, component=None): # are restored. 
if multicoupler and model_str != "cpl": case.set_value("MULTI_COUPLER",False) - print "HERE NINST_{} = {}".format(model_str.upper(), case.get_value("NINST_{}".format(model_str.upper()))) if component is None or component == model_str: # first look in the case SourceMods directory cmd = os.path.join(caseroot, "SourceMods", "src."+compname, "buildnml") From c0ee673721dac15420b408ce7d29088fb9c7fdbb Mon Sep 17 00:00:00 2001 From: Jim Edwards Date: Tue, 27 Jun 2017 14:51:24 -0600 Subject: [PATCH 29/51] update comments based on review --- scripts/lib/CIME/XML/generic_xml.py | 4 +++- scripts/lib/CIME/case_run.py | 4 ++-- scripts/lib/CIME/preview_namelists.py | 8 ++++---- src/drivers/mct/cime_config/config_component.xml | 3 ++- 4 files changed, 11 insertions(+), 8 deletions(-) diff --git a/scripts/lib/CIME/XML/generic_xml.py b/scripts/lib/CIME/XML/generic_xml.py index 4e30414d0ca..23ea46ea551 100644 --- a/scripts/lib/CIME/XML/generic_xml.py +++ b/scripts/lib/CIME/XML/generic_xml.py @@ -5,7 +5,7 @@ from CIME.XML.standard_module_setup import * from distutils.spawn import find_executable from xml.dom import minidom - +from CIME.utils import append_status import getpass logger = logging.getLogger(__name__) @@ -82,6 +82,8 @@ def write(self, outfile=None): doc = minidom.parseString(xmlstr) with open(outfile,'w') as xmlout: doc.writexml(xmlout,addindent=' ') + append_status("Flush completed at {}".format(get_timestamp()), CaseStatus) + def get_node(self, nodename, attributes=None, root=None, xpath=None): """ diff --git a/scripts/lib/CIME/case_run.py b/scripts/lib/CIME/case_run.py index 95e8eb294d0..216dd90190c 100644 --- a/scripts/lib/CIME/case_run.py +++ b/scripts/lib/CIME/case_run.py @@ -1,6 +1,6 @@ from CIME.XML.standard_module_setup import * from CIME.case_submit import submit -from CIME.utils import gzip_existing_file, new_lid, run_and_log_case_status +from CIME.utils import gzip_existing_file, new_lid, run_and_log_case_status, get_timestamp from CIME.check_lockedfiles 
import check_lockedfiles from CIME.get_timing import get_timing from CIME.provenance import save_prerun_provenance, save_postrun_provenance @@ -255,7 +255,7 @@ def case_run(case, skip_pnl=False): "You are not calling the run script via the submit script. " "As a result, short-term archiving will not be called automatically." "Please submit your run using the submit script like so:" - " ./case.submit") + " ./case.submit Time: {}".format(get_timestamp()) # Forces user to use case.submit if they re-submit if case.get_value("TESTCASE") is None: diff --git a/scripts/lib/CIME/preview_namelists.py b/scripts/lib/CIME/preview_namelists.py index 5efc021ce64..feb00b6c970 100644 --- a/scripts/lib/CIME/preview_namelists.py +++ b/scripts/lib/CIME/preview_namelists.py @@ -77,10 +77,10 @@ def create_namelists(case, component=None): compname = "drv" else: compname = case.get_value("COMP_{}".format(model_str.upper())) - # Multicoupler mode (MULTI_COUPLER ) must temporarily change - # NINST and NTASKS settings so that the component buildnml scripts - # will work correctly. After the call to buildnml the original values - # are restored. + # We must temporarily toggle out of Multicoupler mode (MULTI_COUPLER ) + # so that the component build namelists use the correct number of instances + # we can get rid of this hack when all of the components understand MULTI_COUPLER + # mode if multicoupler and model_str != "cpl": case.set_value("MULTI_COUPLER",False) if component is None or component == model_str: diff --git a/src/drivers/mct/cime_config/config_component.xml b/src/drivers/mct/cime_config/config_component.xml index d63299fa74b..be0cfec3bd7 100644 --- a/src/drivers/mct/cime_config/config_component.xml +++ b/src/drivers/mct/cime_config/config_component.xml @@ -1977,7 +1977,8 @@ mach_pes env_mach_pes.xml Number of instances for each component. If MULTI_COUPLER is True - only NINST_CPL is used, if MULTI_COUPLER is False NINST_CPL is 1. 
+ only NINST_CPL is used and all components have NINST_CPL instances; + if MULTI_COUPLER is False NINST_CPL is 1. From 5f1e939e7f6f00ff37744ea040948961a59a46e6 Mon Sep 17 00:00:00 2001 From: Jim Edwards Date: Tue, 27 Jun 2017 16:27:06 -0600 Subject: [PATCH 30/51] remove NINST_CPL = another major refactor" --- config/config_tests.xml | 1 - scripts/create_newcase | 18 +-- scripts/lib/CIME/SystemTests/mcc.py | 5 +- scripts/lib/CIME/XML/env_mach_pes.py | 119 ++++++++++-------- scripts/lib/CIME/XML/generic_xml.py | 3 +- scripts/lib/CIME/build.py | 11 +- scripts/lib/CIME/buildnml.py | 2 +- scripts/lib/CIME/case.py | 23 ++-- scripts/lib/CIME/case_run.py | 2 +- scripts/lib/CIME/case_setup.py | 11 +- scripts/lib/CIME/preview_namelists.py | 8 -- src/drivers/mct/cime_config/buildnml | 11 +- .../mct/cime_config/config_component.xml | 1 - .../cime_config/namelist_definition_drv.xml | 4 +- 14 files changed, 114 insertions(+), 105 deletions(-) diff --git a/config/config_tests.xml b/config/config_tests.xml index 46eb4ef26f2..a21b2c98454 100644 --- a/config/config_tests.xml +++ b/config/config_tests.xml @@ -526,7 +526,6 @@ NODEFAIL Tests restart upon detected node failure. 
Generates fake failu $STOP_OPTION $STOP_N TRUE - 3 diff --git a/scripts/create_newcase b/scripts/create_newcase index b8d421647b5..8cee8598c80 100755 --- a/scripts/create_newcase +++ b/scripts/create_newcase @@ -157,19 +157,11 @@ OR if args.input_dir is not None: args.input_dir = os.path.abspath(args.input_dir) - if args.multi_coupler: - ncouplers = args.ninst - ninst = 1 - else: - ncouplers = 1 - ninst = args.ninst - - return args.case, args.compset, args.res, args.machine, args.compiler,\ args.mpilib, args.project, args.pecount, \ args.user_mods_dir, args.pesfile, \ - args.user_grid, args.gridfile, args.srcroot, args.test, ncouplers, \ - ninst, args.walltime, args.queue, args.output_root, args.script_root, \ + args.user_grid, args.gridfile, args.srcroot, args.test, args.multi_coupler, \ + args.ninst, args.walltime, args.queue, args.output_root, args.script_root, \ run_unsupported, args.answer, args.input_dir ############################################################################### @@ -180,8 +172,8 @@ def _main_func(description): casename, compset, grid, machine, compiler, \ mpilib, project, pecount, \ user_mods_dir, pesfile, \ - user_grid, gridfile, srcroot, test, ncouplers, ninst, walltime, queue, \ - output_root, script_root, run_unsupported, \ + user_grid, gridfile, srcroot, test, multi_coupler, ninst, walltime, \ + queue, output_root, script_root, run_unsupported, \ answer, input_dir = parse_command_line(sys.argv, cimeroot, description) if script_root is None: @@ -204,7 +196,7 @@ def _main_func(description): machine_name=machine, project=project, pecount=pecount, compiler=compiler, mpilib=mpilib, pesfile=pesfile,user_grid=user_grid, gridfile=gridfile, - ncouplers=ncouplers, ninst=ninst, test=test, + multi_coupler=multi_coupler, ninst=ninst, test=test, walltime=walltime, queue=queue, output_root=output_root, run_unsupported=run_unsupported, answer=answer, input_dir=input_dir) diff --git a/scripts/lib/CIME/SystemTests/mcc.py 
b/scripts/lib/CIME/SystemTests/mcc.py index b6220231584..abdd74a9315 100644 --- a/scripts/lib/CIME/SystemTests/mcc.py +++ b/scripts/lib/CIME/SystemTests/mcc.py @@ -26,9 +26,10 @@ def _case_one_setup(self): # The multicoupler case will increase the number of tasks by the # number of requested couplers. self._case.set_value("MULTI_COUPLER",True) - self._case.set_value("NINST_CPL", self._test_instances) + self._case.set_value("NINST", self._test_instances) + self._case.set_value("NINST_ESP", 1) case_setup(self._case, test_mode=False, reset=True) def _case_two_setup(self): - self._case.set_value("NINST_CPL", 1) + self._case.set_value("NINST", 1) case_setup(self._case, test_mode=True, reset=True) diff --git a/scripts/lib/CIME/XML/env_mach_pes.py b/scripts/lib/CIME/XML/env_mach_pes.py index ac26295d013..f656cd31f1a 100644 --- a/scripts/lib/CIME/XML/env_mach_pes.py +++ b/scripts/lib/CIME/XML/env_mach_pes.py @@ -19,62 +19,69 @@ def __init__(self, case_root=None, infile="env_mach_pes.xml", components=None): schema = os.path.join(get_cime_root(), "config", "xml_schemas", "env_mach_pes.xsd") EnvBase.__init__(self, case_root, infile, schema=schema) - def set_value(self, vid, value, subgroup=None, ignore_type=False): - """ - Set the value of an entry-id field to value - Returns the value or None if not found - subgroup is ignored in the general routine and applied in specific methods - """ - oldcomps = self._components[:] - nvid, comp, _ = self.check_if_comp_var(vid, None) - if nvid == "NINST": - if self.get_value("MULTI_COUPLER"): - self._components = ["CPL"] - if comp is None: - comp = "CPL" - expect(comp == "CPL" or value == 1,"Cannot change {} when MULTI_COUPLER flag is TRUE".format(vid)) - elif value != 1: - if 'CPL' in self._components: - self._components.remove('CPL') - if 'ESP' in self._components and value != 1: - self._components.remove('ESP') - expect(comp is None or comp != "CPL","Cannot change NINST_CPL if MULTI_COUPLER flag is FALSE") - # Toggling the - if vid == 
"MULTI_COUPLER": - newval = EnvBase.set_value(self, vid, value,subgroup=subgroup, ignore_type=ignore_type) - if value: - maxinst = 1 - for tcomp in self._components: - if tcomp in ["CPL"]: - continue - tcomp_inst = "NINST_{}".format(tcomp) - maxinst = max(maxinst, self.get_value(tcomp_inst)) - EnvBase.set_value(self,tcomp_inst, 1) - self.set_value("NINST_CPL", maxinst) - else: - ninst = self.get_value("NINST_CPL") - newval = EnvBase.set_value(self, vid, value,subgroup=subgroup, ignore_type=ignore_type) - cpl_index = None - esp_index = None - if 'CPL' in self._components: - cpl_index = self._components.index('CPL') - del self._components[cpl_index] - if 'ESP' in self._components: - esp_index = self._components.index('ESP') - del self._components[esp_index] - - EnvBase.set_value(self, "NINST", ninst) - if cpl_index is not None: - self._components.insert(cpl_index,'CPL') - if esp_index is not None: - self._components.insert(esp_index,'ESP') - EnvBase.set_value(self, "NINST_CPL", 1) - else: - newval = EnvBase.set_value(self, vid, value,subgroup=subgroup, ignore_type=ignore_type) - self._components = oldcomps - return newval + # def set_value(self, vid, value, subgroup=None, ignore_type=False): + # """ + # Set the value of an entry-id field to value + # Returns the value or None if not found + # subgroup is ignored in the general routine and applied in specific methods + # """ + # oldcomps = self._components[:] + # nvid, comp, _ = self.check_if_comp_var(vid, None) + # if nvid == "NINST": + # if self.get_value("MULTI_COUPLER"): + # self._components = ["CPL"] + # if comp is None: + # comp = "CPL" + # expect(comp == "CPL" or value == 1,"Cannot change {} when MULTI_COUPLER flag is TRUE".format(vid)) + # elif value != 1: + # if 'CPL' in self._components: + # self._components.remove('CPL') + # if 'ESP' in self._components and value != 1: + # self._components.remove('ESP') + # expect(comp is None or comp != "CPL","Cannot change NINST_CPL if MULTI_COUPLER flag is FALSE") + # # 
Toggling the + # if vid == "MULTI_COUPLER": + # newval = EnvBase.set_value(self, vid, value,subgroup=subgroup, ignore_type=ignore_type) + # if value: + # maxinst = 1 + # for tcomp in self._components: + # if tcomp in ["CPL"]: + # continue + # tcomp_inst = "NINST_{}".format(tcomp) + # maxinst = max(maxinst, self.get_value(tcomp_inst)) + # EnvBase.set_value(self,tcomp_inst, 1) + # self.set_value("NINST_CPL", maxinst) + # else: + # ninst = self.get_value("NINST_CPL") + # newval = EnvBase.set_value(self, vid, value,subgroup=subgroup, ignore_type=ignore_type) + # cpl_index = None + # esp_index = None + # if 'CPL' in self._components: + # cpl_index = self._components.index('CPL') + # del self._components[cpl_index] + # if 'ESP' in self._components: + # esp_index = self._components.index('ESP') + # del self._components[esp_index] + + # EnvBase.set_value(self, "NINST", ninst) + # if cpl_index is not None: + # self._components.insert(cpl_index,'CPL') + # if esp_index is not None: + # self._components.insert(esp_index,'ESP') + # EnvBase.set_value(self, "NINST_CPL", 1) + # else: + # newval = EnvBase.set_value(self, vid, value,subgroup=subgroup, ignore_type=ignore_type) + # self._components = oldcomps + # return newval def get_value(self, vid, attribute=None, resolved=True, subgroup=None, pes_per_node=None): # pylint: disable=arguments-differ + + if vid == "NINST_MAX": + value = 1 + for comp in self._components: + value = max(value, self.get_value("NINST_{}".format(comp))) + return value + value = EnvBase.get_value(self, vid, attribute, resolved, subgroup) if "NTASKS" in vid or "ROOTPE" in vid: @@ -112,14 +119,16 @@ def get_cost_pes(self, totaltasks, max_thread_count, machine=None): def get_total_tasks(self, comp_classes): total_tasks = 0 + maxinst = 1 for comp in comp_classes: ntasks = self.get_value("NTASKS", attribute={"component":comp}) rootpe = self.get_value("ROOTPE", attribute={"component":comp}) pstrid = self.get_value("PSTRID", attribute={"component":comp}) + maxinst 
= max(maxinst, self.get_value("NINST", attribute={"component":comp})) tt = rootpe + (ntasks - 1) * pstrid + 1 total_tasks = max(tt, total_tasks) if self.get_value("MULTI_COUPLER"): - total_tasks *= self.get_value("NINST_CPL") + total_tasks *= maxinst return total_tasks def get_tasks_per_node(self, total_tasks, max_thread_count): diff --git a/scripts/lib/CIME/XML/generic_xml.py b/scripts/lib/CIME/XML/generic_xml.py index 23ea46ea551..5a5c7fd9364 100644 --- a/scripts/lib/CIME/XML/generic_xml.py +++ b/scripts/lib/CIME/XML/generic_xml.py @@ -82,8 +82,6 @@ def write(self, outfile=None): doc = minidom.parseString(xmlstr) with open(outfile,'w') as xmlout: doc.writexml(xmlout,addindent=' ') - append_status("Flush completed at {}".format(get_timestamp()), CaseStatus) - def get_node(self, nodename, attributes=None, root=None, xpath=None): """ @@ -286,6 +284,7 @@ def set_element_text(self, element_name, new_text, attributes=None, root=None, x return None def get_raw_record(self, root=None): + logger.info("writing file {}".format(self.filename)) if root is None: root = self.root try: diff --git a/scripts/lib/CIME/build.py b/scripts/lib/CIME/build.py index d472d37326c..a4817fb060e 100644 --- a/scripts/lib/CIME/build.py +++ b/scripts/lib/CIME/build.py @@ -395,14 +395,21 @@ def _case_build_impl(caseroot, case, sharedlib_only, model_only, buildlist): incroot = os.path.abspath(case.get_value("INCROOT")) libroot = os.path.abspath(case.get_value("LIBROOT")) sharedlibroot = os.path.abspath(case.get_value("SHAREDLIBROOT")) - + multi_coupler = case.get_value("MULTI_COUPLER") complist = [] + ninst = 1 for comp_class in comp_classes: - ninst = case.get_value("NINST_{}".format(comp_class)) if comp_class == "CPL": config_dir = None + if multi_coupler: + ninst = case.get_value("NINST_MAX") else: config_dir = os.path.dirname(case.get_value("CONFIG_{}_FILE".format(comp_class))) + if multi_coupler: + ninst = 1 + else: + ninst = case.get_value("NINST_{}".format(comp_class)) + comp = 
case.get_value("COMP_{}".format(comp_class)) thrds = case.get_value("NTHRDS_{}".format(comp_class)) expect(ninst is not None,"Failed to get ninst for comp_class {}".format(comp_class)) diff --git a/scripts/lib/CIME/buildnml.py b/scripts/lib/CIME/buildnml.py index acfb56a24b8..a600f681432 100644 --- a/scripts/lib/CIME/buildnml.py +++ b/scripts/lib/CIME/buildnml.py @@ -78,7 +78,7 @@ def build_xcpl_nml(case, caseroot, compname): if ninst == 1: filename = os.path.join(rundir, "{}_in".format(compname)) else: - filename = os.path.join(rundir, "{}_in_{:4.4d}".format(compname, i)) + filename = os.path.join(rundir, "{}_in_{:04d}".format(compname, i)) with open(filename, 'w') as infile: infile.write("{:<20d} ! i-direction global dimension\n".format(nx)) diff --git a/scripts/lib/CIME/case.py b/scripts/lib/CIME/case.py index 3122f27df70..eddc50e70c4 100644 --- a/scripts/lib/CIME/case.py +++ b/scripts/lib/CIME/case.py @@ -636,7 +636,7 @@ def _get_component_config_data(self, files): self.clean_up_lookups() - def _setup_mach_pes(self, pecount, ncouplers, ninst, machine_name, mpilib): + def _setup_mach_pes(self, pecount, multi_coupler, ninst, machine_name, mpilib): #-------------------------------------------- # pe layout #-------------------------------------------- @@ -720,21 +720,20 @@ def _setup_mach_pes(self, pecount, ncouplers, ninst, machine_name, mpilib): val = -1*val*pes_per_node if val > pesize: pesize = val - pesize *= int(ncouplers) + if multi_coupler: + pesize *= int(ninst) + mach_pes_obj.set_value("MULTI_COUPLER", True) # Make sure that every component has been accounted for # set, nthrds and ntasks to 1 otherwise. Also set the ninst values here. 
for compclass in self._component_classes: key = "NINST_{}".format(compclass) if compclass == "CPL": - if ncouplers > 1: - mach_pes_obj.set_value("MULTI_COUPLER", True) - mach_pes_obj.set_value(key, ncouplers) continue # ESP models are currently limited to 1 instance if compclass == "ESP": mach_pes_obj.set_value(key, 1) - elif ncouplers == 1: + else: mach_pes_obj.set_value(key, ninst) key = "NTASKS_{}".format(compclass) @@ -750,7 +749,7 @@ def _setup_mach_pes(self, pecount, ncouplers, ninst, machine_name, mpilib): def configure(self, compset_name, grid_name, machine_name=None, project=None, pecount=None, compiler=None, mpilib=None, pesfile=None,user_grid=False, gridfile=None, - ncouplers=1, ninst=1, test=False, + multi_coupler=False, ninst=1, test=False, walltime=None, queue=None, output_root=None, run_unsupported=False, answer=None, input_dir=None): @@ -841,10 +840,10 @@ def configure(self, compset_name, grid_name, machine_name=None, env_mach_specific_obj.populate(machobj) self.schedule_rewrite(env_mach_specific_obj) - pesize = self._setup_mach_pes(pecount, ncouplers, ninst, machine_name, mpilib) + pesize = self._setup_mach_pes(pecount, multi_coupler, ninst, machine_name, mpilib) - if ncouplers > 1: - logger.info(" Coupler has %s instances" % ncouplers) + if multi_coupler and ninst>1: + logger.info(" Coupler has %s instances" % ninst) #-------------------------------------------- # batch system @@ -1463,7 +1462,7 @@ def create(self, casename, srcroot, compset_name, grid_name, user_mods_dir=None, machine_name=None, project=None, pecount=None, compiler=None, mpilib=None, pesfile=None,user_grid=False, gridfile=None, - ncouplers=1, ninst=1, test=False, + multi_coupler=False, ninst=1, test=False, walltime=None, queue=None, output_root=None, run_unsupported=False, answer=None, input_dir=None): @@ -1478,7 +1477,7 @@ def create(self, casename, srcroot, compset_name, grid_name, project=project, pecount=pecount, compiler=compiler, mpilib=mpilib, 
pesfile=pesfile,user_grid=user_grid, gridfile=gridfile, - ncouplers=ncouplers, ninst=ninst, test=test, + multi_coupler=multi_coupler, ninst=ninst, test=test, walltime=walltime, queue=queue, output_root=output_root, run_unsupported=run_unsupported, answer=answer, diff --git a/scripts/lib/CIME/case_run.py b/scripts/lib/CIME/case_run.py index 216dd90190c..2afe44b8df6 100644 --- a/scripts/lib/CIME/case_run.py +++ b/scripts/lib/CIME/case_run.py @@ -255,7 +255,7 @@ def case_run(case, skip_pnl=False): "You are not calling the run script via the submit script. " "As a result, short-term archiving will not be called automatically." "Please submit your run using the submit script like so:" - " ./case.submit Time: {}".format(get_timestamp()) + " ./case.submit Time: {}".format(get_timestamp())) # Forces user to use case.submit if they re-submit if case.get_value("TESTCASE") is None: diff --git a/scripts/lib/CIME/case_setup.py b/scripts/lib/CIME/case_setup.py index a31db04dae5..162ed0d2a1d 100644 --- a/scripts/lib/CIME/case_setup.py +++ b/scripts/lib/CIME/case_setup.py @@ -33,8 +33,10 @@ def _build_usernl_files(case, model, comp): expect(os.path.isdir(model_dir), "cannot find cime_config directory {} for component {}".format(model_dir, comp)) - - ninst = case.get_value("NINST_CPL") + ninst = 1 + multi_coupler = case.get_value("MULTI_COUPLER") + if multi_coupler: + ninst = case.get_value("NINST_MAX") if comp == "cpl": if not os.path.exists("user_nl_cpl"): shutil.copy(os.path.join(model_dir, "user_nl_cpl"), ".") @@ -122,6 +124,7 @@ def _case_setup_impl(case, caseroot, clean=False, test_mode=False, reset=False): # Check ninst. # In CIME there can be multiple instances of each component model (an ensemble) NINST is the instance of that component. 
+ multi_coupler = case.get_value("MULTI_COUPLER") for comp in models: if comp == "CPL": continue @@ -135,6 +138,10 @@ def _case_setup_impl(case, caseroot, clean=False, test_mode=False, reset=False): case.set_value("NTASKS_{}".format(comp), ninst) else: expect(False, "NINST_{} value {:d} greater than NTASKS_{} {:d}".format(comp, ninst, comp, ntasks)) + # But the NINST_LAYOUT may only be concurrent in multi_coupler mode + if multi_coupler: + expect(case.get_value("NINST_LAYOUT_{}".format(comp)) == "concurrent", + "If multi_coupler is TRUE, NINST_LAYOUT_{} must be concurrent".format(comp)) if os.path.exists("case.run"): logger.info("Machine/Decomp/Pes configuration has already been done ...skipping") diff --git a/scripts/lib/CIME/preview_namelists.py b/scripts/lib/CIME/preview_namelists.py index feb00b6c970..3c4b8013d5f 100644 --- a/scripts/lib/CIME/preview_namelists.py +++ b/scripts/lib/CIME/preview_namelists.py @@ -77,12 +77,6 @@ def create_namelists(case, component=None): compname = "drv" else: compname = case.get_value("COMP_{}".format(model_str.upper())) - # We must temporarily toggle out of Multicoupler mode (MULTI_COUPLER ) - # so that the component build namelists use the correct number of instances - # we can get rid of this hack when all of the components understand MULTI_COUPLER - # mode - if multicoupler and model_str != "cpl": - case.set_value("MULTI_COUPLER",False) if component is None or component == model_str: # first look in the case SourceMods directory cmd = os.path.join(caseroot, "SourceMods", "src."+compname, "buildnml") @@ -94,8 +88,6 @@ def create_namelists(case, component=None): expect(os.path.isfile(cmd), "Could not find buildnml file for component {}".format(compname)) run_sub_or_cmd(cmd, (caseroot), "buildnml", (case, caseroot, compname), case=case) - if multicoupler and model_str != "cpl": - case.set_value("MULTI_COUPLER",True) logger.info("Finished creating component namelists") # Save namelists to docdir diff --git 
a/src/drivers/mct/cime_config/buildnml b/src/drivers/mct/cime_config/buildnml index 4ada65835bc..9614097e704 100755 --- a/src/drivers/mct/cime_config/buildnml +++ b/src/drivers/mct/cime_config/buildnml @@ -247,18 +247,23 @@ def _create_component_modelio_namelists(case, files): confdir = os.path.join(case.get_value("CASEBUILD"), "cplconf") lid = os.environ["LID"] if "LID" in os.environ else get_timestamp("%y%m%d-%H%M%S") - inst_cpl = case.get_value("NINST_CPL") models = case.get_values("COMP_CLASSES") + #if we are in multi-coupler mode the number of instances of cpl will be the max + # of any NINST_* value + maxinst = 1 + if case.get_value("MULTI_COUPLER"): + maxinst = case.get_value("NINST_MAX") + for model in models: model = model.lower() with NamelistGenerator(case, definition_file) as nmlgen: config = {} config['component'] = model entries = nmlgen.init_defaults(infiles, config, skip_entry_loop=True) - if inst_cpl == 1: + if maxinst == 1: inst_count = case.get_value("NINST_" + model.upper()) elif model != 'ESP': - inst_count = inst_cpl + inst_count = maxinst else: inst_count = 1 diff --git a/src/drivers/mct/cime_config/config_component.xml b/src/drivers/mct/cime_config/config_component.xml index be0cfec3bd7..4f9f01fa4da 100644 --- a/src/drivers/mct/cime_config/config_component.xml +++ b/src/drivers/mct/cime_config/config_component.xml @@ -1964,7 +1964,6 @@ integer - 1 1 1 1 diff --git a/src/drivers/mct/cime_config/namelist_definition_drv.xml b/src/drivers/mct/cime_config/namelist_definition_drv.xml index 78237b864cf..87383203b71 100644 --- a/src/drivers/mct/cime_config/namelist_definition_drv.xml +++ b/src/drivers/mct/cime_config/namelist_definition_drv.xml @@ -44,7 +44,7 @@ - + integer cime_cpl_inst cime_cpl_inst @@ -52,7 +52,7 @@ Number of CESM coupler instances. 
- $NINST_CPL + $NINST_MAX From 2f8ad79b66e8a9d80dec1ee352b1bfd2888610a0 Mon Sep 17 00:00:00 2001 From: Jim Edwards Date: Tue, 27 Jun 2017 19:32:49 -0600 Subject: [PATCH 31/51] fix issues with finding / defining cpl log --- scripts/lib/CIME/XML/env_mach_pes.py | 55 --------------------------- scripts/lib/CIME/XML/generic_xml.py | 1 - scripts/lib/CIME/case_run.py | 4 +- scripts/lib/CIME/preview_namelists.py | 1 - src/drivers/mct/cime_config/buildnml | 6 +-- 5 files changed, 6 insertions(+), 61 deletions(-) diff --git a/scripts/lib/CIME/XML/env_mach_pes.py b/scripts/lib/CIME/XML/env_mach_pes.py index f656cd31f1a..c16fd30ad3c 100644 --- a/scripts/lib/CIME/XML/env_mach_pes.py +++ b/scripts/lib/CIME/XML/env_mach_pes.py @@ -19,61 +19,6 @@ def __init__(self, case_root=None, infile="env_mach_pes.xml", components=None): schema = os.path.join(get_cime_root(), "config", "xml_schemas", "env_mach_pes.xsd") EnvBase.__init__(self, case_root, infile, schema=schema) - # def set_value(self, vid, value, subgroup=None, ignore_type=False): - # """ - # Set the value of an entry-id field to value - # Returns the value or None if not found - # subgroup is ignored in the general routine and applied in specific methods - # """ - # oldcomps = self._components[:] - # nvid, comp, _ = self.check_if_comp_var(vid, None) - # if nvid == "NINST": - # if self.get_value("MULTI_COUPLER"): - # self._components = ["CPL"] - # if comp is None: - # comp = "CPL" - # expect(comp == "CPL" or value == 1,"Cannot change {} when MULTI_COUPLER flag is TRUE".format(vid)) - # elif value != 1: - # if 'CPL' in self._components: - # self._components.remove('CPL') - # if 'ESP' in self._components and value != 1: - # self._components.remove('ESP') - # expect(comp is None or comp != "CPL","Cannot change NINST_CPL if MULTI_COUPLER flag is FALSE") - # # Toggling the - # if vid == "MULTI_COUPLER": - # newval = EnvBase.set_value(self, vid, value,subgroup=subgroup, ignore_type=ignore_type) - # if value: - # maxinst = 1 - # for 
tcomp in self._components: - # if tcomp in ["CPL"]: - # continue - # tcomp_inst = "NINST_{}".format(tcomp) - # maxinst = max(maxinst, self.get_value(tcomp_inst)) - # EnvBase.set_value(self,tcomp_inst, 1) - # self.set_value("NINST_CPL", maxinst) - # else: - # ninst = self.get_value("NINST_CPL") - # newval = EnvBase.set_value(self, vid, value,subgroup=subgroup, ignore_type=ignore_type) - # cpl_index = None - # esp_index = None - # if 'CPL' in self._components: - # cpl_index = self._components.index('CPL') - # del self._components[cpl_index] - # if 'ESP' in self._components: - # esp_index = self._components.index('ESP') - # del self._components[esp_index] - - # EnvBase.set_value(self, "NINST", ninst) - # if cpl_index is not None: - # self._components.insert(cpl_index,'CPL') - # if esp_index is not None: - # self._components.insert(esp_index,'ESP') - # EnvBase.set_value(self, "NINST_CPL", 1) - # else: - # newval = EnvBase.set_value(self, vid, value,subgroup=subgroup, ignore_type=ignore_type) - # self._components = oldcomps - # return newval - def get_value(self, vid, attribute=None, resolved=True, subgroup=None, pes_per_node=None): # pylint: disable=arguments-differ if vid == "NINST_MAX": diff --git a/scripts/lib/CIME/XML/generic_xml.py b/scripts/lib/CIME/XML/generic_xml.py index 5a5c7fd9364..78e74d2065c 100644 --- a/scripts/lib/CIME/XML/generic_xml.py +++ b/scripts/lib/CIME/XML/generic_xml.py @@ -5,7 +5,6 @@ from CIME.XML.standard_module_setup import * from distutils.spawn import find_executable from xml.dom import minidom -from CIME.utils import append_status import getpass logger = logging.getLogger(__name__) diff --git a/scripts/lib/CIME/case_run.py b/scripts/lib/CIME/case_run.py index 2afe44b8df6..ee4271f573f 100644 --- a/scripts/lib/CIME/case_run.py +++ b/scripts/lib/CIME/case_run.py @@ -152,7 +152,9 @@ def post_run_check(case, lid): rundir = case.get_value("RUNDIR") model = case.get_value("MODEL") - cpl_ninst = case.get_value("NINST_CPL") + cpl_ninst = 1 + if 
case.get_value("MULTI_COUPLER"): + cpl_ninst = case.get_value("NINST_MAX") cpl_logs = [] if cpl_ninst > 1: for inst in range(cpl_ninst): diff --git a/scripts/lib/CIME/preview_namelists.py b/scripts/lib/CIME/preview_namelists.py index 3c4b8013d5f..d259dfdd546 100644 --- a/scripts/lib/CIME/preview_namelists.py +++ b/scripts/lib/CIME/preview_namelists.py @@ -72,7 +72,6 @@ def create_namelists(case, component=None): model_str = model.lower() config_file = case.get_value("CONFIG_{}_FILE".format(model_str.upper())) config_dir = os.path.dirname(config_file) - multicoupler = case.get_value("MULTI_COUPLER") if model_str == "cpl": compname = "drv" else: diff --git a/src/drivers/mct/cime_config/buildnml b/src/drivers/mct/cime_config/buildnml index 9614097e704..1770dca3ae7 100755 --- a/src/drivers/mct/cime_config/buildnml +++ b/src/drivers/mct/cime_config/buildnml @@ -260,9 +260,9 @@ def _create_component_modelio_namelists(case, files): config = {} config['component'] = model entries = nmlgen.init_defaults(infiles, config, skip_entry_loop=True) - if maxinst == 1: + if maxinst == 1 and model != 'cpl': inst_count = case.get_value("NINST_" + model.upper()) - elif model != 'ESP': + elif model != 'esp': inst_count = maxinst else: inst_count = 1 @@ -272,7 +272,7 @@ def _create_component_modelio_namelists(case, files): while inst_index <= inst_count: # determine instance string if inst_count > 1: - inst_string = '_%04d' % inst_index + inst_string = '_{04d}'.format(inst_index) # set default values for entry in entries: From 33ea849df03a1dce81cdc1b1c3349d72725dadd9 Mon Sep 17 00:00:00 2001 From: Jim Edwards Date: Wed, 28 Jun 2017 08:01:25 -0600 Subject: [PATCH 32/51] update documentation --- doc/source/users_guide/multi-instance.rst | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/doc/source/users_guide/multi-instance.rst b/doc/source/users_guide/multi-instance.rst index ce038897805..97827ef3de7 100644 --- a/doc/source/users_guide/multi-instance.rst +++ 
b/doc/source/users_guide/multi-instance.rst @@ -48,10 +48,9 @@ To run two instances of CAM, CLM, CICE, RTM and DOCN, invoke the following :ref: As a result, you will have two instances of CAM, CLM and CICE (prescribed), RTM, and DOCN, each running concurrently on 72 MPI tasks and all using the same coupler component. In this single coupler mode the number of tasks for each component instance is NTASKS_COMPONENT/NINST_COMPONENT and the total number of tasks is the same as for the single instance case. Now consider the multi coupler model. -To use this mode change the NINST values for the individual components back to 1 and the NINST_CPL to 2. +To use this mode change :: - > ./xmlchange NINST=1 - > ./xmlchange NINST_CPL=2 + > ./xmlchange MULTI_COUPLER=TRUE This configuration will run each component instance on the original 144 tasks but will generate two copies of the model (in the same executable) for a total of 288 tasks. @@ -96,6 +95,8 @@ Also keep these important points in mind: #. Calling **case.setup** with ``--clean`` *DOES NOT* remove the **user_nl_xxx_NN** (where xxx is the component name) files created by **case.setup**. #. A special variable NINST_LAYOUT is provided for some experimental compsets, its value should be - 'concurrent' for all but a few special cases. + 'concurrent' for all but a few special cases and it cannot be used if MULTI_COUPLER=TRUE. #. In **create_test** these options can be invoked with testname modifiers _N# for the single coupler mode and _C# for the multi-coupler mode. These are mutually exclusive options, they cannot be combined. + +#. In create_newcase you may use --ninst # to set the number of instances and --multi-coupler for multi-coupler mode. 
From ff7347eef3b3f426bcd6e9407ef48568abe6aaea Mon Sep 17 00:00:00 2001 From: Jim Edwards Date: Wed, 28 Jun 2017 13:30:38 -0600 Subject: [PATCH 33/51] special handling for esp --- scripts/lib/CIME/XML/generic_xml.py | 2 +- src/drivers/mct/cime_config/buildnml | 2 +- src/drivers/mct/main/cime_comp_mod.F90 | 48 ++++++++++++++------------ 3 files changed, 27 insertions(+), 25 deletions(-) diff --git a/scripts/lib/CIME/XML/generic_xml.py b/scripts/lib/CIME/XML/generic_xml.py index 78e74d2065c..efad45fa2ee 100644 --- a/scripts/lib/CIME/XML/generic_xml.py +++ b/scripts/lib/CIME/XML/generic_xml.py @@ -283,7 +283,7 @@ def set_element_text(self, element_name, new_text, attributes=None, root=None, x return None def get_raw_record(self, root=None): - logger.info("writing file {}".format(self.filename)) + logger.debug("writing file {}".format(self.filename)) if root is None: root = self.root try: diff --git a/src/drivers/mct/cime_config/buildnml b/src/drivers/mct/cime_config/buildnml index 1770dca3ae7..9720b6572e2 100755 --- a/src/drivers/mct/cime_config/buildnml +++ b/src/drivers/mct/cime_config/buildnml @@ -272,7 +272,7 @@ def _create_component_modelio_namelists(case, files): while inst_index <= inst_count: # determine instance string if inst_count > 1: - inst_string = '_{04d}'.format(inst_index) + inst_string = '_{:04d}'.format(inst_index) # set default values for entry in entries: diff --git a/src/drivers/mct/main/cime_comp_mod.F90 b/src/drivers/mct/main/cime_comp_mod.F90 index d23eb310060..8c2a894f2f2 100644 --- a/src/drivers/mct/main/cime_comp_mod.F90 +++ b/src/drivers/mct/main/cime_comp_mod.F90 @@ -507,8 +507,7 @@ module cime_comp_mod !---------------------------------------------------------------------------- ! communicator groups and related !---------------------------------------------------------------------------- - integer :: Global_Comm - + integer :: driver_comm integer :: mpicom_GLOID ! MPI global communicator integer :: mpicom_CPLID ! 
MPI cpl communicator integer :: mpicom_OCNID ! MPI ocn communicator for ensemble member 1 @@ -599,29 +598,31 @@ subroutine cime_pre_init1() character(len=seq_comm_namelen) :: comp_name(num_inst_total) integer :: i, it integer :: num_inst_cpl, cpl_id + integer :: cpl_comm call mpi_init(ierr) call shr_mpi_chkerr(ierr,subname//' mpi_init') + call mpi_comm_dup(MPI_COMM_WORLD, driver_comm, ierr) + call shr_mpi_chkerr(ierr,subname//' mpi_comm_dup') - Global_Comm=MPI_COMM_WORLD comp_comm = MPI_COMM_NULL time_brun = mpi_wtime() !--- Initialize multiple coupler instances, if requested --- - call cime_cpl_init(Global_Comm, num_inst_cpl, cpl_id) + call cime_cpl_init(driver_comm, cpl_comm, num_inst_cpl, cpl_id) - call shr_pio_init1(num_inst_total,NLFileName, Global_Comm) + call shr_pio_init1(num_inst_total,NLFileName, cpl_comm) ! - ! If pio_async_interface is true Global_Comm is MPI_COMM_NULL on the servernodes + ! If pio_async_interface is true Driver_comm is MPI_COMM_NULL on the servernodes ! and server nodes do not return from shr_pio_init2 ! - ! if (Global_Comm /= MPI_COMM_NULL) then + ! if (Driver_comm /= MPI_COMM_NULL) then if (num_inst_cpl > 1) then - call seq_comm_init(Global_Comm, NLFileName, cpl_comm_ID=cpl_id) + call seq_comm_init(cpl_comm, NLFileName, cpl_comm_ID=cpl_id) write(cpl_inst_tag,'("_",i4.4)') cpl_id else - call seq_comm_init(Global_Comm, NLFileName) + call seq_comm_init(cpl_comm, NLFileName) cpl_inst_tag = '' end if @@ -2542,7 +2543,6 @@ subroutine cime_run() !---------------------------------------------------------- !| WAV SETUP-SEND !---------------------------------------------------------- - if (wav_present .and. 
wavrun_alarm) then !---------------------------------------------------------- @@ -2743,7 +2743,6 @@ subroutine cime_run() !---------------------------------------------------------- !| ATM/OCN SETUP (cesm1_orig, cesm1_orig_tight, cesm1_mod or cesm1_mod_tight) !---------------------------------------------------------- - if ((trim(cpl_seq_option) == 'CESM1_ORIG' .or. & trim(cpl_seq_option) == 'CESM1_ORIG_TIGHT' .or. & trim(cpl_seq_option) == 'CESM1_MOD' .or. & @@ -3581,7 +3580,6 @@ subroutine cime_run() !---------------------------------------------------------- !| Write driver restart file !---------------------------------------------------------- - if ( (restart_alarm .or. drv_pause) .and. iamin_CPLID) then call cime_comp_barriers(mpicom=mpicom_CPLID, timer='CPL:RESTART_BARRIER') call t_drvstartf ('CPL:RESTART',cplrun=.true.,barrier=mpicom_CPLID) @@ -3741,11 +3739,13 @@ subroutine cime_run() call t_drvstopf ('CPL:HISTORY',cplrun=.true.) endif - !---------------------------------------------------------- !| RUN ESP MODEL !---------------------------------------------------------- if (esp_present .and. esprun_alarm) then + ! Make sure that all couplers are here in multicoupler mode before running ESP component + call mpi_barrier(driver_comm, ierr) + call component_run(Eclock_e, esp, esp_run, infodata, & comp_prognostic=esp_prognostic, comp_num=comp_num_esp, & timer_barrier= 'CPL:ESP_RUN_BARRIER', timer_comp_run='CPL:ESP_RUN', & @@ -4066,7 +4066,7 @@ subroutine cime_comp_barriers(mpicom, timer) endif end subroutine cime_comp_barriers -subroutine cime_cpl_init(comm, num_inst_cpl, id) +subroutine cime_cpl_init(comm_in, comm_out, num_inst_cpl, id) !----------------------------------------------------------------------- ! 
@@ -4076,7 +4076,8 @@ subroutine cime_cpl_init(comm, num_inst_cpl, id) implicit none - integer , intent(inout) :: comm + integer , intent(in) :: comm_in + integer , intent(in) :: comm_out integer , intent(out) :: num_inst_cpl integer , intent(out) :: id ! instance ID, starts from 1 ! @@ -4087,8 +4088,8 @@ subroutine cime_cpl_init(comm, num_inst_cpl, id) namelist /cime_cpl_inst/ ninst_cpl - call shr_mpi_commrank(comm, mype , ' cime_cpl_init') - call shr_mpi_commsize(comm, numpes, ' cime_cpl_init') + call shr_mpi_commrank(comm_in, mype , ' cime_cpl_init') + call shr_mpi_commsize(comm_in, numpes, ' cime_cpl_init') num_inst_cpl = 1 id = 0 @@ -4105,19 +4106,20 @@ subroutine cime_cpl_init(comm, num_inst_cpl, id) num_inst_cpl = max(ninst_cpl, 1) end if - call shr_mpi_bcast(num_inst_cpl, comm, 'ninst_cpl') + call shr_mpi_bcast(num_inst_cpl, comm_in, 'ninst_cpl') if (mod(numpes, num_inst_cpl) /= 0) then call shr_sys_abort(subname // & ' : Total PE number must be a multiple of coupler instance number') end if - if (num_inst_cpl > 1) then + if (num_inst_cpl == 1) then + call mpi_comm_dup(comm_in, comm_out, ierr) + call shr_mpi_chkerr(ierr,subname//' mpi_comm_dup') + else id = mype * num_inst_cpl / numpes + 1 - call mpi_comm_split(comm, id, 0, inst_comm, ierr) - if (ierr /= 0) & - call shr_sys_abort(subname // ' : Error in generating coupler instances') - comm = inst_comm + call mpi_comm_split(comm_in, id, 0, comm_out, ierr) + call shr_mpi_chkerr(ierr,subname//' mpi_comm_split') end if end subroutine cime_cpl_init From a08062ee89a0e592e8786b9611110ff6c011be04 Mon Sep 17 00:00:00 2001 From: Jim Edwards Date: Tue, 18 Jul 2017 09:28:12 -0600 Subject: [PATCH 34/51] fix pylint issues --- scripts/lib/CIME/SystemTests/system_tests_compare_two.py | 1 - scripts/lib/CIME/case_submit.py | 1 - 2 files changed, 2 deletions(-) diff --git a/scripts/lib/CIME/SystemTests/system_tests_compare_two.py b/scripts/lib/CIME/SystemTests/system_tests_compare_two.py index 27b26687145..d6aaa5175d7 100644 
--- a/scripts/lib/CIME/SystemTests/system_tests_compare_two.py +++ b/scripts/lib/CIME/SystemTests/system_tests_compare_two.py @@ -228,7 +228,6 @@ def run_phase(self, success_change=False): # pylint: disable=arguments-differ self._case_two_custom_prerun_action() self.run_indv(suffix = self._run_two_suffix) self._case_two_custom_postrun_action() - # Compare results # Case1 is the "main" case, and we need to do the comparisons from there self._activate_case1() diff --git a/scripts/lib/CIME/case_submit.py b/scripts/lib/CIME/case_submit.py index 778ccf391ec..90176e85571 100644 --- a/scripts/lib/CIME/case_submit.py +++ b/scripts/lib/CIME/case_submit.py @@ -17,7 +17,6 @@ def _submit(case, job=None, resubmit=False, no_batch=False, skip_pnl=False, mail_user=None, mail_type='never', batch_args=None): - caseroot = case.get_value("CASEROOT") if job is None: if case.get_value("TEST"): job = "case.test" From 922c452a37ebd9a4a9e9d02b624397f2c61b00e8 Mon Sep 17 00:00:00 2001 From: Jim Edwards Date: Thu, 3 Aug 2017 14:17:51 -0600 Subject: [PATCH 35/51] remove inst dimension from coupler files --- scripts/lib/CIME/SystemTests/mcc.py | 2 +- scripts/lib/CIME/get_timing.py | 6 +++++- src/build_scripts/buildlib.csm_share | 30 ++++++++++++++++++++-------- src/drivers/mct/shr/seq_comm_mct.F90 | 2 -- 4 files changed, 28 insertions(+), 12 deletions(-) diff --git a/scripts/lib/CIME/SystemTests/mcc.py b/scripts/lib/CIME/SystemTests/mcc.py index abdd74a9315..ce08c0f799b 100644 --- a/scripts/lib/CIME/SystemTests/mcc.py +++ b/scripts/lib/CIME/SystemTests/mcc.py @@ -17,7 +17,7 @@ def __init__(self, case): self._comp_classes = [] self._test_instances = 3 SystemTestsCompareTwo.__init__(self, case, - separate_builds = False, + separate_builds = True, run_two_suffix = 'single_instance', run_two_description = 'single instance', run_one_description = 'multi coupler') diff --git a/scripts/lib/CIME/get_timing.py b/scripts/lib/CIME/get_timing.py index a00cd26705f..e8163417123 100644 --- 
a/scripts/lib/CIME/get_timing.py +++ b/scripts/lib/CIME/get_timing.py @@ -93,7 +93,11 @@ def gettime(self, heading_padded): return (0, 0, False) def getTiming(self): - ninst = self.case.get_value("NINST_CPL") + ninst = 1 + multi_coupler = self.case.get_value("MULTI_COUPLER") + if multi_coupler: + ninst = self.case.get_value("NINST_MAX") + if ninst > 1: for inst in range(ninst): self._getTiming(inst+1) diff --git a/src/build_scripts/buildlib.csm_share b/src/build_scripts/buildlib.csm_share index c53c3897118..784c7b6bb8c 100755 --- a/src/build_scripts/buildlib.csm_share +++ b/src/build_scripts/buildlib.csm_share @@ -19,14 +19,27 @@ my $USE_ESMF_LIB = `./xmlquery USE_ESMF_LIB --value`; my $GMAKE_J = `./xmlquery GMAKE_J --value`; my $GMAKE = `./xmlquery GMAKE --value`; my $CASETOOLS = `./xmlquery CASETOOLS --value`; -my $NINST_ATM = `./xmlquery NINST_ATM --value`; -my $NINST_ICE = `./xmlquery NINST_ICE --value`; -my $NINST_GLC = `./xmlquery NINST_GLC --value`; -my $NINST_LND = `./xmlquery NINST_LND --value`; -my $NINST_OCN = `./xmlquery NINST_OCN --value`; -my $NINST_ROF = `./xmlquery NINST_ROF --value`; -my $NINST_WAV = `./xmlquery NINST_WAV --value`; -my $NINST_ESP = `./xmlquery NINST_ESP --value`; +my $multi_coupler = `./xmlquery MULTI_COUPLER --value`; + +my $NINST_ATM = 1; +my $NINST_ICE = 1; +my $NINST_GLC = 1; +my $NINST_LND = 1; +my $NINST_OCN = 1; +my $NINST_ROF = 1; +my $NINST_WAV = 1; +my $NINST_ESP = 1; + +if ($multi_coupler == "FALSE") { + my $NINST_ATM = `./xmlquery NINST_ATM --value`; + my $NINST_ICE = `./xmlquery NINST_ICE --value`; + my $NINST_GLC = `./xmlquery NINST_GLC --value`; + my $NINST_LND = `./xmlquery NINST_LND --value`; + my $NINST_OCN = `./xmlquery NINST_OCN --value`; + my $NINST_ROF = `./xmlquery NINST_ROF --value`; + my $NINST_WAV = `./xmlquery NINST_WAV --value`; + my $NINST_ESP = `./xmlquery NINST_ESP --value`; +} my $NINST_VALUE = `./xmlquery NINST_VALUE --value`; $ENV{PIO_VERSION} = `./xmlquery PIO_VERSION --value`; 
#-------------------------------------------------------------------- @@ -73,6 +86,7 @@ if($#fp != $#filepath){ close(F); } my $multiinst_cppdefs = ""; + $multiinst_cppdefs = "$multiinst_cppdefs -DNUM_COMP_INST_ATM=$NINST_ATM"; $multiinst_cppdefs = "$multiinst_cppdefs -DNUM_COMP_INST_LND=$NINST_LND"; $multiinst_cppdefs = "$multiinst_cppdefs -DNUM_COMP_INST_OCN=$NINST_OCN"; diff --git a/src/drivers/mct/shr/seq_comm_mct.F90 b/src/drivers/mct/shr/seq_comm_mct.F90 index ecd8dd3bdb6..73f7213ba52 100644 --- a/src/drivers/mct/shr/seq_comm_mct.F90 +++ b/src/drivers/mct/shr/seq_comm_mct.F90 @@ -242,7 +242,6 @@ subroutine seq_comm_init(Comm_in, nmlfile, Cpl_comm_id) esp_ntasks, esp_rootpe, esp_pestride, esp_nthreads, esp_layout, & cpl_ntasks, cpl_rootpe, cpl_pestride, cpl_nthreads !---------------------------------------------------------- - ! make sure this is first pass and set comms unset if (seq_comm_mct_initialized) then write(logunit,*) trim(subname),' ERROR seq_comm_init already called ' @@ -337,7 +336,6 @@ subroutine seq_comm_init(Comm_in, nmlfile, Cpl_comm_id) !--- compute some other num_inst values - num_inst_xao = max(num_inst_atm,num_inst_ocn) num_inst_frc = num_inst_ice From be035d5eb77454982cb8ea035617975c68562d47 Mon Sep 17 00:00:00 2001 From: Jim Edwards Date: Fri, 4 Aug 2017 09:34:36 -0600 Subject: [PATCH 36/51] PRE for multi instance cases --- scripts/lib/CIME/SystemTests/pre.py | 50 ++++++++++++------- scripts/lib/CIME/case_setup.py | 2 +- .../data_comps/desp/desp_comp_mod.F90 | 25 +++++----- src/drivers/mct/main/cime_comp_mod.F90 | 9 ++-- src/drivers/mct/shr/seq_comm_mct.F90 | 40 ++++++++------- 5 files changed, 73 insertions(+), 53 deletions(-) diff --git a/scripts/lib/CIME/SystemTests/pre.py b/scripts/lib/CIME/SystemTests/pre.py index 5e95d7bd401..e67efda721d 100644 --- a/scripts/lib/CIME/SystemTests/pre.py +++ b/scripts/lib/CIME/SystemTests/pre.py @@ -85,26 +85,40 @@ def run_phase(self): # pylint: disable=arguments-differ else: pause_comps = 
pause_comps.split(':') + multi_coupler = self._case.get_value("MULTI_COUPLER") + for comp in pause_comps: + if comp == "cpl": + if multi_coupler: + ninst = self._case.get_value("NINST_MAX") + else: + ninst = 1 + else: + ninst = self._case.get_value("NINST_{}".format(comp.upper())) comp_name = self._case.get_value('COMP_{}'.format(comp.upper())) - rname = '*.{}.r.*'.format(comp_name) - restart_files_1 = glob.glob(os.path.join(rundir1, rname)) - expect((len(restart_files_1) > 0), "No case1 restart files for {}".format(comp)) - restart_files_2 = glob.glob(os.path.join(rundir2, rname)) - expect((len(restart_files_2) > len(restart_files_1)), - "No pause (restart) files found in case2 for {}".format(comp)) - # Do cprnc of restart files. - rfile1 = restart_files_1[len(restart_files_1) - 1] - # rfile2 has to match rfile1 (same time string) - parts = os.path.basename(rfile1).split(".") - glob_str = "*.{}".format(".".join(parts[len(parts)-4:])) - restart_files_2 = glob.glob(os.path.join(rundir2, glob_str)) - expect((len(restart_files_2) == 1), - "Missing case2 restart file, {}", glob_str) - rfile2 = restart_files_2[0] - ok = cprnc(comp, rfile1, rfile2, self._case, rundir2)[0] - logger.warning("CPRNC result for {}: {}".format(os.path.basename(rfile1), "PASS" if (ok == should_match) else "FAIL")) - compare_ok = compare_ok and (should_match == ok) + for index in range(1,ninst+1): + if ninst == 1: + rname = '*.{}.r.*'.format(comp_name) + else: + rname = '*.{}_{:04d}.r.*'.format(comp_name, index) + + restart_files_1 = glob.glob(os.path.join(rundir1, rname)) + expect((len(restart_files_1) > 0), "No case1 restart files for {}".format(comp)) + restart_files_2 = glob.glob(os.path.join(rundir2, rname)) + expect((len(restart_files_2) > len(restart_files_1)), + "No pause (restart) files found in case2 for {}".format(comp)) + # Do cprnc of restart files. 
+ rfile1 = restart_files_1[len(restart_files_1) - 1] + # rfile2 has to match rfile1 (same time string) + parts = os.path.basename(rfile1).split(".") + glob_str = "*.{}".format(".".join(parts[len(parts)-4:])) + restart_files_2 = glob.glob(os.path.join(rundir2, glob_str)) + expect((len(restart_files_2) == 1), + "Missing case2 restart file, {}", glob_str) + rfile2 = restart_files_2[0] + ok = cprnc(comp, rfile1, rfile2, self._case, rundir2)[0] + logger.warning("CPRNC result for {}: {}".format(os.path.basename(rfile1), "PASS" if (ok == should_match) else "FAIL")) + compare_ok = compare_ok and (should_match == ok) expect(compare_ok, "Not all restart files {}".format("matched" if should_match else "failed to match")) diff --git a/scripts/lib/CIME/case_setup.py b/scripts/lib/CIME/case_setup.py index 162ed0d2a1d..66f0fb20215 100644 --- a/scripts/lib/CIME/case_setup.py +++ b/scripts/lib/CIME/case_setup.py @@ -45,7 +45,7 @@ def _build_usernl_files(case, model, comp): ninst = case.get_value("NINST_{}".format(model)) nlfile = "user_nl_{}".format(comp) model_nl = os.path.join(model_dir, nlfile) - if ninst > 1: + if ninst > 1 and not comp.endswith("esp"): for inst_counter in xrange(1, ninst+1): inst_nlfile = "{}_{:04d}".format(nlfile, inst_counter) if not os.path.exists(inst_nlfile): diff --git a/src/components/data_comps/desp/desp_comp_mod.F90 b/src/components/data_comps/desp/desp_comp_mod.F90 index 4133be759c3..c48803dfbe3 100644 --- a/src/components/data_comps/desp/desp_comp_mod.F90 +++ b/src/components/data_comps/desp/desp_comp_mod.F90 @@ -18,6 +18,8 @@ module desp_comp_mod use seq_timemgr_mod, only: seq_timemgr_EClockGetData use seq_timemgr_mod, only: seq_timemgr_RestartAlarmIsOn use seq_comm_mct, only: seq_comm_inst, seq_comm_name, seq_comm_suffix + use seq_comm_mct, only: num_inst_cpl + implicit none private @@ -99,8 +101,6 @@ module desp_comp_mod subroutine desp_comp_init(EClock, espid, mpicom_in, phase, read_restart, & esp_present, esp_prognostic) - use pio, only: 
iosystem_desc_t - use shr_pio_mod, only: shr_pio_getiosys, shr_pio_getiotype ! !INPUT/OUTPUT PARAMETERS: @@ -120,8 +120,6 @@ subroutine desp_comp_init(EClock, espid, mpicom_in, phase, read_restart, & integer(IN) :: shrloglev ! original log level integer(IN) :: nunit ! unit number - type(iosystem_desc_t), pointer :: iosystem - character(len=CL) :: fileName ! generic file name character(len=CL) :: rest_file ! restart filename @@ -219,8 +217,7 @@ subroutine desp_comp_init(EClock, espid, mpicom_in, phase, read_restart, & ! Initialize PIO !------------------------------------------------------------------------ - iosystem => shr_pio_getiosys(trim(inst_name)) - call shr_strdata_pioinit(SDESP, iosystem, shr_pio_getiotype(trim(inst_name))) + call shr_strdata_pioinit(SDESP, COMPID) !------------------------------------------------------------------------ ! Validate mode @@ -349,7 +346,7 @@ subroutine desp_comp_run(EClock, case_name, pause_sig, atm_resume, & character(len=CL), intent(inout) :: ice_resume(num_inst_ice) character(len=CL), intent(inout) :: glc_resume(num_inst_glc) character(len=CL), intent(inout) :: wav_resume(num_inst_wav) - character(len=CL), intent(inout) :: cpl_resume + character(len=CL), intent(inout) :: cpl_resume(num_inst_cpl) !--- local --- integer(IN) :: CurrentYMD ! model date @@ -389,7 +386,7 @@ subroutine desp_comp_run(EClock, case_name, pause_sig, atm_resume, & ice_resume(:) = ' ' glc_resume(:) = ' ' wav_resume(:) = ' ' - cpl_resume = ' ' + cpl_resume(:) = ' ' !-------------------------------------------------------------------------- ! 
Reset shr logging to my log file @@ -474,8 +471,8 @@ subroutine desp_comp_run(EClock, case_name, pause_sig, atm_resume, & varname = 'T' case('drv') call get_restart_filenames(ind, cpl_resume, errcode) - allocate(rfilenames(1)) - rfilenames(1) = cpl_resume + allocate(rfilenames(size(cpl_resume))) + rfilenames = cpl_resume varname = 'x2oacc_ox_Foxx_swnet' case default call shr_sys_abort(subname//'Unrecognized ind') @@ -610,7 +607,7 @@ end subroutine desp_comp_final subroutine get_restart_filenames_a(comp_ind, filenames, retcode) use seq_comm_mct, only: ATMID, LNDID, OCNID, ICEID, GLCID, ROFID - use seq_comm_mct, only: WAVID, CPLID, seq_comm_suffix + use seq_comm_mct, only: WAVID, CPLID, seq_comm_suffix, cpl_inst_tag, num_inst_cpl use shr_file_mod, only: shr_file_getUnit, shr_file_freeUnit ! Dummy arguments @@ -657,7 +654,11 @@ subroutine get_restart_filenames_a(comp_ind, filenames, retcode) rpointer_name = rpprefix//comp_names(comp_ind) do ind = 1, num_inst - rpointer_name = rpprefix//comp_names(comp_ind)//trim(seq_comm_suffix(ids(ind))) + if (num_inst_cpl > 1) then + rpointer_name = rpprefix//comp_names(comp_ind)//trim(cpl_inst_tag) + else + rpointer_name = rpprefix//comp_names(comp_ind)//trim(seq_comm_suffix(ids(ind))) + endif if (my_task == master_task) then inquire(file=rpointer_name, EXIST=file_exists) ! POP decided to not follow the convention diff --git a/src/drivers/mct/main/cime_comp_mod.F90 b/src/drivers/mct/main/cime_comp_mod.F90 index 8c2a894f2f2..586fee7327c 100644 --- a/src/drivers/mct/main/cime_comp_mod.F90 +++ b/src/drivers/mct/main/cime_comp_mod.F90 @@ -72,6 +72,7 @@ module cime_comp_mod use seq_comm_mct, only: seq_comm_iamin, seq_comm_name, seq_comm_namelen use seq_comm_mct, only: seq_comm_init, seq_comm_setnthreads, seq_comm_getnthreads use seq_comm_mct, only: seq_comm_getinfo => seq_comm_setptrs + use seq_comm_mct, only: cpl_inst_tag ! 
clock & alarm routines and variables use seq_timemgr_mod, only: seq_timemgr_type @@ -533,8 +534,6 @@ module cime_comp_mod logical :: iamin_CPLALLROFID ! pe associated with CPLALLROFID logical :: iamin_CPLALLWAVID ! pe associated with CPLALLWAVID - ! suffix for log and timing files if multi coupler driver - character(len=seq_comm_namelen) :: cpl_inst_tag !---------------------------------------------------------------------------- ! complist: list of comps on this pe @@ -588,7 +587,7 @@ module cime_comp_mod subroutine cime_pre_init1() use shr_pio_mod, only : shr_pio_init1, shr_pio_init2 - + use seq_comm_mct, only: num_inst_cpl !---------------------------------------------------------- !| Initialize MCT and MPI communicators and IO !---------------------------------------------------------- @@ -597,7 +596,7 @@ subroutine cime_pre_init1() logical :: comp_iamin(num_inst_total) character(len=seq_comm_namelen) :: comp_name(num_inst_total) integer :: i, it - integer :: num_inst_cpl, cpl_id + integer :: cpl_id integer :: cpl_comm call mpi_init(ierr) @@ -4067,7 +4066,7 @@ subroutine cime_comp_barriers(mpicom, timer) end subroutine cime_comp_barriers subroutine cime_cpl_init(comm_in, comm_out, num_inst_cpl, id) - + use seq_comm_mct, only : cpl_inst_iamin !----------------------------------------------------------------------- ! ! Initialize multiple coupler instances, if requested diff --git a/src/drivers/mct/shr/seq_comm_mct.F90 b/src/drivers/mct/shr/seq_comm_mct.F90 index 73f7213ba52..aa53cb488d1 100644 --- a/src/drivers/mct/shr/seq_comm_mct.F90 +++ b/src/drivers/mct/shr/seq_comm_mct.F90 @@ -83,7 +83,8 @@ module seq_comm_mct num_inst_wav + & num_inst_rof + & num_inst_esp + 1 - + integer, public :: num_inst_cpl = 1 + integer, public :: cpl_inst_iamin = 1 integer, public :: num_inst_min, num_inst_max integer, public :: num_inst_xao ! for xao flux integer, public :: num_inst_frc ! 
for fractions @@ -148,6 +149,10 @@ module seq_comm_mct integer, parameter, public :: seq_comm_namelen=16 + ! suffix for log and timing files if multi coupler driver + character(len=seq_comm_namelen), public :: cpl_inst_tag + + type seq_comm_type character(len=seq_comm_namelen) :: name ! my name character(len=seq_comm_namelen) :: suffix ! recommended suffix @@ -181,7 +186,7 @@ module seq_comm_mct character(*), parameter :: F12 = "(a,a,'(',i3,' ',a,')',a,2i6,6x,' (',a,i6,')',' (',a,i3,')','(',a,2i6,')')" character(*), parameter :: F13 = "(a,a,'(',i3,' ',a,')',a,2i6,6x,' (',a,i6,')',' (',a,i3,')')" character(*), parameter :: F14 = "(a,a,'(',i3,' ',a,')',a, 6x,' (',a,i6,')',' (',a,i3,')')" - integer :: Global_Comm + integer :: Coupler_Comm character(len=32), public :: & @@ -248,7 +253,9 @@ subroutine seq_comm_init(Comm_in, nmlfile, Cpl_comm_id) call shr_sys_abort() endif seq_comm_mct_initialized = .true. - Global_Comm = Comm_in + + call mpi_comm_dup(Comm_in, Coupler_Comm, ierr) + call shr_mpi_chkerr(ierr,subname//' mpi_comm_dup') !! Initialize seq_comms elements @@ -273,9 +280,9 @@ subroutine seq_comm_init(Comm_in, nmlfile, Cpl_comm_id) ! Initialize MPI ! Note that if no MPI, will call MCTs fake version - call mpi_comm_rank(GLOBAL_COMM, mype , ierr) + call mpi_comm_rank(Coupler_Comm, mype , ierr) call shr_mpi_chkerr(ierr,subname//' mpi_comm_rank comm_world') - call mpi_comm_size(GLOBAL_COMM, numpes, ierr) + call mpi_comm_size(Coupler_Comm, numpes, ierr) call shr_mpi_chkerr(ierr,subname//' mpi_comm_size comm_world') ! 
Initialize gloiam on all IDs @@ -383,7 +390,7 @@ subroutine seq_comm_init(Comm_in, nmlfile, Cpl_comm_id) pelist(2,1) = numpes-1 pelist(3,1) = 1 end if - call mpi_bcast(pelist, size(pelist), MPI_INTEGER, 0, GLOBAL_COMM, ierr) + call mpi_bcast(pelist, size(pelist), MPI_INTEGER, 0, Coupler_Comm, ierr) call seq_comm_setcomm(GLOID, pelist,iname='GLOBAL') if (mype == 0) then @@ -391,7 +398,7 @@ subroutine seq_comm_init(Comm_in, nmlfile, Cpl_comm_id) pelist(2,1) = cpl_rootpe + (cpl_ntasks -1) * cpl_pestride pelist(3,1) = cpl_pestride end if - call mpi_bcast(pelist, size(pelist), MPI_INTEGER, 0, GLOBAL_COMM, ierr) + call mpi_bcast(pelist, size(pelist), MPI_INTEGER, 0, Coupler_Comm, ierr) call seq_comm_setcomm(CPLID,pelist,cpl_nthreads,'CPL') call comp_comm_init(global_comm, atm_rootpe, atm_nthreads, atm_layout, atm_ntasks, atm_pestride, num_inst_atm, & @@ -430,7 +437,7 @@ subroutine seq_comm_init(Comm_in, nmlfile, Cpl_comm_id) do n = 1,ncomps gloroot = -999 if (seq_comms(n)%iamroot) gloroot = seq_comms(n)%gloiam - call shr_mpi_max(gloroot,seq_comms(n)%gloroot,GLOBAL_COMM, & + call shr_mpi_max(gloroot,seq_comms(n)%gloroot,Coupler_Comm, & trim(subname)//' gloroot',all=.true.) 
enddo @@ -468,11 +475,10 @@ subroutine seq_comm_init(Comm_in, nmlfile, Cpl_comm_id) call shr_sys_abort() endif - call mct_world_init(ncomps, GLOBAL_COMM, comms, comps) + call mct_world_init(ncomps, Coupler_Comm, comms, comps) deallocate(comps,comms) - call seq_comm_printcomms() end subroutine seq_comm_init @@ -627,11 +633,11 @@ subroutine seq_comm_setcomm(ID,pelist,nthreads,iname,inst,tinst) call shr_sys_abort() endif - call mpi_comm_group(GLOBAL_COMM, mpigrp_world, ierr) + call mpi_comm_group(Coupler_Comm, mpigrp_world, ierr) call shr_mpi_chkerr(ierr,subname//' mpi_comm_group mpigrp_world') call mpi_group_range_incl(mpigrp_world, 1, pelist, mpigrp,ierr) call shr_mpi_chkerr(ierr,subname//' mpi_group_range_incl mpigrp') - call mpi_comm_create(GLOBAL_COMM, mpigrp, mpicom, ierr) + call mpi_comm_create(Coupler_Comm, mpigrp, mpicom, ierr) call shr_mpi_chkerr(ierr,subname//' mpi_comm_create mpigrp') ntasks = ((pelist(2,1) - pelist(1,1)) / pelist(3,1)) + 1 @@ -743,7 +749,7 @@ subroutine seq_comm_joincomm(ID1,ID2,ID,iname,inst,tinst) call mpi_group_union(seq_comms(ID1)%mpigrp,seq_comms(ID2)%mpigrp,mpigrp,ierr) call shr_mpi_chkerr(ierr,subname//' mpi_comm_union mpigrp') - call mpi_comm_create(GLOBAL_COMM, mpigrp, mpicom, ierr) + call mpi_comm_create(Coupler_Comm, mpigrp, mpicom, ierr) call shr_mpi_chkerr(ierr,subname//' mpi_comm_create mpigrp') seq_comms(ID)%set = .true. @@ -868,7 +874,7 @@ subroutine seq_comm_jcommarr(IDs,ID,iname,inst,tinst) call mpi_group_union(mpigrpp,seq_comms(IDs(n))%mpigrp,mpigrp,ierr) call shr_mpi_chkerr(ierr,subname//' mpi_comm_union mpigrp') enddo - call mpi_comm_create(GLOBAL_COMM, mpigrp, mpicom, ierr) + call mpi_comm_create(Coupler_Comm, mpigrp, mpicom, ierr) call shr_mpi_chkerr(ierr,subname//' mpi_comm_create mpigrp') seq_comms(ID)%set = .true. 
@@ -948,13 +954,13 @@ subroutine seq_comm_printcomms() character(*),parameter :: subName = '(seq_comm_printcomms) ' integer :: n,mype,npes,ierr - call mpi_comm_size(GLOBAL_COMM, npes , ierr) + call mpi_comm_size(Coupler_Comm, npes , ierr) call shr_mpi_chkerr(ierr,subname//' mpi_comm_size comm_world') - call mpi_comm_rank(GLOBAL_COMM, mype , ierr) + call mpi_comm_rank(Coupler_Comm, mype , ierr) call shr_mpi_chkerr(ierr,subname//' mpi_comm_rank comm_world') call shr_sys_flush(logunit) - call mpi_barrier(GLOBAL_COMM,ierr) + call mpi_barrier(Coupler_Comm,ierr) if (mype == 0) then do n = 1,ncomps write(logunit,'(a,4i6,2x,3a)') trim(subName),n, & From de57ec2fb1a73bd417bfe46f39ed07821dd790b1 Mon Sep 17 00:00:00 2001 From: Jim Edwards Date: Mon, 14 Aug 2017 10:11:14 -0600 Subject: [PATCH 37/51] branch from mutiple_couplers --- scripts/lib/CIME/case.py | 6 +- scripts/lib/CIME/case_setup.py | 3 - scripts/lib/CIME/hist_utils.py | 10 +-- src/build_scripts/buildlib.csm_share | 20 +++--- .../mct/cime_config/config_component.xml | 8 +-- .../cime_config/namelist_definition_drv.xml | 13 ++-- src/drivers/mct/main/cime_comp_mod.F90 | 66 +++++++++---------- 7 files changed, 58 insertions(+), 68 deletions(-) diff --git a/scripts/lib/CIME/case.py b/scripts/lib/CIME/case.py index eddc50e70c4..59c15b6a22c 100644 --- a/scripts/lib/CIME/case.py +++ b/scripts/lib/CIME/case.py @@ -730,11 +730,7 @@ def _setup_mach_pes(self, pecount, multi_coupler, ninst, machine_name, mpilib): key = "NINST_{}".format(compclass) if compclass == "CPL": continue - # ESP models are currently limited to 1 instance - if compclass == "ESP": - mach_pes_obj.set_value(key, 1) - else: - mach_pes_obj.set_value(key, ninst) + mach_pes_obj.set_value(key, ninst) key = "NTASKS_{}".format(compclass) if key not in pes_ntasks.keys(): diff --git a/scripts/lib/CIME/case_setup.py b/scripts/lib/CIME/case_setup.py index 66f0fb20215..212d7d0ff45 100644 --- a/scripts/lib/CIME/case_setup.py +++ b/scripts/lib/CIME/case_setup.py @@ -130,9 
+130,6 @@ def _case_setup_impl(case, caseroot, clean=False, test_mode=False, reset=False): continue ninst = case.get_value("NINST_{}".format(comp)) ntasks = case.get_value("NTASKS_{}".format(comp)) - # ESP models are currently limited to 1 instance - expect((comp != "ESP") or (ninst == 1), - "ESP components may only have one instance") if ninst > ntasks: if ntasks == 1: case.set_value("NTASKS_{}".format(comp), ninst) diff --git a/scripts/lib/CIME/hist_utils.py b/scripts/lib/CIME/hist_utils.py index 60169684999..bb9714683ee 100644 --- a/scripts/lib/CIME/hist_utils.py +++ b/scripts/lib/CIME/hist_utils.py @@ -182,10 +182,10 @@ def _compare_hists(case, from_dir1, from_dir2, suffix1="", suffix2="", outfile_s all_success = True num_compared = 0 comments = "Comparing hists for case '{}' dir1='{}', suffix1='{}', dir2='{}' suffix2='{}'\n".format(testcase, from_dir1, suffix1, from_dir2, suffix2) - multiinst_cpl_compare = False + multiinst_driver_compare = False for model in _iter_model_file_substrs(case): if model == 'cpl' and suffix2 == 'multiinst': - multiinst_cpl_compare = True + multiinst_driver_compare = True comments += " comparing model '{}'\n".format(model) hists1 = _get_latest_hist_files(testcase, model, from_dir1, suffix1) hists2 = _get_latest_hist_files(testcase, model, from_dir2, suffix2) @@ -205,7 +205,7 @@ def _compare_hists(case, from_dir1, from_dir2, suffix1="", suffix2="", outfile_s for hist1, hist2 in match_ups: success, cprnc_log_file = cprnc(model, hist1, hist2, case, from_dir1, - multiinst_cpl_compare=multiinst_cpl_compare, + multiinst_driver_compare=multiinst_driver_compare, outfile_suffix=outfile_suffix) if success: comments += " {} matched {}\n".format(hist1, hist2) @@ -237,7 +237,7 @@ def compare_test(case, suffix1, suffix2): return _compare_hists(case, rundir, rundir, suffix1, suffix2) -def cprnc(model, file1, file2, case, rundir, multiinst_cpl_compare=False, outfile_suffix=""): +def cprnc(model, file1, file2, case, rundir, 
multiinst_driver_compare=False, outfile_suffix=""): """ Run cprnc to compare two individual nc files @@ -277,7 +277,7 @@ def cprnc(model, file1, file2, case, rundir, multiinst_cpl_compare=False, outfil with open(output_filename, "r") as fd: out = fd.read() - if multiinst_cpl_compare: + if multiinst_driver_compare: # In a multiinstance test the cpl hist file will have a different number of # dimensions and so cprnc will indicate that the files seem to be DIFFERENT # in this case we only want to check that the fields we are able to compare diff --git a/src/build_scripts/buildlib.csm_share b/src/build_scripts/buildlib.csm_share index 784c7b6bb8c..0537c810e80 100755 --- a/src/build_scripts/buildlib.csm_share +++ b/src/build_scripts/buildlib.csm_share @@ -19,7 +19,7 @@ my $USE_ESMF_LIB = `./xmlquery USE_ESMF_LIB --value`; my $GMAKE_J = `./xmlquery GMAKE_J --value`; my $GMAKE = `./xmlquery GMAKE --value`; my $CASETOOLS = `./xmlquery CASETOOLS --value`; -my $multi_coupler = `./xmlquery MULTI_COUPLER --value`; +my $multi_driver = `./xmlquery MULTI_DRIVER --value`; my $NINST_ATM = 1; my $NINST_ICE = 1; @@ -30,15 +30,15 @@ my $NINST_ROF = 1; my $NINST_WAV = 1; my $NINST_ESP = 1; -if ($multi_coupler == "FALSE") { - my $NINST_ATM = `./xmlquery NINST_ATM --value`; - my $NINST_ICE = `./xmlquery NINST_ICE --value`; - my $NINST_GLC = `./xmlquery NINST_GLC --value`; - my $NINST_LND = `./xmlquery NINST_LND --value`; - my $NINST_OCN = `./xmlquery NINST_OCN --value`; - my $NINST_ROF = `./xmlquery NINST_ROF --value`; - my $NINST_WAV = `./xmlquery NINST_WAV --value`; - my $NINST_ESP = `./xmlquery NINST_ESP --value`; +if ($multi_driver eq "FALSE") { + $NINST_ATM = `./xmlquery NINST_ATM --value`; + $NINST_ICE = `./xmlquery NINST_ICE --value`; + $NINST_GLC = `./xmlquery NINST_GLC --value`; + $NINST_LND = `./xmlquery NINST_LND --value`; + $NINST_OCN = `./xmlquery NINST_OCN --value`; + $NINST_ROF = `./xmlquery NINST_ROF --value`; + $NINST_WAV = `./xmlquery NINST_WAV --value`; + $NINST_ESP = 
`./xmlquery NINST_ESP --value`; } my $NINST_VALUE = `./xmlquery NINST_VALUE --value`; $ENV{PIO_VERSION} = `./xmlquery PIO_VERSION --value`; diff --git a/src/drivers/mct/cime_config/config_component.xml b/src/drivers/mct/cime_config/config_component.xml index 4f9f01fa4da..ab89c2287db 100644 --- a/src/drivers/mct/cime_config/config_component.xml +++ b/src/drivers/mct/cime_config/config_component.xml @@ -1949,15 +1949,15 @@ ROOTPE (mpi task in MPI_COMM_WORLD) for each component - + logical FALSE TRUE,FALSE mach_pes env_mach_pes.xml - MULTI_COUPLER mode provides a separate coupler component for each - ensemble member all components must have an equal number of members. If - MULTI_COUPLER mode is False prognostic components must have the same number + MULTI_DRIVER mode provides a separate driver (and coupler) component for each + ensemble member. All components must have an equal number of members. If + MULTI_DRIVER mode is False prognostic components must have the same number of members but data or stub components may also have 1 member. diff --git a/src/drivers/mct/cime_config/namelist_definition_drv.xml b/src/drivers/mct/cime_config/namelist_definition_drv.xml index 87383203b71..3c167f7a211 100644 --- a/src/drivers/mct/cime_config/namelist_definition_drv.xml +++ b/src/drivers/mct/cime_config/namelist_definition_drv.xml @@ -41,18 +41,19 @@ --> - + - + integer - cime_cpl_inst - cime_cpl_inst + cime_driver_inst + cime_driver_inst - Number of CESM coupler instances. + Number of CESM driver instances. Only used if MULTI_DRIVER is TRUE. - $NINST_MAX + 1 + $NINST_MAX diff --git a/src/drivers/mct/main/cime_comp_mod.F90 b/src/drivers/mct/main/cime_comp_mod.F90 index 586fee7327c..c0e12d5853b 100644 --- a/src/drivers/mct/main/cime_comp_mod.F90 +++ b/src/drivers/mct/main/cime_comp_mod.F90 @@ -443,10 +443,6 @@ module cime_comp_mod integer :: budget_ltann ! long term budget flag for end of year writing integer :: budget_ltend ! long term budget flag for end of run writing -! 
character(CL) :: hist_r2x_flds = 'all' -! character(CL) :: hist_l2x_flds = 'all' -! character(CL) :: hist_a2x24hr_flds = 'all' - character(CL) :: hist_a2x_flds = & 'Faxa_swndr:Faxa_swvdr:Faxa_swndf:Faxa_swvdf' @@ -508,7 +504,7 @@ module cime_comp_mod !---------------------------------------------------------------------------- ! communicator groups and related !---------------------------------------------------------------------------- - integer :: driver_comm + integer :: global_comm integer :: mpicom_GLOID ! MPI global communicator integer :: mpicom_CPLID ! MPI cpl communicator integer :: mpicom_OCNID ! MPI ocn communicator for ensemble member 1 @@ -587,7 +583,7 @@ module cime_comp_mod subroutine cime_pre_init1() use shr_pio_mod, only : shr_pio_init1, shr_pio_init2 - use seq_comm_mct, only: num_inst_cpl + use seq_comm_mct, only: num_inst_driver !---------------------------------------------------------- !| Initialize MCT and MPI communicators and IO !---------------------------------------------------------- @@ -596,32 +592,32 @@ subroutine cime_pre_init1() logical :: comp_iamin(num_inst_total) character(len=seq_comm_namelen) :: comp_name(num_inst_total) integer :: i, it - integer :: cpl_id - integer :: cpl_comm + integer :: driver_id + integer :: driver_comm call mpi_init(ierr) call shr_mpi_chkerr(ierr,subname//' mpi_init') - call mpi_comm_dup(MPI_COMM_WORLD, driver_comm, ierr) + call mpi_comm_dup(MPI_COMM_WORLD, global_comm, ierr) call shr_mpi_chkerr(ierr,subname//' mpi_comm_dup') comp_comm = MPI_COMM_NULL time_brun = mpi_wtime() - !--- Initialize multiple coupler instances, if requested --- - call cime_cpl_init(driver_comm, cpl_comm, num_inst_cpl, cpl_id) + !--- Initialize multiple driver instances, if requested --- + call cime_cpl_init(global_comm, driver_comm, num_inst_driver, driver_id) - call shr_pio_init1(num_inst_total,NLFileName, cpl_comm) + call shr_pio_init1(num_inst_total,NLFileName, driver_comm) ! - ! 
If pio_async_interface is true Driver_comm is MPI_COMM_NULL on the servernodes + ! If pio_async_interface is true Global_comm is MPI_COMM_NULL on the servernodes ! and server nodes do not return from shr_pio_init2 ! - ! if (Driver_comm /= MPI_COMM_NULL) then + ! if (Global_comm /= MPI_COMM_NULL) then - if (num_inst_cpl > 1) then - call seq_comm_init(cpl_comm, NLFileName, cpl_comm_ID=cpl_id) - write(cpl_inst_tag,'("_",i4.4)') cpl_id + if (num_inst_driver > 1) then + call seq_comm_init(global_comm, driver_comm, NLFileName, driver_comm_ID=driver_id) + write(cpl_inst_tag,'("_",i4.4)') driver_id else - call seq_comm_init(cpl_comm, NLFileName) + call seq_comm_init(global_comm, driver_comm, NLFileName) cpl_inst_tag = '' end if @@ -794,8 +790,8 @@ subroutine cime_pre_init1() write(logunit,'(2A)') subname,' USE_ESMF_LIB is NOT set, using esmf_wrf_timemgr' #endif write(logunit,'(2A)') subname,' MCT_INTERFACE is set' - if (num_inst_cpl > 1) & - write(logunit,'(2A,I0,A)') subname,' Driver is running with',num_inst_cpl,'instances' + if (num_inst_driver > 1) & + write(logunit,'(2A,I0,A)') subname,' Driver is running with',num_inst_driver,'instances' endif ! @@ -3743,7 +3739,7 @@ subroutine cime_run() !---------------------------------------------------------- if (esp_present .and. esprun_alarm) then ! Make sure that all couplers are here in multicoupler mode before running ESP component - call mpi_barrier(driver_comm, ierr) + call mpi_barrier(global_comm, ierr) call component_run(Eclock_e, esp, esp_run, infodata, & comp_prognostic=esp_prognostic, comp_num=comp_num_esp, & @@ -4065,8 +4061,7 @@ subroutine cime_comp_barriers(mpicom, timer) endif end subroutine cime_comp_barriers -subroutine cime_cpl_init(comm_in, comm_out, num_inst_cpl, id) - use seq_comm_mct, only : cpl_inst_iamin +subroutine cime_cpl_init(comm_in, comm_out, num_inst_driver, id) !----------------------------------------------------------------------- ! ! 
Initialize multiple coupler instances, if requested @@ -4077,50 +4072,51 @@ subroutine cime_cpl_init(comm_in, comm_out, num_inst_cpl, id) integer , intent(in) :: comm_in integer , intent(in) :: comm_out - integer , intent(out) :: num_inst_cpl + integer , intent(out) :: num_inst_driver integer , intent(out) :: id ! instance ID, starts from 1 ! ! Local variables ! integer :: ierr, inst_comm, mype, nu, numpes !, pes - integer :: ninst_cpl + integer :: ninst_driver, drvpes - namelist /cime_cpl_inst/ ninst_cpl + namelist /cime_driver_inst/ ninst_driver call shr_mpi_commrank(comm_in, mype , ' cime_cpl_init') call shr_mpi_commsize(comm_in, numpes, ' cime_cpl_init') - num_inst_cpl = 1 + num_inst_driver = 1 id = 0 if (mype == 0) then ! Read coupler namelist if it exists - ninst_cpl = 1 + ninst_driver = 1 nu = shr_file_getUnit() open(unit = nu, file = NLFileName, status = 'old', iostat = ierr) rewind(unit = nu) - read(unit = nu, nml = cime_cpl_inst, iostat = ierr) + read(unit = nu, nml = cime_driver_inst, iostat = ierr) close(unit = nu) call shr_file_freeUnit(nu) - num_inst_cpl = max(ninst_cpl, 1) + num_inst_driver = max(ninst_driver, 1) end if - call shr_mpi_bcast(num_inst_cpl, comm_in, 'ninst_cpl') + call shr_mpi_bcast(num_inst_driver, comm_in, 'ninst_driver') - if (mod(numpes, num_inst_cpl) /= 0) then + if (mod(numpes, num_inst_driver) /= 0) then call shr_sys_abort(subname // & ' : Total PE number must be a multiple of coupler instance number') end if - if (num_inst_cpl == 1) then + if (num_inst_driver == 1) then call mpi_comm_dup(comm_in, comm_out, ierr) call shr_mpi_chkerr(ierr,subname//' mpi_comm_dup') else - id = mype * num_inst_cpl / numpes + 1 + id = mype * num_inst_driver / numpes + 1 call mpi_comm_split(comm_in, id, 0, comm_out, ierr) call shr_mpi_chkerr(ierr,subname//' mpi_comm_split') end if - + call shr_mpi_commsize(comm_out, drvpes, ' cime_cpl_init') + print *,__FILE__,__LINE__,numpes,drvpes end subroutine cime_cpl_init end module cime_comp_mod From 
bae415d1f548513845527c0d4ff9566e5ea9b034 Mon Sep 17 00:00:00 2001 From: Jim Edwards Date: Mon, 14 Aug 2017 15:48:10 -0600 Subject: [PATCH 38/51] fix unit tests --- .../data_comps/desp/desp_comp_mod.F90 | 4 +- src/drivers/mct/main/cime_comp_mod.F90 | 2 +- src/drivers/mct/shr/seq_comm_mct.F90 | 63 +++++++++++-------- .../mct/unit_test/utils/mct_wrapper_mod.F90 | 2 +- 4 files changed, 41 insertions(+), 30 deletions(-) diff --git a/src/components/data_comps/desp/desp_comp_mod.F90 b/src/components/data_comps/desp/desp_comp_mod.F90 index c48803dfbe3..b26eaa70916 100644 --- a/src/components/data_comps/desp/desp_comp_mod.F90 +++ b/src/components/data_comps/desp/desp_comp_mod.F90 @@ -18,7 +18,7 @@ module desp_comp_mod use seq_timemgr_mod, only: seq_timemgr_EClockGetData use seq_timemgr_mod, only: seq_timemgr_RestartAlarmIsOn use seq_comm_mct, only: seq_comm_inst, seq_comm_name, seq_comm_suffix - use seq_comm_mct, only: num_inst_cpl + use seq_comm_mct, only: num_inst_cpl => num_inst_driver implicit none @@ -607,7 +607,7 @@ end subroutine desp_comp_final subroutine get_restart_filenames_a(comp_ind, filenames, retcode) use seq_comm_mct, only: ATMID, LNDID, OCNID, ICEID, GLCID, ROFID - use seq_comm_mct, only: WAVID, CPLID, seq_comm_suffix, cpl_inst_tag, num_inst_cpl + use seq_comm_mct, only: WAVID, CPLID, seq_comm_suffix, cpl_inst_tag use shr_file_mod, only: shr_file_getUnit, shr_file_freeUnit ! Dummy arguments diff --git a/src/drivers/mct/main/cime_comp_mod.F90 b/src/drivers/mct/main/cime_comp_mod.F90 index c0e12d5853b..518f44e2360 100644 --- a/src/drivers/mct/main/cime_comp_mod.F90 +++ b/src/drivers/mct/main/cime_comp_mod.F90 @@ -614,7 +614,7 @@ subroutine cime_pre_init1() ! 
if (Global_comm /= MPI_COMM_NULL) then if (num_inst_driver > 1) then - call seq_comm_init(global_comm, driver_comm, NLFileName, driver_comm_ID=driver_id) + call seq_comm_init(global_comm, driver_comm, NLFileName, drv_comm_ID=driver_id) write(cpl_inst_tag,'("_",i4.4)') driver_id else call seq_comm_init(global_comm, driver_comm, NLFileName) diff --git a/src/drivers/mct/shr/seq_comm_mct.F90 b/src/drivers/mct/shr/seq_comm_mct.F90 index aa53cb488d1..9c50957daab 100644 --- a/src/drivers/mct/shr/seq_comm_mct.F90 +++ b/src/drivers/mct/shr/seq_comm_mct.F90 @@ -27,7 +27,6 @@ module seq_comm_mct private #include - save !-------------------------------------------------------------------------- ! Public interfaces @@ -83,7 +82,7 @@ module seq_comm_mct num_inst_wav + & num_inst_rof + & num_inst_esp + 1 - integer, public :: num_inst_cpl = 1 + integer, public :: num_inst_driver = 1 integer, public :: cpl_inst_iamin = 1 integer, public :: num_inst_min, num_inst_max integer, public :: num_inst_xao ! for xao flux @@ -186,8 +185,6 @@ module seq_comm_mct character(*), parameter :: F12 = "(a,a,'(',i3,' ',a,')',a,2i6,6x,' (',a,i6,')',' (',a,i3,')','(',a,2i6,')')" character(*), parameter :: F13 = "(a,a,'(',i3,' ',a,')',a,2i6,6x,' (',a,i6,')',' (',a,i3,')')" character(*), parameter :: F14 = "(a,a,'(',i3,' ',a,')',a, 6x,' (',a,i6,')',' (',a,i3,')')" - integer :: Coupler_Comm - character(len=32), public :: & atm_layout, lnd_layout, ice_layout, glc_layout, rof_layout, & @@ -202,8 +199,7 @@ integer function seq_comm_get_ncomps() seq_comm_get_ncomps = ncomps end function seq_comm_get_ncomps - - subroutine seq_comm_init(Comm_in, nmlfile, Cpl_comm_id) + subroutine seq_comm_init(Global_comm, Driver_Comm, nmlfile, Drv_comm_id) !---------------------------------------------------------- ! ! 
Arguments @@ -385,21 +381,29 @@ subroutine seq_comm_init(Comm_in, nmlfile, Cpl_comm_id) count = count + 1 CPLID = count + if (global_mype == 0) then + pelist(1,1) = 0 + pelist(2,1) = global_numpes-1 + pelist(3,1) = 1 + end if + call mpi_bcast(pelist, size(pelist), MPI_INTEGER, 0, global_comm, ierr) + call seq_comm_setcomm(global_comm, GLOID, pelist,iname='GLOBAL') + if (mype == 0) then pelist(1,1) = 0 pelist(2,1) = numpes-1 pelist(3,1) = 1 end if - call mpi_bcast(pelist, size(pelist), MPI_INTEGER, 0, Coupler_Comm, ierr) - call seq_comm_setcomm(GLOID, pelist,iname='GLOBAL') + call mpi_bcast(pelist, size(pelist), MPI_INTEGER, 0, driver_comm, ierr) + call seq_comm_setcomm(driver_comm, DRVID, pelist,iname='DRIVER') if (mype == 0) then pelist(1,1) = cpl_rootpe pelist(2,1) = cpl_rootpe + (cpl_ntasks -1) * cpl_pestride pelist(3,1) = cpl_pestride end if - call mpi_bcast(pelist, size(pelist), MPI_INTEGER, 0, Coupler_Comm, ierr) - call seq_comm_setcomm(CPLID,pelist,cpl_nthreads,'CPL') + call mpi_bcast(pelist, size(pelist), MPI_INTEGER, 0, Driver_comm, ierr) + call seq_comm_setcomm(driver_comm, CPLID,pelist,cpl_nthreads,'CPL') call comp_comm_init(global_comm, atm_rootpe, atm_nthreads, atm_layout, atm_ntasks, atm_pestride, num_inst_atm, & CPLID, ATMID, CPLATMID, ALLATMID, CPLALLATMID, 'ATM', count) @@ -479,7 +483,7 @@ subroutine seq_comm_init(Comm_in, nmlfile, Cpl_comm_id) deallocate(comps,comms) - call seq_comm_printcomms() + call seq_comm_printcomms(global_comm) end subroutine seq_comm_init @@ -562,11 +566,11 @@ subroutine comp_comm_init(global_comm, comp_rootpe, comp_nthreads, comp_layout, pelist(3,1) = cstr(n) endif call mpi_bcast(pelist, size(pelist), MPI_INTEGER, 0, GLOBAL_COMM, ierr) - call seq_comm_setcomm(COMPID(n), pelist, comp_nthreads,name, n, num_inst_comp) - call seq_comm_joincomm(CPLID, COMPID(n), CPLCOMPID(n), 'CPL'//name, n, num_inst_comp) + call seq_comm_setcomm(driver_comm, COMPID(n), pelist, comp_nthreads,name, n, num_inst_comp) + call 
seq_comm_joincomm(driver_comm, CPLID, COMPID(n), CPLCOMPID(n), 'CPL'//name, n, num_inst_comp) enddo - call seq_comm_jcommarr(COMPID, ALLCOMPID, 'ALL'//name//'ID', 1, 1) - call seq_comm_joincomm(CPLID, ALLCOMPID, CPLALLCOMPID, 'CPLALL'//name//'ID', 1, 1) + call seq_comm_jcommarr(global_comm, COMPID, ALLCOMPID, 'ALL'//name//'ID', 1, 1) + call seq_comm_joincomm(driver_comm, CPLID, ALLCOMPID, CPLALLCOMPID, 'CPLALL'//name//'ID', 1, 1) end subroutine comp_comm_init @@ -609,9 +613,10 @@ subroutine seq_comm_clean() end subroutine seq_comm_clean !--------------------------------------------------------- - subroutine seq_comm_setcomm(ID,pelist,nthreads,iname,inst,tinst) + subroutine seq_comm_setcomm(comm_in, ID,pelist,nthreads,iname,inst,tinst) implicit none + integer, intent(in) :: comm_in integer,intent(IN) :: ID integer,intent(IN) :: pelist(:,:) integer,intent(IN),optional :: nthreads @@ -633,11 +638,12 @@ subroutine seq_comm_setcomm(ID,pelist,nthreads,iname,inst,tinst) call shr_sys_abort() endif - call mpi_comm_group(Coupler_Comm, mpigrp_world, ierr) + call mpi_comm_group(Comm_in, mpigrp_world, ierr) call shr_mpi_chkerr(ierr,subname//' mpi_comm_group mpigrp_world') call mpi_group_range_incl(mpigrp_world, 1, pelist, mpigrp,ierr) call shr_mpi_chkerr(ierr,subname//' mpi_group_range_incl mpigrp') - call mpi_comm_create(Coupler_Comm, mpigrp, mpicom, ierr) + call mpi_comm_create(Comm_in, mpigrp, mpicom, ierr) + call shr_mpi_chkerr(ierr,subname//' mpi_comm_create mpigrp') ntasks = ((pelist(2,1) - pelist(1,1)) / pelist(3,1)) + 1 @@ -705,9 +711,10 @@ subroutine seq_comm_setcomm(ID,pelist,nthreads,iname,inst,tinst) end subroutine seq_comm_setcomm !--------------------------------------------------------- - subroutine seq_comm_joincomm(ID1,ID2,ID,iname,inst,tinst) + subroutine seq_comm_joincomm(COMM_IN, ID1,ID2,ID,iname,inst,tinst) implicit none + integer, intent(in) :: comm_in integer,intent(IN) :: ID1 ! src id integer,intent(IN) :: ID2 ! srd id integer,intent(IN) :: ID ! 
computed id @@ -749,7 +756,8 @@ subroutine seq_comm_joincomm(ID1,ID2,ID,iname,inst,tinst) call mpi_group_union(seq_comms(ID1)%mpigrp,seq_comms(ID2)%mpigrp,mpigrp,ierr) call shr_mpi_chkerr(ierr,subname//' mpi_comm_union mpigrp') - call mpi_comm_create(Coupler_Comm, mpigrp, mpicom, ierr) + call mpi_comm_create(Comm_in, mpigrp, mpicom, ierr) + call shr_mpi_chkerr(ierr,subname//' mpi_comm_create mpigrp') seq_comms(ID)%set = .true. @@ -827,9 +835,10 @@ subroutine seq_comm_joincomm(ID1,ID2,ID,iname,inst,tinst) end subroutine seq_comm_joincomm !--------------------------------------------------------- - subroutine seq_comm_jcommarr(IDs,ID,iname,inst,tinst) + subroutine seq_comm_jcommarr(comm_in, IDs,ID,iname,inst,tinst) implicit none + integer, intent(in) :: comm_in integer,intent(IN) :: IDs(:) ! src id integer,intent(IN) :: ID ! computed id character(len=*),intent(IN),optional :: iname ! comm name @@ -874,7 +883,7 @@ subroutine seq_comm_jcommarr(IDs,ID,iname,inst,tinst) call mpi_group_union(mpigrpp,seq_comms(IDs(n))%mpigrp,mpigrp,ierr) call shr_mpi_chkerr(ierr,subname//' mpi_comm_union mpigrp') enddo - call mpi_comm_create(Coupler_Comm, mpigrp, mpicom, ierr) + call mpi_comm_create(Comm_in, mpigrp, mpicom, ierr) call shr_mpi_chkerr(ierr,subname//' mpi_comm_create mpigrp') seq_comms(ID)%set = .true. 
@@ -948,19 +957,21 @@ subroutine seq_comm_jcommarr(IDs,ID,iname,inst,tinst) end subroutine seq_comm_jcommarr !--------------------------------------------------------- - subroutine seq_comm_printcomms() + subroutine seq_comm_printcomms(comm_in) implicit none + integer, intent(in) :: comm_in character(*),parameter :: subName = '(seq_comm_printcomms) ' integer :: n,mype,npes,ierr - call mpi_comm_size(Coupler_Comm, npes , ierr) + call mpi_comm_size(Comm_in, npes , ierr) call shr_mpi_chkerr(ierr,subname//' mpi_comm_size comm_world') - call mpi_comm_rank(Coupler_Comm, mype , ierr) + call mpi_comm_rank(Comm_in, mype , ierr) call shr_mpi_chkerr(ierr,subname//' mpi_comm_rank comm_world') call shr_sys_flush(logunit) - call mpi_barrier(Coupler_Comm,ierr) + call mpi_barrier(Comm_in,ierr) + if (mype == 0) then do n = 1,ncomps write(logunit,'(a,4i6,2x,3a)') trim(subName),n, & diff --git a/src/drivers/mct/unit_test/utils/mct_wrapper_mod.F90 b/src/drivers/mct/unit_test/utils/mct_wrapper_mod.F90 index 81448636646..c93d4924f75 100644 --- a/src/drivers/mct/unit_test/utils/mct_wrapper_mod.F90 +++ b/src/drivers/mct/unit_test/utils/mct_wrapper_mod.F90 @@ -39,7 +39,7 @@ subroutine mct_init() character(len=*), parameter :: subname = 'mct_init' !----------------------------------------------------------------------- - call seq_comm_init(Comm_in = mct_communicator, nmlfile = ' ') + call seq_comm_init(mct_communicator, mct_communicator, nmlfile = ' ') end subroutine mct_init !----------------------------------------------------------------------- From ae3b71996e6f7079ba244068f44a7f7163bcb080 Mon Sep 17 00:00:00 2001 From: Jim Edwards Date: Wed, 16 Aug 2017 10:10:01 -0600 Subject: [PATCH 39/51] working MCC test --- config/config_tests.xml | 4 +- doc/source/users_guide/multi-instance.rst | 16 +-- scripts/create_newcase | 14 +-- scripts/lib/CIME/SystemTests/mcc.py | 7 +- scripts/lib/CIME/SystemTests/pre.py | 4 +- scripts/lib/CIME/XML/env_mach_pes.py | 2 +- scripts/lib/CIME/build.py | 6 +- 
scripts/lib/CIME/case.py | 18 +-- scripts/lib/CIME/case_run.py | 2 +- scripts/lib/CIME/case_setup.py | 14 +-- scripts/lib/CIME/get_timing.py | 4 +- scripts/lib/CIME/test_scheduler.py | 2 +- .../data_comps/desp/cime_config/buildnml | 2 - src/components/xcpl_comps/xshare/dead_mod.F90 | 2 +- src/drivers/mct/cime_config/buildnml | 3 +- .../mct/cime_config/config_component.xml | 6 +- .../cime_config/namelist_definition_drv.xml | 2 +- src/drivers/mct/main/cime_comp_mod.F90 | 1 - src/drivers/mct/shr/seq_comm_mct.F90 | 114 +++++++++--------- src/externals/mct/mct/m_MCTWorld.F90 | 2 +- 20 files changed, 112 insertions(+), 113 deletions(-) diff --git a/config/config_tests.xml b/config/config_tests.xml index a21b2c98454..29daeff4d0b 100644 --- a/config/config_tests.xml +++ b/config/config_tests.xml @@ -518,14 +518,14 @@ NODEFAIL Tests restart upon detected node failure. Generates fake failu - multi-coupler validation vs single-instance (default length) + multi-driver validation vs single-instance (default length) 1 FALSE FALSE none $STOP_OPTION $STOP_N - TRUE + TRUE diff --git a/doc/source/users_guide/multi-instance.rst b/doc/source/users_guide/multi-instance.rst index 97827ef3de7..1e6ea4d7511 100644 --- a/doc/source/users_guide/multi-instance.rst +++ b/doc/source/users_guide/multi-instance.rst @@ -3,7 +3,7 @@ Multi-instance component functionality ====================================== -The CIME coupling infrastructure is capable of running multiple component instances (ensembles) under one model executable. There are two modes of ensemble capability, single coupler in which all component instances are handled by a single coupler component or multi-coupler in which each instance includes a separate coupler component. In the multi-coupler mode the entire model is duplicated for each instance while in the single coupler mode only active components need be duplicated. In most cases the multi-coupler mode will give better performance and should be used. 
+The CIME coupling infrastructure is capable of running multiple component instances (ensembles) under one model executable. There are two modes of ensemble capability, single driver in which all component instances are handled by a single driver/coupler component or multi-driver in which each instance includes a separate driver/coupler component. In the multi-driver mode the entire model is duplicated for each instance while in the single driver mode only active components need be duplicated. In most cases the multi-driver mode will give better performance and should be used. The primary motivation for this development was to be able to run an ensemble Kalman-Filter for data assimilation and parameter estimation (UQ, for example). However, it also provides the ability to run a set of experiments within a single model executable where each instance can have a different namelist, and to have all the output go to one directory. @@ -45,12 +45,12 @@ To run two instances of CAM, CLM, CICE, RTM and DOCN, invoke the following :ref: > ./xmlchange NINST_ROF=2 > ./xmlchange NINST_OCN=2 -As a result, you will have two instances of CAM, CLM and CICE (prescribed), RTM, and DOCN, each running concurrently on 72 MPI tasks and all using the same coupler component. In this single coupler mode the number of tasks for each component instance is NTASKS_COMPONENT/NINST_COMPONENT and the total number of tasks is the same as for the single instance case. +As a result, you will have two instances of CAM, CLM and CICE (prescribed), RTM, and DOCN, each running concurrently on 72 MPI tasks and all using the same driver/coupler component. In this single driver/coupler mode the number of tasks for each component instance is NTASKS_COMPONENT/NINST_COMPONENT and the total number of tasks is the same as for the single instance case. -Now consider the multi coupler model. +Now consider the multi driver model. 
To use this mode change :: - > ./xmlchange MULTI_COUPLER=TRUE + > ./xmlchange MULTI_DRIVER=TRUE This configuration will run each component instance on the original 144 tasks but will generate two copies of the model (in the same executable) for a total of 288 tasks. @@ -88,15 +88,15 @@ To change the DOCN stream txt file instance 0002, copy **docn.streams.txt.prescr Also keep these important points in mind: -#. Note that these changes can be made at create_newcase time with option --ninst # where # is a positive integer, use the additional logical option --ninst-couplers to invoke the multi-coupler mode. +#. Note that these changes can be made at create_newcase time with option --ninst # where # is a positive integer, use the additional logical option --multi-driver to invoke the multi-driver mode. #. **Multiple component instances can differ ONLY in namelist settings; they ALL use the same model executable.** #. Calling **case.setup** with ``--clean`` *DOES NOT* remove the **user_nl_xxx_NN** (where xxx is the component name) files created by **case.setup**. #. A special variable NINST_LAYOUT is provided for some experimental compsets, its value should be - 'concurrent' for all but a few special cases and it cannot be used if MULTI_COUPLER=TRUE. + 'concurrent' for all but a few special cases and it cannot be used if MULTI_DRIVER=TRUE. -#. In **create_test** these options can be invoked with testname modifiers _N# for the single coupler mode and _C# for the multi-coupler mode. These are mutually exclusive options, they cannot be combined. +#. In **create_test** these options can be invoked with testname modifiers _N# for the single driver mode and _C# for the multi-driver mode. These are mutually exclusive options, they cannot be combined. -#. In create_newcase you may use --ninst # to set the number of instances and --multi-coupler for multi-coupler mode. +#. 
In create_newcase you may use --ninst # to set the number of instances and --multi-driver for multi-driver mode. diff --git a/scripts/create_newcase b/scripts/create_newcase index 8cee8598c80..1e9aa029b62 100755 --- a/scripts/create_newcase +++ b/scripts/create_newcase @@ -46,13 +46,13 @@ OR help="Specify a compiler. " "To see list of supported compilers for each machine, use the utility query_config in this directory") - parser.add_argument("--multi-coupler",action="store_true", - help="Specify that ninst should modify number of coupler instances " - "default is to have one coupler supporting multiple component instances.") + parser.add_argument("--multi-driver",action="store_true", + help="Specify that ninst should modify number of driver/coupler instances " + "default is to have one driver/coupler supporting multiple component instances.") parser.add_argument("--ninst",default=1, help="Specify number of model ensemble instances. " - "Default is multiple components and one coupler. Use --multi-coupler to " + "Default is multiple components and one coupler. 
Use --multi-driver to " "run multiple couplers in the ensemble.") parser.add_argument("--mpilib", "-mpilib", @@ -160,7 +160,7 @@ OR return args.case, args.compset, args.res, args.machine, args.compiler,\ args.mpilib, args.project, args.pecount, \ args.user_mods_dir, args.pesfile, \ - args.user_grid, args.gridfile, args.srcroot, args.test, args.multi_coupler, \ + args.user_grid, args.gridfile, args.srcroot, args.test, args.multi_driver, \ args.ninst, args.walltime, args.queue, args.output_root, args.script_root, \ run_unsupported, args.answer, args.input_dir @@ -172,7 +172,7 @@ def _main_func(description): casename, compset, grid, machine, compiler, \ mpilib, project, pecount, \ user_mods_dir, pesfile, \ - user_grid, gridfile, srcroot, test, multi_coupler, ninst, walltime, \ + user_grid, gridfile, srcroot, test, multi_driver, ninst, walltime, \ queue, output_root, script_root, run_unsupported, \ answer, input_dir = parse_command_line(sys.argv, cimeroot, description) @@ -196,7 +196,7 @@ def _main_func(description): machine_name=machine, project=project, pecount=pecount, compiler=compiler, mpilib=mpilib, pesfile=pesfile,user_grid=user_grid, gridfile=gridfile, - multi_coupler=multi_coupler, ninst=ninst, test=test, + multi_driver=multi_driver, ninst=ninst, test=test, walltime=walltime, queue=queue, output_root=output_root, run_unsupported=run_unsupported, answer=answer, input_dir=input_dir) diff --git a/scripts/lib/CIME/SystemTests/mcc.py b/scripts/lib/CIME/SystemTests/mcc.py index ce08c0f799b..c5dab4548c3 100644 --- a/scripts/lib/CIME/SystemTests/mcc.py +++ b/scripts/lib/CIME/SystemTests/mcc.py @@ -2,7 +2,7 @@ Implemetation of CIME MCC test: Compares ensemble methods This does two runs: In the first we run a three member ensemble using the - MULTI_COUPLER capability, then we run a second single instance case and compare + MULTI_DRIVER capability, then we run a second single instance case and compare """ from CIME.XML.standard_module_setup import * from 
CIME.SystemTests.system_tests_compare_two import SystemTestsCompareTwo @@ -20,14 +20,13 @@ def __init__(self, case): separate_builds = True, run_two_suffix = 'single_instance', run_two_description = 'single instance', - run_one_description = 'multi coupler') + run_one_description = 'multi driver') def _case_one_setup(self): # The multicoupler case will increase the number of tasks by the # number of requested couplers. - self._case.set_value("MULTI_COUPLER",True) + self._case.set_value("MULTI_DRIVER",True) self._case.set_value("NINST", self._test_instances) - self._case.set_value("NINST_ESP", 1) case_setup(self._case, test_mode=False, reset=True) def _case_two_setup(self): diff --git a/scripts/lib/CIME/SystemTests/pre.py b/scripts/lib/CIME/SystemTests/pre.py index e67efda721d..47d03b5da1b 100644 --- a/scripts/lib/CIME/SystemTests/pre.py +++ b/scripts/lib/CIME/SystemTests/pre.py @@ -85,11 +85,11 @@ def run_phase(self): # pylint: disable=arguments-differ else: pause_comps = pause_comps.split(':') - multi_coupler = self._case.get_value("MULTI_COUPLER") + multi_driver = self._case.get_value("MULTI_DRIVER") for comp in pause_comps: if comp == "cpl": - if multi_coupler: + if multi_driver: ninst = self._case.get_value("NINST_MAX") else: ninst = 1 diff --git a/scripts/lib/CIME/XML/env_mach_pes.py b/scripts/lib/CIME/XML/env_mach_pes.py index c16fd30ad3c..59e7141f4c5 100644 --- a/scripts/lib/CIME/XML/env_mach_pes.py +++ b/scripts/lib/CIME/XML/env_mach_pes.py @@ -72,7 +72,7 @@ def get_total_tasks(self, comp_classes): maxinst = max(maxinst, self.get_value("NINST", attribute={"component":comp})) tt = rootpe + (ntasks - 1) * pstrid + 1 total_tasks = max(tt, total_tasks) - if self.get_value("MULTI_COUPLER"): + if self.get_value("MULTI_DRIVER"): total_tasks *= maxinst return total_tasks diff --git a/scripts/lib/CIME/build.py b/scripts/lib/CIME/build.py index a4817fb060e..bf7b3909f51 100644 --- a/scripts/lib/CIME/build.py +++ b/scripts/lib/CIME/build.py @@ -395,17 +395,17 @@ def 
_case_build_impl(caseroot, case, sharedlib_only, model_only, buildlist): incroot = os.path.abspath(case.get_value("INCROOT")) libroot = os.path.abspath(case.get_value("LIBROOT")) sharedlibroot = os.path.abspath(case.get_value("SHAREDLIBROOT")) - multi_coupler = case.get_value("MULTI_COUPLER") + multi_driver = case.get_value("MULTI_DRIVER") complist = [] ninst = 1 for comp_class in comp_classes: if comp_class == "CPL": config_dir = None - if multi_coupler: + if multi_driver: ninst = case.get_value("NINST_MAX") else: config_dir = os.path.dirname(case.get_value("CONFIG_{}_FILE".format(comp_class))) - if multi_coupler: + if multi_driver: ninst = 1 else: ninst = case.get_value("NINST_{}".format(comp_class)) diff --git a/scripts/lib/CIME/case.py b/scripts/lib/CIME/case.py index 59c15b6a22c..25e89ff7c58 100644 --- a/scripts/lib/CIME/case.py +++ b/scripts/lib/CIME/case.py @@ -636,7 +636,7 @@ def _get_component_config_data(self, files): self.clean_up_lookups() - def _setup_mach_pes(self, pecount, multi_coupler, ninst, machine_name, mpilib): + def _setup_mach_pes(self, pecount, multi_driver, ninst, machine_name, mpilib): #-------------------------------------------- # pe layout #-------------------------------------------- @@ -720,9 +720,9 @@ def _setup_mach_pes(self, pecount, multi_coupler, ninst, machine_name, mpilib): val = -1*val*pes_per_node if val > pesize: pesize = val - if multi_coupler: + if multi_driver: pesize *= int(ninst) - mach_pes_obj.set_value("MULTI_COUPLER", True) + mach_pes_obj.set_value("MULTI_DRIVER", True) # Make sure that every component has been accounted for # set, nthrds and ntasks to 1 otherwise. Also set the ninst values here. 
@@ -745,7 +745,7 @@ def _setup_mach_pes(self, pecount, multi_coupler, ninst, machine_name, mpilib): def configure(self, compset_name, grid_name, machine_name=None, project=None, pecount=None, compiler=None, mpilib=None, pesfile=None,user_grid=False, gridfile=None, - multi_coupler=False, ninst=1, test=False, + multi_driver=False, ninst=1, test=False, walltime=None, queue=None, output_root=None, run_unsupported=False, answer=None, input_dir=None): @@ -836,10 +836,10 @@ def configure(self, compset_name, grid_name, machine_name=None, env_mach_specific_obj.populate(machobj) self.schedule_rewrite(env_mach_specific_obj) - pesize = self._setup_mach_pes(pecount, multi_coupler, ninst, machine_name, mpilib) + pesize = self._setup_mach_pes(pecount, multi_driver, ninst, machine_name, mpilib) - if multi_coupler and ninst>1: - logger.info(" Coupler has %s instances" % ninst) + if multi_driver and ninst>1: + logger.info(" Driver/Coupler has %s instances" % ninst) #-------------------------------------------- # batch system @@ -1458,7 +1458,7 @@ def create(self, casename, srcroot, compset_name, grid_name, user_mods_dir=None, machine_name=None, project=None, pecount=None, compiler=None, mpilib=None, pesfile=None,user_grid=False, gridfile=None, - multi_coupler=False, ninst=1, test=False, + multi_driver=False, ninst=1, test=False, walltime=None, queue=None, output_root=None, run_unsupported=False, answer=None, input_dir=None): @@ -1473,7 +1473,7 @@ def create(self, casename, srcroot, compset_name, grid_name, project=project, pecount=pecount, compiler=compiler, mpilib=mpilib, pesfile=pesfile,user_grid=user_grid, gridfile=gridfile, - multi_coupler=multi_coupler, ninst=ninst, test=test, + multi_driver=multi_driver, ninst=ninst, test=test, walltime=walltime, queue=queue, output_root=output_root, run_unsupported=run_unsupported, answer=answer, diff --git a/scripts/lib/CIME/case_run.py b/scripts/lib/CIME/case_run.py index ee4271f573f..e2d09ef7258 100644 --- a/scripts/lib/CIME/case_run.py 
+++ b/scripts/lib/CIME/case_run.py @@ -153,7 +153,7 @@ def post_run_check(case, lid): rundir = case.get_value("RUNDIR") model = case.get_value("MODEL") cpl_ninst = 1 - if case.get_value("MULTI_COUPLER"): + if case.get_value("MULTI_DRIVER"): cpl_ninst = case.get_value("NINST_MAX") cpl_logs = [] if cpl_ninst > 1: diff --git a/scripts/lib/CIME/case_setup.py b/scripts/lib/CIME/case_setup.py index 212d7d0ff45..e621f671a10 100644 --- a/scripts/lib/CIME/case_setup.py +++ b/scripts/lib/CIME/case_setup.py @@ -34,8 +34,8 @@ def _build_usernl_files(case, model, comp): expect(os.path.isdir(model_dir), "cannot find cime_config directory {} for component {}".format(model_dir, comp)) ninst = 1 - multi_coupler = case.get_value("MULTI_COUPLER") - if multi_coupler: + multi_driver = case.get_value("MULTI_DRIVER") + if multi_driver: ninst = case.get_value("NINST_MAX") if comp == "cpl": if not os.path.exists("user_nl_cpl"): @@ -45,7 +45,7 @@ def _build_usernl_files(case, model, comp): ninst = case.get_value("NINST_{}".format(model)) nlfile = "user_nl_{}".format(comp) model_nl = os.path.join(model_dir, nlfile) - if ninst > 1 and not comp.endswith("esp"): + if ninst > 1: for inst_counter in xrange(1, ninst+1): inst_nlfile = "{}_{:04d}".format(nlfile, inst_counter) if not os.path.exists(inst_nlfile): @@ -124,7 +124,7 @@ def _case_setup_impl(case, caseroot, clean=False, test_mode=False, reset=False): # Check ninst. # In CIME there can be multiple instances of each component model (an ensemble) NINST is the instance of that component. 
- multi_coupler = case.get_value("MULTI_COUPLER") + multi_driver = case.get_value("MULTI_DRIVER") for comp in models: if comp == "CPL": continue @@ -135,10 +135,10 @@ def _case_setup_impl(case, caseroot, clean=False, test_mode=False, reset=False): case.set_value("NTASKS_{}".format(comp), ninst) else: expect(False, "NINST_{} value {:d} greater than NTASKS_{} {:d}".format(comp, ninst, comp, ntasks)) - # But the NINST_LAYOUT may only be concurrent in multi_coupler mode - if multi_coupler: + # But the NINST_LAYOUT may only be concurrent in multi_driver mode + if multi_driver: expect(case.get_value("NINST_LAYOUT_{}".format(comp)) == "concurrent", - "If multi_coupler is TRUE, NINST_LAYOUT_{} must be concurrent".format(comp)) + "If multi_driver is TRUE, NINST_LAYOUT_{} must be concurrent".format(comp)) if os.path.exists("case.run"): logger.info("Machine/Decomp/Pes configuration has already been done ...skipping") diff --git a/scripts/lib/CIME/get_timing.py b/scripts/lib/CIME/get_timing.py index e8163417123..427d4e2825c 100644 --- a/scripts/lib/CIME/get_timing.py +++ b/scripts/lib/CIME/get_timing.py @@ -94,8 +94,8 @@ def gettime(self, heading_padded): def getTiming(self): ninst = 1 - multi_coupler = self.case.get_value("MULTI_COUPLER") - if multi_coupler: + multi_driver = self.case.get_value("MULTI_DRIVER") + if multi_driver: ninst = self.case.get_value("NINST_MAX") if ninst > 1: diff --git a/scripts/lib/CIME/test_scheduler.py b/scripts/lib/CIME/test_scheduler.py index 5514ae6e90d..f2cf884b13a 100644 --- a/scripts/lib/CIME/test_scheduler.py +++ b/scripts/lib/CIME/test_scheduler.py @@ -411,7 +411,7 @@ def _create_newcase_phase(self, test): if case_opt.startswith('C'): expect(ninst == 1,"Cannot combine _C and _N options") ncpl = case_opt[1:] - create_newcase_cmd += " --ninst {} --multi-coupler" .format(ncpl) + create_newcase_cmd += " --ninst {} --multi-driver" .format(ncpl) logger.debug (" NCPL set to {}" .format(ncpl)) if case_opt.startswith('P'): pesize = case_opt[1:] diff 
--git a/src/components/data_comps/desp/cime_config/buildnml b/src/components/data_comps/desp/cime_config/buildnml index 3641dc313a8..04067b00f69 100755 --- a/src/components/data_comps/desp/cime_config/buildnml +++ b/src/components/data_comps/desp/cime_config/buildnml @@ -42,8 +42,6 @@ def _create_namelists(case, confdir, inst_string, infile, nmlgen): #---------------------------------------------------- # Check for incompatible options. #---------------------------------------------------- - # At this point, we don't know what multiple instances of ESP means - expect(len(inst_string) == 0, "Multiple ESP instances not supported") #---------------------------------------------------- # Log some settings. diff --git a/src/components/xcpl_comps/xshare/dead_mod.F90 b/src/components/xcpl_comps/xshare/dead_mod.F90 index 8b5ded6b346..51bffefa930 100644 --- a/src/components/xcpl_comps/xshare/dead_mod.F90 +++ b/src/components/xcpl_comps/xshare/dead_mod.F90 @@ -216,7 +216,7 @@ subroutine dead_setNewGrid(decomp_type,nxg,nyg,totpe,mype,lsize,gbuf,seg_len,npr i = i + 1 enddo - write(logunit,*) 'dead_setNewGrid decomp seg ',mype,lsize,nx +! write(logunit,*) 'dead_setNewGrid decomp seg ',mype,lsize,nx found = .true. diff --git a/src/drivers/mct/cime_config/buildnml b/src/drivers/mct/cime_config/buildnml index 9720b6572e2..abe6288f680 100755 --- a/src/drivers/mct/cime_config/buildnml +++ b/src/drivers/mct/cime_config/buildnml @@ -41,6 +41,7 @@ def _create_drv_namelists(case, infile, confdir, nmlgen, files): config['BUDGETS'] = case.get_value('BUDGETS') config['MACH'] = case.get_value('MACH') config['MPILIB'] = case.get_value('MPILIB') + config['MULTI_DRIVER'] = '.true.' if case.get_value('MULTI_DRIVER') else '.false.' 
config['OS'] = case.get_value('OS') config['glc_nec'] = 0 if case.get_value('GLC_NEC') == 0 else case.get_value('GLC_NEC') config['single_column'] = 'true' if case.get_value('PTS_MODE') else 'false' @@ -251,7 +252,7 @@ def _create_component_modelio_namelists(case, files): #if we are in multi-coupler mode the number of instances of cpl will be the max # of any NINST_* value maxinst = 1 - if case.get_value("MULTI_COUPLER"): + if case.get_value("MULTI_DRIVER"): maxinst = case.get_value("NINST_MAX") for model in models: diff --git a/src/drivers/mct/cime_config/config_component.xml b/src/drivers/mct/cime_config/config_component.xml index ab89c2287db..6763c090f68 100644 --- a/src/drivers/mct/cime_config/config_component.xml +++ b/src/drivers/mct/cime_config/config_component.xml @@ -1975,9 +1975,9 @@ mach_pes env_mach_pes.xml - Number of instances for each component. If MULTI_COUPLER is True - only NINST_CPL is used and all components have NINST_CPL instances; - if MULTI_COUPLER is False NINST_CPL is 1. + Number of instances for each component. If MULTI_DRIVER is True + the NINST_MAX value will be used. 
+ diff --git a/src/drivers/mct/cime_config/namelist_definition_drv.xml b/src/drivers/mct/cime_config/namelist_definition_drv.xml index 3c167f7a211..083b03fda4f 100644 --- a/src/drivers/mct/cime_config/namelist_definition_drv.xml +++ b/src/drivers/mct/cime_config/namelist_definition_drv.xml @@ -53,7 +53,7 @@ 1 - $NINST_MAX + $NINST_MAX diff --git a/src/drivers/mct/main/cime_comp_mod.F90 b/src/drivers/mct/main/cime_comp_mod.F90 index 518f44e2360..fba2aa7cc71 100644 --- a/src/drivers/mct/main/cime_comp_mod.F90 +++ b/src/drivers/mct/main/cime_comp_mod.F90 @@ -4116,7 +4116,6 @@ subroutine cime_cpl_init(comm_in, comm_out, num_inst_driver, id) call shr_mpi_chkerr(ierr,subname//' mpi_comm_split') end if call shr_mpi_commsize(comm_out, drvpes, ' cime_cpl_init') - print *,__FILE__,__LINE__,numpes,drvpes end subroutine cime_cpl_init end module cime_comp_mod diff --git a/src/drivers/mct/shr/seq_comm_mct.F90 b/src/drivers/mct/shr/seq_comm_mct.F90 index 9c50957daab..23357583594 100644 --- a/src/drivers/mct/shr/seq_comm_mct.F90 +++ b/src/drivers/mct/shr/seq_comm_mct.F90 @@ -82,11 +82,11 @@ module seq_comm_mct num_inst_wav + & num_inst_rof + & num_inst_esp + 1 - integer, public :: num_inst_driver = 1 - integer, public :: cpl_inst_iamin = 1 + integer, public :: num_inst_min, num_inst_max integer, public :: num_inst_xao ! for xao flux integer, public :: num_inst_frc ! for fractions + integer, public :: num_inst_driver = 1 !!! Each component instance needs two communicators: one internal to the !!! instance, and one for communicating with the coupler. @@ -151,7 +151,6 @@ module seq_comm_mct ! suffix for log and timing files if multi coupler driver character(len=seq_comm_namelen), public :: cpl_inst_tag - type seq_comm_type character(len=seq_comm_namelen) :: name ! my name character(len=seq_comm_namelen) :: suffix ! 
recommended suffix @@ -185,6 +184,8 @@ module seq_comm_mct character(*), parameter :: F12 = "(a,a,'(',i3,' ',a,')',a,2i6,6x,' (',a,i6,')',' (',a,i3,')','(',a,2i6,')')" character(*), parameter :: F13 = "(a,a,'(',i3,' ',a,')',a,2i6,6x,' (',a,i6,')',' (',a,i3,')')" character(*), parameter :: F14 = "(a,a,'(',i3,' ',a,')',a, 6x,' (',a,i6,')',' (',a,i3,')')" + integer :: Global_Comm + character(len=32), public :: & atm_layout, lnd_layout, ice_layout, glc_layout, rof_layout, & @@ -199,16 +200,15 @@ integer function seq_comm_get_ncomps() seq_comm_get_ncomps = ncomps end function seq_comm_get_ncomps - subroutine seq_comm_init(Global_comm, Driver_Comm, nmlfile, Drv_comm_id) + subroutine seq_comm_init(global_comm_in, driver_comm_in, nmlfile, drv_comm_id) !---------------------------------------------------------- ! ! Arguments implicit none - integer, intent(in) :: Comm_in + integer, intent(in) :: global_comm_in + integer, intent(in) :: driver_comm_in character(len=*), intent(IN) :: nmlfile - ! Optional argument cpl_comm_id is used to identify the particular - ! coupler instance used by each component instance in a multi-coupler case. - integer, optional, intent(in) :: Cpl_comm_id + integer, intent(in), optional :: drv_comm_id ! ! Local variables ! @@ -243,12 +243,14 @@ subroutine seq_comm_init(Global_comm, Driver_Comm, nmlfile, Drv_comm_id) esp_ntasks, esp_rootpe, esp_pestride, esp_nthreads, esp_layout, & cpl_ntasks, cpl_rootpe, cpl_pestride, cpl_nthreads !---------------------------------------------------------- + ! make sure this is first pass and set comms unset if (seq_comm_mct_initialized) then write(logunit,*) trim(subname),' ERROR seq_comm_init already called ' call shr_sys_abort() endif seq_comm_mct_initialized = .true. + Global_Comm = driver_comm_in call mpi_comm_dup(Comm_in, Coupler_Comm, ierr) call shr_mpi_chkerr(ierr,subname//' mpi_comm_dup') @@ -276,10 +278,17 @@ subroutine seq_comm_init(Global_comm, Driver_Comm, nmlfile, Drv_comm_id) ! Initialize MPI ! 
Note that if no MPI, will call MCTs fake version - call mpi_comm_rank(Coupler_Comm, mype , ierr) - call shr_mpi_chkerr(ierr,subname//' mpi_comm_rank comm_world') - call mpi_comm_size(Coupler_Comm, numpes, ierr) + call mpi_comm_size(GLOBAL_COMM_IN, global_numpes , ierr) call shr_mpi_chkerr(ierr,subname//' mpi_comm_size comm_world') + call mpi_comm_rank(GLOBAL_COMM, mype , ierr) + call shr_mpi_chkerr(ierr,subname//' mpi_comm_rank driver') + call mpi_comm_size(GLOBAL_COMM, numpes, ierr) + call shr_mpi_chkerr(ierr,subname//' mpi_comm_size driver') + + if (mod(global_numpes, numpes) .ne. 0) then + write(logunit,*) trim(subname),' ERROR: numpes driver: ', numpes, ' should divide global_numpes: ',global_numpes + call shr_sys_abort(trim(subname)//' ERROR decomposition error ') + endif ! Initialize gloiam on all IDs @@ -339,6 +348,7 @@ subroutine seq_comm_init(Global_comm, Driver_Comm, nmlfile, Drv_comm_id) !--- compute some other num_inst values + num_inst_xao = max(num_inst_atm,num_inst_ocn) num_inst_frc = num_inst_ice @@ -381,46 +391,38 @@ subroutine seq_comm_init(Global_comm, Driver_Comm, nmlfile, Drv_comm_id) count = count + 1 CPLID = count - if (global_mype == 0) then - pelist(1,1) = 0 - pelist(2,1) = global_numpes-1 - pelist(3,1) = 1 - end if - call mpi_bcast(pelist, size(pelist), MPI_INTEGER, 0, global_comm, ierr) - call seq_comm_setcomm(global_comm, GLOID, pelist,iname='GLOBAL') - if (mype == 0) then pelist(1,1) = 0 pelist(2,1) = numpes-1 pelist(3,1) = 1 end if - call mpi_bcast(pelist, size(pelist), MPI_INTEGER, 0, driver_comm, ierr) - call seq_comm_setcomm(driver_comm, DRVID, pelist,iname='DRIVER') + call mpi_bcast(pelist, size(pelist), MPI_INTEGER, 0, GLOBAL_COMM, ierr) + call seq_comm_setcomm(GLOID, pelist,iname='GLOBAL') if (mype == 0) then pelist(1,1) = cpl_rootpe pelist(2,1) = cpl_rootpe + (cpl_ntasks -1) * cpl_pestride pelist(3,1) = cpl_pestride end if - call mpi_bcast(pelist, size(pelist), MPI_INTEGER, 0, Driver_comm, ierr) - call 
seq_comm_setcomm(driver_comm, CPLID,pelist,cpl_nthreads,'CPL') + call mpi_bcast(pelist, size(pelist), MPI_INTEGER, 0, GLOBAL_COMM, ierr) + call seq_comm_setcomm(CPLID,pelist,cpl_nthreads,'CPL') call comp_comm_init(global_comm, atm_rootpe, atm_nthreads, atm_layout, atm_ntasks, atm_pestride, num_inst_atm, & - CPLID, ATMID, CPLATMID, ALLATMID, CPLALLATMID, 'ATM', count) + CPLID, ATMID, CPLATMID, ALLATMID, CPLALLATMID, 'ATM', count, drv_comm_id) call comp_comm_init(global_comm, lnd_rootpe, lnd_nthreads, lnd_layout, lnd_ntasks, lnd_pestride, num_inst_lnd, & - CPLID, LNDID, CPLLNDID, ALLLNDID, CPLALLLNDID, 'LND', count) + CPLID, LNDID, CPLLNDID, ALLLNDID, CPLALLLNDID, 'LND', count, drv_comm_id) call comp_comm_init(global_comm, ice_rootpe, ice_nthreads, ice_layout, ice_ntasks, ice_pestride, num_inst_ice, & - CPLID, ICEID, CPLICEID, ALLICEID, CPLALLICEID, 'ICE', count) + CPLID, ICEID, CPLICEID, ALLICEID, CPLALLICEID, 'ICE', count, drv_comm_id) call comp_comm_init(global_comm, ocn_rootpe, ocn_nthreads, ocn_layout, ocn_ntasks, ocn_pestride, num_inst_ocn, & - CPLID, OCNID, CPLOCNID, ALLOCNID, CPLALLOCNID, 'OCN', count) + CPLID, OCNID, CPLOCNID, ALLOCNID, CPLALLOCNID, 'OCN', count, drv_comm_id) call comp_comm_init(global_comm, rof_rootpe, rof_nthreads, rof_layout, rof_ntasks, rof_pestride, num_inst_rof, & - CPLID, ROFID, CPLROFID, ALLROFID, CPLALLROFID, 'ROF', count) + CPLID, ROFID, CPLROFID, ALLROFID, CPLALLROFID, 'ROF', count, drv_comm_id) call comp_comm_init(global_comm, glc_rootpe, glc_nthreads, glc_layout, glc_ntasks, glc_pestride, num_inst_glc, & - CPLID, GLCID, CPLGLCID, ALLGLCID, CPLALLGLCID, 'GLC', count) + CPLID, GLCID, CPLGLCID, ALLGLCID, CPLALLGLCID, 'GLC', count, drv_comm_id) call comp_comm_init(global_comm, wav_rootpe, wav_nthreads, wav_layout, wav_ntasks, wav_pestride, num_inst_wav, & - CPLID, WAVID, CPLWAVID, ALLWAVID, CPLALLWAVID, 'WAV', count) + CPLID, WAVID, CPLWAVID, ALLWAVID, CPLALLWAVID, 'WAV', count, drv_comm_id) call comp_comm_init(global_comm, 
esp_rootpe, esp_nthreads, esp_layout, esp_ntasks, esp_pestride, num_inst_esp, & - CPLID, ESPID, CPLESPID, ALLESPID, CPLALLESPID, 'ESP', count) + CPLID, ESPID, CPLESPID, ALLESPID, CPLALLESPID, 'ESP', count, drv_comm_id) if (count /= ncomps) then write(logunit,*) trim(subname),' ERROR in ID count ',count,ncomps @@ -441,7 +443,7 @@ subroutine seq_comm_init(Global_comm, Driver_Comm, nmlfile, Drv_comm_id) do n = 1,ncomps gloroot = -999 if (seq_comms(n)%iamroot) gloroot = seq_comms(n)%gloiam - call shr_mpi_max(gloroot,seq_comms(n)%gloroot,Coupler_Comm, & + call shr_mpi_max(gloroot,seq_comms(n)%gloroot,GLOBAL_COMM, & trim(subname)//' gloroot',all=.true.) enddo @@ -479,17 +481,18 @@ subroutine seq_comm_init(Global_comm, Driver_Comm, nmlfile, Drv_comm_id) call shr_sys_abort() endif - call mct_world_init(ncomps, Coupler_Comm, comms, comps) + call mct_world_init(ncomps, GLOBAL_COMM, comms, comps) deallocate(comps,comms) - call seq_comm_printcomms(global_comm) + + call seq_comm_printcomms() end subroutine seq_comm_init subroutine comp_comm_init(global_comm, comp_rootpe, comp_nthreads, comp_layout, & comp_ntasks, comp_pestride, num_inst_comp, & - CPLID, COMPID, CPLCOMPID, ALLCOMPID, CPLALLCOMPID, name, count) + CPLID, COMPID, CPLCOMPID, ALLCOMPID, CPLALLCOMPID, name, count, drv_comm_id) integer, intent(in) :: global_comm integer, intent(in) :: comp_rootpe integer, intent(in) :: comp_nthreads @@ -503,6 +506,7 @@ subroutine comp_comm_init(global_comm, comp_rootpe, comp_nthreads, comp_layout, integer, intent(out) :: ALLCOMPID integer, intent(out) :: CPLALLCOMPID integer, intent(inout) :: count + integer, intent(in), optional :: drv_comm_id character(len=*), intent(in) :: name character(len=*), parameter :: subname = "comp_comm_init" @@ -566,11 +570,15 @@ subroutine comp_comm_init(global_comm, comp_rootpe, comp_nthreads, comp_layout, pelist(3,1) = cstr(n) endif call mpi_bcast(pelist, size(pelist), MPI_INTEGER, 0, GLOBAL_COMM, ierr) - call seq_comm_setcomm(driver_comm, COMPID(n), 
pelist, comp_nthreads,name, n, num_inst_comp) - call seq_comm_joincomm(driver_comm, CPLID, COMPID(n), CPLCOMPID(n), 'CPL'//name, n, num_inst_comp) + if (present(drv_comm_id)) then + call seq_comm_setcomm(COMPID(n), pelist, comp_nthreads,name, drv_comm_id) + else + call seq_comm_setcomm(COMPID(n), pelist, comp_nthreads,name, n, num_inst_comp) + endif + call seq_comm_joincomm(CPLID, COMPID(n), CPLCOMPID(n), 'CPL'//name, n, num_inst_comp) enddo - call seq_comm_jcommarr(global_comm, COMPID, ALLCOMPID, 'ALL'//name//'ID', 1, 1) - call seq_comm_joincomm(driver_comm, CPLID, ALLCOMPID, CPLALLCOMPID, 'CPLALL'//name//'ID', 1, 1) + call seq_comm_jcommarr(COMPID, ALLCOMPID, 'ALL'//name//'ID', 1, 1) + call seq_comm_joincomm(CPLID, ALLCOMPID, CPLALLCOMPID, 'CPLALL'//name//'ID', 1, 1) end subroutine comp_comm_init @@ -613,10 +621,9 @@ subroutine seq_comm_clean() end subroutine seq_comm_clean !--------------------------------------------------------- - subroutine seq_comm_setcomm(comm_in, ID,pelist,nthreads,iname,inst,tinst) + subroutine seq_comm_setcomm(ID,pelist,nthreads,iname,inst,tinst) implicit none - integer, intent(in) :: comm_in integer,intent(IN) :: ID integer,intent(IN) :: pelist(:,:) integer,intent(IN),optional :: nthreads @@ -638,11 +645,11 @@ subroutine seq_comm_setcomm(comm_in, ID,pelist,nthreads,iname,inst,tinst) call shr_sys_abort() endif - call mpi_comm_group(Comm_in, mpigrp_world, ierr) + call mpi_comm_group(GLOBAL_COMM, mpigrp_world, ierr) call shr_mpi_chkerr(ierr,subname//' mpi_comm_group mpigrp_world') call mpi_group_range_incl(mpigrp_world, 1, pelist, mpigrp,ierr) call shr_mpi_chkerr(ierr,subname//' mpi_group_range_incl mpigrp') - call mpi_comm_create(Comm_in, mpigrp, mpicom, ierr) + call mpi_comm_create(GLOBAL_COMM, mpigrp, mpicom, ierr) call shr_mpi_chkerr(ierr,subname//' mpi_comm_create mpigrp') @@ -711,10 +718,9 @@ subroutine seq_comm_setcomm(comm_in, ID,pelist,nthreads,iname,inst,tinst) end subroutine seq_comm_setcomm 
!--------------------------------------------------------- - subroutine seq_comm_joincomm(COMM_IN, ID1,ID2,ID,iname,inst,tinst) + subroutine seq_comm_joincomm(ID1,ID2,ID,iname,inst,tinst) implicit none - integer, intent(in) :: comm_in integer,intent(IN) :: ID1 ! src id integer,intent(IN) :: ID2 ! srd id integer,intent(IN) :: ID ! computed id @@ -756,8 +762,7 @@ subroutine seq_comm_joincomm(COMM_IN, ID1,ID2,ID,iname,inst,tinst) call mpi_group_union(seq_comms(ID1)%mpigrp,seq_comms(ID2)%mpigrp,mpigrp,ierr) call shr_mpi_chkerr(ierr,subname//' mpi_comm_union mpigrp') - call mpi_comm_create(Comm_in, mpigrp, mpicom, ierr) - + call mpi_comm_create(GLOBAL_COMM, mpigrp, mpicom, ierr) call shr_mpi_chkerr(ierr,subname//' mpi_comm_create mpigrp') seq_comms(ID)%set = .true. @@ -835,10 +840,9 @@ subroutine seq_comm_joincomm(COMM_IN, ID1,ID2,ID,iname,inst,tinst) end subroutine seq_comm_joincomm !--------------------------------------------------------- - subroutine seq_comm_jcommarr(comm_in, IDs,ID,iname,inst,tinst) + subroutine seq_comm_jcommarr(IDs,ID,iname,inst,tinst) implicit none - integer, intent(in) :: comm_in integer,intent(IN) :: IDs(:) ! src id integer,intent(IN) :: ID ! computed id character(len=*),intent(IN),optional :: iname ! comm name @@ -883,7 +887,7 @@ subroutine seq_comm_jcommarr(comm_in, IDs,ID,iname,inst,tinst) call mpi_group_union(mpigrpp,seq_comms(IDs(n))%mpigrp,mpigrp,ierr) call shr_mpi_chkerr(ierr,subname//' mpi_comm_union mpigrp') enddo - call mpi_comm_create(Comm_in, mpigrp, mpicom, ierr) + call mpi_comm_create(GLOBAL_COMM, mpigrp, mpicom, ierr) call shr_mpi_chkerr(ierr,subname//' mpi_comm_create mpigrp') seq_comms(ID)%set = .true. 
@@ -957,21 +961,19 @@ subroutine seq_comm_jcommarr(comm_in, IDs,ID,iname,inst,tinst) end subroutine seq_comm_jcommarr !--------------------------------------------------------- - subroutine seq_comm_printcomms(comm_in) + subroutine seq_comm_printcomms() implicit none - integer, intent(in) :: comm_in character(*),parameter :: subName = '(seq_comm_printcomms) ' integer :: n,mype,npes,ierr - call mpi_comm_size(Comm_in, npes , ierr) + call mpi_comm_size(GLOBAL_COMM, npes , ierr) call shr_mpi_chkerr(ierr,subname//' mpi_comm_size comm_world') - call mpi_comm_rank(Comm_in, mype , ierr) + call mpi_comm_rank(GLOBAL_COMM, mype , ierr) call shr_mpi_chkerr(ierr,subname//' mpi_comm_rank comm_world') call shr_sys_flush(logunit) - call mpi_barrier(Comm_in,ierr) - + call mpi_barrier(GLOBAL_COMM,ierr) if (mype == 0) then do n = 1,ncomps write(logunit,'(a,4i6,2x,3a)') trim(subName),n, & diff --git a/src/externals/mct/mct/m_MCTWorld.F90 b/src/externals/mct/mct/m_MCTWorld.F90 index 3ec6498526e..1c7ab01d495 100644 --- a/src/externals/mct/mct/m_MCTWorld.F90 +++ b/src/externals/mct/mct/m_MCTWorld.F90 @@ -282,6 +282,7 @@ subroutine initm_(ncomps,globalcomm,mycomms,myids) ! allocate a tmp array for the receive on root. 
if(myGid == 0) then + print *,__FILE__,__LINE__,Gsize, ncomps allocate(tmparray(0:Gsize-1,ncomps),stat=ier) if(ier/=0) call die(myname_,'allocate(tmparray)',ier) @@ -880,4 +881,3 @@ end subroutine printnp_ end module m_MCTWorld - From a94f377cdfd482933eff857c8c867c3c424d7a76 Mon Sep 17 00:00:00 2001 From: Jim Edwards Date: Wed, 16 Aug 2017 12:20:55 -0600 Subject: [PATCH 40/51] remove esp single inst restriction --- src/drivers/mct/cime_config/buildnml | 4 +- src/drivers/mct/shr/seq_comm_mct.F90 | 90 ++++++++++++++-------------- 2 files changed, 47 insertions(+), 47 deletions(-) diff --git a/src/drivers/mct/cime_config/buildnml b/src/drivers/mct/cime_config/buildnml index abe6288f680..5d6d57ee7a2 100755 --- a/src/drivers/mct/cime_config/buildnml +++ b/src/drivers/mct/cime_config/buildnml @@ -263,10 +263,8 @@ def _create_component_modelio_namelists(case, files): entries = nmlgen.init_defaults(infiles, config, skip_entry_loop=True) if maxinst == 1 and model != 'cpl': inst_count = case.get_value("NINST_" + model.upper()) - elif model != 'esp': - inst_count = maxinst else: - inst_count = 1 + inst_count = maxinst inst_string = "" inst_index = 1 diff --git a/src/drivers/mct/shr/seq_comm_mct.F90 b/src/drivers/mct/shr/seq_comm_mct.F90 index 23357583594..8f2d26aa98d 100644 --- a/src/drivers/mct/shr/seq_comm_mct.F90 +++ b/src/drivers/mct/shr/seq_comm_mct.F90 @@ -185,7 +185,7 @@ module seq_comm_mct character(*), parameter :: F13 = "(a,a,'(',i3,' ',a,')',a,2i6,6x,' (',a,i6,')',' (',a,i3,')')" character(*), parameter :: F14 = "(a,a,'(',i3,' ',a,')',a, 6x,' (',a,i6,')',' (',a,i3,')')" integer :: Global_Comm - + integer :: driver_comm character(len=32), public :: & atm_layout, lnd_layout, ice_layout, glc_layout, rof_layout, & @@ -250,7 +250,8 @@ subroutine seq_comm_init(global_comm_in, driver_comm_in, nmlfile, drv_comm_id) call shr_sys_abort() endif seq_comm_mct_initialized = .true. 
- Global_Comm = driver_comm_in + global_comm = global_comm_in + driver_comm = driver_comm_in call mpi_comm_dup(Comm_in, Coupler_Comm, ierr) call shr_mpi_chkerr(ierr,subname//' mpi_comm_dup') @@ -280,9 +281,9 @@ subroutine seq_comm_init(global_comm_in, driver_comm_in, nmlfile, drv_comm_id) call mpi_comm_size(GLOBAL_COMM_IN, global_numpes , ierr) call shr_mpi_chkerr(ierr,subname//' mpi_comm_size comm_world') - call mpi_comm_rank(GLOBAL_COMM, mype , ierr) + call mpi_comm_rank(DRIVER_COMM, mype , ierr) call shr_mpi_chkerr(ierr,subname//' mpi_comm_rank driver') - call mpi_comm_size(GLOBAL_COMM, numpes, ierr) + call mpi_comm_size(DRIVER_COMM, numpes, ierr) call shr_mpi_chkerr(ierr,subname//' mpi_comm_size driver') if (mod(global_numpes, numpes) .ne. 0) then @@ -327,24 +328,24 @@ subroutine seq_comm_init(global_comm_in, driver_comm_in, nmlfile, drv_comm_id) call shr_file_freeUnit(nu) end if - call shr_mpi_bcast(atm_nthreads,GLOBAL_COMM,'atm_nthreads') - call shr_mpi_bcast(lnd_nthreads,GLOBAL_COMM,'lnd_nthreads') - call shr_mpi_bcast(ocn_nthreads,GLOBAL_COMM,'ocn_nthreads') - call shr_mpi_bcast(ice_nthreads,GLOBAL_COMM,'ice_nthreads') - call shr_mpi_bcast(glc_nthreads,GLOBAL_COMM,'glc_nthreads') - call shr_mpi_bcast(wav_nthreads,GLOBAL_COMM,'wav_nthreads') - call shr_mpi_bcast(rof_nthreads,GLOBAL_COMM,'rof_nthreads') - call shr_mpi_bcast(esp_nthreads,GLOBAL_COMM,'esp_nthreads') - call shr_mpi_bcast(cpl_nthreads,GLOBAL_COMM,'cpl_nthreads') - - call shr_mpi_bcast(atm_layout,GLOBAL_COMM,'atm_layout') - call shr_mpi_bcast(lnd_layout,GLOBAL_COMM,'lnd_layout') - call shr_mpi_bcast(ocn_layout,GLOBAL_COMM,'ocn_layout') - call shr_mpi_bcast(ice_layout,GLOBAL_COMM,'ice_layout') - call shr_mpi_bcast(glc_layout,GLOBAL_COMM,'glc_layout') - call shr_mpi_bcast(wav_layout,GLOBAL_COMM,'wav_layout') - call shr_mpi_bcast(rof_layout,GLOBAL_COMM,'rof_layout') - call shr_mpi_bcast(esp_layout,GLOBAL_COMM,'esp_layout') + call shr_mpi_bcast(atm_nthreads,DRIVER_COMM,'atm_nthreads') + call 
shr_mpi_bcast(lnd_nthreads,DRIVER_COMM,'lnd_nthreads') + call shr_mpi_bcast(ocn_nthreads,DRIVER_COMM,'ocn_nthreads') + call shr_mpi_bcast(ice_nthreads,DRIVER_COMM,'ice_nthreads') + call shr_mpi_bcast(glc_nthreads,DRIVER_COMM,'glc_nthreads') + call shr_mpi_bcast(wav_nthreads,DRIVER_COMM,'wav_nthreads') + call shr_mpi_bcast(rof_nthreads,DRIVER_COMM,'rof_nthreads') + call shr_mpi_bcast(esp_nthreads,DRIVER_COMM,'esp_nthreads') + call shr_mpi_bcast(cpl_nthreads,DRIVER_COMM,'cpl_nthreads') + + call shr_mpi_bcast(atm_layout,DRIVER_COMM,'atm_layout') + call shr_mpi_bcast(lnd_layout,DRIVER_COMM,'lnd_layout') + call shr_mpi_bcast(ocn_layout,DRIVER_COMM,'ocn_layout') + call shr_mpi_bcast(ice_layout,DRIVER_COMM,'ice_layout') + call shr_mpi_bcast(glc_layout,DRIVER_COMM,'glc_layout') + call shr_mpi_bcast(wav_layout,DRIVER_COMM,'wav_layout') + call shr_mpi_bcast(rof_layout,DRIVER_COMM,'rof_layout') + call shr_mpi_bcast(esp_layout,DRIVER_COMM,'esp_layout') !--- compute some other num_inst values @@ -396,7 +397,7 @@ subroutine seq_comm_init(global_comm_in, driver_comm_in, nmlfile, drv_comm_id) pelist(2,1) = numpes-1 pelist(3,1) = 1 end if - call mpi_bcast(pelist, size(pelist), MPI_INTEGER, 0, GLOBAL_COMM, ierr) + call mpi_bcast(pelist, size(pelist), MPI_INTEGER, 0, DRIVER_COMM, ierr) call seq_comm_setcomm(GLOID, pelist,iname='GLOBAL') if (mype == 0) then @@ -404,24 +405,24 @@ subroutine seq_comm_init(global_comm_in, driver_comm_in, nmlfile, drv_comm_id) pelist(2,1) = cpl_rootpe + (cpl_ntasks -1) * cpl_pestride pelist(3,1) = cpl_pestride end if - call mpi_bcast(pelist, size(pelist), MPI_INTEGER, 0, GLOBAL_COMM, ierr) + call mpi_bcast(pelist, size(pelist), MPI_INTEGER, 0, DRIVER_COMM, ierr) call seq_comm_setcomm(CPLID,pelist,cpl_nthreads,'CPL') - call comp_comm_init(global_comm, atm_rootpe, atm_nthreads, atm_layout, atm_ntasks, atm_pestride, num_inst_atm, & + call comp_comm_init(driver_comm, atm_rootpe, atm_nthreads, atm_layout, atm_ntasks, atm_pestride, num_inst_atm, & CPLID, ATMID, 
CPLATMID, ALLATMID, CPLALLATMID, 'ATM', count, drv_comm_id) - call comp_comm_init(global_comm, lnd_rootpe, lnd_nthreads, lnd_layout, lnd_ntasks, lnd_pestride, num_inst_lnd, & + call comp_comm_init(driver_comm, lnd_rootpe, lnd_nthreads, lnd_layout, lnd_ntasks, lnd_pestride, num_inst_lnd, & CPLID, LNDID, CPLLNDID, ALLLNDID, CPLALLLNDID, 'LND', count, drv_comm_id) - call comp_comm_init(global_comm, ice_rootpe, ice_nthreads, ice_layout, ice_ntasks, ice_pestride, num_inst_ice, & + call comp_comm_init(driver_comm, ice_rootpe, ice_nthreads, ice_layout, ice_ntasks, ice_pestride, num_inst_ice, & CPLID, ICEID, CPLICEID, ALLICEID, CPLALLICEID, 'ICE', count, drv_comm_id) - call comp_comm_init(global_comm, ocn_rootpe, ocn_nthreads, ocn_layout, ocn_ntasks, ocn_pestride, num_inst_ocn, & + call comp_comm_init(driver_comm, ocn_rootpe, ocn_nthreads, ocn_layout, ocn_ntasks, ocn_pestride, num_inst_ocn, & CPLID, OCNID, CPLOCNID, ALLOCNID, CPLALLOCNID, 'OCN', count, drv_comm_id) - call comp_comm_init(global_comm, rof_rootpe, rof_nthreads, rof_layout, rof_ntasks, rof_pestride, num_inst_rof, & + call comp_comm_init(driver_comm, rof_rootpe, rof_nthreads, rof_layout, rof_ntasks, rof_pestride, num_inst_rof, & CPLID, ROFID, CPLROFID, ALLROFID, CPLALLROFID, 'ROF', count, drv_comm_id) - call comp_comm_init(global_comm, glc_rootpe, glc_nthreads, glc_layout, glc_ntasks, glc_pestride, num_inst_glc, & + call comp_comm_init(driver_comm, glc_rootpe, glc_nthreads, glc_layout, glc_ntasks, glc_pestride, num_inst_glc, & CPLID, GLCID, CPLGLCID, ALLGLCID, CPLALLGLCID, 'GLC', count, drv_comm_id) - call comp_comm_init(global_comm, wav_rootpe, wav_nthreads, wav_layout, wav_ntasks, wav_pestride, num_inst_wav, & + call comp_comm_init(driver_comm, wav_rootpe, wav_nthreads, wav_layout, wav_ntasks, wav_pestride, num_inst_wav, & CPLID, WAVID, CPLWAVID, ALLWAVID, CPLALLWAVID, 'WAV', count, drv_comm_id) - call comp_comm_init(global_comm, esp_rootpe, esp_nthreads, esp_layout, esp_ntasks, esp_pestride, num_inst_esp, & 
+ call comp_comm_init(driver_comm, esp_rootpe, esp_nthreads, esp_layout, esp_ntasks, esp_pestride, num_inst_esp, & CPLID, ESPID, CPLESPID, ALLESPID, CPLALLESPID, 'ESP', count, drv_comm_id) if (count /= ncomps) then @@ -443,7 +444,7 @@ subroutine seq_comm_init(global_comm_in, driver_comm_in, nmlfile, drv_comm_id) do n = 1,ncomps gloroot = -999 if (seq_comms(n)%iamroot) gloroot = seq_comms(n)%gloiam - call shr_mpi_max(gloroot,seq_comms(n)%gloroot,GLOBAL_COMM, & + call shr_mpi_max(gloroot,seq_comms(n)%gloroot,DRIVER_COMM, & trim(subname)//' gloroot',all=.true.) enddo @@ -481,7 +482,7 @@ subroutine seq_comm_init(global_comm_in, driver_comm_in, nmlfile, drv_comm_id) call shr_sys_abort() endif - call mct_world_init(ncomps, GLOBAL_COMM, comms, comps) + call mct_world_init(ncomps, DRIVER_COMM, comms, comps) deallocate(comps,comms) @@ -490,10 +491,10 @@ subroutine seq_comm_init(global_comm_in, driver_comm_in, nmlfile, drv_comm_id) end subroutine seq_comm_init - subroutine comp_comm_init(global_comm, comp_rootpe, comp_nthreads, comp_layout, & + subroutine comp_comm_init(driver_comm, comp_rootpe, comp_nthreads, comp_layout, & comp_ntasks, comp_pestride, num_inst_comp, & CPLID, COMPID, CPLCOMPID, ALLCOMPID, CPLALLCOMPID, name, count, drv_comm_id) - integer, intent(in) :: global_comm + integer, intent(in) :: driver_comm integer, intent(in) :: comp_rootpe integer, intent(in) :: comp_nthreads character(len=*), intent(in) :: comp_layout @@ -519,7 +520,7 @@ subroutine comp_comm_init(global_comm, comp_rootpe, comp_nthreads, comp_layout, integer :: ierr integer :: mype - call mpi_comm_rank(global_comm, mype, ierr) + call mpi_comm_rank(driver_comm, mype, ierr) count = count + 1 ALLCOMPID = count @@ -569,7 +570,7 @@ subroutine comp_comm_init(global_comm, comp_rootpe, comp_nthreads, comp_layout, pelist(2,1) = cmax(n) pelist(3,1) = cstr(n) endif - call mpi_bcast(pelist, size(pelist), MPI_INTEGER, 0, GLOBAL_COMM, ierr) + call mpi_bcast(pelist, size(pelist), MPI_INTEGER, 0, DRIVER_COMM, 
ierr) if (present(drv_comm_id)) then call seq_comm_setcomm(COMPID(n), pelist, comp_nthreads,name, drv_comm_id) else @@ -645,11 +646,11 @@ subroutine seq_comm_setcomm(ID,pelist,nthreads,iname,inst,tinst) call shr_sys_abort() endif - call mpi_comm_group(GLOBAL_COMM, mpigrp_world, ierr) + call mpi_comm_group(DRIVER_COMM, mpigrp_world, ierr) call shr_mpi_chkerr(ierr,subname//' mpi_comm_group mpigrp_world') call mpi_group_range_incl(mpigrp_world, 1, pelist, mpigrp,ierr) call shr_mpi_chkerr(ierr,subname//' mpi_group_range_incl mpigrp') - call mpi_comm_create(GLOBAL_COMM, mpigrp, mpicom, ierr) + call mpi_comm_create(DRIVER_COMM, mpigrp, mpicom, ierr) call shr_mpi_chkerr(ierr,subname//' mpi_comm_create mpigrp') @@ -762,7 +763,7 @@ subroutine seq_comm_joincomm(ID1,ID2,ID,iname,inst,tinst) call mpi_group_union(seq_comms(ID1)%mpigrp,seq_comms(ID2)%mpigrp,mpigrp,ierr) call shr_mpi_chkerr(ierr,subname//' mpi_comm_union mpigrp') - call mpi_comm_create(GLOBAL_COMM, mpigrp, mpicom, ierr) + call mpi_comm_create(DRIVER_COMM, mpigrp, mpicom, ierr) call shr_mpi_chkerr(ierr,subname//' mpi_comm_create mpigrp') seq_comms(ID)%set = .true. @@ -887,6 +888,7 @@ subroutine seq_comm_jcommarr(IDs,ID,iname,inst,tinst) call mpi_group_union(mpigrpp,seq_comms(IDs(n))%mpigrp,mpigrp,ierr) call shr_mpi_chkerr(ierr,subname//' mpi_comm_union mpigrp') enddo + ! The allcompid is created across multiple drivers. 
call mpi_comm_create(GLOBAL_COMM, mpigrp, mpicom, ierr) call shr_mpi_chkerr(ierr,subname//' mpi_comm_create mpigrp') @@ -967,13 +969,13 @@ subroutine seq_comm_printcomms() character(*),parameter :: subName = '(seq_comm_printcomms) ' integer :: n,mype,npes,ierr - call mpi_comm_size(GLOBAL_COMM, npes , ierr) + call mpi_comm_size(DRIVER_COMM, npes , ierr) call shr_mpi_chkerr(ierr,subname//' mpi_comm_size comm_world') - call mpi_comm_rank(GLOBAL_COMM, mype , ierr) + call mpi_comm_rank(DRIVER_COMM, mype , ierr) call shr_mpi_chkerr(ierr,subname//' mpi_comm_rank comm_world') call shr_sys_flush(logunit) - call mpi_barrier(GLOBAL_COMM,ierr) + call mpi_barrier(DRIVER_COMM,ierr) if (mype == 0) then do n = 1,ncomps write(logunit,'(a,4i6,2x,3a)') trim(subName),n, & From 294655dbe014cc5d93a8a4ecfb5fc58234d0d4a5 Mon Sep 17 00:00:00 2001 From: Jim Edwards Date: Wed, 16 Aug 2017 15:14:37 -0600 Subject: [PATCH 41/51] fix merge issues --- src/components/data_comps/desp/cime_config/buildnml | 4 ++-- src/drivers/mct/shr/seq_comm_mct.F90 | 5 +---- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/src/components/data_comps/desp/cime_config/buildnml b/src/components/data_comps/desp/cime_config/buildnml index 04067b00f69..633bf8109b7 100755 --- a/src/components/data_comps/desp/cime_config/buildnml +++ b/src/components/data_comps/desp/cime_config/buildnml @@ -24,7 +24,7 @@ logger = logging.getLogger(__name__) # pylint: disable=too-many-arguments,too-many-locals,too-many-branches,too-many-statements #################################################################################### -def _create_namelists(case, confdir, inst_string, infile, nmlgen): +def _create_namelists(case, confdir, infile, nmlgen): #################################################################################### """Write out the namelist for this component. 
@@ -146,7 +146,7 @@ def buildnml(case, caseroot, compname): namelist_infile = [infile] # create namelist and stream file(s) data component - _create_namelists(case, confdir, inst_string, namelist_infile, nmlgen) + _create_namelists(case, confdir, namelist_infile, nmlgen) # copy namelist files and stream text files, to rundir if os.path.isdir(rundir): diff --git a/src/drivers/mct/shr/seq_comm_mct.F90 b/src/drivers/mct/shr/seq_comm_mct.F90 index 8f2d26aa98d..d001e5649fc 100644 --- a/src/drivers/mct/shr/seq_comm_mct.F90 +++ b/src/drivers/mct/shr/seq_comm_mct.F90 @@ -215,7 +215,7 @@ subroutine seq_comm_init(global_comm_in, driver_comm_in, nmlfile, drv_comm_id) logical :: error_state integer :: ierr, n, count character(*), parameter :: subName = '(seq_comm_init) ' - integer :: mype,numpes,myncomps,max_threads,gloroot + integer :: mype,numpes,myncomps,max_threads,gloroot, global_numpes integer :: pelist(3,1) ! start, stop, stride for group integer, pointer :: comps(:) ! array with component ids integer, pointer :: comms(:) ! array with mpicoms @@ -253,9 +253,6 @@ subroutine seq_comm_init(global_comm_in, driver_comm_in, nmlfile, drv_comm_id) global_comm = global_comm_in driver_comm = driver_comm_in - call mpi_comm_dup(Comm_in, Coupler_Comm, ierr) - call shr_mpi_chkerr(ierr,subname//' mpi_comm_dup') - !! 
Initialize seq_comms elements do n = 1,ncomps From 872ff5e0cfda6a6b815d3e328d6df38f0637fc0c Mon Sep 17 00:00:00 2001 From: Jim Edwards Date: Wed, 16 Aug 2017 16:10:31 -0600 Subject: [PATCH 42/51] provide global_comm to desp --- src/components/data_comps/desp/desp_comp_mod.F90 | 10 +++++++++- src/drivers/mct/shr/seq_comm_mct.F90 | 4 +++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/components/data_comps/desp/desp_comp_mod.F90 b/src/components/data_comps/desp/desp_comp_mod.F90 index b26eaa70916..c8d066199de 100644 --- a/src/components/data_comps/desp/desp_comp_mod.F90 +++ b/src/components/data_comps/desp/desp_comp_mod.F90 @@ -19,7 +19,8 @@ module desp_comp_mod use seq_timemgr_mod, only: seq_timemgr_RestartAlarmIsOn use seq_comm_mct, only: seq_comm_inst, seq_comm_name, seq_comm_suffix use seq_comm_mct, only: num_inst_cpl => num_inst_driver - + ! Used to link esp components across multiple drivers + use seq_comm_mct, only: global_comm implicit none private @@ -130,6 +131,7 @@ subroutine desp_comp_init(EClock, espid, mpicom_in, phase, read_restart, & integer(IN) :: CurrentTOD ! model sec into model date integer(IN) :: stepno ! step number character(len=CL) :: calendar ! calendar type + integer :: global_mype, global_numpes !----- define namelist ----- namelist / desp_nml / & @@ -219,6 +221,12 @@ subroutine desp_comp_init(EClock, espid, mpicom_in, phase, read_restart, & call shr_strdata_pioinit(SDESP, COMPID) + call mpi_comm_rank(global_comm, global_mype, ierr) + call mpi_comm_size(global_comm, global_numpes, ierr) + + write(logunit,*)'DESP: I am global rank ',global_mype,' of ',global_numpes + + !------------------------------------------------------------------------ ! 
Validate mode !------------------------------------------------------------------------ diff --git a/src/drivers/mct/shr/seq_comm_mct.F90 b/src/drivers/mct/shr/seq_comm_mct.F90 index d001e5649fc..f3527bc5a36 100644 --- a/src/drivers/mct/shr/seq_comm_mct.F90 +++ b/src/drivers/mct/shr/seq_comm_mct.F90 @@ -184,7 +184,9 @@ module seq_comm_mct character(*), parameter :: F12 = "(a,a,'(',i3,' ',a,')',a,2i6,6x,' (',a,i6,')',' (',a,i3,')','(',a,2i6,')')" character(*), parameter :: F13 = "(a,a,'(',i3,' ',a,')',a,2i6,6x,' (',a,i6,')',' (',a,i3,')')" character(*), parameter :: F14 = "(a,a,'(',i3,' ',a,')',a, 6x,' (',a,i6,')',' (',a,i3,')')" - integer :: Global_Comm + + ! Exposed for use in the esp component, please don't use this elsewhere + integer, public :: Global_Comm integer :: driver_comm character(len=32), public :: & From 8fb363b15cdf680cfeebf57b9e7eed3c03da39d5 Mon Sep 17 00:00:00 2001 From: Jim Edwards Date: Fri, 18 Aug 2017 10:08:55 -0600 Subject: [PATCH 43/51] fix namelist read error --- src/drivers/mct/main/cime_comp_mod.F90 | 12 ++++++++++-- src/drivers/mct/shr/seq_comm_mct.F90 | 6 ++++-- src/externals/mct/mct/m_MCTWorld.F90 | 1 - 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/src/drivers/mct/main/cime_comp_mod.F90 b/src/drivers/mct/main/cime_comp_mod.F90 index fba2aa7cc71..cd717d6c057 100644 --- a/src/drivers/mct/main/cime_comp_mod.F90 +++ b/src/drivers/mct/main/cime_comp_mod.F90 @@ -4071,7 +4071,7 @@ subroutine cime_cpl_init(comm_in, comm_out, num_inst_driver, id) implicit none integer , intent(in) :: comm_in - integer , intent(in) :: comm_out + integer , intent(out) :: comm_out integer , intent(out) :: num_inst_driver integer , intent(out) :: id ! instance ID, starts from 1 ! @@ -4079,6 +4079,7 @@ subroutine cime_cpl_init(comm_in, comm_out, num_inst_driver, id) ! 
integer :: ierr, inst_comm, mype, nu, numpes !, pes integer :: ninst_driver, drvpes + character(len=*), parameter :: subname = '(cime_cpl_init) ' namelist /cime_driver_inst/ ninst_driver @@ -4094,7 +4095,14 @@ subroutine cime_cpl_init(comm_in, comm_out, num_inst_driver, id) nu = shr_file_getUnit() open(unit = nu, file = NLFileName, status = 'old', iostat = ierr) rewind(unit = nu) - read(unit = nu, nml = cime_driver_inst, iostat = ierr) + ierr = 1 + do while ( ierr /= 0 ) + read(unit = nu, nml = cime_driver_inst, iostat = ierr) + if (ierr < 0) then + call shr_sys_abort( subname//':: namelist read returns an'// & + ' end of file or end of record condition' ) + endif + enddo close(unit = nu) call shr_file_freeUnit(nu) num_inst_driver = max(ninst_driver, 1) diff --git a/src/drivers/mct/shr/seq_comm_mct.F90 b/src/drivers/mct/shr/seq_comm_mct.F90 index f3527bc5a36..ab4f9c4ebe4 100644 --- a/src/drivers/mct/shr/seq_comm_mct.F90 +++ b/src/drivers/mct/shr/seq_comm_mct.F90 @@ -180,7 +180,7 @@ module seq_comm_mct character(*), parameter :: layout_concurrent = 'concurrent' character(*), parameter :: layout_sequential = 'sequential' - character(*), parameter :: F11 = "(a,a,'(',i3,' ',a,')',a, 3i6,' (',a,i6,')',' (',a,i3,')')" + character(*), parameter :: F11 = "(a,a,'(',i3,' ',a,')',a, 3i6,' (',a,i6,')',' (',a,i3,')','(',a,a,')')" character(*), parameter :: F12 = "(a,a,'(',i3,' ',a,')',a,2i6,6x,' (',a,i6,')',' (',a,i3,')','(',a,2i6,')')" character(*), parameter :: F13 = "(a,a,'(',i3,' ',a,')',a,2i6,6x,' (',a,i6,')',' (',a,i3,')')" character(*), parameter :: F14 = "(a,a,'(',i3,' ',a,')',a, 6x,' (',a,i6,')',' (',a,i3,')')" @@ -571,6 +571,7 @@ subroutine comp_comm_init(driver_comm, comp_rootpe, comp_nthreads, comp_layout, endif call mpi_bcast(pelist, size(pelist), MPI_INTEGER, 0, DRIVER_COMM, ierr) if (present(drv_comm_id)) then + print *,__FILE__,__LINE__,drv_comm_id call seq_comm_setcomm(COMPID(n), pelist, comp_nthreads,name, drv_comm_id) else call seq_comm_setcomm(COMPID(n), 
pelist, comp_nthreads,name, n, num_inst_comp) @@ -712,7 +713,8 @@ subroutine seq_comm_setcomm(ID,pelist,nthreads,iname,inst,tinst) if (seq_comms(ID)%iamroot) then write(logunit,F11) trim(subname),' initialize ID ',ID,seq_comms(ID)%name, & - ' pelist =',pelist,' npes =',seq_comms(ID)%npes,' nthreads =',seq_comms(ID)%nthreads + ' pelist =',pelist,' npes =',seq_comms(ID)%npes,' nthreads =',seq_comms(ID)%nthreads,& + ' suffix =',trim(seq_comms(ID)%suffix) endif end subroutine seq_comm_setcomm diff --git a/src/externals/mct/mct/m_MCTWorld.F90 b/src/externals/mct/mct/m_MCTWorld.F90 index 1c7ab01d495..d30582f489d 100644 --- a/src/externals/mct/mct/m_MCTWorld.F90 +++ b/src/externals/mct/mct/m_MCTWorld.F90 @@ -282,7 +282,6 @@ subroutine initm_(ncomps,globalcomm,mycomms,myids) ! allocate a tmp array for the receive on root. if(myGid == 0) then - print *,__FILE__,__LINE__,Gsize, ncomps allocate(tmparray(0:Gsize-1,ncomps),stat=ier) if(ier/=0) call die(myname_,'allocate(tmparray)',ier) From 8f2ab433e727c1111177a77af6ec82de744cdea6 Mon Sep 17 00:00:00 2001 From: Jim Edwards Date: Wed, 30 Aug 2017 14:18:11 -0600 Subject: [PATCH 44/51] fix merge issue --- scripts/lib/CIME/SystemTests/system_tests_compare_two.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/lib/CIME/SystemTests/system_tests_compare_two.py b/scripts/lib/CIME/SystemTests/system_tests_compare_two.py index d6aaa5175d7..6c22e3d560c 100644 --- a/scripts/lib/CIME/SystemTests/system_tests_compare_two.py +++ b/scripts/lib/CIME/SystemTests/system_tests_compare_two.py @@ -222,7 +222,7 @@ def run_phase(self, success_change=False): # pylint: disable=arguments-differ self._activate_case2() # we need to make sure run2 is properly staged. 
if run_type != "startup": - check_case(self._case2, self._caseroot2) + check_case(self._case2) self._force_case2_settings() self._case_two_custom_prerun_action() From a57d571ac4814f7ef82408166638b9ff75693f96 Mon Sep 17 00:00:00 2001 From: Jim Edwards Date: Fri, 1 Sep 2017 07:26:34 -0600 Subject: [PATCH 45/51] update multi-instance doc --- doc/source/users_guide/multi-instance.rst | 65 ++++++++++++----------- 1 file changed, 35 insertions(+), 30 deletions(-) diff --git a/doc/source/users_guide/multi-instance.rst b/doc/source/users_guide/multi-instance.rst index 1e6ea4d7511..e0c942497cc 100644 --- a/doc/source/users_guide/multi-instance.rst +++ b/doc/source/users_guide/multi-instance.rst @@ -3,17 +3,30 @@ Multi-instance component functionality ====================================== -The CIME coupling infrastructure is capable of running multiple component instances (ensembles) under one model executable. There are two modes of ensemble capability, single driver in which all component instances are handled by a single driver/coupler component or multi-driver in which each instance includes a separate driver/coupler component. In the multi-driver mode the entire model is duplicated for each instance while in the single driver mode only active components need be duplicated. In most cases the multi-driver mode will give better performance and should be used. - -The primary motivation for this development was to be able to run an ensemble Kalman-Filter for data assimilation and parameter estimation (UQ, for example). -However, it also provides the ability to run a set of experiments within a single model executable where each instance can have a different namelist, and to have all the output go to one directory. - -An F compset is used in the following example. Using the multiple-instance code involves the following steps: +The CIME coupling infrastructure is capable of running multiple +component instances (ensembles) under one model executable. 
There are +two modes of ensemble capability, single driver in which all component +instances are handled by a single driver/coupler component or +multi-driver in which each instance includes a separate driver/coupler +component. In the multi-driver mode the entire model is duplicated +for each instance while in the single driver mode only active +components need be duplicated. In most cases the multi-driver mode +will give better performance and should be used. + +The primary motivation for this development was to be able to run an +ensemble Kalman-Filter for data assimilation and parameter estimation +(UQ, for example). However, it also provides the ability to run a set +of experiments within a single model executable where each instance +can have a different namelist, and to have all the output go to one +directory. + +An F compset is used in the following example. Using the +multiple-instance code involves the following steps: 1. Create the case. :: - > create_newcase --case Fmulti --compset F --res ne30_ne30_mg17 + > create_newcase --case Fmulti --compset F2000_DEV --res f19_f19_mg17 > cd Fmulti 2. Assume this is the out-of-the-box pe-layout: @@ -30,22 +43,27 @@ An F compset is used in the following example. Using the multiple-instance code WAV : 144/ 1; 0 ESP : 1/ 1; 0 -The atm, lnd and rof are active components in this compset. The ocn is a prescribed data component, cice is a mixed prescribed/active component (ice-coverage is prescribed), and glc, wav and esp are stub components. +The atm, lnd, rof and glc are active components in this compset. The ocn is +a prescribed data component, cice is a mixed prescribed/active +component (ice-coverage is prescribed), and wav and esp are stub +components. -Let's say we want to run two instances of CAM in this experiment. -We will also have to run two instances of CLM, CICE and RTM. 
-However, we can run either one or two instances of DOCN, and we can ignore the stub components since they do not do anything in this compset. +Let's say we want to run two instances of CAM in this experiment. We +will also have to run two instances of CLM, CICE, RTM and GLC. However, we +can run either one or two instances of DOCN, and we can ignore the +stub components since they do not do anything in this compset. -To run two instances of CAM, CLM, CICE, RTM and DOCN, invoke the following :ref: `xmlchange` commands in your **$CASEROOT** directory: +To run two instances of CAM, CLM, CICE, RTM, GLC and DOCN, invoke the following :ref:`xmlchange` commands in your **$CASEROOT** directory: :: > ./xmlchange NINST_ATM=2 > ./xmlchange NINST_LND=2 > ./xmlchange NINST_ICE=2 > ./xmlchange NINST_ROF=2 + > ./xmlchange NINST_GLC=2 > ./xmlchange NINST_OCN=2 -As a result, you will have two instances of CAM, CLM and CICE (prescribed), RTM, and DOCN, each running concurrently on 72 MPI tasks and all using the same driver/coupler component. In this single driver/coupler mode the number of tasks for each component instance is NTASKS_COMPONENT/NINST_COMPONENT and the total number of tasks is the same as for the single instance case. +As a result, you will have two instances of CAM, CLM and CICE (prescribed), RTM, GLC, and DOCN, each running concurrently on 72 MPI tasks and all using the same driver/coupler component. In this single driver/coupler mode the number of tasks for each component instance is NTASKS_COMPONENT/NINST_COMPONENT and the total number of tasks is the same as for the single instance case. Now consider the multi driver model. 
To use this mode change @@ -63,25 +81,12 @@ A new **user_nl_xxx_NNNN** file (where NNNN is the number of the component insta When calling **case.setup** with the **env_mach_pes.xml** file specifically, these files are created in **$CASEROOT**: :: - user_nl_cam_0001, user_nl_cam_0002 - user_nl_cice_0001, user_nl_cice_0002 - user_nl_clm_0001, user_nl_clm_0002 - user_nl_rtm_0001, user_nl_rtm_0002 - user_nl_docn_0001, user_nl_docn_0002 + user_nl_cam_0001 user_nl_clm_0001 user_nl_docn_0001 user_nl_cice_0001 + user_nl_cism_0001 user_nl_mosart_0001 + user_nl_cam_0002 user_nl_clm_0002 user_nl_docn_0002 user_nl_cice_0002 + user_nl_cism_0002 user_nl_mosart_0002 user_nl_cpl -Also, **case.setup** creates the following ``*_in_*`` files and ``*txt*`` files in **$CASEROOT/CaseDocs**: -:: - - atm_in_0001, atm_in_0002 - docn.streams.txt.prescribed_0001, docn.streams.txt.prescribed_0002 - docn_in_0001, docn_in_0002 - docn_ocn_in_0001, docn_ocn_in_0002 - drv_flds_in, drv_in - ice_in_0001, ice_in_0002 - lnd_in_0001, lnd_in_0002 - rof_in_0001, rof_in_0002 - The namelist for each component instance can be modified by changing the corresponding **user_nl_xxx_NNNN** file. Modifying **user_nl_cam_0002** will result in your namelist changes being active ONLY for the second instance of CAM. To change the DOCN stream txt file instance 0002, copy **docn.streams.txt.prescribed_0002** to your **$CASEROOT** directory with the name **user_docn.streams.txt.prescribed_0002** and modify it accordlingly. 
From 1b541998aa023703711df621cd66280d434f7ec8 Mon Sep 17 00:00:00 2001 From: Jim Edwards Date: Fri, 1 Sep 2017 07:50:39 -0600 Subject: [PATCH 46/51] response to comments --- scripts/create_newcase | 4 ++-- scripts/lib/CIME/SystemTests/pre.py | 1 + scripts/lib/CIME/SystemTests/system_tests_common.py | 2 ++ scripts/lib/CIME/XML/env_mach_pes.py | 3 ++- scripts/lib/CIME/get_timing.py | 6 +++--- src/build_scripts/buildlib.csm_share | 2 +- src/components/xcpl_comps/xshare/dead_mod.F90 | 2 -- src/drivers/mct/cime_config/config_component.xml | 2 +- src/drivers/mct/main/cime_comp_mod.F90 | 6 +++--- 9 files changed, 15 insertions(+), 13 deletions(-) diff --git a/scripts/create_newcase b/scripts/create_newcase index 1e9aa029b62..87fa9225817 100755 --- a/scripts/create_newcase +++ b/scripts/create_newcase @@ -52,8 +52,8 @@ OR parser.add_argument("--ninst",default=1, help="Specify number of model ensemble instances. " - "Default is multiple components and one coupler. Use --multi-driver to " - "run multiple couplers in the ensemble.") + "Default is multiple components and one driver/coupler. Use --multi-driver to " + "run multiple driver/couplers in the ensemble.") parser.add_argument("--mpilib", "-mpilib", help="Specify the mpilib. 
" diff --git a/scripts/lib/CIME/SystemTests/pre.py b/scripts/lib/CIME/SystemTests/pre.py index 47d03b5da1b..65771d177e6 100644 --- a/scripts/lib/CIME/SystemTests/pre.py +++ b/scripts/lib/CIME/SystemTests/pre.py @@ -95,6 +95,7 @@ def run_phase(self): # pylint: disable=arguments-differ ninst = 1 else: ninst = self._case.get_value("NINST_{}".format(comp.upper())) + comp_name = self._case.get_value('COMP_{}'.format(comp.upper())) for index in range(1,ninst+1): if ninst == 1: diff --git a/scripts/lib/CIME/SystemTests/system_tests_common.py b/scripts/lib/CIME/SystemTests/system_tests_common.py index 3ccc31772bc..9af2e5d9195 100644 --- a/scripts/lib/CIME/SystemTests/system_tests_common.py +++ b/scripts/lib/CIME/SystemTests/system_tests_common.py @@ -366,8 +366,10 @@ def _get_latest_cpl_logs(self): for log in cpllogs: if log in lastcpllogs: continue + if log.endswith(suffix): lastcpllogs.append(log) + return lastcpllogs def _compare_baseline(self): diff --git a/scripts/lib/CIME/XML/env_mach_pes.py b/scripts/lib/CIME/XML/env_mach_pes.py index 59e7141f4c5..f2f2b4e2623 100644 --- a/scripts/lib/CIME/XML/env_mach_pes.py +++ b/scripts/lib/CIME/XML/env_mach_pes.py @@ -20,7 +20,8 @@ def __init__(self, case_root=None, infile="env_mach_pes.xml", components=None): EnvBase.__init__(self, case_root, infile, schema=schema) def get_value(self, vid, attribute=None, resolved=True, subgroup=None, pes_per_node=None): # pylint: disable=arguments-differ - + # Special variable NINST_MAX is used to determine the number of + # drivers in multi-driver mode. 
if vid == "NINST_MAX": value = 1 for comp in self._components: diff --git a/scripts/lib/CIME/get_timing.py b/scripts/lib/CIME/get_timing.py index 427d4e2825c..3fbfcd1c499 100644 --- a/scripts/lib/CIME/get_timing.py +++ b/scripts/lib/CIME/get_timing.py @@ -125,7 +125,7 @@ def _getTiming(self, inst=0): ncpl_base_period = self.case.get_value("NCPL_BASE_PERIOD") ncpl = 0 for compclass in self.case.get_values("COMP_CLASSES"): - ncpl = max(ncpl, self.case.get_value("%s_NCPL"%compclass)) + ncpl = max(ncpl, self.case.get_value("{}_NCPL".format(compclass))) ocn_ncpl = self.case.get_value("OCN_NCPL") compset = self.case.get_value("COMPSET") @@ -163,11 +163,11 @@ def _getTiming(self, inst=0): inittype = "TRUE" if inst > 0: - inst_label = '_%04d' % inst + inst_label = '_{04d}'.format(inst) else: inst_label = '' - binfilename = os.path.join(rundir, "timing", "model_timing%s_stats" % inst_label) + binfilename = os.path.join(rundir, "timing", "model_timing{}_stats" . format(inst_label)) finfilename = os.path.join(self.caseroot, "timing", "{}_timing{}_stats.{}".format(cime_model, inst_label, self.lid)) foutfilename = os.path.join(self.caseroot, "timing", diff --git a/src/build_scripts/buildlib.csm_share b/src/build_scripts/buildlib.csm_share index 0537c810e80..2d86437118a 100755 --- a/src/build_scripts/buildlib.csm_share +++ b/src/build_scripts/buildlib.csm_share @@ -32,7 +32,7 @@ my $NINST_ESP = 1; if ($multi_driver eq "FALSE") { $NINST_ATM = `./xmlquery NINST_ATM --value`; - $NINST_ICE = `./xmlquery NINST_ICE --value`; + $NINST_ICE = `./xmlquery NINST_ICE --value`; $NINST_GLC = `./xmlquery NINST_GLC --value`; $NINST_LND = `./xmlquery NINST_LND --value`; $NINST_OCN = `./xmlquery NINST_OCN --value`; diff --git a/src/components/xcpl_comps/xshare/dead_mod.F90 b/src/components/xcpl_comps/xshare/dead_mod.F90 index 51bffefa930..786c3496706 100644 --- a/src/components/xcpl_comps/xshare/dead_mod.F90 +++ b/src/components/xcpl_comps/xshare/dead_mod.F90 @@ -216,8 +216,6 @@ subroutine 
dead_setNewGrid(decomp_type,nxg,nyg,totpe,mype,lsize,gbuf,seg_len,npr i = i + 1 enddo -! write(logunit,*) 'dead_setNewGrid decomp seg ',mype,lsize,nx - found = .true. endif diff --git a/src/drivers/mct/cime_config/config_component.xml b/src/drivers/mct/cime_config/config_component.xml index 6763c090f68..7c4456af959 100644 --- a/src/drivers/mct/cime_config/config_component.xml +++ b/src/drivers/mct/cime_config/config_component.xml @@ -1955,7 +1955,7 @@ TRUE,FALSE mach_pes env_mach_pes.xml - MULTI_DRIVER mode provides a separate driver (and coupler) component for each + MULTI_DRIVER mode provides a separate driver/coupler component for each ensemble member. All components must have an equal number of members. If MULTI_DRIVER mode is False prognostic components must have the same number of members but data or stub components may also have 1 member. diff --git a/src/drivers/mct/main/cime_comp_mod.F90 b/src/drivers/mct/main/cime_comp_mod.F90 index cd717d6c057..b3059e28cd6 100644 --- a/src/drivers/mct/main/cime_comp_mod.F90 +++ b/src/drivers/mct/main/cime_comp_mod.F90 @@ -855,7 +855,7 @@ subroutine cime_pre_init2() !| Initialize infodata !---------------------------------------------------------- - if (len(cpl_inst_tag) > 0) then + if (len_trim(cpl_inst_tag) > 0) then call seq_infodata_init(infodata,nlfilename, GLOID, pioid, & cpl_tag=cpl_inst_tag) else @@ -3587,7 +3587,7 @@ subroutine cime_run() call seq_rest_write(EClock_d, seq_SyncClock, infodata, & atm, lnd, ice, ocn, rof, glc, wav, esp, & fractions_ax, fractions_lx, fractions_ix, fractions_ox, & - fractions_rx, fractions_gx, fractions_wx, tag=trim(cpl_inst_tag)) + fractions_rx, fractions_gx, fractions_wx, trim(cpl_inst_tag)) if (drv_threading) call seq_comm_setnthreads(nthreads_GLOID) call t_drvstopf ('CPL:RESTART',cplrun=.true.) 
@@ -4096,7 +4096,7 @@ subroutine cime_cpl_init(comm_in, comm_out, num_inst_driver, id) open(unit = nu, file = NLFileName, status = 'old', iostat = ierr) rewind(unit = nu) ierr = 1 - do while ( ierr /= 0 ) + do while ( ierr /= 0 ) read(unit = nu, nml = cime_driver_inst, iostat = ierr) if (ierr < 0) then call shr_sys_abort( subname//':: namelist read returns an'// & From 2d4d9ef19c87a82aa57a838b1f319e5e80905056 Mon Sep 17 00:00:00 2001 From: Jim Edwards Date: Fri, 1 Sep 2017 11:34:39 -0600 Subject: [PATCH 47/51] remove incorrect comment --- config/config_tests.xml | 1 - 1 file changed, 1 deletion(-) diff --git a/config/config_tests.xml b/config/config_tests.xml index 29daeff4d0b..2190bdff6de 100644 --- a/config/config_tests.xml +++ b/config/config_tests.xml @@ -125,7 +125,6 @@ NCR multi-instance validation vs single instance - concurrent PE for instance do an initial run test with NINST 1 (suffix: base) do an initial run test with NINST 2 (suffix: multiinst for both _0001 and _0002) compare base and _0001 and _0002 - (***note that NCR_script and NCK_script are the same - but NCR_build.csh and NCK_build.csh are different***) NOC multi-instance validation for single instance ocean (default length) do an initial run test with NINST 2 (other than ocn), with mod to instance 1 (suffix: inst1_base, inst2_mod) From bcbe68d86f91d6d288e21d23a73e759090596404 Mon Sep 17 00:00:00 2001 From: Jim Edwards Date: Fri, 1 Sep 2017 16:59:13 -0600 Subject: [PATCH 48/51] additional note in doc --- doc/source/users_guide/multi-instance.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/source/users_guide/multi-instance.rst b/doc/source/users_guide/multi-instance.rst index e0c942497cc..ac9885f173b 100644 --- a/doc/source/users_guide/multi-instance.rst +++ b/doc/source/users_guide/multi-instance.rst @@ -105,3 +105,5 @@ Also keep these important points in mind: #. 
In **create_test** these options can be invoked with testname modifiers _N# for the single driver mode and _C# for the multi-driver mode. These are mutually exclusive options, they cannot be combined. #. In create_newcase you may use --ninst # to set the number of instances and --multi-driver for multi-driver mode. + +#. In multi-driver mode you will always get 1 instance of each component for each driver/coupler. If you change a case using xmlchange MULTI_DRIVER=TRUE you will get a number of driver/couplers equal to the maximum NINST value over all components. From 0eea1ff6078d1d45c7b884f0890510cb865442ba Mon Sep 17 00:00:00 2001 From: Jim Edwards Date: Tue, 5 Sep 2017 09:55:08 -0600 Subject: [PATCH 49/51] doc update; --- doc/source/users_guide/multi-instance.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/users_guide/multi-instance.rst b/doc/source/users_guide/multi-instance.rst index ac9885f173b..ebd79cdaa5f 100644 --- a/doc/source/users_guide/multi-instance.rst +++ b/doc/source/users_guide/multi-instance.rst @@ -77,7 +77,7 @@ This configuration will run each component instance on the original 144 tasks bu > ./case.setup -A new **user_nl_xxx_NNNN** file (where NNNN is the number of the component instances) is generated when **case.setup** is called. +A new **user_nl_xxx_NNNN** file is generated for each component instance when case.setup is called (where xxx is the component type and NNNN is the number of the component instance).
When calling **case.setup** with the **env_mach_pes.xml** file specifically, these files are created in **$CASEROOT**: :: From 25a1bdfd0edb9fdbc3c31c5be6aa697b0444502a Mon Sep 17 00:00:00 2001 From: Jim Edwards Date: Tue, 5 Sep 2017 14:30:02 -0600 Subject: [PATCH 50/51] additional error check in case.setup --- scripts/lib/CIME/XML/env_mach_pes.py | 18 ++++++++++++++++++ scripts/lib/CIME/case_setup.py | 5 ++++- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/scripts/lib/CIME/XML/env_mach_pes.py b/scripts/lib/CIME/XML/env_mach_pes.py index f2f2b4e2623..2995aeefc7b 100644 --- a/scripts/lib/CIME/XML/env_mach_pes.py +++ b/scripts/lib/CIME/XML/env_mach_pes.py @@ -38,6 +38,24 @@ def get_value(self, vid, attribute=None, resolved=True, subgroup=None, pes_per_n return value + def set_value(self, vid, value, subgroup=None, ignore_type=False): + """ + Set the value of an entry-id field to value + Returns the value or None if not found + subgroup is ignored in the general routine and applied in specific methods + """ + if vid == "MULTI_DRIVER" and value: + ninst_max = self.get_value("NINST_MAX") + for comp in self._components: + if comp == "CPL": + continue + ninst = self.get_value("NINST_{}".format(comp)) + expect(ninst == ninst_max, + "All components must have the same NINST value in multi_driver mode. 
NINST_{}={} shoud be {}".format(comp,ninst,ninst_max)) + + return EnvBase.set_value(self, vid, value, subgroup=subgroup, ignore_type=ignore_type) + + def get_max_thread_count(self, comp_classes): ''' Find the maximum number of openmp threads for any component in the case ''' max_threads = 1 diff --git a/scripts/lib/CIME/case_setup.py b/scripts/lib/CIME/case_setup.py index e621f671a10..722a456a5dc 100644 --- a/scripts/lib/CIME/case_setup.py +++ b/scripts/lib/CIME/case_setup.py @@ -36,7 +36,10 @@ def _build_usernl_files(case, model, comp): ninst = 1 multi_driver = case.get_value("MULTI_DRIVER") if multi_driver: - ninst = case.get_value("NINST_MAX") + ninst_max = case.get_value("NINST_MAX") + if model not in ("DRV","CPL"): + ninst_model = case.get_value("NINST_{}".format(model)) + expect(ninst_model==ninst_max,"MULTI_DRIVER mode, all components must have same NINST value. NINST_{} != {}".format(model,ninst_max)) if comp == "cpl": if not os.path.exists("user_nl_cpl"): shutil.copy(os.path.join(model_dir, "user_nl_cpl"), ".") From dd12a685d144f17f360e3d564a6c761d9c0a3a49 Mon Sep 17 00:00:00 2001 From: Jim Edwards Date: Tue, 5 Sep 2017 16:13:57 -0600 Subject: [PATCH 51/51] fix pylint error --- scripts/lib/CIME/get_timing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/lib/CIME/get_timing.py b/scripts/lib/CIME/get_timing.py index 3fbfcd1c499..330e365215e 100644 --- a/scripts/lib/CIME/get_timing.py +++ b/scripts/lib/CIME/get_timing.py @@ -163,7 +163,7 @@ def _getTiming(self, inst=0): inittype = "TRUE" if inst > 0: - inst_label = '_{04d}'.format(inst) + inst_label = '_{:04d}'.format(inst) else: inst_label = ''