From 9f38fa0d1521904bd25a7cd3b93bd9e20a6ccccb Mon Sep 17 00:00:00 2001 From: Jim Edwards Date: Mon, 28 Aug 2017 18:49:48 -0600 Subject: [PATCH 1/3] mv erp test to compare two --- config/cesm/config_archive.xml | 4 +- config/config_tests.xml | 4 + scripts/lib/CIME/SystemTests/erp.py | 172 +++++++++------------------- 3 files changed, 62 insertions(+), 118 deletions(-) diff --git a/config/cesm/config_archive.xml b/config/cesm/config_archive.xml index 903aaf05f64..f148d3c4e14 100644 --- a/config/cesm/config_archive.xml +++ b/config/cesm/config_archive.xml @@ -78,11 +78,11 @@ \.h.*.nc$|\.d[dovt]\. unset - rpointer.ocn.restart$NINST_STRING + rpointer.ocn$NINST_STRING.restart ./$CASE.pop$NINST_STRING.r.$DATENAME.nc,RESTART_FMT=nc - rpointer.ocn.ovf$NINST_STRING + rpointer.ocn$NINST_STRING.ovf ./$CASE.pop$NINST_STRING.ro.$DATENAME diff --git a/config/config_tests.xml b/config/config_tests.xml index d19cd1394bc..a24577b1ef0 100644 --- a/config/config_tests.xml +++ b/config/config_tests.xml @@ -245,6 +245,10 @@ NODEFAIL Tests restart upon detected node failure. Generates fake failu ndays 11 FALSE + $STOP_N / 2 + 1 + $STOP_OPTION + $STOP_OPTION + $STOP_N diff --git a/scripts/lib/CIME/SystemTests/erp.py b/scripts/lib/CIME/SystemTests/erp.py index 9cde51af190..67f413e1e60 100644 --- a/scripts/lib/CIME/SystemTests/erp.py +++ b/scripts/lib/CIME/SystemTests/erp.py @@ -1,5 +1,5 @@ """ -CIME ERP test. This class inherits from SystemTestsCommon +CIME ERP test. This class inherits from SystemTestsCompareTwo This is a pes counts hybrid (open-MP/MPI) restart bfb test from startup. This is just like an ERS test but the pe-counts/threading @@ -8,132 +8,72 @@ (2) Do a restart test with half the number of tasks and threads (suffix rest) """ -import shutil from CIME.XML.standard_module_setup import * from CIME.case_setup import case_setup -import CIME.utils -from CIME.SystemTests.system_tests_common import SystemTestsCommon +from CIME.SystemTests.system_tests_compare_two import SystemTestsCompareTwo from CIME.check_lockedfiles import * +from CIME.case_st_archive import _get_datenames logger = logging.getLogger(__name__) -class ERP(SystemTestsCommon): +class ERP(SystemTestsCompareTwo): def __init__(self, case): """ initialize a test object """ - SystemTestsCommon.__init__(self, case) + SystemTestsCompareTwo.__init__(self, case, + separate_builds = True, + run_two_suffix = 'rest', + run_one_description = 'initial', + run_two_description = 'restart') - def build_phase(self, sharedlib_only=False, model_only=False): - """ - Build two cases. Case one uses defaults, case2 uses half the number of threads - and tasks. This test will fail for components (e.g. pop) that do not reproduce exactly - with different numbers of mpi tasks. - """ + def _common_setup(self): self._case.set_value("BUILD_THREADED",True) - if sharedlib_only: - return self.build_indv(sharedlib_only=sharedlib_only, model_only=model_only) - - exeroot = self._case.get_value("EXEROOT") - cime_model = CIME.utils.get_model() - - # Make backup copies of the ORIGINAL env_mach_pes.xml and - # env_build.xml in LockedFiles if they are not there. If there - # are already copies there then simply copy them back to - # have the starting env_mach_pes.xml and env_build.xml - machpes1 = "env_mach_pes.ERP1.xml" - envbuild1 = "env_build.ERP1.xml" - if is_locked(machpes1): - restore(machpes1, newname="env_mach_pes.xml") - else: - lock_file("env_mach_pes.xml", newname=machpes1) - - if is_locked(envbuild1): - restore(envbuild1, newname="env_build.xml") - - # Build two executables, one using the original tasks and threads (ERP1) and - # one using the modified tasks and threads (ERP2) - # The reason we currently need two executables that CESM-CICE has a compile time decomposition - # For cases where ERP works, changing this decomposition will not affect answers, but it will - # affect the executable that is used - for bld in range(1,3): - logging.warn("Starting bld {}".format(bld)) - - if (bld == 2): - # halve the number of tasks and threads - for comp in self._case.get_values("COMP_CLASSES"): - ntasks = self._case.get_value("NTASKS_{}".format(comp)) - nthreads = self._case.get_value("NTHRDS_{}".format(comp)) - rootpe = self._case.get_value("ROOTPE_{}".format(comp)) - if ( nthreads > 1 ): - self._case.set_value("NTHRDS_{}".format(comp), nthreads/2) - if ( ntasks > 1 ): - self._case.set_value("NTASKS_{}".format(comp), ntasks/2) - self._case.set_value("ROOTPE_{}".format(comp), rootpe/2) - - # Note, some components, like CESM-CICE, have - # decomposition information in env_build.xml - # case_setup(self._case, test_mode=True, reset=True)that - # needs to be regenerated for the above new tasks and thread counts - case_setup(self._case, test_mode=True, reset=True) - - # Now rebuild the system, given updated information in env_build.xml - self.build_indv(sharedlib_only=sharedlib_only, model_only=model_only) - shutil.move("{}/{}.exe".format(exeroot,cime_model), - "{}/{}.ERP{}.exe".format(exeroot,cime_model,bld)) - - # Make copies of the new env_mach_pes.xml and the new - # env_build.xml to be used in the run phase - lock_file("env_mach_pes.xml", newname="env_mach_pes.ERP{}.xml".format(bld)) - lock_file("env_build.xml", newname="env_build.ERP{}.xml".format(bld)) - - def run_phase(self): - # run will have values 1,2 - for run in range(1,3): - - expect(is_locked("env_mach_pes.ERP{:d}.xml".format(run)), - "ERROR: LockedFiles/env_mach_pes.ERP{:d}.xml does not exist, run case.build".format(run )) - - # Use the second env_mach_pes.xml and env_build.xml files - restore("env_mach_pes.ERP{:d}.xml".format(run), newname="env_mach_pes.xml") - restore("env_build.ERP{:d}.xml".format(run), newname="env_build.xml") - - # update the case to use the new values - self._case.read_xml() - - # Use the second executable that was created - exeroot = self._case.get_value("EXEROOT") - cime_model = CIME.utils.get_model() - exefile = os.path.join(exeroot,"{}.exe".format(cime_model)) - exefile2 = os.path.join(exeroot,"{}.ERP{:d}.exe".format(cime_model,run)) - if (os.path.isfile(exefile)): - os.remove(exefile) - shutil.copy(exefile2, exefile) - - stop_n = self._case.get_value("STOP_N") - stop_option = self._case.get_value("STOP_OPTION") - - if run == 1: - expect(stop_n > 2, "ERROR: stop_n value {:d} too short".format(stop_n)) - rest_n = stop_n/2 + 1 - self._case.set_value("REST_N", rest_n) - self._case.set_value("REST_OPTION", stop_option) - self._case.set_value("HIST_N", stop_n) - self._case.set_value("HIST_OPTION", stop_option) - self._case.set_value("CONTINUE_RUN", False) - suffix = "base" - else: - rest_n = stop_n/2 + 1 - stop_new = stop_n - rest_n - expect(stop_new > 0, "ERROR: stop_n value {:d} too short {:d} {:d}".format(stop_new,stop_n,rest_n)) - self._case.set_value("STOP_N", stop_new) - self._case.set_value("CONTINUE_RUN", True) - self._case.set_value("REST_OPTION","never") - suffix = "rest" - - case_setup(self._case, test_mode=True, reset=True) - - self.run_indv(suffix=suffix) - self._component_compare_test("base", "rest") + def _case_one_setup(self): + stop_n = self._case.get_value("STOP_N") + + expect(stop_n > 2, "ERROR: stop_n value {:d} too short".format(stop_n)) + + def _case_two_setup(self): + # halve the number of tasks and threads + for comp in self._case.get_values("COMP_CLASSES"): + ntasks = self._case1.get_value("NTASKS_{}".format(comp)) + nthreads = self._case1.get_value("NTHRDS_{}".format(comp)) + rootpe = self._case1.get_value("ROOTPE_{}".format(comp)) + if ( nthreads > 1 ): + self._case.set_value("NTHRDS_{}".format(comp), nthreads/2) + if ( ntasks > 1 ): + self._case.set_value("NTASKS_{}".format(comp), ntasks/2) + self._case.set_value("ROOTPE_{}".format(comp), rootpe/2) + + stop_n = self._case1.get_value("STOP_N") + rest_n = self._case1.get_value("REST_N") + stop_new = stop_n - rest_n + expect(stop_new > 0, "ERROR: stop_n value {:d} too short {:d} {:d}".format(stop_new,stop_n,rest_n)) + self._case.set_value("STOP_N", stop_new) + self._case.set_value("HIST_N", stop_n) + self._case.set_value("CONTINUE_RUN", True) + self._case.set_value("REST_OPTION","never") + + # Note, some components, like CESM-CICE, have + # decomposition information in env_build.xml that + # needs to be regenerated for the above new tasks and thread counts + case_setup(self._case, test_mode=True, reset=True) + + def _case_one_custom_postrun_action(self): + rundir1 = self._case1.get_value("RUNDIR") + rundir2 = self._case2.get_value("RUNDIR") + case = self._case1.get_value("CASE") + datenames = _get_datenames(self._case1) + for file_ in glob.iglob(os.path.join(rundir1,"*")): + logger.info("File is {}".format(file_)) + if os.path.basename(file_).startswith("rpointer"): + logger.info("Copy {} to {}".format(file_, rundir2)) + shutil.copy(file_, rundir2) + elif os.path.basename(file_).startswith(case) and datenames[0] in file_: + file_case2 = os.path.join(rundir2, os.path.basename(file_)) + if not os.path.isfile(file_case2): + logger.info("Link {} to {}".format(file_, rundir2)) + os.symlink(file_, file_case2) From de0e5858bc2d2c7a898fd1e73ee9c84a0c5246fa Mon Sep 17 00:00:00 2001 From: Jim Edwards Date: Tue, 29 Aug 2017 16:13:40 -0600 Subject: [PATCH 2/3] add code to copy incomplete history files --- scripts/lib/CIME/SystemTests/erp.py | 16 +--------- .../SystemTests/system_tests_compare_two.py | 30 +++++++++++++++++++ scripts/lib/CIME/case_st_archive.py | 4 +-- 3 files changed, 33 insertions(+), 17 deletions(-) diff --git a/scripts/lib/CIME/SystemTests/erp.py b/scripts/lib/CIME/SystemTests/erp.py index 67f413e1e60..08352c6f6cb 100644 --- a/scripts/lib/CIME/SystemTests/erp.py +++ b/scripts/lib/CIME/SystemTests/erp.py @@ -12,7 +12,6 @@ from CIME.case_setup import case_setup from CIME.SystemTests.system_tests_compare_two import SystemTestsCompareTwo from CIME.check_lockedfiles import * -from CIME.case_st_archive import _get_datenames logger = logging.getLogger(__name__) @@ -63,17 +62,4 @@ def _case_two_setup(self): case_setup(self._case, test_mode=True, reset=True) def _case_one_custom_postrun_action(self): - rundir1 = self._case1.get_value("RUNDIR") - rundir2 = self._case2.get_value("RUNDIR") - case = self._case1.get_value("CASE") - datenames = _get_datenames(self._case1) - for file_ in glob.iglob(os.path.join(rundir1,"*")): - logger.info("File is {}".format(file_)) - if os.path.basename(file_).startswith("rpointer"): - logger.info("Copy {} to {}".format(file_, rundir2)) - shutil.copy(file_, rundir2) - elif os.path.basename(file_).startswith(case) and datenames[0] in file_: - file_case2 = os.path.join(rundir2, os.path.basename(file_)) - if not os.path.isfile(file_case2): - logger.info("Link {} to {}".format(file_, rundir2)) - os.symlink(file_, file_case2) + self.setup_restart() diff --git a/scripts/lib/CIME/SystemTests/system_tests_compare_two.py b/scripts/lib/CIME/SystemTests/system_tests_compare_two.py index 8c312df7d35..1e7a7ae8599 100644 --- a/scripts/lib/CIME/SystemTests/system_tests_compare_two.py +++ b/scripts/lib/CIME/SystemTests/system_tests_compare_two.py @@ -38,6 +38,8 @@ from CIME.SystemTests.system_tests_common import SystemTestsCommon from CIME.case import Case from CIME.case_submit import check_case +from CIME.case_st_archive import get_datenames, get_histfiles_for_restarts +from CIME.XML.env_archive import EnvArchive import shutil, os, glob @@ -235,6 +237,34 @@ def run_phase(self, success_change=False): # pylint: disable=arguments-differ self._component_compare_test(self._run_one_suffix, self._run_two_suffix, success_change=success_change) + def setup_restart(self): + """ copy rpointer files and link restart and history from case1 to case2 """ + rundir1 = self._case1.get_value("RUNDIR") + rundir2 = self._case2.get_value("RUNDIR") + case = self._case1.get_value("CASE") + datenames = get_datenames(self._case1) + arch = self._case1.get_env("archive") + arch_entries = arch.get_entries() + + + for file_ in glob.iglob(os.path.join(rundir1,"*")): + logger.debug("File is {}".format(file_)) + if os.path.basename(file_).startswith("rpointer"): + logger.info("Copy {} to {}".format(file_, rundir2)) + shutil.copy(file_, rundir2) + elif os.path.basename(file_).startswith(case) and datenames[0] in file_: + file_case2 = os.path.join(rundir2, os.path.basename(file_)) + if not os.path.isfile(file_case2): + logger.info("Link {} to {}".format(file_, rundir2)) + os.symlink(file_, file_case2) + for arch_entry in arch_entries: + histfiles = get_histfiles_for_restarts(self._case1, arch, arch_entry, file_) + for histfile in histfiles: + logger.info("Copying histfile {}".format(histfile)) + shutil.copy(os.path.join(rundir1,histfile), rundir2) + + + # ======================================================================== # Private methods # ======================================================================== diff --git a/scripts/lib/CIME/case_st_archive.py b/scripts/lib/CIME/case_st_archive.py index 3f469655bbb..ac0beeacde1 100644 --- a/scripts/lib/CIME/case_st_archive.py +++ b/scripts/lib/CIME/case_st_archive.py @@ -14,7 +14,7 @@ logger = logging.getLogger(__name__) ############################################################################### -def _get_datenames(case, last_date=None): +def get_datenames(case, last_date=None): ############################################################################### if last_date is not None: try: @@ -365,7 +365,7 @@ def _archive_process(case, archive, last_date, archive_incomplete_logs, copy_onl logger.info('-------------------------------------------') logger.info('doing short term archiving for {} ({})'.format(compname, compclass)) logger.info('-------------------------------------------') - datenames = _get_datenames(case, last_date) + datenames = get_datenames(case, last_date) for datename in datenames: logger.info('Archiving for date %s' % datename) datename_is_last = False From 93fdfd907556b3c1990b6a704c2c9cf97e783932 Mon Sep 17 00:00:00 2001 From: Jim Edwards Date: Wed, 30 Aug 2017 11:49:55 -0600 Subject: [PATCH 3/3] do not copy unneeded hist files --- .../SystemTests/system_tests_compare_two.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/scripts/lib/CIME/SystemTests/system_tests_compare_two.py b/scripts/lib/CIME/SystemTests/system_tests_compare_two.py index 1e7a7ae8599..b9ab1e1b466 100644 --- a/scripts/lib/CIME/SystemTests/system_tests_compare_two.py +++ b/scripts/lib/CIME/SystemTests/system_tests_compare_two.py @@ -254,16 +254,19 @@ def setup_restart(self): shutil.copy(file_, rundir2) elif os.path.basename(file_).startswith(case) and datenames[0] in file_: file_case2 = os.path.join(rundir2, os.path.basename(file_)) - if not os.path.isfile(file_case2): + hfile_found = False + for arch_entry in arch_entries: + histfiles = get_histfiles_for_restarts(self._case1, arch, arch_entry, file_) + for histfile in histfiles: + logger.info("Copying histfile {}".format(histfile)) + shutil.copy(os.path.join(rundir1,histfile), rundir2) + for suffix in arch.get_hist_file_extensions(arch_entry): + hfile = re.compile(suffix) + if hfile.search(file_): + hfile_found = True + if not os.path.isfile(file_case2) and not hfile_found: logger.info("Link {} to {}".format(file_, rundir2)) os.symlink(file_, file_case2) - for arch_entry in arch_entries: - histfiles = get_histfiles_for_restarts(self._case1, arch, arch_entry, file_) - for histfile in histfiles: - logger.info("Copying histfile {}".format(histfile)) - shutil.copy(os.path.join(rundir1,histfile), rundir2) - - # ======================================================================== # Private methods