diff --git a/config/cesm/config_archive.xml b/config/cesm/config_archive.xml
index 903aaf05f64..f148d3c4e14 100644
--- a/config/cesm/config_archive.xml
+++ b/config/cesm/config_archive.xml
@@ -78,11 +78,11 @@
\.h.*.nc$|\.d[dovt]\.
unset
- rpointer.ocn.restart$NINST_STRING
+ rpointer.ocn$NINST_STRING.restart
./$CASE.pop$NINST_STRING.r.$DATENAME.nc,RESTART_FMT=nc
- rpointer.ocn.ovf$NINST_STRING
+ rpointer.ocn$NINST_STRING.ovf
./$CASE.pop$NINST_STRING.ro.$DATENAME
diff --git a/config/config_tests.xml b/config/config_tests.xml
index d19cd1394bc..a24577b1ef0 100644
--- a/config/config_tests.xml
+++ b/config/config_tests.xml
@@ -245,6 +245,10 @@ NODEFAIL Tests restart upon detected node failure. Generates fake failu
ndays
11
FALSE
+ $STOP_N / 2 + 1
+ $STOP_OPTION
+ $STOP_OPTION
+ $STOP_N
diff --git a/scripts/lib/CIME/SystemTests/erp.py b/scripts/lib/CIME/SystemTests/erp.py
index 9cde51af190..05021bbda31 100644
--- a/scripts/lib/CIME/SystemTests/erp.py
+++ b/scripts/lib/CIME/SystemTests/erp.py
@@ -1,5 +1,5 @@
"""
-CIME ERP test. This class inherits from SystemTestsCommon
+CIME ERP test. This class inherits from SystemTestsCompareTwo
This is a pes counts hybrid (open-MP/MPI) restart bfb test from
startup. This is just like an ERS test but the pe-counts/threading
@@ -8,132 +8,58 @@
(2) Do a restart test with half the number of tasks and threads (suffix rest)
"""
-import shutil
from CIME.XML.standard_module_setup import *
from CIME.case_setup import case_setup
-import CIME.utils
-from CIME.SystemTests.system_tests_common import SystemTestsCommon
+from CIME.SystemTests.system_tests_compare_two import SystemTestsCompareTwo
from CIME.check_lockedfiles import *
logger = logging.getLogger(__name__)
-class ERP(SystemTestsCommon):
+class ERP(SystemTestsCompareTwo):
def __init__(self, case):
"""
initialize a test object
"""
- SystemTestsCommon.__init__(self, case)
+ SystemTestsCompareTwo.__init__(self, case,
+ separate_builds = True,
+ run_two_suffix = 'rest',
+ run_one_description = 'initial',
+ run_two_description = 'restart')
- def build_phase(self, sharedlib_only=False, model_only=False):
- """
- Build two cases. Case one uses defaults, case2 uses half the number of threads
- and tasks. This test will fail for components (e.g. pop) that do not reproduce exactly
- with different numbers of mpi tasks.
- """
+ def _common_setup(self):
self._case.set_value("BUILD_THREADED",True)
- if sharedlib_only:
- return self.build_indv(sharedlib_only=sharedlib_only, model_only=model_only)
-
- exeroot = self._case.get_value("EXEROOT")
- cime_model = CIME.utils.get_model()
-
- # Make backup copies of the ORIGINAL env_mach_pes.xml and
- # env_build.xml in LockedFiles if they are not there. If there
- # are already copies there then simply copy them back to
- # have the starting env_mach_pes.xml and env_build.xml
- machpes1 = "env_mach_pes.ERP1.xml"
- envbuild1 = "env_build.ERP1.xml"
- if is_locked(machpes1):
- restore(machpes1, newname="env_mach_pes.xml")
- else:
- lock_file("env_mach_pes.xml", newname=machpes1)
-
- if is_locked(envbuild1):
- restore(envbuild1, newname="env_build.xml")
-
- # Build two executables, one using the original tasks and threads (ERP1) and
- # one using the modified tasks and threads (ERP2)
- # The reason we currently need two executables that CESM-CICE has a compile time decomposition
- # For cases where ERP works, changing this decomposition will not affect answers, but it will
- # affect the executable that is used
- for bld in range(1,3):
- logging.warn("Starting bld {}".format(bld))
-
- if (bld == 2):
- # halve the number of tasks and threads
- for comp in self._case.get_values("COMP_CLASSES"):
- ntasks = self._case.get_value("NTASKS_{}".format(comp))
- nthreads = self._case.get_value("NTHRDS_{}".format(comp))
- rootpe = self._case.get_value("ROOTPE_{}".format(comp))
- if ( nthreads > 1 ):
- self._case.set_value("NTHRDS_{}".format(comp), nthreads/2)
- if ( ntasks > 1 ):
- self._case.set_value("NTASKS_{}".format(comp), ntasks/2)
- self._case.set_value("ROOTPE_{}".format(comp), rootpe/2)
-
- # Note, some components, like CESM-CICE, have
- # decomposition information in env_build.xml
- # case_setup(self._case, test_mode=True, reset=True)that
- # needs to be regenerated for the above new tasks and thread counts
- case_setup(self._case, test_mode=True, reset=True)
-
- # Now rebuild the system, given updated information in env_build.xml
- self.build_indv(sharedlib_only=sharedlib_only, model_only=model_only)
- shutil.move("{}/{}.exe".format(exeroot,cime_model),
- "{}/{}.ERP{}.exe".format(exeroot,cime_model,bld))
-
- # Make copies of the new env_mach_pes.xml and the new
- # env_build.xml to be used in the run phase
- lock_file("env_mach_pes.xml", newname="env_mach_pes.ERP{}.xml".format(bld))
- lock_file("env_build.xml", newname="env_build.ERP{}.xml".format(bld))
-
- def run_phase(self):
- # run will have values 1,2
- for run in range(1,3):
-
- expect(is_locked("env_mach_pes.ERP{:d}.xml".format(run)),
- "ERROR: LockedFiles/env_mach_pes.ERP{:d}.xml does not exist, run case.build".format(run ))
-
- # Use the second env_mach_pes.xml and env_build.xml files
- restore("env_mach_pes.ERP{:d}.xml".format(run), newname="env_mach_pes.xml")
- restore("env_build.ERP{:d}.xml".format(run), newname="env_build.xml")
-
- # update the case to use the new values
- self._case.read_xml()
-
- # Use the second executable that was created
- exeroot = self._case.get_value("EXEROOT")
- cime_model = CIME.utils.get_model()
- exefile = os.path.join(exeroot,"{}.exe".format(cime_model))
- exefile2 = os.path.join(exeroot,"{}.ERP{:d}.exe".format(cime_model,run))
- if (os.path.isfile(exefile)):
- os.remove(exefile)
- shutil.copy(exefile2, exefile)
-
- stop_n = self._case.get_value("STOP_N")
- stop_option = self._case.get_value("STOP_OPTION")
-
- if run == 1:
- expect(stop_n > 2, "ERROR: stop_n value {:d} too short".format(stop_n))
- rest_n = stop_n/2 + 1
- self._case.set_value("REST_N", rest_n)
- self._case.set_value("REST_OPTION", stop_option)
- self._case.set_value("HIST_N", stop_n)
- self._case.set_value("HIST_OPTION", stop_option)
- self._case.set_value("CONTINUE_RUN", False)
- suffix = "base"
- else:
- rest_n = stop_n/2 + 1
- stop_new = stop_n - rest_n
- expect(stop_new > 0, "ERROR: stop_n value {:d} too short {:d} {:d}".format(stop_new,stop_n,rest_n))
- self._case.set_value("STOP_N", stop_new)
- self._case.set_value("CONTINUE_RUN", True)
- self._case.set_value("REST_OPTION","never")
- suffix = "rest"
-
- case_setup(self._case, test_mode=True, reset=True)
-
- self.run_indv(suffix=suffix)
- self._component_compare_test("base", "rest")
+ def _case_one_setup(self):
+ stop_n = self._case.get_value("STOP_N")
+
+ expect(stop_n > 2, "ERROR: stop_n value {:d} too short".format(stop_n))
+
+ def _case_two_setup(self):
+ # halve the number of tasks and threads
+ for comp in self._case.get_values("COMP_CLASSES"):
+ ntasks = self._case1.get_value("NTASKS_{}".format(comp))
+ nthreads = self._case1.get_value("NTHRDS_{}".format(comp))
+ rootpe = self._case1.get_value("ROOTPE_{}".format(comp))
+ if ( nthreads > 1 ):
+ self._case.set_value("NTHRDS_{}".format(comp), nthreads/2)
+ if ( ntasks > 1 ):
+ self._case.set_value("NTASKS_{}".format(comp), ntasks/2)
+ self._case.set_value("ROOTPE_{}".format(comp), rootpe/2)
+
+ stop_n = self._case1.get_value("STOP_N")
+ rest_n = self._case1.get_value("REST_N")
+ stop_new = stop_n - rest_n
+ expect(stop_new > 0, "ERROR: stop_n value {:d} too short {:d} {:d}".format(stop_new,stop_n,rest_n))
+ self._case.set_value("STOP_N", stop_new)
+ self._case.set_value("HIST_N", stop_n)
+ self._case.set_value("CONTINUE_RUN", True)
+ self._case.set_value("REST_OPTION","never")
+
+ # Note, some components, like CESM-CICE, have
+ # decomposition information in env_build.xml that
+ # needs to be regenerated for the above new tasks and thread counts
+ case_setup(self._case, test_mode=True, reset=True)
+
+ def _case_one_custom_postrun_action(self):
+ self.copy_case1_restarts_to_case2()
diff --git a/scripts/lib/CIME/SystemTests/system_tests_compare_two.py b/scripts/lib/CIME/SystemTests/system_tests_compare_two.py
index 8c312df7d35..1ec2569443e 100644
--- a/scripts/lib/CIME/SystemTests/system_tests_compare_two.py
+++ b/scripts/lib/CIME/SystemTests/system_tests_compare_two.py
@@ -38,6 +38,7 @@
from CIME.SystemTests.system_tests_common import SystemTestsCommon
from CIME.case import Case
from CIME.case_submit import check_case
+from CIME.case_st_archive import archive_last_restarts
import shutil, os, glob
@@ -235,6 +236,20 @@ def run_phase(self, success_change=False): # pylint: disable=arguments-differ
self._component_compare_test(self._run_one_suffix, self._run_two_suffix, success_change=success_change)
+ def copy_case1_restarts_to_case2(self):
+ """
+ Makes a copy (or symlink) of restart files and related files
+ (necessary history files, rpointer files) from case1 to case2.
+
+ This is not done automatically, but can be called by individual
+ tests where case2 does a continue_run using case1's restart
+ files.
+ """
+ rundir2 = self._case2.get_value("RUNDIR")
+ archive_last_restarts(case = self._case1,
+ archive_restdir = rundir2,
+ link_to_restart_files = True)
+
# ========================================================================
# Private methods
# ========================================================================
diff --git a/scripts/lib/CIME/XML/env_archive.py b/scripts/lib/CIME/XML/env_archive.py
index 366b2a5511e..1034644c34b 100644
--- a/scripts/lib/CIME/XML/env_archive.py
+++ b/scripts/lib/CIME/XML/env_archive.py
@@ -38,14 +38,19 @@ def __init__(self, case_root=None, infile="env_archive.xml"):
def get_entries(self):
return self.get_nodes('comp_archive_spec')
+ def get_entry(self, compname):
+ return self.get_optional_node('comp_archive_spec', attributes={"compname":compname})
+
def get_entry_info(self, archive_entry):
- compname = archive_entry.attrib['compname']
- compclass = archive_entry.attrib['compclass']
+ compname = archive_entry.get('compname')
+ compclass = archive_entry.get('compclass')
return compname,compclass
def get_entry_value(self, name, archive_entry):
node = self.get_optional_node(name, root=archive_entry)
- return node.text
+ if node is not None:
+ return node.text
+ return None
def get_rest_file_extensions(self, archive_entry):
file_extensions = []
diff --git a/scripts/lib/CIME/case_st_archive.py b/scripts/lib/CIME/case_st_archive.py
index 3f469655bbb..30fab631282 100644
--- a/scripts/lib/CIME/case_st_archive.py
+++ b/scripts/lib/CIME/case_st_archive.py
@@ -6,16 +6,30 @@
from CIME.XML.standard_module_setup import *
from CIME.case_submit import submit
-from CIME.XML.env_archive import EnvArchive
-from CIME.utils import run_and_log_case_status, ls_sorted_by_mtime
+from CIME.utils import run_and_log_case_status, ls_sorted_by_mtime, symlink_force
from os.path import isdir, join
import datetime
logger = logging.getLogger(__name__)
+###############################################################################
+def _get_archive_file_fn(copy_only):
+###############################################################################
+ """
+ Returns the function used to archive files: shutil.copyfile if copy_only, else shutil.move
+ """
+ return shutil.copyfile if copy_only else shutil.move
+
+
###############################################################################
def _get_datenames(case, last_date=None):
###############################################################################
+ """
+ Returns a list of datenames giving the dates of cpl restart files
+
+ If there are no cpl restart files, this will return []
+ """
+
if last_date is not None:
try:
last = datetime.datetime.strptime(last_date, '%Y-%m-%d')
@@ -26,8 +40,7 @@ def _get_datenames(case, last_date=None):
expect(isdir(rundir), 'Cannot open directory {} '.format(rundir))
casename = case.get_value("CASE")
files = sorted(glob.glob(os.path.join(rundir, casename + '.cpl.r*.nc')))
- if not files:
- expect(False, 'Cannot find a {}.cpl.r.*.nc file in directory {} '.format(casename, rundir))
+
datenames = []
for filename in files:
names = filename.split('.')
@@ -62,6 +75,22 @@ def _get_ninst_info(case, compclass):
logger.debug("ninst and ninst_strings are: {} and {} for {}".format(ninst, ninst_strings, compclass))
return ninst, ninst_strings
+###############################################################################
+def _get_component_archive_entries(case, archive):
+###############################################################################
+ """
+ This is a generator function: each iteration yields a tuple
+ (archive_entry, compname, compclass) for one component in this
+ case's compset components.
+ """
+ compset_comps = case.get_compset_components()
+ compset_comps.append('cpl')
+ compset_comps.append('dart')
+
+ for compname in compset_comps:
+ archive_entry = archive.get_entry(compname)
+ if archive_entry is not None:
+ yield(archive_entry, compname, archive_entry.get("compclass"))
###############################################################################
def _archive_rpointer_files(case, archive, archive_entry, archive_restdir,
@@ -222,16 +251,53 @@ def get_histfiles_for_restarts(case, archive, archive_entry, restfile):
return histfiles
###############################################################################
-def _archive_restarts(case, archive, archive_entry,
- compclass, compname, datename, datename_is_last,
- archive_file_fn):
+def _archive_restarts_date(case, archive,
+ datename, datename_is_last,
+ archive_restdir, archive_file_fn,
+ link_to_last_restart_files=False):
###############################################################################
+ """
+ Archive restart files for a single date
+
+ Returns a dictionary of histfiles that need saving in the run
+ directory, indexed by compname
+ """
+ logger.info('-------------------------------------------')
+ logger.info('Archiving restarts for date {}'.format(datename))
+ logger.info('-------------------------------------------')
+
+ histfiles_savein_rundir_by_compname = {}
+
+ for (archive_entry, compname, compclass) in _get_component_archive_entries(case, archive):
+ logger.info('Archiving restarts for {} ({})'.format(compname, compclass))
+
+ # archive restarts
+ histfiles_savein_rundir = _archive_restarts_date_comp(case, archive, archive_entry,
+ compclass, compname,
+ datename, datename_is_last,
+ archive_restdir, archive_file_fn,
+ link_to_last_restart_files)
+ histfiles_savein_rundir_by_compname[compname] = histfiles_savein_rundir
+
+ return histfiles_savein_rundir_by_compname
+
+###############################################################################
+def _archive_restarts_date_comp(case, archive, archive_entry,
+ compclass, compname, datename, datename_is_last,
+ archive_restdir, archive_file_fn,
+ link_to_last_restart_files=False):
+###############################################################################
+ """
+ Archive restart files for a single date and single component
+
+ If link_to_last_restart_files is True, then make a symlink to the
+ last set of restart files (i.e., the set with datename_is_last
+ True); if False (the default), copy them. (This has no effect on the
+ history files that are associated with these restart files.)
+ """
- # determine directory for archiving restarts based on datename
- dout_s_root = case.get_value("DOUT_S_ROOT")
rundir = case.get_value("RUNDIR")
casename = case.get_value("CASE")
- archive_restdir = join(dout_s_root, 'rest', datename)
if datename_is_last or case.get_value('DOUT_S_SAVE_INTERIM_RESTART_FILES'):
if not os.path.exists(archive_restdir):
os.makedirs(archive_restdir)
@@ -247,6 +313,14 @@ def _archive_restarts(case, archive, archive_entry,
# copy latest restart files to archive restart directory
histfiles_savein_rundir = []
+ # determine function to use for last set of restart files
+ if link_to_last_restart_files:
+ last_restart_file_fn = symlink_force
+ last_restart_file_fn_msg = "linking"
+ else:
+ last_restart_file_fn = shutil.copy
+ last_restart_file_fn_msg = "copying"
+
# get file_extension suffixes
for suffix in archive.get_rest_file_extensions(archive_entry):
for i in range(ninst):
@@ -291,8 +365,9 @@ def _archive_restarts(case, archive, archive_entry,
if datename_is_last:
srcfile = os.path.join(rundir, restfile)
destfile = os.path.join(archive_restdir, restfile)
- shutil.copy(srcfile, destfile)
- logger.info("copying \n{} to \n{}".format(srcfile, destfile))
+ last_restart_file_fn(srcfile, destfile)
+ logger.info("{} \n{} to \n{}".format(
+ last_restart_file_fn_msg, srcfile, destfile))
for histfile in histfiles_for_restart:
srcfile = os.path.join(rundir, histfile)
destfile = os.path.join(archive_restdir, histfile)
@@ -341,50 +416,35 @@ def _archive_process(case, archive, last_date, archive_incomplete_logs, copy_onl
"""
logger.debug('In archive_process...')
- compset_comps = case.get_compset_components()
- compset_comps.append('cpl')
- compset_comps.append('dart')
- if copy_only is True:
- archive_file_fn = shutil.copyfile
- else:
- archive_file_fn = shutil.move
+ archive_file_fn = _get_archive_file_fn(copy_only)
# archive log files
_archive_log_files(case, archive_incomplete_logs, archive_file_fn)
- for archive_entry in archive.get_entries():
- # determine compname and compclass
- compname, compclass = archive.get_entry_info(archive_entry)
-
- # check for validity of compname
- if compname not in compset_comps:
- continue
-
- # archive restarts and all necessary associated fields (e.g. rpointer files)
- logger.info('-------------------------------------------')
- logger.info('doing short term archiving for {} ({})'.format(compname, compclass))
- logger.info('-------------------------------------------')
- datenames = _get_datenames(case, last_date)
- for datename in datenames:
- logger.info('Archiving for date %s' % datename)
- datename_is_last = False
- if datename == datenames[-1]:
- datename_is_last = True
-
- # archive restarts
- histfiles_savein_rundir = _archive_restarts(case, archive, archive_entry,
- compclass, compname,
- datename, datename_is_last,
- archive_file_fn)
-
- # if the last datename for restart files, then archive history files
- # for this compname
- if datename_is_last:
- logger.info("histfiles_savein_rundir {} ".format(histfiles_savein_rundir))
- _archive_history_files(case, archive, archive_entry,
- compclass, compname, histfiles_savein_rundir,
- archive_file_fn)
+ # archive restarts and all necessary associated files (e.g. rpointer files)
+ histfiles_savein_rundir_by_compname = {}
+ dout_s_root = case.get_value("DOUT_S_ROOT")
+ datenames = _get_datenames(case, last_date)
+ for datename in datenames:
+ datename_is_last = False
+ if datename == datenames[-1]:
+ datename_is_last = True
+
+ archive_restdir = join(dout_s_root, 'rest', datename)
+ histfiles_savein_rundir_by_compname_this_date = _archive_restarts_date(
+ case, archive, datename, datename_is_last, archive_restdir, archive_file_fn)
+ if datename_is_last:
+ histfiles_savein_rundir_by_compname = histfiles_savein_rundir_by_compname_this_date
+
+ # archive history files
+ for (archive_entry, compname, compclass) in _get_component_archive_entries(case, archive):
+ logger.info('Archiving history files for {} ({})'.format(compname, compclass))
+ histfiles_savein_rundir = histfiles_savein_rundir_by_compname.get(compname, [])
+ logger.info("histfiles_savein_rundir {} ".format(histfiles_savein_rundir))
+ _archive_history_files(case, archive, archive_entry,
+ compclass, compname, histfiles_savein_rundir,
+ archive_file_fn)
###############################################################################
def restore_from_archive(case, rest_dir=None):
@@ -409,6 +469,39 @@ def restore_from_archive(case, rest_dir=None):
shutil.copy(item, rundir)
+###############################################################################
+def archive_last_restarts(case, archive_restdir, link_to_restart_files=False):
+###############################################################################
+ """
+ Convenience function for archiving just the last set of restart
+ files to a given directory. This also saves files attached to the
+ restart set, such as rpointer files and necessary history
+ files. However, it does not save other files that are typically
+ archived (e.g., history files, log files).
+
+ Files are copied to the directory given by archive_restdir.
+
+ If link_to_restart_files is True, then symlinks rather than copies
+ are done for the restart files. (This has no effect on the history
+ files that are associated with these restart files.)
+ """
+ archive = case.get_env('archive')
+ datenames = _get_datenames(case)
+ expect(len(datenames) >= 1, "No restart dates found")
+ last_datename = datenames[-1]
+
+ # Not currently used for anything if we're only archiving the last
+ # set of restart files, but needed to satisfy the following interface
+ archive_file_fn = _get_archive_file_fn(copy_only=False)
+
+ _ = _archive_restarts_date(case=case,
+ archive=archive,
+ datename=last_datename,
+ datename_is_last=True,
+ archive_restdir=archive_restdir,
+ archive_file_fn=archive_file_fn,
+ link_to_last_restart_files=link_to_restart_files)
+
###############################################################################
def case_st_archive(case, last_date=None, archive_incomplete_logs=True, copy_only=False, no_resubmit=False):
###############################################################################
@@ -434,7 +527,7 @@ def case_st_archive(case, last_date=None, archive_incomplete_logs=True, copy_onl
logger.info("st_archive starting")
- archive = EnvArchive(infile=os.path.join(caseroot, 'env_archive.xml'))
+ archive = case.get_env('archive')
functor = lambda: _archive_process(case, archive, last_date, archive_incomplete_logs, copy_only)
run_and_log_case_status(functor, "st_archive", caseroot=caseroot)
diff --git a/scripts/lib/CIME/utils.py b/scripts/lib/CIME/utils.py
index 93c2c5cd915..6709badf26b 100644
--- a/scripts/lib/CIME/utils.py
+++ b/scripts/lib/CIME/utils.py
@@ -2,7 +2,7 @@
Common functions used by cime python scripts
Warning: you cannot use CIME Classes in this module as it causes circular dependencies
"""
-import logging, gzip, sys, os, time, re, shutil, glob, string, random, imp
+import logging, gzip, sys, os, time, re, shutil, glob, string, random, imp, errno
import stat as statlib
import warnings
from contextlib import contextmanager
@@ -602,6 +602,21 @@ def safe_copy(src_dir, tgt_dir, file_map):
os.remove(full_tgt)
shutil.copy2(full_src, full_tgt)
+def symlink_force(target, link_name):
+ """
+ Makes a symlink from link_name to target. Unlike the standard
+ os.symlink, this will work even if link_name already exists (in
+ which case link_name will be overwritten).
+ """
+ try:
+ os.symlink(target, link_name)
+ except OSError as e:
+ if e.errno == errno.EEXIST:
+ os.remove(link_name)
+ os.symlink(target, link_name)
+ else:
+ raise e
+
def find_proc_id(proc_name=None,
children_only=False,
of_parent=None):