diff --git a/cime/cime_config/acme/machines/config_batch.xml b/cime/cime_config/acme/machines/config_batch.xml
index d5e57c65192e..aa9abfb4e3a5 100644
--- a/cime/cime_config/acme/machines/config_batch.xml
+++ b/cime/cime_config/acme/machines/config_batch.xml
@@ -329,8 +329,8 @@
     -l nodes={{ num_nodes }}
-    batch
-    debug
+    batch
+    debug
diff --git a/cime/cime_config/acme/machines/config_compilers.xml b/cime/cime_config/acme/machines/config_compilers.xml
index 97d61978c5a7..94e4a123e92e 100644
--- a/cime/cime_config/acme/machines/config_compilers.xml
+++ b/cime/cime_config/acme/machines/config_compilers.xml
@@ -644,6 +644,17 @@ for mct, etc.
     /projects/install/rhel6-x86_64/ACME/AlbanyTrilinos/Albany/build/install
+
+    -O2
+    -O2
+    --host=Linux
+    $(NETCDFROOT)
+    $(PNETCDFROOT)
+    $(shell $(NETCDF_PATH)/bin/nf-config --flibs) -lblas -llapack
+    -lstdc++ -lmpi_cxx
+    /projects/install/rhel6-x86_64/ACME/AlbanyTrilinos/Albany/build/install
+
+
     -O2
     -O2
@@ -823,6 +834,8 @@ for mct, etc.
     -O2
     -O2
+    $(NETCDFROOT)
+    $(PNETCDFROOT)
     --host=Linux
     lustre
     $(shell nf-config --flibs)
diff --git a/cime/cime_config/acme/machines/config_machines.xml b/cime/cime_config/acme/machines/config_machines.xml
index 59be75cd165e..4408d9d2cdad 100644
--- a/cime/cime_config/acme/machines/config_machines.xml
+++ b/cime/cime_config/acme/machines/config_machines.xml
@@ -508,7 +508,7 @@
     sonproxy.sandia.gov:80
     acme_developer
     LINUX
-    gnu
+    gnu,intel
     openmpi,mpi-serial
     $ENV{HOME}/acme/scratch
     $CIME_OUTPUT_ROOT/$CASE/run
@@ -546,7 +546,8 @@
     sems-env
     sems-git
     sems-python/2.7.9
-    sems-gcc/5.3.0
+    sems-gcc/5.3.0
+    sems-intel/16.0.3
     sems-openmpi/1.8.7
     sems-cmake/2.8.12
     sems-netcdf/4.3.2/parallel
@@ -1461,7 +1462,7 @@
     subversion
     subversion/1.8.3
     cmake
-    cmake/2.8.10.2
+    cmake3/3.6.0
@@ -1527,25 +1528,28 @@
-    cray-netcdf/4.4.0
+    cray-netcdf/4.4.1.1
-    cray-netcdf-hdf5parallel/4.4.0
+    cray-netcdf-hdf5parallel/4.4.1.1
     cray-parallel-netcdf/1.7.0
-    $COMPILER $MPILIB
-    $CESM_REPO
     1
     1
     1
     128M
+    /opt/cray/netcdf-hdf5parallel/4.4.1.1/PGI/15.3/
+    /opt/cray/parallel-netcdf/1.7.0/PGI/15.3
+    /opt/cray/netcdf-hdf5parallel/4.4.1.1/PGI/15.3/
+    /opt/cray/parallel-netcdf/1.7.0/PGI/15.3
+    /opt/cray/netcdf-hdf5parallel/4.4.1.1/INTEL/15.0/
+    /opt/cray/parallel-netcdf/1.7.0/INTEL/15.0
+    /opt/cray/netcdf-hdf5parallel/4.4.1.1/CRAY/8.3/
+    /opt/cray/parallel-netcdf/1.7.0/CRAY/8.3
     128M
diff --git a/cime/externals/pio1/pio/CMakeLists.txt b/cime/externals/pio1/pio/CMakeLists.txt
index ae07c36cdd29..3b10c1a2cbda 100644
--- a/cime/externals/pio1/pio/CMakeLists.txt
+++ b/cime/externals/pio1/pio/CMakeLists.txt
@@ -30,6 +30,8 @@ endif()
 
 # Netcdf is required
+SET (CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../../pio2/cmake" ${CMAKE_MODULE_PATH})
+
 #SET (NETCDF_FIND_COMPONENTS F90)
 FIND_PACKAGE(NetCDF "4.3.3" COMPONENTS C Fortran)
 IF (${NetCDF_Fortran_FOUND})
diff --git a/cime/utils/python/CIME/BuildTools/configure.py b/cime/utils/python/CIME/BuildTools/configure.py
index 3d53f8938b91..b610b087aa3c 100644
--- a/cime/utils/python/CIME/BuildTools/configure.py
+++ b/cime/utils/python/CIME/BuildTools/configure.py
@@ -37,7 +37,7 @@ def configure(machobj, output_dir, macros_format, compiler, mpilib, debug, sysos
     """
     # Macros generation.
     suffixes = {'Makefile': 'make', 'CMake': 'cmake'}
-    macro_maker = Compilers(machobj)
+    macro_maker = Compilers(machobj, compiler=compiler, mpilib=mpilib)
     for form in macros_format:
         out_file_name = os.path.join(output_dir,"Macros."+suffixes[form])
         macro_maker.write_macros_file(macros_file=out_file_name, output_format=suffixes[form])
diff --git a/cime/utils/python/CIME/aprun.py b/cime/utils/python/CIME/aprun.py
new file mode 100755
index 000000000000..25b4f3c13150
--- /dev/null
+++ b/cime/utils/python/CIME/aprun.py
@@ -0,0 +1,133 @@
+"""
+Aprun is far too complex to handle purely through XML. We need python
+code to compute and assemble aprun commands.
+"""
+
+from CIME.XML.standard_module_setup import *
+
+import math
+
+logger = logging.getLogger(__name__)
+
+###############################################################################
+def _get_aprun_cmd_for_case_impl(ntasks, nthreads, rootpes, pstrids,
+                                 max_tasks_per_node, pes_per_node,
+                                 pio_numtasks, pio_async_interface,
+                                 compiler, machine, run_exe):
+###############################################################################
+    """
+    No one really understands this code, but we can at least test it.
+
+    >>> ntasks = [512, 675, 168, 512, 128, 168, 168, 512, 1]
+    >>> nthreads = [2, 2, 2, 2, 4, 2, 2, 2, 1]
+    >>> rootpes = [0, 0, 512, 0, 680, 512, 512, 0, 0]
+    >>> pstrids = [1, 1, 1, 1, 1, 1, 1, 1, 1]
+    >>> max_tasks_per_node = 16
+    >>> pes_per_node = 16
+    >>> pio_numtasks = -1
+    >>> pio_async_interface = False
+    >>> compiler = "pgi"
+    >>> machine = "titan"
+    >>> run_exe = "acme.exe"
+    >>> _get_aprun_cmd_for_case_impl(ntasks, nthreads, rootpes, pstrids, max_tasks_per_node, pes_per_node, pio_numtasks, pio_async_interface, compiler, machine, run_exe)
+    'aprun -S 4 -n 680 -N 8 -d 2 acme.exe : -S 2 -n 128 -N 4 -d 4 acme.exe '
+    >>> compiler = "intel"
+    >>> _get_aprun_cmd_for_case_impl(ntasks, nthreads, rootpes, pstrids, max_tasks_per_node, pes_per_node, pio_numtasks, pio_async_interface, compiler, machine, run_exe)
+    'aprun -S 4 -cc numa_node -n 680 -N 8 -d 2 acme.exe : -S 2 -cc numa_node -n 128 -N 4 -d 4 acme.exe '
+    """
+    max_tasks_per_node = 1 if max_tasks_per_node < 1 else max_tasks_per_node
+
+    total_tasks = 0
+    for ntask, rootpe, pstrid in zip(ntasks, rootpes, pstrids):
+        tt = rootpe + (ntask - 1) * pstrid + 1
+        total_tasks = max(tt, total_tasks)
+
+    # Check if we need to add pio's tasks to the total task count
+    if pio_async_interface:
+        total_tasks += pio_numtasks if pio_numtasks > 0 else pes_per_node
+
+    # Compute max threads for each mpi task
+    maxt = [0] * total_tasks
+    for ntask, nthrd, rootpe, pstrid in zip(ntasks, nthreads, rootpes, pstrids):
+        c2 = 0
+        while c2 < ntask:
+            s = rootpe + c2 * pstrid
+            if nthrd > maxt[s]:
+                maxt[s] = nthrd
+
+            c2 += 1
+
+    logger.info("total tasks is: %s" % total_tasks)
+
+    # make sure all maxt values at least 1
+    for c1 in xrange(0, total_tasks):
+        if maxt[c1] < 1:
+            maxt[c1] = 1
+
+    # Compute task and thread settings for batch commands
+    tasks_per_node, task_count, thread_count, max_thread_count, aprun = \
+        0, 1, maxt[0], maxt[0], "aprun"
+    for c1 in xrange(1, total_tasks):
+        if maxt[c1] != thread_count:
+            tasks_per_node = min(pes_per_node, max_tasks_per_node / thread_count)
+
+            tasks_per_node = min(task_count, tasks_per_node)
+
+            # Compute for every subset
+            task_per_numa = int(math.ceil(tasks_per_node / 2.0))
+            # Option for Titan
+            if machine == "titan" and tasks_per_node > 1:
+                aprun += " -S %d" % task_per_numa
+                if compiler == "intel":
+                    aprun += " -cc numa_node"
+
+            aprun += " -n %d -N %d -d %d %s :" % (task_count, tasks_per_node, thread_count, run_exe)
+
+            thread_count = maxt[c1]
+            max_thread_count = max(max_thread_count, maxt[c1])
+            task_count = 1
+
+        else:
+            task_count += 1
+
+    if pes_per_node > 0:
+        tasks_per_node = min(pes_per_node, max_tasks_per_node / thread_count)
+    else:
+        tasks_per_node = max_tasks_per_node / thread_count
+
+    tasks_per_node = min(task_count, tasks_per_node)
+
+    task_per_numa = int(math.ceil(tasks_per_node / 2.0))
+
+    # Special option for Titan with intel compiler
+    if machine == "titan" and tasks_per_node > 1:
+        aprun += " -S %d" % task_per_numa
+        if compiler == "intel":
+            aprun += " -cc numa_node"
+
+    aprun += " -n %d -N %d -d %d %s " % (task_count, tasks_per_node, thread_count, run_exe)
+
+    return aprun
+
+###############################################################################
+def get_aprun_cmd_for_case(case, run_exe):
+###############################################################################
+    """
+    Given a case, construct and return the aprun command
+    """
+    models = case.get_values("COMP_CLASSES")
+    ntasks, nthreads, rootpes, pstrids = [], [], [], []
+    for model in models:
+        model = "CPL" if model == "DRV" else model
+        for the_list, item_name in zip([ntasks, nthreads, rootpes, pstrids],
+                                       ["NTASKS", "NTHRDS", "ROOTPE", "PSTRID"]):
+            the_list.append(case.get_value("_".join([item_name, model])))
+
+    return _get_aprun_cmd_for_case_impl(ntasks, nthreads, rootpes, pstrids,
+                                        case.get_value("MAX_TASKS_PER_NODE"),
+                                        case.get_value("PES_PER_NODE"),
+                                        case.get_value("PIO_NUMTASKS"),
+                                        case.get_value("PIO_ASYNC_INTERFACE"),
+                                        case.get_value("COMPILER"),
+                                        case.get_value("MACH"),
+                                        run_exe)
diff --git a/cime/utils/python/CIME/case.py b/cime/utils/python/CIME/case.py
index fc6b1542f0db..a4d49aaa5465 100644
--- a/cime/utils/python/CIME/case.py
+++ b/cime/utils/python/CIME/case.py
@@ -33,6 +33,7 @@ from CIME.XML.env_batch import EnvBatch
 from CIME.user_mod_support import apply_user_mods
 from CIME.case_setup import case_setup
+from CIME.aprun import get_aprun_cmd_for_case
 
 logger = logging.getLogger(__name__)
@@ -98,13 +99,14 @@ def __init__(self, case_root=None, read_only=True):
         self.thread_count = None
+        self.total_tasks = None
         self.tasks_per_node = None
         self.num_nodes = None
         self.tasks_per_numa = None
         self.cores_per_task = None
         # check if case has been configured and if so initialize derived
         if self.get_value("CASEROOT") is not None:
-            self.initialize_derived_attributes()
+            self._initialize_derived_attributes()
 
     def check_if_comp_var(self, vid):
@@ -117,20 +119,20 @@ def check_if_comp_var(self, vid):
             return vid, comp, iscompvar
         return vid, comp, iscompvar
 
-    def initialize_derived_attributes(self):
+    def _initialize_derived_attributes(self):
         """
         These are derived variables which can be used in the config_* files
         for variable substitution using the {{ var }} syntax
         """
         env_mach_pes = self.get_env("mach_pes")
         comp_classes = self.get_values("COMP_CLASSES")
-        total_tasks = env_mach_pes.get_total_tasks(comp_classes)
         pes_per_node = self.get_value("PES_PER_NODE")
+        self.total_tasks = env_mach_pes.get_total_tasks(comp_classes)
         self.thread_count = env_mach_pes.get_max_thread_count(comp_classes)
-        self.tasks_per_node = env_mach_pes.get_tasks_per_node(total_tasks, self.thread_count)
-        logger.debug("total_tasks %s thread_count %s"%(total_tasks, self.thread_count))
-        self.num_nodes = env_mach_pes.get_total_nodes(total_tasks, self.thread_count)
+        self.tasks_per_node = env_mach_pes.get_tasks_per_node(self.total_tasks, self.thread_count)
logger.debug("total_tasks %s thread_count %s"%(self.total_tasks, self.thread_count)) + self.num_nodes = env_mach_pes.get_total_nodes(self.total_tasks, self.thread_count) self.tasks_per_numa = int(math.ceil(self.tasks_per_node / 2.0)) smt_factor = max(1,int(self.get_value("MAX_TASKS_PER_NODE") / pes_per_node)) @@ -138,8 +140,6 @@ def initialize_derived_attributes(self): threads_per_core = 1 if (threads_per_node <= pes_per_node) else smt_factor self.cores_per_task = self.thread_count / threads_per_core - return total_tasks - # Define __enter__ and __exit__ so that we can use this as a context manager # and force a flush on exit. def __enter__(self): @@ -802,10 +802,10 @@ def configure(self, compset_name, grid_name, machine_name=None, if test: self.set_value("TEST",True) - total_tasks = self.initialize_derived_attributes() + self._initialize_derived_attributes() # Make sure that parallel IO is not specified if total_tasks==1 - if total_tasks == 1: + if self.total_tasks == 1: for compclass in self._component_classes: key = "PIO_TYPENAME_%s"%compclass pio_typename = self.get_value(key) @@ -813,7 +813,7 @@ def configure(self, compset_name, grid_name, machine_name=None, self.set_value(key, "netcdf") # Set TOTAL_CORES - self.set_value("TOTAL_CORES", total_tasks * self.cores_per_task ) + self.set_value("TOTAL_CORES", self.total_tasks * self.cores_per_task ) def get_compset_var_settings(self): compset_obj = Compsets(infile=self.get_value("COMPSETS_SPEC_FILE")) @@ -1091,20 +1091,17 @@ def get_mpirun_cmd(self, job="case.run"): } executable, args = env_mach_specific.get_mpirun(self, mpi_attribs, job=job) - # special case for aprun if using < 1 full node - if executable == "aprun": - totalpes = self.get_value("TOTALPES") - pes_per_node = self.get_value("PES_PER_NODE") - if totalpes < pes_per_node: - args["tasks_per_node"] = "-N "+str(totalpes) - - mpi_arg_string = " ".join(args.values()) + # special case for aprun + if executable == "aprun": + return get_aprun_cmd_for_case(self, run_exe) + " " + run_misc_suffix + else: + mpi_arg_string = " ".join(args.values()) - if self.get_value("BATCH_SYSTEM") == "cobalt": - mpi_arg_string += " : " + if self.get_value("BATCH_SYSTEM") == "cobalt": + mpi_arg_string += " : " - return "%s %s %s" % (executable if executable is not None else "", mpi_arg_string, run_suffix) + return "%s %s %s" % (executable if executable is not None else "", mpi_arg_string, run_suffix) def set_model_version(self, model): version = "unknown"