diff --git a/cime/cime_config/acme/machines/config_batch.xml b/cime/cime_config/acme/machines/config_batch.xml
index d5e57c65192e..aa9abfb4e3a5 100644
--- a/cime/cime_config/acme/machines/config_batch.xml
+++ b/cime/cime_config/acme/machines/config_batch.xml
@@ -329,8 +329,8 @@
-l nodes={{ num_nodes }}
- batch
- debug
+ batch
+ debug
diff --git a/cime/cime_config/acme/machines/config_compilers.xml b/cime/cime_config/acme/machines/config_compilers.xml
index 97d61978c5a7..94e4a123e92e 100644
--- a/cime/cime_config/acme/machines/config_compilers.xml
+++ b/cime/cime_config/acme/machines/config_compilers.xml
@@ -644,6 +644,17 @@ for mct, etc.
/projects/install/rhel6-x86_64/ACME/AlbanyTrilinos/Albany/build/install
+
+ -O2
+ -O2
+ --host=Linux
+ $(NETCDFROOT)
+ $(PNETCDFROOT)
+ $(shell $(NETCDF_PATH)/bin/nf-config --flibs) -lblas -llapack
+ -lstdc++ -lmpi_cxx
+ /projects/install/rhel6-x86_64/ACME/AlbanyTrilinos/Albany/build/install
+
+
-O2
-O2
@@ -823,6 +834,8 @@ for mct, etc.
-O2
-O2
+ $(NETCDFROOT)
+ $(PNETCDFROOT)
--host=Linux
lustre
$(shell nf-config --flibs)
diff --git a/cime/cime_config/acme/machines/config_machines.xml b/cime/cime_config/acme/machines/config_machines.xml
index 59be75cd165e..4408d9d2cdad 100644
--- a/cime/cime_config/acme/machines/config_machines.xml
+++ b/cime/cime_config/acme/machines/config_machines.xml
@@ -508,7 +508,7 @@
sonproxy.sandia.gov:80
acme_developer
LINUX
- gnu
+ gnu,intel
openmpi,mpi-serial
$ENV{HOME}/acme/scratch
$CIME_OUTPUT_ROOT/$CASE/run
@@ -546,7 +546,8 @@
sems-env
sems-git
sems-python/2.7.9
- sems-gcc/5.3.0
+ sems-gcc/5.3.0
+ sems-intel/16.0.3
sems-openmpi/1.8.7
sems-cmake/2.8.12
sems-netcdf/4.3.2/parallel
@@ -1461,7 +1462,7 @@
subversion
subversion/1.8.3
cmake
- cmake/2.8.10.2
+ cmake3/3.6.0
@@ -1527,25 +1528,28 @@
- cray-netcdf/4.4.0
+ cray-netcdf/4.4.1.1
- cray-netcdf-hdf5parallel/4.4.0
+ cray-netcdf-hdf5parallel/4.4.1.1
cray-parallel-netcdf/1.7.0
-
$COMPILER
$MPILIB
- $CESM_REPO
1
1
1
128M
+ /opt/cray/netcdf-hdf5parallel/4.4.1.1/PGI/15.3/
+ /opt/cray/parallel-netcdf/1.7.0/PGI/15.3
+ /opt/cray/netcdf-hdf5parallel/4.4.1.1/PGI/15.3/
+ /opt/cray/parallel-netcdf/1.7.0/PGI/15.3
+ /opt/cray/netcdf-hdf5parallel/4.4.1.1/INTEL/15.0/
+ /opt/cray/parallel-netcdf/1.7.0/INTEL/15.0
+ /opt/cray/netcdf-hdf5parallel/4.4.1.1/CRAY/8.3/
+ /opt/cray/parallel-netcdf/1.7.0/CRAY/8.3
128M
diff --git a/cime/externals/pio1/pio/CMakeLists.txt b/cime/externals/pio1/pio/CMakeLists.txt
index ae07c36cdd29..3b10c1a2cbda 100644
--- a/cime/externals/pio1/pio/CMakeLists.txt
+++ b/cime/externals/pio1/pio/CMakeLists.txt
@@ -30,6 +30,8 @@ endif()
# Netcdf is required
+SET (CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../../pio2/cmake" ${CMAKE_MODULE_PATH})
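+# Pick up CMake find modules (e.g. FindNetCDF) from the pio2 source tree.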
+
#SET (NETCDF_FIND_COMPONENTS F90)
FIND_PACKAGE(NetCDF "4.3.3" COMPONENTS C Fortran)
IF (${NetCDF_Fortran_FOUND})
diff --git a/cime/utils/python/CIME/BuildTools/configure.py b/cime/utils/python/CIME/BuildTools/configure.py
index 3d53f8938b91..b610b087aa3c 100644
--- a/cime/utils/python/CIME/BuildTools/configure.py
+++ b/cime/utils/python/CIME/BuildTools/configure.py
@@ -37,7 +37,7 @@ def configure(machobj, output_dir, macros_format, compiler, mpilib, debug, sysos
"""
# Macros generation.
suffixes = {'Makefile': 'make', 'CMake': 'cmake'}
- macro_maker = Compilers(machobj)
+ macro_maker = Compilers(machobj, compiler=compiler, mpilib=mpilib)
for form in macros_format:
out_file_name = os.path.join(output_dir,"Macros."+suffixes[form])
macro_maker.write_macros_file(macros_file=out_file_name, output_format=suffixes[form])
diff --git a/cime/utils/python/CIME/aprun.py b/cime/utils/python/CIME/aprun.py
new file mode 100755
index 000000000000..25b4f3c13150
--- /dev/null
+++ b/cime/utils/python/CIME/aprun.py
@@ -0,0 +1,133 @@
+"""
+Aprun is far too complex to handle purely through XML. We need python
+code to compute and assemble aprun commands.
+"""
+
+from CIME.XML.standard_module_setup import *
+
+import math
+
+logger = logging.getLogger(__name__)
+
+###############################################################################
+def _get_aprun_cmd_for_case_impl(ntasks, nthreads, rootpes, pstrids,
+                                 max_tasks_per_node, pes_per_node,
+                                 pio_numtasks, pio_async_interface,
+                                 compiler, machine, run_exe):
+###############################################################################
+ """
+ No one really understands this code, but we can at least test it.
+
+ >>> ntasks = [512, 675, 168, 512, 128, 168, 168, 512, 1]
+ >>> nthreads = [2, 2, 2, 2, 4, 2, 2, 2, 1]
+ >>> rootpes = [0, 0, 512, 0, 680, 512, 512, 0, 0]
+ >>> pstrids = [1, 1, 1, 1, 1, 1, 1, 1, 1]
+ >>> max_tasks_per_node = 16
+ >>> pes_per_node = 16
+ >>> pio_numtasks = -1
+ >>> pio_async_interface = False
+ >>> compiler = "pgi"
+ >>> machine = "titan"
+ >>> run_exe = "acme.exe"
+ >>> _get_aprun_cmd_for_case_impl(ntasks, nthreads, rootpes, pstrids, max_tasks_per_node, pes_per_node, pio_numtasks, pio_async_interface, compiler, machine, run_exe)
+ 'aprun -S 4 -n 680 -N 8 -d 2 acme.exe : -S 2 -n 128 -N 4 -d 4 acme.exe '
+ >>> compiler = "intel"
+ >>> _get_aprun_cmd_for_case_impl(ntasks, nthreads, rootpes, pstrids, max_tasks_per_node, pes_per_node, pio_numtasks, pio_async_interface, compiler, machine, run_exe)
+ 'aprun -S 4 -cc numa_node -n 680 -N 8 -d 2 acme.exe : -S 2 -cc numa_node -n 128 -N 4 -d 4 acme.exe '
+ """
+    max_tasks_per_node = 1 if max_tasks_per_node < 1 else max_tasks_per_node
+
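+    # The highest global MPI rank any component touches is rootpe + (ntask - 1) * pstrid;
+    # the job's total task count is that maximum plus one.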
+    total_tasks = 0
+    for ntask, rootpe, pstrid in zip(ntasks, rootpes, pstrids):
+        tt = rootpe + (ntask - 1) * pstrid + 1
+        total_tasks = max(tt, total_tasks)
+
+    # Check if we need to add pio's tasks to the total task count
+    if pio_async_interface:
+        total_tasks += pio_numtasks if pio_numtasks > 0 else pes_per_node
+
+    # Compute max threads for each mpi task
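+    # maxt[i] ends up holding the largest thread count requested by any
+    # component whose decomposition places an MPI task on global rank i.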
+    maxt = [0] * total_tasks
+    for ntask, nthrd, rootpe, pstrid in zip(ntasks, nthreads, rootpes, pstrids):
+        c2 = 0
+        while c2 < ntask:
+            s = rootpe + c2 * pstrid
+            if nthrd > maxt[s]:
+                maxt[s] = nthrd
+
+            c2 += 1
+
+    logger.info("total tasks is: %s" % total_tasks)
+
+    # make sure all maxt values are at least 1
+    for c1 in xrange(0, total_tasks):
+        if maxt[c1] < 1:
+            maxt[c1] = 1
+
+    # Compute task and thread settings for batch commands
+    tasks_per_node, task_count, thread_count, max_thread_count, aprun = \
+        0, 1, maxt[0], maxt[0], "aprun"
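+    # Walk the ranks in order and start a new aprun command segment (MPMD
+    # segments are separated by ":") whenever the per-rank thread count changes.
+    # aprun flags used below: -n (MPI tasks in the segment), -N (tasks per node),
+    # -d (threads per task), -S (tasks per NUMA node) and -cc numa_node (bind
+    # each task's CPU affinity to its NUMA node).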
+    for c1 in xrange(1, total_tasks):
+        if maxt[c1] != thread_count:
+            tasks_per_node = min(pes_per_node, max_tasks_per_node / thread_count)
+
+            tasks_per_node = min(task_count, tasks_per_node)
+
+            # Compute for every subset
+            task_per_numa = int(math.ceil(tasks_per_node / 2.0))
+            # Option for Titan
+            if machine == "titan" and tasks_per_node > 1:
+                aprun += " -S %d" % task_per_numa
+                if compiler == "intel":
+                    aprun += " -cc numa_node"
+
+            aprun += " -n %d -N %d -d %d %s :" % (task_count, tasks_per_node, thread_count, run_exe)
+
+            thread_count = maxt[c1]
+            max_thread_count = max(max_thread_count, maxt[c1])
+            task_count = 1
+
+        else:
+            task_count += 1
+
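+    # Emit the final segment for the last group of ranks.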
+    if pes_per_node > 0:
+        tasks_per_node = min(pes_per_node, max_tasks_per_node / thread_count)
+    else:
+        tasks_per_node = max_tasks_per_node / thread_count
+
+    tasks_per_node = min(task_count, tasks_per_node)
+
+    task_per_numa = int(math.ceil(tasks_per_node / 2.0))
+
+    # Special option for Titan with intel compiler
+    if machine == "titan" and tasks_per_node > 1:
+        aprun += " -S %d" % task_per_numa
+        if compiler == "intel":
+            aprun += " -cc numa_node"
+
+    aprun += " -n %d -N %d -d %d %s " % (task_count, tasks_per_node, thread_count, run_exe)
+
+    return aprun
+
+###############################################################################
+def get_aprun_cmd_for_case(case, run_exe):
+###############################################################################
+ """
+ Given a case, construct and return the aprun command
+ """
+ models = case.get_values("COMP_CLASSES")
+ ntasks, nthreads, rootpes, pstrids = [], [], [], []
+ for model in models:
+ model = "CPL" if model == "DRV" else model
+ for the_list, item_name in zip([ntasks, nthreads, rootpes, pstrids],
+ ["NTASKS", "NTHRDS", "ROOTPE", "PSTRID"]):
+ the_list.append(case.get_value("_".join([item_name, model])))
+
+ return _get_aprun_cmd_for_case_impl(ntasks, nthreads, rootpes, pstrids,
+ case.get_value("MAX_TASKS_PER_NODE"),
+ case.get_value("PES_PER_NODE"),
+ case.get_value("PIO_NUMTASKS"),
+ case.get_value("PIO_ASYNC_INTERFACE"),
+ case.get_value("COMPILER"),
+ case.get_value("MACH"),
+ run_exe)
diff --git a/cime/utils/python/CIME/case.py b/cime/utils/python/CIME/case.py
index fc6b1542f0db..a4d49aaa5465 100644
--- a/cime/utils/python/CIME/case.py
+++ b/cime/utils/python/CIME/case.py
@@ -33,6 +33,7 @@
 from CIME.XML.env_batch import EnvBatch
 from CIME.user_mod_support import apply_user_mods
 from CIME.case_setup import case_setup
+from CIME.aprun import get_aprun_cmd_for_case
 logger = logging.getLogger(__name__)
@@ -98,13 +99,14 @@ def __init__(self, case_root=None, read_only=True):
         self.thread_count = None
+        self.total_tasks = None
         self.tasks_per_node = None
         self.num_nodes = None
         self.tasks_per_numa = None
         self.cores_per_task = None
         # check if case has been configured and if so initialize derived
         if self.get_value("CASEROOT") is not None:
-            self.initialize_derived_attributes()
+            self._initialize_derived_attributes()
def check_if_comp_var(self, vid):
@@ -117,20 +119,20 @@ def check_if_comp_var(self, vid):
                 return vid, comp, iscompvar
         return vid, comp, iscompvar
-    def initialize_derived_attributes(self):
+    def _initialize_derived_attributes(self):
         """
         These are derived variables which can be used in the config_* files
         for variable substitution using the {{ var }} syntax
         """
         env_mach_pes = self.get_env("mach_pes")
         comp_classes = self.get_values("COMP_CLASSES")
-        total_tasks = env_mach_pes.get_total_tasks(comp_classes)
         pes_per_node = self.get_value("PES_PER_NODE")
+        self.total_tasks = env_mach_pes.get_total_tasks(comp_classes)
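+        # Stored on the case (instead of returned) so that configure() and other
+        # callers can reuse the value later.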
         self.thread_count = env_mach_pes.get_max_thread_count(comp_classes)
-        self.tasks_per_node = env_mach_pes.get_tasks_per_node(total_tasks, self.thread_count)
-        logger.debug("total_tasks %s thread_count %s"%(total_tasks, self.thread_count))
-        self.num_nodes = env_mach_pes.get_total_nodes(total_tasks, self.thread_count)
+        self.tasks_per_node = env_mach_pes.get_tasks_per_node(self.total_tasks, self.thread_count)
+        logger.debug("total_tasks %s thread_count %s"%(self.total_tasks, self.thread_count))
+        self.num_nodes = env_mach_pes.get_total_nodes(self.total_tasks, self.thread_count)
         self.tasks_per_numa = int(math.ceil(self.tasks_per_node / 2.0))
         smt_factor = max(1,int(self.get_value("MAX_TASKS_PER_NODE") / pes_per_node))
@@ -138,8 +140,6 @@ def initialize_derived_attributes(self):
         threads_per_core = 1 if (threads_per_node <= pes_per_node) else smt_factor
         self.cores_per_task = self.thread_count / threads_per_core
-        return total_tasks
-
     # Define __enter__ and __exit__ so that we can use this as a context manager
     # and force a flush on exit.
     def __enter__(self):
@@ -802,10 +802,10 @@ def configure(self, compset_name, grid_name, machine_name=None,
         if test:
             self.set_value("TEST",True)
-        total_tasks = self.initialize_derived_attributes()
+        self._initialize_derived_attributes()
         # Make sure that parallel IO is not specified if total_tasks==1
-        if total_tasks == 1:
+        if self.total_tasks == 1:
             for compclass in self._component_classes:
                 key = "PIO_TYPENAME_%s"%compclass
                 pio_typename = self.get_value(key)
@@ -813,7 +813,7 @@ def configure(self, compset_name, grid_name, machine_name=None,
self.set_value(key, "netcdf")
# Set TOTAL_CORES
- self.set_value("TOTAL_CORES", total_tasks * self.cores_per_task )
+ self.set_value("TOTAL_CORES", self.total_tasks * self.cores_per_task )
def get_compset_var_settings(self):
compset_obj = Compsets(infile=self.get_value("COMPSETS_SPEC_FILE"))
@@ -1091,20 +1091,17 @@ def get_mpirun_cmd(self, job="case.run"):
             }
         executable, args = env_mach_specific.get_mpirun(self, mpi_attribs, job=job)
-        # special case for aprun if using < 1 full node
-        if executable == "aprun":
-            totalpes = self.get_value("TOTALPES")
-            pes_per_node = self.get_value("PES_PER_NODE")
-            if totalpes < pes_per_node:
-                args["tasks_per_node"] = "-N "+str(totalpes)
-
-        mpi_arg_string = " ".join(args.values())
+        # special case for aprun
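+        # (aprun assembles the complete MPMD launch line itself via CIME.aprun,
+        # so the generic argument handling below does not apply to it.)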
+        if executable == "aprun":
+            return get_aprun_cmd_for_case(self, run_exe) + " " + run_misc_suffix
+        else:
+            mpi_arg_string = " ".join(args.values())
-        if self.get_value("BATCH_SYSTEM") == "cobalt":
-            mpi_arg_string += " : "
+            if self.get_value("BATCH_SYSTEM") == "cobalt":
+                mpi_arg_string += " : "
-        return "%s %s %s" % (executable if executable is not None else "", mpi_arg_string, run_suffix)
+            return "%s %s %s" % (executable if executable is not None else "", mpi_arg_string, run_suffix)
     def set_model_version(self, model):
         version = "unknown"