Merge branch 'jgfouca/cime/fixes_for_titan' (PR #1321)
Restore Titan support after CIME5.2 merge. A number of issues
prevented successful create, set-up, build and run phases on Titan
after the merge of CIME5.2. Even with these resolved, some
functionality was lost, e.g. support for using a different number of
MPI tasks per node for nonoverlapping components and support for more
than one compiler. Finally, support for netcdf4 files was missing when
using PGI.

  1) Remove CESM_REPO.
  2) Add better support for netcdf and pnetcdf in config*.xml .
  3) Use findNetcdf.cmake from pio2 in pio1.
  4) Use newer cmake, required by pio2.
  5) Fix aprun command construction (see the usage sketch after this list).
  6) Fix Macro maker, to enable multiple compiler support.
  7) Update modules and paths to enable netcdf4 support and multiple compiler support.
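
A usage sketch for item 5 (not part of the commit; the case path below is a
placeholder): after these fixes, the aprun command CIME will use for a case
can be inspected through the Case API changed later in this diff.

    from CIME.case import Case

    # Open an existing, already set-up case read-only and print the command
    # that case.run would launch with (on Titan this is the aprun line).
    with Case("/path/to/caseroot", read_only=True) as case:
        print(case.get_mpirun_cmd(job="case.run"))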

Testing has focused on production-like cases, including a low-resolution
water-cycle case and a high-resolution GMPAS case, both with and without
the v3 MPAS meshes and with a number of different PE layouts.
acme_developer is not yet working, for reasons unrelated to this pull
request, but without these changes it will not be possible to make
progress on getting acme_developer working again.

A unit test was added for the updated aprun command construction, and
it passes.
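
The new aprun module carries its expected output as a doctest, so one way to
exercise it directly (an assumed invocation, not necessarily how the test
suite wires it in; requires utils/python on PYTHONPATH) is:

    import doctest
    import CIME.aprun

    # Run the doctest embedded in CIME/aprun.py and fail loudly on mismatch.
    results = doctest.testmod(CIME.aprun, verbose=True)
    assert results.failed == 0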

* jgfouca/cime/fixes_for_titan:
  Fix typo in melvin config change
  update modules to support netcdf4 files on Titan
  Fix macro maker
  Make netcdfroot less hardcoded for titan
  Fix spelling of NTHRDS
  Remove outdated comment
  Add test for intel compiler
  Add unit test
  Re-introduce task_maker algorithm to compute aprun
  Fixing walltime for titan debug queue
  Fix aprun command construction
  Numerous titan fixes.
rljacob committed Mar 28, 2017
2 parents a6697b5 + fc9d6d0 commit b1eeb1e
Showing 7 changed files with 184 additions and 35 deletions.
cime_config/acme/machines/config_batch.xml (2 additions & 2 deletions)
@@ -329,8 +329,8 @@
<directive>-l nodes={{ num_nodes }}</directive>
</directives>
<queues>
<queue walltimemax="24:00" default="true">batch</queue>
<queue walltimemax="24:00">debug</queue>
<queue walltimemax="24:00:00" default="true">batch</queue>
<queue walltimemax="01:00:00" jobmin="0" jobmax="64">debug</queue>
</queues>
</batch_system>

cime_config/acme/machines/config_compilers.xml (13 additions & 0 deletions)
@@ -644,6 +644,17 @@ for mct, etc.
<ALBANY_PATH>/projects/install/rhel6-x86_64/ACME/AlbanyTrilinos/Albany/build/install</ALBANY_PATH>
</compiler>

<compiler COMPILER="intel" MACH="melvin">
<ADD_FFLAGS DEBUG="FALSE"> -O2 </ADD_FFLAGS>
<ADD_CFLAGS DEBUG="FALSE"> -O2 </ADD_CFLAGS>
<CONFIG_ARGS> --host=Linux </CONFIG_ARGS>
<NETCDF_PATH>$(NETCDFROOT)</NETCDF_PATH>
<PNETCDF_PATH>$(PNETCDFROOT)</PNETCDF_PATH>
<ADD_SLIBS> $(shell $(NETCDF_PATH)/bin/nf-config --flibs) -lblas -llapack</ADD_SLIBS>
<CXX_LIBS>-lstdc++ -lmpi_cxx</CXX_LIBS>
<ALBANY_PATH>/projects/install/rhel6-x86_64/ACME/AlbanyTrilinos/Albany/build/install</ALBANY_PATH>
</compiler>

<compiler COMPILER="gnu" MACH="sandia-srn-sems">
<ADD_FFLAGS DEBUG="FALSE"> -O2 </ADD_FFLAGS>
<ADD_CFLAGS DEBUG="FALSE"> -O2 </ADD_CFLAGS>
@@ -823,6 +834,8 @@ for mct, etc.
<compiler COMPILER="pgi" MACH="titan">
<ADD_CFLAGS DEBUG="FALSE"> -O2 </ADD_CFLAGS>
<ADD_FFLAGS DEBUG="FALSE"> -O2 </ADD_FFLAGS>
<NETCDF_PATH>$(NETCDFROOT)</NETCDF_PATH>
<PNETCDF_PATH>$(PNETCDFROOT)</PNETCDF_PATH>
<CONFIG_ARGS> --host=Linux </CONFIG_ARGS>
<PIO_FILESYSTEM_HINTS>lustre</PIO_FILESYSTEM_HINTS>
<ADD_SLIBS> $(shell nf-config --flibs) </ADD_SLIBS>
cime_config/acme/machines/config_machines.xml (14 additions & 10 deletions)
@@ -508,7 +508,7 @@
<PROXY>sonproxy.sandia.gov:80</PROXY>
<TESTS>acme_developer</TESTS>
<OS>LINUX</OS>
<COMPILERS>gnu</COMPILERS>
<COMPILERS>gnu,intel</COMPILERS>
<MPILIBS>openmpi,mpi-serial</MPILIBS>
<CIME_OUTPUT_ROOT>$ENV{HOME}/acme/scratch</CIME_OUTPUT_ROOT>
<RUNDIR>$CIME_OUTPUT_ROOT/$CASE/run</RUNDIR>
@@ -546,7 +546,8 @@
<command name="load">sems-env</command>
<command name="load">sems-git</command>
<command name="load">sems-python/2.7.9</command>
<command name="load">sems-gcc/5.3.0</command>
<command name="load" compiler="gnu">sems-gcc/5.3.0</command>
<command name="load" compiler="intel">sems-intel/16.0.3</command>
<command name="load">sems-openmpi/1.8.7</command>
<command name="load">sems-cmake/2.8.12</command>
<command name="load">sems-netcdf/4.3.2/parallel</command>
@@ -1461,7 +1462,7 @@
<command name="unload">subversion</command>
<command name="load">subversion/1.8.3</command>
<command name="unload">cmake</command>
<command name="load">cmake/2.8.10.2</command>
<command name="load">cmake3/3.6.0</command>
</modules>

<modules compiler="pgiacc"> <!-- changing pgi_acc to pgiacc -->
@@ -1527,25 +1528,28 @@
</modules>
<!-- mpi lib settings -->
<modules mpilib="mpi-serial">
<command name="load">cray-netcdf/4.4.0</command>
<command name="load">cray-netcdf/4.4.1.1</command>
</modules>
<modules mpilib="!mpi-serial">
<command name="load">cray-netcdf-hdf5parallel/4.4.0</command>
<command name="load">cray-netcdf-hdf5parallel/4.4.1.1</command>
<command name="load">cray-parallel-netcdf/1.7.0</command>
</modules>
<!-- Default -->
<!--
set CESM_REPO = `./xmlquery CCSM_REPOTAG -value`
if($status == 0) then
-->
<environment_variables>
<env name="COMPILER">$COMPILER</env>
<env name="MPILIB">$MPILIB</env>
<env name="CESM_REPO">$CESM_REPO</env>
<env name="MPICH_ENV_DISPLAY">1</env>
<env name="MPICH_VERSION_DISPLAY">1</env>
<env name="MPICH_CPUMASK_DISPLAY">1</env>
<env name="MPSTKZ">128M</env>
<env name="NETCDFROOT" compiler="pgi">/opt/cray/netcdf-hdf5parallel/4.4.1.1/PGI/15.3/</env>
<env name="PNETCDFROOT" compiler="pgi" mpilib="!mpi-serial">/opt/cray/parallel-netcdf/1.7.0/PGI/15.3</env>
<env name="NETCDFROOT" compiler="pgiacc">/opt/cray/netcdf-hdf5parallel/4.4.1.1/PGI/15.3/</env>
<env name="PNETCDFROOT" compiler="pgiacc" mpilib="!mpi-serial">/opt/cray/parallel-netcdf/1.7.0/PGI/15.3</env>
<env name="NETCDFROOT" compiler="intel">/opt/cray/netcdf-hdf5parallel/4.4.1.1/INTEL/15.0/</env>
<env name="PNETCDFROOT" compiler="intel" mpilib="!mpi-serial">/opt/cray/parallel-netcdf/1.7.0/INTEL/15.0</env>
<env name="NETCDFROOT" compiler="cray">/opt/cray/netcdf-hdf5parallel/4.4.1.1/CRAY/8.3/</env>
<env name="PNETCDFROOT" compiler="cray" mpilib="!mpi-serial">/opt/cray/parallel-netcdf/1.7.0/CRAY/8.3</env>
<env name="OMP_STACKSIZE">128M</env>
</environment_variables>

externals/pio1/pio/CMakeLists.txt (2 additions & 0 deletions)
@@ -30,6 +30,8 @@ endif()

# Netcdf is required

SET (CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../../pio2/cmake" ${CMAKE_MODULE_PATH})

#SET (NETCDF_FIND_COMPONENTS F90)
FIND_PACKAGE(NetCDF "4.3.3" COMPONENTS C Fortran)
IF (${NetCDF_Fortran_FOUND})
utils/python/CIME/BuildTools/configure.py (1 addition & 1 deletion)
@@ -37,7 +37,7 @@ def configure(machobj, output_dir, macros_format, compiler, mpilib, debug, sysos
"""
# Macros generation.
suffixes = {'Makefile': 'make', 'CMake': 'cmake'}
macro_maker = Compilers(machobj)
macro_maker = Compilers(machobj, compiler=compiler, mpilib=mpilib)
for form in macros_format:
out_file_name = os.path.join(output_dir,"Macros."+suffixes[form])
macro_maker.write_macros_file(macros_file=out_file_name, output_format=suffixes[form])
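
A sketch of what this one-line change enables (assumed import paths; the
machine and library choices are hypothetical): Macros can now be generated
for a specific compiler and MPI library rather than only the machine default,
which is what multi-compiler machines such as melvin (gnu,intel) need.

    from CIME.XML.compilers import Compilers
    from CIME.XML.machines import Machines

    # Build Macros.make for the intel/openmpi combination on melvin.
    machobj = Machines(machine="melvin")
    macro_maker = Compilers(machobj, compiler="intel", mpilib="openmpi")
    macro_maker.write_macros_file(macros_file="Macros.make", output_format="make")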
utils/python/CIME/aprun.py (133 additions & 0 deletions, new file)
@@ -0,0 +1,133 @@
"""
Aprun is far too complex to handle purely through XML. We need python
code to compute and assemble aprun commands.
"""

from CIME.XML.standard_module_setup import *

import math

logger = logging.getLogger(__name__)

###############################################################################
def _get_aprun_cmd_for_case_impl(ntasks, nthreads, rootpes, pstrids,
max_tasks_per_node, pes_per_node,
pio_numtasks, pio_async_interface,
compiler, machine, run_exe):
###############################################################################
"""
No one really understands this code, but we can at least test it.
>>> ntasks = [512, 675, 168, 512, 128, 168, 168, 512, 1]
>>> nthreads = [2, 2, 2, 2, 4, 2, 2, 2, 1]
>>> rootpes = [0, 0, 512, 0, 680, 512, 512, 0, 0]
>>> pstrids = [1, 1, 1, 1, 1, 1, 1, 1, 1]
>>> max_tasks_per_node = 16
>>> pes_per_node = 16
>>> pio_numtasks = -1
>>> pio_async_interface = False
>>> compiler = "pgi"
>>> machine = "titan"
>>> run_exe = "acme.exe"
>>> _get_aprun_cmd_for_case_impl(ntasks, nthreads, rootpes, pstrids, max_tasks_per_node, pes_per_node, pio_numtasks, pio_async_interface, compiler, machine, run_exe)
'aprun -S 4 -n 680 -N 8 -d 2 acme.exe : -S 2 -n 128 -N 4 -d 4 acme.exe '
>>> compiler = "intel"
>>> _get_aprun_cmd_for_case_impl(ntasks, nthreads, rootpes, pstrids, max_tasks_per_node, pes_per_node, pio_numtasks, pio_async_interface, compiler, machine, run_exe)
'aprun -S 4 -cc numa_node -n 680 -N 8 -d 2 acme.exe : -S 2 -cc numa_node -n 128 -N 4 -d 4 acme.exe '
"""
max_tasks_per_node = 1 if max_tasks_per_node < 1 else max_tasks_per_node

total_tasks = 0
for ntask, rootpe, pstrid in zip(ntasks, rootpes, pstrids):
tt = rootpe + (ntask - 1) * pstrid + 1
total_tasks = max(tt, total_tasks)

# Check if we need to add pio's tasks to the total task count
if pio_async_interface:
total_tasks += pio_numtasks if pio_numtasks > 0 else pes_per_node

# Compute max threads for each mpi task
maxt = [0] * total_tasks
for ntask, nthrd, rootpe, pstrid in zip(ntasks, nthreads, rootpes, pstrids):
c2 = 0
while c2 < ntask:
s = rootpe + c2 * pstrid
if nthrd > maxt[s]:
maxt[s] = nthrd

c2 += 1

logger.info("total tasks is: %s" % total_tasks)

# make sure all maxt values at least 1
for c1 in xrange(0, total_tasks):
if maxt[c1] < 1:
maxt[c1] = 1

# Compute task and thread settings for batch commands
tasks_per_node, task_count, thread_count, max_thread_count, aprun = \
0, 1, maxt[0], maxt[0], "aprun"
for c1 in xrange(1, total_tasks):
if maxt[c1] != thread_count:
tasks_per_node = min(pes_per_node, max_tasks_per_node / thread_count)

tasks_per_node = min(task_count, tasks_per_node)

# Compute for every subset
task_per_numa = int(math.ceil(tasks_per_node / 2.0))
# Option for Titan
if machine == "titan" and tasks_per_node > 1:
aprun += " -S %d" % task_per_numa
if compiler == "intel":
aprun += " -cc numa_node"

aprun += " -n %d -N %d -d %d %s :" % (task_count, tasks_per_node, thread_count, run_exe)

thread_count = maxt[c1]
max_thread_count = max(max_thread_count, maxt[c1])
task_count = 1

else:
task_count += 1

if pes_per_node > 0:
tasks_per_node = min(pes_per_node, max_tasks_per_node / thread_count)
else:
tasks_per_node = max_tasks_per_node / thread_count

tasks_per_node = min(task_count, tasks_per_node)

task_per_numa = int(math.ceil(tasks_per_node / 2.0))

# Special option for Titan with intel compiler
if machine == "titan" and tasks_per_node > 1:
aprun += " -S %d" % task_per_numa
if compiler == "intel":
aprun += " -cc numa_node"

aprun += " -n %d -N %d -d %d %s " % (task_count, tasks_per_node, thread_count, run_exe)

return aprun

###############################################################################
def get_aprun_cmd_for_case(case, run_exe):
###############################################################################
"""
Given a case, construct and return the aprun command
"""
models = case.get_values("COMP_CLASSES")
ntasks, nthreads, rootpes, pstrids = [], [], [], []
for model in models:
model = "CPL" if model == "DRV" else model
for the_list, item_name in zip([ntasks, nthreads, rootpes, pstrids],
["NTASKS", "NTHRDS", "ROOTPE", "PSTRID"]):
the_list.append(case.get_value("_".join([item_name, model])))

return _get_aprun_cmd_for_case_impl(ntasks, nthreads, rootpes, pstrids,
case.get_value("MAX_TASKS_PER_NODE"),
case.get_value("PES_PER_NODE"),
case.get_value("PIO_NUMTASKS"),
case.get_value("PIO_ASYNC_INTERFACE"),
case.get_value("COMPILER"),
case.get_value("MACH"),
run_exe)
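
For reference, the doctest output above can be reproduced by hand (a sketch,
not part of the commit): ranks 0-679 carry at most 2 threads and ranks
680-807 carry 4, so the layout collapses into two homogeneous aprun segments,
each sized against the 16 PEs per node assumed in the doctest.

    pes_per_node = max_tasks_per_node = 16
    for n_ranks, threads in [(680, 2), (128, 4)]:
        tasks_per_node = min(pes_per_node, max_tasks_per_node // threads)
        tasks_per_numa = -(-tasks_per_node // 2)  # ceiling of tasks_per_node / 2
        print("-S %d -n %d -N %d -d %d acme.exe" % (
            tasks_per_numa, n_ranks, tasks_per_node, threads))
    # -S 4 -n 680 -N 8 -d 2 acme.exe
    # -S 2 -n 128 -N 4 -d 4 acme.exe
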
utils/python/CIME/case.py (19 additions & 22 deletions)
@@ -33,6 +33,7 @@
from CIME.XML.env_batch import EnvBatch
from CIME.user_mod_support import apply_user_mods
from CIME.case_setup import case_setup
from CIME.aprun import get_aprun_cmd_for_case

logger = logging.getLogger(__name__)

@@ -98,13 +99,14 @@ def __init__(self, case_root=None, read_only=True):


self.thread_count = None
self.total_tasks = None
self.tasks_per_node = None
self.num_nodes = None
self.tasks_per_numa = None
self.cores_per_task = None
# check if case has been configured and if so initialize derived
if self.get_value("CASEROOT") is not None:
self.initialize_derived_attributes()
self._initialize_derived_attributes()


def check_if_comp_var(self, vid):
@@ -117,29 +119,27 @@ def check_if_comp_var(self, vid):
return vid, comp, iscompvar
return vid, comp, iscompvar

def initialize_derived_attributes(self):
def _initialize_derived_attributes(self):
"""
These are derived variables which can be used in the config_* files
for variable substitution using the {{ var }} syntax
"""
env_mach_pes = self.get_env("mach_pes")
comp_classes = self.get_values("COMP_CLASSES")
total_tasks = env_mach_pes.get_total_tasks(comp_classes)
pes_per_node = self.get_value("PES_PER_NODE")

self.total_tasks = env_mach_pes.get_total_tasks(comp_classes)
self.thread_count = env_mach_pes.get_max_thread_count(comp_classes)
self.tasks_per_node = env_mach_pes.get_tasks_per_node(total_tasks, self.thread_count)
logger.debug("total_tasks %s thread_count %s"%(total_tasks, self.thread_count))
self.num_nodes = env_mach_pes.get_total_nodes(total_tasks, self.thread_count)
self.tasks_per_node = env_mach_pes.get_tasks_per_node(self.total_tasks, self.thread_count)
logger.debug("total_tasks %s thread_count %s"%(self.total_tasks, self.thread_count))
self.num_nodes = env_mach_pes.get_total_nodes(self.total_tasks, self.thread_count)
self.tasks_per_numa = int(math.ceil(self.tasks_per_node / 2.0))
smt_factor = max(1,int(self.get_value("MAX_TASKS_PER_NODE") / pes_per_node))

threads_per_node = self.tasks_per_node * self.thread_count
threads_per_core = 1 if (threads_per_node <= pes_per_node) else smt_factor
self.cores_per_task = self.thread_count / threads_per_core

return total_tasks

# Define __enter__ and __exit__ so that we can use this as a context manager
# and force a flush on exit.
def __enter__(self):
@@ -802,18 +802,18 @@ def configure(self, compset_name, grid_name, machine_name=None,
if test:
self.set_value("TEST",True)

total_tasks = self.initialize_derived_attributes()
self._initialize_derived_attributes()

# Make sure that parallel IO is not specified if total_tasks==1
if total_tasks == 1:
if self.total_tasks == 1:
for compclass in self._component_classes:
key = "PIO_TYPENAME_%s"%compclass
pio_typename = self.get_value(key)
if pio_typename in ("pnetcdf", "netcdf4p"):
self.set_value(key, "netcdf")

# Set TOTAL_CORES
self.set_value("TOTAL_CORES", total_tasks * self.cores_per_task )
self.set_value("TOTAL_CORES", self.total_tasks * self.cores_per_task )

def get_compset_var_settings(self):
compset_obj = Compsets(infile=self.get_value("COMPSETS_SPEC_FILE"))
@@ -1091,20 +1091,17 @@ def get_mpirun_cmd(self, job="case.run"):
}

executable, args = env_mach_specific.get_mpirun(self, mpi_attribs, job=job)
# special case for aprun if using < 1 full node
if executable == "aprun":
totalpes = self.get_value("TOTALPES")
pes_per_node = self.get_value("PES_PER_NODE")
if totalpes < pes_per_node:
args["tasks_per_node"] = "-N "+str(totalpes)

mpi_arg_string = " ".join(args.values())

# special case for aprun
if executable == "aprun":
return get_aprun_cmd_for_case(self, run_exe) + " " + run_misc_suffix
else:
mpi_arg_string = " ".join(args.values())

if self.get_value("BATCH_SYSTEM") == "cobalt":
mpi_arg_string += " : "
if self.get_value("BATCH_SYSTEM") == "cobalt":
mpi_arg_string += " : "

return "%s %s %s" % (executable if executable is not None else "", mpi_arg_string, run_suffix)
return "%s %s %s" % (executable if executable is not None else "", mpi_arg_string, run_suffix)

def set_model_version(self, model):
version = "unknown"
