diff --git a/cime/cime_config/acme/machines/config_batch.xml b/cime/cime_config/acme/machines/config_batch.xml
index d5e57c65192e..aa9abfb4e3a5 100644
--- a/cime/cime_config/acme/machines/config_batch.xml
+++ b/cime/cime_config/acme/machines/config_batch.xml
@@ -329,8 +329,8 @@
     -l nodes={{ num_nodes }}
-    batch
-    debug
+    batch
+    debug
diff --git a/cime/cime_config/acme/machines/config_compilers.xml b/cime/cime_config/acme/machines/config_compilers.xml
index 97d61978c5a7..94e4a123e92e 100644
--- a/cime/cime_config/acme/machines/config_compilers.xml
+++ b/cime/cime_config/acme/machines/config_compilers.xml
@@ -644,6 +644,17 @@ for mct, etc.
     /projects/install/rhel6-x86_64/ACME/AlbanyTrilinos/Albany/build/install
+
+    -O2
+    -O2
+    --host=Linux
+    $(NETCDFROOT)
+    $(PNETCDFROOT)
+    $(shell $(NETCDF_PATH)/bin/nf-config --flibs) -lblas -llapack
+    -lstdc++ -lmpi_cxx
+    /projects/install/rhel6-x86_64/ACME/AlbanyTrilinos/Albany/build/install
+
+
     -O2
     -O2
@@ -823,6 +834,8 @@ for mct, etc.
     -O2
     -O2
+    $(NETCDFROOT)
+    $(PNETCDFROOT)
     --host=Linux
     lustre
     $(shell nf-config --flibs)
diff --git a/cime/cime_config/acme/machines/config_machines.xml b/cime/cime_config/acme/machines/config_machines.xml
index 59be75cd165e..4408d9d2cdad 100644
--- a/cime/cime_config/acme/machines/config_machines.xml
+++ b/cime/cime_config/acme/machines/config_machines.xml
@@ -508,7 +508,7 @@
     sonproxy.sandia.gov:80
     acme_developer
     LINUX
-    gnu
+    gnu,intel
     openmpi,mpi-serial
     $ENV{HOME}/acme/scratch
     $CIME_OUTPUT_ROOT/$CASE/run
@@ -546,7 +546,8 @@
     sems-env
     sems-git
     sems-python/2.7.9
-    sems-gcc/5.3.0
+    sems-gcc/5.3.0
+    sems-intel/16.0.3
     sems-openmpi/1.8.7
     sems-cmake/2.8.12
     sems-netcdf/4.3.2/parallel
@@ -1461,7 +1462,7 @@
     subversion
     subversion/1.8.3
     cmake
-    cmake/2.8.10.2
+    cmake3/3.6.0
@@ -1527,25 +1528,28 @@
-    cray-netcdf/4.4.0
+    cray-netcdf/4.4.1.1
-    cray-netcdf-hdf5parallel/4.4.0
+    cray-netcdf-hdf5parallel/4.4.1.1
     cray-parallel-netcdf/1.7.0
-    $COMPILER $MPILIB
-    $CESM_REPO
     1
     1
     1
     128M
+    /opt/cray/netcdf-hdf5parallel/4.4.1.1/PGI/15.3/
+    /opt/cray/parallel-netcdf/1.7.0/PGI/15.3
+    /opt/cray/netcdf-hdf5parallel/4.4.1.1/PGI/15.3/
+    /opt/cray/parallel-netcdf/1.7.0/PGI/15.3
+    /opt/cray/netcdf-hdf5parallel/4.4.1.1/INTEL/15.0/
+    /opt/cray/parallel-netcdf/1.7.0/INTEL/15.0
+    /opt/cray/netcdf-hdf5parallel/4.4.1.1/CRAY/8.3/
+    /opt/cray/parallel-netcdf/1.7.0/CRAY/8.3
     128M
diff --git a/cime/externals/pio1/pio/CMakeLists.txt b/cime/externals/pio1/pio/CMakeLists.txt
index ae07c36cdd29..3b10c1a2cbda 100644
--- a/cime/externals/pio1/pio/CMakeLists.txt
+++ b/cime/externals/pio1/pio/CMakeLists.txt
@@ -30,6 +30,8 @@ endif()
 
 # Netcdf is required
+SET (CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../../pio2/cmake" ${CMAKE_MODULE_PATH})
+
 #SET (NETCDF_FIND_COMPONENTS F90)
 FIND_PACKAGE(NetCDF "4.3.3" COMPONENTS C Fortran)
 IF (${NetCDF_Fortran_FOUND})
diff --git a/cime/utils/python/CIME/BuildTools/configure.py b/cime/utils/python/CIME/BuildTools/configure.py
index 3d53f8938b91..b610b087aa3c 100644
--- a/cime/utils/python/CIME/BuildTools/configure.py
+++ b/cime/utils/python/CIME/BuildTools/configure.py
@@ -37,7 +37,7 @@ def configure(machobj, output_dir, macros_format, compiler, mpilib, debug, sysos
     """
     # Macros generation.
     suffixes = {'Makefile': 'make', 'CMake': 'cmake'}
-    macro_maker = Compilers(machobj)
+    macro_maker = Compilers(machobj, compiler=compiler, mpilib=mpilib)
     for form in macros_format:
         out_file_name = os.path.join(output_dir,"Macros."+suffixes[form])
         macro_maker.write_macros_file(macros_file=out_file_name, output_format=suffixes[form])
diff --git a/cime/utils/python/CIME/aprun.py b/cime/utils/python/CIME/aprun.py
new file mode 100755
index 000000000000..25b4f3c13150
--- /dev/null
+++ b/cime/utils/python/CIME/aprun.py
@@ -0,0 +1,133 @@
+"""
+Aprun is far too complex to handle purely through XML. We need python
+code to compute and assemble aprun commands.
+"""
+
+from CIME.XML.standard_module_setup import *
+
+import math
+
+logger = logging.getLogger(__name__)
+
+###############################################################################
+def _get_aprun_cmd_for_case_impl(ntasks, nthreads, rootpes, pstrids,
+                                 max_tasks_per_node, pes_per_node,
+                                 pio_numtasks, pio_async_interface,
+                                 compiler, machine, run_exe):
+###############################################################################
+    """
+    No one really understands this code, but we can at least test it.
+
+    >>> ntasks = [512, 675, 168, 512, 128, 168, 168, 512, 1]
+    >>> nthreads = [2, 2, 2, 2, 4, 2, 2, 2, 1]
+    >>> rootpes = [0, 0, 512, 0, 680, 512, 512, 0, 0]
+    >>> pstrids = [1, 1, 1, 1, 1, 1, 1, 1, 1]
+    >>> max_tasks_per_node = 16
+    >>> pes_per_node = 16
+    >>> pio_numtasks = -1
+    >>> pio_async_interface = False
+    >>> compiler = "pgi"
+    >>> machine = "titan"
+    >>> run_exe = "acme.exe"
+    >>> _get_aprun_cmd_for_case_impl(ntasks, nthreads, rootpes, pstrids, max_tasks_per_node, pes_per_node, pio_numtasks, pio_async_interface, compiler, machine, run_exe)
+    'aprun -S 4 -n 680 -N 8 -d 2 acme.exe : -S 2 -n 128 -N 4 -d 4 acme.exe '
+    >>> compiler = "intel"
+    >>> _get_aprun_cmd_for_case_impl(ntasks, nthreads, rootpes, pstrids, max_tasks_per_node, pes_per_node, pio_numtasks, pio_async_interface, compiler, machine, run_exe)
+    'aprun -S 4 -cc numa_node -n 680 -N 8 -d 2 acme.exe : -S 2 -cc numa_node -n 128 -N 4 -d 4 acme.exe '
+    """
+    max_tasks_per_node = 1 if max_tasks_per_node < 1 else max_tasks_per_node
+
+    total_tasks = 0
+    for ntask, rootpe, pstrid in zip(ntasks, rootpes, pstrids):
+        tt = rootpe + (ntask - 1) * pstrid + 1
+        total_tasks = max(tt, total_tasks)
+
+    # Check if we need to add pio's tasks to the total task count
+    if pio_async_interface:
+        total_tasks += pio_numtasks if pio_numtasks > 0 else pes_per_node
+
+    # Compute max threads for each mpi task
+    maxt = [0] * total_tasks
+    for ntask, nthrd, rootpe, pstrid in zip(ntasks, nthreads, rootpes, pstrids):
+        c2 = 0
+        while c2 < ntask:
+            s = rootpe + c2 * pstrid
+            if nthrd > maxt[s]:
+                maxt[s] = nthrd
+
+            c2 += 1
+
+    logger.info("total tasks is: %s" % total_tasks)
+
+    # make sure all maxt values at least 1
+    for c1 in xrange(0, total_tasks):
+        if maxt[c1] < 1:
+            maxt[c1] = 1
+
+    # Compute task and thread settings for batch commands
+    tasks_per_node, task_count, thread_count, max_thread_count, aprun = \
+        0, 1, maxt[0], maxt[0], "aprun"
+    for c1 in xrange(1, total_tasks):
+        if maxt[c1] != thread_count:
+            tasks_per_node = min(pes_per_node, max_tasks_per_node / thread_count)
+
+            tasks_per_node = min(task_count, tasks_per_node)
+
+            # Compute for every subset
+            task_per_numa = int(math.ceil(tasks_per_node / 2.0))
+            # Option for Titan
+            if machine == "titan" and tasks_per_node > 1:
+                aprun += " -S %d" % task_per_numa
+                if compiler == "intel":
+                    aprun += " -cc numa_node"
+
+            aprun += " -n %d -N %d -d %d %s :" % (task_count, tasks_per_node, thread_count, run_exe)
+
+            thread_count = maxt[c1]
+            max_thread_count = max(max_thread_count, maxt[c1])
+            task_count = 1
+
+        else:
+            task_count += 1
+
+    if pes_per_node > 0:
+        tasks_per_node = min(pes_per_node, max_tasks_per_node / thread_count)
+    else:
+        tasks_per_node = max_tasks_per_node / thread_count
+
+    tasks_per_node = min(task_count, tasks_per_node)
+
+    task_per_numa = int(math.ceil(tasks_per_node / 2.0))
+
+    # Special option for Titan with intel compiler
+    if machine == "titan" and tasks_per_node > 1:
+        aprun += " -S %d" % task_per_numa
+        if compiler == "intel":
+            aprun += " -cc numa_node"
+
+    aprun += " -n %d -N %d -d %d %s " % (task_count, tasks_per_node, thread_count, run_exe)
+
+    return aprun
+
+###############################################################################
+def get_aprun_cmd_for_case(case, run_exe):
+###############################################################################
+    """
+    Given a case, construct and return the aprun command
+    """
+    models = case.get_values("COMP_CLASSES")
+    ntasks, nthreads, rootpes, pstrids = [], [], [], []
+    for model in models:
+        model = "CPL" if model == "DRV" else model
+        for the_list, item_name in zip([ntasks, nthreads, rootpes, pstrids],
+                                       ["NTASKS", "NTHRDS", "ROOTPE", "PSTRID"]):
+            the_list.append(case.get_value("_".join([item_name, model])))
+
+    return _get_aprun_cmd_for_case_impl(ntasks, nthreads, rootpes, pstrids,
+                                        case.get_value("MAX_TASKS_PER_NODE"),
+                                        case.get_value("PES_PER_NODE"),
+                                        case.get_value("PIO_NUMTASKS"),
+                                        case.get_value("PIO_ASYNC_INTERFACE"),
+                                        case.get_value("COMPILER"),
+                                        case.get_value("MACH"),
+                                        run_exe)
diff --git a/cime/utils/python/CIME/case.py b/cime/utils/python/CIME/case.py
index fc6b1542f0db..a4d49aaa5465 100644
--- a/cime/utils/python/CIME/case.py
+++ b/cime/utils/python/CIME/case.py
@@ -33,6 +33,7 @@ from CIME.XML.env_batch import EnvBatch
 from CIME.user_mod_support import apply_user_mods
 from CIME.case_setup import case_setup
+from CIME.aprun import get_aprun_cmd_for_case
 
 logger = logging.getLogger(__name__)
@@ -98,13 +99,14 @@ def __init__(self, case_root=None, read_only=True):
         self.thread_count = None
+        self.total_tasks = None
         self.tasks_per_node = None
         self.num_nodes = None
         self.tasks_per_numa = None
         self.cores_per_task = None
         # check if case has been configured and if so initialize derived
         if self.get_value("CASEROOT") is not None:
-            self.initialize_derived_attributes()
+            self._initialize_derived_attributes()
 
     def check_if_comp_var(self, vid):
@@ -117,20 +119,20 @@ def check_if_comp_var(self, vid):
             return vid, comp, iscompvar
         return vid, comp, iscompvar
 
-    def initialize_derived_attributes(self):
+    def _initialize_derived_attributes(self):
         """
         These are derived variables which can be used in the config_* files
         for variable substitution using the {{ var }} syntax
         """
         env_mach_pes = self.get_env("mach_pes")
         comp_classes = self.get_values("COMP_CLASSES")
-        total_tasks = env_mach_pes.get_total_tasks(comp_classes)
         pes_per_node = self.get_value("PES_PER_NODE")
+        self.total_tasks = env_mach_pes.get_total_tasks(comp_classes)
         self.thread_count = env_mach_pes.get_max_thread_count(comp_classes)
-        self.tasks_per_node = env_mach_pes.get_tasks_per_node(total_tasks, self.thread_count)
-        logger.debug("total_tasks %s thread_count %s"%(total_tasks, self.thread_count))
-        self.num_nodes = env_mach_pes.get_total_nodes(total_tasks, self.thread_count)
+        self.tasks_per_node = env_mach_pes.get_tasks_per_node(self.total_tasks, self.thread_count)
logger.debug("total_tasks %s thread_count %s"%(self.total_tasks, self.thread_count)) + self.num_nodes = env_mach_pes.get_total_nodes(self.total_tasks, self.thread_count) self.tasks_per_numa = int(math.ceil(self.tasks_per_node / 2.0)) smt_factor = max(1,int(self.get_value("MAX_TASKS_PER_NODE") / pes_per_node)) @@ -138,8 +140,6 @@ def initialize_derived_attributes(self): threads_per_core = 1 if (threads_per_node <= pes_per_node) else smt_factor self.cores_per_task = self.thread_count / threads_per_core - return total_tasks - # Define __enter__ and __exit__ so that we can use this as a context manager # and force a flush on exit. def __enter__(self): @@ -802,10 +802,10 @@ def configure(self, compset_name, grid_name, machine_name=None, if test: self.set_value("TEST",True) - total_tasks = self.initialize_derived_attributes() + self._initialize_derived_attributes() # Make sure that parallel IO is not specified if total_tasks==1 - if total_tasks == 1: + if self.total_tasks == 1: for compclass in self._component_classes: key = "PIO_TYPENAME_%s"%compclass pio_typename = self.get_value(key) @@ -813,7 +813,7 @@ def configure(self, compset_name, grid_name, machine_name=None, self.set_value(key, "netcdf") # Set TOTAL_CORES - self.set_value("TOTAL_CORES", total_tasks * self.cores_per_task ) + self.set_value("TOTAL_CORES", self.total_tasks * self.cores_per_task ) def get_compset_var_settings(self): compset_obj = Compsets(infile=self.get_value("COMPSETS_SPEC_FILE")) @@ -1091,20 +1091,17 @@ def get_mpirun_cmd(self, job="case.run"): } executable, args = env_mach_specific.get_mpirun(self, mpi_attribs, job=job) - # special case for aprun if using < 1 full node - if executable == "aprun": - totalpes = self.get_value("TOTALPES") - pes_per_node = self.get_value("PES_PER_NODE") - if totalpes < pes_per_node: - args["tasks_per_node"] = "-N "+str(totalpes) - - mpi_arg_string = " ".join(args.values()) + # special case for aprun + if executable == "aprun": + return get_aprun_cmd_for_case(self, run_exe) + " " + run_misc_suffix + else: + mpi_arg_string = " ".join(args.values()) - if self.get_value("BATCH_SYSTEM") == "cobalt": - mpi_arg_string += " : " + if self.get_value("BATCH_SYSTEM") == "cobalt": + mpi_arg_string += " : " - return "%s %s %s" % (executable if executable is not None else "", mpi_arg_string, run_suffix) + return "%s %s %s" % (executable if executable is not None else "", mpi_arg_string, run_suffix) def set_model_version(self, model): version = "unknown"