Skip to content

Commit

Permalink
Merge pull request #4307 from jasonb5/fix_batch_arg
Browse files Browse the repository at this point in the history
Updates config_batch.xml to facilitate queue specific flags


Updates config_batch.xml to facilitate queue-specific flags, e.g.
<argument job_queue="long">-w docker</argument> would only
apply when the queue name is "long". The behavior of argument
is now in line with the rest of the XML system, i.e. the argument is
now stored in the XML node's text, not in attributes of the node.

Test suite: scripts_regression_tests.py
Test baseline: n/a
Test namelist changes: n/a
Test status: n/a

Fixes #4120
User interface changes?: n/a
Update gh-pages html (Y/N)?: N
  • Loading branch information
jedwards4b authored Sep 16, 2022
2 parents 4299d04 + 1d52869 commit 058cc8b
Show file tree
Hide file tree
Showing 6 changed files with 278 additions and 85 deletions.
159 changes: 111 additions & 48 deletions CIME/XML/env_batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
Interface to the env_batch.xml file. This class inherits from EnvBase
"""

import os
from CIME.XML.standard_module_setup import *
from CIME.XML.env_base import EnvBase
from CIME import utils
Expand All @@ -18,6 +19,7 @@
from CIME.locked_files import lock_file, unlock_file
from collections import OrderedDict
import stat, re, math
import pathlib

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -557,72 +559,133 @@ def get_submit_args(self, case, job):
"""
return the batch submit arguments for the job as a single string
"""
submitargs = " "
bs_nodes = self.get_children("batch_system")

submit_arg_nodes = self._get_arg_nodes(case, bs_nodes)

submitargs = self._process_args(case, submit_arg_nodes, job)

return submitargs

def _get_arg_nodes(self, case, bs_nodes):
    """
    Collect the submit-argument nodes of every ``batch_system`` node.

    For each node in ``bs_nodes`` that has a ``submit_args`` child, the
    deprecated ``arg`` children are gathered first, followed by the
    ``argument`` children.  A deprecation warning is logged once for every
    ``submit_args`` element that still contains ``arg`` children.

    case: only queried (``get_value("BATCH_SPEC_FILE")``) when deprecated
        ``arg`` nodes are found, to tell the user which file to check
    bs_nodes: iterable of ``batch_system`` nodes

    returns the list of ``arg``/``argument`` nodes, in document order per
    ``batch_system`` node
    """
    submit_arg_nodes = []

    for node in bs_nodes:
        sanode = self.get_optional_child("submit_args", root=node)
        if sanode is None:
            continue

        arg_nodes = self.get_children("arg", root=sanode)

        if arg_nodes:
            # Point the user at the files that may still carry the
            # deprecated syntax: the batch spec file and, when present,
            # the per-user override in ~/.cime.
            check_paths = [case.get_value("BATCH_SPEC_FILE")]

            user_config_path = os.path.join(
                pathlib.Path.home(), ".cime", "config_batch.xml"
            )

            if os.path.exists(user_config_path):
                check_paths.append(user_config_path)

            # str() keeps the warning from raising if a path is unset.
            logger.warning(
                'Deprecated "arg" node detected in %s, check files %s',
                self.filename,
                ", ".join(str(path) for path in check_paths),
            )

            # Added exactly once; adding them a second time would repeat
            # every deprecated flag on the submit command line.
            submit_arg_nodes += arg_nodes

        submit_arg_nodes += self.get_children("argument", root=sanode)

    return submit_arg_nodes

def _process_args(self, case, submit_arg_nodes, job):
    """
    Build the submit-argument string from a list of argument nodes.

    Each node is converted to a ``(flag, name)`` pair by
    ``_get_argument``.  A flag without a name is appended as-is; a flag
    with a name is expanded by ``_resolve_argument``.  Either helper may
    raise ``ValueError`` to signal that the node must be left out of the
    command line, in which case the node is skipped.

    case: passed through to the helpers
    submit_arg_nodes: iterable of ``arg``/``argument`` nodes
    job: name of the job being submitted

    returns the accumulated argument string (it always starts with a
    single space)
    """
    submitargs = " "

    for arg in submit_arg_nodes:
        try:
            flag, name = self._get_argument(case, arg)
        except ValueError:
            # The node does not apply to this case.
            continue

        if self._batchtype == "cobalt" and job == "case.st_archive":
            if flag == "-n":
                name = "task_count"
            elif flag == "--mode":
                continue

        if name is None:
            submitargs += " {}".format(flag)
            continue

        # The name is handed over untouched: _resolve_argument strips the
        # leading "$" itself, and stripping it here as well would turn
        # "$$ENV{X}" into a plain variable name.
        try:
            submitargs += self._resolve_argument(case, flag, name, job)
        except ValueError:
            continue

    return submitargs

def _get_argument(self, case, arg):
    """
    Extract the ``(flag, name)`` pair described by a submit-argument node.

    Deprecated ``arg`` nodes carry the values in their ``flag`` and
    ``name`` attributes.  New-style ``argument`` nodes have no ``flag``
    attribute: the flag is the node text, and an optional ``job_queue``
    attribute restricts the node to a single queue.

    case: queried for ``JOB_QUEUE`` when the node has a ``job_queue``
        attribute
    arg: the ``arg`` or ``argument`` node

    returns ``(flag, name)``; ``name`` is ``None`` when the node has no
    ``name`` attribute

    raises ValueError when the node must be skipped: it is restricted to
    a different queue, or it is an ``argument`` node without any text
    """
    flag = self.get(arg, "flag")

    name = self.get(arg, "name")

    # If flag is None then we are dealing with a new-style `argument`.
    if flag is None:
        flag = self.text(arg)

        # An empty node would otherwise put the literal "None" (or bare
        # whitespace) on the submit command line.
        if flag is None or not flag.strip():
            raise ValueError("argument node has no text")

        job_queue_restriction = self.get(arg, "job_queue")

        if (
            job_queue_restriction is not None
            and job_queue_restriction != case.get_value("JOB_QUEUE")
        ):
            raise ValueError(
                "argument is restricted to queue {}".format(job_queue_restriction)
            )

    return flag, name

def _resolve_argument(self, case, flag, name, job):
    """
    Expand one named submit argument into its command-line text.

    ``name`` is looked up on ``case`` (a single leading ``$`` is dropped
    first).  If the remaining name still contains ``$`` it is treated as
    an expression and resolved with ``case.get_resolved_value``;
    otherwise ``case.get_value(name, subgroup=job)`` is used.

    case: source of the values
    flag: the command-line flag; when it ends in ``=`` or ``:`` the value
        is attached without a separating space
    name: variable name or expression providing the value
    job: job name, used as the lookup subgroup

    returns ``" <flag> <value>"`` (or ``" <flag><value>"``), or an empty
    string when the value is unset, empty or the string ``"None"``

    raises ValueError when the argument must be omitted entirely
    (``-q batch`` on machine ``blues``)
    """
    submitargs = ""

    if name.startswith("$"):
        name = name[1:]

    if "$" in name:
        # We have a complex expression and must rely on get_resolved_value.
        # Hopefully, none of the values require subgroup
        val = case.get_resolved_value(name)
    else:
        val = case.get_value(name, subgroup=job)

    if val is None or len(str(val)) == 0 or val == "None":
        return submitargs

    # Try to evaluate val if it contains any whitespace.  Only strings can
    # hold an expression; testing a number for " " would raise TypeError.
    if isinstance(val, str) and " " in val:
        try:
            # NOTE(security): eval() executes arbitrary Python.  This is
            # only acceptable while the value comes from trusted case
            # configuration.
            rval = eval(val)
        except Exception:
            rval = val
    else:
        rval = val

    # We don't want floating-point data
    try:
        rval = int(round(float(rval)))
    except (TypeError, ValueError, OverflowError):
        # Not a finite scalar number: keep the value unchanged.
        pass

    # need a correction for tasks per node (numeric values only; a string
    # cannot be compared with 0)
    if flag == "-n" and isinstance(rval, int) and rval <= 0:
        rval = 1

    if flag == "-q" and rval == "batch" and case.get_value("MACH") == "blues":
        # Special case. Do not provide '-q batch' for blues
        raise ValueError()

    separator = "" if flag.endswith(("=", ":")) else " "
    submitargs = " {}{}{}".format(flag, separator, str(rval).strip())

    return submitargs

Expand Down
24 changes: 13 additions & 11 deletions CIME/data/config/xml_schemas/config_batch.xsd
Original file line number Diff line number Diff line change
Expand Up @@ -127,17 +127,19 @@

<!-- submit_args holds the arguments passed to the batch submit command.
     xs:choice selects ONE of the two syntaxes for a given submit_args
     element; the chosen element may then repeat any number of times. -->
<xs:element name="submit_args">
  <xs:complexType>
    <xs:choice>
      <!-- Deprecated syntax: the flag and the variable name are attributes. -->
      <xs:element name="arg" maxOccurs="unbounded">
        <xs:complexType>
          <xs:attribute name="flag" use="required"/>
          <xs:attribute name="name"/>
        </xs:complexType>
      </xs:element>
      <!-- Current syntax: the argument is the node text; the optional
           job_queue attribute restricts it to a single queue. -->
      <xs:element name="argument" maxOccurs="unbounded">
        <xs:complexType mixed="true">
          <xs:attribute name="job_queue"/>
        </xs:complexType>
      </xs:element>
    </xs:choice>
  </xs:complexType>
</xs:element>

Expand Down
144 changes: 144 additions & 0 deletions CIME/tests/test_unit_xml_env_batch.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#!/usr/bin/env python3

import os
import unittest
import tempfile
from unittest import mock

from CIME.XML.env_batch import EnvBatch
Expand All @@ -9,6 +11,148 @@


class TestXMLEnvBatch(unittest.TestCase):
def test_get_submit_args_job_queue(self):
# Checks that `argument` nodes carrying a job_queue attribute are only
# included when that attribute matches the value the case reports.
with tempfile.NamedTemporaryFile() as tfile:
# Fixture: one batch_system with an unrestricted argument plus one
# argument restricted to the "short" queue and one to the "long" queue.
tfile.write(
b"""<?xml version="1.0"?>
<file id="env_batch.xml" version="2.0">
<header>
These variables may be changed anytime during a run, they
control arguments to the batch submit command.
</header>
<group id="config_batch">
<entry id="BATCH_SYSTEM" value="slurm">
<type>char</type>
<valid_values>miller_slurm,nersc_slurm,lc_slurm,moab,pbs,lsf,slurm,cobalt,cobalt_theta,none</valid_values>
<desc>The batch system type to use for this machine.</desc>
</entry>
</group>
<group id="job_submission">
<entry id="PROJECT_REQUIRED" value="FALSE">
<type>logical</type>
<valid_values>TRUE,FALSE</valid_values>
<desc>whether the PROJECT value is required on this machine</desc>
</entry>
</group>
<batch_system MACH="docker" type="slurm">
<submit_args>
<argument>-w default</argument>
<argument job_queue="short">-w short</argument>
<argument job_queue="long">-w long</argument>
</submit_args>
<queues>
<queue walltimemax="01:00:00" nodemax="1">long</queue>
<queue walltimemax="00:30:00" nodemax="1" default="true">short</queue>
</queues>
</batch_system>
</file>
"""
)

# Rewind after writing, before EnvBatch opens the file by name.
tfile.seek(0)

batch = EnvBatch(infile=tfile.name)

case = mock.MagicMock()

# Every get_value call (including the JOB_QUEUE lookup) returns "long".
case.get_value.return_value = "long"

# NOTE(review): the code under test shown in this change reads
# self.filename, not case.filename - confirm this assignment is needed.
case.filename = mock.PropertyMock(return_value=tfile.name)

submit_args = batch.get_submit_args(case, ".case.run")

# The unrestricted argument and the "long" one are kept; "short" is dropped.
expected_args = " -w default -w long"

assert submit_args == expected_args

@mock.patch.dict(os.environ, {"TEST": "GOOD"})
def test_get_submit_args(self):
# Checks that deprecated `arg` nodes and new `argument` nodes from two
# batch_system elements are combined into one submit-argument string.
with tempfile.NamedTemporaryFile() as tfile:
# Fixture: the first batch_system uses `arg` nodes (named values, a bare
# flag and a "$$ENV{TEST}" expression); the second uses one `argument`.
tfile.write(
b"""<?xml version="1.0"?>
<file id="env_batch.xml" version="2.0">
<header>
These variables may be changed anytime during a run, they
control arguments to the batch submit command.
</header>
<group id="config_batch">
<entry id="BATCH_SYSTEM" value="slurm">
<type>char</type>
<valid_values>miller_slurm,nersc_slurm,lc_slurm,moab,pbs,lsf,slurm,cobalt,cobalt_theta,none</valid_values>
<desc>The batch system type to use for this machine.</desc>
</entry>
</group>
<group id="job_submission">
<entry id="PROJECT_REQUIRED" value="FALSE">
<type>logical</type>
<valid_values>TRUE,FALSE</valid_values>
<desc>whether the PROJECT value is required on this machine</desc>
</entry>
</group>
<batch_system type="slurm">
<batch_query per_job_arg="-j">squeue</batch_query>
<batch_submit>sbatch</batch_submit>
<batch_cancel>scancel</batch_cancel>
<batch_directive>#SBATCH</batch_directive>
<jobid_pattern>(\d+)$</jobid_pattern>
<depend_string>--dependency=afterok:jobid</depend_string>
<depend_allow_string>--dependency=afterany:jobid</depend_allow_string>
<depend_separator>:</depend_separator>
<walltime_format>%H:%M:%S</walltime_format>
<batch_mail_flag>--mail-user</batch_mail_flag>
<batch_mail_type_flag>--mail-type</batch_mail_type_flag>
<batch_mail_type>none, all, begin, end, fail</batch_mail_type>
<submit_args>
<arg flag="--time" name="$JOB_WALLCLOCK_TIME"/>
<arg flag="-p" name="$JOB_QUEUE"/>
<arg flag="--account" name="$PROJECT"/>
<arg flag="--no-arg" />
<arg flag="--path" name="$$ENV{TEST}" />
</submit_args>
<directives>
<directive> --job-name={{ job_id }}</directive>
<directive> --nodes={{ num_nodes }}</directive>
<directive> --output={{ job_id }}.%j </directive>
<directive> --exclusive </directive>
</directives>
</batch_system>
<batch_system MACH="docker" type="slurm">
<submit_args>
<argument>-w docker</argument>
</submit_args>
<queues>
<queue walltimemax="01:00:00" nodemax="1">long</queue>
<queue walltimemax="00:30:00" nodemax="1" default="true">short</queue>
</queues>
</batch_system>
</file>
"""
)

# Rewind after writing, before EnvBatch opens the file by name.
tfile.seek(0)

batch = EnvBatch(infile=tfile.name)

case = mock.MagicMock()

# get_value results are consumed in call order: first the BATCH_SPEC_FILE
# lookup made for the deprecation warning, then the values for --time,
# -p and --account.
case.get_value.side_effect = [
os.path.dirname(tfile.name),
"00:30:00",
"long",
"CIME",
]

# value for --path
case.get_resolved_value.return_value = "/test"

# NOTE(review): the code under test shown in this change reads
# self.filename, not case.filename - confirm this assignment is needed.
case.filename = mock.PropertyMock(return_value=tfile.name)

submit_args = batch.get_submit_args(case, ".case.run")

# `arg` values come first in document order, then the `argument` text.
expected_args = " --time 00:30:00 -p long --account CIME --no-arg --path /test -w docker"

assert submit_args == expected_args

@mock.patch("CIME.XML.env_batch.EnvBatch.get")
def test_get_queue_specs(self, get):
node = mock.MagicMock()
Expand Down
4 changes: 2 additions & 2 deletions docker/config_machines.xml
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@
<TESTS>e3sm_developer</TESTS>
<BATCH_SYSTEM>none</BATCH_SYSTEM>
<SUPPORTED_BY>[email protected]</SUPPORTED_BY>
<MAX_TASKS_PER_NODE>4</MAX_TASKS_PER_NODE>
<MAX_MPITASKS_PER_NODE>4</MAX_MPITASKS_PER_NODE>
<MAX_TASKS_PER_NODE>8</MAX_TASKS_PER_NODE>
<MAX_MPITASKS_PER_NODE>8</MAX_MPITASKS_PER_NODE>
<mpirun mpilib="openmpi">
<executable>mpiexec</executable>
<arguments>
Expand Down
Loading

0 comments on commit 058cc8b

Please sign in to comment.