Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Runtime value checking for LaunchSettings and BatchSettings #740

Open
wants to merge 5 commits into
base: v1.0
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 36 additions & 3 deletions smartsim/settings/arguments/batch/lsf.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,11 @@ def set_walltime(self, walltime: str) -> None:
:param walltime: Time in hh:mm format, e.g. "10:00" for 10 hours,
if time is supplied in hh:mm:ss format, seconds
will be ignored and walltime will be set as ``hh:mm``
:raises TypeError: if not type str
"""
# For compatibility with other launchers, as explained in docstring
if not isinstance(walltime, str):
raise TypeError("walltime argument was not of type str")
if walltime:
if len(walltime.split(":")) > 2:
juliaputko marked this conversation as resolved.
Show resolved Hide resolved
walltime = ":".join(walltime.split(":")[:2])
Expand All @@ -72,7 +75,13 @@ def set_smts(self, smts: int) -> None:
takes precedence.
juliaputko marked this conversation as resolved.
Show resolved Hide resolved

:param smts: SMT (e.g on Summit: 1, 2, or 4)
:raises TypeError: if not an int
:raises ValueError: if not positive int
"""
if not isinstance(smts, int):
raise TypeError("smts argument was not of type int")
if smts <= 0:
raise ValueError("smts must be a positive value")
self.set("alloc_flags", str(smts))

def set_project(self, project: str) -> None:
Expand All @@ -81,7 +90,10 @@ def set_project(self, project: str) -> None:
This sets ``-P``.

:param time: project name
:raises TypeError: if not a str
"""
if not isinstance(project, str):
raise TypeError("project argument was not of type str")
self.set("P", project)

def set_account(self, account: str) -> None:
Expand All @@ -90,7 +102,10 @@ def set_account(self, account: str) -> None:
this function is an alias for `set_project`.

:param account: project name
:raises TypeError: if not a str
"""
if not isinstance(account, str):
raise TypeError("account argument was not of type str")
return self.set_project(account)

def set_nodes(self, num_nodes: int) -> None:
Expand All @@ -99,7 +114,13 @@ def set_nodes(self, num_nodes: int) -> None:
This sets ``-nnodes``.

:param nodes: number of nodes
:raises TypeError: if not an int
:raises ValueError: if not positive int
"""
if not isinstance(num_nodes, int):
raise TypeError("num_nodes argument was not of type int")
if num_nodes <= 0:
raise ValueError("Number of nodes must be a positive value")
self.set("nnodes", str(num_nodes))

def set_hostlist(self, host_list: t.Union[str, t.List[str]]) -> None:
Expand All @@ -110,10 +131,11 @@ def set_hostlist(self, host_list: t.Union[str, t.List[str]]) -> None:
"""
if isinstance(host_list, str):
host_list = [host_list.strip()]
if not isinstance(host_list, list):
if not (
isinstance(host_list, list)
and all(isinstance(item, str) for item in host_list)
):
raise TypeError("host_list argument must be a list of strings")
if not all(isinstance(host, str) for host in host_list):
raise TypeError("host_list argument must be list of strings")
self.set("m", '"' + " ".join(host_list) + '"')

def set_tasks(self, tasks: int) -> None:
Expand All @@ -122,7 +144,13 @@ def set_tasks(self, tasks: int) -> None:
This sets ``-n``

:param tasks: number of tasks
:raises TypeError: if not an int
:raises ValueError: if not positive int
"""
if not isinstance(tasks, int):
raise TypeError("tasks argument was not of type int")
if tasks <= 0:
raise ValueError("Number of tasks must be a positive value")
self.set("n", str(tasks))

def set_queue(self, queue: str) -> None:
Expand All @@ -131,7 +159,10 @@ def set_queue(self, queue: str) -> None:
This sets ``-q``

:param queue: The queue to submit the job on
:raises TypeError: if not a str
"""
if not isinstance(queue, str):
raise TypeError("queue argument was not of type str")
self.set("q", queue)

def format_batch_args(self) -> t.List[str]:
Expand Down Expand Up @@ -159,5 +190,7 @@ def set(self, key: str, value: str | None) -> None:
:param value: A string representation of the value for the launch
argument (if applicable), otherwise `None`
"""
if key in self._batch_args and key != self._batch_args[key]:
logger.warning(f"Overwriting argument '{key}' with value '{value}'")
Comment on lines +193 to +194
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just sanity checking, should this be

Suggested change
if key in self._batch_args and key != self._batch_args[key]:
logger.warning(f"Overwriting argument '{key}' with value '{value}'")
if key in self._batch_args and value != self._batch_args[key]:
logger.warning(f"Overwriting argument '{key}' with value '{value}'")

?

# Store custom arguments in the launcher_args
self._batch_args[key] = value
44 changes: 38 additions & 6 deletions smartsim/settings/arguments/batch/pbs.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@

from __future__ import annotations

import re
import typing as t
juliaputko marked this conversation as resolved.
Show resolved Hide resolved
from copy import deepcopy

Expand Down Expand Up @@ -61,7 +62,13 @@ def set_nodes(self, num_nodes: int) -> None:
nodes here is sets the 'nodes' resource.

:param num_nodes: number of nodes
:raises TypeError: if not an int
:raises ValueError: if not positive int
"""
if not isinstance(num_nodes, int):
raise TypeError("num_nodes argument was not of type int")
if num_nodes <= 0:
raise ValueError("Number of nodes must be a positive value")

self.set("nodes", str(num_nodes))

Expand All @@ -73,9 +80,10 @@ def set_hostlist(self, host_list: t.Union[str, t.List[str]]) -> None:
"""
if isinstance(host_list, str):
host_list = [host_list.strip()]
if not isinstance(host_list, list):
raise TypeError("host_list argument must be a list of strings")
if not all(isinstance(host, str) for host in host_list):
if not (
isinstance(host_list, list)
and all(isinstance(item, str) for item in host_list)
):
raise TypeError("host_list argument must be a list of strings")
self.set("hostname", ",".join(host_list))

Expand All @@ -89,15 +97,28 @@ def set_walltime(self, walltime: str) -> None:
this value will be overridden

:param walltime: wall time
:raises ValueError: if walltime format is invalid
:raises TypeError: if not type str
"""
self.set("walltime", walltime)
if not isinstance(walltime, str):
raise TypeError("walltime argument was not of type str")
pattern = r"^\d{2}:\d{2}:\d{2}$"
juliaputko marked this conversation as resolved.
Show resolved Hide resolved
if walltime and re.match(pattern, walltime):
self.set("walltime", walltime)
else:
raise ValueError(
f"Invalid walltime: {walltime}. Please use 'HH:MM:SS' format."
)

def set_queue(self, queue: str) -> None:
"""Set the queue for the batch job

:param queue: queue name
:raises TypeError: if not a str
"""
self.set("q", str(queue))
if not isinstance(queue, str):
raise TypeError("queue argument was not of type str")
self.set("q", queue)

def set_ncpus(self, num_cpus: int) -> None:
"""Set the number of cpus obtained in each node.
Expand All @@ -107,15 +128,24 @@ def set_ncpus(self, num_cpus: int) -> None:
this value will be overridden

:param num_cpus: number of cpus per node in select
:raises TypeError: if not an int
:raises ValueError: if not positive int
"""
if not isinstance(num_cpus, int):
raise TypeError("num_cpus argument was not of type int")
if num_cpus <= 0:
raise ValueError("Number of CPUs must be a positive value")
self.set("ppn", str(num_cpus))

def set_account(self, account: str) -> None:
"""Set the account for this batch job

:param acct: account id
:raises TypeError: if not a str
"""
self.set("A", str(account))
if not isinstance(account, str):
raise TypeError("account argument was not of type str")
self.set("A", account)

def format_batch_args(self) -> t.List[str]:
"""Get the formatted batch arguments for a preview
Expand Down Expand Up @@ -183,4 +213,6 @@ def set(self, key: str, value: str | None) -> None:
:param value: A string representation of the value for the launch
argument (if applicable), otherwise `None`
"""
if key in self._batch_args and key != self._batch_args[key]:
logger.warning(f"Overwriting argument '{key}' with value '{value}'")
Comment on lines +216 to +217
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ditto here

Suggested change
if key in self._batch_args and key != self._batch_args[key]:
logger.warning(f"Overwriting argument '{key}' with value '{value}'")
if key in self._batch_args and value != self._batch_args[key]:
logger.warning(f"Overwriting argument '{key}' with value '{value}'")

?

self._batch_args[key] = value
42 changes: 37 additions & 5 deletions smartsim/settings/arguments/batch/slurm.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,20 +56,32 @@ def set_walltime(self, walltime: str) -> None:
format = "HH:MM:SS"

juliaputko marked this conversation as resolved.
Show resolved Hide resolved
:param walltime: wall time
:raises ValueError: if walltime format is invalid
:raises TypeError: if not str
"""
if not isinstance(walltime, str):
raise TypeError("walltime argument was not of type str")
pattern = r"^\d{2}:\d{2}:\d{2}$"
if walltime and re.match(pattern, walltime):
juliaputko marked this conversation as resolved.
Show resolved Hide resolved
self.set("time", str(walltime))
else:
raise ValueError("Invalid walltime format. Please use 'HH:MM:SS' format.")
raise ValueError(
f"Invalid walltime: {walltime}. Please use 'HH:MM:SS' format."
)

def set_nodes(self, num_nodes: int) -> None:
"""Set the number of nodes for this batch job

This sets ``--nodes``.

:param num_nodes: number of nodes
:raises TypeError: if not an int
:raises ValueError: if not positive int
"""
if not isinstance(num_nodes, int):
raise TypeError("num_nodes argument was not of type int")
if num_nodes <= 0:
raise ValueError("Number of nodes must be a positive value")
self.set("nodes", str(num_nodes))

def set_account(self, account: str) -> None:
Expand All @@ -78,7 +90,10 @@ def set_account(self, account: str) -> None:
This sets ``--account``.

:param account: account id
:raises TypeError: if not a str
"""
if not isinstance(account, str):
raise TypeError("account argument was not of type str")
self.set("account", account)

def set_partition(self, partition: str) -> None:
juliaputko marked this conversation as resolved.
Show resolved Hide resolved
Expand All @@ -87,16 +102,22 @@ def set_partition(self, partition: str) -> None:
This sets ``--partition``.

:param partition: partition name
:raises TypeError: if not a str
"""
self.set("partition", str(partition))
if not isinstance(partition, str):
raise TypeError("partition argument was not of type str")
self.set("partition", partition)

def set_queue(self, queue: str) -> None:
"""alias for set_partition

Sets the partition for the slurm batch job

:param queue: the partition to run the batch job on
:raises TypeError: if not a str
"""
if not isinstance(queue, str):
raise TypeError("queue argument was not of type str")
return self.set_partition(queue)

def set_cpus_per_task(self, cpus_per_task: int) -> None:
juliaputko marked this conversation as resolved.
Show resolved Hide resolved
Expand All @@ -105,7 +126,13 @@ def set_cpus_per_task(self, cpus_per_task: int) -> None:
This sets ``--cpus-per-task``

:param num_cpus: number of cpus to use per task
:raises TypeError: if not int
:raises ValueError: if not positive int
"""
if not isinstance(cpus_per_task, int):
raise TypeError("cpus_per_task argument was not of type int")
if cpus_per_task <= 0:
raise ValueError("CPUs per task must be a positive value")
self.set("cpus-per-task", str(cpus_per_task))

def set_hostlist(self, host_list: t.Union[str, t.List[str]]) -> None:
Expand All @@ -118,10 +145,13 @@ def set_hostlist(self, host_list: t.Union[str, t.List[str]]) -> None:
"""
if isinstance(host_list, str):
host_list = [host_list.strip()]
if not isinstance(host_list, list):

if not (
isinstance(host_list, list)
and all(isinstance(item, str) for item in host_list)
):
raise TypeError("host_list argument must be a list of strings")
if not all(isinstance(host, str) for host in host_list):
raise TypeError("host_list argument must be list of strings")

self.set("nodelist", ",".join(host_list))

def format_batch_args(self) -> t.List[str]:
Expand Down Expand Up @@ -153,4 +183,6 @@ def set(self, key: str, value: str | None) -> None:
argument (if applicable), otherwise `None`
"""
# Store custom arguments in the launcher_args
if key in self._batch_args and key != self._batch_args[key]:
logger.warning(f"Overwriting argument '{key}' with value '{value}'")
Comment on lines +186 to +187
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One more here

Suggested change
if key in self._batch_args and key != self._batch_args[key]:
logger.warning(f"Overwriting argument '{key}' with value '{value}'")
if key in self._batch_args and value != self._batch_args[key]:
logger.warning(f"Overwriting argument '{key}' with value '{value}'")

?

self._batch_args[key] = value
2 changes: 1 addition & 1 deletion smartsim/settings/arguments/launch/alps.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,5 +224,5 @@ def set(self, key: str, value: str | None) -> None:
)
return
if key in self._launch_args and key != self._launch_args[key]:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not technically part of this PR, but I think this should also read

Suggested change
if key in self._launch_args and key != self._launch_args[key]:
if key in self._launch_args and value != self._launch_args[key]:

?

logger.warning(f"Overwritting argument '{key}' with value '{value}'")
logger.warning(f"Overwriting argument '{key}' with value '{value}'")
self._launch_args[key] = value
2 changes: 1 addition & 1 deletion smartsim/settings/arguments/launch/dragon.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def set(self, key: str, value: str | None) -> None:
"""
set_check_input(key, value)
if key in self._launch_args and key != self._launch_args[key]:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also somehow got past a previous review

Suggested change
if key in self._launch_args and key != self._launch_args[key]:
if key in self._launch_args and value != self._launch_args[key]:

?

logger.warning(f"Overwritting argument '{key}' with value '{value}'")
logger.warning(f"Overwriting argument '{key}' with value '{value}'")
self._launch_args[key] = value

def set_node_feature(self, feature_list: t.Union[str, t.List[str]]) -> None:
Expand Down
2 changes: 1 addition & 1 deletion smartsim/settings/arguments/launch/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,5 +83,5 @@ def set(self, key: str, value: str | None) -> None:
"""
set_check_input(key, value)
if key in self._launch_args and key != self._launch_args[key]:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ditto

Suggested change
if key in self._launch_args and key != self._launch_args[key]:
if key in self._launch_args and value != self._launch_args[key]:

?

logger.warning(f"Overwritting argument '{key}' with value '{value}'")
logger.warning(f"Overwriting argument '{key}' with value '{value}'")
self._launch_args[key] = value
2 changes: 1 addition & 1 deletion smartsim/settings/arguments/launch/lsf.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,5 +148,5 @@ def set(self, key: str, value: str | None) -> None:
)
return
if key in self._launch_args and key != self._launch_args[key]:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Another "out of scope" sanity check

Suggested change
if key in self._launch_args and key != self._launch_args[key]:
if key in self._launch_args and value != self._launch_args[key]:

logger.warning(f"Overwritting argument '{key}' with value '{value}'")
logger.warning(f"Overwriting argument '{key}' with value '{value}'")
self._launch_args[key] = value
2 changes: 1 addition & 1 deletion smartsim/settings/arguments/launch/mpi.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ def set(self, key: str, value: str | None) -> None:
)
return
if key in self._launch_args and key != self._launch_args[key]:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ditto

Suggested change
if key in self._launch_args and key != self._launch_args[key]:
if key in self._launch_args and value != self._launch_args[key]:

logger.warning(f"Overwritting argument '{key}' with value '{value}'")
logger.warning(f"Overwriting argument '{key}' with value '{value}'")
self._launch_args[key] = value


Expand Down
2 changes: 1 addition & 1 deletion smartsim/settings/arguments/launch/pals.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,5 +158,5 @@ def set(self, key: str, value: str | None) -> None:
)
return
if key in self._launch_args and key != self._launch_args[key]:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And the last one I think!

Suggested change
if key in self._launch_args and key != self._launch_args[key]:
if key in self._launch_args and value != self._launch_args[key]:

?

logger.warning(f"Overwritting argument '{key}' with value '{value}'")
logger.warning(f"Overwriting argument '{key}' with value '{value}'")
self._launch_args[key] = value
6 changes: 4 additions & 2 deletions smartsim/settings/arguments/launch/slurm.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,9 @@ def set_walltime(self, walltime: str) -> None:
if walltime and re.match(pattern, walltime):
self.set("time", str(walltime))
else:
raise ValueError("Invalid walltime format. Please use 'HH:MM:SS' format.")
raise ValueError(
f"Invalid walltime: {walltime}. Please use 'HH:MM:SS' format."
)

def set_het_group(self, het_group: t.Iterable[int]) -> None:
"""Set the heterogeneous group for this job
Expand Down Expand Up @@ -349,5 +351,5 @@ def set(self, key: str, value: str | None) -> None:
)
return
if key in self._launch_args and key != self._launch_args[key]:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Whoops! I lied!! Last one for real

Suggested change
if key in self._launch_args and key != self._launch_args[key]:
if key in self._launch_args and value != self._launch_args[key]:

logger.warning(f"Overwritting argument '{key}' with value '{value}'")
logger.warning(f"Overwriting argument '{key}' with value '{value}'")
self._launch_args[key] = value
Loading