Skip to content

Commit

Permalink
slurm_submit_python() print sbatch command at submit time and in slurm job
Browse files Browse the repository at this point in the history
  • Loading branch information
janosh committed Jun 20, 2023
1 parent 97c6949 commit 7b0918a
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 9 deletions.
23 changes: 16 additions & 7 deletions mb_discovery/slurm.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def _get_calling_file_path(frame: int = 1) -> str:
str: Calling function's file path n frames up the stack.
"""
caller_path = sys._getframe(frame).f_code.co_filename
return caller_path
return os.path.abspath(caller_path)


def slurm_submit_python(
Expand Down Expand Up @@ -50,21 +50,30 @@ def slurm_submit_python(
Raises:
SystemExit: Exit code will be subprocess.run(['sbatch', ...]).returncode.
"""
if "slurm-submit" not in sys.argv:
return
os.makedirs(log_dir, exist_ok=True) # slurm fails if log_dir is missing

# calling file's path.
if py_file_path is None:
py_file_path = _get_calling_file_path(frame=2)

cmd = [
*f"sbatch --{partition=} --{account=} --{time=} --{array=}".split(),
*f"sbatch --{partition=} --{account=} --{time=}".replace("'", "").split(),
*("--job-name", job_name),
*("--output", f"{log_dir}/slurm-%A-%a.out"),
*slurm_flags,
*("--wrap", f"'{env_vars} python {py_file_path}'"),
]
if array:
cmd += ["--array", array]

running_as_slurm_job = "SLURM_JOB_ID" in os.environ
if running_as_slurm_job or "slurm-submit" in sys.argv:
# print sbatch command at submission time and into slurm log file
# but not when running interactively
print(" ".join(cmd))

if "slurm-submit" not in sys.argv:
return

os.makedirs(log_dir, exist_ok=True) # slurm fails if log_dir is missing

result = subprocess.run(cmd, check=True)

raise SystemExit(result.returncode)
3 changes: 1 addition & 2 deletions models/m3gnet/slurm_array_m3gnet_relax_wbm.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
task_type = "IS2RE" # "RS2RE"
today = f"{datetime.now():%Y-%m-%d}"
module_dir = os.path.dirname(__file__)
# set large job array size for fast testing/debugging
slurm_array_task_count = 100
slurm_mem_per_node = 12000
job_name = f"m3gnet-wbm-relax-{task_type}"
Expand All @@ -50,8 +51,6 @@
# %%
slurm_job_id = os.environ.get("SLURM_JOB_ID", "debug")
slurm_array_task_id = int(os.environ.get("SLURM_ARRAY_TASK_ID", 0))
# set large fallback job array size for fast testing/debugging
slurm_array_task_count = int(os.environ.get("SLURM_ARRAY_TASK_COUNT", 10_000))

print(f"Job started running {datetime.now():%Y-%m-%d@%H-%M}")
print(f"{slurm_job_id = }")
Expand Down

0 comments on commit 7b0918a

Please sign in to comment.