-
Notifications
You must be signed in to change notification settings - Fork 3
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Queuing system submission: check if the job is already waiting in the queue or currently running. #499
Queuing system submission: check if the job is already waiting in the queue or currently running. #499
Changes from all commits
a123fc9
a70682e
8267310
0ef2d13
d34f0a2
1e23191
cee811e
c176b4e
67e09a0
865ac64
cfab9d8
8a06907
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
@@ -1,24 +1,29 @@ | ||||||||||||||||||||||||||||||||||||||
import os | ||||||||||||||||||||||||||||||||||||||
import subprocess | ||||||||||||||||||||||||||||||||||||||
from typing import List, Optional, Union | ||||||||||||||||||||||||||||||||||||||
from typing import List, Optional, Tuple, Union | ||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||
from pysqa import QueueAdapter | ||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||
from executorlib.standalone.hdf import dump, get_queue_id | ||||||||||||||||||||||||||||||||||||||
from executorlib.standalone.inputcheck import check_file_exists | ||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||
def execute_with_pysqa( | ||||||||||||||||||||||||||||||||||||||
command: str, | ||||||||||||||||||||||||||||||||||||||
resource_dict: dict, | ||||||||||||||||||||||||||||||||||||||
task_dependent_lst: List[int] = [], | ||||||||||||||||||||||||||||||||||||||
command: list, | ||||||||||||||||||||||||||||||||||||||
task_dependent_lst: list[int] = [], | ||||||||||||||||||||||||||||||||||||||
file_name: Optional[str] = None, | ||||||||||||||||||||||||||||||||||||||
resource_dict: Optional[dict] = None, | ||||||||||||||||||||||||||||||||||||||
config_directory: Optional[str] = None, | ||||||||||||||||||||||||||||||||||||||
backend: Optional[str] = None, | ||||||||||||||||||||||||||||||||||||||
cache_directory: Optional[str] = None, | ||||||||||||||||||||||||||||||||||||||
) -> int: | ||||||||||||||||||||||||||||||||||||||
) -> Tuple[int, int]: | ||||||||||||||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Correct the return type annotation to match the actual return value.
The function's return type annotation specifies `Tuple[int, int]`, but the function returns a single `int` (`queue_id`). Apply this diff to fix the return type annotation:
- ) -> Tuple[int, int]:
+ ) -> int:
"""
Execute a command by submitting it to the queuing system
...
return queue_id Also applies to: 73-73 |
||||||||||||||||||||||||||||||||||||||
""" | ||||||||||||||||||||||||||||||||||||||
Execute a command by submitting it to the queuing system | ||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||
Args: | ||||||||||||||||||||||||||||||||||||||
command (list): The command to be executed. | ||||||||||||||||||||||||||||||||||||||
task_dependent_lst (list): A list of subprocesses that the current subprocess depends on. Defaults to []. | ||||||||||||||||||||||||||||||||||||||
file_name (str): Name of the HDF5 file which contains the Python function | ||||||||||||||||||||||||||||||||||||||
resource_dict (dict): resource dictionary, which defines the resources used for the execution of the function. | ||||||||||||||||||||||||||||||||||||||
Example resource dictionary: { | ||||||||||||||||||||||||||||||||||||||
cwd: None, | ||||||||||||||||||||||||||||||||||||||
|
@@ -30,37 +35,42 @@ def execute_with_pysqa( | |||||||||||||||||||||||||||||||||||||
Returns: | ||||||||||||||||||||||||||||||||||||||
int: queuing system ID | ||||||||||||||||||||||||||||||||||||||
""" | ||||||||||||||||||||||||||||||||||||||
if resource_dict is None: | ||||||||||||||||||||||||||||||||||||||
resource_dict = {} | ||||||||||||||||||||||||||||||||||||||
if "cwd" in resource_dict and resource_dict["cwd"] is not None: | ||||||||||||||||||||||||||||||||||||||
cwd = resource_dict["cwd"] | ||||||||||||||||||||||||||||||||||||||
else: | ||||||||||||||||||||||||||||||||||||||
cwd = cache_directory | ||||||||||||||||||||||||||||||||||||||
check_file_exists(file_name=file_name) | ||||||||||||||||||||||||||||||||||||||
queue_id = get_queue_id(file_name=file_name) | ||||||||||||||||||||||||||||||||||||||
Comment on lines
+38
to
+39
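There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Ensure `file_name` is not `None`.
The `file_name` parameter defaults to `None`, yet it is passed directly to `check_file_exists` and `get_queue_id`. Apply this diff to add the validation:
+ if file_name is None: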
+ raise ValueError("file_name must be provided")
check_file_exists(file_name=file_name)
queue_id = get_queue_id(file_name=file_name) 📝 Committable suggestion
Suggested change
|
||||||||||||||||||||||||||||||||||||||
qa = QueueAdapter( | ||||||||||||||||||||||||||||||||||||||
directory=config_directory, | ||||||||||||||||||||||||||||||||||||||
queue_type=backend, | ||||||||||||||||||||||||||||||||||||||
execute_command=_pysqa_execute_command, | ||||||||||||||||||||||||||||||||||||||
) | ||||||||||||||||||||||||||||||||||||||
submit_kwargs = { | ||||||||||||||||||||||||||||||||||||||
"command": " ".join(command), | ||||||||||||||||||||||||||||||||||||||
"dependency_list": [str(qid) for qid in task_dependent_lst], | ||||||||||||||||||||||||||||||||||||||
"working_directory": os.path.abspath(cwd), | ||||||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||||||
if "cwd" in resource_dict: | ||||||||||||||||||||||||||||||||||||||
del resource_dict["cwd"] | ||||||||||||||||||||||||||||||||||||||
unsupported_keys = [ | ||||||||||||||||||||||||||||||||||||||
"threads_per_core", | ||||||||||||||||||||||||||||||||||||||
"gpus_per_core", | ||||||||||||||||||||||||||||||||||||||
"openmpi_oversubscribe", | ||||||||||||||||||||||||||||||||||||||
"slurm_cmd_args", | ||||||||||||||||||||||||||||||||||||||
] | ||||||||||||||||||||||||||||||||||||||
for k in unsupported_keys: | ||||||||||||||||||||||||||||||||||||||
if k in resource_dict: | ||||||||||||||||||||||||||||||||||||||
del resource_dict[k] | ||||||||||||||||||||||||||||||||||||||
if "job_name" not in resource_dict: | ||||||||||||||||||||||||||||||||||||||
resource_dict["job_name"] = "pysqa" | ||||||||||||||||||||||||||||||||||||||
submit_kwargs.update(resource_dict) | ||||||||||||||||||||||||||||||||||||||
return qa.submit_job(**submit_kwargs) | ||||||||||||||||||||||||||||||||||||||
if queue_id is None or qa.get_status_of_job(process_id=queue_id) is None: | ||||||||||||||||||||||||||||||||||||||
if resource_dict is None: | ||||||||||||||||||||||||||||||||||||||
resource_dict = {} | ||||||||||||||||||||||||||||||||||||||
if "cwd" in resource_dict and resource_dict["cwd"] is not None: | ||||||||||||||||||||||||||||||||||||||
Comment on lines
+46
to
+48
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
🛠️ Refactor suggestion
Avoid mutating input parameters to prevent side effects.
Modifying the `resource_dict` argument in place (deleting `cwd` and the unsupported keys, and setting `job_name`) changes the dictionary owned by the caller. Apply this diff to avoid mutating the original dictionary:
if resource_dict is None:
resource_dict = {}
+else:
+ resource_dict = resource_dict.copy() 📝 Committable suggestion
Suggested change
|
||||||||||||||||||||||||||||||||||||||
cwd = resource_dict["cwd"] | ||||||||||||||||||||||||||||||||||||||
else: | ||||||||||||||||||||||||||||||||||||||
cwd = cache_directory | ||||||||||||||||||||||||||||||||||||||
submit_kwargs = { | ||||||||||||||||||||||||||||||||||||||
"command": " ".join(command), | ||||||||||||||||||||||||||||||||||||||
"dependency_list": [str(qid) for qid in task_dependent_lst], | ||||||||||||||||||||||||||||||||||||||
"working_directory": os.path.abspath(cwd), | ||||||||||||||||||||||||||||||||||||||
Comment on lines
+48
to
+55
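There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Handle potential `None` value for `cwd`.
If both `resource_dict["cwd"]` and `cache_directory` are `None`, then `cwd` is `None` and `os.path.abspath(cwd)` raises a `TypeError`. Consider setting a default value or raising an exception if `cwd` is `None`:
else: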
cwd = cache_directory
+ if cwd is None:
+ raise ValueError("Working directory (cwd) must be specified.")
submit_kwargs = {
"command": " ".join(command),
"dependency_list": [str(qid) for qid in task_dependent_lst],
"working_directory": os.path.abspath(cwd),
} 📝 Committable suggestion
Suggested change
|
||||||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||||||
if "cwd" in resource_dict: | ||||||||||||||||||||||||||||||||||||||
del resource_dict["cwd"] | ||||||||||||||||||||||||||||||||||||||
unsupported_keys = [ | ||||||||||||||||||||||||||||||||||||||
"threads_per_core", | ||||||||||||||||||||||||||||||||||||||
"gpus_per_core", | ||||||||||||||||||||||||||||||||||||||
"openmpi_oversubscribe", | ||||||||||||||||||||||||||||||||||||||
"slurm_cmd_args", | ||||||||||||||||||||||||||||||||||||||
] | ||||||||||||||||||||||||||||||||||||||
for k in unsupported_keys: | ||||||||||||||||||||||||||||||||||||||
if k in resource_dict: | ||||||||||||||||||||||||||||||||||||||
del resource_dict[k] | ||||||||||||||||||||||||||||||||||||||
if "job_name" not in resource_dict: | ||||||||||||||||||||||||||||||||||||||
resource_dict["job_name"] = "pysqa" | ||||||||||||||||||||||||||||||||||||||
submit_kwargs.update(resource_dict) | ||||||||||||||||||||||||||||||||||||||
queue_id = qa.submit_job(**submit_kwargs) | ||||||||||||||||||||||||||||||||||||||
dump(file_name=file_name, data_dict={"queue_id": queue_id}) | ||||||||||||||||||||||||||||||||||||||
return queue_id | ||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||
def _pysqa_execute_command( | ||||||||||||||||||||||||||||||||||||||
|
Original file line number | Diff line number | Diff line change | ||||||
---|---|---|---|---|---|---|---|---|
|
@@ -2,10 +2,13 @@ | |||||||
import time | ||||||||
from typing import Optional | ||||||||
|
||||||||
from executorlib.standalone.inputcheck import check_file_exists | ||||||||
|
||||||||
|
||||||||
def execute_in_subprocess( | ||||||||
command: list, | ||||||||
task_dependent_lst: list = [], | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Replace mutable default argument with None Using mutable default arguments in Python can lead to unexpected behavior because the same list object is shared across function calls. Apply this fix: - task_dependent_lst: list = [],
+ task_dependent_lst: list = None, And update the function body: + if task_dependent_lst is None:
+ task_dependent_lst = []
🧰 Tools 🪛 Ruff 10-10: Do not use mutable data structures for argument defaults. Replace with `None`; initialize within function (B006) |
||||||||
file_name: Optional[str] = None, | ||||||||
resource_dict: Optional[dict] = None, | ||||||||
config_directory: Optional[str] = None, | ||||||||
backend: Optional[str] = None, | ||||||||
|
@@ -17,6 +20,7 @@ def execute_in_subprocess( | |||||||
Args: | ||||||||
command (list): The command to be executed. | ||||||||
task_dependent_lst (list): A list of subprocesses that the current subprocess depends on. Defaults to []. | ||||||||
file_name (str): Name of the HDF5 file which contains the Python function | ||||||||
resource_dict (dict): resource dictionary, which defines the resources used for the execution of the function. | ||||||||
Example resource dictionary: { | ||||||||
cwd: None, | ||||||||
|
@@ -29,6 +33,7 @@ def execute_in_subprocess( | |||||||
subprocess.Popen: The subprocess object. | ||||||||
|
||||||||
""" | ||||||||
check_file_exists(file_name=file_name) | ||||||||
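There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Add None check before file existence validation.
Since `file_name` is declared as `Optional[str]` with a default of `None`, calling `check_file_exists` unconditionally raises a `ValueError` whenever no file name is passed. Apply this fix:
- check_file_exists(file_name=file_name)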
+ if file_name is not None:
+ check_file_exists(file_name=file_name) 📝 Committable suggestion
Suggested change
|
||||||||
while len(task_dependent_lst) > 0: | ||||||||
task_dependent_lst = [ | ||||||||
task for task in task_dependent_lst if task.poll() is None | ||||||||
|
Original file line number | Diff line number | Diff line change | ||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
@@ -1,5 +1,6 @@ | ||||||||||||||||||||||
import inspect | ||||||||||||||||||||||
import multiprocessing | ||||||||||||||||||||||
import os.path | ||||||||||||||||||||||
from concurrent.futures import Executor | ||||||||||||||||||||||
from typing import Callable, List, Optional | ||||||||||||||||||||||
|
||||||||||||||||||||||
|
@@ -188,3 +189,10 @@ def validate_number_of_cores( | |||||||||||||||||||||
elif max_cores is not None and max_workers is None: | ||||||||||||||||||||||
max_workers = int(max_cores / cores_per_worker) | ||||||||||||||||||||||
return max_workers | ||||||||||||||||||||||
|
||||||||||||||||||||||
|
||||||||||||||||||||||
def check_file_exists(file_name: str): | ||||||||||||||||||||||
if file_name is None: | ||||||||||||||||||||||
raise ValueError("file_name is not set.") | ||||||||||||||||||||||
if not os.path.exists(file_name): | ||||||||||||||||||||||
raise ValueError("file_name is not written to the file system.") | ||||||||||||||||||||||
Comment on lines
+194
to
+198
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🛠️ Refactor suggestion Add type hints and improve error messages The function should follow Python typing best practices and provide more descriptive error messages. -def check_file_exists(file_name: str):
+def check_file_exists(file_name: str) -> None:
if file_name is None:
- raise ValueError("file_name is not set.")
+ raise ValueError("The file_name parameter must not be None.")
if not os.path.exists(file_name):
- raise ValueError("file_name is not written to the file system.")
+ raise ValueError(f"The file '{file_name}' does not exist in the file system.") The changes:
📝 Committable suggestion
Suggested change
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,7 +3,7 @@ | |
import shutil | ||
import unittest | ||
|
||
from executorlib.standalone.cache.spawner import execute_in_subprocess | ||
from executorlib.cache.subprocess_spawner import execute_in_subprocess | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 💡 Codebase verification Import path refactoring needs attention The verification revealed mixed results regarding the import path refactoring:
This suggests the refactoring is incomplete and needs to be addressed consistently across the codebase. 🔗 Analysis chainLGTM! Verify import path refactoring. The import path update aligns with the module restructuring. Let's verify the new import path exists and the old one is fully removed: 🏁 Scripts executedThe following scripts were executed for the analysis: Script: #!/bin/bash
# Description: Verify import path refactoring is complete
# Expected: New path should exist, old path should be removed
# Check if new module exists
fd -e py "subprocess_spawner.py" executorlib/cache/
# Verify old module is removed
fd -e py "spawner.py" executorlib/standalone/cache/
# Check for any remaining references to old path
rg "executorlib\.standalone\.cache\.spawner"
Length of output: 307 Script: #!/bin/bash
# Let's verify the import path changes more thoroughly
# 1. Check the content of the new module to ensure it has the required function
echo "=== Content of new subprocess_spawner.py ==="
cat executorlib/cache/subprocess_spawner.py
# 2. Search for any other files still using old import path pattern
echo -e "\n=== Files with old import pattern ==="
rg -l "from executorlib.standalone"
# 3. Check if there are any other references to the old path structure
echo -e "\n=== References to standalone in paths ==="
rg "standalone.*cache"
# 4. Verify the function is actually used in the test file
echo -e "\n=== Usage in test file ==="
cat tests/test_cache_executor_mpi.py
Length of output: 4882 |
||
|
||
|
||
try: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,14 +4,14 @@ | |
import shutil | ||
import unittest | ||
|
||
from executorlib.standalone.cache.spawner import ( | ||
from executorlib.cache.subprocess_spawner import ( | ||
execute_in_subprocess, | ||
terminate_subprocess, | ||
) | ||
from executorlib.standalone.thread import RaisingThread | ||
|
||
try: | ||
from executorlib.cache.executor import FileExecutor | ||
from executorlib.cache.executor import FileExecutor, create_file_executor | ||
from executorlib.cache.shared import execute_tasks_h5 | ||
|
||
skip_h5py_test = False | ||
|
@@ -46,6 +46,12 @@ def test_executor_dependence_mixed(self): | |
self.assertEqual(fs2.result(), 4) | ||
self.assertTrue(fs2.done()) | ||
|
||
def test_create_file_executor_error(self): | ||
with self.assertRaises(ValueError): | ||
create_file_executor(block_allocation=True) | ||
with self.assertRaises(ValueError): | ||
create_file_executor(init_function=True) | ||
|
||
def test_executor_dependence_error(self): | ||
with self.assertRaises(ValueError): | ||
with FileExecutor( | ||
|
@@ -163,7 +169,7 @@ def test_executor_function_dependence_args(self): | |
"future_queue": q, | ||
"cache_directory": cache_dir, | ||
"execute_function": execute_in_subprocess, | ||
"resource_dict": {"cores": 1, "cwd": None}, | ||
"resource_dict": {"cores": 1}, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 💡 Codebase verification Removal of 'cwd' from resource_dict is incorrect and breaks functionality The removal of 'cwd' from resource_dict is problematic because:
🔗 Analysis chainVerify the removal of 'cwd' from resource_dict The 'cwd' key has been removed from the resource dictionary. While this might be intentional, we should verify that this doesn't affect the test's functionality or expected behavior. 🏁 Scripts executedThe following scripts were executed for the analysis: Script: #!/bin/bash
# Search for other test files that might be affected by this change
rg -l "resource_dict.*cwd" tests/
# Check if cwd is used in the actual implementation
rg -l "resource_dict.*cwd" executorlib/
Length of output: 228 Script: #!/bin/bash
# Check the context of resource_dict and cwd usage in the implementation files
rg "resource_dict.*cwd" executorlib/cache/subprocess_spawner.py executorlib/cache/queue_spawner.py -B 2 -A 2
# Check the context in test files to understand the changes
rg "resource_dict.*cwd" tests/test_cache_executor_serial.py tests/test_cache_executor_pysqa_flux.py -B 2 -A 2
# Look for any recent changes related to this in git history
git log -p -- tests/test_cache_executor_serial.py | grep -A 5 -B 5 "resource_dict"
Length of output: 6809 |
||
"terminate_function": terminate_subprocess, | ||
}, | ||
) | ||
|
@@ -176,9 +182,11 @@ def test_executor_function_dependence_args(self): | |
|
||
def test_execute_in_subprocess_errors(self): | ||
with self.assertRaises(ValueError): | ||
execute_in_subprocess(command=[], config_directory="test") | ||
execute_in_subprocess( | ||
file_name=__file__, command=[], config_directory="test" | ||
) | ||
with self.assertRaises(ValueError): | ||
execute_in_subprocess(command=[], backend="flux") | ||
execute_in_subprocess(file_name=__file__, command=[], backend="flux") | ||
|
||
def tearDown(self): | ||
if os.path.exists("cache"): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Avoid mutable default arguments for function parameters
Using mutable default arguments like lists can lead to unexpected behavior because the default value is shared across all function calls. In this case, `task_dependent_lst: list[int] = []` should be replaced with `task_dependent_lst: Optional[List[int]] = None`, and initialized within the function.
Apply this diff to fix the issue:
📝 Committable suggestion
🧰 Tools
🪛 Ruff
13-13: Do not use mutable data structures for argument defaults. Replace with `None`; initialize within function (B006)