-
Notifications
You must be signed in to change notification settings - Fork 198
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Enable HTEX worker to check manager is alive (#2970)
The HTEX worker will periodically check that its parent manager process is still alive. If the manager process dies without first sending the worker a SIGTERM, the worker will shut itself down.
- Loading branch information
Showing
2 changed files
with
80 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
52 changes: 52 additions & 0 deletions
52
parsl/tests/test_error_handling/test_htex_manager_failure.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
import os | ||
import signal | ||
import time | ||
|
||
import pytest | ||
|
||
import parsl | ||
from parsl.app.app import python_app | ||
from parsl.tests.configs.htex_local import fresh_config | ||
|
||
|
||
@pytest.fixture(autouse=True, scope="function")
def load_config():
    """Load a tuned ``htex_local`` config around each test, then tear it down.

    The first executor of a fresh config gets ``poll_period``,
    ``max_workers`` and ``heartbeat_period`` all set to 1 before the config
    is handed to ``parsl.load``.  Once the test has finished,
    ``parsl.dfk().cleanup()`` and ``parsl.clear()`` are called.
    """
    cfg = fresh_config()
    htex = cfg.executors[0]
    for option in ("poll_period", "max_workers", "heartbeat_period"):
        setattr(htex, option, 1)

    parsl.load(cfg)
    yield

    # Teardown: everything below runs after the test that used the fixture.
    parsl.dfk().cleanup()
    parsl.clear()
|
||
|
||
@python_app
def get_worker_pid():
    """Return the PID of the process in which this app body executes."""
    # Imported inside the body, as in the original app definition.
    import os
    pid = os.getpid()
    return pid
|
||
|
||
@python_app
def kill_manager(sig: int):
    """Send signal ``sig`` to the parent of the process running this app."""
    # Imported inside the body, as in the original app definition.
    import os
    parent_pid = os.getppid()
    os.kill(parent_pid, sig)
|
||
|
||
@pytest.mark.local
@pytest.mark.parametrize("sig", [signal.SIGTERM, signal.SIGKILL])
def test_htex_manager_failure_worker_shutdown(sig: int):
    """Ensure that HTEX workers shut down when the Manager process dies.

    The test records a worker's PID, has an app send ``sig`` to that
    process's parent, and then polls the recorded PID for up to five
    seconds.  It passes only if the PID stops existing within that window.
    """
    worker_pid = get_worker_pid().result()

    # NOTE(review): the returned future is intentionally not awaited --
    # presumably no result can come back once the manager has been killed.
    kill_manager(sig)

    # Only "no such process" proves the worker is gone.  A broader OSError
    # would also accept PermissionError, which os.kill raises when the
    # process still exists but may not be signalled.
    with pytest.raises(ProcessLookupError):
        deadline = time.monotonic() + 5
        while time.monotonic() < deadline:
            # Signal 0 delivers nothing; the call only checks that the PID
            # exists and raises if it does not.
            os.kill(worker_pid, 0)
            time.sleep(.1)