diff --git a/agent/util-scripts/pbench-tool-meister-client b/agent/util-scripts/pbench-tool-meister-client index 3226264591..a6b6370ce7 100755 --- a/agent/util-scripts/pbench-tool-meister-client +++ b/agent/util-scripts/pbench-tool-meister-client @@ -10,15 +10,12 @@ import logging import os import sys -from pbench.agent.constants import ( - cli_tm_allowed_actions, - cli_tm_channel_prefix, - def_redis_port, -) +from pbench.agent.constants import cli_tm_allowed_actions, cli_tm_channel_prefix from pbench.agent.tool_meister_client import Client +from pbench.agent.utils import RedisServerCommon -def main(argv): +def main(argv: list) -> int: """Main program for the Tool Meister client CLI. The command line arguments are: @@ -73,23 +70,16 @@ def main(argv): # properly shut down. return 0 - redis_server = os.environ.get("PBENCH_REDIS_SERVER", f"localhost:{def_redis_port}") - parts = redis_server.split(":", 1) - if len(parts) != 2: - logger.error("Bad Redis server specified, {!r}", redis_server) - return 1 + redis_server_env = os.environ.get("PBENCH_REDIS_SERVER", "") try: - redis_port = int(parts[1]) - except Exception: - logger.error("Bad port for Redis server specified in {!r}", redis_server) - return 1 - else: - redis_host = parts[0] + redis_server = RedisServerCommon(redis_server_env, "localhost") + except RedisServerCommon.Err as exc: + logger.error(str(exc)) + return exc.return_code - # The Redis server is always running on the local host with the CLI. with Client( - redis_host=redis_host, - redis_port=redis_port, + redis_host=redis_server.host, + redis_port=redis_server.port, channel_prefix=cli_tm_channel_prefix, logger=logger, ) as client: diff --git a/agent/util-scripts/pbench-tool-meister-start b/agent/util-scripts/pbench-tool-meister-start index 0a911544ca..ab97099cff 100755 --- a/agent/util-scripts/pbench-tool-meister-start +++ b/agent/util-scripts/pbench-tool-meister-start @@ -97,12 +97,10 @@ the benchmark execution environment: - ssh-opts """ -import errno import json import logging import os import shlex -import signal import socket import sys import time @@ -128,9 +126,12 @@ from pbench.agent.tool_meister import main as tm_main from pbench.agent.tool_meister_client import Client from pbench.agent.toolmetadata import ToolMetadata from pbench.agent.utils import ( + BaseReturnCode, + BaseServer, cli_verify_sysinfo, error_log, info_log, + RedisServerCommon, validate_hostname, ) @@ -140,12 +141,11 @@ from pbench.agent.utils import ( _TDS_STARTUP_TIMEOUT = 60 -class ReturnCode: +class ReturnCode(BaseReturnCode): """ReturnCode - symbolic return codes for the main program of pbench-tool-meister-start. """ - SUCCESS = 0 BADTOOLGROUP = 1 BADAGENTCONFIG = 2 MISSINGINSTALLDIR = 4 @@ -182,37 +182,8 @@ class ReturnCode: BADFULLHOSTNAME = 37 BADHOSTNAME = 38 - # Kill sub-codes - KILL_SUCCESS = 0 - KILL_READEXC = 1 - KILL_BADPID = 2 - KILL_PIDNOTFOUND = 3 - KILL_KILLERR = 4 - KILL_KILLEXC = 5 - class Err(RuntimeError): - """Err - exception definition to capture return code as an attribute. - """ - - def __init__(self, message: str, return_code: int): - """Adds a return_code attribute to capture an integer representing - the return code a caller can pass along. - """ - super().__init__(message) - self.return_code = return_code - - @staticmethod - def kill_ret_code(kill_code: int, ret_val: int): - """kill_ret_code - return an integer return code made up of the given - kill code and a return value. - - A kill code of 0 and return value of 42 is returned as 42. - A kill code of 5 and return value of 52 is returned as 542. - """ - return (kill_code * 100) + ret_val - - -def _waitpid(pid: int): +def _waitpid(pid: int) -> int: """Wrapper for os.waitpid() Returns the exit status of the given process ID. @@ -241,10 +212,10 @@ def start_tms_via_ssh( tool_group: str, ssh_opts: str, full_hostname: str, - redis_server, + redis_server: RedisServerCommon, redis_client: redis.Redis, logger: logging.Logger, -): +) -> None: """start_tms_via_ssh - orchestrate the creation of local and remote Tool Meister instances using ssh for those that are remote. @@ -364,88 +335,6 @@ def start_tms_via_ssh( ) -class BaseServer: - """BaseServer - abstract base class for common code shared between the - ToolDataSink and RedisServer classes. - """ - - def_port = None - bad_port_code = None - bad_host_code = None - name = None - - class Err(ReturnCode.Err): - """BaseServer.Err - derived from ReturnCode.Err, specifically raised by - BaseServer and its derived classes. - """ - - pass - - def __init__(self, spec: str, def_host_name: str): - """__init__ - from the given IP/port specification given, determine the - IP:port for binding (listening) and the IP:port for connecting. - - The IP/port specification can be given in one of two forms: - - - `:' - * where the same ip address and port are used for binding and - connecting - - `:;:` - * where a semi-colon separates the bind ip/port from the connecting - ip/port - - In either case, a missing port (bare colon, optional) indicates the - default port should be used. If no IP address is given, the default - host name is used. - - No attempt is made to verify that the IP address resolves, or that it - is reachable, though we do check they are syntactically valid. - """ - _spec = spec if spec else def_host_name - parts = _spec.split(";", 1) - pairs = [] - for part in parts: - host_port_parts = part.rsplit(":", 1) - if len(host_port_parts) == 1: - port = self.def_port - else: - try: - port = int(host_port_parts[1]) - except ValueError as exc: - if host_port_parts[1] == "": - port = self.def_port - else: - raise self.Err( - f"Bad port specified for {self.name} in '{spec}'", - self.bad_port_ret_code, - ) from exc - host = host_port_parts[0] if host_port_parts[0] else def_host_name - if host[0] == "[" and host[-1] == "]": - # Brackets are invalid for a host name, but might be used when - # specifying a port with an IPv6 address, strip them before we - # validate the host name. - host = host[1:-1] - if validate_hostname(host) != 0: - raise self.Err( - f"Bad host specified for {self.name} in '{spec}'", - self.bad_host_ret_code, - ) - pairs.append((host, port)) - - self.bind_host, self.bind_port = pairs[0] - if len(pairs) == 2: - # Separate bind/connecting ip:port - self.host, self.port = pairs[1] - self._repr = f"{self.name} - {self.bind_host}:{self.bind_port} / {self.host}:{self.port}" - else: - assert len(pairs) == 1, "Logic bomb! unexpected pairs, {pairs!r}" - self.host, self.port = pairs[0] - self._repr = f"{self.name} - {self.host}:{self.port}" - - def __repr__(self): - return self._repr - - class ToolDataSink(BaseServer): """ToolDataSink - an encapsulation of the handling of the Tool Data Sink specification and methods to optionally create and manage an instance. @@ -461,10 +350,10 @@ class ToolDataSink(BaseServer): exec_dir: Path, full_hostname: str, tds_param_key: str, - redis_server, + redis_server: RedisServerCommon, redis_client: redis.Redis, logger: logging.Logger, - ): + ) -> None: assert ( self.host is not None and self.port is not None @@ -530,7 +419,7 @@ class ToolDataSink(BaseServer): time.sleep(0.1) @staticmethod - def wait(chan: RedisChannelSubscriber, logger: logging.Logger): + def wait(chan: RedisChannelSubscriber, logger: logging.Logger) -> int: """wait - Wait for the Tool Data Sink to report back success or failure regarding the Tool Meister environment setup. """ @@ -559,15 +448,13 @@ class ToolDataSink(BaseServer): return 0 if status == "success" else 1 -class RedisServer(BaseServer): +class RedisServer(RedisServerCommon): """RedisServer - an encapsulation of the handling of the Redis server specification and methods to optionally create and manage an instance. """ - def_port = def_redis_port bad_port_ret_code = ReturnCode.BADREDISPORT bad_host_ret_code = ReturnCode.BADREDISHOST - name = "Redis server" # Redis server configuration template for pbench's use conf_tmpl = """bind {bind_host_names} @@ -584,7 +471,7 @@ port {redis_port:d} super().__init__(spec, def_host_name) self.pid_file = None - def start(self, tm_dir: Path, full_hostname: str, logger: logging.Logger): + def start(self, tm_dir: Path, full_hostname: str, logger: logging.Logger) -> None: """start_redis - configure and start a Redis server. Raises a BaseServer.Err exception if an error is encountered. @@ -695,46 +582,8 @@ port {redis_port:d} ReturnCode.REDISFAILED, ) - def kill(self, ret_val: int): - """kill - attempt to KILL the running Redis server. - - This method is a no-op if the server instance isn't managed by us. - - Returns ReturnCode "enum" via the "kill" return code method. - """ - assert self.pid_file is not None, f"Logic bomb! Unexpected state: {self!r}" - - try: - raw_pid = self.pid_file.read_text() - except Exception: - # No "pid" to kill - return ReturnCode.kill_ret_code(ReturnCode.KILL_READEXC, ret_val) - else: - try: - pid = int(raw_pid) - except ValueError: - # Bad pid value - return ReturnCode.kill_ret_code(ReturnCode.KILL_BADPID, ret_val) - try: - os.kill(pid, signal.SIGKILL) - except OSError as exc: - if exc.errno == errno.ESRCH: - # PID not found, ignore - return ReturnCode.kill_ret_code( - ReturnCode.KILL_PIDNOTFOUND, ret_val - ) - else: - # Some error encountered trying to KILL the process. - return ReturnCode.kill_ret_code(ReturnCode.KILL_KILLERR, ret_val) - except Exception: - # Some other error encountered trying to KILL the process. - return ReturnCode.kill_ret_code(ReturnCode.KILL_KILLEXC, ret_val) - else: - # "successfully" KILL'd the give process. - return ReturnCode.kill_ret_code(ReturnCode.KILL_SUCCESS, ret_val) - -def main(_prog: str, cli_params: Namespace): +def main(_prog: str, cli_params: Namespace) -> int: """Main program for tool meister start. :cli_params: expects a CLI parameters object which has five attributes: diff --git a/agent/util-scripts/pbench-tool-meister-stop b/agent/util-scripts/pbench-tool-meister-stop index ce4f9b62b9..9b4aebbe29 100755 --- a/agent/util-scripts/pbench-tool-meister-stop +++ b/agent/util-scripts/pbench-tool-meister-stop @@ -9,59 +9,95 @@ stopping all local/remote tool meisters, closing down the local data sink, and finally the local redis server. """ -import errno import logging import os -import signal import sys import time -from argparse import ArgumentParser +from argparse import ArgumentParser, Namespace from pathlib import Path -from pbench.agent.constants import def_redis_port, cli_tm_channel_prefix +from pbench.agent.constants import cli_tm_channel_prefix from pbench.agent.tool_group import BadToolGroup, ToolGroup from pbench.agent.tool_meister_client import Client -from pbench.agent.utils import cli_verify_sysinfo, error_log, info_log +from pbench.agent.utils import ( + RedisServerCommon, + cli_verify_sysinfo, + error_log, + info_log, +) -def is_running(pid): - """Is the given PID running? +def is_running(pid: int) -> bool: + """is_running - Is the given PID running? See https://stackoverflow.com/questions/7653178/wait-until-a-certain-process-knowing-the-pid-end + + Return True if a PID is running, else False if not. """ try: os.kill(pid, 0) - except OSError as err: - if err.errno == errno.ESRCH: - return False + except ProcessLookupError: + return False return True -def wait_for_pid(pid): +def wait_for_pid(pid: int) -> None: """wait_for_pid - wait for a process to actually stop running. """ while is_running(pid): time.sleep(0.1) +class RedisServer(RedisServerCommon): + """RedisServer - an encapsulation of the handling of the Redis server + specification for pbench-tool-meister-stop. + + The constructor is enhanced to find the optional local Redis server pid + file, and the additional method, locally_managed(), keys off of its + presence. + """ + + def __init__(self, spec: str, benchmark_run_dir: Path, def_host_name: str): + super().__init__(spec, def_host_name) + try: + self.pid_file = ( + benchmark_run_dir / "tm" / f"redis_{self.def_port:d}.pid" + ).resolve(strict=True) + except FileNotFoundError: + pass + + def locally_managed(self) -> bool: + return self.pid_file is not None + + def graceful_shutdown( - benchmark_run_dir, full_hostname, group, redis_server_pid_file, logger -): - # The assumption/assertion here is that the tool meister "stop" command is - # run on the same node as the tool meister "start" command ran, creating - # the local Tool Data Sink and the optional local Tool Meister. We want to - # make sure anything "local" to this stop command is shut down gracefully - # before we report back to the user. If Tool Meisters from remote nodes - # have already reported that they have received the "terminate" message, - # then we trust they will shutdown gracefully themselves. + benchmark_run_dir: Path, + full_hostname: str, + group: str, + redis_server: RedisServer, + logger: logging.Logger, +) -> int: + """ + graceful_shutdown - attempt to cleanly shut down the previously created + Tool Data Sink, and the Redis server. + + The assumption/assertion here is that the tool meister "stop" command is run + on the same node as the tool meister "start" command ran, creating the local + Tool Data Sink and the optional local Tool Meister. We want to make sure + anything "local" to this stop command is shut down gracefully before we + report back to the user. If Tool Meisters from remote nodes have already + reported that they have received the "terminate" message, then we trust they + will shutdown gracefully themselves. + + Returns 0 on success, 1 on failure (logging any unexpected exceptions) + """ try: tds_pid_file = benchmark_run_dir / "tm" / "pbench-tool-data-sink.pid" try: pid_str = tds_pid_file.read_text() - except OSError as exc: - if exc.errno != errno.ENOENT: - raise + except FileNotFoundError: + pass else: tds_pid = int(pid_str) logger.debug("waiting for tool-data-sink (%d) to exit", tds_pid) @@ -76,9 +112,8 @@ def graceful_shutdown( ltm_pid_file = benchmark_run_dir / "tm" / f"tm-{group}-{full_hostname}.pid" try: pid_str = ltm_pid_file.read_text() - except OSError as exc: - if exc.errno != errno.ENOENT: - raise + except FileNotFoundError: + pass else: ltm_pid = int(pid_str) logger.debug("waiting for local tool-meister (%d) to exit", ltm_pid) @@ -89,29 +124,7 @@ def graceful_shutdown( # All was good so far, so we can terminate the redis server. try: - try: - pid_str = redis_server_pid_file.read_text() - except OSError as exc: - if exc.errno != errno.ENOENT: - raise - else: - redis_server_pid = int(pid_str) - pid_exists = True - timeout = time.time() + 60 - while pid_exists: - try: - os.kill(redis_server_pid, signal.SIGTERM) - except ProcessLookupError: - pid_exists = False - else: - if time.time() > timeout: - try: - os.kill(redis_server_pid, signal.SIGKILL) - except ProcessLookupError: - pid_exists = False - except Exception: - raise - time.sleep(0.1) + ret_val = redis_server.kill(ret_val) except Exception: logger.exception("Exception encountered terminating Redis server") ret_val = 1 @@ -119,7 +132,7 @@ def graceful_shutdown( return ret_val -def main(_prog, cli_params): +def main(_prog: str, cli_params: Namespace) -> int: """Main program for the tool meister stop CLI interface. Stopping the Tool Meisters involves four steps: @@ -183,36 +196,18 @@ def main(_prog, cli_params): logger.exception("failed to fetch required parameters from the environment") return 1 - if cli_params.redis_server is None: - # No Redis server was given, so look locally to see if we can find it. - # If no Redis server locally, we're done. - try: - redis_server_pid_file = ( - benchmark_run_dir / "tm" / f"redis_{def_redis_port:d}.pid" - ).resolve(strict=True) - except FileNotFoundError: - # No Redis server, nothing to do. - return 0 - else: - redis_host = "localhost" - redis_port = def_redis_port - else: - parts = cli_params.redis_server.split(":", 1) - if len(parts) != 2: - logger.error("Bad Redis server specified, '%s'", cli_params.redis_server) - return 1 - try: - redis_port = int(parts[1]) - except Exception: - logger.error("Bad Redis port specified, '%s'", cli_params.redis_server) - return 1 - else: - redis_host = parts[0] + try: + redis_server = RedisServer( + cli_params.redis_server, benchmark_run_dir, full_hostname + ) + except RedisServer.Err as exc: + logger.error(str(exc)) + return exc.return_code # The Redis server is always running on the local host with the CLI. with Client( - redis_host=redis_host, - redis_port=redis_port, + redis_host=redis_server.host, + redis_port=redis_server.port, channel_prefix=cli_tm_channel_prefix, logger=logger, ) as client: @@ -254,14 +249,14 @@ def main(_prog, cli_params): # just return the success/failure of the terminate operation. ret_val = end_ret_val if end_ret_val != 0 else term_ret_val - if cli_params.redis_server is None: + if redis_server.locally_managed(): # The client operations have finished, successful or unsuccessfully, # and we were not given an explicit Redis server to use. So the # previous pbench-tool-meister-start must have set up the local Tool # Data Sink, Tool Meister (if registered), and the Redis server. It is # our responsibility to make sure these processes shut down correctly. shutdown_ret_val = graceful_shutdown( - benchmark_run_dir, full_hostname, group, redis_server_pid_file, logger + benchmark_run_dir, full_hostname, group, redis_server, logger ) if ret_val == 0: # If client termination was successful, report the status of the diff --git a/agent/util-scripts/test-bin/test-client-tool-meister b/agent/util-scripts/test-bin/test-client-tool-meister index d48e910b70..f570c1bfb6 100755 --- a/agent/util-scripts/test-bin/test-client-tool-meister +++ b/agent/util-scripts/test-bin/test-client-tool-meister @@ -76,7 +76,10 @@ function _timeout { source ${_script_path}/common-tm-cleanup -PBENCH_REDIS_SERVER="localhost" PBENCH_TOOL_DATA_SINK="localhost" _timeout pbench-tool-meister-start --sysinfo="${sysinfo}" "${group}" +export PBENCH_REDIS_SERVER="localhost:17001;localhost:17001" +export PBENCH_TOOL_DATA_SINK="localhost;localhost" + +_timeout pbench-tool-meister-start --sysinfo="${sysinfo}" "${group}" status=${?} if [[ ${status} -ne 0 ]]; then printf -- "ERROR - \"pbench-tool-meister-start --sysinfo='%s' '%s'\" failed to execute successfully (exit code: %s)\n" "${sysinfo}" "${group}" "${status}" >&2 diff --git a/agent/util-scripts/test-bin/test-start-stop-tool-meister b/agent/util-scripts/test-bin/test-start-stop-tool-meister index bbd6a4284e..30ef26c53a 100755 --- a/agent/util-scripts/test-bin/test-start-stop-tool-meister +++ b/agent/util-scripts/test-bin/test-start-stop-tool-meister @@ -49,14 +49,14 @@ fi source ${_script_path}/common-tm-cleanup -_PBENCH_TOOL_MEISTER_LOG_LEVEL="debug" _PBENCH_TOOL_DATA_SINK_LOG_LEVEL="debug" _PBENCH_TOOL_MEISTER_START_LOG_LEVEL="debug" PBENCH_REDIS_SERVER="localhost" PBENCH_TOOL_DATA_SINK="localhost" pbench-tool-meister-start --sysinfo="none" ${group} +_PBENCH_TOOL_MEISTER_LOG_LEVEL="debug" _PBENCH_TOOL_DATA_SINK_LOG_LEVEL="debug" _PBENCH_TOOL_MEISTER_START_LOG_LEVEL="debug" PBENCH_REDIS_SERVER="localhost" PBENCH_TOOL_DATA_SINK="localhost:8080;localhost:8080" pbench-tool-meister-start --sysinfo="none" ${group} status=${?} if [[ ${status} -ne 0 ]]; then printf -- "\"pbench-tool-meister-start ${group}\" failed to execute successfully (exit code: ${status})\n" >&2 exit 1 fi -_PBENCH_TOOL_MEISTER_STOP_LOG_LEVEL="debug" pbench-tool-meister-stop --sysinfo="none" ${group} +_PBENCH_TOOL_MEISTER_STOP_LOG_LEVEL="debug" PBENCH_REDIS_SERVER="localhost:17001;localhost:17001" PBENCH_TOOL_DATA_SINK="localhost" pbench-tool-meister-stop --sysinfo="none" ${group} status=${?} if [[ ${status} -ne 0 ]]; then printf -- "\"pbench-tool-meister-stop\" failed to execute successfully (exit code: ${status})\n" >&2 diff --git a/lib/pbench/agent/utils.py b/lib/pbench/agent/utils.py index bf38f0b61c..a4822d29c5 100644 --- a/lib/pbench/agent/utils.py +++ b/lib/pbench/agent/utils.py @@ -2,18 +2,210 @@ import logging import os import re +import signal import subprocess import sys +import time from datetime import datetime from pbench.agent.constants import ( + def_redis_port, sysinfo_opts_available, sysinfo_opts_convenience, sysinfo_opts_default, ) +class BaseReturnCode: + """BaseReturnCode - base class of common methods for symbolic return codes + for main programs. + """ + + # Common success code is zero. + SUCCESS = 0 + + # Common kill sub-codes + KILL_SUCCESS = 0 + KILL_READEXC = 1 + KILL_BADPID = 2 + KILL_READERR = 3 + KILL_TERMEXC = 4 + KILL_KILLEXC = 5 + + class Err(RuntimeError): + """Err - exception definition to capture return code as an attribute. + """ + + def __init__(self, message: str, return_code: int): + """Adds a return_code attribute to capture an integer representing + the return code a caller can pass along. + """ + super().__init__(message) + self.return_code = return_code + + @staticmethod + def kill_ret_code(kill_code: int, ret_val: int): + """kill_ret_code - return an integer return code made up of the given + kill code and a return value. + + A kill code of 0 and return value of 42 is returned as 42. + A kill code of 5 and return value of 52 is returned as 542. + """ + return (kill_code * 100) + ret_val + + +class BaseServer: + """BaseServer - abstract base class for common code shared between the + ToolDataSink and RedisServer classes. + """ + + def_port = None + bad_port_code = None + bad_host_code = None + name = None + + class Err(BaseReturnCode.Err): + """BaseServer.Err - derived from BaseReturnCode.Err, specifically + raised by BaseServer and its derived classes. + """ + + pass + + def __init__(self, spec: str, def_host_name: str): + """__init__ - from the given IP/port specification, determine the + IP:port for binding (listening) and the IP:port for connecting. + + The IP/port specification can be given in one of two forms: + + - `:' + * where the same ip address and port are used for binding and + connecting + - `:;:` + * where a semi-colon separates the bind ip/port from the connecting + ip/port + + In either case, a missing port (bare colon, optional) indicates the + default port should be used. If no IP address is given, the default + host name is used. + + No attempt is made to verify that the IP address resolves, or that it + is reachable, though we do check they are syntactically valid. + """ + assert ( + def_host_name + ), f"Logic bomb! Default host name required: {spec!r}, {def_host_name!r}" + _spec = spec if spec else def_host_name + parts = _spec.split(";", 1) + pairs = [] + for part in parts: + host_port_parts = part.rsplit(":", 1) + if len(host_port_parts) == 1: + port = self.def_port + else: + try: + port = int(host_port_parts[1]) + except ValueError as exc: + if host_port_parts[1] == "": + port = self.def_port + else: + raise self.Err( + f"Bad port specified for {self.name} in '{spec}'", + self.bad_port_ret_code, + ) from exc + host = host_port_parts[0] if host_port_parts[0] else def_host_name + if host[0] == "[" and host[-1] == "]": + # Brackets are invalid for a host name, but might be used when + # specifying a port with an IPv6 address, strip them before we + # validate the host name. + host = host[1:-1] + if validate_hostname(host) != 0: + raise self.Err( + f"Bad host specified for {self.name} in '{spec}'", + self.bad_host_ret_code, + ) + pairs.append((host, port)) + + self.bind_host, self.bind_port = pairs[0] + if len(pairs) == 2: + # Separate bind/connecting ip:port + self.host, self.port = pairs[1] + self._repr = f"{self.name} - {self.bind_host}:{self.bind_port} / {self.host}:{self.port}" + else: + assert len(pairs) == 1, "Logic bomb! unexpected pairs, {pairs!r}" + self.host, self.port = pairs[0] + self._repr = f"{self.name} - {self.host}:{self.port}" + + self.pid_file = None + + def __repr__(self): + return self._repr + + def kill(self, ret_val: int): + """kill - attempt to KILL the running Redis server. + + This method is a no-op if the server instance isn't managed by us. + + Returns BaseReturnCode "enum" via the "kill" return code method. + """ + assert self.pid_file is not None, f"Logic bomb! Unexpected state: {self!r}" + + try: + raw_pid = self.pid_file.read_text() + except FileNotFoundError: + return BaseReturnCode.kill_ret_code(BaseReturnCode.KILL_SUCCESS, ret_val) + except OSError: + return BaseReturnCode.kill_ret_code(BaseReturnCode.KILL_READERR, ret_val) + except Exception: + # No "pid" to kill + return BaseReturnCode.kill_ret_code(BaseReturnCode.KILL_READEXC, ret_val) + + try: + pid = int(raw_pid) + except ValueError: + # Bad pid value + return BaseReturnCode.kill_ret_code(BaseReturnCode.KILL_BADPID, ret_val) + + pid_exists = True + timeout = time.time() + 60 + while pid_exists: + try: + os.kill(pid, signal.SIGTERM) + except ProcessLookupError: + pid_exists = False + except Exception: + # Some other error encountered trying to KILL the process. + return BaseReturnCode.kill_ret_code( + BaseReturnCode.KILL_TERMEXC, ret_val + ) + else: + if time.time() > timeout: + try: + os.kill(pid, signal.SIGKILL) + except ProcessLookupError: + pass + except Exception: + return BaseReturnCode.kill_ret_code( + BaseReturnCode.KILL_KILLEXC, ret_val + ) + break + time.sleep(0.1) + + # "successfully" KILL'd the give process. + return BaseReturnCode.kill_ret_code(BaseReturnCode.KILL_SUCCESS, ret_val) + + +class RedisServerCommon(BaseServer): + """RedisServerCommon - an encapsulation of the common handling of the Redis + server specification. + """ + + def_port = def_redis_port + bad_port_ret_code = 1 + bad_host_ret_code = 1 + name = "Redis server" + + def setup_logging(debug, logfile): """Setup logging for client :param debug: Turn on debug logging diff --git a/lib/pbench/test/unit/agent/test_utils.py b/lib/pbench/test/unit/agent/test_utils.py new file mode 100644 index 0000000000..9273e96063 --- /dev/null +++ b/lib/pbench/test/unit/agent/test_utils.py @@ -0,0 +1,211 @@ +"""Tests for the utils module. +""" + +import os +import pathlib +import pytest +import signal +import time + +from pbench.agent.utils import BaseServer, BaseReturnCode + + +class OurServer(BaseServer): + def_port = 4242 + bad_port_ret_code = 42 + bad_host_ret_code = 43 + name = "forty-two" + + +class TestBaseServer: + """Verify the utils BaseServer class. + """ + + def test_constructor(self): + with pytest.raises(AssertionError): + bs = OurServer("", "") + + for arg1, arg2 in [("", "localhost"), ("localhost", "localhost")]: + bs = OurServer(arg1, arg2) + assert bs.pid_file is None + assert bs.host == "localhost" + assert bs.port == OurServer.def_port + assert bs.bind_host == bs.host + assert bs.bind_port == bs.port + assert repr(bs) == f"forty-two - localhost:{OurServer.def_port:d}" + + _def = "localhost" + + with pytest.raises(OurServer.Err) as excinfo: + bs = OurServer("bad_host.example.com", _def) + assert ( + excinfo.value.return_code == OurServer.bad_host_ret_code + ), f"{excinfo.value!r}" + + with pytest.raises(OurServer.Err) as excinfo: + bs = OurServer("bad-port.example.com:bad", _def) + assert ( + excinfo.value.return_code == OurServer.bad_port_ret_code + ), f"{excinfo.value!r}" + + for arg1, exp_port in [ + ("host.example.com:2345", 2345), + ("host.example.com:", 4242), + ]: + bs = OurServer(arg1, _def) + assert bs.pid_file is None + assert bs.host == "host.example.com" + assert bs.port == exp_port + assert bs.bind_host == bs.host + assert bs.bind_port == bs.port + assert repr(bs) == f"forty-two - host.example.com:{exp_port:d}" + + bs = OurServer(":2345", _def) + assert bs.pid_file is None + assert bs.host == "localhost" + assert bs.port == 2345 + assert bs.bind_host == bs.host + assert bs.bind_port == bs.port + assert repr(bs) == "forty-two - localhost:2345" + + bs = OurServer("127.0.0.42:4567", _def) + assert bs.pid_file is None + assert bs.host == "127.0.0.42" + assert bs.port == 4567 + assert bs.bind_host == bs.host + assert bs.bind_port == bs.port + assert repr(bs) == "forty-two - 127.0.0.42:4567" + + bs = OurServer("[127::42]:4567", _def) + assert bs.pid_file is None + assert bs.host == "127::42" + assert bs.port == 4567 + assert bs.bind_host == bs.host + assert bs.bind_port == bs.port + assert repr(bs) == "forty-two - 127::42:4567" + + bs = OurServer("bind.example.com:2345;host.example.com:6789", _def) + assert bs.pid_file is None + assert bs.host == "host.example.com" + assert bs.port == 6789 + assert bs.bind_host == "bind.example.com" + assert bs.bind_port == 2345 + assert repr(bs) == "forty-two - bind.example.com:2345 / host.example.com:6789" + + def test_kill(self, pytestconfig, monkeypatch): + bs = OurServer("localhost", "localhost") + with pytest.raises(AssertionError): + bs.kill(1) + + bs = OurServer("localhost", "localhost") + TMP = pathlib.Path(pytestconfig.cache.get("TMP", None)) + pidfile = TMP / "test.pid" + pidfile.write_text("12345") + bs.pid_file = pidfile + ret = bs.kill(42) + assert ret == 42 + + pidfile.write_text("badpid") + ret = bs.kill(42) + assert ret == (BaseReturnCode.KILL_BADPID * 100) + 42 + + bs.pid_file = TMP / "enoent.pid" + ret = bs.kill(42) + assert ret == 42 + + class MockPath: + def __init__(self, exc: Exception): + self._exc = exc + + def read_text(self): + raise self._exc + + bs.pid_file = MockPath(OSError(13, "fake oserror")) + ret = bs.kill(42) + assert ret == 342 + + bs.pid_file = MockPath(Exception("fake exception")) + ret = bs.kill(42) + assert ret == 142 + + class MockTime: + def __init__(self): + self._clock = 0 + + def time(self, *args, **kwargs): + self._clock += 1 + return self._clock + + def sleep(self, *args, **kwargs): + return + + class MockKill: + behaviors = { + "1001": (True, False, True, False, False, False), + "1002": (False, False, False, False, False, False), + "1003": (True, True, False, True, False, False), + "1004": (True, False, False, True, False, False), + "1005": (True, False, False, False, True, False), + "1006": (True, False, False, False, False, True), + } + + def __init__(self, behavior): + ( + self.pid_exists_term, + self.pid_exists_kill, + self.pid_killed_by_term, + self.pid_killed_by_kill, + self.pid_kill_term_exc, + self.pid_kill_kill_exc, + ) = self.behaviors[behavior] + + def kill(self, pid, sig): + if sig == signal.SIGTERM: + if self.pid_kill_term_exc: + raise Exception("term") + if self.pid_exists_term: + if self.pid_killed_by_term: + self.pid_exists_term = False + self.pid_exists_kill = False + else: + raise ProcessLookupError(pid) + else: + assert sig == signal.SIGKILL + if self.pid_kill_kill_exc: + raise Exception("kill") + if self.pid_exists_kill: + if self.pid_killed_by_kill: + self.pid_exists_term = False + self.pid_exists_kill = False + else: + raise ProcessLookupError(pid) + + bs.pid_file = pidfile + + test_cases = [ + ("1001", 42, "Kill a pid that is found, successfully"), + ("1002", 42, "Kill a pid that is not found, successfully"), + ( + "1003", + 42, + "Kill a pid that is found, successfully," + " where the SIGKILL successfully kills it", + ), + ( + "1004", + 42, + "Kill a pid that is found, unsuccessfully," + " where the SIGKILL does not find it", + ), + ("1005", 442, "Exception raised killing a pid w TERM"), + ("1006", 542, "Exception raised killing a pid w KILL"), + ] + for pid_text, ret_code, desc in test_cases: + pidfile.write_text(pid_text) + mock_time = MockTime() + mock_kill = MockKill(pid_text) + monkeypatch.setattr(os, "kill", mock_kill.kill) + monkeypatch.setattr(time, "time", mock_time.time) + monkeypatch.setattr(time, "sleep", mock_time.sleep) + ret = bs.kill(42) + assert ret == ret_code, f"{desc} FAILED, {pid_text!r}, {ret_code!r}"