Skip to content

Commit

Permalink
Correct pbench-tool-meister-client & -stop
Browse files Browse the repository at this point in the history
Prior to this commit, `pbench-tool-meister-start` was the only CLI
interface which was using the two forms for specifying the Redis server
and Tool Data Sink host/port for binding and connecting.  Both the CLI
commands for stopping the Tool Meister sub-system and communicating with
it incorrectly expected the presence of the `PBENCH_REDIS_SERVER` to
only specify a `<host>:<port>`, and also indicate that instance was not
managed by CLI commands on this controller.

This commit corrects that where both `pbench-tool-meister-client` and
`pbench-tool-meister-stop` now properly handle both forms of host/port
specification, and `-stop` determines whether or not the CLI is in
charge of orchestrating the server components by looking for the local
pid files.

This was accomplished via a series of refactorings to share code across
those three CLI commands:

  * Add and use `BaseReturnCode` in `lib/pbench/agent/utils.py`
  * Move BaseServer to `.../agent/utils.py`
  * Add a `RedisServer` class for `pbench-tool-meister-stop` to take
    advantage of the common parameter handling of the Redis server
    specification
  * In `pbench-tool-meister-client`, we use the new `RedisServerCommon`
    class shared with `-start` and `-stop`
  * We fix the new `BaseServer.kill()` to exit immediately after posting
    a `SIGKILL`
  * Add unit tests for all common code moved to `.../agent/utils.py`
  • Loading branch information
portante committed Apr 22, 2021
1 parent 5794fc1 commit 74ec530
Show file tree
Hide file tree
Showing 7 changed files with 506 additions and 266 deletions.
30 changes: 10 additions & 20 deletions agent/util-scripts/pbench-tool-meister-client
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,12 @@ import logging
import os
import sys

from pbench.agent.constants import (
cli_tm_allowed_actions,
cli_tm_channel_prefix,
def_redis_port,
)
from pbench.agent.constants import cli_tm_allowed_actions, cli_tm_channel_prefix
from pbench.agent.tool_meister_client import Client
from pbench.agent.utils import RedisServerCommon


def main(argv):
def main(argv: list) -> int:
"""Main program for the Tool Meister client CLI. The command line
arguments are:
Expand Down Expand Up @@ -73,23 +70,16 @@ def main(argv):
# properly shut down.
return 0

redis_server = os.environ.get("PBENCH_REDIS_SERVER", f"localhost:{def_redis_port}")
parts = redis_server.split(":", 1)
if len(parts) != 2:
logger.error("Bad Redis server specified, {!r}", redis_server)
return 1
redis_server_env = os.environ.get("PBENCH_REDIS_SERVER", "")
try:
redis_port = int(parts[1])
except Exception:
logger.error("Bad port for Redis server specified in {!r}", redis_server)
return 1
else:
redis_host = parts[0]
redis_server = RedisServerCommon(redis_server_env, "localhost")
except RedisServerCommon.Err as exc:
logger.error(str(exc))
return exc.return_code

# The Redis server is always running on the local host with the CLI.
with Client(
redis_host=redis_host,
redis_port=redis_port,
redis_host=redis_server.host,
redis_port=redis_server.port,
channel_prefix=cli_tm_channel_prefix,
logger=logger,
) as client:
Expand Down
177 changes: 13 additions & 164 deletions agent/util-scripts/pbench-tool-meister-start
Original file line number Diff line number Diff line change
Expand Up @@ -97,12 +97,10 @@ the benchmark execution environment:
- ssh-opts
"""

import errno
import json
import logging
import os
import shlex
import signal
import socket
import sys
import time
Expand All @@ -128,9 +126,12 @@ from pbench.agent.tool_meister import main as tm_main
from pbench.agent.tool_meister_client import Client
from pbench.agent.toolmetadata import ToolMetadata
from pbench.agent.utils import (
BaseReturnCode,
BaseServer,
cli_verify_sysinfo,
error_log,
info_log,
RedisServerCommon,
validate_hostname,
)

Expand All @@ -140,12 +141,11 @@ from pbench.agent.utils import (
_TDS_STARTUP_TIMEOUT = 60


class ReturnCode:
class ReturnCode(BaseReturnCode):
"""ReturnCode - symbolic return codes for the main program of
pbench-tool-meister-start.
"""

SUCCESS = 0
BADTOOLGROUP = 1
BADAGENTCONFIG = 2
MISSINGINSTALLDIR = 4
Expand Down Expand Up @@ -182,37 +182,8 @@ class ReturnCode:
BADFULLHOSTNAME = 37
BADHOSTNAME = 38

# Kill sub-codes
KILL_SUCCESS = 0
KILL_READEXC = 1
KILL_BADPID = 2
KILL_PIDNOTFOUND = 3
KILL_KILLERR = 4
KILL_KILLEXC = 5

class Err(RuntimeError):
"""Err - exception definition to capture return code as an attribute.
"""

def __init__(self, message: str, return_code: int):
"""Adds a return_code attribute to capture an integer representing
the return code a caller can pass along.
"""
super().__init__(message)
self.return_code = return_code

@staticmethod
def kill_ret_code(kill_code: int, ret_val: int):
"""kill_ret_code - return an integer return code made up of the given
kill code and a return value.
A kill code of 0 and return value of 42 is returned as 42.
A kill code of 5 and return value of 52 is returned as 542.
"""
return (kill_code * 100) + ret_val


def _waitpid(pid: int):
def _waitpid(pid: int) -> int:
"""Wrapper for os.waitpid()
Returns the exit status of the given process ID.
Expand Down Expand Up @@ -241,10 +212,10 @@ def start_tms_via_ssh(
tool_group: str,
ssh_opts: str,
full_hostname: str,
redis_server,
redis_server: RedisServerCommon,
redis_client: redis.Redis,
logger: logging.Logger,
):
) -> None:
"""start_tms_via_ssh - orchestrate the creation of local and remote Tool
Meister instances using ssh for those that are remote.
Expand Down Expand Up @@ -364,88 +335,6 @@ def start_tms_via_ssh(
)


class BaseServer:
"""BaseServer - abstract base class for common code shared between the
ToolDataSink and RedisServer classes.
"""

def_port = None
bad_port_code = None
bad_host_code = None
name = None

class Err(ReturnCode.Err):
"""BaseServer.Err - derived from ReturnCode.Err, specifically raised by
BaseServer and its derived classes.
"""

pass

def __init__(self, spec: str, def_host_name: str):
"""__init__ - from the given IP/port specification given, determine the
IP:port for binding (listening) and the IP:port for connecting.
The IP/port specification can be given in one of two forms:
- `<ip>:<port>'
* where the same ip address and port are used for binding and
connecting
- `<bind ip>:<port>;<connect ip>:<port>`
* where a semi-colon separates the bind ip/port from the connecting
ip/port
In either case, a missing port (bare colon, optional) indicates the
default port should be used. If no IP address is given, the default
host name is used.
No attempt is made to verify that the IP address resolves, or that it
is reachable, though we do check they are syntactically valid.
"""
_spec = spec if spec else def_host_name
parts = _spec.split(";", 1)
pairs = []
for part in parts:
host_port_parts = part.rsplit(":", 1)
if len(host_port_parts) == 1:
port = self.def_port
else:
try:
port = int(host_port_parts[1])
except ValueError as exc:
if host_port_parts[1] == "":
port = self.def_port
else:
raise self.Err(
f"Bad port specified for {self.name} in '{spec}'",
self.bad_port_ret_code,
) from exc
host = host_port_parts[0] if host_port_parts[0] else def_host_name
if host[0] == "[" and host[-1] == "]":
# Brackets are invalid for a host name, but might be used when
# specifying a port with an IPv6 address, strip them before we
# validate the host name.
host = host[1:-1]
if validate_hostname(host) != 0:
raise self.Err(
f"Bad host specified for {self.name} in '{spec}'",
self.bad_host_ret_code,
)
pairs.append((host, port))

self.bind_host, self.bind_port = pairs[0]
if len(pairs) == 2:
# Separate bind/connecting ip:port
self.host, self.port = pairs[1]
self._repr = f"{self.name} - {self.bind_host}:{self.bind_port} / {self.host}:{self.port}"
else:
assert len(pairs) == 1, "Logic bomb! unexpected pairs, {pairs!r}"
self.host, self.port = pairs[0]
self._repr = f"{self.name} - {self.host}:{self.port}"

def __repr__(self):
return self._repr


class ToolDataSink(BaseServer):
"""ToolDataSink - an encapsulation of the handling of the Tool Data Sink
specification and methods to optionally create and manage an instance.
Expand All @@ -461,10 +350,10 @@ class ToolDataSink(BaseServer):
exec_dir: Path,
full_hostname: str,
tds_param_key: str,
redis_server,
redis_server: RedisServerCommon,
redis_client: redis.Redis,
logger: logging.Logger,
):
) -> None:
assert (
self.host is not None
and self.port is not None
Expand Down Expand Up @@ -530,7 +419,7 @@ class ToolDataSink(BaseServer):
time.sleep(0.1)

@staticmethod
def wait(chan: RedisChannelSubscriber, logger: logging.Logger):
def wait(chan: RedisChannelSubscriber, logger: logging.Logger) -> int:
"""wait - Wait for the Tool Data Sink to report back success or
failure regarding the Tool Meister environment setup.
"""
Expand Down Expand Up @@ -559,15 +448,13 @@ class ToolDataSink(BaseServer):
return 0 if status == "success" else 1


class RedisServer(BaseServer):
class RedisServer(RedisServerCommon):
"""RedisServer - an encapsulation of the handling of the Redis server
specification and methods to optionally create and manage an instance.
"""

def_port = def_redis_port
bad_port_ret_code = ReturnCode.BADREDISPORT
bad_host_ret_code = ReturnCode.BADREDISHOST
name = "Redis server"

# Redis server configuration template for pbench's use
conf_tmpl = """bind {bind_host_names}
Expand All @@ -584,7 +471,7 @@ port {redis_port:d}
super().__init__(spec, def_host_name)
self.pid_file = None

def start(self, tm_dir: Path, full_hostname: str, logger: logging.Logger):
def start(self, tm_dir: Path, full_hostname: str, logger: logging.Logger) -> None:
"""start_redis - configure and start a Redis server.
Raises a BaseServer.Err exception if an error is encountered.
Expand Down Expand Up @@ -695,46 +582,8 @@ port {redis_port:d}
ReturnCode.REDISFAILED,
)

def kill(self, ret_val: int):
"""kill - attempt to KILL the running Redis server.
This method is a no-op if the server instance isn't managed by us.
Returns ReturnCode "enum" via the "kill" return code method.
"""
assert self.pid_file is not None, f"Logic bomb! Unexpected state: {self!r}"

try:
raw_pid = self.pid_file.read_text()
except Exception:
# No "pid" to kill
return ReturnCode.kill_ret_code(ReturnCode.KILL_READEXC, ret_val)
else:
try:
pid = int(raw_pid)
except ValueError:
# Bad pid value
return ReturnCode.kill_ret_code(ReturnCode.KILL_BADPID, ret_val)
try:
os.kill(pid, signal.SIGKILL)
except OSError as exc:
if exc.errno == errno.ESRCH:
# PID not found, ignore
return ReturnCode.kill_ret_code(
ReturnCode.KILL_PIDNOTFOUND, ret_val
)
else:
# Some error encountered trying to KILL the process.
return ReturnCode.kill_ret_code(ReturnCode.KILL_KILLERR, ret_val)
except Exception:
# Some other error encountered trying to KILL the process.
return ReturnCode.kill_ret_code(ReturnCode.KILL_KILLEXC, ret_val)
else:
# "successfully" KILL'd the give process.
return ReturnCode.kill_ret_code(ReturnCode.KILL_SUCCESS, ret_val)


def main(_prog: str, cli_params: Namespace):
def main(_prog: str, cli_params: Namespace) -> int:
"""Main program for tool meister start.
:cli_params: expects a CLI parameters object which has five attributes:
Expand Down
Loading

0 comments on commit 74ec530

Please sign in to comment.