Skip to content

Commit

Permalink
agent: fc-maintenance allows non-invasive commands for non-root
Browse files Browse the repository at this point in the history
We now separate non-invasive and invasive code paths better.
Moves around existing methods to a more logical order, grouping
invasive and non-invasive methods.

Some read-only commands for showing requests, metrics and the Sensu
check can now be called by non-root users without the need for sudo.

Clean up some uses of rm.scan(); scanning is now handled by
__enter__, which must be called for all invasive methods. This also
takes care of loading requests and creating any missing directory now.

Add missing @require_lock decorators for invasive methods and give
internal methods an underscore prefix. The latter don't have the
decorator but it should be fairly obvious how to use them.

PL-131813
  • Loading branch information
dpausp committed Nov 9, 2023
1 parent 9db289d commit 12abf01
Show file tree
Hide file tree
Showing 7 changed files with 569 additions and 328 deletions.
21 changes: 8 additions & 13 deletions nixos/platform/agent.nix
Original file line number Diff line number Diff line change
Expand Up @@ -191,23 +191,14 @@ in
commands = [
"${pkgs.fc.agent}/bin/fc-collect-garbage"
"${pkgs.fc.agent}/bin/fc-manage"
"${pkgs.fc.agent}/bin/fc-maintenance list"
"${pkgs.fc.agent}/bin/fc-maintenance show"
"${pkgs.fc.agent}/bin/fc-maintenance delete"
"${pkgs.fc.agent}/bin/fc-maintenance metrics"
"${pkgs.fc.agent}/bin/fc-maintenance check"
"${pkgs.fc.agent}/bin/fc-maintenance -v delete"
];
groups = [ "admins" "sudo-srv" "service" ];
}
{
commands = [ "${pkgs.fc.agent}/bin/fc-manage check" ];
groups = [ "sensuclient" ];
}
{
commands = [
"${pkgs.fc.agent}/bin/fc-maintenance metrics"
];
groups = [ "telegraf" ];
users = [ "sensuclient" ];
}
{
commands = [ "${pkgs.fc.agent}/bin/fc-postgresql check-autoupgrade-unexpected-dbs" ];
Expand All @@ -218,6 +209,10 @@ in
commands = [
"${pkgs.fc.agent}/bin/fc-maintenance run"
"${pkgs.fc.agent}/bin/fc-maintenance run --run-all-now"
"${pkgs.fc.agent}/bin/fc-maintenance schedule"
"${pkgs.fc.agent}/bin/fc-maintenance -v run"
"${pkgs.fc.agent}/bin/fc-maintenance -v run --run-all-now"
"${pkgs.fc.agent}/bin/fc-maintenance -v schedule"
];
groups = [ "admins" ];
}
Expand Down Expand Up @@ -402,14 +397,14 @@ in
};
fc-maintenance = {
notification = "fc-maintenance check failed.";
command = "sudo ${pkgs.fc.agent}/bin/fc-maintenance check";
command = "${pkgs.fc.agent}/bin/fc-maintenance check";
interval = 180;
};
};
};
flyingcircus.services.telegraf.inputs = {
exec = [{
commands = [ "/run/wrappers/bin/sudo ${pkgs.fc.agent}/bin/fc-maintenance metrics" ];
commands = [ "${pkgs.fc.agent}/bin/fc-maintenance metrics" ];
timeout = "10s";
data_format = "json";
json_name_key = "name";
Expand Down
157 changes: 85 additions & 72 deletions pkgs/fc/agent/fc/maintenance/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
init_command_logging,
init_logging,
)
from fc.util.typer_utils import FCTyperApp
from fc.util.typer_utils import FCTyperApp, requires_root, requires_sudo
from typer import Argument, Exit, Option

app = FCTyperApp("fc-maintenance")
Expand Down Expand Up @@ -70,40 +70,38 @@ def fc_maintenance(
),
spooldir: Path = Option(
file_okay=False,
writable=True,
default=DEFAULT_SPOOLDIR,
help="Directory to store maintenance request files.",
),
logdir: Path = Option(
exists=True,
file_okay=False,
writable=True,
default="/var/log",
help="Directory for log files. Must have a fc-agent subdirectory.",
),
lock_dir: Path = Option(
exists=True,
file_okay=False,
writable=True,
default="/run/lock",
help="Directory where the lock file for exclusive operations should be "
"placed.",
),
config_file: Path = Option(
dir_okay=False,
readable=True,
default=DEFAULT_CONFIG_FILE,
help="Path to the agent config file.",
),
# Normal users cannot read the default file but that's ok for many commands.
enc_path: Path = Option(
dir_okay=False,
readable=True,
readable=False,
default="/etc/nixos/enc.json",
help="Path to enc.json",
),
):
"""
Manage maintenance requests for this machine.
Some sub commands must be run with sudo or as root because they may change
request state. They are marked with `[sudo]` or `[root]`.
"""
global context
global rm
Expand Down Expand Up @@ -131,9 +129,19 @@ def fc_maintenance(


@app.command()
@requires_sudo
def schedule():
"""[sudo] Schedule all requests."""
log.info("fc-maintenance-schedule-start")
with rm:
rm.schedule()
log.info("fc-maintenance-schedule-finished")


@app.command()
@requires_sudo
def run(run_all_now: bool = False, force_run: bool = False):
"""
Run all maintenance activity requests that are due.
"""[sudo] Run all maintenance activity requests that are due.
Note that this does not schedule pending requests like running the script without
arguments in the past did. Run the schedule subcommand if you want to ensure that we
Expand Down Expand Up @@ -175,26 +183,10 @@ def run(run_all_now: bool = False, force_run: bool = False):
log.info("fc-maintenance-run-finished")


@app.command(name="list")
def list_cmd():
"""
List active maintenance requests.
"""
with rm:
rm.list()


@app.command()
def show(request_id: Optional[str] = Argument(None), dump_yaml: bool = False):
"""Show details for a request."""
with rm:
rm.show(request_id, dump_yaml)


@app.command()
@requires_sudo
def delete(request_id: str, archive: bool = True):
"""
Delete a request by request ID.
"""[sudo] Delete a request by request ID.
See the output of the `list` subcommand for available request IDs.
"""
Expand All @@ -204,13 +196,58 @@ def delete(request_id: str, archive: bool = True):
rm.archive()


# Commands that work for unprivileged users (non-invasive).


@app.command(name="list")
def list_cmd():
"""List active maintenance requests."""
rm.list_requests()


@app.command()
def schedule():
"""Schedule all requests."""
log.info("fc-maintenance-schedule-start")
with rm:
rm.schedule()
log.info("fc-maintenance-schedule-finished")
def show(
request_id: Optional[str] = Argument(
None, help="Full request ID or a prefix to search for."
),
dump_yaml: bool = False,
):
"""Show details for a request.
Works for active and archived requests.
If `request_id` is given, active requests are searched first for an exact or
partial (prefix) request ID match. If nothing is found, archived requests
are tried.
If no `request_id` is given, the most recently added active request is shown.
"""
rm.show_request(request_id, dump_yaml)


@app.command()
def check():
"""Detect maintenance and request execution problems."""
fc.util.logging.init_logging(
context.verbose, context.logdir, log_to_console=context.verbose
)
try:
result = rm.check()
except Exception:
print("UNKNOWN: Exception occurred while running checks")
traceback.print_exc()
raise Exit(3)

print(result.format_output())
if result.exit_code:
raise Exit(result.exit_code)


@app.command()
def metrics():
"""Print metrics in telegraf JSON input format."""
jso = json.dumps(rm.get_metrics())
print(jso)


# Request subcommands
Expand All @@ -222,32 +259,33 @@ def schedule():
@request_app.callback(no_args_is_help=True)
def request_main():
"""
Create a new request (see sub commands).
[root] Create a new request (see sub commands).
"""


@request_app.command(name="script")
@requires_root
def run_script(comment: str, script: str, estimate: Optional[str] = None):
"""Request to run a script."""
"""[root] Request to run a script."""
request = Request(ShellScriptActivity(script), estimate, comment)
with rm:
rm.scan()
rm.add(request)


@request_app.command()
@requires_root
def reboot(comment: Optional[str] = None, cold_reboot: bool = False):
"""Request a reboot."""
"""[root] Request a reboot."""
action = "poweroff" if cold_reboot else "reboot"
request = Request(RebootActivity(action), comment=comment)
with rm:
rm.scan()
rm.add(request)


@request_app.command()
@requires_root
def system_properties():
"""Request reboot for changed system properties.
"""[root] Request reboot for changed system properties.
Runs applicable checks for the machine type (virtual/physical).
* Physical: kernel
Expand All @@ -259,7 +297,6 @@ def system_properties():
enc = load_enc(log, context.enc_path)

with rm:
rm.scan()
current_requests = rm.requests.values()

if enc["parameters"]["machine"] == "virtual":
Expand All @@ -272,8 +309,9 @@ def system_properties():


@request_app.command()
@requires_root
def update():
"""Request a system update.
"""[root] Request a system update.
Builds the system and prepares the update to be run in a maintenance
window by default.
Expand All @@ -287,7 +325,6 @@ def update():
init_command_logging(log, context.logdir)

with rm:
rm.scan()
current_requests = rm.requests.values()

with locked(log, context.lock_dir):
Expand All @@ -308,9 +345,8 @@ def update():
# Helper commands (not using reqmanager)


@app.command(
help="Check constraints on the state of machines in the same resource group."
)
@app.command()
@requires_root
def constraints(
in_service: list[str] = Option(
default=[],
Expand All @@ -324,6 +360,9 @@ def constraints(
),
),
):
"""[root] Check constraints on the state of machines in the same resource
group.
"""
log.info("fc-maintenance-constraints")

with directory_connection(context.enc_path) as directory:
Expand Down Expand Up @@ -352,31 +391,5 @@ def constraints(
log.debug("constraints-success")


@app.command()
def check():
fc.util.logging.init_logging(
context.verbose, context.logdir, log_to_console=context.verbose
)
try:
rm.scan()
result = rm.check()
except Exception:
print("UNKNOWN: Exception occurred while running checks")
traceback.print_exc()
raise Exit(3)

print(result.format_output())
if result.exit_code:
raise Exit(result.exit_code)


@app.command(help="Prints metrics in the telegraf JSON input format.")
def metrics():
rm.scan()
jso = json.dumps(rm.get_metrics())

print(jso)


if __name__ == "__main__":
app()
Loading

0 comments on commit 12abf01

Please sign in to comment.