Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

agent: reduce directory errors, easier debugging, various improvements #818

Merged
merged 11 commits into from
Nov 9, 2023
21 changes: 8 additions & 13 deletions nixos/platform/agent.nix
Original file line number Diff line number Diff line change
@@ -191,23 +191,14 @@ in
commands = [
"${pkgs.fc.agent}/bin/fc-collect-garbage"
"${pkgs.fc.agent}/bin/fc-manage"
"${pkgs.fc.agent}/bin/fc-maintenance list"
"${pkgs.fc.agent}/bin/fc-maintenance show"
"${pkgs.fc.agent}/bin/fc-maintenance delete"
"${pkgs.fc.agent}/bin/fc-maintenance metrics"
"${pkgs.fc.agent}/bin/fc-maintenance check"
"${pkgs.fc.agent}/bin/fc-maintenance -v delete"
];
groups = [ "admins" "sudo-srv" "service" ];
}
{
commands = [ "${pkgs.fc.agent}/bin/fc-manage check" ];
groups = [ "sensuclient" ];
}
{
commands = [
"${pkgs.fc.agent}/bin/fc-maintenance metrics"
];
groups = [ "telegraf" ];
users = [ "sensuclient" ];
}
{
commands = [ "${pkgs.fc.agent}/bin/fc-postgresql check-autoupgrade-unexpected-dbs" ];
@@ -218,6 +209,10 @@ in
commands = [
"${pkgs.fc.agent}/bin/fc-maintenance run"
"${pkgs.fc.agent}/bin/fc-maintenance run --run-all-now"
"${pkgs.fc.agent}/bin/fc-maintenance schedule"
"${pkgs.fc.agent}/bin/fc-maintenance -v run"
"${pkgs.fc.agent}/bin/fc-maintenance -v run --run-all-now"
"${pkgs.fc.agent}/bin/fc-maintenance -v schedule"
];
groups = [ "admins" ];
}
@@ -402,14 +397,14 @@ in
};
fc-maintenance = {
notification = "fc-maintenance check failed.";
command = "sudo ${pkgs.fc.agent}/bin/fc-maintenance check";
command = "${pkgs.fc.agent}/bin/fc-maintenance check";
interval = 180;
};
};
};
flyingcircus.services.telegraf.inputs = {
exec = [{
commands = [ "/run/wrappers/bin/sudo ${pkgs.fc.agent}/bin/fc-maintenance metrics" ];
commands = [ "${pkgs.fc.agent}/bin/fc-maintenance metrics" ];
timeout = "10s";
data_format = "json";
json_name_key = "name";
19 changes: 18 additions & 1 deletion pkgs/fc/agent/default.nix
Original file line number Diff line number Diff line change
@@ -15,6 +15,7 @@
, xfsprogs
, pytest
, structlog
, enableSlurm ? stdenv.isLinux
}:

let
@@ -34,6 +35,20 @@ let
buildInputs = [ pytest structlog ];
};

# Local derivation for the `stamina` Python package, pinned to 23.1.0 and
# fetched from PyPI.
stamina = py.buildPythonPackage rec {
pname = "stamina";
version = "23.1.0";
# The package is built from its pyproject.toml metadata.
format = "pyproject";

# `rec` lets fetchPypi reuse pname/version from above; the hash pins the
# exact source archive.
src = fetchPypi {
inherit pname version;
hash = "sha256-sWzj1S1liqdduBP8amZht3Cr/qkV9yzaSOMl8qeFR4Y=";
};

# Build-time only: the hatchling build backend and the hatch plugins used
# by the package's build.
nativeBuildInputs = with py; [ hatchling hatch-vcs hatch-fancy-pypi-readme ];
# Runtime dependencies, propagated to anything that depends on stamina.
propagatedBuildInputs = with py; [ structlog tenacity typing-extensions ];
};

in
buildPythonPackage rec {
name = "fc-agent-${version}";
@@ -67,15 +82,17 @@ buildPythonPackage rec {
py.structlog
py.typer
py.pyyaml
stamina
util-linux
] ++ lib.optionals stdenv.isLinux [
dmidecode
gptfdisk
multipath-tools
py.pyslurm
py.pystemd
py.systemd
xfsprogs
] ++ lib.optionals enableSlurm [
py.pyslurm
];
dontStrip = true;
doCheck = true;
12 changes: 8 additions & 4 deletions pkgs/fc/agent/fc/conftest.py
Original file line number Diff line number Diff line change
@@ -34,7 +34,7 @@ def agent_maintenance_config(tmp_path):


@fixture
def reqmanager(tmp_path, agent_maintenance_config):
def reqmanager(tmp_path, logger, agent_maintenance_config):
spooldir = tmp_path / "maintenance"
spooldir.mkdir()
enc_path = tmp_path / "enc.json"
@@ -44,6 +44,7 @@ def reqmanager(tmp_path, agent_maintenance_config):
spooldir=spooldir,
enc_path=enc_path,
config_file=agent_maintenance_config,
log=logger,
) as rm:
yield rm

@@ -70,9 +71,12 @@ def _request_population(n):

@fixture
def logger():
_logger = structlog.get_logger()
_logger.trace = lambda *a, **k: None
return _logger
# pytest-structlog patches away structlog.config, but we can still call
# structlog._config.configure directly.
# We need to do that to support our custom `trace` logging method, which
# fails with BoundLoggingFilteringAtNotset, the default wrapper class.
structlog._config.configure(wrapper_class=structlog.BoundLogger)
return structlog.get_logger()


@fixture
6 changes: 6 additions & 0 deletions pkgs/fc/agent/fc/maintenance/activity/update.py
Original file line number Diff line number Diff line change
@@ -10,6 +10,7 @@
from fc.maintenance import state
from fc.maintenance.estimate import Estimate
from fc.util import nixos
from fc.util.logging import init_command_logging
from fc.util.nixos import UnitChanges

from ...util.channel import Channel
@@ -281,6 +282,8 @@ def run(self):
self.returncode = 0
return

init_command_logging(self.log)

system_path = nixos.build_system(
self.next_channel_url, log=self.log
)
@@ -305,6 +308,9 @@ def run(self):
next_environment=self.next_environment,
)

# No cleanup of the command log file is needed because we initialize
# logging only after checking that the activity changes the system.

self.returncode = 0

@property
9 changes: 4 additions & 5 deletions pkgs/fc/agent/fc/maintenance/activity/vm_change.py
Original file line number Diff line number Diff line change
@@ -177,12 +177,11 @@ def _update_reboot_needed(self):
else:
self.reboot_needed = None

def load(self):
def run(self):
self._update_reboot_needed()

# Running a VMChangeActivity is not needed, so no run method.
# The request manager handles the reboot required by this activity.
self.returncode = 0

def resume(self):
# There's nothing to do so we can safely "retry" this activity.
# run() just checks if the reboot is needed at the moment so we can safely
# retry this activity.
self.run()
Loading