Skip to content

Commit

Permalink
Fix: tools: crm_mon --daemonize should update when disconnected
Browse files Browse the repository at this point in the history
With crm_mon --daemonize, currently the output will continue to show the
last known status after the cluster is stopped on the local node. This
commit causes it to write "Not connected to CIB" (and more specific
details) to the output file:
* before the initial connection;
* as soon as the CIB connection is destroyed; and
* every time a reconnection attempt fails.

External agents are notified via traps rather than via output, so we can
ignore them.

We register the new message formatter functions directly instead of via
crm_mon_register_messages(). The reason is that
crm_mon_register_messages() is used for messages specific to the curses
format, so that they can be registered from within crm_mon.c. The new
crm-mon-disconnected formatter isn't used with console output; it's not
really necessary, and it's more complicated to implement (attempts so
far led to display issues after connection loss).

In the future it might make sense to rename crm_mon_register_messages()
in crm_mon_curses.c and reuse that name in crm_mon.c.

Closes T15

Signed-off-by: Reid Wahl <[email protected]>
  • Loading branch information
nrwahl2 committed Nov 3, 2022
1 parent 5d91b67 commit 2561f20
Showing 1 changed file with 91 additions and 8 deletions.
99 changes: 91 additions & 8 deletions tools/crm_mon.c
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,73 @@ static pcmk__supported_format_t formats[] = {
{ NULL, NULL, NULL }
};

/* "crm-mon-disconnected" formatter that produces nothing.
 *
 * Neither `out` nor `args` is read; the function only reports
 * pcmk_rc_no_output to the caller.
 */
PCMK__OUTPUT_ARGS("crm-mon-disconnected", "const char *", "int")
static int
crm_mon_disconnected_default(pcmk__output_t *out, va_list args)
{
    return pcmk_rc_no_output;
}

/* "crm-mon-disconnected" formatter that emits a one-line notice via
 * out->info().
 *
 * Variadic arguments, in order:
 *   const char *  optional description (may be NULL)
 *   int           an enum pcmk_pacemakerd_state value, passed as int
 *
 * The notice always starts with "Not connected to CIB". A non-NULL
 * description is appended after ": ", and a state other than
 * pcmk_pacemakerd_state_invalid is appended in parentheses using its
 * friendly name.
 *
 * Returns whatever out->info() returned.
 */
PCMK__OUTPUT_ARGS("crm-mon-disconnected", "const char *", "int")
static int
crm_mon_disconnected_text(pcmk__output_t *out, va_list args)
{
    const char *desc = va_arg(args, const char *);
    int raw_state = va_arg(args, int);
    enum pcmk_pacemakerd_state state = (enum pcmk_pacemakerd_state) raw_state;
    const char *separator = (desc != NULL)? ": " : "";
    const char *detail = pcmk__s(desc, "");
    int rc = pcmk_rc_ok;

    /* NOTE(review): presumably reset() discards earlier output so that a
     * file destination holds only this notice -- confirm against the
     * output object's reset() implementation.
     */
    if (out->dest != stdout) {
        out->reset(out);
    }

    if (state == pcmk_pacemakerd_state_invalid) {
        // No usable pacemakerd state: omit the parenthesized suffix
        rc = out->info(out, "Not connected to CIB%s%s", separator, detail);
    } else {
        rc = out->info(out, "Not connected to CIB%s%s (%s)",
                       separator, detail,
                       pcmk__pcmkd_state_enum2friendly(state));
    }

    out->finish(out, CRM_EX_DISCONNECT, true, NULL);
    return rc;
}

/* "crm-mon-disconnected" formatter that emits a single
 * <crm-mon-disconnected> XML node.
 *
 * Variadic arguments, in order:
 *   const char *  description, passed as the XML_ATTR_DESC attribute value
 *   int           an enum pcmk_pacemakerd_state value, passed as int
 *
 * The "pacemakerd-state" attribute value is the state's text name, or NULL
 * when the state is pcmk_pacemakerd_state_invalid.
 *
 * Always returns pcmk_rc_ok.
 */
PCMK__OUTPUT_ARGS("crm-mon-disconnected", "const char *", "int")
static int
crm_mon_disconnected_xml(pcmk__output_t *out, va_list args)
{
    const char *desc = va_arg(args, const char *);
    int raw_state = va_arg(args, int);
    enum pcmk_pacemakerd_state state = (enum pcmk_pacemakerd_state) raw_state;
    const char *state_s = NULL;

    /* NOTE(review): presumably reset() discards earlier output so that a
     * file destination holds only this node -- confirm against the output
     * object's reset() implementation.
     */
    if (out->dest != stdout) {
        out->reset(out);
    }

    state_s = (state == pcmk_pacemakerd_state_invalid)
              ? NULL
              : pcmk_pacemakerd_api_daemon_state_enum2text(state);

    pcmk__output_create_xml_node(out, "crm-mon-disconnected",
                                 XML_ATTR_DESC, desc,
                                 "pacemakerd-state", state_s,
                                 NULL);

    out->finish(out, CRM_EX_DISCONNECT, true, NULL);
    return pcmk_rc_ok;
}

/* Formatter table for the "crm-mon-disconnected" message, one entry per
 * output format name. The table ends with an all-NULL sentinel entry.
 *
 * "html" deliberately shares the text formatter; "default" maps to the
 * formatter that produces no output.
 */
static pcmk__message_entry_t fmt_functions[] = {
{ "crm-mon-disconnected", "default", crm_mon_disconnected_default },
{ "crm-mon-disconnected", "html", crm_mon_disconnected_text },
{ "crm-mon-disconnected", "text", crm_mon_disconnected_text },
{ "crm-mon-disconnected", "xml", crm_mon_disconnected_xml },
{ NULL, NULL, NULL },   // sentinel
};

/* Define exit codes for monitoring-compatible output
* For nagios plugins, the possibilities are
* OK=0, WARN=1, CRIT=2, and UNKNOWN=3
Expand Down Expand Up @@ -660,6 +727,9 @@ reconnect_after_timeout(gpointer data)
return G_SOURCE_REMOVE;
}

out->message(out, "crm-mon-disconnected",
"Latest connection attempt failed", pcmkd_state);

reconnect_timer = g_timeout_add(options.reconnect_ms,
reconnect_after_timeout, NULL);
return G_SOURCE_REMOVE;
Expand All @@ -672,7 +742,16 @@ reconnect_after_timeout(gpointer data)
static void
mon_cib_connection_destroy(gpointer user_data)
{
out->transient(out, "\nConnection to the cluster lost");
const char *msg = "Connection to the cluster lost";

pcmkd_state = pcmk_pacemakerd_state_invalid;

/* No crm-mon-disconnected message for console; a working implementation
* is not currently worth the effort
*/
out->transient(out, "\n%s", msg);

out->message(out, "crm-mon-disconnected", msg, pcmkd_state);

if (refresh_timer != NULL) {
/* we'll trigger a refresh after reconnect */
Expand All @@ -695,7 +774,6 @@ mon_cib_connection_destroy(gpointer user_data)
reconnect_timer = g_timeout_add(options.reconnect_ms,
reconnect_after_timeout, NULL);
}
return;
}

/* Signal handler installed into the mainloop for normal program shutdown */
Expand Down Expand Up @@ -1435,6 +1513,9 @@ main(int argc, char **argv)
pe__register_messages(out);
stonith__register_messages(out);

// Messages internal to this file, nothing curses-specific
pcmk__register_messages(out, fmt_functions);

if (args->version) {
out->version(out, false);
return clean_up(CRM_EX_OK);
Expand Down Expand Up @@ -1481,13 +1562,9 @@ main(int argc, char **argv)
one_shot();
}

out->message(out, "crm-mon-disconnected",
"Waiting for initial connection", pcmkd_state);
do {
#if CURSES_ENABLED
if (output_format == mon_output_console) {
clear();
refresh();
}
#endif
out->transient(out, "Connecting to cluster...");
rc = connect_daemons();

Expand All @@ -1499,6 +1576,12 @@ main(int argc, char **argv)

// Give some time to view all output even if we won't retry
pcmk__sleep_ms(options.reconnect_ms);
#if CURSES_ENABLED
if (output_format == mon_output_console) {
clear();
refresh();
}
#endif
}
} while ((rc == ENOTCONN) || (rc == ECONNREFUSED));

Expand Down

0 comments on commit 2561f20

Please sign in to comment.