Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix: libpacemaker: crm_mon --one-shot can fail during pacemaker shutdown #2902

Merged
merged 17 commits into from
Oct 12, 2022
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
160538e
API: libpacemaker: pcmk_pacemakerd_status() ipc_name arg is now const
nrwahl2 Oct 10, 2022
7758635
Feature: pacemakerd: New pcmk__pcmkd_state_enum2friendly() function
nrwahl2 Oct 10, 2022
7156177
Low: libcrmcommon: Check invalid time value in pacemakerd API reply
nrwahl2 Oct 10, 2022
da2e667
Low: libpacemaker: Correct default for pinged_buf in pacemakerd_event_cb
nrwahl2 Oct 10, 2022
62affe3
Refactor: libpacemaker: Improve return codes in pcmk__pacemakerd_status
nrwahl2 Oct 11, 2022
2221545
Feature: libpacemaker: pacemakerd-health message accepts state
nrwahl2 Oct 10, 2022
e7f2a33
Feature: libpacemaker: pcmk__pacemakerd_status() can return pcmkd state
nrwahl2 Oct 10, 2022
0e35bf2
Fix: libpacemaker: Memory leak in pcmk_cluster_queries.c:ipc_connect()
nrwahl2 Oct 10, 2022
bdc98fa
Doc: libpe_status: Replace old funcname in pe__build_rsc_list() comment
nrwahl2 Oct 11, 2022
d5f57ec
Refactor: libpacemaker: Clarify pointer arguments in pcmk_status.c
nrwahl2 Oct 11, 2022
a8697cb
Feature: libpacemaker: HTML formatter for pacemakerd-health message
nrwahl2 Oct 11, 2022
494e64c
Low: schemas: Copy API schemas in preparation for changes
nrwahl2 Oct 11, 2022
a640da0
Low: schemas: Add pacemakerd-health schema in preparation for fix
nrwahl2 Oct 11, 2022
92e4bc2
Low: libpacemaker: Fix pacemakerd-health XML output
nrwahl2 Oct 11, 2022
a6ec43e
Refactor: libpacemaker: Default to sync dispatch in pcmk_cluster_queries
nrwahl2 Oct 11, 2022
2d4a36c
Fix: tools: crm_mon --one-shot fails while pacemaker is shutting down
nrwahl2 Oct 11, 2022
a165cdf
Low: libpacemaker: Correct sys_from default in pacemakerd_health()
nrwahl2 Oct 11, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions include/crm/common/ipc_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ extern "C" {

#include <crm_config.h> // HAVE_GETPEEREID
#include <crm/common/ipc.h>
#include <crm/common/ipc_pacemakerd.h> // enum pcmk_pacemakerd_state
#include <crm/common/mainloop.h> // mainloop_io_t

/*
Expand Down Expand Up @@ -278,6 +279,8 @@ pcmk__ipc_sys_name(const char *ipc_name, const char *fallback)
return ipc_name ? ipc_name : ((crm_system_name ? crm_system_name : fallback));
}

const char *pcmk__pcmkd_state_enum2friendly(enum pcmk_pacemakerd_state state);

#ifdef __cplusplus
}
#endif
Expand Down
2 changes: 1 addition & 1 deletion include/crm/common/output_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ extern "C" {
*/


# define PCMK__API_VERSION "2.24"
# define PCMK__API_VERSION "2.25"

#if defined(PCMK__WITH_ATTRIBUTE_OUTPUT_ARGS)
# define PCMK__OUTPUT_ARGS(ARGS...) __attribute__((output_args(ARGS)))
Expand Down
19 changes: 13 additions & 6 deletions include/pacemaker.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,15 +107,22 @@ int pcmk_designated_controller(xmlNodePtr *xml, unsigned int message_timeout_ms)
void pcmk_free_injections(pcmk_injections_t *injections);

/*!
* \brief Get pacemakerd status
*
* \param[in,out] xml The destination for the result, as an XML tree.
* \param[in] ipc_name IPC name for request
* \param[in] message_timeout_ms Message timeout
* \brief Get and output \p pacemakerd status
*
* \param[in,out] xml Destination for the result, as an XML tree
* \param[in] ipc_name IPC name for request
* \param[in] message_timeout_ms How long to wait for a reply from the
* \p pacemakerd API. If 0,
* \p pcmk_ipc_dispatch_sync will be used.
* If positive, \p pcmk_ipc_dispatch_main
* will be used, and a new mainloop will be
* created for this purpose (freed before
* return).
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if it would be better to replace the mainloop stuff in pcmk_cluster_queries.c with pcmk_ipc_dispatch_poll when we need a timeout. That way we never have to worry about mainloop conflicts.

If so, it'll go in a separate PR.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense, as long as anything crm_mon needs still works

Copy link
Contributor Author

@nrwahl2 nrwahl2 Oct 12, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ack. crm_mon already uses polling (timeout == reconnect_ms / 2) instead of sync or mainloop, for its console and daemonized modes. one-shot can continue using sync.

*
* \return Standard Pacemaker return code
*/
int pcmk_pacemakerd_status(xmlNodePtr *xml, char *ipc_name, unsigned int message_timeout_ms);
int pcmk_pacemakerd_status(xmlNodePtr *xml, const char *ipc_name,
unsigned int message_timeout_ms);

/*!
* \brief Calculate and output resource operation digests
Expand Down
4 changes: 3 additions & 1 deletion include/pcmki/pcmki_cluster_queries.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@

int pcmk__controller_status(pcmk__output_t *out, char *dest_node, guint message_timeout_ms);
int pcmk__designated_controller(pcmk__output_t *out, guint message_timeout_ms);
int pcmk__pacemakerd_status(pcmk__output_t *out, char *ipc_name, guint message_timeout_ms);
int pcmk__pacemakerd_status(pcmk__output_t *out, const char *ipc_name,
guint message_timeout_ms,
enum pcmk_pacemakerd_state *state);
int pcmk__list_nodes(pcmk__output_t *out, char *node_types, gboolean bash_export);

#endif
18 changes: 11 additions & 7 deletions include/pcmki/pcmki_status.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,15 +38,19 @@ extern "C" {
*/
int pcmk__output_simple_status(pcmk__output_t *out, pe_working_set_t *data_set);

int pcmk__output_cluster_status(pcmk__output_t *out, stonith_t *st, cib_t *cib,
xmlNode *current_cib, enum pcmk__fence_history fence_history,
uint32_t show, uint32_t show_opts, char *only_node,
char *only_rsc, const char *neg_location_prefix,
int pcmk__output_cluster_status(pcmk__output_t *out, stonith_t *stonith,
cib_t *cib, xmlNode *current_cib,
enum pcmk__fence_history fence_history,
uint32_t show, uint32_t show_opts,
const char *only_node, const char *only_rsc,
const char *neg_location_prefix,
bool simple_output);

int pcmk__status(pcmk__output_t *out, cib_t *cib, enum pcmk__fence_history fence_history,
uint32_t show, uint32_t show_opts, char *only_node, char *only_rsc,
const char *neg_location_prefix, bool simple_output);
int pcmk__status(pcmk__output_t *out, cib_t *cib,
enum pcmk__fence_history fence_history, uint32_t show,
uint32_t show_opts, const char *only_node,
const char *only_rsc, const char *neg_location_prefix,
bool simple_output);

#ifdef __cplusplus
}
Expand Down
35 changes: 34 additions & 1 deletion lib/common/ipc_pacemakerd.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,39 @@ pcmk_pacemakerd_api_daemon_state_enum2text(
return "invalid";
}

/*!
* \internal
* \brief Return a friendly string representation of a \p pacemakerd state
*
* \param[in] state \p pacemakerd state
*
* \return A user-friendly string representation of \p state, or
* <tt>"Invalid pacemakerd state"</tt>
*/
const char *
pcmk__pcmkd_state_enum2friendly(enum pcmk_pacemakerd_state state)
{
switch (state) {
case pcmk_pacemakerd_state_init:
return "Initializing pacemaker";
case pcmk_pacemakerd_state_starting_daemons:
return "Pacemaker daemons are starting";
case pcmk_pacemakerd_state_wait_for_ping:
return "Waiting for startup trigger from SBD";
case pcmk_pacemakerd_state_running:
return "Pacemaker is running";
case pcmk_pacemakerd_state_shutting_down:
return "Pacemaker daemons are shutting down";
case pcmk_pacemakerd_state_shutdown_complete:
/* Assuming pacemakerd won't process messages while in
* shutdown_complete state unless reporting to SBD
*/
return "Pacemaker daemons are shut down (reporting to SBD)";
default:
return "Invalid pacemakerd state";
}
}

// \return Standard Pacemaker return code
static int
new_data(pcmk_ipc_api_t *api)
Expand Down Expand Up @@ -180,7 +213,7 @@ dispatch(pcmk_ipc_api_t *api, xmlNode *reply)
reply_data.data.ping.status =
pcmk__str_eq(crm_element_value(msg_data, XML_PING_ATTR_STATUS), "ok",
pcmk__str_casei)?pcmk_rc_ok:pcmk_rc_error;
reply_data.data.ping.last_good = (time_t) value_ll;
reply_data.data.ping.last_good = (value_ll < 0)? 0 : (time_t) value_ll;
reply_data.data.ping.sys_from = crm_element_value(msg_data,
XML_PING_ATTR_SYSFROM);
} else if (pcmk__str_eq(value, CRM_OP_QUIT, pcmk__str_none)) {
Expand Down
Loading