Skip to content

Commit

Permalink
Add paused_reason to INFO CLIENTS (#1564)
Browse files Browse the repository at this point in the history
In #1519, we added paused_actions and paused_timeout_milliseconds.
It would be helpful to also add paused_reason, since users also
want to know the purpose of the pause.

Currently available options:
- client_pause: triggered by the CLIENT PAUSE command.
- shutdown_in_progress: during shutdown, the primary waits for the replicas
to catch up with its offset.
- failover_in_progress: during failover, the primary waits for the replica
to catch up with its offset.
- none

---------

Signed-off-by: Binbin <[email protected]>
  • Loading branch information
enjoy-binbin authored Feb 6, 2025
1 parent 0579103 commit da3f1c6
Show file tree
Hide file tree
Showing 4 changed files with 53 additions and 7 deletions.
22 changes: 20 additions & 2 deletions src/networking.c
Original file line number Diff line number Diff line change
Expand Up @@ -4641,12 +4641,30 @@ void flushReplicasOutputBuffers(void) {
}
}

mstime_t getPausedActionTimeout(uint32_t action) {
/* Map a pause purpose to the string reported in the paused_reason INFO field.
 *
 * NUM_PAUSE_PURPOSES is not a real purpose: getPausedActionTimeout() stores it
 * when no matching pause was found, so it maps to "none". Any other value not
 * listed here yields a generic placeholder string.
 *
 * The returned pointer refers to a string literal; callers must not modify or
 * free it. */
char *getPausedReason(pause_purpose purpose) {
switch (purpose) {
case PAUSE_BY_CLIENT_COMMAND:
return "client_pause";
case PAUSE_DURING_SHUTDOWN:
return "shutdown_in_progress";
case PAUSE_DURING_FAILOVER:
return "failover_in_progress";
case NUM_PAUSE_PURPOSES:
/* Sentinel meaning "no pause purpose selected". */
return "none";
default:
return "Unknown pause reason";
}
}

/* Return the largest remaining pause time (p->end - server.mstime) among the
 * pause purposes whose paused_actions overlap `action`, and store through
 * `purpose` the index of the purpose that produced it.
 *
 * If no purpose overlaps `action` with a positive remaining time, the return
 * value is 0 and *purpose is left as NUM_PAUSE_PURPOSES. `purpose` is always
 * written, so it must be a valid pointer. */
mstime_t getPausedActionTimeout(uint32_t action, pause_purpose *purpose) {
mstime_t timeout = 0;
/* Start from the "nothing found" sentinel. */
*purpose = NUM_PAUSE_PURPOSES;
for (int i = 0; i < NUM_PAUSE_PURPOSES; i++) {
pause_event *p = &(server.client_pause_per_purpose[i]);
/* NOTE(review): the two `if` lines below carry the same condition; this
 * looks like the removed and added lines of a diff shown together.
 * Confirm against the real file before relying on it. */
if (p->paused_actions & action && (p->end - server.mstime) > timeout)
if (p->paused_actions & action && (p->end - server.mstime) > timeout) {
/* Keep the longest remaining time seen so far and remember its purpose. */
timeout = p->end - server.mstime;
*purpose = i;
}
}
return timeout;
}
Expand Down
9 changes: 7 additions & 2 deletions src/server.c
Original file line number Diff line number Diff line change
Expand Up @@ -5691,14 +5691,18 @@ sds genValkeyInfoString(dict *section_dict, int all_sections, int everything) {
getExpensiveClientsInfo(&maxin, &maxout);
totalNumberOfStatefulKeys(&blocking_keys, &blocking_keys_on_nokey, &watched_keys);

pause_purpose purpose;
char *paused_reason = "none";
char *paused_actions = "none";
long long paused_timeout = 0;
if (server.paused_actions & PAUSE_ACTION_CLIENT_ALL) {
paused_actions = "all";
paused_timeout = getPausedActionTimeout(PAUSE_ACTION_CLIENT_ALL);
paused_timeout = getPausedActionTimeout(PAUSE_ACTION_CLIENT_ALL, &purpose);
paused_reason = getPausedReason(purpose);
} else if (server.paused_actions & PAUSE_ACTION_CLIENT_WRITE) {
paused_actions = "write";
paused_timeout = getPausedActionTimeout(PAUSE_ACTION_CLIENT_WRITE);
paused_timeout = getPausedActionTimeout(PAUSE_ACTION_CLIENT_WRITE, &purpose);
paused_reason = getPausedReason(purpose);
}

if (sections++) info = sdscat(info, "\r\n");
Expand All @@ -5718,6 +5722,7 @@ sds genValkeyInfoString(dict *section_dict, int all_sections, int everything) {
"total_watched_keys:%lu\r\n", watched_keys,
"total_blocking_keys:%lu\r\n", blocking_keys,
"total_blocking_keys_on_nokey:%lu\r\n", blocking_keys_on_nokey,
"paused_reason:%s\r\n", paused_reason,
"paused_actions:%s\r\n", paused_actions,
"paused_timeout_milliseconds:%lld\r\n", paused_timeout));
}
Expand Down
3 changes: 2 additions & 1 deletion src/server.h
Original file line number Diff line number Diff line change
Expand Up @@ -2733,7 +2733,8 @@ void pauseActions(pause_purpose purpose, mstime_t end, uint32_t actions);
void unpauseActions(pause_purpose purpose);
uint32_t isPausedActions(uint32_t action_bitmask);
uint32_t isPausedActionsWithUpdate(uint32_t action_bitmask);
mstime_t getPausedActionTimeout(uint32_t action);
char *getPausedReason(pause_purpose purpose);
mstime_t getPausedActionTimeout(uint32_t action, pause_purpose *purpose);
void updatePausedActions(void);
void unblockPostponedClients(void);
void processEventsWhileBlocked(void);
Expand Down
26 changes: 24 additions & 2 deletions tests/unit/pause.tcl
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
start_server {tags {"pause network"}} {
test "Test check paused_actions in info stats" {
test "Test check paused info in info clients" {
assert_equal [s paused_reason] "none"
assert_equal [s paused_actions] "none"
assert_equal [s paused_timeout_milliseconds] 0

r client PAUSE 10000 WRITE
assert_equal [s paused_reason] "client_pause"
assert_equal [s paused_actions] "write"
after 1000
set timeout [s paused_timeout_milliseconds]
Expand All @@ -13,9 +15,14 @@ start_server {tags {"pause network"}} {
r multi
r client PAUSE 1000 ALL
r info clients
assert_match "*paused_actions:all*" [r exec]
set res [r exec]
assert_match "*paused_reason:client_pause*" $res
assert_match "*paused_actions:all*" $res

r client unpause
assert_equal [s paused_reason] "none"
assert_equal [s paused_actions] "none"
assert_equal [s paused_timeout_milliseconds] 0
}

test "Test read commands are not blocked by client pause" {
Expand Down Expand Up @@ -408,3 +415,18 @@ start_server {tags {"pause network"}} {
# Make sure we unpause at the end
r client unpause
}

# Checks the paused_* fields of node 0 before and during a manual cluster
# failover requested from node 1.
# NOTE(review): presumably "start_cluster 1 1" starts one primary and one
# replica, with node 0 as the primary - confirm against the test helpers.
start_cluster 1 1 {tags {"external:skip cluster pause network"}} {
test "Test check paused info during the cluster failover in info clients" {
# Baseline: nothing is paused on node 0.
assert_equal [s 0 paused_reason] "none"
assert_equal [s 0 paused_actions] "none"
assert_equal [s 0 paused_timeout_milliseconds] 0

# Ask node 1 to start a manual failover, then wait until node 0 has logged
# that it received the request before inspecting its pause state.
R 1 cluster failover
wait_for_log_messages 0 {"*Manual failover requested by replica*"} 0 10 1000

# While the failover is in progress node 0 reports a write pause with the
# failover reason and a positive remaining timeout.
assert_equal [s 0 paused_reason] "failover_in_progress"
assert_equal [s 0 paused_actions] "write"
assert_morethan [s 0 paused_timeout_milliseconds] 0
}
}

0 comments on commit da3f1c6

Please sign in to comment.