Skip to content

Commit

Permalink
DAOS-14467 chk: properly stop check scheduler
Browse files Browse the repository at this point in the history
When someone wants to stop the current check instance, they need to set
ins->ci_sched_exiting to notify the related instance scheduler to exit.

Originally, we used "ci_sched_running" for this purpose. But that made it
hard to distinguish whether the scheduler had already exited or someone
was still stopping the instance. Others could mistakenly conclude that the
related check scheduler had already exited while it was in fact still in
the stopping process, so a subsequent checker restart would fail.

Signed-off-by: Fan Yong <[email protected]>
  • Loading branch information
Nasf-Fan committed Oct 19, 2023
1 parent 93b8052 commit 2e93b64
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 39 deletions.
18 changes: 7 additions & 11 deletions src/chk/chk_engine.c
Original file line number Diff line number Diff line change
Expand Up @@ -171,8 +171,6 @@ chk_engine_exit(struct chk_instance *ins, uint32_t ins_phase, uint32_t ins_statu
struct chk_iv iv = { 0 };
int rc;

ins->ci_sched_exiting = 1;

while ((cpr = d_list_pop_entry(&ins->ci_pool_shutdown_list, struct chk_pool_rec,
cpr_shutdown_link)) != NULL) {
chk_pool_shutdown(cpr, false);
Expand All @@ -199,7 +197,7 @@ chk_engine_exit(struct chk_instance *ins, uint32_t ins_phase, uint32_t ins_statu
ins_status != CHK__CHECK_INST_STATUS__CIS_STOPPED &&
ins_status != CHK__CHECK_INST_STATUS__CIS_IMPLICATED && ins->ci_iv_ns != NULL) {
if (DAOS_FAIL_CHECK(DAOS_CHK_PS_NOTIFY_LEADER))
goto out;
return;

iv.ci_gen = cbk->cb_gen;
iv.ci_phase = cbk->cb_phase;
Expand All @@ -213,9 +211,6 @@ chk_engine_exit(struct chk_instance *ins, uint32_t ins_phase, uint32_t ins_statu
DF_ENGINE" on rank %u notify leader for its exit, status %u: rc = %d\n",
DP_ENGINE(ins), dss_self_rank(), ins_status, rc);
}

out:
ins->ci_sched_exiting = 0;
}

static int
Expand Down Expand Up @@ -1858,11 +1853,11 @@ chk_engine_sched(void *args)
D_INFO(DF_ENGINE" scheduler on rank %u entry at phase %u\n",
DP_ENGINE(ins), myrank, cbk->cb_phase);

while (ins->ci_sched_running) {
while (!ins->ci_sched_exiting) {
dss_sleep(300);

/* Someone wants to stop the check. */
if (!ins->ci_sched_running)
if (ins->ci_sched_exiting)
D_GOTO(out, rc = 0);

ins_phase = chk_pools_find_slowest(ins, &done);
Expand Down Expand Up @@ -1942,6 +1937,7 @@ chk_engine_sched(void *args)
D_INFO(DF_ENGINE" scheduler on rank %u exit at phase %u with status %u: rc %d\n",
DP_ENGINE(ins), myrank, cbk->cb_phase, ins_status, rc);

ins->ci_sched_exiting = 0;
ins->ci_sched_running = 0;
}

Expand Down Expand Up @@ -2306,7 +2302,7 @@ chk_engine_stop(uint64_t gen, int pool_nr, uuid_t pools[], uint32_t *flags)
if (ins->ci_starting)
D_GOTO(log, rc = -DER_BUSY);

if (ins->ci_stopping)
if (ins->ci_stopping || ins->ci_sched_exiting)
D_GOTO(log, rc = -DER_INPROGRESS);

if (cbk->cb_ins_status != CHK__CHECK_INST_STATUS__CIS_RUNNING)
Expand Down Expand Up @@ -2563,7 +2559,7 @@ chk_engine_mark_rank_dead(uint64_t gen, d_rank_t rank, uint32_t version)
* check instance; otherwise, related pool(s) will be marked as 'failed' when
* trying to access something on the dead rank.
*
* So here, it is not ncessary to find out the affected pools and fail them
* So here, it is not necessary to find out the affected pools and fail them
* immediately when the death event is reported, instead, it will be handled
* sometime later as the DAOS check proceeds.
*/
Expand Down Expand Up @@ -3164,7 +3160,7 @@ chk_engine_report(struct chk_report_unit *cru, uint64_t *seq, int *decision)
goto out;
}

if (!ins->ci_sched_running || cpr->cpr_exiting) {
if (!ins->ci_sched_running || ins->ci_sched_exiting || cpr->cpr_exiting) {
rc = 1;
ABT_mutex_unlock(cpr->cpr_mutex);
goto out;
Expand Down
32 changes: 13 additions & 19 deletions src/chk/chk_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ CRT_RPC_DECLARE(chk_query, DAOS_ISEQ_CHK_QUERY, DAOS_OSEQ_CHK_QUERY);
/*
* CHK_MARK:
* From check leader to check engine to mark some rank as "dead". Under check mode, if some rank
* is dead (and failed to rejoin), it will not be exlcuded from related pool map to avoid further
* is dead (and failed to rejoin), it will not be excluded from related pool map to avoid further
* damaging the system, instead, it will be marked as "dead" by the check instance and the check
* status on related pool(s) will be marked as "failed".
*/
Expand Down Expand Up @@ -1150,43 +1150,37 @@ chk_dup_string(char **tgt, const char *src, size_t len)
static inline void
chk_stop_sched(struct chk_instance *ins)
{
uint64_t gen = ins->ci_bk.cb_gen;

ABT_mutex_lock(ins->ci_abt_mutex);
if (ins->ci_sched != ABT_THREAD_NULL && ins->ci_sched_running) {
ins->ci_sched_running = 0;
if (ins->ci_sched_running && !ins->ci_sched_exiting) {
D_INFO("Stopping %s instance on rank %u with gen "DF_U64"\n",
ins->ci_is_leader ? "leader" : "engine", dss_self_rank(), gen);
ins->ci_sched_exiting = 1;
ABT_cond_broadcast(ins->ci_abt_cond);
ABT_mutex_unlock(ins->ci_abt_mutex);
ABT_thread_free(&ins->ci_sched);
} else {
ABT_mutex_unlock(ins->ci_abt_mutex);
}

/* Check ci_bk.cb_gen in case someone else restarted the checker during the wait. */
while (ins->ci_sched_running && gen == ins->ci_bk.cb_gen)
ABT_cond_wait(ins->ci_abt_cond, ins->ci_abt_mutex);
ABT_mutex_unlock(ins->ci_abt_mutex);
}

static inline int
chk_ins_can_start(struct chk_instance *ins)
{
struct chk_bookmark *cbk = &ins->ci_bk;

if (unlikely(!ins->ci_inited))
return -DER_AGAIN;

if (ins->ci_starting)
return -DER_INPROGRESS;

if (ins->ci_stopping)
if (ins->ci_stopping || ins->ci_sched_exiting)
return -DER_BUSY;

if (ins->ci_sched_running)
return -DER_ALREADY;

/*
* If ci_sched_running is zero but check instance is still running,
* then someone is trying to stop it.
*/
if (((ins->ci_is_leader && cbk->cb_magic == CHK_BK_MAGIC_LEADER) ||
(!ins->ci_is_leader && cbk->cb_magic == CHK_BK_MAGIC_ENGINE)) &&
cbk->cb_ins_status == CHK__CHECK_INST_STATUS__CIS_RUNNING)
return -DER_BUSY;

return 0;
}

Expand Down
15 changes: 6 additions & 9 deletions src/chk/chk_leader.c
Original file line number Diff line number Diff line change
Expand Up @@ -229,8 +229,6 @@ chk_leader_exit(struct chk_instance *ins, uint32_t ins_phase, uint32_t ins_statu
struct chk_iv iv = { 0 };
int rc = 0;

ins->ci_sched_exiting = 1;

D_ASSERT(d_list_empty(&ins->ci_pool_shutdown_list));

chk_pool_stop_all(ins, pool_status, NULL);
Expand Down Expand Up @@ -262,8 +260,6 @@ chk_leader_exit(struct chk_instance *ins, uint32_t ins_phase, uint32_t ins_statu
D_ERROR(DF_LEADER" exit with status %u: "DF_RC"\n",
DP_LEADER(ins), ins_status, DP_RC(rc));
}

ins->ci_sched_exiting = 0;
}

static void
Expand Down Expand Up @@ -1306,7 +1302,7 @@ chk_leader_need_stop(struct chk_instance *ins, int *ret)
}
}

if (!ins->ci_sched_running) {
if (!ins->ci_sched_running || ins->ci_sched_exiting) {
*ret = 0;
return true;
}
Expand Down Expand Up @@ -1931,7 +1927,7 @@ chk_leader_pool_mbs_one(struct chk_pool_rec *cpr)
if (rc1 == RSVC_CLIENT_RECHOOSE ||
(rc1 == RSVC_CLIENT_PROCEED && daos_rpc_retryable_rc(rc))) {
dss_sleep(interval);
if (cpr->cpr_stop || !ins->ci_sched_running) {
if (cpr->cpr_stop || !ins->ci_sched_running || ins->ci_sched_exiting) {
notify = false;
D_GOTO(out_client, rc = 0);
}
Expand Down Expand Up @@ -2165,7 +2161,7 @@ chk_leader_sched(void *args)
ABT_mutex_lock(ins->ci_abt_mutex);

again:
if (!ins->ci_sched_running) {
if (ins->ci_sched_exiting) {
ABT_mutex_unlock(ins->ci_abt_mutex);
D_GOTO(out, rc = 0);
}
Expand Down Expand Up @@ -2304,6 +2300,7 @@ chk_leader_sched(void *args)
D_INFO(DF_LEADER" scheduler exit at phase %u with status %u: rc %d\n",
DP_LEADER(ins), cbk->cb_phase, ins_status, rc);

ins->ci_sched_exiting = 0;
ins->ci_sched_running = 0;
}

Expand Down Expand Up @@ -3039,7 +3036,7 @@ chk_leader_stop(int pool_nr, uuid_t pools[])
if (ins->ci_starting)
D_GOTO(log, rc = -DER_BUSY);

if (ins->ci_stopping)
if (ins->ci_stopping || ins->ci_sched_exiting)
D_GOTO(log, rc = -DER_INPROGRESS);

/*
Expand Down Expand Up @@ -3620,7 +3617,7 @@ chk_leader_report(struct chk_report_unit *cru, uint64_t *seq, int *decision)
goto out;
}

if (!ins->ci_sched_running || cpr->cpr_exiting) {
if (!ins->ci_sched_running || ins->ci_sched_exiting || cpr->cpr_exiting) {
rc = 1;
ABT_mutex_unlock(cpr->cpr_mutex);
goto out;
Expand Down

0 comments on commit 2e93b64

Please sign in to comment.