Skip to content

Commit

Permalink
DAOS-14467 chk: properly stop check scheduler
Browse files Browse the repository at this point in the history
When someone wants to stop current check instance, it needs to set
ins->ci_sched_exiting to notify related instance scheduler to exit.

Originally, we used "ci_sched_running" for such purpose. But it is
confused to distinguish whether the scheduler has already exited or
someone is stopping the instance. The others may misunderstand that
related check scheduler has already exited, but the scheduler is in
stopping process, as to subsequent checker restart will get failure.

Signed-off-by: Fan Yong <[email protected]>
  • Loading branch information
Nasf-Fan committed Nov 9, 2023
1 parent cda2a4e commit 9b69e2e
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 27 deletions.
7 changes: 6 additions & 1 deletion src/chk/chk_common.c
Original file line number Diff line number Diff line change
Expand Up @@ -434,9 +434,14 @@ chk_pool_wait(struct chk_pool_rec *cpr)
struct chk_pending_rec *tmp;

D_ASSERT(cpr->cpr_refs > 0);
/*
* The caller chk_pool_stop_one() must firstly delete the cpr from the pool tree,
* then stop via chk_pool_wait(). So chk_pool_wait() will not be called repeatedly.
*/
D_ASSERT(cpr->cpr_stop == 0);

ABT_mutex_lock(cpr->cpr_mutex);
if (cpr->cpr_thread != ABT_THREAD_NULL && !cpr->cpr_stop) {
if (cpr->cpr_thread != ABT_THREAD_NULL) {
cpr->cpr_stop = 1;
ABT_cond_broadcast(cpr->cpr_cond);
ABT_mutex_unlock(cpr->cpr_mutex);
Expand Down
24 changes: 18 additions & 6 deletions src/chk/chk_engine.c
Original file line number Diff line number Diff line change
Expand Up @@ -1858,11 +1858,11 @@ chk_engine_sched(void *args)
D_INFO(DF_ENGINE" scheduler on rank %u entry at phase %u\n",
DP_ENGINE(ins), myrank, cbk->cb_phase);

while (ins->ci_sched_running) {
while (!ins->ci_sched_exiting) {
dss_sleep(300);

/* Someone wants to stop the check. */
if (!ins->ci_sched_running)
if (ins->ci_sched_exiting)
D_GOTO(out, rc = 0);

ins_phase = chk_pools_find_slowest(ins, &done);
Expand Down Expand Up @@ -2293,9 +2293,11 @@ chk_engine_stop(uint64_t gen, int pool_nr, uuid_t pools[], uint32_t *flags)
{
struct chk_instance *ins = chk_engine;
struct chk_bookmark *cbk = &ins->ci_bk;
struct chk_pool_rec *cpr;
d_rank_t myrank = dss_self_rank();
int rc = 0;
int i;
int active = false;

if (gen != 0 && gen != cbk->cb_gen)
D_GOTO(log, rc = -DER_NOTAPPLICABLE);
Expand All @@ -2306,7 +2308,7 @@ chk_engine_stop(uint64_t gen, int pool_nr, uuid_t pools[], uint32_t *flags)
if (ins->ci_starting)
D_GOTO(log, rc = -DER_BUSY);

if (ins->ci_stopping)
if (ins->ci_stopping || ins->ci_sched_exiting)
D_GOTO(log, rc = -DER_INPROGRESS);

if (cbk->cb_ins_status != CHK__CHECK_INST_STATUS__CIS_RUNNING)
Expand All @@ -2332,7 +2334,17 @@ chk_engine_stop(uint64_t gen, int pool_nr, uuid_t pools[], uint32_t *flags)
if (ins->ci_pool_stopped)
*flags = CSF_POOL_STOPPED;

if (d_list_empty(&ins->ci_pool_list)) {
d_list_for_each_entry(cpr, &ins->ci_pool_list, cpr_link) {
if (!cpr->cpr_done && !cpr->cpr_skip && !cpr->cpr_stop) {
D_ASSERTF(pool_nr != 0, "Hit active pool "DF_UUIDF" after stop all\n",
DP_UUID(cpr->cpr_uuid));

active = true;
break;
}
}

if (!active) {
chk_stop_sched(ins);
/* To indicate that there is no active pool(s) on this rank. */
rc = 1;
Expand Down Expand Up @@ -2563,7 +2575,7 @@ chk_engine_mark_rank_dead(uint64_t gen, d_rank_t rank, uint32_t version)
* check instance; otherwise, related pool(s) will be marked as 'failed' when
* try ro access something on the dead rank.
*
* So here, it is not ncessary to find out the affected pools and fail them
* So here, it is not necessary to find out the affected pools and fail them
* immediately when the death event is reported, instead, it will be handled
* sometime later as the DAOS check going.
*/
Expand Down Expand Up @@ -3164,7 +3176,7 @@ chk_engine_report(struct chk_report_unit *cru, uint64_t *seq, int *decision)
goto out;
}

if (!ins->ci_sched_running || cpr->cpr_exiting) {
if (!ins->ci_sched_running || ins->ci_sched_exiting || cpr->cpr_exiting) {
rc = 1;
ABT_mutex_unlock(cpr->cpr_mutex);
goto out;
Expand Down
38 changes: 23 additions & 15 deletions src/chk/chk_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ CRT_RPC_DECLARE(chk_query, DAOS_ISEQ_CHK_QUERY, DAOS_OSEQ_CHK_QUERY);
/*
* CHK_MARK:
* From check leader to check engine to mark some rank as "dead". Under check mode, if some rank
* is dead (and failed to rejoin), it will not be exlcuded from related pool map to avoid further
* is dead (and failed to rejoin), it will not be excluded from related pool map to avoid further
* damaging the system, instead, it will be mark as "dead" by the check instance and the check
* status on related pool(s) will be marked as "failed".
*/
Expand Down Expand Up @@ -998,6 +998,9 @@ static inline void
chk_pool_get(struct chk_pool_rec *cpr)
{
cpr->cpr_refs++;

D_DEBUG(DB_TRACE, "Get ref on pool rec %p for "DF_UUIDF", ref %d\n",
cpr, DP_UUID(cpr->cpr_uuid), cpr->cpr_refs);
}

static inline void
Expand All @@ -1009,6 +1012,9 @@ chk_pool_put(struct chk_pool_rec *cpr)
/* NOTE: Before being destroyed, keep it in the list. */
D_ASSERT(!d_list_empty(&cpr->cpr_link));

D_DEBUG(DB_TRACE, "Pet ref on pool rec %p for "DF_UUIDF", ref %d\n",
cpr, DP_UUID(cpr->cpr_uuid), cpr->cpr_refs);

if (--(cpr->cpr_refs) == 0) {
d_list_del(&cpr->cpr_link);
D_ASSERT(cpr->cpr_thread == ABT_THREAD_NULL);
Expand All @@ -1035,6 +1041,9 @@ chk_pool_put(struct chk_pool_rec *cpr)
D_FREE(cpr->cpr_mbs[i].cpm_tgt_status);
}

D_DEBUG(DB_TRACE, "Destroy pool rec %p for "DF_UUIDF"\n",
cpr, DP_UUID(cpr->cpr_uuid));

D_FREE(cpr->cpr_mbs);
D_FREE(cpr->cpr_label);
D_FREE(cpr);
Expand Down Expand Up @@ -1150,43 +1159,42 @@ chk_dup_string(char **tgt, const char *src, size_t len)
static inline void
chk_stop_sched(struct chk_instance *ins)
{
uint64_t gen = ins->ci_bk.cb_gen;

ABT_mutex_lock(ins->ci_abt_mutex);
if (ins->ci_sched != ABT_THREAD_NULL && ins->ci_sched_running) {
ins->ci_sched_running = 0;
if (ins->ci_sched_running && !ins->ci_sched_exiting) {
D_ASSERT(ins->ci_sched != ABT_THREAD_NULL);

D_INFO("Stopping %s instance on rank %u with gen "DF_U64"\n",
ins->ci_is_leader ? "leader" : "engine", dss_self_rank(), gen);

ins->ci_sched_exiting = 1;
ABT_cond_broadcast(ins->ci_abt_cond);
ABT_mutex_unlock(ins->ci_abt_mutex);
ABT_thread_free(&ins->ci_sched);
} else {
ABT_mutex_unlock(ins->ci_abt_mutex);
/* Check ci_bk.cb_gen for the case of others restarted checker during my wait. */
while (ins->ci_sched_running && gen == ins->ci_bk.cb_gen)
dss_sleep(300);
}
}

static inline int
chk_ins_can_start(struct chk_instance *ins)
{
struct chk_bookmark *cbk = &ins->ci_bk;

if (unlikely(!ins->ci_inited))
return -DER_AGAIN;

if (ins->ci_starting)
return -DER_INPROGRESS;

if (ins->ci_stopping)
if (ins->ci_stopping || ins->ci_sched_exiting)
return -DER_BUSY;

if (ins->ci_sched_running)
return -DER_ALREADY;

/*
* If ci_sched_running is zero but check instance is still running,
* then someone is trying to stop it.
*/
if (((ins->ci_is_leader && cbk->cb_magic == CHK_BK_MAGIC_LEADER) ||
(!ins->ci_is_leader && cbk->cb_magic == CHK_BK_MAGIC_ENGINE)) &&
cbk->cb_ins_status == CHK__CHECK_INST_STATUS__CIS_RUNNING)
return -DER_BUSY;

return 0;
}

Expand Down
10 changes: 5 additions & 5 deletions src/chk/chk_leader.c
Original file line number Diff line number Diff line change
Expand Up @@ -1306,7 +1306,7 @@ chk_leader_need_stop(struct chk_instance *ins, int *ret)
}
}

if (!ins->ci_sched_running) {
if (!ins->ci_sched_running || ins->ci_sched_exiting) {
*ret = 0;
return true;
}
Expand Down Expand Up @@ -1931,7 +1931,7 @@ chk_leader_pool_mbs_one(struct chk_pool_rec *cpr)
if (rc1 == RSVC_CLIENT_RECHOOSE ||
(rc1 == RSVC_CLIENT_PROCEED && daos_rpc_retryable_rc(rc))) {
dss_sleep(interval);
if (cpr->cpr_stop || !ins->ci_sched_running) {
if (cpr->cpr_stop || !ins->ci_sched_running || ins->ci_sched_exiting) {
notify = false;
D_GOTO(out_client, rc = 0);
}
Expand Down Expand Up @@ -2165,7 +2165,7 @@ chk_leader_sched(void *args)
ABT_mutex_lock(ins->ci_abt_mutex);

again:
if (!ins->ci_sched_running) {
if (ins->ci_sched_exiting) {
ABT_mutex_unlock(ins->ci_abt_mutex);
D_GOTO(out, rc = 0);
}
Expand Down Expand Up @@ -3039,7 +3039,7 @@ chk_leader_stop(int pool_nr, uuid_t pools[])
if (ins->ci_starting)
D_GOTO(log, rc = -DER_BUSY);

if (ins->ci_stopping)
if (ins->ci_stopping || ins->ci_sched_exiting)
D_GOTO(log, rc = -DER_INPROGRESS);

/*
Expand Down Expand Up @@ -3620,7 +3620,7 @@ chk_leader_report(struct chk_report_unit *cru, uint64_t *seq, int *decision)
goto out;
}

if (!ins->ci_sched_running || cpr->cpr_exiting) {
if (!ins->ci_sched_running || ins->ci_sched_exiting || cpr->cpr_exiting) {
rc = 1;
ABT_mutex_unlock(cpr->cpr_mutex);
goto out;
Expand Down

0 comments on commit 9b69e2e

Please sign in to comment.