From 2e93b644d795023c383ebf695feaa57ab742abc3 Mon Sep 17 00:00:00 2001 From: Fan Yong Date: Sun, 15 Oct 2023 22:37:15 +0800 Subject: [PATCH] DAOS-14467 chk: properly stop check scheduler When someone wants to stop current check instance, it needs to set ins->ci_sched_exiting to notify related instance scheduler to exit. Originally, we used "ci_sched_running" for such purpose. But it is confused to distinguish whether the scheduler has already exited or someone is stopping the instance. The others may misunderstand that related check scheduler has already exited, but the scheduler is in stopping process, as to subsequent checker restart will get failure. Signed-off-by: Fan Yong --- src/chk/chk_engine.c | 18 +++++++----------- src/chk/chk_internal.h | 32 +++++++++++++------------------- src/chk/chk_leader.c | 15 ++++++--------- 3 files changed, 26 insertions(+), 39 deletions(-) diff --git a/src/chk/chk_engine.c b/src/chk/chk_engine.c index 3484a63d0dc..4d9fa96c0a2 100644 --- a/src/chk/chk_engine.c +++ b/src/chk/chk_engine.c @@ -171,8 +171,6 @@ chk_engine_exit(struct chk_instance *ins, uint32_t ins_phase, uint32_t ins_statu struct chk_iv iv = { 0 }; int rc; - ins->ci_sched_exiting = 1; - while ((cpr = d_list_pop_entry(&ins->ci_pool_shutdown_list, struct chk_pool_rec, cpr_shutdown_link)) != NULL) { chk_pool_shutdown(cpr, false); @@ -199,7 +197,7 @@ chk_engine_exit(struct chk_instance *ins, uint32_t ins_phase, uint32_t ins_statu ins_status != CHK__CHECK_INST_STATUS__CIS_STOPPED && ins_status != CHK__CHECK_INST_STATUS__CIS_IMPLICATED && ins->ci_iv_ns != NULL) { if (DAOS_FAIL_CHECK(DAOS_CHK_PS_NOTIFY_LEADER)) - goto out; + return; iv.ci_gen = cbk->cb_gen; iv.ci_phase = cbk->cb_phase; @@ -213,9 +211,6 @@ chk_engine_exit(struct chk_instance *ins, uint32_t ins_phase, uint32_t ins_statu DF_ENGINE" on rank %u notify leader for its exit, status %u: rc = %d\n", DP_ENGINE(ins), dss_self_rank(), ins_status, rc); } - -out: - ins->ci_sched_exiting = 0; } static int @@ -1858,11 +1853,11 @@ chk_engine_sched(void *args) D_INFO(DF_ENGINE" scheduler on rank %u entry at phase %u\n", DP_ENGINE(ins), myrank, cbk->cb_phase); - while (ins->ci_sched_running) { + while (!ins->ci_sched_exiting) { dss_sleep(300); /* Someone wants to stop the check. */ - if (!ins->ci_sched_running) + if (ins->ci_sched_exiting) D_GOTO(out, rc = 0); ins_phase = chk_pools_find_slowest(ins, &done); @@ -1942,6 +1937,7 @@ chk_engine_sched(void *args) D_INFO(DF_ENGINE" scheduler on rank %u exit at phase %u with status %u: rc %d\n", DP_ENGINE(ins), myrank, cbk->cb_phase, ins_status, rc); + ins->ci_sched_exiting = 0; ins->ci_sched_running = 0; } @@ -2306,7 +2302,7 @@ chk_engine_stop(uint64_t gen, int pool_nr, uuid_t pools[], uint32_t *flags) if (ins->ci_starting) D_GOTO(log, rc = -DER_BUSY); - if (ins->ci_stopping) + if (ins->ci_stopping || ins->ci_sched_exiting) D_GOTO(log, rc = -DER_INPROGRESS); if (cbk->cb_ins_status != CHK__CHECK_INST_STATUS__CIS_RUNNING) @@ -2563,7 +2559,7 @@ chk_engine_mark_rank_dead(uint64_t gen, d_rank_t rank, uint32_t version) * check instance; otherwise, related pool(s) will be marked as 'failed' when * try ro access something on the dead rank. * - * So here, it is not ncessary to find out the affected pools and fail them + * So here, it is not necessary to find out the affected pools and fail them * immediately when the death event is reported, instead, it will be handled * sometime later as the DAOS check going. */ @@ -3164,7 +3160,7 @@ chk_engine_report(struct chk_report_unit *cru, uint64_t *seq, int *decision) goto out; } - if (!ins->ci_sched_running || cpr->cpr_exiting) { + if (!ins->ci_sched_running || ins->ci_sched_exiting || cpr->cpr_exiting) { rc = 1; ABT_mutex_unlock(cpr->cpr_mutex); goto out; diff --git a/src/chk/chk_internal.h b/src/chk/chk_internal.h index 56c2a3ee1b9..bc383b60cb3 100644 --- a/src/chk/chk_internal.h +++ b/src/chk/chk_internal.h @@ -134,7 +134,7 @@ CRT_RPC_DECLARE(chk_query, DAOS_ISEQ_CHK_QUERY, DAOS_OSEQ_CHK_QUERY); /* * CHK_MARK: * From check leader to check engine to mark some rank as "dead". Under check mode, if some rank - * is dead (and failed to rejoin), it will not be exlcuded from related pool map to avoid further + * is dead (and failed to rejoin), it will not be excluded from related pool map to avoid further * damaging the system, instead, it will be mark as "dead" by the check instance and the check * status on related pool(s) will be marked as "failed". */ @@ -1150,43 +1150,37 @@ chk_dup_string(char **tgt, const char *src, size_t len) static inline void chk_stop_sched(struct chk_instance *ins) { + uint64_t gen = ins->ci_bk.cb_gen; + ABT_mutex_lock(ins->ci_abt_mutex); - if (ins->ci_sched != ABT_THREAD_NULL && ins->ci_sched_running) { - ins->ci_sched_running = 0; + if (ins->ci_sched_running && !ins->ci_sched_exiting) { + D_INFO("Stopping %s instance on rank %u with gen "DF_U64"\n", + ins->ci_is_leader ? "leader" : "engine", dss_self_rank(), gen); + ins->ci_sched_exiting = 1; ABT_cond_broadcast(ins->ci_abt_cond); - ABT_mutex_unlock(ins->ci_abt_mutex); - ABT_thread_free(&ins->ci_sched); - } else { - ABT_mutex_unlock(ins->ci_abt_mutex); } + + /* Check the ci_bk.cb_gen for the case of others restarted the checker during the wait. */ + while (ins->ci_sched_running && gen == ins->ci_bk.cb_gen) + ABT_cond_wait(ins->ci_abt_cond, ins->ci_abt_mutex); + ABT_mutex_unlock(ins->ci_abt_mutex); } static inline int chk_ins_can_start(struct chk_instance *ins) { - struct chk_bookmark *cbk = &ins->ci_bk; - if (unlikely(!ins->ci_inited)) return -DER_AGAIN; if (ins->ci_starting) return -DER_INPROGRESS; - if (ins->ci_stopping) + if (ins->ci_stopping || ins->ci_sched_exiting) return -DER_BUSY; if (ins->ci_sched_running) return -DER_ALREADY; - /* - * If ci_sched_running is zero but check instance is still running, - * then someone is trying to stop it. - */ - if (((ins->ci_is_leader && cbk->cb_magic == CHK_BK_MAGIC_LEADER) || - (!ins->ci_is_leader && cbk->cb_magic == CHK_BK_MAGIC_ENGINE)) && - cbk->cb_ins_status == CHK__CHECK_INST_STATUS__CIS_RUNNING) - return -DER_BUSY; - return 0; } diff --git a/src/chk/chk_leader.c b/src/chk/chk_leader.c index 030c3a7eec9..53474d5b4b6 100644 --- a/src/chk/chk_leader.c +++ b/src/chk/chk_leader.c @@ -229,8 +229,6 @@ chk_leader_exit(struct chk_instance *ins, uint32_t ins_phase, uint32_t ins_statu struct chk_iv iv = { 0 }; int rc = 0; - ins->ci_sched_exiting = 1; - D_ASSERT(d_list_empty(&ins->ci_pool_shutdown_list)); chk_pool_stop_all(ins, pool_status, NULL); @@ -262,8 +260,6 @@ chk_leader_exit(struct chk_instance *ins, uint32_t ins_phase, uint32_t ins_statu D_ERROR(DF_LEADER" exit with status %u: "DF_RC"\n", DP_LEADER(ins), ins_status, DP_RC(rc)); } - - ins->ci_sched_exiting = 0; } static void @@ -1306,7 +1302,7 @@ chk_leader_need_stop(struct chk_instance *ins, int *ret) } } - if (!ins->ci_sched_running) { + if (!ins->ci_sched_running || ins->ci_sched_exiting) { *ret = 0; return true; } @@ -1931,7 +1927,7 @@ chk_leader_pool_mbs_one(struct chk_pool_rec *cpr) if (rc1 == RSVC_CLIENT_RECHOOSE || (rc1 == RSVC_CLIENT_PROCEED && daos_rpc_retryable_rc(rc))) { dss_sleep(interval); - if (cpr->cpr_stop || !ins->ci_sched_running) { + if (cpr->cpr_stop || !ins->ci_sched_running || ins->ci_sched_exiting) { notify = false; D_GOTO(out_client, rc = 0); } @@ -2165,7 +2161,7 @@ chk_leader_sched(void *args) ABT_mutex_lock(ins->ci_abt_mutex); again: - if (!ins->ci_sched_running) { + if (ins->ci_sched_exiting) { ABT_mutex_unlock(ins->ci_abt_mutex); D_GOTO(out, rc = 0); } @@ -2304,6 +2300,7 @@ chk_leader_sched(void *args) D_INFO(DF_LEADER" scheduler exit at phase %u with status %u: rc %d\n", DP_LEADER(ins), cbk->cb_phase, ins_status, rc); + ins->ci_sched_exiting = 0; ins->ci_sched_running = 0; } @@ -3039,7 +3036,7 @@ chk_leader_stop(int pool_nr, uuid_t pools[]) if (ins->ci_starting) D_GOTO(log, rc = -DER_BUSY); - if (ins->ci_stopping) + if (ins->ci_stopping || ins->ci_sched_exiting) D_GOTO(log, rc = -DER_INPROGRESS); /* @@ -3620,7 +3617,7 @@ chk_leader_report(struct chk_report_unit *cru, uint64_t *seq, int *decision) goto out; } - if (!ins->ci_sched_running || cpr->cpr_exiting) { + if (!ins->ci_sched_running || ins->ci_sched_exiting || cpr->cpr_exiting) { rc = 1; ABT_mutex_unlock(cpr->cpr_mutex); goto out;