diff --git a/src/chk/chk_engine.c b/src/chk/chk_engine.c index 3484a63d0dc..4d9fa96c0a2 100644 --- a/src/chk/chk_engine.c +++ b/src/chk/chk_engine.c @@ -171,8 +171,6 @@ chk_engine_exit(struct chk_instance *ins, uint32_t ins_phase, uint32_t ins_statu struct chk_iv iv = { 0 }; int rc; - ins->ci_sched_exiting = 1; - while ((cpr = d_list_pop_entry(&ins->ci_pool_shutdown_list, struct chk_pool_rec, cpr_shutdown_link)) != NULL) { chk_pool_shutdown(cpr, false); @@ -199,7 +197,7 @@ chk_engine_exit(struct chk_instance *ins, uint32_t ins_phase, uint32_t ins_statu ins_status != CHK__CHECK_INST_STATUS__CIS_STOPPED && ins_status != CHK__CHECK_INST_STATUS__CIS_IMPLICATED && ins->ci_iv_ns != NULL) { if (DAOS_FAIL_CHECK(DAOS_CHK_PS_NOTIFY_LEADER)) - goto out; + return; iv.ci_gen = cbk->cb_gen; iv.ci_phase = cbk->cb_phase; @@ -213,9 +211,6 @@ chk_engine_exit(struct chk_instance *ins, uint32_t ins_phase, uint32_t ins_statu DF_ENGINE" on rank %u notify leader for its exit, status %u: rc = %d\n", DP_ENGINE(ins), dss_self_rank(), ins_status, rc); } - -out: - ins->ci_sched_exiting = 0; } static int @@ -1858,11 +1853,11 @@ chk_engine_sched(void *args) D_INFO(DF_ENGINE" scheduler on rank %u entry at phase %u\n", DP_ENGINE(ins), myrank, cbk->cb_phase); - while (ins->ci_sched_running) { + while (!ins->ci_sched_exiting) { dss_sleep(300); /* Someone wants to stop the check. */ - if (!ins->ci_sched_running) + if (ins->ci_sched_exiting) D_GOTO(out, rc = 0); ins_phase = chk_pools_find_slowest(ins, &done); @@ -1942,6 +1937,7 @@ chk_engine_sched(void *args) D_INFO(DF_ENGINE" scheduler on rank %u exit at phase %u with status %u: rc %d\n", DP_ENGINE(ins), myrank, cbk->cb_phase, ins_status, rc); + ins->ci_sched_exiting = 0; ins->ci_sched_running = 0; } @@ -2306,7 +2302,7 @@ chk_engine_stop(uint64_t gen, int pool_nr, uuid_t pools[], uint32_t *flags) if (ins->ci_starting) D_GOTO(log, rc = -DER_BUSY); - if (ins->ci_stopping) + if (ins->ci_stopping || ins->ci_sched_exiting) D_GOTO(log, rc = -DER_INPROGRESS); if (cbk->cb_ins_status != CHK__CHECK_INST_STATUS__CIS_RUNNING) @@ -2563,7 +2559,7 @@ chk_engine_mark_rank_dead(uint64_t gen, d_rank_t rank, uint32_t version) * check instance; otherwise, related pool(s) will be marked as 'failed' when * try ro access something on the dead rank. * - * So here, it is not ncessary to find out the affected pools and fail them + * So here, it is not necessary to find out the affected pools and fail them * immediately when the death event is reported, instead, it will be handled * sometime later as the DAOS check going. */ @@ -3164,7 +3160,7 @@ chk_engine_report(struct chk_report_unit *cru, uint64_t *seq, int *decision) goto out; } - if (!ins->ci_sched_running || cpr->cpr_exiting) { + if (!ins->ci_sched_running || ins->ci_sched_exiting || cpr->cpr_exiting) { rc = 1; ABT_mutex_unlock(cpr->cpr_mutex); goto out; diff --git a/src/chk/chk_internal.h b/src/chk/chk_internal.h index 56c2a3ee1b9..bc383b60cb3 100644 --- a/src/chk/chk_internal.h +++ b/src/chk/chk_internal.h @@ -134,7 +134,7 @@ CRT_RPC_DECLARE(chk_query, DAOS_ISEQ_CHK_QUERY, DAOS_OSEQ_CHK_QUERY); /* * CHK_MARK: * From check leader to check engine to mark some rank as "dead". Under check mode, if some rank - * is dead (and failed to rejoin), it will not be exlcuded from related pool map to avoid further + * is dead (and failed to rejoin), it will not be excluded from related pool map to avoid further * damaging the system, instead, it will be mark as "dead" by the check instance and the check * status on related pool(s) will be marked as "failed". */ @@ -1150,43 +1150,37 @@ chk_dup_string(char **tgt, const char *src, size_t len) static inline void chk_stop_sched(struct chk_instance *ins) { + uint64_t gen = ins->ci_bk.cb_gen; + ABT_mutex_lock(ins->ci_abt_mutex); - if (ins->ci_sched != ABT_THREAD_NULL && ins->ci_sched_running) { - ins->ci_sched_running = 0; + if (ins->ci_sched_running && !ins->ci_sched_exiting) { + D_INFO("Stopping %s instance on rank %u with gen "DF_U64"\n", + ins->ci_is_leader ? "leader" : "engine", dss_self_rank(), gen); + ins->ci_sched_exiting = 1; ABT_cond_broadcast(ins->ci_abt_cond); - ABT_mutex_unlock(ins->ci_abt_mutex); - ABT_thread_free(&ins->ci_sched); - } else { - ABT_mutex_unlock(ins->ci_abt_mutex); } + + /* Check the ci_bk.cb_gen for the case of others restarted the checker during the wait. */ + while (ins->ci_sched_running && gen == ins->ci_bk.cb_gen) + ABT_cond_wait(ins->ci_abt_cond, ins->ci_abt_mutex); + ABT_mutex_unlock(ins->ci_abt_mutex); } static inline int chk_ins_can_start(struct chk_instance *ins) { - struct chk_bookmark *cbk = &ins->ci_bk; - if (unlikely(!ins->ci_inited)) return -DER_AGAIN; if (ins->ci_starting) return -DER_INPROGRESS; - if (ins->ci_stopping) + if (ins->ci_stopping || ins->ci_sched_exiting) return -DER_BUSY; if (ins->ci_sched_running) return -DER_ALREADY; - /* - * If ci_sched_running is zero but check instance is still running, - * then someone is trying to stop it. - */ - if (((ins->ci_is_leader && cbk->cb_magic == CHK_BK_MAGIC_LEADER) || - (!ins->ci_is_leader && cbk->cb_magic == CHK_BK_MAGIC_ENGINE)) && - cbk->cb_ins_status == CHK__CHECK_INST_STATUS__CIS_RUNNING) - return -DER_BUSY; - return 0; } diff --git a/src/chk/chk_leader.c b/src/chk/chk_leader.c index 030c3a7eec9..53474d5b4b6 100644 --- a/src/chk/chk_leader.c +++ b/src/chk/chk_leader.c @@ -229,8 +229,6 @@ chk_leader_exit(struct chk_instance *ins, uint32_t ins_phase, uint32_t ins_statu struct chk_iv iv = { 0 }; int rc = 0; - ins->ci_sched_exiting = 1; - D_ASSERT(d_list_empty(&ins->ci_pool_shutdown_list)); chk_pool_stop_all(ins, pool_status, NULL); @@ -262,8 +260,6 @@ chk_leader_exit(struct chk_instance *ins, uint32_t ins_phase, uint32_t ins_statu D_ERROR(DF_LEADER" exit with status %u: "DF_RC"\n", DP_LEADER(ins), ins_status, DP_RC(rc)); } - - ins->ci_sched_exiting = 0; } static void @@ -1306,7 +1302,7 @@ chk_leader_need_stop(struct chk_instance *ins, int *ret) } } - if (!ins->ci_sched_running) { + if (!ins->ci_sched_running || ins->ci_sched_exiting) { *ret = 0; return true; } @@ -1931,7 +1927,7 @@ chk_leader_pool_mbs_one(struct chk_pool_rec *cpr) if (rc1 == RSVC_CLIENT_RECHOOSE || (rc1 == RSVC_CLIENT_PROCEED && daos_rpc_retryable_rc(rc))) { dss_sleep(interval); - if (cpr->cpr_stop || !ins->ci_sched_running) { + if (cpr->cpr_stop || !ins->ci_sched_running || ins->ci_sched_exiting) { notify = false; D_GOTO(out_client, rc = 0); } @@ -2165,7 +2161,7 @@ chk_leader_sched(void *args) ABT_mutex_lock(ins->ci_abt_mutex); again: - if (!ins->ci_sched_running) { + if (ins->ci_sched_exiting) { ABT_mutex_unlock(ins->ci_abt_mutex); D_GOTO(out, rc = 0); } @@ -2304,6 +2300,7 @@ chk_leader_sched(void *args) D_INFO(DF_LEADER" scheduler exit at phase %u with status %u: rc %d\n", DP_LEADER(ins), cbk->cb_phase, ins_status, rc); + ins->ci_sched_exiting = 0; ins->ci_sched_running = 0; } @@ -3039,7 +3036,7 @@ chk_leader_stop(int pool_nr, uuid_t pools[]) if (ins->ci_starting) D_GOTO(log, rc = -DER_BUSY); - if (ins->ci_stopping) + if (ins->ci_stopping || ins->ci_sched_exiting) D_GOTO(log, rc = -DER_INPROGRESS); /* @@ -3620,7 +3617,7 @@ chk_leader_report(struct chk_report_unit *cru, uint64_t *seq, int *decision) goto out; } - if (!ins->ci_sched_running || cpr->cpr_exiting) { + if (!ins->ci_sched_running || ins->ci_sched_exiting || cpr->cpr_exiting) { rc = 1; ABT_mutex_unlock(cpr->cpr_mutex); goto out;