diff --git a/src/chk/chk_common.c b/src/chk/chk_common.c index ab5b9a100a5..997af457419 100644 --- a/src/chk/chk_common.c +++ b/src/chk/chk_common.c @@ -434,9 +434,14 @@ chk_pool_wait(struct chk_pool_rec *cpr) struct chk_pending_rec *tmp; D_ASSERT(cpr->cpr_refs > 0); + /* + * The caller chk_pool_stop_one() must firstly delete the cpr from the pool + * tree, then stop it here. So chk_pool_wait() will not be called repeatedly. + */ + D_ASSERT(cpr->cpr_stop == 0); ABT_mutex_lock(cpr->cpr_mutex); - if (cpr->cpr_thread != ABT_THREAD_NULL && !cpr->cpr_stop) { + if (cpr->cpr_thread != ABT_THREAD_NULL) { cpr->cpr_stop = 1; ABT_cond_broadcast(cpr->cpr_cond); ABT_mutex_unlock(cpr->cpr_mutex); diff --git a/src/chk/chk_engine.c b/src/chk/chk_engine.c index 3484a63d0dc..6ffb4c1e551 100644 --- a/src/chk/chk_engine.c +++ b/src/chk/chk_engine.c @@ -1858,11 +1858,11 @@ chk_engine_sched(void *args) D_INFO(DF_ENGINE" scheduler on rank %u entry at phase %u\n", DP_ENGINE(ins), myrank, cbk->cb_phase); - while (ins->ci_sched_running) { + while (!ins->ci_sched_exiting) { dss_sleep(300); /* Someone wants to stop the check. */ - if (!ins->ci_sched_running) + if (ins->ci_sched_exiting) D_GOTO(out, rc = 0); ins_phase = chk_pools_find_slowest(ins, &done); @@ -2293,9 +2293,11 @@ chk_engine_stop(uint64_t gen, int pool_nr, uuid_t pools[], uint32_t *flags) { struct chk_instance *ins = chk_engine; struct chk_bookmark *cbk = &ins->ci_bk; + struct chk_pool_rec *cpr; d_rank_t myrank = dss_self_rank(); int rc = 0; int i; + int active = false; if (gen != 0 && gen != cbk->cb_gen) D_GOTO(log, rc = -DER_NOTAPPLICABLE); @@ -2306,7 +2308,7 @@ chk_engine_stop(uint64_t gen, int pool_nr, uuid_t pools[], uint32_t *flags) if (ins->ci_starting) D_GOTO(log, rc = -DER_BUSY); - if (ins->ci_stopping) + if (ins->ci_stopping || ins->ci_sched_exiting) D_GOTO(log, rc = -DER_INPROGRESS); if (cbk->cb_ins_status != CHK__CHECK_INST_STATUS__CIS_RUNNING) @@ -2332,7 +2334,17 @@ chk_engine_stop(uint64_t gen, int pool_nr, uuid_t pools[], uint32_t *flags) if (ins->ci_pool_stopped) *flags = CSF_POOL_STOPPED; - if (d_list_empty(&ins->ci_pool_list)) { + d_list_for_each_entry(cpr, &ins->ci_pool_list, cpr_link) { + if (!cpr->cpr_done && !cpr->cpr_skip && !cpr->cpr_stop) { + D_ASSERTF(pool_nr != 0, "Hit active pool "DF_UUIDF" after stop all\n", + DP_UUID(cpr->cpr_uuid)); + + active = true; + break; + } + } + + if (!active) { chk_stop_sched(ins); /* To indicate that there is no active pool(s) on this rank. */ rc = 1; @@ -2563,7 +2575,7 @@ chk_engine_mark_rank_dead(uint64_t gen, d_rank_t rank, uint32_t version) * check instance; otherwise, related pool(s) will be marked as 'failed' when * try ro access something on the dead rank. * - * So here, it is not ncessary to find out the affected pools and fail them + * So here, it is not necessary to find out the affected pools and fail them * immediately when the death event is reported, instead, it will be handled * sometime later as the DAOS check going. */ @@ -3164,7 +3176,7 @@ chk_engine_report(struct chk_report_unit *cru, uint64_t *seq, int *decision) goto out; } - if (!ins->ci_sched_running || cpr->cpr_exiting) { + if (!ins->ci_sched_running || ins->ci_sched_exiting || cpr->cpr_exiting) { rc = 1; ABT_mutex_unlock(cpr->cpr_mutex); goto out; diff --git a/src/chk/chk_internal.h b/src/chk/chk_internal.h index 56c2a3ee1b9..e7f6d670d53 100644 --- a/src/chk/chk_internal.h +++ b/src/chk/chk_internal.h @@ -134,7 +134,7 @@ CRT_RPC_DECLARE(chk_query, DAOS_ISEQ_CHK_QUERY, DAOS_OSEQ_CHK_QUERY); /* * CHK_MARK: * From check leader to check engine to mark some rank as "dead". Under check mode, if some rank - * is dead (and failed to rejoin), it will not be exlcuded from related pool map to avoid further + * is dead (and failed to rejoin), it will not be excluded from related pool map to avoid further * damaging the system, instead, it will be mark as "dead" by the check instance and the check * status on related pool(s) will be marked as "failed". */ @@ -998,6 +998,9 @@ static inline void chk_pool_get(struct chk_pool_rec *cpr) { cpr->cpr_refs++; + + D_DEBUG(DB_TRACE, "Get ref on pool rec %p for "DF_UUIDF", ref %d\n", + cpr, DP_UUID(cpr->cpr_uuid), cpr->cpr_refs); } static inline void @@ -1009,6 +1012,9 @@ chk_pool_put(struct chk_pool_rec *cpr) /* NOTE: Before being destroyed, keep it in the list. */ D_ASSERT(!d_list_empty(&cpr->cpr_link)); + D_DEBUG(DB_TRACE, "Pet ref on pool rec %p for "DF_UUIDF", ref %d\n", + cpr, DP_UUID(cpr->cpr_uuid), cpr->cpr_refs); + if (--(cpr->cpr_refs) == 0) { d_list_del(&cpr->cpr_link); D_ASSERT(cpr->cpr_thread == ABT_THREAD_NULL); @@ -1035,6 +1041,9 @@ chk_pool_put(struct chk_pool_rec *cpr) D_FREE(cpr->cpr_mbs[i].cpm_tgt_status); } + D_DEBUG(DB_TRACE, "Destroy pool rec %p for "DF_UUIDF"\n", + cpr, DP_UUID(cpr->cpr_uuid)); + D_FREE(cpr->cpr_mbs); D_FREE(cpr->cpr_label); D_FREE(cpr); @@ -1150,43 +1159,42 @@ chk_dup_string(char **tgt, const char *src, size_t len) static inline void chk_stop_sched(struct chk_instance *ins) { + uint64_t gen = ins->ci_bk.cb_gen; + ABT_mutex_lock(ins->ci_abt_mutex); - if (ins->ci_sched != ABT_THREAD_NULL && ins->ci_sched_running) { - ins->ci_sched_running = 0; + if (ins->ci_sched_running && !ins->ci_sched_exiting) { + D_ASSERT(ins->ci_sched != ABT_THREAD_NULL); + + D_INFO("Stopping %s instance on rank %u with gen "DF_U64"\n", + ins->ci_is_leader ? "leader" : "engine", dss_self_rank(), gen); + + ins->ci_sched_exiting = 1; ABT_cond_broadcast(ins->ci_abt_cond); ABT_mutex_unlock(ins->ci_abt_mutex); ABT_thread_free(&ins->ci_sched); } else { ABT_mutex_unlock(ins->ci_abt_mutex); + /* Check ci_bk.cb_gen for the case of others restarted checker during my wait. */ + while (ins->ci_sched_running && gen == ins->ci_bk.cb_gen) + dss_sleep(300); } } static inline int chk_ins_can_start(struct chk_instance *ins) { - struct chk_bookmark *cbk = &ins->ci_bk; - if (unlikely(!ins->ci_inited)) return -DER_AGAIN; if (ins->ci_starting) return -DER_INPROGRESS; - if (ins->ci_stopping) + if (ins->ci_stopping || ins->ci_sched_exiting) return -DER_BUSY; if (ins->ci_sched_running) return -DER_ALREADY; - /* - * If ci_sched_running is zero but check instance is still running, - * then someone is trying to stop it. - */ - if (((ins->ci_is_leader && cbk->cb_magic == CHK_BK_MAGIC_LEADER) || - (!ins->ci_is_leader && cbk->cb_magic == CHK_BK_MAGIC_ENGINE)) && - cbk->cb_ins_status == CHK__CHECK_INST_STATUS__CIS_RUNNING) - return -DER_BUSY; - return 0; } diff --git a/src/chk/chk_leader.c b/src/chk/chk_leader.c index 030c3a7eec9..4fafb235953 100644 --- a/src/chk/chk_leader.c +++ b/src/chk/chk_leader.c @@ -1306,7 +1306,7 @@ chk_leader_need_stop(struct chk_instance *ins, int *ret) } } - if (!ins->ci_sched_running) { + if (!ins->ci_sched_running || ins->ci_sched_exiting) { *ret = 0; return true; } @@ -1931,7 +1931,7 @@ chk_leader_pool_mbs_one(struct chk_pool_rec *cpr) if (rc1 == RSVC_CLIENT_RECHOOSE || (rc1 == RSVC_CLIENT_PROCEED && daos_rpc_retryable_rc(rc))) { dss_sleep(interval); - if (cpr->cpr_stop || !ins->ci_sched_running) { + if (cpr->cpr_stop || !ins->ci_sched_running || ins->ci_sched_exiting) { notify = false; D_GOTO(out_client, rc = 0); } @@ -2165,7 +2165,7 @@ chk_leader_sched(void *args) ABT_mutex_lock(ins->ci_abt_mutex); again: - if (!ins->ci_sched_running) { + if (ins->ci_sched_exiting) { ABT_mutex_unlock(ins->ci_abt_mutex); D_GOTO(out, rc = 0); } @@ -3039,7 +3039,7 @@ chk_leader_stop(int pool_nr, uuid_t pools[]) if (ins->ci_starting) D_GOTO(log, rc = -DER_BUSY); - if (ins->ci_stopping) + if (ins->ci_stopping || ins->ci_sched_exiting) D_GOTO(log, rc = -DER_INPROGRESS); /* @@ -3620,7 +3620,7 @@ chk_leader_report(struct chk_report_unit *cru, uint64_t *seq, int *decision) goto out; } - if (!ins->ci_sched_running || cpr->cpr_exiting) { + if (!ins->ci_sched_running || ins->ci_sched_exiting || cpr->cpr_exiting) { rc = 1; ABT_mutex_unlock(cpr->cpr_mutex); goto out; diff --git a/src/include/daos_types.h b/src/include/daos_types.h index 5d7ccd95f68..1844e22bc80 100644 --- a/src/include/daos_types.h +++ b/src/include/daos_types.h @@ -239,7 +239,7 @@ static inline bool daos_is_valid_uuid_string(const char *uuid) { const char *p; - int len = DAOS_UUID_STR_SIZE - 1; /* Not include the ternimated '\0' */ + int len = DAOS_UUID_STR_SIZE - 1; /* Not include the terminated '\0' */ int i; if (strnlen(uuid, len) != len) diff --git a/src/rdb/rdb.c b/src/rdb/rdb.c index be1377f30ed..f0b9a2cba62 100644 --- a/src/rdb/rdb.c +++ b/src/rdb/rdb.c @@ -503,7 +503,7 @@ rdb_get_use_leases(void) * \a clue->bcl_replicas with d_rank_list_free. * * \param[in] storage database storage - * \parma[out] clue database clue + * \param[out] clue database clue */ int rdb_glance(struct rdb_storage *storage, struct rdb_clue *clue) diff --git a/src/tests/ftest/daos_test/suite.yaml b/src/tests/ftest/daos_test/suite.yaml index 3c6508240f2..3e795b41ad2 100644 --- a/src/tests/ftest/daos_test/suite.yaml +++ b/src/tests/ftest/daos_test/suite.yaml @@ -9,7 +9,7 @@ timeout: 600 timeouts: test_daos_degraded_mode: 450 test_daos_management: 110 - test_daos_cat_recovery: 1800 + test_daos_cat_recovery: 3000 test_daos_pool: 180 test_daos_container: 450 test_daos_epoch: 125 diff --git a/src/tests/ftest/util/dmg_utils_base.py b/src/tests/ftest/util/dmg_utils_base.py index 4a8cdaf07ee..c3e7958d534 100644 --- a/src/tests/ftest/util/dmg_utils_base.py +++ b/src/tests/ftest/util/dmg_utils_base.py @@ -216,8 +216,7 @@ class ConfigSubCommand(CommandWithSubCommand): def __init__(self): """Create a dmg config subcommand object.""" - super(DmgCommandBase.ConfigSubCommand, self).__init__( - "run/dmg/config/*", "config") + super(DmgCommandBase.ConfigSubCommand, self).__init__("run/dmg/config/*", "config") def get_sub_command_class(self): # pylint: disable=redefined-variable-type @@ -234,10 +233,8 @@ def __init__(self): """Create a dmg config generate object.""" super( DmgCommandBase.ConfigSubCommand.GenerateSubCommand, - self).__init__( - "/run/dmg/config/generate/*", "generate") - self.access_points = FormattedParameter( - "--access-points={}", None) + self).__init__("/run/dmg/config/generate/*", "generate") + self.access_points = FormattedParameter("--access-points={}", None) self.num_engines = FormattedParameter("--num-engines={}", None) self.scm_only = FormattedParameter("--scm-only", False) self.net_class = FormattedParameter("--net-class={}", None) @@ -277,8 +274,7 @@ class FaultsSubCommand(CommandWithSubCommand): def __init__(self): """Create a dmg faults subcommand object.""" - super(DmgCommandBase.FaultsSubCommand, self).__init__( - "run/dmg/faults/*", "faults") + super(DmgCommandBase.FaultsSubCommand, self).__init__("run/dmg/faults/*", "faults") def get_sub_command_class(self): # pylint: disable=redefined-variable-type @@ -299,8 +295,7 @@ def __init__(self): """Create a dmg faults add-checker-report object.""" super( DmgCommandBase.FaultsSubCommand.AddCheckerReportSubCommand, - self).__init__( - "/run/dmg/faults/add-checker-report/*", "add-checker-report") + self).__init__("/run/dmg/faults/add-checker-report/*", "add-checker-report") self.file = FormattedParameter("--file={}", None) self.checker_report_class = FormattedParameter("--class={}", None) @@ -525,8 +520,7 @@ class OverwriteAclSubCommand(CommandWithParameters): def __init__(self): """Create a dmg pool overwrite-acl command object.""" - super().__init__( - "/run/dmg/pool/overwrite-acl/*", "overwrite-acl") + super().__init__("/run/dmg/pool/overwrite-acl/*", "overwrite-acl") self.pool = BasicParameter(None, position=1) self.acl_file = FormattedParameter("-a {}", None) @@ -880,8 +874,7 @@ class EraseSubCommand(CommandWithParameters): def __init__(self): """Create a dmg system erase command object.""" - super().__init__( - "/run/dmg/system/erase/*", "erase") + super().__init__("/run/dmg/system/erase/*", "erase") class ExcludeSubCommand(CommandWithParameters): """Defines an object for the dmg system exclude command.""" @@ -897,8 +890,7 @@ class LeaderQuerySubCommand(CommandWithParameters): def __init__(self): """Create a dmg system leader-query command object.""" - super().__init__( - "/run/dmg/system/leader-query/*", "leader-query") + super().__init__("/run/dmg/system/leader-query/*", "leader-query") class ListPoolsSubCommand(CommandWithParameters): """Defines an object for the dmg system list-pools command.""" @@ -971,8 +963,7 @@ class ListSubCommand(CommandWithParameters): def __init__(self): """Create a dmg telemetry metrics list object.""" - super().__init__( - "/run/dmg/telemetry/metrics/list/*", "list") + super().__init__("/run/dmg/telemetry/metrics/list/*", "list") self.host = FormattedParameter("--host-list={}", None) self.port = FormattedParameter("--port={}", None) @@ -981,8 +972,7 @@ class QuerySubCommand(CommandWithParameters): def __init__(self): """Create a dmg telemetry metrics query object.""" - super().__init__( - "/run/dmg/telemetry/metrics/query/*", "query") + super().__init__("/run/dmg/telemetry/metrics/query/*", "query") self.host = FormattedParameter("--host-list={}", None) self.port = FormattedParameter("--port={}", None) self.metrics = FormattedParameter("--metrics={}", None) @@ -992,8 +982,7 @@ class VersionSubCommand(CommandWithSubCommand): def __init__(self): """Create a dmg version subcommand object.""" - super(DmgCommandBase.VersionSubCommand, self).__init__( - "/run/dmg/version/*", "version") + super(DmgCommandBase.VersionSubCommand, self).__init__("/run/dmg/version/*", "version") def _get_new(self): """Get a new object based upon this one.