diff --git a/src/include/daos_srv/rebuild.h b/src/include/daos_srv/rebuild.h
index 55c111b518f..90596e64d4f 100644
--- a/src/include/daos_srv/rebuild.h
+++ b/src/include/daos_srv/rebuild.h
@@ -71,6 +71,7 @@ int ds_rebuild_schedule(struct ds_pool *pool, uint32_t map_ver,
 			daos_epoch_t stable_eph, uint32_t layout_version,
 			struct pool_target_id_list *tgts,
 			daos_rebuild_opc_t rebuild_op, uint64_t delay_sec);
+void ds_rebuild_restart_if_rank_wip(uuid_t pool_uuid, d_rank_t rank);
 int ds_rebuild_query(uuid_t pool_uuid,
 		     struct daos_rebuild_status *status);
 void ds_rebuild_running_query(uuid_t pool_uuid, uint32_t opc, uint32_t *rebuild_ver,
diff --git a/src/pool/srv_pool.c b/src/pool/srv_pool.c
index 6e3a01379fa..aba200343ea 100644
--- a/src/pool/srv_pool.c
+++ b/src/pool/srv_pool.c
@@ -1458,6 +1458,47 @@ resume_event_handling(struct pool_svc *svc)
 	ABT_mutex_unlock(events->pse_mutex);
 }
 
+/*
+ * Restart rebuild if the rank is UPIN in the pool map and is still rebuilding.
+ *
+ * This function is only used when the PS leader gets a CRT_EVT_ALIVE event for engine
+ * \a rank. If that rank is UPIN in the pool map and has an unfinished rebuild, it should
+ * be the massive failure case:
+ * 1. Some engines went down and triggered a rebuild.
+ * 2. Engine \a rank participated in the rebuild and went down again before finishing;
+ *    the number of failures exceeded the pool RF, so the pool map was not changed.
+ * 3. That engine was restarted by the administrator.
+ *
+ * In that case the rebuild task on engine \a rank should be recovered. To keep it simple,
+ * for now just abort and retry the global rebuild task.
+ */
+static void
+pool_restart_rebuild_if_rank_wip(struct ds_pool *pool, d_rank_t rank)
+{
+	struct pool_domain *dom;
+	int                 status = -1;
+
+	/* Sample co_status under sp_lock: dom must not be dereferenced after the unlock. */
+	ABT_rwlock_rdlock(pool->sp_lock);
+	dom = pool_map_find_dom_by_rank(pool->sp_map, rank);
+	if (dom != NULL)
+		status = dom->do_comp.co_status;
+	ABT_rwlock_unlock(pool->sp_lock);
+
+	if (dom == NULL) {
+		D_INFO(DF_UUID": rank %d non-exist on pool map.\n",
+		       DP_UUID(pool->sp_uuid), rank);
+		return;
+	}
+	if (status != PO_COMP_ST_UPIN) {
+		D_INFO(DF_UUID": rank %d status %d in pool map, got CRT_EVT_ALIVE.\n",
+		       DP_UUID(pool->sp_uuid), rank, status);
+		return;
+	}
+
+	ds_rebuild_restart_if_rank_wip(pool->sp_uuid, rank);
+}
+
 static int pool_svc_exclude_ranks(struct pool_svc *svc, struct pool_svc_event_set *event_set);
 
 static int
@@ -1489,6 +1530,9 @@ handle_event(struct pool_svc *svc, struct pool_svc_event_set *event_set)
 
 		if (event->psv_src != CRT_EVS_SWIM || event->psv_type != CRT_EVT_ALIVE)
 			continue;
+
+		pool_restart_rebuild_if_rank_wip(svc->ps_pool, event->psv_rank);
+
 		if (ds_pool_map_rank_up(svc->ps_pool->sp_map, event->psv_rank)) {
 			/*
 			 * The rank is up in the pool map. Request a pool map
diff --git a/src/rebuild/srv.c b/src/rebuild/srv.c
index 845f8b21d87..e051d606794 100644
--- a/src/rebuild/srv.c
+++ b/src/rebuild/srv.c
@@ -486,6 +486,45 @@ ds_rebuild_running_query(uuid_t pool_uuid, uint32_t opc, uint32_t *upper_ver,
 	rpt_put(rpt);
 }
 
+/*
+ * Restart rebuild if \a rank's rebuild not finished.
+ * Only used for massive failure recovery case, see pool_restart_rebuild_if_rank_wip().
+ */
+void
+ds_rebuild_restart_if_rank_wip(uuid_t pool_uuid, d_rank_t rank)
+{
+	struct rebuild_global_pool_tracker *rgt;
+	int                                 idx;
+
+	rgt = rebuild_global_pool_tracker_lookup(pool_uuid, -1, -1);
+	if (rgt == NULL)
+		return;
+
+	/* Nothing to restart unless the tracked rebuild is in the DRS_IN_PROGRESS state. */
+	if (rgt->rgt_status.rs_state != DRS_IN_PROGRESS)
+		goto out;
+
+	/* Locate the entry of \a rank among the servers tracked by this rebuild. */
+	for (idx = 0; idx < rgt->rgt_servers_number; idx++) {
+		if (rgt->rgt_servers[idx].rank == rank)
+			break;
+	}
+	if (idx >= rgt->rgt_servers_number) {
+		D_INFO(DF_RB ": rank %d not in rgt_servers, rgt_servers_number %d\n",
+		       DP_RB_RGT(rgt), rank, rgt->rgt_servers_number);
+		goto out;
+	}
+
+	if (!rgt->rgt_servers[idx].pull_done) {
+		rgt->rgt_status.rs_errno     = -DER_STALE;
+		rgt->rgt_abort               = 1;
+		rgt->rgt_status.rs_fail_rank = rank;
+		D_INFO(DF_RB ": abort rebuild because rank %d WIP\n", DP_RB_RGT(rgt), rank);
+	}
+out:
+	rgt_put(rgt);
+}
+
 /* TODO: Add something about what the current operation is for output status */
 int
 ds_rebuild_query(uuid_t pool_uuid, struct daos_rebuild_status *status)