Skip to content

Commit

Permalink
DAOS-15498 rebuild: reprobe in migrate_obj_iter_cb (#14458) (#14494)
Browse files Browse the repository at this point in the history
migrate_system_enter() may yield the ULT, so the iterator needs to be
re-probed before the delete.

Signed-off-by: Xuezhao Liu <[email protected]>
  • Loading branch information
jolivier23 authored Jun 3, 2024
1 parent 2b519e9 commit 533ce74
Showing 1 changed file with 16 additions and 2 deletions.
18 changes: 16 additions & 2 deletions src/object/srv_obj_migrate.c
Original file line number Diff line number Diff line change
Expand Up @@ -1869,7 +1869,7 @@ enum {

/* Check if there are enough resources for the migration to proceed. */
static int
migrate_system_enter(struct migrate_pool_tls *tls, int tgt_idx)
migrate_system_enter(struct migrate_pool_tls *tls, int tgt_idx, bool *yielded)
{
uint32_t tgt_cnt = 0;
int rc = 0;
Expand All @@ -1883,6 +1883,7 @@ migrate_system_enter(struct migrate_pool_tls *tls, int tgt_idx)
while ((tls->mpt_inflight_max_ult / dss_tgt_nr) <= tgt_cnt) {
D_DEBUG(DB_REBUILD, "tgt%d:%u max %u\n",
tgt_idx, tgt_cnt, tls->mpt_inflight_max_ult / dss_tgt_nr);
*yielded = true;
ABT_mutex_lock(tls->mpt_inflight_mutex);
ABT_cond_wait(tls->mpt_inflight_cond, tls->mpt_inflight_mutex);
ABT_mutex_unlock(tls->mpt_inflight_mutex);
Expand Down Expand Up @@ -3378,6 +3379,8 @@ migrate_obj_iter_cb(daos_handle_t ih, d_iov_t *key_iov, d_iov_t *val_iov, void *
daos_epoch_t punched_epoch = obj_val->punched_epoch;
unsigned int tgt_idx = obj_val->tgt_idx;
unsigned int shard = obj_val->shard;
d_iov_t tmp_iov;
bool yielded = false;
int rc;

if (arg->pool_tls->mpt_fini)
Expand All @@ -3387,7 +3390,7 @@ migrate_obj_iter_cb(daos_handle_t ih, d_iov_t *key_iov, d_iov_t *val_iov, void *
" eph "DF_U64" start\n", DP_UUID(arg->cont_uuid), DP_UOID(*oid),
ih.cookie, epoch);

rc = migrate_system_enter(arg->pool_tls, tgt_idx);
rc = migrate_system_enter(arg->pool_tls, tgt_idx, &yielded);
if (rc != 0) {
DL_ERROR(rc, DF_UUID" enter migrate failed.", DP_UUID(arg->cont_uuid));
return rc;
Expand All @@ -3401,6 +3404,17 @@ migrate_obj_iter_cb(daos_handle_t ih, d_iov_t *key_iov, d_iov_t *val_iov, void *
return rc;
}

/* migrate_system_enter possibly yielded the ULT, let's re-probe before delete */
if (yielded) {
d_iov_set(&tmp_iov, oid, sizeof(*oid));
rc = dbtree_iter_probe(ih, BTR_PROBE_EQ, DAOS_INTENT_MIGRATION, &tmp_iov, NULL);
if (rc) {
D_ASSERT(rc != -DER_NONEXIST);
D_ERROR("obj "DF_UOID" probe failed: "DF_RC"\n", DP_UOID(*oid), DP_RC(rc));
return rc;
}
}

rc = dbtree_iter_delete(ih, NULL);
if (rc) {
D_ERROR("dbtree_iter_delete failed: "DF_RC"\n", DP_RC(rc));
Expand Down

0 comments on commit 533ce74

Please sign in to comment.