From 67723f17662bffabdc30465fa699320270b27bd4 Mon Sep 17 00:00:00 2001 From: Nasf-Fan Date: Wed, 24 Jan 2024 01:01:04 +0800 Subject: [PATCH] DAOS-14976 object: properly select collective punch leader for resend (#13602) Before resending the collective punch RPC, we need to check whether the original leader shard is still valid. It is possible that the object layout has shrunk after a rebuild. In that case, select a new shard as the collective punch leader. Signed-off-by: Fan Yong --- src/object/cli_obj.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/object/cli_obj.c b/src/object/cli_obj.c index 088e87067c4..95c1c22d583 100644 --- a/src/object/cli_obj.c +++ b/src/object/cli_obj.c @@ -7060,7 +7060,12 @@ dc_obj_coll_punch(tse_task_t *task, struct dc_object *obj, struct dtx_epoch *epo if (rc != 0) goto out; + leader = coa->coa_dct_nr; + if (auxi->io_retry) { + if (unlikely(spa->pa_auxi.shard >= obj->cob_shards_nr)) + goto new_leader; + /* Try to reuse the same leader. */ rc = obj_shard_open(obj, spa->pa_auxi.shard, map_ver, &shard); if (rc == 0) { @@ -7078,10 +7083,13 @@ dc_obj_coll_punch(tse_task_t *task, struct dc_object *obj, struct dtx_epoch *epo /* Then change to new leader for retry. */ } - /* Randomly select a rank as the leader. */ - leader = d_rand() % coa->coa_dct_nr; - new_leader: + if (leader == coa->coa_dct_nr) + /* Randomly select a rank as the leader. */ + leader = d_rand() % coa->coa_dct_nr; + else + leader = (leader + 1) % coa->coa_dct_nr; + dct = &coa->coa_dcts[leader]; len = dct->dct_bitmap_sz << 3; @@ -7098,8 +7106,6 @@ dc_obj_coll_punch(tse_task_t *task, struct dc_object *obj, struct dtx_epoch *epo } } - /* Try another for leader. */ - leader = (leader + 1) % coa->coa_dct_nr; goto new_leader; gen_mbs: