Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DAOS-15056 rebuild: add rpt to the rgt list properly #13862

Merged
merged 3 commits into from
Feb 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions src/rebuild/scan.c
Original file line number Diff line number Diff line change
Expand Up @@ -1117,10 +1117,10 @@ rebuild_tgt_scan_handler(crt_rpc_t *rpc)
(rpt->rt_rebuild_op == rsi->rsi_rebuild_op &&
rpt->rt_rebuild_ver == rsi->rsi_rebuild_ver &&
rpt->rt_rebuild_gen < rsi->rsi_rebuild_gen))) {
D_INFO(DF_UUID" %s %u/"DF_U64" < incoming rebuild %u/"DF_U64"\n",
DP_UUID(rpt->rt_pool_uuid), RB_OP_STR(rpt->rt_rebuild_op),
rpt->rt_rebuild_ver, rpt->rt_leader_term, rsi->rsi_rebuild_ver,
rsi->rsi_leader_term);
D_INFO(DF_UUID" %p %s %u/"DF_U64"/%u < incoming rebuild %u/"DF_U64"/%u\n",
DP_UUID(rpt->rt_pool_uuid), rpt, RB_OP_STR(rpt->rt_rebuild_op),
rpt->rt_rebuild_ver, rpt->rt_leader_term, rpt->rt_rebuild_gen,
rsi->rsi_rebuild_ver, rsi->rsi_leader_term, rsi->rsi_rebuild_gen);
rpt->rt_abort = 1;
}
}
Expand Down
41 changes: 28 additions & 13 deletions src/rebuild/srv.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* (C) Copyright 2016-2023 Intel Corporation.
* (C) Copyright 2016-2024 Intel Corporation.
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -1069,6 +1069,17 @@ rpt_put(struct rebuild_tgt_pool_tracker *rpt)
if (rpt->rt_refcount == 1 && rpt->rt_finishing)
ABT_cond_signal(rpt->rt_fini_cond);
ABT_mutex_unlock(rpt->rt_lock);
if (rpt->rt_refcount == 0) {
/* If rebuild tracker ULT is started successfully, then
* the rpt will be destroyed in rebuild_tgt_fini().
* Otherwise the rpt might be destroyed if the tracking
* ULT is not started. Since it will happen in system
* XS, no need lock here.
*/
D_ASSERT(dss_get_module_info()->dmi_xs_id == 0);
d_list_del_init(&rpt->rt_list);
rpt_destroy(rpt);
}
}

static void
Expand Down Expand Up @@ -1730,6 +1741,9 @@ ds_rebuild_abort(uuid_t pool_uuid, unsigned int ver, unsigned int gen, uint64_t
(ver == (unsigned int)(-1) || rpt->rt_rebuild_ver == ver) &&
(gen == (unsigned int)(-1) || rpt->rt_rebuild_gen == gen) &&
(term == (uint64_t)(-1) || rpt->rt_leader_term == term)) {
D_INFO(DF_UUID" try abort the rpt %p op %s ver %u gen %u\n",
DP_UUID(rpt->rt_pool_uuid), rpt, RB_OP_STR(rpt->rt_rebuild_op),
rpt->rt_rebuild_ver, rpt->rt_rebuild_gen);
rpt->rt_abort = 1;
aborted = false;
}
Expand Down Expand Up @@ -2157,16 +2171,10 @@ rebuild_tgt_fini(struct rebuild_tgt_pool_tracker *rpt)
DP_UUID(rpt->rt_pool_uuid), DP_RC(rc));
/* destroy the migrate_tls of 0-xstream */
ds_migrate_stop(rpt->rt_pool, rpt->rt_rebuild_ver, rpt->rt_rebuild_gen);
d_list_del_init(&rpt->rt_list);
rpt_put(rpt);
/* No one should access rpt after rebuild_fini_one.
*/
D_ASSERT(rpt->rt_refcount == 0);

/* No one should access rpt after rebuild_fini_one. */
D_INFO("Finalized rebuild for "DF_UUID", map_ver=%u.\n",
DP_UUID(rpt->rt_pool_uuid), rpt->rt_rebuild_ver);

rpt_destroy(rpt);
rpt_put(rpt);
}

void
Expand Down Expand Up @@ -2467,6 +2475,11 @@ rebuild_tgt_prepare(crt_rpc_t *rpc, struct rebuild_tgt_pool_tracker **p_rpt)

rpt->rt_rebuild_op = rsi->rsi_rebuild_op;

/* Let's add the rpt to the tracker list before IV fetch, which might yield,
* to make sure the new coming request can find the rpt in the list.
*/
rpt_get(rpt);
d_list_add(&rpt->rt_list, &rebuild_gst.rg_tgt_tracker_list);
rc = ds_pool_iv_srv_hdl_fetch(pool, &rpt->rt_poh_uuid,
&rpt->rt_coh_uuid);
if (rc)
Expand Down Expand Up @@ -2506,14 +2519,16 @@ rebuild_tgt_prepare(crt_rpc_t *rpc, struct rebuild_tgt_pool_tracker **p_rpt)
rpt->rt_pool = pool; /* pin it */
ABT_mutex_unlock(rpt->rt_lock);

rpt_get(rpt);
d_list_add(&rpt->rt_list, &rebuild_gst.rg_tgt_tracker_list);
*p_rpt = rpt;
out:
if (rc) {
if (rpt)
if (rpt) {
if (!d_list_empty(&rpt->rt_list)) {
d_list_del_init(&rpt->rt_list);
rpt_put(rpt);
}
rpt_put(rpt);

}
ds_pool_put(pool);
}
daos_prop_fini(&prop);
Expand Down
Loading