Skip to content

Commit

Permalink
DAOS-11498 mgmt: Fix PS replica leaks
Browse files Browse the repository at this point in the history
For each pool, the PS ranks stored on the MS may not cover all PS
replicas, in and out of the latest PS membership. The PS may have not
been able to send the latest PS ranks to the MS. Or, it may have been
unable to destroy some PS replicas after removing them from the
membership. The ds_mgmt_destroy_pool function currently only destroys
the PS replicas in the latest membership known to the MS, leaking any
other PS replicas.

This patch removes the step that only destroys the known PS replicas,
combines it with the target destroy CoRPC, and attempts to stop any PS
replicas on all PS engines. Note that the RDB files shall be deleted
along with other pool files. And, ds_pool_svc_create is renamed to
ds_pool_svc_dist_create to avoid confusion.

We then have a few interesting directions to explore?

  - Try releasing local pool handles and associated data structures
    (e.g., container handles) when destroy targets for a force-destroy
    operation.

  - Allow force-destroy to blindly, repeatedly attempt to destroy a
    pool, because it no longer needs to know what the PS ranks are and
    may go straightly to the target destroy CoRPC without contacting
    the PS, who may have already become unavailable due to previous
    destroy attempts.

Additionally, this patch fixes an unrelated assertion failure in
ds_mgmt_group_update that has happened during its CI testing:

  mgmt EMRG src/mgmt/srv_util.c:33 ds_mgmt_group_update() Assertion
  'version_current < version' failed: 10 < 9

Signed-off-by: Li Wei <[email protected]>
Required-githooks: true
  • Loading branch information
liw committed Sep 2, 2022
1 parent ad1af76 commit 521779e
Show file tree
Hide file tree
Showing 8 changed files with 51 additions and 81 deletions.
8 changes: 4 additions & 4 deletions src/include/daos_srv/pool.h
Original file line number Diff line number Diff line change
Expand Up @@ -173,10 +173,10 @@ int ds_pool_target_update_state(uuid_t pool_uuid, d_rank_list_t *ranks,
struct pool_target_addr_list *target_list,
pool_comp_state_t state);

int ds_pool_svc_create(const uuid_t pool_uuid, int ntargets, const char *group,
const d_rank_list_t *target_addrs, int ndomains, const uint32_t *domains,
daos_prop_t *prop, d_rank_list_t **svc_addrs);
int ds_pool_svc_destroy(const uuid_t pool_uuid, d_rank_list_t *svc_ranks);
int ds_pool_svc_dist_create(const uuid_t pool_uuid, int ntargets, const char *group,
const d_rank_list_t *target_addrs, int ndomains,
const uint32_t *domains, daos_prop_t *prop, d_rank_list_t **svc_addrs);
int ds_pool_svc_stop(uuid_t pool_uuid);
int ds_pool_svc_rf_to_nreplicas(int svc_rf);
int ds_pool_svc_rf_from_nreplicas(int nreplicas);

Expand Down
3 changes: 1 addition & 2 deletions src/mgmt/srv_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -142,8 +142,7 @@ int ds_mgmt_tgt_map_update_aggregator(crt_rpc_t *source, crt_rpc_t *result,
void ds_mgmt_tgt_mark_hdlr(crt_rpc_t *rpc);

/** srv_util.c */
int ds_mgmt_group_update(crt_group_mod_op_t op, struct server_entry *servers,
int nservers, uint32_t version);
int ds_mgmt_group_update(struct server_entry *servers, int nservers, uint32_t version);
void ds_mgmt_kill_rank(bool force);

#endif /* __SRV_MGMT_INTERNAL_H__ */
30 changes: 3 additions & 27 deletions src/mgmt/srv_pool.c
Original file line number Diff line number Diff line change
Expand Up @@ -157,8 +157,8 @@ ds_mgmt_pool_svc_create(uuid_t pool_uuid, int ntargets, const char *group, d_ran
D_DEBUG(DB_MGMT, DF_UUID": all tgts created, setting up pool "
"svc\n", DP_UUID(pool_uuid));

return ds_pool_svc_create(pool_uuid, ranks->rl_nr, group, ranks, domains_nr, domains, prop,
svc_list);
return ds_pool_svc_dist_create(pool_uuid, ranks->rl_nr, group, ranks, domains_nr, domains,
prop, svc_list);
}

int
Expand Down Expand Up @@ -242,7 +242,6 @@ ds_mgmt_destroy_pool(uuid_t pool_uuid, d_rank_list_t *svc_ranks)
{
int rc;
d_rank_list_t *ranks = NULL;
d_rank_list_t *filtered_svc = NULL;

D_DEBUG(DB_MGMT, "Destroying pool "DF_UUID"\n", DP_UUID(pool_uuid));

Expand All @@ -259,38 +258,15 @@ ds_mgmt_destroy_pool(uuid_t pool_uuid, d_rank_list_t *svc_ranks)
goto out;
}

/* Destroy pool service. Send corpc only to svc_ranks found in ranks.
* Control plane may not have been updated yet if any of svc_ranks
* were recently excluded from the pool.
*/
rc = d_rank_list_dup(&filtered_svc, svc_ranks);
if (rc)
D_GOTO(free_ranks, rc = -DER_NOMEM);
d_rank_list_filter(ranks, filtered_svc, false /* exclude */);
if (!d_rank_list_identical(filtered_svc, svc_ranks)) {
D_DEBUG(DB_MGMT, DF_UUID": %u svc_ranks, but only %u found "
"in pool map\n", DP_UUID(pool_uuid),
svc_ranks->rl_nr, filtered_svc->rl_nr);
}

rc = ds_pool_svc_destroy(pool_uuid, filtered_svc);
if (rc != 0) {
D_ERROR("Failed to destroy pool service " DF_UUID ", "
DF_RC "\n", DP_UUID(pool_uuid), DP_RC(rc));
goto free_filtered;
}

rc = ds_mgmt_tgt_pool_destroy(pool_uuid, ranks);
if (rc != 0) {
D_ERROR("Destroying pool "DF_UUID" failed, " DF_RC ".\n",
DP_UUID(pool_uuid), DP_RC(rc));
goto free_filtered;
goto free_ranks;
}

D_DEBUG(DB_MGMT, "Destroying pool " DF_UUID " succeeded.\n",
DP_UUID(pool_uuid));
free_filtered:
d_rank_list_free(filtered_svc);
free_ranks:
d_rank_list_free(ranks);
out:
Expand Down
10 changes: 2 additions & 8 deletions src/mgmt/srv_system.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* (C) Copyright 2019-2021 Intel Corporation.
* (C) Copyright 2019-2022 Intel Corporation.
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -179,16 +179,10 @@ ds_mgmt_group_update_handler(struct mgmt_grp_up_in *in)
if (rc != 0 && rc != -DER_NOTLEADER)
goto out;

D_DEBUG(DB_MGMT, "setting %d servers in map version %u\n",
in->gui_n_servers, in->gui_map_version);
rc = ds_mgmt_group_update(CRT_GROUP_MOD_OP_REPLACE, in->gui_servers,
in->gui_n_servers, in->gui_map_version);
rc = ds_mgmt_group_update(in->gui_servers, in->gui_n_servers, in->gui_map_version);
if (rc != 0)
goto out_svc;

D_DEBUG(DB_MGMT, "set %d servers in map version %u\n",
in->gui_n_servers, in->gui_map_version);

map_servers = dup_server_list(in->gui_servers, in->gui_n_servers);
if (map_servers == NULL) {
rc = -DER_NOMEM;
Expand Down
25 changes: 9 additions & 16 deletions src/mgmt/srv_target.c
Original file line number Diff line number Diff line change
Expand Up @@ -977,6 +977,13 @@ ds_mgmt_hdlr_tgt_destroy(crt_rpc_t *td_req)
D_DEBUG(DB_MGMT, DF_UUID": ready to destroy targets\n",
DP_UUID(td_in->td_pool_uuid));

rc = ds_pool_svc_stop(td_in->td_pool_uuid);
if (rc != 0) {
D_ERROR(DF_UUID": failed to stop pool service replica: "DF_RC"\n",
DP_UUID(td_in->td_pool_uuid), DP_RC(rc));
goto out;
}

ds_pool_stop(td_in->td_pool_uuid);

/** generate path to the target directory */
Expand Down Expand Up @@ -1102,23 +1109,9 @@ int
ds_mgmt_tgt_map_update_pre_forward(crt_rpc_t *rpc, void *arg)
{
struct mgmt_tgt_map_update_in *in = crt_req_get(rpc);
uint32_t version;
int rc;

rc = crt_group_version(NULL /* grp */, &version);
D_ASSERTF(rc == 0, "%d\n", rc);
D_DEBUG(DB_MGMT, "in=%u current=%u\n", in->tm_map_version, version);
if (in->tm_map_version <= version)
return 0;

rc = ds_mgmt_group_update(CRT_GROUP_MOD_OP_REPLACE,
in->tm_servers.ca_arrays,
in->tm_servers.ca_count, in->tm_map_version);
if (rc != 0)
return rc;

D_INFO("updated group: %u -> %u\n", version, in->tm_map_version);
return 0;
return ds_mgmt_group_update(in->tm_servers.ca_arrays, in->tm_servers.ca_count,
in->tm_map_version);
}

void
Expand Down
26 changes: 14 additions & 12 deletions src/mgmt/srv_util.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* (C) Copyright 2019-2021 Intel Corporation.
* (C) Copyright 2019-2022 Intel Corporation.
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand All @@ -15,8 +15,7 @@

/* Update the system group. */
int
ds_mgmt_group_update(crt_group_mod_op_t op, struct server_entry *servers,
int nservers, uint32_t version)
ds_mgmt_group_update(struct server_entry *servers, int nservers, uint32_t version)
{
struct dss_module_info *info = dss_get_module_info();
uint32_t version_current;
Expand All @@ -29,9 +28,11 @@ ds_mgmt_group_update(crt_group_mod_op_t op, struct server_entry *servers,

rc = crt_group_version(NULL /* grp */, &version_current);
D_ASSERTF(rc == 0, "%d\n", rc);
D_ASSERTF(version_current < version, "%u < %u\n", version_current,
version);
D_DEBUG(DB_MGMT, "%u -> %u\n", version_current, version);
D_DEBUG(DB_MGMT, "current=%u in=%u in_nservers=%d\n", version_current, version, nservers);
if (version <= version_current) {
rc = 0;
goto out;
}

ranks = d_rank_list_alloc(nservers);
if (ranks == NULL) {
Expand All @@ -49,13 +50,14 @@ ds_mgmt_group_update(crt_group_mod_op_t op, struct server_entry *servers,
for (i = 0; i < nservers; i++)
uris[i] = servers[i].se_uri;

rc = crt_group_primary_modify(NULL /* grp */, &info->dmi_ctx,
1 /* num_ctxs */, ranks, uris, op,
version);
if (rc != 0)
D_ERROR("failed to update group (op=%d version=%u): %d\n",
op, version, rc);
rc = crt_group_primary_modify(NULL /* grp */, &info->dmi_ctx, 1 /* num_ctxs */, ranks, uris,
CRT_GROUP_MOD_OP_REPLACE, version);
if (rc != 0) {
D_ERROR("failed to update group: %u -> %u: %d\n", version_current, version, rc);
goto out;
}

D_INFO("updated group: %u -> %u: %d ranks\n", version_current, version, nservers);
out:
if (uris != NULL)
D_FREE(uris);
Expand Down
2 changes: 1 addition & 1 deletion src/pool/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ The top-level KVS stores the pool map, security attributes such as the UID, GID

#### Pool / Pool Service Creation

Pool creation is driven entirely by the Management Service since it requires special privileges for steps related to allocation of storage and querying of fault domains. After formatting all the targets, the management module passes the control to the pool module by calling the`ds_pool_svc_create`, which initializes service replication on the selected subset of nodes for the combined Pool and Container Service. The Pool module now sends a `POOL_CREATE` request to the service leader which creates the service database; the list of targets and their fault domains are then converted into the initial version of the pool map and stored in the pool service, along with other initial pool metadata.
Pool creation is driven entirely by the Management Service since it requires special privileges for steps related to allocation of storage and querying of fault domains. After formatting all the targets, the management module passes the control to the pool module by calling the`ds_pool_svc_dist_create`, which initializes service replication on the selected subset of nodes for the combined Pool and Container Service. The Pool module now sends a `POOL_CREATE` request to the service leader which creates the service database; the list of targets and their fault domains are then converted into the initial version of the pool map and stored in the pool service, along with other initial pool metadata.

<a id="9.3.2"></a>

Expand Down
28 changes: 17 additions & 11 deletions src/pool/srv_pool.c
Original file line number Diff line number Diff line change
Expand Up @@ -711,9 +711,9 @@ pool_rsvc_client_complete_rpc(struct rsvc_client *client, const crt_endpoint_t *
* replica ranks
*/
int
ds_pool_svc_create(const uuid_t pool_uuid, int ntargets, const char *group,
const d_rank_list_t *target_addrs, int ndomains, const uint32_t *domains,
daos_prop_t *prop, d_rank_list_t **svc_addrs)
ds_pool_svc_dist_create(const uuid_t pool_uuid, int ntargets, const char *group,
const d_rank_list_t *target_addrs, int ndomains, const uint32_t *domains,
daos_prop_t *prop, d_rank_list_t **svc_addrs)
{
struct daos_prop_entry *svc_rf_entry;
d_rank_list_t *ranks;
Expand Down Expand Up @@ -817,18 +817,24 @@ ds_pool_svc_create(const uuid_t pool_uuid, int ntargets, const char *group,
return rc;
}

/** Stop any local PS replica for \a pool_uuid. */
int
ds_pool_svc_destroy(const uuid_t pool_uuid, d_rank_list_t *svc_ranks)
ds_pool_svc_stop(uuid_t pool_uuid)
{
d_iov_t psid;
int rc;
d_iov_t id;
int rc;

d_iov_set(&psid, (void *)pool_uuid, sizeof(uuid_t));
rc = ds_rsvc_dist_stop(DS_RSVC_CLASS_POOL, &psid, svc_ranks,
NULL /* excluded */, true /* destroy */);
if (rc != 0)
D_ERROR(DF_UUID": failed to destroy pool service: "DF_RC"\n",
d_iov_set(&id, pool_uuid, sizeof(uuid_t));

rc = ds_rsvc_stop(DS_RSVC_CLASS_POOL, &id, false /* destroy */);
if (rc == -DER_ALREADY) {
D_DEBUG(DB_MD, DF_UUID": pool service replica already stopped\n",
DP_UUID(pool_uuid));
rc = 0;
} else if (rc != 0) {
D_ERROR(DF_UUID": failed to stop pool service replica: "DF_RC"\n",
DP_UUID(pool_uuid), DP_RC(rc));
}

return rc;
}
Expand Down

0 comments on commit 521779e

Please sign in to comment.