Skip to content

Commit

Permalink
DAOS-15605 rsvc: Create rsvc with VOS DF version (#14156)
Browse files Browse the repository at this point in the history
If a pool with an old layout version is served by a DAOS version with a
new default layout version, for instance, a 2.4-layout pool served by
DAOS 2.5, then any new VOS pools created for this DAOS pool must use the
old layout, or downgrading back to the old DAOS version would become
impossible.

Signed-off-by: Li Wei <[email protected]>
  • Loading branch information
liw authored Apr 18, 2024
1 parent eeee392 commit 655d23a
Show file tree
Hide file tree
Showing 11 changed files with 93 additions and 61 deletions.
1 change: 1 addition & 0 deletions src/include/daos_srv/pool.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@

/* age of an entry in svc_ops KVS before it may be evicted */
#define DEFAULT_SVC_OPS_ENTRY_AGE_SEC_MAX 300ULL

/*
* Pool object
*
Expand Down
4 changes: 2 additions & 2 deletions src/include/daos_srv/rdb.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,8 +118,8 @@ struct rdb_cbs;

/** Database storage methods */
int rdb_create(const char *path, const uuid_t uuid, uint64_t caller_term, size_t size,
const d_rank_list_t *replicas, struct rdb_cbs *cbs, void *arg,
struct rdb_storage **storagep);
uint32_t vos_df_version, const d_rank_list_t *replicas, struct rdb_cbs *cbs,
void *arg, struct rdb_storage **storagep);
int rdb_open(const char *path, const uuid_t uuid, uint64_t caller_term, struct rdb_cbs *cbs,
void *arg, struct rdb_storage **storagep);
void rdb_close(struct rdb_storage *storage);
Expand Down
14 changes: 7 additions & 7 deletions src/include/daos_srv/rsvc.h
Original file line number Diff line number Diff line change
Expand Up @@ -124,23 +124,23 @@ int ds_rsvc_start_nodb(enum ds_rsvc_class_id class, d_iov_t *id,
int ds_rsvc_stop_nodb(enum ds_rsvc_class_id class, d_iov_t *id);

int ds_rsvc_start(enum ds_rsvc_class_id class, d_iov_t *id, uuid_t db_uuid, uint64_t caller_term,
bool create, size_t size, d_rank_list_t *replicas, void *arg);
bool create, size_t size, uint32_t vos_df_version, d_rank_list_t *replicas,
void *arg);
int ds_rsvc_stop(enum ds_rsvc_class_id class, d_iov_t *id, uint64_t caller_term, bool destroy);
int ds_rsvc_stop_all(enum ds_rsvc_class_id class);
int ds_rsvc_stop_leader(enum ds_rsvc_class_id class, d_iov_t *id,
struct rsvc_hint *hint);
int ds_rsvc_dist_start(enum ds_rsvc_class_id class, d_iov_t *id, const uuid_t dbid,
const d_rank_list_t *ranks, uint64_t caller_term, bool create,
bool bootstrap, size_t size);
bool bootstrap, size_t size, uint32_t vos_df_version);
int ds_rsvc_dist_stop(enum ds_rsvc_class_id class, d_iov_t *id, const d_rank_list_t *ranks,
d_rank_list_t *excluded, uint64_t caller_term, bool destroy);
enum ds_rsvc_state ds_rsvc_get_state(struct ds_rsvc *svc);
void ds_rsvc_set_state(struct ds_rsvc *svc, enum ds_rsvc_state state);
int ds_rsvc_add_replicas_s(struct ds_rsvc *svc, d_rank_list_t *ranks,
size_t size);
int ds_rsvc_add_replicas(enum ds_rsvc_class_id class, d_iov_t *id,
d_rank_list_t *ranks, size_t size,
struct rsvc_hint *hint);
int ds_rsvc_add_replicas_s(struct ds_rsvc *svc, d_rank_list_t *ranks, size_t size,
uint32_t vos_df_version);
int ds_rsvc_add_replicas(enum ds_rsvc_class_id class, d_iov_t *id, d_rank_list_t *ranks,
size_t size, uint32_t vos_df_version, struct rsvc_hint *hint);
int ds_rsvc_remove_replicas_s(struct ds_rsvc *svc, d_rank_list_t *ranks);
int ds_rsvc_remove_replicas(enum ds_rsvc_class_id class, d_iov_t *id, d_rank_list_t *ranks,
struct rsvc_hint *hint);
Expand Down
2 changes: 1 addition & 1 deletion src/pool/srv_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ struct pool_map_refresh_ult_arg {
*/
void ds_pool_rsvc_class_register(void);
void ds_pool_rsvc_class_unregister(void);
uint32_t ds_pool_get_vos_pool_df_version(uint32_t pool_global_version);
uint32_t ds_pool_get_vos_df_version(uint32_t pool_global_version);
int ds_pool_start_all(void);
int ds_pool_stop_all(void);
int ds_pool_hdl_is_from_srv(struct ds_pool *pool, uuid_t hdl);
Expand Down
60 changes: 37 additions & 23 deletions src/pool/srv_pool.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,11 @@
#define PS_OPS_PER_SEC 4096

/*
* Return the corresponding VOS pool DF version or 0 if pool_global_version is
* not supported.
* Return the corresponding VOS DF version or 0 if pool_global_version is not
* supported.
*/
uint32_t
ds_pool_get_vos_pool_df_version(uint32_t pool_global_version)
ds_pool_get_vos_df_version(uint32_t pool_global_version)
{
if (pool_global_version >= 3)
return VOS_POOL_DF_2_6;
Expand Down Expand Up @@ -983,7 +983,8 @@ ds_pool_svc_dist_create(const uuid_t pool_uuid, int ntargets, const char *group,

d_iov_set(&psid, (void *)pool_uuid, sizeof(uuid_t));
rc = ds_rsvc_dist_start(DS_RSVC_CLASS_POOL, &psid, pool_uuid, ranks, RDB_NIL_TERM,
true /* create */, true /* bootstrap */, ds_rsvc_get_md_cap());
true /* create */, true /* bootstrap */, ds_rsvc_get_md_cap(),
0 /* vos_df_version */);
if (rc != 0)
D_GOTO(out_ranks, rc);

Expand Down Expand Up @@ -1570,6 +1571,7 @@ read_db_for_stepping_up(struct pool_svc *svc, struct pool_buf **map_buf,
DP_UUID(svc->ps_uuid), DP_RC(rc));
goto out_lock;
}
D_INFO(DF_UUID ": layout version %u\n", DP_UUID(svc->ps_uuid), svc->ps_global_version);
version_exists = true;

/**
Expand Down Expand Up @@ -2152,7 +2154,7 @@ start_one(uuid_t uuid, void *varg)

d_iov_set(&id, uuid, sizeof(uuid_t));
ds_rsvc_start(DS_RSVC_CLASS_POOL, &id, uuid, RDB_NIL_TERM, false /* create */, 0 /* size */,
NULL /* replicas */, NULL /* arg */);
0 /* vos_df_version */, NULL /* replicas */, NULL /* arg */);
return 0;
}

Expand Down Expand Up @@ -6400,12 +6402,12 @@ pool_svc_reconf_ult(void *varg)
struct pool_svc_reconf_arg *arg = reconf->psc_arg;
struct pool_svc *svc;
struct pool_map *map;
d_rank_list_t *current;
d_rank_list_t *pre;
d_rank_list_t *to_add;
d_rank_list_t *to_remove;
d_rank_list_t *new;
uint64_t rdb_nbytes = 0;
int rc;
d_rank_list_t *post;
uint64_t rdb_nbytes = 0;
int rc;

svc = container_of(reconf, struct pool_svc, ps_reconf_sched);

Expand All @@ -6430,7 +6432,7 @@ pool_svc_reconf_ult(void *varg)
}
}

rc = rdb_get_ranks(svc->ps_rsvc.s_db, &current);
rc = rdb_get_ranks(svc->ps_rsvc.s_db, &pre);
if (rc != 0) {
D_ERROR(DF_UUID": failed to get pool service replica ranks: "DF_RC"\n",
DP_UUID(svc->ps_uuid), DP_RC(rc));
Expand All @@ -6447,7 +6449,7 @@ pool_svc_reconf_ult(void *varg)

if (arg->sca_map == NULL)
ABT_rwlock_rdlock(svc->ps_pool->sp_lock);
rc = ds_pool_plan_svc_reconfs(svc->ps_svc_rf, map, current, dss_self_rank(),
rc = ds_pool_plan_svc_reconfs(svc->ps_svc_rf, map, pre, dss_self_rank(),
arg->sca_sync_remove /* filter_only */, &to_add, &to_remove);
if (arg->sca_map == NULL)
ABT_rwlock_unlock(svc->ps_pool->sp_lock);
Expand All @@ -6457,9 +6459,8 @@ pool_svc_reconf_ult(void *varg)
goto out_cur;
}

D_DEBUG(DB_MD, DF_UUID": svc_rf=%d current=%u to_add=%u to_remove=%u\n",
DP_UUID(svc->ps_uuid), svc->ps_svc_rf, current->rl_nr, to_add->rl_nr,
to_remove->rl_nr);
D_DEBUG(DB_MD, DF_UUID ": svc_rf=%d pre=%u to_add=%u to_remove=%u\n", DP_UUID(svc->ps_uuid),
svc->ps_svc_rf, pre->rl_nr, to_add->rl_nr, to_remove->rl_nr);

/*
* Ignore the return values from the "add" and "remove" calls here. If
Expand All @@ -6470,7 +6471,12 @@ pool_svc_reconf_ult(void *varg)
* membership changes to the MS.
*/
if (!arg->sca_sync_remove && to_add->rl_nr > 0) {
ds_rsvc_add_replicas_s(&svc->ps_rsvc, to_add, rdb_nbytes);
uint32_t vos_df_version;

vos_df_version = ds_pool_get_vos_df_version(svc->ps_global_version);
D_ASSERTF(vos_df_version != 0, DF_UUID ": vos_df_version=0 global_version=%u\n",
DP_UUID(svc->ps_uuid), svc->ps_global_version);
ds_rsvc_add_replicas_s(&svc->ps_rsvc, to_add, rdb_nbytes, vos_df_version);
if (reconf->psc_canceled) {
rc = -DER_OP_CANCELED;
goto out_to_add_remove;
Expand Down Expand Up @@ -6504,23 +6510,24 @@ pool_svc_reconf_ult(void *varg)
d_rank_list_free(tmp);
}

if (rdb_get_ranks(svc->ps_rsvc.s_db, &new) == 0) {
if (svc->ps_force_notify || !d_rank_list_identical(new, current)) {
if (rdb_get_ranks(svc->ps_rsvc.s_db, &post) == 0) {
if (svc->ps_force_notify || !d_rank_list_identical(post, pre)) {
int rc_tmp;

/*
* Send RAS event to control-plane over dRPC to indicate
* change in pool service replicas.
*/
rc_tmp = ds_notify_pool_svc_update(&svc->ps_uuid, new, svc->ps_rsvc.s_term);
rc_tmp = ds_notify_pool_svc_update(&svc->ps_uuid, post,
svc->ps_rsvc.s_term);
if (rc_tmp == 0)
svc->ps_force_notify = false;
else
DL_ERROR(rc_tmp, DF_UUID": replica update notify failure",
DP_UUID(svc->ps_uuid));
}

d_rank_list_free(new);
d_rank_list_free(post);
}
if (reconf->psc_canceled) {
rc = -DER_OP_CANCELED;
Expand All @@ -6541,7 +6548,7 @@ pool_svc_reconf_ult(void *varg)
d_rank_list_free(to_remove);
d_rank_list_free(to_add);
out_cur:
d_rank_list_free(current);
d_rank_list_free(pre);
out:
/* Do not yield between the D_FREE and the sched_end. */
D_FREE(reconf->psc_arg);
Expand Down Expand Up @@ -8270,8 +8277,15 @@ ds_pool_replicas_update_handler(crt_rpc_t *rpc)

switch (opc_get(rpc->cr_opc)) {
case POOL_REPLICAS_ADD:
rc = ds_rsvc_add_replicas(DS_RSVC_CLASS_POOL, &id, ranks,
ds_rsvc_get_md_cap(), &out->pmo_hint);
/*
* This RPC is currently unused. Before it is put to use, the
* arguments passed to ds_rsvc_add_replicas must be fixed: the
* size argument might need to be retrieved from an existing
* replica, and the vos_df_version argument (0 here is only a
* placeholder) must be determined, presumably from the pool
* global version via ds_pool_get_vos_df_version.
*/
D_ASSERTF(false, "code fixes required before use");
rc = ds_rsvc_add_replicas(DS_RSVC_CLASS_POOL, &id, ranks, ds_rsvc_get_md_cap(),
0 /* vos_df_version */, &out->pmo_hint);
break;

case POOL_REPLICAS_REMOVE:
Expand Down Expand Up @@ -8561,7 +8575,7 @@ ds_pool_svc_upgrade_vos_pool(struct ds_pool *pool)
uint32_t df_version;
int rc;

df_version = ds_pool_get_vos_pool_df_version(pool->sp_global_version);
df_version = ds_pool_get_vos_df_version(pool->sp_global_version);
if (df_version == 0) {
rc = -DER_NO_PERM;
DL_ERROR(rc, DF_UUID ": pool global version %u no longer supported",
Expand Down
2 changes: 1 addition & 1 deletion src/pool/srv_target.c
Original file line number Diff line number Diff line change
Expand Up @@ -1906,7 +1906,7 @@ update_vos_prop_on_targets(void *in)
goto out;

/** If necessary, upgrade the vos pool format */
df_version = ds_pool_get_vos_pool_df_version(pool->sp_global_version);
df_version = ds_pool_get_vos_df_version(pool->sp_global_version);
if (df_version == 0) {
ret = -DER_NO_PERM;
DL_ERROR(ret, DF_UUID ": pool global version %u no longer supported",
Expand Down
15 changes: 9 additions & 6 deletions src/rdb/rdb.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,21 +26,22 @@ static void
rdb_chkptd_stop(struct rdb *db);

/**
* Create an RDB replica at \a path with \a uuid, \a size, and \a replicas, and
* open it with \a cbs and \a arg.
* Create an RDB replica at \a path with \a uuid, \a caller_term, \a size,
* \a vos_df_version, and \a replicas, and open it with \a cbs and \a arg.
*
* \param[in] path replica path
* \param[in] uuid database UUID
* \param[in] caller_term caller term if not RDB_NIL_TERM (see rdb_open)
* \param[in] size replica size in bytes
* \param[in] vos_df_version version of VOS durable format
* \param[in] replicas list of replica ranks
* \param[in] cbs callbacks (not copied)
* \param[in] arg argument for cbs
* \param[out] storagep database storage
*/
int
rdb_create(const char *path, const uuid_t uuid, uint64_t caller_term, size_t size,
const d_rank_list_t *replicas, struct rdb_cbs *cbs, void *arg,
uint32_t vos_df_version, const d_rank_list_t *replicas, struct rdb_cbs *cbs, void *arg,
struct rdb_storage **storagep)
{
daos_handle_t pool;
Expand All @@ -51,16 +52,18 @@ rdb_create(const char *path, const uuid_t uuid, uint64_t caller_term, size_t siz
int rc;

D_DEBUG(DB_MD,
DF_UUID ": creating db %s with %u replicas: caller_term=" DF_X64 " size=" DF_U64,
DP_UUID(uuid), path, replicas == NULL ? 0 : replicas->rl_nr, caller_term, size);
DF_UUID ": creating db %s with %u replicas: caller_term=" DF_X64 " size=" DF_U64
" vos_df_version=%u\n",
DP_UUID(uuid), path, replicas == NULL ? 0 : replicas->rl_nr, caller_term, size,
vos_df_version);

/*
* Create and open a VOS pool. RDB pools specify VOS_POF_SMALL for
* basic system memory reservation and VOS_POF_EXCL for concurrent
* access protection.
*/
rc = vos_pool_create(path, (unsigned char *)uuid, size, 0 /* nvme_sz */,
VOS_POF_SMALL | VOS_POF_EXCL | VOS_POF_RDB, 0 /* version */, &pool);
VOS_POF_SMALL | VOS_POF_EXCL | VOS_POF_RDB, vos_df_version, &pool);
if (rc != 0)
goto out;
ABT_thread_yield();
Expand Down
12 changes: 7 additions & 5 deletions src/rdb/tests/rdb_test.c
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ rdbt_test_rsvc(void)
* leader with a newer term.
*/
MUST(ds_rsvc_start(DS_RSVC_CLASS_TEST, &svc_id, uuid, 2 /* term */, true /* create */,
DB_CAP, NULL /* replicas */, NULL /* arg */));
DB_CAP, 0 /* vos_df_version */, NULL /* replicas */, NULL /* arg */));
rc = ds_rsvc_stop(DS_RSVC_CLASS_TEST, &svc_id, 1 /* term */, true /* destroy */);
D_ASSERTF(rc == -DER_STALE, DF_RC"\n", DP_RC(rc));

Expand All @@ -287,7 +287,7 @@ rdbt_test_rsvc(void)
* leader with a newer term.
*/
rc = ds_rsvc_start(DS_RSVC_CLASS_TEST, &svc_id, uuid, 3 /* term */, true /* create */,
DB_CAP, NULL /* replicas */, NULL /* arg */);
DB_CAP, 0 /* vos_df_version */, NULL /* replicas */, NULL /* arg */);
D_ASSERTF(rc == -DER_ALREADY, DF_RC"\n", DP_RC(rc));
rc = ds_rsvc_stop(DS_RSVC_CLASS_TEST, &svc_id, 2 /* term */, true /* destroy */);
D_ASSERTF(rc == -DER_STALE, DF_RC"\n", DP_RC(rc));
Expand Down Expand Up @@ -641,7 +641,8 @@ rdbt_init_handler(crt_rpc_t *rpc)
D_WARN("ranks[%u]=%u\n", ri, ranks->rl_ranks[ri]);

MUST(ds_rsvc_dist_start(DS_RSVC_CLASS_TEST, &test_svc_id, in->tii_uuid, ranks, RDB_NIL_TERM,
true /* create */, true /* bootstrap */, DB_CAP));
true /* create */, true /* bootstrap */, DB_CAP,
0 /* vos_df_version */));
crt_reply_send(rpc);
}

Expand Down Expand Up @@ -768,8 +769,9 @@ rdbt_replicas_add_handler(crt_rpc_t *rpc)
if (rc != 0)
goto out;

rc = ds_rsvc_add_replicas(DS_RSVC_CLASS_TEST, &test_svc_id, ranks,
DB_CAP, &out->rtmo_hint);
rc = ds_rsvc_add_replicas(DS_RSVC_CLASS_TEST, &test_svc_id, ranks, DB_CAP,
0 /* vos_df_version */, &out->rtmo_hint);

out->rtmo_failed = ranks;

out:
Expand Down
4 changes: 3 additions & 1 deletion src/rsvc/rpc.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
* These are for daos_rpc::dr_opc and DAOS_RPC_OPCODE(opc, ...) rather than
* crt_req_create(..., opc, ...). See src/include/daos/rpc.h.
*/
#define DAOS_RSVC_VERSION 3
#define DAOS_RSVC_VERSION 4
/* List of internal RPCs in the form of:
* OPCODE, flags, FMT, handler, corpc_hdlr,
*/
Expand Down Expand Up @@ -54,6 +54,8 @@ extern struct crt_proto_format rsvc_proto_fmt;
((uint32_t) (sai_class) CRT_VAR) \
((uint32_t) (sai_flags) CRT_VAR) \
((uint64_t) (sai_size) CRT_VAR) \
((uint32_t) (sai_vos_df_version) CRT_VAR) \
((uint32_t) (sai_padding) CRT_VAR) \
((uint64_t) (sai_term) CRT_VAR) \
((d_rank_list_t) (sai_ranks) CRT_PTR)

Expand Down
Loading

0 comments on commit 655d23a

Please sign in to comment.