Skip to content

Commit

Permalink
DAOS-16329 chk: maintenance mode after checking pool with dryrun
Browse files Browse the repository at this point in the history
Sometimes, after the system shuts down unexpectedly, users may want
to check their critical data under some kind of maintenance mode.
In such a mode, no user data can be modified, moved, or aggregated.
That guarantees that no further damage (caused by DAOS logic) can
happen during the check.

For this purpose, we enhance the current DAOS CR logic so that, with the
--dryrun option, the pool can be opened as immutable after the check,
with the mechanisms that may cause data modification or movement
(such as rebuild or aggregation) disabled.

In such a mode, a client that wants to connect to the pool must specify
the read-only option. The same applies to opening a container in such a pool.

Test-tag: pr cat_recov

Signed-off-by: Fan Yong <[email protected]>
  • Loading branch information
Nasf-Fan committed Aug 28, 2024
1 parent 9879f07 commit c00f9b9
Show file tree
Hide file tree
Showing 18 changed files with 292 additions and 53 deletions.
2 changes: 1 addition & 1 deletion src/chk/chk_common.c
Original file line number Diff line number Diff line change
Expand Up @@ -403,7 +403,7 @@ chk_pool_restart_svc(struct chk_pool_rec *cpr)
if (cpr->cpr_started)
chk_pool_shutdown(cpr, true);

rc = ds_pool_start_after_check(cpr->cpr_uuid);
rc = ds_pool_start_after_check(cpr->cpr_uuid, cpr->cpr_immutable);
if (rc != 0) {
D_WARN("Cannot start full PS for "DF_UUIDF" after CR check: "DF_RC"\n",
DP_UUID(cpr->cpr_uuid), DP_RC(rc));
Expand Down
13 changes: 8 additions & 5 deletions src/chk/chk_engine.c
Original file line number Diff line number Diff line change
Expand Up @@ -1797,10 +1797,8 @@ chk_engine_pool_ult(void *args)
}

rc = chk_engine_cont_cleanup(cpr, svc, &aggregator);
if (rc != 0)
goto out;

rc = ds_pool_svc_schedule_reconf(svc);
if (rc == 0 && !cpr->cpr_immutable)
rc = ds_pool_svc_schedule_reconf(svc);

out:
chk_engine_cont_list_fini(&aggregator);
Expand Down Expand Up @@ -2113,6 +2111,11 @@ chk_engine_start_post(struct chk_instance *ins)
if (pool_cbk->cb_phase == CHK__CHECK_SCAN_PHASE__CSP_DONE)
continue;

if (ins->ci_prop.cp_flags & CHK__CHECK_FLAG__CF_DRYRUN)
cpr->cpr_immutable = 1;
else
cpr->cpr_immutable = 0;

if (phase > pool_cbk->cb_phase)
phase = pool_cbk->cb_phase;

Expand Down Expand Up @@ -2950,7 +2953,7 @@ chk_engine_pool_start(uint64_t gen, uuid_t uuid, uint32_t phase, uint32_t flags)
cbk = &cpr->cpr_bk;
chk_pool_get(cpr);

rc = ds_pool_start(uuid, false);
rc = ds_pool_start(uuid, false, cpr->cpr_immutable);
if (rc != 0)
D_GOTO(put, rc = (rc == -DER_NONEXIST ? 1 : rc));

Expand Down
1 change: 1 addition & 0 deletions src/chk/chk_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -596,6 +596,7 @@ struct chk_pool_rec {
cpr_stop:1,
cpr_done:1,
cpr_skip:1,
cpr_immutable:1,
cpr_dangling:1,
cpr_for_orphan:1,
cpr_notified_exit:1,
Expand Down
4 changes: 3 additions & 1 deletion src/client/dfs/cont.c
Original file line number Diff line number Diff line change
Expand Up @@ -970,7 +970,9 @@ dfs_cont_check(daos_handle_t poh, const char *cont, uint64_t flags, const char *
out_snap:
D_FREE(oit_args);
epr.epr_hi = epr.epr_lo = snap_epoch;
rc2 = daos_cont_destroy_snap(coh, epr, NULL);
rc2 = daos_cont_destroy_snap(coh, epr, NULL);
if (rc2 != 0)
D_ERROR("Failed to destroy OID table: " DF_RC "\n", DP_RC(rc2));
if (rc == 0)
rc = daos_der2errno(rc2);
out_dfs:
Expand Down
28 changes: 22 additions & 6 deletions src/container/srv_container.c
Original file line number Diff line number Diff line change
Expand Up @@ -1555,9 +1555,9 @@ cont_destroy(struct rdb_tx *tx, struct ds_pool_hdl *pool_hdl, struct cont *cont,
* - Users who can delete any container in the pool
* - Users who have been given access to delete the specific container
*/
if (!ds_sec_pool_can_delete_cont(pool_hdl->sph_sec_capas) &&
!ds_sec_cont_can_delete(pool_hdl->sph_flags, &pool_hdl->sph_cred,
&owner, acl)) {
if (pool_hdl->sph_pool->sp_immutable ||
(!ds_sec_pool_can_delete_cont(pool_hdl->sph_sec_capas) &&
!ds_sec_cont_can_delete(pool_hdl->sph_flags, &pool_hdl->sph_cred, &owner, acl))) {
D_ERROR(DF_CONT": permission denied to delete cont\n",
DP_CONT(pool_hdl->sph_pool->sp_uuid, cont->c_uuid));
D_GOTO(out_prop, rc = -DER_NO_PERM);
Expand Down Expand Up @@ -2254,6 +2254,14 @@ cont_open(struct rdb_tx *tx, struct ds_pool_hdl *pool_hdl, struct cont *cont, cr
goto out;
}

if (pool_hdl->sph_pool->sp_immutable && (flags & DAOS_COO_IO_BASE_MASK) != DAOS_COO_RO) {
D_ERROR(DF_UUID "/" DF_UUID "/" DF_UUID ": permission denied to open immutable "
"container with flags " DF_X64 ", sec_capas " DF_X64 "/" DF_X64 "\n",
DP_UUID(cont->c_svc->cs_pool_uuid), DP_UUID(pool_hdl->sph_uuid),
DP_UUID(cont->c_uuid), flags, pool_hdl->sph_sec_capas, sec_capas);
D_GOTO(out, rc = -DER_NO_PERM);
}

/*
* Need props to check for pool redundancy requirements and access
* control.
Expand All @@ -2275,6 +2283,11 @@ cont_open(struct rdb_tx *tx, struct ds_pool_hdl *pool_hdl, struct cont *cont, cr
D_GOTO(out, rc);
}

D_DEBUG(DB_MD, DF_UUID "/" DF_UUID "/" DF_UUID ": opening to the container with flags "
DF_X64", sec_capas " DF_X64 "/" DF_X64 "\n",
DP_UUID(cont->c_svc->cs_pool_uuid), DP_UUID(pool_hdl->sph_uuid),
DP_UUID(cont->c_uuid), flags, pool_hdl->sph_sec_capas, sec_capas);

if ((flags & DAOS_COO_EVICT_ALL) && !ds_sec_cont_can_evict_all(sec_capas)) {
D_ERROR(DF_CONT": permission denied evicting all handles\n",
DP_CONT(cont->c_svc->cs_pool_uuid, cont->c_uuid));
Expand All @@ -2283,9 +2296,12 @@ cont_open(struct rdb_tx *tx, struct ds_pool_hdl *pool_hdl, struct cont *cont, cr
goto out;
}

if ((flags & DAOS_COO_EX) && !ds_sec_cont_can_open_ex(sec_capas)) {
D_ERROR(DF_CONT": permission denied opening exclusively\n",
DP_CONT(cont->c_svc->cs_pool_uuid, cont->c_uuid));
if (((flags & DAOS_COO_EX) && !ds_sec_cont_can_open_ex(sec_capas)) ||
((flags & DAOS_COO_RW) && !ds_sec_cont_can_modify(sec_capas))) {
D_ERROR(DF_UUID "/" DF_UUID "/" DF_UUID ": permission denied opening the "
"container with flags " DF_X64 ", capas " DF_X64 "/" DF_X64 "\n",
DP_UUID(cont->c_svc->cs_pool_uuid), DP_UUID(pool_hdl->sph_uuid),
DP_UUID(cont->c_uuid), flags, pool_hdl->sph_sec_capas, sec_capas);
daos_prop_free(prop);
rc = -DER_NO_PERM;
goto out;
Expand Down
10 changes: 6 additions & 4 deletions src/container/srv_target.c
Original file line number Diff line number Diff line change
Expand Up @@ -934,7 +934,7 @@ cont_child_start(struct ds_pool_child *pool_child, const uuid_t co_uuid,
DP_CONT(pool_child->spc_uuid, co_uuid), tgt_id);
rc = -DER_SHUTDOWN;
} else if (!cont_child_started(cont_child)) {
if (!ds_pool_skip_for_check(pool_child->spc_pool)) {
if (!ds_pool_restricted(pool_child->spc_pool, false)) {
rc = cont_start_agg(cont_child);
if (rc != 0)
goto out;
Expand Down Expand Up @@ -1601,11 +1601,15 @@ ds_cont_local_open(uuid_t pool_uuid, uuid_t cont_hdl_uuid, uuid_t cont_uuid,
* but for creating rebuild global container handle.
*/
D_ASSERT(hdl->sch_cont != NULL);
D_ASSERT(hdl->sch_cont->sc_pool != NULL);
hdl->sch_cont->sc_open++;

if (hdl->sch_cont->sc_open > 1)
goto opened;

if (ds_pool_restricted(hdl->sch_cont->sc_pool->spc_pool, false))
goto csum_init;

rc = dtx_cont_open(hdl->sch_cont);
if (rc != 0) {
D_ASSERTF(hdl->sch_cont->sc_open == 1, "Unexpected open count for cont "
Expand Down Expand Up @@ -1633,10 +1637,8 @@ ds_cont_local_open(uuid_t pool_uuid, uuid_t cont_hdl_uuid, uuid_t cont_uuid,
D_GOTO(err_dtx, rc);
}

D_ASSERT(hdl->sch_cont != NULL);
D_ASSERT(hdl->sch_cont->sc_pool != NULL);
csum_init:
rc = ds_cont_csummer_init(hdl->sch_cont);

if (rc != 0)
D_GOTO(err_dtx, rc);
}
Expand Down
4 changes: 2 additions & 2 deletions src/include/daos/container.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* (C) Copyright 2016-2023 Intel Corporation.
* (C) Copyright 2016-2024 Intel Corporation.
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -130,7 +130,7 @@ dc_cont_open_flags_valid(uint64_t flags)
f = flags;

/* One and only one of DAOS_COO_RO, DAOS_COO_RW, and DAOS_COO_EX. */
m = f & (DAOS_COO_RO | DAOS_COO_RW | DAOS_COO_EX);
m = f & DAOS_COO_IO_BASE_MASK;
if (m != DAOS_COO_RO && m != DAOS_COO_RW && m != DAOS_COO_EX)
return false;

Expand Down
3 changes: 3 additions & 0 deletions src/include/daos_cont.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,9 @@ extern "C" {
/** Mask for all of the bits in the container open mode flag, DAOS_COO_ bits */
#define DAOS_COO_MASK ((1U << DAOS_COO_NBITS) - 1)

/**
 * Mask selecting the basic IO mode bits of the container open flags:
 * read-only (DAOS_COO_RO), read-write (DAOS_COO_RW) or exclusively
 * read-write (DAOS_COO_EX).
 */
#define DAOS_COO_IO_BASE_MASK (DAOS_COO_RO | DAOS_COO_RW | DAOS_COO_EX)

/** Maximum length for container hints */
#define DAOS_CONT_HINT_MAX_LEN 128

Expand Down
7 changes: 4 additions & 3 deletions src/include/daos_srv/pool.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ struct ds_pool {
uuid_t sp_srv_pool_hdl;
uint32_t sp_stopping:1,
sp_cr_checked:1,
sp_immutable:1,
sp_fetch_hdls:1,
sp_need_discard:1,
sp_disable_rebuild:1;
Expand Down Expand Up @@ -275,9 +276,9 @@ int ds_pool_tgt_finish_rebuild(uuid_t pool_uuid, struct pool_target_id_list *lis
int ds_pool_tgt_map_update(struct ds_pool *pool, struct pool_buf *buf,
unsigned int map_version);

bool ds_pool_skip_for_check(struct ds_pool *pool);
int ds_pool_start_after_check(uuid_t uuid);
int ds_pool_start(uuid_t uuid, bool aft_chk);
bool ds_pool_restricted(struct ds_pool *pool, bool immutable);
int ds_pool_start_after_check(uuid_t uuid, bool immutable);
int ds_pool_start(uuid_t uuid, bool aft_chk, bool immutable);
int ds_pool_stop(uuid_t uuid);
int dsc_pool_svc_extend(uuid_t pool_uuid, d_rank_list_t *svc_ranks, uint64_t deadline, int ntargets,
const d_rank_list_t *rank_list, int ndomains, const uint32_t *domains);
Expand Down
15 changes: 14 additions & 1 deletion src/include/daos_srv/security.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* (C) Copyright 2019-2023 Intel Corporation.
* (C) Copyright 2019-2024 Intel Corporation.
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -304,6 +304,19 @@ ds_sec_cont_can_open_ex(uint64_t cont_capas);
bool
ds_sec_cont_can_evict_all(uint64_t cont_capas);

/**
* Determine if the container can be modified based on the container security
* capabilities.
*
* \param[in] cont_capas Capability bits acquired via
* ds_sec_cont_get_capabilities
*
* \return True Access allowed
* False Access denied
*/
bool
ds_sec_cont_can_modify(uint64_t cont_capas);

/**
* Get the security capabilities for a rebuild container handle created by the
* DAOS server.
Expand Down
2 changes: 1 addition & 1 deletion src/mgmt/srv_target.c
Original file line number Diff line number Diff line change
Expand Up @@ -1212,7 +1212,7 @@ ds_mgmt_hdlr_tgt_create(crt_rpc_t *tc_req)
tc_out->tc_ranks.ca_arrays = rank;
tc_out->tc_ranks.ca_count = 1;

rc = ds_pool_start(tc_in->tc_pool_uuid, false);
rc = ds_pool_start(tc_in->tc_pool_uuid, false, false);
if (rc) {
D_ERROR(DF_UUID": failed to start pool: "DF_RC"\n",
DP_UUID(tc_in->tc_pool_uuid), DP_RC(rc));
Expand Down
6 changes: 6 additions & 0 deletions src/pool/srv_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,12 @@ pool_tls_get()
return tls;
}

/**
 * Tell whether \a pool is to be skipped because of the check.
 *
 * \param[in] pool	The pool to be tested.
 *
 * \return	True only when engine_in_check() reports true and the pool's
 *		sp_cr_checked flag is not set; false otherwise.
 *
 * NOTE(review): presumably sp_cr_checked marks a pool that CR has already
 * checked -- confirm against where the flag is set.
 */
static inline bool
ds_pool_skip_for_check(struct ds_pool *pool)
{
	/* Outside of check mode nothing is skipped; the pool is not inspected. */
	if (!engine_in_check())
		return false;

	return pool->sp_cr_checked == 0;
}

struct pool_iv_map {
d_rank_t piv_master_rank;
uint32_t piv_pool_map_ver;
Expand Down
Loading

0 comments on commit c00f9b9

Please sign in to comment.