Skip to content

Commit

Permalink
DAOS-16291 bio: auto detect faulty for an unplugged device (#14850)
Browse files Browse the repository at this point in the history
When a healthy device is unplugged, we should keep counting the I/O errors
against the device; once the number of I/O errors reaches the faulty criteria,
the unplugged device should be automatically marked as FAULTY and trigger
target exclusion accordingly.

Signed-off-by: Niu Yawei <[email protected]>
  • Loading branch information
NiuYawei authored Aug 15, 2024
1 parent 8e45a25 commit 6f50acf
Show file tree
Hide file tree
Showing 9 changed files with 100 additions and 23 deletions.
9 changes: 5 additions & 4 deletions src/bio/bio_internal.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* (C) Copyright 2018-2023 Intel Corporation.
* (C) Copyright 2018-2024 Intel Corporation.
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -359,9 +359,9 @@ struct bio_blobstore {
* layer, teardown procedure needs be postponed.
*/
int bb_holdings;
/* Flags indicating blobstore load/unload is in-progress */
unsigned bb_loading:1,
bb_unloading:1;
unsigned bb_loading:1, /* Blobstore is loading */
bb_unloading:1, /* Blobstore is unloading */
bb_faulty_done:1; /* Faulty reaction is done */
};

/* Per-xstream blobstore */
Expand Down Expand Up @@ -650,6 +650,7 @@ uint64_t default_wal_sz(uint64_t meta_sz);
/* bio_recovery.c */
int bio_bs_state_transit(struct bio_blobstore *bbs);
int bio_bs_state_set(struct bio_blobstore *bbs, enum bio_bs_state new_state);
void trigger_faulty_reaction(struct bio_blobstore *bbs);

/* bio_device.c */
int fill_in_traddr(struct bio_dev_info *b_info, char *dev_name);
Expand Down
47 changes: 42 additions & 5 deletions src/bio/bio_monitor.c
Original file line number Diff line number Diff line change
Expand Up @@ -728,17 +728,54 @@ is_bbs_faulty(struct bio_blobstore *bbs)
void
auto_faulty_detect(struct bio_blobstore *bbs)
{
int rc;
struct smd_dev_info *dev_info;
int rc;

/* The in-memory device is already in FAULTY state */
if (bbs->bb_state == BIO_BS_STATE_FAULTY)
return;

if (bbs->bb_state != BIO_BS_STATE_NORMAL)
/* To make things simpler, don't detect faulty in SETUP phase */
if (bbs->bb_state == BIO_BS_STATE_SETUP)
return;

if (!is_bbs_faulty(bbs))
return;

rc = bio_bs_state_set(bbs, BIO_BS_STATE_FAULTY);
if (rc)
D_ERROR("Failed to set FAULTY state. "DF_RC"\n", DP_RC(rc));
/*
* The device might have been unplugged before marked as FAULTY, and the bbs is
* already in teardown.
*/
if (bbs->bb_state != BIO_BS_STATE_NORMAL) {
/* Faulty reaction is already successfully performed */
if (bbs->bb_faulty_done)
return;

rc = smd_dev_get_by_id(bbs->bb_dev->bb_uuid, &dev_info);
if (rc) {
DL_ERROR(rc, "Get device info "DF_UUID" failed.",
DP_UUID(bbs->bb_dev->bb_uuid));
return;
}

/* The device is already marked as FAULTY */
if (dev_info->sdi_state == SMD_DEV_FAULTY) {
smd_dev_free_info(dev_info);
trigger_faulty_reaction(bbs);
return;
}
smd_dev_free_info(dev_info);

rc = smd_dev_set_state(bbs->bb_dev->bb_uuid, SMD_DEV_FAULTY);
if (rc)
DL_ERROR(rc, "Set device state failed.");
else
trigger_faulty_reaction(bbs);
} else {
rc = bio_bs_state_set(bbs, BIO_BS_STATE_FAULTY);
if (rc)
DL_ERROR(rc, "Failed to set FAULTY state.");
}

if (rc == 0)
ras_notify_eventf(RAS_DEVICE_SET_FAULTY, RAS_TYPE_INFO, RAS_SEV_NOTICE, NULL, NULL,
Expand Down
31 changes: 27 additions & 4 deletions src/bio/bio_recovery.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* (C) Copyright 2018-2023 Intel Corporation.
* (C) Copyright 2018-2024 Intel Corporation.
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -53,10 +53,19 @@ on_faulty(struct bio_blobstore *bbs)
rc = ract_ops->faulty_reaction(tgt_ids, tgt_cnt);
if (rc < 0)
D_ERROR("Faulty reaction failed. "DF_RC"\n", DP_RC(rc));
else if (rc == 0)
bbs->bb_faulty_done = 1;

return rc;
}

/*
 * Run on_faulty() for the given blobstore.
 *
 * The caller must only invoke this while bb_faulty_done is still clear (this
 * is asserted). on_faulty() sets bb_faulty_done itself when the reaction
 * returns 0; its return code is intentionally not propagated from here.
 */
void
trigger_faulty_reaction(struct bio_blobstore *bbs)
{
	D_ASSERT(!bbs->bb_faulty_done);
	(void)on_faulty(bbs);
}

static void
teardown_xs_bs(void *arg)
{
Expand Down Expand Up @@ -460,9 +469,10 @@ bio_bs_state_set(struct bio_blobstore *bbs, enum bio_bs_state new_state)
}

int
bio_xsctxt_health_check(struct bio_xs_context *xs_ctxt)
bio_xsctxt_health_check(struct bio_xs_context *xs_ctxt, bool log_err, bool update)
{
struct bio_xs_blobstore *bxb;
struct media_error_msg *mem;
enum smd_dev_type st;

/* sys xstream in pmem mode doesn't have NVMe context */
Expand All @@ -475,8 +485,21 @@ bio_xsctxt_health_check(struct bio_xs_context *xs_ctxt)
if (!bxb || !bxb->bxb_blobstore)
continue;

if (bxb->bxb_blobstore->bb_state != BIO_BS_STATE_NORMAL)
if (bxb->bxb_blobstore->bb_state != BIO_BS_STATE_NORMAL) {
if (log_err && bxb->bxb_blobstore->bb_state != BIO_BS_STATE_SETUP) {
D_ALLOC_PTR(mem);
if (mem == NULL) {
D_ERROR("Failed to allocate media error msg.\n");
return -DER_NVME_IO;
}

mem->mem_err_type = update ? MET_WRITE : MET_READ;
mem->mem_bs = bxb->bxb_blobstore;
mem->mem_tgt_id = xs_ctxt->bxc_tgt_id;
spdk_thread_send_msg(owner_thread(mem->mem_bs), bio_media_error, mem);
}
return -DER_NVME_IO;
}
}

return 0;
Expand All @@ -492,7 +515,7 @@ is_reint_ready(struct bio_blobstore *bbs)
xs_ctxt = bbs->bb_xs_ctxts[i];

D_ASSERT(xs_ctxt != NULL);
if (bio_xsctxt_health_check(xs_ctxt))
if (bio_xsctxt_health_check(xs_ctxt, false, false))
return false;
}
return true;
Expand Down
13 changes: 13 additions & 0 deletions src/bio/bio_xstream.c
Original file line number Diff line number Diff line change
Expand Up @@ -1705,6 +1705,18 @@ bio_nvme_ctl(unsigned int cmd, void *arg)
return rc;
}

/*
 * Zero the BIO read/write/unmap/checksum error counters kept in the
 * blobstore's device health state, and clear the bb_faulty_done flag.
 */
static inline void
reset_media_errors(struct bio_blobstore *bbs)
{
	struct nvme_stats *stats = &bbs->bb_dev_health.bdh_health_state;

	/* Forget any previously completed faulty reaction */
	bbs->bb_faulty_done = 0;

	/* Drop every media error counter accumulated so far */
	stats->checksum_errs  = 0;
	stats->bio_unmap_errs = 0;
	stats->bio_write_errs = 0;
	stats->bio_read_errs  = 0;
}

void
setup_bio_bdev(void *arg)
{
Expand Down Expand Up @@ -1736,6 +1748,7 @@ setup_bio_bdev(void *arg)
goto out;
}

reset_media_errors(bbs);
rc = bio_bs_state_set(bbs, BIO_BS_STATE_SETUP);
D_ASSERT(rc == 0);
out:
Expand Down
4 changes: 3 additions & 1 deletion src/include/daos_srv/bio.h
Original file line number Diff line number Diff line change
Expand Up @@ -486,11 +486,13 @@ void bio_xsctxt_free(struct bio_xs_context *ctxt);
* Health check on the per-xstream NVMe context
*
* \param[in] xs_ctxt Per-xstream NVMe context
* \param[in] log_err Log media error if the device is not healthy
* \param[in] update The check is called for an update operation or not
*
* \returns 0: NVMe context is healthy
* -DER_NVME_IO: NVMe context is faulty
*/
int bio_xsctxt_health_check(struct bio_xs_context *xs_ctxt);
int bio_xsctxt_health_check(struct bio_xs_context *xs_ctxt, bool log_err, bool update);

/**
* NVMe poller to poll NVMe I/O completions.
Expand Down
5 changes: 3 additions & 2 deletions src/vos/vos_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -1766,20 +1766,21 @@ vos_flush_wal_header(struct vos_pool *vp)
* Check if the NVMe context of a VOS target is healthy.
*
* \param[in] cont VOS container
* \param[in] update The check is for an update operation or not
*
* \return 0 : VOS target is healthy
* -DER_NVME_IO : VOS target is faulty
*/
static inline int
vos_tgt_health_check(struct vos_container *cont)
vos_tgt_health_check(struct vos_container *cont, bool update)
{
/* Both the container and its pool must be valid */
D_ASSERT(cont != NULL);
D_ASSERT(cont->vc_pool != NULL);

/* A pool flagged vp_sysdb skips the NVMe context check and is reported healthy */
if (cont->vc_pool->vp_sysdb)
return 0;

/* Delegate to the BIO health check on the xstream context from vos_xsctxt_get() */
return bio_xsctxt_health_check(vos_xsctxt_get());
return bio_xsctxt_health_check(vos_xsctxt_get(), true, update);
}

int
Expand Down
8 changes: 4 additions & 4 deletions src/vos/vos_io.c
Original file line number Diff line number Diff line change
Expand Up @@ -1506,7 +1506,7 @@ vos_fetch_end(daos_handle_t ioh, daos_size_t *size, int err)
D_ASSERT(!ioc->ic_update);

if (err == 0) {
err = vos_tgt_health_check(ioc->ic_cont);
err = vos_tgt_health_check(ioc->ic_cont, false);
if (err)
DL_ERROR(err, "Fail fetch due to faulty NVMe.");
}
Expand Down Expand Up @@ -1546,7 +1546,7 @@ vos_fetch_begin(daos_handle_t coh, daos_unit_oid_t oid, daos_epoch_t epoch,
D_DEBUG(DB_TRACE, "Fetch "DF_UOID", desc_nr %d, epoch "DF_X64"\n",
DP_UOID(oid), iod_nr, epoch);

rc = vos_tgt_health_check(vos_hdl2cont(coh));
rc = vos_tgt_health_check(vos_hdl2cont(coh), false);
if (rc) {
DL_ERROR(rc, DF_UOID": Reject fetch due to faulty NVMe.", DP_UOID(oid));
return rc;
Expand Down Expand Up @@ -2543,7 +2543,7 @@ vos_update_end(daos_handle_t ioh, uint32_t pm_ver, daos_key_t *dkey, int err,
vos_space_unhold(vos_cont2pool(ioc->ic_cont), &ioc->ic_space_held[0]);

if (err == 0) {
err = vos_tgt_health_check(ioc->ic_cont);
err = vos_tgt_health_check(ioc->ic_cont, true);
if (err)
DL_ERROR(err, "Fail update due to faulty NVMe.");
}
Expand Down Expand Up @@ -2590,7 +2590,7 @@ vos_update_begin(daos_handle_t coh, daos_unit_oid_t oid, daos_epoch_t epoch,
"Prepare IOC for " DF_UOID ", iod_nr %d, epc " DF_X64 ", flags=" DF_X64 "\n",
DP_UOID(oid), iod_nr, (dtx_is_real_handle(dth) ? dth->dth_epoch : epoch), flags);

rc = vos_tgt_health_check(vos_hdl2cont(coh));
rc = vos_tgt_health_check(vos_hdl2cont(coh), true);
if (rc) {
DL_ERROR(rc, DF_UOID": Reject update due to faulty NVMe.", DP_UOID(oid));
return rc;
Expand Down
4 changes: 2 additions & 2 deletions src/vos/vos_obj.c
Original file line number Diff line number Diff line change
Expand Up @@ -455,7 +455,7 @@ vos_obj_punch(daos_handle_t coh, daos_unit_oid_t oid, daos_epoch_t epoch,
D_DEBUG(DB_IO, "Punch "DF_UOID", epoch "DF_X64"\n",
DP_UOID(oid), epr.epr_hi);

rc = vos_tgt_health_check(cont);
rc = vos_tgt_health_check(cont, true);
if (rc) {
DL_ERROR(rc, DF_UOID": Reject punch due to faulty NVMe.", DP_UOID(oid));
return rc;
Expand Down Expand Up @@ -592,7 +592,7 @@ vos_obj_punch(daos_handle_t coh, daos_unit_oid_t oid, daos_epoch_t epoch,
vos_ts_set_free(ts_set);

if (rc == 0) {
rc = vos_tgt_health_check(cont);
rc = vos_tgt_health_check(cont, true);
if (rc)
DL_ERROR(rc, "Fail punch due to faulty NVMe.");
}
Expand Down
2 changes: 1 addition & 1 deletion src/vos/vos_pool.c
Original file line number Diff line number Diff line change
Expand Up @@ -1479,7 +1479,7 @@ vos_pool_open_metrics(const char *path, uuid_t uuid, unsigned int flags, void *m
}
}

rc = bio_xsctxt_health_check(vos_xsctxt_get());
rc = bio_xsctxt_health_check(vos_xsctxt_get(), false, false);
if (rc) {
DL_WARN(rc, DF_UUID": Skip pool open due to faulty NVMe.", DP_UUID(uuid));
return rc;
Expand Down

0 comments on commit 6f50acf

Please sign in to comment.