Skip to content

Commit

Permalink
DOSE-767 deadlock with metaslab_flush during endurance IO stress run (o…
Browse files Browse the repository at this point in the history
  • Loading branch information
grwilson authored Nov 18, 2021
1 parent 7c3ff19 commit 58434e2
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 13 deletions.
9 changes: 2 additions & 7 deletions module/os/linux/zfs/vdev_object_store.c
Original file line number Diff line number Diff line change
Expand Up @@ -1858,20 +1858,15 @@ vdev_object_store_metaslab_init(vdev_t *vd, metaslab_t *msp,
uint64_t
vdev_object_store_metaslab_offset(vdev_t *vd)
{
boolean_t lock_held = spa_config_held(vd->vdev_spa,
SCL_ALLOC, RW_WRITER);
if (!lock_held)
spa_config_enter(vd->vdev_spa, SCL_ALLOC, FTAG, RW_WRITER);
ASSERT3U(spa_config_held(vd->vdev_spa, SCL_ALLOC, RW_WRITER), ==,
SCL_ALLOC);

uint64_t blockid = 0;
for (uint64_t m = 0; m < vd->vdev_ms_count; m++) {
metaslab_t *msp = vd->vdev_ms[m];
blockid = MAX(blockid, msp->ms_lbas[0]);
}

if (!lock_held)
spa_config_exit(vd->vdev_spa, SCL_ALLOC, FTAG);

/*
* The blockid represents the next block that will be allocated
* so we need to subtract one to get the last allocated block
Expand Down
37 changes: 32 additions & 5 deletions module/zfs/spa_misc.c
Original file line number Diff line number Diff line change
Expand Up @@ -522,6 +522,30 @@ spa_config_enter(spa_t *spa, int locks, const void *tag, krw_t rw)
int wlocks_held = 0;
ASSERT3U(SCL_LOCKS, <, sizeof (wlocks_held) * NBBY);

/*
 * If we're using an object-based pool, we may need
* to flush out any pending writes. We do this only
* when we are trying to grab the SCL_ZIO lock.
*/
boolean_t flush_needed = (rw == RW_WRITER) &&
spa_is_object_based(spa) && (locks & SCL_ZIO);

/*
* If this is an object-based pool and a flush is required, then
* we may have to also acquire the SCL_ALLOC lock. We need to add
* this to the list of locks that are going to be acquired but we
* release it before returning to the caller. This allows us to lock
* out allocations so that we can enable the object store passthru
* logic while still grabbing the locks in the correct order.
*/
boolean_t lock_needed = flush_needed &&
spa_config_held(spa, SCL_ALLOC, RW_WRITER) == 0 &&
!(locks & SCL_ALLOC);

if (lock_needed) {
locks |= SCL_ALLOC;
}

for (int i = 0; i < SCL_LOCKS; i++) {
vdev_t *rvd = spa->spa_root_vdev;
spa_config_lock_t *scl = &spa->spa_config_lock[i];
Expand All @@ -540,10 +564,6 @@ spa_config_enter(spa_t *spa, int locks, const void *tag, krw_t rw)
while (scl->scl_count != 0) {
scl->scl_write_wanted++;

boolean_t flush_needed =
spa_is_object_based(spa) &&
((1 << i) == SCL_ZIO);

/*
 * If we're on an object-based pool and
* we're trying to lock the SCL_LOCK,
Expand All @@ -554,7 +574,7 @@ spa_config_enter(spa_t *spa, int locks, const void *tag, krw_t rw)
* flush any I/Os quickly that might
* be holding the SCL_ZIO lock as reader.
*/
if (flush_needed) {
if (flush_needed && (1 << i) == SCL_ZIO) {
vdev_object_store_enable_passthru(rvd);
}
cv_wait(&scl->scl_cv, &scl->scl_lock);
Expand All @@ -566,6 +586,10 @@ spa_config_enter(spa_t *spa, int locks, const void *tag, krw_t rw)
mutex_exit(&scl->scl_lock);
}
ASSERT3U(wlocks_held, <=, locks);

if (lock_needed) {
spa_config_exit(spa, SCL_ALLOC, tag);
}
}

void
Expand Down Expand Up @@ -2848,6 +2872,9 @@ boolean_t
spa_is_object_based(spa_t *spa)
{
vdev_t *rvd = spa->spa_root_vdev;
if (rvd == NULL)
return (B_FALSE);

for (uint64_t c = 0; c < rvd->vdev_children; c++) {
if (vdev_is_object_based(rvd->vdev_child[c]))
return (B_TRUE);
Expand Down
5 changes: 4 additions & 1 deletion module/zfs/vdev.c
Original file line number Diff line number Diff line change
Expand Up @@ -706,7 +706,7 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
vdev_alloc_bias_t alloc_bias = VDEV_BIAS_NONE;
boolean_t top_level = (parent && !parent->vdev_parent);

ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);
ASSERT3U(spa_config_held(spa, SCL_ALL, RW_WRITER), ==, SCL_ALL);

if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0)
return (SET_ERROR(EINVAL));
Expand Down Expand Up @@ -5224,6 +5224,9 @@ vdev_is_concrete(vdev_t *vd)
boolean_t
vdev_is_object_based(vdev_t *vd)
{
if (vd == NULL)
return (B_FALSE);

vdev_ops_t *ops = vd->vdev_ops;
if (vd->vdev_ops->vdev_op_leaf && ops == &vdev_object_store_ops)
return (B_TRUE);
Expand Down

0 comments on commit 58434e2

Please sign in to comment.