Skip to content

Commit

Permalink
ztest: convert active removal flag to lock
Browse files Browse the repository at this point in the history
The ztest_device_removal_active flag did not compeltely
prevent damage injection from running concurrently with
device removal.  Convert the flag to a mutex.

Signed-off-by: Brian Behlendorf <[email protected]>

TEST_ZTEST_TIMEOUT=7200
  • Loading branch information
behlendorf authored and Tom Caputi committed Oct 24, 2018
1 parent bb5f5a8 commit 53cb0bf
Showing 1 changed file with 38 additions and 41 deletions.
79 changes: 38 additions & 41 deletions cmd/ztest/ztest.c
Original file line number Diff line number Diff line change
Expand Up @@ -473,7 +473,7 @@ static spa_t *ztest_spa = NULL;
static ztest_ds_t *ztest_ds;

static kmutex_t ztest_vdev_lock;
static boolean_t ztest_device_removal_active = B_FALSE;
static kmutex_t ztest_removal_lock;
static kmutex_t ztest_checkpoint_lock;

/*
Expand Down Expand Up @@ -3332,6 +3332,15 @@ ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id)
if (ztest_opts.zo_mmp_test)
return;

/*
* If a vdev is in the process of being removed, its removal may
* finish while we are in progress, leading to an unexpected error
* value. Don't bother trying to attach while we are in the middle
* of removal.
*/
if (!mutex_tryenter(&ztest_removal_lock))
return;

oldpath = umem_alloc(MAXPATHLEN, UMEM_NOFAIL);
newpath = umem_alloc(MAXPATHLEN, UMEM_NOFAIL);

Expand All @@ -3340,17 +3349,6 @@ ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id)

spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);

/*
* If a vdev is in the process of being removed, its removal may
* finish while we are in progress, leading to an unexpected error
* value. Don't bother trying to attach while we are in the middle
* of removal.
*/
if (ztest_device_removal_active) {
spa_config_exit(spa, SCL_ALL, FTAG);
mutex_exit(&ztest_vdev_lock);
return;
}

/*
* Decide whether to do an attach or a replace.
Expand Down Expand Up @@ -3515,6 +3513,7 @@ ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id)
}
out:
mutex_exit(&ztest_vdev_lock);
mutex_exit(&ztest_removal_lock);

umem_free(oldpath, MAXPATHLEN);
umem_free(newpath, MAXPATHLEN);
Expand All @@ -3529,13 +3528,9 @@ ztest_device_removal(ztest_ds_t *zd, uint64_t id)
uint64_t guid;
int error;

mutex_enter(&ztest_removal_lock);
mutex_enter(&ztest_vdev_lock);

if (ztest_device_removal_active) {
mutex_exit(&ztest_vdev_lock);
return;
}

/*
* Remove a random top-level vdev and wait for removal to finish.
*/
Expand All @@ -3546,13 +3541,13 @@ ztest_device_removal(ztest_ds_t *zd, uint64_t id)

error = spa_vdev_remove(spa, guid, B_FALSE);
if (error == 0) {
ztest_device_removal_active = B_TRUE;
mutex_exit(&ztest_vdev_lock);

while (spa->spa_vdev_removal != NULL)
txg_wait_synced(spa_get_dsl(spa), 0);
} else {
mutex_exit(&ztest_vdev_lock);
mutex_exit(&ztest_removal_lock);
return;
}

Expand All @@ -3568,9 +3563,7 @@ ztest_device_removal(ztest_ds_t *zd, uint64_t id)
txg_wait_synced(spa_get_dsl(spa), 0);
}

mutex_enter(&ztest_vdev_lock);
ztest_device_removal_active = B_FALSE;
mutex_exit(&ztest_vdev_lock);
mutex_exit(&ztest_removal_lock);
}

/*
Expand Down Expand Up @@ -3700,22 +3693,18 @@ ztest_vdev_LUN_growth(ztest_ds_t *zd, uint64_t id)
uint64_t top;
uint64_t old_class_space, new_class_space, old_ms_count, new_ms_count;

mutex_enter(&ztest_checkpoint_lock);
mutex_enter(&ztest_vdev_lock);
spa_config_enter(spa, SCL_STATE, spa, RW_READER);

/*
* If there is a vdev removal in progress, it could complete while
* we are running, in which case we would not be able to verify
* that the metaslab_class space increased (because it decreases
* when the device removal completes).
*/
if (ztest_device_removal_active) {
spa_config_exit(spa, SCL_STATE, spa);
mutex_exit(&ztest_vdev_lock);
mutex_exit(&ztest_checkpoint_lock);
if (!mutex_tryenter(&ztest_removal_lock))
return;
}

mutex_enter(&ztest_checkpoint_lock);
mutex_enter(&ztest_vdev_lock);
spa_config_enter(spa, SCL_STATE, spa, RW_READER);

top = ztest_random_vdev_top(spa, B_TRUE);

Expand Down Expand Up @@ -3744,6 +3733,7 @@ ztest_vdev_LUN_growth(ztest_ds_t *zd, uint64_t id)
spa_config_exit(spa, SCL_STATE, spa);
mutex_exit(&ztest_vdev_lock);
mutex_exit(&ztest_checkpoint_lock);
mutex_exit(&ztest_removal_lock);
return;
}
ASSERT(psize > 0);
Expand All @@ -3770,6 +3760,7 @@ ztest_vdev_LUN_growth(ztest_ds_t *zd, uint64_t id)
spa_config_exit(spa, SCL_STATE, spa);
mutex_exit(&ztest_vdev_lock);
mutex_exit(&ztest_checkpoint_lock);
mutex_exit(&ztest_removal_lock);
return;
}

Expand Down Expand Up @@ -3805,6 +3796,7 @@ ztest_vdev_LUN_growth(ztest_ds_t *zd, uint64_t id)
spa_config_exit(spa, SCL_STATE, spa);
mutex_exit(&ztest_vdev_lock);
mutex_exit(&ztest_checkpoint_lock);
mutex_exit(&ztest_removal_lock);
return;
}

Expand Down Expand Up @@ -3836,6 +3828,7 @@ ztest_vdev_LUN_growth(ztest_ds_t *zd, uint64_t id)
spa_config_exit(spa, SCL_STATE, spa);
mutex_exit(&ztest_vdev_lock);
mutex_exit(&ztest_checkpoint_lock);
mutex_exit(&ztest_removal_lock);
}

/*
Expand Down Expand Up @@ -5715,18 +5708,15 @@ ztest_fault_inject(ztest_ds_t *zd, uint64_t id)
path0 = umem_alloc(MAXPATHLEN, UMEM_NOFAIL);
pathrand = umem_alloc(MAXPATHLEN, UMEM_NOFAIL);

mutex_enter(&ztest_vdev_lock);

/*
* Device removal is in progress, fault injection must be disabled
* until it completes and the pool is scrubbed. The fault injection
* strategy for damaging blocks does not take in to account evacuated
* blocks which may have already been damaged.
* The fault injection strategy for damaging blocks does not take
* in to account evacuated blocks which may have already been damaged.
* It must not run concurrently with device removal.
*/
if (ztest_device_removal_active) {
mutex_exit(&ztest_vdev_lock);
goto out;
}
if (!mutex_tryenter(&ztest_removal_lock))
return;

mutex_enter(&ztest_vdev_lock);

maxfaults = MAXFAULTS(zs);
leaves = MAX(zs->zs_mirrors, 1) * ztest_opts.zo_raidz;
Expand Down Expand Up @@ -5957,6 +5947,8 @@ ztest_fault_inject(ztest_ds_t *zd, uint64_t id)

(void) close(fd);
out:
mutex_exit(&ztest_removal_lock);

umem_free(path0, MAXPATHLEN);
umem_free(pathrand, MAXPATHLEN);
}
Expand Down Expand Up @@ -6086,12 +6078,14 @@ ztest_scrub(ztest_ds_t *zd, uint64_t id)
/*
* Scrub in progress by device removal.
*/
if (ztest_device_removal_active)
if (!mutex_tryenter(&ztest_removal_lock))
return;

(void) spa_scan(spa, POOL_SCAN_SCRUB);
(void) poll(NULL, 0, 100); /* wait a moment, then force a restart */
(void) spa_scan(spa, POOL_SCAN_SCRUB);

mutex_exit(&ztest_removal_lock);
}

/*
Expand Down Expand Up @@ -6755,7 +6749,9 @@ ztest_run(ztest_shared_t *zs)
* Initialize parent/child shared state.
*/
mutex_init(&ztest_vdev_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&ztest_removal_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&ztest_checkpoint_lock, NULL, MUTEX_DEFAULT, NULL);

VERIFY0(pthread_rwlock_init(&ztest_name_lock, NULL));

zs->zs_thread_start = gethrtime();
Expand Down Expand Up @@ -6916,6 +6912,7 @@ ztest_run(ztest_shared_t *zs)
mutex_destroy(&zcl.zcl_callbacks_lock);
(void) pthread_rwlock_destroy(&ztest_name_lock);
mutex_destroy(&ztest_vdev_lock);
mutex_destroy(&ztest_removal_lock);
mutex_destroy(&ztest_checkpoint_lock);
}

Expand Down

0 comments on commit 53cb0bf

Please sign in to comment.