From aed23689506ae2d47e8eb08af946106512edbf43 Mon Sep 17 00:00:00 2001 From: Boris Protopopov Date: Tue, 5 Jan 2016 15:47:58 -0500 Subject: [PATCH] Use zfs range locks in ztest --- cmd/ztest/ztest.c | 203 +++++++++++++++++++++++++++++---------- include/sys/zfs_rlock.h | 8 +- lib/libzpool/Makefile.am | 1 + 3 files changed, 157 insertions(+), 55 deletions(-) diff --git a/cmd/ztest/ztest.c b/cmd/ztest/ztest.c index a824da977670..bbf5cbfb78bf 100644 --- a/cmd/ztest/ztest.c +++ b/cmd/ztest/ztest.c @@ -101,6 +101,7 @@ #include #include #include +#include #include #include #include @@ -231,15 +232,6 @@ typedef struct bufwad { uint64_t bw_data; } bufwad_t; -/* - * XXX -- fix zfs range locks to be generic so we can use them here. - */ -typedef enum { - RL_READER, - RL_WRITER, - RL_APPEND -} rl_type_t; - typedef struct rll { void *rll_writer; int rll_readers; @@ -247,12 +239,10 @@ typedef struct rll { kcondvar_t rll_cv; } rll_t; -typedef struct rl { - uint64_t rl_object; - uint64_t rl_offset; - uint64_t rl_size; - rll_t *rl_lock; -} rl_t; +typedef struct zll { + list_t z_list; + kmutex_t z_lock; +} zll_t; #define ZTEST_RANGE_LOCKS 64 #define ZTEST_OBJECT_LOCKS 64 @@ -284,7 +274,7 @@ typedef struct ztest_ds { char zd_name[MAXNAMELEN]; kmutex_t zd_dirobj_lock; rll_t zd_object_lock[ZTEST_OBJECT_LOCKS]; - rll_t zd_range_lock[ZTEST_RANGE_LOCKS]; + zll_t zd_range_lock[ZTEST_RANGE_LOCKS]; } ztest_ds_t; /* @@ -1133,6 +1123,100 @@ ztest_spa_prop_set_uint64(zpool_prop_t prop, uint64_t value) return (error); } + +/* + * Object and range lock mechanics + */ +typedef struct { + list_node_t z_lnode; + refcount_t z_refcnt; + uint64_t z_object; + zfs_rlock_t z_range_lock; +} ztest_znode_t; + +typedef struct { + rl_t *z_rl; + ztest_znode_t *z_ztznode; +} ztest_zrl_t; + +static ztest_znode_t * +ztest_znode_init(uint64_t object) +{ + ztest_znode_t *zp = umem_alloc(sizeof (*zp), UMEM_NOFAIL); + + list_link_init(&zp->z_lnode); + refcount_create(&zp->z_refcnt); + zp->z_object = object; + zfs_rlock_init(&zp->z_range_lock); + + return (zp); +} + +static void +ztest_znode_fini(ztest_znode_t *zp) +{ + ASSERT(refcount_is_zero(&zp->z_refcnt)); + zfs_rlock_destroy(&zp->z_range_lock); + zp->z_object = 0; + refcount_destroy(&zp->z_refcnt); + list_link_init(&zp->z_lnode); + umem_free(zp, sizeof (*zp)); +} + +static void +ztest_zll_init(zll_t *zll) +{ + mutex_init(&zll->z_lock, NULL, MUTEX_DEFAULT, NULL); + list_create(&zll->z_list, sizeof (ztest_znode_t), + offsetof(ztest_znode_t, z_lnode)); +} + +static void +ztest_zll_destroy(zll_t *zll) +{ + list_destroy(&zll->z_list); + mutex_destroy(&zll->z_lock); +} + +#define RL_TAG "range_lock" +static ztest_znode_t * +ztest_znode_get(ztest_ds_t *zd, uint64_t object) +{ + zll_t *zll = &zd->zd_range_lock[object & (ZTEST_OBJECT_LOCKS - 1)]; + ztest_znode_t *zp = NULL; + mutex_enter(&zll->z_lock); + for (zp = list_head(&zll->z_list); (zp); + zp = list_next(&zll->z_list, zp)) { + if (zp->z_object == object) { + refcount_add(&zp->z_refcnt, RL_TAG); + break; + } + } + if (zp == NULL) { + zp = ztest_znode_init(object); + refcount_add(&zp->z_refcnt, RL_TAG); + list_insert_head(&zll->z_list, zp); + } + mutex_exit(&zll->z_lock); + return (zp); +} + +static void +ztest_znode_put(ztest_ds_t *zd, ztest_znode_t *zp) +{ + zll_t *zll = NULL; + ASSERT3U(zp->z_object, !=, 0); + zll = &zd->zd_range_lock[zp->z_object & (ZTEST_OBJECT_LOCKS - 1)]; + mutex_enter(&zll->z_lock); + refcount_remove(&zp->z_refcnt, RL_TAG); + if (refcount_is_zero(&zp->z_refcnt)) { + list_remove(&zll->z_list, zp); + ztest_znode_fini(zp); + } + mutex_exit(&zll->z_lock); +} + + static void ztest_rll_init(rll_t *rll) { @@ -1205,33 +1289,37 @@ ztest_object_unlock(ztest_ds_t *zd, uint64_t object) ztest_rll_unlock(rll); } -static rl_t * -ztest_range_lock(ztest_ds_t *zd, uint64_t object, uint64_t offset, - uint64_t size, rl_type_t type) +static ztest_zrl_t * +ztest_zrl_init(rl_t *rl, ztest_znode_t *zp) { - uint64_t hash = object ^ (offset % (ZTEST_RANGE_LOCKS + 1)); - rll_t *rll = &zd->zd_range_lock[hash & (ZTEST_RANGE_LOCKS - 1)]; - rl_t *rl; - - rl = umem_alloc(sizeof (*rl), UMEM_NOFAIL); - rl->rl_object = object; - rl->rl_offset = offset; - rl->rl_size = size; - rl->rl_lock = rll; - - ztest_rll_lock(rll, type); - - return (rl); + ztest_zrl_t *zrl = umem_alloc(sizeof (*zrl), UMEM_NOFAIL); + zrl->z_rl = rl; + zrl->z_ztznode = zp; + return (zrl); } static void -ztest_range_unlock(rl_t *rl) +ztest_zrl_fini(ztest_zrl_t *zrl) { - rll_t *rll = rl->rl_lock; + umem_free(zrl, sizeof (*zrl)); +} - ztest_rll_unlock(rll); +static ztest_zrl_t * +ztest_range_lock(ztest_ds_t *zd, uint64_t object, uint64_t offset, + uint64_t size, rl_type_t type) +{ + ztest_znode_t *zp = ztest_znode_get(zd, object); + rl_t *rl = zfs_range_lock(&zp->z_range_lock, offset, + size, type); + return (ztest_zrl_init(rl, zp)); +} - umem_free(rl, sizeof (*rl)); +static void +ztest_range_unlock(ztest_ds_t *zd, ztest_zrl_t *zrl) +{ + zfs_range_unlock(zrl->z_rl); + ztest_znode_put(zd, zrl->z_ztznode); + ztest_zrl_fini(zrl); } static void @@ -1253,7 +1341,7 @@ ztest_zd_init(ztest_ds_t *zd, ztest_shared_ds_t *szd, objset_t *os) ztest_rll_init(&zd->zd_object_lock[l]); for (l = 0; l < ZTEST_RANGE_LOCKS; l++) - ztest_rll_init(&zd->zd_range_lock[l]); + ztest_zll_init(&zd->zd_range_lock[l]); } static void @@ -1268,7 +1356,7 @@ ztest_zd_fini(ztest_ds_t *zd) ztest_rll_destroy(&zd->zd_object_lock[l]); for (l = 0; l < ZTEST_RANGE_LOCKS; l++) - ztest_rll_destroy(&zd->zd_range_lock[l]); + ztest_zll_destroy(&zd->zd_range_lock[l]); } #define TXG_MIGHTWAIT (ztest_random(10) == 0 ? TXG_NOWAIT : TXG_WAIT) @@ -1625,7 +1713,7 @@ ztest_replay_write(ztest_ds_t *zd, lr_write_t *lr, boolean_t byteswap) dmu_tx_t *tx; dmu_buf_t *db; arc_buf_t *abuf = NULL; - rl_t *rl; + ztest_zrl_t *rl; if (byteswap) byteswap_uint64_array(lr, sizeof (*lr)); @@ -1674,7 +1762,7 @@ ztest_replay_write(ztest_ds_t *zd, lr_write_t *lr, boolean_t byteswap) if (abuf != NULL) dmu_return_arcbuf(abuf); dmu_buf_rele(db, FTAG); - ztest_range_unlock(rl); + ztest_range_unlock(zd, rl); ztest_object_unlock(zd, lr->lr_foid); return (ENOSPC); } @@ -1731,7 +1819,7 @@ ztest_replay_write(ztest_ds_t *zd, lr_write_t *lr, boolean_t byteswap) dmu_tx_commit(tx); - ztest_range_unlock(rl); + ztest_range_unlock(zd, rl); ztest_object_unlock(zd, lr->lr_foid); return (0); @@ -1743,7 +1831,7 @@ ztest_replay_truncate(ztest_ds_t *zd, lr_truncate_t *lr, boolean_t byteswap) objset_t *os = zd->zd_os; dmu_tx_t *tx; uint64_t txg; - rl_t *rl; + ztest_zrl_t *rl; if (byteswap) byteswap_uint64_array(lr, sizeof (*lr)); @@ -1758,7 +1846,7 @@ ztest_replay_truncate(ztest_ds_t *zd, lr_truncate_t *lr, boolean_t byteswap) txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); if (txg == 0) { - ztest_range_unlock(rl); + ztest_range_unlock(zd, rl); ztest_object_unlock(zd, lr->lr_foid); return (ENOSPC); } @@ -1770,7 +1858,7 @@ ztest_replay_truncate(ztest_ds_t *zd, lr_truncate_t *lr, boolean_t byteswap) dmu_tx_commit(tx); - ztest_range_unlock(rl); + ztest_range_unlock(zd, rl); ztest_object_unlock(zd, lr->lr_foid); return (0); @@ -1873,23 +1961,30 @@ zil_replay_func_t ztest_replay_vector[TX_MAX_TYPE] = { /* * ZIL get_data callbacks */ +typedef struct ztest_zgd_private { + ztest_ds_t *z_zd; + ztest_zrl_t *z_rl; + uint64_t z_object; +} ztest_zgd_private_t; static void ztest_get_done(zgd_t *zgd, int error) { - ztest_ds_t *zd = zgd->zgd_private; - uint64_t object = zgd->zgd_rl->rl_object; + ztest_zgd_private_t *zzp = zgd->zgd_private; + ztest_ds_t *zd = zzp->z_zd; + uint64_t object = zzp->z_object; if (zgd->zgd_db) dmu_buf_rele(zgd->zgd_db, zgd); - ztest_range_unlock(zgd->zgd_rl); + ztest_range_unlock(zd, zzp->z_rl); ztest_object_unlock(zd, object); if (error == 0 && zgd->zgd_bp) zil_add_block(zgd->zgd_zilog, zgd->zgd_bp); umem_free(zgd, sizeof (*zgd)); + umem_free(zzp, sizeof (*zzp)); } static int @@ -1907,6 +2002,7 @@ ztest_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio) dmu_buf_t *db; zgd_t *zgd; int error; + ztest_zgd_private_t *zgd_private; ztest_object_lock(zd, object, RL_READER); error = dmu_bonus_hold(os, object, FTAG, &db); @@ -1929,10 +2025,13 @@ ztest_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio) zgd = umem_zalloc(sizeof (*zgd), UMEM_NOFAIL); zgd->zgd_zilog = zd->zd_zilog; - zgd->zgd_private = zd; + zgd_private = umem_zalloc(sizeof (ztest_zgd_private_t), UMEM_NOFAIL); + zgd_private->z_zd = zd; + zgd_private->z_object = object; + zgd->zgd_private = zgd_private; if (buf != NULL) { /* immediate write */ - zgd->zgd_rl = ztest_range_lock(zd, object, offset, size, + zgd_private->z_rl = ztest_range_lock(zd, object, offset, size, RL_READER); error = dmu_read(os, object, offset, size, buf, @@ -1947,7 +2046,7 @@ ztest_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio) offset = 0; } - zgd->zgd_rl = ztest_range_lock(zd, object, offset, size, + zgd_private->z_rl = ztest_range_lock(zd, object, offset, size, RL_READER); error = dmu_buf_hold(os, object, offset, zgd, &db, @@ -2198,7 +2297,7 @@ ztest_prealloc(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size) objset_t *os = zd->zd_os; dmu_tx_t *tx; uint64_t txg; - rl_t *rl; + ztest_zrl_t *rl; txg_wait_synced(dmu_objset_pool(os), 0); @@ -2219,7 +2318,7 @@ ztest_prealloc(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size) (void) dmu_free_long_range(os, object, offset, size); } - ztest_range_unlock(rl); + ztest_range_unlock(zd, rl); ztest_object_unlock(zd, object); } diff --git a/include/sys/zfs_rlock.h b/include/sys/zfs_rlock.h index 5322f3bc73af..5373f0d57320 100644 --- a/include/sys/zfs_rlock.h +++ b/include/sys/zfs_rlock.h @@ -30,11 +30,14 @@ extern "C" { #endif -#ifdef _KERNEL - #include #include + +#ifdef _KERNEL #include +#else +#include +#endif typedef enum { RL_READER, @@ -106,7 +109,6 @@ zfs_rlock_destroy(zfs_rlock_t *zrl) avl_destroy(&zrl->zr_avl); mutex_destroy(&zrl->zr_mutex); } -#endif /* _KERNEL */ #ifdef __cplusplus } diff --git a/lib/libzpool/Makefile.am b/lib/libzpool/Makefile.am index 3e59efa5c4b1..9bb1a8eea3fa 100644 --- a/lib/libzpool/Makefile.am +++ b/lib/libzpool/Makefile.am @@ -103,6 +103,7 @@ KERNEL_C = \ zfs_fuid.c \ zfs_sa.c \ zfs_znode.c \ + zfs_rlock.c \ zil.c \ zio.c \ zio_checksum.c \