diff --git a/cmd/ztest/ztest.c b/cmd/ztest/ztest.c index e7f02a02ed22..6395b6ce1d93 100644 --- a/cmd/ztest/ztest.c +++ b/cmd/ztest/ztest.c @@ -101,6 +101,7 @@ #include #include #include +#include #include #include #include @@ -231,15 +232,6 @@ typedef struct bufwad { uint64_t bw_data; } bufwad_t; -/* - * XXX -- fix zfs range locks to be generic so we can use them here. - */ -typedef enum { - RL_READER, - RL_WRITER, - RL_APPEND -} rl_type_t; - typedef struct rll { void *rll_writer; int rll_readers; @@ -247,12 +239,10 @@ typedef struct rll { kcondvar_t rll_cv; } rll_t; -typedef struct rl { - uint64_t rl_object; - uint64_t rl_offset; - uint64_t rl_size; - rll_t *rl_lock; -} rl_t; +typedef struct zll { + list_t z_list; + kmutex_t z_lock; +} zll_t; #define ZTEST_RANGE_LOCKS 64 #define ZTEST_OBJECT_LOCKS 64 @@ -284,7 +274,7 @@ typedef struct ztest_ds { char zd_name[MAXNAMELEN]; kmutex_t zd_dirobj_lock; rll_t zd_object_lock[ZTEST_OBJECT_LOCKS]; - rll_t zd_range_lock[ZTEST_RANGE_LOCKS]; + zll_t zd_range_lock[ZTEST_RANGE_LOCKS]; } ztest_ds_t; /* @@ -1135,6 +1125,100 @@ ztest_spa_prop_set_uint64(zpool_prop_t prop, uint64_t value) return (error); } + +/* + * Object and range lock mechanics + */ +typedef struct { + list_node_t z_lnode; + refcount_t z_refcnt; + uint64_t z_object; + znode_t z_znode; +} ztest_znode_t; + +typedef struct { + rl_t *z_rl; + ztest_znode_t *z_ztznode; +} ztest_zrl_t; + +static ztest_znode_t * +ztest_znode_init(uint64_t object) +{ + ztest_znode_t *zp = umem_alloc(sizeof (*zp), UMEM_NOFAIL); + + list_link_init(&zp->z_lnode); + refcount_create(&zp->z_refcnt); + zp->z_object = object; + zfs_rlock_init(&zp->z_znode.z_range_lock); + + return (zp); +} + +static void +ztest_znode_fini(ztest_znode_t *zp) +{ + ASSERT(refcount_is_zero(&zp->z_refcnt)); + zfs_rlock_destroy(&zp->z_znode.z_range_lock); + zp->z_object = 0; + refcount_destroy(&zp->z_refcnt); + list_link_init(&zp->z_lnode); + umem_free(zp, sizeof (*zp)); +} + +static void +ztest_zll_init(zll_t *zll) +{ + mutex_init(&zll->z_lock, NULL, MUTEX_DEFAULT, NULL); + list_create(&zll->z_list, sizeof (ztest_znode_t), + offsetof(ztest_znode_t, z_lnode)); +} + +static void +ztest_zll_destroy(zll_t *zll) +{ + list_destroy(&zll->z_list); + mutex_destroy(&zll->z_lock); +} + +#define RL_TAG "range_lock" +static ztest_znode_t * +ztest_znode_get(ztest_ds_t *zd, uint64_t object) +{ + zll_t *zll = &zd->zd_range_lock[object & (ZTEST_OBJECT_LOCKS - 1)]; + ztest_znode_t *zp = NULL; + mutex_enter(&zll->z_lock); + for (zp = list_head(&zll->z_list); (zp); + zp = list_next(&zll->z_list, zp)) { + if (zp->z_object == object) { + refcount_add(&zp->z_refcnt, RL_TAG); + break; + } + } + if (zp == NULL) { + zp = ztest_znode_init(object); + refcount_add(&zp->z_refcnt, RL_TAG); + list_insert_head(&zll->z_list, zp); + } + mutex_exit(&zll->z_lock); + return (zp); +} + +static void +ztest_znode_put(ztest_ds_t *zd, ztest_znode_t *zp) +{ + zll_t *zll = NULL; + ASSERT3U(zp->z_object, !=, 0); + zll = &zd->zd_range_lock[zp->z_object & (ZTEST_OBJECT_LOCKS - 1)]; + mutex_enter(&zll->z_lock); + refcount_remove(&zp->z_refcnt, RL_TAG); + if (refcount_is_zero(&zp->z_refcnt)) { + list_remove(&zll->z_list, zp); + ztest_znode_fini(zp); + } + mutex_exit(&zll->z_lock); +} + + static void ztest_rll_init(rll_t *rll) { @@ -1207,33 +1291,37 @@ ztest_object_unlock(ztest_ds_t *zd, uint64_t object) ztest_rll_unlock(rll); } -static rl_t * -ztest_range_lock(ztest_ds_t *zd, uint64_t object, uint64_t offset, - uint64_t size, rl_type_t type) +static ztest_zrl_t * +ztest_zrl_init(rl_t *rl, ztest_znode_t *zp) { - uint64_t hash = object ^ (offset % (ZTEST_RANGE_LOCKS + 1)); - rll_t *rll = &zd->zd_range_lock[hash & (ZTEST_RANGE_LOCKS - 1)]; - rl_t *rl; - - rl = umem_alloc(sizeof (*rl), UMEM_NOFAIL); - rl->rl_object = object; - rl->rl_offset = offset; - rl->rl_size = size; - rl->rl_lock = rll; - - ztest_rll_lock(rll, type); - - return (rl); + ztest_zrl_t *zrl = umem_alloc(sizeof (*zrl), UMEM_NOFAIL); + zrl->z_rl = rl; + zrl->z_ztznode = zp; + return (zrl); } static void -ztest_range_unlock(rl_t *rl) +ztest_zrl_fini(ztest_zrl_t *zrl) { - rll_t *rll = rl->rl_lock; + umem_free(zrl, sizeof (*zrl)); +} - ztest_rll_unlock(rll); +static ztest_zrl_t * +ztest_range_lock(ztest_ds_t *zd, uint64_t object, uint64_t offset, + uint64_t size, rl_type_t type) +{ + ztest_znode_t *zp = ztest_znode_get(zd, object); + rl_t *rl = zfs_range_lock(&zp->z_znode.z_range_lock, offset, + size, type); + return (ztest_zrl_init(rl, zp)); +} - umem_free(rl, sizeof (*rl)); +static void +ztest_range_unlock(ztest_ds_t *zd, ztest_zrl_t *zrl) +{ + zfs_range_unlock(zrl->z_rl); + ztest_znode_put(zd, zrl->z_ztznode); + ztest_zrl_fini(zrl); } static void @@ -1255,7 +1343,7 @@ ztest_zd_init(ztest_ds_t *zd, ztest_shared_ds_t *szd, objset_t *os) ztest_rll_init(&zd->zd_object_lock[l]); for (l = 0; l < ZTEST_RANGE_LOCKS; l++) - ztest_rll_init(&zd->zd_range_lock[l]); + ztest_zll_init(&zd->zd_range_lock[l]); } static void @@ -1270,7 +1358,7 @@ ztest_zd_fini(ztest_ds_t *zd) ztest_rll_destroy(&zd->zd_object_lock[l]); for (l = 0; l < ZTEST_RANGE_LOCKS; l++) - ztest_rll_destroy(&zd->zd_range_lock[l]); + ztest_zll_destroy(&zd->zd_range_lock[l]); } #define TXG_MIGHTWAIT (ztest_random(10) == 0 ? TXG_NOWAIT : TXG_WAIT) @@ -1627,7 +1715,7 @@ ztest_replay_write(ztest_ds_t *zd, lr_write_t *lr, boolean_t byteswap) dmu_tx_t *tx; dmu_buf_t *db; arc_buf_t *abuf = NULL; - rl_t *rl; + ztest_zrl_t *rl; if (byteswap) byteswap_uint64_array(lr, sizeof (*lr)); @@ -1676,7 +1764,7 @@ ztest_replay_write(ztest_ds_t *zd, lr_write_t *lr, boolean_t byteswap) if (abuf != NULL) dmu_return_arcbuf(abuf); dmu_buf_rele(db, FTAG); - ztest_range_unlock(rl); + ztest_range_unlock(zd, rl); ztest_object_unlock(zd, lr->lr_foid); return (ENOSPC); } @@ -1733,7 +1821,7 @@ ztest_replay_write(ztest_ds_t *zd, lr_write_t *lr, boolean_t byteswap) dmu_tx_commit(tx); - ztest_range_unlock(rl); + ztest_range_unlock(zd, rl); ztest_object_unlock(zd, lr->lr_foid); return (0); @@ -1745,7 +1833,7 @@ ztest_replay_truncate(ztest_ds_t *zd, lr_truncate_t *lr, boolean_t byteswap) objset_t *os = zd->zd_os; dmu_tx_t *tx; uint64_t txg; - rl_t *rl; + ztest_zrl_t *rl; if (byteswap) byteswap_uint64_array(lr, sizeof (*lr)); @@ -1760,7 +1848,7 @@ ztest_replay_truncate(ztest_ds_t *zd, lr_truncate_t *lr, boolean_t byteswap) txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); if (txg == 0) { - ztest_range_unlock(rl); + ztest_range_unlock(zd, rl); ztest_object_unlock(zd, lr->lr_foid); return (ENOSPC); } @@ -1772,7 +1860,7 @@ ztest_replay_truncate(ztest_ds_t *zd, lr_truncate_t *lr, boolean_t byteswap) dmu_tx_commit(tx); - ztest_range_unlock(rl); + ztest_range_unlock(zd, rl); ztest_object_unlock(zd, lr->lr_foid); return (0); @@ -1875,23 +1963,30 @@ zil_replay_func_t ztest_replay_vector[TX_MAX_TYPE] = { /* * ZIL get_data callbacks */ +typedef struct ztest_zgd_private { + ztest_ds_t *z_zd; + ztest_zrl_t *z_rl; + uint64_t z_object; +} ztest_zgd_private_t; static void ztest_get_done(zgd_t *zgd, int error) { - ztest_ds_t *zd = zgd->zgd_private; - uint64_t object = zgd->zgd_rl->rl_object; + ztest_zgd_private_t *zzp = zgd->zgd_private; + ztest_ds_t *zd = zzp->z_zd; + uint64_t object = zzp->z_object; if (zgd->zgd_db) dmu_buf_rele(zgd->zgd_db, zgd); - ztest_range_unlock(zgd->zgd_rl); + ztest_range_unlock(zd, zzp->z_rl); ztest_object_unlock(zd, object); if (error == 0 && zgd->zgd_bp) zil_add_block(zgd->zgd_zilog, zgd->zgd_bp); umem_free(zgd, sizeof (*zgd)); + umem_free(zzp, sizeof (*zzp)); } static int @@ -1909,6 +2004,7 @@ ztest_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio) dmu_buf_t *db; zgd_t *zgd; int error; + ztest_zgd_private_t *zgd_private; ztest_object_lock(zd, object, RL_READER); error = dmu_bonus_hold(os, object, FTAG, &db); @@ -1931,10 +2027,13 @@ ztest_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio) zgd = umem_zalloc(sizeof (*zgd), UMEM_NOFAIL); zgd->zgd_zilog = zd->zd_zilog; - zgd->zgd_private = zd; + zgd_private = umem_zalloc(sizeof (ztest_zgd_private_t), UMEM_NOFAIL); + zgd_private->z_zd = zd; + zgd_private->z_object = object; + zgd->zgd_private = zgd_private; if (buf != NULL) { /* immediate write */ - zgd->zgd_rl = ztest_range_lock(zd, object, offset, size, + zgd_private->z_rl = ztest_range_lock(zd, object, offset, size, RL_READER); error = dmu_read(os, object, offset, size, buf, @@ -1949,7 +2048,7 @@ ztest_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio) offset = 0; } - zgd->zgd_rl = ztest_range_lock(zd, object, offset, size, + zgd_private->z_rl = ztest_range_lock(zd, object, offset, size, RL_READER); error = dmu_buf_hold(os, object, offset, zgd, &db, @@ -2200,7 +2299,7 @@ ztest_prealloc(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size) objset_t *os = zd->zd_os; dmu_tx_t *tx; uint64_t txg; - rl_t *rl; + ztest_zrl_t *rl; txg_wait_synced(dmu_objset_pool(os), 0); @@ -2221,7 +2320,7 @@ ztest_prealloc(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size) (void) dmu_free_long_range(os, object, offset, size); } - ztest_range_unlock(rl); + ztest_range_unlock(zd, rl); ztest_object_unlock(zd, object); } @@ -4226,7 +4325,7 @@ ztest_write_free(ztest_ds_t *zd, ztest_od_t *od, dmu_object_info_t *doi) while (ztest_random(10)) { int i, nranges = 0, minoffset = 0, maxoffset = 0; - rl_t *rl; + ztest_zrl_t *rl; struct ztest_range { uint64_t offset; uint64_t size; @@ -4352,7 +4451,7 @@ ztest_write_free(ztest_ds_t *zd, ztest_od_t *od, dmu_object_info_t *doi) break; } - ztest_range_unlock(rl); + ztest_range_unlock(zd, rl); ztest_object_unlock(zd, object); } diff --git a/include/sys/zfs_rlock.h b/include/sys/zfs_rlock.h index cbc573a40cde..63fac2a19c20 100644 --- a/include/sys/zfs_rlock.h +++ b/include/sys/zfs_rlock.h @@ -30,11 +30,14 @@ extern "C" { #endif -#ifdef _KERNEL - #include #include + +#ifdef _KERNEL #include +#else +#include +#endif typedef enum { RL_READER, @@ -74,10 +77,15 @@ struct znode; */ rl_t *zfs_range_lock_impl(zfs_rlock_t *zrl, uint64_t off, uint64_t len, rl_type_t type, struct znode *zp); +#ifdef _KERNEL #define zfs_range_lock(zp, off, len, type) \ zfs_range_lock_impl(&(zp)->z_range_lock, off, len, type, zp) #define zvol_range_lock(zrl, off, len, type) \ zfs_range_lock_impl(zrl, off, len, type, NULL) +#else +#define zfs_range_lock(zrl, off, len, type) \ + zfs_range_lock_impl(zrl, off, len, type, NULL) +#endif /* Unlock range and destroy range lock structure. */ void zfs_range_unlock(rl_t *rl); @@ -107,7 +115,6 @@ static inline void zfs_rlock_destroy(zfs_rlock_t *zrl) avl_destroy(&zrl->zr_avl); mutex_destroy(&zrl->zr_mutex); } -#endif /* _KERNEL */ #ifdef __cplusplus } diff --git a/include/sys/zfs_znode.h b/include/sys/zfs_znode.h index 30a6ec9f807a..9050721abf17 100644 --- a/include/sys/zfs_znode.h +++ b/include/sys/zfs_znode.h @@ -27,6 +27,7 @@ #define _SYS_FS_ZFS_ZNODE_H #ifdef _KERNEL + #include #include #include @@ -38,7 +39,24 @@ #include #include #include + +#else + +#include +#include +#include + +/* User mode znode emulation for ztest */ +#define ZTOZSB(zp) (zp) /* unused */ +typedef struct znode { + uint64_t z_size; /* unused, zeroed out */ + uint64_t z_blksz; /* unused, zeroed out */ + uint64_t z_max_blksz; /* unused, zeroed out */ + zfs_rlock_t z_range_lock; +} znode_t; + #endif + #include #include diff --git a/lib/libzpool/Makefile.am b/lib/libzpool/Makefile.am index f45a57d71fdf..f9abca7e7c48 100644 --- a/lib/libzpool/Makefile.am +++ b/lib/libzpool/Makefile.am @@ -102,6 +102,7 @@ KERNEL_C = \ zfs_fuid.c \ zfs_sa.c \ zfs_znode.c \ + zfs_rlock.c \ zil.c \ zio.c \ zio_checksum.c \