Skip to content

Commit

Permalink
Use zfs range locks in ztest
Browse files Browse the repository at this point in the history
  • Loading branch information
bprotopopov authored and Chunwei Chen committed May 5, 2016
1 parent 0e0c5b1 commit aed2368
Show file tree
Hide file tree
Showing 3 changed files with 157 additions and 55 deletions.
203 changes: 151 additions & 52 deletions cmd/ztest/ztest.c
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@
#include <sys/zio.h>
#include <sys/zil.h>
#include <sys/zil_impl.h>
#include <sys/zfs_rlock.h>
#include <sys/vdev_impl.h>
#include <sys/vdev_file.h>
#include <sys/spa_impl.h>
Expand Down Expand Up @@ -231,28 +232,17 @@ typedef struct bufwad {
uint64_t bw_data;
} bufwad_t;

/*
* XXX -- fix zfs range locks to be generic so we can use them here.
*/
typedef enum {
RL_READER,
RL_WRITER,
RL_APPEND
} rl_type_t;

typedef struct rll {
void *rll_writer;
int rll_readers;
kmutex_t rll_lock;
kcondvar_t rll_cv;
} rll_t;

typedef struct rl {
uint64_t rl_object;
uint64_t rl_offset;
uint64_t rl_size;
rll_t *rl_lock;
} rl_t;
typedef struct zll {
list_t z_list;
kmutex_t z_lock;
} zll_t;

#define ZTEST_RANGE_LOCKS 64
#define ZTEST_OBJECT_LOCKS 64
Expand Down Expand Up @@ -284,7 +274,7 @@ typedef struct ztest_ds {
char zd_name[MAXNAMELEN];
kmutex_t zd_dirobj_lock;
rll_t zd_object_lock[ZTEST_OBJECT_LOCKS];
rll_t zd_range_lock[ZTEST_RANGE_LOCKS];
zll_t zd_range_lock[ZTEST_RANGE_LOCKS];
} ztest_ds_t;

/*
Expand Down Expand Up @@ -1133,6 +1123,100 @@ ztest_spa_prop_set_uint64(zpool_prop_t prop, uint64_t value)
return (error);
}


/*
* Object and range lock mechanics
*/
typedef struct {
list_node_t z_lnode;
refcount_t z_refcnt;
uint64_t z_object;
zfs_rlock_t z_range_lock;
} ztest_znode_t;

typedef struct {
rl_t *z_rl;
ztest_znode_t *z_ztznode;
} ztest_zrl_t;

static ztest_znode_t *
ztest_znode_init(uint64_t object)
{
ztest_znode_t *zp = umem_alloc(sizeof (*zp), UMEM_NOFAIL);

list_link_init(&zp->z_lnode);
refcount_create(&zp->z_refcnt);
zp->z_object = object;
zfs_rlock_init(&zp->z_range_lock);

return (zp);
}

static void
ztest_znode_fini(ztest_znode_t *zp)
{
ASSERT(refcount_is_zero(&zp->z_refcnt));
zfs_rlock_destroy(&zp->z_range_lock);
zp->z_object = 0;
refcount_destroy(&zp->z_refcnt);
list_link_init(&zp->z_lnode);
umem_free(zp, sizeof (*zp));
}

static void
ztest_zll_init(zll_t *zll)
{
mutex_init(&zll->z_lock, NULL, MUTEX_DEFAULT, NULL);
list_create(&zll->z_list, sizeof (ztest_znode_t),
offsetof(ztest_znode_t, z_lnode));
}

static void
ztest_zll_destroy(zll_t *zll)
{
list_destroy(&zll->z_list);
mutex_destroy(&zll->z_lock);
}

#define RL_TAG "range_lock"
static ztest_znode_t *
ztest_znode_get(ztest_ds_t *zd, uint64_t object)
{
zll_t *zll = &zd->zd_range_lock[object & (ZTEST_OBJECT_LOCKS - 1)];
ztest_znode_t *zp = NULL;
mutex_enter(&zll->z_lock);
for (zp = list_head(&zll->z_list); (zp);
zp = list_next(&zll->z_list, zp)) {
if (zp->z_object == object) {
refcount_add(&zp->z_refcnt, RL_TAG);
break;
}
}
if (zp == NULL) {
zp = ztest_znode_init(object);
refcount_add(&zp->z_refcnt, RL_TAG);
list_insert_head(&zll->z_list, zp);
}
mutex_exit(&zll->z_lock);
return (zp);
}

static void
ztest_znode_put(ztest_ds_t *zd, ztest_znode_t *zp)
{
zll_t *zll = NULL;
ASSERT3U(zp->z_object, !=, 0);
zll = &zd->zd_range_lock[zp->z_object & (ZTEST_OBJECT_LOCKS - 1)];
mutex_enter(&zll->z_lock);
refcount_remove(&zp->z_refcnt, RL_TAG);
if (refcount_is_zero(&zp->z_refcnt)) {
list_remove(&zll->z_list, zp);
ztest_znode_fini(zp);
}
mutex_exit(&zll->z_lock);
}


static void
ztest_rll_init(rll_t *rll)
{
Expand Down Expand Up @@ -1205,33 +1289,37 @@ ztest_object_unlock(ztest_ds_t *zd, uint64_t object)
ztest_rll_unlock(rll);
}

static rl_t *
ztest_range_lock(ztest_ds_t *zd, uint64_t object, uint64_t offset,
uint64_t size, rl_type_t type)
static ztest_zrl_t *
ztest_zrl_init(rl_t *rl, ztest_znode_t *zp)
{
uint64_t hash = object ^ (offset % (ZTEST_RANGE_LOCKS + 1));
rll_t *rll = &zd->zd_range_lock[hash & (ZTEST_RANGE_LOCKS - 1)];
rl_t *rl;

rl = umem_alloc(sizeof (*rl), UMEM_NOFAIL);
rl->rl_object = object;
rl->rl_offset = offset;
rl->rl_size = size;
rl->rl_lock = rll;

ztest_rll_lock(rll, type);

return (rl);
ztest_zrl_t *zrl = umem_alloc(sizeof (*zrl), UMEM_NOFAIL);
zrl->z_rl = rl;
zrl->z_ztznode = zp;
return (zrl);
}

static void
ztest_range_unlock(rl_t *rl)
ztest_zrl_fini(ztest_zrl_t *zrl)
{
rll_t *rll = rl->rl_lock;
umem_free(zrl, sizeof (*zrl));
}

ztest_rll_unlock(rll);
static ztest_zrl_t *
ztest_range_lock(ztest_ds_t *zd, uint64_t object, uint64_t offset,
uint64_t size, rl_type_t type)
{
ztest_znode_t *zp = ztest_znode_get(zd, object);
rl_t *rl = zfs_range_lock(&zp->z_range_lock, offset,
size, type);
return (ztest_zrl_init(rl, zp));
}

umem_free(rl, sizeof (*rl));
static void
ztest_range_unlock(ztest_ds_t *zd, ztest_zrl_t *zrl)
{
zfs_range_unlock(zrl->z_rl);
ztest_znode_put(zd, zrl->z_ztznode);
ztest_zrl_fini(zrl);
}

static void
Expand All @@ -1253,7 +1341,7 @@ ztest_zd_init(ztest_ds_t *zd, ztest_shared_ds_t *szd, objset_t *os)
ztest_rll_init(&zd->zd_object_lock[l]);

for (l = 0; l < ZTEST_RANGE_LOCKS; l++)
ztest_rll_init(&zd->zd_range_lock[l]);
ztest_zll_init(&zd->zd_range_lock[l]);
}

static void
Expand All @@ -1268,7 +1356,7 @@ ztest_zd_fini(ztest_ds_t *zd)
ztest_rll_destroy(&zd->zd_object_lock[l]);

for (l = 0; l < ZTEST_RANGE_LOCKS; l++)
ztest_rll_destroy(&zd->zd_range_lock[l]);
ztest_zll_destroy(&zd->zd_range_lock[l]);
}

#define TXG_MIGHTWAIT (ztest_random(10) == 0 ? TXG_NOWAIT : TXG_WAIT)
Expand Down Expand Up @@ -1625,7 +1713,7 @@ ztest_replay_write(ztest_ds_t *zd, lr_write_t *lr, boolean_t byteswap)
dmu_tx_t *tx;
dmu_buf_t *db;
arc_buf_t *abuf = NULL;
rl_t *rl;
ztest_zrl_t *rl;

if (byteswap)
byteswap_uint64_array(lr, sizeof (*lr));
Expand Down Expand Up @@ -1674,7 +1762,7 @@ ztest_replay_write(ztest_ds_t *zd, lr_write_t *lr, boolean_t byteswap)
if (abuf != NULL)
dmu_return_arcbuf(abuf);
dmu_buf_rele(db, FTAG);
ztest_range_unlock(rl);
ztest_range_unlock(zd, rl);
ztest_object_unlock(zd, lr->lr_foid);
return (ENOSPC);
}
Expand Down Expand Up @@ -1731,7 +1819,7 @@ ztest_replay_write(ztest_ds_t *zd, lr_write_t *lr, boolean_t byteswap)

dmu_tx_commit(tx);

ztest_range_unlock(rl);
ztest_range_unlock(zd, rl);
ztest_object_unlock(zd, lr->lr_foid);

return (0);
Expand All @@ -1743,7 +1831,7 @@ ztest_replay_truncate(ztest_ds_t *zd, lr_truncate_t *lr, boolean_t byteswap)
objset_t *os = zd->zd_os;
dmu_tx_t *tx;
uint64_t txg;
rl_t *rl;
ztest_zrl_t *rl;

if (byteswap)
byteswap_uint64_array(lr, sizeof (*lr));
Expand All @@ -1758,7 +1846,7 @@ ztest_replay_truncate(ztest_ds_t *zd, lr_truncate_t *lr, boolean_t byteswap)

txg = ztest_tx_assign(tx, TXG_WAIT, FTAG);
if (txg == 0) {
ztest_range_unlock(rl);
ztest_range_unlock(zd, rl);
ztest_object_unlock(zd, lr->lr_foid);
return (ENOSPC);
}
Expand All @@ -1770,7 +1858,7 @@ ztest_replay_truncate(ztest_ds_t *zd, lr_truncate_t *lr, boolean_t byteswap)

dmu_tx_commit(tx);

ztest_range_unlock(rl);
ztest_range_unlock(zd, rl);
ztest_object_unlock(zd, lr->lr_foid);

return (0);
Expand Down Expand Up @@ -1873,23 +1961,30 @@ zil_replay_func_t ztest_replay_vector[TX_MAX_TYPE] = {
/*
* ZIL get_data callbacks
*/
typedef struct ztest_zgd_private {
ztest_ds_t *z_zd;
ztest_zrl_t *z_rl;
uint64_t z_object;
} ztest_zgd_private_t;

static void
ztest_get_done(zgd_t *zgd, int error)
{
ztest_ds_t *zd = zgd->zgd_private;
uint64_t object = zgd->zgd_rl->rl_object;
ztest_zgd_private_t *zzp = zgd->zgd_private;
ztest_ds_t *zd = zzp->z_zd;
uint64_t object = zzp->z_object;

if (zgd->zgd_db)
dmu_buf_rele(zgd->zgd_db, zgd);

ztest_range_unlock(zgd->zgd_rl);
ztest_range_unlock(zd, zzp->z_rl);
ztest_object_unlock(zd, object);

if (error == 0 && zgd->zgd_bp)
zil_add_block(zgd->zgd_zilog, zgd->zgd_bp);

umem_free(zgd, sizeof (*zgd));
umem_free(zzp, sizeof (*zzp));
}

static int
Expand All @@ -1907,6 +2002,7 @@ ztest_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
dmu_buf_t *db;
zgd_t *zgd;
int error;
ztest_zgd_private_t *zgd_private;

ztest_object_lock(zd, object, RL_READER);
error = dmu_bonus_hold(os, object, FTAG, &db);
Expand All @@ -1929,10 +2025,13 @@ ztest_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)

zgd = umem_zalloc(sizeof (*zgd), UMEM_NOFAIL);
zgd->zgd_zilog = zd->zd_zilog;
zgd->zgd_private = zd;
zgd_private = umem_zalloc(sizeof (ztest_zgd_private_t), UMEM_NOFAIL);
zgd_private->z_zd = zd;
zgd_private->z_object = object;
zgd->zgd_private = zgd_private;

if (buf != NULL) { /* immediate write */
zgd->zgd_rl = ztest_range_lock(zd, object, offset, size,
zgd_private->z_rl = ztest_range_lock(zd, object, offset, size,
RL_READER);

error = dmu_read(os, object, offset, size, buf,
Expand All @@ -1947,7 +2046,7 @@ ztest_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
offset = 0;
}

zgd->zgd_rl = ztest_range_lock(zd, object, offset, size,
zgd_private->z_rl = ztest_range_lock(zd, object, offset, size,
RL_READER);

error = dmu_buf_hold(os, object, offset, zgd, &db,
Expand Down Expand Up @@ -2198,7 +2297,7 @@ ztest_prealloc(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size)
objset_t *os = zd->zd_os;
dmu_tx_t *tx;
uint64_t txg;
rl_t *rl;
ztest_zrl_t *rl;

txg_wait_synced(dmu_objset_pool(os), 0);

Expand All @@ -2219,7 +2318,7 @@ ztest_prealloc(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size)
(void) dmu_free_long_range(os, object, offset, size);
}

ztest_range_unlock(rl);
ztest_range_unlock(zd, rl);
ztest_object_unlock(zd, object);
}

Expand Down
8 changes: 5 additions & 3 deletions include/sys/zfs_rlock.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,14 @@
extern "C" {
#endif

#ifdef _KERNEL

#include <sys/list.h>
#include <sys/avl.h>

#ifdef _KERNEL
#include <sys/condvar.h>
#else
#include <sys/zfs_context.h>
#endif

typedef enum {
RL_READER,
Expand Down Expand Up @@ -106,7 +109,6 @@ zfs_rlock_destroy(zfs_rlock_t *zrl)
avl_destroy(&zrl->zr_avl);
mutex_destroy(&zrl->zr_mutex);
}
#endif /* _KERNEL */

#ifdef __cplusplus
}
Expand Down
1 change: 1 addition & 0 deletions lib/libzpool/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ KERNEL_C = \
zfs_fuid.c \
zfs_sa.c \
zfs_znode.c \
zfs_rlock.c \
zil.c \
zio.c \
zio_checksum.c \
Expand Down

0 comments on commit aed2368

Please sign in to comment.