Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] SIMD RAIDZ and Fletcher4 on top of openzfs-abd #5020

Closed
wants to merge 13 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions cmd/raidz_test/raidz_bench.c
Original file line number Diff line number Diff line change
Expand Up @@ -53,18 +53,18 @@ bench_init_raidz_map(void)

/*
* To permit larger column sizes these have to be
* allocated using aligned alloc instead of zio_data_buf_alloc
* allocated using aligned alloc instead of zio_abd_buf_alloc
*/
zio_bench.io_data = raidz_alloc(max_data_size);
zio_bench.io_abd = raidz_alloc(max_data_size);

init_zio_data(&zio_bench);
init_zio_abd(&zio_bench);
}

static void
bench_fini_raidz_maps(void)
{
/* tear down golden zio */
raidz_free(zio_bench.io_data, max_data_size);
raidz_free(zio_bench.io_abd, max_data_size);
bzero(&zio_bench, sizeof (zio_t));
}

Expand Down
69 changes: 43 additions & 26 deletions cmd/raidz_test/raidz_test.c
Original file line number Diff line number Diff line change
Expand Up @@ -181,10 +181,10 @@ static void process_options(int argc, char **argv)
}
}

#define DATA_COL(rm, i) ((rm)->rm_col[raidz_parity(rm) + (i)].rc_data)
#define DATA_COL(rm, i) ((rm)->rm_col[raidz_parity(rm) + (i)].rc_abd)
#define DATA_COL_SIZE(rm, i) ((rm)->rm_col[raidz_parity(rm) + (i)].rc_size)

#define CODE_COL(rm, i) ((rm)->rm_col[(i)].rc_data)
#define CODE_COL(rm, i) ((rm)->rm_col[(i)].rc_abd)
#define CODE_COL_SIZE(rm, i) ((rm)->rm_col[(i)].rc_size)

static int
Expand All @@ -195,10 +195,9 @@ cmp_code(raidz_test_opts_t *opts, const raidz_map_t *rm, const int parity)
VERIFY(parity >= 1 && parity <= 3);

for (i = 0; i < parity; i++) {
if (0 != memcmp(CODE_COL(rm, i), CODE_COL(opts->rm_golden, i),
CODE_COL_SIZE(rm, i))) {
if (abd_cmp(CODE_COL(rm, i), CODE_COL(opts->rm_golden, i))
!= 0) {
ret++;

LOG_OPT(D_DEBUG, opts,
"\nParity block [%d] different!\n", i);
}
Expand All @@ -213,8 +212,8 @@ cmp_data(raidz_test_opts_t *opts, raidz_map_t *rm)
int dcols = opts->rm_golden->rm_cols - raidz_parity(opts->rm_golden);

for (i = 0; i < dcols; i++) {
if (0 != memcmp(DATA_COL(opts->rm_golden, i), DATA_COL(rm, i),
DATA_COL_SIZE(opts->rm_golden, i))) {
if (abd_cmp(DATA_COL(opts->rm_golden, i), DATA_COL(rm, i))
!= 0) {
ret++;

LOG_OPT(D_DEBUG, opts,
Expand All @@ -224,37 +223,55 @@ cmp_data(raidz_test_opts_t *opts, raidz_map_t *rm)
return (ret);
}

/*
 * Chunk callback (passed to abd_iterate_func()): overwrite the buffer with
 * the leading words of the shared rand_data pattern.
 *
 * data    - start of the chunk to fill
 * size    - chunk length in bytes; only whole ints are written, any
 *           trailing bytes are left untouched
 * private - unused
 *
 * Every invocation copies starting from rand_data[0].  Always returns 0.
 */
static int
init_rand(void *data, size_t size, void *private)
{
	int *words = data;
	const size_t nwords = size / sizeof (int);
	int idx = 0;

	while (idx < nwords) {
		words[idx] = rand_data[idx];
		idx++;
	}

	return (0);
}

/*
 * Chunk callback (passed to abd_iterate_func()): overwrite the buffer with
 * values drawn from rand(), one call per int, in ascending address order.
 *
 * data    - start of the chunk to scribble over
 * size    - chunk length in bytes; only whole ints are written, any
 *           trailing bytes are left untouched
 * private - unused
 *
 * Always returns 0.
 */
static int
corrupt_rand(void *data, size_t size, void *private)
{
	int *words = data;
	const size_t nwords = size / sizeof (int);
	int idx = 0;

	while (idx < nwords) {
		words[idx] = rand();
		idx++;
	}

	return (0);
}


static void
corrupt_colums(raidz_map_t *rm, const int *tgts, const int cnt)
{
int i;
int *dst;
raidz_col_t *col;

for (i = 0; i < cnt; i++) {
col = &rm->rm_col[tgts[i]];
dst = col->rc_data;
for (i = 0; i < col->rc_size / sizeof (int); i++)
dst[i] = rand();
abd_iterate_func(col->rc_abd, 0, col->rc_size, corrupt_rand,
NULL);
}
}

void
init_zio_data(zio_t *zio)
init_zio_abd(zio_t *zio)
{
int i;
int *dst = (int *) zio->io_data;

for (i = 0; i < zio->io_size / sizeof (int); i++) {
dst[i] = rand_data[i];
}
abd_iterate_func(zio->io_abd, 0, zio->io_size, init_rand, NULL);
}

static void
fini_raidz_map(zio_t **zio, raidz_map_t **rm)
{
vdev_raidz_map_free(*rm);
raidz_free((*zio)->io_data, (*zio)->io_size);
raidz_free((*zio)->io_abd, (*zio)->io_size);
umem_free(*zio, sizeof (zio_t));

*zio = NULL;
Expand All @@ -279,13 +296,13 @@ init_raidz_golden_map(raidz_test_opts_t *opts, const int parity)
opts->zio_golden->io_offset = zio_test->io_offset = opts->rto_offset;
opts->zio_golden->io_size = zio_test->io_size = opts->rto_dsize;

opts->zio_golden->io_data = raidz_alloc(opts->rto_dsize);
zio_test->io_data = raidz_alloc(opts->rto_dsize);
opts->zio_golden->io_abd = raidz_alloc(opts->rto_dsize);
zio_test->io_abd = raidz_alloc(opts->rto_dsize);

init_zio_data(opts->zio_golden);
init_zio_data(zio_test);
init_zio_abd(opts->zio_golden);
init_zio_abd(zio_test);

VERIFY0(vdev_raidz_impl_set("original"));
VERIFY0(vdev_raidz_impl_set("scalar")); /* write simple impl */

opts->rm_golden = vdev_raidz_map_alloc(opts->zio_golden,
opts->rto_ashift, total_ncols, parity);
Expand Down Expand Up @@ -326,8 +343,8 @@ init_raidz_map(raidz_test_opts_t *opts, zio_t **zio, const int parity)

(*zio)->io_offset = 0;
(*zio)->io_size = alloc_dsize;
(*zio)->io_data = raidz_alloc(alloc_dsize);
init_zio_data(*zio);
(*zio)->io_abd = raidz_alloc(alloc_dsize);
init_zio_abd(*zio);

rm = vdev_raidz_map_alloc(*zio, opts->rto_ashift,
total_ncols, parity);
Expand Down
7 changes: 3 additions & 4 deletions cmd/raidz_test/raidz_test.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
#include <sys/spa.h>

static const char *raidz_impl_names[] = {
"original",
"scalar",
"sse2",
"ssse3",
Expand Down Expand Up @@ -96,11 +95,11 @@ static inline size_t ilog2(size_t a)
#define SEP "----------------\n"


#define raidz_alloc(size) zio_data_buf_alloc(size)
#define raidz_free(p, size) zio_data_buf_free(p, size)
#define raidz_alloc(size) abd_alloc(size, B_FALSE)
#define raidz_free(p, size) abd_free(p)


void init_zio_data(zio_t *zio);
void init_zio_abd(zio_t *zio);

void run_raidz_benchmark(void);

Expand Down
55 changes: 34 additions & 21 deletions cmd/zdb/zdb.c
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
#include <sys/arc.h>
#include <sys/ddt.h>
#include <sys/zfeature.h>
#include <sys/abd.h>
#include <zfs_comutil.h>
#include <libzfs.h>

Expand Down Expand Up @@ -1297,7 +1298,7 @@ visit_indirect(spa_t *spa, const dnode_phys_t *dnp,
}
if (!err)
ASSERT3U(fill, ==, BP_GET_FILL(bp));
(void) arc_buf_remove_ref(buf, &buf);
arc_buf_destroy(buf, &buf);
}

return (err);
Expand Down Expand Up @@ -2462,7 +2463,7 @@ zdb_blkptr_done(zio_t *zio)
zdb_cb_t *zcb = zio->io_private;
zbookmark_phys_t *zb = &zio->io_bookmark;

zio_data_buf_free(zio->io_data, zio->io_size);
abd_free(zio->io_abd);

mutex_enter(&spa->spa_scrub_lock);
spa->spa_scrub_inflight--;
Expand Down Expand Up @@ -2528,7 +2529,7 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
if (!BP_IS_EMBEDDED(bp) &&
(dump_opt['c'] > 1 || (dump_opt['c'] && is_metadata))) {
size_t size = BP_GET_PSIZE(bp);
void *data = zio_data_buf_alloc(size);
abd_t *abd = abd_alloc(size, B_FALSE);
int flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB | ZIO_FLAG_RAW;

/* If it's an intent log block, failure is expected. */
Expand All @@ -2541,7 +2542,7 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
spa->spa_scrub_inflight++;
mutex_exit(&spa->spa_scrub_lock);

zio_nowait(zio_read(NULL, spa, bp, data, size,
zio_nowait(zio_read(NULL, spa, bp, abd, size,
zdb_blkptr_done, zcb, ZIO_PRIORITY_ASYNC_READ, flags, zb));
}

Expand Down Expand Up @@ -3319,6 +3320,13 @@ zdb_vdev_lookup(vdev_t *vdev, char *path)
return (NULL);
}

/*
 * Adapter giving random_get_pseudo_bytes() the three-argument callback
 * shape (buffer, length, private pointer) so it can be handed to
 * abd_iterate_func().  The private pointer is ignored; the result of
 * random_get_pseudo_bytes() is passed back unchanged.
 */
/* ARGSUSED */
static int
random_get_pseudo_bytes_cb(void *buf, size_t len, void *unused)
{
	int err = random_get_pseudo_bytes(buf, len);

	return (err);
}

/*
* Read a block from a pool and print it out. The syntax of the
* block descriptor is:
Expand Down Expand Up @@ -3350,9 +3358,11 @@ zdb_read_block(char *thing, spa_t *spa)
uint64_t offset = 0, size = 0, psize = 0, lsize = 0, blkptr_offset = 0;
zio_t *zio;
vdev_t *vd;
void *pbuf, *lbuf, *buf;
abd_t *pabd;
void *lbuf, *buf;
char *s, *p, *dup, *vdev, *flagstr;
int i, error;
boolean_t borrowed = B_FALSE;

dup = strdup(thing);
s = strtok(dup, ":");
Expand Down Expand Up @@ -3423,8 +3433,7 @@ zdb_read_block(char *thing, spa_t *spa)
psize = size;
lsize = size;

/* Some 4K native devices require 4K buffer alignment */
pbuf = umem_alloc_aligned(SPA_MAXBLOCKSIZE, PAGESIZE, UMEM_NOFAIL);
pabd = abd_alloc_for_io(SPA_MAXBLOCKSIZE, B_FALSE);
lbuf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);

BP_ZERO(bp);
Expand Down Expand Up @@ -3452,15 +3461,15 @@ zdb_read_block(char *thing, spa_t *spa)
/*
* Treat this as a normal block read.
*/
zio_nowait(zio_read(zio, spa, bp, pbuf, psize, NULL, NULL,
zio_nowait(zio_read(zio, spa, bp, pabd, psize, NULL, NULL,
ZIO_PRIORITY_SYNC_READ,
ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW, NULL));
} else {
/*
* Treat this as a vdev child I/O.
*/
zio_nowait(zio_vdev_child_io(zio, bp, vd, offset, pbuf, psize,
ZIO_TYPE_READ, ZIO_PRIORITY_SYNC_READ,
zio_nowait(zio_vdev_child_io(zio, bp, vd, offset, pabd,
psize, ZIO_TYPE_READ, ZIO_PRIORITY_SYNC_READ,
ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE |
ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY |
ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW, NULL, NULL));
Expand All @@ -3483,13 +3492,13 @@ zdb_read_block(char *thing, spa_t *spa)
void *pbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
void *lbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);

bcopy(pbuf, pbuf2, psize);
abd_copy_to_buf(pbuf2, pabd, psize);

VERIFY(random_get_pseudo_bytes((uint8_t *)pbuf + psize,
SPA_MAXBLOCKSIZE - psize) == 0);
VERIFY0(abd_iterate_func(pabd, psize, SPA_MAXBLOCKSIZE - psize,
random_get_pseudo_bytes_cb, NULL));

VERIFY(random_get_pseudo_bytes((uint8_t *)pbuf2 + psize,
SPA_MAXBLOCKSIZE - psize) == 0);
VERIFY0(random_get_pseudo_bytes((uint8_t *)pbuf2 + psize,
SPA_MAXBLOCKSIZE - psize));

/*
* XXX - On the one hand, with SPA_MAXBLOCKSIZE at 16MB,
Expand All @@ -3504,10 +3513,10 @@ zdb_read_block(char *thing, spa_t *spa)
"Trying %05llx -> %05llx (%s)\n",
(u_longlong_t)psize, (u_longlong_t)lsize,
zio_compress_table[c].ci_name);
if (zio_decompress_data(c, pbuf, lbuf,
psize, lsize) == 0 &&
zio_decompress_data(c, pbuf2, lbuf2,
psize, lsize) == 0 &&
if (zio_decompress_data(c, pabd,
lbuf, psize, lsize) == 0 &&
zio_decompress_data_buf(c, pbuf2,
lbuf2, psize, lsize) == 0 &&
bcmp(lbuf, lbuf2, lsize) == 0)
break;
}
Expand All @@ -3525,8 +3534,9 @@ zdb_read_block(char *thing, spa_t *spa)
buf = lbuf;
size = lsize;
} else {
buf = pbuf;
size = psize;
buf = abd_borrow_buf_copy(pabd, size);
borrowed = B_TRUE;
}

if (flags & ZDB_FLAG_PRINT_BLKPTR)
Expand All @@ -3542,8 +3552,11 @@ zdb_read_block(char *thing, spa_t *spa)
else
zdb_dump_block(thing, buf, size, flags);

if (borrowed)
abd_return_buf_copy(pabd, buf, size);

out:
umem_free(pbuf, SPA_MAXBLOCKSIZE);
abd_free(pabd);
umem_free(lbuf, SPA_MAXBLOCKSIZE);
free(dup);
}
Expand Down
Loading