Skip to content

Commit

Permalink
DLPX-44812 integrate EP-220 large memory scalability
Browse files Browse the repository at this point in the history
  • Loading branch information
dpquigl authored and behlendorf committed Nov 29, 2016
1 parent 616fa7c commit a6255b7
Show file tree
Hide file tree
Showing 49 changed files with 2,623 additions and 796 deletions.
3 changes: 3 additions & 0 deletions cmd/raidz_test/raidz_bench.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
* Copyright (C) 2016 Gvozden Nešković. All rights reserved.
*/

#ifdef _ABD_READY_

#include <sys/zfs_context.h>
#include <sys/time.h>
#include <sys/wait.h>
Expand Down Expand Up @@ -225,3 +227,4 @@ run_raidz_benchmark(void)

bench_fini_raidz_maps();
}
#endif
11 changes: 11 additions & 0 deletions cmd/raidz_test/raidz_test.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,16 @@
#include <sys/vdev_raidz_impl.h>
#include <assert.h>
#include <stdio.h>

#ifndef _ABD_READY_
/*
 * Placeholder entry point compiled when _ABD_READY_ is not defined:
 * the program does nothing and reports success.
 */
/* ARGSUSED */
int
main(int argc, char **argv)
{
	return (0);
}

#else

#include "raidz_test.h"

static int *rand_data;
Expand Down Expand Up @@ -782,3 +792,4 @@ main(int argc, char **argv)

return (err);
}
#endif
48 changes: 28 additions & 20 deletions cmd/zdb/zdb.c
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
#include <sys/arc.h>
#include <sys/ddt.h>
#include <sys/zfeature.h>
#include <sys/abd.h>
#include <zfs_comutil.h>
#include <libzfs.h>

Expand Down Expand Up @@ -2464,7 +2465,7 @@ zdb_blkptr_done(zio_t *zio)
zdb_cb_t *zcb = zio->io_private;
zbookmark_phys_t *zb = &zio->io_bookmark;

zio_data_buf_free(zio->io_data, zio->io_size);
abd_free(zio->io_abd);

mutex_enter(&spa->spa_scrub_lock);
spa->spa_scrub_inflight--;
Expand Down Expand Up @@ -2530,7 +2531,7 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
if (!BP_IS_EMBEDDED(bp) &&
(dump_opt['c'] > 1 || (dump_opt['c'] && is_metadata))) {
size_t size = BP_GET_PSIZE(bp);
void *data = zio_data_buf_alloc(size);
abd_t *abd = abd_alloc(size, B_FALSE);
int flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB | ZIO_FLAG_RAW;

/* If it's an intent log block, failure is expected. */
Expand All @@ -2543,7 +2544,7 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
spa->spa_scrub_inflight++;
mutex_exit(&spa->spa_scrub_lock);

zio_nowait(zio_read(NULL, spa, bp, data, size,
zio_nowait(zio_read(NULL, spa, bp, abd, size,
zdb_blkptr_done, zcb, ZIO_PRIORITY_ASYNC_READ, flags, zb));
}

Expand Down Expand Up @@ -3321,6 +3322,13 @@ zdb_vdev_lookup(vdev_t *vdev, char *path)
return (NULL);
}

/*
 * Adapter that gives random_get_pseudo_bytes() the three-argument callback
 * shape expected by abd_iterate_func(): fills the `len` bytes at `buf` and
 * hands back whatever random_get_pseudo_bytes() returned.  The third
 * argument is ignored.
 */
/* ARGSUSED */
static int
random_get_pseudo_bytes_cb(void *buf, size_t len, void *unused)
{
	int err;

	err = random_get_pseudo_bytes(buf, len);
	return (err);
}

/*
* Read a block from a pool and print it out. The syntax of the
* block descriptor is:
Expand Down Expand Up @@ -3352,7 +3360,8 @@ zdb_read_block(char *thing, spa_t *spa)
uint64_t offset = 0, size = 0, psize = 0, lsize = 0, blkptr_offset = 0;
zio_t *zio;
vdev_t *vd;
void *pbuf, *lbuf, *buf;
abd_t *pabd;
void *lbuf, *buf;
char *s, *p, *dup, *vdev, *flagstr;
int i, error;

Expand Down Expand Up @@ -3425,8 +3434,7 @@ zdb_read_block(char *thing, spa_t *spa)
psize = size;
lsize = size;

/* Some 4K native devices require 4K buffer alignment */
pbuf = umem_alloc_aligned(SPA_MAXBLOCKSIZE, PAGESIZE, UMEM_NOFAIL);
pabd = abd_alloc_linear(SPA_MAXBLOCKSIZE, B_FALSE);
lbuf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);

BP_ZERO(bp);
Expand Down Expand Up @@ -3454,15 +3462,15 @@ zdb_read_block(char *thing, spa_t *spa)
/*
* Treat this as a normal block read.
*/
zio_nowait(zio_read(zio, spa, bp, pbuf, psize, NULL, NULL,
zio_nowait(zio_read(zio, spa, bp, pabd, psize, NULL, NULL,
ZIO_PRIORITY_SYNC_READ,
ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW, NULL));
} else {
/*
* Treat this as a vdev child I/O.
*/
zio_nowait(zio_vdev_child_io(zio, bp, vd, offset, pbuf, psize,
ZIO_TYPE_READ, ZIO_PRIORITY_SYNC_READ,
zio_nowait(zio_vdev_child_io(zio, bp, vd, offset, pabd,
psize, ZIO_TYPE_READ, ZIO_PRIORITY_SYNC_READ,
ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE |
ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY |
ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW, NULL, NULL));
Expand All @@ -3485,13 +3493,13 @@ zdb_read_block(char *thing, spa_t *spa)
void *pbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
void *lbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);

bcopy(pbuf, pbuf2, psize);
abd_copy_to_buf(pbuf2, pabd, psize);

VERIFY(random_get_pseudo_bytes((uint8_t *)pbuf + psize,
SPA_MAXBLOCKSIZE - psize) == 0);
VERIFY0(abd_iterate_func(pabd, psize, SPA_MAXBLOCKSIZE - psize,
random_get_pseudo_bytes_cb, NULL));

VERIFY(random_get_pseudo_bytes((uint8_t *)pbuf2 + psize,
SPA_MAXBLOCKSIZE - psize) == 0);
VERIFY0(random_get_pseudo_bytes((uint8_t *)pbuf2 + psize,
SPA_MAXBLOCKSIZE - psize));

/*
* XXX - On the one hand, with SPA_MAXBLOCKSIZE at 16MB,
Expand All @@ -3506,10 +3514,10 @@ zdb_read_block(char *thing, spa_t *spa)
"Trying %05llx -> %05llx (%s)\n",
(u_longlong_t)psize, (u_longlong_t)lsize,
zio_compress_table[c].ci_name);
if (zio_decompress_data(c, pbuf, lbuf,
psize, lsize) == 0 &&
zio_decompress_data(c, pbuf2, lbuf2,
psize, lsize) == 0 &&
if (zio_decompress_data(c, pabd,
lbuf, psize, lsize) == 0 &&
zio_decompress_data_buf(c, pbuf2,
lbuf2, psize, lsize) == 0 &&
bcmp(lbuf, lbuf2, lsize) == 0)
break;
}
Expand All @@ -3527,7 +3535,7 @@ zdb_read_block(char *thing, spa_t *spa)
buf = lbuf;
size = lsize;
} else {
buf = pbuf;
buf = abd_to_buf(pabd);
size = psize;
}

Expand All @@ -3545,7 +3553,7 @@ zdb_read_block(char *thing, spa_t *spa)
zdb_dump_block(thing, buf, size, flags);

out:
umem_free(pbuf, SPA_MAXBLOCKSIZE);
abd_free(pabd);
umem_free(lbuf, SPA_MAXBLOCKSIZE);
free(dup);
}
Expand Down
59 changes: 34 additions & 25 deletions cmd/zdb/zdb_il.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
*/

/*
* Copyright (c) 2013, 2014 by Delphix. All rights reserved.
* Copyright (c) 2013, 2016 by Delphix. All rights reserved.
*/

/*
Expand All @@ -42,6 +42,7 @@
#include <sys/resource.h>
#include <sys/zil.h>
#include <sys/zil_impl.h>
#include <sys/abd.h>

extern uint8_t dump_opt[256];

Expand Down Expand Up @@ -119,14 +120,30 @@ zil_prt_rec_rename(zilog_t *zilog, int txtype, lr_rename_t *lr)
(void) printf("%ssrc %s tgt %s\n", prefix, snm, tnm);
}

/*
 * abd_iterate_func() callback used by zil_prt_rec_write() to dump one
 * chunk of write-record data to stdout.
 *
 * Each of the `len` bytes at `data` is printed either as the character
 * followed by a space (when isprint() accepts it) or as a hex value padded
 * to a minimum width of two.  The third argument is ignored.
 * Always returns 0.
 *
 * The bytes are walked as unsigned char on purpose: handing a negative
 * plain char to isprint() is undefined, and a sign-extended char printed
 * with %X comes out as FFFFFFxx instead of the two-digit byte value.
 */
/* ARGSUSED */
static int
zil_prt_rec_write_cb(void *data, size_t len, void *unused)
{
	const unsigned char *cdata = data;
	size_t i;

	for (i = 0; i < len; i++) {
		if (isprint(cdata[i]))
			(void) printf("%c ", cdata[i]);
		else
			(void) printf("%2X", cdata[i]);
	}
	return (0);
}

/* ARGSUSED */
static void
zil_prt_rec_write(zilog_t *zilog, int txtype, lr_write_t *lr)
{
char *data, *dlimit;
abd_t *data;
blkptr_t *bp = &lr->lr_blkptr;
zbookmark_phys_t zb;
char *buf;
int verbose = MAX(dump_opt['d'], dump_opt['i']);
int error;

Expand All @@ -137,9 +154,6 @@ zil_prt_rec_write(zilog_t *zilog, int txtype, lr_write_t *lr)
if (txtype == TX_WRITE2 || verbose < 5)
return;

if ((buf = malloc(SPA_MAXBLOCKSIZE)) == NULL)
return;

if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) {
(void) printf("%shas blkptr, %s\n", prefix,
!BP_IS_HOLE(bp) &&
Expand All @@ -150,43 +164,38 @@ zil_prt_rec_write(zilog_t *zilog, int txtype, lr_write_t *lr)
if (BP_IS_HOLE(bp)) {
(void) printf("\t\t\tLSIZE 0x%llx\n",
(u_longlong_t)BP_GET_LSIZE(bp));
bzero(buf, SPA_MAXBLOCKSIZE);
(void) printf("%s<hole>\n", prefix);
goto exit;
return;
}
if (bp->blk_birth < zilog->zl_header->zh_claim_txg) {
(void) printf("%s<block already committed>\n", prefix);
goto exit;
return;
}

SET_BOOKMARK(&zb, dmu_objset_id(zilog->zl_os),
lr->lr_foid, ZB_ZIL_LEVEL,
lr->lr_offset / BP_GET_LSIZE(bp));

data = abd_alloc(BP_GET_LSIZE(bp), B_FALSE);
error = zio_wait(zio_read(NULL, zilog->zl_spa,
bp, buf, BP_GET_LSIZE(bp), NULL, NULL,
bp, data, BP_GET_LSIZE(bp), NULL, NULL,
ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &zb));
if (error)
goto exit;
data = buf;
goto out;
} else {
data = (char *)(lr + 1);
/* data is stored after the end of the lr_write record */
data = abd_alloc(lr->lr_length, B_FALSE);
abd_copy_from_buf(data, lr + 1, lr->lr_length);
}

dlimit = data + MIN(lr->lr_length,
(verbose < 6 ? 20 : SPA_MAXBLOCKSIZE));

(void) printf("%s", prefix);
while (data < dlimit) {
if (isprint(*data))
(void) printf("%c ", *data);
else
(void) printf("%2hhX", *data);
data++;
}
(void) abd_iterate_func(data,
0, MIN(lr->lr_length, (verbose < 6 ? 20 : SPA_MAXBLOCKSIZE)),
zil_prt_rec_write_cb, NULL);
(void) printf("\n");
exit:
free(buf);

out:
abd_free(data);
}

/* ARGSUSED */
Expand Down
18 changes: 13 additions & 5 deletions cmd/ztest/ztest.c
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@
#include <sys/refcount.h>
#include <sys/zfeature.h>
#include <sys/dsl_userhold.h>
#include <sys/abd.h>
#include <stdio.h>
#include <stdio_ext.h>
#include <stdlib.h>
Expand Down Expand Up @@ -193,6 +194,7 @@ extern uint64_t metaslab_gang_bang;
extern uint64_t metaslab_df_alloc_threshold;
extern int metaslab_preload_limit;
extern boolean_t zfs_compressed_arc_enabled;
extern int zfs_abd_scatter_enabled;

static ztest_shared_opts_t *ztest_shared_opts;
static ztest_shared_opts_t ztest_opts;
Expand Down Expand Up @@ -5444,7 +5446,7 @@ ztest_ddt_repair(ztest_ds_t *zd, uint64_t id)
enum zio_checksum checksum = spa_dedup_checksum(spa);
dmu_buf_t *db;
dmu_tx_t *tx;
void *buf;
abd_t *abd;
blkptr_t blk;
int copies = 2 * ZIO_DEDUPDITTO_MIN;
int i;
Expand Down Expand Up @@ -5525,14 +5527,14 @@ ztest_ddt_repair(ztest_ds_t *zd, uint64_t id)
* Damage the block. Dedup-ditto will save us when we read it later.
*/
psize = BP_GET_PSIZE(&blk);
buf = zio_buf_alloc(psize);
ztest_pattern_set(buf, psize, ~pattern);
abd = abd_alloc_linear(psize, B_TRUE);
ztest_pattern_set(abd_to_buf(abd), psize, ~pattern);

(void) zio_wait(zio_rewrite(NULL, spa, 0, &blk,
buf, psize, NULL, NULL, ZIO_PRIORITY_SYNC_WRITE,
abd, psize, NULL, NULL, ZIO_PRIORITY_SYNC_WRITE,
ZIO_FLAG_CANFAIL | ZIO_FLAG_INDUCE_DAMAGE, NULL));

zio_buf_free(buf, psize);
abd_free(abd);

(void) rw_unlock(&ztest_name_lock);
umem_free(od, sizeof (ztest_od_t));
Expand Down Expand Up @@ -5965,6 +5967,12 @@ ztest_resume_thread(void *arg)
*/
if (ztest_random(10) == 0)
zfs_compressed_arc_enabled = ztest_random(2);

/*
* Periodically change the zfs_abd_scatter_enabled setting.
*/
if (ztest_random(10) == 0)
zfs_abd_scatter_enabled = ztest_random(2);
}

thread_exit();
Expand Down
1 change: 1 addition & 0 deletions include/sys/Makefile.am
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
SUBDIRS = fm fs crypto sysevent

COMMON_H = \
$(top_srcdir)/include/sys/abd.h \
$(top_srcdir)/include/sys/arc.h \
$(top_srcdir)/include/sys/arc_impl.h \
$(top_srcdir)/include/sys/avl.h \
Expand Down
Loading

0 comments on commit a6255b7

Please sign in to comment.