Skip to content

Commit

Permalink
Illumos 5960 zfs recv should prefetch indirect blocks
Browse files Browse the repository at this point in the history
Illumos 5925 zfs receive -o origin=

Reviewed by: Prakash Surya <[email protected]>
Reviewed by: Matthew Ahrens <[email protected]>

depends on openzfs#3574 Illumos 5745 zfs set allows only one dataset property to be set at a time
depends on openzfs#3611 Illumos 5746 more checksumming in zfs send

diverged code base from Illumos:

[lib/libzfs/libzfs_sendrecv.c]
b8864a2 Fix gcc cast warnings
325f023 Add linux kernel device support
5c3f61e Increase Linux pipe buffer size on 'zfs receive'

[module/zfs/zfs_vnops.c]
3558fd7 Prototype/structure update for Linux
c12e3a5 Restructure zfs_readdir() to fix regressions

[module/zfs/zvol.c]
function
@zvol_map_block(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
is non-existent in ZoL

[module/zfs/dmu.c]
in function
dmu_prefetch(objset_t *os, uint64_t object, uint64_t offset, uint64_t len)
int i
is declared before the following code block (C90 vs. C99)

[module/zfs/Makefile.in + lib/libzpool/Makefile.am]
47a4a6f Support parallel build trees (VPATH builds)

[module/zfs/dbuf.c]
fc5bb51 Fix stack dbuf_hold_impl()
9b67f60 Illumos 4757, 4913
{4757 ZFS embedded-data block pointers ("zero block compression") ,
 4913 zfs release should not be subject to space checks}
{reference} 34229a2 Reduce stack usage for recursive traverse_visitbp()

[module/zfs/dmu_send.c]
b58986e Use large stacks when available
241b541 Illumos 5959 - clean up per-dataset feature count code
{reference} 77aef6f Use vmem_alloc() for nvlists
00b4602 Add linux kernel memory support

[module/zfs/zvol.c]
9965059 Prefetch start and end of volumes

[module/zfs/dmu_send.c, C90 warnings - previous commits, code thus less clear to read]
Illumos 5746 more checksumming in zfs send

[module/zfs/dbuf.c, ISO C90 - mixed declarations and code]
arc_flags_t aflags =
uint64_t nextblkid = dpa->dpa_zb.zb_blkid >>
dmu_buf_impl_t *db = dbuf_find(dn->dn_objset, dn->dn_object,
zio_t *pio = zio_root(dmu_objset_spa(dn->dn_objset), NULL, NULL,
blkptr_t *bp = ((blkptr_t *)abuf->b_data) +
dbuf_prefetch_arg_t *dpa = kmem_zalloc(sizeof (*dpa), KM_SLEEP);
dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset;

[module/zfs/dmu_send.c, ISO C90 - mixed declarations and code]
dnode_phys_t *blk = abuf->b_data;
uint64_t dnobj = zb->zb_blkid * (blksz >> DNODE_SHIFT);
error: ‘for’ loop initial declarations are only allowed in C99 or C11 mode: for (struct receive_ign_obj_node *n =
struct send_block_record *to_data;
struct receive_ign_obj_node *n;

FIXME: man/man8/zfs.8
FIXME: different manpage format that I don't understand yet

Ported-by: kernelOfTruth [email protected]
  • Loading branch information
pcd1193182 authored and kernelOfTruth committed Dec 26, 2015
1 parent d8695e9 commit 1b12e2c
Show file tree
Hide file tree
Showing 39 changed files with 5,339 additions and 382 deletions.
5 changes: 4 additions & 1 deletion cmd/zdb/zdb.c
Original file line number Diff line number Diff line change
Expand Up @@ -2488,6 +2488,9 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
dmu_object_type_t type;
boolean_t is_metadata;

if (bp == NULL)
return (0);

if (dump_opt['b'] >= 5 && bp->blk_birth > 0) {
char blkbuf[BP_SPRINTF_LEN];
snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
Expand Down Expand Up @@ -2984,7 +2987,7 @@ zdb_ddt_add_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
avl_index_t where;
zdb_ddt_entry_t *zdde, zdde_search;

if (BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp))
if (bp == NULL || BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp))
return (0);

if (dump_opt['S'] > 1 && zb->zb_level == ZB_ROOT_LEVEL) {
Expand Down
29 changes: 23 additions & 6 deletions cmd/zfs/zfs_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -248,8 +248,9 @@ get_usage(zfs_help_t idx)
return (gettext("\tpromote <clone-filesystem>\n"));
case HELP_RECEIVE:
return (gettext("\treceive [-vnFu] <filesystem|volume|"
"snapshot>\n"
"\treceive [-vnFu] [-d | -e] <filesystem>\n"));
"snapshot>\n"
"\treceive [-vnFu] [-o origin=<snapshot>] [-d | -e] "
"<filesystem>\n"));
case HELP_RENAME:
return (gettext("\trename [-f] <filesystem|volume|snapshot> "
"<filesystem|volume|snapshot>\n"
Expand Down Expand Up @@ -792,7 +793,7 @@ zfs_do_create(int argc, char **argv)
nomem();
break;
case 'o':
if (parseprop(props, optarg))
if (parseprop(props, optarg) != 0)
goto error;
break;
case 's':
Expand Down Expand Up @@ -3622,7 +3623,7 @@ zfs_do_snapshot(int argc, char **argv)
while ((c = getopt(argc, argv, "ro:")) != -1) {
switch (c) {
case 'o':
if (parseprop(props, optarg))
if (parseprop(props, optarg) != 0)
return (1);
break;
case 'r':
Expand Down Expand Up @@ -3881,10 +3882,19 @@ zfs_do_receive(int argc, char **argv)
{
int c, err;
recvflags_t flags = { 0 };
nvlist_t *props;
nvpair_t *nvp = NULL;

if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0)
nomem();

/* check options */
while ((c = getopt(argc, argv, ":denuvF")) != -1) {
while ((c = getopt(argc, argv, ":o:denuvF")) != -1) {
switch (c) {
case 'o':
if (parseprop(props, optarg) != 0)
return (1);
break;
case 'd':
flags.isprefix = B_TRUE;
break;
Expand Down Expand Up @@ -3929,6 +3939,13 @@ zfs_do_receive(int argc, char **argv)
usage(B_FALSE);
}

while ((nvp = nvlist_next_nvpair(props, nvp))) {
if (strcmp(nvpair_name(nvp), "origin") != 0) {
(void) fprintf(stderr, gettext("invalid option"));
usage(B_FALSE);
}
}

if (isatty(STDIN_FILENO)) {
(void) fprintf(stderr,
gettext("Error: Backup stream can not be read "
Expand All @@ -3937,7 +3954,7 @@ zfs_do_receive(int argc, char **argv)
return (1);
}

err = zfs_receive(g_zfs, argv[0], &flags, STDIN_FILENO, NULL);
err = zfs_receive(g_zfs, argv[0], props, &flags, STDIN_FILENO, NULL);

return (err != 0);
}
Expand Down
9 changes: 6 additions & 3 deletions cmd/ztest/ztest.c
Original file line number Diff line number Diff line change
Expand Up @@ -3728,7 +3728,8 @@ ztest_dmu_read_write(ztest_ds_t *zd, uint64_t id)
*/
n = ztest_random(regions) * stride + ztest_random(width);
s = 1 + ztest_random(2 * width - 1);
dmu_prefetch(os, bigobj, n * chunksize, s * chunksize);
dmu_prefetch(os, bigobj, 0, n * chunksize, s * chunksize,
ZIO_PRIORITY_SYNC_READ);

/*
* Pick a random index and compute the offsets into packobj and bigobj.
Expand Down Expand Up @@ -5930,8 +5931,10 @@ ztest_run(ztest_shared_t *zs)
* Right before closing the pool, kick off a bunch of async I/O;
* spa_close() should wait for it to complete.
*/
for (object = 1; object < 50; object++)
dmu_prefetch(spa->spa_meta_objset, object, 0, 1ULL << 20);
for (object = 1; object < 50; object++) {
dmu_prefetch(spa->spa_meta_objset, object, 0, 0, 1ULL << 20,
ZIO_PRIORITY_SYNC_READ);
}

/* Verify that at least one commit cb was called in a timely fashion */
if (zc_cb_counter >= ZTEST_COMMIT_CB_MIN_REG)
Expand Down
4 changes: 2 additions & 2 deletions include/libzfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -678,8 +678,8 @@ typedef struct recvflags {
boolean_t nomount;
} recvflags_t;

extern int zfs_receive(libzfs_handle_t *, const char *, recvflags_t *,
int, avl_tree_t *);
extern int zfs_receive(libzfs_handle_t *, const char *, nvlist_t *,
recvflags_t *, int, avl_tree_t *);

typedef enum diff_flags {
ZFS_DIFF_PARSEABLE = 0x1,
Expand Down
54 changes: 54 additions & 0 deletions include/sys/bqueue.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
/*
* CDDL HEADER START
*
* This file and its contents are supplied under the terms of the
* Common Development and Distribution License ("CDDL"), version 1.0.
* You may only use this file in accordance with the terms of version
* 1.0 of the CDDL.
*
* A full copy of the text of the CDDL should have accompanied this
* source. A copy of the CDDL is also available via the Internet at
* http://www.illumos.org/license/CDDL.
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2014 by Delphix. All rights reserved.
*/

#ifndef _BQUEUE_H
#define _BQUEUE_H

#ifdef __cplusplus
extern "C" {
#endif

#include <sys/zfs_context.h>

/*
 * Per-queue state passed (by pointer) to every bqueue_*() routine declared
 * in this header.
 *
 * NOTE(review): the field notes below are inferred from field names and
 * types only; this header does not show how the fields are used. Confirm
 * against the implementation before relying on them.
 */
typedef struct bqueue {
list_t bq_list; /* presumably the list of queued items -- TODO confirm */
kmutex_t bq_lock; /* presumably protects the fields of this struct -- TODO confirm */
kcondvar_t bq_add_cv; /* condition variable; name suggests "item added" -- TODO confirm */
kcondvar_t bq_pop_cv; /* condition variable; name suggests "item removed" -- TODO confirm */
uint64_t bq_size; /* presumably the current total size of queued items -- TODO confirm */
uint64_t bq_maxsize; /* presumably the upper bound for bq_size -- TODO confirm */
size_t bq_node_offset; /* presumably offset of the bqueue_node_t inside each item -- TODO confirm */
} bqueue_t;

/*
 * Per-item record pairing a list linkage with a 64-bit size.
 *
 * NOTE(review): presumably embedded in each object placed on a bqueue_t,
 * at the offset recorded in that queue's bq_node_offset -- confirm against
 * the implementation.
 */
typedef struct bqueue_node {
list_node_t bqn_node; /* list linkage */
uint64_t bqn_size; /* presumably the size accounted for this item -- TODO confirm */
} bqueue_node_t;


/*
 * Queue interface. Every routine takes the queue as its first argument.
 * The prototypes omit parameter names, so the notes below state only what
 * the signatures show.
 *
 * bqueue_init()    - takes a uint64_t and a size_t; returns int.
 *                    NOTE(review): presumably the maximum queue size and the
 *                    node offset (cf. bq_maxsize / bq_node_offset) -- confirm.
 * bqueue_destroy() - takes only the queue; returns nothing.
 * bqueue_enqueue() - takes an opaque item pointer and a uint64_t.
 *                    NOTE(review): presumably the item's size -- confirm.
 * bqueue_dequeue() - returns an opaque item pointer.
 * bqueue_empty()   - returns a boolean_t.
 */
int bqueue_init(bqueue_t *, uint64_t, size_t);
void bqueue_destroy(bqueue_t *);
void bqueue_enqueue(bqueue_t *, void *, uint64_t);
void *bqueue_dequeue(bqueue_t *);
boolean_t bqueue_empty(bqueue_t *);

#ifdef __cplusplus
}
#endif

#endif /* _BQUEUE_H */
9 changes: 5 additions & 4 deletions include/sys/dbuf.h
Original file line number Diff line number Diff line change
Expand Up @@ -261,8 +261,7 @@ typedef struct dbuf_hash_table {
kmutex_t hash_mutexes[DBUF_MUTEXES];
} dbuf_hash_table_t;


uint64_t dbuf_whichblock(struct dnode *di, uint64_t offset);
uint64_t dbuf_whichblock(struct dnode *di, int64_t level, uint64_t offset);

void dbuf_create_bonus(struct dnode *dn);
int dbuf_spill_set_blksz(dmu_buf_t *db, uint64_t blksz, dmu_tx_t *tx);
Expand All @@ -272,10 +271,12 @@ void dbuf_rm_spill(struct dnode *dn, dmu_tx_t *tx);
dmu_buf_impl_t *dbuf_hold(struct dnode *dn, uint64_t blkid, void *tag);
dmu_buf_impl_t *dbuf_hold_level(struct dnode *dn, int level, uint64_t blkid,
void *tag);
int dbuf_hold_impl(struct dnode *dn, uint8_t level, uint64_t blkid, int create,
int dbuf_hold_impl(struct dnode *dn, uint8_t level, uint64_t blkid,
boolean_t fail_sparse, boolean_t fail_uncached,
void *tag, dmu_buf_impl_t **dbp);

void dbuf_prefetch(struct dnode *dn, uint64_t blkid, zio_priority_t prio);
void dbuf_prefetch(struct dnode *dn, int64_t level, uint64_t blkid,
zio_priority_t prio, arc_flags_t aflags);

void dbuf_add_ref(dmu_buf_impl_t *db, void *tag);
boolean_t dbuf_try_add_ref(dmu_buf_t *db, objset_t *os, uint64_t obj,
Expand Down
5 changes: 3 additions & 2 deletions include/sys/dmu.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
#include <sys/cred.h>
#include <sys/fs/zfs.h>
#include <sys/uio.h>
#include <sys/zio_priority.h>

#ifdef __cplusplus
extern "C" {
Expand Down Expand Up @@ -740,8 +741,8 @@ extern int zfs_max_recordsize;
/*
* Asynchronously try to read in the data.
*/
void dmu_prefetch(objset_t *os, uint64_t object, uint64_t offset,
uint64_t len);
void dmu_prefetch(objset_t *os, uint64_t object, int64_t level, uint64_t offset,
uint64_t len, enum zio_priority pri);

typedef struct dmu_object_info {
/* All sizes are in bytes unless otherwise indicated. */
Expand Down
2 changes: 1 addition & 1 deletion include/sys/dsl_dataset.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2015 by Delphix. All rights reserved.
* Copyright (c) 2013, Joyent, Inc. All rights reserved.
* Copyright (c) 2011, 2014 by Delphix. All rights reserved.
* Copyright (c) 2013 Steven Hartland. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/
Expand Down
12 changes: 11 additions & 1 deletion include/sys/zfs_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -160,8 +160,18 @@ extern int aok;

/*
* DTrace SDT probes have different signatures in userland than they do in
* kernel. If they're being used in kernel code, re-define them out of
* the kernel. If they're being used in kernel code, re-define them out of
* existence for their counterparts in libzpool.
*
* Here's an example of how to use the set-error probes in userland:
* zfs$target:::set-error /arg0 == EBUSY/ {stack();}
*
* Here's an example of how to use DTRACE_PROBE probes in userland:
* If there is a probe declared as follows:
* DTRACE_PROBE2(zfs__probe_name, uint64_t, blkid, dnode_t *, dn);
* Then you can use it as follows:
* zfs$target:::probe2 /copyinstr(arg0) == "zfs__probe_name"/
* {printf("%u %p\n", arg1, arg2);}
*/

#ifdef DTRACE_PROBE
Expand Down
22 changes: 9 additions & 13 deletions include/sys/zio.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#ifndef _ZIO_H
#define _ZIO_H

#include <sys/zio_priority.h>
#include <sys/zfs_context.h>
#include <sys/spa.h>
#include <sys/txg.h>
Expand Down Expand Up @@ -147,17 +148,6 @@ enum zio_compress {
#define ZIO_FAILURE_MODE_CONTINUE 1
#define ZIO_FAILURE_MODE_PANIC 2

typedef enum zio_priority {
ZIO_PRIORITY_SYNC_READ,
ZIO_PRIORITY_SYNC_WRITE, /* ZIL */
ZIO_PRIORITY_ASYNC_READ, /* prefetch */
ZIO_PRIORITY_ASYNC_WRITE, /* spa_sync() */
ZIO_PRIORITY_SCRUB, /* asynchronous scrub/resilver reads */
ZIO_PRIORITY_NUM_QUEUEABLE,

ZIO_PRIORITY_NOW /* non-queued i/os (e.g. free) */
} zio_priority_t;

enum zio_flag {
/*
* Flags inherited by gang, ddt, and vdev children,
Expand Down Expand Up @@ -262,6 +252,7 @@ extern const char *zio_type_name[ZIO_TYPES];
* Root blocks (objset_phys_t) are object 0, level -1: <objset, 0, -1, 0>.
* ZIL blocks are bookmarked <objset, 0, -2, blkid == ZIL sequence number>.
* dmu_sync()ed ZIL data blocks are bookmarked <objset, object, -2, blkid>.
* dnode visit bookmarks are <objset, object id of dnode, -3, 0>.
*
* Note: this structure is called a bookmark because its original purpose
* was to remember where to resume a pool-wide traverse.
Expand Down Expand Up @@ -294,6 +285,9 @@ struct zbookmark_phys {
#define ZB_ZIL_OBJECT (0ULL)
#define ZB_ZIL_LEVEL (-2LL)

#define ZB_DNODE_LEVEL (-3LL)
#define ZB_DNODE_BLKID (0ULL)

#define ZB_IS_ZERO(zb) \
((zb)->zb_objset == 0 && (zb)->zb_object == 0 && \
(zb)->zb_level == 0 && (zb)->zb_blkid == 0)
Expand Down Expand Up @@ -598,8 +592,10 @@ extern void zfs_ereport_post_checksum(spa_t *spa, vdev_t *vd,
extern void spa_handle_ignored_writes(spa_t *spa);

/* zbookmark_phys functions */
boolean_t zbookmark_is_before(const struct dnode_phys *dnp,
const zbookmark_phys_t *zb1, const zbookmark_phys_t *zb2);
boolean_t zbookmark_subtree_completed(const struct dnode_phys *dnp,
const zbookmark_phys_t *subtree_root, const zbookmark_phys_t *last_block);
int zbookmark_compare(uint16_t dbss1, uint8_t ibs1, uint16_t dbss2,
uint8_t ibs2, const zbookmark_phys_t *zb1, const zbookmark_phys_t *zb2);

#ifdef __cplusplus
}
Expand Down
2 changes: 1 addition & 1 deletion include/sys/zio_checksum.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ typedef const struct zio_checksum_info {
zio_checksum_func_t *ci_func[2]; /* checksum function per byteorder */
int ci_correctable; /* number of correctable bits */
int ci_eck; /* uses zio embedded checksum? */
int ci_dedup; /* strong enough for dedup? */
boolean_t ci_dedup; /* strong enough for dedup? */
char *ci_name; /* descriptive name */
} zio_checksum_info_t;

Expand Down
40 changes: 40 additions & 0 deletions include/sys/zio_priority.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/*
* CDDL HEADER START
*
* This file and its contents are supplied under the terms of the
* Common Development and Distribution License ("CDDL"), version 1.0.
* You may only use this file in accordance with the terms of version
* 1.0 of the CDDL.
*
* A full copy of the text of the CDDL should have accompanied this
* source. A copy of the CDDL is also available via the Internet at
* http://www.illumos.org/license/CDDL.
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2014 by Delphix. All rights reserved.
*/
#ifndef _ZIO_PRIORITY_H
#define _ZIO_PRIORITY_H

#ifdef __cplusplus
extern "C" {
#endif

/*
 * I/O priority classes.
 *
 * No enumerator has an explicit value, so they are numbered from 0 in
 * declaration order. ZIO_PRIORITY_NUM_QUEUEABLE therefore equals the number
 * of enumerators declared before it (5), and ZIO_PRIORITY_NOW follows it (6).
 * Inserting or reordering entries changes these numeric values.
 */
typedef enum zio_priority {
ZIO_PRIORITY_SYNC_READ,
ZIO_PRIORITY_SYNC_WRITE, /* ZIL */
ZIO_PRIORITY_ASYNC_READ, /* prefetch */
ZIO_PRIORITY_ASYNC_WRITE, /* spa_sync() */
ZIO_PRIORITY_SCRUB, /* asynchronous scrub/resilver reads */
ZIO_PRIORITY_NUM_QUEUEABLE, /* count of the entries above; not itself a priority */

ZIO_PRIORITY_NOW /* non-queued i/os (e.g. free) */
} zio_priority_t;

#ifdef __cplusplus
}
#endif

#endif /* _ZIO_PRIORITY_H */
4 changes: 2 additions & 2 deletions lib/libzfs/libzfs_pool.c
Original file line number Diff line number Diff line change
Expand Up @@ -3529,7 +3529,7 @@ zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv,
}

static int
zbookmark_compare(const void *a, const void *b)
zbookmark_mem_compare(const void *a, const void *b)
{
return (memcmp(a, b, sizeof (zbookmark_phys_t)));
}
Expand Down Expand Up @@ -3592,7 +3592,7 @@ zpool_get_errlog(zpool_handle_t *zhp, nvlist_t **nverrlistp)
zc.zc_nvlist_dst_size;
count -= zc.zc_nvlist_dst_size;

qsort(zb, count, sizeof (zbookmark_phys_t), zbookmark_compare);
qsort(zb, count, sizeof (zbookmark_phys_t), zbookmark_mem_compare);

verify(nvlist_alloc(nverrlistp, 0, KM_SLEEP) == 0);

Expand Down
Loading

0 comments on commit 1b12e2c

Please sign in to comment.