From 1b12e2c8e76423de831091f22cd8f37ba76770d6 Mon Sep 17 00:00:00 2001 From: Paul Dagnelie Date: Tue, 22 Dec 2015 02:31:57 +0100 Subject: [PATCH] Illumos 5960 zfs recv should prefetch indirect blocks Illumos 5925 zfs receive -o origin= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed by: Prakash Surya Reviewed by: Matthew Ahrens depends on #3574 Illumos 5745 zfs set allows only one dataset property to be set at a time depends on #3611 Illumos 5746 more checksumming in zfs send diverged code base from Illumos: [lib/libzfs/libzfs_sendrecv.c] b8864a233c569edcc57c686f3ea8cd1ae3b89153 Fix gcc cast warnings 325f023544bbec6a478882c442e15304ee379759 Add linux kernel device support 5c3f61eb498e8124858b1369096bf64b86a938e7 Increase Linux pipe buffer size on 'zfs receive' [module/zfs/zfs_vnops.c] 3558fd73b5d863304102f6745c26e0b592aca60a Prototype/structure update for Linux c12e3a594a49ed10b7870d950c1f336f78f136cb Restructure zfs_readdir() to fix regressions [module/zfs/zvol.c] function @zvol_map_block(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, is non-existent in ZoL [module/zfs/dmu.c] in function dmu_prefetch(objset_t *os, uint64_t object, uint64_t offset, uint64_t len) int i is initialized before the following code block (c90 vs. 
c99) [module/zfs/Makefile.in + lib/libzpool/Makefile.am] 47a4a6fd5fa1f1f60bcf6af19e453ecf0292f7d1 Support parallel build trees (VPATH builds) [module/zfs/dbuf.c] fc5bb51f08a6c91ff9ad3559d0266eeeab0b1f61 Fix stack dbuf_hold_impl() 9b67f605601c77c814037613d8129562db642a29 Illumos 4757, 4913 {4757 ZFS embedded-data block pointers ("zero block compression") , 4913 zfs release should not be subject to space checks} {reference} 34229a2f2ac07363f64ddd63e014964fff2f0671 Reduce stack usage for recursive traverse_visitbp() [module/zfs/dmu_send.c] b58986eebf3c47c946393da4b968ee33edaea99e Use large stacks when available 241b5415748859a3c272fc8f570f2368e93adde9 Illumos 5959 - clean up per-dataset feature count code {reference} 77aef6f60ea29f6d3769addc778db6328ac85755 Use vmem_alloc() for nvlists 00b46022c676e402e3f33ce93ee2983bbad2c46f Add linux kernel memory support [module/zfs/zvol.c] 9965059ab9991a5fc7df9a489021e73880b3bcc0 Prefetch start and end of volumes [module/zfs/dmu_send.c, C90 warnings - previous commits, code thus less clear to read] Illumos 5746 more checksumming in zfs send [module/zfs/dbuf.c, ISO C90 - mixed declarations and code] arc_flags_t aflags = uint64_t nextblkid = dpa->dpa_zb.zb_blkid >> dmu_buf_impl_t *db = dbuf_find(dn->dn_objset, dn->dn_object, zio_t *pio = zio_root(dmu_objset_spa(dn->dn_objset), NULL, NULL, blkptr_t *bp = ((blkptr_t *)abuf->b_data) + dbuf_prefetch_arg_t *dpa = kmem_zalloc(sizeof (*dpa), KM_SLEEP); dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset; [module/zfs/dmu_send.c, ISO C90 - mixed declarations and code] dnode_phys_t *blk = abuf->b_data; uint64_t dnobj = zb->zb_blkid * (blksz >> DNODE_SHIFT); error: ‘for’ loop initial declarations are only allowed in C99 or C11 mode: for (struct receive_ign_obj_node *n = struct send_block_record *to_data; struct receive_ign_obj_node *n; FIXME: man/man8/zfs.8 FIXME: different manpage format, currently I don't "get it" yet Ported-by: kernelOfTruth kerneloftruth@gmail.com --- cmd/zdb/zdb.c | 5 +- 
cmd/zfs/zfs_main.c | 29 +- cmd/ztest/ztest.c | 9 +- include/libzfs.h | 4 +- include/sys/bqueue.h | 54 + include/sys/dbuf.h | 9 +- include/sys/dmu.h | 5 +- include/sys/dsl_dataset.h | 2 +- include/sys/zfs_context.h | 12 +- include/sys/zio.h | 22 +- include/sys/zio_checksum.h | 2 +- include/sys/zio_priority.h | 40 + lib/libzfs/libzfs_pool.c | 4 +- lib/libzfs/libzfs_sendrecv.c | 56 +- lib/libzpool/Makefile.am | 1 + man/man8/zfs.8.orig | 3902 ++++++++++++++++++++++++++++++++++ man/man8/zfs.8.rej | 42 + module/zfs/Makefile.in | 1 + module/zfs/bptree.c | 2 +- module/zfs/bqueue.c | 111 + module/zfs/dbuf.c | 288 ++- module/zfs/dmu.c | 44 +- module/zfs/dmu_diff.c | 2 +- module/zfs/dmu_object.c | 5 + module/zfs/dmu_send.c | 809 +++++-- module/zfs/dmu_traverse.c | 28 +- module/zfs/dmu_tx.c | 6 +- module/zfs/dmu_zfetch.c | 3 +- module/zfs/dnode.c | 18 +- module/zfs/dnode_sync.c | 6 +- module/zfs/dsl_dataset.c | 28 +- module/zfs/dsl_destroy.c | 2 +- module/zfs/dsl_scan.c | 3 +- module/zfs/spa.c | 2 +- module/zfs/space_map.c | 4 +- module/zfs/zap.c | 13 +- module/zfs/zfs_vnops.c | 3 +- module/zfs/zio.c | 141 +- module/zfs/zvol.c | 4 +- 39 files changed, 5339 insertions(+), 382 deletions(-) create mode 100644 include/sys/bqueue.h create mode 100644 include/sys/zio_priority.h create mode 100644 man/man8/zfs.8.orig create mode 100644 man/man8/zfs.8.rej create mode 100644 module/zfs/bqueue.c diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c index 6b5ec4201994..f88291386407 100644 --- a/cmd/zdb/zdb.c +++ b/cmd/zdb/zdb.c @@ -2488,6 +2488,9 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, dmu_object_type_t type; boolean_t is_metadata; + if (bp == NULL) + return (0); + if (dump_opt['b'] >= 5 && bp->blk_birth > 0) { char blkbuf[BP_SPRINTF_LEN]; snprintf_blkptr(blkbuf, sizeof (blkbuf), bp); @@ -2984,7 +2987,7 @@ zdb_ddt_add_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, avl_index_t where; zdb_ddt_entry_t *zdde, zdde_search; - if (BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp)) + if (bp 
== NULL || BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp)) return (0); if (dump_opt['S'] > 1 && zb->zb_level == ZB_ROOT_LEVEL) { diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c index 356bf9aae4f6..beb6c7c87c4c 100644 --- a/cmd/zfs/zfs_main.c +++ b/cmd/zfs/zfs_main.c @@ -248,8 +248,9 @@ get_usage(zfs_help_t idx) return (gettext("\tpromote \n")); case HELP_RECEIVE: return (gettext("\treceive [-vnFu] \n" - "\treceive [-vnFu] [-d | -e] \n")); + "snapshot>\n" + "\treceive [-vnFu] [-o origin=] [-d | -e] " + "\n")); case HELP_RENAME: return (gettext("\trename [-f] " "\n" @@ -792,7 +793,7 @@ zfs_do_create(int argc, char **argv) nomem(); break; case 'o': - if (parseprop(props, optarg)) + if (parseprop(props, optarg) != 0) goto error; break; case 's': @@ -3622,7 +3623,7 @@ zfs_do_snapshot(int argc, char **argv) while ((c = getopt(argc, argv, "ro:")) != -1) { switch (c) { case 'o': - if (parseprop(props, optarg)) + if (parseprop(props, optarg) != 0) return (1); break; case 'r': @@ -3881,10 +3882,19 @@ zfs_do_receive(int argc, char **argv) { int c, err; recvflags_t flags = { 0 }; + nvlist_t *props; + nvpair_t *nvp = NULL; + + if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) + nomem(); /* check options */ - while ((c = getopt(argc, argv, ":denuvF")) != -1) { + while ((c = getopt(argc, argv, ":o:denuvF")) != -1) { switch (c) { + case 'o': + if (parseprop(props, optarg) != 0) + return (1); + break; case 'd': flags.isprefix = B_TRUE; break; @@ -3929,6 +3939,13 @@ zfs_do_receive(int argc, char **argv) usage(B_FALSE); } + while ((nvp = nvlist_next_nvpair(props, nvp))) { + if (strcmp(nvpair_name(nvp), "origin") != 0) { + (void) fprintf(stderr, gettext("invalid option")); + usage(B_FALSE); + } + } + if (isatty(STDIN_FILENO)) { (void) fprintf(stderr, gettext("Error: Backup stream can not be read " @@ -3937,7 +3954,7 @@ zfs_do_receive(int argc, char **argv) return (1); } - err = zfs_receive(g_zfs, argv[0], &flags, STDIN_FILENO, NULL); + err = zfs_receive(g_zfs, argv[0], props, &flags, 
STDIN_FILENO, NULL); return (err != 0); } diff --git a/cmd/ztest/ztest.c b/cmd/ztest/ztest.c index 1dc9eff77017..d28146a372ca 100644 --- a/cmd/ztest/ztest.c +++ b/cmd/ztest/ztest.c @@ -3728,7 +3728,8 @@ ztest_dmu_read_write(ztest_ds_t *zd, uint64_t id) */ n = ztest_random(regions) * stride + ztest_random(width); s = 1 + ztest_random(2 * width - 1); - dmu_prefetch(os, bigobj, n * chunksize, s * chunksize); + dmu_prefetch(os, bigobj, 0, n * chunksize, s * chunksize, + ZIO_PRIORITY_SYNC_READ); /* * Pick a random index and compute the offsets into packobj and bigobj. @@ -5930,8 +5931,10 @@ ztest_run(ztest_shared_t *zs) * Right before closing the pool, kick off a bunch of async I/O; * spa_close() should wait for it to complete. */ - for (object = 1; object < 50; object++) - dmu_prefetch(spa->spa_meta_objset, object, 0, 1ULL << 20); + for (object = 1; object < 50; object++) { + dmu_prefetch(spa->spa_meta_objset, object, 0, 0, 1ULL << 20, + ZIO_PRIORITY_SYNC_READ); + } /* Verify that at least one commit cb was called in a timely fashion */ if (zc_cb_counter >= ZTEST_COMMIT_CB_MIN_REG) diff --git a/include/libzfs.h b/include/libzfs.h index b7ab890c3e25..2a1f2f50d306 100644 --- a/include/libzfs.h +++ b/include/libzfs.h @@ -678,8 +678,8 @@ typedef struct recvflags { boolean_t nomount; } recvflags_t; -extern int zfs_receive(libzfs_handle_t *, const char *, recvflags_t *, - int, avl_tree_t *); +extern int zfs_receive(libzfs_handle_t *, const char *, nvlist_t *, + recvflags_t *, int, avl_tree_t *); typedef enum diff_flags { ZFS_DIFF_PARSEABLE = 0x1, diff --git a/include/sys/bqueue.h b/include/sys/bqueue.h new file mode 100644 index 000000000000..63722df1bbf3 --- /dev/null +++ b/include/sys/bqueue.h @@ -0,0 +1,54 @@ +/* + * CDDL HEADER START + * + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. 
+ * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2014 by Delphix. All rights reserved. + */ + +#ifndef _BQUEUE_H +#define _BQUEUE_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +typedef struct bqueue { + list_t bq_list; + kmutex_t bq_lock; + kcondvar_t bq_add_cv; + kcondvar_t bq_pop_cv; + uint64_t bq_size; + uint64_t bq_maxsize; + size_t bq_node_offset; +} bqueue_t; + +typedef struct bqueue_node { + list_node_t bqn_node; + uint64_t bqn_size; +} bqueue_node_t; + + +int bqueue_init(bqueue_t *, uint64_t, size_t); +void bqueue_destroy(bqueue_t *); +void bqueue_enqueue(bqueue_t *, void *, uint64_t); +void *bqueue_dequeue(bqueue_t *); +boolean_t bqueue_empty(bqueue_t *); + +#ifdef __cplusplus +} +#endif + +#endif /* _BQUEUE_H */ diff --git a/include/sys/dbuf.h b/include/sys/dbuf.h index 0d262e87b5bc..9147c9d4d6cc 100644 --- a/include/sys/dbuf.h +++ b/include/sys/dbuf.h @@ -261,8 +261,7 @@ typedef struct dbuf_hash_table { kmutex_t hash_mutexes[DBUF_MUTEXES]; } dbuf_hash_table_t; - -uint64_t dbuf_whichblock(struct dnode *di, uint64_t offset); +uint64_t dbuf_whichblock(struct dnode *di, int64_t level, uint64_t offset); void dbuf_create_bonus(struct dnode *dn); int dbuf_spill_set_blksz(dmu_buf_t *db, uint64_t blksz, dmu_tx_t *tx); @@ -272,10 +271,12 @@ void dbuf_rm_spill(struct dnode *dn, dmu_tx_t *tx); dmu_buf_impl_t *dbuf_hold(struct dnode *dn, uint64_t blkid, void *tag); dmu_buf_impl_t *dbuf_hold_level(struct dnode *dn, int level, uint64_t blkid, void *tag); -int dbuf_hold_impl(struct dnode *dn, uint8_t level, uint64_t blkid, int create, +int dbuf_hold_impl(struct dnode *dn, uint8_t level, uint64_t blkid, + boolean_t fail_sparse, boolean_t fail_uncached, void *tag, dmu_buf_impl_t **dbp); -void dbuf_prefetch(struct dnode *dn, uint64_t blkid, zio_priority_t prio); +void 
dbuf_prefetch(struct dnode *dn, int64_t level, uint64_t blkid, + zio_priority_t prio, arc_flags_t aflags); void dbuf_add_ref(dmu_buf_impl_t *db, void *tag); boolean_t dbuf_try_add_ref(dmu_buf_t *db, objset_t *os, uint64_t obj, diff --git a/include/sys/dmu.h b/include/sys/dmu.h index d9434db46383..1b81df6f661b 100644 --- a/include/sys/dmu.h +++ b/include/sys/dmu.h @@ -45,6 +45,7 @@ #include #include #include +#include #ifdef __cplusplus extern "C" { @@ -740,8 +741,8 @@ extern int zfs_max_recordsize; /* * Asynchronously try to read in the data. */ -void dmu_prefetch(objset_t *os, uint64_t object, uint64_t offset, - uint64_t len); +void dmu_prefetch(objset_t *os, uint64_t object, int64_t level, uint64_t offset, + uint64_t len, enum zio_priority pri); typedef struct dmu_object_info { /* All sizes are in bytes unless otherwise indicated. */ diff --git a/include/sys/dsl_dataset.h b/include/sys/dsl_dataset.h index 25622263e631..f033eace9945 100644 --- a/include/sys/dsl_dataset.h +++ b/include/sys/dsl_dataset.h @@ -21,7 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011, 2015 by Delphix. All rights reserved. - * Copyright (c) 2013, Joyent, Inc. All rights reserved. + * Copyright (c) 2011, 2014 by Delphix. All rights reserved. * Copyright (c) 2013 Steven Hartland. All rights reserved. * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. */ diff --git a/include/sys/zfs_context.h b/include/sys/zfs_context.h index 4f7e3287f3da..792d0796b133 100644 --- a/include/sys/zfs_context.h +++ b/include/sys/zfs_context.h @@ -160,8 +160,18 @@ extern int aok; /* * DTrace SDT probes have different signatures in userland than they do in - * kernel. If they're being used in kernel code, re-define them out of + * the kernel. If they're being used in kernel code, re-define them out of * existence for their counterparts in libzpool. 
+ * + * Here's an example of how to use the set-error probes in userland: + * zfs$target:::set-error /arg0 == EBUSY/ {stack();} + * + * Here's an example of how to use DTRACE_PROBE probes in userland: + * If there is a probe declared as follows: + * DTRACE_PROBE2(zfs__probe_name, uint64_t, blkid, dnode_t *, dn); + * Then you can use it as follows: + * zfs$target:::probe2 /copyinstr(arg0) == "zfs__probe_name"/ + * {printf("%u %p\n", arg1, arg2);} */ #ifdef DTRACE_PROBE diff --git a/include/sys/zio.h b/include/sys/zio.h index 278b6e0868a9..432a412638b3 100644 --- a/include/sys/zio.h +++ b/include/sys/zio.h @@ -29,6 +29,7 @@ #ifndef _ZIO_H #define _ZIO_H +#include #include #include #include @@ -147,17 +148,6 @@ enum zio_compress { #define ZIO_FAILURE_MODE_CONTINUE 1 #define ZIO_FAILURE_MODE_PANIC 2 -typedef enum zio_priority { - ZIO_PRIORITY_SYNC_READ, - ZIO_PRIORITY_SYNC_WRITE, /* ZIL */ - ZIO_PRIORITY_ASYNC_READ, /* prefetch */ - ZIO_PRIORITY_ASYNC_WRITE, /* spa_sync() */ - ZIO_PRIORITY_SCRUB, /* asynchronous scrub/resilver reads */ - ZIO_PRIORITY_NUM_QUEUEABLE, - - ZIO_PRIORITY_NOW /* non-queued i/os (e.g. free) */ -} zio_priority_t; - enum zio_flag { /* * Flags inherited by gang, ddt, and vdev children, @@ -262,6 +252,7 @@ extern const char *zio_type_name[ZIO_TYPES]; * Root blocks (objset_phys_t) are object 0, level -1: . * ZIL blocks are bookmarked . * dmu_sync()ed ZIL data blocks are bookmarked . + * dnode visit bookmarks are . * * Note: this structure is called a bookmark because its original purpose * was to remember where to resume a pool-wide traverse. 
@@ -294,6 +285,9 @@ struct zbookmark_phys { #define ZB_ZIL_OBJECT (0ULL) #define ZB_ZIL_LEVEL (-2LL) +#define ZB_DNODE_LEVEL (-3LL) +#define ZB_DNODE_BLKID (0ULL) + #define ZB_IS_ZERO(zb) \ ((zb)->zb_objset == 0 && (zb)->zb_object == 0 && \ (zb)->zb_level == 0 && (zb)->zb_blkid == 0) @@ -598,8 +592,10 @@ extern void zfs_ereport_post_checksum(spa_t *spa, vdev_t *vd, extern void spa_handle_ignored_writes(spa_t *spa); /* zbookmark_phys functions */ -boolean_t zbookmark_is_before(const struct dnode_phys *dnp, - const zbookmark_phys_t *zb1, const zbookmark_phys_t *zb2); +boolean_t zbookmark_subtree_completed(const struct dnode_phys *dnp, + const zbookmark_phys_t *subtree_root, const zbookmark_phys_t *last_block); +int zbookmark_compare(uint16_t dbss1, uint8_t ibs1, uint16_t dbss2, + uint8_t ibs2, const zbookmark_phys_t *zb1, const zbookmark_phys_t *zb2); #ifdef __cplusplus } diff --git a/include/sys/zio_checksum.h b/include/sys/zio_checksum.h index 56b83b559377..9fcfd521f4ad 100644 --- a/include/sys/zio_checksum.h +++ b/include/sys/zio_checksum.h @@ -44,7 +44,7 @@ typedef const struct zio_checksum_info { zio_checksum_func_t *ci_func[2]; /* checksum function per byteorder */ int ci_correctable; /* number of correctable bits */ int ci_eck; /* uses zio embedded checksum? */ - int ci_dedup; /* strong enough for dedup? */ + boolean_t ci_dedup; /* strong enough for dedup? */ char *ci_name; /* descriptive name */ } zio_checksum_info_t; diff --git a/include/sys/zio_priority.h b/include/sys/zio_priority.h new file mode 100644 index 000000000000..e33b9585b1c0 --- /dev/null +++ b/include/sys/zio_priority.h @@ -0,0 +1,40 @@ +/* + * CDDL HEADER START + * + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. 
A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2014 by Delphix. All rights reserved. + */ +#ifndef _ZIO_PRIORITY_H +#define _ZIO_PRIORITY_H + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum zio_priority { + ZIO_PRIORITY_SYNC_READ, + ZIO_PRIORITY_SYNC_WRITE, /* ZIL */ + ZIO_PRIORITY_ASYNC_READ, /* prefetch */ + ZIO_PRIORITY_ASYNC_WRITE, /* spa_sync() */ + ZIO_PRIORITY_SCRUB, /* asynchronous scrub/resilver reads */ + ZIO_PRIORITY_NUM_QUEUEABLE, + + ZIO_PRIORITY_NOW /* non-queued i/os (e.g. free) */ +} zio_priority_t; + +#ifdef __cplusplus +} +#endif + +#endif /* _ZIO_PRIORITY_H */ diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c index a730a94e081a..60213a6386dd 100644 --- a/lib/libzfs/libzfs_pool.c +++ b/lib/libzfs/libzfs_pool.c @@ -3529,7 +3529,7 @@ zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv, } static int -zbookmark_compare(const void *a, const void *b) +zbookmark_mem_compare(const void *a, const void *b) { return (memcmp(a, b, sizeof (zbookmark_phys_t))); } @@ -3592,7 +3592,7 @@ zpool_get_errlog(zpool_handle_t *zhp, nvlist_t **nverrlistp) zc.zc_nvlist_dst_size; count -= zc.zc_nvlist_dst_size; - qsort(zb, count, sizeof (zbookmark_phys_t), zbookmark_compare); + qsort(zb, count, sizeof (zbookmark_phys_t), zbookmark_mem_compare); verify(nvlist_alloc(nverrlistp, 0, KM_SLEEP) == 0); diff --git a/lib/libzfs/libzfs_sendrecv.c b/lib/libzfs/libzfs_sendrecv.c index b35428f907cd..2adcb0c0f532 100644 --- a/lib/libzfs/libzfs_sendrecv.c +++ b/lib/libzfs/libzfs_sendrecv.c @@ -63,8 +63,9 @@ /* in libzfs_dataset.c */ extern void zfs_setprop_error(libzfs_handle_t *, zfs_prop_t, int, char *); -static int zfs_receive_impl(libzfs_handle_t *, const char *, recvflags_t *, - int, const char *, nvlist_t *, avl_tree_t *, char **, int, uint64_t *); +static int zfs_receive_impl(libzfs_handle_t *, const char *, const char *, + recvflags_t 
*, int, const char *, nvlist_t *, avl_tree_t *, char **, int, + uint64_t *); static const zio_cksum_t zero_cksum = { { 0 } }; @@ -2523,7 +2524,7 @@ zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname, * zfs_receive_one() will take care of it (ie, * recv_skip() and return 0). */ - error = zfs_receive_impl(hdl, destname, flags, fd, + error = zfs_receive_impl(hdl, destname, NULL, flags, fd, sendfs, stream_nv, stream_avl, top_zfs, cleanup_fd, action_handlep); if (error == ENODATA) { @@ -2656,9 +2657,9 @@ recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap) */ static int zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, - recvflags_t *flags, dmu_replay_record_t *drr, - dmu_replay_record_t *drr_noswap, const char *sendfs, - nvlist_t *stream_nv, avl_tree_t *stream_avl, char **top_zfs, int cleanup_fd, + const char *originsnap, recvflags_t *flags, dmu_replay_record_t *drr, + dmu_replay_record_t *drr_noswap, const char *sendfs, nvlist_t *stream_nv, + avl_tree_t *stream_avl, char **top_zfs, int cleanup_fd, uint64_t *action_handlep) { zfs_cmd_t zc = {"\0"}; @@ -2808,10 +2809,15 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, } if (flags->verbose) (void) printf("found clone origin %s\n", zc.zc_string); + } else if (originsnap) { + (void) strncpy(zc.zc_string, originsnap, ZFS_MAXNAMELEN); + if (flags->verbose) + (void) printf("using provided clone origin %s\n", + zc.zc_string); } stream_wantsnewfs = (drrb->drr_fromguid == 0 || - (drrb->drr_flags & DRR_FLAG_CLONE)); + (drrb->drr_flags & DRR_FLAG_CLONE) || originsnap); if (stream_wantsnewfs) { /* @@ -3189,9 +3195,10 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, } static int -zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap, recvflags_t *flags, - int infd, const char *sendfs, nvlist_t *stream_nv, avl_tree_t *stream_avl, - char **top_zfs, int cleanup_fd, uint64_t *action_handlep) +zfs_receive_impl(libzfs_handle_t *hdl, const char 
*tosnap, + const char *originsnap, recvflags_t *flags, int infd, const char *sendfs, + nvlist_t *stream_nv, avl_tree_t *stream_avl, char **top_zfs, int cleanup_fd, + uint64_t *action_handlep) { int err; dmu_replay_record_t drr, drr_noswap; @@ -3210,6 +3217,12 @@ zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap, recvflags_t *flags, "(%s) does not exist"), tosnap); return (zfs_error(hdl, EZFS_NOENT, errbuf)); } + if (originsnap && + !zfs_dataset_exists(hdl, originsnap, ZFS_TYPE_DATASET)) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "specified origin fs " + "(%s) does not exist"), originsnap); + return (zfs_error(hdl, EZFS_NOENT, errbuf)); + } /* read in the BEGIN record */ if (0 != (err = recv_read(hdl, infd, &drr, sizeof (drr), B_FALSE, @@ -3282,14 +3295,14 @@ zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap, recvflags_t *flags, *cp = '\0'; sendfs = nonpackage_sendfs; } - return (zfs_receive_one(hdl, infd, tosnap, flags, - &drr, &drr_noswap, sendfs, stream_nv, stream_avl, - top_zfs, cleanup_fd, action_handlep)); + return (zfs_receive_one(hdl, infd, tosnap, originsnap, flags, + &drr, &drr_noswap, sendfs, stream_nv, stream_avl, top_zfs, + cleanup_fd, action_handlep)); } else { assert(DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) == DMU_COMPOUNDSTREAM); - return (zfs_receive_package(hdl, infd, tosnap, flags, - &drr, &zcksum, top_zfs, cleanup_fd, action_handlep)); + return (zfs_receive_package(hdl, infd, tosnap, flags, &drr, + &zcksum, top_zfs, cleanup_fd, action_handlep)); } } @@ -3300,14 +3313,15 @@ zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap, recvflags_t *flags, * (-1 will override -2). 
*/ int -zfs_receive(libzfs_handle_t *hdl, const char *tosnap, recvflags_t *flags, - int infd, avl_tree_t *stream_avl) +zfs_receive(libzfs_handle_t *hdl, const char *tosnap, nvlist_t *props, + recvflags_t *flags, int infd, avl_tree_t *stream_avl) { char *top_zfs = NULL; int err; int cleanup_fd; uint64_t action_handle = 0; struct stat sb; + char *originsnap = NULL; /* * The only way fstat can fail is if we do not have a valid file @@ -3350,10 +3364,16 @@ zfs_receive(libzfs_handle_t *hdl, const char *tosnap, recvflags_t *flags, } #endif /* __linux__ */ + if (props) { + err = nvlist_lookup_string(props, "origin", &originsnap); + if (err && err != ENOENT) + return (err); + } + cleanup_fd = open(ZFS_DEV, O_RDWR); VERIFY(cleanup_fd >= 0); - err = zfs_receive_impl(hdl, tosnap, flags, infd, NULL, NULL, + err = zfs_receive_impl(hdl, tosnap, originsnap, flags, infd, NULL, NULL, stream_avl, &top_zfs, cleanup_fd, &action_handle); VERIFY(0 == close(cleanup_fd)); diff --git a/lib/libzpool/Makefile.am b/lib/libzpool/Makefile.am index 0bcb5e466518..f45a57d71fdf 100644 --- a/lib/libzpool/Makefile.am +++ b/lib/libzpool/Makefile.am @@ -32,6 +32,7 @@ KERNEL_C = \ bplist.c \ bpobj.c \ bptree.c \ + bqueue.c \ dbuf.c \ dbuf_stats.c \ ddt.c \ diff --git a/man/man8/zfs.8.orig b/man/man8/zfs.8.orig new file mode 100644 index 000000000000..8d4aee23f38d --- /dev/null +++ b/man/man8/zfs.8.orig @@ -0,0 +1,3902 @@ +'\" t +.\" +.\" CDDL HEADER START +.\" +.\" The contents of this file are subject to the terms of the +.\" Common Development and Distribution License (the "License"). +.\" You may not use this file except in compliance with the License. +.\" +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +.\" or http://www.opensolaris.org/os/licensing. +.\" See the License for the specific language governing permissions +.\" and limitations under the License. 
+.\" +.\" When distributing Covered Code, include this CDDL HEADER in each +.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE. +.\" If applicable, add the following below this CDDL HEADER, with the +.\" fields enclosed by brackets "[]" replaced with your own identifying +.\" information: Portions Copyright [yyyy] [name of copyright owner] +.\" +.\" CDDL HEADER END +.\" +.\" +.\" Copyright (c) 2009 Sun Microsystems, Inc. All Rights Reserved. +.\" Copyright 2011 Joshua M. Clulow +.\" Copyright (c) 2011, 2014 by Delphix. All rights reserved. +.\" Copyright (c) 2014, Joyent, Inc. All rights reserved. +.\" Copyright 2012 Nexenta Systems, Inc. All Rights Reserved. +.\" Copyright (c) 2013 by Saso Kiselkov. All rights reserved. +.\" +.TH zfs 8 "Nov 19, 2013" "ZFS pool 28, filesystem 5" "System Administration Commands" +.SH NAME +zfs \- configures ZFS file systems +.SH SYNOPSIS +.LP +.nf +\fBzfs\fR [\fB-?\fR] +.fi + +.LP +.nf +\fBzfs\fR \fBcreate\fR [\fB-p\fR] [\fB-o\fR \fIproperty\fR=\fIvalue\fR] ... \fIfilesystem\fR +.fi + +.LP +.nf +\fBzfs\fR \fBcreate\fR [\fB-ps\fR] [\fB-b\fR \fIblocksize\fR] [\fB-o\fR \fIproperty\fR=\fIvalue\fR] ... \fB-V\fR \fIsize\fR \fIvolume\fR +.fi + +.LP +.nf +\fBzfs\fR \fBdestroy\fR [\fB-fnpRrv\fR] \fIfilesystem\fR|\fIvolume\fR +.fi + +.LP +.nf +\fBzfs\fR \fBdestroy\fR [\fB-dnpRrv\fR] \fIfilesystem\fR|\fIvolume\fR@\fIsnap\fR[%\fIsnap\fR][,...] +.fi + +.LP +.nf +\fBzfs\fR \fBdestroy\fR \fIfilesystem\fR|\fIvolume\fR#\fIbookmark\fR +.fi + +.LP +.nf +\fBzfs\fR \fBsnapshot | snap\fR [\fB-r\fR] [\fB-o\fR \fIproperty\fR=\fIvalue\fR] ... + \fIfilesystem@snapname\fR|\fIvolume@snapname\fR ... +.fi + +.LP +.nf +\fBzfs\fR \fBrollback\fR [\fB-rRf\fR] \fIsnapshot\fR +.fi + +.LP +.nf +\fBzfs\fR \fBclone\fR [\fB-p\fR] [\fB-o\fR \fIproperty\fR=\fIvalue\fR] ... 
\fIsnapshot\fR \fIfilesystem\fR|\fIvolume\fR +.fi + +.LP +.nf +\fBzfs\fR \fBpromote\fR \fIclone-filesystem\fR +.fi + +.LP +.nf +\fBzfs\fR \fBrename\fR [\fB-f\fR] \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR + \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR +.fi + +.LP +.nf +\fBzfs\fR \fBrename\fR [\fB-fp\fR] \fIfilesystem\fR|\fIvolume\fR \fIfilesystem\fR|\fIvolume\fR +.fi + +.LP +.nf +\fBzfs\fR \fBrename\fR \fB-r\fR \fIsnapshot\fR \fIsnapshot\fR +.fi + +.LP +.nf +\fBzfs\fR \fBlist\fR [\fB-r\fR|\fB-d\fR \fIdepth\fR][\fB-Hp\fR][\fB-o\fR \fIproperty\fR[,\fIproperty\fR]...] [\fB-t\fR \fItype\fR[,\fItype\fR]..] + [\fB-s\fR \fIproperty\fR] ... [\fB-S\fR \fIproperty\fR] ... [\fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR] ... +.fi + +.LP +.nf ++\fBzfs\fR \fBset\fR \fIproperty\fR=\fIvalue\fR... \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR... +.fi + +.LP +.nf +\fBzfs\fR \fBget\fR [\fB-r\fR|\fB-d\fR \fIdepth\fR][\fB-Hp\fR][\fB-o\fR \fIfield\fR[,...]] [\fB-t\fR \fItype\fR[,...]] + [\fB-s\fR \fIsource\fR[,...]] "\fIall\fR" | \fIproperty\fR[,...] \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR ... +.fi + +.LP +.nf +\fBzfs\fR \fBinherit\fR [\fB-rS\fR] \fIproperty\fR \fIfilesystem\fR|\fIvolume|snapshot\fR ... +.fi + +.LP +.nf +\fBzfs\fR \fBupgrade\fR [\fB-v\fR] +.fi + +.LP +.nf +\fBzfs\fR \fBupgrade\fR [\fB-r\fR] [\fB-V\fR \fIversion\fR] \fB-a\fR | \fIfilesystem\fR +.fi + +.LP +.nf +\fBzfs\fR \fBuserspace\fR [\fB-Hinp\fR] [\fB-o\fR \fIfield\fR[,...]] [\fB-s\fR \fIfield\fR] ... + [\fB-S\fR \fIfield\fR] ... [\fB-t\fR \fItype\fR[,...]] \fIfilesystem\fR|\fIsnapshot\fR +.fi + +.LP +.nf +\fBzfs\fR \fBgroupspace\fR [\fB-Hinp\fR] [\fB-o\fR \fIfield\fR[,...]] [\fB-s\fR \fIfield\fR] ... + [\fB-S\fR \fIfield\fR] ... 
[\fB-t\fR \fItype\fR[,...]] \fIfilesystem\fR|\fIsnapshot\fR +.fi + +.LP +.nf +\fBzfs\fR \fBmount\fR +.fi + +.LP +.nf +\fBzfs\fR \fBmount\fR [\fB-vO\fR] [\fB-o \fIoptions\fR\fR] \fB-a\fR | \fIfilesystem\fR +.fi + +.LP +.nf +\fBzfs\fR \fBunmount | umount\fR [\fB-f\fR] \fB-a\fR | \fIfilesystem\fR|\fImountpoint\fR +.fi + +.LP +.nf +\fBzfs\fR \fBshare\fR \fB-a\fR | \fIfilesystem\fR +.fi + +.LP +.nf +\fBzfs\fR \fBunshare\fR \fB-a\fR \fIfilesystem\fR|\fImountpoint\fR +.fi + +.LP +.nf +\fBzfs\fR \fBbookmark\fR \fIsnapshot\fR \fIbookmark\fR +.fi + +.LP +.nf +\fBzfs\fR \fBsend\fR [\fB-DnPpRveL\fR] [\fB-\fR[\fBiI\fR] \fIsnapshot\fR] \fIsnapshot\fR +.fi + +.LP +.nf +\fBzfs\fR \fBsend\fR [\fB-eL\fR] [\fB-i \fIsnapshot\fR|\fIbookmark\fR]\fR \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR +.fi + +.LP +.nf +\fBzfs\fR \fBreceive | recv\fR [\fB-vnFu\fR] \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR +.fi + +.LP +.nf +\fBzfs\fR \fBreceive | recv\fR [\fB-vnFu\fR] [\fB-d\fR|\fB-e\fR] \fIfilesystem\fR +.fi + +.LP +.nf +\fBzfs\fR \fBallow\fR \fIfilesystem\fR|\fIvolume\fR +.fi + +.LP +.nf +\fBzfs\fR \fBallow\fR [\fB-ldug\fR] "\fIeveryone\fR"|\fIuser\fR|\fIgroup\fR[,...] \fIperm\fR|\fI@setname\fR[,...] + \fIfilesystem\fR|\fIvolume\fR +.fi + +.LP +.nf +\fBzfs\fR \fBallow\fR [\fB-ld\fR] \fB-e\fR \fIperm\fR|@\fIsetname\fR[,...] \fIfilesystem\fR|\fIvolume\fR +.fi + +.LP +.nf +\fBzfs\fR \fBallow\fR \fB-c\fR \fIperm\fR|@\fIsetname\fR[,...] \fIfilesystem\fR|\fIvolume\fR +.fi + +.LP +.nf +\fBzfs\fR \fBallow\fR \fB-s\fR @\fIsetname\fR \fIperm\fR|@\fIsetname\fR[,...] \fIfilesystem\fR|\fIvolume\fR +.fi + +.LP +.nf +\fBzfs\fR \fBunallow\fR [\fB-rldug\fR] "\fIeveryone\fR"|\fIuser\fR|\fIgroup\fR[,...] [\fIperm\fR|@\fIsetname\fR[,... ]] + \fIfilesystem\fR|\fIvolume\fR +.fi + +.LP +.nf +\fBzfs\fR \fBunallow\fR [\fB-rld\fR] \fB-e\fR [\fIperm\fR|@\fIsetname\fR[,... ]] \fIfilesystem\fR|\fIvolume\fR +.fi + +.LP +.nf +\fBzfs\fR \fBunallow\fR [\fB-r\fR] \fB-c\fR [\fIperm\fR|@\fIsetname\fR[ ... 
]] \fIfilesystem\fR|\fIvolume\fR +.fi + +.LP +.nf +\fBzfs\fR \fBunallow\fR [\fB-r\fR] \fB-s\fR @\fIsetname\fR [\fIperm\fR|@\fIsetname\fR[,... ]] \fIfilesystem\fR|\fIvolume\fR +.fi + +.LP +.nf +\fBzfs\fR \fBhold\fR [\fB-r\fR] \fItag\fR \fIsnapshot\fR... +.fi + +.LP +.nf +\fBzfs\fR \fBholds\fR [\fB-r\fR] \fIsnapshot\fR... +.fi + +.LP +.nf +\fBzfs\fR \fBrelease\fR [\fB-r\fR] \fItag\fR \fIsnapshot\fR... +.fi + +.LP +.nf +\fBzfs\fR \fBdiff\fR [\fB-FHt\fR] \fIsnapshot\fR \fIsnapshot|filesystem\fR + +.SH DESCRIPTION +.LP +The \fBzfs\fR command configures \fBZFS\fR datasets within a \fBZFS\fR storage pool, as described in \fBzpool\fR(8). A dataset is identified by a unique path within the \fBZFS\fR namespace. For example: +.sp +.in +2 +.nf +pool/{filesystem,volume,snapshot} +.fi +.in -2 +.sp + +.sp +.LP +where the maximum length of a dataset name is \fBMAXNAMELEN\fR (256 bytes). +.sp +.LP +A dataset can be one of the following: +.sp +.ne 2 +.mk +.na +\fB\fIfile system\fR\fR +.ad +.sp .6 +.RS 4n +A \fBZFS\fR dataset of type \fBfilesystem\fR can be mounted within the standard system namespace and behaves like other file systems. While \fBZFS\fR file systems are designed to be \fBPOSIX\fR compliant, known issues exist that prevent compliance in some cases. Applications that depend on standards conformance might fail due to nonstandard behavior when checking file system free space. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fIvolume\fR\fR +.ad +.sp .6 +.RS 4n +A logical volume exported as a raw or block device. This type of dataset should only be used under special circumstances. File systems are typically used in most environments. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fIsnapshot\fR\fR +.ad +.sp .6 +.RS 4n +A read-only version of a file system or volume at a given point in time. It is specified as \fIfilesystem@name\fR or \fIvolume@name\fR. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fIbookmark\fR\fR +.ad +.sp .6 +.RS 4n +Much like a \fIsnapshot\fR, but without the hold on on-disk data. 
It can be used as the source of a send (but not for a receive). +It is specified as \fIfilesystem#name\fR or \fIvolume#name\fR. +.RE + +.SS "ZFS File System Hierarchy" +.LP +A \fBZFS\fR storage pool is a logical collection of devices that provide space for datasets. A storage pool is also the root of the \fBZFS\fR file system hierarchy. +.sp +.LP +The root of the pool can be accessed as a file system, such as mounting and unmounting, taking snapshots, and setting properties. The physical storage characteristics, however, are managed by the \fBzpool\fR(8) command. +.sp +.LP +See \fBzpool\fR(8) for more information on creating and administering pools. +.SS "Snapshots" +.LP +A snapshot is a read-only copy of a file system or volume. Snapshots can be created extremely quickly, and initially consume no additional space within the pool. As data within the active dataset changes, the snapshot consumes more data than would otherwise be shared with the active dataset. +.sp +.LP +Snapshots can have arbitrary names. Snapshots of volumes can be cloned or rolled back. Visibility is determined by the \fBsnapdev\fR property of the parent volume. +.sp +.LP +File system snapshots can be accessed under the \fB\&.zfs/snapshot\fR directory in the root of the file system. Snapshots are automatically mounted on demand and may be unmounted at regular intervals. The visibility of the \fB\&.zfs\fR directory can be controlled by the \fBsnapdir\fR property. +.SS "Bookmarks" +.LP +A bookmark is like a snapshot, a read-only copy of a file system or volume. Bookmarks can be created extremely quickly, compared to snapshots, and they consume no additional space within the pool. Bookmarks can also have arbitrary names, much like snapshots. +.sp +.LP +Unlike snapshots, bookmarks can not be accessed through the filesystem in any way. From a storage standpoint a bookmark just provides a way to reference when a snapshot was created as a distinct object. 
Bookmarks are initially tied to a snapshot, not the filesystem/volume, and they will survive if the snapshot itself is destroyed. Since they are very lightweight, there's little incentive to destroy them. +.SS "Clones" +.LP +A clone is a writable volume or file system whose initial contents are the same as another dataset. As with snapshots, creating a clone is nearly instantaneous, and initially consumes no additional space. +.sp +.LP +Clones can only be created from a snapshot. When a snapshot is cloned, it creates an implicit dependency between the parent and child. Even though the clone is created somewhere else in the dataset hierarchy, the original snapshot cannot be destroyed as long as a clone exists. The \fBorigin\fR property exposes this dependency, and the \fBdestroy\fR command lists any such dependencies, if they exist. +.sp +.LP +The clone parent-child dependency relationship can be reversed by using the \fBpromote\fR subcommand. This causes the "origin" file system to become a clone of the specified file system, which makes it possible to destroy the file system that the clone was created from. +.SS "Mount Points" +.LP +Creating a \fBZFS\fR file system is a simple operation, so the number of file systems per system is likely to be numerous. To cope with this, \fBZFS\fR automatically manages mounting and unmounting file systems without the need to edit the \fB/etc/fstab\fR file. All automatically managed file systems are mounted by \fBZFS\fR at boot time. +.sp +.LP +By default, file systems are mounted under \fB/\fIpath\fR\fR, where \fIpath\fR is the name of the file system in the \fBZFS\fR namespace. Directories are created and destroyed as needed. +.sp +.LP +A file system can also have a mount point set in the \fBmountpoint\fR property. This directory is created as needed, and \fBZFS\fR automatically mounts the file system when the \fBzfs mount -a\fR command is invoked (without editing \fB/etc/fstab\fR).
The \fBmountpoint\fR property can be inherited, so if \fBpool/home\fR has a mount point of \fB/export/stuff\fR, then \fBpool/home/user\fR automatically inherits a mount point of \fB/export/stuff/user\fR. +.sp +.LP +A file system \fBmountpoint\fR property of \fBnone\fR prevents the file system from being mounted. +.sp +.LP +If needed, \fBZFS\fR file systems can also be managed with traditional tools (\fBmount\fR, \fBumount\fR, \fB/etc/fstab\fR). If a file system's mount point is set to \fBlegacy\fR, \fBZFS\fR makes no attempt to manage the file system, and the administrator is responsible for mounting and unmounting the file system. +.SS "Deduplication" +.LP +Deduplication is the process for removing redundant data at the block-level, reducing the total amount of data stored. If a file system has the \fBdedup\fR property enabled, duplicate data blocks are removed synchronously. The result is that only unique data is stored and common components are shared among files. +.sp +\fBWARNING: DO NOT ENABLE DEDUPLICATION UNLESS YOU NEED IT AND KNOW EXACTLY WHAT YOU ARE DOING!\fR +.sp +Deduplicating data is a very resource-intensive operation. It is generally recommended that you have \fIat least\fR 1.25 GB of RAM per 1 TB of storage when you enable deduplication. But calculating the exact requirements is a somewhat complicated affair. Please see the \fBOracle Dedup Guide\fR for more information. +.sp +Enabling deduplication on an improperly-designed system will result in extreme performance issues (extremely slow filesystem and snapshot deletions etc.) and can potentially lead to data loss (i.e. unimportable pool due to memory exhaustion) if your system is not built for this purpose. Deduplication affects the processing power (CPU), disks (and the controller) as well as primary (real) memory.
+.sp +Before creating a pool with deduplication enabled, ensure that you have planned your hardware requirements appropriately and implemented appropriate recovery practices, such as regular backups. +.sp +Unless necessary, deduplication should NOT be enabled on a system. Instead, consider using \fIcompression=lz4\fR, as a less resource-intensive alternative. +.SS "Native Properties" +.LP +Properties are divided into two types, native properties and user-defined (or "user") properties. Native properties either export internal statistics or control \fBZFS\fR behavior. In addition, native properties are either editable or read-only. User properties have no effect on \fBZFS\fR behavior, but you can use them to annotate datasets in a way that is meaningful in your environment. For more information about user properties, see the "User Properties" section, below. +.sp +.LP +Every dataset has a set of properties that export statistics about the dataset as well as control various behaviors. Properties are inherited from the parent unless overridden by the child. Some properties apply only to certain types of datasets (file systems, volumes, or snapshots). +.sp +.LP +The values of numeric properties can be specified using human-readable suffixes (for example, \fBk\fR, \fBKB\fR, \fBM\fR, \fBGb\fR, and so forth, up to \fBZ\fR for zettabyte). The following are all valid (and equal) specifications: +.sp +.in +2 +.nf +1536M, 1.5g, 1.50GB +.fi +.in -2 +.sp + +.sp +.LP +The values of non-numeric properties are case sensitive and must be lowercase, except for \fBmountpoint\fR, \fBsharenfs\fR, and \fBsharesmb\fR. +.sp +.LP +The following native properties consist of read-only statistics about the dataset. These properties can be neither set, nor inherited. Native properties apply to all dataset types unless otherwise noted. 
+.sp +.ne 2 +.mk +.na +\fB\fBavailable\fR\fR +.ad +.sp .6 +.RS 4n +The amount of space available to the dataset and all its children, assuming that there is no other activity in the pool. Because space is shared within a pool, availability can be limited by any number of factors, including physical pool size, quotas, reservations, or other datasets within the pool. +.sp +This property can also be referred to by its shortened column name, \fBavail\fR. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBcompressratio\fR\fR +.ad +.sp .6 +.RS 4n +For non-snapshots, the compression ratio achieved for the \fBused\fR space of this dataset, expressed as a multiplier. The \fBused\fR property includes descendant datasets, and, for clones, does not include the space shared with the origin snapshot. For snapshots, the \fBcompressratio\fR is the same as the \fBrefcompressratio\fR property. Compression can be turned on by running: \fBzfs set compression=on \fIdataset\fR\fR. The default value is \fBoff\fR. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBcreation\fR\fR +.ad +.sp .6 +.RS 4n +The time this dataset was created. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBclones\fR\fR +.ad +.sp .6 +.RS 4n +For snapshots, this property is a comma-separated list of filesystems or +volumes which are clones of this snapshot. The clones' \fBorigin\fR property +is this snapshot. If the \fBclones\fR property is not empty, then this +snapshot can not be destroyed (even with the \fB-r\fR or \fB-f\fR options). +.RE + +.sp +.ne 2 +.na +\fB\fBdefer_destroy\fR\fR +.ad +.sp .6 +.RS 4n +This property is \fBon\fR if the snapshot has been marked for deferred destruction by using the \fBzfs destroy\fR \fB-d\fR command. Otherwise, the property is \fBoff\fR. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBfilesystem_count\fR +.ad +.sp .6 +.RS 4n +The total number of filesystems and volumes that exist under this location in the +dataset tree. 
This value is only available when a \fBfilesystem_limit\fR has +been set somewhere in the tree under which the dataset resides. +.RE + +.sp +.ne 2 +.na +\fB\fBlogicalreferenced\fR\fR +.ad +.sp .6 +.RS 4n +The amount of space that is "logically" accessible by this dataset. See +the \fBreferenced\fR property. The logical space ignores the effect of +the \fBcompression\fR and \fBcopies\fR properties, giving a quantity +closer to the amount of data that applications see. However, it does +include space consumed by metadata. +.sp +This property can also be referred to by its shortened column name, +\fBlrefer\fR. +.RE + +.sp +.ne 2 +.na +\fB\fBlogicalused\fR\fR +.ad +.sp .6 +.RS 4n +The amount of space that is "logically" consumed by this dataset and all +its descendents. See the \fBused\fR property. The logical space +ignores the effect of the \fBcompression\fR and \fBcopies\fR properties, +giving a quantity closer to the amount of data that applications see. +However, it does include space consumed by metadata. +.sp +This property can also be referred to by its shortened column name, +\fBlused\fR. +.RE + +.sp +.ne 2 +.na +\fB\fBmounted\fR\fR +.ad +.sp .6 +.RS 4n +For file systems, indicates whether the file system is currently mounted. This property can be either \fByes\fR or \fBno\fR. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBorigin\fR\fR +.ad +.sp .6 +.RS 4n +For cloned file systems or volumes, the snapshot from which the clone was created. See also the \fBclones\fR property. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBreferenced\fR\fR +.ad +.sp .6 +.RS 4n +The amount of data that is accessible by this dataset, which may or may not be shared with other datasets in the pool. When a snapshot or clone is created, it initially references the same amount of space as the file system or snapshot it was created from, since its contents are identical. +.sp +This property can also be referred to by its shortened column name, \fBrefer\fR. 
+.RE + +.sp +.ne 2 +.mk +.na +\fB\fBrefcompressratio\fR\fR +.ad +.sp .6 +.RS 4n +The compression ratio achieved for the \fBreferenced\fR space of this +dataset, expressed as a multiplier. See also the \fBcompressratio\fR +property. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBsnapshot_count\fR\fR +.ad +.sp .6 +.RS 4n +The total number of snapshots that exist under this location in the dataset tree. +This value is only available when a \fBsnapshot_limit\fR has been set somewhere +in the tree under which the dataset resides. +.RE + +.sp +.ne 2 +.na +\fB\fBtype\fR\fR +.ad +.sp .6 +.RS 4n +The type of dataset: \fBfilesystem\fR, \fBvolume\fR, or \fBsnapshot\fR. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBused\fR\fR +.ad +.sp .6 +.RS 4n +The amount of space consumed by this dataset and all its descendents. This is the value that is checked against this dataset's quota and reservation. The space used does not include this dataset's reservation, but does take into account the reservations of any descendent datasets. The amount of space that a dataset consumes from its parent, as well as the amount of space that is freed if this dataset is recursively destroyed, is the greater of its space used and its reservation. +.sp +When snapshots (see the "Snapshots" section) are created, their space is initially shared between the snapshot and the file system, and possibly with previous snapshots. As the file system changes, space that was previously shared becomes unique to the snapshot, and counted in the snapshot's space used. Additionally, deleting snapshots can increase the amount of space unique to (and used by) other snapshots. +.sp +The amount of space used, available, or referenced does not take into account pending changes. Pending changes are generally accounted for within a few seconds. Committing a change to a disk using \fBfsync\fR(2) or \fBO_SYNC\fR does not necessarily guarantee that the space usage information is updated immediately.
+.RE + +.sp +.ne 2 +.mk +.na +\fB\fBusedby*\fR\fR +.ad +.sp .6 +.RS 4n +The \fBusedby*\fR properties decompose the \fBused\fR property into the various reasons that space is used. Specifically, \fBused\fR = \fBusedbychildren\fR + \fBusedbydataset\fR + \fBusedbyrefreservation\fR + \fBusedbysnapshots\fR. These properties are only available for datasets created on \fBzpool\fR "version 13" pools. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBusedbychildren\fR\fR +.ad +.sp .6 +.RS 4n +The amount of space used by children of this dataset, which would be freed if all the dataset's children were destroyed. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBusedbydataset\fR\fR +.ad +.sp .6 +.RS 4n +The amount of space used by this dataset itself, which would be freed if the dataset were destroyed (after first removing any \fBrefreservation\fR and destroying any necessary snapshots or descendents). +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBusedbyrefreservation\fR\fR +.ad +.sp .6 +.RS 4n +The amount of space used by a \fBrefreservation\fR set on this dataset, which would be freed if the \fBrefreservation\fR was removed. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBusedbysnapshots\fR\fR +.ad +.sp .6 +.RS 4n +The amount of space consumed by snapshots of this dataset. In particular, it is the amount of space that would be freed if all of this dataset's snapshots were destroyed. Note that this is not simply the sum of the snapshots' \fBused\fR properties because space can be shared by multiple snapshots. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBuserused@\fR\fIuser\fR\fR +.ad +.sp .6 +.RS 4n +The amount of space consumed by the specified user in this dataset. Space is charged to the owner of each file, as displayed by \fBls\fR \fB-l\fR. The amount of space charged is displayed by \fBdu\fR and \fBls\fR \fB-s\fR. See the \fBzfs userspace\fR subcommand for more information. +.sp +Unprivileged users can access only their own space usage.
The root user, or a user who has been granted the \fBuserused\fR privilege with \fBzfs allow\fR, can access everyone's usage. +.sp +The \fBuserused@\fR... properties are not displayed by \fBzfs get all\fR. The user's name must be appended after the \fB@\fR symbol, using one of the following forms: +.RS +4 +.TP +.ie t \(bu +.el o +\fIPOSIX name\fR (for example, \fBjoe\fR) +.RE +.RS +4 +.TP +.ie t \(bu +.el o +\fIPOSIX numeric ID\fR (for example, \fB789\fR) +.RE +.RS +4 +.TP +.ie t \(bu +.el o +\fISID name\fR (for example, \fBjoe.smith@mydomain\fR) +.RE +.RS +4 +.TP +.ie t \(bu +.el o +\fISID numeric ID\fR (for example, \fBS-1-123-456-789\fR) +.RE +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBuserrefs\fR\fR +.ad +.sp .6 +.RS 4n +This property is set to the number of user holds on this snapshot. User holds are set by using the \fBzfs hold\fR command. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBgroupused@\fR\fIgroup\fR\fR +.ad +.sp .6 +.RS 4n +The amount of space consumed by the specified group in this dataset. Space is charged to the group of each file, as displayed by \fBls\fR \fB-l\fR. See the \fBuserused@\fR\fIuser\fR property for more information. +.sp +Unprivileged users can only access their own groups' space usage. The root user, or a user who has been granted the \fBgroupused\fR privilege with \fBzfs allow\fR, can access all groups' usage. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBvolblocksize\fR=\fIblocksize\fR\fR +.ad +.sp .6 +.RS 4n +For volumes, specifies the block size of the volume. The \fBblocksize\fR cannot be changed once the volume has been written, so it should be set at volume creation time. The default \fBblocksize\fR for volumes is 8 Kbytes. Any power of 2 from 512 bytes to 128 Kbytes is valid. +.sp +This property can also be referred to by its shortened column name, \fBvolblock\fR. +.RE + +.sp +.ne 2 +.na +\fB\fBwritten\fR\fR +.ad +.sp .6 +.RS 4n +The amount of \fBreferenced\fR space written to this dataset since the +previous snapshot. 
+.RE + +.sp +.ne 2 +.na +\fB\fBwritten@\fR\fIsnapshot\fR\fR +.ad +.sp .6 +.RS 4n +The amount of \fBreferenced\fR space written to this dataset since the +specified snapshot. This is the space that is referenced by this dataset +but was not referenced by the specified snapshot. +.sp +The \fIsnapshot\fR may be specified as a short snapshot name (just the part +after the \fB@\fR), in which case it will be interpreted as a snapshot in +the same filesystem as this dataset. +The \fIsnapshot\fR may also be a full snapshot name (\fIfilesystem\fR@\fIsnapshot\fR), +which for clones may be a snapshot in the origin's filesystem (or the origin +of the origin's filesystem, etc). +.RE + +.sp +.LP +The following native properties can be used to change the behavior of a \fBZFS\fR dataset. +.sp +.ne 2 +.mk +.na +\fB\fBaclinherit\fR=\fBdiscard\fR | \fBnoallow\fR | \fBrestricted\fR | \fBpassthrough\fR | \fBpassthrough-x\fR\fR +.ad +.sp .6 +.RS 4n +Controls how \fBACL\fR entries are inherited when files and directories are created. A file system with an \fBaclinherit\fR property of \fBdiscard\fR does not inherit any \fBACL\fR entries. A file system with an \fBaclinherit\fR property value of \fBnoallow\fR only inherits inheritable \fBACL\fR entries that specify "deny" permissions. The property value \fBrestricted\fR (the default) removes the \fBwrite_acl\fR and \fBwrite_owner\fR permissions when the \fBACL\fR entry is inherited. A file system with an \fBaclinherit\fR property value of \fBpassthrough\fR inherits all inheritable \fBACL\fR entries without any modifications made to the \fBACL\fR entries when they are inherited. A file system with an \fBaclinherit\fR property value of \fBpassthrough-x\fR has the same meaning as \fBpassthrough\fR, except that the \fBowner@\fR, \fBgroup@\fR, and \fBeveryone@\fR \fBACE\fRs inherit the execute permission only if the file creation mode also requests the execute bit.
+.sp +When the property value is set to \fBpassthrough\fR, files are created with a mode determined by the inheritable \fBACE\fRs. If no inheritable \fBACE\fRs exist that affect the mode, then the mode is set in accordance to the requested mode from the application. +.sp +The \fBaclinherit\fR property does not apply to Posix ACLs. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBacltype\fR=\fBnoacl\fR | \fBposixacl\fR \fR +.ad +.sp .6 +.RS 4n +Controls whether ACLs are enabled and if so what type of ACL to use. When +a file system has the \fBacltype\fR property set to \fBnoacl\fR (the default) +then ACLs are disabled. Setting the \fBacltype\fR property to \fBposixacl\fR +indicates Posix ACLs should be used. Posix ACLs are specific to Linux and +are not functional on other platforms. Posix ACLs are stored as an xattr and +therefore will not overwrite any existing ZFS/NFSv4 ACLs which may be set. +Currently only \fBposixacls\fR are supported on Linux. +.sp +To obtain the best performance when setting \fBposixacl\fR users are strongly +encouraged to set the \fBxattr=sa\fR property. This will result in the +Posix ACL being stored more efficiently on disk. But as a consequence of this +all new xattrs will only be accessible from ZFS implementations which support +the \fBxattr=sa\fR property. See the \fBxattr\fR property for more details. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBatime\fR=\fBon\fR | \fBoff\fR\fR +.ad +.sp .6 +.RS 4n +Controls whether the access time for files is updated when they are read. Turning this property off avoids producing write traffic when reading files and can result in significant performance gains, though it might confuse mailers and other similar utilities. The default value is \fBon\fR. See also \fBrelatime\fR below. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBcanmount\fR=\fBon\fR | \fBoff\fR | \fBnoauto\fR\fR +.ad +.sp .6 +.RS 4n +If this property is set to \fBoff\fR, the file system cannot be mounted, and is ignored by \fBzfs mount -a\fR. 
Setting this property to \fBoff\fR is similar to setting the \fBmountpoint\fR property to \fBnone\fR, except that the dataset still has a normal \fBmountpoint\fR property, which can be inherited. Setting this property to \fBoff\fR allows datasets to be used solely as a mechanism to inherit properties. One example of setting \fBcanmount=\fR\fBoff\fR is to have two datasets with the same \fBmountpoint\fR, so that the children of both datasets appear in the same directory, but might have different inherited characteristics. +.sp +When the \fBnoauto\fR option is set, a dataset can only be mounted and unmounted explicitly. The dataset is not mounted automatically when the dataset is created or imported, nor is it mounted by the \fBzfs mount -a\fR command or unmounted by the \fBzfs unmount -a\fR command. +.sp +This property is not inherited. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBchecksum\fR=\fBon\fR | \fBoff\fR | \fBfletcher2\fR | \fBfletcher4\fR | \fBsha256\fR\fR +.ad +.sp .6 +.RS 4n +Controls the checksum used to verify data integrity. The default value is \fBon\fR, which automatically selects an appropriate algorithm (currently, \fBfletcher4\fR, but this may change in future releases). The value \fBoff\fR disables integrity checking on user data. Disabling checksums is \fBNOT\fR a recommended practice. +.sp +Changing this property affects only newly-written data. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBcompression\fR=\fBon\fR | \fBoff\fR | \fBlzjb\fR | \fBlz4\fR | +\fBgzip\fR | \fBgzip-\fR\fIN\fR | \fBzle\fR\fR +.ad +.sp .6 +.RS 4n +Controls the compression algorithm used for this dataset. +.sp +Setting compression to \fBon\fR indicates that the current default +compression algorithm should be used. The default balances compression +and decompression speed, with compression ratio and is expected to +work well on a wide variety of workloads. Unlike all other settings for +this property, \fBon\fR does not select a fixed compression type.
As +new compression algorithms are added to ZFS and enabled on a pool, the +default compression algorithm may change. The current default compression +algorithm is either \fBlzjb\fR or, if the \fBlz4_compress\fR feature is +enabled, \fBlz4\fR. +.sp +The \fBlzjb\fR compression algorithm is optimized for performance while +providing decent data compression. +.sp +The \fBlz4\fR compression algorithm is a high-performance replacement +for the \fBlzjb\fR algorithm. It features significantly faster +compression and decompression, as well as a moderately higher +compression ratio than \fBlzjb\fR, but can only be used on pools with +the \fBlz4_compress\fR feature set to \fIenabled\fR. See +\fBzpool-features\fR(5) for details on ZFS feature flags and the +\fBlz4_compress\fR feature. +.sp +The \fBgzip\fR compression algorithm uses the same compression as +the \fBgzip\fR(1) command. You can specify the \fBgzip\fR level by using the +value \fBgzip-\fR\fIN\fR where \fIN\fR is an integer from 1 (fastest) to 9 +(best compression ratio). Currently, \fBgzip\fR is equivalent to \fBgzip-6\fR +(which is also the default for \fBgzip\fR(1)). The \fBzle\fR compression +algorithm compresses runs of zeros. +.sp +This property can also be referred to by its shortened column name +\fBcompress\fR. Changing this property affects only newly-written data. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBcopies\fR=\fB1\fR | \fB2\fR | \fB3\fR\fR +.ad +.sp .6 +.RS 4n +Controls the number of copies of data stored for this dataset. These copies are in addition to any redundancy provided by the pool, for example, mirroring or RAID-Z. The copies are stored on different disks, if possible. The space used by multiple copies is charged to the associated file and dataset, changing the \fBused\fR property and counting against quotas and reservations. +.sp +Changing this property only affects newly-written data. Therefore, set this property at file system creation time by using the \fB-o\fR \fBcopies=\fR\fIN\fR option.
+.RE + +.sp +.ne 2 +.mk +.na +\fB\fBdedup\fR=\fBon\fR | \fBoff\fR | \fBverify\fR | \fBsha256\fR[,\fBverify\fR]\fR +.ad +.sp .6 +.RS 4n +Controls whether deduplication is in effect for a dataset. The default value is \fBoff\fR. The default checksum used for deduplication is \fBsha256\fR (subject to change). When \fBdedup\fR is enabled, the \fBdedup\fR checksum algorithm overrides the \fBchecksum\fR property. Setting the value to \fBverify\fR is equivalent to specifying \fBsha256,verify\fR. +.sp +If the property is set to \fBverify\fR, then, whenever two blocks have the same signature, ZFS will do a byte-for-byte comparison with the existing block to ensure that the contents are identical. +.sp +Unless necessary, deduplication should NOT be enabled on a system. See \fBDeduplication\fR above. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBdevices\fR=\fBon\fR | \fBoff\fR\fR +.ad +.sp .6 +.RS 4n +Controls whether device nodes can be opened on this file system. The default value is \fBon\fR. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBexec\fR=\fBon\fR | \fBoff\fR\fR +.ad +.sp .6 +.RS 4n +Controls whether processes can be executed from within this file system. The default value is \fBon\fR. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBmlslabel\fR=\fIlabel\fR | \fBnone\fR\fR +.ad +.sp .6 +.RS 4n +The \fBmlslabel\fR property is a sensitivity label that determines if a dataset can be mounted in a zone on a system with Trusted Extensions enabled. If the labeled dataset matches the labeled zone, the dataset can be mounted and accessed from the labeled zone. +.sp +When the \fBmlslabel\fR property is not set, the default value is \fBnone\fR. Setting the \fBmlslabel\fR property to \fBnone\fR is equivalent to removing the property. +.sp +The \fBmlslabel\fR property can be modified only when Trusted Extensions is enabled and only with appropriate privilege. Rights to modify it cannot be delegated. 
When changing a label to a higher label or setting the initial dataset label, the \fB{PRIV_FILE_UPGRADE_SL}\fR privilege is required. When changing a label to a lower label or the default (\fBnone\fR), the \fB{PRIV_FILE_DOWNGRADE_SL}\fR privilege is required. Changing the dataset to labels other than the default can be done only when the dataset is not mounted. When a dataset with the default label is mounted into a labeled-zone, the mount operation automatically sets the \fBmlslabel\fR property to the label of that zone. +.sp +When Trusted Extensions is \fBnot\fR enabled, only datasets with the default label (\fBnone\fR) can be mounted. +.sp +Zones are a Solaris feature and are not relevant on Linux. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBfilesystem_limit\fR=\fIcount\fR | \fBnone\fR\fR +.ad +.sp .6 +.RS 4n +Limits the number of filesystems and volumes that can exist under this point in +the dataset tree. The limit is not enforced if the user is allowed to change +the limit. Setting a filesystem_limit on a descendent of a filesystem that +already has a filesystem_limit does not override the ancestor's filesystem_limit, +but rather imposes an additional limit. This feature must be enabled to be used +(see \fBzpool-features\fR(5)). +.RE + +.sp +.ne 2 +.na +\fB\fBmountpoint\fR=\fIpath\fR | \fBnone\fR | \fBlegacy\fR\fR +.ad +.sp .6 +.RS 4n +Controls the mount point used for this file system. See the "Mount Points" section for more information on how this property is used. +.sp +When the \fBmountpoint\fR property is changed for a file system, the file system and any children that inherit the mount point are unmounted. If the new value is \fBlegacy\fR, then they remain unmounted. Otherwise, they are automatically remounted in the new location if the property was previously \fBlegacy\fR or \fBnone\fR, or if they were mounted before the property was changed. In addition, any shared file systems are unshared and shared in the new location. 
+.RE + +.sp +.ne 2 +.mk +.na +\fB\fBnbmand\fR=\fBon\fR | \fBoff\fR\fR +.ad +.sp .6 +.RS 4n +Controls whether the file system should be mounted with \fBnbmand\fR (Non Blocking mandatory locks). This is used for \fBCIFS\fR clients. Changes to this property only take effect when the file system is unmounted and remounted. See \fBmount\fR(8) for more information on \fBnbmand\fR mounts. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBprimarycache\fR=\fBall\fR | \fBnone\fR | \fBmetadata\fR\fR +.ad +.sp .6 +.RS 4n +Controls what is cached in the primary cache (ARC). If this property is set to \fBall\fR, then both user data and metadata is cached. If this property is set to \fBnone\fR, then neither user data nor metadata is cached. If this property is set to \fBmetadata\fR, then only metadata is cached. The default value is \fBall\fR. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBquota\fR=\fIsize\fR | \fBnone\fR\fR +.ad +.sp .6 +.RS 4n +Limits the amount of space a dataset and its descendents can consume. This property enforces a hard limit on the amount of space used. This includes all space consumed by descendents, including file systems and snapshots. Setting a quota on a descendent of a dataset that already has a quota does not override the ancestor's quota, but rather imposes an additional limit. +.sp +Quotas cannot be set on volumes, as the \fBvolsize\fR property acts as an implicit quota. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBsnapshot_limit\fR=\fIcount\fR | \fBnone\fR\fR +.ad +.sp .6 +.RS 4n +Limits the number of snapshots that can be created on a dataset and its +descendents. Setting a snapshot_limit on a descendent of a dataset that already +has a snapshot_limit does not override the ancestor's snapshot_limit, but +rather imposes an additional limit. The limit is not enforced if the user is +allowed to change the limit. For example, this means that recursive snapshots +taken from the global zone are counted against each delegated dataset within +a zone.
This feature must be enabled to be used (see \fBzpool-features\fR(5)). +.RE + +.sp +.ne 2 +.na +\fB\fBuserquota@\fR\fIuser\fR=\fIsize\fR | \fBnone\fR\fR +.ad +.sp .6 +.RS 4n +Limits the amount of space consumed by the specified user. Similar to the \fBrefquota\fR property, the \fBuserquota\fR space calculation does not include space that is used by descendent datasets, such as snapshots and clones. User space consumption is identified by the \fBuserused@\fR\fIuser\fR property. +.sp +Enforcement of user quotas may be delayed by several seconds. This delay means that a user might exceed their quota before the system notices that they are over quota and begins to refuse additional writes with the \fBEDQUOT\fR error message. See the \fBzfs userspace\fR subcommand for more information. +.sp +Unprivileged users can only access their own groups' space usage. The root user, or a user who has been granted the \fBuserquota\fR privilege with \fBzfs allow\fR, can get and set everyone's quota. +.sp +This property is not available on volumes, on file systems before version 4, or on pools before version 15. The \fBuserquota@\fR... properties are not displayed by \fBzfs get all\fR. The user's name must be appended after the \fB@\fR symbol, using one of the following forms: +.RS +4 +.TP +.ie t \(bu +.el o +\fIPOSIX name\fR (for example, \fBjoe\fR) +.RE +.RS +4 +.TP +.ie t \(bu +.el o +\fIPOSIX numeric ID\fR (for example, \fB789\fR) +.RE +.RS +4 +.TP +.ie t \(bu +.el o +\fISID name\fR (for example, \fBjoe.smith@mydomain\fR) +.RE +.RS +4 +.TP +.ie t \(bu +.el o +\fISID numeric ID\fR (for example, \fBS-1-123-456-789\fR) +.RE +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBgroupquota@\fR\fIgroup\fR=\fIsize\fR | \fBnone\fR\fR +.ad +.sp .6 +.RS 4n +Limits the amount of space consumed by the specified group. Group space consumption is identified by the \fBgroupused@\fR\fIgroup\fR property. +.sp +Unprivileged users can access only their own groups' space usage.
The root user, or a user who has been granted the \fBgroupquota\fR privilege with \fBzfs allow\fR, can get and set all groups' quotas. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBreadonly\fR=\fBon\fR | \fBoff\fR\fR +.ad +.sp .6 +.RS 4n +Controls whether this dataset can be modified. The default value is \fBoff\fR. +.sp +This property can also be referred to by its shortened column name, \fBrdonly\fR. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBrecordsize\fR=\fIsize\fR\fR +.ad +.sp .6 +.RS 4n +Specifies a suggested block size for files in the file system. This property is designed solely for use with database workloads that access files in fixed-size records. \fBZFS\fR automatically tunes block sizes according to internal algorithms optimized for typical access patterns. +.sp +For databases that create very large files but access them in small random chunks, these algorithms may be suboptimal. Specifying a \fBrecordsize\fR greater than or equal to the record size of the database can result in significant performance gains. Use of this property for general purpose file systems is strongly discouraged, and may adversely affect performance. +.sp +The size specified must be a power of two greater than or equal to 512 and less than or equal to 128 Kbytes. +.sp +Changing the file system's \fBrecordsize\fR affects only files created afterward; existing files are unaffected. +.sp +This property can also be referred to by its shortened column name, \fBrecsize\fR. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBredundant_metadata\fR=\fBall\fR | \fBmost\fR\fR +.ad +.sp .6 +.RS 4n +Controls what types of metadata are stored redundantly. ZFS stores an +extra copy of metadata, so that if a single block is corrupted, the +amount of user data lost is limited. This extra copy is in addition to +any redundancy provided at the pool level (e.g. by mirroring or RAID-Z), +and is in addition to an extra copy specified by the \fBcopies\fR +property (up to a total of 3 copies). 
For example if the pool is +mirrored, \fBcopies\fR=2, and \fBredundant_metadata\fR=most, then ZFS +stores 6 copies of most metadata, and 4 copies of data and some +metadata. +.sp +When set to \fBall\fR, ZFS stores an extra copy of all metadata. If a +single on-disk block is corrupt, at worst a single block of user data +(which is \fBrecordsize\fR bytes long) can be lost. +.sp +When set to \fBmost\fR, ZFS stores an extra copy of most types of +metadata. This can improve performance of random writes, because less +metadata must be written. In practice, at worst about 100 blocks (of +\fBrecordsize\fR bytes each) of user data can be lost if a single +on-disk block is corrupt. The exact behavior of which metadata blocks +are stored redundantly may change in future releases. +.sp +The default value is \fBall\fR. +.RE + +.sp +.ne 2 +.na +\fB\fBrefquota\fR=\fIsize\fR | \fBnone\fR\fR +.ad +.sp .6 +.RS 4n +Limits the amount of space a dataset can consume. This property enforces a hard limit on the amount of space used. This hard limit does not include space used by descendents, including file systems and snapshots. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBrefreservation\fR=\fIsize\fR | \fBnone\fR\fR +.ad +.sp .6 +.RS 4n +The minimum amount of space guaranteed to a dataset, not including its descendents. When the amount of space used is below this value, the dataset is treated as if it were taking up the amount of space specified by \fBrefreservation\fR. The \fBrefreservation\fR reservation is accounted for in the parent datasets' space used, and counts against the parent datasets' quotas and reservations. +.sp +If \fBrefreservation\fR is set, a snapshot is only allowed if there is enough free pool space outside of this reservation to accommodate the current number of "referenced" bytes in the dataset. +.sp +This property can also be referred to by its shortened column name, \fBrefreserv\fR. 
+.RE + +.sp +.ne 2 +.mk +.na +\fB\fBrelatime\fR=\fBon\fR | \fBoff\fR\fR +.ad +.sp .6 +.RS 4n +Controls the manner in which the access time is updated when \fBatime=on\fR is set. Turning this property \fBon\fR causes the access time to be updated relative to the modify or change time. Access time is only updated if the previous access time was earlier than the current modify or change time or if the existing access time hasn't been updated within the past 24 hours. The default value is \fBoff\fR. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBreservation\fR=\fIsize\fR | \fBnone\fR\fR +.ad +.sp .6 +.RS 4n +The minimum amount of space guaranteed to a dataset and its descendents. When the amount of space used is below this value, the dataset is treated as if it were taking up the amount of space specified by its reservation. Reservations are accounted for in the parent datasets' space used, and count against the parent datasets' quotas and reservations. +.sp +This property can also be referred to by its shortened column name, \fBreserv\fR. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBsecondarycache\fR=\fBall\fR | \fBnone\fR | \fBmetadata\fR\fR +.ad +.sp .6 +.RS 4n +Controls what is cached in the secondary cache (L2ARC). If this property is set to \fBall\fR, then both user data and metadata are cached. If this property is set to \fBnone\fR, then neither user data nor metadata is cached. If this property is set to \fBmetadata\fR, then only metadata is cached. The default value is \fBall\fR. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBsetuid\fR=\fBon\fR | \fBoff\fR\fR +.ad +.sp .6 +.RS 4n +Controls whether the set-\fBUID\fR bit is respected for the file system. The default value is \fBon\fR. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBsharesmb\fR=\fBon\fR | \fBoff\fR\fR +.ad +.sp .6 +.RS 4n +Controls whether the file system is shared by using \fBSamba USERSHARES\fR, and what options are to be used.
Otherwise, the file system is automatically shared and unshared with the \fBzfs share\fR and \fBzfs unshare\fR commands. If the property is set to \fBon\fR, the \fBnet\fR(8) command is invoked to create a \fBUSERSHARE\fR. +.sp +Because \fBSMB\fR shares require a resource name, a unique resource name is constructed from the dataset name. The constructed name is a copy of the dataset name except that the characters in the dataset name, which would be illegal in the resource name, are replaced with underscore (\fB_\fR) characters. The ZFS On Linux driver does not (yet) support additional options which might be available in the Solaris version. +.sp +If the \fBsharesmb\fR property is set to \fBoff\fR, the file systems are unshared. +.sp +In Linux, the share is created with the ACL (Access Control List) "Everyone:F" ("F" stands for "full permissions", i.e. read and write permissions) and no guest access (which means samba must be able to authenticate a real user, system passwd/shadow, ldap or smbpasswd based) by default. This means that any additional access control (disallow specific user specific access etc) must be done on the underlying filesystem. +.sp +.in +2 +Example to mount an SMB filesystem shared through ZFS (share/tmp): +.mk +Note that a user and his/her password \fBmust\fR be given! +.sp +.in +2 +smbmount //127.0.0.1/share_tmp /mnt/tmp -o user=workgroup/turbo,password=obrut,uid=1000 +.in -2 +.in -2 +.sp +.ne 2 +.mk +.na +\fBMinimal /etc/samba/smb.conf configuration\fR +.sp +.in +2 +* Samba will need to listen to 'localhost' (127.0.0.1) for the zfs utilities to communicate with samba. This is the default behavior for most Linux distributions. +.sp +* Samba must be able to authenticate a user. This can be done in a number of ways, depending on if using the system password file, LDAP or the Samba specific smbpasswd file. How to do this is outside the scope of this manual. Please refer to the smb.conf(5) manpage for more information.
+.sp +* See the \fBUSERSHARE\fR section of the \fBsmb.conf\fR(5) man page for all configuration options in case you need to modify any options to the share afterwards. Do note that any changes done with the 'net' command will be undone if the share is ever unshared (such as at a reboot etc). In the future, ZoL will be able to set specific options directly using sharesmb=