diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c index 34bfba2581a1..c6810ea919e0 100644 --- a/cmd/zfs/zfs_main.c +++ b/cmd/zfs/zfs_main.c @@ -248,10 +248,11 @@ get_usage(zfs_help_t idx) case HELP_PROMOTE: return (gettext("\tpromote \n")); case HELP_RECEIVE: - return (gettext("\treceive [-vnFu] \n" - "\treceive [-vnFu] [-o origin=] [-d | -e] " - "\n")); + "\treceive [-vnsFu] [-o origin=] [-d | -e] " + "\n" + "\treceive -A \n")); case HELP_RENAME: return (gettext("\trename [-f] " "\n" @@ -263,7 +264,8 @@ get_usage(zfs_help_t idx) return (gettext("\tsend [-DnPpRvLe] [-[iI] snapshot] " "\n" "\tsend [-Le] [-i snapshot|bookmark] " - "\n")); + "\n" + "\tsend [-nvPe] -t \n")); case HELP_SET: return (gettext("\tset ... " " ...\n")); @@ -3687,6 +3689,7 @@ zfs_do_send(int argc, char **argv) { char *fromname = NULL; char *toname = NULL; + char *resume_token = NULL; char *cp; zfs_handle_t *zhp; sendflags_t flags = { 0 }; @@ -3695,7 +3698,7 @@ zfs_do_send(int argc, char **argv) boolean_t extraverbose = B_FALSE; /* check options */ - while ((c = getopt(argc, argv, ":i:I:RDpvnPLe")) != -1) { + while ((c = getopt(argc, argv, ":i:I:RDpvnPLet:")) != -1) { switch (c) { case 'i': if (fromname) @@ -3736,6 +3739,9 @@ zfs_do_send(int argc, char **argv) case 'e': flags.embed_data = B_TRUE; break; + case 't': + resume_token = optarg; + break; case ':': (void) fprintf(stderr, gettext("missing argument for " "'%c' option\n"), optopt); @@ -3751,14 +3757,28 @@ zfs_do_send(int argc, char **argv) argc -= optind; argv += optind; - /* check number of arguments */ - if (argc < 1) { - (void) fprintf(stderr, gettext("missing snapshot argument\n")); - usage(B_FALSE); - } - if (argc > 1) { - (void) fprintf(stderr, gettext("too many arguments\n")); - usage(B_FALSE); + if (resume_token != NULL) { + if (fromname != NULL || flags.replicate || flags.props || + flags.dedup) { + (void) fprintf(stderr, + gettext("invalid flags combined with -t\n")); + usage(B_FALSE); + } + if (argc != 0) { + (void) 
fprintf(stderr, gettext("no additional " + "arguments are permitted with -t\n")); + usage(B_FALSE); + } + } else { + if (argc < 1) { + (void) fprintf(stderr, + gettext("missing snapshot argument\n")); + usage(B_FALSE); + } + if (argc > 1) { + (void) fprintf(stderr, gettext("too many arguments\n")); + usage(B_FALSE); + } } if (!flags.dryrun && isatty(STDOUT_FILENO)) { @@ -3768,6 +3788,11 @@ zfs_do_send(int argc, char **argv) return (1); } + if (resume_token != NULL) { + return (zfs_send_resume(g_zfs, &flags, STDOUT_FILENO, + resume_token)); + } + /* * Special case sending a filesystem, or from a bookmark. */ @@ -3873,8 +3898,6 @@ zfs_do_send(int argc, char **argv) } /* - * zfs receive [-vnFu] [-d | -e] - * * Restore a backup stream from stdin. */ static int @@ -3882,6 +3905,8 @@ zfs_do_receive(int argc, char **argv) { int c, err; recvflags_t flags = { 0 }; + boolean_t abort_resumable = B_FALSE; + nvlist_t *props; nvpair_t *nvp = NULL; @@ -3889,7 +3914,7 @@ zfs_do_receive(int argc, char **argv) nomem(); /* check options */ - while ((c = getopt(argc, argv, ":o:denuvF")) != -1) { + while ((c = getopt(argc, argv, ":o:denuvFsA")) != -1) { switch (c) { case 'o': if (parseprop(props, optarg) != 0) @@ -3911,9 +3936,15 @@ zfs_do_receive(int argc, char **argv) case 'v': flags.verbose = B_TRUE; break; + case 's': + flags.resumable = B_TRUE; + break; case 'F': flags.force = B_TRUE; break; + case 'A': + abort_resumable = B_TRUE; + break; case ':': (void) fprintf(stderr, gettext("missing argument for " "'%c' option\n"), optopt); @@ -3946,6 +3977,44 @@ zfs_do_receive(int argc, char **argv) } } + if (abort_resumable) { + if (flags.isprefix || flags.istail || flags.dryrun || + flags.resumable || flags.nomount) { + (void) fprintf(stderr, gettext("invalid option\n")); + usage(B_FALSE); + } + + char namebuf[ZFS_MAXNAMELEN]; + (void) snprintf(namebuf, sizeof (namebuf), + "%s/%%recv", argv[0]); + + if (zfs_dataset_exists(g_zfs, namebuf, + ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) { + 
zfs_handle_t *zhp = zfs_open(g_zfs, + namebuf, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); + if (zhp == NULL) + return (1); + err = zfs_destroy(zhp, B_FALSE); + } else { + zfs_handle_t *zhp = zfs_open(g_zfs, + argv[0], ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); + if (zhp == NULL) + usage(B_FALSE); + if (!zfs_prop_get_int(zhp, ZFS_PROP_INCONSISTENT) || + zfs_prop_get(zhp, ZFS_PROP_RECEIVE_RESUME_TOKEN, + NULL, 0, NULL, NULL, 0, B_TRUE) == -1) { + (void) fprintf(stderr, + gettext("'%s' does not have any " + "resumable receive state to abort\n"), + argv[0]); + return (1); + } + err = zfs_destroy(zhp, B_FALSE); + } + + return (err != 0); + } + if (isatty(STDIN_FILENO)) { (void) fprintf(stderr, gettext("Error: Backup stream can not be read " @@ -3953,7 +4022,6 @@ zfs_do_receive(int argc, char **argv) "You must redirect standard input.\n")); return (1); } - err = zfs_receive(g_zfs, argv[0], props, &flags, STDIN_FILENO, NULL); return (err != 0); @@ -5784,6 +5852,24 @@ share_mount_one(zfs_handle_t *zhp, int op, int flags, char *protocol, return (0); } + /* + * If this filesystem is inconsistent and has a receive resume + * token, we can not mount it. + */ + if (zfs_prop_get_int(zhp, ZFS_PROP_INCONSISTENT) && + zfs_prop_get(zhp, ZFS_PROP_RECEIVE_RESUME_TOKEN, + NULL, 0, NULL, NULL, 0, B_TRUE) == 0) { + if (!explicit) + return (0); + + (void) fprintf(stderr, gettext("cannot %s '%s': " + "Contains partially-completed state from " + "\"zfs receive -s\", which can be resumed with " + "\"zfs send -t\"\n"), + cmdname, zfs_get_name(zhp)); + return (1); + } + /* * At this point, we have verified that the mountpoint and/or * shareopts are appropriate for auto management. 
If the diff --git a/cmd/zstreamdump/zstreamdump.c b/cmd/zstreamdump/zstreamdump.c index f288d148e574..08d52bb37a83 100644 --- a/cmd/zstreamdump/zstreamdump.c +++ b/cmd/zstreamdump/zstreamdump.c @@ -127,7 +127,7 @@ read_hdr(dmu_replay_record_t *drr, zio_cksum_t *cksum) (longlong_t)saved_cksum.zc_word[1], (longlong_t)saved_cksum.zc_word[2], (longlong_t)saved_cksum.zc_word[3]); - exit(1); + return (0); } return (sizeof (*drr)); } @@ -347,8 +347,7 @@ main(int argc, char *argv[]) if (verbose) (void) printf("\n"); - if ((DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) == - DMU_COMPOUNDSTREAM) && drr->drr_payloadlen != 0) { + if (drr->drr_payloadlen != 0) { nvlist_t *nv; int sz = drr->drr_payloadlen; diff --git a/include/libzfs.h b/include/libzfs.h index 2a1f2f50d306..c0b6785d1fbe 100644 --- a/include/libzfs.h +++ b/include/libzfs.h @@ -631,6 +631,10 @@ typedef boolean_t (snapfilter_cb_t)(zfs_handle_t *, void *); extern int zfs_send(zfs_handle_t *, const char *, const char *, sendflags_t *, int, snapfilter_cb_t, void *, nvlist_t **); extern int zfs_send_one(zfs_handle_t *, const char *, int, enum lzc_send_flags); +extern int zfs_send_resume(libzfs_handle_t *, sendflags_t *, int outfd, + const char *); +extern nvlist_t *zfs_send_resume_token_to_nvlist(libzfs_handle_t *hdl, + const char *token); extern int zfs_promote(zfs_handle_t *); extern int zfs_hold(zfs_handle_t *, const char *, const char *, @@ -671,6 +675,12 @@ typedef struct recvflags { /* set "canmount=off" on all modified filesystems */ boolean_t canmountoff; + /* + * Mark the file systems as "resumable" and do not destroy them if the + * receive is interrupted + */ + boolean_t resumable; + /* byteswap flag is used internally; callers need not specify */ boolean_t byteswap; diff --git a/include/libzfs_core.h b/include/libzfs_core.h index bdd6c951ee49..5d3a6fda7dcb 100644 --- a/include/libzfs_core.h +++ b/include/libzfs_core.h @@ -20,7 +20,7 @@ */ /* - * Copyright (c) 2013 by Delphix. All rights reserved. 
+ * Copyright (c) 2012, 2014 by Delphix. All rights reserved. */ #ifndef _LIBZFS_CORE_H @@ -58,7 +58,11 @@ enum lzc_send_flags { }; int lzc_send(const char *, const char *, int, enum lzc_send_flags); +int lzc_send_resume(const char *, const char *, int, + enum lzc_send_flags, uint64_t, uint64_t); int lzc_receive(const char *, nvlist_t *, const char *, boolean_t, int); +int lzc_receive_resumable(const char *, nvlist_t *, const char *, + boolean_t, int); int lzc_send_space(const char *, const char *, uint64_t *); boolean_t lzc_exists(const char *); diff --git a/include/sys/dmu_impl.h b/include/sys/dmu_impl.h index 75d094f0812e..d700d1d17ed3 100644 --- a/include/sys/dmu_impl.h +++ b/include/sys/dmu_impl.h @@ -24,7 +24,7 @@ */ /* * Copyright (c) 2012, Joyent, Inc. All rights reserved. - * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright (c) 2013, 2014 by Delphix. All rights reserved. */ #ifndef _SYS_DMU_IMPL_H @@ -272,6 +272,8 @@ typedef struct dmu_sendarg { uint64_t dsa_featureflags; uint64_t dsa_last_data_object; uint64_t dsa_last_data_offset; + uint64_t dsa_resume_object; + uint64_t dsa_resume_offset; } dmu_sendarg_t; void dmu_object_zapify(objset_t *, uint64_t, dmu_object_type_t, dmu_tx_t *); diff --git a/include/sys/dmu_send.h b/include/sys/dmu_send.h index 2442a1f8aab1..871f5625460e 100644 --- a/include/sys/dmu_send.h +++ b/include/sys/dmu_send.h @@ -36,10 +36,13 @@ struct vnode; struct dsl_dataset; struct drr_begin; struct avl_tree; +struct dmu_replay_record; -int dmu_send(const char *tosnap, const char *fromsnap, - boolean_t embedok, boolean_t large_block_ok, - int outfd, struct vnode *vp, offset_t *off); +extern const char *recv_clone_name; + +int dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok, + boolean_t large_block_ok, int outfd, uint64_t resumeobj, uint64_t resumeoff, + struct vnode *vp, offset_t *off); int dmu_send_estimate(struct dsl_dataset *ds, struct dsl_dataset *fromds, uint64_t *sizep); int 
dmu_send_estimate_from_txg(struct dsl_dataset *ds, uint64_t fromtxg, @@ -50,12 +53,14 @@ int dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap, typedef struct dmu_recv_cookie { struct dsl_dataset *drc_ds; + struct dmu_replay_record *drc_drr_begin; struct drr_begin *drc_drrb; const char *drc_tofs; const char *drc_tosnap; boolean_t drc_newfs; boolean_t drc_byteswap; boolean_t drc_force; + boolean_t drc_resumable; struct avl_tree *drc_guid_to_ds_map; zio_cksum_t drc_cksum; uint64_t drc_newsnapobj; @@ -63,8 +68,9 @@ typedef struct dmu_recv_cookie { cred_t *drc_cred; } dmu_recv_cookie_t; -int dmu_recv_begin(char *tofs, char *tosnap, struct drr_begin *drrb, - boolean_t force, char *origin, dmu_recv_cookie_t *drc); +int dmu_recv_begin(char *tofs, char *tosnap, + struct dmu_replay_record *drr_begin, + boolean_t force, boolean_t resumable, char *origin, dmu_recv_cookie_t *drc); int dmu_recv_stream(dmu_recv_cookie_t *drc, struct vnode *vp, offset_t *voffp, int cleanup_fd, uint64_t *action_handlep); int dmu_recv_end(dmu_recv_cookie_t *drc, void *owner); diff --git a/include/sys/dmu_traverse.h b/include/sys/dmu_traverse.h index 544b721e4612..c010edd440d9 100644 --- a/include/sys/dmu_traverse.h +++ b/include/sys/dmu_traverse.h @@ -54,6 +54,8 @@ typedef int (blkptr_cb_t)(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, int traverse_dataset(struct dsl_dataset *ds, uint64_t txg_start, int flags, blkptr_cb_t func, void *arg); +int traverse_dataset_resume(struct dsl_dataset *ds, uint64_t txg_start, + zbookmark_phys_t *resume, int flags, blkptr_cb_t func, void *arg); int traverse_dataset_destroyed(spa_t *spa, blkptr_t *blkptr, uint64_t txg_start, zbookmark_phys_t *resume, int flags, blkptr_cb_t func, void *arg); diff --git a/include/sys/dsl_dataset.h b/include/sys/dsl_dataset.h index a596642e3130..997c6b5decb5 100644 --- a/include/sys/dsl_dataset.h +++ b/include/sys/dsl_dataset.h @@ -91,6 +91,18 @@ struct dsl_pool; */ #define DS_FIELD_LARGE_BLOCKS 
"org.open-zfs:large_blocks" +/* + * These fields are set on datasets that are in the middle of a resumable + * receive, and allow the sender to resume the send if it is interrupted. + */ +#define DS_FIELD_RESUME_FROMGUID "com.delphix:resume_fromguid" +#define DS_FIELD_RESUME_TONAME "com.delphix:resume_toname" +#define DS_FIELD_RESUME_TOGUID "com.delphix:resume_toguid" +#define DS_FIELD_RESUME_OBJECT "com.delphix:resume_object" +#define DS_FIELD_RESUME_OFFSET "com.delphix:resume_offset" +#define DS_FIELD_RESUME_BYTES "com.delphix:resume_bytes" +#define DS_FIELD_RESUME_EMBEDOK "com.delphix:resume_embedok" + /* * DS_FLAG_CI_DATASET is set if the dataset contains a file system whose * name lookups should be performed case-insensitively. @@ -184,6 +196,14 @@ typedef struct dsl_dataset { kmutex_t ds_sendstream_lock; list_t ds_sendstreams; + /* + * When in the middle of a resumable receive, tracks how much + * progress we have made. + */ + uint64_t ds_resume_object[TXG_SIZE]; + uint64_t ds_resume_offset[TXG_SIZE]; + uint64_t ds_resume_bytes[TXG_SIZE]; + /* Protected by our dsl_dir's dd_lock */ list_t ds_prop_cbs; @@ -235,6 +255,7 @@ int dsl_dataset_own_obj(struct dsl_pool *dp, uint64_t dsobj, void dsl_dataset_disown(dsl_dataset_t *ds, void *tag); void dsl_dataset_name(dsl_dataset_t *ds, char *name); boolean_t dsl_dataset_tryown(dsl_dataset_t *ds, void *tag); +boolean_t dsl_dataset_has_owner(dsl_dataset_t *ds); uint64_t dsl_dataset_create_sync(dsl_dir_t *pds, const char *lastname, dsl_dataset_t *origin, uint64_t flags, cred_t *, dmu_tx_t *); uint64_t dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin, @@ -315,6 +336,8 @@ int dsl_dataset_snap_remove(dsl_dataset_t *ds, const char *name, dmu_tx_t *tx, void dsl_dataset_set_refreservation_sync_impl(dsl_dataset_t *ds, zprop_source_t source, uint64_t value, dmu_tx_t *tx); void dsl_dataset_zapify(dsl_dataset_t *ds, dmu_tx_t *tx); +boolean_t dsl_dataset_is_zapified(dsl_dataset_t *ds); +boolean_t 
dsl_dataset_has_resume_receive_state(dsl_dataset_t *ds); int dsl_dataset_rollback(const char *fsname, void *owner, nvlist_t *result); void dsl_dataset_deactivate_feature(uint64_t dsobj, diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h index 536fab785c6d..e6bee0be23e4 100644 --- a/include/sys/fs/zfs.h +++ b/include/sys/fs/zfs.h @@ -156,6 +156,7 @@ typedef enum { ZFS_PROP_REDUNDANT_METADATA, ZFS_PROP_OVERLAY, ZFS_PROP_PREV_SNAP, + ZFS_PROP_RECEIVE_RESUME_TOKEN, ZFS_NUM_PROPS } zfs_prop_t; diff --git a/include/sys/zfs_ioctl.h b/include/sys/zfs_ioctl.h index 601a9a70c580..58f68ed02578 100644 --- a/include/sys/zfs_ioctl.h +++ b/include/sys/zfs_ioctl.h @@ -90,14 +90,15 @@ typedef enum drr_headertype { * Feature flags for zfs send streams (flags in drr_versioninfo) */ -#define DMU_BACKUP_FEATURE_DEDUP (1<<0) -#define DMU_BACKUP_FEATURE_DEDUPPROPS (1<<1) -#define DMU_BACKUP_FEATURE_SA_SPILL (1<<2) +#define DMU_BACKUP_FEATURE_DEDUP (1 << 0) +#define DMU_BACKUP_FEATURE_DEDUPPROPS (1 << 1) +#define DMU_BACKUP_FEATURE_SA_SPILL (1 << 2) /* flags #3 - #15 are reserved for incompatible closed-source implementations */ -#define DMU_BACKUP_FEATURE_EMBED_DATA (1<<16) -#define DMU_BACKUP_FEATURE_EMBED_DATA_LZ4 (1<<17) +#define DMU_BACKUP_FEATURE_EMBED_DATA (1 << 16) +#define DMU_BACKUP_FEATURE_EMBED_DATA_LZ4 (1 << 17) /* flag #18 is reserved for a Delphix feature */ -#define DMU_BACKUP_FEATURE_LARGE_BLOCKS (1<<19) +#define DMU_BACKUP_FEATURE_LARGE_BLOCKS (1 << 19) +#define DMU_BACKUP_FEATURE_RESUMING (1 << 20) /* * Mask of all supported backup features @@ -105,11 +106,16 @@ typedef enum drr_headertype { #define DMU_BACKUP_FEATURE_MASK (DMU_BACKUP_FEATURE_DEDUP | \ DMU_BACKUP_FEATURE_DEDUPPROPS | DMU_BACKUP_FEATURE_SA_SPILL | \ DMU_BACKUP_FEATURE_EMBED_DATA | DMU_BACKUP_FEATURE_EMBED_DATA_LZ4 | \ + DMU_BACKUP_FEATURE_RESUMING | \ DMU_BACKUP_FEATURE_LARGE_BLOCKS) /* Are all features in the given flag word currently supported? 
*/ #define DMU_STREAM_SUPPORTED(x) (!((x) & ~DMU_BACKUP_FEATURE_MASK)) +typedef enum dmu_send_resume_token_version { + ZFS_SEND_RESUME_TOKEN_VERSION = 1 +} dmu_send_resume_token_version_t; + /* * The drr_versioninfo field of the dmu_replay_record has the * following layout: @@ -359,14 +365,14 @@ typedef struct zfs_cmd { uint64_t zc_iflags; /* internal to zfs(7fs) */ zfs_share_t zc_share; dmu_objset_stats_t zc_objset_stats; - struct drr_begin zc_begin_record; + dmu_replay_record_t zc_begin_record; zinject_record_t zc_inject_record; uint32_t zc_defer_destroy; uint32_t zc_flags; uint64_t zc_action_handle; int zc_cleanup_fd; uint8_t zc_simple; - uint8_t zc_pad[3]; /* alignment */ + boolean_t zc_resumable; uint64_t zc_sendobj; uint64_t zc_fromobj; uint64_t zc_createtxg; diff --git a/lib/libzfs/libzfs_dataset.c b/lib/libzfs/libzfs_dataset.c index 2a85b31c902e..bbe7e2e12a7c 100644 --- a/lib/libzfs/libzfs_dataset.c +++ b/lib/libzfs/libzfs_dataset.c @@ -1853,22 +1853,21 @@ getprop_uint64(zfs_handle_t *zhp, zfs_prop_t prop, char **source) return (value); } -static char * +static const char * getprop_string(zfs_handle_t *zhp, zfs_prop_t prop, char **source) { nvlist_t *nv; - char *value; + const char *value; *source = NULL; if (nvlist_lookup_nvlist(zhp->zfs_props, zfs_prop_to_name(prop), &nv) == 0) { - verify(nvlist_lookup_string(nv, ZPROP_VALUE, &value) == 0); + value = fnvlist_lookup_string(nv, ZPROP_VALUE); (void) nvlist_lookup_string(nv, ZPROP_SOURCE, source); } else { verify(!zhp->zfs_props_table || zhp->zfs_props_table[prop] == B_TRUE); - if ((value = (char *)zfs_prop_default_string(prop)) == NULL) - value = ""; + value = zfs_prop_default_string(prop); *source = ""; } @@ -2290,7 +2289,7 @@ zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen, { char *source = NULL; uint64_t val; - char *str; + const char *str; const char *strval; boolean_t received = zfs_is_recvd_props_mode(zhp); @@ -2396,14 +2395,10 @@ zfs_prop_get(zfs_handle_t *zhp, 
zfs_prop_t prop, char *propbuf, size_t proplen, break; case ZFS_PROP_ORIGIN: - (void) strlcpy(propbuf, getprop_string(zhp, prop, &source), - proplen); - /* - * If there is no parent at all, return failure to indicate that - * it doesn't apply to this dataset. - */ - if (propbuf[0] == '\0') + str = getprop_string(zhp, prop, &source); + if (str == NULL) return (-1); + (void) strlcpy(propbuf, str, proplen); break; case ZFS_PROP_CLONES: @@ -2585,8 +2580,10 @@ zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen, break; case PROP_TYPE_STRING: - (void) strlcpy(propbuf, - getprop_string(zhp, prop, &source), proplen); + str = getprop_string(zhp, prop, &source); + if (str == NULL) + return (-1); + (void) strlcpy(propbuf, str, proplen); break; case PROP_TYPE_INDEX: diff --git a/lib/libzfs/libzfs_mount.c b/lib/libzfs/libzfs_mount.c index 41ab86131437..558b0df60b0b 100644 --- a/lib/libzfs/libzfs_mount.c +++ b/lib/libzfs/libzfs_mount.c @@ -1041,6 +1041,17 @@ mount_cb(zfs_handle_t *zhp, void *data) return (0); } + /* + * If this filesystem is inconsistent and has a receive resume + * token, we can not mount it. + */ + if (zfs_prop_get_int(zhp, ZFS_PROP_INCONSISTENT) && + zfs_prop_get(zhp, ZFS_PROP_RECEIVE_RESUME_TOKEN, + NULL, 0, NULL, NULL, 0, B_TRUE) == 0) { + zfs_close(zhp); + return (0); + } + libzfs_add_handle(cbp, zhp); if (zfs_iter_filesystems(zhp, mount_cb, cbp) != 0) { zfs_close(zhp); diff --git a/lib/libzfs/libzfs_sendrecv.c b/lib/libzfs/libzfs_sendrecv.c index 2adcb0c0f532..2082f1e86b27 100644 --- a/lib/libzfs/libzfs_sendrecv.c +++ b/lib/libzfs/libzfs_sendrecv.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2014 by Delphix. All rights reserved. + * Copyright (c) 2011, 2015 by Delphix. All rights reserved. * Copyright (c) 2012, Joyent, Inc. All rights reserved. * Copyright (c) 2012 Pawel Jakub Dawidek . 
* All rights reserved @@ -56,6 +56,7 @@ #include "zfs_prop.h" #include "zfs_fletcher.h" #include "libzfs_impl.h" +#include #include #include #include @@ -66,6 +67,8 @@ extern void zfs_setprop_error(libzfs_handle_t *, zfs_prop_t, int, char *); static int zfs_receive_impl(libzfs_handle_t *, const char *, const char *, recvflags_t *, int, const char *, nvlist_t *, avl_tree_t *, char **, int, uint64_t *); +static int guid_to_name(libzfs_handle_t *, const char *, + uint64_t, boolean_t, char *); static const zio_cksum_t zero_cksum = { { 0 } }; @@ -283,8 +286,7 @@ cksummer(void *arg) DMU_BACKUP_FEATURE_DEDUPPROPS); DMU_SET_FEATUREFLAGS(drrb->drr_versioninfo, fflags); - if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) == - DMU_COMPOUNDSTREAM && drr->drr_payloadlen != 0) { + if (drr->drr_payloadlen != 0) { sz = drr->drr_payloadlen; if (sz > SPA_MAXBLOCKSIZE) { @@ -1013,17 +1015,14 @@ static void * send_progress_thread(void *arg) { progress_arg_t *pa = arg; - zfs_cmd_t zc = {"\0"}; zfs_handle_t *zhp = pa->pa_zhp; libzfs_handle_t *hdl = zhp->zfs_hdl; unsigned long long bytes; char buf[16]; - time_t t; struct tm *tm; - assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT); (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); if (!pa->pa_parsable) @@ -1056,6 +1055,51 @@ send_progress_thread(void *arg) } } +static void +send_print_verbose(FILE *fout, const char *tosnap, const char *fromsnap, + uint64_t size, boolean_t parsable) +{ + if (parsable) { + if (fromsnap != NULL) { + (void) fprintf(fout, "incremental\t%s\t%s", + fromsnap, tosnap); + } else { + (void) fprintf(fout, "full\t%s", + tosnap); + } + } else { + if (fromsnap != NULL) { + if (strchr(fromsnap, '@') == NULL && + strchr(fromsnap, '#') == NULL) { + (void) fprintf(fout, dgettext(TEXT_DOMAIN, + "send from @%s to %s"), + fromsnap, tosnap); + } else { + (void) fprintf(fout, dgettext(TEXT_DOMAIN, + "send from %s to %s"), + fromsnap, tosnap); + } + } else { + (void) fprintf(fout, dgettext(TEXT_DOMAIN, + "full send of 
%s"), + tosnap); + } + } + + if (size != 0) { + if (parsable) { + (void) fprintf(fout, "\t%llu", + (longlong_t)size); + } else { + char buf[16]; + zfs_nicenum(size, buf, sizeof (buf)); + (void) fprintf(fout, dgettext(TEXT_DOMAIN, + " estimated size is %s"), buf); + } + } + (void) fprintf(fout, "\n"); +} + static int dump_snapshot(zfs_handle_t *zhp, void *arg) { @@ -1135,37 +1179,14 @@ dump_snapshot(zfs_handle_t *zhp, void *arg) (sdd->fromorigin || sdd->replicate); if (sdd->verbose) { - uint64_t size; - err = estimate_ioctl(zhp, sdd->prevsnap_obj, + uint64_t size = 0; + (void) estimate_ioctl(zhp, sdd->prevsnap_obj, fromorigin, &size); - if (sdd->parsable) { - if (sdd->prevsnap[0] != '\0') { - (void) fprintf(fout, "incremental\t%s\t%s", - sdd->prevsnap, zhp->zfs_name); - } else { - (void) fprintf(fout, "full\t%s", - zhp->zfs_name); - } - } else { - (void) fprintf(fout, dgettext(TEXT_DOMAIN, - "send from @%s to %s"), - sdd->prevsnap, zhp->zfs_name); - } - if (err == 0) { - if (sdd->parsable) { - (void) fprintf(fout, "\t%llu\n", - (longlong_t)size); - } else { - char buf[16]; - zfs_nicenum(size, buf, sizeof (buf)); - (void) fprintf(fout, dgettext(TEXT_DOMAIN, - " estimated size is %s\n"), buf); - } - sdd->size += size; - } else { - (void) fprintf(fout, "\n"); - } + send_print_verbose(fout, zhp->zfs_name, + sdd->prevsnap[0] ? sdd->prevsnap : NULL, + size, sdd->parsable); + sdd->size += size; } if (!sdd->dryrun) { @@ -1376,6 +1397,231 @@ dump_filesystems(zfs_handle_t *rzhp, void *arg) return (0); } +nvlist_t * +zfs_send_resume_token_to_nvlist(libzfs_handle_t *hdl, const char *token) +{ + unsigned int version; + int nread, i; + unsigned long long checksum, packed_len; + + /* + * Decode token header, which is: + * <token version>-<checksum of payload>-<uncompressed payload length> + * Note that the only supported token version is 1. 
+ */ + nread = sscanf(token, "%u-%llx-%llx-", + &version, &checksum, &packed_len); + if (nread != 3) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "resume token is corrupt (invalid format)")); + return (NULL); + } + + if (version != ZFS_SEND_RESUME_TOKEN_VERSION) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "resume token is corrupt (invalid version %u)"), + version); + return (NULL); + } + + /* convert hexadecimal representation to binary */ + token = strrchr(token, '-') + 1; + int len = strlen(token) / 2; + unsigned char *compressed = zfs_alloc(hdl, len); + for (i = 0; i < len; i++) { + nread = sscanf(token + i * 2, "%2hhx", compressed + i); + if (nread != 1) { + free(compressed); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "resume token is corrupt " + "(payload is not hex-encoded)")); + return (NULL); + } + } + + /* verify checksum */ + zio_cksum_t cksum; + fletcher_4_native(compressed, len, &cksum); + if (cksum.zc_word[0] != checksum) { + free(compressed); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "resume token is corrupt (incorrect checksum)")); + return (NULL); + } + + /* uncompress */ + void *packed = zfs_alloc(hdl, packed_len); + uLongf packed_len_long = packed_len; + if (uncompress(packed, &packed_len_long, compressed, len) != Z_OK || + packed_len_long != packed_len) { + free(packed); + free(compressed); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "resume token is corrupt (decompression failed)")); + return (NULL); + } + + /* unpack nvlist */ + nvlist_t *nv; + int error = nvlist_unpack(packed, packed_len, &nv, KM_SLEEP); + free(packed); + free(compressed); + if (error != 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "resume token is corrupt (nvlist_unpack failed)")); + return (NULL); + } + return (nv); +} + +int +zfs_send_resume(libzfs_handle_t *hdl, sendflags_t *flags, int outfd, + const char *resume_token) +{ + char errbuf[1024]; + char *toname; + char *fromname = NULL; + uint64_t resumeobj, resumeoff, toguid, fromguid, bytes; + zfs_handle_t 
*zhp; + int error = 0; + char name[ZFS_MAXNAMELEN]; + enum lzc_send_flags lzc_flags = 0; + + (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, + "cannot resume send")); + + nvlist_t *resume_nvl = + zfs_send_resume_token_to_nvlist(hdl, resume_token); + if (resume_nvl == NULL) { + /* + * zfs_error_aux has already been set by + * zfs_send_resume_token_to_nvlist + */ + return (zfs_error(hdl, EZFS_FAULT, errbuf)); + } + if (flags->verbose) { + (void) fprintf(stderr, dgettext(TEXT_DOMAIN, + "resume token contents:\n")); + nvlist_print(stderr, resume_nvl); + } + + if (nvlist_lookup_string(resume_nvl, "toname", &toname) != 0 || + nvlist_lookup_uint64(resume_nvl, "object", &resumeobj) != 0 || + nvlist_lookup_uint64(resume_nvl, "offset", &resumeoff) != 0 || + nvlist_lookup_uint64(resume_nvl, "bytes", &bytes) != 0 || + nvlist_lookup_uint64(resume_nvl, "toguid", &toguid) != 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "resume token is corrupt")); + return (zfs_error(hdl, EZFS_FAULT, errbuf)); + } + fromguid = 0; + (void) nvlist_lookup_uint64(resume_nvl, "fromguid", &fromguid); + + if (flags->embed_data || nvlist_exists(resume_nvl, "embedok")) + lzc_flags |= LZC_SEND_FLAG_EMBED_DATA; + + if (guid_to_name(hdl, toname, toguid, B_FALSE, name) != 0) { + if (zfs_dataset_exists(hdl, toname, ZFS_TYPE_DATASET)) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "'%s' is no longer the same snapshot used in " + "the initial send"), toname); + } else { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "'%s' used in the initial send no longer exists"), + toname); + } + return (zfs_error(hdl, EZFS_BADPATH, errbuf)); + } + zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET); + if (zhp == NULL) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "unable to access '%s'"), name); + return (zfs_error(hdl, EZFS_BADPATH, errbuf)); + } + + if (fromguid != 0) { + if (guid_to_name(hdl, toname, fromguid, B_TRUE, name) != 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "incremental source %#llx no longer 
exists"), + (longlong_t)fromguid); + return (zfs_error(hdl, EZFS_BADPATH, errbuf)); + } + fromname = name; + } + + if (flags->verbose) { + uint64_t size = 0; + error = lzc_send_space(zhp->zfs_name, fromname, &size); + if (error == 0) + size = MAX(0, (int64_t)(size - bytes)); + send_print_verbose(stderr, zhp->zfs_name, fromname, + size, flags->parsable); + } + + if (!flags->dryrun) { + progress_arg_t pa = { 0 }; + pthread_t tid; + /* + * If progress reporting is requested, spawn a new thread to + * poll ZFS_IOC_SEND_PROGRESS at a regular interval. + */ + if (flags->progress) { + pa.pa_zhp = zhp; + pa.pa_fd = outfd; + pa.pa_parsable = flags->parsable; + + error = pthread_create(&tid, NULL, + send_progress_thread, &pa); + if (error != 0) { + zfs_close(zhp); + return (error); + } + } + + error = lzc_send_resume(zhp->zfs_name, fromname, outfd, + lzc_flags, resumeobj, resumeoff); + + if (flags->progress) { + (void) pthread_cancel(tid); + (void) pthread_join(tid, NULL); + } + + char errbuf[1024]; + (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, + "warning: cannot send '%s'"), zhp->zfs_name); + + zfs_close(zhp); + + switch (error) { + case 0: + return (0); + case EXDEV: + case ENOENT: + case EDQUOT: + case EFBIG: + case EIO: + case ENOLINK: + case ENOSPC: + case ENOSTR: + case ENXIO: + case EPIPE: + case ERANGE: + case EFAULT: + case EROFS: + zfs_error_aux(hdl, strerror(errno)); + return (zfs_error(hdl, EZFS_BADBACKUP, errbuf)); + + default: + return (zfs_standard_error(hdl, errno, errbuf)); + } + } + + + zfs_close(zhp); + + return (error); +} + /* * Generate a send stream for the dataset identified by the argument zhp. 
* @@ -1913,6 +2159,7 @@ recv_destroy(libzfs_handle_t *hdl, const char *name, int baselen, typedef struct guid_to_name_data { uint64_t guid; + boolean_t bookmark_ok; char *name; char *skip; } guid_to_name_data_t; @@ -1921,20 +2168,25 @@ static int guid_to_name_cb(zfs_handle_t *zhp, void *arg) { guid_to_name_data_t *gtnd = arg; + const char *slash; int err; if (gtnd->skip != NULL && - strcmp(zhp->zfs_name, gtnd->skip) == 0) { + (slash = strrchr(zhp->zfs_name, '/')) != NULL && + strcmp(slash + 1, gtnd->skip) == 0) { + zfs_close(zhp); return (0); } - if (zhp->zfs_dmustats.dds_guid == gtnd->guid) { + if (zfs_prop_get_int(zhp, ZFS_PROP_GUID) == gtnd->guid) { (void) strcpy(gtnd->name, zhp->zfs_name); zfs_close(zhp); return (EEXIST); } err = zfs_iter_children(zhp, guid_to_name_cb, gtnd); + if (err != EEXIST && gtnd->bookmark_ok) + err = zfs_iter_bookmarks(zhp, guid_to_name_cb, gtnd); zfs_close(zhp); return (err); } @@ -1948,45 +2200,48 @@ guid_to_name_cb(zfs_handle_t *zhp, void *arg) */ static int guid_to_name(libzfs_handle_t *hdl, const char *parent, uint64_t guid, - char *name) + boolean_t bookmark_ok, char *name) { - /* exhaustive search all local snapshots */ char pname[ZFS_MAXNAMELEN]; guid_to_name_data_t gtnd; - int err = 0; - zfs_handle_t *zhp; - char *cp; gtnd.guid = guid; + gtnd.bookmark_ok = bookmark_ok; gtnd.name = name; gtnd.skip = NULL; - (void) strlcpy(pname, parent, sizeof (pname)); - /* - * Search progressively larger portions of the hierarchy. This will + * Search progressively larger portions of the hierarchy, starting + * with the filesystem specified by 'parent'. This will * select the "most local" version of the origin snapshot in the case * that there are multiple matching snapshots in the system. 
*/ - while ((cp = strrchr(pname, '/')) != NULL) { - + (void) strlcpy(pname, parent, sizeof (pname)); + char *cp = strrchr(pname, '@'); + if (cp == NULL) + cp = strchr(pname, '\0'); + for (; cp != NULL; cp = strrchr(pname, '/')) { /* Chop off the last component and open the parent */ *cp = '\0'; - zhp = make_dataset_handle(hdl, pname); + zfs_handle_t *zhp = make_dataset_handle(hdl, pname); if (zhp == NULL) continue; - - err = zfs_iter_children(zhp, guid_to_name_cb, &gtnd); + int err = guid_to_name_cb(zfs_handle_dup(zhp), &gtnd); + if (err != EEXIST) + err = zfs_iter_children(zhp, guid_to_name_cb, &gtnd); + if (err != EEXIST && bookmark_ok) + err = zfs_iter_bookmarks(zhp, guid_to_name_cb, &gtnd); zfs_close(zhp); if (err == EEXIST) return (0); /* - * Remember the dataset that we already searched, so we - * skip it next time through. + * Remember the last portion of the dataset so we skip it next + * time through (as we've already searched that portion of the + * hierarchy). */ - gtnd.skip = pname; + gtnd.skip = strrchr(pname, '/') + 1; } return (ENOENT); @@ -2587,11 +2842,9 @@ recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap) switch (drr->drr_type) { case DRR_BEGIN: - /* NB: not to be used on v2 stream packages */ if (drr->drr_payloadlen != 0) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "invalid substream header")); - return (zfs_error(hdl, EZFS_BADSTREAM, errbuf)); + (void) recv_read(hdl, fd, buf, + drr->drr_payloadlen, B_FALSE, NULL); } break; @@ -2652,6 +2905,40 @@ recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap) return (-1); } +static void +recv_ecksum_set_aux(libzfs_handle_t *hdl, const char *target_snap, + boolean_t resumable) +{ + char target_fs[ZFS_MAXNAMELEN]; + + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "checksum mismatch or incomplete stream")); + + if (!resumable) + return; + (void) strlcpy(target_fs, target_snap, sizeof (target_fs)); + *strchr(target_fs, '@') = '\0'; + zfs_handle_t *zhp = zfs_open(hdl, target_fs, + ZFS_TYPE_FILESYSTEM | 
ZFS_TYPE_VOLUME); + if (zhp == NULL) + return; + + char token_buf[ZFS_MAXPROPLEN]; + int error = zfs_prop_get(zhp, ZFS_PROP_RECEIVE_RESUME_TOKEN, + token_buf, sizeof (token_buf), + NULL, NULL, 0, B_TRUE); + if (error == 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "checksum mismatch or incomplete stream.\n" + "Partially received snapshot is saved.\n" + "A resuming stream can be generated on the sending " + "system by running:\n" + " zfs send -t %s"), + token_buf); + } + zfs_close(zhp); +} + /* * Restores a backup of tosnap from the file descriptor specified by infd. */ @@ -2800,7 +3087,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, */ if (drrb->drr_flags & DRR_FLAG_CLONE) { if (guid_to_name(hdl, zc.zc_value, - drrb->drr_fromguid, zc.zc_string) != 0) { + drrb->drr_fromguid, B_FALSE, zc.zc_string) != 0) { zcmd_free_nvlists(&zc); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "local origin for clone %s does not exist"), @@ -2816,8 +3103,10 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, zc.zc_string); } + boolean_t resuming = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) & + DMU_BACKUP_FEATURE_RESUMING; stream_wantsnewfs = (drrb->drr_fromguid == 0 || - (drrb->drr_flags & DRR_FLAG_CLONE) || originsnap); + (drrb->drr_flags & DRR_FLAG_CLONE) || originsnap) && !resuming; if (stream_wantsnewfs) { /* @@ -2836,7 +3125,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, char suffix[ZFS_MAXNAMELEN]; (void) strcpy(suffix, strrchr(zc.zc_value, '/')); if (guid_to_name(hdl, zc.zc_name, parent_snapguid, - zc.zc_value) == 0) { + B_FALSE, zc.zc_value) == 0) { *strchr(zc.zc_value, '@') = '\0'; (void) strcat(zc.zc_value, suffix); } @@ -2863,7 +3152,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, char snap[ZFS_MAXNAMELEN]; (void) strcpy(snap, strchr(zc.zc_value, '@')); if (guid_to_name(hdl, zc.zc_name, drrb->drr_fromguid, - zc.zc_value) == 0) { + B_FALSE, zc.zc_value) == 0) { *strchr(zc.zc_value, 
'@') = '\0'; (void) strcat(zc.zc_value, snap); } @@ -2877,11 +3166,12 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, zfs_handle_t *zhp; /* - * Destination fs exists. Therefore this should either - * be an incremental, or the stream specifies a new fs - * (full stream or clone) and they want us to blow it - * away (and have therefore specified -F and removed any - * snapshots). + * Destination fs exists. It must be one of these cases: + * - an incremental send stream + * - the stream specifies a new fs (full stream or clone) + * and they want us to blow away the existing fs (and + * have therefore specified -F and removed any snapshots) + * - we are resuming a failed receive. */ if (stream_wantsnewfs) { if (!flags->force) { @@ -2936,6 +3226,18 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, return (-1); } } + + /* + * If we are resuming a newfs, set newfs here so that we will + * mount it if the recv succeeds this time. We can tell + * that it was a newfs on the first recv because the fs + * itself will be inconsistent (if the fs existed when we + * did the first recv, we would have received it into + * .../%recv). + */ + if (resuming && zfs_prop_get_int(zhp, ZFS_PROP_INCONSISTENT)) + newfs = B_TRUE; + zfs_close(zhp); } else { /* @@ -2968,9 +3270,10 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, newfs = B_TRUE; } - zc.zc_begin_record = drr_noswap->drr_u.drr_begin; + zc.zc_begin_record = *drr_noswap; zc.zc_cookie = infd; zc.zc_guid = flags->force; + zc.zc_resumable = flags->resumable; if (flags->verbose) { (void) printf("%s %s stream of %s into %s\n", flags->dryrun ? 
"would receive" : "receiving", @@ -3107,8 +3410,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, (void) zfs_error(hdl, EZFS_BADSTREAM, errbuf); break; case ECKSUM: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "invalid stream (checksum mismatch)")); + recv_ecksum_set_aux(hdl, zc.zc_value, flags->resumable); (void) zfs_error(hdl, EZFS_BADSTREAM, errbuf); break; case ENOTSUP: @@ -3310,7 +3612,8 @@ zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap, * Restores a backup of tosnap from the file descriptor specified by infd. * Return 0 on total success, -2 if some things couldn't be * destroyed/renamed/promoted, -1 if some things couldn't be received. - * (-1 will override -2). + * (-1 will override -2, if -1 and the resumable flag was specified the + * transfer can be resumed if the sending side supports it). */ int zfs_receive(libzfs_handle_t *hdl, const char *tosnap, nvlist_t *props, diff --git a/lib/libzfs_core/libzfs_core.c b/lib/libzfs_core/libzfs_core.c index b706e6f6be88..220792300b8c 100644 --- a/lib/libzfs_core/libzfs_core.c +++ b/lib/libzfs_core/libzfs_core.c @@ -467,6 +467,13 @@ lzc_get_holds(const char *snapname, nvlist_t **holdsp) int lzc_send(const char *snapname, const char *from, int fd, enum lzc_send_flags flags) +{ + return (lzc_send_resume(snapname, from, fd, flags, 0, 0)); +} + +int +lzc_send_resume(const char *snapname, const char *from, int fd, + enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff) { nvlist_t *args; int err; @@ -479,6 +486,10 @@ lzc_send(const char *snapname, const char *from, int fd, fnvlist_add_boolean(args, "largeblockok"); if (flags & LZC_SEND_FLAG_EMBED_DATA) fnvlist_add_boolean(args, "embedok"); + if (resumeobj != 0 || resumeoff != 0) { + fnvlist_add_uint64(args, "resume_object", resumeobj); + fnvlist_add_uint64(args, "resume_offset", resumeoff); + } err = lzc_ioctl(ZFS_IOC_SEND_NEW, snapname, args, NULL); nvlist_free(args); return (err); @@ -536,22 +547,9 @@ recv_read(int fd, void 
*buf, int ilen) return (0); } -/* - * The simplest receive case: receive from the specified fd, creating the - * specified snapshot. Apply the specified properties a "received" properties - * (which can be overridden by locally-set properties). If the stream is a - * clone, its origin snapshot must be specified by 'origin'. The 'force' - * flag will cause the target filesystem to be rolled back or destroyed if - * necessary to receive. - * - * Return 0 on success or an errno on failure. - * - * Note: this interface does not work on dedup'd streams - * (those with DMU_BACKUP_FEATURE_DEDUP). - */ -int -lzc_receive(const char *snapname, nvlist_t *props, const char *origin, - boolean_t force, int fd) +static int +lzc_receive_impl(const char *snapname, nvlist_t *props, const char *origin, + boolean_t force, boolean_t resumable, int fd) { /* * The receive ioctl is still legacy, so we need to construct our own @@ -561,7 +559,6 @@ lzc_receive(const char *snapname, nvlist_t *props, const char *origin, char *atp; char *packed = NULL; size_t size; - dmu_replay_record_t drr; int error; ASSERT3S(g_refcount, >, 0); @@ -597,10 +594,9 @@ lzc_receive(const char *snapname, nvlist_t *props, const char *origin, (void) strlcpy(zc.zc_string, origin, sizeof (zc.zc_string)); /* zc_begin_record is non-byteswapped BEGIN record */ - error = recv_read(fd, &drr, sizeof (drr)); + error = recv_read(fd, &zc.zc_begin_record, sizeof (zc.zc_begin_record)); if (error != 0) goto out; - zc.zc_begin_record = drr.drr_u.drr_begin; /* zc_cookie is fd to read from */ zc.zc_cookie = fd; @@ -608,6 +604,8 @@ lzc_receive(const char *snapname, nvlist_t *props, const char *origin, /* zc guid is force flag */ zc.zc_guid = force; + zc.zc_resumable = resumable; + /* zc_cleanup_fd is unused */ zc.zc_cleanup_fd = -1; @@ -622,6 +620,39 @@ lzc_receive(const char *snapname, nvlist_t *props, const char *origin, return (error); } +/* + * The simplest receive case: receive from the specified fd, creating the + * specified 
snapshot. Apply the specified properties as "received" properties + * (which can be overridden by locally-set properties). If the stream is a + * clone, its origin snapshot must be specified by 'origin'. The 'force' + * flag will cause the target filesystem to be rolled back or destroyed if + * necessary to receive. + * + * Return 0 on success or an errno on failure. + * + * Note: this interface does not work on dedup'd streams + * (those with DMU_BACKUP_FEATURE_DEDUP). + */ +int +lzc_receive(const char *snapname, nvlist_t *props, const char *origin, + boolean_t force, int fd) +{ + return (lzc_receive_impl(snapname, props, origin, force, B_FALSE, fd)); +} + +/* + * Like lzc_receive, but if the receive fails due to premature stream + * termination, the intermediate state will be preserved on disk. In this + * case, ECKSUM will be returned. The receive may subsequently be resumed + * with a resuming send stream generated by lzc_send_resume(). + */ +int +lzc_receive_resumable(const char *snapname, nvlist_t *props, const char *origin, + boolean_t force, int fd) +{ + return (lzc_receive_impl(snapname, props, origin, force, B_TRUE, fd)); +} + /* * Roll back this filesystem or volume to its most recent snapshot. * If snapnamebuf is not NULL, it will be filled in with the name diff --git a/man/man8/zfs.8.orig b/man/man8/zfs.8.orig new file mode 100644 index 000000000000..3290ababec5b --- /dev/null +++ b/man/man8/zfs.8.orig @@ -0,0 +1,3913 @@ +'\" t +.\" +.\" CDDL HEADER START +.\" +.\" The contents of this file are subject to the terms of the +.\" Common Development and Distribution License (the "License"). +.\" You may not use this file except in compliance with the License. +.\" +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +.\" or http://www.opensolaris.org/os/licensing. +.\" See the License for the specific language governing permissions +.\" and limitations under the License. 
+.\" +.\" When distributing Covered Code, include this CDDL HEADER in each +.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE. +.\" If applicable, add the following below this CDDL HEADER, with the +.\" fields enclosed by brackets "[]" replaced with your own identifying +.\" information: Portions Copyright [yyyy] [name of copyright owner] +.\" +.\" CDDL HEADER END +.\" +.\" +.\" Copyright (c) 2009 Sun Microsystems, Inc. All Rights Reserved. +.\" Copyright 2011 Joshua M. Clulow +.\" Copyright (c) 2011, 2014 by Delphix. All rights reserved. +.\" Copyright (c) 2014, Joyent, Inc. All rights reserved. +.\" Copyright 2012 Nexenta Systems, Inc. All Rights Reserved. +.\" Copyright (c) 2013 by Saso Kiselkov. All rights reserved. +.\" +.TH zfs 8 "Nov 19, 2013" "ZFS pool 28, filesystem 5" "System Administration Commands" +.SH NAME +zfs \- configures ZFS file systems +.SH SYNOPSIS +.LP +.nf +\fBzfs\fR [\fB-?\fR] +.fi + +.LP +.nf +\fBzfs\fR \fBcreate\fR [\fB-p\fR] [\fB-o\fR \fIproperty\fR=\fIvalue\fR] ... \fIfilesystem\fR +.fi + +.LP +.nf +\fBzfs\fR \fBcreate\fR [\fB-ps\fR] [\fB-b\fR \fIblocksize\fR] [\fB-o\fR \fIproperty\fR=\fIvalue\fR] ... \fB-V\fR \fIsize\fR \fIvolume\fR +.fi + +.LP +.nf +\fBzfs\fR \fBdestroy\fR [\fB-fnpRrv\fR] \fIfilesystem\fR|\fIvolume\fR +.fi + +.LP +.nf +\fBzfs\fR \fBdestroy\fR [\fB-dnpRrv\fR] \fIfilesystem\fR|\fIvolume\fR@\fIsnap\fR[%\fIsnap\fR][,...] +.fi + +.LP +.nf +\fBzfs\fR \fBdestroy\fR \fIfilesystem\fR|\fIvolume\fR#\fIbookmark\fR +.fi + +.LP +.nf +\fBzfs\fR \fBsnapshot | snap\fR [\fB-r\fR] [\fB-o\fR \fIproperty\fR=\fIvalue\fR] ... + \fIfilesystem@snapname\fR|\fIvolume@snapname\fR ... +.fi + +.LP +.nf +\fBzfs\fR \fBrollback\fR [\fB-rRf\fR] \fIsnapshot\fR +.fi + +.LP +.nf +\fBzfs\fR \fBclone\fR [\fB-p\fR] [\fB-o\fR \fIproperty\fR=\fIvalue\fR] ... 
\fIsnapshot\fR \fIfilesystem\fR|\fIvolume\fR +.fi + +.LP +.nf +\fBzfs\fR \fBpromote\fR \fIclone-filesystem\fR +.fi + +.LP +.nf +\fBzfs\fR \fBrename\fR [\fB-f\fR] \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR + \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR +.fi + +.LP +.nf +\fBzfs\fR \fBrename\fR [\fB-fp\fR] \fIfilesystem\fR|\fIvolume\fR \fIfilesystem\fR|\fIvolume\fR +.fi + +.LP +.nf +\fBzfs\fR \fBrename\fR \fB-r\fR \fIsnapshot\fR \fIsnapshot\fR +.fi + +.LP +.nf +\fBzfs\fR \fBlist\fR [\fB-r\fR|\fB-d\fR \fIdepth\fR][\fB-Hp\fR][\fB-o\fR \fIproperty\fR[,\fIproperty\fR]...] [\fB-t\fR \fItype\fR[,\fItype\fR]..] + [\fB-s\fR \fIproperty\fR] ... [\fB-S\fR \fIproperty\fR] ... [\fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR] ... +.fi + +.LP +.nf ++\fBzfs\fR \fBset\fR \fIproperty\fR=\fIvalue\fR... \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR... +.fi + +.LP +.nf +\fBzfs\fR \fBget\fR [\fB-r\fR|\fB-d\fR \fIdepth\fR][\fB-Hp\fR][\fB-o\fR \fIfield\fR[,...]] [\fB-t\fR \fItype\fR[,...]] + [\fB-s\fR \fIsource\fR[,...]] "\fIall\fR" | \fIproperty\fR[,...] \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR ... +.fi + +.LP +.nf +\fBzfs\fR \fBinherit\fR [\fB-rS\fR] \fIproperty\fR \fIfilesystem\fR|\fIvolume|snapshot\fR ... +.fi + +.LP +.nf +\fBzfs\fR \fBupgrade\fR [\fB-v\fR] +.fi + +.LP +.nf +\fBzfs\fR \fBupgrade\fR [\fB-r\fR] [\fB-V\fR \fIversion\fR] \fB-a\fR | \fIfilesystem\fR +.fi + +.LP +.nf +\fBzfs\fR \fBuserspace\fR [\fB-Hinp\fR] [\fB-o\fR \fIfield\fR[,...]] [\fB-s\fR \fIfield\fR] ... + [\fB-S\fR \fIfield\fR] ... [\fB-t\fR \fItype\fR[,...]] \fIfilesystem\fR|\fIsnapshot\fR +.fi + +.LP +.nf +\fBzfs\fR \fBgroupspace\fR [\fB-Hinp\fR] [\fB-o\fR \fIfield\fR[,...]] [\fB-s\fR \fIfield\fR] ... + [\fB-S\fR \fIfield\fR] ... 
[\fB-t\fR \fItype\fR[,...]] \fIfilesystem\fR|\fIsnapshot\fR +.fi + +.LP +.nf +\fBzfs\fR \fBmount\fR +.fi + +.LP +.nf +\fBzfs\fR \fBmount\fR [\fB-vO\fR] [\fB-o \fIoptions\fR\fR] \fB-a\fR | \fIfilesystem\fR +.fi + +.LP +.nf +\fBzfs\fR \fBunmount | umount\fR [\fB-f\fR] \fB-a\fR | \fIfilesystem\fR|\fImountpoint\fR +.fi + +.LP +.nf +\fBzfs\fR \fBshare\fR \fB-a\fR | \fIfilesystem\fR +.fi + +.LP +.nf +\fBzfs\fR \fBunshare\fR \fB-a\fR \fIfilesystem\fR|\fImountpoint\fR +.fi + +.LP +.nf +\fBzfs\fR \fBbookmark\fR \fIsnapshot\fR \fIbookmark\fR +.fi + +.LP +.nf +\fBzfs\fR \fBsend\fR [\fB-DnPpRveL\fR] [\fB-\fR[\fBiI\fR] \fIsnapshot\fR] \fIsnapshot\fR +.fi + +.LP +.nf +\fBzfs\fR \fBsend\fR [\fB-eL\fR] [\fB-i \fIsnapshot\fR|\fIbookmark\fR]\fR \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR +.fi + +.LP +.nf +\fBzfs\fR \fBreceive\fR [\fB-vnFu\fR] [\fB-o origin\fR=\fIsnapshot\fR] \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR +.fi + +.LP +.nf +\fBzfs\fR \fBreceive\fR [\fB-vnFu\fR] [\fB-d\fR|\fB-e\fR] [\fB-o origin\fR=\fIsnapshot\fR] \fIfilesystem\fR +.fi + +.LP +.nf +\fBzfs\fR \fBallow\fR \fIfilesystem\fR|\fIvolume\fR +.fi + +.LP +.nf +\fBzfs\fR \fBallow\fR [\fB-ldug\fR] "\fIeveryone\fR"|\fIuser\fR|\fIgroup\fR[,...] \fIperm\fR|\fI@setname\fR[,...] + \fIfilesystem\fR|\fIvolume\fR +.fi + +.LP +.nf +\fBzfs\fR \fBallow\fR [\fB-ld\fR] \fB-e\fR \fIperm\fR|@\fIsetname\fR[,...] \fIfilesystem\fR|\fIvolume\fR +.fi + +.LP +.nf +\fBzfs\fR \fBallow\fR \fB-c\fR \fIperm\fR|@\fIsetname\fR[,...] \fIfilesystem\fR|\fIvolume\fR +.fi + +.LP +.nf +\fBzfs\fR \fBallow\fR \fB-s\fR @\fIsetname\fR \fIperm\fR|@\fIsetname\fR[,...] \fIfilesystem\fR|\fIvolume\fR +.fi + +.LP +.nf +\fBzfs\fR \fBunallow\fR [\fB-rldug\fR] "\fIeveryone\fR"|\fIuser\fR|\fIgroup\fR[,...] [\fIperm\fR|@\fIsetname\fR[,... ]] + \fIfilesystem\fR|\fIvolume\fR +.fi + +.LP +.nf +\fBzfs\fR \fBunallow\fR [\fB-rld\fR] \fB-e\fR [\fIperm\fR|@\fIsetname\fR[,... 
]] \fIfilesystem\fR|\fIvolume\fR +.fi + +.LP +.nf +\fBzfs\fR \fBunallow\fR [\fB-r\fR] \fB-c\fR [\fIperm\fR|@\fIsetname\fR[ ... ]] \fIfilesystem\fR|\fIvolume\fR +.fi + +.LP +.nf +\fBzfs\fR \fBunallow\fR [\fB-r\fR] \fB-s\fR @\fIsetname\fR [\fIperm\fR|@\fIsetname\fR[,... ]] \fIfilesystem\fR|\fIvolume\fR +.fi + +.LP +.nf +\fBzfs\fR \fBhold\fR [\fB-r\fR] \fItag\fR \fIsnapshot\fR... +.fi + +.LP +.nf +\fBzfs\fR \fBholds\fR [\fB-r\fR] \fIsnapshot\fR... +.fi + +.LP +.nf +\fBzfs\fR \fBrelease\fR [\fB-r\fR] \fItag\fR \fIsnapshot\fR... +.fi + +.LP +.nf +\fBzfs\fR \fBdiff\fR [\fB-FHt\fR] \fIsnapshot\fR \fIsnapshot|filesystem\fR + +.SH DESCRIPTION +.LP +The \fBzfs\fR command configures \fBZFS\fR datasets within a \fBZFS\fR storage pool, as described in \fBzpool\fR(8). A dataset is identified by a unique path within the \fBZFS\fR namespace. For example: +.sp +.in +2 +.nf +pool/{filesystem,volume,snapshot} +.fi +.in -2 +.sp + +.sp +.LP +where the maximum length of a dataset name is \fBMAXNAMELEN\fR (256 bytes). +.sp +.LP +A dataset can be one of the following: +.sp +.ne 2 +.mk +.na +\fB\fIfile system\fR\fR +.ad +.sp .6 +.RS 4n +A \fBZFS\fR dataset of type \fBfilesystem\fR can be mounted within the standard system namespace and behaves like other file systems. While \fBZFS\fR file systems are designed to be \fBPOSIX\fR compliant, known issues exist that prevent compliance in some cases. Applications that depend on standards conformance might fail due to nonstandard behavior when checking file system free space. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fIvolume\fR\fR +.ad +.sp .6 +.RS 4n +A logical volume exported as a raw or block device. This type of dataset should only be used under special circumstances. File systems are typically used in most environments. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fIsnapshot\fR\fR +.ad +.sp .6 +.RS 4n +A read-only version of a file system or volume at a given point in time. It is specified as \fIfilesystem@name\fR or \fIvolume@name\fR. 
+.RE + +.sp +.ne 2 +.mk +.na +\fB\fIbookmark\fR\fR +.ad +.sp .6 +.RS 4n +Much like a \fIsnapshot\fR, but without the hold on on-disk data. It can be used as the source of a send (but not for a receive). +It is specified as \fIfilesystem#name\fR or \fIvolume#name\fR. +.RE + +.SS "ZFS File System Hierarchy" +.LP +A \fBZFS\fR storage pool is a logical collection of devices that provide space for datasets. A storage pool is also the root of the \fBZFS\fR file system hierarchy. +.sp +.LP +The root of the pool can be accessed as a file system, such as mounting and unmounting, taking snapshots, and setting properties. The physical storage characteristics, however, are managed by the \fBzpool\fR(8) command. +.sp +.LP +See \fBzpool\fR(8) for more information on creating and administering pools. +.SS "Snapshots" +.LP +A snapshot is a read-only copy of a file system or volume. Snapshots can be created extremely quickly, and initially consume no additional space within the pool. As data within the active dataset changes, the snapshot consumes more data than would otherwise be shared with the active dataset. +.sp +.LP +Snapshots can have arbitrary names. Snapshots of volumes can be cloned or rolled back. Visibility is determined by the \fBsnapdev\fR property of the parent volume. +.sp +.LP +File system snapshots can be accessed under the \fB\&.zfs/snapshot\fR directory in the root of the file system. Snapshots are automatically mounted on demand and may be unmounted at regular intervals. The visibility of the \fB\&.zfs\fR directory can be controlled by the \fBsnapdir\fR property. +.SS "Bookmarks" +.LP +A bookmark is like a snapshot, a read-only copy of a file system or volume. Bookmarks can be created extremely quickly, compared to snapshots, and they consume no additional space within the pool. Bookmarks can also have arbitrary names, much like snapshots. +.sp +.LP +Unlike snapshots, bookmarks can not be accessed through the filesystem in any way. 
From a storage standpoint a bookmark just provides a way to reference when a snapshot was created as a distinct object. Bookmarks are initially tied to a snapshot, not the filesystem/volume, and they will survive if the snapshot itself is destroyed. Since they are very light weight there's little incentive to destroy them. +.SS "Clones" +.LP +A clone is a writable volume or file system whose initial contents are the same as another dataset. As with snapshots, creating a clone is nearly instantaneous, and initially consumes no additional space. +.sp +.LP +Clones can only be created from a snapshot. When a snapshot is cloned, it creates an implicit dependency between the parent and child. Even though the clone is created somewhere else in the dataset hierarchy, the original snapshot cannot be destroyed as long as a clone exists. The \fBorigin\fR property exposes this dependency, and the \fBdestroy\fR command lists any such dependencies, if they exist. +.sp +.LP +The clone parent-child dependency relationship can be reversed by using the \fBpromote\fR subcommand. This causes the "origin" file system to become a clone of the specified file system, which makes it possible to destroy the file system that the clone was created from. +.SS "Mount Points" +.LP +Creating a \fBZFS\fR file system is a simple operation, so the number of file systems per system is likely to be numerous. To cope with this, \fBZFS\fR automatically manages mounting and unmounting file systems without the need to edit the \fB/etc/fstab\fR file. All automatically managed file systems are mounted by \fBZFS\fR at boot time. +.sp +.LP +By default, file systems are mounted under \fB/\fIpath\fR\fR, where \fIpath\fR is the name of the file system in the \fBZFS\fR namespace. Directories are created and destroyed as needed. +.sp +.LP +A file system can also have a mount point set in the \fBmountpoint\fR property. 
This directory is created as needed, and \fBZFS\fR automatically mounts the file system when the \fBzfs mount -a\fR command is invoked (without editing \fB/etc/fstab\fR). The \fBmountpoint\fR property can be inherited, so if \fBpool/home\fR has a mount point of \fB/export/stuff\fR, then \fBpool/home/user\fR automatically inherits a mount point of \fB/export/stuff/user\fR. +.sp +.LP +A file system \fBmountpoint\fR property of \fBnone\fR prevents the file system from being mounted. +.sp +.LP +If needed, \fBZFS\fR file systems can also be managed with traditional tools (\fBmount\fR, \fBumount\fR, \fB/etc/fstab\fR). If a file system's mount point is set to \fBlegacy\fR, \fBZFS\fR makes no attempt to manage the file system, and the administrator is responsible for mounting and unmounting the file system. +.SS "Deduplication" +.LP +Deduplication is the process for removing redundant data at the block-level, reducing the total amount of data stored. If a file system has the \fBdedup\fR property enabled, duplicate data blocks are removed synchronously. The result is that only unique data is stored and common components are shared among files. +.sp +\fBWARNING: DO NOT ENABLE DEDUPLICATION UNLESS YOU NEED IT AND KNOW EXACTLY WHAT YOU ARE DOING!\fR +.sp +Deduplicating data is a very resource-intensive operation. It is generally recommended that you have \fIat least\fR 1.25 GB of RAM per 1 TB of storage when you enable deduplication. But calculating the exact requirements is a somewhat complicated affair. Please see the \fBOracle Dedup Guide\fR for more information. +.sp +Enabling deduplication on an improperly-designed system will result in extreme performance issues (extremely slow filesystem and snapshot deletions etc.) and can potentially lead to data loss (i.e. unimportable pool due to memory exhaustion) if your system is not built for this purpose. Deduplication affects the processing power (CPU), disks (and the controller) as well as primary (real) memory.
+.sp +Before creating a pool with deduplication enabled, ensure that you have planned your hardware requirements appropriately and implemented appropriate recovery practices, such as regular backups. +.sp +Unless necessary, deduplication should NOT be enabled on a system. Instead, consider using \fIcompression=lz4\fR, as a less resource-intensive alternative. +.SS "Native Properties" +.LP +Properties are divided into two types, native properties and user-defined (or "user") properties. Native properties either export internal statistics or control \fBZFS\fR behavior. In addition, native properties are either editable or read-only. User properties have no effect on \fBZFS\fR behavior, but you can use them to annotate datasets in a way that is meaningful in your environment. For more information about user properties, see the "User Properties" section, below. +.sp +.LP +Every dataset has a set of properties that export statistics about the dataset as well as control various behaviors. Properties are inherited from the parent unless overridden by the child. Some properties apply only to certain types of datasets (file systems, volumes, or snapshots). +.sp +.LP +The values of numeric properties can be specified using human-readable suffixes (for example, \fBk\fR, \fBKB\fR, \fBM\fR, \fBGb\fR, and so forth, up to \fBZ\fR for zettabyte). The following are all valid (and equal) specifications: +.sp +.in +2 +.nf +1536M, 1.5g, 1.50GB +.fi +.in -2 +.sp + +.sp +.LP +The values of non-numeric properties are case sensitive and must be lowercase, except for \fBmountpoint\fR, \fBsharenfs\fR, and \fBsharesmb\fR. +.sp +.LP +The following native properties consist of read-only statistics about the dataset. These properties can be neither set, nor inherited. Native properties apply to all dataset types unless otherwise noted. 
+.sp +.ne 2 +.mk +.na +\fB\fBavailable\fR\fR +.ad +.sp .6 +.RS 4n +The amount of space available to the dataset and all its children, assuming that there is no other activity in the pool. Because space is shared within a pool, availability can be limited by any number of factors, including physical pool size, quotas, reservations, or other datasets within the pool. +.sp +This property can also be referred to by its shortened column name, \fBavail\fR. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBcompressratio\fR\fR +.ad +.sp .6 +.RS 4n +For non-snapshots, the compression ratio achieved for the \fBused\fR space of this dataset, expressed as a multiplier. The \fBused\fR property includes descendant datasets, and, for clones, does not include the space shared with the origin snapshot. For snapshots, the \fBcompressratio\fR is the same as the \fBrefcompressratio\fR property. Compression can be turned on by running: \fBzfs set compression=on \fIdataset\fR\fR. The default value is \fBoff\fR. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBcreation\fR\fR +.ad +.sp .6 +.RS 4n +The time this dataset was created. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBclones\fR\fR +.ad +.sp .6 +.RS 4n +For snapshots, this property is a comma-separated list of filesystems or +volumes which are clones of this snapshot. The clones' \fBorigin\fR property +is this snapshot. If the \fBclones\fR property is not empty, then this +snapshot can not be destroyed (even with the \fB-r\fR or \fB-f\fR options). +.RE + +.sp +.ne 2 +.na +\fB\fBdefer_destroy\fR\fR +.ad +.sp .6 +.RS 4n +This property is \fBon\fR if the snapshot has been marked for deferred destruction by using the \fBzfs destroy\fR \fB-d\fR command. Otherwise, the property is \fBoff\fR. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBfilesystem_count\fR +.ad +.sp .6 +.RS 4n +The total number of filesystems and volumes that exist under this location in the +dataset tree. 
This value is only available when a \fBfilesystem_limit\fR has +been set somewhere in the tree under which the dataset resides. +.RE + +.sp +.ne 2 +.na +\fB\fBlogicalreferenced\fR\fR +.ad +.sp .6 +.RS 4n +The amount of space that is "logically" accessible by this dataset. See +the \fBreferenced\fR property. The logical space ignores the effect of +the \fBcompression\fR and \fBcopies\fR properties, giving a quantity +closer to the amount of data that applications see. However, it does +include space consumed by metadata. +.sp +This property can also be referred to by its shortened column name, +\fBlrefer\fR. +.RE + +.sp +.ne 2 +.na +\fB\fBlogicalused\fR\fR +.ad +.sp .6 +.RS 4n +The amount of space that is "logically" consumed by this dataset and all +its descendents. See the \fBused\fR property. The logical space +ignores the effect of the \fBcompression\fR and \fBcopies\fR properties, +giving a quantity closer to the amount of data that applications see. +However, it does include space consumed by metadata. +.sp +This property can also be referred to by its shortened column name, +\fBlused\fR. +.RE + +.sp +.ne 2 +.na +\fB\fBmounted\fR\fR +.ad +.sp .6 +.RS 4n +For file systems, indicates whether the file system is currently mounted. This property can be either \fByes\fR or \fBno\fR. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBorigin\fR\fR +.ad +.sp .6 +.RS 4n +For cloned file systems or volumes, the snapshot from which the clone was created. See also the \fBclones\fR property. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBreferenced\fR\fR +.ad +.sp .6 +.RS 4n +The amount of data that is accessible by this dataset, which may or may not be shared with other datasets in the pool. When a snapshot or clone is created, it initially references the same amount of space as the file system or snapshot it was created from, since its contents are identical. +.sp +This property can also be referred to by its shortened column name, \fBrefer\fR. 
+.RE + +.sp +.ne 2 +.mk +.na +\fB\fBrefcompressratio\fR\fR +.ad +.sp .6 +.RS 4n +The compression ratio achieved for the \fBreferenced\fR space of this +dataset, expressed as a multiplier. See also the \fBcompressratio\fR +property. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBsnapshot_count\fR +.ad +.sp .6 +.RS 4n +The total number of snapshots that exist under this location in the dataset tree. +This value is only available when a \fBsnapshot_limit\fR has been set somewhere +in the tree under which the dataset resides. +.RE + +.sp +.ne 2 +.na +\fB\fBtype\fR\fR +.ad +.sp .6 +.RS 4n +The type of dataset: \fBfilesystem\fR, \fBvolume\fR, or \fBsnapshot\fR. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBused\fR\fR +.ad +.sp .6 +.RS 4n +The amount of space consumed by this dataset and all its descendents. This is the value that is checked against this dataset's quota and reservation. The space used does not include this dataset's reservation, but does take into account the reservations of any descendent datasets. The amount of space that a dataset consumes from its parent, as well as the amount of space that is freed if this dataset is recursively destroyed, is the greater of its space used and its reservation. +.sp +When snapshots (see the "Snapshots" section) are created, their space is initially shared between the snapshot and the file system, and possibly with previous snapshots. As the file system changes, space that was previously shared becomes unique to the snapshot, and counted in the snapshot's space used. Additionally, deleting snapshots can increase the amount of space unique to (and used by) other snapshots. +.sp +The amount of space used, available, or referenced does not take into account pending changes. Pending changes are generally accounted for within a few seconds. Committing a change to a disk using \fBfsync\fR(2) or \fBO_SYNC\fR does not necessarily guarantee that the space usage information is updated immediately.
+.RE + +.sp +.ne 2 +.mk +.na +\fB\fBusedby*\fR\fR +.ad +.sp .6 +.RS 4n +The \fBusedby*\fR properties decompose the \fBused\fR property into the various reasons that space is used. Specifically, \fBused\fR = \fBusedbychildren\fR + \fBusedbydataset\fR + \fBusedbyrefreservation\fR + \fBusedbysnapshots\fR. These properties are only available for datasets created on \fBzpool\fR "version 13" pools. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBusedbychildren\fR\fR +.ad +.sp .6 +.RS 4n +The amount of space used by children of this dataset, which would be freed if all the dataset's children were destroyed. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBusedbydataset\fR\fR +.ad +.sp .6 +.RS 4n +The amount of space used by this dataset itself, which would be freed if the dataset were destroyed (after first removing any \fBrefreservation\fR and destroying any necessary snapshots or descendents). +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBusedbyrefreservation\fR\fR +.ad +.sp .6 +.RS 4n +The amount of space used by a \fBrefreservation\fR set on this dataset, which would be freed if the \fBrefreservation\fR was removed. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBusedbysnapshots\fR\fR +.ad +.sp .6 +.RS 4n +The amount of space consumed by snapshots of this dataset. In particular, it is the amount of space that would be freed if all of this dataset's snapshots were destroyed. Note that this is not simply the sum of the snapshots' \fBused\fR properties because space can be shared by multiple snapshots. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBuserused@\fR\fIuser\fR\fR +.ad +.sp .6 +.RS 4n +The amount of space consumed by the specified user in this dataset. Space is charged to the owner of each file, as displayed by \fBls\fR \fB-l\fR. The amount of space charged is displayed by \fBdu\fR and \fBls\fR \fB-s\fR. See the \fBzfs userspace\fR subcommand for more information. +.sp +Unprivileged users can access only their own space usage.
The root user, or a user who has been granted the \fBuserused\fR privilege with \fBzfs allow\fR, can access everyone's usage. +.sp +The \fBuserused@\fR... properties are not displayed by \fBzfs get all\fR. The user's name must be appended after the \fB@\fR symbol, using one of the following forms: +.RS +4 +.TP +.ie t \(bu +.el o +\fIPOSIX name\fR (for example, \fBjoe\fR) +.RE +.RS +4 +.TP +.ie t \(bu +.el o +\fIPOSIX numeric ID\fR (for example, \fB789\fR) +.RE +.RS +4 +.TP +.ie t \(bu +.el o +\fISID name\fR (for example, \fBjoe.smith@mydomain\fR) +.RE +.RS +4 +.TP +.ie t \(bu +.el o +\fISID numeric ID\fR (for example, \fBS-1-123-456-789\fR) +.RE +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBuserrefs\fR\fR +.ad +.sp .6 +.RS 4n +This property is set to the number of user holds on this snapshot. User holds are set by using the \fBzfs hold\fR command. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBgroupused@\fR\fIgroup\fR\fR +.ad +.sp .6 +.RS 4n +The amount of space consumed by the specified group in this dataset. Space is charged to the group of each file, as displayed by \fBls\fR \fB-l\fR. See the \fBuserused@\fR\fIuser\fR property for more information. +.sp +Unprivileged users can only access their own groups' space usage. The root user, or a user who has been granted the \fBgroupused\fR privilege with \fBzfs allow\fR, can access all groups' usage. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBvolblocksize\fR=\fIblocksize\fR\fR +.ad +.sp .6 +.RS 4n +For volumes, specifies the block size of the volume. The \fBblocksize\fR cannot be changed once the volume has been written, so it should be set at volume creation time. The default \fBblocksize\fR for volumes is 8 Kbytes. Any power of 2 from 512 bytes to 128 Kbytes is valid. +.sp +This property can also be referred to by its shortened column name, \fBvolblock\fR. +.RE + +.sp +.ne 2 +.na +\fB\fBwritten\fR\fR +.ad +.sp .6 +.RS 4n +The amount of \fBreferenced\fR space written to this dataset since the +previous snapshot. 
+.RE + +.sp +.ne 2 +.na +\fB\fBwritten@\fR\fIsnapshot\fR\fR +.ad +.sp .6 +.RS 4n +The amount of \fBreferenced\fR space written to this dataset since the +specified snapshot. This is the space that is referenced by this dataset +but was not referenced by the specified snapshot. +.sp +The \fIsnapshot\fR may be specified as a short snapshot name (just the part +after the \fB@\fR), in which case it will be interpreted as a snapshot in +the same filesystem as this dataset. +The \fIsnapshot\fR may also be a full snapshot name (\fIfilesystem\fR@\fIsnapshot\fR), +which for clones may be a snapshot in the origin's filesystem (or the origin +of the origin's filesystem, etc). +.RE + +.sp +.LP +The following native properties can be used to change the behavior of a \fBZFS\fR dataset. +.sp +.ne 2 +.mk +.na +\fB\fBaclinherit\fR=\fBdiscard\fR | \fBnoallow\fR | \fBrestricted\fR | \fBpassthrough\fR | \fBpassthrough-x\fR\fR +.ad +.sp .6 +.RS 4n +Controls how \fBACL\fR entries are inherited when files and directories are created. A file system with an \fBaclinherit\fR property of \fBdiscard\fR does not inherit any \fBACL\fR entries. A file system with an \fBaclinherit\fR property value of \fBnoallow\fR only inherits inheritable \fBACL\fR entries that specify "deny" permissions. The property value \fBrestricted\fR (the default) removes the \fBwrite_acl\fR and \fBwrite_owner\fR permissions when the \fBACL\fR entry is inherited. A file system with an \fBaclinherit\fR property value of \fBpassthrough\fR inherits all inheritable \fBACL\fR entries without any modifications made to the \fBACL\fR entries when they are inherited. A file system with an \fBaclinherit\fR property value of \fBpassthrough-x\fR has the same meaning as \fBpassthrough\fR, except that the \fBowner@\fR, \fBgroup@\fR, and \fBeveryone@\fR \fBACE\fRs inherit the execute permission only if the file creation mode also requests the execute bit.
+.sp +When the property value is set to \fBpassthrough\fR, files are created with a mode determined by the inheritable \fBACE\fRs. If no inheritable \fBACE\fRs exist that affect the mode, then the mode is set in accordance to the requested mode from the application. +.sp +The \fBaclinherit\fR property does not apply to Posix ACLs. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBacltype\fR=\fBnoacl\fR | \fBposixacl\fR \fR +.ad +.sp .6 +.RS 4n +Controls whether ACLs are enabled and if so what type of ACL to use. When +a file system has the \fBacltype\fR property set to \fBnoacl\fR (the default) +then ACLs are disabled. Setting the \fBacltype\fR property to \fBposixacl\fR +indicates Posix ACLs should be used. Posix ACLs are specific to Linux and +are not functional on other platforms. Posix ACLs are stored as an xattr and +therefore will not overwrite any existing ZFS/NFSv4 ACLs which may be set. +Currently only \fBposixacls\fR are supported on Linux. +.sp +To obtain the best performance when setting \fBposixacl\fR users are strongly +encouraged to set the \fBxattr=sa\fR property. This will result in the +Posix ACL being stored more efficiently on disk. But as a consequence of this +all new xattrs will only be accessible from ZFS implementations which support +the \fBxattr=sa\fR property. See the \fBxattr\fR property for more details. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBatime\fR=\fBon\fR | \fBoff\fR\fR +.ad +.sp .6 +.RS 4n +Controls whether the access time for files is updated when they are read. Turning this property off avoids producing write traffic when reading files and can result in significant performance gains, though it might confuse mailers and other similar utilities. The default value is \fBon\fR. See also \fBrelatime\fR below. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBcanmount\fR=\fBon\fR | \fBoff\fR | \fBnoauto\fR\fR +.ad +.sp .6 +.RS 4n +If this property is set to \fBoff\fR, the file system cannot be mounted, and is ignored by \fBzfs mount -a\fR. 
Setting this property to \fBoff\fR is similar to setting the \fBmountpoint\fR property to \fBnone\fR, except that the dataset still has a normal \fBmountpoint\fR property, which can be inherited. Setting this property to \fBoff\fR allows datasets to be used solely as a mechanism to inherit properties. One example of setting \fBcanmount=\fR\fBoff\fR is to have two datasets with the same \fBmountpoint\fR, so that the children of both datasets appear in the same directory, but might have different inherited characteristics. +.sp +When the \fBnoauto\fR option is set, a dataset can only be mounted and unmounted explicitly. The dataset is not mounted automatically when the dataset is created or imported, nor is it mounted by the \fBzfs mount -a\fR command or unmounted by the \fBzfs unmount -a\fR command. +.sp +This property is not inherited. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBchecksum\fR=\fBon\fR | \fBoff\fR | \fBfletcher2\fR | \fBfletcher4\fR | \fBsha256\fR\fR +.ad +.sp .6 +.RS 4n +Controls the checksum used to verify data integrity. The default value is \fBon\fR, which automatically selects an appropriate algorithm (currently, \fBfletcher4\fR, but this may change in future releases). The value \fBoff\fR disables integrity checking on user data. Disabling checksums is \fBNOT\fR a recommended practice. +.sp +Changing this property affects only newly-written data. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBcompression\fR=\fBon\fR | \fBoff\fR | \fBlzjb\fR | \fBlz4\fR | +\fBgzip\fR | \fBgzip-\fR\fIN\fR | \fBzle\fR\fR +.ad +.sp .6 +.RS 4n +Controls the compression algorithm used for this dataset. +.sp +Setting compression to \fBon\fR indicates that the current default +compression algorithm should be used. The default balances compression +and decompression speed, with compression ratio and is expected to +work well on a wide variety of workloads. Unlike all other settings for +this property, \fBon\fR does not select a fixed compression type.
As +new compression algorithms are added to ZFS and enabled on a pool, the +default compression algorithm may change. The current default compression +algorithm is either \fBlzjb\fR or, if the \fBlz4_compress\fR feature is +enabled, \fBlz4\fR. +.sp +The \fBlzjb\fR compression algorithm is optimized for performance while +providing decent data compression. +.sp +The \fBlz4\fR compression algorithm is a high-performance replacement +for the \fBlzjb\fR algorithm. It features significantly faster +compression and decompression, as well as a moderately higher +compression ratio than \fBlzjb\fR, but can only be used on pools with +the \fBlz4_compress\fR feature set to \fIenabled\fR. See +\fBzpool-features\fR(5) for details on ZFS feature flags and the +\fBlz4_compress\fR feature. +.sp +The \fBgzip\fR compression algorithm uses the same compression as +the \fBgzip\fR(1) command. You can specify the \fBgzip\fR level by using the +value \fBgzip-\fR\fIN\fR where \fIN\fR is an integer from 1 (fastest) to 9 +(best compression ratio). Currently, \fBgzip\fR is equivalent to \fBgzip-6\fR +(which is also the default for \fBgzip\fR(1)). The \fBzle\fR compression +algorithm compresses runs of zeros. +.sp +This property can also be referred to by its shortened column name +\fBcompress\fR. Changing this property affects only newly-written data. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBcopies\fR=\fB1\fR | \fB2\fR | \fB3\fR\fR +.ad +.sp .6 +.RS 4n +Controls the number of copies of data stored for this dataset. These copies are in addition to any redundancy provided by the pool, for example, mirroring or RAID-Z. The copies are stored on different disks, if possible. The space used by multiple copies is charged to the associated file and dataset, changing the \fBused\fR property and counting against quotas and reservations. +.sp +Changing this property only affects newly-written data. Therefore, set this property at file system creation time by using the \fB-o\fR \fBcopies=\fR\fIN\fR option. 
+.RE + +.sp +.ne 2 +.mk +.na +\fB\fBdedup\fR=\fBon\fR | \fBoff\fR | \fBverify\fR | \fBsha256\fR[,\fBverify\fR]\fR +.ad +.sp .6 +.RS 4n +Controls whether deduplication is in effect for a dataset. The default value is \fBoff\fR. The default checksum used for deduplication is \fBsha256\fR (subject to change). When \fBdedup\fR is enabled, the \fBdedup\fR checksum algorithm overrides the \fBchecksum\fR property. Setting the value to \fBverify\fR is equivalent to specifying \fBsha256,verify\fR. +.sp +If the property is set to \fBverify\fR, then, whenever two blocks have the same signature, ZFS will do a byte-for-byte comparison with the existing block to ensure that the contents are identical. +.sp +Unless necessary, deduplication should NOT be enabled on a system. See \fBDeduplication\fR above. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBdevices\fR=\fBon\fR | \fBoff\fR\fR +.ad +.sp .6 +.RS 4n +Controls whether device nodes can be opened on this file system. The default value is \fBon\fR. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBexec\fR=\fBon\fR | \fBoff\fR\fR +.ad +.sp .6 +.RS 4n +Controls whether processes can be executed from within this file system. The default value is \fBon\fR. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBmlslabel\fR=\fIlabel\fR | \fBnone\fR\fR +.ad +.sp .6 +.RS 4n +The \fBmlslabel\fR property is a sensitivity label that determines if a dataset can be mounted in a zone on a system with Trusted Extensions enabled. If the labeled dataset matches the labeled zone, the dataset can be mounted and accessed from the labeled zone. +.sp +When the \fBmlslabel\fR property is not set, the default value is \fBnone\fR. Setting the \fBmlslabel\fR property to \fBnone\fR is equivalent to removing the property. +.sp +The \fBmlslabel\fR property can be modified only when Trusted Extensions is enabled and only with appropriate privilege. Rights to modify it cannot be delegated. 
When changing a label to a higher label or setting the initial dataset label, the \fB{PRIV_FILE_UPGRADE_SL}\fR privilege is required. When changing a label to a lower label or the default (\fBnone\fR), the \fB{PRIV_FILE_DOWNGRADE_SL}\fR privilege is required. Changing the dataset to labels other than the default can be done only when the dataset is not mounted. When a dataset with the default label is mounted into a labeled-zone, the mount operation automatically sets the \fBmlslabel\fR property to the label of that zone. +.sp +When Trusted Extensions is \fBnot\fR enabled, only datasets with the default label (\fBnone\fR) can be mounted. +.sp +Zones are a Solaris feature and are not relevant on Linux. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBfilesystem_limit\fR=\fIcount\fR | \fBnone\fR\fR +.ad +.sp .6 +.RS 4n +Limits the number of filesystems and volumes that can exist under this point in +the dataset tree. The limit is not enforced if the user is allowed to change +the limit. Setting a filesystem_limit on a descendent of a filesystem that +already has a filesystem_limit does not override the ancestor's filesystem_limit, +but rather imposes an additional limit. This feature must be enabled to be used +(see \fBzpool-features\fR(5)). +.RE + +.sp +.ne 2 +.na +\fB\fBmountpoint\fR=\fIpath\fR | \fBnone\fR | \fBlegacy\fR\fR +.ad +.sp .6 +.RS 4n +Controls the mount point used for this file system. See the "Mount Points" section for more information on how this property is used. +.sp +When the \fBmountpoint\fR property is changed for a file system, the file system and any children that inherit the mount point are unmounted. If the new value is \fBlegacy\fR, then they remain unmounted. Otherwise, they are automatically remounted in the new location if the property was previously \fBlegacy\fR or \fBnone\fR, or if they were mounted before the property was changed. In addition, any shared file systems are unshared and shared in the new location. 
+.RE + +.sp +.ne 2 +.mk +.na +\fB\fBnbmand\fR=\fBon\fR | \fBoff\fR\fR +.ad +.sp .6 +.RS 4n +Controls whether the file system should be mounted with \fBnbmand\fR (Non Blocking mandatory locks). This is used for \fBCIFS\fR clients. Changes to this property only take effect when the file system is unmounted and remounted. See \fBmount\fR(8) for more information on \fBnbmand\fR mounts. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBprimarycache\fR=\fBall\fR | \fBnone\fR | \fBmetadata\fR\fR +.ad +.sp .6 +.RS 4n +Controls what is cached in the primary cache (ARC). If this property is set to \fBall\fR, then both user data and metadata are cached. If this property is set to \fBnone\fR, then neither user data nor metadata is cached. If this property is set to \fBmetadata\fR, then only metadata is cached. The default value is \fBall\fR. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBquota\fR=\fIsize\fR | \fBnone\fR\fR +.ad +.sp .6 +.RS 4n +Limits the amount of space a dataset and its descendents can consume. This property enforces a hard limit on the amount of space used. This includes all space consumed by descendents, including file systems and snapshots. Setting a quota on a descendent of a dataset that already has a quota does not override the ancestor's quota, but rather imposes an additional limit. +.sp +Quotas cannot be set on volumes, as the \fBvolsize\fR property acts as an implicit quota. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBsnapshot_limit\fR=\fIcount\fR | \fBnone\fR\fR +.ad +.sp .6 +.RS 4n +Limits the number of snapshots that can be created on a dataset and its +descendents. Setting a snapshot_limit on a descendent of a dataset that already +has a snapshot_limit does not override the ancestor's snapshot_limit, but +rather imposes an additional limit. The limit is not enforced if the user is +allowed to change the limit. For example, this means that recursive snapshots +taken from the global zone are counted against each delegated dataset within +a zone.
This feature must be enabled to be used (see \fBzpool-features\fR(5)). +.RE + +.sp +.ne 2 +.na +\fB\fBuserquota@\fR\fIuser\fR=\fIsize\fR | \fBnone\fR\fR +.ad +.sp .6 +.RS 4n +Limits the amount of space consumed by the specified user. Similar to the \fBrefquota\fR property, the \fBuserquota\fR space calculation does not include space that is used by descendent datasets, such as snapshots and clones. User space consumption is identified by the \fBuserused@\fR\fIuser\fR property. +.sp +Enforcement of user quotas may be delayed by several seconds. This delay means that a user might exceed their quota before the system notices that they are over quota and begins to refuse additional writes with the \fBEDQUOT\fR error message. See the \fBzfs userspace\fR subcommand for more information. +.sp +Unprivileged users can only access their own space usage. The root user, or a user who has been granted the \fBuserquota\fR privilege with \fBzfs allow\fR, can get and set everyone's quota. +.sp +This property is not available on volumes, on file systems before version 4, or on pools before version 15. The \fBuserquota@\fR... properties are not displayed by \fBzfs get all\fR. The user's name must be appended after the \fB@\fR symbol, using one of the following forms: +.RS +4 +.TP +.ie t \(bu +.el o +\fIPOSIX name\fR (for example, \fBjoe\fR) +.RE +.RS +4 +.TP +.ie t \(bu +.el o +\fIPOSIX numeric ID\fR (for example, \fB789\fR) +.RE +.RS +4 +.TP +.ie t \(bu +.el o +\fISID name\fR (for example, \fBjoe.smith@mydomain\fR) +.RE +.RS +4 +.TP +.ie t \(bu +.el o +\fISID numeric ID\fR (for example, \fBS-1-123-456-789\fR) +.RE +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBgroupquota@\fR\fIgroup\fR=\fIsize\fR | \fBnone\fR\fR +.ad +.sp .6 +.RS 4n +Limits the amount of space consumed by the specified group. Group space consumption is identified by the \fBgroupused@\fR\fIgroup\fR property. +.sp +Unprivileged users can access only their own groups' space usage.
The root user, or a user who has been granted the \fBgroupquota\fR privilege with \fBzfs allow\fR, can get and set all groups' quotas. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBreadonly\fR=\fBon\fR | \fBoff\fR\fR +.ad +.sp .6 +.RS 4n +Controls whether this dataset can be modified. The default value is \fBoff\fR. +.sp +This property can also be referred to by its shortened column name, \fBrdonly\fR. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBrecordsize\fR=\fIsize\fR\fR +.ad +.sp .6 +.RS 4n +Specifies a suggested block size for files in the file system. This property is designed solely for use with database workloads that access files in fixed-size records. \fBZFS\fR automatically tunes block sizes according to internal algorithms optimized for typical access patterns. +.sp +For databases that create very large files but access them in small random chunks, these algorithms may be suboptimal. Specifying a \fBrecordsize\fR greater than or equal to the record size of the database can result in significant performance gains. Use of this property for general purpose file systems is strongly discouraged, and may adversely affect performance. +.sp +The size specified must be a power of two greater than or equal to 512 and less than or equal to 128 Kbytes. +.sp +Changing the file system's \fBrecordsize\fR affects only files created afterward; existing files are unaffected. +.sp +This property can also be referred to by its shortened column name, \fBrecsize\fR. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBredundant_metadata\fR=\fBall\fR | \fBmost\fR\fR +.ad +.sp .6 +.RS 4n +Controls what types of metadata are stored redundantly. ZFS stores an +extra copy of metadata, so that if a single block is corrupted, the +amount of user data lost is limited. This extra copy is in addition to +any redundancy provided at the pool level (e.g. by mirroring or RAID-Z), +and is in addition to an extra copy specified by the \fBcopies\fR +property (up to a total of 3 copies). 
For example if the pool is +mirrored, \fBcopies\fR=2, and \fBredundant_metadata\fR=most, then ZFS +stores 6 copies of most metadata, and 4 copies of data and some +metadata. +.sp +When set to \fBall\fR, ZFS stores an extra copy of all metadata. If a +single on-disk block is corrupt, at worst a single block of user data +(which is \fBrecordsize\fR bytes long) can be lost. +.sp +When set to \fBmost\fR, ZFS stores an extra copy of most types of +metadata. This can improve performance of random writes, because less +metadata must be written. In practice, at worst about 100 blocks (of +\fBrecordsize\fR bytes each) of user data can be lost if a single +on-disk block is corrupt. The exact behavior of which metadata blocks +are stored redundantly may change in future releases. +.sp +The default value is \fBall\fR. +.RE + +.sp +.ne 2 +.na +\fB\fBrefquota\fR=\fIsize\fR | \fBnone\fR\fR +.ad +.sp .6 +.RS 4n +Limits the amount of space a dataset can consume. This property enforces a hard limit on the amount of space used. This hard limit does not include space used by descendents, including file systems and snapshots. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBrefreservation\fR=\fIsize\fR | \fBnone\fR\fR +.ad +.sp .6 +.RS 4n +The minimum amount of space guaranteed to a dataset, not including its descendents. When the amount of space used is below this value, the dataset is treated as if it were taking up the amount of space specified by \fBrefreservation\fR. The \fBrefreservation\fR reservation is accounted for in the parent datasets' space used, and counts against the parent datasets' quotas and reservations. +.sp +If \fBrefreservation\fR is set, a snapshot is only allowed if there is enough free pool space outside of this reservation to accommodate the current number of "referenced" bytes in the dataset. +.sp +This property can also be referred to by its shortened column name, \fBrefreserv\fR. 
+.RE + +.sp +.ne 2 +.mk +.na +\fB\fBrelatime\fR=\fBon\fR | \fBoff\fR\fR +.ad +.sp .6 +.RS 4n +Controls the manner in which the access time is updated when \fBatime=on\fR is set. Turning this property \fBon\fR causes the access time to be updated relative to the modify or change time. Access time is only updated if the previous access time was earlier than the current modify or change time or if the existing access time hasn't been updated within the past 24 hours. The default value is \fBoff\fR. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBreservation\fR=\fIsize\fR | \fBnone\fR\fR +.ad +.sp .6 +.RS 4n +The minimum amount of space guaranteed to a dataset and its descendents. When the amount of space used is below this value, the dataset is treated as if it were taking up the amount of space specified by its reservation. Reservations are accounted for in the parent datasets' space used, and count against the parent datasets' quotas and reservations. +.sp +This property can also be referred to by its shortened column name, \fBreserv\fR. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBsecondarycache\fR=\fBall\fR | \fBnone\fR | \fBmetadata\fR\fR +.ad +.sp .6 +.RS 4n +Controls what is cached in the secondary cache (L2ARC). If this property is set to \fBall\fR, then both user data and metadata is cached. If this property is set to \fBnone\fR, then neither user data nor metadata is cached. If this property is set to \fBmetadata\fR, then only metadata is cached. The default value is \fBall\fR. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBsetuid\fR=\fBon\fR | \fBoff\fR\fR +.ad +.sp .6 +.RS 4n +Controls whether the set-\fBUID\fR bit is respected for the file system. The default value is \fBon\fR. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBsharesmb\fR=\fBon\fR | \fBoff\fR +.ad +.sp .6 +.RS 4n +Controls whether the file system is shared by using \fBSamba USERSHARES\fR, and what options are to be used. 
Otherwise, the file system is automatically shared and unshared with the \fBzfs share\fR and \fBzfs unshare\fR commands. If the property is set to \fBon\fR, the \fBnet\fR(8) command is invoked to create a \fBUSERSHARE\fR. +.sp +Because \fBSMB\fR shares require a resource name, a unique resource name is constructed from the dataset name. The constructed name is a copy of the dataset name except that the characters in the dataset name, which would be illegal in the resource name, are replaced with underscore (\fB_\fR) characters. The ZFS On Linux driver does not (yet) support additional options which might be available in the Solaris version. +.sp +If the \fBsharesmb\fR property is set to \fBoff\fR, the file systems are unshared. +.sp +In Linux, the share is created with the ACL (Access Control List) "Everyone:F" ("F" stands for "full permissions", i.e., read and write permissions) and no guest access (which means samba must be able to authenticate a real user, system passwd/shadow, ldap or smbpasswd based) by default. This means that any additional access control (disallow specific user specific access etc) must be done on the underlying filesystem. +.sp +.in +2 +Example to mount a SMB filesystem shared through ZFS (share/tmp): +.mk +Note that a user and his/her password \fBmust\fR be given! +.sp +.in +2 +smbmount //127.0.0.1/share_tmp /mnt/tmp -o user=workgroup/turbo,password=obrut,uid=1000 +.in -2 +.in -2 +.sp +.ne 2 +.mk +.na +\fBMinimal /etc/samba/smb.conf configuration\fR +.sp +.in +2 +* Samba will need to listen to 'localhost' (127.0.0.1) for the zfs utilities to communicate with samba. This is the default behavior for most Linux distributions. +.sp +* Samba must be able to authenticate a user. This can be done in a number of ways, depending on if using the system password file, LDAP or the Samba specific smbpasswd file. How to do this is outside the scope of this manual. Please refer to the smb.conf(5) manpage for more information.
+.sp +* See the \fBUSERSHARE\fR section of the \fBsmb.conf\fR(5) man page for all configuration options in case you need to modify any options to the share afterwards. Do note that any changes done with the 'net' command will be undone if the share is ever unshared (such as at a reboot etc). In the future, ZoL will be able to set specific options directly using sharesmb=