diff --git a/include/sys/dmu.h b/include/sys/dmu.h index d68700d371db..8a1a1e0dda56 100644 --- a/include/sys/dmu.h +++ b/include/sys/dmu.h @@ -142,6 +142,12 @@ typedef enum dmu_object_byteswap { #define DMU_OT_IS_DDT(ot) \ ((ot) == DMU_OT_DDT_ZAP) +#define DMU_OT_IS_CRITICAL(ot) \ + (DMU_OT_IS_METADATA(ot) && \ + (ot) != DMU_OT_DNODE && \ + (ot) != DMU_OT_DIRECTORY_CONTENTS && \ + (ot) != DMU_OT_SA) + /* Note: ztest uses DMU_OT_UINT64_OTHER as a proxy for file blocks */ #define DMU_OT_IS_FILE(ot) \ ((ot) == DMU_OT_PLAIN_FILE_CONTENTS || (ot) == DMU_OT_UINT64_OTHER) diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h index b3fecf489eb0..54bf7f82e351 100644 --- a/include/sys/fs/zfs.h +++ b/include/sys/fs/zfs.h @@ -501,7 +501,9 @@ typedef enum { typedef enum { ZFS_REDUNDANT_METADATA_ALL, - ZFS_REDUNDANT_METADATA_MOST + ZFS_REDUNDANT_METADATA_MOST, + ZFS_REDUNDANT_METADATA_SOME, + ZFS_REDUNDANT_METADATA_NONE } zfs_redundant_metadata_type_t; typedef enum { diff --git a/man/man7/zfsprops.7 b/man/man7/zfsprops.7 index 93a7bfcc865f..e92158a9ac3a 100644 --- a/man/man7/zfsprops.7 +++ b/man/man7/zfsprops.7 @@ -37,7 +37,7 @@ .\" Copyright 2019 Joyent, Inc. .\" Copyright (c) 2019, Kjeld Schouten-Lebbing .\" -.Dd May 24, 2021 +.Dd July 21, 2022 .Dt ZFSPROPS 7 .Os . @@ -1454,7 +1454,7 @@ affects only files created afterward; existing files are unaffected. .Pp This property can also be referred to by its shortened column name, .Sy recsize . -.It Sy redundant_metadata Ns = Ns Sy all Ns | Ns Sy most +.It Sy redundant_metadata Ns = Ns Sy all Ns | Ns Sy most Ns | Ns Sy some Ns | Ns Sy none Controls what types of metadata are stored redundantly. ZFS stores an extra copy of metadata, so that if a single block is corrupted, the amount of user data lost is limited. @@ -1486,7 +1486,7 @@ When set to ZFS stores an extra copy of most types of metadata. This can improve performance of random writes, because less metadata must be written. 
-In practice, at worst about 100 blocks +In practice, at worst about 1000 blocks .Po of .Sy recordsize bytes each @@ -1495,6 +1495,16 @@ of user data can be lost if a single on-disk block is corrupt. The exact behavior of which metadata blocks are stored redundantly may change in future releases. .Pp +When set to +.Sy some , +ZFS stores an extra copy of only a few types of critical metadata. +If a single on-disk block is corrupt, at worst a single user file can be lost. +.Pp +When set to +.Sy none , +ZFS does not store any copies of metadata redundantly through this property. +If a single on-disk block is corrupt, the entire dataset can be lost. +.Pp The default value is .Sy all . .It Sy refquota Ns = Ns Ar size Ns | Ns Sy none diff --git a/module/zcommon/zfs_prop.c b/module/zcommon/zfs_prop.c index 0e91304ecd4b..7aed03f34509 100644 --- a/module/zcommon/zfs_prop.c +++ b/module/zcommon/zfs_prop.c @@ -369,6 +369,8 @@ zfs_prop_init(void) static const zprop_index_t redundant_metadata_table[] = { { "all", ZFS_REDUNDANT_METADATA_ALL }, { "most", ZFS_REDUNDANT_METADATA_MOST }, + { "some", ZFS_REDUNDANT_METADATA_SOME }, + { "none", ZFS_REDUNDANT_METADATA_NONE }, { NULL } }; @@ -388,7 +390,7 @@ zfs_prop_init(void) zprop_register_index(ZFS_PROP_REDUNDANT_METADATA, "redundant_metadata", ZFS_REDUNDANT_METADATA_ALL, PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, - "all | most", "REDUND_MD", + "all | most | some | none", "REDUND_MD", redundant_metadata_table, sfeatures); zprop_register_index(ZFS_PROP_SYNC, "sync", ZFS_SYNC_STANDARD, PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c index 9e67eb51f415..3c2d975fdddf 100644 --- a/module/zfs/dmu.c +++ b/module/zfs/dmu.c @@ -1993,6 +1993,9 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp) checksum = ZIO_CHECKSUM_FLETCHER_4; if (os->os_redundant_metadata == ZFS_REDUNDANT_METADATA_ALL || + (os->os_redundant_metadata == + ZFS_REDUNDANT_METADATA_SOME && + 
DMU_OT_IS_CRITICAL(type)) || (os->os_redundant_metadata == ZFS_REDUNDANT_METADATA_MOST && (level >= zfs_redundant_metadata_most_ditto_level || diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c index 4c20afcdb9c6..7607744ca09b 100644 --- a/module/zfs/dmu_objset.c +++ b/module/zfs/dmu_objset.c @@ -287,7 +287,9 @@ redundant_metadata_changed_cb(void *arg, uint64_t newval) * Inheritance and range checking should have been done by now. */ ASSERT(newval == ZFS_REDUNDANT_METADATA_ALL || - newval == ZFS_REDUNDANT_METADATA_MOST); + newval == ZFS_REDUNDANT_METADATA_MOST || + newval == ZFS_REDUNDANT_METADATA_SOME || + newval == ZFS_REDUNDANT_METADATA_NONE); os->os_redundant_metadata = newval; } diff --git a/module/zfs/dsl_prop.c b/module/zfs/dsl_prop.c index 610e887b3fba..ccaae05a2e6c 100644 --- a/module/zfs/dsl_prop.c +++ b/module/zfs/dsl_prop.c @@ -41,6 +41,7 @@ #define ZPROP_INHERIT_SUFFIX "$inherit" #define ZPROP_RECVD_SUFFIX "$recvd" +#define ZPROP_IUV_SUFFIX "$iuv" static int dodefault(zfs_prop_t prop, int intsz, int numints, void *buf) @@ -69,6 +70,16 @@ dodefault(zfs_prop_t prop, int intsz, int numints, void *buf) return (0); } +static int +dsl_prop_known_index(zfs_prop_t prop, uint64_t value) +{ + const char *str = NULL; + if (zfs_prop_get_type(prop) == PROP_TYPE_INDEX) + return (!zfs_prop_index_to_string(prop, value, &str)); + + return (-1); +} + int dsl_prop_get_dd(dsl_dir_t *dd, const char *propname, int intsz, int numints, void *buf, char *setpoint, boolean_t snapshot) @@ -81,6 +92,7 @@ dsl_prop_get_dd(dsl_dir_t *dd, const char *propname, boolean_t inheriting = B_FALSE; char *inheritstr; char *recvdstr; + char *iuvstr; ASSERT(dsl_pool_config_held(dd->dd_pool)); @@ -91,6 +103,7 @@ dsl_prop_get_dd(dsl_dir_t *dd, const char *propname, inheritable = (prop == ZPROP_USERPROP || zfs_prop_inheritable(prop)); inheritstr = kmem_asprintf("%s%s", propname, ZPROP_INHERIT_SUFFIX); recvdstr = kmem_asprintf("%s%s", propname, ZPROP_RECVD_SUFFIX); + iuvstr = 
kmem_asprintf("%s%s", propname, ZPROP_IUV_SUFFIX); /* * Note: dd may become NULL, therefore we shouldn't dereference it @@ -105,6 +118,18 @@ dsl_prop_get_dd(dsl_dir_t *dd, const char *propname, inheriting = B_TRUE; } + /* Check for an iuv value; only trust buf if the lookup succeeded. */ + err = zap_lookup(mos, dsl_dir_phys(dd)->dd_props_zapobj, + iuvstr, intsz, numints, buf); + if (err == 0 && dsl_prop_known_index( + zfs_name_to_prop(propname), *(uint64_t *)buf) != 1) + err = ENOENT; + if (err != ENOENT) { + if (setpoint != NULL && err == 0) + dsl_dir_name(dd, setpoint); + break; + } + /* Check for a local value. */ err = zap_lookup(mos, dsl_dir_phys(dd)->dd_props_zapobj, propname, intsz, numints, buf); @@ -155,6 +180,7 @@ dsl_prop_get_dd(dsl_dir_t *dd, const char *propname, kmem_strfree(inheritstr); kmem_strfree(recvdstr); + kmem_strfree(iuvstr); return (err); } @@ -647,6 +673,46 @@ dsl_prop_changed_notify(dsl_pool_t *dp, uint64_t ddobj, dsl_dir_rele(dd, FTAG); } + +/* + * For newer values in zfs index type properties, we add a new key + * propname$iuv (iuv = Ignore Unknown Values) to the properties zap object + * to store the new property value and store the default value in the + * existing prop key. As a result, older zfs versions ignore the + * propname$iuv key and use the default property value from the existing + * prop key. 
+ */ + +static void +dsl_prop_set_iuv(objset_t *mos, uint64_t zapobj, const char *propname, + int intsz, int numints, const void *value, dmu_tx_t *tx) +{ + char *iuvstr = kmem_asprintf("%s%s", propname, ZPROP_IUV_SUFFIX); + boolean_t iuv = B_FALSE; + zfs_prop_t prop = zfs_name_to_prop(propname); + + switch (prop) { + case ZFS_PROP_REDUNDANT_METADATA: + if (*(const uint64_t *)value == ZFS_REDUNDANT_METADATA_SOME || + *(const uint64_t *)value == ZFS_REDUNDANT_METADATA_NONE) + iuv = B_TRUE; + break; + default: + break; + } + + if (iuv) { + VERIFY0(zap_update(mos, zapobj, iuvstr, intsz, numints, + value, tx)); + uint64_t val = zfs_prop_default_numeric(prop); + VERIFY0(zap_update(mos, zapobj, propname, intsz, numints, + &val, tx)); + } else { + (void) zap_remove(mos, zapobj, iuvstr, tx); + } + kmem_strfree(iuvstr); +} + void dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname, zprop_source_t source, int intsz, int numints, const void *value, @@ -659,6 +725,7 @@ dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname, const char *valstr = NULL; char *inheritstr; char *recvdstr; + char *iuvstr; char *tbuf = NULL; int err; uint64_t version = spa_version(ds->ds_dir->dd_pool->dp_spa); @@ -692,6 +759,7 @@ dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname, inheritstr = kmem_asprintf("%s%s", propname, ZPROP_INHERIT_SUFFIX); recvdstr = kmem_asprintf("%s%s", propname, ZPROP_RECVD_SUFFIX); + iuvstr = kmem_asprintf("%s%s", propname, ZPROP_IUV_SUFFIX); switch ((int)source) { case ZPROP_SRC_NONE: @@ -709,11 +777,14 @@ dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname, /* * remove propname$inherit * set propname -> value + * for iuv values: propname -> default, propname$iuv -> value */ err = zap_remove(mos, zapobj, inheritstr, tx); ASSERT(err == 0 || err == ENOENT); VERIFY0(zap_update(mos, zapobj, propname, intsz, numints, value, tx)); + dsl_prop_set_iuv(mos, zapobj, propname, intsz, + numints, value, tx); break; case ZPROP_SRC_INHERITED: /* @@ -723,6 +794,8 @@ 
dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname, */ err = zap_remove(mos, zapobj, propname, tx); ASSERT(err == 0 || err == ENOENT); + err = zap_remove(mos, zapobj, iuvstr, tx); + ASSERT(err == 0 || err == ENOENT); if (version >= SPA_VERSION_RECVD_PROPS && dsl_prop_get_int_ds(ds, ZPROP_HAS_RECVD, &dummy) == 0) { dummy = 0; @@ -763,6 +836,7 @@ dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname, kmem_strfree(inheritstr); kmem_strfree(recvdstr); + kmem_strfree(iuvstr); /* * If we are left with an empty snap zap we can destroy it. @@ -1012,6 +1086,14 @@ dsl_prop_get_all_impl(objset_t *mos, uint64_t propobj, propname = za.za_name; source = setpoint; + + /* Skip if an iuv entry is present. */ + valstr = kmem_asprintf("%s%s", propname, + ZPROP_IUV_SUFFIX); + err = zap_contains(mos, propobj, valstr); + kmem_strfree(valstr); + if (err == 0) + continue; } else if (strcmp(suffix, ZPROP_INHERIT_SUFFIX) == 0) { /* Skip explicitly inherited entries. */ continue; @@ -1044,6 +1126,16 @@ dsl_prop_get_all_impl(objset_t *mos, uint64_t propobj, source = ((flags & DSL_PROP_GET_INHERITING) ? 
setpoint : ZPROP_SOURCE_VAL_RECVD); + } else if (strcmp(suffix, ZPROP_IUV_SUFFIX) == 0) { + (void) strlcpy(buf, za.za_name, + MIN(sizeof (buf), suffix - za.za_name + 1)); + propname = buf; + source = setpoint; + prop = zfs_name_to_prop(propname); + + if (dsl_prop_known_index(prop, + za.za_first_integer) != 1) + continue; } else { /* * For backward compatibility, skip suffixes we don't diff --git a/tests/zfs-tests/include/properties.shlib b/tests/zfs-tests/include/properties.shlib index 14b3f4415b7d..f4c3a7e19fa8 100644 --- a/tests/zfs-tests/include/properties.shlib +++ b/tests/zfs-tests/include/properties.shlib @@ -27,7 +27,7 @@ typeset -a canmount_prop_vals=('on' 'off' 'noauto') typeset -a copies_prop_vals=('1' '2' '3') typeset -a logbias_prop_vals=('latency' 'throughput') typeset -a primarycache_prop_vals=('all' 'none' 'metadata') -typeset -a redundant_metadata_prop_vals=('all' 'most') +typeset -a redundant_metadata_prop_vals=('all' 'most' 'some' 'none') typeset -a secondarycache_prop_vals=('all' 'none' 'metadata') typeset -a snapdir_prop_vals=('hidden' 'visible') typeset -a sync_prop_vals=('standard' 'always' 'disabled') diff --git a/tests/zfs-tests/tests/functional/inheritance/inherit_001_pos.ksh b/tests/zfs-tests/tests/functional/inheritance/inherit_001_pos.ksh index e525c51344ad..b9eb810340fd 100755 --- a/tests/zfs-tests/tests/functional/inheritance/inherit_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/inheritance/inherit_001_pos.ksh @@ -376,7 +376,8 @@ set -A prop "checksum" "" \ "sharenfs" "" \ "recordsize" "recsize" \ "snapdir" "" \ - "readonly" "" + "readonly" "" \ + "redundant_metadata" "" # # Note except for the mountpoint default value (which is handled in @@ -387,12 +388,14 @@ set -A prop "checksum" "" \ set -A def_val "on" "on" "on" \ "off" "" \ "hidden" \ - "off" + "off" \ + "all" set -A local_val "off" "off" "off" \ "on" "" \ "visible" \ - "off" + "off" \ + "none" # # Add system specific values