From 6b1b8d7b540c0967f209be69c3c5f6d9698cb12b Mon Sep 17 00:00:00 2001 From: Akash B Date: Wed, 20 Jul 2022 06:55:09 -0400 Subject: [PATCH] Add option none to zfs redundant_metadata property Currently, additional/extra copies are created for metadata in addition to the redundancy provided by the pool(mirror/raidz/draid), due to this 2 times more space is utilized per inode and this decreases the total number of inodes that can be created in the filesystem. By setting redundant_metadata to none, no additional copies of metadata are created, hence can reduce the space consumed by the additional metadata copies and increase the total number of inodes that can be created in the filesystem. Reviewed-by: Dipak Ghosh Signed-off-by: Akash B --- include/sys/dmu.h | 6 ++ include/sys/fs/zfs.h | 4 +- man/man7/zfsprops.7 | 16 +++- module/zcommon/zfs_prop.c | 4 +- module/zfs/dmu.c | 3 + module/zfs/dmu_objset.c | 4 +- module/zfs/dsl_prop.c | 92 +++++++++++++++++++ tests/zfs-tests/include/properties.shlib | 2 +- .../inheritance/inherit_001_pos.ksh | 9 +- 9 files changed, 130 insertions(+), 10 deletions(-) diff --git a/include/sys/dmu.h b/include/sys/dmu.h index d68700d371db..8a1a1e0dda56 100644 --- a/include/sys/dmu.h +++ b/include/sys/dmu.h @@ -142,6 +142,12 @@ typedef enum dmu_object_byteswap { #define DMU_OT_IS_DDT(ot) \ ((ot) == DMU_OT_DDT_ZAP) +#define DMU_OT_IS_CRITICAL(ot) \ + (DMU_OT_IS_METADATA(ot) && \ + (ot) != DMU_OT_DNODE && \ + (ot) != DMU_OT_DIRECTORY_CONTENTS && \ + (ot) != DMU_OT_SA) + /* Note: ztest uses DMU_OT_UINT64_OTHER as a proxy for file blocks */ #define DMU_OT_IS_FILE(ot) \ ((ot) == DMU_OT_PLAIN_FILE_CONTENTS || (ot) == DMU_OT_UINT64_OTHER) diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h index b3fecf489eb0..54bf7f82e351 100644 --- a/include/sys/fs/zfs.h +++ b/include/sys/fs/zfs.h @@ -501,7 +501,9 @@ typedef enum { typedef enum { ZFS_REDUNDANT_METADATA_ALL, - ZFS_REDUNDANT_METADATA_MOST + ZFS_REDUNDANT_METADATA_MOST, + ZFS_REDUNDANT_METADATA_SOME, + ZFS_REDUNDANT_METADATA_NONE } zfs_redundant_metadata_type_t; typedef enum { diff --git a/man/man7/zfsprops.7 b/man/man7/zfsprops.7 index 93a7bfcc865f..e92158a9ac3a 100644 --- a/man/man7/zfsprops.7 +++ b/man/man7/zfsprops.7 @@ -37,7 +37,7 @@ .\" Copyright 2019 Joyent, Inc. .\" Copyright (c) 2019, Kjeld Schouten-Lebbing .\" -.Dd May 24, 2021 +.Dd July 21, 2022 .Dt ZFSPROPS 7 .Os . @@ -1454,7 +1454,7 @@ affects only files created afterward; existing files are unaffected. .Pp This property can also be referred to by its shortened column name, .Sy recsize . -.It Sy redundant_metadata Ns = Ns Sy all Ns | Ns Sy most +.It Sy redundant_metadata Ns = Ns Sy all Ns | Ns Sy most Ns | Ns Sy some Ns | Ns Sy none Controls what types of metadata are stored redundantly. ZFS stores an extra copy of metadata, so that if a single block is corrupted, the amount of user data lost is limited. @@ -1486,7 +1486,7 @@ When set to ZFS stores an extra copy of most types of metadata. This can improve performance of random writes, because less metadata must be written. -In practice, at worst about 100 blocks +In practice, at worst about 1000 blocks .Po of .Sy recordsize bytes each @@ -1495,6 +1495,16 @@ of user data can be lost if a single on-disk block is corrupt. The exact behavior of which metadata blocks are stored redundantly may change in future releases. .Pp +When set to +.Sy some , +ZFS stores an extra copy of few types of critical metadata. +If a single on-disk block is corrupt, at worst a single user file can be lost. +.Pp +When set to +.Sy none , +ZFS does not store any copies of metadata redundantly through this property. +If a single on-disk block is corrupt, entire dataset can be lost. +.Pp The default value is .Sy all . .It Sy refquota Ns = Ns Ar size Ns | Ns Sy none diff --git a/module/zcommon/zfs_prop.c b/module/zcommon/zfs_prop.c index 0e91304ecd4b..7aed03f34509 100644 --- a/module/zcommon/zfs_prop.c +++ b/module/zcommon/zfs_prop.c @@ -369,6 +369,8 @@ zfs_prop_init(void) static const zprop_index_t redundant_metadata_table[] = { { "all", ZFS_REDUNDANT_METADATA_ALL }, { "most", ZFS_REDUNDANT_METADATA_MOST }, + { "some", ZFS_REDUNDANT_METADATA_SOME }, + { "none", ZFS_REDUNDANT_METADATA_NONE }, { NULL } }; @@ -388,7 +390,7 @@ zfs_prop_init(void) zprop_register_index(ZFS_PROP_REDUNDANT_METADATA, "redundant_metadata", ZFS_REDUNDANT_METADATA_ALL, PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, - "all | most", "REDUND_MD", + "all | most | some | none", "REDUND_MD", redundant_metadata_table, sfeatures); zprop_register_index(ZFS_PROP_SYNC, "sync", ZFS_SYNC_STANDARD, PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c index 9e67eb51f415..3c2d975fdddf 100644 --- a/module/zfs/dmu.c +++ b/module/zfs/dmu.c @@ -1993,6 +1993,9 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp) checksum = ZIO_CHECKSUM_FLETCHER_4; if (os->os_redundant_metadata == ZFS_REDUNDANT_METADATA_ALL || + (os->os_redundant_metadata == + ZFS_REDUNDANT_METADATA_SOME && + DMU_OT_IS_CRITICAL(type)) || (os->os_redundant_metadata == ZFS_REDUNDANT_METADATA_MOST && (level >= zfs_redundant_metadata_most_ditto_level || diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c index 4c20afcdb9c6..7607744ca09b 100644 --- a/module/zfs/dmu_objset.c +++ b/module/zfs/dmu_objset.c @@ -287,7 +287,9 @@ redundant_metadata_changed_cb(void *arg, uint64_t newval) * Inheritance and range checking should have been done by now. */ ASSERT(newval == ZFS_REDUNDANT_METADATA_ALL || - newval == ZFS_REDUNDANT_METADATA_MOST); + newval == ZFS_REDUNDANT_METADATA_MOST || + newval == ZFS_REDUNDANT_METADATA_SOME || + newval == ZFS_REDUNDANT_METADATA_NONE); os->os_redundant_metadata = newval; } diff --git a/module/zfs/dsl_prop.c b/module/zfs/dsl_prop.c index 610e887b3fba..ccaae05a2e6c 100644 --- a/module/zfs/dsl_prop.c +++ b/module/zfs/dsl_prop.c @@ -41,6 +41,7 @@ #define ZPROP_INHERIT_SUFFIX "$inherit" #define ZPROP_RECVD_SUFFIX "$recvd" +#define ZPROP_IUV_SUFFIX "$iuv" static int dodefault(zfs_prop_t prop, int intsz, int numints, void *buf) @@ -69,6 +70,16 @@ dodefault(zfs_prop_t prop, int intsz, int numints, void *buf) return (0); } +static int +dsl_prop_known_index(zfs_prop_t prop, uint64_t value) +{ + const char *str = NULL; + if (zfs_prop_get_type(prop) == PROP_TYPE_INDEX) + return (!zfs_prop_index_to_string(prop, value, &str)); + + return (-1); +} + int dsl_prop_get_dd(dsl_dir_t *dd, const char *propname, int intsz, int numints, void *buf, char *setpoint, boolean_t snapshot) @@ -81,6 +92,7 @@ dsl_prop_get_dd(dsl_dir_t *dd, const char *propname, boolean_t inheriting = B_FALSE; char *inheritstr; char *recvdstr; + char *iuvstr; ASSERT(dsl_pool_config_held(dd->dd_pool)); @@ -91,6 +103,7 @@ dsl_prop_get_dd(dsl_dir_t *dd, const char *propname, inheritable = (prop == ZPROP_USERPROP || zfs_prop_inheritable(prop)); inheritstr = kmem_asprintf("%s%s", propname, ZPROP_INHERIT_SUFFIX); recvdstr = kmem_asprintf("%s%s", propname, ZPROP_RECVD_SUFFIX); + iuvstr = kmem_asprintf("%s%s", propname, ZPROP_IUV_SUFFIX); /* * Note: dd may become NULL, therefore we shouldn't dereference it @@ -105,6 +118,18 @@ dsl_prop_get_dd(dsl_dir_t *dd, const char *propname, inheriting = B_TRUE; } + /* Check for a iuv value. */ + err = zap_lookup(mos, dsl_dir_phys(dd)->dd_props_zapobj, + iuvstr, intsz, numints, buf); + if (dsl_prop_known_index(zfs_name_to_prop(propname), + *(uint64_t *)buf) != 1) + err = ENOENT; + if (err != ENOENT) { + if (setpoint != NULL && err == 0) + dsl_dir_name(dd, setpoint); + break; + } + /* Check for a local value. */ err = zap_lookup(mos, dsl_dir_phys(dd)->dd_props_zapobj, propname, intsz, numints, buf); @@ -155,6 +180,7 @@ dsl_prop_get_dd(dsl_dir_t *dd, const char *propname, kmem_strfree(inheritstr); kmem_strfree(recvdstr); + kmem_strfree(iuvstr); return (err); } @@ -647,6 +673,46 @@ dsl_prop_changed_notify(dsl_pool_t *dp, uint64_t ddobj, dsl_dir_rele(dd, FTAG); } + +/* + * For newer values in zfs index type properties, we add a new key + * propname$iuv (iuv = Ignore Unknown Values) to the properties zap object + * to store the new property value and store the default value in the + * existing prop key. So that the propname$iuv key is ignored by the older zfs + * versions and the default property value from the existing prop key is + * used. + */ + +static void +dsl_prop_set_iuv(objset_t *mos, uint64_t zapobj, const char *propname, + int intsz, int numints, const void *value, dmu_tx_t *tx) +{ + char *iuvstr = kmem_asprintf("%s%s", propname, ZPROP_IUV_SUFFIX); + boolean_t iuv = B_FALSE; + zfs_prop_t prop = zfs_name_to_prop(propname); + + switch (prop) { + case ZFS_PROP_REDUNDANT_METADATA: + if (*(uint64_t *)value == ZFS_REDUNDANT_METADATA_SOME || + *(uint64_t *)value == ZFS_REDUNDANT_METADATA_NONE) + iuv = B_TRUE; + break; + default: + break; + } + + if (iuv) { + VERIFY0(zap_update(mos, zapobj, iuvstr, intsz, numints, + value, tx)); + uint64_t val = zfs_prop_default_numeric(prop); + VERIFY0(zap_update(mos, zapobj, propname, intsz, numints, + &val, tx)); + } else { + zap_remove(mos, zapobj, iuvstr, tx); + } + kmem_strfree(iuvstr); +} + void dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname, zprop_source_t source, int intsz, int numints, const void *value, @@ -659,6 +725,7 @@ dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname, const char *valstr = NULL; char *inheritstr; char *recvdstr; + char *iuvstr; char *tbuf = NULL; int err; uint64_t version = spa_version(ds->ds_dir->dd_pool->dp_spa); @@ -692,6 +759,7 @@ dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname, inheritstr = kmem_asprintf("%s%s", propname, ZPROP_INHERIT_SUFFIX); recvdstr = kmem_asprintf("%s%s", propname, ZPROP_RECVD_SUFFIX); + iuvstr = kmem_asprintf("%s%s", propname, ZPROP_IUV_SUFFIX); switch ((int)source) { case ZPROP_SRC_NONE: @@ -709,11 +777,14 @@ dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname, /* * remove propname$inherit * set propname -> value + * set propname$iuv -> new property value */ err = zap_remove(mos, zapobj, inheritstr, tx); ASSERT(err == 0 || err == ENOENT); VERIFY0(zap_update(mos, zapobj, propname, intsz, numints, value, tx)); + (void) dsl_prop_set_iuv(mos, zapobj, propname, intsz, + numints, value, tx); break; case ZPROP_SRC_INHERITED: /* @@ -723,6 +794,8 @@ dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname, */ err = zap_remove(mos, zapobj, propname, tx); ASSERT(err == 0 || err == ENOENT); + err = zap_remove(mos, zapobj, iuvstr, tx); + ASSERT(err == 0 || err == ENOENT); if (version >= SPA_VERSION_RECVD_PROPS && dsl_prop_get_int_ds(ds, ZPROP_HAS_RECVD, &dummy) == 0) { dummy = 0; @@ -763,6 +836,7 @@ dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname, kmem_strfree(inheritstr); kmem_strfree(recvdstr); + kmem_strfree(iuvstr); /* * If we are left with an empty snap zap we can destroy it. @@ -1012,6 +1086,14 @@ dsl_prop_get_all_impl(objset_t *mos, uint64_t propobj, propname = za.za_name; source = setpoint; + + /* Skip if iuv entries are preset. */ + valstr = kmem_asprintf("%s%s", propname, + ZPROP_IUV_SUFFIX); + err = zap_contains(mos, propobj, valstr); + kmem_strfree(valstr); + if (err == 0) + continue; } else if (strcmp(suffix, ZPROP_INHERIT_SUFFIX) == 0) { /* Skip explicitly inherited entries. */ continue; @@ -1044,6 +1126,16 @@ dsl_prop_get_all_impl(objset_t *mos, uint64_t propobj, source = ((flags & DSL_PROP_GET_INHERITING) ? setpoint : ZPROP_SOURCE_VAL_RECVD); + } else if (strcmp(suffix, ZPROP_IUV_SUFFIX) == 0) { + (void) strlcpy(buf, za.za_name, + MIN(sizeof (buf), suffix - za.za_name + 1)); + propname = buf; + source = setpoint; + prop = zfs_name_to_prop(propname); + + if (dsl_prop_known_index(prop, + za.za_first_integer) != 1) + continue; } else { /* * For backward compatibility, skip suffixes we don't diff --git a/tests/zfs-tests/include/properties.shlib b/tests/zfs-tests/include/properties.shlib index 14b3f4415b7d..f4c3a7e19fa8 100644 --- a/tests/zfs-tests/include/properties.shlib +++ b/tests/zfs-tests/include/properties.shlib @@ -27,7 +27,7 @@ typeset -a canmount_prop_vals=('on' 'off' 'noauto') typeset -a copies_prop_vals=('1' '2' '3') typeset -a logbias_prop_vals=('latency' 'throughput') typeset -a primarycache_prop_vals=('all' 'none' 'metadata') -typeset -a redundant_metadata_prop_vals=('all' 'most') +typeset -a redundant_metadata_prop_vals=('all' 'most' 'some' 'none') typeset -a secondarycache_prop_vals=('all' 'none' 'metadata') typeset -a snapdir_prop_vals=('hidden' 'visible') typeset -a sync_prop_vals=('standard' 'always' 'disabled') diff --git a/tests/zfs-tests/tests/functional/inheritance/inherit_001_pos.ksh b/tests/zfs-tests/tests/functional/inheritance/inherit_001_pos.ksh index e525c51344ad..b9eb810340fd 100755 --- a/tests/zfs-tests/tests/functional/inheritance/inherit_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/inheritance/inherit_001_pos.ksh @@ -376,7 +376,8 @@ set -A prop "checksum" "" \ "sharenfs" "" \ "recordsize" "recsize" \ "snapdir" "" \ - "readonly" "" + "readonly" "" \ + "redundant_metadata" "" # # Note except for the mountpoint default value (which is handled in @@ -387,12 +388,14 @@ set -A prop "checksum" "" \ set -A def_val "on" "on" "on" \ "off" "" \ "hidden" \ - "off" + "off" \ + "all" set -A local_val "off" "off" "off" \ "on" "" \ "visible" \ - "off" + "off" \ + "none" # # Add system specific values