Skip to content

Commit

Permalink
Add option none to zfs redundant_metadata property
Browse files Browse the repository at this point in the history
Currently, extra copies of metadata are created in addition to the
redundancy provided by the pool (mirror/raidz/draid). As a result,
twice as much space is used per inode, which decreases the total
number of inodes that can be created in the filesystem. Setting
redundant_metadata to none creates no additional copies of metadata,
which reduces the space consumed by metadata and increases the total
number of inodes that can be created in the filesystem.

Reviewed-by: Dipak Ghosh <[email protected]>
Signed-off-by: Akash B <[email protected]>
  • Loading branch information
akashb-22 committed Oct 7, 2022
1 parent 2ba240f commit b5ed41d
Show file tree
Hide file tree
Showing 9 changed files with 234 additions and 64 deletions.
3 changes: 3 additions & 0 deletions include/sys/dmu.h
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,8 @@ typedef enum dmu_object_byteswap {
/* True only when the object type ot equals DMU_OT_DDT_ZAP. */
#define DMU_OT_IS_DDT(ot) \
((ot) == DMU_OT_DDT_ZAP)

/*
 * Evaluates to the ot_critical flag of the dmu_ot[] entry for object type ot.
 * NOTE(review): ot is used as a direct index into dmu_ot[], which has
 * DMU_OT_NUMTYPES entries; confirm every caller passes a value inside that
 * range, since no bounds check is performed here.
 */
#define DMU_OT_IS_CRITICAL(ot) (dmu_ot[ot].ot_critical)

/*
 * True when the object type ot is DMU_OT_PLAIN_FILE_CONTENTS or
 * DMU_OT_UINT64_OTHER.
 */
/* Note: ztest uses DMU_OT_UINT64_OTHER as a proxy for file blocks */
#define DMU_OT_IS_FILE(ot) \
((ot) == DMU_OT_PLAIN_FILE_CONTENTS || (ot) == DMU_OT_UINT64_OTHER)
Expand Down Expand Up @@ -898,6 +900,7 @@ typedef struct dmu_object_type_info {
boolean_t ot_metadata;
boolean_t ot_dbuf_metadata_cache;
boolean_t ot_encrypt;
boolean_t ot_critical;
const char *ot_name;
} dmu_object_type_info_t;

Expand Down
4 changes: 3 additions & 1 deletion include/sys/fs/zfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -501,7 +501,9 @@ typedef enum {

/*
 * Values of the redundant_metadata dataset property.  The property value
 * table maps the names "all", "most", "some" and "none" to these constants,
 * and ZFS_REDUNDANT_METADATA_ALL is the value registered as the default.
 */
typedef enum {
ZFS_REDUNDANT_METADATA_ALL,
ZFS_REDUNDANT_METADATA_MOST
ZFS_REDUNDANT_METADATA_MOST,
ZFS_REDUNDANT_METADATA_SOME,
ZFS_REDUNDANT_METADATA_NONE
} zfs_redundant_metadata_type_t;

typedef enum {
Expand Down
12 changes: 10 additions & 2 deletions man/man7/zfsprops.7
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
.\" Copyright 2019 Joyent, Inc.
.\" Copyright (c) 2019, Kjeld Schouten-Lebbing
.\"
.Dd May 24, 2021
.Dd July 21, 2022
.Dt ZFSPROPS 7
.Os
.
Expand Down Expand Up @@ -1454,7 +1454,7 @@ affects only files created afterward; existing files are unaffected.
.Pp
This property can also be referred to by its shortened column name,
.Sy recsize .
.It Sy redundant_metadata Ns = Ns Sy all Ns | Ns Sy most
.It Sy redundant_metadata Ns = Ns Sy all Ns | Ns Sy most Ns | Ns Sy some Ns | Ns Sy none
Controls what types of metadata are stored redundantly.
ZFS stores an extra copy of metadata, so that if a single block is corrupted,
the amount of user data lost is limited.
Expand Down Expand Up @@ -1495,6 +1495,14 @@ of user data can be lost if a single on-disk block is corrupt.
The exact behavior of which metadata blocks are stored redundantly may change in
future releases.
.Pp
When set to
.Sy some ,
ZFS stores an extra copy of only a few critical types of metadata.
.Pp
When set to
.Sy none ,
ZFS does not store any additional copies of metadata as a result of this property.
.Pp
The default value is
.Sy all .
.It Sy refquota Ns = Ns Ar size Ns | Ns Sy none
Expand Down
4 changes: 3 additions & 1 deletion module/zcommon/zfs_prop.c
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,8 @@ zfs_prop_init(void)
/*
 * Value table for the redundant_metadata property: each entry pairs a
 * value name with its ZFS_REDUNDANT_METADATA_* constant.  This table is
 * the one handed to zprop_register_index() for
 * ZFS_PROP_REDUNDANT_METADATA.  The trailing { NULL } entry presumably
 * marks the end of the table -- confirm against zprop_index_t consumers.
 */
static const zprop_index_t redundant_metadata_table[] = {
{ "all", ZFS_REDUNDANT_METADATA_ALL },
{ "most", ZFS_REDUNDANT_METADATA_MOST },
{ "some", ZFS_REDUNDANT_METADATA_SOME },
{ "none", ZFS_REDUNDANT_METADATA_NONE },
{ NULL }
};

Expand All @@ -388,7 +390,7 @@ zfs_prop_init(void)
zprop_register_index(ZFS_PROP_REDUNDANT_METADATA, "redundant_metadata",
ZFS_REDUNDANT_METADATA_ALL,
PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
"all | most", "REDUND_MD",
"all | most | some | none", "REDUND_MD",
redundant_metadata_table, sfeatures);
zprop_register_index(ZFS_PROP_SYNC, "sync", ZFS_SYNC_STANDARD,
PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
Expand Down
168 changes: 113 additions & 55 deletions module/zfs/dmu.c
Original file line number Diff line number Diff line change
Expand Up @@ -89,60 +89,114 @@ static int zfs_dmu_offset_next_sync = 1;
uint_t dmu_prefetch_max = 8 * SPA_MAXBLOCKSIZE;

/*
 * Per-object-type attribute table with DMU_OT_NUMTYPES entries.
 * DMU_OT_IS_CRITICAL() reads dmu_ot[ot].ot_critical, so entries are looked
 * up by object type value.
 *
 * Initializers are positional.  In the rows that carry four booleans, the
 * values after the leading DMU_BSWAP_* constant are, in order:
 * ot_metadata, ot_dbuf_metadata_cache, ot_encrypt, ot_critical, and then
 * the ot_name string.  The rows that carry only three booleans are the
 * earlier form of the same entries, written before ot_critical existed.
 *
 * NOTE(review): the struct member that receives the DMU_BSWAP_* value is
 * not visible in this hunk -- confirm against dmu_object_type_info_t.
 */
const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES] = {
{DMU_BSWAP_UINT8, TRUE, FALSE, FALSE, "unallocated" },
{DMU_BSWAP_ZAP, TRUE, TRUE, FALSE, "object directory" },
{DMU_BSWAP_UINT64, TRUE, TRUE, FALSE, "object array" },
{DMU_BSWAP_UINT8, TRUE, FALSE, FALSE, "packed nvlist" },
{DMU_BSWAP_UINT64, TRUE, FALSE, FALSE, "packed nvlist size" },
{DMU_BSWAP_UINT64, TRUE, FALSE, FALSE, "bpobj" },
{DMU_BSWAP_UINT64, TRUE, FALSE, FALSE, "bpobj header" },
{DMU_BSWAP_UINT64, TRUE, FALSE, FALSE, "SPA space map header" },
{DMU_BSWAP_UINT64, TRUE, FALSE, FALSE, "SPA space map" },
{DMU_BSWAP_UINT64, TRUE, FALSE, TRUE, "ZIL intent log" },
{DMU_BSWAP_DNODE, TRUE, FALSE, TRUE, "DMU dnode" },
{DMU_BSWAP_OBJSET, TRUE, TRUE, FALSE, "DMU objset" },
{DMU_BSWAP_UINT64, TRUE, TRUE, FALSE, "DSL directory" },
{DMU_BSWAP_ZAP, TRUE, TRUE, FALSE, "DSL directory child map"},
{DMU_BSWAP_ZAP, TRUE, TRUE, FALSE, "DSL dataset snap map" },
{DMU_BSWAP_ZAP, TRUE, TRUE, FALSE, "DSL props" },
{DMU_BSWAP_UINT64, TRUE, TRUE, FALSE, "DSL dataset" },
{DMU_BSWAP_ZNODE, TRUE, FALSE, FALSE, "ZFS znode" },
{DMU_BSWAP_OLDACL, TRUE, FALSE, TRUE, "ZFS V0 ACL" },
{DMU_BSWAP_UINT8, FALSE, FALSE, TRUE, "ZFS plain file" },
{DMU_BSWAP_ZAP, TRUE, FALSE, TRUE, "ZFS directory" },
{DMU_BSWAP_ZAP, TRUE, FALSE, FALSE, "ZFS master node" },
{DMU_BSWAP_ZAP, TRUE, FALSE, TRUE, "ZFS delete queue" },
{DMU_BSWAP_UINT8, FALSE, FALSE, TRUE, "zvol object" },
{DMU_BSWAP_ZAP, TRUE, FALSE, FALSE, "zvol prop" },
{DMU_BSWAP_UINT8, FALSE, FALSE, TRUE, "other uint8[]" },
{DMU_BSWAP_UINT64, FALSE, FALSE, TRUE, "other uint64[]" },
{DMU_BSWAP_ZAP, TRUE, FALSE, FALSE, "other ZAP" },
{DMU_BSWAP_ZAP, TRUE, FALSE, FALSE, "persistent error log" },
{DMU_BSWAP_UINT8, TRUE, FALSE, FALSE, "SPA history" },
{DMU_BSWAP_UINT64, TRUE, FALSE, FALSE, "SPA history offsets" },
{DMU_BSWAP_ZAP, TRUE, TRUE, FALSE, "Pool properties" },
{DMU_BSWAP_ZAP, TRUE, TRUE, FALSE, "DSL permissions" },
{DMU_BSWAP_ACL, TRUE, FALSE, TRUE, "ZFS ACL" },
{DMU_BSWAP_UINT8, TRUE, FALSE, TRUE, "ZFS SYSACL" },
{DMU_BSWAP_UINT8, TRUE, FALSE, TRUE, "FUID table" },
{DMU_BSWAP_UINT64, TRUE, FALSE, FALSE, "FUID table size" },
{DMU_BSWAP_ZAP, TRUE, TRUE, FALSE, "DSL dataset next clones"},
{DMU_BSWAP_ZAP, TRUE, FALSE, FALSE, "scan work queue" },
{DMU_BSWAP_ZAP, TRUE, FALSE, TRUE, "ZFS user/group/project used" },
{DMU_BSWAP_ZAP, TRUE, FALSE, TRUE, "ZFS user/group/project quota"},
{DMU_BSWAP_ZAP, TRUE, TRUE, FALSE, "snapshot refcount tags"},
{DMU_BSWAP_ZAP, TRUE, FALSE, FALSE, "DDT ZAP algorithm" },
{DMU_BSWAP_ZAP, TRUE, FALSE, FALSE, "DDT statistics" },
{DMU_BSWAP_UINT8, TRUE, FALSE, TRUE, "System attributes" },
{DMU_BSWAP_ZAP, TRUE, FALSE, TRUE, "SA master node" },
{DMU_BSWAP_ZAP, TRUE, FALSE, TRUE, "SA attr registration" },
{DMU_BSWAP_ZAP, TRUE, FALSE, TRUE, "SA attr layouts" },
{DMU_BSWAP_ZAP, TRUE, FALSE, FALSE, "scan translations" },
{DMU_BSWAP_UINT8, FALSE, FALSE, TRUE, "deduplicated block" },
{DMU_BSWAP_ZAP, TRUE, TRUE, FALSE, "DSL deadlist map" },
{DMU_BSWAP_UINT64, TRUE, TRUE, FALSE, "DSL deadlist map hdr" },
{DMU_BSWAP_ZAP, TRUE, TRUE, FALSE, "DSL dir clones" },
{DMU_BSWAP_UINT64, TRUE, FALSE, FALSE, "bpobj subobj" }
{DMU_BSWAP_UINT8, TRUE, FALSE, FALSE, FALSE,
"unallocated" },
{DMU_BSWAP_ZAP, TRUE, TRUE, FALSE, FALSE,
"object directory" },
{DMU_BSWAP_UINT64, TRUE, TRUE, FALSE, FALSE,
"object array" },
{DMU_BSWAP_UINT8, TRUE, FALSE, FALSE, FALSE,
"packed nvlist" },
{DMU_BSWAP_UINT64, TRUE, FALSE, FALSE, FALSE,
"packed nvlist size" },
{DMU_BSWAP_UINT64, TRUE, FALSE, FALSE, FALSE,
"bpobj" },
{DMU_BSWAP_UINT64, TRUE, FALSE, FALSE, FALSE,
"bpobj header" },
{DMU_BSWAP_UINT64, TRUE, FALSE, FALSE, FALSE,
"SPA space map header" },
{DMU_BSWAP_UINT64, TRUE, FALSE, FALSE, FALSE,
"SPA space map" },
{DMU_BSWAP_UINT64, TRUE, FALSE, TRUE, FALSE,
"ZIL intent log" },
{DMU_BSWAP_DNODE, TRUE, FALSE, TRUE, FALSE,
"DMU dnode" },
{DMU_BSWAP_OBJSET, TRUE, TRUE, FALSE, TRUE,
"DMU objset" },
{DMU_BSWAP_UINT64, TRUE, TRUE, FALSE, TRUE,
"DSL directory" },
{DMU_BSWAP_ZAP, TRUE, TRUE, FALSE, FALSE,
"DSL directory child map" },
{DMU_BSWAP_ZAP, TRUE, TRUE, FALSE, FALSE,
"DSL dataset snap map" },
{DMU_BSWAP_ZAP, TRUE, TRUE, FALSE, TRUE,
"DSL props" },
{DMU_BSWAP_UINT64, TRUE, TRUE, FALSE, TRUE,
"DSL dataset" },
{DMU_BSWAP_ZNODE, TRUE, FALSE, FALSE, FALSE,
"ZFS znode" },
{DMU_BSWAP_OLDACL, TRUE, FALSE, TRUE, FALSE,
"ZFS V0 ACL" },
{DMU_BSWAP_UINT8, FALSE, FALSE, TRUE, FALSE,
"ZFS plain file" },
{DMU_BSWAP_ZAP, TRUE, FALSE, TRUE, FALSE,
"ZFS directory" },
{DMU_BSWAP_ZAP, TRUE, FALSE, FALSE, TRUE,
"ZFS master node" },
{DMU_BSWAP_ZAP, TRUE, FALSE, TRUE, FALSE,
"ZFS delete queue" },
{DMU_BSWAP_UINT8, FALSE, FALSE, TRUE, FALSE,
"zvol object" },
{DMU_BSWAP_ZAP, TRUE, FALSE, FALSE, TRUE,
"zvol prop" },
{DMU_BSWAP_UINT8, FALSE, FALSE, TRUE, FALSE,
"other uint8[]" },
{DMU_BSWAP_UINT64, FALSE, FALSE, TRUE, FALSE,
"other uint64[]" },
{DMU_BSWAP_ZAP, TRUE, FALSE, FALSE, FALSE,
"other ZAP" },
{DMU_BSWAP_ZAP, TRUE, FALSE, FALSE, FALSE,
"persistent error log" },
{DMU_BSWAP_UINT8, TRUE, FALSE, FALSE, FALSE,
"SPA history" },
{DMU_BSWAP_UINT64, TRUE, FALSE, FALSE, FALSE,
"SPA history offsets" },
{DMU_BSWAP_ZAP, TRUE, TRUE, FALSE, TRUE,
"Pool properties" },
{DMU_BSWAP_ZAP, TRUE, TRUE, FALSE, FALSE,
"DSL permissions" },
{DMU_BSWAP_ACL, TRUE, FALSE, TRUE, FALSE,
"ZFS ACL" },
{DMU_BSWAP_UINT8, TRUE, FALSE, TRUE, FALSE,
"ZFS SYSACL" },
{DMU_BSWAP_UINT8, TRUE, FALSE, TRUE, FALSE,
"FUID table" },
{DMU_BSWAP_UINT64, TRUE, FALSE, FALSE, FALSE,
"FUID table size" },
{DMU_BSWAP_ZAP, TRUE, TRUE, FALSE, FALSE,
"DSL dataset next clones" },
{DMU_BSWAP_ZAP, TRUE, FALSE, FALSE, FALSE,
"scan work queue" },
{DMU_BSWAP_ZAP, TRUE, FALSE, TRUE, FALSE,
"ZFS user/group/project used" },
{DMU_BSWAP_ZAP, TRUE, FALSE, TRUE, FALSE,
"ZFS user/group/project quota" },
{DMU_BSWAP_ZAP, TRUE, TRUE, FALSE, FALSE,
"snapshot refcount tags" },
{DMU_BSWAP_ZAP, TRUE, FALSE, FALSE, FALSE,
"DDT ZAP algorithm" },
{DMU_BSWAP_ZAP, TRUE, FALSE, FALSE, FALSE,
"DDT statistics" },
{DMU_BSWAP_UINT8, TRUE, FALSE, TRUE, TRUE,
"System attributes" },
{DMU_BSWAP_ZAP, TRUE, FALSE, TRUE, TRUE,
"SA master node" },
{DMU_BSWAP_ZAP, TRUE, FALSE, TRUE, TRUE,
"SA attr registration" },
{DMU_BSWAP_ZAP, TRUE, FALSE, TRUE, TRUE,
"SA attr layouts" },
{DMU_BSWAP_ZAP, TRUE, FALSE, FALSE, FALSE,
"scan translations" },
{DMU_BSWAP_UINT8, FALSE, FALSE, TRUE, FALSE,
"deduplicated block" },
{DMU_BSWAP_ZAP, TRUE, TRUE, FALSE, FALSE,
"DSL deadlist map" },
{DMU_BSWAP_UINT64, TRUE, TRUE, FALSE, FALSE,
"DSL deadlist map hdr" },
{DMU_BSWAP_ZAP, TRUE, TRUE, FALSE, FALSE,
"DSL dir clones" },
{DMU_BSWAP_UINT64, TRUE, FALSE, FALSE, FALSE,
"bpobj subobj" }
};

dmu_object_byteswap_info_t dmu_ot_byteswap[DMU_BSWAP_NUMFUNCS] = {
Expand Down Expand Up @@ -1992,7 +2046,11 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp)
ZCHECKSUM_FLAG_EMBEDDED))
checksum = ZIO_CHECKSUM_FLETCHER_4;

if (os->os_redundant_metadata == ZFS_REDUNDANT_METADATA_ALL ||
if (DMU_OT_IS_CRITICAL(type) || os->os_redundant_metadata ==
ZFS_REDUNDANT_METADATA_ALL ||
(os->os_redundant_metadata ==
ZFS_REDUNDANT_METADATA_SOME &&
DMU_OT_IS_METADATA(type)) ||
(os->os_redundant_metadata ==
ZFS_REDUNDANT_METADATA_MOST &&
(level >= zfs_redundant_metadata_most_ditto_level ||
Expand Down
4 changes: 3 additions & 1 deletion module/zfs/dmu_objset.c
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,9 @@ redundant_metadata_changed_cb(void *arg, uint64_t newval)
* Inheritance and range checking should have been done by now.
*/
ASSERT(newval == ZFS_REDUNDANT_METADATA_ALL ||
newval == ZFS_REDUNDANT_METADATA_MOST);
newval == ZFS_REDUNDANT_METADATA_MOST ||
newval == ZFS_REDUNDANT_METADATA_SOME ||
newval == ZFS_REDUNDANT_METADATA_NONE);

os->os_redundant_metadata = newval;
}
Expand Down
Loading

0 comments on commit b5ed41d

Please sign in to comment.