From 1e69e36e24ed935b429633097fb741f72c581b37 Mon Sep 17 00:00:00 2001 From: Tim Chase Date: Mon, 13 Oct 2014 15:15:11 -0500 Subject: [PATCH] Store common Linux xattrs as native SA with xattr=sa This patch adds a new class of system attributes which are referred to as "Native SA xattrs". The facilities described here are only enabled when "xattr=sa" is set. If xattr=sa is set, the following "security." and "system." xattrs are stored as native SA xattrs rather than as elements of the ZPL_DXATTR SA: xattr System attribute -------------------------------------------------------- security.selinux ZPL_SECURITY_SELINUX security.capability ZPL_SECURITY_CAPABILITY system.posix_acl_access ZPL_SYSTEM_POSIX_ACL_ACCESS system.posix_acl_default ZPL_SYSTEM_POSIX_ACL_DEFAULT Storing these xattrs as native system attributes allows for the ZPL to more easily and naturally operate on them as an atomic part of other operations and will be used as the foundation for fixing issue #2718. Zdb will display these under the new "Native SA xattrs" section. Lookups of these xattrs will use the following priority: 1. Native SA xattr (as shown in the table above) 2. Linux ZPL_DXATTR nvlist 3. Traditional ZFS directory-style xattr Modifications of these xattrs will erase an existing ZPL_DXATTR instance of an identically-named xattr but will not change an existing instance of an identically-named directory-style xattr. If a modification of the ZPL_DXATTR SA causes it to become empty, the ZPL_DXATTR SA itself is deleted to maximize available space in the bonus buffer. The effect is that existing ZPL_DXATTR SAs are automatically upgraded as they are changed. 
--- cmd/zdb/zdb.c | 55 +++++++++++- include/sys/zfs_sa.h | 7 ++ include/sys/zfs_vfsops.h | 2 +- module/zfs/zfs_sa.c | 176 ++++++++++++++++++++++++++++++++++++++- module/zfs/zpl_xattr.c | 151 +++++++++++++++++++++++++++------ 5 files changed, 360 insertions(+), 31 deletions(-) diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c index 5f9770984650..18f6331e2c9c 100644 --- a/cmd/zdb/zdb.c +++ b/cmd/zdb/zdb.c @@ -1576,7 +1576,7 @@ dump_znode_sa_xattr(sa_handle_t *hdl) while ((elem = nvlist_next_nvpair(sa_xattr, elem)) != NULL) sa_xattr_entries++; - (void) printf("\tSA xattrs: %d bytes, %d entries\n\n", + (void) printf("\tSA xattrs: %d bytes, %d entries\n", sa_xattr_size, sa_xattr_entries); while ((elem = nvlist_next_nvpair(sa_xattr, elem)) != NULL) { uchar_t *value; @@ -1597,6 +1597,55 @@ dump_znode_sa_xattr(sa_handle_t *hdl) free(sa_xattr_packed); } +static void +dump_znode_native_sa_xattr(sa_handle_t *hdl) +{ + int error; + boolean_t didheader = B_FALSE; + int i; + int attr_size, idx; + uchar_t *attr_buf; + static struct { + char *name; + zpl_attr_t attr; + } sa_xattrs[] = { + { "security.selinux", ZPL_SECURITY_SELINUX }, + { "security.capability", ZPL_SECURITY_CAPABILITY }, + { "system.posix_acl_access", ZPL_SYSTEM_POSIX_ACL_ACCESS }, + { "system.posix_acl_default", ZPL_SYSTEM_POSIX_ACL_DEFAULT }, + { NULL, 0 } + }; + + for (i = 0; sa_xattrs[i].name != NULL; ++i) { + error = sa_size(hdl, sa_attr_table[sa_xattrs[i].attr], + &attr_size); + if (error) + continue; + attr_buf = malloc(attr_size); + if (attr_buf == NULL) + continue; + error = sa_lookup(hdl, sa_attr_table[sa_xattrs[i].attr], + attr_buf, attr_size); + if (error) { + free(attr_buf); + continue; + } + if (!didheader) { + (void) printf("\tNative SA xattrs:\n"); + didheader = B_TRUE; + } + (void) printf("\t\t%s = ", sa_xattrs[i].name); + for (idx = 0; idx < attr_size; ++idx) { + if (isprint(attr_buf[idx])) + (void) putchar(attr_buf[idx]); + else + (void) printf("\\%3.3o", attr_buf[idx]); + } + (void) putchar('\n'); + 
free(attr_buf); + } +} + /*ARGSUSED*/ static void dump_znode(objset_t *os, uint64_t object, void *data, size_t size) @@ -1611,6 +1660,7 @@ dump_znode(objset_t *os, uint64_t object, void *data, size_t size) sa_bulk_attr_t bulk[12]; int idx = 0; int error; + sa_hdr_phys_t *sahdr = data; if (!sa_loaded) { uint64_t sa_attrs = 0; @@ -1679,6 +1729,8 @@ dump_znode(objset_t *os, uint64_t object, void *data, size_t size) z_mtime = (time_t)modtm[0]; z_ctime = (time_t)chgtm[0]; + (void) printf("\tSA hdrsize %d\n", SA_HDR_SIZE(sahdr)); + (void) printf("\tSA layout %d\n", SA_HDR_LAYOUT_NUM(sahdr)); (void) printf("\tpath %s\n", path); dump_uidgid(os, uid, gid); (void) printf("\tatime %s", ctime(&z_atime)); @@ -1698,6 +1750,7 @@ dump_znode(objset_t *os, uint64_t object, void *data, size_t size) sizeof (uint64_t)) == 0) (void) printf("\trdev 0x%016llx\n", (u_longlong_t)rdev); dump_znode_sa_xattr(hdl); + dump_znode_native_sa_xattr(hdl); sa_handle_destroy(hdl); } diff --git a/include/sys/zfs_sa.h b/include/sys/zfs_sa.h index 735d4b32ad48..835fde6c17e6 100644 --- a/include/sys/zfs_sa.h +++ b/include/sys/zfs_sa.h @@ -74,6 +74,10 @@ typedef enum zpl_attr { ZPL_SCANSTAMP, ZPL_DACL_ACES, ZPL_DXATTR, + ZPL_SECURITY_SELINUX, + ZPL_SECURITY_CAPABILITY, + ZPL_SYSTEM_POSIX_ACL_ACCESS, + ZPL_SYSTEM_POSIX_ACL_DEFAULT, ZPL_END } zpl_attr_t; @@ -137,6 +141,9 @@ void zfs_sa_get_scanstamp(struct znode *, xvattr_t *); void zfs_sa_set_scanstamp(struct znode *, xvattr_t *, dmu_tx_t *); int zfs_sa_get_xattr(struct znode *); int zfs_sa_set_xattr(struct znode *); +int zfs_sa_native_get_xattr(struct znode *, zpl_attr_t, void *, size_t); +int zfs_sa_native_set_xattr(struct znode *, zpl_attr_t, const void *, + size_t, dmu_tx_t *); void zfs_sa_upgrade(struct sa_handle *, dmu_tx_t *); void zfs_sa_upgrade_txholds(dmu_tx_t *, struct znode *); void zfs_sa_init(void); diff --git a/include/sys/zfs_vfsops.h b/include/sys/zfs_vfsops.h index eeeffbe4c72c..c42645024619 100644 --- a/include/sys/zfs_vfsops.h +++ 
b/include/sys/zfs_vfsops.h @@ -82,7 +82,7 @@ typedef struct zfs_sb { boolean_t z_use_fuids; /* version allows fuids */ boolean_t z_replay; /* set during ZIL replay */ boolean_t z_use_sa; /* version allow system attributes */ - boolean_t z_xattr_sa; /* allow xattrs to be stores as SA */ + boolean_t z_xattr_sa; /* allow xattrs to be stored as SA */ uint64_t z_version; /* ZPL version */ uint64_t z_shares_dir; /* hidden shares dir */ kmutex_t z_lock; diff --git a/module/zfs/zfs_sa.c b/module/zfs/zfs_sa.c index ebe92bb3a2ea..30e7544634d7 100644 --- a/module/zfs/zfs_sa.c +++ b/module/zfs/zfs_sa.c @@ -64,6 +64,10 @@ sa_attr_reg_t zfs_attr_table[ZPL_END+1] = { {"ZPL_SCANSTAMP", 32, SA_UINT8_ARRAY, 0}, {"ZPL_DACL_ACES", 0, SA_ACL, 0}, {"ZPL_DXATTR", 0, SA_UINT8_ARRAY, 0}, + {"ZPL_SECURITY_SELINUX", 0, SA_UINT8_ARRAY, 0}, + {"ZPL_SECURITY_CAPABILITY", 0, SA_UINT8_ARRAY, 0}, + {"ZPL_SYSTEM_POSIX_ACL_ACCESS", 0, SA_UINT8_ARRAY, 0}, + {"ZPL_SYSTEM_POSIX_ACL_DEFAULT", 0, SA_UINT8_ARRAY, 0}, {NULL, 0, 0, 0} }; @@ -248,8 +252,11 @@ zfs_sa_set_xattr(znode_t *zp) if (error) { dmu_tx_abort(tx); } else { - error = sa_update(zp->z_sa_hdl, SA_ZPL_DXATTR(zsb), - obj, size, tx); + if (nvlist_next_nvpair(zp->z_xattr_cached, NULL)) + error = sa_update(zp->z_sa_hdl, SA_ZPL_DXATTR(zsb), + obj, size, tx); + else + error = sa_remove(zp->z_sa_hdl, SA_ZPL_DXATTR(zsb), tx); if (error) dmu_tx_abort(tx); else @@ -261,6 +268,169 @@ zfs_sa_set_xattr(znode_t *zp) return (error); } +static void +zfs_sa_remove_xattr(znode_t *zp, zpl_attr_t attr, int dxsize, dmu_tx_t *tx) +{ + extern const char *zpl_native_xattr_to_name(zpl_attr_t); + zfs_sb_t *zsb = ZTOZSB(zp); + const char *name; + char *obj; + int error = 0; + + /* + * Look up the name of the native SA xattr. + */ + name = zpl_native_xattr_to_name(attr); + if (name == NULL) + return; + + /* + * Get the ZPL_DXATTR xattr if it's not cached. 
+ */ + if (zp->z_xattr_cached == NULL) { + error = nvlist_alloc(&zp->z_xattr_cached, + NV_UNIQUE_NAME, KM_SLEEP); + if (error) + return; + obj = sa_spill_alloc(KM_SLEEP); + error = sa_lookup(zp->z_sa_hdl, SA_ZPL_DXATTR(zsb), + obj, dxsize); + if (!error) + error = nvlist_unpack(obj, dxsize, + &zp->z_xattr_cached, KM_SLEEP); + sa_spill_free(obj); + } + if (error || zp->z_xattr_cached == NULL) + return; + + /* + * Try to remove it. + */ + error = nvlist_remove(zp->z_xattr_cached, name, DATA_TYPE_BYTE_ARRAY); + if (error) + return; + + /* + * Remove the ZPL_DXATTR SA if it's empty, otherwise update it + * to remove this xattr. + */ + if (nvlist_next_nvpair(zp->z_xattr_cached, NULL)) { + char *obj; + size_t size; + + error = nvlist_size(zp->z_xattr_cached, &size, NV_ENCODE_XDR); + if (error) + return; + + obj = sa_spill_alloc(KM_SLEEP); + + error = nvlist_pack(zp->z_xattr_cached, &obj, &size, + NV_ENCODE_XDR, KM_SLEEP); + if (error) { + sa_spill_free(obj); + return; + } + + (void) sa_update(zp->z_sa_hdl, SA_ZPL_DXATTR(zsb), + obj, size, tx); + + sa_spill_free(obj); + } else { + (void) sa_remove(zp->z_sa_hdl, SA_ZPL_DXATTR(zsb), tx); + } +} + +int +zfs_sa_native_set_xattr(znode_t *zp, zpl_attr_t attr, + const void *value, size_t size, dmu_tx_t *tx) +{ + zfs_sb_t *zsb = ZTOZSB(zp); + int error, dxsize; + void *obj = NULL; + boolean_t havetx = B_FALSE; + + ASSERT(RW_WRITE_HELD(&zp->z_xattr_lock)); + ASSERT(zp->z_is_sa); + + /* + * Temporary copy of otherwise read-only value to + * satisfy the non-const API of the lower-level SA + * functions. + */ + if (value) { + obj = sa_spill_alloc(KM_SLEEP); + bcopy(value, obj, size); + } + + if (tx) + havetx = B_TRUE; + else + tx = dmu_tx_create(zsb->z_os); + + dmu_tx_hold_sa_create(tx, size); + dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); + + if (!havetx) { + error = dmu_tx_assign(tx, TXG_WAIT); + if (error) { + dmu_tx_abort(tx); + goto out; + } + } + + /* + * Remove an identical instance of this xattr from the ZPL_DXATTR SA. 
+ */ + if (sa_size(zp->z_sa_hdl, SA_ZPL_DXATTR(zsb), &dxsize) == 0) + zfs_sa_remove_xattr(zp, attr, dxsize, tx); + + /* + * Remove or update the native SA xattr. + */ + if (value) + error = sa_update(zp->z_sa_hdl, zsb->z_attr_table[attr], + obj, size, tx); + else { + error = sa_remove(zp->z_sa_hdl, zsb->z_attr_table[attr], tx); + } + + if (!havetx) { + if (error) + dmu_tx_abort(tx); + else + dmu_tx_commit(tx); + } +out: + if (value) + sa_spill_free(obj); + return (error); +} + +int +zfs_sa_native_get_xattr(znode_t *zp, zpl_attr_t attr, void *value, size_t size) +{ + zfs_sb_t *zsb = ZTOZSB(zp); + int attr_size; + int error; + + ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock)); + ASSERT(zp->z_is_sa); + + error = sa_size(zp->z_sa_hdl, zsb->z_attr_table[attr], &attr_size); + if (error) + return (-error); + if (!size) + return (attr_size); + if (size < attr_size) + return (-ERANGE); + error = sa_lookup(zp->z_sa_hdl, zsb->z_attr_table[attr], + value, attr_size); + if (error) + return (-error); + return (attr_size); +} + + /* * I'm not convinced we should do any of this upgrade. 
* since the SA code can read both old/new znode formats @@ -416,6 +586,8 @@ EXPORT_SYMBOL(zfs_attr_table); EXPORT_SYMBOL(zfs_sa_readlink); EXPORT_SYMBOL(zfs_sa_symlink); EXPORT_SYMBOL(zfs_sa_get_scanstamp); +EXPORT_SYMBOL(zfs_sa_native_set_xattr); +EXPORT_SYMBOL(zfs_sa_native_get_xattr); EXPORT_SYMBOL(zfs_sa_set_scanstamp); EXPORT_SYMBOL(zfs_sa_get_xattr); EXPORT_SYMBOL(zfs_sa_set_xattr); diff --git a/module/zfs/zpl_xattr.c b/module/zfs/zpl_xattr.c index 526c3f9e6172..6f02dd2e5a80 100644 --- a/module/zfs/zpl_xattr.c +++ b/module/zfs/zpl_xattr.c @@ -91,6 +91,58 @@ typedef struct xattr_filldir { struct inode *inode; } xattr_filldir_t; +/* + * xattrs to store as native SA with xattr=sa + */ +typedef struct zpl_xattr_sa { + char *name; /* name of xattr */ + zpl_attr_t attr; /* native SA in which to store the xattr */ +} zpl_xattr_sa_t; + +static zpl_xattr_sa_t sa_xattrs[] = { + /* security.selinux */ + { XATTR_SECURITY_PREFIX XATTR_SELINUX_SUFFIX, + ZPL_SECURITY_SELINUX }, + /* security.capability */ + { XATTR_SECURITY_PREFIX XATTR_CAPS_SUFFIX, + ZPL_SECURITY_CAPABILITY }, +#ifdef CONFIG_FS_POSIX_ACL + /* system.posix_acl_access */ + { XATTR_SYSTEM_PREFIX XATTR_POSIX_ACL_ACCESS, + ZPL_SYSTEM_POSIX_ACL_ACCESS }, + /* system.posix_acl_default */ + { XATTR_SYSTEM_PREFIX XATTR_POSIX_ACL_DEFAULT, + ZPL_SYSTEM_POSIX_ACL_DEFAULT }, +#endif /* CONFIG_FS_POSIX_ACL */ + { NULL, 0 }, +}; + +static zpl_xattr_sa_t * +zpl_xattr_is_native_sa(const char *name) { + zpl_xattr_sa_t *zxs; + + if (name == NULL) + return (NULL); + + for (zxs = sa_xattrs; zxs->name; ++zxs) + if (strcmp(name, zxs->name) == 0) + return (zxs); + + return (NULL); +} + +const char * +zpl_native_xattr_to_name(zpl_attr_t attr) +{ + zpl_xattr_sa_t *zxs; + + for (zxs = sa_xattrs; zxs->name; ++zxs) + if (zxs->attr == attr) + return (zxs->name); + + return (NULL); +} + static int zpl_xattr_filldir(xattr_filldir_t *xf, const char *name, int name_len) { @@ -179,12 +231,28 @@ zpl_xattr_list_sa(xattr_filldir_t *xf) znode_t 
*zp = ITOZ(xf->inode); nvpair_t *nvp = NULL; int error = 0; + zpl_xattr_sa_t *zxs; + boolean_t gotnative = B_FALSE; + + for (zxs = sa_xattrs; zxs->name; ++zxs) { + error = zfs_sa_native_get_xattr(zp, zxs->attr, NULL, 0); + if (error >= 0) + error = zpl_xattr_filldir(xf, zxs->name, + strlen(zxs->name)); + if (error == -ENOENT) + error = 0; + if (error) + return (error); + gotnative = B_TRUE; + } mutex_enter(&zp->z_lock); if (zp->z_xattr_cached == NULL) error = -zfs_sa_get_xattr(zp); mutex_exit(&zp->z_lock); + if (gotnative) + error = 0; if (error) return (error); @@ -195,6 +263,9 @@ zpl_xattr_list_sa(xattr_filldir_t *xf) error = zpl_xattr_filldir(xf, nvpair_name(nvp), strlen(nvpair_name(nvp))); + + if (gotnative) + error = 0; if (error) return (error); } @@ -280,9 +351,25 @@ zpl_xattr_get_sa(struct inode *ip, const char *name, void *value, size_t size) uchar_t *nv_value; uint_t nv_size; int error = 0; + zpl_xattr_sa_t *zxs; ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock)); + /* + * Try for a native SA xattr first. + */ + zxs = zpl_xattr_is_native_sa(name); + + if (zxs != NULL) { + error = zfs_sa_native_get_xattr(zp, zxs->attr, value, size); + if (error != -ENOENT) + return (error); + } + error = 0; + + /* + * Next, try for a ZPL_DXATTR-style xattr. 
+ */ mutex_enter(&zp->z_lock); if (zp->z_xattr_cached == NULL) error = -zfs_sa_get_xattr(zp); @@ -292,10 +379,10 @@ zpl_xattr_get_sa(struct inode *ip, const char *name, void *value, size_t size) return (error); ASSERT(zp->z_xattr_cached); - error = -nvlist_lookup_byte_array(zp->z_xattr_cached, name, + error = nvlist_lookup_byte_array(zp->z_xattr_cached, name, &nv_value, &nv_size); if (error) - return (error); + return (-error); if (!size) return (nv_size); @@ -438,39 +525,49 @@ zpl_xattr_set_sa(struct inode *ip, const char *name, const void *value, znode_t *zp = ITOZ(ip); nvlist_t *nvl; size_t sa_size; + zpl_xattr_sa_t *zxs; int error; - ASSERT(zp->z_xattr_cached); - nvl = zp->z_xattr_cached; + zxs = zpl_xattr_is_native_sa(name); - if (value == NULL) { - error = -nvlist_remove(nvl, name, DATA_TYPE_BYTE_ARRAY); - if (error == -ENOENT) - error = zpl_xattr_set_dir(ip, name, NULL, 0, flags, cr); + if (zxs != NULL) { + error = -zfs_sa_native_set_xattr(zp, zxs->attr, value, + size, NULL); + return (error); } else { - /* Limited to 32k to keep nvpair memory allocations small */ - if (size > DXATTR_MAX_ENTRY_SIZE) - return (-EFBIG); - - /* Prevent the DXATTR SA from consuming the entire SA region */ - error = -nvlist_size(nvl, &sa_size, NV_ENCODE_XDR); - if (error) - return (error); + ASSERT(zp->z_xattr_cached); + nvl = zp->z_xattr_cached; + + if (value == NULL) { + error = -nvlist_remove(nvl, name, DATA_TYPE_BYTE_ARRAY); + if (error == -ENOENT) + error = zpl_xattr_set_dir(ip, name, NULL, 0, + flags, cr); + } else { + /* Limited to 32k to keep nvpair allocations small */ + if (size > DXATTR_MAX_ENTRY_SIZE) + return (-EFBIG); + + /* Prevent DXATTR SA from consuming entire SA region */ + error = -nvlist_size(nvl, &sa_size, NV_ENCODE_XDR); + if (error) + return (error); - if (sa_size > DXATTR_MAX_SA_SIZE) - return (-EFBIG); + if (sa_size > DXATTR_MAX_SA_SIZE) + return (-EFBIG); - error = -nvlist_add_byte_array(nvl, name, - (uchar_t *)value, size); - if (error) - return 
(error); - } + error = -nvlist_add_byte_array(nvl, name, + (uchar_t *)value, size); + if (error) + return (error); + } - /* Update the SA for additions, modifications, and removals. */ - if (!error) - error = -zfs_sa_set_xattr(zp); + /* Update the SA for additions, modifications, and removals. */ + if (!error) + error = -zfs_sa_set_xattr(zp); - ASSERT3S(error, <=, 0); + ASSERT3S(error, <=, 0); + } return (error); }