Skip to content

Commit

Permalink
Store common Linux xattrs as native SA with xattr=sa openzfs#2809
Browse files Browse the repository at this point in the history
[dweeezil]

There are 2 commits here. The first, dweeezil/zfs@c53847e, fixes deletion of system attributes.

The second is a intended to be the foundation for fixing openzfs#2718 but it should otherwise be able to stand on its own.

I've given this patch stack fairly light manual testing as well as running the "pjd" posixacl test suite and some short ztest runs.

I'm submitting this now as a functional WIP to get feedback from other developers regarding the overall concept and its implementation. There are a handful of issues I'll point out about the code:

The list of xattrs to store as native SAs is stored statically in zpl_xattr.c which made it difficult to share with zdb. The same list (in a different format) exists within zdb.c as part of the new
dump_znode_native_sa_xattr() function.
The management of existing ZPL_DXATTR SAs required a helper function to translate from a zpl_attr_t to its name. As far as I could tell, there were no existing "upcalls" from the ZoL ZPL to the native
Solaris ZPL. Rather than trying to #include the ZPL definitions in zfs_sa.c, I simply added an appropriate extern declaration. This goes against the current coding style.
The ZPL_DXATTR SA cleanup performed by zfs_sa_remove_xattr adds overhead which could be avoided if it was known that no ZPL_DXATTRs exist. To that end, it's only invoked after determining whether a
ZPL_DXATTR exists (which should be a low-overhead operation).
I'd like to add a feature flag which is activated when a Native SA xattr is set for the first time; the feature flag would not be able to be deactivated. We might also want a feature flag to do the same
with the existing ZPL_DXATTR SAs.
I'd also like feedback on the first commit in this stack. I discovered the problem during testing. My concern is that all other OpenZFS implementations likely share the identical code. I think the
problem isn't seen often because SAs are rarely deleted. My test system created sort of a "perfect storm" by "stacking" all the variably-sized SAs at the ends of the registration.
  • Loading branch information
kernelOfTruth committed Dec 13, 2014
1 parent db0ccb3 commit a098c03
Show file tree
Hide file tree
Showing 6 changed files with 362 additions and 38 deletions.
55 changes: 54 additions & 1 deletion cmd/zdb/zdb.c
Original file line number Diff line number Diff line change
Expand Up @@ -1583,7 +1583,7 @@ dump_znode_sa_xattr(sa_handle_t *hdl)
while ((elem = nvlist_next_nvpair(sa_xattr, elem)) != NULL)
sa_xattr_entries++;

(void) printf("\tSA xattrs: %d bytes, %d entries\n\n",
(void) printf("\tSA xattrs: %d bytes, %d entries\n",
sa_xattr_size, sa_xattr_entries);
while ((elem = nvlist_next_nvpair(sa_xattr, elem)) != NULL) {
uchar_t *value;
Expand All @@ -1604,6 +1604,55 @@ dump_znode_sa_xattr(sa_handle_t *hdl)
free(sa_xattr_packed);
}

static void
dump_znode_native_sa_xattr(sa_handle_t *hdl)
{
int error;
boolean_t didheader = B_FALSE;
int i;
int attr_size, idx;
uchar_t *attr_buf;
static struct {
char *name;
zpl_attr_t attr;
} sa_xattrs[] = {
{ "security.selinux", ZPL_SECURITY_SELINUX },
{ "security.capability", ZPL_SECURITY_CAPABILITY },
{ "system.posix_acl_access", ZPL_SYSTEM_POSIX_ACL_ACCESS },
{ "system.posix_acl_default", ZPL_SYSTEM_POSIX_ACL_DEFAULT },
{ NULL, 0 }
};

for (i = 0; sa_xattrs[i].name != NULL; ++i) {
error = sa_size(hdl, sa_attr_table[sa_xattrs[i].attr],
&attr_size);
if (error)
continue;
attr_buf = malloc(attr_size);
if (attr_buf == NULL)
continue;
error = sa_lookup(hdl, sa_attr_table[sa_xattrs[i].attr],
attr_buf, attr_size);
if (error) {
free(attr_buf);
continue;
}
if (!didheader) {
(void) printf("\tNative SA xattrs:\n");
didheader = B_TRUE;
}
(void) printf("\t\t%s = ", sa_xattrs[i].name);
for (idx = 0; idx < attr_size; ++idx) {
if (isprint(attr_buf[idx]))
(void) putchar(attr_buf[idx]);
else
(void) printf("\\%3.3o", attr_buf[idx]);
}
(void) putchar('\n');
free(attr_buf);
}
}

/*ARGSUSED*/
static void
dump_znode(objset_t *os, uint64_t object, void *data, size_t size)
Expand All @@ -1618,6 +1667,7 @@ dump_znode(objset_t *os, uint64_t object, void *data, size_t size)
sa_bulk_attr_t bulk[12];
int idx = 0;
int error;
sa_hdr_phys_t *sahdr = data;

if (!sa_loaded) {
uint64_t sa_attrs = 0;
Expand Down Expand Up @@ -1686,6 +1736,8 @@ dump_znode(objset_t *os, uint64_t object, void *data, size_t size)
z_mtime = (time_t)modtm[0];
z_ctime = (time_t)chgtm[0];

(void) printf("\tSA hdrsize %d\n", SA_HDR_SIZE(sahdr));
(void) printf("\tSA layout %d\n", SA_HDR_LAYOUT_NUM(sahdr));
(void) printf("\tpath %s\n", path);
dump_uidgid(os, uid, gid);
(void) printf("\tatime %s", ctime(&z_atime));
Expand All @@ -1705,6 +1757,7 @@ dump_znode(objset_t *os, uint64_t object, void *data, size_t size)
sizeof (uint64_t)) == 0)
(void) printf("\trdev 0x%016llx\n", (u_longlong_t)rdev);
dump_znode_sa_xattr(hdl);
dump_znode_native_sa_xattr(hdl);
sa_handle_destroy(hdl);
}

Expand Down
7 changes: 7 additions & 0 deletions include/sys/zfs_sa.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,10 @@ typedef enum zpl_attr {
ZPL_SCANSTAMP,
ZPL_DACL_ACES,
ZPL_DXATTR,
ZPL_SECURITY_SELINUX,
ZPL_SECURITY_CAPABILITY,
ZPL_SYSTEM_POSIX_ACL_ACCESS,
ZPL_SYSTEM_POSIX_ACL_DEFAULT,
ZPL_END
} zpl_attr_t;

Expand Down Expand Up @@ -137,6 +141,9 @@ void zfs_sa_get_scanstamp(struct znode *, xvattr_t *);
void zfs_sa_set_scanstamp(struct znode *, xvattr_t *, dmu_tx_t *);
int zfs_sa_get_xattr(struct znode *);
int zfs_sa_set_xattr(struct znode *);
int zfs_sa_native_get_xattr(struct znode *, zpl_attr_t, void *, size_t *);
int zfs_sa_native_set_xattr(struct znode *, zpl_attr_t, const char *,
const void *, size_t, dmu_tx_t *);
void zfs_sa_upgrade(struct sa_handle *, dmu_tx_t *);
void zfs_sa_upgrade_txholds(dmu_tx_t *, struct znode *);
void zfs_sa_init(void);
Expand Down
2 changes: 1 addition & 1 deletion include/sys/zfs_vfsops.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ typedef struct zfs_sb {
boolean_t z_use_fuids; /* version allows fuids */
boolean_t z_replay; /* set during ZIL replay */
boolean_t z_use_sa; /* version allow system attributes */
boolean_t z_xattr_sa; /* allow xattrs to be stores as SA */
boolean_t z_xattr_sa; /* allow xattrs to be stored as SA */
uint64_t z_version; /* ZPL version */
uint64_t z_shares_dir; /* hidden shares dir */
kmutex_t z_lock;
Expand Down
4 changes: 1 addition & 3 deletions module/zfs/sa.c
Original file line number Diff line number Diff line change
Expand Up @@ -1750,10 +1750,8 @@ sa_modify_attrs(sa_handle_t *hdl, sa_attr_type_t newattr,
if (attr == newattr) {
if (length == 0)
++length_idx;
if (action == SA_REMOVE) {
j++;
if (action == SA_REMOVE)
continue;
}
ASSERT(length == 0);
ASSERT(action == SA_REPLACE);
SA_ADD_BULK_ATTR(attr_desc, j, attr,
Expand Down
171 changes: 169 additions & 2 deletions module/zfs/zfs_sa.c
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,10 @@ sa_attr_reg_t zfs_attr_table[ZPL_END+1] = {
{"ZPL_SCANSTAMP", 32, SA_UINT8_ARRAY, 0},
{"ZPL_DACL_ACES", 0, SA_ACL, 0},
{"ZPL_DXATTR", 0, SA_UINT8_ARRAY, 0},
{"ZPL_SECURITY_SELINUX", 0, SA_UINT8_ARRAY, 0},
{"ZPL_SECURITY_CAPABILITY", 0, SA_UINT8_ARRAY, 0},
{"ZPL_SYSTEM_POSIX_ACL_ACCESS", 0, SA_UINT8_ARRAY, 0},
{"ZPL_SYSTEM_POSIX_ACL_DEFAULT", 0, SA_UINT8_ARRAY, 0},
{NULL, 0, 0, 0}
};

Expand Down Expand Up @@ -248,8 +252,11 @@ zfs_sa_set_xattr(znode_t *zp)
if (error) {
dmu_tx_abort(tx);
} else {
error = sa_update(zp->z_sa_hdl, SA_ZPL_DXATTR(zsb),
obj, size, tx);
if (nvlist_next_nvpair(zp->z_xattr_cached, NULL))
error = sa_update(zp->z_sa_hdl, SA_ZPL_DXATTR(zsb),
obj, size, tx);
else
error = sa_remove(zp->z_sa_hdl, SA_ZPL_DXATTR(zsb), tx);
if (error)
dmu_tx_abort(tx);
else
Expand All @@ -261,6 +268,164 @@ zfs_sa_set_xattr(znode_t *zp)
return (error);
}

static void
zfs_sa_remove_xattr(znode_t *zp, const char *attrname,
int dxsize, dmu_tx_t *tx)
{
zfs_sb_t *zsb = ZTOZSB(zp);
char *obj;
int error = 0;

/*
* Get the ZPL_DXATTR xattr if it's not cached.
*/
if (zp->z_xattr_cached == NULL) {
error = nvlist_alloc(&zp->z_xattr_cached,
NV_UNIQUE_NAME, KM_SLEEP);
if (error)
return;
obj = sa_spill_alloc(KM_SLEEP);
error = sa_lookup(zp->z_sa_hdl, SA_ZPL_DXATTR(zsb),
obj, dxsize);
if (!error)
error = nvlist_unpack(obj, dxsize,
&zp->z_xattr_cached, KM_SLEEP);
sa_spill_free(obj);
}
if (error || zp->z_xattr_cached == NULL)
return;

/*
* Try to remove it.
*/
error = nvlist_remove(zp->z_xattr_cached, attrname, DATA_TYPE_BYTE_ARRAY);
if (error)
return;

/*
* If the ZPL_DXATTR SA is not empty, update it to remove the xattr.
* If it is empty, remove the ZPL_DXATTR SA.
*/
if (nvlist_next_nvpair(zp->z_xattr_cached, NULL)) {
char *obj;
size_t size;

error = nvlist_size(zp->z_xattr_cached, &size, NV_ENCODE_XDR);
if (error)
return;

obj = sa_spill_alloc(KM_SLEEP);

error = nvlist_pack(zp->z_xattr_cached, &obj, &size,
NV_ENCODE_XDR, KM_SLEEP);
if (error) {
sa_spill_free(obj);
return;
}

(void) sa_update(zp->z_sa_hdl, SA_ZPL_DXATTR(zsb),
obj, size, tx);

sa_spill_free(obj);
} else {
(void) sa_remove(zp->z_sa_hdl, SA_ZPL_DXATTR(zsb), tx);
}
}

int
zfs_sa_native_set_xattr(znode_t *zp, zpl_attr_t attr,
const char *attrname, const void *value, size_t size,
dmu_tx_t *tx)
{
zfs_sb_t *zsb = ZTOZSB(zp);
int error, dxsize;
void *obj = NULL;
boolean_t havetx = B_FALSE;

ASSERT(RW_WRITE_HELD(&zp->z_xattr_lock));
ASSERT(zp->z_is_sa);

/*
* Temporary copy of otherwise read-only value to
* satisfy the non-const API of the lower-level SA
* functions.
*/
if (value) {
obj = sa_spill_alloc(KM_SLEEP);
bcopy(value, obj, size);
}

if (tx)
havetx = B_TRUE;
else
tx = dmu_tx_create(zsb->z_os);

dmu_tx_hold_sa_create(tx, size);
dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);

if (!havetx) {
error = dmu_tx_assign(tx, TXG_WAIT);
if (error) {
dmu_tx_abort(tx);
goto out;
}
}

/*
* Remove an identical instance of this xattr from the ZPL_DXATTR SA.
*/
if (sa_size(zp->z_sa_hdl, SA_ZPL_DXATTR(zsb), &dxsize) == 0)
zfs_sa_remove_xattr(zp, attrname, dxsize, tx);

/*
* Remove or update the native SA xattr.
*/
if (value)
error = sa_update(zp->z_sa_hdl, zsb->z_attr_table[attr],
obj, size, tx);
else {
error = sa_remove(zp->z_sa_hdl, zsb->z_attr_table[attr], tx);
}

if (!havetx) {
if (error)
dmu_tx_abort(tx);
else
dmu_tx_commit(tx);
}
out:
if (value)
sa_spill_free(obj);
return (error);
}

int
zfs_sa_native_get_xattr(znode_t *zp, zpl_attr_t attr, void *value, size_t *size)
{
zfs_sb_t *zsb = ZTOZSB(zp);
size_t bufsize = *size;
int attr_size;
int error;

ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));
ASSERT(zp->z_is_sa);

error = sa_size(zp->z_sa_hdl, zsb->z_attr_table[attr], &attr_size);
if (error)
return (error);
*size = attr_size;
if (bufsize == 0 || value == NULL)
return (0);
if (bufsize < attr_size)
return (ERANGE);
error = sa_lookup(zp->z_sa_hdl, zsb->z_attr_table[attr],
value, attr_size);
if (error)
return (error);
return (0);
}


/*
* I'm not convinced we should do any of this upgrade.
* since the SA code can read both old/new znode formats
Expand Down Expand Up @@ -416,6 +581,8 @@ EXPORT_SYMBOL(zfs_attr_table);
EXPORT_SYMBOL(zfs_sa_readlink);
EXPORT_SYMBOL(zfs_sa_symlink);
EXPORT_SYMBOL(zfs_sa_get_scanstamp);
EXPORT_SYMBOL(zfs_sa_native_set_xattr);
EXPORT_SYMBOL(zfs_sa_native_get_xattr);
EXPORT_SYMBOL(zfs_sa_set_scanstamp);
EXPORT_SYMBOL(zfs_sa_get_xattr);
EXPORT_SYMBOL(zfs_sa_set_xattr);
Expand Down
Loading

0 comments on commit a098c03

Please sign in to comment.