Skip to content

Commit

Permalink
Support for longnames for files/directories (Linux part)
Browse files Browse the repository at this point in the history
This patch adds the ability for ZFS to support file and directory names
of up to 1023 bytes. This number was chosen so that names of up to 255
4-byte characters can be supported. The new feature is represented by
the new feature flag feature@longname.

A new dataset property "longname" is also introduced to toggle longname
support for each dataset individually. This property can be disabled
even if the dataset contains longname files. In that case, new files
cannot be created with long names, but existing longname files can
still be looked up.

Note that, to my knowledge, native Linux filesystems don't support names
longer than 255 bytes, so there might be programs that are not able to
work with long names.

Note that the NFS server may need to use exportfs_get_name to reconnect
dentries, and the buffer being passed is limited to NAME_MAX+1 (256), so
NFS may not work when longname is enabled.

Signed-off-by: Chunwei Chen <[email protected]>
  • Loading branch information
sanjeevbagewadinutanix authored and davidchenntnx committed Sep 30, 2024
1 parent d346792 commit ed82dc5
Show file tree
Hide file tree
Showing 35 changed files with 1,164 additions and 402 deletions.
4 changes: 2 additions & 2 deletions cmd/zdb/zdb.c
Original file line number Diff line number Diff line change
Expand Up @@ -1117,7 +1117,7 @@ dump_zap(objset_t *os, uint64_t object, void *data, size_t size)
{
(void) data, (void) size;
zap_cursor_t zc;
zap_attribute_t *attrp = zap_attribute_alloc();
zap_attribute_t *attrp = zap_attribute_long_alloc();
void *prop;
unsigned i;

Expand Down Expand Up @@ -1365,7 +1365,7 @@ dump_zpldir(objset_t *os, uint64_t object, void *data, size_t size)
{
(void) data, (void) size;
zap_cursor_t zc;
zap_attribute_t *attrp = zap_attribute_alloc();
zap_attribute_t *attrp = zap_attribute_long_alloc();
const char *typenames[] = {
/* 0 */ "not specified",
/* 1 */ "FIFO",
Expand Down
2 changes: 1 addition & 1 deletion cmd/zhack.c
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ static void
dump_obj(objset_t *os, uint64_t obj, const char *name)
{
zap_cursor_t zc;
zap_attribute_t *za = zap_attribute_alloc();
zap_attribute_t *za = zap_attribute_long_alloc();

(void) printf("%s_obj:\n", name);

Expand Down
1 change: 1 addition & 0 deletions include/os/linux/zfs/sys/zfs_vfsops_os.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ struct zfsvfs {
boolean_t z_xattr_sa; /* allow xattrs to be stores as SA */
boolean_t z_draining; /* is true when drain is active */
boolean_t z_drain_cancel; /* signal the unlinked drain to stop */
boolean_t z_longname; /* Dataset supports long names */
uint64_t z_version; /* ZPL version */
uint64_t z_shares_dir; /* hidden shares dir */
dataset_kstats_t z_kstat; /* fs kstats */
Expand Down
1 change: 1 addition & 0 deletions include/os/linux/zfs/sys/zfs_vnops_os.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ extern int zfs_write_simple(znode_t *zp, const void *data, size_t len,
loff_t pos, size_t *resid);
extern int zfs_lookup(znode_t *dzp, char *nm, znode_t **zpp, int flags,
cred_t *cr, int *direntflags, pathname_t *realpnp);
extern int zfs_get_name(znode_t *dzp, char *name, znode_t *zp);
extern int zfs_create(znode_t *dzp, char *name, vattr_t *vap, int excl,
int mode, znode_t **zpp, cred_t *cr, int flag, vsecattr_t *vsecp,
zidmap_t *mnt_ns);
Expand Down
2 changes: 2 additions & 0 deletions include/sys/fs/zfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ typedef enum dmu_objset_type {
* All of these include the terminating NUL byte.
*/
#define ZAP_MAXNAMELEN 256
#define ZAP_MAXNAMELEN_NEW 1024
#define ZAP_MAXVALUELEN (1024 * 8)
#define ZAP_OLDMAXVALUELEN 1024
#define ZFS_MAX_DATASET_NAME_LEN 256
Expand Down Expand Up @@ -194,6 +195,7 @@ typedef enum {
ZFS_PROP_PREFETCH,
ZFS_PROP_VOLTHREADING,
ZFS_PROP_DIRECT,
ZFS_PROP_LONGNAME,
ZFS_NUM_PROPS
} zfs_prop_t;

Expand Down
3 changes: 2 additions & 1 deletion include/sys/zap.h
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,7 @@ int zap_count(objset_t *ds, uint64_t zapobj, uint64_t *count);
* match must be exact (ie, same as mask=-1ULL).
*/
int zap_value_search(objset_t *os, uint64_t zapobj,
uint64_t value, uint64_t mask, char *name);
uint64_t value, uint64_t mask, char *name, uint64_t namelen);

/*
* Transfer all the entries from fromobj into intoobj. Only works on
Expand Down Expand Up @@ -387,6 +387,7 @@ void zap_fini(void);
* Alloc and free zap_attribute_t.
*/
zap_attribute_t *zap_attribute_alloc(void);
zap_attribute_t *zap_attribute_long_alloc(void);
void zap_attribute_free(zap_attribute_t *attrp);

/*
Expand Down
4 changes: 2 additions & 2 deletions include/sys/zfs_ioctl.h
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ typedef enum drr_headertype {
* default use of "zfs send" won't encounter the bug mentioned above.
*/
#define DMU_BACKUP_FEATURE_SWITCH_TO_LARGE_BLOCKS (1 << 27)
/* flag #28 is reserved for a Nutanix feature */
#define DMU_BACKUP_FEATURE_LONGNAME (1 << 28)
/*
* flag #29 is the last unused bit. It is reserved to indicate a to-be-designed
* extension to the stream format which will accommodate more feature flags.
Expand All @@ -141,7 +141,7 @@ typedef enum drr_headertype {
DMU_BACKUP_FEATURE_COMPRESSED | DMU_BACKUP_FEATURE_LARGE_DNODE | \
DMU_BACKUP_FEATURE_RAW | DMU_BACKUP_FEATURE_HOLDS | \
DMU_BACKUP_FEATURE_REDACTED | DMU_BACKUP_FEATURE_SWITCH_TO_LARGE_BLOCKS | \
DMU_BACKUP_FEATURE_ZSTD)
DMU_BACKUP_FEATURE_ZSTD | DMU_BACKUP_FEATURE_LONGNAME)

/* Are all features in the given flag word currently supported? */
#define DMU_STREAM_SUPPORTED(x) (!((x) & ~DMU_BACKUP_FEATURE_MASK))
Expand Down
1 change: 1 addition & 0 deletions include/zfeature_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ typedef enum spa_feature {
SPA_FEATURE_REDACTION_LIST_SPILL,
SPA_FEATURE_RAIDZ_EXPANSION,
SPA_FEATURE_FAST_DEDUP,
SPA_FEATURE_LONGNAME,
SPA_FEATURES
} spa_feature_t;

Expand Down
653 changes: 297 additions & 356 deletions lib/libzfs/libzfs.abi

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion module/os/freebsd/zfs/zfs_znode_os.c
Original file line number Diff line number Diff line change
Expand Up @@ -1814,7 +1814,7 @@ zfs_znode_parent_and_name(znode_t *zp, znode_t **dzpp, char *buf)
return (SET_ERROR(EINVAL));

err = zap_value_search(zfsvfs->z_os, parent, zp->z_id,
ZFS_DIRENT_OBJ(-1ULL), buf);
ZFS_DIRENT_OBJ(-1ULL), buf, MAXNAMELEN);
if (err != 0)
return (err);
err = zfs_zget(zfsvfs, parent, dzpp);
Expand Down
9 changes: 9 additions & 0 deletions module/os/linux/zfs/zfs_dir.c
Original file line number Diff line number Diff line change
Expand Up @@ -802,6 +802,7 @@ zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag)
{
znode_t *dzp = dl->dl_dzp;
zfsvfs_t *zfsvfs = ZTOZSB(zp);
dsl_dataset_t *ds = dmu_objset_ds(zfsvfs->z_os);
uint64_t value;
int zp_is_dir = S_ISDIR(ZTOI(zp)->i_mode);
sa_bulk_attr_t bulk[5];
Expand Down Expand Up @@ -847,6 +848,14 @@ zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag)
return (error);
}

/*
* If we added a longname activate the SPA_FEATURE_LONGNAME.
*/
if (strlen(dl->dl_name) >= ZAP_MAXNAMELEN) {
ds->ds_feature_activation[SPA_FEATURE_LONGNAME] =
(void *)B_TRUE;
}

SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL,
&dzp->z_id, sizeof (dzp->z_id));
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
Expand Down
12 changes: 11 additions & 1 deletion module/os/linux/zfs/zfs_vfsops.c
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
#include <sys/dmu_objset.h>
#include <sys/dsl_dir.h>
#include <sys/objlist.h>
#include <sys/zfeature.h>
#include <sys/zpl.h>
#include <linux/vfs_compat.h>
#include <linux/fs.h>
Expand Down Expand Up @@ -449,6 +450,12 @@ acl_inherit_changed_cb(void *arg, uint64_t newval)
((zfsvfs_t *)arg)->z_acl_inherit = newval;
}

/*
 * Property-change callback for the "longname" dataset property: store the
 * new property value in the z_longname field of the zfsvfs passed as arg.
 */
static void
longname_changed_cb(void *arg, uint64_t newval)
{
	zfsvfs_t *zfsvfs = arg;

	zfsvfs->z_longname = newval;
}

static int
zfs_register_callbacks(vfs_t *vfsp)
{
Expand Down Expand Up @@ -509,6 +516,8 @@ zfs_register_callbacks(vfs_t *vfsp)
zfsvfs);
error = error ? error : dsl_prop_register(ds,
zfs_prop_to_name(ZFS_PROP_NBMAND), nbmand_changed_cb, zfsvfs);
error = error ? error : dsl_prop_register(ds,
zfs_prop_to_name(ZFS_PROP_LONGNAME), longname_changed_cb, zfsvfs);
dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
if (error)
goto unregister;
Expand Down Expand Up @@ -1140,7 +1149,8 @@ zfs_statvfs(struct inode *ip, struct kstatfs *statp)
statp->f_fsid.val[0] = (uint32_t)fsid;
statp->f_fsid.val[1] = (uint32_t)(fsid >> 32);
statp->f_type = ZFS_SUPER_MAGIC;
statp->f_namelen = MAXNAMELEN - 1;
statp->f_namelen =
zfsvfs->z_longname ? (ZAP_MAXNAMELEN_NEW - 1) : (MAXNAMELEN - 1);

/*
* We have all of 40 characters to stuff a string here.
Expand Down
42 changes: 41 additions & 1 deletion module/os/linux/zfs/zfs_vnops_os.c
Original file line number Diff line number Diff line change
Expand Up @@ -533,6 +533,46 @@ zfs_lookup(znode_t *zdp, char *nm, znode_t **zpp, int flags, cred_t *cr,
return (error);
}

/*
 * Perform a linear search of a directory for the entry name that refers to
 * a specific znode.
 *
 * The size of the name buffer is not passed in because Linux hardcodes it
 * to NAME_MAX+1 (256); the search is therefore bounded by ZAP_MAXNAMELEN.
 *
 *	IN:	dzp	- znode of the directory to search.
 *		zp	- znode of the target.
 *
 *	OUT:	name	- dentry name of the target.
 *
 *	RETURN:	0 on success, error code on failure.  ENOENT is returned
 *		when either znode belongs to the ctldir.
 */
int
zfs_get_name(znode_t *dzp, char *name, znode_t *zp)
{
	zfsvfs_t *zfsvfs = ZTOZSB(dzp);
	int error;

	error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG);
	if (error != 0)
		return (error);

	/* From here on the filesystem has been entered; leave via "out". */
	error = zfs_verify_zp(zp);
	if (error != 0)
		goto out;

	/* ctldir entries should have got their name in zfs_vget */
	if (dzp->z_is_ctldir || zp->z_is_ctldir) {
		error = ENOENT;
		goto out;
	}

	/* The buffer length is hardcoded to 256 in the Linux kernel. */
	error = zap_value_search(zfsvfs->z_os, dzp->z_id, zp->z_id,
	    ZFS_DIRENT_OBJ(-1ULL), name, ZAP_MAXNAMELEN);

out:
	zfs_exit(zfsvfs, FTAG);
	return (error);
}

/*
* Attempt to create a new entry in a directory. If the entry
* already exists, truncate the file if permissible, else return
Expand Down Expand Up @@ -1539,7 +1579,7 @@ zfs_readdir(struct inode *ip, struct dir_context *ctx, cred_t *cr)
os = zfsvfs->z_os;
offset = ctx->pos;
prefetch = zp->z_zn_prefetch;
zap = zap_attribute_alloc();
zap = zap_attribute_long_alloc();

/*
* Initialize the iterator cursor.
Expand Down
31 changes: 31 additions & 0 deletions module/os/linux/zfs/zpl_export.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
*/


#include <sys/file.h>
#include <sys/zfs_znode.h>
#include <sys/zfs_vnops.h>
#include <sys/zfs_ctldir.h>
Expand Down Expand Up @@ -102,6 +103,35 @@ zpl_fh_to_dentry(struct super_block *sb, struct fid *fh,
return (d_obtain_alias(ip));
}

/*
 * Export operation that looks up the name of "child" within "parent".
 *
 * The filesystem may contain names longer than 255 bytes, so the default
 * get_name is overridden to avoid a buffer overflow.  Because the buffer
 * size is hardcoded in Linux, such a name cannot be returned and the
 * caller gets an ESTALE error in that case.
 *
 * Returns 0 on success or a negative errno; -ENOTDIR when the parent has
 * no inode or its inode is not a directory.
 */
static int
zpl_get_name(struct dentry *parent, char *name, struct dentry *child)
{
	struct inode *dip = parent->d_inode;
	struct inode *cip = child->d_inode;
	fstrans_cookie_t cookie;
	cred_t *cr;
	int rc;

	if (!dip)
		return (-ENOTDIR);
	if (!S_ISDIR(dip->i_mode))
		return (-ENOTDIR);

	cr = CRED();
	crhold(cr);
	cookie = spl_fstrans_mark();

	/* Search the directory while holding its inode lock shared. */
	spl_inode_lock_shared(dip);
	rc = -zfs_get_name(ITOZ(dip), name, ITOZ(cip));
	spl_inode_unlock_shared(dip);

	spl_fstrans_unmark(cookie);
	crfree(cr);

	return (rc);
}

static struct dentry *
zpl_get_parent(struct dentry *child)
{
Expand Down Expand Up @@ -146,6 +176,7 @@ zpl_commit_metadata(struct inode *inode)
/*
 * Export operations table for the ZPL.  Each member points at the zpl_*
 * handler of the same name; .get_name is routed to zpl_get_name instead of
 * being left unset.
 */
const struct export_operations zpl_export_operations = {
.encode_fh = zpl_encode_fh,
.fh_to_dentry = zpl_fh_to_dentry,
.get_name = zpl_get_name,
.get_parent = zpl_get_parent,
.commit_metadata = zpl_commit_metadata,
};
56 changes: 55 additions & 1 deletion module/os/linux/zfs/zpl_inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,29 @@ zpl_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
pathname_t pn;
int zfs_flags = 0;
zfsvfs_t *zfsvfs = dentry->d_sb->s_fs_info;
dsl_dataset_t *ds = dmu_objset_ds(zfsvfs->z_os);
size_t dlen = dlen(dentry);

if (dlen(dentry) >= ZAP_MAXNAMELEN)
/*
* If z_longname is disabled, disallow create or rename of names that
* do not fit in ZAP_MAXNAMELEN bytes (including the terminating NUL).
*
* This is needed in cases where longname was enabled first, some
* files/dirs with such long names were created, and longname was
* later disabled. In that case, allow access to the existing long
* names but disallow the creation of new longname entries.
*/
if (!zfsvfs->z_longname && (dlen >= ZAP_MAXNAMELEN)) {
/*
* If this is for create or rename fail it.
*/
if (!dsl_dataset_feature_is_active(ds, SPA_FEATURE_LONGNAME) ||
(flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET)))
return (ERR_PTR(-ENAMETOOLONG));
}
if (dlen >= ZAP_MAXNAMELEN_NEW) {
return (ERR_PTR(-ENAMETOOLONG));
}

crhold(cr);
cookie = spl_fstrans_mark();
Expand Down Expand Up @@ -131,6 +151,16 @@ zpl_vap_init(vattr_t *vap, struct inode *dir, umode_t mode, cred_t *cr,
}
}

/*
 * Report whether the name of dentry is too long to be created in its
 * filesystem.  The limit is ZAP_MAXNAMELEN_NEW when the filesystem has
 * z_longname set and ZAP_MAXNAMELEN otherwise; a name whose length reaches
 * the applicable limit is too long.
 */
static inline bool
is_nametoolong(struct dentry *dentry)
{
	zfsvfs_t *zfsvfs = dentry->d_sb->s_fs_info;
	size_t namelen = dlen(dentry);
	size_t limit;

	/* ZAP_MAXNAMELEN is the smaller limit, so one comparison suffices. */
	limit = zfsvfs->z_longname ? ZAP_MAXNAMELEN_NEW : ZAP_MAXNAMELEN;

	return (namelen >= limit);
}

static int
#ifdef HAVE_IOPS_CREATE_USERNS
zpl_create(struct user_namespace *user_ns, struct inode *dir,
Expand All @@ -151,6 +181,10 @@ zpl_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool flag)
zidmap_t *user_ns = kcred->user_ns;
#endif

if (is_nametoolong(dentry)) {
return (-ENAMETOOLONG);
}

crhold(cr);
vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
zpl_vap_init(vap, dir, mode, cr, user_ns);
Expand Down Expand Up @@ -201,6 +235,10 @@ zpl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
zidmap_t *user_ns = kcred->user_ns;
#endif

if (is_nametoolong(dentry)) {
return (-ENAMETOOLONG);
}

/*
* We currently expect Linux to supply rdev=0 for all sockets
* and fifos, but we want to know if this behavior ever changes.
Expand Down Expand Up @@ -353,6 +391,10 @@ zpl_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
zidmap_t *user_ns = kcred->user_ns;
#endif

if (is_nametoolong(dentry)) {
return (-ENAMETOOLONG);
}

crhold(cr);
vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
zpl_vap_init(vap, dir, mode | S_IFDIR, cr, user_ns);
Expand Down Expand Up @@ -568,6 +610,10 @@ zpl_rename2(struct inode *sdip, struct dentry *sdentry,
zidmap_t *user_ns = kcred->user_ns;
#endif

if (is_nametoolong(tdentry)) {
return (-ENAMETOOLONG);
}

crhold(cr);
if (rflags & RENAME_WHITEOUT) {
wo_vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
Expand Down Expand Up @@ -618,6 +664,10 @@ zpl_symlink(struct inode *dir, struct dentry *dentry, const char *name)
zidmap_t *user_ns = kcred->user_ns;
#endif

if (is_nametoolong(dentry)) {
return (-ENAMETOOLONG);
}

crhold(cr);
vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
zpl_vap_init(vap, dir, S_IFLNK | S_IRWXUGO, cr, user_ns);
Expand Down Expand Up @@ -707,6 +757,10 @@ zpl_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
int error;
fstrans_cookie_t cookie;

if (is_nametoolong(dentry)) {
return (-ENAMETOOLONG);
}

if (ip->i_nlink >= ZFS_LINK_MAX)
return (-EMLINK);

Expand Down
Loading

0 comments on commit ed82dc5

Please sign in to comment.