Skip to content

Commit

Permalink
Add support for user/group dnode accounting & quota
Browse files Browse the repository at this point in the history
This patch tracks dnode usage for each user/group in the
DMU_USER/GROUPUSED_OBJECT ZAPs. ZAP entries dedicated to dnode
accounting have the key prefixed with "obj-" followed by the UID/GID
in string format (as done for the block accounting).
A new SPA feature has been added for dnode accounting as well as
a new ZPL version. The SPA feature must be enabled in the pool
before upgrading the zfs filesystem. During the zfs version upgrade,
a "quotacheck" will be executed by marking all dnode as dirty.

ZoL-bug-id: #3500

Signed-off-by: Jinshan Xiong <[email protected]>
Signed-off-by: Johann Lombardi <[email protected]>
  • Loading branch information
Jinshan Xiong authored and behlendorf committed Oct 7, 2016
1 parent af322de commit 1de321e
Show file tree
Hide file tree
Showing 43 changed files with 1,119 additions and 98 deletions.
4 changes: 3 additions & 1 deletion cmd/zdb/zdb.c
Original file line number Diff line number Diff line change
Expand Up @@ -1946,11 +1946,13 @@ dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header)
}

if (verbosity >= 4) {
(void) printf("\tdnode flags: %s%s%s\n",
(void) printf("\tdnode flags: %s%s%s%s\n",
(dn->dn_phys->dn_flags & DNODE_FLAG_USED_BYTES) ?
"USED_BYTES " : "",
(dn->dn_phys->dn_flags & DNODE_FLAG_USERUSED_ACCOUNTED) ?
"USERUSED_ACCOUNTED " : "",
(dn->dn_phys->dn_flags & DNODE_FLAG_USEROBJUSED_ACCOUNTED) ?
"USEROBJUSED_ACCOUNTED " : "",
(dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR) ?
"SPILL_BLKPTR" : "");
(void) printf("\tdnode maxblkid: %llu\n",
Expand Down
87 changes: 75 additions & 12 deletions cmd/zfs/zfs_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -2223,10 +2223,14 @@ enum us_field_types {
USFIELD_TYPE,
USFIELD_NAME,
USFIELD_USED,
USFIELD_QUOTA
USFIELD_QUOTA,
USFIELD_OBJUSED,
USFIELD_OBJQUOTA
};
static char *us_field_hdr[] = { "TYPE", "NAME", "USED", "QUOTA" };
static char *us_field_names[] = { "type", "name", "used", "quota" };
static char *us_field_hdr[] = { "TYPE", "NAME", "USED", "QUOTA",
"OBJUSED", "OBJQUOTA" };
static char *us_field_names[] = { "type", "name", "used", "quota",
"objused", "objquota" };
#define USFIELD_LAST (sizeof (us_field_names) / sizeof (char *))

#define USTYPE_PSX_GRP (1 << 0)
Expand Down Expand Up @@ -2374,6 +2378,20 @@ us_compare(const void *larg, const void *rarg, void *unused)
return (0);
}

static boolean_t
zfs_prop_is_user(unsigned p)
{
return (p == ZFS_PROP_USERUSED || p == ZFS_PROP_USERQUOTA ||
p == ZFS_PROP_USEROBJUSED || p == ZFS_PROP_USEROBJQUOTA);
}

static boolean_t
zfs_prop_is_group(unsigned p)
{
return (p == ZFS_PROP_GROUPUSED || p == ZFS_PROP_GROUPQUOTA ||
p == ZFS_PROP_GROUPOBJUSED || p == ZFS_PROP_GROUPOBJQUOTA);
}

static inline const char *
us_type2str(unsigned field_type)
{
Expand Down Expand Up @@ -2463,7 +2481,7 @@ userspace_cb(void *arg, const char *domain, uid_t rid, uint64_t space)

if (cb->cb_sid2posix || domain == NULL || domain[0] == '\0') {
/* POSIX or -i */
if (prop == ZFS_PROP_GROUPUSED || prop == ZFS_PROP_GROUPQUOTA) {
if (zfs_prop_is_group(prop)) {
type = USTYPE_PSX_GRP;
if (!cb->cb_numname) {
struct group *g;
Expand Down Expand Up @@ -2538,10 +2556,22 @@ userspace_cb(void *arg, const char *domain, uid_t rid, uint64_t space)
propname = "used";
if (!nvlist_exists(props, "quota"))
(void) nvlist_add_uint64(props, "quota", 0);
} else {
} else if (prop == ZFS_PROP_USERQUOTA || prop == ZFS_PROP_GROUPQUOTA) {
propname = "quota";
if (!nvlist_exists(props, "used"))
(void) nvlist_add_uint64(props, "used", 0);
} else if (prop == ZFS_PROP_USEROBJUSED ||
prop == ZFS_PROP_GROUPOBJUSED) {
propname = "objused";
if (!nvlist_exists(props, "objquota"))
(void) nvlist_add_uint64(props, "objquota", 0);
} else if (prop == ZFS_PROP_USEROBJQUOTA ||
prop == ZFS_PROP_GROUPOBJQUOTA) {
propname = "objquota";
if (!nvlist_exists(props, "objused"))
(void) nvlist_add_uint64(props, "objused", 0);
} else {
return (-1);
}
sizeidx = us_field_index(propname);
if (sizelen > cb->cb_width[sizeidx])
Expand Down Expand Up @@ -2574,15 +2604,15 @@ print_us_node(boolean_t scripted, boolean_t parsable, int *fields, int types,
data_type_t type;
uint32_t val32;
uint64_t val64;
char *strval = NULL;
char *strval = "-";

while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
if (strcmp(nvpair_name(nvp),
us_field_names[field]) == 0)
break;
}

type = nvpair_type(nvp);
type = nvp == NULL ? DATA_TYPE_UNKNOWN : nvpair_type(nvp);
switch (type) {
case DATA_TYPE_UINT32:
(void) nvpair_value_uint32(nvp, &val32);
Expand All @@ -2593,13 +2623,16 @@ print_us_node(boolean_t scripted, boolean_t parsable, int *fields, int types,
case DATA_TYPE_STRING:
(void) nvpair_value_string(nvp, &strval);
break;
case DATA_TYPE_UNKNOWN:
break;
default:
(void) fprintf(stderr, "invalid data type\n");
}

switch (field) {
case USFIELD_TYPE:
strval = (char *)us_type2str(val32);
if (type == DATA_TYPE_UINT32)
strval = (char *)us_type2str(val32);
break;
case USFIELD_NAME:
if (type == DATA_TYPE_UINT64) {
Expand All @@ -2610,6 +2643,8 @@ print_us_node(boolean_t scripted, boolean_t parsable, int *fields, int types,
break;
case USFIELD_USED:
case USFIELD_QUOTA:
case USFIELD_OBJUSED:
case USFIELD_OBJQUOTA:
if (type == DATA_TYPE_UINT64) {
if (parsable) {
(void) sprintf(valstr, "%llu",
Expand All @@ -2618,7 +2653,8 @@ print_us_node(boolean_t scripted, boolean_t parsable, int *fields, int types,
zfs_nicenum(val64, valstr,
sizeof (valstr));
}
if (field == USFIELD_QUOTA &&
if ((field == USFIELD_QUOTA ||
field == USFIELD_OBJQUOTA) &&
strcmp(valstr, "0") == 0)
strval = "none";
else
Expand Down Expand Up @@ -2690,7 +2726,7 @@ zfs_do_userspace(int argc, char **argv)
uu_avl_t *avl_tree;
uu_avl_walk_t *walk;
char *delim;
char deffields[] = "type,name,used,quota";
char deffields[] = "type,name,used,quota,objused,objquota";
char *ofield = NULL;
char *tfield = NULL;
int cfield = 0;
Expand Down Expand Up @@ -2839,11 +2875,12 @@ zfs_do_userspace(int argc, char **argv)
cb.cb_width[i] = strlen(gettext(us_field_hdr[i]));

for (p = 0; p < ZFS_NUM_USERQUOTA_PROPS; p++) {
if (((p == ZFS_PROP_USERUSED || p == ZFS_PROP_USERQUOTA) &&
if ((zfs_prop_is_user(p) &&
!(types & (USTYPE_PSX_USR | USTYPE_SMB_USR))) ||
((p == ZFS_PROP_GROUPUSED || p == ZFS_PROP_GROUPQUOTA) &&
(zfs_prop_is_group(p) &&
!(types & (USTYPE_PSX_GRP | USTYPE_SMB_GRP))))
continue;

cb.cb_prop = p;
if ((ret = zfs_userspace(zhp, p, userspace_cb, &cb)) != 0)
return (ret);
Expand Down Expand Up @@ -4099,6 +4136,11 @@ zfs_do_receive(int argc, char **argv)
#define ZFS_DELEG_PERM_GROUPQUOTA "groupquota"
#define ZFS_DELEG_PERM_USERUSED "userused"
#define ZFS_DELEG_PERM_GROUPUSED "groupused"
#define ZFS_DELEG_PERM_USEROBJQUOTA "userobjquota"
#define ZFS_DELEG_PERM_GROUPOBJQUOTA "groupobjquota"
#define ZFS_DELEG_PERM_USEROBJUSED "userobjused"
#define ZFS_DELEG_PERM_GROUPOBJUSED "groupobjused"

#define ZFS_DELEG_PERM_HOLD "hold"
#define ZFS_DELEG_PERM_RELEASE "release"
#define ZFS_DELEG_PERM_DIFF "diff"
Expand Down Expand Up @@ -4129,6 +4171,10 @@ static zfs_deleg_perm_tab_t zfs_deleg_perm_tbl[] = {
{ ZFS_DELEG_PERM_USERPROP, ZFS_DELEG_NOTE_USERPROP },
{ ZFS_DELEG_PERM_USERQUOTA, ZFS_DELEG_NOTE_USERQUOTA },
{ ZFS_DELEG_PERM_USERUSED, ZFS_DELEG_NOTE_USERUSED },
{ ZFS_DELEG_PERM_USEROBJQUOTA, ZFS_DELEG_NOTE_USEROBJQUOTA },
{ ZFS_DELEG_PERM_USEROBJUSED, ZFS_DELEG_NOTE_USEROBJUSED },
{ ZFS_DELEG_PERM_GROUPOBJQUOTA, ZFS_DELEG_NOTE_GROUPOBJQUOTA },
{ ZFS_DELEG_PERM_GROUPOBJUSED, ZFS_DELEG_NOTE_GROUPOBJUSED },
{ NULL, ZFS_DELEG_NOTE_NONE }
};

Expand Down Expand Up @@ -4206,6 +4252,10 @@ deleg_perm_type(zfs_deleg_note_t note)
case ZFS_DELEG_NOTE_USERPROP:
case ZFS_DELEG_NOTE_USERQUOTA:
case ZFS_DELEG_NOTE_USERUSED:
case ZFS_DELEG_NOTE_USEROBJQUOTA:
case ZFS_DELEG_NOTE_USEROBJUSED:
case ZFS_DELEG_NOTE_GROUPOBJQUOTA:
case ZFS_DELEG_NOTE_GROUPOBJUSED:
/* other */
return (gettext("other"));
default:
Expand Down Expand Up @@ -4709,6 +4759,19 @@ deleg_perm_comment(zfs_deleg_note_t note)
case ZFS_DELEG_NOTE_USERUSED:
str = gettext("Allows reading any userused@... property");
break;
case ZFS_DELEG_NOTE_USEROBJQUOTA:
str = gettext("Allows accessing any userobjquota@... property");
break;
case ZFS_DELEG_NOTE_GROUPOBJQUOTA:
str = gettext("Allows accessing any \n\t\t\t\t"
"groupobjquota@... property");
break;
case ZFS_DELEG_NOTE_GROUPOBJUSED:
str = gettext("Allows reading any groupobjused@... property");
break;
case ZFS_DELEG_NOTE_USEROBJUSED:
str = gettext("Allows reading any userobjused@... property");
break;
/* other */
default:
str = "";
Expand Down
1 change: 1 addition & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,7 @@ AC_CONFIG_FILES([
tests/zfs-tests/tests/functional/threadsappend/Makefile
tests/zfs-tests/tests/functional/truncate/Makefile
tests/zfs-tests/tests/functional/userquota/Makefile
tests/zfs-tests/tests/functional/upgrade/Makefile
tests/zfs-tests/tests/functional/vdev_zaps/Makefile
tests/zfs-tests/tests/functional/write_dirs/Makefile
tests/zfs-tests/tests/functional/xattr/Makefile
Expand Down
6 changes: 6 additions & 0 deletions include/sys/dmu.h
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,12 @@ void zfs_znode_byteswap(void *buf, size_t size);
#define DMU_USERUSED_OBJECT (-1ULL)
#define DMU_GROUPUSED_OBJECT (-2ULL)

/*
* Zap prefix for object accounting in DMU_{USER,GROUP}USED_OBJECT.
*/
#define DMU_OBJACCT_PREFIX "obj-"
#define DMU_OBJACCT_PREFIX_LEN 4

/*
* artificial blkids for bonus buffer and spill blocks
*/
Expand Down
21 changes: 21 additions & 0 deletions include/sys/dmu_objset.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ struct dmu_tx;
(arc_buf_size(buf) > OBJSET_OLD_PHYS_SIZE)

#define OBJSET_FLAG_USERACCOUNTING_COMPLETE (1ULL<<0)
#define OBJSET_FLAG_USEROBJACCOUNTING_COMPLETE (1ULL<<1)

typedef struct objset_phys {
dnode_phys_t os_meta_dnode;
Expand All @@ -68,6 +69,8 @@ typedef struct objset_phys {
dnode_phys_t os_groupused_dnode;
} objset_phys_t;

typedef int (*dmu_objset_upgrade_cb_t)(objset_t *);

struct objset {
/* Immutable: */
struct dsl_dataset *os_dsl_dataset;
Expand Down Expand Up @@ -125,6 +128,13 @@ struct objset {
kmutex_t os_user_ptr_lock;
void *os_user_ptr;
sa_os_t *os_sa;

/* kernel thread to upgrade this dataset */
kmutex_t os_upgrade_lock;
taskqid_t os_upgrade_id;
dmu_objset_upgrade_cb_t os_upgrade_cb;
boolean_t os_upgrade_exit;
int os_upgrade_status;
};

#define DMU_META_OBJSET 0
Expand Down Expand Up @@ -173,6 +183,17 @@ void dmu_objset_userquota_get_ids(dnode_t *dn, boolean_t before, dmu_tx_t *tx);
boolean_t dmu_objset_userused_enabled(objset_t *os);
int dmu_objset_userspace_upgrade(objset_t *os);
boolean_t dmu_objset_userspace_present(objset_t *os);
boolean_t dmu_objset_userobjused_enabled(objset_t *os);
void dmu_objset_userobjspace_upgrade(objset_t *os);
boolean_t dmu_objset_userobjspace_present(objset_t *os);

static inline boolean_t dmu_objset_userobjspace_upgradable(objset_t *os)
{
return (dmu_objset_type(os) == DMU_OST_ZFS &&
dmu_objset_userobjused_enabled(os) &&
!dmu_objset_userobjspace_present(os));
}

int dmu_fsname(const char *snapname, char *buf);

void dmu_objset_evict_done(objset_t *os);
Expand Down
9 changes: 6 additions & 3 deletions include/sys/dnode.h
Original file line number Diff line number Diff line change
Expand Up @@ -126,11 +126,14 @@ enum dnode_dirtycontext {
};

/* Is dn_used in bytes? if not, it's in multiples of SPA_MINBLOCKSIZE */
#define DNODE_FLAG_USED_BYTES (1<<0)
#define DNODE_FLAG_USERUSED_ACCOUNTED (1<<1)
#define DNODE_FLAG_USED_BYTES (1 << 0)
#define DNODE_FLAG_USERUSED_ACCOUNTED (1 << 1)

/* Does dnode have a SA spill blkptr in bonus? */
#define DNODE_FLAG_SPILL_BLKPTR (1<<2)
#define DNODE_FLAG_SPILL_BLKPTR (1 << 2)

/* User/Group dnode accounting */
#define DNODE_FLAG_USEROBJUSED_ACCOUNTED (1 << 3)

typedef struct dnode_phys {
uint8_t dn_type; /* dmu_object_type_t */
Expand Down
4 changes: 4 additions & 0 deletions include/sys/dsl_deleg.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,12 @@ extern "C" {
#define ZFS_DELEG_PERM_VSCAN "vscan"
#define ZFS_DELEG_PERM_USERQUOTA "userquota"
#define ZFS_DELEG_PERM_GROUPQUOTA "groupquota"
#define ZFS_DELEG_PERM_USEROBJQUOTA "userobjquota"
#define ZFS_DELEG_PERM_GROUPOBJQUOTA "groupobjquota"
#define ZFS_DELEG_PERM_USERUSED "userused"
#define ZFS_DELEG_PERM_GROUPUSED "groupused"
#define ZFS_DELEG_PERM_USEROBJUSED "userobjused"
#define ZFS_DELEG_PERM_GROUPOBJUSED "groupobjused"
#define ZFS_DELEG_PERM_HOLD "hold"
#define ZFS_DELEG_PERM_RELEASE "release"
#define ZFS_DELEG_PERM_DIFF "diff"
Expand Down
4 changes: 4 additions & 0 deletions include/sys/fs/zfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,10 @@ typedef enum {
ZFS_PROP_USERQUOTA,
ZFS_PROP_GROUPUSED,
ZFS_PROP_GROUPQUOTA,
ZFS_PROP_USEROBJUSED,
ZFS_PROP_USEROBJQUOTA,
ZFS_PROP_GROUPOBJUSED,
ZFS_PROP_GROUPOBJQUOTA,
ZFS_NUM_USERQUOTA_PROPS
} zfs_userquota_prop_t;

Expand Down
2 changes: 2 additions & 0 deletions include/sys/spa_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,8 @@ struct spa {
*/
spa_config_lock_t spa_config_lock[SCL_LOCKS]; /* config changes */
refcount_t spa_refcount; /* number of opens */

taskq_t *spa_upgrade_taskq; /* taskq for upgrade jobs */
};

extern char *spa_config_path;
Expand Down
4 changes: 4 additions & 0 deletions include/sys/zfs_vfsops.h
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,8 @@ typedef struct zfs_sb {
kmutex_t z_lock;
uint64_t z_userquota_obj;
uint64_t z_groupquota_obj;
uint64_t z_userobjquota_obj;
uint64_t z_groupobjquota_obj;
uint64_t z_replay_eof; /* New end of file - replay only */
sa_attr_type_t *z_attr_table; /* SA attr mapping->id */
uint64_t z_hold_size; /* znode hold array size */
Expand Down Expand Up @@ -190,6 +192,8 @@ extern boolean_t zfs_owner_overquota(zfs_sb_t *zsb, struct znode *,
boolean_t isgroup);
extern boolean_t zfs_fuid_overquota(zfs_sb_t *zsb, boolean_t isgroup,
uint64_t fuid);
extern boolean_t zfs_fuid_overobjquota(zfs_sb_t *zsb, boolean_t isgroup,
uint64_t fuid);
extern int zfs_set_version(zfs_sb_t *zsb, uint64_t newvers);
extern int zfs_get_zplprop(objset_t *os, zfs_prop_t prop,
uint64_t *value);
Expand Down
1 change: 1 addition & 0 deletions include/zfeature_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ typedef enum spa_feature {
SPA_FEATURE_SHA512,
SPA_FEATURE_SKEIN,
SPA_FEATURE_EDONR,
SPA_FEATURE_USEROBJ_ACCOUNTING,
SPA_FEATURES
} spa_feature_t;

Expand Down
4 changes: 4 additions & 0 deletions include/zfs_deleg.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,10 @@ typedef enum {
ZFS_DELEG_NOTE_GROUPQUOTA,
ZFS_DELEG_NOTE_USERUSED,
ZFS_DELEG_NOTE_GROUPUSED,
ZFS_DELEG_NOTE_USEROBJQUOTA,
ZFS_DELEG_NOTE_GROUPOBJQUOTA,
ZFS_DELEG_NOTE_USEROBJUSED,
ZFS_DELEG_NOTE_GROUPOBJUSED,
ZFS_DELEG_NOTE_HOLD,
ZFS_DELEG_NOTE_RELEASE,
ZFS_DELEG_NOTE_DIFF,
Expand Down
Loading

0 comments on commit 1de321e

Please sign in to comment.