Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Illumos 4976-4984 - metaslab improvements #2595

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 60 additions & 14 deletions cmd/zdb/zdb.c
Original file line number Diff line number Diff line change
Expand Up @@ -110,11 +110,11 @@ static void
usage(void)
{
(void) fprintf(stderr,
"Usage: %s [-CumdibcsDvhLXFPA] [-t txg] [-e [-p path...]] "
"[-U config] [-M inflight I/Os] poolname [object...]\n"
"Usage: %s [-CumMdibcsDvhLXFPA] [-t txg] [-e [-p path...]] "
"[-U config] [-I inflight I/Os] poolname [object...]\n"
" %s [-divPA] [-e -p path...] [-U config] dataset "
"[object...]\n"
" %s -m [-LXFPA] [-t txg] [-e [-p path...]] [-U config] "
" %s -mM [-LXFPA] [-t txg] [-e [-p path...]] [-U config] "
"poolname [vdev [metaslab...]]\n"
" %s -R [-A] [-e [-p path...]] poolname "
"vdev:offset:size[:flags]\n"
Expand All @@ -137,6 +137,7 @@ usage(void)
(void) fprintf(stderr, " -h pool history\n");
(void) fprintf(stderr, " -b block statistics\n");
(void) fprintf(stderr, " -m metaslabs\n");
(void) fprintf(stderr, " -M metaslab groups\n");
(void) fprintf(stderr, " -c checksum all metadata (twice for "
"all data) blocks\n");
(void) fprintf(stderr, " -s report stats on zdb's I/O\n");
Expand Down Expand Up @@ -165,7 +166,7 @@ usage(void)
(void) fprintf(stderr, " -P print numbers in parseable form\n");
(void) fprintf(stderr, " -t <txg> -- highest txg to use when "
"searching for uberblocks\n");
(void) fprintf(stderr, " -M <number of inflight I/Os> -- "
(void) fprintf(stderr, " -I <number of inflight I/Os> -- "
"specify the maximum number of checksumming I/Os "
"[default is 200]\n");
(void) fprintf(stderr, "Specify an option more than once (e.g. -bb) "
Expand Down Expand Up @@ -547,7 +548,7 @@ get_metaslab_refcount(vdev_t *vd)
int refcount = 0;
int c, m;

if (vd->vdev_top == vd) {
if (vd->vdev_top == vd && !vd->vdev_removing) {
for (m = 0; m < vd->vdev_ms_count; m++) {
space_map_t *sm = vd->vdev_ms[m]->ms_sm;

Expand Down Expand Up @@ -685,9 +686,10 @@ dump_metaslab(metaslab_t *msp)
* The space map histogram represents free space in chunks
* of sm_shift (i.e. bucket 0 refers to 2^sm_shift).
*/
(void) printf("\tOn-disk histogram:\n");
(void) printf("\tOn-disk histogram:\t\tfragmentation %llu\n",
(u_longlong_t)msp->ms_fragmentation);
dump_histogram(sm->sm_phys->smp_histogram,
SPACE_MAP_HISTOGRAM_SIZE(sm), sm->sm_shift);
SPACE_MAP_HISTOGRAM_SIZE, sm->sm_shift);
}

if (dump_opt['d'] > 5 || dump_opt['m'] > 3) {
Expand All @@ -711,6 +713,48 @@ print_vdev_metaslab_header(vdev_t *vd)
"---------------", "-------------");
}

/*
 * Print fragmentation statistics for the pool's normal metaslab class
 * (the output of the zdb -M option).
 *
 * For every child of the root vdev whose metaslab group belongs to the
 * normal class, one summary line is printed (vdev id, metaslab count and
 * the group's fragmentation, or "-" when the value is ZFS_FRAG_INVALID)
 * followed by the group's histogram.  A final line and histogram report
 * the same information for the class as a whole.
 *
 * Side effect: each visited group's mg_fragmentation field is refreshed
 * from metaslab_group_fragmentation() before it is printed.
 */
static void
dump_metaslab_groups(spa_t *spa)
{
	vdev_t *rvd = spa->spa_root_vdev;
	metaslab_class_t *mc = spa_normal_class(spa);
	uint64_t fragmentation;
	int c;

	/* Check the class-wide histogram before reporting anything from it. */
	metaslab_class_histogram_verify(mc);

	for (c = 0; c < rvd->vdev_children; c++) {
		vdev_t *tvd = rvd->vdev_child[c];
		metaslab_group_t *mg = tvd->vdev_mg;

		/*
		 * Skip vdevs that have no metaslab group at all (guarding
		 * the dereference below) as well as groups that are not
		 * members of the normal class.
		 */
		if (mg == NULL || mg->mg_class != mc)
			continue;

		metaslab_group_histogram_verify(mg);
		mg->mg_fragmentation = metaslab_group_fragmentation(mg);

		(void) printf("\tvdev %10llu\t\tmetaslabs%5llu\t\t"
		    "fragmentation",
		    (u_longlong_t)tvd->vdev_id,
		    (u_longlong_t)tvd->vdev_ms_count);
		if (mg->mg_fragmentation == ZFS_FRAG_INVALID) {
			/* No fragmentation value available for this group. */
			(void) printf("%3s\n", "-");
		} else {
			(void) printf("%3llu%%\n",
			    (u_longlong_t)mg->mg_fragmentation);
		}
		dump_histogram(mg->mg_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0);
	}

	/* Pool-wide summary for the normal class. */
	(void) printf("\tpool %s\tfragmentation", spa_name(spa));
	fragmentation = metaslab_class_fragmentation(mc);
	if (fragmentation == ZFS_FRAG_INVALID)
		(void) printf("\t%3s\n", "-");
	else
		(void) printf("\t%3llu%%\n", (u_longlong_t)fragmentation);
	dump_histogram(mc->mc_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0);
}

static void
dump_metaslabs(spa_t *spa)
{
Expand Down Expand Up @@ -2381,8 +2425,7 @@ zdb_leak(void *arg, uint64_t start, uint64_t size)
}

static metaslab_ops_t zdb_metaslab_ops = {
NULL, /* alloc */
NULL /* fragmented */
NULL /* alloc */
};

static void
Expand Down Expand Up @@ -2874,6 +2917,8 @@ dump_zpool(spa_t *spa)

if (dump_opt['d'] > 2 || dump_opt['m'])
dump_metaslabs(spa);
if (dump_opt['M'])
dump_metaslab_groups(spa);

if (dump_opt['d'] || dump_opt['i']) {
dump_dir(dp->dp_meta_objset);
Expand Down Expand Up @@ -3363,7 +3408,7 @@ main(int argc, char **argv)
int flags = ZFS_IMPORT_MISSING_LOG;
int rewind = ZPOOL_NEVER_REWIND;
char *spa_config_path_env;
const char *opts = "bcdhilmM:suCDRSAFLVXevp:t:U:P";
const char *opts = "bcdhilmMI:suCDRSAFLXevp:t:U:P";

(void) setrlimit(RLIMIT_NOFILE, &rl);
(void) enable_extended_FILE_stdio(-1, -1);
Expand Down Expand Up @@ -3392,6 +3437,7 @@ main(int argc, char **argv)
case 'u':
case 'C':
case 'D':
case 'M':
case 'R':
case 'S':
dump_opt[c]++;
Expand All @@ -3408,10 +3454,7 @@ main(int argc, char **argv)
case 'V':
flags = ZFS_IMPORT_VERBATIM;
break;
case 'v':
verbose++;
break;
case 'M':
case 'I':
max_inflight = strtoull(optarg, NULL, 0);
if (max_inflight == 0) {
(void) fprintf(stderr, "maximum number "
Expand Down Expand Up @@ -3446,6 +3489,9 @@ main(int argc, char **argv)
case 'U':
spa_config_path = optarg;
break;
case 'v':
verbose++;
break;
default:
usage();
break;
Expand Down
18 changes: 13 additions & 5 deletions cmd/zpool/zpool_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -2998,10 +2998,16 @@ print_one_column(zpool_prop_t prop, uint64_t value, boolean_t scripted)
boolean_t fixed;
size_t width = zprop_width(prop, &fixed, ZFS_TYPE_POOL);

zfs_nicenum(value, propval, sizeof (propval));

if (prop == ZPOOL_PROP_EXPANDSZ && value == 0)
(void) strlcpy(propval, "-", sizeof (propval));
else if (prop == ZPOOL_PROP_FRAGMENTATION && value == ZFS_FRAG_INVALID)
(void) strlcpy(propval, "-", sizeof (propval));
else if (prop == ZPOOL_PROP_FRAGMENTATION)
(void) snprintf(propval, sizeof (propval), "%llu%%",
(unsigned long long)value);
else
zfs_nicenum(value, propval, sizeof (propval));

if (scripted)
(void) printf("\t%s", propval);
Expand Down Expand Up @@ -3034,16 +3040,18 @@ print_list_stats(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
/* only toplevel vdevs have capacity stats */
if (vs->vs_space == 0) {
if (scripted)
(void) printf("\t-\t-\t-");
(void) printf("\t-\t-\t-\t-");
else
(void) printf(" - - -");
(void) printf(" - - - -");
} else {
print_one_column(ZPOOL_PROP_SIZE, vs->vs_space,
scripted);
print_one_column(ZPOOL_PROP_CAPACITY, vs->vs_alloc,
scripted);
print_one_column(ZPOOL_PROP_FREE,
vs->vs_space - vs->vs_alloc, scripted);
print_one_column(ZPOOL_PROP_FRAGMENTATION,
vs->vs_fragmentation, scripted);
}
print_one_column(ZPOOL_PROP_EXPANDSZ, vs->vs_esize,
scripted);
Expand Down Expand Up @@ -3128,8 +3136,8 @@ zpool_do_list(int argc, char **argv)
int ret = 0;
list_cbdata_t cb = { 0 };
static char default_props[] =
"name,size,allocated,free,capacity,dedupratio,"
"health,altroot";
"name,size,allocated,free,fragmentation,capacity,"
"dedupratio,health,altroot";
char *props = default_props;
unsigned long interval = 0, count = 0;
zpool_list_t *list;
Expand Down
9 changes: 9 additions & 0 deletions include/sys/fs/zfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,7 @@ typedef enum {
ZPOOL_PROP_COMMENT,
ZPOOL_PROP_EXPANDSZ,
ZPOOL_PROP_FREEING,
ZPOOL_PROP_FRAGMENTATION,
ZPOOL_PROP_LEAKED,
ZPOOL_NUM_PROPS
} zpool_prop_t;
Expand Down Expand Up @@ -598,6 +599,13 @@ typedef struct zpool_rewind_policy {
*/
#define SPA_MINDEVSIZE (64ULL << 20)

/*
* Set if the fragmentation has not yet been calculated. This can happen
* because the space maps have not been upgraded or the histogram feature
* is not enabled.
*/
#define ZFS_FRAG_INVALID UINT64_MAX

/*
* The location of the pool configuration repository, shared between kernel and
* userland.
Expand Down Expand Up @@ -746,6 +754,7 @@ typedef struct vdev_stat {
uint64_t vs_self_healed; /* self-healed bytes */
uint64_t vs_scan_removing; /* removing? */
uint64_t vs_scan_processed; /* scan processed bytes */
uint64_t vs_fragmentation; /* device fragmentation */
} vdev_stat_t;

/*
Expand Down
71 changes: 38 additions & 33 deletions include/sys/metaslab.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013 by Delphix. All rights reserved.
* Copyright (c) 2011, 2014 by Delphix. All rights reserved.
*/

#ifndef _SYS_METASLAB_H
Expand All @@ -38,23 +38,22 @@ extern "C" {

typedef struct metaslab_ops {
uint64_t (*msop_alloc)(metaslab_t *msp, uint64_t size);
boolean_t (*msop_fragmented)(metaslab_t *msp);
} metaslab_ops_t;

extern metaslab_ops_t *zfs_metaslab_ops;

metaslab_t *metaslab_init(metaslab_group_t *mg, uint64_t id,
uint64_t object, uint64_t txg);
void metaslab_fini(metaslab_t *msp);
metaslab_t *metaslab_init(metaslab_group_t *, uint64_t,
uint64_t, uint64_t);
void metaslab_fini(metaslab_t *);

void metaslab_load_wait(metaslab_t *msp);
int metaslab_load(metaslab_t *msp);
void metaslab_unload(metaslab_t *msp);
void metaslab_load_wait(metaslab_t *);
int metaslab_load(metaslab_t *);
void metaslab_unload(metaslab_t *);

void metaslab_sync(metaslab_t *msp, uint64_t txg);
void metaslab_sync_done(metaslab_t *msp, uint64_t txg);
void metaslab_sync_reassess(metaslab_group_t *mg);
uint64_t metaslab_block_maxsize(metaslab_t *msp);
void metaslab_sync(metaslab_t *, uint64_t);
void metaslab_sync_done(metaslab_t *, uint64_t);
void metaslab_sync_reassess(metaslab_group_t *);
uint64_t metaslab_block_maxsize(metaslab_t *);

#define METASLAB_HINTBP_FAVOR 0x0
#define METASLAB_HINTBP_AVOID 0x1
Expand All @@ -63,30 +62,36 @@ uint64_t metaslab_block_maxsize(metaslab_t *msp);
#define METASLAB_GANG_AVOID 0x8
#define METASLAB_FASTWRITE 0x10

int metaslab_alloc(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
blkptr_t *bp, int ncopies, uint64_t txg, blkptr_t *hintbp, int flags);
void metaslab_free(spa_t *spa, const blkptr_t *bp, uint64_t txg, boolean_t now);
int metaslab_claim(spa_t *spa, const blkptr_t *bp, uint64_t txg);
void metaslab_check_free(spa_t *spa, const blkptr_t *bp);
void metaslab_fastwrite_mark(spa_t *spa, const blkptr_t *bp);
void metaslab_fastwrite_unmark(spa_t *spa, const blkptr_t *bp);
int metaslab_alloc(spa_t *, metaslab_class_t *, uint64_t,
blkptr_t *, int, uint64_t, blkptr_t *, int);
void metaslab_free(spa_t *, const blkptr_t *, uint64_t, boolean_t);
int metaslab_claim(spa_t *, const blkptr_t *, uint64_t);
void metaslab_check_free(spa_t *, const blkptr_t *);
void metaslab_fastwrite_mark(spa_t *, const blkptr_t *);
void metaslab_fastwrite_unmark(spa_t *, const blkptr_t *);

metaslab_class_t *metaslab_class_create(spa_t *spa, metaslab_ops_t *ops);
void metaslab_class_destroy(metaslab_class_t *mc);
int metaslab_class_validate(metaslab_class_t *mc);
metaslab_class_t *metaslab_class_create(spa_t *, metaslab_ops_t *);
void metaslab_class_destroy(metaslab_class_t *);
int metaslab_class_validate(metaslab_class_t *);
void metaslab_class_histogram_verify(metaslab_class_t *);
uint64_t metaslab_class_fragmentation(metaslab_class_t *);
uint64_t metaslab_class_expandable_space(metaslab_class_t *);

void metaslab_class_space_update(metaslab_class_t *mc,
int64_t alloc_delta, int64_t defer_delta,
int64_t space_delta, int64_t dspace_delta);
uint64_t metaslab_class_get_alloc(metaslab_class_t *mc);
uint64_t metaslab_class_get_space(metaslab_class_t *mc);
uint64_t metaslab_class_get_dspace(metaslab_class_t *mc);
uint64_t metaslab_class_get_deferred(metaslab_class_t *mc);
void metaslab_class_space_update(metaslab_class_t *, int64_t, int64_t,
int64_t, int64_t);
uint64_t metaslab_class_get_alloc(metaslab_class_t *);
uint64_t metaslab_class_get_space(metaslab_class_t *);
uint64_t metaslab_class_get_dspace(metaslab_class_t *);
uint64_t metaslab_class_get_deferred(metaslab_class_t *);

metaslab_group_t *metaslab_group_create(metaslab_class_t *mc, vdev_t *vd);
void metaslab_group_destroy(metaslab_group_t *mg);
void metaslab_group_activate(metaslab_group_t *mg);
void metaslab_group_passivate(metaslab_group_t *mg);
metaslab_group_t *metaslab_group_create(metaslab_class_t *, vdev_t *);
void metaslab_group_destroy(metaslab_group_t *);
void metaslab_group_activate(metaslab_group_t *);
void metaslab_group_passivate(metaslab_group_t *);
uint64_t metaslab_group_get_space(metaslab_group_t *);
void metaslab_group_histogram_verify(metaslab_group_t *);
uint64_t metaslab_group_fragmentation(metaslab_group_t *);
void metaslab_group_histogram_remove(metaslab_group_t *, metaslab_t *);

#ifdef __cplusplus
}
Expand Down
Loading