From 6e393089b6b284b36dca7fec8a72ac89646e3d9b Mon Sep 17 00:00:00 2001 From: Isaac Huang Date: Tue, 18 Apr 2017 16:49:11 -0600 Subject: [PATCH] CORRAIDZ-296 ZFS: use adjusted size for dRAID metaslab weight dRAID metaslabs start at a certain alignment, which causes their sizes to vary by a few sectors. The block allocator may get confused and pick a distant metaslab because the closer ones are slightly smaller. The small variance doesn't matter when the metaslab has already been allocated from. This fix uses an adjusted size to calculate the dRAID metaslab weight. Change-Id: I4546015d21aafef847e14b275a011c2689b67c6a Signed-off-by: Isaac Huang Reviewed-by: Don Brady --- module/zfs/metaslab.c | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/module/zfs/metaslab.c b/module/zfs/metaslab.c index 02c265c62e7b..024ae5f4a069 100644 --- a/module/zfs/metaslab.c +++ b/module/zfs/metaslab.c @@ -201,7 +201,6 @@ boolean_t metaslab_trace_enabled = B_TRUE; uint64_t metaslab_trace_max_entries = 5000; #endif -static uint64_t metaslab_weight(metaslab_t *); static void metaslab_set_fragmentation(metaslab_t *); #ifdef _METASLAB_TRACING @@ -1653,6 +1652,30 @@ metaslab_set_fragmentation(metaslab_t *msp) msp->ms_fragmentation = fragmentation; } +/* + * dRAID metaslabs start at a certain alignment, which causes their sizes to + * vary by a few sectors. The block allocator may get confused and pick a + * distant metaslab because the closer ones are slightly smaller. The small + * variance doesn't matter when the metaslab has already been allocated from. + * + * This function returns the adjusted size used to calculate metaslab weight, + * and should not be used for any other purpose. 
+ */ +static uint64_t +metaslab_weight_size(metaslab_t *msp) +{ + vdev_t *vd = msp->ms_group->mg_vd; + uint64_t size; + + if (vd->vdev_ops != &vdev_draid_ops || + space_map_allocated(msp->ms_sm) != 0) + return (msp->ms_size); + + size = 1ULL << vd->vdev_ms_shift; + ASSERT3U(size, >=, msp->ms_size); + return (size); +} + /* * Compute a weight -- a selection preference value -- for the given metaslab. * This is based on the amount of free space, the level of fragmentation, @@ -1671,7 +1694,7 @@ metaslab_space_weight(metaslab_t *msp) /* * The baseline weight is the metaslab's free space. */ - space = msp->ms_size - space_map_allocated(msp->ms_sm); + space = metaslab_weight_size(msp) - space_map_allocated(msp->ms_sm); if (metaslab_fragmentation_factor_enabled && msp->ms_fragmentation != ZFS_FRAG_INVALID) { @@ -1809,7 +1832,7 @@ metaslab_segment_weight(metaslab_t *msp) * The metaslab is completely free. */ if (space_map_allocated(msp->ms_sm) == 0) { - int idx = highbit64(msp->ms_size) - 1; + int idx = highbit64(metaslab_weight_size(msp)) - 1; int max_idx = SPACE_MAP_HISTOGRAM_SIZE + shift - 1; if (idx < max_idx) { @@ -1885,6 +1908,7 @@ metaslab_should_allocate(metaslab_t *msp, uint64_t asize) } return (should_allocate); } + static uint64_t metaslab_weight(metaslab_t *msp) {