From c4dc1922fb2b8d92a6c3a76a3a9ad53815a95499 Mon Sep 17 00:00:00 2001 From: Haakan T Johansson Date: Wed, 28 Sep 2016 02:15:26 +0200 Subject: [PATCH] Capability to give different allocation thresholds for data and metadata. Example: zpool set "rotorvector=ssd<=meta:4;mixed<=64;123,hdd" pure ssd drive takes metadata <= 4 kB mixed (mirror) takes data (or metadata) <= 64 kB others (hdd) takes remainder Example II: zpool set "rotorvector=ssd<=meta:128,4;mixed<=64;123,hdd" pure ssd drive takes metadata <= 128 kB and data <= 4 kB mixed (mirror) takes data <= 64 kB [this metadata already taken by ssd] others (hdd) takes remainder --- include/sys/metaslab_impl.h | 12 ++++- module/zfs/metaslab.c | 90 +++++++++++++++++++++++++++---------- 2 files changed, 78 insertions(+), 24 deletions(-) diff --git a/include/sys/metaslab_impl.h b/include/sys/metaslab_impl.h index 8644362c302d..efcc904e4ffe 100644 --- a/include/sys/metaslab_impl.h +++ b/include/sys/metaslab_impl.h @@ -69,6 +69,15 @@ extern "C" { */ #define METASLAB_CLASS_ROTORS 5 +/* + * Number of different categories of allocations. Currently data and + * metadata. + */ +#define METASLAB_ROTOR_ALLOC_CLASS_DATA 0 +#define METASLAB_ROTOR_ALLOC_CLASS_METADATA 1 + +#define METASLAB_ROTOR_ALLOC_CLASSES 2 + struct metaslab_class { spa_t *mc_spa; metaslab_group_t *mc_rotorv[METASLAB_CLASS_ROTORS]; @@ -87,7 +96,8 @@ struct metaslab_class { uint64_t mc_histogram[RANGE_TREE_HISTOGRAM_SIZE]; /* Maximum allocation size in each rotor vector category. */ - uint64_t mc_rotvec_threshold[METASLAB_CLASS_ROTORS]; + uint64_t mc_rotvec_threshold[METASLAB_CLASS_ROTORS] + [METASLAB_ROTOR_ALLOC_CLASSES]; /* List of vdev guids to place in each rotor vector category. */ /* Should be a dynamic list. 
*/ uint64_t mc_rotvec_vdev_guids[METASLAB_CLASS_ROTORS][5]; diff --git a/module/zfs/metaslab.c b/module/zfs/metaslab.c index df03271d6698..ef317d729c30 100644 --- a/module/zfs/metaslab.c +++ b/module/zfs/metaslab.c @@ -573,6 +573,9 @@ metaslab_group_alloc_update(metaslab_group_t *mg) /* * Please do not judge the rotor vector approach based on the ugliness * of this parsing routine. :-) + * + * The configuration should be split into one option for each type list, + * and one for each limit list. */ void metaslab_parse_rotor_config(metaslab_class_t *mc, char *rotorvector) @@ -688,34 +691,68 @@ metaslab_parse_rotor_config(metaslab_class_t *mc, char *rotorvector) } if (lessthan) { - uint64_t threshold; + char *limit = lessthan+2; + + mc->mc_rotvec_threshold[nrot][0] = 0; + mc->mc_rotvec_threshold[nrot][1] = 0; + + while (limit < semicolon) { + char *comma, *nextlimit; + size_t len; + uint64_t threshold; + int alloc_class = 0; #ifdef _KERNEL - char tmpstr[64]; - size_t len = semicolon-(lessthan+2); - strncpy(tmpstr, lessthan+2, len); - tmpstr[len] = 0; + char tmpstr[64]; #endif + + nextlimit = semicolon; + comma = strchr(limit, ','); + if (comma == NULL || comma > semicolon) + comma = semicolon; + else + nextlimit = comma+1; + + len = comma-limit; + + if (len > 2 && + strncmp(limit, "meta:", 5) == 0) { + alloc_class = 1; + limit += 5; + len -= 5; + } #ifdef _KERNEL - if (kstrtoull(tmpstr, 0, &threshold) != 0) - return; /* malformed configuration */ + strncpy(tmpstr, limit, len); + tmpstr[len] = 0; +#endif +#ifdef _KERNEL + if (kstrtoull(tmpstr, 0, &threshold) != 0) + return; /* malformed configuration */ #else - char *endptr; - threshold = strtoull(lessthan+2, &endptr, 0); - if (endptr != semicolon) - return; /* malformed configuration */ + char *endptr; + threshold = strtoull(limit, &endptr, 0); + if (endptr != comma) + return; /* malformed configuration */ #endif - /* - * To live with the 32 character limit for the - * comment field, we multiply the threshold by - * 
1024 internally. - */ - mc->mc_rotvec_threshold[nrot] = threshold * 1024; + /* + * To live with the 32 character limit for the + * comment field, we multiply the threshold by + * 1024 internally. + */ + mc->mc_rotvec_threshold[nrot][alloc_class] = + threshold * 1024; + limit = nextlimit; + } + /* Metadata allowed as much as data, at least. */ + if (mc->mc_rotvec_threshold[nrot][0] > + mc->mc_rotvec_threshold[nrot][1]) + mc->mc_rotvec_threshold[nrot][1] = + mc->mc_rotvec_threshold[nrot][0]; } rotorvector = nextrotor; nrot++; } -#if 0 +#if 1 #ifdef _KERNEL { int i; @@ -723,9 +760,11 @@ metaslab_parse_rotor_config(metaslab_class_t *mc, char *rotorvector) for (i = 0; i < METASLAB_CLASS_ROTORS; i++) { int j; - printk("rotvec[%d]: limit:%llu typemask:%02x guids:", + printk("rotvec[%d]: limit: data:%5llu meta:%5llu " + "typemask:%02x guids:", i, - mc->mc_rotvec_threshold[i], + mc->mc_rotvec_threshold[i][0], + mc->mc_rotvec_threshold[i][1], mc->mc_rotvec_categories[i]); for (j = 0; j < 5 && mc->mc_rotvec_vdev_guids[i][j]; j++) { @@ -2519,7 +2558,7 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t psize, uint64_t asize, */ static int metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize, - dva_t *dva, int d, dva_t *hintdva, uint64_t txg, int flags) + dva_t *dva, int d, dva_t *hintdva, uint64_t txg, int flags, int alloc_class) { metaslab_group_t *mg, *fast_mg, *rotor; vdev_t *vd; @@ -2547,7 +2586,7 @@ metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize, nrot = 0; while (nrot < mc->mc_max_nrot) { - if (psize < mc->mc_rotvec_threshold[nrot]) + if (psize < mc->mc_rotvec_threshold[nrot][alloc_class]) break; /* Size below threshold, accept. 
*/ nrot++; } @@ -2922,6 +2961,7 @@ metaslab_alloc(spa_t *spa, metaslab_class_t *mc, uint64_t psize, blkptr_t *bp, dva_t *hintdva = hintbp->blk_dva; int d, error = 0; int i; + int alloc_class; ASSERT(bp->blk_birth == 0); ASSERT(BP_PHYSICAL_BIRTH(bp) == 0); @@ -2941,9 +2981,13 @@ metaslab_alloc(spa_t *spa, metaslab_class_t *mc, uint64_t psize, blkptr_t *bp, ASSERT(BP_GET_NDVAS(bp) == 0); ASSERT(hintbp == NULL || ndvas <= BP_GET_NDVAS(hintbp)); + alloc_class = METASLAB_ROTOR_ALLOC_CLASS_DATA; + if (DMU_OT_IS_METADATA(BP_GET_TYPE(bp))) + alloc_class = METASLAB_ROTOR_ALLOC_CLASS_METADATA; + for (d = 0; d < ndvas; d++) { error = metaslab_alloc_dva(spa, mc, psize, dva, d, hintdva, - txg, flags); + txg, flags, alloc_class); if (error != 0) { for (d--; d >= 0; d--) { metaslab_free_dva(spa, &dva[d], txg, B_TRUE);