From 86705e51933740889bb4325a45cf440ae9e5235b Mon Sep 17 00:00:00 2001 From: Haakan T Johansson Date: Sat, 1 Oct 2016 11:48:09 +0200 Subject: [PATCH] More rotors, such that more categories of allocation targets can be differentiated. Assign rotor vector categories based on pool property config instead of module parameters. --- include/sys/metaslab_impl.h | 2 +- module/zfs/metaslab.c | 104 ++++++++++++++++++++++-------------- 2 files changed, 66 insertions(+), 40 deletions(-) diff --git a/include/sys/metaslab_impl.h b/include/sys/metaslab_impl.h index f1d0ac81bded..4762da8e0b59 100644 --- a/include/sys/metaslab_impl.h +++ b/include/sys/metaslab_impl.h @@ -67,7 +67,7 @@ extern "C" { * big and less expensive. Depending on the size of an allocation, * a vdev will be chosen. */ -#define METASLAB_CLASS_ROTORS 2 +#define METASLAB_CLASS_ROTORS 5 struct metaslab_class { kmutex_t mc_lock; diff --git a/module/zfs/metaslab.c b/module/zfs/metaslab.c index aa89f7bbe426..6bd6e9a41c09 100644 --- a/module/zfs/metaslab.c +++ b/module/zfs/metaslab.c @@ -102,12 +102,6 @@ int zfs_mg_noalloc_threshold = 0; */ int zfs_mg_fragmentation_threshold = 85; -/* - * Allocate from faster vdev in pool if below threshold, allocate - * from slower vdev in pool if above threshold. - */ -int zfs_metaslab_mixed_slowsize_threshold = 0; - /* * Allow metaslabs to keep their active state as long as their fragmentation * percentage is less than or equal to zfs_metaslab_fragmentation_threshold. An @@ -580,6 +574,22 @@ metaslab_group_alloc_update(metaslab_group_t *mg) mutex_exit(&mg->mg_lock); } +/* + * Five categories, from faster to slower: + * + * 0: nonrot (SSD) disk or mirror + * 1: nonrot (SSD) raidz + * 2: mixed nonrot+rot anything (raidz makes little sense) + * 3: rot (HDD) disk or mirror + * 4: rot (HDD) raidz + */ + +#define METASLAB_ROTOR_VDEV_TYPE_SSD 0x01 +#define METASLAB_ROTOR_VDEV_TYPE_SSD_RAIDZ 0x02 +#define METASLAB_ROTOR_VDEV_TYPE_MIXED 0x04 +#define METASLAB_ROTOR_VDEV_TYPE_HDD 0x08 +#define METASLAB_ROTOR_VDEV_TYPE_HDD_RAIDZ 0x10 + /* * Please do not judge the rotor vector approach based on the ugliness * of this parsing routine. :-) @@ -657,15 +667,20 @@ metaslab_parse_rotor_config(metaslab_class_t *mc, char *rotorvector) len = comma-rotorvector; if (strncmp(rotorvector, "ssd", len) == 0) - mc->mc_rotvec_categories[nrot] |= 0x01; + mc->mc_rotvec_categories[nrot] |= + METASLAB_ROTOR_VDEV_TYPE_SSD; else if (strncmp(rotorvector, "ssd-raidz", len) == 0) - mc->mc_rotvec_categories[nrot] |= 0x02; + mc->mc_rotvec_categories[nrot] |= + METASLAB_ROTOR_VDEV_TYPE_SSD_RAIDZ; else if (strncmp(rotorvector, "mixed", len) == 0) - mc->mc_rotvec_categories[nrot] |= 0x04; + mc->mc_rotvec_categories[nrot] |= + METASLAB_ROTOR_VDEV_TYPE_MIXED; else if (strncmp(rotorvector, "hdd", len) == 0) - mc->mc_rotvec_categories[nrot] |= 0x08; + mc->mc_rotvec_categories[nrot] |= + METASLAB_ROTOR_VDEV_TYPE_HDD; else if (strncmp(rotorvector, "hdd-raidz", len) == 0) - mc->mc_rotvec_categories[nrot] |= 0x10; + mc->mc_rotvec_categories[nrot] |= + METASLAB_ROTOR_VDEV_TYPE_HDD_RAIDZ; else { /* It must be a vdev guid. */ uint64_t guid; @@ -744,25 +759,43 @@ metaslab_parse_rotor_config(metaslab_class_t *mc, char *rotorvector) } int -metaslab_vdev_rotor_category(vdev_t *vd) +metaslab_vdev_rotor_category(metaslab_class_t *mc, vdev_t *vd) { - /* - * Five categories, from faster to slower: - * - * 0: nonrot (SSD) disk or mirror - * 1: nonrot (SSD) raidz - * 2: mixed nonrot+rot anything (raidz makes little sense) - * 3: rot (HDD) disk or mirror - * 4: rot (HDD) raidz - */ + int i, j; + int type; + + /* First match on the vdev guid assignments. */ + for (i = 0; i < METASLAB_CLASS_ROTORS; i++) + for (j = 0; j < 5 && mc->mc_rotvec_vdev_guids[i][j]; j++) + if (mc->mc_rotvec_vdev_guids[i][j] == vd->vdev_guid) + return (i); + + /* Match on the kind of vdev. */ + + /* Figure out what kind we are. */ if (vd->vdev_nonrot) { - return ((vd->vdev_ops != &vdev_raidz_ops) ? 0 : 1); + type = ((vd->vdev_ops != &vdev_raidz_ops) ? + METASLAB_ROTOR_VDEV_TYPE_SSD : + METASLAB_ROTOR_VDEV_TYPE_SSD_RAIDZ); } else if (vd->vdev_nonrot_mix) { - return (2); + type = METASLAB_ROTOR_VDEV_TYPE_MIXED; } else { - return ((vd->vdev_ops != &vdev_raidz_ops) ? 3 : 4); + type = ((vd->vdev_ops != &vdev_raidz_ops) ? + METASLAB_ROTOR_VDEV_TYPE_HDD : + METASLAB_ROTOR_VDEV_TYPE_HDD_RAIDZ); } + + for (i = 0; i < METASLAB_CLASS_ROTORS; i++) + if (mc->mc_rotvec_categories[i] & type) + return (i); + + /* Assign to last category, i.e. with a (dummy) zero limit. */ + for (i = 0; i < METASLAB_CLASS_ROTORS; i++) + if (mc->mc_rotvec_threshold[i] == 0) + return (i); + + return (METASLAB_CLASS_ROTORS-1); } metaslab_group_t * @@ -810,6 +843,8 @@ metaslab_group_destroy(metaslab_group_t *mg) void metaslab_group_set_rotor_category(metaslab_group_t *mg, boolean_t failed_dev) { + metaslab_class_t *mc = mg->mg_class; + /* Already done? */ if (mg->mg_nrot != -1) return; @@ -824,11 +859,8 @@ metaslab_group_set_rotor_category(metaslab_group_t *mg, boolean_t failed_dev) */ if (failed_dev) mg->mg_nrot = METASLAB_CLASS_ROTORS-1; - else { - mg->mg_nrot = 0; - if (!mg->mg_vd->vdev_nonrot) - mg->mg_nrot = 1; - } + else + mg->mg_nrot = metaslab_vdev_rotor_category(mc, mg->mg_vd); } void @@ -2675,13 +2707,10 @@ metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize, */ nrot = 0; - if (zfs_metaslab_mixed_slowsize_threshold) { - if (psize >= zfs_metaslab_mixed_slowsize_threshold) { - nrot = 1; - - if (nrot > mc->mc_max_nrot) - nrot = mc->mc_max_nrot; - } + while (nrot < mc->mc_max_nrot) { + if (psize < mc->mc_rotvec_threshold[nrot]) + break; /* Size below threshold, accept. */ + nrot++; } for (; nrot < METASLAB_CLASS_ROTORS; nrot++) @@ -3312,7 +3341,6 @@ module_param(metaslab_debug_unload, int, 0644); module_param(metaslab_preload_enabled, int, 0644); module_param(zfs_mg_noalloc_threshold, int, 0644); module_param(zfs_mg_fragmentation_threshold, int, 0644); -module_param(zfs_metaslab_mixed_slowsize_threshold, int, 0644); module_param(zfs_metaslab_fragmentation_threshold, int, 0644); module_param(metaslab_fragmentation_factor_enabled, int, 0644); module_param(metaslab_lba_weighting_enabled, int, 0644); @@ -3331,8 +3359,6 @@ MODULE_PARM_DESC(zfs_mg_noalloc_threshold, "percentage of free space for metaslab group to allow allocation"); MODULE_PARM_DESC(zfs_mg_fragmentation_threshold, "fragmentation for metaslab group to allow allocation"); -MODULE_PARM_DESC(zfs_metaslab_mixed_slowsize_threshold, - "size threshold to choose slower (rotating) storage in mixed pool"); MODULE_PARM_DESC(zfs_metaslab_fragmentation_threshold, "fragmentation for metaslab to allow allocation");