Skip to content

Commit

Permalink
3622: behlendorf/issue-3607 - Align thread priority with Linux defaults
Browse files Browse the repository at this point in the history
Under Linux, filesystem threads responsible for handling I/O are
normally created with the maximum priority.  Non-I/O filesystem
processes run with the default priority.  ZFS should adopt the
same priority scheme under Linux to maintain good performance
and so that it will compete fairly when other Linux filesystems
are active.  The priorities have been updated to the following:

$ ps -eLo rtprio,cls,pid,pri,nice,cmd | egrep 'z_|spl_|zvol|arc|dbu|meta'
     -  TS 10743  19 -20 [spl_kmem_cache]
     -  TS 10744  19 -20 [spl_system_task]
     -  TS 10745  19 -20 [spl_dynamic_tas]
     -  TS 10764  19   0 [dbu_evict]
     -  TS 10765  19   0 [arc_prune]
     -  TS 10766  19   0 [arc_reclaim]
     -  TS 10767  19   0 [arc_user_evicts]
     -  TS 10768  19   0 [l2arc_feed]
     -  TS 10769  39   0 [z_unmount]
     -  TS 10770  39 -20 [zvol]
     -  TS 11011  39 -20 [z_null_iss]
     -  TS 11012  39 -20 [z_null_int]
     -  TS 11013  39 -20 [z_rd_iss]
     -  TS 11014  39 -20 [z_rd_int_0]
     -  TS 11022  38 -19 [z_wr_iss]
     -  TS 11023  39 -20 [z_wr_iss_h]
     -  TS 11024  39 -20 [z_wr_int_0]
     -  TS 11032  39 -20 [z_wr_int_h]
     -  TS 11033  39 -20 [z_fr_iss_0]
     -  TS 11041  39 -20 [z_fr_int]
     -  TS 11042  39 -20 [z_cl_iss]
     -  TS 11043  39 -20 [z_cl_int]
     -  TS 11044  39 -20 [z_ioctl_iss]
     -  TS 11045  39 -20 [z_ioctl_int]
     -  TS 11046  39 -20 [metaslab_group_]
     -  TS 11050  19   0 [z_iput]
     -  TS 11121  38 -19 [z_wr_iss]

Note that under Linux the meaning of a process's priority is inverted
with respect to illumos.  High values on Linux indicate a _low_ priority
while high values on illumos indicate a _high_ priority.

In order to preserve the logical meaning of the minclsyspri and
maxclsyspri macros when they are used by the illumos wrapper functions,
their values have been inverted.  This way, when changes are merged
from upstream illumos, we won't need to remember to invert the macro.
Having to invert it by hand could also lead to confusion.

Signed-off-by: Brian Behlendorf <[email protected]>
  • Loading branch information
behlendorf authored and FransUrbo committed Jul 24, 2015
1 parent 4908f3e commit 8248e1b
Show file tree
Hide file tree
Showing 11 changed files with 18 additions and 17 deletions.
5 changes: 3 additions & 2 deletions include/sys/zfs_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -615,8 +615,9 @@ extern void delay(clock_t ticks);
#define max_ncpus 64
#define boot_ncpus (sysconf(_SC_NPROCESSORS_ONLN))

#define minclsyspri 60
#define maxclsyspri 99
#define minclsyspri 99
#define maxclsyspri 60
#define defclsyspri 70

#define CPU_SEQID (pthread_self() & (max_ncpus - 1))

Expand Down
2 changes: 1 addition & 1 deletion lib/libzpool/taskq.c
Original file line number Diff line number Diff line change
Expand Up @@ -371,7 +371,7 @@ taskq_cancel_id(taskq_t *tq, taskqid_t id)
void
system_taskq_init(void)
{
system_taskq = taskq_create("system_taskq", 64, minclsyspri, 4, 512,
system_taskq = taskq_create("system_taskq", 64, maxclsyspri, 4, 512,
TASKQ_DYNAMIC | TASKQ_PREPOPULATE);
}

Expand Down
8 changes: 4 additions & 4 deletions module/zfs/arc.c
Original file line number Diff line number Diff line change
Expand Up @@ -5431,7 +5431,7 @@ arc_init(void)
mutex_init(&arc_prune_mtx, NULL, MUTEX_DEFAULT, NULL);
bzero(&arc_eviction_hdr, sizeof (arc_buf_hdr_t));

arc_prune_taskq = taskq_create("arc_prune", max_ncpus, minclsyspri,
arc_prune_taskq = taskq_create("arc_prune", max_ncpus, defclsyspri,
max_ncpus, INT_MAX, TASKQ_PREPOPULATE | TASKQ_DYNAMIC);

arc_ksp = kstat_create("zfs", 0, "arcstats", "misc", KSTAT_TYPE_NAMED,
Expand All @@ -5444,10 +5444,10 @@ arc_init(void)
}

(void) thread_create(NULL, 0, arc_reclaim_thread, NULL, 0, &p0,
TS_RUN, minclsyspri);
TS_RUN, defclsyspri);

(void) thread_create(NULL, 0, arc_user_evicts_thread, NULL, 0, &p0,
TS_RUN, minclsyspri);
TS_RUN, defclsyspri);

arc_dead = FALSE;
arc_warm = B_FALSE;
Expand Down Expand Up @@ -6954,7 +6954,7 @@ l2arc_start(void)
return;

(void) thread_create(NULL, 0, l2arc_feed_thread, NULL, 0, &p0,
TS_RUN, minclsyspri);
TS_RUN, defclsyspri);
}

void
Expand Down
2 changes: 1 addition & 1 deletion module/zfs/dbuf.c
Original file line number Diff line number Diff line change
Expand Up @@ -413,7 +413,7 @@ dbuf_init(void)
* All entries are queued via taskq_dispatch_ent(), so min/maxalloc
* configuration is not required.
*/
dbu_evict_taskq = taskq_create("dbu_evict", 1, minclsyspri, 0, 0, 0);
dbu_evict_taskq = taskq_create("dbu_evict", 1, defclsyspri, 0, 0, 0);
}

void
Expand Down
2 changes: 1 addition & 1 deletion module/zfs/dmu_objset.c
Original file line number Diff line number Diff line change
Expand Up @@ -1839,7 +1839,7 @@ dmu_objset_find_dp(dsl_pool_t *dp, uint64_t ddobj,
ntasks = dmu_find_threads;
if (ntasks == 0)
ntasks = vdev_count_leaves(dp->dp_spa) * 4;
tq = taskq_create("dmu_objset_find", ntasks, minclsyspri, ntasks,
tq = taskq_create("dmu_objset_find", ntasks, maxclsyspri, ntasks,
INT_MAX, 0);
if (tq == NULL) {
kmem_free(dcp, sizeof (*dcp));
Expand Down
2 changes: 1 addition & 1 deletion module/zfs/dsl_pool.c
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ dsl_pool_open_impl(spa_t *spa, uint64_t txg)
mutex_init(&dp->dp_lock, NULL, MUTEX_DEFAULT, NULL);
cv_init(&dp->dp_spaceavail_cv, NULL, CV_DEFAULT, NULL);

dp->dp_iput_taskq = taskq_create("z_iput", max_ncpus, minclsyspri,
dp->dp_iput_taskq = taskq_create("z_iput", max_ncpus, defclsyspri,
max_ncpus * 8, INT_MAX, TASKQ_PREPOPULATE | TASKQ_DYNAMIC);

return (dp);
Expand Down
2 changes: 1 addition & 1 deletion module/zfs/metaslab.c
Original file line number Diff line number Diff line change
Expand Up @@ -492,7 +492,7 @@ metaslab_group_create(metaslab_class_t *mc, vdev_t *vd)
mg->mg_activation_count = 0;

mg->mg_taskq = taskq_create("metaslab_group_taskq", metaslab_load_pct,
minclsyspri, 10, INT_MAX, TASKQ_THREADS_CPU_PCT | TASKQ_DYNAMIC);
maxclsyspri, 10, INT_MAX, TASKQ_THREADS_CPU_PCT | TASKQ_DYNAMIC);

return (mg);
}
Expand Down
2 changes: 1 addition & 1 deletion module/zfs/spa.c
Original file line number Diff line number Diff line change
Expand Up @@ -902,7 +902,7 @@ spa_taskqs_init(spa_t *spa, zio_type_t t, zio_taskq_type_t q)
* than the other taskqs.
*/
if (t == ZIO_TYPE_WRITE && q == ZIO_TASKQ_ISSUE)
pri--;
pri++;

tq = taskq_create_proc(name, value, pri, 50,
INT_MAX, spa->spa_proc, flags);
Expand Down
6 changes: 3 additions & 3 deletions module/zfs/txg.c
Original file line number Diff line number Diff line change
Expand Up @@ -205,15 +205,15 @@ txg_sync_start(dsl_pool_t *dp)
tx->tx_threads = 2;

tx->tx_quiesce_thread = thread_create(NULL, 0, txg_quiesce_thread,
dp, 0, &p0, TS_RUN, minclsyspri);
dp, 0, &p0, TS_RUN, defclsyspri);

/*
* The sync thread can need a larger-than-default stack size on
* 32-bit x86. This is due in part to nested pools and
* scrub_visitbp() recursion.
*/
tx->tx_sync_thread = thread_create(NULL, 32<<10, txg_sync_thread,
dp, 0, &p0, TS_RUN, minclsyspri);
dp, 0, &p0, TS_RUN, defclsyspri);

mutex_exit(&tx->tx_sync_lock);
}
Expand Down Expand Up @@ -445,7 +445,7 @@ txg_dispatch_callbacks(dsl_pool_t *dp, uint64_t txg)
* Commit callback taskq hasn't been created yet.
*/
tx->tx_commit_cb_taskq = taskq_create("tx_commit_cb",
max_ncpus, minclsyspri, max_ncpus, max_ncpus * 2,
max_ncpus, defclsyspri, max_ncpus, max_ncpus * 2,
TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
}

Expand Down
2 changes: 1 addition & 1 deletion module/zfs/zfs_ctldir.c
Original file line number Diff line number Diff line change
Expand Up @@ -1009,7 +1009,7 @@ zfsctl_shares_lookup(struct inode *dip, char *name, struct inode **ipp,
void
zfsctl_init(void)
{
zfs_expire_taskq = taskq_create("z_unmount", 1, maxclsyspri,
zfs_expire_taskq = taskq_create("z_unmount", 1, defclsyspri,
1, 8, TASKQ_PREPOPULATE);
}

Expand Down
2 changes: 1 addition & 1 deletion module/zfs/zil.c
Original file line number Diff line number Diff line change
Expand Up @@ -1888,7 +1888,7 @@ zil_open(objset_t *os, zil_get_data_t *get_data)
ASSERT(list_is_empty(&zilog->zl_lwb_list));

zilog->zl_get_data = get_data;
zilog->zl_clean_taskq = taskq_create("zil_clean", 1, minclsyspri,
zilog->zl_clean_taskq = taskq_create("zil_clean", 1, defclsyspri,
2, 2, TASKQ_PREPOPULATE);

return (zilog);
Expand Down

0 comments on commit 8248e1b

Please sign in to comment.