diff --git a/include/sys/taskq.h b/include/sys/taskq.h
index 7b44e8b8..1c91c9a3 100644
--- a/include/sys/taskq.h
+++ b/include/sys/taskq.h
@@ -61,11 +61,12 @@ typedef void (task_func_t)(void *);
 typedef struct taskq {
 	spinlock_t		tq_lock;	/* protects taskq_t */
 	unsigned long		tq_lock_flags;	/* interrupt state */
-	const char		*tq_name;	/* taskq name */
+	char			*tq_name;	/* taskq name */
 	struct list_head	tq_thread_list;	/* list of all threads */
 	struct list_head	tq_active_list;	/* list of active threads */
 	int			tq_nactive;	/* # of active threads */
-	int			tq_nthreads;	/* # of total threads */
+	int			tq_nthreads;	/* # of existing threads */
+	int			tq_maxthreads;	/* max # of threads */
 	int			tq_pri;		/* priority */
 	int			tq_minalloc;	/* min task_t pool size */
 	int			tq_maxalloc;	/* max task_t pool size */
@@ -119,7 +120,7 @@ extern void taskq_init_ent(taskq_ent_t *);
 extern taskq_t *taskq_create(const char *, int, pri_t, int, int, uint_t);
 extern void taskq_destroy(taskq_t *);
 extern void taskq_wait_id(taskq_t *, taskqid_t);
-extern void taskq_wait_all(taskq_t *, taskqid_t);
+extern void taskq_wait_outstanding(taskq_t *, taskqid_t);
 extern void taskq_wait(taskq_t *);
 extern int taskq_cancel_id(taskq_t *, taskqid_t);
 extern int taskq_member(taskq_t *, void *);
diff --git a/module/spl/spl-kmem-cache.c b/module/spl/spl-kmem-cache.c
index cd3e543b..86c26ff0 100644
--- a/module/spl/spl-kmem-cache.c
+++ b/module/spl/spl-kmem-cache.c
@@ -1725,7 +1725,9 @@ spl_kmem_cache_init(void)
 	init_rwsem(&spl_kmem_cache_sem);
 	INIT_LIST_HEAD(&spl_kmem_cache_list);
 	spl_kmem_cache_taskq = taskq_create("spl_kmem_cache",
-	    spl_kmem_cache_kmem_threads, maxclsyspri, 1, 32, TASKQ_PREPOPULATE);
+	    spl_kmem_cache_kmem_threads, maxclsyspri,
+	    spl_kmem_cache_kmem_threads * 8, INT_MAX,
+	    TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
 	spl_register_shrinker(&spl_kmem_cache_shrinker);
 
 	return (0);
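A note on the spl-kmem-cache.c hunk above: with TASKQ_DYNAMIC the nthreads argument becomes an upper bound rather than a fixed pool size, so a consumer can declare a generous thread cap without paying for idle worker threads. A minimal consumer-side sketch, assuming hypothetical my_tq/my_task names that are not part of this patch:

    #include <sys/taskq.h>

    static void
    my_task(void *arg)                  /* hypothetical task function */
    {
            /* ... perform deferred work ... */
    }

    static taskq_t *my_tq;              /* hypothetical consumer taskq */

    static int
    my_init(void)
    {
            /* Up to 16 workers, created only under sustained load. */
            my_tq = taskq_create("my_tq", 16, maxclsyspri, 16 * 8,
                INT_MAX, TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
            if (my_tq == NULL)
                    return (1);

            (void) taskq_dispatch(my_tq, my_task, NULL, TQ_SLEEP);
            return (0);
    }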
diff --git a/module/spl/spl-taskq.c b/module/spl/spl-taskq.c
index 951298d9..4d425abd 100644
--- a/module/spl/spl-taskq.c
+++ b/module/spl/spl-taskq.c
@@ -31,10 +31,22 @@
 int spl_taskq_thread_bind = 0;
 module_param(spl_taskq_thread_bind, int, 0644);
 MODULE_PARM_DESC(spl_taskq_thread_bind, "Bind taskq thread to CPU by default");
+
+int spl_taskq_thread_dynamic = 1;
+module_param(spl_taskq_thread_dynamic, int, 0644);
+MODULE_PARM_DESC(spl_taskq_thread_dynamic, "Allow dynamic taskq threads");
+
+int spl_taskq_thread_sequential = 1;
+module_param(spl_taskq_thread_sequential, int, 0644);
+MODULE_PARM_DESC(spl_taskq_thread_sequential,
+	"Create new taskq threads after N sequential tasks");
 
 /* Global system-wide dynamic task queue available for all consumers */
 taskq_t *system_taskq;
 EXPORT_SYMBOL(system_taskq);
 
+static taskq_thread_t *taskq_thread_create(taskq_t *);
+
 static int
 task_km_flags(uint_t flags)
 {
@@ -327,6 +339,33 @@ taskq_find(taskq_t *tq, taskqid_t id, int *active)
 	return (NULL);
 }
 
+/*
+ * Theory for the taskq_wait_id(), taskq_wait_outstanding(), and
+ * taskq_wait() functions below.
+ *
+ * Taskq waiting is accomplished by tracking the lowest outstanding task
+ * id and the next available task id.  As tasks are dispatched they are
+ * added to the tail of the pending, priority, or delay lists.  As worker
+ * threads become available the tasks are removed from the heads of these
+ * lists and linked to the worker threads.  This ensures the lists are
+ * kept sorted by lowest to highest task id.
+ *
+ * Therefore the lowest outstanding task id can be quickly determined by
+ * checking the head item from all of these lists.  This value is stored
+ * with the taskq as the lowest id.  It only needs to be recalculated when
+ * either the task with the current lowest id completes or is canceled.
+ *
+ * By blocking until the lowest task id exceeds the passed task id the
+ * taskq_wait_outstanding() function can be easily implemented.  Similarly,
+ * by blocking until the lowest task id matches the next task id taskq_wait()
+ * can be implemented.
+ *
+ * Callers should be aware that when there are multiple worker threads it
+ * is possible for larger task ids to complete before smaller ones.  Also
+ * when the taskq contains delay tasks with small task ids callers may
+ * block for a considerable length of time waiting for them to expire and
+ * execute.
+ */
 static int
 taskq_wait_id_check(taskq_t *tq, taskqid_t id)
 {
@@ -351,34 +390,8 @@ taskq_wait_id(taskq_t *tq, taskqid_t id)
 }
 EXPORT_SYMBOL(taskq_wait_id);
 
-/*
- * The taskq_wait() function will block until all previously submitted
- * tasks have been completed.  A previously submitted task is defined as
- * a task with a lower task id than the current task queue id.  Note that
- * all task id's are assigned monotonically at dispatch time.
- *
- * Waiting for all previous tasks to complete is accomplished by tracking
- * the lowest outstanding task id.  As tasks are dispatched they are added
- * added to the tail of the pending, priority, or delay lists.  And as
- * worker threads become available the tasks are removed from the heads
- * of these lists and linked to the worker threads.  This ensures the
- * lists are kept in lowest to highest task id order.
- *
- * Therefore the lowest outstanding task id can be quickly determined by
- * checking the head item from all of these lists.  This value is stored
- * with the task queue as the lowest id.  It only needs to be recalculated
- * when either the task with the current lowest id completes or is canceled.
- *
- * By blocking until the lowest task id exceeds the current task id when
- * the function was called we ensure all previous tasks have completed.
- *
- * NOTE: When there are multiple worked threads it is possible for larger
- * task ids to complete before smaller ones.  Conversely when the task
- * queue contains delay tasks with small task ids, you may block for a
- * considerable length of time waiting for them to expire and execute.
- */
 static int
-taskq_wait_check(taskq_t *tq, taskqid_t id)
+taskq_wait_outstanding_check(taskq_t *tq, taskqid_t id)
 {
 	int rc;
 
@@ -389,26 +402,42 @@ taskq_wait_check(taskq_t *tq, taskqid_t id)
 	return (rc);
 }
 
+/*
+ * The taskq_wait_outstanding() function will block until all tasks with a
+ * lower taskqid than the passed 'id' have been completed.  Note that all
+ * task ids are assigned monotonically at dispatch time.  Zero may be
+ * passed for the id to indicate that all tasks dispatched up to this
+ * point, but not after, should be waited for.
+ */
 void
-taskq_wait_all(taskq_t *tq, taskqid_t id)
+taskq_wait_outstanding(taskq_t *tq, taskqid_t id)
 {
-	wait_event(tq->tq_wait_waitq, taskq_wait_check(tq, id));
+	wait_event(tq->tq_wait_waitq,
+	    taskq_wait_outstanding_check(tq, id ? id : tq->tq_next_id - 1));
 }
-EXPORT_SYMBOL(taskq_wait_all);
+EXPORT_SYMBOL(taskq_wait_outstanding);
 
-void
-taskq_wait(taskq_t *tq)
+static int
+taskq_wait_check(taskq_t *tq)
 {
-	taskqid_t id;
-
-	ASSERT(tq);
+	int rc;
 
-	/* Wait for the largest outstanding taskqid */
 	spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
-	id = tq->tq_next_id - 1;
+	rc = (tq->tq_lowest_id == tq->tq_next_id);
 	spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
 
-	taskq_wait_all(tq, id);
+	return (rc);
+}
+
+/*
+ * The taskq_wait() function will block until the taskq is empty.
+ * This means that if a taskq re-dispatches work to itself taskq_wait()
+ * callers will block indefinitely.
+ */
+void
+taskq_wait(taskq_t *tq)
+{
+	wait_event(tq->tq_wait_waitq, taskq_wait_check(tq));
 }
 EXPORT_SYMBOL(taskq_wait);
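After this hunk the three wait primitives have cleanly separated semantics: taskq_wait_id() waits on one id, taskq_wait_outstanding() waits on a prefix of ids, and taskq_wait() drains the queue entirely. A hedged usage sketch, reusing the hypothetical my_task/arg names from earlier:

    /* Wait for a specific dispatch point without draining the queue. */
    taskqid_t id = taskq_dispatch(tq, my_task, arg, TQ_SLEEP);
    taskq_wait_outstanding(tq, id);

    /* Zero waits for everything dispatched so far, but not after. */
    taskq_wait_outstanding(tq, 0);

    /* Drains completely; blocks forever if tasks re-dispatch to tq. */
    taskq_wait(tq);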
@@ -417,17 +446,22 @@ taskq_member(taskq_t *tq, void *t)
 {
 	struct list_head *l;
 	taskq_thread_t *tqt;
+	int found = 0;
 
 	ASSERT(tq);
 	ASSERT(t);
 
+	spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
 	list_for_each(l, &tq->tq_thread_list) {
 		tqt = list_entry(l, taskq_thread_t, tqt_thread_list);
-		if (tqt->tqt_thread == (struct task_struct *)t)
-			return (1);
+		if (tqt->tqt_thread == (struct task_struct *)t) {
+			found = 1;
+			break;
+		}
 	}
+	spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
 
-	return (0);
+	return (found);
 }
 EXPORT_SYMBOL(taskq_member);
 
@@ -587,7 +621,6 @@ taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint_t flags,
 {
 	ASSERT(tq);
 	ASSERT(func);
-	ASSERT(!(tq->tq_flags & TASKQ_DYNAMIC));
 
 	spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
 
@@ -656,6 +689,7 @@ taskq_thread(void *args)
 	taskq_t *tq;
 	taskq_ent_t *t;
 	struct list_head *pend_list;
+	int seq_tasks = 0;
 
 	ASSERT(tqt);
 	tq = tqt->tqt_tq;
@@ -666,7 +700,13 @@ taskq_thread(void *args)
 	flush_signals(current);
 
 	spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
+
+	/* Immediately exit if more threads than allowed were created. */
+	if (tq->tq_nthreads >= tq->tq_maxthreads)
+		goto error;
+
 	tq->tq_nthreads++;
+	list_add(&tqt->tqt_thread_list, &tq->tq_thread_list);
 	wake_up(&tq->tq_wait_waitq);
 	set_current_state(TASK_INTERRUPTIBLE);
 
@@ -674,16 +714,27 @@ taskq_thread(void *args)
 		if (list_empty(&tq->tq_pend_list) &&
 		    list_empty(&tq->tq_prio_list)) {
+
+			/*
+			 * Dynamic taskqs will destroy threads which would
+			 * otherwise be idle; they are recreated as needed.
+			 */
+			if ((tq->tq_flags & TASKQ_DYNAMIC) &&
+			    (tq->tq_nthreads > 1) && spl_taskq_thread_dynamic)
+				break;
+
 			add_wait_queue_exclusive(&tq->tq_work_waitq, &wait);
 			spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
+
+			seq_tasks = 0;
 			schedule();
+
 			spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
 			remove_wait_queue(&tq->tq_work_waitq, &wait);
 		} else {
 			__set_current_state(TASK_RUNNING);
 		}
-
 		if (!list_empty(&tq->tq_prio_list))
 			pend_list = &tq->tq_prio_list;
 		else if (!list_empty(&tq->tq_pend_list))
@@ -715,14 +766,27 @@ taskq_thread(void *args)
 			/* Perform the requested task */
 			t->tqent_func(t->tqent_arg);
 
+			/*
+			 * Dynamic taskqs will create additional threads up
+			 * to tq_maxthreads when a single thread has handled
+			 * more than spl_taskq_thread_sequential tasks without
+			 * sleeping due to an empty queue.
+			 */
+			if ((++seq_tasks > spl_taskq_thread_sequential) &&
+			    (tq->tq_flags & TASKQ_DYNAMIC) &&
+			    (tq->tq_flags & TQ_ACTIVE) &&
+			    (tq->tq_nthreads < tq->tq_maxthreads)) {
+				taskq_thread_create(tq);
+				seq_tasks = 0;
+			}
+
 			spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
 			tq->tq_nactive--;
 			list_del_init(&tqt->tqt_active_list);
 			tqt->tqt_task = NULL;
 
 			/* For prealloc'd tasks, we don't free anything. */
-			if ((tq->tq_flags & TASKQ_DYNAMIC) ||
-			    !(tqt->tqt_flags & TQENT_FLAG_PREALLOC))
+			if (!(tqt->tqt_flags & TQENT_FLAG_PREALLOC))
 				task_done(tq, t);
 
 			/* When the current lowest outstanding taskqid is
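The entire growth policy lives in the conditional above. Restated as a standalone predicate for readability (a hypothetical helper, not part of the patch):

    /*
     * Grow only when this worker has run more than
     * spl_taskq_thread_sequential tasks back-to-back without sleeping,
     * the taskq is dynamic and still active, and the cap isn't reached.
     */
    static int
    taskq_should_grow(taskq_t *tq, int seq_tasks)
    {
            return ((seq_tasks > spl_taskq_thread_sequential) &&
                (tq->tq_flags & TASKQ_DYNAMIC) &&
                (tq->tq_flags & TQ_ACTIVE) &&
                (tq->tq_nthreads < tq->tq_maxthreads));
    }

Both knobs are ordinary module parameters, so they can be inspected and tuned at runtime through /sys/module/spl/parameters/ if the defaults prove too eager or too lazy for a given workload.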
@@ -744,27 +808,56 @@ taskq_thread(void *args)
 	__set_current_state(TASK_RUNNING);
 	tq->tq_nthreads--;
 	list_del_init(&tqt->tqt_thread_list);
+error:
 	kmem_free(tqt, sizeof(taskq_thread_t));
-
 	spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
 
 	return (0);
 }
 
+static taskq_thread_t *
+taskq_thread_create(taskq_t *tq)
+{
+	static int last_used_cpu = 0;
+	taskq_thread_t *tqt;
+
+	tqt = kmem_alloc(sizeof(*tqt), KM_PUSHPAGE);
+	INIT_LIST_HEAD(&tqt->tqt_thread_list);
+	INIT_LIST_HEAD(&tqt->tqt_active_list);
+	tqt->tqt_tq = tq;
+	tqt->tqt_id = 0;
+
+	tqt->tqt_thread = spl_kthread_create(taskq_thread, tqt,
+	    "%s", tq->tq_name);
+	if (tqt->tqt_thread == NULL) {
+		kmem_free(tqt, sizeof (taskq_thread_t));
+		return (NULL);
+	}
+
+	if (spl_taskq_thread_bind) {
+		last_used_cpu = (last_used_cpu + 1) % num_online_cpus();
+		kthread_bind(tqt->tqt_thread, last_used_cpu);
+	}
+
+	set_user_nice(tqt->tqt_thread, PRIO_TO_NICE(tq->tq_pri));
+	wake_up_process(tqt->tqt_thread);
+
+	return (tqt);
+}
+
 taskq_t *
 taskq_create(const char *name, int nthreads, pri_t pri,
     int minalloc, int maxalloc, uint_t flags)
 {
-	static int last_used_cpu = 0;
 	taskq_t *tq;
 	taskq_thread_t *tqt;
-	int rc = 0, i, j = 0;
+	int count = 0, rc = 0, i;
 
 	ASSERT(name != NULL);
 	ASSERT(pri <= maxclsyspri);
 	ASSERT(minalloc >= 0);
 	ASSERT(maxalloc <= INT_MAX);
-	ASSERT(!(flags & (TASKQ_CPR_SAFE | TASKQ_DYNAMIC))); /* Unsupported */
+	ASSERT(!(flags & (TASKQ_CPR_SAFE))); /* Unsupported */
 
 	/* Scale the number of threads using nthreads as a percentage */
 	if (flags & TASKQ_THREADS_CPU_PCT) {
@@ -780,19 +873,19 @@ taskq_create(const char *name, int nthreads, pri_t pri,
 		return (NULL);
 
 	spin_lock_init(&tq->tq_lock);
-	spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
 	INIT_LIST_HEAD(&tq->tq_thread_list);
 	INIT_LIST_HEAD(&tq->tq_active_list);
-	tq->tq_name = name;
-	tq->tq_nactive = 0;
-	tq->tq_nthreads = 0;
-	tq->tq_pri = pri;
-	tq->tq_minalloc = minalloc;
-	tq->tq_maxalloc = maxalloc;
-	tq->tq_nalloc = 0;
-	tq->tq_flags = (flags | TQ_ACTIVE);
-	tq->tq_next_id = 1;
-	tq->tq_lowest_id = 1;
+	tq->tq_name = strdup(name);
+	tq->tq_nactive = 0;
+	tq->tq_nthreads = 0;
+	tq->tq_maxthreads = nthreads;
+	tq->tq_pri = pri;
+	tq->tq_minalloc = minalloc;
+	tq->tq_maxalloc = maxalloc;
+	tq->tq_nalloc = 0;
+	tq->tq_flags = (flags | TQ_ACTIVE);
+	tq->tq_next_id = 1;
+	tq->tq_lowest_id = 1;
 	INIT_LIST_HEAD(&tq->tq_free_list);
 	INIT_LIST_HEAD(&tq->tq_pend_list);
 	INIT_LIST_HEAD(&tq->tq_prio_list);
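Why tq_name is now duplicated: dynamic workers are spawned long after taskq_create() returns, from taskq_thread_create(), which passes tq->tq_name to spl_kthread_create(). The taskq can no longer borrow the caller's string, so it takes ownership of a private copy, assuming SPL's strdup()/strfree() pair:

    tq->tq_name = strdup(name);     /* taken at taskq_create() time */
    /* ... */
    strfree(tq->tq_name);           /* released in taskq_destroy(), below */

One visible side effect: workers are all named after the taskq itself ("%s") instead of carrying a per-thread index ("%s/%d"), since threads now come and go over the taskq's lifetime.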
@@ -800,38 +893,28 @@
 	init_waitqueue_head(&tq->tq_work_waitq);
 	init_waitqueue_head(&tq->tq_wait_waitq);
 
-	if (flags & TASKQ_PREPOPULATE)
+	if (flags & TASKQ_PREPOPULATE) {
+		spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
+
 		for (i = 0; i < minalloc; i++)
 			task_done(tq, task_alloc(tq, TQ_PUSHPAGE | TQ_NEW));
 
-	spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
+		spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
+	}
+
+	if ((flags & TASKQ_DYNAMIC) && spl_taskq_thread_dynamic)
+		nthreads = 1;
 
 	for (i = 0; i < nthreads; i++) {
-		tqt = kmem_alloc(sizeof(*tqt), KM_PUSHPAGE);
-		INIT_LIST_HEAD(&tqt->tqt_thread_list);
-		INIT_LIST_HEAD(&tqt->tqt_active_list);
-		tqt->tqt_tq = tq;
-		tqt->tqt_id = 0;
-
-		tqt->tqt_thread = spl_kthread_create(taskq_thread, tqt,
-		    "%s/%d", name, i);
-		if (tqt->tqt_thread) {
-			list_add(&tqt->tqt_thread_list, &tq->tq_thread_list);
-			if (spl_taskq_thread_bind) {
-				last_used_cpu = (last_used_cpu + 1) % num_online_cpus();
-				kthread_bind(tqt->tqt_thread, last_used_cpu);
-			}
-			set_user_nice(tqt->tqt_thread, PRIO_TO_NICE(pri));
-			wake_up_process(tqt->tqt_thread);
-			j++;
-		} else {
-			kmem_free(tqt, sizeof(taskq_thread_t));
+		tqt = taskq_thread_create(tq);
+		if (tqt == NULL)
 			rc = 1;
-		}
+		else
+			count++;
 	}
 
 	/* Wait for all threads to be started before potential destroy */
-	wait_event(tq->tq_wait_waitq, tq->tq_nthreads == j);
+	wait_event(tq->tq_wait_waitq, tq->tq_nthreads == count);
 
 	if (rc) {
 		taskq_destroy(tq);
@@ -896,6 +979,7 @@ taskq_destroy(taskq_t *tq)
 
 	spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
 
+	strfree(tq->tq_name);
 	kmem_free(tq, sizeof(taskq_t));
 }
 EXPORT_SYMBOL(taskq_destroy);
 
@@ -903,10 +987,10 @@ EXPORT_SYMBOL(taskq_destroy);
 int
 spl_taskq_init(void)
 {
-	/* Solaris creates a dynamic taskq of up to 64 threads, however in
-	 * a Linux environment 1 thread per-core is usually about right */
-	system_taskq = taskq_create("spl_system_taskq", num_online_cpus(),
-	    minclsyspri, 4, 512, TASKQ_PREPOPULATE);
+	int threads = MAX(num_online_cpus(), 64);
+
+	system_taskq = taskq_create("spl_system_taskq", threads,
+	    minclsyspri, threads, INT_MAX, TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
 	if (system_taskq == NULL)
 		return (1);
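With dynamic taskqs available, spl_taskq_init() can return to Solaris-style sizing: a cap of MAX(num_online_cpus(), 64) workers, of which only as many as the load requires ever exist. A hedged sketch of the create-time behavior (the assertions in comments are illustrative, not patch code):

    taskq_t *tq = taskq_create("example", 64, minclsyspri, 64, INT_MAX,
        TASKQ_PREPOPULATE | TASKQ_DYNAMIC);

    /* Only a single worker is started up front (tq->tq_nthreads == 1), */
    /* but the cap is recorded for later growth (tq->tq_maxthreads == 64). */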
diff --git a/module/splat/splat-taskq.c b/module/splat/splat-taskq.c
index 2787bf40..529d0705 100644
--- a/module/splat/splat-taskq.c
+++ b/module/splat/splat-taskq.c
@@ -28,6 +28,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include "splat-internal.h"
@@ -75,6 +76,10 @@
 #define SPLAT_TASKQ_TEST10_NAME		"cancel"
 #define SPLAT_TASKQ_TEST10_DESC		"Cancel task execution"
 
+#define SPLAT_TASKQ_TEST11_ID		0x020b
+#define SPLAT_TASKQ_TEST11_NAME		"dynamic"
+#define SPLAT_TASKQ_TEST11_DESC		"Dynamic task queue thread creation"
+
 #define SPLAT_TASKQ_ORDER_MAX		8
 #define SPLAT_TASKQ_DEPTH_MAX		16
 
@@ -588,10 +593,10 @@ splat_taskq_test4(struct file *file, void *arg)
  * next pending task as soon as it completes its current task.  This
  * means that tasks do not strictly complete in order in which they
  * were dispatched (increasing task id).  This is fine but we need to
- * verify that taskq_wait_all() blocks until the passed task id and all
- * lower task ids complete.  We do this by dispatching the following
+ * verify taskq_wait_outstanding() blocks until the passed task id and
+ * all lower task ids complete.  We do this by dispatching the following
  * specific sequence of tasks each of which block for N time units.
- * We then use taskq_wait_all() to unblock at specific task id and
+ * We then use taskq_wait_outstanding() to unblock at a specific task id and
  * verify the only the expected task ids have completed and in the
  * correct order.  The two cases of interest are:
 
@@ -602,17 +607,17 @@
  *
  * The following table shows each task id and how they will be
  * scheduled.  Each rows represent one time unit and each column
- * one of the three worker threads.  The places taskq_wait_all()
+ * one of the three worker threads.  The places taskq_wait_outstanding()
  * must unblock for a specific id are identified as well as the
  * task ids which must have completed and their order.
 
- *       +-----+            <--- taskq_wait_all(tq, 8) unblocks
+ *       +-----+            <--- taskq_wait_outstanding(tq, 8) unblocks
  *       |     |                 Required Completion Order: 1,2,4,5,3,8,6,7
  * +-----+     |
  * |     |     |
  * |     |     +-----+
  * |     |     |  8  |
- * |     |     +-----+      <--- taskq_wait_all(tq, 3) unblocks
+ * |     |     +-----+      <--- taskq_wait_outstanding(tq, 3) unblocks
  * |     |  7  |     |           Required Completion Order: 1,2,4,5,3
  * |     +-----+     |
  * |  6  |     |     |
@@ -755,13 +760,13 @@ splat_taskq_test5_impl(struct file *file, void *arg, boolean_t prealloc)
 
 	splat_vprint(file, SPLAT_TASKQ_TEST5_NAME, "Taskq '%s' "
 	    "waiting for taskqid %d completion\n", tq_arg.name, 3);
-	taskq_wait_all(tq, 3);
+	taskq_wait_outstanding(tq, 3);
 
 	if ((rc = splat_taskq_test_order(&tq_arg, order1)))
 		goto out;
 
 	splat_vprint(file, SPLAT_TASKQ_TEST5_NAME, "Taskq '%s' "
 	    "waiting for taskqid %d completion\n", tq_arg.name, 8);
-	taskq_wait_all(tq, 8);
+	taskq_wait_outstanding(tq, 8);
 
 	rc = splat_taskq_test_order(&tq_arg, order2);
 out:
@@ -923,7 +928,7 @@ splat_taskq_test6_impl(struct file *file, void *arg, boolean_t prealloc)
 	splat_vprint(file, SPLAT_TASKQ_TEST6_NAME, "Taskq '%s' "
 	    "waiting for taskqid %d completion\n", tq_arg.name,
 	    SPLAT_TASKQ_ORDER_MAX);
-	taskq_wait_all(tq, SPLAT_TASKQ_ORDER_MAX);
+	taskq_wait_outstanding(tq, SPLAT_TASKQ_ORDER_MAX);
 	rc = splat_taskq_test_order(&tq_arg, order);
 out:
@@ -1030,7 +1035,7 @@ splat_taskq_test7_impl(struct file *file, void *arg, boolean_t prealloc)
 	if (tq_arg->flag == 0) {
 		splat_vprint(file, SPLAT_TASKQ_TEST7_NAME,
 		    "Taskq '%s' waiting\n", tq_arg->name);
-		taskq_wait_all(tq, SPLAT_TASKQ_DEPTH_MAX);
+		taskq_wait_outstanding(tq, SPLAT_TASKQ_DEPTH_MAX);
 	}
 
 	error = (tq_arg->depth == SPLAT_TASKQ_DEPTH_MAX ? 0 : -EINVAL);
@@ -1052,21 +1057,15 @@
 
 	rc = splat_taskq_test7_impl(file, arg, B_FALSE);
 	if (rc)
-		return rc;
+		return (rc);
 
 	rc = splat_taskq_test7_impl(file, arg, B_TRUE);
 
-	return rc;
+	return (rc);
 }
 
-/*
- * Create a taskq with 100 threads and dispatch a huge number of trivial
- * tasks to generate contention on tq->tq_lock.  This test should always
- * pass.  The purpose is to provide a benchmark for measuring the
- * effectiveness of taskq optimizations.
- */
 static void
-splat_taskq_test8_func(void *arg)
+splat_taskq_throughput_func(void *arg)
 {
 	splat_taskq_arg_t *tq_arg = (splat_taskq_arg_t *)arg;
 	ASSERT(tq_arg);
@@ -1074,98 +1073,105 @@
 	atomic_inc(tq_arg->count);
 }
 
-#define TEST8_NUM_TASKS			0x20000
-#define TEST8_THREADS_PER_TASKQ		100
-
 static int
-splat_taskq_test8_common(struct file *file, void *arg, int minalloc,
-    int maxalloc)
+splat_taskq_throughput(struct file *file, void *arg, const char *name,
+    int nthreads, int minalloc, int maxalloc, int flags, int tasks,
+    struct timespec *delta)
 {
 	taskq_t *tq;
 	taskqid_t id;
 	splat_taskq_arg_t tq_arg;
 	taskq_ent_t **tqes;
 	atomic_t count;
+	struct timespec start, stop;
 	int i, j, rc = 0;
 
-	tqes = vmalloc(sizeof(*tqes) * TEST8_NUM_TASKS);
+	tqes = vmalloc(sizeof (*tqes) * tasks);
 	if (tqes == NULL)
-		return -ENOMEM;
-	memset(tqes, 0, sizeof(*tqes) * TEST8_NUM_TASKS);
-
-	splat_vprint(file, SPLAT_TASKQ_TEST8_NAME,
-	    "Taskq '%s' creating (%d/%d/%d)\n",
-	    SPLAT_TASKQ_TEST8_NAME,
-	    minalloc, maxalloc, TEST8_NUM_TASKS);
-	if ((tq = taskq_create(SPLAT_TASKQ_TEST8_NAME, TEST8_THREADS_PER_TASKQ,
-	    maxclsyspri, minalloc, maxalloc,
-	    TASKQ_PREPOPULATE)) == NULL) {
-		splat_vprint(file, SPLAT_TASKQ_TEST8_NAME,
-		    "Taskq '%s' create failed\n",
-		    SPLAT_TASKQ_TEST8_NAME);
+		return (-ENOMEM);
+
+	memset(tqes, 0, sizeof (*tqes) * tasks);
+
+	splat_vprint(file, name, "Taskq '%s' creating (%d/%d/%d/%d)\n",
+	    name, nthreads, minalloc, maxalloc, tasks);
+	if ((tq = taskq_create(name, nthreads, maxclsyspri,
+	    minalloc, maxalloc, flags)) == NULL) {
+		splat_vprint(file, name, "Taskq '%s' create failed\n", name);
 		rc = -EINVAL;
 		goto out_free;
 	}
 
 	tq_arg.file = file;
-	tq_arg.name = SPLAT_TASKQ_TEST8_NAME;
+	tq_arg.name = name;
 	tq_arg.count = &count;
 	atomic_set(tq_arg.count, 0);
 
-	for (i = 0; i < TEST8_NUM_TASKS; i++) {
-		tqes[i] = kmalloc(sizeof(taskq_ent_t), GFP_KERNEL);
+	getnstimeofday(&start);
+
+	for (i = 0; i < tasks; i++) {
+		tqes[i] = kmalloc(sizeof (taskq_ent_t), GFP_KERNEL);
 		if (tqes[i] == NULL) {
 			rc = -ENOMEM;
 			goto out;
 		}
-		taskq_init_ent(tqes[i]);
-
-		taskq_dispatch_ent(tq, splat_taskq_test8_func,
-		    &tq_arg, TQ_SLEEP, tqes[i]);
+
+		taskq_init_ent(tqes[i]);
+		taskq_dispatch_ent(tq, splat_taskq_throughput_func,
+		    &tq_arg, TQ_SLEEP, tqes[i]);
 
 		id = tqes[i]->tqent_id;
 		if (id == 0) {
-			splat_vprint(file, SPLAT_TASKQ_TEST8_NAME,
-			    "Taskq '%s' function '%s' dispatch "
-			    "%d failed\n", tq_arg.name,
-			    sym2str(splat_taskq_test8_func), i);
-			rc = -EINVAL;
-			goto out;
+			splat_vprint(file, name, "Taskq '%s' function '%s' "
			    "dispatch %d failed\n", tq_arg.name,
+			    sym2str(splat_taskq_throughput_func), i);
+			rc = -EINVAL;
+			goto out;
 		}
 	}
 
-	splat_vprint(file, SPLAT_TASKQ_TEST8_NAME, "Taskq '%s' "
-	    "waiting for %d dispatches\n", tq_arg.name,
-	    TEST8_NUM_TASKS);
+	splat_vprint(file, name, "Taskq '%s' waiting for %d dispatches\n",
+	    tq_arg.name, tasks);
+
 	taskq_wait(tq);
 
-	splat_vprint(file, SPLAT_TASKQ_TEST8_NAME, "Taskq '%s' "
-	    "%d/%d dispatches finished\n", tq_arg.name,
-	    atomic_read(tq_arg.count), TEST8_NUM_TASKS);
-	if (atomic_read(tq_arg.count) != TEST8_NUM_TASKS)
+	if (delta != NULL) {
+		getnstimeofday(&stop);
+		*delta = timespec_sub(stop, start);
+	}
+
+	splat_vprint(file, name, "Taskq '%s' %d/%d dispatches finished\n",
+	    tq_arg.name, atomic_read(tq_arg.count), tasks);
+
+	if (atomic_read(tq_arg.count) != tasks)
 		rc = -ERANGE;
 out:
-	splat_vprint(file, SPLAT_TASKQ_TEST8_NAME, "Taskq '%s' destroying\n",
-	    tq_arg.name);
+	splat_vprint(file, name, "Taskq '%s' destroying\n", tq_arg.name);
 	taskq_destroy(tq);
 
 out_free:
-	for (j = 0; j < TEST8_NUM_TASKS && tqes[j] != NULL; j++)
+	for (j = 0; j < tasks && tqes[j] != NULL; j++)
 		kfree(tqes[j]);
+
 	vfree(tqes);
 
-	return rc;
+	return (rc);
 }
 
+/*
+ * Create a taskq with 100 threads and dispatch a huge number of trivial
+ * tasks to generate contention on tq->tq_lock.  This test should always
+ * pass.  The purpose is to provide a benchmark for measuring the
+ * effectiveness of taskq optimizations.
+ */
+#define TEST8_NUM_TASKS			0x20000
+#define TEST8_THREADS_PER_TASKQ		100
+
 static int
 splat_taskq_test8(struct file *file, void *arg)
 {
-	int rc;
-
-	rc = splat_taskq_test8_common(file, arg, 1, 100);
-
-	return rc;
+	return (splat_taskq_throughput(file, arg,
+	    SPLAT_TASKQ_TEST8_NAME, TEST8_THREADS_PER_TASKQ,
+	    1, INT_MAX, TASKQ_PREPOPULATE, TEST8_NUM_TASKS, NULL));
 }
 
 /*
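The old test8-specific body is now a reusable harness: thread count, allocation bounds, flags, task count, and an optional timing out-parameter are all arguments. A hedged call sketch with hypothetical values, not taken from the patch:

    struct timespec delta;
    int rc;

    /* Time 0x20000 trivial dispatches on a 4-thread static taskq. */
    rc = splat_taskq_throughput(file, arg, "bench-tq", 4, 1, INT_MAX,
        TASKQ_PREPOPULATE, 0x20000, &delta);
    /* rc == 0 on success; delta holds the wall-clock run time. */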
@@ -1433,6 +1439,46 @@ splat_taskq_test10(struct file *file, void *arg)
 	return rc;
 }
 
+/*
+ * Create a dynamic taskq with 100 threads and dispatch a huge number of
+ * trivial tasks.  This will cause the taskq to grow quickly to its max
+ * thread count.  This test should always pass.  The purpose is to provide
+ * a benchmark for measuring the performance of dynamic taskqs.
+ */
+#define TEST11_NUM_TASKS		100000
+#define TEST11_THREADS_PER_TASKQ	100
+
+static int
+splat_taskq_test11(struct file *file, void *arg)
+{
+	struct timespec normal, dynamic;
+	int error;
+
+	error = splat_taskq_throughput(file, arg, SPLAT_TASKQ_TEST11_NAME,
+	    TEST11_THREADS_PER_TASKQ, 1, INT_MAX,
+	    TASKQ_PREPOPULATE, TEST11_NUM_TASKS, &normal);
+	if (error)
+		return (error);
+
+	error = splat_taskq_throughput(file, arg, SPLAT_TASKQ_TEST11_NAME,
+	    TEST11_THREADS_PER_TASKQ, 1, INT_MAX,
+	    TASKQ_PREPOPULATE | TASKQ_DYNAMIC, TEST11_NUM_TASKS, &dynamic);
+	if (error)
+		return (error);
+
+	splat_vprint(file, SPLAT_TASKQ_TEST11_NAME,
+	    "Timing taskq_wait(): normal=%ld.%09lds, dynamic=%ld.%09lds\n",
+	    normal.tv_sec, normal.tv_nsec,
+	    dynamic.tv_sec, dynamic.tv_nsec);
+
+	/* A 1% increase in runtime is considered a failure. */
+	if ((dynamic.tv_sec * NANOSEC + dynamic.tv_nsec) >
+	    ((normal.tv_sec * NANOSEC + normal.tv_nsec) * 101 / 100))
+		error = -ETIME;
+
+	return (error);
+}
+
 splat_subsystem_t *
 splat_taskq_init(void)
 {
@@ -1470,6 +1516,8 @@ splat_taskq_init(void)
 	    SPLAT_TASKQ_TEST9_ID, splat_taskq_test9);
 	SPLAT_TEST_INIT(sub, SPLAT_TASKQ_TEST10_NAME, SPLAT_TASKQ_TEST10_DESC,
 	    SPLAT_TASKQ_TEST10_ID, splat_taskq_test10);
+	SPLAT_TEST_INIT(sub, SPLAT_TASKQ_TEST11_NAME, SPLAT_TASKQ_TEST11_DESC,
+	    SPLAT_TASKQ_TEST11_ID, splat_taskq_test11);
 
 	return sub;
 }
@@ -1478,6 +1526,7 @@ void
 splat_taskq_fini(splat_subsystem_t *sub)
 {
 	ASSERT(sub);
+	SPLAT_TEST_FINI(sub, SPLAT_TASKQ_TEST11_ID);
 	SPLAT_TEST_FINI(sub, SPLAT_TASKQ_TEST10_ID);
 	SPLAT_TEST_FINI(sub, SPLAT_TASKQ_TEST9_ID);
 	SPLAT_TEST_FINI(sub, SPLAT_TASKQ_TEST8_ID);
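The pass/fail criterion in test11 reduces both runs to nanoseconds before comparing, so the 1% margin is exact integer arithmetic rather than a per-field timespec comparison. Spelled out, with the same arithmetic as the test and hypothetical local names:

    /* Total runtimes in nanoseconds; NANOSEC is 1e9 from sys/time.h. */
    uint64_t n = normal.tv_sec * NANOSEC + normal.tv_nsec;
    uint64_t d = dynamic.tv_sec * NANOSEC + dynamic.tv_nsec;

    if (d > n * 101 / 100)          /* dynamic run more than 1% slower */
            error = -ETIME;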