From 1e33ac1e2677c898a0b5ef6207048c692cb51bf4 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Thu, 26 Aug 2010 10:43:27 -0700 Subject: [PATCH] Fix Solaris thread dependency by using pthreads This is a portability change which removes the dependence of the Solaris thread library. All locations where Solaris thread API was used before have been replaced with equivilant Solaris kernel style thread calls. In user space the kernel style threading API is implemented in term of the portable pthreads library. This includes all threads, mutexs, condition variables, reader/writer locks, and taskqs. Signed-off-by: Brian Behlendorf --- cmd/ztest/ztest.c | 239 ++++++++--------- lib/libuutil/uu_misc.c | 7 +- lib/libzpool/include/sys/zfs_context.h | 123 +++++---- lib/libzpool/kernel.c | 340 +++++++++++++++++++------ lib/libzpool/taskq.c | 22 +- 5 files changed, 480 insertions(+), 251 deletions(-) diff --git a/cmd/ztest/ztest.c b/cmd/ztest/ztest.c index b5cee5b3bd8d..930342d2e421 100644 --- a/cmd/ztest/ztest.c +++ b/cmd/ztest/ztest.c @@ -57,6 +57,9 @@ * the transaction group number is less than the current, open txg. * If you add a new test, please do this if applicable. * + * (7) Threads are created with a reduced stack size, for sanity checking. + * Therefore, it's important not to allocate huge buffers on the stack. + * * When run with no arguments, ztest runs for about five minutes and * produces no output if successful. To get a little bit of information, * specify -V. To get more information, specify -VV, and so on. @@ -168,8 +171,8 @@ typedef enum { typedef struct rll { void *rll_writer; int rll_readers; - mutex_t rll_lock; - cond_t rll_cv; + kmutex_t rll_lock; + kcondvar_t rll_cv; } rll_t; typedef struct rl { @@ -206,7 +209,7 @@ typedef struct ztest_ds { uint64_t zd_seq; ztest_od_t *zd_od; /* debugging aid */ char zd_name[MAXNAMELEN]; - mutex_t zd_dirobj_lock; + kmutex_t zd_dirobj_lock; rll_t zd_object_lock[ZTEST_OBJECT_LOCKS]; rll_t zd_range_lock[ZTEST_RANGE_LOCKS]; } ztest_ds_t; @@ -299,8 +302,8 @@ ztest_info_t ztest_info[] = { * The callbacks are ordered by txg number. */ typedef struct ztest_cb_list { - mutex_t zcl_callbacks_lock; - list_t zcl_callbacks; + kmutex_t zcl_callbacks_lock; + list_t zcl_callbacks; } ztest_cb_list_t; /* @@ -319,8 +322,8 @@ typedef struct ztest_shared { uint64_t zs_vdev_aux; uint64_t zs_alloc; uint64_t zs_space; - mutex_t zs_vdev_lock; - rwlock_t zs_name_lock; + kmutex_t zs_vdev_lock; + krwlock_t zs_name_lock; ztest_info_t zs_info[ZTEST_FUNCS]; uint64_t zs_splits; uint64_t zs_mirrors; @@ -892,8 +895,8 @@ ztest_rll_init(rll_t *rll) { rll->rll_writer = NULL; rll->rll_readers = 0; - VERIFY(_mutex_init(&rll->rll_lock, USYNC_THREAD, NULL) == 0); - VERIFY(cond_init(&rll->rll_cv, USYNC_THREAD, NULL) == 0); + mutex_init(&rll->rll_lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&rll->rll_cv, NULL, CV_DEFAULT, NULL); } static void @@ -901,32 +904,32 @@ ztest_rll_destroy(rll_t *rll) { ASSERT(rll->rll_writer == NULL); ASSERT(rll->rll_readers == 0); - VERIFY(_mutex_destroy(&rll->rll_lock) == 0); - VERIFY(cond_destroy(&rll->rll_cv) == 0); + mutex_destroy(&rll->rll_lock); + cv_destroy(&rll->rll_cv); } static void ztest_rll_lock(rll_t *rll, rl_type_t type) { - VERIFY(mutex_lock(&rll->rll_lock) == 0); + mutex_enter(&rll->rll_lock); if (type == RL_READER) { while (rll->rll_writer != NULL) - (void) cond_wait(&rll->rll_cv, &rll->rll_lock); + (void) cv_wait(&rll->rll_cv, &rll->rll_lock); rll->rll_readers++; } else { while (rll->rll_writer != NULL || rll->rll_readers) - (void) cond_wait(&rll->rll_cv, &rll->rll_lock); + (void) cv_wait(&rll->rll_cv, &rll->rll_lock); rll->rll_writer = curthread; } - VERIFY(mutex_unlock(&rll->rll_lock) == 0); + mutex_exit(&rll->rll_lock); } static void ztest_rll_unlock(rll_t *rll) { - VERIFY(mutex_lock(&rll->rll_lock) == 0); + mutex_enter(&rll->rll_lock); if (rll->rll_writer) { ASSERT(rll->rll_readers == 0); @@ -938,9 +941,9 @@ ztest_rll_unlock(rll_t *rll) } if (rll->rll_writer == NULL && rll->rll_readers == 0) - VERIFY(cond_broadcast(&rll->rll_cv) == 0); + cv_broadcast(&rll->rll_cv); - VERIFY(mutex_unlock(&rll->rll_lock) == 0); + mutex_exit(&rll->rll_lock); } static void @@ -997,7 +1000,7 @@ ztest_zd_init(ztest_ds_t *zd, objset_t *os) dmu_objset_name(os, zd->zd_name); int l; - VERIFY(_mutex_init(&zd->zd_dirobj_lock, USYNC_THREAD, NULL) == 0); + mutex_init(&zd->zd_dirobj_lock, NULL, MUTEX_DEFAULT, NULL); for (l = 0; l < ZTEST_OBJECT_LOCKS; l++) ztest_rll_init(&zd->zd_object_lock[l]); @@ -1011,7 +1014,7 @@ ztest_zd_fini(ztest_ds_t *zd) { int l; - VERIFY(_mutex_destroy(&zd->zd_dirobj_lock) == 0); + mutex_destroy(&zd->zd_dirobj_lock); for (l = 0; l < ZTEST_OBJECT_LOCKS; l++) ztest_rll_destroy(&zd->zd_object_lock[l]); @@ -1754,7 +1757,7 @@ ztest_lookup(ztest_ds_t *zd, ztest_od_t *od, int count) int error; int i; - ASSERT(_mutex_held(&zd->zd_dirobj_lock)); + ASSERT(mutex_held(&zd->zd_dirobj_lock)); for (i = 0; i < count; i++, od++) { od->od_object = 0; @@ -1795,7 +1798,7 @@ ztest_create(ztest_ds_t *zd, ztest_od_t *od, int count) int missing = 0; int i; - ASSERT(_mutex_held(&zd->zd_dirobj_lock)); + ASSERT(mutex_held(&zd->zd_dirobj_lock)); for (i = 0; i < count; i++, od++) { if (missing) { @@ -1841,7 +1844,7 @@ ztest_remove(ztest_ds_t *zd, ztest_od_t *od, int count) int error; int i; - ASSERT(_mutex_held(&zd->zd_dirobj_lock)); + ASSERT(mutex_held(&zd->zd_dirobj_lock)); od += count - 1; @@ -2057,13 +2060,13 @@ ztest_object_init(ztest_ds_t *zd, ztest_od_t *od, size_t size, boolean_t remove) int count = size / sizeof (*od); int rv = 0; - VERIFY(mutex_lock(&zd->zd_dirobj_lock) == 0); + mutex_enter(&zd->zd_dirobj_lock); if ((ztest_lookup(zd, od, count) != 0 || remove) && (ztest_remove(zd, od, count) != 0 || ztest_create(zd, od, count) != 0)) rv = -1; zd->zd_od = od; - VERIFY(mutex_unlock(&zd->zd_dirobj_lock) == 0); + mutex_exit(&zd->zd_dirobj_lock); return (rv); } @@ -2119,7 +2122,7 @@ ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id) * Attempt to create an existing pool. It shouldn't matter * what's in the nvroot; we should fail with EEXIST. */ - (void) rw_rdlock(&zs->zs_name_lock); + (void) rw_enter(&zs->zs_name_lock, RW_READER); nvroot = make_vdev_root("/dev/bogus", NULL, 0, 0, 0, 0, 0, 1); VERIFY3U(EEXIST, ==, spa_create(zs->zs_pool, nvroot, NULL, NULL, NULL)); nvlist_free(nvroot); @@ -2127,7 +2130,7 @@ ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id) VERIFY3U(EBUSY, ==, spa_destroy(zs->zs_pool)); spa_close(spa, FTAG); - (void) rw_unlock(&zs->zs_name_lock); + (void) rw_exit(&zs->zs_name_lock); } static vdev_t * @@ -2181,7 +2184,7 @@ ztest_vdev_add_remove(ztest_ds_t *zd, uint64_t id) nvlist_t *nvroot; int error; - VERIFY(mutex_lock(&zs->zs_vdev_lock) == 0); + mutex_enter(&zs->zs_vdev_lock); leaves = MAX(zs->zs_mirrors + zs->zs_splits, 1) * zopt_raidz; spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); @@ -2207,9 +2210,9 @@ ztest_vdev_add_remove(ztest_ds_t *zd, uint64_t id) * dmu_objset_destroy() to fail with EBUSY thus * leaving the dataset in an inconsistent state. */ - VERIFY(rw_wrlock(&ztest_shared->zs_name_lock) == 0); + rw_enter(&ztest_shared->zs_name_lock, RW_WRITER); error = spa_vdev_remove(spa, guid, B_FALSE); - VERIFY(rw_unlock(&ztest_shared->zs_name_lock) == 0); + rw_exit(&ztest_shared->zs_name_lock); if (error && error != EEXIST) fatal(0, "spa_vdev_remove() = %d", error); @@ -2231,7 +2234,7 @@ ztest_vdev_add_remove(ztest_ds_t *zd, uint64_t id) fatal(0, "spa_vdev_add() = %d", error); } - VERIFY(mutex_unlock(&ztest_shared->zs_vdev_lock) == 0); + mutex_exit(&ztest_shared->zs_vdev_lock); } /* @@ -2257,7 +2260,7 @@ ztest_vdev_aux_add_remove(ztest_ds_t *zd, uint64_t id) aux = ZPOOL_CONFIG_L2CACHE; } - VERIFY(mutex_lock(&zs->zs_vdev_lock) == 0); + mutex_enter(&zs->zs_vdev_lock); spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); @@ -2313,7 +2316,7 @@ ztest_vdev_aux_add_remove(ztest_ds_t *zd, uint64_t id) fatal(0, "spa_vdev_remove(%llu) = %d", guid, error); } - VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0); + mutex_exit(&zs->zs_vdev_lock); } /* @@ -2330,11 +2333,11 @@ ztest_split_pool(ztest_ds_t *zd, uint64_t id) uint_t c, children, schildren = 0, lastlogid = 0; int error = 0; - VERIFY(mutex_lock(&zs->zs_vdev_lock) == 0); + mutex_enter(&zs->zs_vdev_lock); /* ensure we have a useable config; mirrors of raidz aren't supported */ if (zs->zs_mirrors < 3 || zopt_raidz > 1) { - VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0); + mutex_exit(&zs->zs_vdev_lock); return; } @@ -2393,9 +2396,9 @@ ztest_split_pool(ztest_ds_t *zd, uint64_t id) spa_config_exit(spa, SCL_VDEV, FTAG); - (void) rw_wrlock(&zs->zs_name_lock); + (void) rw_enter(&zs->zs_name_lock, RW_WRITER); error = spa_vdev_split_mirror(spa, "splitp", config, NULL, B_FALSE); - (void) rw_unlock(&zs->zs_name_lock); + (void) rw_exit(&zs->zs_name_lock); nvlist_free(config); @@ -2408,7 +2411,7 @@ ztest_split_pool(ztest_ds_t *zd, uint64_t id) ++zs->zs_splits; --zs->zs_mirrors; } - VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0); + mutex_exit(&zs->zs_vdev_lock); } @@ -2437,7 +2440,7 @@ ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id) int oldvd_is_log; int error, expected_error; - VERIFY(mutex_lock(&zs->zs_vdev_lock) == 0); + mutex_enter(&zs->zs_vdev_lock); leaves = MAX(zs->zs_mirrors, 1) * zopt_raidz; spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); @@ -2498,7 +2501,7 @@ ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id) if (error != 0 && error != ENODEV && error != EBUSY && error != ENOTSUP) fatal(0, "detach (%s) returned %d", oldpath, error); - VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0); + mutex_exit(&zs->zs_vdev_lock); return; } @@ -2591,7 +2594,7 @@ ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id) (longlong_t)newsize, replacing, error, expected_error); } - VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0); + mutex_exit(&zs->zs_vdev_lock); } /* @@ -2722,7 +2725,7 @@ ztest_vdev_LUN_growth(ztest_ds_t *zd, uint64_t id) uint64_t top; uint64_t old_class_space, new_class_space, old_ms_count, new_ms_count; - VERIFY(mutex_lock(&zs->zs_vdev_lock) == 0); + mutex_enter(&zs->zs_vdev_lock); spa_config_enter(spa, SCL_STATE, spa, RW_READER); top = ztest_random_vdev_top(spa, B_TRUE); @@ -2750,7 +2753,7 @@ ztest_vdev_LUN_growth(ztest_ds_t *zd, uint64_t id) if (tvd->vdev_state != VDEV_STATE_HEALTHY || psize == 0 || psize >= 4 * zopt_vdev_size) { spa_config_exit(spa, SCL_STATE, spa); - VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0); + mutex_exit(&zs->zs_vdev_lock); return; } ASSERT(psize > 0); @@ -2775,7 +2778,7 @@ ztest_vdev_LUN_growth(ztest_ds_t *zd, uint64_t id) "the vdev configuration changed.\n"); } spa_config_exit(spa, SCL_STATE, spa); - VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0); + mutex_exit(&zs->zs_vdev_lock); return; } @@ -2809,7 +2812,7 @@ ztest_vdev_LUN_growth(ztest_ds_t *zd, uint64_t id) "intervening vdev offline or remove.\n"); } spa_config_exit(spa, SCL_STATE, spa); - VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0); + mutex_exit(&zs->zs_vdev_lock); return; } @@ -2837,7 +2840,7 @@ ztest_vdev_LUN_growth(ztest_ds_t *zd, uint64_t id) } spa_config_exit(spa, SCL_STATE, spa); - VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0); + mutex_exit(&zs->zs_vdev_lock); } /* @@ -2945,7 +2948,7 @@ ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id) zilog_t *zilog; int i; - (void) rw_rdlock(&zs->zs_name_lock); + (void) rw_enter(&zs->zs_name_lock, RW_READER); (void) snprintf(name, MAXNAMELEN, "%s/temp_%llu", zs->zs_pool, (u_longlong_t)id); @@ -2983,7 +2986,7 @@ ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id) if (error) { if (error == ENOSPC) { ztest_record_enospc(FTAG); - (void) rw_unlock(&zs->zs_name_lock); + (void) rw_exit(&zs->zs_name_lock); return; } fatal(0, "dmu_objset_create(%s) = %d", name, error); @@ -3032,7 +3035,7 @@ ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id) dmu_objset_disown(os, FTAG); ztest_zd_fini(&zdtmp); - (void) rw_unlock(&zs->zs_name_lock); + (void) rw_exit(&zs->zs_name_lock); } /* @@ -3043,10 +3046,10 @@ ztest_dmu_snapshot_create_destroy(ztest_ds_t *zd, uint64_t id) { ztest_shared_t *zs = ztest_shared; - (void) rw_rdlock(&zs->zs_name_lock); + (void) rw_enter(&zs->zs_name_lock, RW_READER); (void) ztest_snapshot_destroy(zd->zd_name, id); (void) ztest_snapshot_create(zd->zd_name, id); - (void) rw_unlock(&zs->zs_name_lock); + (void) rw_exit(&zs->zs_name_lock); } /* @@ -3107,7 +3110,7 @@ ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id) char *osname = zd->zd_name; int error; - (void) rw_rdlock(&zs->zs_name_lock); + (void) rw_enter(&zs->zs_name_lock, RW_READER); ztest_dsl_dataset_cleanup(osname, id); @@ -3192,7 +3195,7 @@ ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id) out: ztest_dsl_dataset_cleanup(osname, id); - (void) rw_unlock(&zs->zs_name_lock); + (void) rw_exit(&zs->zs_name_lock); } /* @@ -4111,7 +4114,7 @@ ztest_commit_callback(void *arg, int error) ASSERT(data->zcd_added); ASSERT3U(data->zcd_txg, !=, 0); - (void) mutex_lock(&zcl.zcl_callbacks_lock); + (void) mutex_enter(&zcl.zcl_callbacks_lock); /* See if this cb was called more quickly */ if ((synced_txg - data->zcd_txg) < zc_min_txg_delay) @@ -4120,7 +4123,7 @@ ztest_commit_callback(void *arg, int error) /* Remove our callback from the list */ list_remove(&zcl.zcl_callbacks, data); - (void) mutex_unlock(&zcl.zcl_callbacks_lock); + (void) mutex_exit(&zcl.zcl_callbacks_lock); umem_free(data, sizeof (ztest_cb_data_t)); } @@ -4135,6 +4138,7 @@ ztest_create_cb_data(objset_t *os, uint64_t txg) cb_data->zcd_txg = txg; cb_data->zcd_spa = dmu_objset_spa(os); + list_link_init(&cb_data->zcd_node); return (cb_data); } @@ -4214,7 +4218,7 @@ ztest_dmu_commit_callbacks(ztest_ds_t *zd, uint64_t id) dmu_write(os, od[0].od_object, 0, sizeof (uint64_t), &txg, tx); - (void) mutex_lock(&zcl.zcl_callbacks_lock); + (void) mutex_enter(&zcl.zcl_callbacks_lock); /* * Since commit callbacks don't have any ordering requirement and since @@ -4263,7 +4267,7 @@ ztest_dmu_commit_callbacks(ztest_ds_t *zd, uint64_t id) zc_cb_counter += 3; - (void) mutex_unlock(&zcl.zcl_callbacks_lock); + (void) mutex_exit(&zcl.zcl_callbacks_lock); dmu_tx_commit(tx); } @@ -4281,13 +4285,13 @@ ztest_dsl_prop_get_set(ztest_ds_t *zd, uint64_t id) ztest_shared_t *zs = ztest_shared; int p; - (void) rw_rdlock(&zs->zs_name_lock); + (void) rw_enter(&zs->zs_name_lock, RW_READER); for (p = 0; p < sizeof (proplist) / sizeof (proplist[0]); p++) (void) ztest_dsl_prop_set_uint64(zd->zd_name, proplist[p], ztest_random_dsl_prop(proplist[p]), (int)ztest_random(2)); - (void) rw_unlock(&zs->zs_name_lock); + (void) rw_exit(&zs->zs_name_lock); } /* ARGSUSED */ @@ -4297,7 +4301,7 @@ ztest_spa_prop_get_set(ztest_ds_t *zd, uint64_t id) ztest_shared_t *zs = ztest_shared; nvlist_t *props = NULL; - (void) rw_rdlock(&zs->zs_name_lock); + (void) rw_enter(&zs->zs_name_lock, RW_READER); (void) ztest_spa_prop_set_uint64(zs, ZPOOL_PROP_DEDUPDITTO, ZIO_DEDUPDITTO_MIN + ztest_random(ZIO_DEDUPDITTO_MIN)); @@ -4309,7 +4313,7 @@ ztest_spa_prop_get_set(ztest_ds_t *zd, uint64_t id) nvlist_free(props); - (void) rw_unlock(&zs->zs_name_lock); + (void) rw_exit(&zs->zs_name_lock); } /* @@ -4327,14 +4331,14 @@ ztest_dmu_snapshot_hold(ztest_ds_t *zd, uint64_t id) char tag[100]; char osname[MAXNAMELEN]; - (void) rw_rdlock(&ztest_shared->zs_name_lock); + (void) rw_enter(&ztest_shared->zs_name_lock, RW_READER); dmu_objset_name(os, osname); - (void) snprintf(snapname, 100, "sh1_%llu", id); + (void) snprintf(snapname, 100, "sh1_%llu", (u_longlong_t)id); (void) snprintf(fullname, 100, "%s@%s", osname, snapname); - (void) snprintf(clonename, 100, "%s/ch1_%llu", osname, id); - (void) snprintf(tag, 100, "%tag_%llu", id); + (void) snprintf(clonename, 100, "%s/ch1_%llu",osname,(u_longlong_t)id); + (void) snprintf(tag, 100, "tag_%llu", (u_longlong_t)id); /* * Clean up from any previous run. @@ -4424,7 +4428,7 @@ ztest_dmu_snapshot_hold(ztest_ds_t *zd, uint64_t id) VERIFY(dmu_objset_hold(fullname, FTAG, &origin) == ENOENT); out: - (void) rw_unlock(&ztest_shared->zs_name_lock); + (void) rw_exit(&ztest_shared->zs_name_lock); } /* @@ -4452,11 +4456,11 @@ ztest_fault_inject(ztest_ds_t *zd, uint64_t id) uint64_t guid0 = 0; boolean_t islog = B_FALSE; - VERIFY(mutex_lock(&zs->zs_vdev_lock) == 0); + mutex_enter(&zs->zs_vdev_lock); maxfaults = MAXFAULTS(); leaves = MAX(zs->zs_mirrors, 1) * zopt_raidz; mirror_save = zs->zs_mirrors; - VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0); + mutex_exit(&zs->zs_vdev_lock); ASSERT(leaves >= 1); @@ -4550,12 +4554,13 @@ ztest_fault_inject(ztest_ds_t *zd, uint64_t id) * leaving the dataset in an inconsistent state. */ if (islog) - (void) rw_wrlock(&ztest_shared->zs_name_lock); + (void) rw_enter(&ztest_shared->zs_name_lock, + RW_WRITER); VERIFY(vdev_offline(spa, guid0, flags) != EBUSY); if (islog) - (void) rw_unlock(&ztest_shared->zs_name_lock); + (void) rw_exit(&ztest_shared->zs_name_lock); } else { (void) vdev_online(spa, guid0, 0, NULL); } @@ -4582,9 +4587,9 @@ ztest_fault_inject(ztest_ds_t *zd, uint64_t id) if (offset >= fsize) continue; - VERIFY(mutex_lock(&zs->zs_vdev_lock) == 0); + mutex_enter(&zs->zs_vdev_lock); if (mirror_save != zs->zs_mirrors) { - VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0); + mutex_exit(&zs->zs_vdev_lock); (void) close(fd); return; } @@ -4593,7 +4598,7 @@ ztest_fault_inject(ztest_ds_t *zd, uint64_t id) fatal(1, "can't inject bad word at 0x%llx in %s", offset, pathrand); - VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0); + mutex_exit(&zs->zs_vdev_lock); if (zopt_verbose >= 7) (void) printf("injected bad word into %s," @@ -4634,13 +4639,13 @@ ztest_ddt_repair(ztest_ds_t *zd, uint64_t id) * Take the name lock as writer to prevent anyone else from changing * the pool and dataset properies we need to maintain during this test. */ - (void) rw_wrlock(&zs->zs_name_lock); + (void) rw_enter(&zs->zs_name_lock, RW_WRITER); if (ztest_dsl_prop_set_uint64(zd->zd_name, ZFS_PROP_DEDUP, checksum, B_FALSE) != 0 || ztest_dsl_prop_set_uint64(zd->zd_name, ZFS_PROP_COPIES, 1, B_FALSE) != 0) { - (void) rw_unlock(&zs->zs_name_lock); + (void) rw_exit(&zs->zs_name_lock); return; } @@ -4654,7 +4659,7 @@ ztest_ddt_repair(ztest_ds_t *zd, uint64_t id) dmu_tx_hold_write(tx, object, 0, copies * blocksize); txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); if (txg == 0) { - (void) rw_unlock(&zs->zs_name_lock); + (void) rw_exit(&zs->zs_name_lock); return; } @@ -4698,7 +4703,7 @@ ztest_ddt_repair(ztest_ds_t *zd, uint64_t id) zio_buf_free(buf, psize); - (void) rw_unlock(&zs->zs_name_lock); + (void) rw_exit(&zs->zs_name_lock); } /* @@ -4727,7 +4732,7 @@ ztest_spa_rename(ztest_ds_t *zd, uint64_t id) char *oldname, *newname; spa_t *spa; - (void) rw_wrlock(&zs->zs_name_lock); + (void) rw_enter(&zs->zs_name_lock, RW_WRITER); oldname = zs->zs_pool; newname = umem_alloc(strlen(oldname) + 5, UMEM_NOFAIL); @@ -4767,7 +4772,7 @@ ztest_spa_rename(ztest_ds_t *zd, uint64_t id) umem_free(newname, strlen(newname) + 1); - (void) rw_unlock(&zs->zs_name_lock); + (void) rw_exit(&zs->zs_name_lock); } /* @@ -4940,23 +4945,18 @@ ztest_resume_thread(void *arg) ztest_resume(spa); (void) poll(NULL, 0, 100); } - return (NULL); -} -static void * -ztest_deadman_thread(void *arg) -{ - ztest_shared_t *zs = arg; - int grace = 300; - hrtime_t delta; + thread_exit(); - delta = (zs->zs_thread_stop - zs->zs_thread_start) / NANOSEC + grace; - - (void) poll(NULL, 0, (int)(1000 * delta)); + return (NULL); +} - fatal(0, "failed to complete within %d seconds of deadline", grace); +#define GRACE 300 - return (NULL); +static void +ztest_deadman_alarm(int sig) +{ + fatal(0, "failed to complete within %d seconds of deadline", GRACE); } static void @@ -5017,6 +5017,8 @@ ztest_thread(void *arg) ztest_execute(zi, id); } + thread_exit(); + return (NULL); } @@ -5082,18 +5084,18 @@ ztest_dataset_open(ztest_shared_t *zs, int d) ztest_dataset_name(name, zs->zs_pool, d); - (void) rw_rdlock(&zs->zs_name_lock); + (void) rw_enter(&zs->zs_name_lock, RW_READER); error = ztest_dataset_create(name); if (error == ENOSPC) { - (void) rw_unlock(&zs->zs_name_lock); + (void) rw_exit(&zs->zs_name_lock); ztest_record_enospc(FTAG); return (error); } ASSERT(error == 0 || error == EEXIST); VERIFY3U(dmu_objset_hold(name, zd, &os), ==, 0); - (void) rw_unlock(&zs->zs_name_lock); + (void) rw_exit(&zs->zs_name_lock); ztest_zd_init(zd, os); @@ -5144,9 +5146,10 @@ ztest_dataset_close(ztest_shared_t *zs, int d) static void ztest_run(ztest_shared_t *zs) { - thread_t *tid; + kt_did_t *tid; spa_t *spa; - thread_t resume_tid; + kthread_t *resume_thread; + uint64_t object; int error; int t, d; @@ -5155,8 +5158,8 @@ ztest_run(ztest_shared_t *zs) /* * Initialize parent/child shared state. */ - VERIFY(_mutex_init(&zs->zs_vdev_lock, USYNC_THREAD, NULL) == 0); - VERIFY(rwlock_init(&zs->zs_name_lock, USYNC_THREAD, NULL) == 0); + mutex_init(&zs->zs_vdev_lock, NULL, MUTEX_DEFAULT, NULL); + rw_init(&zs->zs_name_lock, NULL, RW_DEFAULT, NULL); zs->zs_thread_start = gethrtime(); zs->zs_thread_stop = zs->zs_thread_start + zopt_passtime * NANOSEC; @@ -5165,7 +5168,7 @@ ztest_run(ztest_shared_t *zs) if (ztest_random(100) < zopt_killrate) zs->zs_thread_kill -= ztest_random(zopt_passtime * NANOSEC); - (void) _mutex_init(&zcl.zcl_callbacks_lock, USYNC_THREAD, NULL); + mutex_init(&zcl.zcl_callbacks_lock, NULL, MUTEX_DEFAULT, NULL); list_create(&zcl.zcl_callbacks, sizeof (ztest_cb_data_t), offsetof(ztest_cb_data_t, zcd_node)); @@ -5192,14 +5195,14 @@ ztest_run(ztest_shared_t *zs) /* * Create a thread to periodically resume suspended I/O. */ - VERIFY(thr_create(0, 0, ztest_resume_thread, spa, THR_BOUND, - &resume_tid) == 0); + VERIFY3P((resume_thread = thread_create(NULL, 0, ztest_resume_thread, + spa, TS_RUN, NULL, 0, 0)), !=, NULL); /* - * Create a deadman thread to abort() if we hang. + * Set a deadman alarm to abort() if we hang. */ - VERIFY(thr_create(0, 0, ztest_deadman_thread, zs, THR_BOUND, - NULL) == 0); + signal(SIGALRM, ztest_deadman_alarm); + alarm((zs->zs_thread_stop - zs->zs_thread_start) / NANOSEC + GRACE); /* * Verify that we can safely inquire about about any object, @@ -5225,7 +5228,7 @@ ztest_run(ztest_shared_t *zs) } zs->zs_enospc_count = 0; - tid = umem_zalloc(zopt_threads * sizeof (thread_t), UMEM_NOFAIL); + tid = umem_zalloc(zopt_threads * sizeof (kt_did_t), UMEM_NOFAIL); if (zopt_verbose >= 4) (void) printf("starting main threads...\n"); @@ -5234,10 +5237,14 @@ ztest_run(ztest_shared_t *zs) * Kick off all the tests that run in parallel. */ for (t = 0; t < zopt_threads; t++) { + kthread_t *thread; + if (t < zopt_datasets && ztest_dataset_open(zs, t) != 0) return; - VERIFY(thr_create(0, 0, ztest_thread, (void *)(uintptr_t)t, - THR_BOUND, &tid[t]) == 0); + + VERIFY3P(thread = thread_create(NULL, 0, ztest_thread, + (void *)(uintptr_t)t, TS_RUN, NULL, 0, 0), !=, NULL); + tid[t] = thread->t_tid; } /* @@ -5245,7 +5252,7 @@ ztest_run(ztest_shared_t *zs) * so we don't close datasets while threads are still using them. */ for (t = zopt_threads - 1; t >= 0; t--) { - VERIFY(thr_join(tid[t], NULL, NULL) == 0); + thread_join(tid[t]); if (t < zopt_datasets) ztest_dataset_close(zs, t); } @@ -5255,18 +5262,18 @@ ztest_run(ztest_shared_t *zs) zs->zs_alloc = metaslab_class_get_alloc(spa_normal_class(spa)); zs->zs_space = metaslab_class_get_space(spa_normal_class(spa)); - umem_free(tid, zopt_threads * sizeof (thread_t)); + umem_free(tid, zopt_threads * sizeof (kt_did_t)); /* Kill the resume thread */ ztest_exiting = B_TRUE; - VERIFY(thr_join(resume_tid, NULL, NULL) == 0); + thread_join(resume_thread->t_tid); ztest_resume(spa); /* * Right before closing the pool, kick off a bunch of async I/O; * spa_close() should wait for it to complete. */ - for (uint64_t object = 1; object < 50; object++) + for (object = 1; object < 50; object++) dmu_prefetch(spa->spa_meta_objset, object, 0, 1ULL << 20); /* Verify that at least one commit cb was called in a timely fashion */ @@ -5426,8 +5433,8 @@ ztest_init(ztest_shared_t *zs) spa_t *spa; nvlist_t *nvroot, *props; - VERIFY(_mutex_init(&zs->zs_vdev_lock, USYNC_THREAD, NULL) == 0); - VERIFY(rwlock_init(&zs->zs_name_lock, USYNC_THREAD, NULL) == 0); + mutex_init(&zs->zs_vdev_lock, NULL, MUTEX_DEFAULT, NULL); + rw_init(&zs->zs_name_lock, NULL, RW_DEFAULT, NULL); kernel_init(FREAD | FWRITE); @@ -5456,8 +5463,8 @@ ztest_init(ztest_shared_t *zs) ztest_run_zdb(zs->zs_pool); - (void) rwlock_destroy(&zs->zs_name_lock); - (void) _mutex_destroy(&zs->zs_vdev_lock); + (void) rw_destroy(&zs->zs_name_lock); + (void) mutex_destroy(&zs->zs_vdev_lock); } int diff --git a/lib/libuutil/uu_misc.c b/lib/libuutil/uu_misc.c index 578bf3294052..1b843effe65d 100644 --- a/lib/libuutil/uu_misc.c +++ b/lib/libuutil/uu_misc.c @@ -34,7 +34,6 @@ #include #include #include -#include #include #include @@ -68,11 +67,12 @@ static va_list uu_panic_args; static pthread_t uu_panic_thread; static uint32_t _uu_main_error; +static __thread int _uu_main_thread = 0; void uu_set_error(uint_t code) { - if (thr_main() != 0) { + if (_uu_main_thread) { _uu_main_error = code; return; } @@ -101,7 +101,7 @@ uu_set_error(uint_t code) uint32_t uu_error(void) { - if (thr_main() != 0) + if (_uu_main_thread) return (_uu_main_error); if (uu_error_key_setup < 0) /* can't happen? */ @@ -259,6 +259,7 @@ uu_init(void) __attribute__((constructor)); static void uu_init(void) { + _uu_main_thread = 1; (void) pthread_atfork(uu_lockup, uu_release, uu_release_child); } diff --git a/lib/libzpool/include/sys/zfs_context.h b/lib/libzpool/include/sys/zfs_context.h index fc543559bd39..55b117c218a1 100644 --- a/lib/libzpool/include/sys/zfs_context.h +++ b/lib/libzpool/include/sys/zfs_context.h @@ -49,8 +49,7 @@ extern "C" { #include #include #include -#include -#include +#include #include #include #include @@ -90,6 +89,8 @@ extern "C" { #define CE_PANIC 3 /* panic */ #define CE_IGNORE 4 /* print nothing */ +extern int aok; + /* * ZFS debugging */ @@ -195,27 +196,55 @@ _NOTE(CONSTCOND) } while (0) /* * Threads */ -#define curthread ((void *)(uintptr_t)thr_self()) - -typedef struct kthread kthread_t; - -#define thread_create(stk, stksize, func, arg, len, pp, state, pri) \ - zk_thread_create(func, arg) -#define thread_exit() thr_exit(NULL) -#define thread_join(t) panic("libzpool cannot join threads") +#define TS_MAGIC 0x72f158ab4261e538ull +#define TS_RUN 0x00000002 +#ifdef __linux__ +#define STACK_SIZE 8192 /* Linux x86 and amd64 */ +#else +#define STACK_SIZE 24576 /* Solaris */ +#endif -#define newproc(f, a, cid, pri, ctp, pid) (ENOSYS) +#ifdef NPTL_GUARD_WITHIN_STACK +#define EXTRA_GUARD_BYTES PAGESIZE +#else +#define EXTRA_GUARD_BYTES 0 +#endif /* in libzpool, p0 exists only to have its address taken */ -struct proc { +typedef struct proc { uintptr_t this_is_never_used_dont_dereference_it; -}; +} proc_t; extern struct proc p0; -#define PS_NONE -1 +typedef void (*thread_func_t)(void *); +typedef void (*thread_func_arg_t)(void *); +typedef pthread_t kt_did_t; -extern kthread_t *zk_thread_create(void (*func)(), void *arg); +typedef struct kthread { + kt_did_t t_tid; + thread_func_t t_func; + void * t_arg; +} kthread_t; + +#define tsd_get(key) pthread_getspecific(key) +#define tsd_set(key, val) pthread_setspecific(key, val) +#define curthread zk_thread_current() +#define thread_exit zk_thread_exit +#define thread_create(stk, stksize, func, arg, len, pp, state, pri) \ + zk_thread_create(stk, stksize, (thread_func_t)func, arg, \ + len, NULL, state, pri) +#define thread_join(t) zk_thread_join(t) +#define newproc(f,a,cid,pri,ctp,pid) (ENOSYS) + +extern kthread_t *zk_thread_current(void); +extern void zk_thread_exit(void); +extern kthread_t *zk_thread_create(caddr_t stk, size_t stksize, + thread_func_t func, void *arg, size_t len, + proc_t *pp, int state, pri_t pri); +extern void zk_thread_join(kt_did_t tid); + +#define PS_NONE -1 #define issig(why) (FALSE) #define ISSIG(thr, why) (FALSE) @@ -223,55 +252,52 @@ extern kthread_t *zk_thread_create(void (*func)(), void *arg); /* * Mutexes */ +#define MTX_MAGIC 0x9522f51362a6e326ull +#define MTX_INIT ((void *)NULL) +#define MTX_DEST ((void *)-1UL) + typedef struct kmutex { void *m_owner; - boolean_t initialized; - mutex_t m_lock; + uint64_t m_magic; + pthread_mutex_t m_lock; } kmutex_t; -#define MUTEX_DEFAULT USYNC_THREAD -#undef MUTEX_HELD -#undef MUTEX_NOT_HELD -#define MUTEX_HELD(m) _mutex_held(&(m)->m_lock) +#define MUTEX_DEFAULT 0 +#define MUTEX_HELD(m) ((m)->m_owner == curthread) #define MUTEX_NOT_HELD(m) (!MUTEX_HELD(m)) -/* - * Argh -- we have to get cheesy here because the kernel and userland - * have different signatures for the same routine. - */ -extern int _mutex_init(mutex_t *mp, int type, void *arg); -extern int _mutex_destroy(mutex_t *mp); - -#define mutex_init(mp, b, c, d) zmutex_init((kmutex_t *)(mp)) -#define mutex_destroy(mp) zmutex_destroy((kmutex_t *)(mp)) - -extern void zmutex_init(kmutex_t *mp); -extern void zmutex_destroy(kmutex_t *mp); +extern void mutex_init(kmutex_t *mp, char *name, int type, void *cookie); +extern void mutex_destroy(kmutex_t *mp); extern void mutex_enter(kmutex_t *mp); extern void mutex_exit(kmutex_t *mp); extern int mutex_tryenter(kmutex_t *mp); extern void *mutex_owner(kmutex_t *mp); +extern int mutex_held(kmutex_t *mp); /* * RW locks */ +#define RW_MAGIC 0x4d31fb123648e78aull +#define RW_INIT ((void *)NULL) +#define RW_DEST ((void *)-1UL) + typedef struct krwlock { - void *rw_owner; - boolean_t initialized; - rwlock_t rw_lock; + void *rw_owner; + void *rw_wr_owner; + uint64_t rw_magic; + pthread_rwlock_t rw_lock; + uint_t rw_readers; } krwlock_t; typedef int krw_t; #define RW_READER 0 #define RW_WRITER 1 -#define RW_DEFAULT USYNC_THREAD +#define RW_DEFAULT RW_READER -#undef RW_READ_HELD -#define RW_READ_HELD(x) _rw_read_held(&(x)->rw_lock) - -#undef RW_WRITE_HELD -#define RW_WRITE_HELD(x) _rw_write_held(&(x)->rw_lock) +#define RW_READ_HELD(x) ((x)->rw_readers > 0) +#define RW_WRITE_HELD(x) ((x)->rw_wr_owner == curthread) +#define RW_LOCK_HELD(x) (RW_READ_HELD(x) || RW_WRITE_HELD(x)) extern void rw_init(krwlock_t *rwlp, char *name, int type, void *arg); extern void rw_destroy(krwlock_t *rwlp); @@ -289,9 +315,14 @@ extern gid_t *crgetgroups(cred_t *cr); /* * Condition variables */ -typedef cond_t kcondvar_t; +#define CV_MAGIC 0xd31ea9a83b1b30c4ull + +typedef struct kcondvar { + uint64_t cv_magic; + pthread_cond_t cv; +} kcondvar_t; -#define CV_DEFAULT USYNC_THREAD +#define CV_DEFAULT 0 extern void cv_init(kcondvar_t *cv, char *name, int type, void *arg); extern void cv_destroy(kcondvar_t *cv); @@ -367,7 +398,7 @@ extern taskq_t *taskq_create(const char *, int, pri_t, int, int, uint_t); extern taskqid_t taskq_dispatch(taskq_t *, task_func_t, void *, uint_t); extern void taskq_destroy(taskq_t *); extern void taskq_wait(taskq_t *); -extern int taskq_member(taskq_t *, void *); +extern int taskq_member(taskq_t *, kthread_t *); extern void system_taskq_init(void); extern void system_taskq_fini(void); @@ -496,7 +527,7 @@ extern void delay(clock_t ticks); #define minclsyspri 60 #define maxclsyspri 99 -#define CPU_SEQID (thr_self() & (max_ncpus - 1)) +#define CPU_SEQID (pthread_self() & (max_ncpus - 1)) #define kcred NULL #define CRED() NULL diff --git a/lib/libzpool/kernel.c b/lib/libzpool/kernel.c index b64f03da4fe7..4bd08cdd450e 100644 --- a/lib/libzpool/kernel.c +++ b/lib/libzpool/kernel.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -57,16 +58,156 @@ struct proc p0; * threads * ========================================================================= */ -/*ARGSUSED*/ + +pthread_cond_t kthread_cond = PTHREAD_COND_INITIALIZER; +pthread_mutex_t kthread_lock = PTHREAD_MUTEX_INITIALIZER; +pthread_key_t kthread_key; +int kthread_nr = 0; + +static void +thread_init(void) +{ + kthread_t *kt; + + VERIFY3S(pthread_key_create(&kthread_key, NULL), ==, 0); + + /* Create entry for primary kthread */ + kt = umem_zalloc(sizeof(kthread_t), UMEM_NOFAIL); + kt->t_tid = pthread_self(); + kt->t_func = NULL; + + VERIFY3S(pthread_setspecific(kthread_key, kt), ==, 0); + + /* Only the main thread should be running at the moment */ + ASSERT3S(kthread_nr, ==, 0); + kthread_nr = 1; +} + +static void +thread_fini(void) +{ + kthread_t *kt = curthread; + + ASSERT(pthread_equal(kt->t_tid, pthread_self())); + ASSERT3P(kt->t_func, ==, NULL); + + umem_free(kt, sizeof(kthread_t)); + + /* Wait for all threads to exit via thread_exit() */ + VERIFY3S(pthread_mutex_lock(&kthread_lock), ==, 0); + + kthread_nr--; /* Main thread is exiting */ + + while (kthread_nr > 0) + VERIFY3S(pthread_cond_wait(&kthread_cond, &kthread_lock), ==, + 0); + + ASSERT3S(kthread_nr, ==, 0); + VERIFY3S(pthread_mutex_unlock(&kthread_lock), ==, 0); + + VERIFY3S(pthread_key_delete(kthread_key), ==, 0); +} + kthread_t * -zk_thread_create(void (*func)(), void *arg) +zk_thread_current(void) +{ + kthread_t *kt = pthread_getspecific(kthread_key); + + ASSERT3P(kt, !=, NULL); + + return kt; +} + +void * +zk_thread_helper(void *arg) { - thread_t tid; + kthread_t *kt = (kthread_t *) arg; + + VERIFY3S(pthread_setspecific(kthread_key, kt), ==, 0); - VERIFY(thr_create(0, 0, (void *(*)(void *))func, arg, THR_DETACHED, - &tid) == 0); + VERIFY3S(pthread_mutex_lock(&kthread_lock), ==, 0); + kthread_nr++; + VERIFY3S(pthread_mutex_unlock(&kthread_lock), ==, 0); - return ((void *)(uintptr_t)tid); + kt->t_tid = pthread_self(); + ((thread_func_arg_t) kt->t_func)(kt->t_arg); + + /* Unreachable, thread must exit with thread_exit() */ + abort(); + + return NULL; +} + +kthread_t * +zk_thread_create(caddr_t stk, size_t stksize, thread_func_t func, void *arg, + size_t len, proc_t *pp, int state, pri_t pri) +{ + kthread_t *kt; + pthread_attr_t attr; + size_t stack; + + ASSERT3S(state & ~TS_RUN, ==, 0); + + kt = umem_zalloc(sizeof(kthread_t), UMEM_NOFAIL); + kt->t_func = func; + kt->t_arg = arg; + + /* + * The Solaris kernel stack size is 24k for x86/x86_64. + * The Linux kernel stack size is 8k for x86/x86_64. + * + * We reduce the default stack size in userspace, to ensure + * we observe stack overruns in user space as well as in + * kernel space. PTHREAD_STACK_MIN is the minimum stack + * required for a NULL procedure in user space and is added + * in to the stack requirements. + * + * Some buggy NPTL threading implementations include the + * guard area within the stack size allocations. In + * this case we allocate an extra page to account for the + * guard area since we only have two pages of usable stack + * on Linux. + */ + + stack = PTHREAD_STACK_MIN + MAX(stksize, STACK_SIZE) + + EXTRA_GUARD_BYTES; + + VERIFY3S(pthread_attr_init(&attr), ==, 0); + VERIFY3S(pthread_attr_setstacksize(&attr, stack), ==, 0); + VERIFY3S(pthread_attr_setguardsize(&attr, PAGESIZE), ==, 0); + + VERIFY3S(pthread_create(&kt->t_tid, &attr, &zk_thread_helper, kt), + ==, 0); + + VERIFY3S(pthread_attr_destroy(&attr), ==, 0); + + return kt; +} + +void +zk_thread_exit(void) +{ + kthread_t *kt = curthread; + + ASSERT(pthread_equal(kt->t_tid, pthread_self())); + + umem_free(kt, sizeof(kthread_t)); + + pthread_mutex_lock(&kthread_lock); + kthread_nr--; + pthread_mutex_unlock(&kthread_lock); + + pthread_cond_broadcast(&kthread_cond); + pthread_exit((void *)TS_MAGIC); +} + +void +zk_thread_join(kt_did_t tid) +{ + void *ret; + + pthread_join((pthread_t)tid, &ret); + VERIFY3P(ret, ==, (void *)TS_MAGIC); } /* @@ -97,42 +238,45 @@ kstat_delete(kstat_t *ksp) * mutexes * ========================================================================= */ + void -zmutex_init(kmutex_t *mp) +mutex_init(kmutex_t *mp, char *name, int type, void *cookie) { - mp->m_owner = NULL; - mp->initialized = B_TRUE; - (void) _mutex_init(&mp->m_lock, USYNC_THREAD, NULL); + ASSERT3S(type, ==, MUTEX_DEFAULT); + ASSERT3P(cookie, ==, NULL); + mp->m_owner = MTX_INIT; + mp->m_magic = MTX_MAGIC; + VERIFY3S(pthread_mutex_init(&mp->m_lock, NULL), ==, 0); } void -zmutex_destroy(kmutex_t *mp) +mutex_destroy(kmutex_t *mp) { - ASSERT(mp->initialized == B_TRUE); - ASSERT(mp->m_owner == NULL); - (void) _mutex_destroy(&(mp)->m_lock); - mp->m_owner = (void *)-1UL; - mp->initialized = B_FALSE; + ASSERT3U(mp->m_magic, ==, MTX_MAGIC); + ASSERT3P(mp->m_owner, ==, MTX_INIT); + VERIFY3S(pthread_mutex_destroy(&(mp)->m_lock), ==, 0); + mp->m_owner = MTX_DEST; + mp->m_magic = 0; } void mutex_enter(kmutex_t *mp) { - ASSERT(mp->initialized == B_TRUE); - ASSERT(mp->m_owner != (void *)-1UL); - ASSERT(mp->m_owner != curthread); - VERIFY(mutex_lock(&mp->m_lock) == 0); - ASSERT(mp->m_owner == NULL); + ASSERT3U(mp->m_magic, ==, MTX_MAGIC); + ASSERT3P(mp->m_owner, !=, MTX_DEST); + ASSERT3P(mp->m_owner, !=, curthread); + VERIFY3S(pthread_mutex_lock(&mp->m_lock), ==, 0); + ASSERT3P(mp->m_owner, ==, MTX_INIT); mp->m_owner = curthread; } int mutex_tryenter(kmutex_t *mp) { - ASSERT(mp->initialized == B_TRUE); - ASSERT(mp->m_owner != (void *)-1UL); - if (0 == mutex_trylock(&mp->m_lock)) { - ASSERT(mp->m_owner == NULL); + ASSERT3U(mp->m_magic, ==, MTX_MAGIC); + ASSERT3P(mp->m_owner, !=, MTX_DEST); + if (0 == pthread_mutex_trylock(&mp->m_lock)) { + ASSERT3P(mp->m_owner, ==, MTX_INIT); mp->m_owner = curthread; return (1); } else { @@ -143,53 +287,71 @@ mutex_tryenter(kmutex_t *mp) void mutex_exit(kmutex_t *mp) { - ASSERT(mp->initialized == B_TRUE); - ASSERT(mutex_owner(mp) == curthread); - mp->m_owner = NULL; - VERIFY(mutex_unlock(&mp->m_lock) == 0); + ASSERT3U(mp->m_magic, ==, MTX_MAGIC); + ASSERT3P(mutex_owner(mp), ==, curthread); + mp->m_owner = MTX_INIT; + VERIFY3S(pthread_mutex_unlock(&mp->m_lock), ==, 0); } void * mutex_owner(kmutex_t *mp) { - ASSERT(mp->initialized == B_TRUE); + ASSERT3U(mp->m_magic, ==, MTX_MAGIC); return (mp->m_owner); } +int +mutex_held(kmutex_t *mp) +{ + return (mp->m_owner == curthread); +} + /* * ========================================================================= * rwlocks * ========================================================================= */ -/*ARGSUSED*/ + void rw_init(krwlock_t *rwlp, char *name, int type, void *arg) { - rwlock_init(&rwlp->rw_lock, USYNC_THREAD, NULL); - rwlp->rw_owner = NULL; - rwlp->initialized = B_TRUE; + ASSERT3S(type, ==, RW_DEFAULT); + ASSERT3P(arg, ==, NULL); + VERIFY3S(pthread_rwlock_init(&rwlp->rw_lock, NULL), ==, 0); + rwlp->rw_owner = RW_INIT; + rwlp->rw_wr_owner = RW_INIT; + rwlp->rw_readers = 0; + rwlp->rw_magic = RW_MAGIC; } void rw_destroy(krwlock_t *rwlp) { - rwlock_destroy(&rwlp->rw_lock); - rwlp->rw_owner = (void *)-1UL; - rwlp->initialized = B_FALSE; + ASSERT3U(rwlp->rw_magic, ==, RW_MAGIC); + + VERIFY3S(pthread_rwlock_destroy(&rwlp->rw_lock), ==, 0); + rwlp->rw_magic = 0; } void rw_enter(krwlock_t *rwlp, krw_t rw) { - ASSERT(!RW_LOCK_HELD(rwlp)); - ASSERT(rwlp->initialized == B_TRUE); - ASSERT(rwlp->rw_owner != (void *)-1UL); - ASSERT(rwlp->rw_owner != curthread); + ASSERT3U(rwlp->rw_magic, ==, RW_MAGIC); + ASSERT3P(rwlp->rw_owner, !=, curthread); + ASSERT3P(rwlp->rw_wr_owner, !=, curthread); - if (rw == RW_READER) - VERIFY(rw_rdlock(&rwlp->rw_lock) == 0); - else - VERIFY(rw_wrlock(&rwlp->rw_lock) == 0); + if (rw == RW_READER) { + VERIFY3S(pthread_rwlock_rdlock(&rwlp->rw_lock), ==, 0); + ASSERT3P(rwlp->rw_wr_owner, ==, RW_INIT); + + atomic_inc_uint(&rwlp->rw_readers); + } else { + VERIFY3S(pthread_rwlock_wrlock(&rwlp->rw_lock), ==, 0); + ASSERT3P(rwlp->rw_wr_owner, ==, RW_INIT); + ASSERT3U(rwlp->rw_readers, ==, 0); + + rwlp->rw_wr_owner = curthread; + } rwlp->rw_owner = curthread; } @@ -197,11 +359,16 @@ rw_enter(krwlock_t *rwlp, krw_t rw) void rw_exit(krwlock_t *rwlp) { - ASSERT(rwlp->initialized == B_TRUE); - ASSERT(rwlp->rw_owner != (void *)-1UL); + ASSERT3U(rwlp->rw_magic, ==, RW_MAGIC); + ASSERT(RW_LOCK_HELD(rwlp)); + + if (RW_READ_HELD(rwlp)) + atomic_dec_uint(&rwlp->rw_readers); + else + rwlp->rw_wr_owner = RW_INIT; - rwlp->rw_owner = NULL; - VERIFY(rw_unlock(&rwlp->rw_lock) == 0); + rwlp->rw_owner = RW_INIT; + VERIFY3S(pthread_rwlock_unlock(&rwlp->rw_lock), ==, 0); } int @@ -209,28 +376,36 @@ rw_tryenter(krwlock_t *rwlp, krw_t rw) { int rv; - ASSERT(rwlp->initialized == B_TRUE); - ASSERT(rwlp->rw_owner != (void *)-1UL); + ASSERT3U(rwlp->rw_magic, ==, RW_MAGIC); if (rw == RW_READER) - rv = rw_tryrdlock(&rwlp->rw_lock); + rv = pthread_rwlock_tryrdlock(&rwlp->rw_lock); else - rv = rw_trywrlock(&rwlp->rw_lock); + rv = pthread_rwlock_trywrlock(&rwlp->rw_lock); if (rv == 0) { + ASSERT3P(rwlp->rw_wr_owner, ==, RW_INIT); + + if (rw == RW_READER) + atomic_inc_uint(&rwlp->rw_readers); + else { + ASSERT3U(rwlp->rw_readers, ==, 0); + rwlp->rw_wr_owner = curthread; + } + rwlp->rw_owner = curthread; return (1); } + VERIFY3S(rv, ==, EBUSY); + return (0); } -/*ARGSUSED*/ int rw_tryupgrade(krwlock_t *rwlp) { - ASSERT(rwlp->initialized == B_TRUE); - ASSERT(rwlp->rw_owner != (void *)-1UL); + ASSERT3U(rwlp->rw_magic, ==, RW_MAGIC); return (0); } @@ -240,26 +415,32 @@ rw_tryupgrade(krwlock_t *rwlp) * condition variables * ========================================================================= */ -/*ARGSUSED*/ + void cv_init(kcondvar_t *cv, char *name, int type, void *arg) { - VERIFY(cond_init(cv, type, NULL) == 0); + ASSERT3S(type, ==, CV_DEFAULT); + cv->cv_magic = CV_MAGIC; + VERIFY3S(pthread_cond_init(&cv->cv, NULL), ==, 0); } void cv_destroy(kcondvar_t *cv) { - VERIFY(cond_destroy(cv) == 0); + ASSERT3U(cv->cv_magic, ==, CV_MAGIC); + VERIFY3S(pthread_cond_destroy(&cv->cv), ==, 0); + cv->cv_magic = 0; } void cv_wait(kcondvar_t *cv, kmutex_t *mp) { - ASSERT(mutex_owner(mp) == curthread); - mp->m_owner = NULL; - int ret = cond_wait(cv, &mp->m_lock); - VERIFY(ret == 0 || ret == EINTR); + ASSERT3U(cv->cv_magic, ==, CV_MAGIC); + ASSERT3P(mutex_owner(mp), ==, curthread); + mp->m_owner = MTX_INIT; + int ret = pthread_cond_wait(&cv->cv, &mp->m_lock); + if (ret != 0) + VERIFY3S(ret, ==, EINTR); mp->m_owner = curthread; } @@ -267,29 +448,38 @@ clock_t cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime) { int error; + struct timeval tv; timestruc_t ts; clock_t delta; + ASSERT3U(cv->cv_magic, ==, CV_MAGIC); + top: delta = abstime - ddi_get_lbolt(); if (delta <= 0) return (-1); - ts.tv_sec = delta / hz; - ts.tv_nsec = (delta % hz) * (NANOSEC / hz); + VERIFY(gettimeofday(&tv, NULL) == 0); + + ts.tv_sec = tv.tv_sec + delta / hz; + ts.tv_nsec = tv.tv_usec * 1000 + (delta % hz) * (NANOSEC / hz); + if (ts.tv_nsec >= NANOSEC) { + ts.tv_sec++; + ts.tv_nsec -= NANOSEC; + } - ASSERT(mutex_owner(mp) == curthread); - mp->m_owner = NULL; - error = cond_reltimedwait(cv, &mp->m_lock, &ts); + ASSERT3P(mutex_owner(mp), ==, curthread); + mp->m_owner = MTX_INIT; + error = pthread_cond_timedwait(&cv->cv, &mp->m_lock, &ts); mp->m_owner = curthread; - if (error == ETIME) + if (error == ETIMEDOUT) return (-1); if (error == EINTR) goto top; - ASSERT(error == 0); + VERIFY3S(error, ==, 0); return (1); } @@ -297,13 +487,15 @@ cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime) void cv_signal(kcondvar_t *cv) { - VERIFY(cond_signal(cv) == 0); + ASSERT3U(cv->cv_magic, ==, CV_MAGIC); + VERIFY3S(pthread_cond_signal(&cv->cv), ==, 0); } void cv_broadcast(kcondvar_t *cv) { - VERIFY(cond_broadcast(cv) == 0); + ASSERT3U(cv->cv_magic, ==, CV_MAGIC); + VERIFY3S(pthread_cond_broadcast(&cv->cv), ==, 0); } /* @@ -571,7 +763,7 @@ __dprintf(const char *file, const char *func, int line, const char *fmt, ...) if (dprintf_find_string("pid")) (void) printf("%d ", getpid()); if (dprintf_find_string("tid")) - (void) printf("%u ", thr_self()); + (void) printf("%u ", (uint_t) pthread_self()); if (dprintf_find_string("cpu")) (void) printf("%u ", getcpuid()); if (dprintf_find_string("time")) @@ -824,6 +1016,7 @@ kernel_init(int mode) VERIFY((random_fd = open("/dev/random", O_RDONLY)) != -1); VERIFY((urandom_fd = open("/dev/urandom", O_RDONLY)) != -1); + thread_init(); system_taskq_init(); spa_init(mode); @@ -835,6 +1028,7 @@ kernel_fini(void) spa_fini(); system_taskq_fini(); + thread_fini(); close(random_fd); close(urandom_fd); diff --git a/lib/libzpool/taskq.c b/lib/libzpool/taskq.c index 8db5d11c1327..36c0ec7dfc78 100644 --- a/lib/libzpool/taskq.c +++ b/lib/libzpool/taskq.c @@ -42,7 +42,7 @@ struct taskq { krwlock_t tq_threadlock; kcondvar_t tq_dispatch_cv; kcondvar_t tq_wait_cv; - thread_t *tq_threadlist; + kthread_t **tq_threadlist; int tq_flags; int tq_active; int tq_nthreads; @@ -154,7 +154,7 @@ taskq_wait(taskq_t *tq) mutex_exit(&tq->tq_lock); } -static void * +static void taskq_thread(void *arg) { taskq_t *tq = arg; @@ -183,7 +183,7 @@ taskq_thread(void *arg) tq->tq_nthreads--; cv_broadcast(&tq->tq_wait_cv); mutex_exit(&tq->tq_lock); - return (NULL); + thread_exit(); } /*ARGSUSED*/ @@ -219,7 +219,7 @@ taskq_create(const char *name, int nthreads, pri_t pri, tq->tq_maxalloc = maxalloc; tq->tq_task.task_next = &tq->tq_task; tq->tq_task.task_prev = &tq->tq_task; - tq->tq_threadlist = kmem_alloc(nthreads * sizeof (thread_t), KM_SLEEP); + tq->tq_threadlist = kmem_alloc(nthreads*sizeof(kthread_t *), KM_SLEEP); if (flags & TASKQ_PREPOPULATE) { mutex_enter(&tq->tq_lock); @@ -229,8 +229,8 @@ taskq_create(const char *name, int nthreads, pri_t pri, } for (t = 0; t < nthreads; t++) - (void) thr_create(0, 0, taskq_thread, - tq, THR_BOUND, &tq->tq_threadlist[t]); + VERIFY((tq->tq_threadlist[t] = thread_create(NULL, 0, + taskq_thread, tq, TS_RUN, NULL, 0, 0)) != NULL); return (tq); } @@ -238,7 +238,6 @@ taskq_create(const char *name, int nthreads, pri_t pri, void taskq_destroy(taskq_t *tq) { - int t; int nthreads = tq->tq_nthreads; taskq_wait(tq); @@ -259,10 +258,7 @@ taskq_destroy(taskq_t *tq) mutex_exit(&tq->tq_lock); - for (t = 0; t < nthreads; t++) - (void) thr_join(tq->tq_threadlist[t], NULL, NULL); - - kmem_free(tq->tq_threadlist, nthreads * sizeof (thread_t)); + kmem_free(tq->tq_threadlist, nthreads * sizeof (kthread_t *)); rw_destroy(&tq->tq_threadlock); mutex_destroy(&tq->tq_lock); @@ -274,7 +270,7 @@ taskq_destroy(taskq_t *tq) } int -taskq_member(taskq_t *tq, void *t) +taskq_member(taskq_t *tq, kthread_t *t) { int i; @@ -282,7 +278,7 @@ taskq_member(taskq_t *tq, void *t) return (1); for (i = 0; i < tq->tq_nthreads; i++) - if (tq->tq_threadlist[i] == (thread_t)(uintptr_t)t) + if (tq->tq_threadlist[i] == t) return (1); return (0);