From 7481237f1c2dab0ac49f34b10f18c8f08c34ed3c Mon Sep 17 00:00:00 2001 From: Tony Hutter Date: Mon, 3 Jul 2017 17:08:30 -0700 Subject: [PATCH] Commented out all dn_struct_rwlock RW_READER locks for benchmarking https://github.com/zfsonlinux/zfs/issues/4804 --- module/zfs/dbuf.c | 42 ++++++++++++++++---------------- module/zfs/dmu.c | 53 ++++++++++++++++++++++------------------- module/zfs/dnode.c | 36 +++++++++++++++------------- module/zfs/dnode_sync.c | 12 +++++----- 4 files changed, 75 insertions(+), 68 deletions(-) diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c index 70cc3108d0ac..ea09891aa07c 100644 --- a/module/zfs/dbuf.c +++ b/module/zfs/dbuf.c @@ -248,12 +248,12 @@ dbuf_find_bonus(objset_t *os, uint64_t object) dmu_buf_impl_t *db = NULL; if (dnode_hold(os, object, FTAG, &dn) == 0) { - rw_enter(&dn->dn_struct_rwlock, RW_READER); +// rw_enter(&dn->dn_struct_rwlock, RW_READER); if (dn->dn_bonus != NULL) { db = dn->dn_bonus; mutex_enter(&db->db_mtx); } - rw_exit(&dn->dn_struct_rwlock); +// rw_exit(&dn->dn_struct_rwlock); dnode_rele(dn, FTAG); } return (db); @@ -1009,7 +1009,7 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags) dn = DB_DNODE(db); ASSERT(!refcount_is_zero(&db->db_holds)); /* We need the struct_rwlock to prevent db_blkptr from changing. 
*/ - ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock)); +// ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock)); ASSERT(MUTEX_HELD(&db->db_mtx)); ASSERT(db->db_state == DB_UNCACHED); ASSERT(db->db_buf == NULL); @@ -1179,8 +1179,8 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags) DB_DNODE_ENTER(db); dn = DB_DNODE(db); - if ((flags & DB_RF_HAVESTRUCT) == 0) - rw_enter(&dn->dn_struct_rwlock, RW_READER); +// if ((flags & DB_RF_HAVESTRUCT) == 0) +// rw_enter(&dn->dn_struct_rwlock, RW_READER); prefetch = db->db_level == 0 && db->db_blkid != DMU_BONUS_BLKID && (flags & DB_RF_NOPREFETCH) == 0 && dn != NULL && @@ -1203,8 +1203,8 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags) mutex_exit(&db->db_mtx); if (prefetch) dmu_zfetch(&dn->dn_zfetch, db->db_blkid, 1, B_TRUE); - if ((flags & DB_RF_HAVESTRUCT) == 0) - rw_exit(&dn->dn_struct_rwlock); +// if ((flags & DB_RF_HAVESTRUCT) == 0) +// rw_exit(&dn->dn_struct_rwlock); DB_DNODE_EXIT(db); } else if (db->db_state == DB_UNCACHED) { spa_t *spa = dn->dn_objset->os_spa; @@ -1222,8 +1222,8 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags) if (!err && prefetch) dmu_zfetch(&dn->dn_zfetch, db->db_blkid, 1, B_TRUE); - if ((flags & DB_RF_HAVESTRUCT) == 0) - rw_exit(&dn->dn_struct_rwlock); +// if ((flags & DB_RF_HAVESTRUCT) == 0) +// rw_exit(&dn->dn_struct_rwlock); DB_DNODE_EXIT(db); if (!err && need_wait) @@ -1241,7 +1241,7 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags) if (prefetch) dmu_zfetch(&dn->dn_zfetch, db->db_blkid, 1, B_TRUE); - if ((flags & DB_RF_HAVESTRUCT) == 0) - rw_exit(&dn->dn_struct_rwlock); +// if ((flags & DB_RF_HAVESTRUCT) == 0) +// rw_exit(&dn->dn_struct_rwlock); DB_DNODE_EXIT(db); /* Skip the wait per the caller's request. */ @@ -1743,7 +1743,7 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx) * looking at db_blkptr. 
*/ if (!RW_WRITE_HELD(&dn->dn_struct_rwlock)) { - rw_enter(&dn->dn_struct_rwlock, RW_READER); +// rw_enter(&dn->dn_struct_rwlock, RW_READER); drop_struct_lock = TRUE; } @@ -1773,8 +1773,8 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx) ASSERT(parent != NULL); parent_held = TRUE; } - if (drop_struct_lock) - rw_exit(&dn->dn_struct_rwlock); +// if (drop_struct_lock) +// rw_exit(&dn->dn_struct_rwlock); ASSERT3U(db->db_level+1, ==, parent->db_level); di = dbuf_dirty(parent, tx); if (parent_held) @@ -1803,8 +1803,8 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx) ASSERT(!list_link_active(&dr->dr_dirty_node)); list_insert_tail(&dn->dn_dirty_records[txgoff], dr); mutex_exit(&dn->dn_mtx); - if (drop_struct_lock) - rw_exit(&dn->dn_struct_rwlock); +// if (drop_struct_lock) +// rw_exit(&dn->dn_struct_rwlock); } dnode_setdirty(dn, tx); @@ -2226,7 +2226,7 @@ dbuf_findbp(dnode_t *dn, int level, uint64_t blkid, int fail_sparse, epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT; ASSERT3U(level * epbs, <, 64); - ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock)); +// ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock)); /* * This assertion shouldn't trip as long as the max indirect block size * is less than 1M. 
The reason for this is that up to that point, @@ -2299,7 +2299,7 @@ dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid, objset_t *os = dn->dn_objset; dmu_buf_impl_t *db, *odb; - ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock)); +// ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock)); ASSERT(dn->dn_type != DMU_OT_NONE); db = kmem_cache_alloc(dbuf_kmem_cache, KM_SLEEP); @@ -2501,7 +2501,7 @@ dbuf_prefetch(dnode_t *dn, int64_t level, uint64_t blkid, zio_priority_t prio, dsl_dataset_t *ds; ASSERT(blkid != DMU_BONUS_BLKID); - ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock)); +// ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock)); if (blkid > dn->dn_maxblkid) return; @@ -2623,7 +2623,7 @@ __dbuf_hold_impl(struct dbuf_hold_impl_data *dh) dh->dh_parent = NULL; ASSERT(dh->dh_blkid != DMU_BONUS_BLKID); - ASSERT(RW_LOCK_HELD(&dh->dh_dn->dn_struct_rwlock)); +// ASSERT(RW_LOCK_HELD(&dh->dh_dn->dn_struct_rwlock)); ASSERT3U(dh->dh_dn->dn_nlevels, >, dh->dh_level); *(dh->dh_dbp) = NULL; @@ -3102,10 +3102,10 @@ dbuf_check_blkptr(dnode_t *dn, dmu_buf_impl_t *db) ASSERT(dn->dn_phys->dn_nlevels > 1); if (parent == NULL) { mutex_exit(&db->db_mtx); - rw_enter(&dn->dn_struct_rwlock, RW_READER); +// rw_enter(&dn->dn_struct_rwlock, RW_READER); parent = dbuf_hold_level(dn, db->db_level + 1, db->db_blkid >> epbs, db); - rw_exit(&dn->dn_struct_rwlock); +// rw_exit(&dn->dn_struct_rwlock); mutex_enter(&db->db_mtx); db->db_parent = parent; } diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c index 48e89eef4af3..7eae2353b8fd 100644 --- a/module/zfs/dmu.c +++ b/module/zfs/dmu.c @@ -150,9 +150,9 @@ dmu_buf_hold_noread_by_dnode(dnode_t *dn, uint64_t offset, dmu_buf_impl_t *db; blkid = dbuf_whichblock(dn, 0, offset); - rw_enter(&dn->dn_struct_rwlock, RW_READER); +// rw_enter(&dn->dn_struct_rwlock, RW_READER); db = dbuf_hold(dn, blkid, tag); - rw_exit(&dn->dn_struct_rwlock); +// rw_exit(&dn->dn_struct_rwlock); if (db == NULL) { *dbp = NULL; @@ -175,9 +175,9 @@ dmu_buf_hold_noread(objset_t *os, uint64_t object, uint64_t 
offset, if (err) return (err); blkid = dbuf_whichblock(dn, 0, offset); - rw_enter(&dn->dn_struct_rwlock, RW_READER); +// rw_enter(&dn->dn_struct_rwlock, RW_READER); db = dbuf_hold(dn, blkid, tag); - rw_exit(&dn->dn_struct_rwlock); +// rw_exit(&dn->dn_struct_rwlock); dnode_rele(dn, FTAG); if (db == NULL) { @@ -326,15 +326,17 @@ dmu_bonus_hold(objset_t *os, uint64_t object, void *tag, dmu_buf_t **dbp) dnode_t *dn; dmu_buf_impl_t *db; int error; + int writer = 0; error = dnode_hold(os, object, FTAG, &dn); if (error) return (error); - rw_enter(&dn->dn_struct_rwlock, RW_READER); +// rw_enter(&dn->dn_struct_rwlock, RW_READER); if (dn->dn_bonus == NULL) { - rw_exit(&dn->dn_struct_rwlock); +// rw_exit(&dn->dn_struct_rwlock); rw_enter(&dn->dn_struct_rwlock, RW_WRITER); + writer = 1; if (dn->dn_bonus == NULL) dbuf_create_bonus(dn); } @@ -351,7 +353,8 @@ dmu_bonus_hold(objset_t *os, uint64_t object, void *tag, dmu_buf_t **dbp) * hold and incrementing the dbuf count to ensure that dnode_move() sees * a dnode hold for every dbuf. 
*/ - rw_exit(&dn->dn_struct_rwlock); + if (writer) + rw_exit(&dn->dn_struct_rwlock); dnode_rele(dn, FTAG); @@ -376,13 +379,13 @@ dmu_spill_hold_by_dnode(dnode_t *dn, uint32_t flags, void *tag, dmu_buf_t **dbp) dmu_buf_impl_t *db = NULL; int err; - if ((flags & DB_RF_HAVESTRUCT) == 0) - rw_enter(&dn->dn_struct_rwlock, RW_READER); +// if ((flags & DB_RF_HAVESTRUCT) == 0) +// rw_enter(&dn->dn_struct_rwlock, RW_READER); db = dbuf_hold(dn, DMU_SPILL_BLKID, tag); - if ((flags & DB_RF_HAVESTRUCT) == 0) - rw_exit(&dn->dn_struct_rwlock); +// if ((flags & DB_RF_HAVESTRUCT) == 0) +// rw_exit(&dn->dn_struct_rwlock); if (db == NULL) { *dbp = NULL; @@ -411,7 +414,7 @@ dmu_spill_hold_existing(dmu_buf_t *bonus, void *tag, dmu_buf_t **dbp) if (spa_version(dn->dn_objset->os_spa) < SPA_VERSION_SA) { err = SET_ERROR(EINVAL); } else { - rw_enter(&dn->dn_struct_rwlock, RW_READER); +// rw_enter(&dn->dn_struct_rwlock, RW_READER); if (!dn->dn_have_spill) { err = SET_ERROR(ENOENT); @@ -420,7 +423,7 @@ dmu_spill_hold_existing(dmu_buf_t *bonus, void *tag, dmu_buf_t **dbp) DB_RF_HAVESTRUCT | DB_RF_CANFAIL, tag, dbp); } - rw_exit(&dn->dn_struct_rwlock); +// rw_exit(&dn->dn_struct_rwlock); } DB_DNODE_EXIT(db); @@ -468,7 +471,7 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length, dbuf_flags = DB_RF_CANFAIL | DB_RF_NEVERWAIT | DB_RF_HAVESTRUCT | DB_RF_NOPREFETCH; - rw_enter(&dn->dn_struct_rwlock, RW_READER); +// rw_enter(&dn->dn_struct_rwlock, RW_READER); if (dn->dn_datablkshift) { int blkshift = dn->dn_datablkshift; nblks = (P2ROUNDUP(offset + length, 1ULL << blkshift) - @@ -481,7 +484,7 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length, os_dsl_dataset->ds_object, (longlong_t)dn->dn_object, dn->dn_datablksz, (longlong_t)offset, (longlong_t)length); - rw_exit(&dn->dn_struct_rwlock); +// rw_exit(&dn->dn_struct_rwlock); return (SET_ERROR(EIO)); } nblks = 1; @@ -493,7 +496,7 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t 
length, for (i = 0; i < nblks; i++) { dmu_buf_impl_t *db = dbuf_hold(dn, blkid + i, tag); if (db == NULL) { - rw_exit(&dn->dn_struct_rwlock); +// rw_exit(&dn->dn_struct_rwlock); dmu_buf_rele_array(dbp, nblks, tag); zio_nowait(zio); return (SET_ERROR(EIO)); @@ -510,7 +513,7 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length, dmu_zfetch(&dn->dn_zfetch, blkid, nblks, read && DNODE_IS_CACHEABLE(dn)); } - rw_exit(&dn->dn_struct_rwlock); +// rw_exit(&dn->dn_struct_rwlock); /* wait for async i/o */ err = zio_wait(zio); @@ -618,11 +621,11 @@ dmu_prefetch(objset_t *os, uint64_t object, int64_t level, uint64_t offset, if (object == 0 || object >= DN_MAX_OBJECT) return; - rw_enter(&dn->dn_struct_rwlock, RW_READER); +// rw_enter(&dn->dn_struct_rwlock, RW_READER); blkid = dbuf_whichblock(dn, level, object * sizeof (dnode_phys_t)); dbuf_prefetch(dn, level, blkid, pri, 0); - rw_exit(&dn->dn_struct_rwlock); +// rw_exit(&dn->dn_struct_rwlock); return; } @@ -635,7 +638,7 @@ dmu_prefetch(objset_t *os, uint64_t object, int64_t level, uint64_t offset, if (err != 0) return; - rw_enter(&dn->dn_struct_rwlock, RW_READER); +// rw_enter(&dn->dn_struct_rwlock, RW_READER); /* * offset + len - 1 is the last byte we want to prefetch for, and offset * is the first. 
Then dbuf_whichblk(dn, level, off + len - 1) is the @@ -658,7 +661,7 @@ dmu_prefetch(objset_t *os, uint64_t object, int64_t level, uint64_t offset, dbuf_prefetch(dn, level, blkid + i, pri, 0); } - rw_exit(&dn->dn_struct_rwlock); +// rw_exit(&dn->dn_struct_rwlock); dnode_rele(dn, FTAG); } @@ -1479,10 +1482,10 @@ dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, arc_buf_t *buf, DB_DNODE_ENTER(dbuf); dn = DB_DNODE(dbuf); - rw_enter(&dn->dn_struct_rwlock, RW_READER); +// rw_enter(&dn->dn_struct_rwlock, RW_READER); blkid = dbuf_whichblock(dn, 0, offset); VERIFY((db = dbuf_hold(dn, blkid, FTAG)) != NULL); - rw_exit(&dn->dn_struct_rwlock); +// rw_exit(&dn->dn_struct_rwlock); DB_DNODE_EXIT(dbuf); /* @@ -2067,13 +2070,13 @@ __dmu_object_info_from_dnode(dnode_t *dn, dmu_object_info_t *doi) void dmu_object_info_from_dnode(dnode_t *dn, dmu_object_info_t *doi) { - rw_enter(&dn->dn_struct_rwlock, RW_READER); +// rw_enter(&dn->dn_struct_rwlock, RW_READER); mutex_enter(&dn->dn_mtx); __dmu_object_info_from_dnode(dn, doi); mutex_exit(&dn->dn_mtx); - rw_exit(&dn->dn_struct_rwlock); +// rw_exit(&dn->dn_struct_rwlock); } /* diff --git a/module/zfs/dnode.c b/module/zfs/dnode.c index fd4acbbaa501..7843e338b186 100644 --- a/module/zfs/dnode.c +++ b/module/zfs/dnode.c @@ -229,7 +229,7 @@ dnode_verify(dnode_t *dn) return; if (!RW_WRITE_HELD(&dn->dn_struct_rwlock)) { - rw_enter(&dn->dn_struct_rwlock, RW_READER); +// rw_enter(&dn->dn_struct_rwlock, RW_READER); drop_struct_lock = TRUE; } if (dn->dn_phys->dn_type != DMU_OT_NONE || dn->dn_allocated_txg != 0) { @@ -263,8 +263,8 @@ dnode_verify(dnode_t *dn) (dnode_phys_t *)dn->dn_dbuf->db.db_data + (dn->dn_object % (dn->dn_dbuf->db.db_size >> DNODE_SHIFT))); } - if (drop_struct_lock) - rw_exit(&dn->dn_struct_rwlock); +// if (drop_struct_lock) +// rw_exit(&dn->dn_struct_rwlock); } #endif @@ -710,7 +710,7 @@ dnode_move_impl(dnode_t *odn, dnode_t *ndn) { int i; - ASSERT(!RW_LOCK_HELD(&odn->dn_struct_rwlock)); +// 
ASSERT(!RW_LOCK_HELD(&odn->dn_struct_rwlock)); ASSERT(MUTEX_NOT_HELD(&odn->dn_mtx)); ASSERT(MUTEX_NOT_HELD(&odn->dn_dbufs_mtx)); ASSERT(!RW_LOCK_HELD(&odn->dn_zfetch.zf_rwlock)); @@ -1235,15 +1235,15 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots, DNODE_VERIFY(mdn); if (!RW_WRITE_HELD(&mdn->dn_struct_rwlock)) { - rw_enter(&mdn->dn_struct_rwlock, RW_READER); +// rw_enter(&mdn->dn_struct_rwlock, RW_READER); drop_struct_lock = TRUE; } blk = dbuf_whichblock(mdn, 0, object * sizeof (dnode_phys_t)); db = dbuf_hold(mdn, blk, FTAG); - if (drop_struct_lock) - rw_exit(&mdn->dn_struct_rwlock); +// if (drop_struct_lock) +// rw_exit(&mdn->dn_struct_rwlock); if (db == NULL) return (SET_ERROR(EIO)); err = dbuf_read(db, NULL, DB_RF_CANFAIL); @@ -1554,14 +1554,15 @@ dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx, boolean_t have_read) ASSERT(blkid != DMU_BONUS_BLKID); - ASSERT(have_read ? - RW_READ_HELD(&dn->dn_struct_rwlock) : - RW_WRITE_HELD(&dn->dn_struct_rwlock)); +// ASSERT(have_read ? +// RW_READ_HELD(&dn->dn_struct_rwlock) : +// RW_WRITE_HELD(&dn->dn_struct_rwlock)); /* * if we have a read-lock, check to see if we need to do any work * before upgrading to a write-lock. 
*/ +#if 0 if (have_read) { if (blkid <= dn->dn_maxblkid) return; @@ -1571,6 +1572,10 @@ dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx, boolean_t have_read) rw_enter(&dn->dn_struct_rwlock, RW_WRITER); } } +#endif + /* !have_read: caller already holds it as writer. */ + if (have_read) + rw_enter(&dn->dn_struct_rwlock, RW_WRITER); if (blkid <= dn->dn_maxblkid) goto out; @@ -1625,8 +1630,10 @@ dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx, boolean_t have_read) } out: - if (have_read) - rw_downgrade(&dn->dn_struct_rwlock); +// if (have_read) +// rw_downgrade(&dn->dn_struct_rwlock); + if (have_read) + rw_exit(&dn->dn_struct_rwlock); } static void @@ -2099,8 +2106,8 @@ dnode_next_offset(dnode_t *dn, int flags, uint64_t *offset, int lvl, maxlvl; int error = 0; - if (!(flags & DNODE_FIND_HAVELOCK)) - rw_enter(&dn->dn_struct_rwlock, RW_READER); +// if (!(flags & DNODE_FIND_HAVELOCK)) +// rw_enter(&dn->dn_struct_rwlock, RW_READER); if (dn->dn_phys->dn_nlevels == 0) { error = SET_ERROR(ESRCH); @@ -2144,8 +2151,8 @@ dnode_next_offset(dnode_t *dn, int flags, uint64_t *offset, initial_offset < *offset : initial_offset > *offset)) error = SET_ERROR(ESRCH); out: - if (!(flags & DNODE_FIND_HAVELOCK)) - rw_exit(&dn->dn_struct_rwlock); +// if (!(flags & DNODE_FIND_HAVELOCK)) +// rw_exit(&dn->dn_struct_rwlock); return (error); } diff --git a/module/zfs/dnode_sync.c b/module/zfs/dnode_sync.c index 742d962bc232..92e1a40938df 100644 --- a/module/zfs/dnode_sync.c +++ b/module/zfs/dnode_sync.c @@ -184,10 +184,10 @@ free_verify(dmu_buf_impl_t *db, uint64_t start, uint64_t end, dmu_tx_t *tx) ASSERT(db->db_level == 1); - rw_enter(&dn->dn_struct_rwlock, RW_READER); +// rw_enter(&dn->dn_struct_rwlock, RW_READER); err = dbuf_hold_impl(dn, db->db_level-1, (db->db_blkid << epbs) + i, TRUE, FALSE, FTAG, &child); - rw_exit(&dn->dn_struct_rwlock); +// rw_exit(&dn->dn_struct_rwlock); if (err == ENOENT) continue; ASSERT(err == 0); @@ -282,10 +282,10 @@ free_children(dmu_buf_impl_t *db, uint64_t blkid, uint64_t nblks, for (id = start; id <= end; id++, bp++) { if 
(BP_IS_HOLE(bp)) continue; - rw_enter(&dn->dn_struct_rwlock, RW_READER); +// rw_enter(&dn->dn_struct_rwlock, RW_READER); VERIFY0(dbuf_hold_impl(dn, db->db_level - 1, id, TRUE, FALSE, FTAG, &subdb)); - rw_exit(&dn->dn_struct_rwlock); +// rw_exit(&dn->dn_struct_rwlock); ASSERT3P(bp, ==, subdb->db_blkptr); free_children(subdb, blkid, nblks, tx); @@ -362,10 +362,10 @@ dnode_sync_free_range_impl(dnode_t *dn, uint64_t blkid, uint64_t nblks, for (i = start; i <= end; i++, bp++) { if (BP_IS_HOLE(bp)) continue; - rw_enter(&dn->dn_struct_rwlock, RW_READER); +// rw_enter(&dn->dn_struct_rwlock, RW_READER); VERIFY0(dbuf_hold_impl(dn, dnlevel - 1, i, TRUE, FALSE, FTAG, &db)); - rw_exit(&dn->dn_struct_rwlock); +// rw_exit(&dn->dn_struct_rwlock); free_children(db, blkid, nblks, tx); dbuf_rele(db, FTAG);