From 6e47c4b959c1ca2c2fde5e1faef9ba2434e606a1 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Wed, 30 Jul 2014 13:07:16 -0700 Subject: [PATCH] Debugging for zio_wait() hangs From the information gathered about issue #2523 it's clear that somehow the spin lock is being damaged. This could occur if the spin lock is reinitialized, the memory is accidentally overwritten, or freed back to the cache to soon. To determine exactly what is happening this patch adds a couple new sanity tests. * A zio->io_magic field is added before the io_lock. This field is designed to act as a red zone allow us to detect if the zio has been written. * The zio->io_magic field is also used to detect if somehow the constructor or destructor is running multiple for the object. This would effectively cause the spin lock to be reinitialized. * The destructor has been updated to poison the entire structure. This should cause us to quickly detect any use-after-free bugs. Once the root cause of this issue can be determined this patch should be reverted. Signed-off-by: Brian Behlendorf Issue #2523 --- include/sys/zio.h | 6 ++++++ module/zfs/zio.c | 24 ++++++++++++++++++++---- 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/include/sys/zio.h b/include/sys/zio.h index 69b00d0f4029..450eaebfd1da 100644 --- a/include/sys/zio.h +++ b/include/sys/zio.h @@ -45,6 +45,11 @@ extern "C" { */ #define ZEC_MAGIC 0x210da7ab10c7a11ULL +/* + * Debug zio redzone magic + */ +#define ZIO_MAGIC 0x210210210210210ULL + typedef struct zio_eck { uint64_t zec_magic; /* for validation, endianness */ zio_cksum_t zec_cksum; /* 256-bit checksum */ @@ -445,6 +450,7 @@ struct zio { zio_gang_node_t *io_gang_tree; void *io_executor; void *io_waiter; + uint64_t io_magic; kmutex_t io_lock; kcondvar_t io_cv; diff --git a/module/zfs/zio.c b/module/zfs/zio.c index e4f1271d3a02..146d64f9b146 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -98,7 +98,10 @@ zio_cons(void *arg, void *unused, int kmflag) { zio_t *zio = arg; + /* Verify the constructor never runs twice on the same object. */ + VERIFY3U(zio->io_magic, !=, ZIO_MAGIC); bzero(zio, sizeof (zio_t)); + zio->io_magic = ZIO_MAGIC; mutex_init(&zio->io_lock, NULL, MUTEX_DEFAULT, NULL); cv_init(&zio->io_cv, NULL, CV_DEFAULT, NULL); @@ -120,6 +123,14 @@ zio_dest(void *arg, void *unused) cv_destroy(&zio->io_cv); list_destroy(&zio->io_parent_list); list_destroy(&zio->io_child_list); + + /* + * Verify the destructor never runs twice on the same object, and + * poison the object to catch any use-after-free type of bugs. + */ + VERIFY3U(zio->io_magic, !=, ~ZIO_MAGIC); + memset(zio, 'z', sizeof (zio_t)); + zio->io_magic = ~ZIO_MAGIC; } void @@ -553,6 +564,7 @@ zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp, ASSERT(vd || stage == ZIO_STAGE_OPEN); zio = kmem_cache_alloc(zio_cache, KM_PUSHPAGE); + VERIFY3U(zio->io_magic, ==, ZIO_MAGIC); if (vd != NULL) zio->io_child_type = ZIO_CHILD_VDEV; @@ -647,6 +659,7 @@ zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp, static void zio_destroy(zio_t *zio) { + VERIFY3U(zio->io_magic, ==, ZIO_MAGIC); kmem_cache_free(zio_cache, zio); } @@ -1007,7 +1020,7 @@ zio_flush(zio_t *zio, vdev_t *vd) void zio_shrink(zio_t *zio, uint64_t size) { - ASSERT(zio->io_executor == NULL); + VERIFY(zio->io_executor == NULL); ASSERT(zio->io_orig_size == zio->io_size); ASSERT(size <= zio->io_size); @@ -1380,6 +1393,7 @@ __zio_execute(zio_t *zio) ASSERT(!MUTEX_HELD(&zio->io_lock)); ASSERT(ISP2(stage)); ASSERT(zio->io_stall == NULL); + VERIFY3U(zio->io_magic, ==, ZIO_MAGIC); do { stage <<= 1; @@ -1443,15 +1457,17 @@ zio_wait(zio_t *zio) int error; ASSERT(zio->io_stage == ZIO_STAGE_OPEN); - ASSERT(zio->io_executor == NULL); + VERIFY(zio->io_executor == NULL); zio->io_waiter = curthread; __zio_execute(zio); mutex_enter(&zio->io_lock); - while (zio->io_executor != NULL) + while (zio->io_executor != NULL) { + VERIFY3U(zio->io_magic, ==, ZIO_MAGIC); cv_wait_io(&zio->io_cv, &zio->io_lock); + } mutex_exit(&zio->io_lock); error = zio->io_error; @@ -1463,7 +1479,7 @@ zio_wait(zio_t *zio) void zio_nowait(zio_t *zio) { - ASSERT(zio->io_executor == NULL); + VERIFY(zio->io_executor == NULL); if (zio->io_child_type == ZIO_CHILD_LOGICAL && zio_unique_parent(zio) == NULL) {