Skip to content

Commit

Permalink
Prevent direct reclaim from recursively locking hash_lock
Browse files Browse the repository at this point in the history
Chris Siebenmann provided backtraces for a deadlock with the latest HEAD from his system.
Of particular interest was this backtrace:

l2arc_feed      D ffff88083f593cc0     0   642      2 0x00000000
 ffff880036697198 0000000000000046 ffff88080d9fa740 0000000000013cc0
 ffff880036697fd8 0000000000013cc0 ffff88080fd10000 ffff88080d9fa740
 000000003f496b20 ffffffffa04fe8d8 ffffffffa04fe8dc ffff88080d9fa740
Call Trace:
 [<ffffffff81760619>] schedule_preempt_disabled+0x29/0x70
 [<ffffffff81762433>] __mutex_lock_slowpath+0xb3/0x120
 [<ffffffff817624c3>] mutex_lock+0x23/0x40
 [<ffffffffa0391f39>] remove_reference.isra.10+0x59/0xc0 [zfs]
 [<ffffffffa039690d>] arc_buf_remove_ref+0xbd/0x140 [zfs]
 [<ffffffffa039cb2f>] dbuf_rele_and_unlock+0x15f/0x3b0 [zfs]
 [<ffffffffa03b870e>] ? dnode_setdirty+0x12e/0x190 [zfs]
 [<ffffffffa039f572>] ? dbuf_dirty+0x492/0x9b0 [zfs]
 [<ffffffffa039cec6>] dbuf_rele+0x26/0x30 [zfs]
 [<ffffffffa03b85c2>] dnode_rele+0x72/0x90 [zfs]
 [<ffffffffa039cd0a>] dbuf_rele_and_unlock+0x33a/0x3b0 [zfs]
 [<ffffffff811f4e9d>] ? __slab_free+0xbd/0x300
 [<ffffffff817624b6>] ? mutex_lock+0x16/0x40
 [<ffffffffa039dd87>] ? dmu_buf_update_user+0x57/0xb0 [zfs]
 [<ffffffffa039d126>] dmu_buf_rele+0x26/0x30 [zfs]
 [<ffffffffa03dc378>] sa_handle_destroy+0x68/0xb0 [zfs]
 [<ffffffffa0437c0e>] zfs_zinactive+0xce/0x160 [zfs]
 [<ffffffffa0431244>] zfs_inactive+0x64/0x200 [zfs]
 [<ffffffff810da5a0>] ? autoremove_wake_function+0x40/0x40
 [<ffffffffa0448518>] zpl_evict_inode+0x28/0x30 [zfs]
 [<ffffffff81232227>] evict+0xa7/0x190
 [<ffffffff8123234e>] dispose_list+0x3e/0x60
 [<ffffffff81233386>] prune_icache_sb+0x56/0x80
 [<ffffffff81218fd5>] super_cache_scan+0x115/0x180
 [<ffffffff811aa299>] shrink_slab_node+0x129/0x2b0
 [<ffffffff81205cfb>] ? mem_cgroup_iter+0x12b/0x430
 [<ffffffff811ac0bb>] shrink_slab+0x8b/0x170
 [<ffffffff811af057>] shrink_zones+0x357/0x470
 [<ffffffff811af22b>] do_try_to_free_pages+0xbb/0x140
 [<ffffffff811af38a>] try_to_free_pages+0xda/0x170
 [<ffffffff811a2177>] __alloc_pages_nodemask+0x647/0xb30
 [<ffffffff811ea59c>] alloc_pages_current+0x9c/0x120
 [<ffffffff811f586b>] new_slab+0x3ab/0x4c0
 [<ffffffff811f5de4>] __slab_alloc+0x464/0x5e0
 [<ffffffffa02f49be>] ? spl_kmem_cache_alloc+0x8e/0x870 [spl]
 [<ffffffff810d0126>] ? dequeue_task_fair+0x3d6/0x680
 [<ffffffff810d0f01>] ? put_prev_entity+0x31/0x400
 [<ffffffff811f6323>] kmem_cache_alloc+0x1a3/0x1f0
 [<ffffffffa02f49be>] ? spl_kmem_cache_alloc+0x8e/0x870 [spl]
 [<ffffffffa02f49be>] spl_kmem_cache_alloc+0x8e/0x870 [spl]
 [<ffffffff810fe9ee>] ? try_to_del_timer_sync+0x5e/0x90
 [<ffffffffa043fb82>] zio_create+0x42/0x5d0 [zfs]
 [<ffffffffa0440341>] zio_null+0x61/0x70 [zfs]
 [<ffffffffa0396070>] ? l2arc_feed_thread+0xbb0/0xbb0 [zfs]
 [<ffffffffa044036e>] zio_root+0x1e/0x20 [zfs]
 [<ffffffffa0395dbc>] l2arc_feed_thread+0x8fc/0xbb0 [zfs]
 [<ffffffff810d3fee>] ? pick_next_task_fair+0x1be/0x8c0
 [<ffffffffa03954c0>] ? l2arc_evict+0x3a0/0x3a0 [zfs]
 [<ffffffffa02f53fa>] thread_generic_wrapper+0x7a/0x90 [spl]
 [<ffffffffa02f5380>] ? __thread_exit+0x20/0x20 [spl]
 [<ffffffff810b7fea>] kthread+0xea/0x100
 [<ffffffff810b7f00>] ? kthread_create_on_node+0x1b0/0x1b0
 [<ffffffff817646fc>] ret_from_fork+0x7c/0xb0
 [<ffffffff810b7f00>] ? kthread_create_on_node+0x1b0/0x1b0

It appears that direct reclaim attempted to evict a buffer protected by
a lock the thread was already holding, which deadlocked. Marking
critical sections formed by the hash_lock with
spl_fstrans_mark()/spl_fstrans_unmark() whenever we might perform an
allocation should avoid this.

Closes openzfs#3050

Reported-by: Chris Siebenmann <[email protected]>
Signed-off-by: Richard Yao <[email protected]>
  • Loading branch information
ryao committed Jan 29, 2015
1 parent c5edc0d commit 916e29a
Showing 1 changed file with 37 additions and 0 deletions.
37 changes: 37 additions & 0 deletions module/zfs/arc.c
Original file line number Diff line number Diff line change
Expand Up @@ -1040,11 +1040,18 @@ arc_buf_thaw(arc_buf_t *buf)
void
arc_buf_freeze(arc_buf_t *buf)
{
fstrans_cookie_t cookie;
kmutex_t *hash_lock;

if (!(zfs_flags & ZFS_DEBUG_MODIFY))
return;

/*
* Direct reclaim could cause us to try to evict a buffer protected by
* hash_lock.
*/
cookie = spl_fstrans_mark();

hash_lock = HDR_LOCK(buf->b_hdr);
mutex_enter(hash_lock);

Expand All @@ -1053,6 +1060,7 @@ arc_buf_freeze(arc_buf_t *buf)
arc_cksum_compute(buf, B_FALSE);
mutex_exit(hash_lock);

spl_fstrans_unmark(cookie);
}

static void
Expand Down Expand Up @@ -2953,6 +2961,7 @@ arc_read_done(zio_t *zio)
kmutex_t *hash_lock = NULL;
arc_callback_t *callback_list, *acb;
int freeable = FALSE;
fstrans_cookie_t cookie;

buf = zio->io_private;
hdr = buf->b_hdr;
Expand All @@ -2977,6 +2986,12 @@ arc_read_done(zio_t *zio)
found = buf_hash_find(hdr->b_spa, zio->io_bp,
&hash_lock);

/*
* Direct reclaim could deadlock on holding the hash_lock.
*/
if (hash_lock)
cookie = spl_fstrans_mark();

ASSERT((found == NULL && HDR_FREED_IN_READ(hdr) &&
hash_lock == NULL) ||
(found == hdr &&
Expand Down Expand Up @@ -3053,6 +3068,7 @@ arc_read_done(zio_t *zio)
cv_broadcast(&hdr->b_cv);

if (hash_lock) {
spl_fstrans_unmark(cookie);
mutex_exit(hash_lock);
} else {
/*
Expand Down Expand Up @@ -3109,10 +3125,20 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_done_func_t *done,
arc_buf_hdr_t *hdr = NULL;
arc_buf_t *buf = NULL;
kmutex_t *hash_lock = NULL;
fstrans_cookie_t cookie;
zio_t *rzio;
uint64_t guid = spa_load_guid(spa);
int rc = 0;

/*
* Direct reclaim could cause us to try to evict a buffer protected by
* hash_lock. We could try to be more granular by only marking when we
* actually have the lock. That would be prone to regressions as new
* code is merged from Illumos. The one allocation that is clearly not
 * in need of such protection is explicitly excluded from it below.
*/
cookie = spl_fstrans_mark();

ASSERT(!BP_IS_EMBEDDED(bp) ||
BPE_GET_ETYPE(bp) == BP_EMBEDDED_TYPE_DATA);

Expand Down Expand Up @@ -3340,8 +3366,10 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_done_func_t *done,
ARCSTAT_BUMP(arcstat_l2_hits);
atomic_inc_32(&hdr->b_l2hdr->b_hits);

spl_fstrans_unmark(cookie);
cb = kmem_zalloc(sizeof (l2arc_read_callback_t),
KM_SLEEP);
cookie = spl_fstrans_mark();
cb->l2rcb_buf = buf;
cb->l2rcb_spa = spa;
cb->l2rcb_bp = *bp;
Expand Down Expand Up @@ -3421,6 +3449,7 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_done_func_t *done,
}

out:
spl_fstrans_unmark(cookie);
spa_read_history_add(spa, zb, *arc_flags);
return (rc);
}
Expand Down Expand Up @@ -4890,6 +4919,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz,
uint64_t guid = spa_load_guid(spa);
int try;
const boolean_t do_headroom_boost = *headroom_boost;
fstrans_cookie_t cookie;

ASSERT(dev->l2ad_vdev != NULL);

Expand Down Expand Up @@ -4970,6 +5000,12 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz,
break;
}

/*
* Direct reclaim could cause us to try to evict a
* buffer protected by hash_lock.
*/
cookie = spl_fstrans_mark();

if (pio == NULL) {
/*
* Insert a dummy header on the buflist so
Expand Down Expand Up @@ -5021,6 +5057,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz,
arc_cksum_verify(ab->b_buf);
arc_cksum_compute(ab->b_buf, B_TRUE);

spl_fstrans_unmark(cookie);
mutex_exit(hash_lock);

write_sz += buf_sz;
Expand Down

0 comments on commit 916e29a

Please sign in to comment.