Skip to content

Commit

Permalink
Use KM_PUSHPAGE instead of KM_SLEEP
Browse files Browse the repository at this point in the history
It used to be the case that all KM_SLEEP allocations were GFP_NOFS.
Unfortunately this often resulted in the kernel being unable to
reclaim the ARC, inode, and dentry caches in a timely manner.
The fix was to make KM_SLEEP a GFP_KERNEL allocation in the SPL.

However, this increases the possibility of deadlocking the system
on a zfs write thread.  If a zfs write thread attempts to perform
an allocation it may trigger synchronous reclaim.  This reclaim
may attempt to flush dirty data/inodes to disk to free memory.
Unfortunately, this write cannot finish because the write thread
which would handle it is holding the previous transaction open.
Deadlock.

To avoid this all allocations in the zfs write thread path must
use KM_PUSHPAGE which prohibits synchronous reclaim for that
thread.  In this way forward progress is ensured.  The risk
with this change is that I missed updating an allocation for the
write threads, leaving an increased possibility of deadlock.  If
any deadlocks remain they will be unlikely but we'll have to
make sure they all get fixed.
  • Loading branch information
behlendorf committed Mar 22, 2011
1 parent f47c42e commit 691f6ac
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 9 deletions.
6 changes: 3 additions & 3 deletions module/zfs/arc.c
Original file line number Diff line number Diff line change
Expand Up @@ -2720,7 +2720,7 @@ arc_read_nolock(zio_t *pio, spa_t *spa, const blkptr_t *bp,
arc_callback_t *acb = NULL;

acb = kmem_zalloc(sizeof (arc_callback_t),
KM_SLEEP);
KM_PUSHPAGE);
acb->acb_done = done;
acb->acb_private = private;
if (pio != NULL)
Expand Down Expand Up @@ -2836,7 +2836,7 @@ arc_read_nolock(zio_t *pio, spa_t *spa, const blkptr_t *bp,

ASSERT(!GHOST_STATE(hdr->b_state));

acb = kmem_zalloc(sizeof (arc_callback_t), KM_SLEEP);
acb = kmem_zalloc(sizeof (arc_callback_t), KM_PUSHPAGE);
acb->acb_done = done;
acb->acb_private = private;

Expand Down Expand Up @@ -2885,7 +2885,7 @@ arc_read_nolock(zio_t *pio, spa_t *spa, const blkptr_t *bp,
ARCSTAT_BUMP(arcstat_l2_hits);

cb = kmem_zalloc(sizeof (l2arc_read_callback_t),
KM_SLEEP);
KM_PUSHPAGE);
cb->l2rcb_buf = buf;
cb->l2rcb_spa = spa;
cb->l2rcb_bp = *bp;
Expand Down
4 changes: 2 additions & 2 deletions module/zfs/zfs_znode.c
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ zfs_create_share_dir(zfs_sb_t *zsb, dmu_tx_t *tx)
vattr.va_uid = crgetuid(kcred);
vattr.va_gid = crgetgid(kcred);

sharezp = kmem_cache_alloc(znode_cache, KM_SLEEP);
sharezp = kmem_cache_alloc(znode_cache, KM_PUSHPAGE);
sharezp->z_moved = 0;
sharezp->z_unlinked = 0;
sharezp->z_atime_dirty = 0;
Expand Down Expand Up @@ -248,7 +248,7 @@ zfs_inode_alloc(struct super_block *sb, struct inode **ip)
{
znode_t *zp;

zp = kmem_cache_alloc(znode_cache, KM_SLEEP);
zp = kmem_cache_alloc(znode_cache, KM_PUSHPAGE);
*ip = ZTOI(zp);

return (0);
Expand Down
8 changes: 4 additions & 4 deletions module/zfs/zio.c
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ static void
zio_push_transform(zio_t *zio, void *data, uint64_t size, uint64_t bufsize,
zio_transform_func_t *transform)
{
zio_transform_t *zt = kmem_alloc(sizeof (zio_transform_t), KM_SLEEP);
zio_transform_t *zt = kmem_alloc(sizeof (zio_transform_t), KM_PUSHPAGE);

zt->zt_orig_data = zio->io_data;
zt->zt_orig_size = zio->io_size;
Expand Down Expand Up @@ -370,7 +370,7 @@ zio_unique_parent(zio_t *cio)
void
zio_add_child(zio_t *pio, zio_t *cio)
{
zio_link_t *zl = kmem_cache_alloc(zio_link_cache, KM_SLEEP);
zio_link_t *zl = kmem_cache_alloc(zio_link_cache, KM_PUSHPAGE);
int w;

/*
Expand Down Expand Up @@ -491,7 +491,7 @@ zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
ASSERT(!bp || !(flags & ZIO_FLAG_CONFIG_WRITER));
ASSERT(vd || stage == ZIO_STAGE_OPEN);

zio = kmem_cache_alloc(zio_cache, KM_SLEEP);
zio = kmem_cache_alloc(zio_cache, KM_PUSHPAGE);
bzero(zio, sizeof (zio_t));

mutex_init(&zio->io_lock, NULL, MUTEX_DEFAULT, NULL);
Expand Down Expand Up @@ -1503,7 +1503,7 @@ zio_gang_node_alloc(zio_gang_node_t **gnpp)

ASSERT(*gnpp == NULL);

gn = kmem_zalloc(sizeof (*gn), KM_SLEEP);
gn = kmem_zalloc(sizeof (*gn), KM_PUSHPAGE);
gn->gn_gbh = zio_buf_alloc(SPA_GANGBLOCKSIZE);
*gnpp = gn;

Expand Down

0 comments on commit 691f6ac

Please sign in to comment.