linux: zvol: avoid heap allocation for zvol_request_sync=1
The spl_kmem_alloc() call showed up in flamegraphs of a single-threaded
4k sync write workload at 85k IOPS on an
Intel(R) Xeon(R) Silver 4215 CPU @ 2.50GHz.
This is certainly not a huge win, but I believe the change is clean and
easy to maintain down the road.

Reviewed-by: Brian Behlendorf <[email protected]>
Reviewed-by: Matthew Ahrens <[email protected]>
Signed-off-by: Christian Schwarz <[email protected]>
Closes #11666
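In a nutshell: the request is now built on the caller's stack, and a heap-allocated wrapper is created only when the request must outlive the caller, i.e. on the asynchronous taskq path. With zvol_request_sync=1 every request is handled inline, so no allocation happens at all. Below is a minimal, self-contained sketch of the pattern, using pthreads in place of the SPL taskq and hypothetical names (request_t, task_t, submit) in place of the zvol types:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

typedef struct request { int payload; } request_t;
typedef struct task { request_t req; } task_t;	/* heap-only wrapper */

static void
handle_request(request_t *req)
{
	printf("handling payload %d\n", req->payload);
}

/* Async entry point: unwrap the task, do the work, free the wrapper. */
static void *
handle_request_task(void *arg)
{
	task_t *task = arg;
	handle_request(&task->req);
	free(task);
	return (NULL);
}

static void
submit(int payload, int sync)
{
	request_t req = { .payload = payload };	/* stack; no heap yet */

	if (sync) {
		handle_request(&req);	/* sync path: zero allocations */
	} else {
		/* Async path: one allocation; request copied by value. */
		task_t *task = malloc(sizeof (*task));
		task->req = req;
		pthread_t t;
		pthread_create(&t, NULL, handle_request_task, task);
		pthread_join(t, NULL);	/* joined only to keep the example deterministic */
	}
}

int
main(void)
{
	submit(1, 1);	/* like zvol_request_sync=1 */
	submit(2, 0);	/* like zvol_request_sync=0 */
	return (0);
}

The sync branch touches only the stack; the async branch pays for exactly one allocation, which is what the diff below does with zv_request_t and zv_request_task_t.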
problame authored Mar 3, 2021
1 parent 3242b53 commit e439ee8
Showing 1 changed file with 64 additions and 29 deletions.
module/os/linux/zfs/zvol_os.c
@@ -56,12 +56,32 @@ struct zvol_state_os {
 taskq_t *zvol_taskq;
 static struct ida zvol_ida;
 
-typedef struct zv_request {
+typedef struct zv_request_stack {
 	zvol_state_t	*zv;
 	struct bio	*bio;
-	taskq_ent_t	ent;
 } zv_request_t;
 
+typedef struct zv_request_task {
+	zv_request_t zvr;
+	taskq_ent_t	ent;
+} zv_request_task_t;
+
+static zv_request_task_t *
+zv_request_task_create(zv_request_t zvr)
+{
+	zv_request_task_t *task;
+	task = kmem_alloc(sizeof (zv_request_task_t), KM_SLEEP);
+	taskq_init_ent(&task->ent);
+	task->zvr = zvr;
+	return (task);
+}
+
+static void
+zv_request_task_free(zv_request_task_t *task)
+{
+	kmem_free(task, sizeof (*task));
+}
+
 /*
  * Given a path, return TRUE if path is a ZVOL.
  */
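The two helpers above pair up as allocate-on-dispatch / free-on-completion. Condensed from the zvol_request() and zvol_write_task() changes later in this diff (not a standalone snippet), the write path becomes:

	zv_request_t zvr = { .zv = zv, .bio = bio };	/* on the stack */

	if (zvol_request_sync) {
		zvol_write(&zvr);	/* sync: no allocation */
	} else {
		zv_request_task_t *task = zv_request_task_create(zvr);
		taskq_dispatch_ent(zvol_taskq,
		    zvol_write_task, task, 0, &task->ent);
		/* zvol_write_task() calls zv_request_task_free() when done. */
	}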
@@ -80,9 +100,8 @@ zvol_is_zvol_impl(const char *path)
 }
 
 static void
-zvol_write(void *arg)
+zvol_write(zv_request_t *zvr)
 {
-	zv_request_t *zvr = arg;
 	struct bio *bio = zvr->bio;
 	int error = 0;
 	zfs_uio_t uio;
@@ -102,7 +121,6 @@ zvol_write(void *arg)
 	if (uio.uio_resid == 0) {
 		rw_exit(&zv->zv_suspend_lock);
 		BIO_END_IO(bio, 0);
-		kmem_free(zvr, sizeof (zv_request_t));
 		return;
 	}
 
@@ -162,13 +180,19 @@ zvol_write(void *arg)
 	blk_generic_end_io_acct(q, disk, WRITE, bio, start_time);
 
 	BIO_END_IO(bio, -error);
-	kmem_free(zvr, sizeof (zv_request_t));
 }
 
 static void
-zvol_discard(void *arg)
+zvol_write_task(void *arg)
+{
+	zv_request_task_t *task = arg;
+	zvol_write(&task->zvr);
+	zv_request_task_free(task);
+}
+
+static void
+zvol_discard(zv_request_t *zvr)
 {
-	zv_request_t *zvr = arg;
 	struct bio *bio = zvr->bio;
 	zvol_state_t *zv = zvr->zv;
 	uint64_t start = BIO_BI_SECTOR(bio) << 9;
@@ -238,13 +262,19 @@ zvol_discard(void *arg)
 	blk_generic_end_io_acct(q, disk, WRITE, bio, start_time);
 
 	BIO_END_IO(bio, -error);
-	kmem_free(zvr, sizeof (zv_request_t));
 }
 
 static void
-zvol_read(void *arg)
+zvol_discard_task(void *arg)
+{
+	zv_request_task_t *task = arg;
+	zvol_discard(&task->zvr);
+	zv_request_task_free(task);
+}
+
+static void
+zvol_read(zv_request_t *zvr)
 {
-	zv_request_t *zvr = arg;
 	struct bio *bio = zvr->bio;
 	int error = 0;
 	zfs_uio_t uio;
@@ -295,7 +325,14 @@ zvol_read(void *arg)
 	blk_generic_end_io_acct(q, disk, READ, bio, start_time);
 
 	BIO_END_IO(bio, -error);
-	kmem_free(zvr, sizeof (zv_request_t));
 }
 
+static void
+zvol_read_task(void *arg)
+{
+	zv_request_task_t *task = arg;
+	zvol_read(&task->zvr);
+	zv_request_task_free(task);
+}
+
 #ifdef HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS
@@ -318,7 +355,6 @@ zvol_request(struct request_queue *q, struct bio *bio)
 	uint64_t offset = BIO_BI_SECTOR(bio) << 9;
 	uint64_t size = BIO_BI_SIZE(bio);
 	int rw = bio_data_dir(bio);
-	zv_request_t *zvr;
 
 	if (bio_has_data(bio) && offset + size > zv->zv_volsize) {
 		printk(KERN_INFO
@@ -331,6 +367,12 @@ zvol_request(struct request_queue *q, struct bio *bio)
 		goto out;
 	}
 
+	zv_request_t zvr = {
+		.zv = zv,
+		.bio = bio,
+	};
+	zv_request_task_t *task;
+
 	if (rw == WRITE) {
 		if (unlikely(zv->zv_flags & ZVOL_RDONLY)) {
 			BIO_END_IO(bio, -SET_ERROR(EROFS));
@@ -361,11 +403,6 @@ zvol_request(struct request_queue *q, struct bio *bio)
 			rw_downgrade(&zv->zv_suspend_lock);
 		}
 
-		zvr = kmem_alloc(sizeof (zv_request_t), KM_SLEEP);
-		zvr->zv = zv;
-		zvr->bio = bio;
-		taskq_init_ent(&zvr->ent);
-
 		/*
 		 * We don't want this thread to be blocked waiting for i/o to
 		 * complete, so we instead wait from a taskq callback. The
@@ -398,17 +435,19 @@ zvol_request(struct request_queue *q, struct bio *bio)
 		 */
 		if (bio_is_discard(bio) || bio_is_secure_erase(bio)) {
 			if (zvol_request_sync) {
-				zvol_discard(zvr);
+				zvol_discard(&zvr);
 			} else {
+				task = zv_request_task_create(zvr);
 				taskq_dispatch_ent(zvol_taskq,
-				    zvol_discard, zvr, 0, &zvr->ent);
+				    zvol_discard_task, task, 0, &task->ent);
 			}
 		} else {
 			if (zvol_request_sync) {
-				zvol_write(zvr);
+				zvol_write(&zvr);
 			} else {
+				task = zv_request_task_create(zvr);
 				taskq_dispatch_ent(zvol_taskq,
-				    zvol_write, zvr, 0, &zvr->ent);
+				    zvol_write_task, task, 0, &task->ent);
 			}
 		}
 	} else {
@@ -422,19 +461,15 @@ zvol_request(struct request_queue *q, struct bio *bio)
 			goto out;
 		}
 
-		zvr = kmem_alloc(sizeof (zv_request_t), KM_SLEEP);
-		zvr->zv = zv;
-		zvr->bio = bio;
-		taskq_init_ent(&zvr->ent);
-
 		rw_enter(&zv->zv_suspend_lock, RW_READER);
 
 		/* See comment in WRITE case above. */
 		if (zvol_request_sync) {
-			zvol_read(zvr);
+			zvol_read(&zvr);
 		} else {
+			task = zv_request_task_create(zvr);
 			taskq_dispatch_ent(zvol_taskq,
-			    zvol_read, zvr, 0, &zvr->ent);
+			    zvol_read_task, task, 0, &task->ent);
 		}
 	}
 
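Note that zv_request_task_create() takes its zv_request_t argument by value: the request is copied into the heap-allocated task before taskq_dispatch_ent() returns, so the on-stack zvr never has to outlive zvol_request(). Handing the taskq a pointer into the caller's frame instead would be a use-after-return. A small standalone sketch of that by-value hand-off (generic names, not zvol code):

#include <stdlib.h>

typedef struct req { int a, b; } req_t;
typedef struct req_task { req_t req; } req_task_t;

/*
 * Taking req_t by value copies the caller's stack struct into the heap
 * wrapper, so the caller's frame may die at any point afterwards.
 */
static req_task_t *
req_task_create(req_t req)
{
	req_task_t *task = malloc(sizeof (*task));	/* check omitted in sketch */
	task->req = req;	/* struct assignment copies every field */
	return (task);
}

int
main(void)
{
	req_task_t *task;
	{
		req_t on_stack = { .a = 1, .b = 2 };
		task = req_task_create(on_stack);
	}	/* on_stack is gone; task->req remains valid */
	free(task);
	return (0);
}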
