Skip to content

Commit

Permalink
writeback: eliminate work item allocation in wb_start_writeback()
Browse files Browse the repository at this point in the history
Handle start-all writeback like we do periodic or kupdate
style writeback - by marking the bdi_writeback as needing a full
flush, and simply waking the thread. This eliminates the need to
allocate and queue a specific work item just for this purpose.

After this change, we truly only ever have one start-all flush running
at any point in time. We mark the need to start all flushes by setting
the WB_start_all bit in the bdi_writeback state, and the writeback
thread clears that bit once it has processed the request.

Reviewed-by: Jan Kara <[email protected]>
Signed-off-by: Jens Axboe <[email protected]>
(cherry picked from commit 85009b4)
  • Loading branch information
axboe authored and yishache committed Oct 27, 2020
1 parent de4d46b commit 4071461
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 60 deletions.
71 changes: 34 additions & 37 deletions fs/fs-writeback.c
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@ struct wb_writeback_work {
unsigned int for_background:1;
unsigned int for_sync:1; /* sync(2) WB_SYNC_ALL writeback */
unsigned int auto_free:1; /* free on completion */
unsigned int start_all:1; /* nr_pages == 0 (all) writeback */
enum wb_reason reason; /* why was writeback initiated? */

struct list_head list; /* pending work list */
Expand Down Expand Up @@ -997,8 +996,6 @@ static unsigned long get_nr_dirty_pages(void)

static void wb_start_writeback(struct bdi_writeback *wb, enum wb_reason reason)
{
struct wb_writeback_work *work;

if (!wb_has_dirty_io(wb))
return;

Expand All @@ -1008,35 +1005,14 @@ static void wb_start_writeback(struct bdi_writeback *wb, enum wb_reason reason)
* high frequency, causing pointless allocations of tons of
* work items and keeping the flusher threads busy retrieving
* that work. Ensure that we only allow one of them pending and
* inflight at the time. It doesn't matter if we race a little
* bit on this, so use the faster separate test/set bit variants.
* inflight at the time.
*/
if (test_bit(WB_start_all, &wb->state))
if (test_bit(WB_start_all, &wb->state) ||
test_and_set_bit(WB_start_all, &wb->state))
return;

set_bit(WB_start_all, &wb->state);

/*
* This is WB_SYNC_NONE writeback, so if allocation fails just
* wakeup the thread for old dirty data writeback
*/
work = kzalloc(sizeof(*work),
GFP_NOWAIT | __GFP_NOMEMALLOC | __GFP_NOWARN);
if (!work) {
clear_bit(WB_start_all, &wb->state);
trace_writeback_nowork(wb);
wb_wakeup(wb);
return;
}

work->sync_mode = WB_SYNC_NONE;
work->nr_pages = wb_split_bdi_pages(wb, get_nr_dirty_pages());
work->range_cyclic = 1;
work->reason = reason;
work->auto_free = 1;
work->start_all = 1;

wb_queue_work(wb, work);
wb->start_all_reason = reason;
wb_wakeup(wb);
}

/**
Expand Down Expand Up @@ -1895,14 +1871,6 @@ static struct wb_writeback_work *get_next_work_item(struct bdi_writeback *wb)
list_del_init(&work->list);
}
spin_unlock_bh(&wb->work_lock);

/*
* Once we start processing a work item that had !nr_pages,
* clear the wb state bit for that so we can allow more.
*/
if (work && work->start_all)
clear_bit(WB_start_all, &wb->state);

return work;
}

Expand Down Expand Up @@ -1958,6 +1926,30 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb)
return 0;
}

/*
 * Service a pending "start all" writeback request for @wb.
 *
 * wb_start_writeback() flags such a request by setting WB_start_all in
 * wb->state and recording the reason in wb->start_all_reason. If the bit
 * is set, this function samples the current dirty page count and, when it
 * is non-zero, runs one WB_SYNC_NONE, range-cyclic writeback pass
 * described by a work item that lives on the stack.
 *
 * Returns 0 when no request is pending or when get_nr_dirty_pages()
 * reports nothing dirty; otherwise returns whatever wb_writeback()
 * returned (presumably the number of pages written - the caller adds it
 * to its "wrote" total).
 */
static long wb_check_start_all(struct bdi_writeback *wb)
{
long nr_pages;

/* No start-all request has been flagged, so there is nothing to do. */
if (!test_bit(WB_start_all, &wb->state))
return 0;

nr_pages = get_nr_dirty_pages();
if (nr_pages) {
/* The work item is a local variable, so nothing is allocated here. */
struct wb_writeback_work work = {
.nr_pages = wb_split_bdi_pages(wb, nr_pages),
.sync_mode = WB_SYNC_NONE,
.range_cyclic = 1,
.reason = wb->start_all_reason,
};

nr_pages = wb_writeback(wb, &work);
}

/*
 * Clear the request only after it has been handled (or found to have
 * nothing to write), so that a new start-all request can be flagged.
 */
clear_bit(WB_start_all, &wb->state);
return nr_pages;
}


/*
* Retrieve work items and do the writeback they describe
*/
Expand All @@ -1973,6 +1965,11 @@ static long wb_do_writeback(struct bdi_writeback *wb)
finish_writeback_work(wb, work);
}

/*
* Check for a flush-everything request
*/
wrote += wb_check_start_all(wb);

/*
* Check for periodic writeback, kupdated() style
*/
Expand Down
23 changes: 23 additions & 0 deletions include/linux/backing-dev-defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,28 @@ enum wb_stat_item {

#define WB_STAT_BATCH (8*(1+ilog2(nr_cpu_ids)))

/*
* why some writeback work was initiated
*/
enum wb_reason {
/*
 * No explicit values are assigned, so each constant's numeric value is
 * determined by its position in this list.
 */
WB_REASON_BACKGROUND,
WB_REASON_VMSCAN,
WB_REASON_SYNC,
WB_REASON_PERIODIC,
WB_REASON_LAPTOP_TIMER,
WB_REASON_FREE_MORE_MEM,
WB_REASON_FS_FREE_SPACE,
/*
 * The bdi forker thread no longer exists; this work is now done by an
 * emergency worker. The name no longer matches, but it is kept because
 * it is visible to userland (via "TPs" - presumably tracepoints) and
 * exactly the same information will continue to be exposed.
 */
WB_REASON_FORKER_THREAD,

/* Last entry: equals the number of reasons listed above. */
WB_REASON_MAX,
};

/*
* For cgroup writeback, multiple wb's may map to the same blkcg. Those
* wb's can operate mostly independently but should share the congested
Expand Down Expand Up @@ -116,6 +138,7 @@ struct bdi_writeback {

struct fprop_local_percpu completions;
int dirty_exceeded;
enum wb_reason start_all_reason;

spinlock_t work_lock; /* protects work_list & dwork scheduling */
struct list_head work_list;
Expand Down
22 changes: 0 additions & 22 deletions include/linux/writeback.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,28 +42,6 @@ enum writeback_sync_modes {
WB_SYNC_ALL, /* Wait on every mapping */
};

/*
* why some writeback work was initiated
*/
enum wb_reason {
/*
 * No explicit values are assigned, so each constant's numeric value is
 * determined by its position in this list.
 */
WB_REASON_BACKGROUND,
WB_REASON_VMSCAN,
WB_REASON_SYNC,
WB_REASON_PERIODIC,
WB_REASON_LAPTOP_TIMER,
WB_REASON_FREE_MORE_MEM,
WB_REASON_FS_FREE_SPACE,
/*
 * The bdi forker thread no longer exists; this work is now done by an
 * emergency worker. The name no longer matches, but it is kept because
 * it is visible to userland (via "TPs" - presumably tracepoints) and
 * exactly the same information will continue to be exposed.
 */
WB_REASON_FORKER_THREAD,

/* Last entry: equals the number of reasons listed above. */
WB_REASON_MAX,
};

/*
* A control structure which tells the writeback code what to do. These are
* always on the stack, and hence need no locking. They are always initialised
Expand Down
1 change: 0 additions & 1 deletion include/trace/events/writeback.h
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,6 @@ DEFINE_EVENT(writeback_class, name, \
TP_PROTO(struct bdi_writeback *wb), \
TP_ARGS(wb))

DEFINE_WRITEBACK_EVENT(writeback_nowork);
DEFINE_WRITEBACK_EVENT(writeback_wake_background);

TRACE_EVENT(writeback_bdi_register,
Expand Down

0 comments on commit 4071461

Please sign in to comment.