diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 602255b013e65..9ced21dd93fb9 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -52,7 +52,6 @@ struct wb_writeback_work { unsigned int for_background:1; unsigned int for_sync:1; /* sync(2) WB_SYNC_ALL writeback */ unsigned int auto_free:1; /* free on completion */ - unsigned int start_all:1; /* nr_pages == 0 (all) writeback */ enum wb_reason reason; /* why was writeback initiated? */ struct list_head list; /* pending work list */ @@ -997,8 +996,6 @@ static unsigned long get_nr_dirty_pages(void) static void wb_start_writeback(struct bdi_writeback *wb, enum wb_reason reason) { - struct wb_writeback_work *work; - if (!wb_has_dirty_io(wb)) return; @@ -1008,35 +1005,14 @@ static void wb_start_writeback(struct bdi_writeback *wb, enum wb_reason reason) * high frequency, causing pointless allocations of tons of * work items and keeping the flusher threads busy retrieving * that work. Ensure that we only allow one of them pending and - * inflight at the time. It doesn't matter if we race a little - * bit on this, so use the faster separate test/set bit variants. + * inflight at the time. */ - if (test_bit(WB_start_all, &wb->state)) + if (test_bit(WB_start_all, &wb->state) || + test_and_set_bit(WB_start_all, &wb->state)) return; - set_bit(WB_start_all, &wb->state); - - /* - * This is WB_SYNC_NONE writeback, so if allocation fails just - * wakeup the thread for old dirty data writeback - */ - work = kzalloc(sizeof(*work), - GFP_NOWAIT | __GFP_NOMEMALLOC | __GFP_NOWARN); - if (!work) { - clear_bit(WB_start_all, &wb->state); - trace_writeback_nowork(wb); - wb_wakeup(wb); - return; - } - - work->sync_mode = WB_SYNC_NONE; - work->nr_pages = wb_split_bdi_pages(wb, get_nr_dirty_pages()); - work->range_cyclic = 1; - work->reason = reason; - work->auto_free = 1; - work->start_all = 1; - - wb_queue_work(wb, work); + wb->start_all_reason = reason; + wb_wakeup(wb); } /** @@ -1895,14 +1871,6 @@ static struct wb_writeback_work *get_next_work_item(struct bdi_writeback *wb) list_del_init(&work->list); } spin_unlock_bh(&wb->work_lock); - - /* - * Once we start processing a work item that had !nr_pages, - * clear the wb state bit for that so we can allow more. - */ - if (work && work->start_all) - clear_bit(WB_start_all, &wb->state); - return work; } @@ -1958,6 +1926,30 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb) return 0; } +static long wb_check_start_all(struct bdi_writeback *wb) +{ + long nr_pages; + + if (!test_bit(WB_start_all, &wb->state)) + return 0; + + nr_pages = get_nr_dirty_pages(); + if (nr_pages) { + struct wb_writeback_work work = { + .nr_pages = wb_split_bdi_pages(wb, nr_pages), + .sync_mode = WB_SYNC_NONE, + .range_cyclic = 1, + .reason = wb->start_all_reason, + }; + + nr_pages = wb_writeback(wb, &work); + } + + clear_bit(WB_start_all, &wb->state); + return nr_pages; +} + + /* * Retrieve work items and do the writeback they describe */ @@ -1973,6 +1965,11 @@ static long wb_do_writeback(struct bdi_writeback *wb) finish_writeback_work(wb, work); } + /* + * Check for a flush-everything request + */ + wrote += wb_check_start_all(wb); + /* * Check for periodic writeback, kupdated() style */ diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index e4ce1bc95cb1a..0e8c6647c6bde 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -44,6 +44,28 @@ enum wb_stat_item { #define WB_STAT_BATCH (8*(1+ilog2(nr_cpu_ids))) +/* + * why some writeback work was initiated + */ +enum wb_reason { + WB_REASON_BACKGROUND, + WB_REASON_VMSCAN, + WB_REASON_SYNC, + WB_REASON_PERIODIC, + WB_REASON_LAPTOP_TIMER, + WB_REASON_FREE_MORE_MEM, + WB_REASON_FS_FREE_SPACE, + /* + * There is no bdi forker thread any more and works are done + * by emergency worker, however, this is TPs userland visible + * and we'll be exposing exactly the same information, + * so it has a mismatch name. + */ + WB_REASON_FORKER_THREAD, + + WB_REASON_MAX, +}; + /* * For cgroup writeback, multiple wb's may map to the same blkcg. Those * wb's can operate mostly independently but should share the congested @@ -116,6 +138,7 @@ struct bdi_writeback { struct fprop_local_percpu completions; int dirty_exceeded; + enum wb_reason start_all_reason; spinlock_t work_lock; /* protects work_list & dwork scheduling */ struct list_head work_list; diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 10837a22c9462..efa7953c6134a 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -42,28 +42,6 @@ enum writeback_sync_modes { WB_SYNC_ALL, /* Wait on every mapping */ }; -/* - * why some writeback work was initiated - */ -enum wb_reason { - WB_REASON_BACKGROUND, - WB_REASON_VMSCAN, - WB_REASON_SYNC, - WB_REASON_PERIODIC, - WB_REASON_LAPTOP_TIMER, - WB_REASON_FREE_MORE_MEM, - WB_REASON_FS_FREE_SPACE, - /* - * There is no bdi forker thread any more and works are done - * by emergency worker, however, this is TPs userland visible - * and we'll be exposing exactly the same information, - * so it has a mismatch name. - */ - WB_REASON_FORKER_THREAD, - - WB_REASON_MAX, -}; - /* * A control structure which tells the writeback code what to do. These are * always on the stack, and hence need no locking. They are always initialised diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index 3a76ca2eecd06..29d09755e5cfc 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -287,7 +287,6 @@ DEFINE_EVENT(writeback_class, name, \ TP_PROTO(struct bdi_writeback *wb), \ TP_ARGS(wb)) -DEFINE_WRITEBACK_EVENT(writeback_nowork); DEFINE_WRITEBACK_EVENT(writeback_wake_background); TRACE_EVENT(writeback_bdi_register,