Merge tag 'for-4.19/post-20180822' of git://git.kernel.dk/linux-block
Pull more block updates from Jens Axboe:

 - Set of bcache fixes and changes (Coly)

 - The flush warn fix (me)

 - Small series of BFQ fixes (Paolo)

 - wbt hang fix (Ming)

 - blktrace fix (Steven)

 - blk-mq hardware queue count update fix (Jianchao)

 - Various little fixes

* tag 'for-4.19/post-20180822' of git://git.kernel.dk/linux-block: (31 commits)
  block/DAC960.c: make some arrays static const, shrinks object size
  blk-mq: sync the update nr_hw_queues with blk_mq_queue_tag_busy_iter
  blk-mq: init hctx sched after update ctx and hctx mapping
  block: remove duplicate initialization
  tracing/blktrace: Fix to allow setting same value
  pktcdvd: fix setting of 'ret' error return for a few cases
  block: change return type to bool
  block, bfq: return nbytes and not zero from struct cftype .write() method
  block, bfq: improve code of bfq_bfqq_charge_time
  block, bfq: reduce write overcharge
  block, bfq: always update the budget of an entity when needed
  block, bfq: readd missing reset of parent-entity service
  blk-wbt: fix IO hang in wbt_wait()
  block: don't warn for flush on read-only device
  bcache: add the missing comments for smp_mb()/smp_wmb()
  bcache: remove unnecessary space before ioctl function pointer arguments
  bcache: add missing SPDX header
  bcache: move open brace at end of function definitions to next line
  bcache: add static const prefix to char * array declarations
  bcache: fix code comments style
  ...
torvalds committed Aug 22, 2018
2 parents fe6f0ed + 1e7da86 commit 5bed49a
Showing 43 changed files with 879 additions and 656 deletions.
3 changes: 2 additions & 1 deletion block/bfq-cgroup.c
@@ -913,7 +913,8 @@ static ssize_t bfq_io_set_weight(struct kernfs_open_file *of,
if (ret)
return ret;

return bfq_io_set_weight_legacy(of_css(of), NULL, weight);
ret = bfq_io_set_weight_legacy(of_css(of), NULL, weight);
return ret ?: nbytes;
}

#ifdef CONFIG_DEBUG_BLK_CGROUP
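
Background on the fix above: a cgroup-v2 cftype .write() handler must return the number of bytes it consumed on success rather than 0, otherwise user space sees a short write. The "ret ?: nbytes" form is GNU C's conditional with an omitted middle operand, equivalent to "ret ? ret : nbytes". A minimal userspace sketch of the same return convention (hypothetical helper, not kernel code):

#include <stdio.h>

/* Hypothetical "write handler": returns a negative error code on failure,
 * otherwise the number of bytes it consumed, mirroring the cftype
 * .write() convention that bfq_io_set_weight now follows. */
static long set_weight(long weight, long nbytes)
{
	long ret = (weight < 1 || weight > 1000) ? -22 /* -EINVAL */ : 0;

	/* GNU C "a ?: b" evaluates a once and yields a if it is non-zero,
	 * otherwise b: propagate the error, or report nbytes consumed. */
	return ret ?: nbytes;
}

int main(void)
{
	printf("%ld\n", set_weight(100, 4)); /* prints 4: success, 4 bytes consumed */
	printf("%ld\n", set_weight(0, 4));   /* prints -22: error propagated */
	return 0;
}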
54 changes: 40 additions & 14 deletions block/bfq-iosched.c
@@ -187,11 +187,25 @@ static const int bfq_stats_min_budgets = 194;
static const int bfq_default_max_budget = 16 * 1024;

/*
* Async to sync throughput distribution is controlled as follows:
* when an async request is served, the entity is charged the number
* of sectors of the request, multiplied by the factor below
* When a sync request is dispatched, the queue that contains that
* request, and all the ancestor entities of that queue, are charged
* with the number of sectors of the request. In contrast, if the
* request is async, then the queue and its ancestor entities are
* charged with the number of sectors of the request, multiplied by
* the factor below. This throttles the bandwidth for async I/O,
* w.r.t. to sync I/O, and it is done to counter the tendency of async
* writes to steal I/O throughput to reads.
*
* The current value of this parameter is the result of a tuning with
* several hardware and software configurations. We tried to find the
* lowest value for which writes do not cause noticeable problems to
* reads. In fact, the lower this parameter, the stabler I/O control,
* in the following respect. The lower this parameter is, the less
* the bandwidth enjoyed by a group decreases
* - when the group does writes, w.r.t. to when it does reads;
* - when other groups do reads, w.r.t. to when they do writes.
*/
static const int bfq_async_charge_factor = 10;
static const int bfq_async_charge_factor = 3;

/* Default timeout values, in jiffies, approximating CFQ defaults. */
const int bfq_timeout = HZ / 8;
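
To make the charging rule described in the comment above concrete, here is a small standalone sketch (illustrative only, not the kernel implementation) of how the service charged per request differs between sync and async I/O with the new factor of 3:

#include <stdbool.h>
#include <stdio.h>

/* Illustrative charging rule: sync requests are charged their size in
 * sectors; async requests are charged size * factor, which throttles
 * async bandwidth relative to sync I/O. The factor 3 mirrors the new
 * bfq_async_charge_factor; 10 (optionally doubled) was the old value. */
static const unsigned int async_charge_factor = 3;

static unsigned long serv_to_charge(unsigned long sectors, bool is_sync)
{
	return is_sync ? sectors : sectors * async_charge_factor;
}

int main(void)
{
	/* A 256-sector async write is charged as if it were 768 sectors,
	 * so the owning queue's budget is consumed three times faster. */
	printf("sync : %lu\n", serv_to_charge(256, true));  /* 256 */
	printf("async: %lu\n", serv_to_charge(256, false)); /* 768 */
	return 0;
}

Overcharging async requests makes their budgets run out sooner, which is how BFQ keeps background writeback from stealing throughput from readers.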
@@ -853,16 +867,7 @@ static unsigned long bfq_serv_to_charge(struct request *rq,
if (bfq_bfqq_sync(bfqq) || bfqq->wr_coeff > 1)
return blk_rq_sectors(rq);

/*
* If there are no weight-raised queues, then amplify service
* by just the async charge factor; otherwise amplify service
* by twice the async charge factor, to further reduce latency
* for weight-raised queues.
*/
if (bfqq->bfqd->wr_busy_queues == 0)
return blk_rq_sectors(rq) * bfq_async_charge_factor;

return blk_rq_sectors(rq) * 2 * bfq_async_charge_factor;
return blk_rq_sectors(rq) * bfq_async_charge_factor;
}

/**
@@ -3298,6 +3303,27 @@ void bfq_bfqq_expire(struct bfq_data *bfqd,
*/
} else
entity->service = 0;

/*
* Reset the received-service counter for every parent entity.
* Differently from what happens with bfqq->entity.service,
* the resetting of this counter never needs to be postponed
* for parent entities. In fact, in case bfqq may have a
* chance to go on being served using the last, partially
* consumed budget, bfqq->entity.service needs to be kept,
* because if bfqq then actually goes on being served using
* the same budget, the last value of bfqq->entity.service is
* needed to properly decrement bfqq->entity.budget by the
* portion already consumed. In contrast, it is not necessary
* to keep entity->service for parent entities too, because
* the bubble up of the new value of bfqq->entity.budget will
* make sure that the budgets of parent entities are correct,
* even in case bfqq and thus parent entities go on receiving
* service with the same budget.
*/
entity = entity->parent;
for_each_entity(entity)
entity->service = 0;
}

/*
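
The long comment in the bfq_bfqq_expire hunk above explains why the queue's own service counter may have to survive expiration while every ancestor's counter can be cleared right away. A standalone sketch of that walk up the parent chain (simplified types; a NULL-terminated parent pointer stands in for the kernel's for_each_entity macro):

#include <stddef.h>
#include <stdio.h>

/* Simplified stand-in for struct bfq_entity: each entity tracks the
 * service received against its current budget and points to its parent
 * in the cgroup hierarchy. */
struct entity {
	int service;
	int budget;
	struct entity *parent;
};

/* Reset the received-service counter of every ancestor of @leaf, but
 * leave leaf->service alone: if the queue keeps being served on the
 * same, partially consumed budget, that value is still needed to
 * decrement the remaining budget correctly. */
static void reset_parent_service(struct entity *leaf)
{
	for (struct entity *e = leaf->parent; e != NULL; e = e->parent)
		e->service = 0;
}

int main(void)
{
	struct entity root  = { .service = 400, .budget = 1024, .parent = NULL };
	struct entity group = { .service = 300, .budget = 512,  .parent = &root };
	struct entity queue = { .service = 100, .budget = 256,  .parent = &group };

	reset_parent_service(&queue);
	printf("queue %d group %d root %d\n",
	       queue.service, group.service, root.service); /* 100 0 0 */
	return 0;
}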
22 changes: 11 additions & 11 deletions block/bfq-wf2q.c
@@ -130,10 +130,14 @@ static bool bfq_update_next_in_service(struct bfq_sched_data *sd,
if (!change_without_lookup) /* lookup needed */
next_in_service = bfq_lookup_next_entity(sd, expiration);

if (next_in_service)
parent_sched_may_change = !sd->next_in_service ||
if (next_in_service) {
bool new_budget_triggers_change =
bfq_update_parent_budget(next_in_service);

parent_sched_may_change = !sd->next_in_service ||
new_budget_triggers_change;
}

sd->next_in_service = next_in_service;

if (!next_in_service)
@@ -877,15 +881,11 @@ void bfq_bfqq_charge_time(struct bfq_data *bfqd, struct bfq_queue *bfqq,
unsigned long time_ms)
{
struct bfq_entity *entity = &bfqq->entity;
int tot_serv_to_charge = entity->service;
unsigned int timeout_ms = jiffies_to_msecs(bfq_timeout);

if (time_ms > 0 && time_ms < timeout_ms)
tot_serv_to_charge =
(bfqd->bfq_max_budget * time_ms) / timeout_ms;

if (tot_serv_to_charge < entity->service)
tot_serv_to_charge = entity->service;
unsigned long timeout_ms = jiffies_to_msecs(bfq_timeout);
unsigned long bounded_time_ms = min(time_ms, timeout_ms);
int serv_to_charge_for_time =
(bfqd->bfq_max_budget * bounded_time_ms) / timeout_ms;
int tot_serv_to_charge = max(serv_to_charge_for_time, entity->service);

/* Increase budget to avoid inconsistencies */
if (tot_serv_to_charge > entity->budget)
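
The rewritten bfq_bfqq_charge_time above reduces to two clamps: the time to charge is capped at the queue timeout, and the resulting charge can never fall below the service already received. A standalone sketch of the arithmetic (illustrative numbers, not taken from the kernel):

#include <stdio.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))

/* Charge a queue for the time it occupied the device rather than for the
 * sectors it transferred: scale max_budget by time/timeout, bound the
 * time at the timeout, and never charge less than what was already
 * accounted as received service. */
static long time_charge(long max_budget, long time_ms, long timeout_ms,
			long service)
{
	long bounded_time_ms = MIN(time_ms, timeout_ms);
	long serv_for_time = (max_budget * bounded_time_ms) / timeout_ms;

	return MAX(serv_for_time, service);
}

int main(void)
{
	/* timeout 125 ms, max budget 16384 */
	printf("%ld\n", time_charge(16384, 50, 125, 1000));  /* 6553           */
	printf("%ld\n", time_charge(16384, 500, 125, 1000)); /* 16384: capped  */
	printf("%ld\n", time_charge(16384, 5, 125, 1000));   /* 1000: floored  */
	return 0;
}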
5 changes: 3 additions & 2 deletions block/blk-core.c
@@ -1036,7 +1036,6 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id,
laptop_mode_timer_fn, 0);
timer_setup(&q->timeout, blk_rq_timed_out_timer, 0);
INIT_WORK(&q->timeout_work, NULL);
INIT_LIST_HEAD(&q->queue_head);
INIT_LIST_HEAD(&q->timeout_list);
INIT_LIST_HEAD(&q->icq_list);
#ifdef CONFIG_BLK_CGROUP
@@ -2162,7 +2161,9 @@ static inline bool should_fail_request(struct hd_struct *part,

static inline bool bio_check_ro(struct bio *bio, struct hd_struct *part)
{
if (part->policy && op_is_write(bio_op(bio))) {
const int op = bio_op(bio);

if (part->policy && (op_is_write(op) && !op_is_flush(op))) {
char b[BDEVNAME_SIZE];

WARN_ONCE(1,
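
The bio_check_ro change above stops the warning when a flush reaches a read-only device: a flush carries no payload, so although it is classified as a write it does not modify the medium. A minimal userspace sketch of the predicate (simplified flag bits; the kernel derives them from the bio's operation and its REQ_PREFLUSH/REQ_FUA flags):

#include <stdbool.h>
#include <stdio.h>

/* Simplified request flags for illustration only; the kernel encodes
 * these differently. */
#define OP_WRITE (1u << 0)
#define OP_FLUSH (1u << 1)

static bool op_is_write(unsigned int op) { return op & OP_WRITE; }
static bool op_is_flush(unsigned int op) { return op & OP_FLUSH; }

/* Warn only for operations that would actually modify data on a
 * read-only device; a flush is "write-like" but carries no payload,
 * so it is allowed through silently. */
static bool should_warn_ro(bool read_only, unsigned int op)
{
	return read_only && op_is_write(op) && !op_is_flush(op);
}

int main(void)
{
	printf("write on ro: %d\n", should_warn_ro(true, OP_WRITE));            /* 1 */
	printf("flush on ro: %d\n", should_warn_ro(true, OP_WRITE | OP_FLUSH)); /* 0 */
	printf("write on rw: %d\n", should_warn_ro(false, OP_WRITE));           /* 0 */
	return 0;
}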
44 changes: 0 additions & 44 deletions block/blk-mq-sched.c
@@ -462,50 +462,6 @@ static void blk_mq_sched_tags_teardown(struct request_queue *q)
blk_mq_sched_free_tags(set, hctx, i);
}

int blk_mq_sched_init_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
unsigned int hctx_idx)
{
struct elevator_queue *e = q->elevator;
int ret;

if (!e)
return 0;

ret = blk_mq_sched_alloc_tags(q, hctx, hctx_idx);
if (ret)
return ret;

if (e->type->ops.mq.init_hctx) {
ret = e->type->ops.mq.init_hctx(hctx, hctx_idx);
if (ret) {
blk_mq_sched_free_tags(q->tag_set, hctx, hctx_idx);
return ret;
}
}

blk_mq_debugfs_register_sched_hctx(q, hctx);

return 0;
}

void blk_mq_sched_exit_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
unsigned int hctx_idx)
{
struct elevator_queue *e = q->elevator;

if (!e)
return;

blk_mq_debugfs_unregister_sched_hctx(hctx);

if (e->type->ops.mq.exit_hctx && hctx->sched_data) {
e->type->ops.mq.exit_hctx(hctx, hctx_idx);
hctx->sched_data = NULL;
}

blk_mq_sched_free_tags(q->tag_set, hctx, hctx_idx);
}

int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
{
struct blk_mq_hw_ctx *hctx;
5 changes: 0 additions & 5 deletions block/blk-mq-sched.h
@@ -28,11 +28,6 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx);
int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e);
void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e);

int blk_mq_sched_init_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
unsigned int hctx_idx);
void blk_mq_sched_exit_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
unsigned int hctx_idx);

static inline bool
blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
{
14 changes: 13 additions & 1 deletion block/blk-mq-tag.c
@@ -320,6 +320,18 @@ void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn,
struct blk_mq_hw_ctx *hctx;
int i;

/*
* __blk_mq_update_nr_hw_queues will update the nr_hw_queues and
* queue_hw_ctx after freezing the queue, so we can use q_usage_counter
* to avoid racing with it. __blk_mq_update_nr_hw_queues uses
* synchronize_rcu to ensure all of the users go out of the critical
* section below and see zeroed q_usage_counter.
*/
rcu_read_lock();
if (percpu_ref_is_zero(&q->q_usage_counter)) {
rcu_read_unlock();
return;
}

queue_for_each_hw_ctx(q, hctx, i) {
struct blk_mq_tags *tags = hctx->tags;
@@ -335,7 +347,7 @@ void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn,
bt_for_each(hctx, &tags->breserved_tags, fn, priv, true);
bt_for_each(hctx, &tags->bitmap_tags, fn, priv, false);
}

rcu_read_unlock();
}

static int bt_alloc(struct sbitmap_queue *bt, unsigned int depth,
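
The hunk above adds a freeze check to blk_mq_queue_tag_busy_iter: the iterator walks the hardware queues inside an RCU read-side critical section and bails out if the queue's usage counter has already been drained, while __blk_mq_update_nr_hw_queues drains the counter and calls synchronize_rcu() before touching queue_hw_ctx. The single-threaded userspace sketch below only shows the shape of that protocol; it assumes liburcu is available (link with -lurcu) and uses invented stand-ins for the queue state:

#include <stdio.h>
#include <urcu.h> /* userspace RCU; link with -lurcu */

/* Stand-ins for the block-layer state (invented for illustration):
 * usage_counter plays the role of q->q_usage_counter, hw_queues the
 * role of q->queue_hw_ctx / nr_hw_queues. */
static int usage_counter = 1;
static int hw_queues = 4;

/* Reader side, mirroring blk_mq_queue_tag_busy_iter(): do the walk
 * inside an RCU read-side critical section and bail out if the queue
 * has already been drained for an update. */
static void iterate_busy_tags(void)
{
	rcu_read_lock();
	if (usage_counter == 0) {
		rcu_read_unlock();
		return;
	}
	for (int i = 0; i < hw_queues; i++)
		printf("visiting hctx %d\n", i);
	rcu_read_unlock();
}

/* Updater side, mirroring __blk_mq_update_nr_hw_queues(): drain the
 * usage counter (freeze), wait for every reader that might still see
 * the old mapping, then replace it. */
static void update_nr_hw_queues(int new_nr)
{
	usage_counter = 0; /* freeze: new readers bail out            */
	synchronize_rcu(); /* wait for readers already inside          */
	hw_queues = new_nr; /* now safe to swap the hctx mapping       */
	usage_counter = 1; /* unfreeze                                 */
}

int main(void)
{
	rcu_register_thread();
	iterate_busy_tags();
	update_nr_hw_queues(8);
	iterate_busy_tags();
	rcu_unregister_thread();
	return 0;
}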
96 changes: 88 additions & 8 deletions block/blk-mq.c
@@ -2145,8 +2145,6 @@ static void blk_mq_exit_hctx(struct request_queue *q,
if (set->ops->exit_request)
set->ops->exit_request(set, hctx->fq->flush_rq, hctx_idx);

blk_mq_sched_exit_hctx(q, hctx, hctx_idx);

if (set->ops->exit_hctx)
set->ops->exit_hctx(hctx, hctx_idx);

@@ -2214,12 +2212,9 @@ static int blk_mq_init_hctx(struct request_queue *q,
set->ops->init_hctx(hctx, set->driver_data, hctx_idx))
goto free_bitmap;

if (blk_mq_sched_init_hctx(q, hctx, hctx_idx))
goto exit_hctx;

hctx->fq = blk_alloc_flush_queue(q, hctx->numa_node, set->cmd_size);
if (!hctx->fq)
goto sched_exit_hctx;
goto exit_hctx;

if (blk_mq_init_request(set, hctx->fq->flush_rq, hctx_idx, node))
goto free_fq;
@@ -2233,8 +2228,6 @@

free_fq:
kfree(hctx->fq);
sched_exit_hctx:
blk_mq_sched_exit_hctx(q, hctx, hctx_idx);
exit_hctx:
if (set->ops->exit_hctx)
set->ops->exit_hctx(hctx, hctx_idx);
@@ -2896,10 +2889,81 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
return ret;
}

/*
* request_queue and elevator_type pair.
* It is just used by __blk_mq_update_nr_hw_queues to cache
* the elevator_type associated with a request_queue.
*/
struct blk_mq_qe_pair {
struct list_head node;
struct request_queue *q;
struct elevator_type *type;
};

/*
* Cache the elevator_type in qe pair list and switch the
* io scheduler to 'none'
*/
static bool blk_mq_elv_switch_none(struct list_head *head,
struct request_queue *q)
{
struct blk_mq_qe_pair *qe;

if (!q->elevator)
return true;

qe = kmalloc(sizeof(*qe), GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY);
if (!qe)
return false;

INIT_LIST_HEAD(&qe->node);
qe->q = q;
qe->type = q->elevator->type;
list_add(&qe->node, head);

mutex_lock(&q->sysfs_lock);
/*
* After elevator_switch_mq, the previous elevator_queue will be
* released by elevator_release. The reference to the io scheduler
* module taken by elevator_get will also be put. So we need to take
* a reference to the io scheduler module here to prevent it from
* being removed.
*/
__module_get(qe->type->elevator_owner);
elevator_switch_mq(q, NULL);
mutex_unlock(&q->sysfs_lock);

return true;
}

static void blk_mq_elv_switch_back(struct list_head *head,
struct request_queue *q)
{
struct blk_mq_qe_pair *qe;
struct elevator_type *t = NULL;

list_for_each_entry(qe, head, node)
if (qe->q == q) {
t = qe->type;
break;
}

if (!t)
return;

list_del(&qe->node);
kfree(qe);

mutex_lock(&q->sysfs_lock);
elevator_switch_mq(q, t);
mutex_unlock(&q->sysfs_lock);
}

static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
int nr_hw_queues)
{
struct request_queue *q;
LIST_HEAD(head);

lockdep_assert_held(&set->tag_list_lock);

@@ -2910,6 +2974,18 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,

list_for_each_entry(q, &set->tag_list, tag_set_list)
blk_mq_freeze_queue(q);
/*
* Sync with blk_mq_queue_tag_busy_iter.
*/
synchronize_rcu();
/*
* Switch IO scheduler to 'none', cleaning up the data associated
* with the previous scheduler. We will switch back once we are done
* updating the new sw to hw queue mappings.
*/
list_for_each_entry(q, &set->tag_list, tag_set_list)
if (!blk_mq_elv_switch_none(&head, q))
goto switch_back;

set->nr_hw_queues = nr_hw_queues;
blk_mq_update_queue_map(set);
@@ -2918,6 +2994,10 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
blk_mq_queue_reinit(q);
}

switch_back:
list_for_each_entry(q, &set->tag_list, tag_set_list)
blk_mq_elv_switch_back(&head, q);

list_for_each_entry(q, &set->tag_list, tag_set_list)
blk_mq_unfreeze_queue(q);
}
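
The two helpers added above implement a save/restore pattern: before the software-to-hardware queue mapping is rebuilt, each queue's I/O scheduler is remembered in a blk_mq_qe_pair on a local list and the queue is switched to 'none'; once the new mapping is in place the saved scheduler is reinstated. A standalone sketch of that pattern with plain C structures (hypothetical names; the kernel additionally holds sysfs_lock and takes a module reference):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Hypothetical request queue: a name plus the name of its current
 * I/O scheduler ("none" means no scheduler attached). */
struct queue {
	const char *name;
	char sched[16];
};

/* Saved (queue, scheduler) pair, playing the role of blk_mq_qe_pair. */
struct qe_pair {
	struct queue *q;
	char sched[16];
	struct qe_pair *next;
};

/* Remember the current scheduler on the list and switch the queue to "none". */
static int switch_none(struct qe_pair **head, struct queue *q)
{
	struct qe_pair *qe;

	if (strcmp(q->sched, "none") == 0)
		return 0;
	qe = malloc(sizeof(*qe));
	if (!qe)
		return -1;
	qe->q = q;
	strcpy(qe->sched, q->sched);
	qe->next = *head;
	*head = qe;
	strcpy(q->sched, "none");
	return 0;
}

/* Restore the scheduler that was saved for @q, if any. */
static void switch_back(struct qe_pair **head, struct queue *q)
{
	for (struct qe_pair **pp = head; *pp; pp = &(*pp)->next) {
		if ((*pp)->q == q) {
			struct qe_pair *qe = *pp;

			strcpy(q->sched, qe->sched);
			*pp = qe->next;
			free(qe);
			return;
		}
	}
}

int main(void)
{
	struct queue q = { .name = "nvme0n1", .sched = "bfq" };
	struct qe_pair *saved = NULL;

	switch_none(&saved, &q);
	printf("during update: %s\n", q.sched); /* none */
	/* ... remap software to hardware queues here ... */
	switch_back(&saved, &q);
	printf("after update:  %s\n", q.sched); /* bfq  */
	return 0;
}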
6 changes: 1 addition & 5 deletions block/blk-wbt.c
@@ -576,12 +576,8 @@ static void wbt_wait(struct rq_qos *rqos, struct bio *bio, spinlock_t *lock)
struct rq_wb *rwb = RQWB(rqos);
enum wbt_flags flags;

if (!rwb_enabled(rwb))
return;

flags = bio_to_wbt_flags(rwb, bio);

if (!wbt_should_throttle(rwb, bio)) {
if (!(flags & WBT_TRACKED)) {
if (flags & WBT_READ)
wb_timestamp(rwb, &rwb->last_issue);
return;
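
The wbt_wait change above makes the throttling decision follow the flags computed once by bio_to_wbt_flags, so the classification used when a request is issued matches the one used at completion, which is what closes the hang. A small sketch of that classify-once-then-branch shape (flag bits and the classification inputs are invented for illustration):

#include <stdbool.h>
#include <stdio.h>

/* Illustrative flag bits; the kernel's enum wbt_flags differs. */
#define WBT_TRACKED (1u << 0) /* accounted and throttled */
#define WBT_READ    (1u << 1) /* read, only timestamped  */

/* Classify a bio once; every later decision is taken from the result,
 * which keeps issue-time and completion-time accounting consistent. */
static unsigned int bio_to_wbt_flags(bool enabled, bool is_read,
				     bool is_buffered_write)
{
	if (!enabled)
		return 0;
	if (is_read)
		return WBT_READ;
	return is_buffered_write ? WBT_TRACKED : 0;
}

static void wbt_wait_sketch(bool enabled, bool is_read, bool is_buffered_write)
{
	unsigned int flags = bio_to_wbt_flags(enabled, is_read, is_buffered_write);

	if (!(flags & WBT_TRACKED)) {
		if (flags & WBT_READ)
			printf("read: timestamp only, no throttling\n");
		else
			printf("untracked: pass through\n");
		return;
	}
	printf("tracked write: may sleep until the writeback budget allows it\n");
}

int main(void)
{
	wbt_wait_sketch(true, true, false);  /* read           */
	wbt_wait_sketch(true, false, true);  /* buffered write */
	wbt_wait_sketch(false, false, true); /* wbt disabled   */
	return 0;
}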