Skip to content

Commit

Permalink
md/raid5: avoid races when changing cache size.
Browse files Browse the repository at this point in the history
Cache size can grow or shrink due to various pressures at
any time.  So when we resize the cache as part of a 'grow'
operation (i.e. change the size to allow more devices) we need
to block that automatic growing/shrinking.

So introduce a mutex.  auto grow/shrink uses mutex_trylock()
and just doesn't bother if there is a blockage.
Resizing the whole cache holds the mutex to ensure that
the correct number of new stripes is allocated.

This bug can result in some stripes not being freed when an
array is stopped.  This leads to the kmem_cache not being
freed and a subsequent array can try to use the same kmem_cache
and get confused.

Fixes: edbe83a ("md/raid5: allow the stripe_cache to grow and shrink.")
Cc: stable@vger.kernel.org (4.1 - please delay until 2 weeks after release of 4.2)
Signed-off-by: NeilBrown <[email protected]>
  • Loading branch information
NeilBrown committed Jul 22, 2015
1 parent 6aaf0da commit 2d5b569
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 7 deletions.
31 changes: 25 additions & 6 deletions drivers/md/raid5.c
Original file line number Diff line number Diff line change
Expand Up @@ -2162,6 +2162,9 @@ static int resize_stripes(struct r5conf *conf, int newsize)
if (!sc)
return -ENOMEM;

/* Need to ensure auto-resizing doesn't interfere */
mutex_lock(&conf->cache_size_mutex);

for (i = conf->max_nr_stripes; i; i--) {
nsh = alloc_stripe(sc, GFP_KERNEL);
if (!nsh)
Expand All @@ -2178,6 +2181,7 @@ static int resize_stripes(struct r5conf *conf, int newsize)
kmem_cache_free(sc, nsh);
}
kmem_cache_destroy(sc);
mutex_unlock(&conf->cache_size_mutex);
return -ENOMEM;
}
/* Step 2 - Must use GFP_NOIO now.
Expand Down Expand Up @@ -2224,6 +2228,7 @@ static int resize_stripes(struct r5conf *conf, int newsize)
} else
err = -ENOMEM;

mutex_unlock(&conf->cache_size_mutex);
/* Step 4, return new stripes to service */
while(!list_empty(&newstripes)) {
nsh = list_entry(newstripes.next, struct stripe_head, lru);
Expand Down Expand Up @@ -5857,12 +5862,14 @@ static void raid5d(struct md_thread *thread)
pr_debug("%d stripes handled\n", handled);

spin_unlock_irq(&conf->device_lock);
if (test_and_clear_bit(R5_ALLOC_MORE, &conf->cache_state)) {
if (test_and_clear_bit(R5_ALLOC_MORE, &conf->cache_state) &&
mutex_trylock(&conf->cache_size_mutex)) {
grow_one_stripe(conf, __GFP_NOWARN);
/* Set flag even if allocation failed. This helps
* slow down allocation requests when mem is short
*/
set_bit(R5_DID_ALLOC, &conf->cache_state);
mutex_unlock(&conf->cache_size_mutex);
}

async_tx_issue_pending_all();
Expand Down Expand Up @@ -5894,18 +5901,22 @@ raid5_set_cache_size(struct mddev *mddev, int size)
return -EINVAL;

conf->min_nr_stripes = size;
mutex_lock(&conf->cache_size_mutex);
while (size < conf->max_nr_stripes &&
drop_one_stripe(conf))
;
mutex_unlock(&conf->cache_size_mutex);


err = md_allow_write(mddev);
if (err)
return err;

mutex_lock(&conf->cache_size_mutex);
while (size > conf->max_nr_stripes)
if (!grow_one_stripe(conf, GFP_KERNEL))
break;
mutex_unlock(&conf->cache_size_mutex);

return 0;
}
Expand Down Expand Up @@ -6371,11 +6382,18 @@ static unsigned long raid5_cache_scan(struct shrinker *shrink,
struct shrink_control *sc)
{
struct r5conf *conf = container_of(shrink, struct r5conf, shrinker);
int ret = 0;
while (ret < sc->nr_to_scan) {
if (drop_one_stripe(conf) == 0)
return SHRINK_STOP;
ret++;
unsigned long ret = SHRINK_STOP;

if (mutex_trylock(&conf->cache_size_mutex)) {
ret= 0;
while (ret < sc->nr_to_scan) {
if (drop_one_stripe(conf) == 0) {
ret = SHRINK_STOP;
break;
}
ret++;
}
mutex_unlock(&conf->cache_size_mutex);
}
return ret;
}
Expand Down Expand Up @@ -6444,6 +6462,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
goto abort;
spin_lock_init(&conf->device_lock);
seqcount_init(&conf->gen_lock);
mutex_init(&conf->cache_size_mutex);
init_waitqueue_head(&conf->wait_for_quiescent);
for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++) {
init_waitqueue_head(&conf->wait_for_stripe[i]);
Expand Down
3 changes: 2 additions & 1 deletion drivers/md/raid5.h
Original file line number Diff line number Diff line change
Expand Up @@ -482,7 +482,8 @@ struct r5conf {
*/
int active_name;
char cache_name[2][32];
struct kmem_cache *slab_cache; /* for allocating stripes */
struct kmem_cache *slab_cache; /* for allocating stripes */
struct mutex cache_size_mutex; /* Protect changes to cache size */

int seq_flush, seq_write;
int quiesce;
Expand Down

0 comments on commit 2d5b569

Please sign in to comment.