This repository has been archived by the owner on May 30, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 36
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #2402 from flatcar/dongsu/ext4-deadlock-fix-3374
sys-kernel/coreos-sources: Add backport of bugfix for #847 for flatcar-3374
- Loading branch information
Showing
3 changed files
with
131 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
- Fix "ext4 deadlock under heavy I/O load" kernel issue. The patch for this is included provisionally while we stay with Kernel 5.15.86. ([Flatcar#847](https://github.com/flatcar/Flatcar/issues/847), [coreos-overlay#2402](https://github.com/flatcar/coreos-overlay/pull/2402)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
129 changes: 129 additions & 0 deletions
129
sys-kernel/coreos-sources/files/5.15/z0008-ext4-Fix-deadlock-due-to-mbcache-en.patch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,129 @@ | ||
From e7ec42e181c6213d1fd71b946196f05af601ba5c Mon Sep 17 00:00:00 2001 | ||
From: Jan Kara <jack@suse.cz> | ||
Date: Mon, 21 Nov 2022 15:44:10 +0100 | ||
Subject: [PATCH] ext4: Fix deadlock due to mbcache entry corruption | ||
|
||
When manipulating xattr blocks, we can deadlock infinitely looping | ||
inside ext4_xattr_block_set() where we constantly keep finding xattr | ||
block for reuse in mbcache but we are unable to reuse it because its | ||
reference count is too big. This happens because cache entry for the | ||
xattr block is marked as reusable (e_reusable set) although its | ||
reference count is too big. When this inconsistency happens, this | ||
inconsistent state is kept indefinitely and so ext4_xattr_block_set() | ||
keeps retrying indefinitely. | ||
|
||
The inconsistent state is caused by non-atomic update of e_reusable bit. | ||
e_reusable is part of a bitfield and e_reusable update can race with | ||
update of e_referenced bit in the same bitfield resulting in loss of one | ||
of the updates. Fix the problem by using atomic bitops instead. | ||
|
||
[jeremi: backport from here https://lore.kernel.org/linux-ext4/20221122174807.GA9658@linuxonhyperv3.guj3yctzbm1etfxqx2vob5hsef.xx.internal.cloudapp.net/] | ||
CC: stable@vger.kernel.org | ||
Fixes: 6048c64b2609 ("mbcache: add reusable flag to cache entries") | ||
Reported-by: Jeremi Piotrowski <jpiotrowski@linux.microsoft.com> | ||
Reported-by: Thilo Fromm <t-lo@linux.microsoft.com> | ||
Signed-off-by: Jan Kara <jack@suse.cz> | ||
--- | ||
fs/ext4/xattr.c | 4 ++-- | ||
fs/mbcache.c | 14 ++++++++------ | ||
include/linux/mbcache.h | 9 +++++++-- | ||
3 files changed, 17 insertions(+), 10 deletions(-) | ||
|
||
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c | ||
index 533216e80fa2..22700812a4d3 100644 | ||
--- a/fs/ext4/xattr.c | ||
+++ b/fs/ext4/xattr.c | ||
@@ -1281,7 +1281,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode, | ||
ce = mb_cache_entry_get(ea_block_cache, hash, | ||
bh->b_blocknr); | ||
if (ce) { | ||
- ce->e_reusable = 1; | ||
+ set_bit(MBE_REUSABLE_B, &ce->e_flags); | ||
mb_cache_entry_put(ea_block_cache, ce); | ||
} | ||
} | ||
@@ -2042,7 +2042,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, | ||
} | ||
BHDR(new_bh)->h_refcount = cpu_to_le32(ref); | ||
if (ref == EXT4_XATTR_REFCOUNT_MAX) | ||
- ce->e_reusable = 0; | ||
+ clear_bit(MBE_REUSABLE_B, &ce->e_flags); | ||
ea_bdebug(new_bh, "reusing; refcount now=%d", | ||
ref); | ||
ext4_xattr_block_csum_set(inode, new_bh); | ||
diff --git a/fs/mbcache.c b/fs/mbcache.c | ||
index 2010bc80a3f2..ac07b50ea3df 100644 | ||
--- a/fs/mbcache.c | ||
+++ b/fs/mbcache.c | ||
@@ -94,8 +94,9 @@ int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key, | ||
atomic_set(&entry->e_refcnt, 1); | ||
entry->e_key = key; | ||
entry->e_value = value; | ||
- entry->e_reusable = reusable; | ||
- entry->e_referenced = 0; | ||
+ entry->e_flags = 0; | ||
+ if (reusable) | ||
+ set_bit(MBE_REUSABLE_B, &entry->e_flags); | ||
head = mb_cache_entry_head(cache, key); | ||
hlist_bl_lock(head); | ||
hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) { | ||
@@ -155,7 +156,8 @@ static struct mb_cache_entry *__entry_find(struct mb_cache *cache, | ||
while (node) { | ||
entry = hlist_bl_entry(node, struct mb_cache_entry, | ||
e_hash_list); | ||
- if (entry->e_key == key && entry->e_reusable) { | ||
+ if (entry->e_key == key && | ||
+ test_bit(MBE_REUSABLE_B, &entry->e_flags)) { | ||
atomic_inc(&entry->e_refcnt); | ||
goto out; | ||
} | ||
@@ -325,7 +327,7 @@ EXPORT_SYMBOL(mb_cache_entry_delete_or_get); | ||
void mb_cache_entry_touch(struct mb_cache *cache, | ||
struct mb_cache_entry *entry) | ||
{ | ||
- entry->e_referenced = 1; | ||
+ set_bit(MBE_REFERENCED_B, &entry->e_flags); | ||
} | ||
EXPORT_SYMBOL(mb_cache_entry_touch); | ||
|
||
@@ -350,8 +352,8 @@ static unsigned long mb_cache_shrink(struct mb_cache *cache, | ||
while (nr_to_scan-- && !list_empty(&cache->c_list)) { | ||
entry = list_first_entry(&cache->c_list, | ||
struct mb_cache_entry, e_list); | ||
- if (entry->e_referenced || atomic_read(&entry->e_refcnt) > 2) { | ||
- entry->e_referenced = 0; | ||
+ if (test_bit(MBE_REFERENCED_B, &entry->e_flags) || atomic_read(&entry->e_refcnt) > 2) { | ||
+ clear_bit(MBE_REFERENCED_B, &entry->e_flags); | ||
list_move_tail(&entry->e_list, &cache->c_list); | ||
continue; | ||
} | ||
diff --git a/include/linux/mbcache.h b/include/linux/mbcache.h | ||
index 8eca7f25c432..62927f7e2588 100644 | ||
--- a/include/linux/mbcache.h | ||
+++ b/include/linux/mbcache.h | ||
@@ -10,6 +10,12 @@ | ||
|
||
struct mb_cache; | ||
|
||
+/* Cache entry flags */ | ||
+enum { | ||
+ MBE_REFERENCED_B = 0, | ||
+ MBE_REUSABLE_B | ||
+}; | ||
+ | ||
struct mb_cache_entry { | ||
/* List of entries in cache - protected by cache->c_list_lock */ | ||
struct list_head e_list; | ||
@@ -18,8 +24,7 @@ struct mb_cache_entry { | ||
atomic_t e_refcnt; | ||
/* Key in hash - stable during lifetime of the entry */ | ||
u32 e_key; | ||
- u32 e_referenced:1; | ||
- u32 e_reusable:1; | ||
+ unsigned long e_flags; | ||
/* User provided value - stable during lifetime of the entry */ | ||
u64 e_value; | ||
}; | ||
-- | ||
2.25.1 | ||
|