-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
For kernels which do not implement a per-superblock shrinker the shrink_dcache_parent() function was used to attempt to reclaim dentries. This was found not to be entirely reliable, which could lead to performance issues on older kernels running meta-data heavy workloads. To address this issue a zfs_sb_prune_compat() function has been added to implement this functionality. It relies on traversing the list of znodes for a filesystem and adding them to a private list with a reference held. The private list can then be safely walked outside the z_znodes_lock to prune dentries and drop the last reference so the inode can be freed. This provides the same synchronous behavior as the per-filesystem shrinker and has the advantage of depending on only long standing interfaces. The number of threads in the iput taskq has also been increased to speed up the handling of asynchronous iputs. This improves the rate of meta data reclaim regardless of the kernel version. Signed-off-by: Brian Behlendorf <[email protected]>
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -211,6 +211,7 @@ typedef struct znode { | |
nvlist_t *z_xattr_cached; /* cached xattrs */ | ||
struct znode *z_xattr_parent; /* xattr parent znode */ | ||
list_node_t z_link_node; /* all znodes in fs link */ | ||
list_node_t z_prune_node; /* znodes being pruned link */ | ||
This comment has been minimized.
Sorry, something went wrong.
This comment has been minimized.
Sorry, something went wrong.
behlendorf
Author
Owner
|
||
sa_handle_t *z_sa_hdl; /* handle to sa data */ | ||
boolean_t z_is_sa; /* are we native sa? */ | ||
boolean_t z_is_zvol; /* are we used by the zvol */ | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1072,6 +1072,75 @@ zfs_root(zfs_sb_t *zsb, struct inode **ipp) | |
} | ||
EXPORT_SYMBOL(zfs_root); | ||
|
||
#if !defined(HAVE_SHRINK) && !defined(HAVE_SPLIT_SHRINKER_CALLBACK) | ||
/* | ||
* Linux kernels older than 3.1 do not support a per-filesystem shrinker. | ||
* To accommodate this we must improvise and manually walk the list of znodes | |
This comment has been minimized.
Sorry, something went wrong. |
||
* attempting to prune dentries in order to be able to drop the inodes. | ||
* | ||
* To avoid scanning the same znodes multiple times they are always rotated | ||
* to the end of the z_all_znodes list. New znodes are inserted at the | ||
* end of the list so we're always scanning the oldest znodes first. | ||
*/ | ||
static int | ||
zfs_sb_prune_compat(zfs_sb_t *zsb, unsigned long nr_to_scan) | ||
{ | ||
list_t prune_list; | ||
znode_t *zp; | ||
int objects = 0; | ||
int i = 0; | ||
|
||
list_create(&prune_list, sizeof (znode_t), | ||
offsetof(znode_t, z_prune_node)); | ||
|
||
mutex_enter(&zsb->z_znodes_lock); | ||
This comment has been minimized.
Sorry, something went wrong.
dweeezil
|
||
while ((zp = list_head(&zsb->z_all_znodes)) != NULL) { | ||
|
||
if (i++ > nr_to_scan) | ||
break; | ||
|
||
ASSERT(list_link_active(&zp->z_link_node)); | ||
list_remove(&zsb->z_all_znodes, zp); | ||
list_insert_tail(&zsb->z_all_znodes, zp); | ||
|
||
This comment has been minimized.
Sorry, something went wrong.
dweeezil
|
||
if (!mutex_tryenter(&zp->z_lock)) | ||
continue; | ||
|
||
if (list_link_active(&zp->z_prune_node)) { | ||
mutex_exit(&zp->z_lock); | ||
continue; | ||
} | ||
|
||
if (igrab(ZTOI(zp)) == NULL) { | ||
mutex_exit(&zp->z_lock); | ||
continue; | ||
} | ||
|
||
list_insert_tail(&prune_list, zp); | ||
mutex_exit(&zp->z_lock); | ||
} | ||
mutex_exit(&zsb->z_znodes_lock); | ||
|
||
while ((zp = list_head(&prune_list)) != NULL) { | ||
|
||
d_prune_aliases(ZTOI(zp)); | ||
|
||
if (atomic_read(&ZTOI(zp)->i_count) == 1) | ||
objects++; | ||
|
||
mutex_enter(&zp->z_lock); | ||
list_remove(&prune_list, zp); | ||
mutex_exit(&zp->z_lock); | ||
|
||
iput(ZTOI(zp)); | ||
} | ||
|
||
list_destroy(&prune_list); | ||
|
||
return (objects); | ||
} | ||
#endif | ||
|
||
/* | ||
* The ARC has requested that the filesystem drop entries from the dentry | ||
* and inode caches. This can occur when the ARC needs to free meta data | ||
|
@@ -1107,17 +1176,7 @@ zfs_sb_prune(struct super_block *sb, unsigned long nr_to_scan, int *objects) | |
#elif defined(HAVE_SHRINK) | ||
*objects = (*shrinker->shrink)(shrinker, &sc); | ||
#else | ||
/* | ||
* Linux kernels older than 3.1 do not support a per-filesystem | ||
* shrinker. Therefore, we must fall back to the only available | ||
* interface which is to discard all unused dentries and inodes. | ||
* This behavior clearly isn't ideal but it's required so the ARC | ||
* may free memory. The performance impact is mitigated by the | ||
* fact that the frequently accessed dentry and inode buffers will | ||
* still be in the ARC making them relatively cheap to recreate. | ||
*/ | ||
*objects = 0; | ||
shrink_dcache_parent(sb->s_root); | ||
*objects = zfs_sb_prune_compat(zsb, nr_to_scan); | ||
#endif | ||
ZFS_EXIT(zsb); | ||
|
||
|
1 comment
on commit 78aa2b1
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for the speedy review. I'll push a reworked version of this patch shortly after a little additional stress testing.
Could we conditionally not compile this? It would be nice to save the space on systems which don't need it. Ditto for its uses in the places outside the prune function.