Skip to content

Commit

Permalink
Gang ABD Type
Browse files Browse the repository at this point in the history
Adding the Gang ABD type, which allows for linear and scatter ABDs to
be chained together into a single ABD.

This can be used to avoid doing memory copies to/from ABDs. An example
of this can be found in vdev_queue.c in the vdev_queue_aggregate()
function.

Signed-off-by: Brian Atkinson <[email protected]>
Co-authored-by: Mark Maybee <[email protected]>
  • Loading branch information
2 people authored and bwatkinson committed May 11, 2020
1 parent 7fcf824 commit ceed488
Show file tree
Hide file tree
Showing 7 changed files with 636 additions and 106 deletions.
10 changes: 7 additions & 3 deletions include/sys/abd.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,20 +42,24 @@ typedef int abd_iter_func_t(void *buf, size_t len, void *private);
typedef int abd_iter_func2_t(void *bufa, void *bufb, size_t len, void *private);

extern int zfs_abd_scatter_enabled;
extern abd_t *abd_zero_scatter;

/*
* Allocations and deallocations
*/

abd_t *abd_alloc(size_t, boolean_t);
abd_t *abd_alloc_linear(size_t, boolean_t);
abd_t *abd_alloc_gang_abd(void);
abd_t *abd_alloc_for_io(size_t, boolean_t);
abd_t *abd_alloc_sametype(abd_t *, size_t);
void abd_gang_add(abd_t *, abd_t *, boolean_t);
void abd_free(abd_t *);
void abd_put(abd_t *);
abd_t *abd_get_offset(abd_t *, size_t);
abd_t *abd_get_offset_size(abd_t *, size_t, size_t);
abd_t *abd_get_zeros(size_t);
abd_t *abd_get_from_buf(void *, size_t);
void abd_put(abd_t *);

/*
* Conversion to and from a normal buffer
Expand Down Expand Up @@ -132,6 +136,7 @@ abd_zero(abd_t *abd, size_t size)
* ABD type check functions
*/
boolean_t abd_is_linear(abd_t *);
boolean_t abd_is_gang_abd(abd_t *);
boolean_t abd_is_linear_page(abd_t *);

/*
Expand All @@ -146,8 +151,7 @@ void abd_fini(void);
* Linux ABD bio functions
*/
#if defined(__linux__) && defined(_KERNEL)
unsigned int abd_scatter_bio_map_off(struct bio *, abd_t *, unsigned int,
size_t);
unsigned int abd_bio_map_off(struct bio *, abd_t *, unsigned int, size_t);
unsigned long abd_nr_pages_off(abd_t *, unsigned int, size_t);
#endif

Expand Down
17 changes: 17 additions & 0 deletions include/sys/abd_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ typedef enum abd_flags {
ABD_FLAG_MULTI_ZONE = 1 << 3, /* pages split over memory zones */
ABD_FLAG_MULTI_CHUNK = 1 << 4, /* pages split over multiple chunks */
ABD_FLAG_LINEAR_PAGE = 1 << 5, /* linear but allocd from page */
ABD_FLAG_GANG = 1 << 6, /* mult ABDs chained together */
ABD_FLAG_GANG_FREE = 1 << 7, /* gang ABD is responsible for mem */
ABD_FLAG_ZEROS = 1 << 8, /* ABD for zero-filled buffer */
} abd_flags_t;

typedef enum abd_stats_op {
Expand All @@ -49,8 +52,10 @@ typedef enum abd_stats_op {
struct abd {
abd_flags_t abd_flags;
uint_t abd_size; /* excludes scattered abd_offset */
list_node_t abd_gang_link;
struct abd *abd_parent;
zfs_refcount_t abd_children;
kmutex_t abd_mtx;
union {
struct abd_scatter {
uint_t abd_offset;
Expand All @@ -66,6 +71,9 @@ struct abd {
void *abd_buf;
struct scatterlist *abd_sgl; /* for LINEAR_PAGE */
} abd_linear;
struct abd_gang {
list_t abd_gang_chain;
} abd_gang;
} abd_u;
};

Expand All @@ -84,6 +92,14 @@ struct abd_iter {
struct scatterlist *iter_sg; /* current sg */
};

/*
* This is used to get an ABD from a Gang ABD's list based on
* the provided offset. This should only be called from the
* ABD source code.
*/
abd_t *abd_gang_get_offset(abd_t *, size_t *);


/*
* OS specific functions
*/
Expand Down Expand Up @@ -118,6 +134,7 @@ void abd_iter_unmap(struct abd_iter *);

#define ABD_SCATTER(abd) (abd->abd_u.abd_scatter)
#define ABD_LINEAR_BUF(abd) (abd->abd_u.abd_linear.abd_buf)
#define ABD_GANG(abd) (abd->abd_u.abd_gang)

#ifdef __cplusplus
}
Expand Down
65 changes: 64 additions & 1 deletion module/os/freebsd/zfs/abd_os.c
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,17 @@ SYSCTL_ULONG(_vfs_zfs, OID_AUTO, abd_chunk_size, CTLFLAG_RDTUN,
kmem_cache_t *abd_chunk_cache;
static kstat_t *abd_ksp;

/*
 * abd_zero_scatter is a scattered ABD of size SPA_MAXBLOCKSIZE whose
 * chunk pointers all refer to one zeroed buffer of zfs_abd_chunk_size
 * bytes (abd_zero_buf).  Sharing a single zero buffer between all of the
 * scatter chunks conserves memory.
 */
abd_t *abd_zero_scatter = NULL;
static char *abd_zero_buf = NULL;

/* The shared zeroed chunk that every abd_zero_scatter chunk points at. */
#define ABD_ZERO_PAGE (abd_zero_buf)

static void
abd_free_chunk(void *c)
{
Expand Down Expand Up @@ -193,6 +204,8 @@ abd_alloc_struct(size_t size)
abd_u.abd_scatter.abd_chunks[chunkcnt]);
abd_t *abd = kmem_alloc(abd_size, KM_PUSHPAGE);
ASSERT3P(abd, !=, NULL);
list_link_init(&abd->abd_gang_link);
mutex_init(&abd->abd_mtx, NULL, MUTEX_DEFAULT, NULL);
ABDSTAT_INCR(abdstat_struct_size, abd_size);

return (abd);
Expand All @@ -203,10 +216,54 @@ abd_free_struct(abd_t *abd)
{
size_t chunkcnt = abd_is_linear(abd) ? 0 : abd_scatter_chunkcnt(abd);
int size = offsetof(abd_t, abd_u.abd_scatter.abd_chunks[chunkcnt]);
mutex_destroy(&abd->abd_mtx);
ASSERT(!list_link_active(&abd->abd_gang_link));
kmem_free(abd, size);
ABDSTAT_INCR(abdstat_struct_size, -size);
}

/*
 * Allocate the global zero-filled scatter ABD, abd_zero_scatter, with a
 * size of SPA_MAXBLOCKSIZE.  A single zeroed buffer of
 * zfs_abd_chunk_size bytes is allocated and every chunk pointer of the
 * ABD is set to it (ABD_ZERO_PAGE), so only one chunk of memory backs
 * the whole ABD.
 */
static void
abd_alloc_zero_scatter(void)
{
	size_t n = abd_chunkcnt_for_bytes(SPA_MAXBLOCKSIZE);

	abd_zero_buf = kmem_zalloc(zfs_abd_chunk_size, KM_SLEEP);
	abd_zero_scatter = abd_alloc_struct(SPA_MAXBLOCKSIZE);

	abd_zero_scatter->abd_flags = ABD_FLAG_OWNER | ABD_FLAG_ZEROS;
	abd_zero_scatter->abd_size = SPA_MAXBLOCKSIZE;
	abd_zero_scatter->abd_parent = NULL;
	zfs_refcount_create(&abd_zero_scatter->abd_children);

	ABD_SCATTER(abd_zero_scatter).abd_offset = 0;
	ABD_SCATTER(abd_zero_scatter).abd_chunk_size = zfs_abd_chunk_size;

	/*
	 * The index is a size_t to match n and avoid a signed/unsigned
	 * comparison in the loop condition.
	 */
	for (size_t i = 0; i < n; i++) {
		ABD_SCATTER(abd_zero_scatter).abd_chunks[i] = ABD_ZERO_PAGE;
	}

	/*
	 * Only one chunk of data backs this ABD, so only one chunk is
	 * added to the scatter data size statistic.
	 */
	ABDSTAT_BUMP(abdstat_scatter_cnt);
	ABDSTAT_INCR(abdstat_scatter_data_size, zfs_abd_chunk_size);
}

/*
 * Tear down the global zero-filled scatter ABD set up by
 * abd_alloc_zero_scatter(): destroy its child refcount, undo the scatter
 * statistics, free the ABD structure and free the shared zero buffer.
 * Both global pointers are reset to NULL so that no stale reference to
 * freed memory is left behind.
 */
static void
abd_free_zero_scatter(void)
{
	zfs_refcount_destroy(&abd_zero_scatter->abd_children);
	ABDSTAT_BUMPDOWN(abdstat_scatter_cnt);
	ABDSTAT_INCR(abdstat_scatter_data_size, -(int)zfs_abd_chunk_size);

	abd_free_struct(abd_zero_scatter);
	abd_zero_scatter = NULL;

	kmem_free(abd_zero_buf, zfs_abd_chunk_size);
	/* Clear the freed buffer pointer, matching abd_zero_scatter above. */
	abd_zero_buf = NULL;
}

void
abd_init(void)
{
Expand All @@ -219,11 +276,15 @@ abd_init(void)
abd_ksp->ks_data = &abd_stats;
kstat_install(abd_ksp);
}

abd_alloc_zero_scatter();
}

void
abd_fini(void)
{
abd_free_zero_scatter();

if (abd_ksp != NULL) {
kstat_delete(abd_ksp);
abd_ksp = NULL;
Expand Down Expand Up @@ -271,12 +332,13 @@ abd_alloc_scatter_offset_chunkcnt(size_t chunkcnt)
abd_u.abd_scatter.abd_chunks[chunkcnt]);
abd_t *abd = kmem_alloc(abd_size, KM_PUSHPAGE);
ASSERT3P(abd, !=, NULL);
list_link_init(&abd->abd_gang_link);
mutex_init(&abd->abd_mtx, NULL, MUTEX_DEFAULT, NULL);
ABDSTAT_INCR(abdstat_struct_size, abd_size);

return (abd);
}


abd_t *
abd_get_offset_scatter(abd_t *sabd, size_t off)
{
Expand Down Expand Up @@ -332,6 +394,7 @@ abd_iter_scatter_chunk_index(struct abd_iter *aiter)
void
abd_iter_init(struct abd_iter *aiter, abd_t *abd)
{
ASSERT(!abd_is_gang_abd(abd));
abd_verify(abd);
aiter->iter_abd = abd;
aiter->iter_pos = 0;
Expand Down
Loading

0 comments on commit ceed488

Please sign in to comment.