Skip to content

Commit

Permalink
Add visibility into cached dbufs
Browse files Browse the repository at this point in the history
Currently there is no mechanism to inspect which dbufs are being
cached by the system.  There are some coarse counters in arcstats
but they only give a rough idea of what's being cached.  This patch
aims to improve the current situation by adding a new dbufs kstat.

When read, this new kstat will walk all cached dbufs linked into
the dbuf_hash.  For each dbuf it will dump detailed information
about the buffer.  It will also dump additional information about
the referenced arc buffer and its related dnode.  This provides a
more complete view into exactly what is being cached.

With this generic infrastructure in place utilities can be written
to post-process the data to understand exactly how the caching is
working.  For example, the data could be processed to show a list
of all cached dnodes and how much space they're consuming.  Or a
similar list could be generated based on dnode type.  Many other
ways to interpret the data exist based on what kinds of questions
you're trying to answer.

Signed-off-by: Brian Behlendorf <[email protected]>
Signed-off-by: Prakash Surya <[email protected]>
  • Loading branch information
behlendorf committed Oct 25, 2013
1 parent 2d37239 commit e0b0ca9
Show file tree
Hide file tree
Showing 9 changed files with 368 additions and 8 deletions.
31 changes: 31 additions & 0 deletions include/sys/arc.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,36 @@ typedef enum arc_space_type {
ARC_SPACE_NUMTYPES
} arc_space_type_t;

/*
 * Tag identifying which ARC state an arc_state_t represents.  arc_init()
 * stores the matching value in each state's arcs_state member, and
 * arc_buf_info() reports it through abi_state_type.
 */
typedef enum arc_state_type {
	ARC_STATE_ANON		= 0,	/* tag stored in arc_anon */
	ARC_STATE_MRU		= 1,	/* tag stored in arc_mru */
	ARC_STATE_MRU_GHOST	= 2,	/* tag stored in arc_mru_ghost */
	ARC_STATE_MFU		= 3,	/* tag stored in arc_mfu */
	ARC_STATE_MFU_GHOST	= 4,	/* tag stored in arc_mfu_ghost */
	ARC_STATE_L2C_ONLY	= 5,	/* tag stored in arc_l2c_only */
	ARC_STATE_NUMTYPES	= 6	/* count of the tags above */
} arc_state_type_t;

/*
 * Snapshot of a single arc buffer header, filled in by arc_buf_info().
 * arc_buf_info() zeroes the whole structure before populating it, so any
 * member it does not assign reads back as 0.
 */
typedef struct arc_buf_info {
/* arcs_state of the header's state, or ARC_STATE_ANON if it has none */
arc_state_type_t abi_state_type;
/* copy of the header's b_type */
arc_buf_contents_t abi_state_contents;
/*
 * Position of the header in its state's list (0 is the list head).
 * Set to -1, i.e. all ones in this unsigned field, when the position
 * was not requested or the header is not on a list.
 */
uint64_t abi_state_index;
/* copy of the header's b_flags */
uint32_t abi_flags;
/* copy of the header's b_datacnt */
uint32_t abi_datacnt;
/* copy of the header's b_size */
uint64_t abi_size;
/*
 * Presumably the header's b_spa value.
 * NOTE(review): confirm that arc_buf_info() populates this member.
 */
uint64_t abi_spa;
/* copy of the header's b_arc_access */
uint64_t abi_access;
/* copies of the header's b_mru_hits / b_mru_ghost_hits counters */
uint32_t abi_mru_hits;
uint32_t abi_mru_ghost_hits;
/* copies of the header's b_mfu_hits / b_mfu_ghost_hits counters */
uint32_t abi_mfu_hits;
uint32_t abi_mfu_ghost_hits;
/* b_hits of the attached l2arc header; 0 when there is none */
uint32_t abi_l2arc_hits;
/* refcount_count() of the header's b_refcnt */
uint32_t abi_holds;
/*
 * The three members below mirror b_daddr, b_asize and b_compress of the
 * attached l2arc header and stay 0 when the header has no l2arc header.
 */
uint64_t abi_l2arc_dattr;
uint64_t abi_l2arc_asize;
enum zio_compress abi_l2arc_compress;
} arc_buf_info_t;

void arc_space_consume(uint64_t space, arc_space_type_t type);
void arc_space_return(uint64_t space, arc_space_type_t type);
arc_buf_t *arc_buf_alloc(spa_t *spa, int size, void *tag,
Expand All @@ -101,6 +131,7 @@ void arc_return_buf(arc_buf_t *buf, void *tag);
void arc_loan_inuse_buf(arc_buf_t *buf, void *tag);
void arc_buf_add_ref(arc_buf_t *buf, void *tag);
boolean_t arc_buf_remove_ref(arc_buf_t *buf, void *tag);
void arc_buf_info(arc_buf_t *buf, arc_buf_info_t *abi, int state_index);
int arc_buf_size(arc_buf_t *buf);
void arc_release(arc_buf_t *buf, void *tag);
int arc_released(arc_buf_t *buf);
Expand Down
3 changes: 3 additions & 0 deletions include/sys/dbuf.h
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,9 @@ void dbuf_free_range(struct dnode *dn, uint64_t start, uint64_t end,

void dbuf_new_size(dmu_buf_impl_t *db, int size, dmu_tx_t *tx);

void dbuf_stats_init(dbuf_hash_table_t *hash);
void dbuf_stats_destroy(void);

#define DB_DNODE(_db) ((_db)->db_dnode_handle->dnh_dnode)
#define DB_DNODE_LOCK(_db) ((_db)->db_dnode_handle->dnh_zrlock)
#define DB_DNODE_ENTER(_db) (zrl_add(&DB_DNODE_LOCK(_db)))
Expand Down
1 change: 1 addition & 0 deletions include/sys/dmu.h
Original file line number Diff line number Diff line change
Expand Up @@ -660,6 +660,7 @@ extern const dmu_object_byteswap_info_t dmu_ot_byteswap[DMU_BSWAP_NUMFUNCS];
* If doi is NULL, just indicates whether the object exists.
*/
int dmu_object_info(objset_t *os, uint64_t object, dmu_object_info_t *doi);
void __dmu_object_info_from_dnode(struct dnode *dn, dmu_object_info_t *doi);
void dmu_object_info_from_dnode(struct dnode *dn, dmu_object_info_t *doi);
void dmu_object_info_from_db(dmu_buf_t *db, dmu_object_info_t *doi);
void dmu_object_size_from_db(dmu_buf_t *db, uint32_t *blksize,
Expand Down
1 change: 1 addition & 0 deletions lib/libzpool/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ libzpool_la_SOURCES = \
$(top_srcdir)/module/zfs/bpobj.c \
$(top_srcdir)/module/zfs/bptree.c \
$(top_srcdir)/module/zfs/dbuf.c \
$(top_srcdir)/module/zfs/dbuf_stats.c \
$(top_srcdir)/module/zfs/ddt.c \
$(top_srcdir)/module/zfs/ddt_zap.c \
$(top_srcdir)/module/zfs/dmu.c \
Expand Down
1 change: 1 addition & 0 deletions module/zfs/Makefile.in
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ $(MODULE)-objs += @top_srcdir@/module/zfs/arc.o
$(MODULE)-objs += @top_srcdir@/module/zfs/bplist.o
$(MODULE)-objs += @top_srcdir@/module/zfs/bpobj.o
$(MODULE)-objs += @top_srcdir@/module/zfs/dbuf.o
$(MODULE)-objs += @top_srcdir@/module/zfs/dbuf_stats.o
$(MODULE)-objs += @top_srcdir@/module/zfs/bptree.o
$(MODULE)-objs += @top_srcdir@/module/zfs/ddt.o
$(MODULE)-objs += @top_srcdir@/module/zfs/ddt_zap.o
Expand Down
87 changes: 86 additions & 1 deletion module/zfs/arc.c
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,7 @@ typedef struct arc_state {
uint64_t arcs_lsize[ARC_BUFC_NUMTYPES]; /* amount of evictable data */
uint64_t arcs_size; /* total amount of data in this state */
kmutex_t arcs_mtx;
arc_state_type_t arcs_state;
} arc_state_t;

/* The 6 states: */
Expand Down Expand Up @@ -534,6 +535,11 @@ struct arc_buf_hdr {

/* updated atomically */
clock_t b_arc_access;
uint32_t b_mru_hits;
uint32_t b_mru_ghost_hits;
uint32_t b_mfu_hits;
uint32_t b_mfu_ghost_hits;
uint32_t b_l2_hits;

/* self protecting */
refcount_t b_refcnt;
Expand Down Expand Up @@ -709,7 +715,8 @@ struct l2arc_buf_hdr {
/* compression applied to buffer data */
enum zio_compress b_compress;
/* real alloc'd buffer size depending on b_compress applied */
int b_asize;
uint32_t b_asize;
uint32_t b_hits;
/* temporary buffer holder for in-flight compressed data */
void *b_tmp_cdata;
};
Expand Down Expand Up @@ -1137,6 +1144,54 @@ remove_reference(arc_buf_hdr_t *ab, kmutex_t *hash_lock, void *tag)
return (cnt);
}

/*
 * Returns detailed information about a specific arc buffer.  When the
 * state_index argument is set the function will calculate the arc header
 * list position for its arc state.  Since this requires a linear traversal
 * callers are strongly encouraged not to do this.  However, it can be
 * helpful for targeted analysis so the functionality is provided.
 *
 *	ab		buffer whose header is described
 *	abi		caller supplied structure; zeroed, then filled in
 *	state_index	non-zero to also compute abi->abi_state_index
 *
 * abi->abi_state_index is left at -1 (all ones) when the position was not
 * requested, when the header is not linked on a state list, or when the
 * header could not be found while walking the list.
 */
void
arc_buf_info(arc_buf_t *ab, arc_buf_info_t *abi, int state_index)
{
	arc_buf_hdr_t *hdr = ab->b_hdr;
	arc_state_t *state = hdr->b_state;

	memset(abi, 0, sizeof (arc_buf_info_t));
	abi->abi_flags = hdr->b_flags;
	abi->abi_datacnt = hdr->b_datacnt;
	abi->abi_state_type = state ? state->arcs_state : ARC_STATE_ANON;
	abi->abi_state_contents = hdr->b_type;
	abi->abi_state_index = -1;
	abi->abi_size = hdr->b_size;
	/* Previously never assigned, so consumers always saw 0 here. */
	abi->abi_spa = hdr->b_spa;
	abi->abi_access = hdr->b_arc_access;
	abi->abi_mru_hits = hdr->b_mru_hits;
	abi->abi_mru_ghost_hits = hdr->b_mru_ghost_hits;
	abi->abi_mfu_hits = hdr->b_mfu_hits;
	abi->abi_mfu_ghost_hits = hdr->b_mfu_ghost_hits;
	abi->abi_holds = refcount_count(&hdr->b_refcnt);

	/* The l2arc members stay zeroed when no l2arc header is attached. */
	if (hdr->b_l2hdr) {
		abi->abi_l2arc_dattr = hdr->b_l2hdr->b_daddr;
		abi->abi_l2arc_asize = hdr->b_l2hdr->b_asize;
		abi->abi_l2arc_compress = hdr->b_l2hdr->b_compress;
		abi->abi_l2arc_hits = hdr->b_l2hdr->b_hits;
	}

	if (state && state_index && list_link_active(&hdr->b_arc_node)) {
		list_t *list = &state->arcs_list[hdr->b_type];
		arc_buf_hdr_t *h;
		uint64_t index = 0;

		/*
		 * The link check above is made without arcs_mtx, so the
		 * header may have left the list by the time it is walked.
		 * Only publish an index once the header is actually found;
		 * otherwise abi_state_index keeps its -1 "unknown" value
		 * instead of reporting the position of the list tail.
		 */
		mutex_enter(&state->arcs_mtx);
		for (h = list_head(list); h != NULL; h = list_next(list, h)) {
			if (h == hdr) {
				abi->abi_state_index = index;
				break;
			}
			index++;
		}
		mutex_exit(&state->arcs_mtx);
	}
}

/*
* Move the supplied buffer to the indicated state. The mutex
* for the buffer must be held by the caller.
Expand Down Expand Up @@ -1298,6 +1353,11 @@ arc_buf_alloc(spa_t *spa, int size, void *tag, arc_buf_contents_t type)
hdr->b_spa = spa_load_guid(spa);
hdr->b_state = arc_anon;
hdr->b_arc_access = 0;
hdr->b_mru_hits = 0;
hdr->b_mru_ghost_hits = 0;
hdr->b_mfu_hits = 0;
hdr->b_mfu_ghost_hits = 0;
hdr->b_l2_hits = 0;
buf = kmem_cache_alloc(buf_cache, KM_PUSHPAGE);
buf->b_hdr = hdr;
buf->b_data = NULL;
Expand Down Expand Up @@ -2670,6 +2730,7 @@ arc_access(arc_buf_hdr_t *buf, kmutex_t *hash_lock)
ASSERT(list_link_active(&buf->b_arc_node));
} else {
buf->b_flags &= ~ARC_PREFETCH;
atomic_inc_32(&buf->b_mru_hits);
ARCSTAT_BUMP(arcstat_mru_hits);
}
buf->b_arc_access = now;
Expand All @@ -2691,6 +2752,7 @@ arc_access(arc_buf_hdr_t *buf, kmutex_t *hash_lock)
DTRACE_PROBE1(new_state__mfu, arc_buf_hdr_t *, buf);
arc_change_state(arc_mfu, buf, hash_lock);
}
atomic_inc_32(&buf->b_mru_hits);
ARCSTAT_BUMP(arcstat_mru_hits);
} else if (buf->b_state == arc_mru_ghost) {
arc_state_t *new_state;
Expand All @@ -2713,6 +2775,7 @@ arc_access(arc_buf_hdr_t *buf, kmutex_t *hash_lock)
buf->b_arc_access = ddi_get_lbolt();
arc_change_state(new_state, buf, hash_lock);

atomic_inc_32(&buf->b_mru_ghost_hits);
ARCSTAT_BUMP(arcstat_mru_ghost_hits);
} else if (buf->b_state == arc_mfu) {
/*
Expand All @@ -2728,6 +2791,7 @@ arc_access(arc_buf_hdr_t *buf, kmutex_t *hash_lock)
ASSERT(refcount_count(&buf->b_refcnt) == 0);
ASSERT(list_link_active(&buf->b_arc_node));
}
atomic_inc_32(&buf->b_mfu_hits);
ARCSTAT_BUMP(arcstat_mfu_hits);
buf->b_arc_access = ddi_get_lbolt();
} else if (buf->b_state == arc_mfu_ghost) {
Expand All @@ -2751,6 +2815,7 @@ arc_access(arc_buf_hdr_t *buf, kmutex_t *hash_lock)
DTRACE_PROBE1(new_state__mfu, arc_buf_hdr_t *, buf);
arc_change_state(new_state, buf, hash_lock);

atomic_inc_32(&buf->b_mfu_ghost_hits);
ARCSTAT_BUMP(arcstat_mfu_ghost_hits);
} else if (buf->b_state == arc_l2c_only) {
/*
Expand Down Expand Up @@ -3134,6 +3199,7 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_done_func_t *done,

DTRACE_PROBE1(l2arc__hit, arc_buf_hdr_t *, hdr);
ARCSTAT_BUMP(arcstat_l2_hits);
atomic_inc_32(&hdr->b_l2hdr->b_hits);

cb = kmem_zalloc(sizeof (l2arc_read_callback_t),
KM_PUSHPAGE);
Expand Down Expand Up @@ -3469,6 +3535,11 @@ arc_release(arc_buf_t *buf, void *tag)
nhdr->b_buf = buf;
nhdr->b_state = arc_anon;
nhdr->b_arc_access = 0;
nhdr->b_mru_hits = 0;
nhdr->b_mru_ghost_hits = 0;
nhdr->b_mfu_hits = 0;
nhdr->b_mfu_ghost_hits = 0;
nhdr->b_l2_hits = 0;
nhdr->b_flags = flags & ARC_L2_WRITING;
nhdr->b_l2hdr = NULL;
nhdr->b_datacnt = 1;
Expand All @@ -3485,6 +3556,11 @@ arc_release(arc_buf_t *buf, void *tag)
if (hdr->b_state != arc_anon)
arc_change_state(arc_anon, hdr, hash_lock);
hdr->b_arc_access = 0;
hdr->b_mru_hits = 0;
hdr->b_mru_ghost_hits = 0;
hdr->b_mfu_hits = 0;
hdr->b_mfu_ghost_hits = 0;
hdr->b_l2_hits = 0;
if (hash_lock)
mutex_exit(hash_lock);

Expand Down Expand Up @@ -3902,6 +3978,13 @@ arc_init(void)
list_create(&arc_l2c_only->arcs_list[ARC_BUFC_DATA],
sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_arc_node));

arc_anon->arcs_state = ARC_STATE_ANON;
arc_mru->arcs_state = ARC_STATE_MRU;
arc_mru_ghost->arcs_state = ARC_STATE_MRU_GHOST;
arc_mfu->arcs_state = ARC_STATE_MFU;
arc_mfu_ghost->arcs_state = ARC_STATE_MFU_GHOST;
arc_l2c_only->arcs_state = ARC_STATE_L2C_ONLY;

buf_init();

arc_thread_exit = 0;
Expand Down Expand Up @@ -4785,6 +4868,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz,
l2hdr->b_compress = ZIO_COMPRESS_OFF;
l2hdr->b_asize = ab->b_size;
l2hdr->b_tmp_cdata = ab->b_buf->b_data;
l2hdr->b_hits = 0;

buf_sz = ab->b_size;
ab->b_l2hdr = l2hdr;
Expand Down Expand Up @@ -5317,6 +5401,7 @@ l2arc_stop(void)
#if defined(_KERNEL) && defined(HAVE_SPL)
EXPORT_SYMBOL(arc_read);
EXPORT_SYMBOL(arc_buf_remove_ref);
EXPORT_SYMBOL(arc_buf_info);
EXPORT_SYMBOL(arc_getbuf_func);
EXPORT_SYMBOL(arc_add_prune_callback);
EXPORT_SYMBOL(arc_remove_prune_callback);
Expand Down
4 changes: 4 additions & 0 deletions module/zfs/dbuf.c
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,8 @@ dbuf_init(void)

for (i = 0; i < DBUF_MUTEXES; i++)
mutex_init(&h->hash_mutexes[i], NULL, MUTEX_DEFAULT, NULL);

dbuf_stats_init(h);
}

void
Expand All @@ -325,6 +327,8 @@ dbuf_fini(void)
dbuf_hash_table_t *h = &dbuf_hash_table;
int i;

dbuf_stats_destroy();

for (i = 0; i < DBUF_MUTEXES; i++)
mutex_destroy(&h->hash_mutexes[i]);
#if defined(_KERNEL) && defined(HAVE_SPL)
Expand Down
Loading

0 comments on commit e0b0ca9

Please sign in to comment.