Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Include zil kstats in the metric collections #2623

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 50 additions & 2 deletions plugins/inputs/zfs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@ from `sysctl` and `zpool` on FreeBSD.
# kstatPath = "/proc/spl/kstat/zfs"

## By default, telegraf gather all zfs stats
## If not specified, then default is:
## If not specified, then default for Linux is:
# kstatMetrics = ["arcstats", "zfetchstats", "vdev_cache_stats", "zil"]
## the default for FreeBSD where zil stats are unavailable is:
# kstatMetrics = ["arcstats", "zfetchstats", "vdev_cache_stats"]

## By default, don't gather zpool stats
Expand Down Expand Up @@ -174,6 +176,23 @@ each pool.
- vdev_cache_stats_hits
- vdev_cache_stats_misses

### ZIL Stats

- zil_zil_commit_count (Linux only)
- zil_zil_commit_writer_count (Linux only)
- zil_zil_itx_count (Linux only)
- zil_zil_itx_indirect_count (Linux only)
- zil_zil_itx_indirect_bytes (Linux only)
- zil_zil_itx_copied_count (Linux only)
- zil_zil_itx_copied_bytes (Linux only)
- zil_zil_itx_needcopy_count (Linux only)
- zil_zil_itx_needcopy_bytes (Linux only)
- zil_zil_itx_metaslab_normal_count (Linux only)
- zil_zil_itx_metaslab_normal_bytes (Linux only)
- zil_zil_itx_metaslab_slog_count (Linux only)
- zil_zil_itx_metaslab_slog_bytes (Linux only)


#### Pool Metrics (optional)

On Linux:
Expand Down Expand Up @@ -217,7 +236,7 @@ On FreeBSD:
$ ./telegraf -config telegraf.conf -input-filter zfs -test
* Plugin: zfs, Collection 1
> zfs_pool,health=ONLINE,pool=zroot allocated=1578590208i,capacity=2i,dedupratio=1,fragmentation=1i,free=64456531968i,size=66035122176i 1464473103625653908
> zfs,pools=zroot arcstats_allocated=4167764i,arcstats_anon_evictable_data=0i,arcstats_anon_evictable_metadata=0i,arcstats_anon_size=16896i,arcstats_arc_meta_limit=10485760i,arcstats_arc_meta_max=115269568i,arcstats_arc_meta_min=8388608i,arcstats_arc_meta_used=51977456i,arcstats_c=16777216i,arcstats_c_max=41943040i,arcstats_c_min=16777216i,arcstats_data_size=0i,arcstats_deleted=1699340i,arcstats_demand_data_hits=14836131i,arcstats_demand_data_misses=2842945i,arcstats_demand_hit_predictive_prefetch=0i,arcstats_demand_metadata_hits=1655006i,arcstats_demand_metadata_misses=830074i,arcstats_duplicate_buffers=0i,arcstats_duplicate_buffers_size=0i,arcstats_duplicate_reads=123i,arcstats_evict_l2_cached=0i,arcstats_evict_l2_eligible=332172623872i,arcstats_evict_l2_ineligible=6168576i,arcstats_evict_l2_skip=0i,arcstats_evict_not_enough=12189444i,arcstats_evict_skip=195190764i,arcstats_hash_chain_max=2i,arcstats_hash_chains=10i,arcstats_hash_collisions=43134i,arcstats_hash_elements=2268i,arcstats_hash_elements_max=6136i,arcstats_hdr_size=565632i,arcstats_hits=16515778i,arcstats_l2_abort_lowmem=0i,arcstats_l2_asize=0i,arcstats_l2_cdata_free_on_write=0i,arcstats_l2_cksum_bad=0i,arcstats_l2_compress_failures=0i,arcstats_l2_compress_successes=0i,arcstats_l2_compress_zeros=0i,arcstats_l2_evict_l1cached=0i,arcstats_l2_evict_lock_retry=0i,arcstats_l2_evict_reading=0i,arcstats_l2_feeds=0i,arcstats_l2_free_on_write=0i,arcstats_l2_hdr_size=0i,arcstats_l2_hits=0i,arcstats_l2_io_error=0i,arcstats_l2_misses=0i,arcstats_l2_read_bytes=0i,arcstats_l2_rw_clash=0i,arcstats_l2_size=0i,arcstats_l2_write_buffer_bytes_scanned=0i,arcstats_l2_write_buffer_iter=0i,arcstats_l2_write_buffer_list_iter=0i,arcstats_l2_write_buffer_list_null_iter=0i,arcstats_l2_write_bytes=0i,arcstats_l2_write_full=0i,arcstats_l2_write_in_l2=0i,arcstats_l2_write_io_in_progress=0i,arcstats_l2_write_not_cacheable=380i,arcstats_l2_write_passed_headroom=0i,arcstats_l2_write_pios=0i,arcstats_l2_write_spa_mismatch=0i,arcstats_l2
_write_trylock_fail=0i,arcstats_l2_writes_done=0i,arcstats_l2_writes_error=0i,arcstats_l2_writes_lock_retry=0i,arcstats_l2_writes_sent=0i,arcstats_memory_throttle_count=0i,arcstats_metadata_size=17014784i,arcstats_mfu_evictable_data=0i,arcstats_mfu_evictable_metadata=16384i,arcstats_mfu_ghost_evictable_data=5723648i,arcstats_mfu_ghost_evictable_metadata=10709504i,arcstats_mfu_ghost_hits=1315619i,arcstats_mfu_ghost_size=16433152i,arcstats_mfu_hits=7646611i,arcstats_mfu_size=305152i,arcstats_misses=3676993i,arcstats_mru_evictable_data=0i,arcstats_mru_evictable_metadata=0i,arcstats_mru_ghost_evictable_data=0i,arcstats_mru_ghost_evictable_metadata=80896i,arcstats_mru_ghost_hits=324250i,arcstats_mru_ghost_size=80896i,arcstats_mru_hits=8844526i,arcstats_mru_size=16693248i,arcstats_mutex_miss=354023i,arcstats_other_size=34397040i,arcstats_p=4172800i,arcstats_prefetch_data_hits=0i,arcstats_prefetch_data_misses=0i,arcstats_prefetch_metadata_hits=24641i,arcstats_prefetch_metadata_misses=3974i,arcstats_size=51977456i,arcstats_sync_wait_for_async=0i,vdev_cache_stats_delegations=779i,vdev_cache_stats_hits=323123i,vdev_cache_stats_misses=59929i,zfetchstats_hits=0i,zfetchstats_max_streams=0i,zfetchstats_misses=0i 1464473103634124908
> zfs,pools=zroot arcstats_allocated=4167764i,arcstats_anon_evictable_data=0i,arcstats_anon_evictable_metadata=0i,arcstats_anon_size=16896i,arcstats_arc_meta_limit=10485760i,arcstats_arc_meta_max=115269568i,arcstats_arc_meta_min=8388608i,arcstats_arc_meta_used=51977456i,arcstats_c=16777216i,arcstats_c_max=41943040i,arcstats_c_min=16777216i,arcstats_data_size=0i,arcstats_deleted=1699340i,arcstats_demand_data_hits=14836131i,arcstats_demand_data_misses=2842945i,arcstats_demand_hit_predictive_prefetch=0i,arcstats_demand_metadata_hits=1655006i,arcstats_demand_metadata_misses=830074i,arcstats_duplicate_buffers=0i,arcstats_duplicate_buffers_size=0i,arcstats_duplicate_reads=123i,arcstats_evict_l2_cached=0i,arcstats_evict_l2_eligible=332172623872i,arcstats_evict_l2_ineligible=6168576i,arcstats_evict_l2_skip=0i,arcstats_evict_not_enough=12189444i,arcstats_evict_skip=195190764i,arcstats_hash_chain_max=2i,arcstats_hash_chains=10i,arcstats_hash_collisions=43134i,arcstats_hash_elements=2268i,arcstats_hash_elements_max=6136i,arcstats_hdr_size=565632i,arcstats_hits=16515778i,arcstats_l2_abort_lowmem=0i,arcstats_l2_asize=0i,arcstats_l2_cdata_free_on_write=0i,arcstats_l2_cksum_bad=0i,arcstats_l2_compress_failures=0i,arcstats_l2_compress_successes=0i,arcstats_l2_compress_zeros=0i,arcstats_l2_evict_l1cached=0i,arcstats_l2_evict_lock_retry=0i,arcstats_l2_evict_reading=0i,arcstats_l2_feeds=0i,arcstats_l2_free_on_write=0i,arcstats_l2_hdr_size=0i,arcstats_l2_hits=0i,arcstats_l2_io_error=0i,arcstats_l2_misses=0i,arcstats_l2_read_bytes=0i,arcstats_l2_rw_clash=0i,arcstats_l2_size=0i,arcstats_l2_write_buffer_bytes_scanned=0i,arcstats_l2_write_buffer_iter=0i,arcstats_l2_write_buffer_list_iter=0i,arcstats_l2_write_buffer_list_null_iter=0i,arcstats_l2_write_bytes=0i,arcstats_l2_write_full=0i,arcstats_l2_write_in_l2=0i,arcstats_l2_write_io_in_progress=0i,arcstats_l2_write_not_cacheable=380i,arcstats_l2_write_passed_headroom=0i,arcstats_l2_write_pios=0i,arcstats_l2_write_spa_mismatch=0i,arcstats_l2
_write_trylock_fail=0i,arcstats_l2_writes_done=0i,arcstats_l2_writes_error=0i,arcstats_l2_writes_lock_retry=0i,arcstats_l2_writes_sent=0i,arcstats_memory_throttle_count=0i,arcstats_metadata_size=17014784i,arcstats_mfu_evictable_data=0i,arcstats_mfu_evictable_metadata=16384i,arcstats_mfu_ghost_evictable_data=5723648i,arcstats_mfu_ghost_evictable_metadata=10709504i,arcstats_mfu_ghost_hits=1315619i,arcstats_mfu_ghost_size=16433152i,arcstats_mfu_hits=7646611i,arcstats_mfu_size=305152i,arcstats_misses=3676993i,arcstats_mru_evictable_data=0i,arcstats_mru_evictable_metadata=0i,arcstats_mru_ghost_evictable_data=0i,arcstats_mru_ghost_evictable_metadata=80896i,arcstats_mru_ghost_hits=324250i,arcstats_mru_ghost_size=80896i,arcstats_mru_hits=8844526i,arcstats_mru_size=16693248i,arcstats_mutex_miss=354023i,arcstats_other_size=34397040i,arcstats_p=4172800i,arcstats_prefetch_data_hits=0i,arcstats_prefetch_data_misses=0i,arcstats_prefetch_metadata_hits=24641i,arcstats_prefetch_metadata_misses=3974i,arcstats_size=51977456i,arcstats_sync_wait_for_async=0i,vdev_cache_stats_delegations=779i,vdev_cache_stats_hits=323123i,vdev_cache_stats_misses=59929i,zfetchstats_hits=0i,zfetchstats_max_streams=0i,zfetchstats_misses=0i,zil_zil_commit_count=717437i,zil_zil_commit_writer_count=717190i,zil_zil_itx_copied_bytes=2860i,zil_zil_itx_copied_count=2i,zil_zil_itx_count=1572344i,zil_zil_itx_indirect_bytes=22684004247i,zil_zil_itx_indirect_count=414712i,zil_zil_itx_metaslab_normal_bytes=0i,zil_zil_itx_metaslab_normal_count=0i,zil_zil_itx_metaslab_slog_bytes=6358474472i,zil_zil_itx_metaslab_slog_count=565887i,zil_zil_itx_needcopy_bytes=5957698527i,zil_zil_itx_needcopy_count=1077869i 1464473103634124908
```

### Description
Expand Down Expand Up @@ -292,3 +311,32 @@ A short description for some of the metrics.
`vdev_cache_stats_hits` Hits to the vdev (device level) cache.

`vdev_cache_stats_misses` Misses to the vdev (device level) cache.

#### ZIL Stats

`zil_zil_commit_count` Number of times a ZIL commit (e.g. fsync) has been requested.

`zil_zil_commit_writer_count` Number of times the ZIL has been flushed to stable storage. This is less than `zil_zil_commit_count` when commits are merged.

`zil_zil_itx_count` Number of transactions (reads, writes, renames, etc.) that have been committed.

`zil_zil_itx_indirect_count` Transaction indirect count.

`zil_zil_itx_indirect_bytes` Transaction indirect bytes. Note that "bytes" accumulates the length of the transactions (i.e. data), not the actual log record sizes.

`zil_zil_itx_copied_count` Transaction copied count.

`zil_zil_itx_copied_bytes` Transaction copied bytes. Note that "bytes" accumulates the length of the transactions (i.e. data), not the actual log record sizes.

`zil_zil_itx_needcopy_count` Transaction needcopy count.

`zil_zil_itx_needcopy_bytes` Transaction needcopy bytes. Note that "bytes" accumulates the length of the transactions (i.e. data), not the actual log record sizes.

`zil_zil_itx_metaslab_normal_count` Number of transactions which have been allocated to the "normal" (i.e. not slog) storage pool.

`zil_zil_itx_metaslab_normal_bytes` Bytes of transactions which have been allocated to the "normal" (i.e. not slog) storage pool. Note that "bytes" here accumulates the actual log record sizes, which do not include the data itself for indirect writes.

`zil_zil_itx_metaslab_slog_count` Number of transactions which have been allocated to the "slog" storage pool.

`zil_zil_itx_metaslab_slog_bytes` Bytes of transactions which have been allocated to the "slog" storage pool. Note that "bytes" here accumulates the actual log record sizes, which do not include the data itself for indirect writes.

4 changes: 2 additions & 2 deletions plugins/inputs/zfs/zfs.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ var sampleConfig = `

## By default, telegraf gather all zfs stats
## If not specified, then default is:
# kstatMetrics = ["arcstats", "zfetchstats", "vdev_cache_stats"]
# kstatMetrics = ["arcstats", "zfetchstats", "vdev_cache_stats", "zil"]

## By default, don't gather zpool stats
# poolMetrics = false
Expand All @@ -29,5 +29,5 @@ func (z *Zfs) SampleConfig() string {
}

// Description returns a one-line, human-readable summary of the kstat
// sources and pool data this plugin reads.
func (z *Zfs) Description() string {
return "Read metrics of ZFS from arcstats, zfetchstats, vdev_cache_stats, and pools"
return "Read metrics of ZFS from arcstats, zfetchstats, vdev_cache_stats, zil, and pools"
}
2 changes: 1 addition & 1 deletion plugins/inputs/zfs/zfs_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ func gatherPoolStats(pool poolInfo, acc telegraf.Accumulator) error {
func (z *Zfs) Gather(acc telegraf.Accumulator) error {
kstatMetrics := z.KstatMetrics
if len(kstatMetrics) == 0 {
kstatMetrics = []string{"arcstats", "zfetchstats", "vdev_cache_stats"}
kstatMetrics = []string{"arcstats", "zfetchstats", "vdev_cache_stats", "zil"}
}

kstatPath := z.KstatPath
Expand Down
60 changes: 46 additions & 14 deletions plugins/inputs/zfs/zfs_linux_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,22 @@ delegations 4 0
hits 4 0
misses 4 0
`
// zilContents is a canned copy of a "zil" kstat file used as a test fixture:
// one header line, a "name type data" column line, then one row per counter.
// The data column values are the ones the zil_zil_* expectations in
// getKstatMetricsAll are checked against.
const zilContents = `7 1 0x01 13 624 8816177008 1273221813798477
name type data
zil_commit_count 4 726210
zil_commit_writer_count 4 725963
zil_itx_count 4 1601362
zil_itx_indirect_count 4 414878
zil_itx_indirect_bytes 4 22705275500
zil_itx_copied_count 4 2
zil_itx_copied_bytes 4 2860
zil_itx_needcopy_count 4 1106323
zil_itx_needcopy_bytes 4 6092469356
zil_itx_metaslab_normal_count 4 0
zil_itx_metaslab_normal_bytes 4 0
zil_itx_metaslab_slog_count 4 574922
zil_itx_metaslab_slog_bytes 4 6500515552
`
const pool_ioContents = `11 3 0x00 1 80 2225326830828 32953476980628
nread nwritten reads writes wtime wlentime wupdate rtime rlentime rupdate wcnt rcnt
1884160 6450688 22 978 272187126 2850519036 2263669418655 424226814 2850519036 2263669871823 0 0
Expand Down Expand Up @@ -186,6 +202,9 @@ func TestZfsGeneratesMetrics(t *testing.T) {
err = ioutil.WriteFile(testKstatPath+"/vdev_cache_stats", []byte(vdev_cache_statsContents), 0644)
require.NoError(t, err)

err = ioutil.WriteFile(testKstatPath+"/zil", []byte(zilContents), 0644)
require.NoError(t, err)

intMetrics := getKstatMetricsAll()

var acc testutil.Accumulator
Expand Down Expand Up @@ -328,20 +347,33 @@ func getKstatMetricsArcOnly() map[string]interface{} {

func getKstatMetricsAll() map[string]interface{} {
otherMetrics := map[string]interface{}{
"zfetchstats_hits": int64(7812959060),
"zfetchstats_misses": int64(4154484207),
"zfetchstats_colinear_hits": int64(1366368),
"zfetchstats_colinear_misses": int64(4153117839),
"zfetchstats_stride_hits": int64(7309776732),
"zfetchstats_stride_misses": int64(222766182),
"zfetchstats_reclaim_successes": int64(107788388),
"zfetchstats_reclaim_failures": int64(4045329451),
"zfetchstats_streams_resets": int64(20989756),
"zfetchstats_streams_noresets": int64(503182328),
"zfetchstats_bogus_streams": int64(0),
"vdev_cache_stats_delegations": int64(0),
"vdev_cache_stats_hits": int64(0),
"vdev_cache_stats_misses": int64(0),
"zfetchstats_hits": int64(7812959060),
"zfetchstats_misses": int64(4154484207),
"zfetchstats_colinear_hits": int64(1366368),
"zfetchstats_colinear_misses": int64(4153117839),
"zfetchstats_stride_hits": int64(7309776732),
"zfetchstats_stride_misses": int64(222766182),
"zfetchstats_reclaim_successes": int64(107788388),
"zfetchstats_reclaim_failures": int64(4045329451),
"zfetchstats_streams_resets": int64(20989756),
"zfetchstats_streams_noresets": int64(503182328),
"zfetchstats_bogus_streams": int64(0),
"vdev_cache_stats_delegations": int64(0),
"vdev_cache_stats_hits": int64(0),
"vdev_cache_stats_misses": int64(0),
"zil_zil_commit_count": int64(726210),
"zil_zil_commit_writer_count": int64(725963),
"zil_zil_itx_count": int64(1601362),
"zil_zil_itx_indirect_count": int64(414878),
"zil_zil_itx_indirect_bytes": int64(22705275500),
"zil_zil_itx_copied_count": int64(2),
"zil_zil_itx_copied_bytes": int64(2860),
"zil_zil_itx_needcopy_count": int64(1106323),
"zil_zil_itx_needcopy_bytes": int64(6092469356),
"zil_zil_itx_metaslab_normal_count": int64(0),
"zil_zil_itx_metaslab_normal_bytes": int64(0),
"zil_zil_itx_metaslab_slog_count": int64(574922),
"zil_zil_itx_metaslab_slog_bytes": int64(6500515552),
}
arcMetrics := getKstatMetricsArcOnly()
for k, v := range otherMetrics {
Expand Down