Skip to content

Commit

Permalink
Add dataset metrics to zfs input (#8383)
Browse files Browse the repository at this point in the history
  • Loading branch information
zozoh94 authored Nov 27, 2020
1 parent 42eacb3 commit ef91f96
Show file tree
Hide file tree
Showing 4 changed files with 144 additions and 10 deletions.
23 changes: 21 additions & 2 deletions plugins/inputs/zfs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

This ZFS plugin provides metrics from your ZFS filesystems. It supports ZFS on
Linux and FreeBSD. It gets ZFS stat from `/proc/spl/kstat/zfs` on Linux and
from `sysctl` and `zpool` on FreeBSD.
from `sysctl`, `zfs` and `zpool` on FreeBSD.

### Configuration:

Expand All @@ -22,18 +22,24 @@ from `sysctl` and `zpool` on FreeBSD.

## By default, don't gather zpool stats
# poolMetrics = false

## By default, don't gather dataset stats
# datasetMetrics = false
```

### Measurements & Fields:

By default this plugin collects metrics about ZFS internals and pool.
By default this plugin collects metrics about ZFS internals, pool and dataset.
These metrics are either counters or measure sizes
in bytes. These metrics will be in the `zfs` measurement with the field
names listed below.

If `poolMetrics` is enabled then additional metrics will be gathered for
each pool.

If `datasetMetrics` is enabled then additional metrics will be gathered for
each dataset.

- zfs
With fields listed below.

Expand Down Expand Up @@ -206,21 +212,34 @@ On FreeBSD:
- size (integer, bytes)
- fragmentation (integer, percent)

#### Dataset Metrics (optional, only on FreeBSD)

- zfs_dataset
- avail (integer, bytes)
- used (integer, bytes)
- usedsnap (integer, bytes)
- usedds (integer, bytes)

### Tags:

- ZFS stats (`zfs`) will have the following tags:
- pools - A `::` concatenated list of all ZFS pools on the machine.
- datasets - A `::` concatenated list of all ZFS datasets on the machine.

- Pool metrics (`zfs_pool`) will have the following tag:
- pool - with the name of the pool which the metrics are for.
- health - the health status of the pool. (FreeBSD only)

- Dataset metrics (`zfs_dataset`) will have the following tag:
- dataset - with the name of the dataset which the metrics are for.

### Example Output:

```
$ ./telegraf --config telegraf.conf --input-filter zfs --test
* Plugin: zfs, Collection 1
> zfs_pool,health=ONLINE,pool=zroot allocated=1578590208i,capacity=2i,dedupratio=1,fragmentation=1i,free=64456531968i,size=66035122176i 1464473103625653908
> zfs_dataset,dataset=zata avail=10741741326336i,used=8564135526400i,usedsnap=0i,usedds=90112i
> zfs,pools=zroot arcstats_allocated=4167764i,arcstats_anon_evictable_data=0i,arcstats_anon_evictable_metadata=0i,arcstats_anon_size=16896i,arcstats_arc_meta_limit=10485760i,arcstats_arc_meta_max=115269568i,arcstats_arc_meta_min=8388608i,arcstats_arc_meta_used=51977456i,arcstats_c=16777216i,arcstats_c_max=41943040i,arcstats_c_min=16777216i,arcstats_data_size=0i,arcstats_deleted=1699340i,arcstats_demand_data_hits=14836131i,arcstats_demand_data_misses=2842945i,arcstats_demand_hit_predictive_prefetch=0i,arcstats_demand_metadata_hits=1655006i,arcstats_demand_metadata_misses=830074i,arcstats_duplicate_buffers=0i,arcstats_duplicate_buffers_size=0i,arcstats_duplicate_reads=123i,arcstats_evict_l2_cached=0i,arcstats_evict_l2_eligible=332172623872i,arcstats_evict_l2_ineligible=6168576i,arcstats_evict_l2_skip=0i,arcstats_evict_not_enough=12189444i,arcstats_evict_skip=195190764i,arcstats_hash_chain_max=2i,arcstats_hash_chains=10i,arcstats_hash_collisions=43134i,arcstats_hash_elements=2268i,arcstats_hash_elements_max=6136i,arcstats_hdr_size=565632i,arcstats_hits=16515778i,arcstats_l2_abort_lowmem=0i,arcstats_l2_asize=0i,arcstats_l2_cdata_free_on_write=0i,arcstats_l2_cksum_bad=0i,arcstats_l2_compress_failures=0i,arcstats_l2_compress_successes=0i,arcstats_l2_compress_zeros=0i,arcstats_l2_evict_l1cached=0i,arcstats_l2_evict_lock_retry=0i,arcstats_l2_evict_reading=0i,arcstats_l2_feeds=0i,arcstats_l2_free_on_write=0i,arcstats_l2_hdr_size=0i,arcstats_l2_hits=0i,arcstats_l2_io_error=0i,arcstats_l2_misses=0i,arcstats_l2_read_bytes=0i,arcstats_l2_rw_clash=0i,arcstats_l2_size=0i,arcstats_l2_write_buffer_bytes_scanned=0i,arcstats_l2_write_buffer_iter=0i,arcstats_l2_write_buffer_list_iter=0i,arcstats_l2_write_buffer_list_null_iter=0i,arcstats_l2_write_bytes=0i,arcstats_l2_write_full=0i,arcstats_l2_write_in_l2=0i,arcstats_l2_write_io_in_progress=0i,arcstats_l2_write_not_cacheable=380i,arcstats_l2_write_passed_headroom=0i,arcstats_l2_write_pios=0i,arcstats_l2_write_spa_mismatch=0i,arcstats_l2
_write_trylock_fail=0i,arcstats_l2_writes_done=0i,arcstats_l2_writes_error=0i,arcstats_l2_writes_lock_retry=0i,arcstats_l2_writes_sent=0i,arcstats_memory_throttle_count=0i,arcstats_metadata_size=17014784i,arcstats_mfu_evictable_data=0i,arcstats_mfu_evictable_metadata=16384i,arcstats_mfu_ghost_evictable_data=5723648i,arcstats_mfu_ghost_evictable_metadata=10709504i,arcstats_mfu_ghost_hits=1315619i,arcstats_mfu_ghost_size=16433152i,arcstats_mfu_hits=7646611i,arcstats_mfu_size=305152i,arcstats_misses=3676993i,arcstats_mru_evictable_data=0i,arcstats_mru_evictable_metadata=0i,arcstats_mru_ghost_evictable_data=0i,arcstats_mru_ghost_evictable_metadata=80896i,arcstats_mru_ghost_hits=324250i,arcstats_mru_ghost_size=80896i,arcstats_mru_hits=8844526i,arcstats_mru_size=16693248i,arcstats_mutex_miss=354023i,arcstats_other_size=34397040i,arcstats_p=4172800i,arcstats_prefetch_data_hits=0i,arcstats_prefetch_data_misses=0i,arcstats_prefetch_metadata_hits=24641i,arcstats_prefetch_metadata_misses=3974i,arcstats_size=51977456i,arcstats_sync_wait_for_async=0i,vdev_cache_stats_delegations=779i,vdev_cache_stats_hits=323123i,vdev_cache_stats_misses=59929i,zfetchstats_hits=0i,zfetchstats_max_streams=0i,zfetchstats_misses=0i 1464473103634124908
```

Expand Down
22 changes: 16 additions & 6 deletions plugins/inputs/zfs/zfs.go
Original file line number Diff line number Diff line change
@@ -1,14 +1,22 @@
package zfs

import (
"github.com/influxdata/telegraf"
)

// Sysctl is the signature of the hook used to read one kstat metric group;
// it receives the metric name and returns output lines or an error.
type Sysctl func(metric string) ([]string, error)
// Zpool is the signature of the hook used to list pools; it takes no
// arguments and returns output lines or an error.
type Zpool func() ([]string, error)
// Zdataset is the signature of the hook used to list datasets; it receives
// the property names to report and returns output lines or an error.
type Zdataset func(properties []string) ([]string, error)

// Zfs holds the plugin settings (exported fields) together with the hooks
// used to obtain raw data (unexported fields), so the hooks can be swapped
// out, e.g. by tests.
type Zfs struct {
	KstatPath      string
	KstatMetrics   []string
	// PoolMetrics enables the additional per-pool measurements.
	PoolMetrics bool
	// DatasetMetrics enables the additional per-dataset measurements.
	DatasetMetrics bool
	sysctl         Sysctl
	zpool          Zpool
	zdataset       Zdataset
	// Log is excluded from TOML decoding via the "-" tag.
	Log telegraf.Logger `toml:"-"`
}

var sampleConfig = `
Expand All @@ -24,12 +32,14 @@ var sampleConfig = `
# "dmu_tx", "fm", "vdev_mirror_stats", "zfetchstats", "zil"]
## By default, don't gather zpool stats
# poolMetrics = false
## By default, don't gather zdataset stats
# datasetMetrics = false
`

// SampleConfig returns the package-level sampleConfig string.
func (z *Zfs) SampleConfig() string {
return sampleConfig
}

// Description returns a one-line summary of the sources this plugin reads,
// including datasets.
func (z *Zfs) Description() string {
	return "Read metrics of ZFS from arcstats, zfetchstats, vdev_cache_stats, pools and datasets"
}
55 changes: 53 additions & 2 deletions plugins/inputs/zfs/zfs_freebsd.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,47 @@ func (z *Zfs) gatherPoolStats(acc telegraf.Accumulator) (string, error) {
return strings.Join(pools, "::"), nil
}

// gatherDatasetStats lists datasets through z.zdataset and returns their
// names joined with "::".
//
// When z.DatasetMetrics is set, one "zfs_dataset" measurement is added to acc
// per dataset, tagged with the dataset name and carrying the avail, used,
// usedsnap and usedds values as int64 fields. Lines that do not have exactly
// one tab-separated column per requested property are logged and skipped.
//
// An error is returned when z.zdataset fails or when a value is neither "-"
// nor a base-10 integer; the returned string is empty in that case.
func (z *Zfs) gatherDatasetStats(acc telegraf.Accumulator) (string, error) {
	properties := []string{"name", "avail", "used", "usedsnap", "usedds"}

	lines, err := z.zdataset(properties)
	if err != nil {
		return "", err
	}

	datasets := make([]string, 0, len(lines))
	for _, line := range lines {
		col := strings.Split(line, "\t")

		// The name is recorded for every line, even one that is later
		// rejected for metrics, so the joined list covers all output lines.
		datasets = append(datasets, col[0])

		if !z.DatasetMetrics {
			continue
		}
		if len(col) != len(properties) {
			z.Log.Warnf("Invalid number of columns for line: %s", line)
			continue
		}

		tags := map[string]string{"dataset": col[0]}
		fields := make(map[string]interface{}, len(properties)-1)

		for i, key := range properties[1:] {
			raw := col[i+1]
			// "-" marks a property that has no value for this dataset;
			// leave the field out instead of failing the whole gather.
			if raw == "-" {
				continue
			}
			value, err := strconv.ParseInt(raw, 10, 64)
			if err != nil {
				return "", fmt.Errorf("Error parsing %s %q: %w", key, raw, err)
			}
			fields[key] = value
		}

		// Nothing to report when every property was "-".
		if len(fields) == 0 {
			continue
		}
		acc.AddFields("zfs_dataset", fields, tags)
	}

	return strings.Join(datasets, "::"), nil
}

func (z *Zfs) Gather(acc telegraf.Accumulator) error {
kstatMetrics := z.KstatMetrics
if len(kstatMetrics) == 0 {
Expand All @@ -99,6 +140,11 @@ func (z *Zfs) Gather(acc telegraf.Accumulator) error {
return err
}
tags["pools"] = poolNames
datasetNames, err := z.gatherDatasetStats(acc)
if err != nil {
return err
}
tags["datasets"] = datasetNames

fields := make(map[string]interface{})
for _, metric := range kstatMetrics {
Expand Down Expand Up @@ -137,15 +183,20 @@ func zpool() ([]string, error) {
return run("zpool", []string{"list", "-Hp", "-o", "name,health,size,alloc,free,fragmentation,capacity,dedupratio"}...)
}

// zdataset invokes "zfs list -Hp -o <properties>" through run, passing the
// requested properties as one comma-separated list, and returns run's result.
func zdataset(properties []string) ([]string, error) {
	columns := strings.Join(properties, ",")
	return run("zfs", "list", "-Hp", "-o", columns)
}

// sysctl invokes "sysctl -q kstat.zfs.misc.<metric>" through run and returns
// run's result.
func sysctl(metric string) ([]string, error) {
	oid := "kstat.zfs.misc." + metric
	return run("sysctl", "-q", oid)
}

// init registers the plugin under the name "zfs". Each created instance is
// wired to the sysctl, zpool and zdataset functions of this file.
func init() {
	inputs.Add("zfs", func() telegraf.Input {
		return &Zfs{
			sysctl:   sysctl,
			zpool:    zpool,
			zdataset: zdataset,
		}
	})
}
54 changes: 54 additions & 0 deletions plugins/inputs/zfs/zfs_freebsd_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,18 @@ func mock_zpool_unavail() ([]string, error) {
return zpool_output_unavail, nil
}

// $ zfs list -Hp -o name,avail,used,usedsnap,usedds
//
// zdataset_output is the canned dataset listing used by the tests. Columns
// are separated by explicit \t escapes because the parser splits on tabs and
// literal tab characters are easily lost when the file is edited or copied.
var zdataset_output = []string{
	"zata\t10741741326336\t8564135526400\t0\t90112",
	"zata/home\t10741741326336\t2498560\t212992\t2285568",
	"zata/import\t10741741326336\t196608\t81920\t114688",
	"zata/storage\t10741741326336\t8556084379648\t3601138999296\t4954945380352",
}

// mock_zdataset is a test stand-in for the dataset listing hook. It accepts
// (and ignores) the requested property names so that its signature matches
// the Zdataset function type, and always returns zdataset_output.
func mock_zdataset(_ []string) ([]string, error) {
	return zdataset_output, nil
}

// sysctl -q kstat.zfs.misc.arcstats

// sysctl -q kstat.zfs.misc.vdev_cache_stats
Expand Down Expand Up @@ -126,6 +138,39 @@ func TestZfsPoolMetrics_unavail(t *testing.T) {
acc.AssertContainsTaggedFields(t, "zfs_pool", poolMetrics, tags)
}

// TestZfsDatasetMetrics checks that no "zfs_dataset" measurement is produced
// while DatasetMetrics is unset, and that enabling it yields the expected
// fields for the "zata" dataset.
func TestZfsDatasetMetrics(t *testing.T) {
	var acc testutil.Accumulator

	// First run: DatasetMetrics keeps its zero value.
	disabled := &Zfs{
		KstatMetrics: []string{"vdev_cache_stats"},
		sysctl:       mock_sysctl,
		zdataset:     mock_zdataset,
	}
	require.NoError(t, disabled.Gather(&acc))
	require.False(t, acc.HasMeasurement("zfs_dataset"))

	// Drop what the first run collected before gathering again.
	acc.Metrics = nil

	// Second run: dataset metrics switched on.
	enabled := &Zfs{
		KstatMetrics:   []string{"vdev_cache_stats"},
		DatasetMetrics: true,
		sysctl:         mock_sysctl,
		zdataset:       mock_zdataset,
	}
	require.NoError(t, enabled.Gather(&acc))

	// One dataset, all metrics.
	wantTags := map[string]string{"dataset": "zata"}
	wantFields := getZataDatasetMetrics()

	acc.AssertContainsTaggedFields(t, "zfs_dataset", wantFields, wantTags)
}

func TestZfsGeneratesMetrics(t *testing.T) {
var acc testutil.Accumulator

Expand Down Expand Up @@ -178,6 +223,15 @@ func getTemp2PoolMetrics() map[string]interface{} {
}
}

// getZataDatasetMetrics returns the int64 field values expected for the
// "zata" dataset in the dataset metrics test.
func getZataDatasetMetrics() map[string]interface{} {
	expected := make(map[string]interface{}, 4)
	expected["avail"] = int64(10741741326336)
	expected["used"] = int64(8564135526400)
	expected["usedsnap"] = int64(0)
	expected["usedds"] = int64(90112)
	return expected
}

func getKstatMetricsVdevOnly() map[string]interface{} {
return map[string]interface{}{
"vdev_cache_stats_misses": int64(87789),
Expand Down

0 comments on commit ef91f96

Please sign in to comment.