Skip to content

Commit

Permalink
feat(inputs.lustre2): Add eviction_count field (#15044)
Browse files Browse the repository at this point in the history
  • Loading branch information
lukeyeager authored Mar 25, 2024
1 parent 40b88b0 commit c7466b8
Show file tree
Hide file tree
Showing 4 changed files with 131 additions and 0 deletions.
13 changes: 13 additions & 0 deletions plugins/inputs/lustre2/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,20 +24,25 @@ See the [CONFIGURATION.md][CONFIGURATION.md] for more details.
## An array of /proc globs to search for Lustre stats
## If not specified, the default will work on Lustre 2.12.x
##
# mgs_procfiles = [
# "/sys/fs/lustre/mgs/*/eviction_count",
# ]
# ost_procfiles = [
# "/proc/fs/lustre/obdfilter/*/stats",
# "/proc/fs/lustre/osd-ldiskfs/*/stats",
# "/proc/fs/lustre/obdfilter/*/job_stats",
# "/proc/fs/lustre/obdfilter/*/exports/*/stats",
# "/proc/fs/lustre/osd-ldiskfs/*/brw_stats",
# "/proc/fs/lustre/osd-zfs/*/brw_stats",
# "/sys/fs/lustre/obdfilter/*/eviction_count",
# ]
# mds_procfiles = [
# "/proc/fs/lustre/mdt/*/md_stats",
# "/proc/fs/lustre/mdt/*/job_stats",
# "/proc/fs/lustre/mdt/*/exports/*/stats",
# "/proc/fs/lustre/osd-ldiskfs/*/brw_stats",
# "/proc/fs/lustre/osd-zfs/*/brw_stats",
# "/sys/fs/lustre/mdt/*/eviction_count",
# ]
```

Expand Down Expand Up @@ -174,6 +179,14 @@ From `/proc/fs/lustre/mdt/*/job_stats`:
- jobstats_sync
- jobstats_unlink

From `/sys/fs/lustre/*/*/eviction_count`:

- lustre2
- tags:
- name
- fields:
- evictions

## Troubleshooting

Check for the default or custom procfiles in the proc filesystem, and reference
Expand Down
68 changes: 68 additions & 0 deletions plugins/inputs/lustre2/lustre2.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ type tags struct {
// Lustre proc files can change between versions, so we want to future-proof
// by letting people choose what to look at.
type Lustre2 struct {
MgsProcfiles []string `toml:"mgs_procfiles"`
OstProcfiles []string `toml:"ost_procfiles"`
MdsProcfiles []string `toml:"mds_procfiles"`

Expand Down Expand Up @@ -600,6 +601,43 @@ func (l *Lustre2) getLustreProcBrwStats(fileglob string, wantedFields []*mapping
return nil
}

// getLustreEvictionCount reads every file matching fileglob (resolved beneath
// l.rootdir), parses its trimmed contents as an unsigned base-10 integer and
// records the result as the "evictions" field in l.allFields, keyed by a tag
// set that carries only the target name.
//
// It returns an error if globbing fails, or if any matched file cannot be
// read or parsed; processing stops at the first such failure.
func (l *Lustre2) getLustreEvictionCount(fileglob string) error {
	matches, err := filepath.Glob(filepath.Join(l.rootdir, fileglob))
	if err != nil {
		return fmt.Errorf("failed to find files matching glob %s: %w", fileglob, err)
	}

	for _, match := range matches {
		// The target name is taken from the path element directly before the
		// file name, i.e. /sys/fs/lustre/*/<mgt/mdt/ost_name>/eviction_count.
		// This assumes that the target name is always second to last, which
		// is true in Lustre 2.1->2.12
		segments := strings.Split(match, "/")
		if len(segments) < 2 {
			continue
		}
		target := segments[len(segments)-2]

		raw, err := os.ReadFile(match)
		if err != nil {
			return fmt.Errorf("failed to read file %s: %w", match, err)
		}

		// The file content is trimmed before parsing to drop surrounding whitespace.
		count, err := strconv.ParseUint(strings.TrimSpace(string(raw)), 10, 64)
		if err != nil {
			return fmt.Errorf("failed to parse file %s: %w", match, err)
		}

		// Create the field map for this target on first use so that other
		// fields already collected under the same tag set are kept.
		key := tags{target, "", "", "", ""}
		if _, found := l.allFields[key]; !found {
			l.allFields[key] = make(map[string]interface{})
		}
		l.allFields[key]["evictions"] = count
	}
	return nil
}

// Gather reads stats from all lustre targets
func (l *Lustre2) Gather(acc telegraf.Accumulator) error {
l.allFields = make(map[tags]map[string]interface{})
Expand All @@ -609,6 +647,13 @@ func (l *Lustre2) Gather(acc telegraf.Accumulator) error {
return err
}

if len(l.MgsProcfiles) == 0 {
l.MgsProcfiles = []string{
// eviction count
"/sys/fs/lustre/mgs/*/eviction_count",
}
}

if len(l.OstProcfiles) == 0 {
l.OstProcfiles = []string{
// read/write bytes are in obdfilter/<ost_name>/stats
Expand All @@ -621,6 +666,8 @@ func (l *Lustre2) Gather(acc telegraf.Accumulator) error {
"/proc/fs/lustre/osd-ldiskfs/*/brw_stats",
// bulk read/write statistics for zfs
"/proc/fs/lustre/osd-zfs/*/brw_stats",
// eviction count
"/sys/fs/lustre/obdfilter/*/eviction_count",
}
}

Expand All @@ -630,9 +677,20 @@ func (l *Lustre2) Gather(acc telegraf.Accumulator) error {
"/proc/fs/lustre/mdt/*/md_stats",
// Metadata target job stats
"/proc/fs/lustre/mdt/*/job_stats",
// eviction count
"/sys/fs/lustre/mdt/*/eviction_count",
}
}

for _, procfile := range l.MgsProcfiles {
if !strings.HasSuffix(procfile, "eviction_count") {
return fmt.Errorf("no handler found for mgs procfile pattern \"%s\"", procfile)
}
err := l.getLustreEvictionCount(procfile)
if err != nil {
return err
}
}
for _, procfile := range l.OstProcfiles {
if strings.HasSuffix(procfile, "brw_stats") {
err := l.getLustreProcBrwStats(procfile, wantedBrwstatsFields)
Expand All @@ -644,6 +702,11 @@ func (l *Lustre2) Gather(acc telegraf.Accumulator) error {
if err != nil {
return err
}
} else if strings.HasSuffix(procfile, "eviction_count") {
err := l.getLustreEvictionCount(procfile)
if err != nil {
return err
}
} else {
err := l.GetLustreProcStats(procfile, wantedOstFields)
if err != nil {
Expand All @@ -662,6 +725,11 @@ func (l *Lustre2) Gather(acc telegraf.Accumulator) error {
if err != nil {
return err
}
} else if strings.HasSuffix(procfile, "eviction_count") {
err := l.getLustreEvictionCount(procfile)
if err != nil {
return err
}
} else {
err := l.GetLustreProcStats(procfile, wantedMdsFields)
if err != nil {
Expand Down
45 changes: 45 additions & 0 deletions plugins/inputs/lustre2/lustre2_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
package lustre2

import (
"fmt"
"os"
"path/filepath"
"testing"

"github.com/influxdata/toml"
Expand Down Expand Up @@ -570,3 +572,46 @@ func TestLustre2GeneratesBrwstatsMetrics(t *testing.T) {
}
}
}

// TestLustre2GeneratesEvictionMetrics builds a mock sysfs tree containing one
// eviction_count file per target type (mdt, mgs, obdfilter), runs Gather with
// the default procfile globs and checks that each target reports its value in
// the "evictions" field, tagged with the target name.
func TestLustre2GeneratesEvictionMetrics(t *testing.T) {
	// t.TempDir creates a unique directory and removes it automatically when
	// the test finishes, so no manual cleanup is needed.
	rootdir := t.TempDir()

	// setup files in mock sysfs
	type fileEntry struct {
		targetType string
		targetName string
		value      uint64
	}
	fileEntries := []fileEntry{
		{"mdt", "fs-MDT0000", 101},
		{"mgs", "MGS", 202},
		{"obdfilter", "fs-OST0001", 303},
	}
	for _, f := range fileEntries {
		d := filepath.Join(rootdir, "sys", "fs", "lustre", f.targetType, f.targetName)
		require.NoError(t, os.MkdirAll(d, 0750))
		// The trailing newline is written on purpose: the plugin is expected
		// to cope with surrounding whitespace in the file.
		err := os.WriteFile(filepath.Join(d, "eviction_count"), []byte(fmt.Sprintf("%d\n", f.value)), 0640)
		require.NoError(t, err)
	}

	// gather metrics
	m := &Lustre2{rootdir: rootdir}
	var acc testutil.Accumulator
	require.NoError(t, m.Gather(&acc))

	// compare with expectations
	for _, f := range fileEntries {
		acc.AssertContainsTaggedFields(
			t,
			"lustre2",
			map[string]interface{}{
				"evictions": f.value,
			},
			map[string]string{"name": f.targetName},
		)
	}
}
5 changes: 5 additions & 0 deletions plugins/inputs/lustre2/sample.conf
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,23 @@
## An array of /proc globs to search for Lustre stats
## If not specified, the default will work on Lustre 2.12.x
##
# mgs_procfiles = [
# "/sys/fs/lustre/mgs/*/eviction_count",
# ]
# ost_procfiles = [
# "/proc/fs/lustre/obdfilter/*/stats",
# "/proc/fs/lustre/osd-ldiskfs/*/stats",
# "/proc/fs/lustre/obdfilter/*/job_stats",
# "/proc/fs/lustre/obdfilter/*/exports/*/stats",
# "/proc/fs/lustre/osd-ldiskfs/*/brw_stats",
# "/proc/fs/lustre/osd-zfs/*/brw_stats",
# "/sys/fs/lustre/obdfilter/*/eviction_count",
# ]
# mds_procfiles = [
# "/proc/fs/lustre/mdt/*/md_stats",
# "/proc/fs/lustre/mdt/*/job_stats",
# "/proc/fs/lustre/mdt/*/exports/*/stats",
# "/proc/fs/lustre/osd-ldiskfs/*/brw_stats",
# "/proc/fs/lustre/osd-zfs/*/brw_stats",
# "/sys/fs/lustre/mdt/*/eviction_count",
# ]

0 comments on commit c7466b8

Please sign in to comment.