From c7466b883530db50d27d548c317f13be5d1be062 Mon Sep 17 00:00:00 2001 From: Luke Yeager Date: Mon, 25 Mar 2024 07:33:33 -0500 Subject: [PATCH] feat(inputs.lustre2): Add eviction_count field (#15044) --- plugins/inputs/lustre2/README.md | 13 +++++ plugins/inputs/lustre2/lustre2.go | 68 ++++++++++++++++++++++++++ plugins/inputs/lustre2/lustre2_test.go | 45 +++++++++++++++++ plugins/inputs/lustre2/sample.conf | 5 ++ 4 files changed, 131 insertions(+) diff --git a/plugins/inputs/lustre2/README.md b/plugins/inputs/lustre2/README.md index 7d5cf44b8f764..301352b970598 100644 --- a/plugins/inputs/lustre2/README.md +++ b/plugins/inputs/lustre2/README.md @@ -24,6 +24,9 @@ See the [CONFIGURATION.md][CONFIGURATION.md] for more details. ## An array of /proc globs to search for Lustre stats ## If not specified, the default will work on Lustre 2.12.x ## + # mgs_procfiles = [ + # "/sys/fs/lustre/mgs/*/eviction_count", + # ] # ost_procfiles = [ # "/proc/fs/lustre/obdfilter/*/stats", # "/proc/fs/lustre/osd-ldiskfs/*/stats", @@ -31,6 +34,7 @@ See the [CONFIGURATION.md][CONFIGURATION.md] for more details. # "/proc/fs/lustre/obdfilter/*/exports/*/stats", # "/proc/fs/lustre/osd-ldiskfs/*/brw_stats", # "/proc/fs/lustre/osd-zfs/*/brw_stats", + # "/sys/fs/lustre/obdfilter/*/eviction_count", # ] # mds_procfiles = [ # "/proc/fs/lustre/mdt/*/md_stats", @@ -38,6 +42,7 @@ See the [CONFIGURATION.md][CONFIGURATION.md] for more details.
# "/proc/fs/lustre/mdt/*/exports/*/stats", # "/proc/fs/lustre/osd-ldiskfs/*/brw_stats", # "/proc/fs/lustre/osd-zfs/*/brw_stats", + # "/sys/fs/lustre/mdt/*/eviction_count", # ] ``` @@ -174,6 +179,14 @@ From `/proc/fs/lustre/mdt/*/job_stats`: - jobstats_sync - jobstats_unlink +From `/sys/fs/lustre/*/*/eviction_count`: + +- lustre2 + - tags: + - name + - fields: + - evictions + ## Troubleshooting Check for the default or custom procfiles in the proc filesystem, and reference diff --git a/plugins/inputs/lustre2/lustre2.go b/plugins/inputs/lustre2/lustre2.go index e072ba16218c9..bb8c08909a8dc 100644 --- a/plugins/inputs/lustre2/lustre2.go +++ b/plugins/inputs/lustre2/lustre2.go @@ -30,6 +30,7 @@ type tags struct { // Lustre proc files can change between versions, so we want to future-proof // by letting people choose what to look at. type Lustre2 struct { + MgsProcfiles []string `toml:"mgs_procfiles"` OstProcfiles []string `toml:"ost_procfiles"` MdsProcfiles []string `toml:"mds_procfiles"` @@ -600,6 +601,43 @@ func (l *Lustre2) getLustreProcBrwStats(fileglob string, wantedFields []*mapping return nil } +func (l *Lustre2) getLustreEvictionCount(fileglob string) error { + files, err := filepath.Glob(filepath.Join(l.rootdir, fileglob)) + if err != nil { + return fmt.Errorf("failed to find files matching glob %s: %w", fileglob, err) + } + + for _, file := range files { + // Turn /sys/fs/lustre/*/{target}/eviction_count into just the object store target name + // This assumes that the target name is always second to last, which is true in Lustre 2.1->2.12 + path := strings.Split(file, "/") + if len(path) < 2 { + continue + } + name := path[len(path)-2] + + contents, err := os.ReadFile(file) + if err != nil { + return fmt.Errorf("failed to read file %s: %w", file, err) + } + + value, err := strconv.ParseUint(strings.TrimSpace(string(contents)), 10, 64) + if err != nil { + return fmt.Errorf("failed to parse file %s: %w", file, err) + } + + tag := tags{name, "", "", "", ""} + fields, 
ok := l.allFields[tag] + if !ok { + fields = make(map[string]interface{}) + l.allFields[tag] = fields + } + + fields["evictions"] = value + } + return nil +} + // Gather reads stats from all lustre targets func (l *Lustre2) Gather(acc telegraf.Accumulator) error { l.allFields = make(map[tags]map[string]interface{}) @@ -609,6 +647,13 @@ func (l *Lustre2) Gather(acc telegraf.Accumulator) error { return err } + if len(l.MgsProcfiles) == 0 { + l.MgsProcfiles = []string{ + // eviction count + "/sys/fs/lustre/mgs/*/eviction_count", + } + } + if len(l.OstProcfiles) == 0 { l.OstProcfiles = []string{ // read/write bytes are in obdfilter//stats @@ -621,6 +666,8 @@ func (l *Lustre2) Gather(acc telegraf.Accumulator) error { "/proc/fs/lustre/osd-ldiskfs/*/brw_stats", // bulk read/write statistics for zfs "/proc/fs/lustre/osd-zfs/*/brw_stats", + // eviction count + "/sys/fs/lustre/obdfilter/*/eviction_count", } } @@ -630,9 +677,20 @@ func (l *Lustre2) Gather(acc telegraf.Accumulator) error { "/proc/fs/lustre/mdt/*/md_stats", // Metadata target job stats "/proc/fs/lustre/mdt/*/job_stats", + // eviction count + "/sys/fs/lustre/mdt/*/eviction_count", } } + for _, procfile := range l.MgsProcfiles { + if !strings.HasSuffix(procfile, "eviction_count") { + return fmt.Errorf("no handler found for mgs procfile pattern \"%s\"", procfile) + } + err := l.getLustreEvictionCount(procfile) + if err != nil { + return err + } + } for _, procfile := range l.OstProcfiles { if strings.HasSuffix(procfile, "brw_stats") { err := l.getLustreProcBrwStats(procfile, wantedBrwstatsFields) @@ -644,6 +702,11 @@ func (l *Lustre2) Gather(acc telegraf.Accumulator) error { if err != nil { return err } + } else if strings.HasSuffix(procfile, "eviction_count") { + err := l.getLustreEvictionCount(procfile) + if err != nil { + return err + } } else { err := l.GetLustreProcStats(procfile, wantedOstFields) if err != nil { @@ -662,6 +725,11 @@ func (l *Lustre2) Gather(acc telegraf.Accumulator) error { if err != nil { 
return err } + } else if strings.HasSuffix(procfile, "eviction_count") { + err := l.getLustreEvictionCount(procfile) + if err != nil { + return err + } } else { err := l.GetLustreProcStats(procfile, wantedMdsFields) if err != nil { diff --git a/plugins/inputs/lustre2/lustre2_test.go b/plugins/inputs/lustre2/lustre2_test.go index 7e161d81a4d66..2d7df8bec5b0f 100644 --- a/plugins/inputs/lustre2/lustre2_test.go +++ b/plugins/inputs/lustre2/lustre2_test.go @@ -3,7 +3,9 @@ package lustre2 import ( + "fmt" "os" + "path/filepath" "testing" "github.com/influxdata/toml" @@ -570,3 +572,46 @@ func TestLustre2GeneratesBrwstatsMetrics(t *testing.T) { } } } + +func TestLustre2GeneratesEvictionMetrics(t *testing.T) { + rootdir, err := os.MkdirTemp("", "telegraf-lustre-evictions") + require.NoError(t, err) + defer os.RemoveAll(rootdir) + + // setup files in mock sysfs + type fileEntry struct { + targetType string + targetName string + value uint64 + } + fileEntries := []fileEntry{ + {"mdt", "fs-MDT0000", 101}, + {"mgs", "MGS", 202}, + {"obdfilter", "fs-OST0001", 303}, + } + for _, f := range fileEntries { + d := filepath.Join(rootdir, "sys", "fs", "lustre", f.targetType, f.targetName) + err := os.MkdirAll(d, 0750) + require.NoError(t, err) + err = os.WriteFile(filepath.Join(d, "eviction_count"), []byte(fmt.Sprintf("%d\n", f.value)), 0640) + require.NoError(t, err) + } + + // gather metrics + m := &Lustre2{rootdir: rootdir} + var acc testutil.Accumulator + err = m.Gather(&acc) + require.NoError(t, err) + + // compare with expectations + for _, f := range fileEntries { + acc.AssertContainsTaggedFields( + t, + "lustre2", + map[string]interface{}{ + "evictions": f.value, + }, + map[string]string{"name": f.targetName}, + ) + } +} diff --git a/plugins/inputs/lustre2/sample.conf b/plugins/inputs/lustre2/sample.conf index 1b244b5ed08fe..7032ebd46d243 100644 --- a/plugins/inputs/lustre2/sample.conf +++ b/plugins/inputs/lustre2/sample.conf @@ -4,6 +4,9 @@ ## An array of /proc globs to 
search for Lustre stats ## If not specified, the default will work on Lustre 2.12.x ## + # mgs_procfiles = [ + # "/sys/fs/lustre/mgs/*/eviction_count", + # ] # ost_procfiles = [ # "/proc/fs/lustre/obdfilter/*/stats", # "/proc/fs/lustre/osd-ldiskfs/*/stats", @@ -11,6 +14,7 @@ # "/proc/fs/lustre/obdfilter/*/exports/*/stats", # "/proc/fs/lustre/osd-ldiskfs/*/brw_stats", # "/proc/fs/lustre/osd-zfs/*/brw_stats", + # "/sys/fs/lustre/obdfilter/*/eviction_count", # ] # mds_procfiles = [ # "/proc/fs/lustre/mdt/*/md_stats", @@ -18,4 +22,5 @@ # "/proc/fs/lustre/mdt/*/exports/*/stats", # "/proc/fs/lustre/osd-ldiskfs/*/brw_stats", # "/proc/fs/lustre/osd-zfs/*/brw_stats", + # "/sys/fs/lustre/mdt/*/eviction_count", # ]