Skip to content

Commit

Permalink
metricbeat: suppress error when RAID metrics are enabled on non-RAID …
Browse files Browse the repository at this point in the history
…system (#41825) (#41856)

* metricbeat: return partial metrics error when RAID metrics are enabled on non-RAID system

When the Linux Metrics integration (beta) is installed with the RAID
metrics option enabled it causes an error if the host does not have a
RAID configuration. This error causes the Agent to go into a degraded
state.

This happens because we report the absence of `/sys/block/md*` devices as an
error, even though it only means that no RAID configuration is in place.

Instead, return a partial metrics error for this case. This still shows up in the
agent status but does not cause a degraded state.

* use partial metrics error

* fix linter issue with iface assertion

(cherry picked from commit 444b8e4)

Co-authored-by: Mauri de Souza Meneguzzo <[email protected]>
  • Loading branch information
mergify[bot] and mauri870 authored Dec 2, 2024
1 parent 0b560e7 commit 28f77b9
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 9 deletions.
12 changes: 5 additions & 7 deletions metricbeat/module/system/raid/blockinfo/getdev.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,15 @@ package blockinfo

import (
"fmt"
"io/ioutil"
"os"
"path/filepath"

"github.com/elastic/beats/v7/metricbeat/mb"
)

// ListAll lists all the multi-disk devices in a RAID array
func ListAll(path string) ([]MDDevice, error) {
dir, err := ioutil.ReadDir(path)
dir, err := os.ReadDir(path)
if err != nil {
return nil, fmt.Errorf("could not read directory: %w", err)
}
Expand All @@ -44,7 +45,7 @@ func ListAll(path string) ([]MDDevice, error) {
}

if len(mds) == 0 {
return nil, fmt.Errorf("no matches from path %s", path)
return nil, mb.PartialMetricsError{Err: fmt.Errorf("no RAID devices found. You have probably enabled the RAID metrics on a non-RAID system.")}
}

return mds, nil
Expand All @@ -69,8 +70,5 @@ func getMDDevice(path string) (MDDevice, error) {
// Right now, we're doing this by looking for an `md` directory in the device dir.
func isMD(path string) bool {
	// The device counts as multi-disk only when Stat on its "md" entry
	// succeeds; any Stat error (missing entry, permission problem, ...) is
	// reported as false rather than surfaced to the caller.
	_, err := os.Stat(filepath.Join(path, "md"))
	return err == nil
}
7 changes: 5 additions & 2 deletions metricbeat/module/system/raid/raid.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,11 @@ type MetricSet struct {

// New creates a new instance of the raid metricset.
func New(base mb.BaseMetricSet) (mb.MetricSet, error) {
sys, ok := base.Module().(resolve.Resolver)
if !ok {
return nil, fmt.Errorf("unexpected module type: %T", base.Module())
}

sys := base.Module().(resolve.Resolver)
return &MetricSet{
BaseMetricSet: base,

Expand All @@ -62,7 +65,7 @@ func blockto1024(b int64) int64 {
func (m *MetricSet) Fetch(r mb.ReporterV2) error {
devices, err := blockinfo.ListAll(m.mod.ResolveHostFS("/sys/block"))
if err != nil {
return fmt.Errorf("failed to parse sysfs: %w", err)
return fmt.Errorf("failed to list RAID devices: %w", err)
}

for _, blockDev := range devices {
Expand Down
20 changes: 20 additions & 0 deletions metricbeat/module/system/raid/raid_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,14 @@
package raid

import (
"errors"
"os"
"path/filepath"
"testing"

"github.com/stretchr/testify/assert"

"github.com/elastic/beats/v7/metricbeat/mb"
mbtest "github.com/elastic/beats/v7/metricbeat/mb/testing"
_ "github.com/elastic/beats/v7/metricbeat/module/system"
)
Expand All @@ -46,6 +50,22 @@ func TestFetch(t *testing.T) {
events[0].BeatEvent("system", "raid").Fields.StringToPrint())
}

// TestFetchNoRAID checks that a fetch against a host filesystem whose
// sys/block directory is empty yields exactly one error, that the error is a
// partial metrics error carrying the expected message, and that no events are
// produced.
func TestFetchNoRAID(t *testing.T) {
	// Build a throwaway hostfs that contains only an empty sys/block directory.
	hostfs := t.TempDir()
	blockDir := filepath.Join(hostfs, "sys/block")
	assert.NoError(t, os.MkdirAll(blockDir, 0755))

	cfg := getConfig()
	cfg["hostfs"] = hostfs

	metricSet := mbtest.NewReportingMetricSetV2Error(t, cfg)
	events, errs := mbtest.ReportingFetchV2Error(metricSet)

	// Collapse the reported errors once so the type and message checks look at
	// the same value.
	joined := errors.Join(errs...)

	assert.Len(t, errs, 1)
	assert.ErrorAs(t, joined, &mb.PartialMetricsError{})
	assert.Contains(t, joined.Error(), "failed to list RAID devices: no RAID devices found. You have probably enabled the RAID metrics on a non-RAID system.")
	assert.Empty(t, events)
}

func getConfig() map[string]interface{} {
return map[string]interface{}{
"module": "system",
Expand Down

0 comments on commit 28f77b9

Please sign in to comment.