From 11ce941fae5480d25c78919b26d4f2ce47384572 Mon Sep 17 00:00:00 2001 From: Rickard von Essen Date: Tue, 21 Feb 2017 00:27:33 +0100 Subject: [PATCH 01/18] smart: Gather S.M.A.R.T. information from storage devices This adds a new input plugin which uses the `smartctl` utility from the smartmontools package to gather metrics from S.M.A.R.T. storage devices. Signed-off-by: Rickard von Essen --- plugins/inputs/all/all.go | 1 + plugins/inputs/smart/README.md | 76 +++++++ plugins/inputs/smart/smart.go | 235 ++++++++++++++++++++++ plugins/inputs/smart/smart_test.go | 310 +++++++++++++++++++++++++++++ 4 files changed, 622 insertions(+) create mode 100644 plugins/inputs/smart/README.md create mode 100644 plugins/inputs/smart/smart.go create mode 100644 plugins/inputs/smart/smart_test.go diff --git a/plugins/inputs/all/all.go b/plugins/inputs/all/all.go index 84c320fed5d22..4bacff5eca7bd 100644 --- a/plugins/inputs/all/all.go +++ b/plugins/inputs/all/all.go @@ -73,6 +73,7 @@ import ( _ "github.com/influxdata/telegraf/plugins/inputs/rethinkdb" _ "github.com/influxdata/telegraf/plugins/inputs/riak" _ "github.com/influxdata/telegraf/plugins/inputs/sensors" + _ "github.com/influxdata/telegraf/plugins/inputs/smart" _ "github.com/influxdata/telegraf/plugins/inputs/snmp" _ "github.com/influxdata/telegraf/plugins/inputs/snmp_legacy" _ "github.com/influxdata/telegraf/plugins/inputs/socket_listener" diff --git a/plugins/inputs/smart/README.md b/plugins/inputs/smart/README.md new file mode 100644 index 0000000000000..33e3fce2b8dff --- /dev/null +++ b/plugins/inputs/smart/README.md @@ -0,0 +1,76 @@ +# Telegraf S.M.A.R.T. plugin + +Get metrics using the command line utility `smartctl` for S.M.A.R.T. (Self-Monitoring, Analysis and Reporting Technology) storage devices. 
SMART is a monitoring system included in computer hard disk drives (HDDs) and solid-state drives (SSDs) that detects and reports on various indicators of drive reliability, with the intent of enabling the anticipation of hardware failures. +See [smartmontools](https://www.smartmontools.org/). + +If no devices are specified, the plugin will scan for SMART devices via the following command: + +``` +smartctl --scan +``` + +On some platforms (e.g. Darwin/macOS) this doesn't return a useful list of devices and you must instead specify which devices to collect metrics from in the configuration file. + +Metrics will be reported from the following `smartctl` command: + +``` +smartctl --info --attributes --nocheck=standby --format=brief +``` + +## Measurements + +- smart: + + * Tags: + - `device` + - `device_model` + - `serial_no` + - `capacity` + - `enabled` + - `id` + - `name` + - `flags` + - `fail` + * Fields: + - `value` + - `worst` + - `threshold` + - `raw_value` + +### Flags + +The interpretation of the tag `flags` is: + - *K* auto-keep + - *C* event count + - *R* error rate + - *S* speed/performance + - *O* updated online + - *P* prefailure warning + +## Configuration + +```toml +# Read metrics from storage devices supporting S.M.A.R.T. +[[inputs.smart]] + ## optionally specify the path to the smartctl executable + # path = "/usr/bin/smartctl" + # + ## optionally specify devices to exclude from reporting. + # excludes = [ "/dev/pass6" ] + # + ## optionally specify devices, if unset all S.M.A.R.T. 
devices + ## will be included + # devices = [ "/dev/ada0" ] +``` + +## Output + +When retrieving stats from the local machine (no server specified): +``` +> smart,serial_no=WD-WMC4N0900000,id=1,name=Raw_Read_Error_Rate,flags=POSR-K,fail=-,host=example,device=/dev/ada0,device_model=WDC\ WD30EFRX-68EUZN0,capacity=3000592982016,enabled=Enabled value=200i,worst=200i,threshold=51i,raw_value=0i 1486892929000000000 +> smart,serial_no=WD-WMC4N0900000,device=/dev/ada0,device_model=WDC\ WD30EFRX-68EUZN0,capacity=3000592982016,enabled=Enabled,id=3,name=Spin_Up_Time,flags=POS--K,fail=-,host=example value=181i,worst=180i,threshold=21i,raw_value=5916i 1486892929000000000 +> smart,device_model=WDC\ WD30EFRX-68EUZN0,capacity=3000592982016,enabled=Enabled,name=Start_Stop_Count,flags=-O--CK,fail=-,device=/dev/ada0,serial_no=WD-WMC4N0900000,id=4,host=example value=100i,worst=100i,threshold=0i,raw_value=18i 1486892929000000000 +> smart,enabled=Enabled,device_model=WDC\ WD30EFRX-68EUZN0,id=5,name=Reallocated_Sector_Ct,capacity=3000592982016,device=/dev/ada0,serial_no=WD-WMC4N0900000,flags=PO--CK,fail=-,host=example value=200i,worst=200i,threshold=140i,raw_value=0i 1486892929000000000 +> smart,serial_no=WD-WMC4N0900000,capacity=3000592982016,enabled=Enabled,name=Seek_Error_Rate,host=example,device=/dev/ada0,id=7,flags=-OSR-K,fail=-,device_model=WDC\ WD30EFRX-68EUZN0 value=200i,worst=200i,threshold=0i,raw_value=0i 1486892929000000000 +> smart,flags=-O--CK,device_model=WDC\ WD30EFRX-68EUZN0,capacity=3000592982016,enabled=Enabled,id=9,name=Power_On_Hours,fail=-,host=example,device=/dev/ada0,serial_no=WD-WMC4N0900000 value=65i,worst=65i,threshold=0i,raw_value=25998i 1486892929000000000 +``` diff --git a/plugins/inputs/smart/smart.go b/plugins/inputs/smart/smart.go new file mode 100644 index 0000000000000..20dcd3394200d --- /dev/null +++ b/plugins/inputs/smart/smart.go @@ -0,0 +1,235 @@ +package smart + +import ( + "fmt" + "os/exec" + "regexp" + "strconv" + "strings" + "time" + + 
"github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/internal" + "github.com/influxdata/telegraf/plugins/inputs" +) + +var ( + execCommand = exec.Command // execCommand is used to mock commands in tests. + + deviceInScan = regexp.MustCompile("^(/dev/\\w*)\\s+.*") + // Device Model: APPLE SSD SM256E + modelInInfo = regexp.MustCompile("^Device Model:\\s+(.*)$") + // Serial Number: S0X5NZBC422720 + serialInInfo = regexp.MustCompile("^Serial Number:\\s+(.*)$") + // User Capacity: 251,000,193,024 bytes [251 GB] + usercapacityInInfo = regexp.MustCompile("^User Capacity:\\s+([0-9,]+)\\s+bytes.*$") + // SMART support is: Enabled + smartEnabledInInfo = regexp.MustCompile("^SMART support is:\\s+(\\w+)$") + + // ID# ATTRIBUTE_NAME FLAGS VALUE WORST THRESH FAIL RAW_VALUE + // 1 Raw_Read_Error_Rate -O-RC- 200 200 000 - 0 + // 5 Reallocated_Sector_Ct PO--CK 100 100 000 - 0 + // 192 Power-Off_Retract_Count -O--C- 097 097 000 - 14716 + attribute = regexp.MustCompile("^\\s*([0-9]+)\\s(\\S+)\\s+([-P][-O][-S][-R][-C][-K])\\s+([0-9]+)\\s+([0-9]+)\\s+([0-9]+)\\s+([-\\w]+)\\s+([\\w\\+\\.]+).*$") +) + +type Smart struct { + Path string + Excludes []string + Devices []string +} + +var sampleConfig = ` + ## optionally specify the path to the smartctl executable + # path = "/usr/bin/smartctl" + # + ## optionally specify devices to exclude from reporting. + # excludes = [ "/dev/pass6" ] + # + ## optionally specify devices, if unset a scan (smartctl --scan) + ## for S.M.A.R.T. devices will done and all found will be included. + # devices = [ "/dev/ada0" ] +` + +func (m *Smart) SampleConfig() string { + return sampleConfig +} + +func (m *Smart) Description() string { + return "Read metrics from storage devices supporting S.M.A.R.T." 
+} + +func (m *Smart) Gather(acc telegraf.Accumulator) error { + fmt.Printf("Config: %v\n", m) + if len(m.Path) == 0 { + return fmt.Errorf("smartctl not found: verify that smartctl is installed and that smartctl is in your PATH") + } + + devices := m.Devices + if len(devices) == 0 { + var err error + devices, err = m.scan() + if err != nil { + return err + } + } + + err := m.getAttributes(acc, devices) + if err != nil { + return err + } + + return nil +} + +// Scan for S.M.A.R.T. devices +func (m *Smart) scan() ([]string, error) { + + cmd := execCommand(m.Path, "--scan") + out, err := internal.CombinedOutputTimeout(cmd, time.Second*5) + if err != nil { + return []string{}, fmt.Errorf("failed to run command %s: %s - %s", strings.Join(cmd.Args, " "), err, string(out)) + } + + devices := []string{} + for _, line := range strings.Split(string(out), "\n") { + dev := deviceInScan.FindStringSubmatch(line) + if len(dev) == 2 && !excludedDev(m.Excludes, dev[1]) { + devices = append(devices, dev[1]) + } + } + return devices, nil +} + +func excludedDev(excludes []string, device string) bool { + fmt.Printf("DEBUG: %s in %v?\n", device, excludes) + for _, exclude := range excludes { + if device == exclude { + fmt.Printf("DEBUG: filtered: %s\n", device) + return true + } + } + return false +} + +// Get info and attributes for each S.M.A.R.T. 
device +func (m *Smart) getAttributes(acc telegraf.Accumulator, devices []string) error { + + for _, device := range devices { + cmd := execCommand(m.Path, "--info", "--attributes", "--tolerance=verypermissive", "--nocheck=standby", "--format=brief", device) + out, err := internal.CombinedOutputTimeout(cmd, time.Second*5) + if err != nil { + return fmt.Errorf("failed to run command %s: %s - %s", strings.Join(cmd.Args, " "), err, string(out)) + } + + device_tags := map[string]string{} + device_tags["device"] = device + + for _, line := range strings.Split(string(out), "\n") { + model := modelInInfo.FindStringSubmatch(line) + if len(model) > 1 { + device_tags["device_model"] = model[1] + } + + serial := serialInInfo.FindStringSubmatch(line) + if len(serial) > 1 { + device_tags["serial_no"] = serial[1] + } + + capacity := usercapacityInInfo.FindStringSubmatch(line) + if len(capacity) > 1 { + device_tags["capacity"] = strings.Replace(capacity[1], ",", "", -1) + } + + enabled := smartEnabledInInfo.FindStringSubmatch(line) + if len(enabled) > 1 { + device_tags["enabled"] = enabled[1] + } + + attr := attribute.FindStringSubmatch(line) + + if len(attr) > 1 { + tags := map[string]string{} + for k, v := range device_tags { + tags[k] = v + } + fields := make(map[string]interface{}) + + tags["id"] = attr[1] + tags["name"] = attr[2] + tags["flags"] = attr[3] + + if i, err := strconv.Atoi(attr[4]); err == nil { + fields["value"] = i + } + if i, err := strconv.Atoi(attr[5]); err == nil { + fields["worst"] = i + } + if i, err := strconv.Atoi(attr[6]); err == nil { + fields["threshold"] = i + } + + tags["fail"] = attr[7] + if val, err := parseRawValue(attr[8]); err == nil { + fields["raw_value"] = val + } + + acc.AddFields("smart", fields, tags) + } + } + } + return nil +} + +func parseRawValue(rawVal string) (int, error) { + + // Integer + if i, err := strconv.Atoi(rawVal); err == nil { + return i, nil + } + + // Duration: 65h+33m+09.259s + unit := 
regexp.MustCompile("^(.*)([hms])$") + parts := strings.Split(rawVal, "+") + if len(parts) == 0 { + return 0, fmt.Errorf("Couldn't parse RAW_VALUE '%s'", rawVal) + } + + duration := 0 + for _, part := range parts { + timePart := unit.FindStringSubmatch(part) + if len(timePart) == 0 { + continue + } + switch timePart[2] { + case "h": + duration += atoi(timePart[1]) * 3600 + case "m": + duration += atoi(timePart[1]) * 60 + case "s": + // drop fractions of seconds + duration += atoi(strings.Split(timePart[1], ".")[0]) + default: + // Unknown, ignore + } + } + return duration, nil +} + +func atoi(str string) int { + if i, err := strconv.Atoi(str); err == nil { + return i + } + return 0 +} + +func init() { + m := Smart{} + path, _ := exec.LookPath("smartctl") + if len(path) > 0 { + m.Path = path + } + inputs.Add("smart", func() telegraf.Input { + return &m + }) +} diff --git a/plugins/inputs/smart/smart_test.go b/plugins/inputs/smart/smart_test.go new file mode 100644 index 0000000000000..68c6e85dac051 --- /dev/null +++ b/plugins/inputs/smart/smart_test.go @@ -0,0 +1,310 @@ +package smart + +import ( + "fmt" + "os" + "os/exec" + "testing" + + "github.com/influxdata/telegraf/testutil" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +var ( + mockScanData = `/dev/ada0 -d atacam # /dev/ada0, ATA device +` + mockInfoAttributeData = `smartctl 6.5 2016-05-07 r4318 [Darwin 16.4.0 x86_64] (local build) +Copyright (C) 2002-16, Bruce Allen, Christian Franke, www.smartmontools.org + +CHECK POWER MODE not implemented, ignoring -n option +=== START OF INFORMATION SECTION === +Model Family: Apple SD/SM/TS...E/F SSDs +Device Model: APPLE SSD SM256E +Serial Number: S0X5NZBC422720 +LU WWN Device Id: 5 002538 043584d30 +Firmware Version: CXM09A1Q +User Capacity: 251,000,193,024 bytes [251 GB] +Sector Sizes: 512 bytes logical, 4096 bytes physical +Rotation Rate: Solid State Device +Device is: In smartctl database [for details use: -P show] +ATA Version 
is: ATA8-ACS T13/1699-D revision 4c +SATA Version is: SATA 3.0, 6.0 Gb/s (current: 6.0 Gb/s) +Local Time is: Thu Feb 9 16:48:45 2017 CET +SMART support is: Available - device has SMART capability. +SMART support is: Enabled + +=== START OF READ SMART DATA SECTION === +SMART Attributes Data Structure revision number: 1 +Vendor Specific SMART Attributes with Thresholds: +ID# ATTRIBUTE_NAME FLAGS VALUE WORST THRESH FAIL RAW_VALUE + 1 Raw_Read_Error_Rate -O-RC- 200 200 000 - 0 + 5 Reallocated_Sector_Ct PO--CK 100 100 000 - 0 + 9 Power_On_Hours -O--CK 099 099 000 - 2988 + 12 Power_Cycle_Count -O--CK 085 085 000 - 14879 +169 Unknown_Attribute PO--C- 253 253 010 - 2044932921600 +173 Wear_Leveling_Count -O--CK 185 185 100 - 957808640337 +190 Airflow_Temperature_Cel -O---K 055 040 045 Past 45 (Min/Max 43/57 #2689) +192 Power-Off_Retract_Count -O--C- 097 097 000 - 14716 +194 Temperature_Celsius -O---K 066 021 000 - 34 (Min/Max 14/79) +197 Current_Pending_Sector -O---K 100 100 000 - 0 +199 UDMA_CRC_Error_Count -O-RC- 200 200 000 - 0 +240 Head_Flying_Hours ------ 100 253 000 - 6585h+55m+23.234s + ||||||_ K auto-keep + |||||__ C event count + ||||___ R error rate + |||____ S speed/performance + ||_____ O updated online + |______ P prefailure warning +` +) + +func TestGather(t *testing.T) { + s := &Smart{ + path: "smartctl", + } + // overwriting exec commands with mock commands + execCommand = fakeExecCommand + var acc testutil.Accumulator + + err := s.Gather(&acc) + + require.NoError(t, err) + assert.Equal(t, 48, acc.NFields(), "Wrong number of fields gathered") + + device_tags := map[string]string{ + "device": "/dev/ada0", + "device_model": "APPLE SSD SM256E", + "serial_no": "S0X5NZBC422720", + "enabled": "Enabled", + "capacity": "251000193024", + } + + var testsAda0Device = []struct { + fields map[string]interface{} + tags map[string]string + }{ + { + map[string]interface{}{ + "value": int(200), + "worst": int(200), + "threshold": int(0), + "raw_value": int(0), + }, + 
map[string]string{ + "id": "1", + "name": "Raw_Read_Error_Rate", + "flags": "-O-RC-", + "fail": "-", + }, + }, + { + map[string]interface{}{ + "value": int(100), + "worst": int(100), + "threshold": int(0), + "raw_value": int(0), + }, + map[string]string{ + "id": "5", + "name": "Reallocated_Sector_Ct", + "flags": "PO--CK", + "fail": "-", + }, + }, + { + map[string]interface{}{ + "value": int(99), + "worst": int(99), + "threshold": int(0), + "raw_value": int(2988), + }, + map[string]string{ + "id": "9", + "name": "Power_On_Hours", + "flags": "-O--CK", + "fail": "-", + }, + }, + { + map[string]interface{}{ + "value": int(85), + "worst": int(85), + "threshold": int(0), + "raw_value": int(14879), + }, + map[string]string{ + "id": "12", + "name": "Power_Cycle_Count", + "flags": "-O--CK", + "fail": "-", + }, + }, + { + map[string]interface{}{ + "value": int(253), + "worst": int(253), + "threshold": int(10), + "raw_value": int(2044932921600), + }, + map[string]string{ + "id": "169", + "name": "Unknown_Attribute", + "flags": "PO--C-", + "fail": "-", + }, + }, + { + map[string]interface{}{ + "value": int(185), + "worst": int(185), + "threshold": int(100), + "raw_value": int(957808640337), + }, + map[string]string{ + "id": "173", + "name": "Wear_Leveling_Count", + "flags": "-O--CK", + "fail": "-", + }, + }, + { + map[string]interface{}{ + "value": int(55), + "worst": int(40), + "threshold": int(45), + "raw_value": int(45), + }, + map[string]string{ + "id": "190", + "name": "Airflow_Temperature_Cel", + "flags": "-O---K", + "fail": "Past", + }, + }, + { + map[string]interface{}{ + "value": int(97), + "worst": int(97), + "threshold": int(0), + "raw_value": int(14716), + }, + map[string]string{ + "id": "192", + "name": "Power-Off_Retract_Count", + "flags": "-O--C-", + "fail": "-", + }, + }, + { + map[string]interface{}{ + "value": int(66), + "worst": int(21), + "threshold": int(0), + "raw_value": int(34), + }, + map[string]string{ + "id": "194", + "name": "Temperature_Celsius", + 
"flags": "-O---K", + "fail": "-", + }, + }, + { + map[string]interface{}{ + "value": int(100), + "worst": int(100), + "threshold": int(0), + "raw_value": int(0), + }, + map[string]string{ + "id": "197", + "name": "Current_Pending_Sector", + "flags": "-O---K", + "fail": "-", + }, + }, + { + map[string]interface{}{ + "value": int(200), + "worst": int(200), + "threshold": int(0), + "raw_value": int(0), + }, + map[string]string{ + "id": "199", + "name": "UDMA_CRC_Error_Count", + "flags": "-O-RC-", + "fail": "-", + }, + }, + { + map[string]interface{}{ + "value": int(100), + "worst": int(253), + "threshold": int(0), + "raw_value": int(23709323), + }, + map[string]string{ + "id": "240", + "name": "Head_Flying_Hours", + "flags": "------", + "fail": "-", + }, + }, + } + + for _, test := range testsAda0Device { + for k, v := range device_tags { + test.tags[k] = v + } + + acc.AssertContainsTaggedFields(t, "smart", test.fields, test.tags) + } +} + +func TestExcludedDev(t *testing.T) { + assert.Equal(t, true, excludedDev([]string{"/dev/pass6"}, "/dev/pass6"), "Should be excluded.") + assert.Equal(t, false, excludedDev([]string{}, "/dev/pass6"), "Shouldn't be excluded.") + assert.Equal(t, false, excludedDev([]string{"/dev/pass6"}, "/dev/pass1"), "Shouldn't be excluded.") + +} + +// fackeExecCommand is a helper function that mock +// the exec.Command call (and call the test binary) +func fakeExecCommand(command string, args ...string) *exec.Cmd { + cs := []string{"-test.run=TestHelperProcess", "--", command} + cs = append(cs, args...) + cmd := exec.Command(os.Args[0], cs...) + cmd.Env = []string{"GO_WANT_HELPER_PROCESS=1"} + return cmd +} + +// TestHelperProcess isn't a real test. It's used to mock exec.Command +// For example, if you run: +// GO_WANT_HELPER_PROCESS=1 go test -test.run=TestHelperProcess -- --scan +// it returns below mockScanData. 
+func TestHelperProcess(t *testing.T) { + if os.Getenv("GO_WANT_HELPER_PROCESS") != "1" { + return + } + + args := os.Args + + // Previous arguments are tests stuff, that looks like : + // /tmp/go-build970079519/…/_test/integration.test -test.run=TestHelperProcess -- + cmd, arg1, args := args[3], args[4], args[5:] + + if cmd == "smartctl" { + if arg1 == "--scan" { + fmt.Fprint(os.Stdout, mockScanData) + } + if arg1 == "--info" { + fmt.Fprint(os.Stdout, mockInfoAttributeData) + } + } else { + fmt.Fprint(os.Stdout, "command not found") + os.Exit(1) + } + os.Exit(0) +} From a71cb5fec17c180672b9ca4d1d1425fa7b94dcf7 Mon Sep 17 00:00:00 2001 From: Rickard von Essen Date: Tue, 21 Feb 2017 00:28:01 +0100 Subject: [PATCH 02/18] Added ability to specify device type --- plugins/inputs/smart/smart.go | 36 +++++++++++++++++------------- plugins/inputs/smart/smart_test.go | 8 +++---- 2 files changed, 25 insertions(+), 19 deletions(-) diff --git a/plugins/inputs/smart/smart.go b/plugins/inputs/smart/smart.go index 20dcd3394200d..8840e2d955d72 100644 --- a/plugins/inputs/smart/smart.go +++ b/plugins/inputs/smart/smart.go @@ -16,7 +16,6 @@ import ( var ( execCommand = exec.Command // execCommand is used to mock commands in tests. - deviceInScan = regexp.MustCompile("^(/dev/\\w*)\\s+.*") // Device Model: APPLE SSD SM256E modelInInfo = regexp.MustCompile("^Device Model:\\s+(.*)$") // Serial Number: S0X5NZBC422720 @@ -46,9 +45,11 @@ var sampleConfig = ` ## optionally specify devices to exclude from reporting. # excludes = [ "/dev/pass6" ] # - ## optionally specify devices, if unset a scan (smartctl --scan) - ## for S.M.A.R.T. devices will done and all found will be included. - # devices = [ "/dev/ada0" ] + ## optionally specify devices and device type, if unset + ## a scan (smartctl --scan) for S.M.A.R.T. devices will + ## done and all found will be included except for the + ## excluded in excludes. 
+ # devices = [ "/dev/ada0 -d atacam" ] ` func (m *Smart) SampleConfig() string { @@ -93,20 +94,23 @@ func (m *Smart) scan() ([]string, error) { devices := []string{} for _, line := range strings.Split(string(out), "\n") { - dev := deviceInScan.FindStringSubmatch(line) - if len(dev) == 2 && !excludedDev(m.Excludes, dev[1]) { - devices = append(devices, dev[1]) + dev := strings.Split(line, "#") + if len(dev) > 1 && !excludedDev(m.Excludes, strings.TrimSpace(dev[0])) { + devices = append(devices, strings.TrimSpace(dev[0])) } } return devices, nil } -func excludedDev(excludes []string, device string) bool { - fmt.Printf("DEBUG: %s in %v?\n", device, excludes) - for _, exclude := range excludes { - if device == exclude { - fmt.Printf("DEBUG: filtered: %s\n", device) - return true +func excludedDev(excludes []string, deviceLine string) bool { + fmt.Printf("DEBUG: %s in %v?\n", deviceLine, excludes) + device := strings.Split(deviceLine, " ") + if len(device) != 0 { + for _, exclude := range excludes { + if device[0] == exclude { + fmt.Printf("DEBUG: filtered: %s\n", device[0]) + return true + } } } return false @@ -116,14 +120,16 @@ func excludedDev(excludes []string, device string) bool { func (m *Smart) getAttributes(acc telegraf.Accumulator, devices []string) error { for _, device := range devices { - cmd := execCommand(m.Path, "--info", "--attributes", "--tolerance=verypermissive", "--nocheck=standby", "--format=brief", device) + args := []string{"--info", "--attributes", "--tolerance=verypermissive", "--nocheck=standby", "--format=brief"} + args = append(args, strings.Split(device, " ")...) + cmd := execCommand(m.Path, args...) 
out, err := internal.CombinedOutputTimeout(cmd, time.Second*5) if err != nil { return fmt.Errorf("failed to run command %s: %s - %s", strings.Join(cmd.Args, " "), err, string(out)) } device_tags := map[string]string{} - device_tags["device"] = device + device_tags["device"] = strings.Split(device, " ")[0] for _, line := range strings.Split(string(out), "\n") { model := modelInInfo.FindStringSubmatch(line) diff --git a/plugins/inputs/smart/smart_test.go b/plugins/inputs/smart/smart_test.go index 68c6e85dac051..ef919aeb4432d 100644 --- a/plugins/inputs/smart/smart_test.go +++ b/plugins/inputs/smart/smart_test.go @@ -61,7 +61,7 @@ ID# ATTRIBUTE_NAME FLAGS VALUE WORST THRESH FAIL RAW_VALUE func TestGather(t *testing.T) { s := &Smart{ - path: "smartctl", + Path: "smartctl", } // overwriting exec commands with mock commands execCommand = fakeExecCommand @@ -264,9 +264,9 @@ func TestGather(t *testing.T) { } func TestExcludedDev(t *testing.T) { - assert.Equal(t, true, excludedDev([]string{"/dev/pass6"}, "/dev/pass6"), "Should be excluded.") - assert.Equal(t, false, excludedDev([]string{}, "/dev/pass6"), "Shouldn't be excluded.") - assert.Equal(t, false, excludedDev([]string{"/dev/pass6"}, "/dev/pass1"), "Shouldn't be excluded.") + assert.Equal(t, true, excludedDev([]string{"/dev/pass6"}, "/dev/pass6 -d atacam"), "Should be excluded.") + assert.Equal(t, false, excludedDev([]string{}, "/dev/pass6 -d atacam"), "Shouldn't be excluded.") + assert.Equal(t, false, excludedDev([]string{"/dev/pass6"}, "/dev/pass1 -d atacam"), "Shouldn't be excluded.") } From ea0da088b45494e3ef13e380a5c8c8bdbaad0c22 Mon Sep 17 00:00:00 2001 From: Rickard von Essen Date: Tue, 21 Feb 2017 00:28:39 +0100 Subject: [PATCH 03/18] Added overall smart health --- plugins/inputs/smart/smart.go | 9 ++++++++- plugins/inputs/smart/smart_test.go | 4 ++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/plugins/inputs/smart/smart.go b/plugins/inputs/smart/smart.go index 8840e2d955d72..f6157d65e5314 
100644 --- a/plugins/inputs/smart/smart.go +++ b/plugins/inputs/smart/smart.go @@ -24,6 +24,8 @@ var ( usercapacityInInfo = regexp.MustCompile("^User Capacity:\\s+([0-9,]+)\\s+bytes.*$") // SMART support is: Enabled smartEnabledInInfo = regexp.MustCompile("^SMART support is:\\s+(\\w+)$") + // SMART overall-health self-assessment test result: PASSED + smartOverallHealth = regexp.MustCompile("^SMART overall-health self-assessment test result:\\s+(\\w+).*$") // ID# ATTRIBUTE_NAME FLAGS VALUE WORST THRESH FAIL RAW_VALUE // 1 Raw_Read_Error_Rate -O-RC- 200 200 000 - 0 @@ -120,7 +122,7 @@ func excludedDev(excludes []string, deviceLine string) bool { func (m *Smart) getAttributes(acc telegraf.Accumulator, devices []string) error { for _, device := range devices { - args := []string{"--info", "--attributes", "--tolerance=verypermissive", "--nocheck=standby", "--format=brief"} + args := []string{"--info", "--health", "--attributes", "--tolerance=verypermissive", "--nocheck=standby", "--format=brief"} args = append(args, strings.Split(device, " ")...) cmd := execCommand(m.Path, args...) out, err := internal.CombinedOutputTimeout(cmd, time.Second*5) @@ -152,6 +154,11 @@ func (m *Smart) getAttributes(acc telegraf.Accumulator, devices []string) error device_tags["enabled"] = enabled[1] } + health := smartOverallHealth.FindStringSubmatch(line) + if len(health) > 1 { + device_tags["health"] = health[1] + } + attr := attribute.FindStringSubmatch(line) if len(attr) > 1 { diff --git a/plugins/inputs/smart/smart_test.go b/plugins/inputs/smart/smart_test.go index ef919aeb4432d..83ecebb7271b0 100644 --- a/plugins/inputs/smart/smart_test.go +++ b/plugins/inputs/smart/smart_test.go @@ -34,6 +34,9 @@ Local Time is: Thu Feb 9 16:48:45 2017 CET SMART support is: Available - device has SMART capability. 
SMART support is: Enabled +=== START OF READ SMART DATA SECTION === +SMART overall-health self-assessment test result: PASSED + === START OF READ SMART DATA SECTION === SMART Attributes Data Structure revision number: 1 Vendor Specific SMART Attributes with Thresholds: @@ -78,6 +81,7 @@ func TestGather(t *testing.T) { "serial_no": "S0X5NZBC422720", "enabled": "Enabled", "capacity": "251000193024", + "health": "PASSED", } var testsAda0Device = []struct { From d73b6d6983309b947ef58df7220ec40889babc0c Mon Sep 17 00:00:00 2001 From: Rickard von Essen Date: Tue, 21 Feb 2017 00:28:47 +0100 Subject: [PATCH 04/18] Gather disk stats concurrently --- plugins/inputs/smart/smart.go | 143 ++++++++++++++++++++-------------- 1 file changed, 83 insertions(+), 60 deletions(-) diff --git a/plugins/inputs/smart/smart.go b/plugins/inputs/smart/smart.go index f6157d65e5314..58400d2017473 100644 --- a/plugins/inputs/smart/smart.go +++ b/plugins/inputs/smart/smart.go @@ -1,6 +1,7 @@ package smart import ( + "errors" "fmt" "os/exec" "regexp" @@ -77,9 +78,13 @@ func (m *Smart) Gather(acc telegraf.Accumulator) error { } } - err := m.getAttributes(acc, devices) - if err != nil { - return err + errs := m.getAttributes(acc, devices) + if len(errs) > 0 { + var errStrs []string + for _, e := range errs { + errStrs = append(errStrs, e.Error()) + } + return errors.New(strings.Join(errStrs, ", ")) } return nil @@ -119,79 +124,97 @@ func excludedDev(excludes []string, deviceLine string) bool { } // Get info and attributes for each S.M.A.R.T. device -func (m *Smart) getAttributes(acc telegraf.Accumulator, devices []string) error { +func (m *Smart) getAttributes(acc telegraf.Accumulator, devices []string) []error { + errchan := make(chan error) for _, device := range devices { - args := []string{"--info", "--health", "--attributes", "--tolerance=verypermissive", "--nocheck=standby", "--format=brief"} - args = append(args, strings.Split(device, " ")...) - cmd := execCommand(m.Path, args...) 
- out, err := internal.CombinedOutputTimeout(cmd, time.Second*5) + go gatherDisk(acc, m.Path, device, errchan) + } + + var errors []error + for i := 0; i < len(devices); i++ { + err := <-errchan if err != nil { - return fmt.Errorf("failed to run command %s: %s - %s", strings.Join(cmd.Args, " "), err, string(out)) + errors = append(errors, err) } + } - device_tags := map[string]string{} - device_tags["device"] = strings.Split(device, " ")[0] + return errors +} - for _, line := range strings.Split(string(out), "\n") { - model := modelInInfo.FindStringSubmatch(line) - if len(model) > 1 { - device_tags["device_model"] = model[1] - } +func gatherDisk(acc telegraf.Accumulator, path, device string, err chan error) { - serial := serialInInfo.FindStringSubmatch(line) - if len(serial) > 1 { - device_tags["serial_no"] = serial[1] - } + args := []string{"--info", "--health", "--attributes", "--tolerance=verypermissive", "--nocheck=standby", "--format=brief"} + args = append(args, strings.Split(device, " ")...) + cmd := execCommand(path, args...) 
+ out, e := internal.CombinedOutputTimeout(cmd, time.Second*5) + if e != nil { - capacity := usercapacityInInfo.FindStringSubmatch(line) - if len(capacity) > 1 { - device_tags["capacity"] = strings.Replace(capacity[1], ",", "", -1) - } + err <- fmt.Errorf("failed to run command %s: %s - %s", strings.Join(cmd.Args, " "), e, string(out)) + return + } + + device_tags := map[string]string{} + device_tags["device"] = strings.Split(device, " ")[0] + + for _, line := range strings.Split(string(out), "\n") { + model := modelInInfo.FindStringSubmatch(line) + if len(model) > 1 { + device_tags["device_model"] = model[1] + } - enabled := smartEnabledInInfo.FindStringSubmatch(line) - if len(enabled) > 1 { - device_tags["enabled"] = enabled[1] + serial := serialInInfo.FindStringSubmatch(line) + if len(serial) > 1 { + device_tags["serial_no"] = serial[1] + } + + capacity := usercapacityInInfo.FindStringSubmatch(line) + if len(capacity) > 1 { + device_tags["capacity"] = strings.Replace(capacity[1], ",", "", -1) + } + + enabled := smartEnabledInInfo.FindStringSubmatch(line) + if len(enabled) > 1 { + device_tags["enabled"] = enabled[1] + } + + health := smartOverallHealth.FindStringSubmatch(line) + if len(health) > 1 { + device_tags["health"] = health[1] + } + + attr := attribute.FindStringSubmatch(line) + + if len(attr) > 1 { + tags := map[string]string{} + for k, v := range device_tags { + tags[k] = v } + fields := make(map[string]interface{}) - health := smartOverallHealth.FindStringSubmatch(line) - if len(health) > 1 { - device_tags["health"] = health[1] + tags["id"] = attr[1] + tags["name"] = attr[2] + tags["flags"] = attr[3] + + if i, err := strconv.Atoi(attr[4]); err == nil { + fields["value"] = i + } + if i, err := strconv.Atoi(attr[5]); err == nil { + fields["worst"] = i + } + if i, err := strconv.Atoi(attr[6]); err == nil { + fields["threshold"] = i } - attr := attribute.FindStringSubmatch(line) - - if len(attr) > 1 { - tags := map[string]string{} - for k, v := range 
device_tags { - tags[k] = v - } - fields := make(map[string]interface{}) - - tags["id"] = attr[1] - tags["name"] = attr[2] - tags["flags"] = attr[3] - - if i, err := strconv.Atoi(attr[4]); err == nil { - fields["value"] = i - } - if i, err := strconv.Atoi(attr[5]); err == nil { - fields["worst"] = i - } - if i, err := strconv.Atoi(attr[6]); err == nil { - fields["threshold"] = i - } - - tags["fail"] = attr[7] - if val, err := parseRawValue(attr[8]); err == nil { - fields["raw_value"] = val - } - - acc.AddFields("smart", fields, tags) + tags["fail"] = attr[7] + if val, err := parseRawValue(attr[8]); err == nil { + fields["raw_value"] = val } + + acc.AddFields("smart", fields, tags) } } - return nil + err <- nil } func parseRawValue(rawVal string) (int, error) { From 0ccdfedf60e482e1f993fbb9b543f27e785f2438 Mon Sep 17 00:00:00 2001 From: Rickard von Essen Date: Tue, 21 Feb 2017 00:28:55 +0100 Subject: [PATCH 05/18] Properly handle exitcodes for sleeping and failing disks etc. --- plugins/inputs/smart/smart.go | 24 +++++-- plugins/inputs/smart/smart_test.go | 110 ++++++++++++++++------------- 2 files changed, 81 insertions(+), 53 deletions(-) diff --git a/plugins/inputs/smart/smart.go b/plugins/inputs/smart/smart.go index 58400d2017473..dfd0e3a92d1c5 100644 --- a/plugins/inputs/smart/smart.go +++ b/plugins/inputs/smart/smart.go @@ -7,6 +7,7 @@ import ( "regexp" "strconv" "strings" + "syscall" "time" "github.com/influxdata/telegraf" @@ -64,7 +65,6 @@ func (m *Smart) Description() string { } func (m *Smart) Gather(acc telegraf.Accumulator) error { - fmt.Printf("Config: %v\n", m) if len(m.Path) == 0 { return fmt.Errorf("smartctl not found: verify that smartctl is installed and that smartctl is in your PATH") } @@ -142,22 +142,37 @@ func (m *Smart) getAttributes(acc telegraf.Accumulator, devices []string) []erro return errors } +// Command line parse errors are denoted by the exit code having the 0 bit set. 
+// All other errors are drive/communication errors and should be ignored. +func exitStatus(err error) (int, error) { + if exiterr, ok := err.(*exec.ExitError); ok { + if status, ok := exiterr.Sys().(syscall.WaitStatus); ok { + return status.ExitStatus(), nil + } + } + return 0, err +} + func gatherDisk(acc telegraf.Accumulator, path, device string, err chan error) { args := []string{"--info", "--health", "--attributes", "--tolerance=verypermissive", "--nocheck=standby", "--format=brief"} args = append(args, strings.Split(device, " ")...) cmd := execCommand(path, args...) out, e := internal.CombinedOutputTimeout(cmd, time.Second*5) - if e != nil { + outStr := string(out) - err <- fmt.Errorf("failed to run command %s: %s - %s", strings.Join(cmd.Args, " "), e, string(out)) + // Ignore all exit statuses except if it is a command line parse error + exitStatus, er := exitStatus(e) + if er != nil { + err <- fmt.Errorf("failed to run command %s: %s - %s", strings.Join(cmd.Args, " "), e, outStr) return } device_tags := map[string]string{} device_tags["device"] = strings.Split(device, " ")[0] - for _, line := range strings.Split(string(out), "\n") { + for _, line := range strings.Split(outStr, "\n") { + model := modelInInfo.FindStringSubmatch(line) if len(model) > 1 { device_tags["device_model"] = model[1] @@ -196,6 +211,7 @@ func gatherDisk(acc telegraf.Accumulator, path, device string, err chan error) { tags["name"] = attr[2] tags["flags"] = attr[3] + fields["exit_status"] = exitStatus if i, err := strconv.Atoi(attr[4]); err == nil { fields["value"] = i } diff --git a/plugins/inputs/smart/smart_test.go b/plugins/inputs/smart/smart_test.go index 83ecebb7271b0..5b7faaefadf73 100644 --- a/plugins/inputs/smart/smart_test.go +++ b/plugins/inputs/smart/smart_test.go @@ -73,7 +73,7 @@ func TestGather(t *testing.T) { err := s.Gather(&acc) require.NoError(t, err) - assert.Equal(t, 48, acc.NFields(), "Wrong number of fields gathered") + assert.Equal(t, 60, acc.NFields(), "Wrong 
number of fields gathered") device_tags := map[string]string{ "device": "/dev/ada0", @@ -90,10 +90,11 @@ func TestGather(t *testing.T) { }{ { map[string]interface{}{ - "value": int(200), - "worst": int(200), - "threshold": int(0), - "raw_value": int(0), + "value": int(200), + "worst": int(200), + "threshold": int(0), + "raw_value": int(0), + "exit_status": int(0), }, map[string]string{ "id": "1", @@ -104,10 +105,11 @@ func TestGather(t *testing.T) { }, { map[string]interface{}{ - "value": int(100), - "worst": int(100), - "threshold": int(0), - "raw_value": int(0), + "value": int(100), + "worst": int(100), + "threshold": int(0), + "raw_value": int(0), + "exit_status": int(0), }, map[string]string{ "id": "5", @@ -118,10 +120,11 @@ func TestGather(t *testing.T) { }, { map[string]interface{}{ - "value": int(99), - "worst": int(99), - "threshold": int(0), - "raw_value": int(2988), + "value": int(99), + "worst": int(99), + "threshold": int(0), + "raw_value": int(2988), + "exit_status": int(0), }, map[string]string{ "id": "9", @@ -132,10 +135,11 @@ func TestGather(t *testing.T) { }, { map[string]interface{}{ - "value": int(85), - "worst": int(85), - "threshold": int(0), - "raw_value": int(14879), + "value": int(85), + "worst": int(85), + "threshold": int(0), + "raw_value": int(14879), + "exit_status": int(0), }, map[string]string{ "id": "12", @@ -146,10 +150,11 @@ func TestGather(t *testing.T) { }, { map[string]interface{}{ - "value": int(253), - "worst": int(253), - "threshold": int(10), - "raw_value": int(2044932921600), + "value": int(253), + "worst": int(253), + "threshold": int(10), + "raw_value": int(2044932921600), + "exit_status": int(0), }, map[string]string{ "id": "169", @@ -160,10 +165,11 @@ func TestGather(t *testing.T) { }, { map[string]interface{}{ - "value": int(185), - "worst": int(185), - "threshold": int(100), - "raw_value": int(957808640337), + "value": int(185), + "worst": int(185), + "threshold": int(100), + "raw_value": int(957808640337), + 
"exit_status": int(0), }, map[string]string{ "id": "173", @@ -174,10 +180,11 @@ func TestGather(t *testing.T) { }, { map[string]interface{}{ - "value": int(55), - "worst": int(40), - "threshold": int(45), - "raw_value": int(45), + "value": int(55), + "worst": int(40), + "threshold": int(45), + "raw_value": int(45), + "exit_status": int(0), }, map[string]string{ "id": "190", @@ -188,10 +195,11 @@ func TestGather(t *testing.T) { }, { map[string]interface{}{ - "value": int(97), - "worst": int(97), - "threshold": int(0), - "raw_value": int(14716), + "value": int(97), + "worst": int(97), + "threshold": int(0), + "raw_value": int(14716), + "exit_status": int(0), }, map[string]string{ "id": "192", @@ -202,10 +210,11 @@ func TestGather(t *testing.T) { }, { map[string]interface{}{ - "value": int(66), - "worst": int(21), - "threshold": int(0), - "raw_value": int(34), + "value": int(66), + "worst": int(21), + "threshold": int(0), + "raw_value": int(34), + "exit_status": int(0), }, map[string]string{ "id": "194", @@ -216,10 +225,11 @@ func TestGather(t *testing.T) { }, { map[string]interface{}{ - "value": int(100), - "worst": int(100), - "threshold": int(0), - "raw_value": int(0), + "value": int(100), + "worst": int(100), + "threshold": int(0), + "raw_value": int(0), + "exit_status": int(0), }, map[string]string{ "id": "197", @@ -230,10 +240,11 @@ func TestGather(t *testing.T) { }, { map[string]interface{}{ - "value": int(200), - "worst": int(200), - "threshold": int(0), - "raw_value": int(0), + "value": int(200), + "worst": int(200), + "threshold": int(0), + "raw_value": int(0), + "exit_status": int(0), }, map[string]string{ "id": "199", @@ -244,10 +255,11 @@ func TestGather(t *testing.T) { }, { map[string]interface{}{ - "value": int(100), - "worst": int(253), - "threshold": int(0), - "raw_value": int(23709323), + "value": int(100), + "worst": int(253), + "threshold": int(0), + "raw_value": int(23709323), + "exit_status": int(0), }, map[string]string{ "id": "240", From 
73d66b22325d2b6dbdd13e399f9ebea0028dfdb2 Mon Sep 17 00:00:00 2001 From: Rickard von Essen Date: Tue, 21 Feb 2017 00:29:02 +0100 Subject: [PATCH 06/18] Removed debug prints --- plugins/inputs/smart/smart.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/plugins/inputs/smart/smart.go b/plugins/inputs/smart/smart.go index dfd0e3a92d1c5..a4991e6307812 100644 --- a/plugins/inputs/smart/smart.go +++ b/plugins/inputs/smart/smart.go @@ -110,12 +110,10 @@ func (m *Smart) scan() ([]string, error) { } func excludedDev(excludes []string, deviceLine string) bool { - fmt.Printf("DEBUG: %s in %v?\n", deviceLine, excludes) device := strings.Split(deviceLine, " ") if len(device) != 0 { for _, exclude := range excludes { if device[0] == exclude { - fmt.Printf("DEBUG: filtered: %s\n", device[0]) return true } } From 2c02b6cd44a42a4c04c3ecae858f2e6ee7680da4 Mon Sep 17 00:00:00 2001 From: Rickard von Essen Date: Tue, 21 Feb 2017 00:29:16 +0100 Subject: [PATCH 07/18] Split metrics into smart_device and smart_attribute --- plugins/inputs/smart/smart.go | 10 +- plugins/inputs/smart/smart_test.go | 153 +++++++++++++++++------------ 2 files changed, 95 insertions(+), 68 deletions(-) diff --git a/plugins/inputs/smart/smart.go b/plugins/inputs/smart/smart.go index a4991e6307812..857645758104c 100644 --- a/plugins/inputs/smart/smart.go +++ b/plugins/inputs/smart/smart.go @@ -168,6 +168,8 @@ func gatherDisk(acc telegraf.Accumulator, path, device string, err chan error) { device_tags := map[string]string{} device_tags["device"] = strings.Split(device, " ")[0] + device_fields := make(map[string]interface{}) + device_fields["exit_status"] = exitStatus for _, line := range strings.Split(outStr, "\n") { @@ -200,11 +202,9 @@ func gatherDisk(acc telegraf.Accumulator, path, device string, err chan error) { if len(attr) > 1 { tags := map[string]string{} - for k, v := range device_tags { - tags[k] = v - } fields := make(map[string]interface{}) + tags["device"] = strings.Split(device, " ")[0] 
tags["id"] = attr[1] tags["name"] = attr[2] tags["flags"] = attr[3] @@ -225,9 +225,11 @@ func gatherDisk(acc telegraf.Accumulator, path, device string, err chan error) { fields["raw_value"] = val } - acc.AddFields("smart", fields, tags) + acc.AddFields("smart_attribute", fields, tags) } } + acc.AddFields("smart_device", device_fields, device_tags) + err <- nil } diff --git a/plugins/inputs/smart/smart_test.go b/plugins/inputs/smart/smart_test.go index 5b7faaefadf73..1d2afc4d84496 100644 --- a/plugins/inputs/smart/smart_test.go +++ b/plugins/inputs/smart/smart_test.go @@ -73,18 +73,9 @@ func TestGather(t *testing.T) { err := s.Gather(&acc) require.NoError(t, err) - assert.Equal(t, 60, acc.NFields(), "Wrong number of fields gathered") + assert.Equal(t, 61, acc.NFields(), "Wrong number of fields gathered") - device_tags := map[string]string{ - "device": "/dev/ada0", - "device_model": "APPLE SSD SM256E", - "serial_no": "S0X5NZBC422720", - "enabled": "Enabled", - "capacity": "251000193024", - "health": "PASSED", - } - - var testsAda0Device = []struct { + var testsAda0Attributes = []struct { fields map[string]interface{} tags map[string]string }{ @@ -97,10 +88,11 @@ func TestGather(t *testing.T) { "exit_status": int(0), }, map[string]string{ - "id": "1", - "name": "Raw_Read_Error_Rate", - "flags": "-O-RC-", - "fail": "-", + "device": "/dev/ada0", + "id": "1", + "name": "Raw_Read_Error_Rate", + "flags": "-O-RC-", + "fail": "-", }, }, { @@ -112,10 +104,11 @@ func TestGather(t *testing.T) { "exit_status": int(0), }, map[string]string{ - "id": "5", - "name": "Reallocated_Sector_Ct", - "flags": "PO--CK", - "fail": "-", + "device": "/dev/ada0", + "id": "5", + "name": "Reallocated_Sector_Ct", + "flags": "PO--CK", + "fail": "-", }, }, { @@ -127,10 +120,11 @@ func TestGather(t *testing.T) { "exit_status": int(0), }, map[string]string{ - "id": "9", - "name": "Power_On_Hours", - "flags": "-O--CK", - "fail": "-", + "device": "/dev/ada0", + "id": "9", + "name": "Power_On_Hours", + 
"flags": "-O--CK", + "fail": "-", }, }, { @@ -142,10 +136,11 @@ func TestGather(t *testing.T) { "exit_status": int(0), }, map[string]string{ - "id": "12", - "name": "Power_Cycle_Count", - "flags": "-O--CK", - "fail": "-", + "device": "/dev/ada0", + "id": "12", + "name": "Power_Cycle_Count", + "flags": "-O--CK", + "fail": "-", }, }, { @@ -157,10 +152,11 @@ func TestGather(t *testing.T) { "exit_status": int(0), }, map[string]string{ - "id": "169", - "name": "Unknown_Attribute", - "flags": "PO--C-", - "fail": "-", + "device": "/dev/ada0", + "id": "169", + "name": "Unknown_Attribute", + "flags": "PO--C-", + "fail": "-", }, }, { @@ -172,10 +168,11 @@ func TestGather(t *testing.T) { "exit_status": int(0), }, map[string]string{ - "id": "173", - "name": "Wear_Leveling_Count", - "flags": "-O--CK", - "fail": "-", + "device": "/dev/ada0", + "id": "173", + "name": "Wear_Leveling_Count", + "flags": "-O--CK", + "fail": "-", }, }, { @@ -187,10 +184,11 @@ func TestGather(t *testing.T) { "exit_status": int(0), }, map[string]string{ - "id": "190", - "name": "Airflow_Temperature_Cel", - "flags": "-O---K", - "fail": "Past", + "device": "/dev/ada0", + "id": "190", + "name": "Airflow_Temperature_Cel", + "flags": "-O---K", + "fail": "Past", }, }, { @@ -202,10 +200,11 @@ func TestGather(t *testing.T) { "exit_status": int(0), }, map[string]string{ - "id": "192", - "name": "Power-Off_Retract_Count", - "flags": "-O--C-", - "fail": "-", + "device": "/dev/ada0", + "id": "192", + "name": "Power-Off_Retract_Count", + "flags": "-O--C-", + "fail": "-", }, }, { @@ -217,10 +216,11 @@ func TestGather(t *testing.T) { "exit_status": int(0), }, map[string]string{ - "id": "194", - "name": "Temperature_Celsius", - "flags": "-O---K", - "fail": "-", + "device": "/dev/ada0", + "id": "194", + "name": "Temperature_Celsius", + "flags": "-O---K", + "fail": "-", }, }, { @@ -232,10 +232,11 @@ func TestGather(t *testing.T) { "exit_status": int(0), }, map[string]string{ - "id": "197", - "name": 
"Current_Pending_Sector", - "flags": "-O---K", - "fail": "-", + "device": "/dev/ada0", + "id": "197", + "name": "Current_Pending_Sector", + "flags": "-O---K", + "fail": "-", }, }, { @@ -247,10 +248,11 @@ func TestGather(t *testing.T) { "exit_status": int(0), }, map[string]string{ - "id": "199", - "name": "UDMA_CRC_Error_Count", - "flags": "-O-RC-", - "fail": "-", + "device": "/dev/ada0", + "id": "199", + "name": "UDMA_CRC_Error_Count", + "flags": "-O-RC-", + "fail": "-", }, }, { @@ -262,21 +264,44 @@ func TestGather(t *testing.T) { "exit_status": int(0), }, map[string]string{ - "id": "240", - "name": "Head_Flying_Hours", - "flags": "------", - "fail": "-", + "device": "/dev/ada0", + "id": "240", + "name": "Head_Flying_Hours", + "flags": "------", + "fail": "-", }, }, } - for _, test := range testsAda0Device { - for k, v := range device_tags { - test.tags[k] = v - } + for _, test := range testsAda0Attributes { + acc.AssertContainsTaggedFields(t, "smart_attribute", test.fields, test.tags) + } + + // tags = map[string]string{} + + var testsAda0Device = []struct { + fields map[string]interface{} + tags map[string]string + }{ + { + map[string]interface{}{ + "exit_status": int(0), + }, + map[string]string{ + "device": "/dev/ada0", + "device_model": "APPLE SSD SM256E", + "serial_no": "S0X5NZBC422720", + "enabled": "Enabled", + "capacity": "251000193024", + "health": "PASSED", + }, + }, + } - acc.AssertContainsTaggedFields(t, "smart", test.fields, test.tags) + for _, test := range testsAda0Device { + acc.AssertContainsTaggedFields(t, "smart_device", test.fields, test.tags) } + } func TestExcludedDev(t *testing.T) { From 9535507f8e38bd3d8752a2db2fd74d94c9cffa08 Mon Sep 17 00:00:00 2001 From: Rickard von Essen Date: Tue, 21 Feb 2017 00:29:28 +0100 Subject: [PATCH 08/18] Added support for selecting --nocheck= 5.41 and 5.42 have problems determining the current power mode and don't recognise the --nocheck argument even tough it's in the docs. 
--- plugins/inputs/smart/smart.go | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/plugins/inputs/smart/smart.go b/plugins/inputs/smart/smart.go index 857645758104c..09172eba1c547 100644 --- a/plugins/inputs/smart/smart.go +++ b/plugins/inputs/smart/smart.go @@ -38,18 +38,27 @@ var ( type Smart struct { Path string + Nocheck string Excludes []string Devices []string } var sampleConfig = ` - ## optionally specify the path to the smartctl executable + ## Optionally specify the path to the smartctl executable # path = "/usr/bin/smartctl" # - ## optionally specify devices to exclude from reporting. + ## Skip checking disks in this power mode. Defaults to + ## "standby" to not wake up disks that have stoped rotating. + ## See --nockeck in the man pages for smartctl. + ## smartctl version 5.41 and 5.42 have faulty detection of + ## power mode and might require changing this value to + ## "never" depending on your disks. + # nocheck = "standby" + # + ## Optionally specify devices to exclude from reporting. # excludes = [ "/dev/pass6" ] # - ## optionally specify devices and device type, if unset + ## Optionally specify devices and device type, if unset ## a scan (smartctl --scan) for S.M.A.R.T. devices will ## done and all found will be included except for the ## excluded in excludes. 
@@ -126,7 +135,7 @@ func (m *Smart) getAttributes(acc telegraf.Accumulator, devices []string) []erro errchan := make(chan error) for _, device := range devices { - go gatherDisk(acc, m.Path, device, errchan) + go gatherDisk(acc, m.Path, m.Nocheck, device, errchan) } var errors []error @@ -151,9 +160,10 @@ func exitStatus(err error) (int, error) { return 0, err } -func gatherDisk(acc telegraf.Accumulator, path, device string, err chan error) { +func gatherDisk(acc telegraf.Accumulator, path, nockeck, device string, err chan error) { - args := []string{"--info", "--health", "--attributes", "--tolerance=verypermissive", "--nocheck=standby", "--format=brief"} + // smartctl 5.41 & 5.42 have are broken regarding handling of --nocheck/-n + args := []string{"--info", "--health", "--attributes", "--tolerance=verypermissive", "-n", nockeck, "--format=brief"} args = append(args, strings.Split(device, " ")...) cmd := execCommand(path, args...) out, e := internal.CombinedOutputTimeout(cmd, time.Second*5) @@ -281,6 +291,8 @@ func init() { if len(path) > 0 { m.Path = path } + m.Nocheck = "standby" + inputs.Add("smart", func() telegraf.Input { return &m }) From 61088ed49f62f5ca2e1ea767159d95fc40339424 Mon Sep 17 00:00:00 2001 From: Rickard von Essen Date: Tue, 21 Feb 2017 00:29:37 +0100 Subject: [PATCH 09/18] Updated the docs --- plugins/inputs/smart/README.md | 75 +++++++++++++++++++++++----------- 1 file changed, 51 insertions(+), 24 deletions(-) diff --git a/plugins/inputs/smart/README.md b/plugins/inputs/smart/README.md index 33e3fce2b8dff..e57b911b2d4f4 100644 --- a/plugins/inputs/smart/README.md +++ b/plugins/inputs/smart/README.md @@ -1,7 +1,7 @@ # Telegraf S.M.A.R.T. plugin Get metrics using the command line utility `smartctl` for S.M.A.R.T. (Self-Monitoring, Analysis and Reporting Technology) storage devices. 
SMART is a monitoring system included in computer hard disk drives (HDDs) and solid-state drives (SSDs)[1] that detects and reports on various indicators of drive reliability, with the intent of enabling the anticipation of hardware failures. -See smartmontools(https://www.smartmontools.org/). +See smartmontools (https://www.smartmontools.org/). If no devices are specified, the plugin will scan for SMART devices via the following command: @@ -9,33 +9,49 @@ If no devices are specified, the plugin will scan for SMART devices via the foll smartctl --scan ``` -On some platforms (e.g. Darwin/macOS) this doesn't return a useful list of devices and you must instead specify which devices to collect metrics from in the configuration file. - Metrics will be reported from the following `smartctl` command: ``` -smartctl --info --attributes --nocheck=standby --format=brief +smartctl --info --attributes --health -n --format=brief +``` + +This plugin supports _smartmontools_ version 5.41 and above, but v. 5.41 and v. 5.42 +might require setting `nocheck`, see the comment in the sample configuration. + +To enable SMART on a storage device run: + +``` +smartctl -s on ``` ## Measurements -- smart: +- smart_device: * Tags: + - `capacity` - `device` - `device_model` - - `serial_no` - - `capacity` - `enabled` + - `health` + - `serial_no` + * Fields: + - `exit_status` + +- smart_attribute: + + * Tags: + - `device` + - `fail` + - `flags` - `id` - `name` - - `flags` - - `fail` * Fields: + - `exit_status` + - `raw_value` + - `threshold` - `value` - `worst` - - `threshold` - - `raw_value` ### Flags @@ -52,25 +68,36 @@ The interpretation of the tag `flags` is: ```toml # Read metrics from storage devices supporting S.M.A.R.T. [[inputs.smart]] - ## optionally specify the path to the smartctl executable + ## Optionally specify the path to the smartctl executable # path = "/usr/bin/smartctl" # - ## optionally specify devices to exclude from reporting. 
- # exclude = [ "/dev/pass6" ] + ## Skip checking disks in this power mode. Defaults to + ## "standby" to not wake up disks that have stoped rotating. + ## See --nockeck in the man pages for smartctl. + ## smartctl version 5.41 and 5.42 have faulty detection of + ## power mode and might require changing this value to + ## "never" depending on your storage device. + # nocheck = "standby" + # + ## Optionally specify devices to exclude from reporting. + # excludes = [ "/dev/pass6" ] # - ## optionally specify devices, if unset all S.M.A.R.T. devices - ## will be included - # devices = [ "/dev/ada0" ] + ## Optionally specify devices and device type, if unset + ## a scan (smartctl --scan) for S.M.A.R.T. devices will + ## done and all found will be included except for the + ## excluded in excludes. + # devices = [ "/dev/ada0 -d atacam" ] ``` +To run `smartctl` with `sudo` create a wrapper script and use `path` in +the configuration to execute that. + ## Output -When retrieving stats from the local machine (no server specified): +Example output from an _Apple SSD_: ``` -> smart,serial_no=WD-WMC4N0900000,id=1,name=Raw_Read_Error_Rate,flags=POSR-K,fail=-,host=example,device=/dev/ada0,device_model=WDC\ WD30EFRX-68EUZN0,capacity=3000592982016,enabled=Enabled value=200i,worst=200i,threshold=51i,raw_value=0i 1486892929000000000 -> smart,serial_no=WD-WMC4N0900000,device=/dev/ada0,device_model=WDC\ WD30EFRX-68EUZN0,capacity=3000592982016,enabled=Enabled,id=3,name=Spin_Up_Time,flags=POS--K,fail=-,host=example value=181i,worst=180i,threshold=21i,raw_value=5916i 1486892929000000000 -> smart,device_model=WDC\ WD30EFRX-68EUZN0,capacity=3000592982016,enabled=Enabled,name=Start_Stop_Count,flags=-O--CK,fail=-,device=/dev/ada0,serial_no=WD-WMC4N0900000,id=4,host=example value=100i,worst=100i,threshold=0i,raw_value=18i 1486892929000000000 -> smart,enabled=Enabled,device_model=WDC\ 
WD30EFRX-68EUZN0,id=5,name=Reallocated_Sector_Ct,capacity=3000592982016,device=/dev/ada0,serial_no=WD-WMC4N0900000,flags=PO--CK,fail=-,host=example value=200i,worst=200i,threshold=140i,raw_value=0i 1486892929000000000 -> smart,serial_no=WD-WMC4N0900000,capacity=3000592982016,enabled=Enabled,name=Seek_Error_Rate,host=example,device=/dev/ada0,id=7,flags=-OSR-K,fail=-,device_model=WDC\ WD30EFRX-68EUZN0 value=200i,worst=200i,threshold=0i,raw_value=0i 1486892929000000000 -> smart,flags=-O--CK,device_model=WDC\ WD30EFRX-68EUZN0,capacity=3000592982016,enabled=Enabled,id=9,name=Power_On_Hours,fail=-,host=example,device=/dev/ada0,serial_no=WD-WMC4N0900000 value=65i,worst=65i,threshold=0i,raw_value=25998i 1486892929000000000 +> smart_attribute,device=/dev/rdisk0,id=194,name=Temperature_Celsius,flags=-O---K,fail=-,host=STIZ0039.lan exit_status=0i,value=64i,worst=21i,threshold=0i,raw_value=36i 1487632495000000000 +> smart_attribute,device=/dev/rdisk0,id=197,name=Current_Pending_Sector,flags=-O---K,fail=-,host=STIZ0039.lan exit_status=0i,value=100i,worst=100i,threshold=0i,raw_value=0i 1487632495000000000 +> smart_attribute,device=/dev/rdisk0,id=199,name=UDMA_CRC_Error_Count,flags=-O-RC-,fail=-,host=STIZ0039.lan exit_status=0i,value=200i,worst=200i,threshold=0i,raw_value=0i 1487632495000000000 +> smart_device,device_model=APPLE\ SSD\ SM256E,serial_no=S0X5NZBC422720,capacity=251000193024,enabled=Enabled,health=PASSED,host=STIZ0039.lan,device=/dev/rdisk0 exit_status=0i 1487632495000000000 ``` From 364eb9f9ad2bc3e47a7c52a1b60334f1f6cde553 Mon Sep 17 00:00:00 2001 From: Rickard von Essen Date: Thu, 3 Aug 2017 08:00:00 +0200 Subject: [PATCH 10/18] Renamed field device_model to model --- plugins/inputs/smart/smart.go | 2 +- plugins/inputs/smart/smart_test.go | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/plugins/inputs/smart/smart.go b/plugins/inputs/smart/smart.go index 09172eba1c547..f3e2116ebc6c2 100644 --- a/plugins/inputs/smart/smart.go +++ 
b/plugins/inputs/smart/smart.go @@ -185,7 +185,7 @@ func gatherDisk(acc telegraf.Accumulator, path, nockeck, device string, err chan model := modelInInfo.FindStringSubmatch(line) if len(model) > 1 { - device_tags["device_model"] = model[1] + device_tags["model"] = model[1] } serial := serialInInfo.FindStringSubmatch(line) diff --git a/plugins/inputs/smart/smart_test.go b/plugins/inputs/smart/smart_test.go index 1d2afc4d84496..25f4fe033e6fb 100644 --- a/plugins/inputs/smart/smart_test.go +++ b/plugins/inputs/smart/smart_test.go @@ -288,12 +288,12 @@ func TestGather(t *testing.T) { "exit_status": int(0), }, map[string]string{ - "device": "/dev/ada0", - "device_model": "APPLE SSD SM256E", - "serial_no": "S0X5NZBC422720", - "enabled": "Enabled", - "capacity": "251000193024", - "health": "PASSED", + "device": "/dev/ada0", + "model": "APPLE SSD SM256E", + "serial_no": "S0X5NZBC422720", + "enabled": "Enabled", + "capacity": "251000193024", + "health": "PASSED", }, }, } From de5998e3c3bb85698da466221584d4f3955739aa Mon Sep 17 00:00:00 2001 From: Rickard von Essen Date: Thu, 3 Aug 2017 08:34:21 +0200 Subject: [PATCH 11/18] Spell correction --- plugins/inputs/smart/smart.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/inputs/smart/smart.go b/plugins/inputs/smart/smart.go index f3e2116ebc6c2..718f1e3969542 100644 --- a/plugins/inputs/smart/smart.go +++ b/plugins/inputs/smart/smart.go @@ -49,7 +49,7 @@ var sampleConfig = ` # ## Skip checking disks in this power mode. Defaults to ## "standby" to not wake up disks that have stoped rotating. - ## See --nockeck in the man pages for smartctl. + ## See --nocheck in the man pages for smartctl. ## smartctl version 5.41 and 5.42 have faulty detection of ## power mode and might require changing this value to ## "never" depending on your disks. 
From 5ff8f0fd5e95fdc3294a63ecfa5ce1a492fb2043 Mon Sep 17 00:00:00 2001 From: Rickard von Essen Date: Sun, 6 Aug 2017 14:11:23 +0200 Subject: [PATCH 12/18] Change to use Accumulator.AddError --- plugins/inputs/smart/smart.go | 37 +++++++++++------------------------ 1 file changed, 11 insertions(+), 26 deletions(-) diff --git a/plugins/inputs/smart/smart.go b/plugins/inputs/smart/smart.go index 718f1e3969542..b671680b7e61d 100644 --- a/plugins/inputs/smart/smart.go +++ b/plugins/inputs/smart/smart.go @@ -1,12 +1,12 @@ package smart import ( - "errors" "fmt" "os/exec" "regexp" "strconv" "strings" + "sync" "syscall" "time" @@ -87,15 +87,7 @@ func (m *Smart) Gather(acc telegraf.Accumulator) error { } } - errs := m.getAttributes(acc, devices) - if len(errs) > 0 { - var errStrs []string - for _, e := range errs { - errStrs = append(errStrs, e.Error()) - } - return errors.New(strings.Join(errStrs, ", ")) - } - + m.getAttributes(acc, devices) return nil } @@ -131,22 +123,16 @@ func excludedDev(excludes []string, deviceLine string) bool { } // Get info and attributes for each S.M.A.R.T. device -func (m *Smart) getAttributes(acc telegraf.Accumulator, devices []string) []error { +func (m *Smart) getAttributes(acc telegraf.Accumulator, devices []string) { - errchan := make(chan error) - for _, device := range devices { - go gatherDisk(acc, m.Path, m.Nocheck, device, errchan) - } + var wg sync.WaitGroup + wg.Add(len(devices)) - var errors []error - for i := 0; i < len(devices); i++ { - err := <-errchan - if err != nil { - errors = append(errors, err) - } + for _, device := range devices { + go gatherDisk(acc, m.Path, m.Nocheck, device, &wg) } - return errors + wg.Wait() } // Command line parse errors are denoted by the exit code having the 0 bit set. 
@@ -160,8 +146,9 @@ func exitStatus(err error) (int, error) { return 0, err } -func gatherDisk(acc telegraf.Accumulator, path, nockeck, device string, err chan error) { +func gatherDisk(acc telegraf.Accumulator, path, nockeck, device string, wg *sync.WaitGroup) { + defer wg.Done() // smartctl 5.41 & 5.42 have are broken regarding handling of --nocheck/-n args := []string{"--info", "--health", "--attributes", "--tolerance=verypermissive", "-n", nockeck, "--format=brief"} args = append(args, strings.Split(device, " ")...) @@ -172,7 +159,7 @@ func gatherDisk(acc telegraf.Accumulator, path, nockeck, device string, err chan // Ignore all exit statuses except if it is a command line parse error exitStatus, er := exitStatus(e) if er != nil { - err <- fmt.Errorf("failed to run command %s: %s - %s", strings.Join(cmd.Args, " "), e, outStr) + acc.AddError(fmt.Errorf("failed to run command %s: %s - %s", strings.Join(cmd.Args, " "), e, outStr)) return } @@ -239,8 +226,6 @@ func gatherDisk(acc telegraf.Accumulator, path, nockeck, device string, err chan } } acc.AddFields("smart_device", device_fields, device_tags) - - err <- nil } func parseRawValue(rawVal string) (int, error) { From 77b3eab189e4e1dad25c454cc0b384790511db73 Mon Sep 17 00:00:00 2001 From: Rickard von Essen Date: Mon, 7 Aug 2017 06:49:50 +0200 Subject: [PATCH 13/18] Add sudo support --- plugins/inputs/smart/README.md | 6 ++++++ plugins/inputs/smart/smart.go | 24 ++++++++++++++++++++---- 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/plugins/inputs/smart/README.md b/plugins/inputs/smart/README.md index e57b911b2d4f4..a7932f7ff8479 100644 --- a/plugins/inputs/smart/README.md +++ b/plugins/inputs/smart/README.md @@ -71,6 +71,12 @@ The interpretation of the tag `flags` is: ## Optionally specify the path to the smartctl executable # path = "/usr/bin/smartctl" # + ## On most platforms smartctl requires root access. + ## Setting 'use_sudo' to true will make use of sudo to run smartctl. 
+ ## Sudo must be configured to to allow the telegraf user to run smartctl + ## with out password. + # use_sudo = false + # ## Skip checking disks in this power mode. Defaults to ## "standby" to not wake up disks that have stoped rotating. ## See --nockeck in the man pages for smartctl. diff --git a/plugins/inputs/smart/smart.go b/plugins/inputs/smart/smart.go index b671680b7e61d..e75dc47c1ea30 100644 --- a/plugins/inputs/smart/smart.go +++ b/plugins/inputs/smart/smart.go @@ -41,12 +41,19 @@ type Smart struct { Nocheck string Excludes []string Devices []string + UseSudo bool } var sampleConfig = ` ## Optionally specify the path to the smartctl executable # path = "/usr/bin/smartctl" # + ## On most platforms smartctl requires root access. + ## Setting 'use_sudo' to true will make use of sudo to run smartctl. + ## Sudo must be configured to to allow the telegraf user to run smartctl + ## with out password. + # use_sudo = false + # ## Skip checking disks in this power mode. Defaults to ## "standby" to not wake up disks that have stoped rotating. ## See --nocheck in the man pages for smartctl. @@ -91,10 +98,19 @@ func (m *Smart) Gather(acc telegraf.Accumulator) error { return nil } +// Wrap with sudo +func sudo(sudo bool, command string, args ...string) *exec.Cmd { + if sudo { + return execCommand("sudo", append([]string{"-n", command}, args...)...) + } + + return execCommand(command, args...) +} + // Scan for S.M.A.R.T. 
devices func (m *Smart) scan() ([]string, error) { - cmd := execCommand(m.Path, "--scan") + cmd := sudo(m.UseSudo, m.Path, "--scan") out, err := internal.CombinedOutputTimeout(cmd, time.Second*5) if err != nil { return []string{}, fmt.Errorf("failed to run command %s: %s - %s", strings.Join(cmd.Args, " "), err, string(out)) @@ -129,7 +145,7 @@ func (m *Smart) getAttributes(acc telegraf.Accumulator, devices []string) { wg.Add(len(devices)) for _, device := range devices { - go gatherDisk(acc, m.Path, m.Nocheck, device, &wg) + go gatherDisk(acc, m.UseSudo, m.Path, m.Nocheck, device, &wg) } wg.Wait() @@ -146,13 +162,13 @@ func exitStatus(err error) (int, error) { return 0, err } -func gatherDisk(acc telegraf.Accumulator, path, nockeck, device string, wg *sync.WaitGroup) { +func gatherDisk(acc telegraf.Accumulator, usesudo bool, path, nockeck, device string, wg *sync.WaitGroup) { defer wg.Done() // smartctl 5.41 & 5.42 have are broken regarding handling of --nocheck/-n args := []string{"--info", "--health", "--attributes", "--tolerance=verypermissive", "-n", nockeck, "--format=brief"} args = append(args, strings.Split(device, " ")...) - cmd := execCommand(path, args...) + cmd := sudo(usesudo, path, args...) 
out, e := internal.CombinedOutputTimeout(cmd, time.Second*5) outStr := string(out) From 3d44badbb41d2df9d8c5f1b3099217f2a596d683 Mon Sep 17 00:00:00 2001 From: Rickard von Essen Date: Mon, 7 Aug 2017 07:52:06 +0200 Subject: [PATCH 14/18] Changed health into field health_ok --- plugins/inputs/smart/smart.go | 3 ++- plugins/inputs/smart/smart_test.go | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/plugins/inputs/smart/smart.go b/plugins/inputs/smart/smart.go index e75dc47c1ea30..1af8a68d3ff95 100644 --- a/plugins/inputs/smart/smart.go +++ b/plugins/inputs/smart/smart.go @@ -27,6 +27,7 @@ var ( // SMART support is: Enabled smartEnabledInInfo = regexp.MustCompile("^SMART support is:\\s+(\\w+)$") // SMART overall-health self-assessment test result: PASSED + // PASSED, FAILED, UNKNOWN smartOverallHealth = regexp.MustCompile("^SMART overall-health self-assessment test result:\\s+(\\w+).*$") // ID# ATTRIBUTE_NAME FLAGS VALUE WORST THRESH FAIL RAW_VALUE @@ -208,7 +209,7 @@ func gatherDisk(acc telegraf.Accumulator, usesudo bool, path, nockeck, device st health := smartOverallHealth.FindStringSubmatch(line) if len(health) > 1 { - device_tags["health"] = health[1] + device_fields["health_ok"] = (health[1] == "PASSED") } attr := attribute.FindStringSubmatch(line) diff --git a/plugins/inputs/smart/smart_test.go b/plugins/inputs/smart/smart_test.go index 25f4fe033e6fb..f814f74c70edf 100644 --- a/plugins/inputs/smart/smart_test.go +++ b/plugins/inputs/smart/smart_test.go @@ -73,7 +73,7 @@ func TestGather(t *testing.T) { err := s.Gather(&acc) require.NoError(t, err) - assert.Equal(t, 61, acc.NFields(), "Wrong number of fields gathered") + assert.Equal(t, 62, acc.NFields(), "Wrong number of fields gathered") var testsAda0Attributes = []struct { fields map[string]interface{} @@ -286,6 +286,7 @@ func TestGather(t *testing.T) { { map[string]interface{}{ "exit_status": int(0), + "health_ok": bool(true), }, map[string]string{ "device": "/dev/ada0", @@ -293,7 +294,6 
@@ func TestGather(t *testing.T) { "serial_no": "S0X5NZBC422720", "enabled": "Enabled", "capacity": "251000193024", - "health": "PASSED", }, }, } From ade9f19b9aa1f4191d31c1a7a2f99b3661704579 Mon Sep 17 00:00:00 2001 From: Rickard von Essen Date: Thu, 10 Aug 2017 06:33:49 +0200 Subject: [PATCH 15/18] Clarify interpretation of exit_status --- plugins/inputs/smart/README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/plugins/inputs/smart/README.md b/plugins/inputs/smart/README.md index a7932f7ff8479..4ecb19ec02c47 100644 --- a/plugins/inputs/smart/README.md +++ b/plugins/inputs/smart/README.md @@ -63,6 +63,12 @@ The interpretation of the tag `flags` is: - *O* updated online - *P* prefailure warning +### Exit Status + +The `exit_status` field captures the exit status of the smartctl command which +is defined by a bitmask. For the interpretation of the bitmask see the man page for +smartctl. + ## Configuration ```toml From c9c71221c3ad69bcce5214381e5f18fe28d6e18c Mon Sep 17 00:00:00 2001 From: Rickard von Essen Date: Thu, 10 Aug 2017 06:35:08 +0200 Subject: [PATCH 16/18] Make detailed attributes gathering optional --- plugins/inputs/smart/README.md | 12 ++++- plugins/inputs/smart/smart.go | 80 +++++++++++++++++++----------- plugins/inputs/smart/smart_test.go | 59 ++++++++++++++++++++-- 3 files changed, 117 insertions(+), 34 deletions(-) diff --git a/plugins/inputs/smart/README.md b/plugins/inputs/smart/README.md index 4ecb19ec02c47..5d25e3702915a 100644 --- a/plugins/inputs/smart/README.md +++ b/plugins/inputs/smart/README.md @@ -37,6 +37,11 @@ smartctl -s on - `serial_no` * Fields: - `exit_status` + - `health_ok` + - `read_error_rate` + - `seek_error_rate` + - `temp_c` + - `udma_crc_errors` - smart_attribute: @@ -91,6 +96,11 @@ smartctl. ## "never" depending on your storage device. # nocheck = "standby" # + ## Gather detailed metrics for each SMART Attribute. 
+ ## Defaults to "false" + ## + # attributes = false + # ## Optionally specify devices to exclude from reporting. # excludes = [ "/dev/pass6" ] # @@ -111,5 +121,5 @@ Example output from an _Apple SSD_: > smart_attribute,device=/dev/rdisk0,id=194,name=Temperature_Celsius,flags=-O---K,fail=-,host=STIZ0039.lan exit_status=0i,value=64i,worst=21i,threshold=0i,raw_value=36i 1487632495000000000 > smart_attribute,device=/dev/rdisk0,id=197,name=Current_Pending_Sector,flags=-O---K,fail=-,host=STIZ0039.lan exit_status=0i,value=100i,worst=100i,threshold=0i,raw_value=0i 1487632495000000000 > smart_attribute,device=/dev/rdisk0,id=199,name=UDMA_CRC_Error_Count,flags=-O-RC-,fail=-,host=STIZ0039.lan exit_status=0i,value=200i,worst=200i,threshold=0i,raw_value=0i 1487632495000000000 -> smart_device,device_model=APPLE\ SSD\ SM256E,serial_no=S0X5NZBC422720,capacity=251000193024,enabled=Enabled,health=PASSED,host=STIZ0039.lan,device=/dev/rdisk0 exit_status=0i 1487632495000000000 +> smart_device,host=mbpro.local,device=/dev/rdisk0,model=APPLE\ SSD\ SM0512F,serial_no=S1K5NYCD964433,capacity=500277790720,enabled=Enabled temp_c=39i,udma_crc_errors=0i,exit_status=0i,health_ok=true,read_error_rate=0i 1502255921000000000 ``` diff --git a/plugins/inputs/smart/smart.go b/plugins/inputs/smart/smart.go index 1af8a68d3ff95..1392906bd754e 100644 --- a/plugins/inputs/smart/smart.go +++ b/plugins/inputs/smart/smart.go @@ -35,14 +35,22 @@ var ( // 5 Reallocated_Sector_Ct PO--CK 100 100 000 - 0 // 192 Power-Off_Retract_Count -O--C- 097 097 000 - 14716 attribute = regexp.MustCompile("^\\s*([0-9]+)\\s(\\S+)\\s+([-P][-O][-S][-R][-C][-K])\\s+([0-9]+)\\s+([0-9]+)\\s+([0-9]+)\\s+([-\\w]+)\\s+([\\w\\+\\.]+).*$") + + deviceFieldIds = map[string]string{ + "1": "read_error_rate", + "7": "seek_error_rate", + "194": "temp_c", + "199": "udma_crc_errors", + } ) type Smart struct { - Path string - Nocheck string - Excludes []string - Devices []string - UseSudo bool + Path string + Nocheck string + Attributes bool + 
Excludes []string + Devices []string + UseSudo bool } var sampleConfig = ` @@ -63,6 +71,11 @@ var sampleConfig = ` ## "never" depending on your disks. # nocheck = "standby" # + ## Gather detailed metrics for each SMART Attribute. + ## Defaults to "false" + ## + # attributes = false + # ## Optionally specify devices to exclude from reporting. # excludes = [ "/dev/pass6" ] # @@ -146,7 +159,7 @@ func (m *Smart) getAttributes(acc telegraf.Accumulator, devices []string) { wg.Add(len(devices)) for _, device := range devices { - go gatherDisk(acc, m.UseSudo, m.Path, m.Nocheck, device, &wg) + go gatherDisk(acc, m.UseSudo, m.Attributes, m.Path, m.Nocheck, device, &wg) } wg.Wait() @@ -163,7 +176,7 @@ func exitStatus(err error) (int, error) { return 0, err } -func gatherDisk(acc telegraf.Accumulator, usesudo bool, path, nockeck, device string, wg *sync.WaitGroup) { +func gatherDisk(acc telegraf.Accumulator, usesudo, attributes bool, path, nockeck, device string, wg *sync.WaitGroup) { defer wg.Done() // smartctl 5.41 & 5.42 have are broken regarding handling of --nocheck/-n @@ -215,31 +228,42 @@ func gatherDisk(acc telegraf.Accumulator, usesudo bool, path, nockeck, device st attr := attribute.FindStringSubmatch(line) if len(attr) > 1 { - tags := map[string]string{} - fields := make(map[string]interface{}) - - tags["device"] = strings.Split(device, " ")[0] - tags["id"] = attr[1] - tags["name"] = attr[2] - tags["flags"] = attr[3] - fields["exit_status"] = exitStatus - if i, err := strconv.Atoi(attr[4]); err == nil { - fields["value"] = i - } - if i, err := strconv.Atoi(attr[5]); err == nil { - fields["worst"] = i - } - if i, err := strconv.Atoi(attr[6]); err == nil { - fields["threshold"] = i + if attributes { + tags := map[string]string{} + fields := make(map[string]interface{}) + + tags["device"] = strings.Split(device, " ")[0] + tags["id"] = attr[1] + tags["name"] = attr[2] + tags["flags"] = attr[3] + + fields["exit_status"] = exitStatus + if i, err := strconv.Atoi(attr[4]); 
err == nil { + fields["value"] = i + } + if i, err := strconv.Atoi(attr[5]); err == nil { + fields["worst"] = i + } + if i, err := strconv.Atoi(attr[6]); err == nil { + fields["threshold"] = i + } + + tags["fail"] = attr[7] + if val, err := parseRawValue(attr[8]); err == nil { + fields["raw_value"] = val + } + + acc.AddFields("smart_attribute", fields, tags) } - tags["fail"] = attr[7] - if val, err := parseRawValue(attr[8]); err == nil { - fields["raw_value"] = val + // If the attribute matches on the one in deviceFieldIds + // save the raw value to a field. + if field, ok := deviceFieldIds[attr[1]]; ok { + if val, err := parseRawValue(attr[8]); err == nil { + device_fields[field] = val + } } - - acc.AddFields("smart_attribute", fields, tags) } } acc.AddFields("smart_device", device_fields, device_tags) diff --git a/plugins/inputs/smart/smart_test.go b/plugins/inputs/smart/smart_test.go index f814f74c70edf..742cbcb889d23 100644 --- a/plugins/inputs/smart/smart_test.go +++ b/plugins/inputs/smart/smart_test.go @@ -62,9 +62,10 @@ ID# ATTRIBUTE_NAME FLAGS VALUE WORST THRESH FAIL RAW_VALUE ` ) -func TestGather(t *testing.T) { +func TestGatherAttributes(t *testing.T) { s := &Smart{ - Path: "smartctl", + Path: "smartctl", + Attributes: true, } // overwriting exec commands with mock commands execCommand = fakeExecCommand @@ -73,7 +74,7 @@ func TestGather(t *testing.T) { err := s.Gather(&acc) require.NoError(t, err) - assert.Equal(t, 62, acc.NFields(), "Wrong number of fields gathered") + assert.Equal(t, 65, acc.NFields(), "Wrong number of fields gathered") var testsAda0Attributes = []struct { fields map[string]interface{} @@ -285,8 +286,56 @@ func TestGather(t *testing.T) { }{ { map[string]interface{}{ - "exit_status": int(0), - "health_ok": bool(true), + "exit_status": int(0), + "health_ok": bool(true), + "read_error_rate": int(0), + "temp_c": int(34), + "udma_crc_errors": int(0), + }, + map[string]string{ + "device": "/dev/ada0", + "model": "APPLE SSD SM256E", + 
"serial_no": "S0X5NZBC422720", + "enabled": "Enabled", + "capacity": "251000193024", + }, + }, + } + + for _, test := range testsAda0Device { + acc.AssertContainsTaggedFields(t, "smart_device", test.fields, test.tags) + } + +} + +func TestGatherNoAttributes(t *testing.T) { + s := &Smart{ + Path: "smartctl", + Attributes: false, + } + // overwriting exec commands with mock commands + execCommand = fakeExecCommand + var acc testutil.Accumulator + + err := s.Gather(&acc) + + require.NoError(t, err) + assert.Equal(t, 5, acc.NFields(), "Wrong number of fields gathered") + acc.AssertDoesNotContainMeasurement(t, "smart_attribute") + + // tags = map[string]string{} + + var testsAda0Device = []struct { + fields map[string]interface{} + tags map[string]string + }{ + { + map[string]interface{}{ + "exit_status": int(0), + "health_ok": bool(true), + "read_error_rate": int(0), + "temp_c": int(34), + "udma_crc_errors": int(0), }, map[string]string{ "device": "/dev/ada0", From da606ac02ab460f97aaa99b644f3b4ba122dd18b Mon Sep 17 00:00:00 2001 From: Rickard von Essen Date: Sat, 12 Aug 2017 19:40:43 +0200 Subject: [PATCH 17/18] Added field tag for WWN Added WWN to both smart_device and smart_attribute measurements. And added serial_no also to smart_attribute. 
--- plugins/inputs/smart/README.md | 18 +++- plugins/inputs/smart/smart.go | 14 +++ plugins/inputs/smart/smart_test.go | 146 +++++++++++++++++------------ 3 files changed, 114 insertions(+), 64 deletions(-) diff --git a/plugins/inputs/smart/README.md b/plugins/inputs/smart/README.md index 5d25e3702915a..a81b344a567c6 100644 --- a/plugins/inputs/smart/README.md +++ b/plugins/inputs/smart/README.md @@ -35,6 +35,7 @@ smartctl -s on - `enabled` - `health` - `serial_no` + - `wwn` * Fields: - `exit_status` - `health_ok` @@ -51,6 +52,8 @@ smartctl -s on - `flags` - `id` - `name` + - `serial_no` + - `wwn` * Fields: - `exit_status` - `raw_value` @@ -74,6 +77,14 @@ The `exit_status` field captures the exit status of the smartctl command which is defined by a bitmask. For the interpretation of the bitmask see the man page for smartctl. +### Device Names + +Device names, e.g., `/dev/sda`, are *not persistent*, and may be +subject to change across reboots or system changes. Instead, you can use the +*World Wide Name* (WWN) or serial number to identify devices. On Linux, block +devices can be referenced by the WWN in the following location: +`/dev/disk/by-id/`. + ## Configuration ```toml @@ -118,8 +129,7 @@ the configuration to execute that.
Example output from an _Apple SSD_: ``` -> smart_attribute,device=/dev/rdisk0,id=194,name=Temperature_Celsius,flags=-O---K,fail=-,host=STIZ0039.lan exit_status=0i,value=64i,worst=21i,threshold=0i,raw_value=36i 1487632495000000000 -> smart_attribute,device=/dev/rdisk0,id=197,name=Current_Pending_Sector,flags=-O---K,fail=-,host=STIZ0039.lan exit_status=0i,value=100i,worst=100i,threshold=0i,raw_value=0i 1487632495000000000 -> smart_attribute,device=/dev/rdisk0,id=199,name=UDMA_CRC_Error_Count,flags=-O-RC-,fail=-,host=STIZ0039.lan exit_status=0i,value=200i,worst=200i,threshold=0i,raw_value=0i 1487632495000000000 -> smart_device,host=mbpro.local,device=/dev/rdisk0,model=APPLE\ SSD\ SM0512F,serial_no=S1K5NYCD964433,capacity=500277790720,enabled=Enabled temp_c=39i,udma_crc_errors=0i,exit_status=0i,health_ok=true,read_error_rate=0i 1502255921000000000 +> smart_attribute,serial_no=S1K5NYCD964433,wwn=5002538655584d30,id=199,name=UDMA_CRC_Error_Count,flags=-O-RC-,fail=-,host=mbpro.local,device=/dev/rdisk0 threshold=0i,raw_value=0i,exit_status=0i,value=200i,worst=200i 1502536854000000000 +> smart_attribute,device=/dev/rdisk0,serial_no=S1K5NYCD964433,wwn=5002538655584d30,id=240,name=Unknown_SSD_Attribute,flags=-O---K,fail=-,host=mbpro.local exit_status=0i,value=100i,worst=100i,threshold=0i,raw_value=0i 1502536854000000000 +> smart_device,enabled=Enabled,host=mbpro.local,device=/dev/rdisk0,model=APPLE\ SSD\ SM0512F,serial_no=S1K5NYCD964433,wwn=5002538655584d30,capacity=500277790720 udma_crc_errors=0i,exit_status=0i,health_ok=true,read_error_rate=0i,temp_c=40i 1502536854000000000 ``` diff --git a/plugins/inputs/smart/smart.go b/plugins/inputs/smart/smart.go index 1392906bd754e..c69e07c3d54a5 100644 --- a/plugins/inputs/smart/smart.go +++ b/plugins/inputs/smart/smart.go @@ -22,6 +22,8 @@ var ( modelInInfo = regexp.MustCompile("^Device Model:\\s+(.*)$") // Serial Number: S0X5NZBC422720 serialInInfo = regexp.MustCompile("^Serial Number:\\s+(.*)$") + // LU WWN Device Id: 5 002538 
655584d30 + wwnInInfo = regexp.MustCompile("^LU WWN Device Id:\\s+(.*)$") // User Capacity: 251,000,193,024 bytes [251 GB] usercapacityInInfo = regexp.MustCompile("^User Capacity:\\s+([0-9,]+)\\s+bytes.*$") // SMART support is: Enabled @@ -210,6 +212,11 @@ func gatherDisk(acc telegraf.Accumulator, usesudo, attributes bool, path, nockec device_tags["serial_no"] = serial[1] } + wwn := wwnInInfo.FindStringSubmatch(line) + if len(wwn) > 1 { + device_tags["wwn"] = strings.Replace(wwn[1], " ", "", -1) + } + capacity := usercapacityInInfo.FindStringSubmatch(line) if len(capacity) > 1 { device_tags["capacity"] = strings.Replace(capacity[1], ",", "", -1) @@ -234,6 +241,13 @@ func gatherDisk(acc telegraf.Accumulator, usesudo, attributes bool, path, nockec fields := make(map[string]interface{}) tags["device"] = strings.Split(device, " ")[0] + + if serial, ok := device_tags["serial_no"]; ok { + tags["serial_no"] = serial + } + if wwn, ok := device_tags["wwn"]; ok { + tags["wwn"] = wwn + } tags["id"] = attr[1] tags["name"] = attr[2] tags["flags"] = attr[3] diff --git a/plugins/inputs/smart/smart_test.go b/plugins/inputs/smart/smart_test.go index 742cbcb889d23..ba17ea42d7028 100644 --- a/plugins/inputs/smart/smart_test.go +++ b/plugins/inputs/smart/smart_test.go @@ -89,11 +89,13 @@ func TestGatherAttributes(t *testing.T) { "exit_status": int(0), }, map[string]string{ - "device": "/dev/ada0", - "id": "1", - "name": "Raw_Read_Error_Rate", - "flags": "-O-RC-", - "fail": "-", + "device": "/dev/ada0", + "serial_no": "S0X5NZBC422720", + "wwn": "5002538043584d30", + "id": "1", + "name": "Raw_Read_Error_Rate", + "flags": "-O-RC-", + "fail": "-", }, }, { @@ -105,11 +107,13 @@ func TestGatherAttributes(t *testing.T) { "exit_status": int(0), }, map[string]string{ - "device": "/dev/ada0", - "id": "5", - "name": "Reallocated_Sector_Ct", - "flags": "PO--CK", - "fail": "-", + "device": "/dev/ada0", + "serial_no": "S0X5NZBC422720", + "wwn": "5002538043584d30", + "id": "5", + "name": 
"Reallocated_Sector_Ct", + "flags": "PO--CK", + "fail": "-", }, }, { @@ -121,11 +125,13 @@ func TestGatherAttributes(t *testing.T) { "exit_status": int(0), }, map[string]string{ - "device": "/dev/ada0", - "id": "9", - "name": "Power_On_Hours", - "flags": "-O--CK", - "fail": "-", + "device": "/dev/ada0", + "serial_no": "S0X5NZBC422720", + "wwn": "5002538043584d30", + "id": "9", + "name": "Power_On_Hours", + "flags": "-O--CK", + "fail": "-", }, }, { @@ -137,11 +143,13 @@ func TestGatherAttributes(t *testing.T) { "exit_status": int(0), }, map[string]string{ - "device": "/dev/ada0", - "id": "12", - "name": "Power_Cycle_Count", - "flags": "-O--CK", - "fail": "-", + "device": "/dev/ada0", + "serial_no": "S0X5NZBC422720", + "wwn": "5002538043584d30", + "id": "12", + "name": "Power_Cycle_Count", + "flags": "-O--CK", + "fail": "-", }, }, { @@ -153,11 +161,13 @@ func TestGatherAttributes(t *testing.T) { "exit_status": int(0), }, map[string]string{ - "device": "/dev/ada0", - "id": "169", - "name": "Unknown_Attribute", - "flags": "PO--C-", - "fail": "-", + "device": "/dev/ada0", + "serial_no": "S0X5NZBC422720", + "wwn": "5002538043584d30", + "id": "169", + "name": "Unknown_Attribute", + "flags": "PO--C-", + "fail": "-", }, }, { @@ -169,11 +179,13 @@ func TestGatherAttributes(t *testing.T) { "exit_status": int(0), }, map[string]string{ - "device": "/dev/ada0", - "id": "173", - "name": "Wear_Leveling_Count", - "flags": "-O--CK", - "fail": "-", + "device": "/dev/ada0", + "serial_no": "S0X5NZBC422720", + "wwn": "5002538043584d30", + "id": "173", + "name": "Wear_Leveling_Count", + "flags": "-O--CK", + "fail": "-", }, }, { @@ -185,11 +197,13 @@ func TestGatherAttributes(t *testing.T) { "exit_status": int(0), }, map[string]string{ - "device": "/dev/ada0", - "id": "190", - "name": "Airflow_Temperature_Cel", - "flags": "-O---K", - "fail": "Past", + "device": "/dev/ada0", + "serial_no": "S0X5NZBC422720", + "wwn": "5002538043584d30", + "id": "190", + "name": "Airflow_Temperature_Cel", + 
"flags": "-O---K", + "fail": "Past", }, }, { @@ -201,11 +215,13 @@ func TestGatherAttributes(t *testing.T) { "exit_status": int(0), }, map[string]string{ - "device": "/dev/ada0", - "id": "192", - "name": "Power-Off_Retract_Count", - "flags": "-O--C-", - "fail": "-", + "device": "/dev/ada0", + "serial_no": "S0X5NZBC422720", + "wwn": "5002538043584d30", + "id": "192", + "name": "Power-Off_Retract_Count", + "flags": "-O--C-", + "fail": "-", }, }, { @@ -217,11 +233,13 @@ func TestGatherAttributes(t *testing.T) { "exit_status": int(0), }, map[string]string{ - "device": "/dev/ada0", - "id": "194", - "name": "Temperature_Celsius", - "flags": "-O---K", - "fail": "-", + "device": "/dev/ada0", + "serial_no": "S0X5NZBC422720", + "wwn": "5002538043584d30", + "id": "194", + "name": "Temperature_Celsius", + "flags": "-O---K", + "fail": "-", }, }, { @@ -233,11 +251,13 @@ func TestGatherAttributes(t *testing.T) { "exit_status": int(0), }, map[string]string{ - "device": "/dev/ada0", - "id": "197", - "name": "Current_Pending_Sector", - "flags": "-O---K", - "fail": "-", + "device": "/dev/ada0", + "serial_no": "S0X5NZBC422720", + "wwn": "5002538043584d30", + "id": "197", + "name": "Current_Pending_Sector", + "flags": "-O---K", + "fail": "-", }, }, { @@ -249,11 +269,13 @@ func TestGatherAttributes(t *testing.T) { "exit_status": int(0), }, map[string]string{ - "device": "/dev/ada0", - "id": "199", - "name": "UDMA_CRC_Error_Count", - "flags": "-O-RC-", - "fail": "-", + "device": "/dev/ada0", + "serial_no": "S0X5NZBC422720", + "wwn": "5002538043584d30", + "id": "199", + "name": "UDMA_CRC_Error_Count", + "flags": "-O-RC-", + "fail": "-", }, }, { @@ -265,11 +287,13 @@ func TestGatherAttributes(t *testing.T) { "exit_status": int(0), }, map[string]string{ - "device": "/dev/ada0", - "id": "240", - "name": "Head_Flying_Hours", - "flags": "------", - "fail": "-", + "device": "/dev/ada0", + "serial_no": "S0X5NZBC422720", + "wwn": "5002538043584d30", + "id": "240", + "name": "Head_Flying_Hours", + 
"flags": "------", + "fail": "-", }, }, } @@ -296,6 +320,7 @@ func TestGatherAttributes(t *testing.T) { "device": "/dev/ada0", "model": "APPLE SSD SM256E", "serial_no": "S0X5NZBC422720", + "wwn": "5002538043584d30", "enabled": "Enabled", "capacity": "251000193024", }, @@ -341,6 +366,7 @@ func TestGatherNoAttributes(t *testing.T) { "device": "/dev/ada0", "model": "APPLE SSD SM256E", "serial_no": "S0X5NZBC422720", + "wwn": "5002538043584d30", "enabled": "Enabled", "capacity": "251000193024", }, From 6aac2cd106125791187115d113448ad77195a4e8 Mon Sep 17 00:00:00 2001 From: Rickard von Essen Date: Wed, 4 Oct 2017 06:39:48 +0200 Subject: [PATCH 18/18] smart: Switch Atoi to ParseInt --- plugins/inputs/smart/smart.go | 22 +++--- plugins/inputs/smart/smart_test.go | 108 ++++++++++++++--------------- 2 files changed, 65 insertions(+), 65 deletions(-) diff --git a/plugins/inputs/smart/smart.go b/plugins/inputs/smart/smart.go index c69e07c3d54a5..a754d1ace1a96 100644 --- a/plugins/inputs/smart/smart.go +++ b/plugins/inputs/smart/smart.go @@ -253,13 +253,13 @@ func gatherDisk(acc telegraf.Accumulator, usesudo, attributes bool, path, nockec tags["flags"] = attr[3] fields["exit_status"] = exitStatus - if i, err := strconv.Atoi(attr[4]); err == nil { + if i, err := strconv.ParseInt(attr[4], 10, 64); err == nil { fields["value"] = i } - if i, err := strconv.Atoi(attr[5]); err == nil { + if i, err := strconv.ParseInt(attr[5], 10, 64); err == nil { fields["worst"] = i } - if i, err := strconv.Atoi(attr[6]); err == nil { + if i, err := strconv.ParseInt(attr[6], 10, 64); err == nil { fields["threshold"] = i } @@ -283,10 +283,10 @@ func gatherDisk(acc telegraf.Accumulator, usesudo, attributes bool, path, nockec acc.AddFields("smart_device", device_fields, device_tags) } -func parseRawValue(rawVal string) (int, error) { +func parseRawValue(rawVal string) (int64, error) { // Integer - if i, err := strconv.Atoi(rawVal); err == nil { + if i, err := strconv.ParseInt(rawVal, 10, 64); err == 
nil { return i, nil } @@ -297,7 +297,7 @@ func parseRawValue(rawVal string) (int, error) { return 0, fmt.Errorf("Couldn't parse RAW_VALUE '%s'", rawVal) } - duration := 0 + duration := int64(0) for _, part := range parts { timePart := unit.FindStringSubmatch(part) if len(timePart) == 0 { @@ -305,12 +305,12 @@ func parseRawValue(rawVal string) (int, error) { } switch timePart[2] { case "h": - duration += atoi(timePart[1]) * 3600 + duration += parseInt(timePart[1]) * int64(3600) case "m": - duration += atoi(timePart[1]) * 60 + duration += parseInt(timePart[1]) * int64(60) case "s": // drop fractions of seconds - duration += atoi(strings.Split(timePart[1], ".")[0]) + duration += parseInt(strings.Split(timePart[1], ".")[0]) default: // Unknown, ignore } @@ -318,8 +318,8 @@ func parseRawValue(rawVal string) (int, error) { return duration, nil } -func atoi(str string) int { - if i, err := strconv.Atoi(str); err == nil { +func parseInt(str string) int64 { + if i, err := strconv.ParseInt(str, 10, 64); err == nil { return i } return 0 diff --git a/plugins/inputs/smart/smart_test.go b/plugins/inputs/smart/smart_test.go index ba17ea42d7028..c8e7770331afd 100644 --- a/plugins/inputs/smart/smart_test.go +++ b/plugins/inputs/smart/smart_test.go @@ -82,10 +82,10 @@ func TestGatherAttributes(t *testing.T) { }{ { map[string]interface{}{ - "value": int(200), - "worst": int(200), - "threshold": int(0), - "raw_value": int(0), + "value": int64(200), + "worst": int64(200), + "threshold": int64(0), + "raw_value": int64(0), "exit_status": int(0), }, map[string]string{ @@ -100,10 +100,10 @@ func TestGatherAttributes(t *testing.T) { }, { map[string]interface{}{ - "value": int(100), - "worst": int(100), - "threshold": int(0), - "raw_value": int(0), + "value": int64(100), + "worst": int64(100), + "threshold": int64(0), + "raw_value": int64(0), "exit_status": int(0), }, map[string]string{ @@ -118,10 +118,10 @@ func TestGatherAttributes(t *testing.T) { }, { map[string]interface{}{ - "value": 
int(99), - "worst": int(99), - "threshold": int(0), - "raw_value": int(2988), + "value": int64(99), + "worst": int64(99), + "threshold": int64(0), + "raw_value": int64(2988), "exit_status": int(0), }, map[string]string{ @@ -136,10 +136,10 @@ func TestGatherAttributes(t *testing.T) { }, { map[string]interface{}{ - "value": int(85), - "worst": int(85), - "threshold": int(0), - "raw_value": int(14879), + "value": int64(85), + "worst": int64(85), + "threshold": int64(0), + "raw_value": int64(14879), "exit_status": int(0), }, map[string]string{ @@ -154,10 +154,10 @@ func TestGatherAttributes(t *testing.T) { }, { map[string]interface{}{ - "value": int(253), - "worst": int(253), - "threshold": int(10), - "raw_value": int(2044932921600), + "value": int64(253), + "worst": int64(253), + "threshold": int64(10), + "raw_value": int64(2044932921600), "exit_status": int(0), }, map[string]string{ @@ -172,10 +172,10 @@ func TestGatherAttributes(t *testing.T) { }, { map[string]interface{}{ - "value": int(185), - "worst": int(185), - "threshold": int(100), - "raw_value": int(957808640337), + "value": int64(185), + "worst": int64(185), + "threshold": int64(100), + "raw_value": int64(957808640337), "exit_status": int(0), }, map[string]string{ @@ -190,10 +190,10 @@ func TestGatherAttributes(t *testing.T) { }, { map[string]interface{}{ - "value": int(55), - "worst": int(40), - "threshold": int(45), - "raw_value": int(45), + "value": int64(55), + "worst": int64(40), + "threshold": int64(45), + "raw_value": int64(45), "exit_status": int(0), }, map[string]string{ @@ -208,10 +208,10 @@ func TestGatherAttributes(t *testing.T) { }, { map[string]interface{}{ - "value": int(97), - "worst": int(97), - "threshold": int(0), - "raw_value": int(14716), + "value": int64(97), + "worst": int64(97), + "threshold": int64(0), + "raw_value": int64(14716), "exit_status": int(0), }, map[string]string{ @@ -226,10 +226,10 @@ func TestGatherAttributes(t *testing.T) { }, { map[string]interface{}{ - "value": 
int(66), - "worst": int(21), - "threshold": int(0), - "raw_value": int(34), + "value": int64(66), + "worst": int64(21), + "threshold": int64(0), + "raw_value": int64(34), "exit_status": int(0), }, map[string]string{ @@ -244,10 +244,10 @@ func TestGatherAttributes(t *testing.T) { }, { map[string]interface{}{ - "value": int(100), - "worst": int(100), - "threshold": int(0), - "raw_value": int(0), + "value": int64(100), + "worst": int64(100), + "threshold": int64(0), + "raw_value": int64(0), "exit_status": int(0), }, map[string]string{ @@ -262,10 +262,10 @@ func TestGatherAttributes(t *testing.T) { }, { map[string]interface{}{ - "value": int(200), - "worst": int(200), - "threshold": int(0), - "raw_value": int(0), + "value": int64(200), + "worst": int64(200), + "threshold": int64(0), + "raw_value": int64(0), "exit_status": int(0), }, map[string]string{ @@ -280,10 +280,10 @@ func TestGatherAttributes(t *testing.T) { }, { map[string]interface{}{ - "value": int(100), - "worst": int(253), - "threshold": int(0), - "raw_value": int(23709323), + "value": int64(100), + "worst": int64(253), + "threshold": int64(0), + "raw_value": int64(23709323), "exit_status": int(0), }, map[string]string{ @@ -312,9 +312,9 @@ func TestGatherAttributes(t *testing.T) { map[string]interface{}{ "exit_status": int(0), "health_ok": bool(true), - "read_error_rate": int(0), - "temp_c": int(34), - "udma_crc_errors": int(0), + "read_error_rate": int64(0), + "temp_c": int64(34), + "udma_crc_errors": int64(0), }, map[string]string{ "device": "/dev/ada0", @@ -358,9 +358,9 @@ func TestGatherNoAttributes(t *testing.T) { map[string]interface{}{ "exit_status": int(0), "health_ok": bool(true), - "read_error_rate": int(0), - "temp_c": int(34), - "udma_crc_errors": int(0), + "read_error_rate": int64(0), + "temp_c": int64(34), + "udma_crc_errors": int64(0), }, map[string]string{ "device": "/dev/ada0",