Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Tolerate collector failures #1769

Merged
merged 23 commits into from
Nov 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 0 additions & 5 deletions .golangci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ linters:
- exportloopref
- fatcontext
- funlen
- gochecknoglobals
- gocognit
- goconst
- gocyclo
Expand Down Expand Up @@ -88,7 +87,3 @@ issues:
- text: "don't use ALL_CAPS in Go names; use CamelCase"
linters:
- revive
- path: internal/perfdata/v1/
linters:
- godox
- stylecheck
13 changes: 13 additions & 0 deletions .run/all.run.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
<component name="ProjectRunConfigurationManager">
<configuration default="false" name="all" type="GoApplicationRunConfiguration" factoryName="Go Application" folderName="run">
<module name="windows_exporter" />
<working_directory value="$PROJECT_DIR$" />
<parameters value="--web.listen-address=127.0.0.1:9182 --log.level=debug --collectors.enabled=ad,adcs,adfs,cache,container,cpu,cpu_info,cs,dfsr,dhcp,diskdrive,dns,exchange,filetime,fsrmquota,hyperv,iis,license,logical_disk,logon,memory,mscluster,msmq,mssql,net,netframework,nps,os,pagefile,perfdata,physical_disk,printer,process,remote_fx,scheduled_task,service,smb,smbclient,smtp,system,tcp,terminal_services,textfile,thermalzone,time,udp,update,vmware" />
<sudo value="true" />
<kind value="PACKAGE" />
<package value="github.com/prometheus-community/windows_exporter/cmd/windows_exporter" />
<directory value="$PROJECT_DIR$" />
<filePath value="$PROJECT_DIR$/exporter.go" />
<method v="2" />
</configuration>
</component>
17 changes: 10 additions & 7 deletions cmd/windows_exporter/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ import (
"github.com/prometheus-community/windows_exporter/internal/httphandler"
"github.com/prometheus-community/windows_exporter/internal/log"
"github.com/prometheus-community/windows_exporter/internal/log/flag"
"github.com/prometheus-community/windows_exporter/internal/utils"
"github.com/prometheus-community/windows_exporter/pkg/collector"
"github.com/prometheus/common/version"
"github.com/prometheus/exporter-toolkit/web"
Expand All @@ -64,6 +65,8 @@ func main() {
}

func run() int {
startTime := time.Now()

app := kingpin.New("windows_exporter", "A metrics collector for Windows.")

var (
Expand Down Expand Up @@ -191,7 +194,7 @@ func run() int {

enabledCollectorList := expandEnabledCollectors(*enabledCollectors)
if err := collectors.Enable(enabledCollectorList); err != nil {
logger.Error("Couldn't enable collectors",
logger.Error("couldn't enable collectors",
slog.Any("err", err),
)

Expand All @@ -200,11 +203,11 @@ func run() int {

// Initialize collectors before loading
if err = collectors.Build(logger); err != nil {
logger.Error("Couldn't load collectors",
slog.Any("err", err),
)

return 1
for _, err := range utils.SplitError(err) {
logger.Warn("couldn't initialize collector",
slog.Any("err", err),
)
}
}

logCurrentUser(logger)
Expand All @@ -228,7 +231,7 @@ func run() int {
mux.HandleFunc("GET /debug/pprof/trace", pprof.Trace)
}

logger.Info("Starting windows_exporter",
logger.Info(fmt.Sprintf("starting windows_exporter in %s", time.Since(startTime)),
slog.String("version", version.Version),
slog.String("branch", version.Branch),
slog.String("revision", version.GetRevision()),
Expand Down
4 changes: 2 additions & 2 deletions internal/collector/ad/ad.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
package ad

import (
"errors"
"fmt"
"log/slog"

Expand All @@ -31,6 +30,7 @@ const Name = "ad"

type Config struct{}

//nolint:gochecknoglobals
var ConfigDefaults = Config{}

type Collector struct {
Expand Down Expand Up @@ -671,7 +671,7 @@ func (c *Collector) Collect(ch chan<- prometheus.Metric) error {
data, ok := perfData["NTDS"]

if !ok {
return errors.New("perflib query for DirectoryServices (AD) returned empty result set")
return fmt.Errorf("failed to collect DirectoryServices (AD) metrics: %w", types.ErrNoData)
}

ch <- prometheus.MustNewConstMetric(
Expand Down
4 changes: 2 additions & 2 deletions internal/collector/adcs/adcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
package adcs

import (
"errors"
"fmt"
"log/slog"

Expand All @@ -32,6 +31,7 @@ const Name = "adcs"

type Config struct{}

//nolint:gochecknoglobals
var ConfigDefaults = Config{}

type Collector struct {
Expand Down Expand Up @@ -191,7 +191,7 @@ func (c *Collector) Collect(ch chan<- prometheus.Metric) error {
}

if len(perfData) == 0 {
return errors.New("perflib query for Certification Authority (ADCS) returned empty result set")
return fmt.Errorf("failed to collect Certification Authority (ADCS) metrics: %w", types.ErrNoData)
}

for name, data := range perfData {
Expand Down
8 changes: 4 additions & 4 deletions internal/collector/adfs/adfs.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
package adfs

import (
"errors"
"fmt"
"log/slog"
"maps"
Expand All @@ -34,6 +33,7 @@ const Name = "adfs"

type Config struct{}

//nolint:gochecknoglobals
var ConfigDefaults = Config{}

type Collector struct {
Expand Down Expand Up @@ -160,7 +160,7 @@ func (c *Collector) Build(_ *slog.Logger, _ *mi.Session) error {
avgConfigDBQueryTime,
federationMetadataRequests,
})
if err != nil && !errors.Is(err, perfdata.ErrNoData) {
if err != nil {
return fmt.Errorf("failed to create AD FS collector: %w", err)
}

Expand Down Expand Up @@ -435,13 +435,13 @@ func (c *Collector) Collect(ch chan<- prometheus.Metric) error {
instanceKey := slices.Collect(maps.Keys(data))

if len(instanceKey) == 0 {
return errors.New("perflib query for ADFS returned empty result set")
return fmt.Errorf("failed to collect ADFS metrics: %w", types.ErrNoData)
}

adfsData, ok := data[instanceKey[0]]

if !ok {
return errors.New("perflib query for ADFS returned empty result set")
return fmt.Errorf("failed to collect ADFS metrics: %w", types.ErrNoData)
}

ch <- prometheus.MustNewConstMetric(
Expand Down
4 changes: 2 additions & 2 deletions internal/collector/cache/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
package cache

import (
"errors"
"fmt"
"log/slog"

Expand All @@ -31,6 +30,7 @@ const Name = "cache"

type Config struct{}

//nolint:gochecknoglobals
var ConfigDefaults = Config{}

// A Collector is a Prometheus Collector for Perflib Cache metrics.
Expand Down Expand Up @@ -322,7 +322,7 @@ func (c *Collector) Collect(ch chan<- prometheus.Metric) error {
cacheData, ok := data[perfdata.InstanceEmpty]

if !ok {
return errors.New("perflib query for Cache returned empty result set")
return fmt.Errorf("failed to collect Cache metrics: %w", types.ErrNoData)
}

ch <- prometheus.MustNewConstMetric(
Expand Down
1 change: 1 addition & 0 deletions internal/collector/container/container.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ const Name = "container"

type Config struct{}

//nolint:gochecknoglobals
var ConfigDefaults = Config{}

// A Collector is a Prometheus Collector for containers metrics.
Expand Down
1 change: 1 addition & 0 deletions internal/collector/cpu/cpu.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ const Name = "cpu"

type Config struct{}

//nolint:gochecknoglobals
var ConfigDefaults = Config{}

type Collector struct {
Expand Down
6 changes: 6 additions & 0 deletions internal/collector/cpu_info/cpu_info.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ const Name = "cpu_info"

type Config struct{}

//nolint:gochecknoglobals
var ConfigDefaults = Config{}

// A Collector is a Prometheus Collector for a few WMI metrics in Win32_Processor.
Expand Down Expand Up @@ -147,6 +148,11 @@ func (c *Collector) Build(_ *slog.Logger, miSession *mi.Session) error {
nil,
)

var dst []miProcessor
if err := c.miSession.Query(&dst, mi.NamespaceRootCIMv2, c.miQuery); err != nil {
return fmt.Errorf("WMI query failed: %w", err)
}

return nil
}

Expand Down
1 change: 1 addition & 0 deletions internal/collector/cs/cs.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ const Name = "cs"

type Config struct{}

//nolint:gochecknoglobals
var ConfigDefaults = Config{}

// A Collector is a Prometheus Collector for WMI metrics.
Expand Down
2 changes: 1 addition & 1 deletion internal/collector/dfsr/const.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,5 +53,5 @@ const (
databaseLookupsTotal = "Database Lookups"
usnJournalRecordsReadTotal = "USN Journal Records Read"
usnJournalRecordsAcceptedTotal = "USN Journal Records Accepted"
usnJournalUnreadPercentage = "USN Journal Records Unread Percentage"
usnJournalUnreadPercentage = "USN Journal Unread Percentage"
)
7 changes: 4 additions & 3 deletions internal/collector/dfsr/dfsr.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ type Config struct {
CollectorsEnabled []string `yaml:"collectors_enabled"`
}

//nolint:gochecknoglobals
var ConfigDefaults = Config{
CollectorsEnabled: []string{"connection", "folder", "volume"},
}
Expand Down Expand Up @@ -542,7 +543,7 @@ func (c *Collector) collectPDHConnection(ch chan<- prometheus.Metric) error {
}

if len(perfData) == 0 {
return errors.New("perflib query for DFS Replication Connections returned empty result set")
return fmt.Errorf("failed to collect DFS Replication Connections metrics: %w", types.ErrNoData)
}

for name, connection := range perfData {
Expand Down Expand Up @@ -620,7 +621,7 @@ func (c *Collector) collectPDHFolder(ch chan<- prometheus.Metric) error {
}

if len(perfData) == 0 {
return errors.New("perflib query for DFS Replicated Folders returned empty result set")
return fmt.Errorf("failed to collect DFS Replicated Folders metrics: %w", types.ErrNoData)
}

for name, folder := range perfData {
Expand Down Expand Up @@ -824,7 +825,7 @@ func (c *Collector) collectPDHVolume(ch chan<- prometheus.Metric) error {
}

if len(perfData) == 0 {
return errors.New("perflib query for DFS Replication Volumes returned empty result set")
return fmt.Errorf("failed to collect DFS Replication Volumes metrics: %w", types.ErrNoData)
}

for name, volume := range perfData {
Expand Down
4 changes: 2 additions & 2 deletions internal/collector/dhcp/dhcp.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
package dhcp

import (
"errors"
"fmt"
"log/slog"

Expand All @@ -31,6 +30,7 @@ const Name = "dhcp"

type Config struct{}

//nolint:gochecknoglobals
var ConfigDefaults = Config{}

// A Collector is a Prometheus Collector perflib DHCP metrics.
Expand Down Expand Up @@ -288,7 +288,7 @@ func (c *Collector) Collect(ch chan<- prometheus.Metric) error {

data, ok := perfData[perfdata.InstanceEmpty]
if !ok {
return errors.New("perflib query for DHCP Server returned empty result set")
return fmt.Errorf("failed to collect DHCP Server metrics: %w", types.ErrNoData)
}

ch <- prometheus.MustNewConstMetric(
Expand Down
7 changes: 7 additions & 0 deletions internal/collector/diskdrive/diskdrive.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ const Name = "diskdrive"

type Config struct{}

//nolint:gochecknoglobals
var ConfigDefaults = Config{}

// A Collector is a Prometheus Collector for a few WMI metrics in Win32_DiskDrive.
Expand Down Expand Up @@ -119,6 +120,11 @@ func (c *Collector) Build(_ *slog.Logger, miSession *mi.Session) error {
nil,
)

var dst []diskDrive
if err := c.miSession.Query(&dst, mi.NamespaceRootCIMv2, c.miQuery); err != nil {
return fmt.Errorf("WMI query failed: %w", err)
}

return nil
}

Expand All @@ -133,6 +139,7 @@ type diskDrive struct {
Availability uint16 `mi:"Availability"`
}

//nolint:gochecknoglobals
var (
allDiskStatus = []string{
"OK",
Expand Down
4 changes: 2 additions & 2 deletions internal/collector/dns/dns.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
package dns

import (
"errors"
"fmt"
"log/slog"

Expand All @@ -31,6 +30,7 @@ const Name = "dns"

type Config struct{}

//nolint:gochecknoglobals
var ConfigDefaults = Config{}

// A Collector is a Prometheus Collector for WMI Win32_PerfRawData_DNS_DNS metrics.
Expand Down Expand Up @@ -284,7 +284,7 @@ func (c *Collector) Collect(ch chan<- prometheus.Metric) error {

data, ok := perfData[perfdata.InstanceEmpty]
if !ok {
return errors.New("perflib query for DNS returned empty result set")
return fmt.Errorf("failed to collect DNS metrics: %w", types.ErrNoData)
}

ch <- prometheus.MustNewConstMetric(
Expand Down
1 change: 1 addition & 0 deletions internal/collector/exchange/exchange.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ type Config struct {
CollectorsEnabled []string `yaml:"collectors_enabled"`
}

//nolint:gochecknoglobals
var ConfigDefaults = Config{
CollectorsEnabled: []string{
adAccessProcesses,
Expand Down
3 changes: 1 addition & 2 deletions internal/collector/exchange/exchange_active_sync.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
package exchange

import (
"errors"
"fmt"

"github.com/prometheus-community/windows_exporter/internal/perfdata"
Expand Down Expand Up @@ -73,7 +72,7 @@ func (c *Collector) collectActiveSync(ch chan<- prometheus.Metric) error {
}

if len(perfData) == 0 {
return errors.New("perflib query for MSExchange ActiveSync returned empty result set")
return fmt.Errorf("failed to collect MSExchange ActiveSync metrics: %w", types.ErrNoData)
}

for _, data := range perfData {
Expand Down
Loading