Skip to content

Commit

Permalink
Cherry-pick #7986 to 6.x: Better tracking of number of open file desc…
Browse files Browse the repository at this point in the history
…riptors of Filebeat (#8514)

* Better tracking of number of open file descriptors of Filebeat (#7986)

New metrics are introduced to better track the number of open file descriptors.

The initial issue requested the number of open file descriptors per input. Reporting the number of open files by harvesters is already implemented; it is reported as filebeat.harvester.open_files.

I included process-level file descriptor reporting for every Beat that runs on Linux.

New metrics

    beat.fd.open: Number of open files by a Beat process.
    It's the number of files under /proc/{{ filebeat-pid }}/fd. Only implemented on Linux.
    beat.fd.limit.soft: Soft limit of the Beat process.
    Could be used to notify a user if the process is reaching the limit (in the Monitoring UI).
    beat.fd.limit.hard: Hard limit of the Beat process.
    It is the max limit that can be set on a host without modifying kernel params.
(cherry picked from commit f10096a)
  • Loading branch information
kvch authored Oct 5, 2018
1 parent 060422b commit 174de51
Show file tree
Hide file tree
Showing 3 changed files with 91 additions and 17 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ https://github.com/elastic/beats/compare/v6.4.0...6.x[Check the HEAD diff]
- Add DNS processor with support for performing reverse lookups on IP addresses. {issue}7770[7770]
- Support for Kafka 2.0.0 in kafka output {pull}8399[8399]
- Add setting `setup.kibana.space.id` to support Kibana Spaces {pull}7942[7942]
- Better tracking of number of open file descriptors. {pull}7986[7986]

*Auditbeat*

Expand Down
104 changes: 87 additions & 17 deletions libbeat/cmd/instance/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"fmt"
"runtime"

"github.com/elastic/beats/libbeat/common"
"github.com/elastic/beats/libbeat/logp"
"github.com/elastic/beats/libbeat/metric/system/cpu"
"github.com/elastic/beats/libbeat/metric/system/process"
Expand All @@ -43,9 +44,8 @@ func setupMetrics(name string) error {
monitoring.NewFunc(beatMetrics, "cpu", reportBeatCPU, monitoring.Report)

monitoring.NewFunc(systemMetrics, "cpu", reportSystemCPUUsage, monitoring.Report)
if runtime.GOOS != "windows" {
monitoring.NewFunc(systemMetrics, "load", reportSystemLoadAverage, monitoring.Report)
}

setupPlatformSpecificMetrics()

beatProcessStats = &process.Stats{
Procs: []string{name},
Expand All @@ -59,6 +59,16 @@ func setupMetrics(name string) error {
return err
}

// setupPlatformSpecificMetrics registers the metric reporters that are only
// enabled on certain operating systems, based on runtime.GOOS:
//   - "load" under systemMetrics on every OS except Windows,
//   - "fd" under beatMetrics on Linux only.
func setupPlatformSpecificMetrics() {
	goos := runtime.GOOS

	// Nothing platform specific is registered on Windows.
	if goos == "windows" {
		return
	}
	monitoring.NewFunc(systemMetrics, "load", reportSystemLoadAverage, monitoring.Report)

	// File descriptor reporting is registered for Linux only.
	if goos != "linux" {
		return
	}
	monitoring.NewFunc(beatMetrics, "fd", reportFDUsage, monitoring.Report)
}

func reportMemStats(m monitoring.Mode, V monitoring.Visitor) {
var stats runtime.MemStats
runtime.ReadMemStats(&stats)
Expand All @@ -81,14 +91,9 @@ func reportMemStats(m monitoring.Mode, V monitoring.Visitor) {
}

func getRSSSize() (uint64, error) {
pid, err := process.GetSelfPid()
if err != nil {
return 0, fmt.Errorf("error getting PID for self process: %v", err)
}

state, err := beatProcessStats.GetOne(pid)
state, err := getBeatProcessState()
if err != nil {
return 0, fmt.Errorf("error retrieving process stats: %v", err)
return 0, err
}

iRss, err := state.GetValue("memory.rss.bytes")
Expand All @@ -103,6 +108,20 @@ func getRSSSize() (uint64, error) {
return rss, nil
}

// getBeatProcessState looks up the PID of the running process via
// process.GetSelfPid and returns whatever beatProcessStats.GetOne reports for
// that PID. On failure of either step it returns a nil map and an error that
// says which step failed.
func getBeatProcessState() (common.MapStr, error) {
	selfPid, pidErr := process.GetSelfPid()
	if pidErr != nil {
		return nil, fmt.Errorf("error getting PID for self process: %v", pidErr)
	}

	procState, statsErr := beatProcessStats.GetOne(selfPid)
	if statsErr == nil {
		return procState, nil
	}

	return nil, fmt.Errorf("error retrieving process stats: %v", statsErr)
}

func reportBeatCPU(_ monitoring.Mode, V monitoring.Visitor) {
V.OnRegistryStart()
defer V.OnRegistryFinished()
Expand Down Expand Up @@ -141,14 +160,9 @@ func reportBeatCPU(_ monitoring.Mode, V monitoring.Visitor) {
}

func getCPUUsage() (float64, *process.Ticks, error) {
pid, err := process.GetSelfPid()
if err != nil {
return 0.0, nil, fmt.Errorf("error getting PID for self process: %v", err)
}

state, err := beatProcessStats.GetOne(pid)
state, err := getBeatProcessState()
if err != nil {
return 0.0, nil, fmt.Errorf("error retrieving process stats: %v", err)
return 0.0, nil, err
}

iTotalCPUUsage, err := state.GetValue("cpu.total.value")
Expand Down Expand Up @@ -200,6 +214,62 @@ func getCPUUsage() (float64, *process.Ticks, error) {
return totalCPUUsage, &p, nil
}

// reportFDUsage reports the file descriptor figures returned by getFDUsage to
// the visitor V: "open" at the top level, and "hard" and "soft" inside a
// "limit" namespace. If getFDUsage fails, the error is logged and no values
// are reported; the registry start/finish callbacks are still invoked.
func reportFDUsage(_ monitoring.Mode, V monitoring.Visitor) {
	V.OnRegistryStart()
	defer V.OnRegistryFinished()

	openFDs, hard, soft, fdErr := getFDUsage()
	if fdErr != nil {
		logp.Err("Error while retrieving FD information: %v", fdErr)
		return
	}

	// Both limits are emitted together under the "limit" namespace.
	reportLimits := func() {
		monitoring.ReportInt(V, "hard", int64(hard))
		monitoring.ReportInt(V, "soft", int64(soft))
	}

	monitoring.ReportInt(V, "open", int64(openFDs))
	monitoring.ReportNamespace(V, "limit", reportLimits)
}

// getFDUsage reads the file descriptor figures of the running process from
// the state returned by getBeatProcessState.
//
// It looks up the keys "fd.open", "fd.limit.hard" and "fd.limit.soft" and
// requires each value to be a uint64. The results are returned in the order
// open, hardLimit, softLimit.
//
// If the process state cannot be retrieved, a key is missing, or a value has
// an unexpected type, all three numbers are returned as 0 together with an
// error naming the value that could not be read.
func getFDUsage() (open, hardLimit, softLimit uint64, err error) {
	state, err := getBeatProcessState()
	if err != nil {
		return 0, 0, 0, err
	}

	iOpen, err := state.GetValue("fd.open")
	if err != nil {
		return 0, 0, 0, fmt.Errorf("error getting number of open FD: %v", err)
	}

	open, ok := iOpen.(uint64)
	if !ok {
		return 0, 0, 0, fmt.Errorf("error converting value of open FDs to uint64: %v", iOpen)
	}

	iHardLimit, err := state.GetValue("fd.limit.hard")
	if err != nil {
		return 0, 0, 0, fmt.Errorf("error getting FD hard limit: %v", err)
	}

	hardLimit, ok = iHardLimit.(uint64)
	if !ok {
		return 0, 0, 0, fmt.Errorf("error converting values of FD hard limit: %v", iHardLimit)
	}

	iSoftLimit, err := state.GetValue("fd.limit.soft")
	if err != nil {
		// Name the soft limit here so a failure is not mistaken for a hard
		// limit problem.
		return 0, 0, 0, fmt.Errorf("error getting FD soft limit: %v", err)
	}

	softLimit, ok = iSoftLimit.(uint64)
	if !ok {
		return 0, 0, 0, fmt.Errorf("error converting values of FD soft limit: %v", iSoftLimit)
	}

	return open, hardLimit, softLimit, nil
}

func reportSystemLoadAverage(_ monitoring.Mode, V monitoring.Visitor) {
V.OnRegistryStart()
defer V.OnRegistryFinished()
Expand Down
3 changes: 3 additions & 0 deletions libbeat/monitoring/report/log/log.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,9 @@ var gauges = map[string]bool{
"beat.cpu.total.value": true,
"beat.cpu.total.ticks": true,
"beat.cpu.total.time": true,
"beat.fd.open": true,
"beat.fd.limit.hard": true,
"beat.fd.limit.soft": true,
"system.load.1": true,
"system.load.5": true,
"system.load.15": true,
Expand Down

0 comments on commit 174de51

Please sign in to comment.