diff --git a/CHANGELOG-3.5.md b/CHANGELOG-3.5.md index 0800b082785..3dea56d2e97 100644 --- a/CHANGELOG-3.5.md +++ b/CHANGELOG-3.5.md @@ -91,6 +91,7 @@ Note that any `etcd_debugging_*` metrics are experimental and subject to change. - Add [`etcd_server_client_requests_total` with `"type"` and `"client_api_version"` labels](https://github.com/etcd-io/etcd/pull/11687). - Add [`etcd_wal_write_bytes_total`](https://github.com/etcd-io/etcd/pull/11738). - Add [`etcd_debugging_auth_revision`](https://github.com/etcd-io/etcd/commit/f14d2a087f7b0fd6f7980b95b5e0b945109c95f3). +- Add [`os_fd_used` and `os_fd_limit` to monitor current OS file descriptors](https://github.com/etcd-io/etcd/pull/12214). ### etcd server @@ -130,12 +131,16 @@ Note that any `etcd_debugging_*` metrics are experimental and subject to change. - Add [`--unsafe-no-fsync`](https://github.com/etcd-io/etcd/pull/11946) flag. - Setting the flag disables all uses of fsync, which is unsafe and will cause data loss. This flag makes it possible to run an etcd node for testing and development without placing lots of load on the file system. - Add [etcd --auth-token-ttl](https://github.com/etcd-io/etcd/pull/11980) flag to customize `simpleTokenTTL` settings. -- Improve [runtime.FDUsage objects malloc of Memory Usage and CPU Usage](https://github.com/etcd-io/etcd/pull/11986). +- Improve [`runtime.FDUsage` call pattern to reduce object allocations, memory usage, and CPU usage](https://github.com/etcd-io/etcd/pull/11986). - Improve [mvcc.watchResponse channel Memory Usage](https://github.com/etcd-io/etcd/pull/11987). - Log [expensive request info in UnaryInterceptor](https://github.com/etcd-io/etcd/pull/12086). - [Fix invalid Go type in etcdserverpb](https://github.com/etcd-io/etcd/pull/12000). - [Improve healthcheck by using v3 range request and its corresponding timeout](https://github.com/etcd-io/etcd/pull/12195). 
+### Package `runtime` + +- Optimize [`runtime.FDUsage` by removing unnecessary sorting](https://github.com/etcd-io/etcd/pull/12214). + ### Package `embed` - Remove [`embed.Config.Debug`](https://github.com/etcd-io/etcd/pull/10947). diff --git a/etcdserver/metrics.go b/etcdserver/metrics.go index 0c0ce912df4..417e05f21a6 100644 --- a/etcdserver/metrics.go +++ b/etcdserver/metrics.go @@ -151,6 +151,19 @@ var ( Help: "Server or member ID in hexadecimal format. 1 for 'server_id' label with current ID.", }, []string{"server_id"}) + + fdUsed = prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: "os", + Subsystem: "fd", + Name: "used", + Help: "The number of used file descriptors.", + }) + fdLimit = prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: "os", + Subsystem: "fd", + Name: "limit", + Help: "The file descriptor limit.", + }) ) func init() { @@ -174,6 +187,8 @@ func init() { prometheus.MustRegister(isLearner) prometheus.MustRegister(learnerPromoteSucceed) prometheus.MustRegister(learnerPromoteFailed) + prometheus.MustRegister(fdUsed) + prometheus.MustRegister(fdLimit) currentVersion.With(prometheus.Labels{ "server_version": version.Version, @@ -184,7 +199,6 @@ func init() { } func monitorFileDescriptor(lg *zap.Logger, done <-chan struct{}) { - // This ticker will check File Descriptor Requirements ,and count all fds in used. // And recorded some logs when in used >= limit/5*4. Just recorded message. // If fds was more than 10K,It's low performance due to FDUsage() works. 
@@ -198,11 +212,13 @@ func monitorFileDescriptor(lg *zap.Logger, done <-chan struct{}) { lg.Warn("failed to get file descriptor usage", zap.Error(err)) return } + fdUsed.Set(float64(used)) limit, err := runtime.FDLimit() if err != nil { lg.Warn("failed to get file descriptor limit", zap.Error(err)) return } + fdLimit.Set(float64(limit)) if used >= limit/5*4 { lg.Warn("80% of file descriptors are used", zap.Uint64("used", used), zap.Uint64("limit", limit)) } diff --git a/pkg/runtime/fds_linux.go b/pkg/runtime/fds_linux.go index 8e9359db28c..4906d678ff4 100644 --- a/pkg/runtime/fds_linux.go +++ b/pkg/runtime/fds_linux.go @@ -16,7 +16,7 @@ package runtime import ( - "io/ioutil" + "os" "syscall" ) @@ -29,9 +29,20 @@ func FDLimit() (uint64, error) { } func FDUsage() (uint64, error) { - fds, err := ioutil.ReadDir("/proc/self/fd") + return countFiles("/proc/self/fd") +} + +// countFiles reads the directory named by dirname and returns the number of entries in it. +// This is the same as stdlib "io/ioutil.ReadDir" but without sorting the entries. +func countFiles(dirname string) (uint64, error) { + f, err := os.Open(dirname) + if err != nil { + return 0, err + } + list, err := f.Readdir(-1) + f.Close() if err != nil { return 0, err } - return uint64(len(fds)), nil + return uint64(len(list)), nil }