Skip to content

Commit

Permalink
process_collector: fill in most statistics on macOS
Browse files Browse the repository at this point in the history
Unfortunately, the virtual memory, resident memory, and network stats will
require access to undocumented C functions.  I was warned off of cgo in IRC
because it would then have to be enabled in a bunch of different projects that
use this module, but I already was against it because that would break the
ability to cross-compile.  There is no interface to `dlopen` built into golang.
The `github.com/ebitengine/purego` module looks promising (I can cross-compile
and call these methods), but I'm currently getting unexpected results.  I'll
follow up with that separately if I can get it working, but hopefully this stuff
is pretty uncontroversial.

Tested on macOS 10.14.6 (amd64), macOS 14.6.1 (amd64), and macOS 15.0 (arm64)
by spawning `/usr/bin/ulimit -a -S` and `/usr/sbin/lsof -c $my_process` from
the test exporter process, and `ps -o lstart,vsize,rss,utime,stime,command` from
the shell, and comparing results with the exported metrics.

I can't find documentation for `RLIMIT_AS` on macOS (specifically if it's in
bytes or pages).  It's currently being reported back as `RLIM_INFINITY`, which
seems reasonable, because I've come across reports that the value is ignored
anyway[1].  The bash 3.2 code for the built-in `ulimit` divides the value
reported by `getrlimit(2)` by 1024 when printing, as it does for `RLIMIT_DATA`,
which is documented as being bytes in `getrlimit(2)`.  The help for `ulimit`
indicates it prints both in kbytes, so it's reasonable to assume this is already
in bytes.

[1] https://issues.chromium.org/issues/40581251#comment3

Signed-off-by: Matt Harbison <[email protected]>
  • Loading branch information
mharbison72 committed Aug 29, 2024
1 parent dbf72fc commit 5836830
Show file tree
Hide file tree
Showing 2 changed files with 114 additions and 2 deletions.
112 changes: 112 additions & 0 deletions prometheus/process_collector_darwin.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
// Copyright 2015 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build darwin

package prometheus

import (
"fmt"
"golang.org/x/sys/unix"
"os"
"syscall"
"time"
)

// canCollectProcess reports whether process metrics can be collected by this
// build. This implementation unconditionally answers true.
func canCollectProcess() bool {
	return true
}

// getSoftLimit looks up the resource limit identified by which via
// syscall.Getrlimit and returns its current (soft) value. If the lookup
// fails, it returns 0 together with the error from the system call.
func getSoftLimit(which int) (uint64, error) {
	var limit syscall.Rlimit

	err := syscall.Getrlimit(which, &limit)
	if err != nil {
		return 0, err
	}

	return limit.Cur, nil
}

// getOpenFileCount returns the number of file descriptors currently open in
// this process, determined by listing the entries of /dev/fd. On failure it
// returns 0 and the error from opening or reading that directory.
//
// An alternative would be the undocumented proc_pidinfo(PROC_PIDLISTFDS),
// which returns the list of open fds directly. That needs a way to call C
// APIs, but would use fewer system calls and would keep working when the
// process is already at its open file soft limit.
func getOpenFileCount() (float64, error) {
	fdDir, err := os.Open("/dev/fd")
	if err != nil {
		return 0.0, err
	}
	defer fdDir.Close()

	// Only the names are needed. ReadDir() is avoided because it calls
	// stat(2) on each descriptor: that information is unused, and KQUEUE
	// descriptors fail stat(2), which would make the whole listing fail.
	entries, err := fdDir.Readdirnames(0)
	if err != nil {
		return 0.0, err
	}

	// The listing includes the descriptor opened above for /dev/fd itself,
	// so leave that one out of the count.
	count := len(entries) - 1
	return float64(count), nil
}

// processCollect sends the process metrics that this implementation can
// obtain to ch, in this order: start time, total CPU time, open file
// descriptor count, open file soft limit, and address space soft limit.
// Whenever a value cannot be fetched, the error is handed to c.reportError
// together with the descriptor of the affected metric instead.
func (c *processCollector) processCollect(ch chan<- Metric) {
	// Start time comes from the proc structure that sysctl returns for our
	// own PID; exactly one structure is expected.
	procs, err := unix.SysctlKinfoProcSlice("kern.proc.pid", os.Getpid())
	switch {
	case err != nil:
		c.reportError(ch, c.startTime, err)
	case len(procs) != 1:
		err = fmt.Errorf("sysctl() returned %d proc structs (expected 1)", len(procs))
		c.reportError(ch, c.startTime, err)
	default:
		// NOTE(review): if Nano() yields an integer, this division drops the
		// sub-second part of the start time — confirm that is intended.
		startSeconds := procs[0].Proc.P_starttime.Nano() / 1e9
		ch <- MustNewConstMetric(c.startTime, GaugeValue, float64(startSeconds))
	}

	// The proc structure fetched above carries an Rusage member, but it is
	// not filled in, so the usage is fetched with getrusage(2) instead. That
	// call fills out UTime, STime, and Maxrss, but not Ixrss, Idrss, or Isrss
	// for the memory usage. Memory stats will require access to the C API to
	// call task_info(TASK_BASIC_INFO).
	var usage unix.Rusage

	if usageErr := unix.Getrusage(syscall.RUSAGE_SELF, &usage); usageErr != nil {
		c.reportError(ch, c.cpuTotal, usageErr)
	} else {
		// System plus user time, summed in nanoseconds and exported as seconds.
		spent := time.Duration(usage.Stime.Nano() + usage.Utime.Nano())
		ch <- MustNewConstMetric(c.cpuTotal, CounterValue, spent.Seconds())
	}

	// TODO: publish c.vsize and c.rss values

	if openCount, countErr := getOpenFileCount(); countErr != nil {
		c.reportError(ch, c.openFDs, countErr)
	} else {
		ch <- MustNewConstMetric(c.openFDs, GaugeValue, openCount)
	}

	if fileLimit, limitErr := getSoftLimit(syscall.RLIMIT_NOFILE); limitErr != nil {
		c.reportError(ch, c.maxFDs, limitErr)
	} else {
		ch <- MustNewConstMetric(c.maxFDs, GaugeValue, float64(fileLimit))
	}

	if vsizeLimit, limitErr := getSoftLimit(syscall.RLIMIT_AS); limitErr != nil {
		c.reportError(ch, c.maxVsize, limitErr)
	} else {
		ch <- MustNewConstMetric(c.maxVsize, GaugeValue, float64(vsizeLimit))
	}

	// TODO: socket(PF_SYSTEM) to fetch "com.apple.network.statistics" might
	// be able to get the per-process network send/receive counts.
}
4 changes: 2 additions & 2 deletions prometheus/process_collector_other.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !windows && !js && !wasip1
// +build !windows,!js,!wasip1
//go:build !windows && !js && !wasip1 && !darwin
// +build !windows,!js,!wasip1,!darwin

package prometheus

Expand Down

0 comments on commit 5836830

Please sign in to comment.