Skip to content

Commit

Permalink
status: export metrics about MemStats into timeseries
Browse files Browse the repository at this point in the history
This commit exposes 5 metrics into cockroachdb's RuntimeStatSampler timeseries.
The added metrics are MemStackSysBytes, HeapFragmentBytes, HeapReservedBytes,
HeapReleasedBytes, TotalAlloc. These metrics are derived from runtime/metrics.

Fixes: cockroachdb#96717

Release note: None
  • Loading branch information
lyang24 committed Mar 14, 2024
1 parent 8da64fe commit f86f758
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 3 deletions.
5 changes: 5 additions & 0 deletions docs/generated/metrics/metrics.html
Original file line number Diff line number Diff line change
Expand Up @@ -1580,6 +1580,10 @@
<tr><td>SERVER</td><td>sys.go.allocbytes</td><td>Current bytes of memory allocated by go</td><td>Memory</td><td>GAUGE</td><td>BYTES</td><td>AVG</td><td>NONE</td></tr>
<tr><td>SERVER</td><td>sys.go.totalbytes</td><td>Total bytes of memory allocated by go, but not released</td><td>Memory</td><td>GAUGE</td><td>BYTES</td><td>AVG</td><td>NONE</td></tr>
<tr><td>SERVER</td><td>sys.goroutines</td><td>Current number of goroutines</td><td>goroutines</td><td>GAUGE</td><td>COUNT</td><td>AVG</td><td>NONE</td></tr>
<tr><td>SERVER</td><td>sys.heap.allocbytes</td><td>Cumulative bytes allocated for heap objects.</td><td>Memory</td><td>GAUGE</td><td>BYTES</td><td>AVG</td><td>NONE</td></tr>
<tr><td>SERVER</td><td>sys.heap.heapfragmentbytes</td><td>Total heap fragmentation bytes, derived from bytes in in-use spans subtracts bytes allocated</td><td>Memory</td><td>GAUGE</td><td>BYTES</td><td>AVG</td><td>NONE</td></tr>
<tr><td>SERVER</td><td>sys.heap.heapreleasedbytes</td><td>Total bytes returned to the OS from heap.</td><td>Memory</td><td>GAUGE</td><td>BYTES</td><td>AVG</td><td>NONE</td></tr>
<tr><td>SERVER</td><td>sys.heap.heapreservedbytes</td><td>Total bytes reserved by heap, derived from bytes in idle (unused) spans subtracts bytes returned to the OS</td><td>Memory</td><td>GAUGE</td><td>BYTES</td><td>AVG</td><td>NONE</td></tr>
<tr><td>SERVER</td><td>sys.host.disk.io.time</td><td>Time spent reading from or writing to all disks since this process started (as reported by the OS)</td><td>Time</td><td>GAUGE</td><td>NANOSECONDS</td><td>AVG</td><td>NONE</td></tr>
<tr><td>SERVER</td><td>sys.host.disk.iopsinprogress</td><td>IO operations currently in progress on this host (as reported by the OS)</td><td>Operations</td><td>GAUGE</td><td>COUNT</td><td>AVG</td><td>NONE</td></tr>
<tr><td>SERVER</td><td>sys.host.disk.read.bytes</td><td>Bytes read from all disks since this process started (as reported by the OS)</td><td>Bytes</td><td>GAUGE</td><td>BYTES</td><td>AVG</td><td>NONE</td></tr>
Expand All @@ -1599,6 +1603,7 @@
<tr><td>SERVER</td><td>sys.host.net.send.packets</td><td>Packets sent on all network interfaces since this process started (as reported by the OS)</td><td>Packets</td><td>GAUGE</td><td>COUNT</td><td>AVG</td><td>NONE</td></tr>
<tr><td>SERVER</td><td>sys.rss</td><td>Current process RSS</td><td>RSS</td><td>GAUGE</td><td>BYTES</td><td>AVG</td><td>NONE</td></tr>
<tr><td>SERVER</td><td>sys.runnable.goroutines.per.cpu</td><td>Average number of goroutines that are waiting to run, normalized by number of cores</td><td>goroutines</td><td>GAUGE</td><td>COUNT</td><td>AVG</td><td>NONE</td></tr>
<tr><td>SERVER</td><td>sys.stack.systembytes</td><td>Stack memories obtained from the OS.</td><td>Memory</td><td>GAUGE</td><td>BYTES</td><td>AVG</td><td>NONE</td></tr>
<tr><td>SERVER</td><td>sys.totalmem</td><td>Total memory (both free and used)</td><td>Memory</td><td>GAUGE</td><td>BYTES</td><td>AVG</td><td>NONE</td></tr>
<tr><td>SERVER</td><td>sys.uptime</td><td>Process uptime</td><td>Uptime</td><td>GAUGE</td><td>SECONDS</td><td>AVG</td><td>NONE</td></tr>
</tbody>
Expand Down
57 changes: 54 additions & 3 deletions pkg/server/status/runtime.go
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,36 @@ var (
Measurement: "Memory",
Unit: metric.Unit_BYTES,
}
// metaMemStackSysBytes describes the sys.stack.systembytes gauge (unit: bytes).
// SampleEnvironment updates the gauge with the value sampled for
// runtimeMetricMemStackOSBytes.
metaMemStackSysBytes = metric.Metadata{
Name: "sys.stack.systembytes",
Help: "Stack memories obtained from the OS.",
Measurement: "Memory",
Unit: metric.Unit_BYTES,
}
// metaHeapFragmentBytes describes the sys.heap.heapfragmentbytes gauge
// (unit: bytes). SampleEnvironment updates the gauge with the value sampled
// for runtimeMetricHeapFragmentBytes ("/memory/classes/heap/unused:bytes").
metaHeapFragmentBytes = metric.Metadata{
Name: "sys.heap.heapfragmentbytes",
Help: "Total heap fragmentation bytes, derived from bytes in in-use spans subtracts bytes allocated",
Measurement: "Memory",
Unit: metric.Unit_BYTES,
}
// metaHeapReservedBytes describes the sys.heap.heapreservedbytes gauge
// (unit: bytes). SampleEnvironment updates the gauge with the value sampled
// for runtimeMetricHeapReservedBytes.
metaHeapReservedBytes = metric.Metadata{
Name: "sys.heap.heapreservedbytes",
Help: "Total bytes reserved by heap, derived from bytes in idle (unused) spans subtracts bytes returned to the OS",
Measurement: "Memory",
Unit: metric.Unit_BYTES,
}
// metaHeapReleasedBytes describes the sys.heap.heapreleasedbytes gauge
// (unit: bytes). SampleEnvironment updates the gauge with the value sampled
// for runtimeMetricHeapReleasedBytes; the same sample is subtracted from
// runtimeMetricGoTotal when SampleEnvironment computes goTotal.
metaHeapReleasedBytes = metric.Metadata{
Name: "sys.heap.heapreleasedbytes",
Help: "Total bytes returned to the OS from heap.",
Measurement: "Memory",
Unit: metric.Unit_BYTES,
}
// metaTotalAlloc describes the sys.heap.allocbytes gauge (unit: bytes).
// SampleEnvironment updates the gauge with the value sampled for
// runtimeMetricCumulativeAlloc ("/gc/heap/allocs:bytes").
// NOTE(review): the value is cumulative, yet it is registered through
// metric.NewGauge -- confirm that a gauge rather than a counter is intended.
metaTotalAlloc = metric.Metadata{
Name: "sys.heap.allocbytes",
Help: "Cumulative bytes allocated for heap objects.",
Measurement: "Memory",
Unit: metric.Unit_BYTES,
}
metaFDOpen = metric.Metadata{
Name: "sys.fd.open",
Help: "Process open file descriptors",
Expand Down Expand Up @@ -309,6 +339,10 @@ const runtimeMetricGCAssist = "/cpu/classes/gc/mark/assist:cpu-seconds"
// yet been marked free by the garbage collector.
const runtimeMetricHeapAlloc = "/memory/classes/heap/objects:bytes"

// Cumulative sum of memory allocated to the heap by the
// application. Listed in runtimeMetrics so that it is sampled, and
// reported through the TotalAlloc gauge (sys.heap.allocbytes).
const runtimeMetricCumulativeAlloc = "/gc/heap/allocs:bytes"

// Memory that is reserved for heap objects but is not currently
// used to hold heap objects.
const runtimeMetricHeapFragmentBytes = "/memory/classes/heap/unused:bytes"
Expand Down Expand Up @@ -356,6 +390,7 @@ var runtimeMetrics = []string{
runtimeMetricMemStackHeapBytes,
runtimeMetricMemStackOSBytes,
runtimeMetricGoTotal,
runtimeMetricCumulativeAlloc,
}

// GoRuntimeSampler are a collection of metrics to sample from golang's runtime environment and
Expand Down Expand Up @@ -495,8 +530,13 @@ type RuntimeStatSampler struct {
// CPU stats for the CRDB process usage.
HostCPUCombinedPercentNorm *metric.GaugeFloat64
// Memory stats.
RSSBytes *metric.Gauge
TotalMemBytes *metric.Gauge
RSSBytes *metric.Gauge
TotalMemBytes *metric.Gauge
MemStackSysBytes *metric.Gauge
HeapFragmentBytes *metric.Gauge
HeapReservedBytes *metric.Gauge
HeapReleasedBytes *metric.Gauge
TotalAlloc *metric.Gauge
// File descriptor stats.
FDOpen *metric.Gauge
FDSoftLimit *metric.Gauge
Expand Down Expand Up @@ -585,6 +625,11 @@ func NewRuntimeStatSampler(ctx context.Context, clock hlc.WallClock) *RuntimeSta

RSSBytes: metric.NewGauge(metaRSSBytes),
TotalMemBytes: metric.NewGauge(metaTotalMemBytes),
MemStackSysBytes: metric.NewGauge(metaMemStackSysBytes),
HeapFragmentBytes: metric.NewGauge(metaHeapFragmentBytes),
HeapReservedBytes: metric.NewGauge(metaHeapReservedBytes),
HeapReleasedBytes: metric.NewGauge(metaHeapReleasedBytes),
TotalAlloc: metric.NewGauge(metaTotalAlloc),
HostDiskReadBytes: metric.NewGauge(metaHostDiskReadBytes),
HostDiskReadCount: metric.NewGauge(metaHostDiskReadCount),
HostDiskReadTime: metric.NewGauge(metaHostDiskReadTime),
Expand Down Expand Up @@ -768,12 +813,13 @@ func (rsr *RuntimeStatSampler) SampleEnvironment(ctx context.Context, cs *CGoMem
rsr.last.runnableSum = runnableSum

// Log summary of statistics to console.
osStackBytes := rsr.goRuntimeSampler.uint64(runtimeMetricMemStackOSBytes)
cgoRate := float64((numCgoCall-rsr.last.cgoCall)*int64(time.Second)) / dur
goAlloc := rsr.goRuntimeSampler.uint64(runtimeMetricHeapAlloc)
goTotal := rsr.goRuntimeSampler.uint64(runtimeMetricGoTotal) -
rsr.goRuntimeSampler.uint64(runtimeMetricHeapReleasedBytes)
stackTotal := rsr.goRuntimeSampler.uint64(runtimeMetricMemStackHeapBytes) +
rsr.goRuntimeSampler.uint64(runtimeMetricMemStackOSBytes)
osStackBytes
stats := &eventpb.RuntimeStats{
MemRSSBytes: mem.Resident,
GoroutineCount: uint64(numGoroutine),
Expand Down Expand Up @@ -824,6 +870,11 @@ func (rsr *RuntimeStatSampler) SampleEnvironment(ctx context.Context, cs *CGoMem
rsr.RSSBytes.Update(int64(mem.Resident))
totalMem, _, _ := GetTotalMemoryWithoutLogging()
rsr.TotalMemBytes.Update(totalMem)
rsr.MemStackSysBytes.Update(int64(osStackBytes))
rsr.HeapFragmentBytes.Update(int64(rsr.goRuntimeSampler.uint64(runtimeMetricHeapFragmentBytes)))
rsr.HeapReservedBytes.Update(int64(rsr.goRuntimeSampler.uint64(runtimeMetricHeapReservedBytes)))
rsr.HeapReleasedBytes.Update(int64(rsr.goRuntimeSampler.uint64(runtimeMetricHeapReleasedBytes)))
rsr.TotalAlloc.Update(int64(rsr.goRuntimeSampler.uint64(runtimeMetricCumulativeAlloc)))
rsr.Uptime.Update((now - rsr.startTimeNanos) / 1e9)
}

Expand Down

0 comments on commit f86f758

Please sign in to comment.