Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Memory stats for cgroup-v2 #10286

Merged
merged 4 commits into from
Apr 2, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion command/alloc_status.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"github.com/hashicorp/nomad/api"
"github.com/hashicorp/nomad/api/contexts"
"github.com/hashicorp/nomad/client/allocrunner/taskrunner/restarts"
"github.com/hashicorp/nomad/helper"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/posener/complete"
)
Expand Down Expand Up @@ -586,7 +587,13 @@ func (c *AllocStatusCommand) outputTaskResources(alloc *api.Allocation, task str
cpuUsage = fmt.Sprintf("%v/%v", math.Floor(cs.TotalTicks), cpuUsage)
}
if ms := ru.ResourceUsage.MemoryStats; ms != nil {
memUsage = fmt.Sprintf("%v/%v", humanize.IBytes(ms.RSS), memUsage)
// Nomad uses RSS as the top-level metric to report, for historical reasons,
// but it's not always measured (e.g. with cgroup-v2)
usage := ms.RSS
if usage == 0 && !helper.SliceStringContains(ms.Measured, "RSS") {
usage = ms.Usage
}
memUsage = fmt.Sprintf("%v/%v", humanize.IBytes(usage), memUsage)
}
deviceStats = ru.ResourceUsage.DeviceStats
}
Expand Down
15 changes: 13 additions & 2 deletions drivers/docker/util/stats_posix.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,28 @@ import (

var (
DockerMeasuredCPUStats = []string{"Throttled Periods", "Throttled Time", "Percent"}
DockerMeasuredMemStats = []string{"RSS", "Cache", "Swap", "Usage", "Max Usage"}

// cgroup-v2 only exposes a subset of memory stats
DockerCgroupV1MeasuredMemStats = []string{"RSS", "Cache", "Swap", "Usage", "Max Usage"}
DockerCgroupV2MeasuredMemStats = []string{"Cache", "Swap", "Usage"}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In #9073 we have an issue where metrics that aren't being measured are still being emitted to the metrics endpoint. Does this help with that situation here or will this still result in empty metrics being emitted? See task_runner.go#L1309-L1336.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This wouldn't address that issue. We can update task_runner.go to check whether the metrics are measured.

)

func DockerStatsToTaskResourceUsage(s *docker.Stats) *cstructs.TaskResourceUsage {
measuredMems := DockerCgroupV1MeasuredMemStats

// use a simple heuristic to check if cgroup-v2 is used.
// go-dockerclient doesn't distinguish between a 0 value and a not-present value
if s.MemoryStats.Stats.Rss == 0 && s.MemoryStats.MaxUsage == 0 && s.MemoryStats.Usage != 0 {
measuredMems = DockerCgroupV2MeasuredMemStats
}

ms := &cstructs.MemoryStats{
RSS: s.MemoryStats.Stats.Rss,
Cache: s.MemoryStats.Stats.Cache,
Swap: s.MemoryStats.Stats.Swap,
Usage: s.MemoryStats.Usage,
MaxUsage: s.MemoryStats.MaxUsage,
Measured: DockerMeasuredMemStats,
Measured: measuredMems,
}

cs := &cstructs.CpuStats{
Expand Down
15 changes: 12 additions & 3 deletions drivers/shared/executor/executor_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,11 @@ const (
)

var (
// ExecutorCgroupMeasuredMemStats is the list of memory stats captured by the executor
ExecutorCgroupMeasuredMemStats = []string{"RSS", "Cache", "Swap", "Usage", "Max Usage", "Kernel Usage", "Kernel Max Usage"}
// ExecutorCgroupV1MeasuredMemStats is the list of memory stats captured by the executor with cgroup-v1
ExecutorCgroupV1MeasuredMemStats = []string{"RSS", "Cache", "Swap", "Usage", "Max Usage", "Kernel Usage", "Kernel Max Usage"}

// ExecutorCgroupV2MeasuredMemStats is the list of memory stats captured by the executor with cgroup-v2. cgroup-v2 exposes different memory stats and no longer reports rss or max usage.
ExecutorCgroupV2MeasuredMemStats = []string{"Cache", "Swap", "Usage"}

// ExecutorCgroupMeasuredCpuStats is the list of CPU stats captured by the executor
ExecutorCgroupMeasuredCpuStats = []string{"System Mode", "User Mode", "Throttled Periods", "Throttled Time", "Percent"}
Expand Down Expand Up @@ -342,6 +345,12 @@ func (l *LibcontainerExecutor) Stats(ctx context.Context, interval time.Duration
func (l *LibcontainerExecutor) handleStats(ch chan *cstructs.TaskResourceUsage, ctx context.Context, interval time.Duration) {
defer close(ch)
timer := time.NewTimer(0)

measuredMemStats := ExecutorCgroupV1MeasuredMemStats
if cgroups.IsCgroup2UnifiedMode() {
measuredMemStats = ExecutorCgroupV2MeasuredMemStats
}

for {
select {
case <-ctx.Done():
Expand Down Expand Up @@ -379,7 +388,7 @@ func (l *LibcontainerExecutor) handleStats(ch chan *cstructs.TaskResourceUsage,
MaxUsage: maxUsage,
KernelUsage: stats.MemoryStats.KernelUsage.Usage,
KernelMaxUsage: stats.MemoryStats.KernelUsage.MaxUsage,
Measured: ExecutorCgroupMeasuredMemStats,
Measured: measuredMemStats,
}

// CPU Related Stats
Expand Down