Skip to content

Commit

Permalink
feat: support cgroup deep analysis in talosctl
Browse files Browse the repository at this point in the history
The new command `talosctl cgroups` fetches a cgroups snapshot from the
machine, parses it fully, enhances it with additional information (e.g.
resolves pod names), and presents a customizable view of the cgroups
configuration (e.g. limits) and current consumption.

Signed-off-by: Andrey Smirnov <[email protected]>
  • Loading branch information
smira committed Sep 30, 2024
1 parent aa846cc commit 908fd87
Show file tree
Hide file tree
Showing 32 changed files with 3,083 additions and 1,122 deletions.
8 changes: 4 additions & 4 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -1083,10 +1083,10 @@ RUN protoc \
/protos/time/*.proto

FROM scratch AS docs
COPY --from=docs-build /tmp/configuration/ /website/content/v1.8/reference/configuration/
COPY --from=docs-build /tmp/cli.md /website/content/v1.8/reference/
COPY --from=docs-build /tmp/schemas /website/content/v1.8/schemas/
COPY --from=proto-docs-build /tmp/api.md /website/content/v1.8/reference/
COPY --from=docs-build /tmp/configuration/ /website/content/v1.9/reference/configuration/
COPY --from=docs-build /tmp/cli.md /website/content/v1.9/reference/
COPY --from=docs-build /tmp/schemas /website/content/v1.9/schemas/
COPY --from=proto-docs-build /tmp/api.md /website/content/v1.9/reference/

# The talosctl-cni-bundle builds the CNI bundle for talosctl.

Expand Down
2 changes: 2 additions & 0 deletions api/machine/machine.proto
Original file line number Diff line number Diff line change
Expand Up @@ -615,6 +615,8 @@ message ContainersRequest {
message ContainerInfo {
string namespace = 1;
string id = 2;
string uid = 10;
string internal_id = 9;
string image = 3;
uint32 pid = 4;
string status = 5;
Expand Down
254 changes: 254 additions & 0 deletions cmd/talosctl/cmd/talos/cgroups.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,254 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.

package talos

import (
"archive/tar"
"compress/gzip"
"context"
"fmt"
"os"
"path/filepath"
"slices"
"strconv"
"strings"
"text/tabwriter"

"github.com/siderolabs/gen/xslices"
"github.com/spf13/cobra"
"gopkg.in/yaml.v3"

"github.com/siderolabs/talos/cmd/talosctl/cmd/talos/cgroupsprinter"
"github.com/siderolabs/talos/cmd/talosctl/pkg/talos/helpers"
"github.com/siderolabs/talos/internal/pkg/cgroups"
"github.com/siderolabs/talos/pkg/cli"
"github.com/siderolabs/talos/pkg/machinery/api/common"
"github.com/siderolabs/talos/pkg/machinery/client"
"github.com/siderolabs/talos/pkg/machinery/constants"
)

// cgroupsCmdFlags holds the values of the command-line flags of the cgroups command.
// The two flags are registered as mutually exclusive in init.
var cgroupsCmdFlags struct {
// schemaFile is the value of --schema-file: path to a custom columns schema file.
schemaFile string
// presetName is the value of --preset: name of one of the embedded presets.
presetName string
}

// cgroupsCmd represents the cgroups command.
//
// The command loads a columns schema (either from --schema-file or from an embedded
// --preset), copies the cgroup filesystem from the node, replaces cgroup names, PIDs
// and block device identifiers with friendlier names on a best-effort basis, and
// prints the resulting tree as a tab-aligned table on stdout.
var cgroupsCmd = &cobra.Command{
	Use:     "cgroups",
	Aliases: []string{"cg"},
	Short:   "Retrieve cgroups usage information",
	Long: `The cgroups command fetches control group v2 (cgroupv2) usage details from the machine.
Several presets are available to focus on specific cgroup subsystems:
* cpu
* cpuset
* io
* memory
* process
* swap
You can specify the preset using the --preset flag.
Alternatively, a custom schema can be provided using the --schema-file flag.
To see schema examples, refer to https://github.com/siderolabs/talos/tree/main/cmd/talosctl/cmd/talos/cgroupsprinter/schemas.
`,
	Args: cobra.NoArgs,
	RunE: func(cmd *cobra.Command, args []string) error {
		return WithClient(func(ctx context.Context, c *client.Client) error {
			if err := helpers.FailIfMultiNodes(ctx, "cgroups"); err != nil {
				return err
			}

			var schema cgroupsprinter.Schema

			switch {
			case cgroupsCmdFlags.schemaFile != "":
				in, err := os.Open(cgroupsCmdFlags.schemaFile)
				if err != nil {
					return fmt.Errorf("error opening schema file: %w", err)
				}

				defer in.Close() //nolint:errcheck

				if err = yaml.NewDecoder(in).Decode(&schema); err != nil {
					return fmt.Errorf("error decoding schema file: %w", err)
				}
			case cgroupsCmdFlags.presetName != "":
				presetNames := cgroupsprinter.GetPresetNames()

				// Validate up front: GetPreset panics on an unknown name.
				if !slices.Contains(presetNames, cgroupsCmdFlags.presetName) {
					return fmt.Errorf("invalid preset name: %s (valid %v)", cgroupsCmdFlags.presetName, presetNames)
				}

				schema = cgroupsprinter.GetPreset(cgroupsCmdFlags.presetName)
			default:
				return fmt.Errorf("either schema file or preset must be specified")
			}

			if err := schema.Compile(); err != nil {
				return fmt.Errorf("error compiling schema: %w", err)
			}

			// The resolve maps are best-effort: on failure the helpers print a warning
			// and return an empty map, so raw identifiers are shown instead.
			cgroupNameResolveMap := buildCgroupResolveMap(ctx, c)
			processResolveMap := buildProcessResolveMap(ctx, c)
			devicesResolveMap := buildDevicesResolveMap(ctx, c)

			r, err := c.Copy(ctx, constants.CgroupMountPath)
			if err != nil {
				return fmt.Errorf("error copying: %w", err)
			}

			defer r.Close() //nolint:errcheck

			tree, err := cgroups.TreeFromTarGz(r)
			if err != nil {
				return fmt.Errorf("error reading cgroups: %w", err)
			}

			tree.ResolveNames(cgroupNameResolveMap)
			tree.Walk(func(node *cgroups.Node) {
				// Show the process name when known, otherwise keep the PID.
				node.CgroupProcsResolved = xslices.Map(node.CgroupProcs, func(pid cgroups.Value) cgroups.RawValue {
					if name, ok := processResolveMap[pid.String()]; ok {
						return cgroups.RawValue(name)
					}

					return cgroups.RawValue(pid.String())
				})

				// Collect the renames first and apply them afterwards: inserting into a map
				// while ranging over it may or may not visit the new entries, and renaming a
				// key to itself followed by a delete would drop the entry.
				renames := make(map[string]string, len(node.IOStat))

				for dev := range node.IOStat {
					if name, ok := devicesResolveMap[dev]; ok && name != dev {
						renames[dev] = name
					}
				}

				for dev, name := range renames {
					node.IOStat[name] = node.IOStat[dev]
					delete(node.IOStat, dev)
				}
			})

			w := tabwriter.NewWriter(os.Stdout, 0, 0, 3, ' ', 0)

			headerLine := "NAME\t" + schema.HeaderLine() + "\n"

			if _, err = w.Write([]byte(headerLine)); err != nil {
				return fmt.Errorf("error writing header line: %w", err)
			}

			err = cgroupsprinter.PrintNode(".", w, &schema, tree.Root, nil, 0, nil, false, true)

			// Flush even when printing failed so that the rows rendered so far are shown;
			// a printing error takes precedence over a flush error.
			if flushErr := w.Flush(); err == nil && flushErr != nil {
				err = fmt.Errorf("error flushing output: %w", flushErr)
			}

			return err
		})
	},
}

// completeCgroupPresetArg is the shell completion function for the --preset flag.
// It offers every embedded preset name and disables file name completion.
func completeCgroupPresetArg(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) {
	candidates := cgroupsprinter.GetPresetNames()

	return candidates, cobra.ShellCompDirectiveNoFileComp
}

// buildCgroupResolveMap builds a lookup table used to replace cgroup names with
// friendlier ones, based on the CRI containers reported by the node.
//
// Two kinds of entries are produced:
//   - "pod"+<container UID> maps to the pod ID;
//   - <container internal ID> maps to the container name, or to "sandbox" when the
//     container name equals the pod ID.
//
// The lookup is best-effort: if the containers cannot be listed a warning is printed,
// and in that case (or when the response carries no messages) an empty map is returned.
func buildCgroupResolveMap(ctx context.Context, c *client.Client) map[string]string {
	cgroupNameResolveMap := map[string]string{}

	containersResp, err := c.Containers(ctx, constants.K8sContainerdNamespace, common.ContainerDriver_CRI)
	if err != nil {
		cli.Warning("error getting containers: %s", err)

		return cgroupNameResolveMap
	}

	// Guard the Messages[0] access below: an empty response must not panic.
	if len(containersResp.Messages) == 0 {
		return cgroupNameResolveMap
	}

	for _, ctr := range containersResp.Messages[0].Containers {
		if ctr.Uid != "" && ctr.PodId != "" {
			cgroupNameResolveMap["pod"+ctr.Uid] = ctr.PodId
		}

		if ctr.InternalId == "" {
			continue
		}

		if ctr.PodId == ctr.Name {
			cgroupNameResolveMap[ctr.InternalId] = "sandbox"
		} else {
			cgroupNameResolveMap[ctr.InternalId] = ctr.Name
		}
	}

	return cgroupNameResolveMap
}

// buildProcessResolveMap builds a lookup table from PID (formatted as a decimal
// string) to a short process name, based on the process list reported by the node.
//
// The name is taken from the first non-empty of: the executable, the command, the
// first whitespace-separated field of the arguments; only its base name is kept.
// Processes for which no name can be determined are left out of the map, so callers
// fall back to whatever they show for an unresolved PID.
//
// The lookup is best-effort: if the processes cannot be listed a warning is printed,
// and in that case (or when the response carries no messages) an empty map is returned.
func buildProcessResolveMap(ctx context.Context, c *client.Client) map[string]string {
	processResolveMap := map[string]string{}

	processesResp, err := c.Processes(ctx)
	if err != nil {
		cli.Warning("error getting processes: %s", err)

		return processResolveMap
	}

	// Guard the Messages[0] access below: an empty response must not panic.
	if len(processesResp.Messages) == 0 {
		return processResolveMap
	}

	for _, proc := range processesResp.Messages[0].Processes {
		name := proc.Executable

		if name == "" {
			name = proc.Command
		}

		if name == "" {
			args := strings.Fields(proc.Args)

			if len(args) > 0 {
				name = args[0]
			}
		}

		// filepath.Base("") returns ".", which would be shown as the process name;
		// skip nameless processes instead.
		if name == "" {
			continue
		}

		name = filepath.Base(name)

		processResolveMap[strconv.FormatInt(int64(proc.Pid), 10)] = name
	}

	return processResolveMap
}

// buildDevicesResolveMap builds a lookup table from the entry names found under
// /sys/dev/block on the node to the base name of the symlink target of each entry.
//
// The directory is fetched as a gzip-compressed tar stream; only symlink entries are
// considered. The lookup is best-effort: failures to fetch or open the stream print a
// warning and yield an empty map, and reading stops at the first error returned by the
// tar reader (including the regular end of the archive).
func buildDevicesResolveMap(ctx context.Context, c *client.Client) map[string]string {
	resolved := map[string]string{}

	archive, err := c.Copy(ctx, "/sys/dev/block")
	if err != nil {
		cli.Warning("error copying devices: %s", err)

		return resolved
	}

	defer archive.Close() //nolint:errcheck

	unzipped, err := gzip.NewReader(archive)
	if err != nil {
		cli.Warning("error reading devices: %s", err)

		return resolved
	}

	defer unzipped.Close() //nolint:errcheck

	entries := tar.NewReader(unzipped)

	for entry, nextErr := entries.Next(); nextErr == nil; entry, nextErr = entries.Next() {
		if entry.Typeflag == tar.TypeSymlink {
			resolved[entry.Name] = filepath.Base(entry.Linkname)
		}
	}

	return resolved
}

func init() {
presetNames := cgroupsprinter.GetPresetNames()

cgroupsCmd.Flags().StringVar(&cgroupsCmdFlags.schemaFile, "schema-file", "", "path to the columns schema file")
cgroupsCmd.Flags().StringVar(&cgroupsCmdFlags.presetName, "preset", "", fmt.Sprintf("preset name (one of: %v)", presetNames))
cgroupsCmd.MarkFlagsMutuallyExclusive("schema-file", "preset")
cgroupsCmd.RegisterFlagCompletionFunc("preset", completeCgroupPresetArg) //nolint:errcheck

addCommand(cgroupsCmd)
}
58 changes: 58 additions & 0 deletions cmd/talosctl/cmd/talos/cgroupsprinter/presets.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.

package cgroupsprinter

import (
"embed"
"io/fs"
"path/filepath"
"slices"
"strings"

"github.com/siderolabs/gen/xslices"
"gopkg.in/yaml.v3"
)

// presetsFS holds the preset schema files (presets/*.yaml) embedded into the binary
// at build time; it is read by GetPresetNames and GetPreset.
//
//go:embed presets/*.yaml
var presetsFS embed.FS

// GetPresetNames returns the sorted names of all embedded presets.
//
// A preset name is the file name of the embedded preset with its extension removed.
// The function panics if the embedded directory cannot be read.
func GetPresetNames() []string {
	entries, err := presetsFS.ReadDir("presets")
	if err != nil {
		panic(err) // should not fail
	}

	// filepath.Ext always returns a suffix of its argument, so slicing it off
	// is the same as trimming it.
	stripExt := func(entry fs.DirEntry) string {
		fileName := entry.Name()

		return fileName[:len(fileName)-len(filepath.Ext(fileName))]
	}

	names := xslices.Map(entries, stripExt)
	slices.Sort(names)

	return names
}

// GetPreset returns the preset by name.
//
// The preset is read from the embedded presets/<name>.yaml file, decoded and compiled.
// The function panics if the file does not exist, cannot be decoded or does not
// compile, so callers should validate the name against GetPresetNames first.
func GetPreset(name string) Schema {
	// embed.FS paths are always slash-separated, regardless of the host OS, so the
	// path must not be built with filepath.Join (which uses '\' on Windows).
	f, err := presetsFS.Open("presets/" + name + ".yaml")
	if err != nil {
		panic(err) // should not fail
	}

	defer f.Close() //nolint:errcheck

	var schema Schema

	if err := yaml.NewDecoder(f).Decode(&schema); err != nil {
		panic(err) // should not fail
	}

	if err := schema.Compile(); err != nil {
		panic(err) // should not fail
	}

	return schema
}
18 changes: 18 additions & 0 deletions cmd/talosctl/cmd/talos/cgroupsprinter/presets/cpu.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Basic CPU metrics
# Each column has a header name and a template that produces the cell value.
columns:
- name: CpuWeight
template: '{{ .CPUWeight | printf "%6s" }}'
- name: CpuNice
template: '{{ .CPUWeightNice | printf "%6s" }}'
- name: CpuMax
template: '{{ .CPUMax | printf "%6s" }}'
- name: CpuUser
template: '{{ .CPUStat.user_usec.UsecToDuration | printf "%12s" }}'
# Relates this node's user_usec to its parent's; prints "-" for a node without a parent.
- name: User/%
template: '{{ if .Parent }}{{ .CPUStat.user_usec.DivideBy .Parent.CPUStat.user_usec | printf "%6s" }}%{{ else }}-{{ end }}'
- name: CpuSystem
template: '{{ .CPUStat.system_usec.UsecToDuration | printf "%12s" }}'
# Relates this node's system_usec to its parent's; prints "-" for a node without a parent.
- name: System/%
template: '{{ if .Parent }}{{ .CPUStat.system_usec.DivideBy .Parent.CPUStat.system_usec | printf "%6s" }}%{{ else }}-{{ end }}'
- name: Throttled
template: '{{ .CPUStat.throttled_usec.UsecToDuration | printf "%12s" }}'
9 changes: 9 additions & 0 deletions cmd/talosctl/cmd/talos/cgroupsprinter/presets/cpuset.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# cpuset cgroup metrics: configured and effective (Eff) CPUs and mems values
columns:
- name: CpuSet
template: '{{ .CPUSetCPUs | printf "%12s" }}'
- name: CpuSet(Eff)
template: '{{ .CPUSetCPUsEffective | printf "%12s" }}'
- name: Mems
template: '{{ .CPUSetMems | printf "%12s" }}'
- name: Mems(Eff)
template: '{{ .CPUSetMemsEffective | printf "%12s" }}'
11 changes: 11 additions & 0 deletions cmd/talosctl/cmd/talos/cgroupsprinter/presets/io.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# I/O cgroup metrics: per-disk statistics and I/O pressure
columns:
# One "<disk>: <read>/<written>" item per IOStat entry; entries with an empty value are skipped.
- name: Bytes Read/Written
template: '{{ range $disk, $v := .IOStat }}{{ if $v }}{{ $disk }}: {{ $v.rbytes.HumanizeIBytes }}/{{ $v.wbytes.HumanizeIBytes }} {{ end }}{{ end }}'
# Left empty for a node without a parent.
- name: ios Read/Write
template: '{{ if .Parent }}{{ range $disk, $v := .IOStat }}{{ $disk }}: {{ $v.rios }}/{{ $v.wios }} {{ end }}{{ end }}'
- name: PressAvg10
template: '{{ .IOPressure.some.avg10 | printf "%6s" }}'
- name: PressAvg60
template: '{{ .IOPressure.some.avg60 | printf "%6s" }}'
- name: PressTotal
template: '{{ .IOPressure.some.total.UsecToDuration | printf "%12s" }}'
18 changes: 18 additions & 0 deletions cmd/talosctl/cmd/talos/cgroupsprinter/presets/memory.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Memory-related cgroup metrics
# Each column has a header name and a template that produces the cell value.
columns:
- name: MemCurrent
template: '{{ .MemoryCurrent.HumanizeIBytes | printf "%8s" }}'
- name: MemPeak
template: '{{ .MemoryPeak.HumanizeIBytes | printf "%8s" }}'
- name: MemLow
template: '{{ .MemoryLow.HumanizeIBytes | printf "%8s" }}'
# MemoryPeak related to MemoryLow; the "%%" in the format prints a literal percent sign.
- name: Peak/Low
template: '{{ .MemoryPeak.DivideBy .MemoryLow | printf "%6s%%" }}'
- name: MemHigh
template: '{{ .MemoryHigh.HumanizeIBytes | printf "%8s" }}'
- name: MemMin
template: '{{ .MemoryMin.HumanizeIBytes | printf "%8s" }}'
# MemoryCurrent related to MemoryMin; the "%%" in the format prints a literal percent sign.
- name: Current/Min
template: '{{ .MemoryCurrent.DivideBy .MemoryMin | printf "%6s%%" }}'
- name: MemMax
template: '{{ .MemoryMax.HumanizeIBytes | printf "%8s" }}'
Loading

0 comments on commit 908fd87

Please sign in to comment.