From 8ddaaf3f56b177713340452c6062f250fe66a6d5 Mon Sep 17 00:00:00 2001
From: Xiaochen Shen
Date: Sun, 13 Dec 2015 00:01:09 +0800
Subject: [PATCH] libcontainer: cgroups: add intel_rdt support in runc

This PR fixes issue #433
https://github.com/opencontainers/runc/issues/433

About Intel RDT/CAT feature:
Intel platforms with new Xeon CPUs support Resource Director Technology
(RDT). Intel Cache Allocation Technology (CAT) is a sub-feature of RDT.
Currently the L3 cache is the only resource that is supported in RDT.

This feature provides a way for software to restrict cache allocation
to a defined 'subset' of the L3 cache, which may overlap with other
'subsets'. The different subsets are identified by class of service
(CLOS), and each CLOS has a capacity bitmask (CBM).

More information can be found in section 17.16 of the Intel Software
Developer Manual.

About intel_rdt cgroup:
Linux kernel 4.6 (or later) will introduce a new cgroup subsystem,
'intel_rdt', with kernel config CONFIG_INTEL_RDT.

The 'intel_rdt' cgroup manages L3 cache allocation. It has a file
'l3_cbm' which represents the L3 cache capacity bitmask (CBM). The CBM
must have only *contiguous bits set*, and the number of bits that can be
set is less than the max bits. The max bits in the CBM varies among
supported Intel platforms. The tasks belonging to a cgroup get to fill
in the L3 cache represented by the CBM. For example, if the max bits in
the CBM is 10 and the L3 cache size is 10MB, each bit represents 1MB of
the L3 cache capacity.

The root cgroup always has all the bits set in its l3_cbm. Users can
create more cgroups with the mkdir syscall. By default, child cgroups
inherit the CBM from their parent. Users can change the CBM, specified
in hex, for each cgroup.

For more information about intel_rdt cgroup:
https://lkml.org/lkml/2015/12/17/574

An example:
    Root cgroup: intel_rdt.l3_cbm == 0xfffff, the max bits of CBM is 20
    L3 cache size: 55 MB
    This assigns 11 MB (1/5) of L3 cache to the child group:
    $ /bin/echo 0xf > intel_rdt.l3_cbm

Signed-off-by: Xiaochen Shen
---
 libcontainer/SPEC.md                          |  1 +
 libcontainer/cgroups/fs/apply_raw.go          |  1 +
 libcontainer/cgroups/fs/intel_rdt.go          | 75 +++++++++++++++++++
 libcontainer/cgroups/fs/intel_rdt_test.go     | 62 +++++++++++++++
 libcontainer/cgroups/fs/utils.go              | 16 ++++
 libcontainer/cgroups/stats.go                 |  7 +-
 libcontainer/cgroups/systemd/apply_systemd.go | 29 ++++++-
 libcontainer/configs/cgroup_unix.go           |  3 +
 spec.go                                       |  2 +
 9 files changed, 194 insertions(+), 2 deletions(-)
 create mode 100644 libcontainer/cgroups/fs/intel_rdt.go
 create mode 100644 libcontainer/cgroups/fs/intel_rdt_test.go

diff --git a/libcontainer/SPEC.md b/libcontainer/SPEC.md
index 221545c01dc..9bf1e8d59e3 100644
--- a/libcontainer/SPEC.md
+++ b/libcontainer/SPEC.md
@@ -143,6 +143,7 @@ system resources like cpu, memory, and device access.
 | freezer    | 1       |
 | hugetlb    | 1       |
 | pids       | 1       |
+| intel_rdt  | 1       |
 
 
 All cgroup subsystem are joined so that statistics can be collected from
diff --git a/libcontainer/cgroups/fs/apply_raw.go b/libcontainer/cgroups/fs/apply_raw.go
index 21646e5710f..a08848af424 100644
--- a/libcontainer/cgroups/fs/apply_raw.go
+++ b/libcontainer/cgroups/fs/apply_raw.go
@@ -30,6 +30,7 @@ var (
 		&NetPrioGroup{},
 		&PerfEventGroup{},
 		&FreezerGroup{},
+		&IntelRdtGroup{},
 	}
 	CgroupProcesses  = "cgroup.procs"
 	HugePageSizes, _ = cgroups.GetHugePageSize()
diff --git a/libcontainer/cgroups/fs/intel_rdt.go b/libcontainer/cgroups/fs/intel_rdt.go
new file mode 100644
index 00000000000..7c59fb5acfa
--- /dev/null
+++ b/libcontainer/cgroups/fs/intel_rdt.go
@@ -0,0 +1,75 @@
+// +build linux
+
+package fs
+
+import (
+	"fmt"
+
+	"github.com/opencontainers/runc/libcontainer/cgroups"
+	"github.com/opencontainers/runc/libcontainer/configs"
+)
+
+// IntelRdtGroup implements the intel_rdt (Intel RDT/CAT) cgroup subsystem.
+type IntelRdtGroup struct{}
+
+// Name returns the name of the cgroup subsystem, "intel_rdt".
+func (s *IntelRdtGroup) Name() string {
+	return "intel_rdt"
+}
+
+// Apply joins the intel_rdt cgroup and writes the configured L3 CBM to it.
+func (s *IntelRdtGroup) Apply(d *cgroupData) error {
+	dir, err := d.join("intel_rdt")
+	if err != nil {
+		if !cgroups.IsNotFound(err) {
+			return err
+		}
+		// We will not return err here when:
+		// 1. The h/w platform doesn't support the Intel RDT/CAT feature,
+		//    or the intel_rdt cgroup is not enabled in the kernel.
+		// 2. The intel_rdt cgroup is not mounted.
+		return nil
+	}
+
+	return s.Set(dir, d.config)
+}
+
+// Set writes the configured L3 CBM to intel_rdt.l3_cbm; a zero CBM is skipped.
+func (s *IntelRdtGroup) Set(path string, cgroup *configs.Cgroup) error {
+	// A valid CBM (capacity bitmask) has only *contiguous bits set*, and
+	// the number of bits that can be set is less than the max bits. The
+	// max bits in the CBM varies among supported Intel platforms.
+	//
+	// By default the child cgroups inherit the CBM from parent. The CBM
+	// in a child cgroup should be a subset of the CBM in parent. Kernel
+	// will check if it is valid when writing.
+	//
+	// e.g., 0xfffff in root cgroup indicates the max bits of CBM is 20
+	// bits, which maps to the entire L3 cache capacity. Some valid CBM
+	// values to set in child cgroups: 0xf, 0xf0, 0x3ff, 0x1f00, etc.
+	if cgroup.Resources.IntelRdtL3Cbm != 0 {
+		l3CbmStr := fmt.Sprintf("0x%x", cgroup.Resources.IntelRdtL3Cbm)
+		if err := writeFile(path, "intel_rdt.l3_cbm", l3CbmStr); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+// Remove calls removePath on the container's intel_rdt cgroup path.
+func (s *IntelRdtGroup) Remove(d *cgroupData) error {
+	return removePath(d.path("intel_rdt"))
+}
+
+// GetStats reads intel_rdt.l3_cbm into stats.IntelRdtStats.L3Cbm.
+func (s *IntelRdtGroup) GetStats(path string, stats *cgroups.Stats) error {
+	value, err := getCgroupParamUintHex(path, "intel_rdt.l3_cbm")
+	if err != nil {
+		return fmt.Errorf("failed to parse intel_rdt.l3_cbm - %s", err)
+	}
+
+	stats.IntelRdtStats.L3Cbm = value
+
+	return nil
+}
diff --git a/libcontainer/cgroups/fs/intel_rdt_test.go b/libcontainer/cgroups/fs/intel_rdt_test.go
new file mode 100644
index 00000000000..748750a7e91
--- /dev/null
+++ b/libcontainer/cgroups/fs/intel_rdt_test.go
@@ -0,0 +1,62 @@
+// +build linux
+
+package fs
+
+import (
+	"strconv"
+	"testing"
+
+	"github.com/opencontainers/runc/libcontainer/cgroups"
+)
+
+func TestIntelRdtSetL3Cbm(t *testing.T) {
+	helper := NewCgroupTestUtil("intel_rdt", t)
+	defer helper.cleanup()
+
+	const (
+		l3CbmBefore = 0xf
+		l3CbmAfter  = 0xf0
+	)
+
+	helper.writeFileContents(map[string]string{
+		"intel_rdt.l3_cbm": strconv.FormatUint(l3CbmBefore, 16),
+	})
+
+	helper.CgroupData.config.Resources.IntelRdtL3Cbm = l3CbmAfter
+	intelrdt := &IntelRdtGroup{}
+	if err := intelrdt.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
+		t.Fatal(err)
+	}
+
+	value, err := getCgroupParamUintHex(helper.CgroupPath, "intel_rdt.l3_cbm")
+	if err != nil {
+		t.Fatalf("Failed to parse intel_rdt.l3_cbm - %s", err)
+	}
+
+	if value != l3CbmAfter {
+		t.Fatal("Got the wrong value, set intel_rdt.l3_cbm failed.")
+	}
+}
+
+func TestIntelRdtStats(t *testing.T) {
+	helper := NewCgroupTestUtil("intel_rdt", t)
+	defer helper.cleanup()
+
+	const (
+		l3CbmContents = 0x1f00
+	)
+
+	helper.writeFileContents(map[string]string{
+		"intel_rdt.l3_cbm": strconv.FormatUint(l3CbmContents, 16),
+	})
+
+	intelrdt := &IntelRdtGroup{}
+	stats := *cgroups.NewStats()
+	if err := intelrdt.GetStats(helper.CgroupPath, &stats); err != nil {
+		t.Fatal(err)
+	}
+
+	if stats.IntelRdtStats.L3Cbm != l3CbmContents {
+		t.Fatalf("Expected '0x%x', got '0x%x' for intel_rdt.l3_cbm", l3CbmContents, stats.IntelRdtStats.L3Cbm)
+	}
+}
diff --git a/libcontainer/cgroups/fs/utils.go b/libcontainer/cgroups/fs/utils.go
index 852b18391d0..04b8def0d49 100644
--- a/libcontainer/cgroups/fs/utils.go
+++ b/libcontainer/cgroups/fs/utils.go
@@ -68,6 +68,22 @@ func getCgroupParamUint(cgroupPath, cgroupFile string) (uint64, error) {
 	return res, nil
 }
 
+// Gets a single hex uint64 value (optionally "0x"-prefixed) from the specified cgroup file.
+func getCgroupParamUintHex(cgroupPath, cgroupFile string) (uint64, error) {
+	fileName := filepath.Join(cgroupPath, cgroupFile)
+	contents, err := ioutil.ReadFile(fileName)
+	if err != nil {
+		return 0, err
+	}
+
+	// Trim surrounding whitespace first so that the "0x" prefix is at the start.
+	hexStr := strings.TrimPrefix(strings.TrimSpace(string(contents)), "0x")
+	res, err := parseUint(hexStr, 16, 64)
+	if err != nil {
+		return res, fmt.Errorf("unable to parse %q as a uint from Cgroup file %q", string(contents), fileName)
+	}
+	return res, nil
+}
 // Gets a string value from the specified cgroup file
 func getCgroupParamString(cgroupPath, cgroupFile string) (string, error) {
 	contents, err := ioutil.ReadFile(filepath.Join(cgroupPath, cgroupFile))
diff --git a/libcontainer/cgroups/stats.go b/libcontainer/cgroups/stats.go
index 54ace4185dd..e60b663d040 100644
--- a/libcontainer/cgroups/stats.go
+++ b/libcontainer/cgroups/stats.go
@@ -84,13 +84,18 @@ type HugetlbStats struct {
 	Failcnt uint64 `json:"failcnt"`
 }
 
+type IntelRdtStats struct {
+	L3Cbm uint64 `json:"l3_cbm,omitempty"` // L3 cache capacity bitmask (CBM)
+}
+
 type Stats struct {
 	CpuStats    CpuStats    `json:"cpu_stats,omitempty"`
 	MemoryStats MemoryStats `json:"memory_stats,omitempty"`
 	PidsStats   PidsStats   `json:"pids_stats,omitempty"`
 	BlkioStats  BlkioStats  `json:"blkio_stats,omitempty"`
 	// the map is in the format "size of hugepage: stats of the hugepage"
-	HugetlbStats map[string]HugetlbStats `json:"hugetlb_stats,omitempty"`
+	HugetlbStats  map[string]HugetlbStats `json:"hugetlb_stats,omitempty"`
+	IntelRdtStats IntelRdtStats           `json:"intel_rdt_stats,omitempty"`
 }
 
 func NewStats() *Stats {
diff --git a/libcontainer/cgroups/systemd/apply_systemd.go b/libcontainer/cgroups/systemd/apply_systemd.go
index 3161639f218..8ac0543c9d0 100644
--- a/libcontainer/cgroups/systemd/apply_systemd.go
+++ b/libcontainer/cgroups/systemd/apply_systemd.go
@@ -63,6 +63,7 @@ var subsystems = subsystemSet{
 	&fs.NetPrioGroup{},
 	&fs.NetClsGroup{},
 	&fs.NameGroup{GroupName: "name=systemd"},
+	&fs.IntelRdtGroup{},
 }
 
 const (
@@ -251,7 +252,7 @@ func (m *Manager) Apply(pid int) error {
 		return err
 	}
 
-	// we need to manually join the freezer, net_cls, net_prio, pids and cpuset cgroup in systemd
+	// we need to manually join the freezer, net_cls, net_prio, pids, cpuset and intel_rdt cgroup in systemd
 	// because it does not currently support it via the dbus api.
 	if err := joinFreezer(c, pid); err != nil {
 		return err
@@ -279,6 +280,11 @@ func (m *Manager) Apply(pid int) error {
 	if err := joinPerfEvent(c, pid); err != nil {
 		return err
 	}
+
+	if err := joinIntelRdt(c, pid); err != nil {
+		return err
+	}
+
 	// FIXME: Systemd does have `BlockIODeviceWeight` property, but we got problem
 	// using that (at least on systemd 208, see https://github.com/opencontainers/runc/libcontainer/pull/354),
 	// so use fs work around for now.
@@ -603,3 +609,24 @@ func joinPerfEvent(c *configs.Cgroup, pid int) error {
 	}
 	return nil
 }
+
+// joinIntelRdt joins pid to the intel_rdt cgroup and applies the L3 CBM from c.
+func joinIntelRdt(c *configs.Cgroup, pid int) error {
+	path, err := join(c, "intel_rdt", pid)
+	if err != nil {
+		if !cgroups.IsNotFound(err) {
+			return err
+		}
+		// We will not return err here when:
+		// 1. The h/w platform doesn't support the Intel RDT/CAT feature,
+		//    or the intel_rdt cgroup is not enabled in the kernel.
+		// 2. The intel_rdt cgroup is not mounted.
+		return nil
+	}
+
+	intelRdt, err := subsystems.Get("intel_rdt")
+	if err != nil {
+		return err
+	}
+	return intelRdt.Set(path, c)
+}
diff --git a/libcontainer/configs/cgroup_unix.go b/libcontainer/configs/cgroup_unix.go
index 40a033f35bc..acd21fa4d79 100644
--- a/libcontainer/configs/cgroup_unix.go
+++ b/libcontainer/configs/cgroup_unix.go
@@ -118,4 +118,7 @@ type Resources struct {
 
 	// Set class identifier for container's network packets
 	NetClsClassid string `json:"net_cls_classid"`
+
+	// L3 cache capacity bitmask (CBM) for container
+	IntelRdtL3Cbm uint64 `json:"intel_rdt_l3_cbm"`
 }
diff --git a/spec.go b/spec.go
index 2836e7c8582..893102c8972 100644
--- a/spec.go
+++ b/spec.go
@@ -480,6 +480,8 @@ func createCgroupConfig(name string, spec *specs.LinuxSpec) (*configs.Cgroup, er
 			})
 		}
 	}
+	c.Resources.IntelRdtL3Cbm = *r.IntelRdt.L3Cbm
+
 	return c, nil
 }
