Skip to content

Commit

Permalink
libcontainer: add support for Intel RDT/CAT in runc
Browse files Browse the repository at this point in the history
This PR fixes issue opencontainers#433
opencontainers#433

About Intel RDT/CAT feature:
Intel platforms with new Xeon CPU support Resource Director Technology (RDT).
Intel Cache Allocation Technology (CAT) is a sub-feature of RDT. Currently L3
Cache is the only resource that is supported in RDT.

This feature provides a way for the software to restrict cache allocation to a
defined 'subset' of L3 cache which may be overlapping with other 'subsets'.
The different subsets are identified by class of service (CLOS) and each CLOS
has a capacity bitmask (CBM).

More information about Intel RDT/CAT can be found in section 17.17 of the
Intel Software Developer Manual and in the kernel document:
https://lkml.org/lkml/2016/7/12/747

About Intel RDT/CAT kernel interface:
Intel Cache Allocation Technology (CAT) is a sub-feature of Resource Director
Technology (RDT), which currently supports L3 cache resource allocation.

In Linux kernel, it is exposed via "resource control" filesystem, which is a
"cgroup-like" interface.

Intel RDT "resource control" filesystem hierarchy:
/sys/fs/rscctrl
|-- cpus
|-- info
|   |-- info
|   |-- l3
|       |-- domain_to_cache_id
|       |-- max_cbm_len
|       |-- max_closid
|-- schemas
|-- tasks
|-- <container_id>
    |-- cpus
    |-- schemas
    |-- tasks

The file `tasks` lists all task ids belonging to the partition "container_id".
Task ids can be added to or removed from the file to move tasks among
partitions. A task id can be in only one directory at a time.

The file `schemas` has allocation bitmasks/values for L3 cache on each socket,
which contains L3 cache id and capacity bitmask (CBM).
	Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
For example, on a two-socket machine, L3's schema line could be `L3:0=ff;1=c0`
which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is 0xc0.

A valid L3 cache CBM is a *contiguous set of bits*, and the number of bits that
can be set is bounded by the maximum CBM length. The maximum CBM length varies
among supported Intel Xeon platforms. In the Intel RDT "resource control"
filesystem layout, the CBM in a "partition" should be a subset of the CBM in
root. The kernel checks that it is valid when writing. E.g., 0xfffff in root
indicates that the max CBM length is 20 bits, which maps to the entire L3 cache
capacity. Some valid CBM values to set in a "partition": 0xf, 0xf0, 0x3ff, 0x1f00, etc.

The file `cpus` has a cpu bitmask that specifies the CPUs that are bound to the
schemas. Any tasks scheduled on the cpus will use the schemas.

Compared with cgroups, intelRdt has a similar process management lifecycle and
similar interfaces in a container. But unlike the cgroups hierarchy, it has a
single-level filesystem layout. Once intelRdt is joined, statistics can be
collected from the container.

For more information about Intel RDT/CAT kernel interface:
https://lkml.org/lkml/2016/7/12/764

An example for runc:
There are two L3 caches in the two-socket machine, the default CBM is 0xfffff
and the max CBM length is 20 bits. This configuration assigns 4/5 of L3 cache
id 0 and the whole L3 cache id 1 for the container:
"linux": {
	"resources": {
		"intelRdt": {
			"l3CacheSchema": "L3:0=ffff0;1=fffff",
			"L3CacheCpus": "00000000,00000000,00000000,00000000,00000000,00000000"
		}
	}
}

Signed-off-by: Xiaochen Shen <[email protected]>
  • Loading branch information
xiaochenshen committed Aug 10, 2016
1 parent f59eb69 commit 839d087
Show file tree
Hide file tree
Showing 11 changed files with 640 additions and 24 deletions.
7 changes: 7 additions & 0 deletions libcontainer/configs/cgroup_unix.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,4 +121,11 @@ type Resources struct {

// Set class identifier for container's network packets
NetClsClassid uint32 `json:"net_cls_classid"`

// Intel RDT: the schema for L3 cache id and capacity bitmask (CBM)
// Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
IntelRdtL3CacheSchema string `json:"intel_rdt_l3_cache_schema"`

// Intel RDT: the bitmask of the CPUs that are bound to the schema
IntelRdtL3CacheCpus string `json:"intel_rdt_l3_cache_cpus"`
}
54 changes: 43 additions & 11 deletions libcontainer/container_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/criurpc"
"github.com/opencontainers/runc/libcontainer/intelrdt"
"github.com/opencontainers/runc/libcontainer/system"
"github.com/opencontainers/runc/libcontainer/utils"
"github.com/syndtr/gocapability/capability"
Expand All @@ -35,6 +36,7 @@ type linuxContainer struct {
root string
config *configs.Config
cgroupManager cgroups.Manager
intelRdtManager intelrdt.Manager
initPath string
initArgs []string
initProcess parentProcess
Expand Down Expand Up @@ -62,6 +64,9 @@ type State struct {

// Container's standard descriptors (std{in,out,err}), needed for checkpoint and restore
ExternalDescriptors []string `json:"external_descriptors,omitempty"`

// Intel RDT "resource control" filesystem path
IntelRdtPath string `json:"intel_rdt_path"`
}

// Container is a libcontainer container object.
Expand Down Expand Up @@ -156,6 +161,11 @@ func (c *linuxContainer) Stats() (*Stats, error) {
if stats.CgroupStats, err = c.cgroupManager.GetStats(); err != nil {
return stats, newSystemErrorWithCause(err, "getting container stats from cgroups")
}
if c.intelRdtManager != nil {
if stats.IntelRdtStats, err = c.intelRdtManager.GetStats(); err != nil {
return stats, newSystemErrorWithCause(err, "getting container's Intel RDT stats")
}
}
for _, iface := range c.config.Networks {
switch iface.Type {
case "veth":
Expand All @@ -180,7 +190,18 @@ func (c *linuxContainer) Set(config configs.Config) error {
return newGenericError(fmt.Errorf("container not running"), ContainerNotRunning)
}
c.config = &config
return c.cgroupManager.Set(c.config)
if err := c.cgroupManager.Set(c.config); err != nil {
return err
}
fmt.Printf("SXC: linuxContainer.Set(): intelRdtManager.Set(): 1\n")
if c.intelRdtManager != nil {
if err := c.intelRdtManager.Set(c.config); err != nil {
fmt.Printf("SXC: linuxContainer.Set(): intelRdtManager.Set() err\n")
return err
}
}
fmt.Printf("SXC: linuxContainer.Set(): intelRdtManager.Set()\n")
return nil
}

func (c *linuxContainer) Start(process *Process) error {
Expand Down Expand Up @@ -346,16 +367,17 @@ func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, c
return nil, err
}
return &initProcess{
cmd: cmd,
childPipe: childPipe,
parentPipe: parentPipe,
manager: c.cgroupManager,
config: c.newInitConfig(p),
container: c,
process: p,
bootstrapData: data,
sharePidns: sharePidns,
rootDir: rootDir,
cmd: cmd,
childPipe: childPipe,
parentPipe: parentPipe,
manager: c.cgroupManager,
intelRdtManager: c.intelRdtManager,
config: c.newInitConfig(p),
container: c,
process: p,
bootstrapData: data,
sharePidns: sharePidns,
rootDir: rootDir,
}, nil
}

Expand All @@ -371,10 +393,15 @@ func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe,
if err != nil {
return nil, err
}
intelRdtPath := ""
if c.intelRdtManager != nil {
intelRdtPath = c.intelRdtManager.GetPath()
}
// TODO: set on container for process management
return &setnsProcess{
cmd: cmd,
cgroupPaths: c.cgroupManager.GetPaths(),
intelRdtPath: intelRdtPath,
childPipe: childPipe,
parentPipe: parentPipe,
config: c.newInitConfig(p),
Expand Down Expand Up @@ -1190,6 +1217,10 @@ func (c *linuxContainer) currentState() (*State, error) {
startTime, _ = c.initProcess.startTime()
externalDescriptors = c.initProcess.externalDescriptors()
}
IntelRdtPath := ""
if c.intelRdtManager != nil {
IntelRdtPath = c.intelRdtManager.GetPath()
}
state := &State{
BaseState: BaseState{
ID: c.ID(),
Expand All @@ -1201,6 +1232,7 @@ func (c *linuxContainer) currentState() (*State, error) {
CgroupPaths: c.cgroupManager.GetPaths(),
NamespacePaths: make(map[configs.NamespaceType]string),
ExternalDescriptors: externalDescriptors,
IntelRdtPath: IntelRdtPath,
}
if pid > 0 {
for _, ns := range c.config.Namespaces {
Expand Down
31 changes: 31 additions & 0 deletions libcontainer/factory_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/configs/validate"
"github.com/opencontainers/runc/libcontainer/intelrdt"
"github.com/opencontainers/runc/libcontainer/utils"
)

Expand Down Expand Up @@ -89,6 +90,22 @@ func Cgroupfs(l *LinuxFactory) error {
return nil
}

// IntelRdtFs is an options func to configure a LinuxFactory to return
// containers that use the Intel RDT "resource control" filesystem to
// create and manage Intel Xeon platform shared resources (e.g., L3 cache).
//
// The factory is only modified when intelrdt.IntelRdtIsEnabled() reports
// true; otherwise l.NewIntelRdtManager is left unchanged. The returned
// error is always nil.
func IntelRdtFs(l *LinuxFactory) error {
	if !intelrdt.IntelRdtIsEnabled() {
		return nil
	}
	// Install the constructor used to build a per-container Intel RDT
	// manager from the container's cgroup config and its RDT path.
	l.NewIntelRdtManager = func(config *configs.Cgroup, path string) intelrdt.Manager {
		return &intelrdt.IntelRdtManager{
			Cgroups: config,
			Path:    path,
		}
	}
	return nil
}

// TmpfsRoot is an option func to mount LinuxFactory.Root to tmpfs.
func TmpfsRoot(l *LinuxFactory) error {
mounted, err := mount.Mounted(l.Root)
Expand Down Expand Up @@ -156,6 +173,9 @@ type LinuxFactory struct {

// NewCgroupsManager returns an initialized cgroups manager for a single container.
NewCgroupsManager func(config *configs.Cgroup, paths map[string]string) cgroups.Manager

// NewIntelRdtManager returns an initialized Intel RDT manager for a single container.
NewIntelRdtManager func(config *configs.Cgroup, path string) intelrdt.Manager
}

func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, error) {
Expand All @@ -177,6 +197,7 @@ func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, err
return nil, newGenericError(err, SystemError)
}
containerRoot := filepath.Join(l.Root, id)
fmt.Printf("SXC: LinuxFactory.Create(): containerRoot: %s\n", containerRoot)
if _, err := os.Stat(containerRoot); err == nil {
return nil, newGenericError(fmt.Errorf("container with id exists: %v", id), IdInUse)
} else if !os.IsNotExist(err) {
Expand Down Expand Up @@ -207,7 +228,13 @@ func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, err
criuPath: l.CriuPath,
cgroupManager: l.NewCgroupsManager(config.Cgroups, nil),
}
fmt.Printf("SXC: LinuxFactory.Create(): after l.NewIntelRdtManager()\n")
c.state = &stoppedState{c: c}
c.intelRdtManager = nil
if l.NewIntelRdtManager != nil {
c.intelRdtManager = l.NewIntelRdtManager(config.Cgroups, id)
}

return c, nil
}

Expand Down Expand Up @@ -241,6 +268,10 @@ func (l *LinuxFactory) Load(id string) (Container, error) {
if err := c.refreshState(); err != nil {
return nil, err
}
c.intelRdtManager = nil
if l.NewIntelRdtManager != nil {
c.intelRdtManager = l.NewIntelRdtManager(state.Config.Cgroups, state.IntelRdtPath)
}
return c, nil
}

Expand Down
Loading

0 comments on commit 839d087

Please sign in to comment.