Skip to content
This repository has been archived by the owner on Feb 8, 2023. It is now read-only.

Commit

Permalink
Merge pull request #18 from Ace-Tang/update_2019_0107
Browse files Browse the repository at this point in the history
Update 2019 0107
  • Loading branch information
rudyfly authored Jan 7, 2019
2 parents 64c7495 + 333ed49 commit f8f73f6
Show file tree
Hide file tree
Showing 14 changed files with 146 additions and 27 deletions.
2 changes: 0 additions & 2 deletions MAINTAINERS
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
Michael Crosby <[email protected]> (@crosbymichael)
Rohit Jnagal <[email protected]> (@rjnagal)
Victor Marmol <[email protected]> (@vmarmol)
Mrunal Patel <[email protected]> (@mrunalp)
Daniel, Dao Quang Minh <[email protected]> (@dqminh)
Qiang Huang <[email protected]> (@hqhq)
Expand Down
17 changes: 15 additions & 2 deletions libcontainer/SPEC.md
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,8 @@ service (CLOS) and each CLOS has a capacity bitmask (CBM).

Memory Bandwidth Allocation (MBA) provides indirect and approximate throttle
over memory bandwidth for the software. A user controls the resource by
indicating the percentage of maximum memory bandwidth.
indicating the percentage of maximum memory bandwidth or memory bandwidth limit
in MBps unit if MBA Software Controller is enabled.

It can be used to handle L3 cache and memory bandwidth resources allocation
for containers if hardware and kernel support Intel RDT CAT and MBA features.
Expand Down Expand Up @@ -237,7 +238,7 @@ set in a group: 0xf, 0xf0, 0x3ff, 0x1f00 and etc.

Memory bandwidth schema:
It has allocation values for memory bandwidth on each socket, which contains
L3 cache id and memory bandwidth percentage.
L3 cache id and memory bandwidth.
```
Format: "MB:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;..."
```
Expand All @@ -250,6 +251,18 @@ that is allocated is also dependent on the CPU model and can be looked up at
min_bw + N * bw_gran. Intermediate values are rounded to the next control
step available on the hardware.

If MBA Software Controller is enabled through mount option "-o mba_MBps":
mount -t resctrl resctrl -o mba_MBps /sys/fs/resctrl
Memory bandwidth can then be specified in "MBps" (megabytes per second)
instead of "percentages". The kernel underneath uses a software feedback
mechanism, the "Software Controller", which reads the actual bandwidth using
MBM counters and adjusts the memory bandwidth percentages to ensure:
"actual memory bandwidth < user specified memory bandwidth".

For example, on a two-socket machine, the schema line could be
"MB:0=5000;1=7000" which means 5000 MBps memory bandwidth limit on socket 0
and 7000 MBps memory bandwidth limit on socket 1.

For more information about Intel RDT kernel interface:
https://www.kernel.org/doc/Documentation/x86/intel_rdt_ui.txt

Expand Down
13 changes: 10 additions & 3 deletions libcontainer/cgroups/fs/kmem.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
package fs

import (
"errors"
"fmt"
"io/ioutil"
"os"
Expand All @@ -17,7 +18,12 @@ import (
const cgroupKernelMemoryLimit = "memory.kmem.limit_in_bytes"

func EnableKernelMemoryAccounting(path string) error {
// Check if kernel memory is enabled
// Ensure that kernel memory is available in this kernel build. If it
// isn't, we just ignore it because EnableKernelMemoryAccounting is
// automatically called for all memory limits.
if !cgroups.PathExists(filepath.Join(path, cgroupKernelMemoryLimit)) {
return nil
}
// We have to limit the kernel memory here as it won't be accounted at all
// until a limit is set on the cgroup and limit cannot be set once the
// cgroup has children, or if there are already tasks in the cgroup.
Expand All @@ -34,8 +40,9 @@ func setKernelMemory(path string, kernelMemoryLimit int64) error {
return fmt.Errorf("no such directory for %s", cgroupKernelMemoryLimit)
}
if !cgroups.PathExists(filepath.Join(path, cgroupKernelMemoryLimit)) {
// kernel memory is not enabled on the system so we should do nothing
return nil
// We have specifically been asked to set a kmem limit. If the kernel
// doesn't support it we *must* error out.
return errors.New("kernel memory accounting not supported by this kernel")
}
if err := ioutil.WriteFile(filepath.Join(path, cgroupKernelMemoryLimit), []byte(strconv.FormatInt(kernelMemoryLimit, 10)), 0700); err != nil {
// Check if the error number returned by the syscall is "EBUSY"
Expand Down
6 changes: 5 additions & 1 deletion libcontainer/cgroups/fs/kmem_disabled.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,14 @@

package fs

import (
"errors"
)

// EnableKernelMemoryAccounting is a no-op in this file: it ignores path and
// always returns nil.
func EnableKernelMemoryAccounting(path string) error {
return nil
}

func setKernelMemory(path string, kernelMemoryLimit int64) error {
return nil
return errors.New("kernel memory accounting disabled in this runc build")
}
2 changes: 1 addition & 1 deletion libcontainer/cgroups/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -463,7 +463,7 @@ func WriteCgroupProc(dir string, pid int) error {
return fmt.Errorf("no such directory for %s", CgroupProcesses)
}

// Dont attach any pid to the cgroup if -1 is specified as a pid
// Don't attach any pid to the cgroup if -1 is specified as a pid
if pid != -1 {
if err := ioutil.WriteFile(filepath.Join(dir, CgroupProcesses), []byte(strconv.Itoa(pid)), 0700); err != nil {
return fmt.Errorf("failed to write %v to %v: %v", pid, CgroupProcesses, err)
Expand Down
4 changes: 3 additions & 1 deletion libcontainer/configs/intelrdt.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@ type IntelRdt struct {
// Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
L3CacheSchema string `json:"l3_cache_schema,omitempty"`

// The schema of memory bandwidth percentage per L3 cache id
// The schema of memory bandwidth per L3 cache id
// Format: "MB:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;..."
// The unit of memory bandwidth is specified in "percentages" by
// default, and in "MBps" if MBA Software Controller is enabled.
MemBwSchema string `json:"memBwSchema,omitempty"`
}
14 changes: 11 additions & 3 deletions libcontainer/container_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -377,10 +377,18 @@ func (c *linuxContainer) Signal(s os.Signal, all bool) error {
if all {
return signalAllProcesses(c.cgroupManager, s)
}
if err := c.initProcess.signal(s); err != nil {
return newSystemErrorWithCause(err, "signaling init process")
status, err := c.currentStatus()
if err != nil {
return err
}
return nil
// to avoid a PID reuse attack
if status == Running || status == Created || status == Paused {
if err := c.initProcess.signal(s); err != nil {
return newSystemErrorWithCause(err, "signaling init process")
}
return nil
}
return newGenericError(fmt.Errorf("container not running"), ContainerNotRunning)
}

func (c *linuxContainer) createExecFifo() error {
Expand Down
4 changes: 2 additions & 2 deletions libcontainer/container_linux_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -167,15 +167,15 @@ func TestGetContainerStats(t *testing.T) {
t.Fatal("intel rdt stats are nil")
}
if stats.IntelRdtStats.L3CacheSchema != "L3:0=f;1=f0" {
t.Fatalf("expected L3CacheSchema L3:0=f;1=f0 but recevied %s", stats.IntelRdtStats.L3CacheSchema)
t.Fatalf("expected L3CacheSchema L3:0=f;1=f0 but received %s", stats.IntelRdtStats.L3CacheSchema)
}
}
if intelrdt.IsMbaEnabled() {
if stats.IntelRdtStats == nil {
t.Fatal("intel rdt stats are nil")
}
if stats.IntelRdtStats.MemBwSchema != "MB:0=20;1=70" {
t.Fatalf("expected MemBwSchema MB:0=20;1=70 but recevied %s", stats.IntelRdtStats.MemBwSchema)
t.Fatalf("expected MemBwSchema MB:0=20;1=70 but received %s", stats.IntelRdtStats.MemBwSchema)
}
}
}
Expand Down
53 changes: 48 additions & 5 deletions libcontainer/intelrdt/intelrdt.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ import (
*
* Memory Bandwidth Allocation (MBA) provides indirect and approximate throttle
* over memory bandwidth for the software. A user controls the resource by
* indicating the percentage of maximum memory bandwidth.
* indicating the percentage of maximum memory bandwidth or memory bandwidth
* limit in MBps unit if MBA Software Controller is enabled.
*
* More details about Intel RDT CAT and MBA can be found in the section 17.18
* of Intel Software Developer Manual:
Expand Down Expand Up @@ -95,7 +96,7 @@ import (
*
* Memory bandwidth schema:
* It has allocation values for memory bandwidth on each socket, which contains
* L3 cache id and memory bandwidth percentage.
* L3 cache id and memory bandwidth.
* Format: "MB:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;..."
* For example, on a two-socket machine, the schema line could be "MB:0=20;1=70"
*
Expand All @@ -106,6 +107,18 @@ import (
* min_bw + N * bw_gran. Intermediate values are rounded to the next control
* step available on the hardware.
*
* If MBA Software Controller is enabled through mount option "-o mba_MBps":
* mount -t resctrl resctrl -o mba_MBps /sys/fs/resctrl
* Memory bandwidth can then be specified in "MBps" (megabytes per second)
* instead of "percentages". The kernel underneath uses a software feedback
* mechanism, the "Software Controller", which reads the actual bandwidth using
* MBM counters and adjusts the memory bandwidth percentages to ensure:
* "actual memory bandwidth < user specified memory bandwidth".
*
* For example, on a two-socket machine, the schema line could be
* "MB:0=5000;1=7000" which means 5000 MBps memory bandwidth limit on socket 0
* and 7000 MBps memory bandwidth limit on socket 1.
*
* For more information about Intel RDT kernel interface:
* https://www.kernel.org/doc/Documentation/x86/intel_rdt_ui.txt
*
Expand Down Expand Up @@ -165,6 +178,8 @@ var (
isCatEnabled bool
// The flag to indicate if Intel RDT/MBA is enabled
isMbaEnabled bool
// The flag to indicate if Intel RDT/MBA Software Controller is enabled
isMbaScEnabled bool
)

type intelRdtData struct {
Expand Down Expand Up @@ -197,7 +212,12 @@ func init() {
isCatEnabled = true
}
}
if isMbaFlagSet {
if isMbaScEnabled {
// MBA Software Controller was confirmed to be enabled in step 2.
// MBA must therefore be enabled as well, because the Software
// Controller depends on MBA.
isMbaEnabled = true
} else if isMbaFlagSet {
if _, err := os.Stat(filepath.Join(intelRdtRoot, "info", "MB")); err == nil {
isMbaEnabled = true
}
Expand Down Expand Up @@ -232,6 +252,11 @@ func findIntelRdtMountpointDir() (string, error) {
return "", fmt.Errorf("Error found less than 3 fields post '-' in %q", text)
}

// Check if MBA Software Controller is enabled through mount option "-o mba_MBps"
if strings.Contains(postSeparatorFields[2], "mba_MBps") {
isMbaScEnabled = true
}

return fields[4], nil
}
}
Expand Down Expand Up @@ -461,7 +486,7 @@ func WriteIntelRdtTasks(dir string, pid int) error {
return fmt.Errorf("no such directory for %s", IntelRdtTasks)
}

// Dont attach any pid if -1 is specified as a pid
// Don't attach any pid if -1 is specified as a pid
if pid != -1 {
if err := ioutil.WriteFile(filepath.Join(dir, IntelRdtTasks), []byte(strconv.Itoa(pid)), 0700); err != nil {
return fmt.Errorf("failed to write %v to %v: %v", pid, IntelRdtTasks, err)
Expand All @@ -480,6 +505,11 @@ func IsMbaEnabled() bool {
return isMbaEnabled
}

// IsMbaScEnabled reports whether the Intel RDT/MBA Software Controller is
// enabled, as recorded in the package-level isMbaScEnabled flag.
func IsMbaScEnabled() bool {
return isMbaScEnabled
}

// Get the 'container_id' path in Intel RDT "resource control" filesystem
func GetIntelRdtPath(id string) (string, error) {
rootPath, err := getIntelRdtRoot()
Expand Down Expand Up @@ -633,7 +663,7 @@ func (m *IntelRdtManager) Set(container *configs.Config) error {
//
// About memory bandwidth schema:
// It has allocation values for memory bandwidth on each socket, which
// contains L3 cache id and memory bandwidth percentage.
// contains L3 cache id and memory bandwidth.
// Format: "MB:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;..."
// For example, on a two-socket machine, the schema line could be:
// "MB:0=20;1=70"
Expand All @@ -645,6 +675,19 @@ func (m *IntelRdtManager) Set(container *configs.Config) error {
// The available bandwidth control steps are: min_bw + N * bw_gran.
// Intermediate values are rounded to the next control step available
// on the hardware.
//
// If MBA Software Controller is enabled through mount option
// "-o mba_MBps": mount -t resctrl resctrl -o mba_MBps /sys/fs/resctrl
// Memory bandwidth can then be specified in "MBps" (megabytes per
// second) instead of "percentages". The kernel underneath uses a
// software feedback mechanism, the "Software Controller", which reads
// the actual bandwidth using MBM counters and adjusts the memory
// bandwidth percentages to ensure:
// "actual memory bandwidth < user specified memory bandwidth".
//
// For example, on a two-socket machine, the schema line could be
// "MB:0=5000;1=7000" which means 5000 MBps memory bandwidth limit on
// socket 0 and 7000 MBps memory bandwidth limit on socket 1.
if container.IntelRdt != nil {
path := m.GetPath()
l3CacheSchema := container.IntelRdt.L3CacheSchema
Expand Down
38 changes: 38 additions & 0 deletions libcontainer/intelrdt/intelrdt_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,3 +82,41 @@ func TestIntelRdtSetMemBwSchema(t *testing.T) {
t.Fatal("Got the wrong value, set 'schemata' failed.")
}
}

// TestIntelRdtSetMemBwScSchema checks that IntelRdtManager.Set replaces the
// memory bandwidth schema in the 'schemata' file when the values are given in
// the form used with the MBA Software Controller (e.g. "MB:0=9000;1=4000").
//
// The test is skipped, rather than silently passing, when IsMbaScEnabled
// reports false, so that test output shows nothing was verified.
func TestIntelRdtSetMemBwScSchema(t *testing.T) {
	if !IsMbaScEnabled() {
		t.Skip("Intel RDT/MBA Software Controller is not enabled")
	}

	helper := NewIntelRdtTestUtil(t)
	defer helper.cleanup()

	const (
		memBwScSchemaBefore = "MB:0=5000;1=7000"
		memBwScSchemaAfter  = "MB:0=9000;1=4000"
	)

	// Seed the 'schemata' file with the initial schema.
	helper.writeFileContents(map[string]string{
		"schemata": memBwScSchemaBefore + "\n",
	})

	// Request the new schema through the container config and apply it.
	helper.IntelRdtData.config.IntelRdt.MemBwSchema = memBwScSchemaAfter
	intelrdt := &IntelRdtManager{
		Config: helper.IntelRdtData.config,
		Path:   helper.IntelRdtPath,
	}
	if err := intelrdt.Set(helper.IntelRdtData.config); err != nil {
		t.Fatal(err)
	}

	tmpStrings, err := getIntelRdtParamString(helper.IntelRdtPath, "schemata")
	if err != nil {
		t.Fatalf("Failed to parse file 'schemata' - %s", err)
	}
	// Only the first line of 'schemata' is compared with the requested schema.
	values := strings.Split(tmpStrings, "\n")
	value := values[0]

	if value != memBwScSchemaAfter {
		t.Fatalf("Got the wrong value, set 'schemata' failed: got %q, want %q", value, memBwScSchemaAfter)
	}
}
2 changes: 1 addition & 1 deletion man/runc-pause.8.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,4 @@ paused.

# DESCRIPTION
The pause command suspends all processes in the instance of the container.
Use runc list to identiy instances of containers and their current status.
Use runc list to identify instances of containers and their current status.
2 changes: 1 addition & 1 deletion man/runc-resume.8.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,4 @@ resumed.

# DESCRIPTION
The resume command resumes all processes in the instance of the container.
Use runc list to identiy instances of containers and their current status.
Use runc list to identify instances of containers and their current status.
4 changes: 2 additions & 2 deletions pause.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ Where "<container-id>" is the name for the instance of the container to be
paused. `,
Description: `The pause command suspends all processes in the instance of the container.
Use runc list to identiy instances of containers and their current status.`,
Use runc list to identify instances of containers and their current status.`,
Action: func(context *cli.Context) error {
if err := checkArgs(context, 1, exactArgs); err != nil {
return err
Expand Down Expand Up @@ -45,7 +45,7 @@ Where "<container-id>" is the name for the instance of the container to be
resumed.`,
Description: `The resume command resumes all processes in the instance of the container.
Use runc list to identiy instances of containers and their current status.`,
Use runc list to identify instances of containers and their current status.`,
Action: func(context *cli.Context) error {
if err := checkArgs(context, 1, exactArgs); err != nil {
return err
Expand Down
12 changes: 9 additions & 3 deletions script/check-config.sh
Original file line number Diff line number Diff line change
Expand Up @@ -198,10 +198,10 @@ flags=(
NAMESPACES {NET,PID,IPC,UTS}_NS
CGROUPS CGROUP_CPUACCT CGROUP_DEVICE CGROUP_FREEZER CGROUP_SCHED CPUSETS MEMCG
KEYS
MACVLAN VETH BRIDGE BRIDGE_NETFILTER
VETH BRIDGE BRIDGE_NETFILTER
NF_NAT_IPV4 IP_NF_FILTER IP_NF_TARGET_MASQUERADE
NETFILTER_XT_MATCH_{ADDRTYPE,CONNTRACK}
NF_NAT NF_NAT_NEEDED
NETFILTER_XT_MATCH_{ADDRTYPE,CONNTRACK,IPVS}
IP_NF_NAT NF_NAT NF_NAT_NEEDED

# required for bind-mounting /dev/mqueue into containers
POSIX_MQUEUE
Expand Down Expand Up @@ -243,5 +243,11 @@ flags=(
CGROUP_HUGETLB
NET_CLS_CGROUP $netprio
CFS_BANDWIDTH FAIR_GROUP_SCHED RT_GROUP_SCHED
IP_NF_TARGET_REDIRECT
IP_VS
IP_VS_NFCT
IP_VS_PROTO_TCP
IP_VS_PROTO_UDP
IP_VS_RR
)
check_flags "${flags[@]}"

0 comments on commit f8f73f6

Please sign in to comment.