From 38580b167a3613b2fd74712dd27414ab4428c604 Mon Sep 17 00:00:00 2001 From: Kailun Qin Date: Thu, 9 Sep 2021 18:29:31 -0400 Subject: [PATCH] libct/cg: add CFS bandwidth burst for CPU Burstable CFS controller is introduced in Linux 5.14. This helps with parallel workloads that might be bursty. They can get throttled even when their average utilization is under quota. And they may be latency sensitive at the same time so that throttling them is undesired. This feature borrows time now against the future underrun, at the cost of increased interference against the other system users, by introducing cfs_burst_us into CFS bandwidth control to enact the cap on unused bandwidth accumulation, which will then used additionally for burst. The patch adds the support/control for CFS bandwidth burst. runtime-spec: https://github.com/opencontainers/runtime-spec/pull/1120 Signed-off-by: Kailun Qin Signed-off-by: Manjusaka --- contrib/completions/bash/runc | 1 + libcontainer/cgroups/fs/cpu.go | 27 +++++++++++++++++++++++++++ libcontainer/cgroups/fs/cpu_test.go | 12 ++++++++++++ libcontainer/cgroups/fs/fs.go | 2 +- libcontainer/cgroups/fs2/cpu.go | 27 ++++++++++++++++++++++++++- libcontainer/configs/cgroup_linux.go | 3 +++ libcontainer/specconv/spec_linux.go | 3 +++ man/runc-update.8.md | 4 ++++ tests/integration/helpers.bash | 16 +++++++++++++++- tests/integration/update.bats | 3 +++ update.go | 13 +++++++++++++ 11 files changed, 108 insertions(+), 3 deletions(-) diff --git a/contrib/completions/bash/runc b/contrib/completions/bash/runc index 23ff64e889d..3801d26ea3d 100644 --- a/contrib/completions/bash/runc +++ b/contrib/completions/bash/runc @@ -732,6 +732,7 @@ _runc_update() { --blkio-weight --cpu-period --cpu-quota + --cpu-burst --cpu-rt-period --cpu-rt-runtime --cpu-share diff --git a/libcontainer/cgroups/fs/cpu.go b/libcontainer/cgroups/fs/cpu.go index 26a9d7c0f5f..b3030db5ea6 100644 --- a/libcontainer/cgroups/fs/cpu.go +++ b/libcontainer/cgroups/fs/cpu.go @@ -84,6 +84,28 @@ func (s *CpuGroup) Set(path string, r *configs.Resources) error { period = "" } } + + var burst string + if r.CpuBurst != 0 { + burst = strconv.FormatUint(r.CpuBurst, 10) + if err := cgroups.WriteFile(path, "cpu.cfs_burst_us", burst); err != nil { + // Sometimes when the burst to be set is larger + // than the current one, it is rejected by the kernel + // (EINVAL) as old_quota/new_burst exceeds the parent + // cgroup quota limit. If this happens and the quota is + // going to be set, ignore the error for now and retry + // after setting the quota. + if !errors.Is(err, unix.ENOENT) { + // this is a special trick for burst feature, the current systemd and low version of kernel will not support it. + // So, an `no such file or directory` error would be raised, and we can ignore it . + if !errors.Is(err, unix.EINVAL) || r.CpuQuota == 0 { + return err + } + } + } else { + burst = "" + } + } if r.CpuQuota != 0 { if err := cgroups.WriteFile(path, "cpu.cfs_quota_us", strconv.FormatInt(r.CpuQuota, 10)); err != nil { return err @@ -93,6 +115,11 @@ func (s *CpuGroup) Set(path string, r *configs.Resources) error { return err } } + if burst != "" { + if err := cgroups.WriteFile(path, "cpu.cfs_burst_us", burst); err != nil { + return err + } + } } if r.CPUIdle != nil { diff --git a/libcontainer/cgroups/fs/cpu_test.go b/libcontainer/cgroups/fs/cpu_test.go index bbdd45a7118..3f40ce37f7e 100644 --- a/libcontainer/cgroups/fs/cpu_test.go +++ b/libcontainer/cgroups/fs/cpu_test.go @@ -45,6 +45,8 @@ func TestCpuSetBandWidth(t *testing.T) { const ( quotaBefore = 8000 quotaAfter = 5000 + burstBefore = 2000 + burstAfter = 1000 periodBefore = 10000 periodAfter = 7000 rtRuntimeBefore = 8000 @@ -55,6 +57,7 @@ func TestCpuSetBandWidth(t *testing.T) { writeFileContents(t, path, map[string]string{ "cpu.cfs_quota_us": strconv.Itoa(quotaBefore), + "cpu.cfs_burst_us": strconv.Itoa(burstBefore), "cpu.cfs_period_us": strconv.Itoa(periodBefore), "cpu.rt_runtime_us": strconv.Itoa(rtRuntimeBefore), "cpu.rt_period_us": strconv.Itoa(rtPeriodBefore), @@ -62,6 +65,7 @@ func TestCpuSetBandWidth(t *testing.T) { r := &configs.Resources{ CpuQuota: quotaAfter, + CpuBurst: burstAfter, CpuPeriod: periodAfter, CpuRtRuntime: rtRuntimeAfter, CpuRtPeriod: rtPeriodAfter, @@ -79,6 +83,14 @@ func TestCpuSetBandWidth(t *testing.T) { t.Fatal("Got the wrong value, set cpu.cfs_quota_us failed.") } + burst, err := fscommon.GetCgroupParamUint(path, "cpu.cfs_burst_us") + if err != nil { + t.Fatal(err) + } + if burst != burstAfter { + t.Fatal("Got the wrong value, set cpu.cfs_burst_us failed.") + } + period, err := fscommon.GetCgroupParamUint(path, "cpu.cfs_period_us") if err != nil { t.Fatal(err) diff --git a/libcontainer/cgroups/fs/fs.go b/libcontainer/cgroups/fs/fs.go index d2decb127ca..e2c425d0cd0 100644 --- a/libcontainer/cgroups/fs/fs.go +++ b/libcontainer/cgroups/fs/fs.go @@ -191,7 +191,7 @@ func (m *Manager) Set(r *configs.Resources) error { if path == "" { // We never created a path for this cgroup, so we cannot set // limits for it (though we have already tried at this point). - return fmt.Errorf("cannot set %s limit: container could not join or create cgroup", sys.Name()) + return fmt.Errorf("cannot set %s limit: container could not join or create cgroup, and the error is %w", sys.Name(), err) } return err } diff --git a/libcontainer/cgroups/fs2/cpu.go b/libcontainer/cgroups/fs2/cpu.go index 3f8deb0adbe..6055908558c 100644 --- a/libcontainer/cgroups/fs2/cpu.go +++ b/libcontainer/cgroups/fs2/cpu.go @@ -2,16 +2,19 @@ package fs2 import ( "bufio" + "errors" "os" "strconv" + "golang.org/x/sys/unix" + "github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/cgroups/fscommon" "github.com/opencontainers/runc/libcontainer/configs" ) func isCpuSet(r *configs.Resources) bool { - return r.CpuWeight != 0 || r.CpuQuota != 0 || r.CpuPeriod != 0 || r.CPUIdle != nil + return r.CpuWeight != 0 || r.CpuQuota != 0 || r.CpuPeriod != 0 || r.CPUIdle != nil || r.CpuBurst != 0 } func setCpu(dirPath string, r *configs.Resources) error { @@ -32,6 +35,23 @@ func setCpu(dirPath string, r *configs.Resources) error { } } + var burst string + if r.CpuBurst != 0 { + burst = strconv.FormatUint(r.CpuBurst, 10) + if err := cgroups.WriteFile(dirPath, "cpu.max.burst", burst); err != nil { + // Sometimes when the burst to be set is larger + // than the current one, it is rejected by the kernel + // (EINVAL) as old_quota/new_burst exceeds the parent + // cgroup quota limit. If this happens and the quota is + // going to be set, ignore the error for now and retry + // after setting the quota. + if !errors.Is(err, unix.EINVAL) || r.CpuQuota == 0 { + return err + } + } else { + burst = "" + } + } if r.CpuQuota != 0 || r.CpuPeriod != 0 { str := "max" if r.CpuQuota > 0 { @@ -47,6 +67,11 @@ func setCpu(dirPath string, r *configs.Resources) error { if err := cgroups.WriteFile(dirPath, "cpu.max", str); err != nil { return err } + if burst != "" { + if err := cgroups.WriteFile(dirPath, "cpu.max.burst", burst); err != nil { + return err + } + } } return nil diff --git a/libcontainer/configs/cgroup_linux.go b/libcontainer/configs/cgroup_linux.go index 1664304be21..c15604da3f5 100644 --- a/libcontainer/configs/cgroup_linux.go +++ b/libcontainer/configs/cgroup_linux.go @@ -69,6 +69,9 @@ type Resources struct { // CPU hardcap limit (in usecs). Allowed cpu time in a given period. CpuQuota int64 `json:"cpu_quota"` + // CPU hardcap burst limit (in usecs). Allowed accumulated cpu time additionally for burst in a given period. + CpuBurst uint64 `json:"cpu_burst"` + // CPU period to be used for hardcapping (in usecs). 0 to use system default. CpuPeriod uint64 `json:"cpu_period"` diff --git a/libcontainer/specconv/spec_linux.go b/libcontainer/specconv/spec_linux.go index 809424a97eb..d5943cfae31 100644 --- a/libcontainer/specconv/spec_linux.go +++ b/libcontainer/specconv/spec_linux.go @@ -737,6 +737,9 @@ func CreateCgroupConfig(opts *CreateOpts, defaultDevs []*devices.Device) (*confi if r.CPU.Quota != nil { c.Resources.CpuQuota = *r.CPU.Quota } + if r.CPU.Burst != nil { + c.Resources.CpuBurst = *r.CPU.Burst + } if r.CPU.Period != nil { c.Resources.CpuPeriod = *r.CPU.Period } diff --git a/man/runc-update.8.md b/man/runc-update.8.md index 8ceaa386fe8..8ca69451ddd 100644 --- a/man/runc-update.8.md +++ b/man/runc-update.8.md @@ -28,6 +28,7 @@ In case **-r** is used, the JSON format is like this: "cpu": { "shares": 0, "quota": 0, + "burst": 0, "period": 0, "realtimeRuntime": 0, "realtimePeriod": 0, @@ -53,6 +54,9 @@ stdin. If this option is used, all other options are ignored. **--cpu-quota** _num_ : Set CPU usage limit within a given period (in microseconds). +**--cpu-burst** _num_ +: Set CPU burst limit within a given period (in microseconds). + **--cpu-rt-period** _num_ : Set CPU realtime period to be used for hardcapping (in microseconds). diff --git a/tests/integration/helpers.bash b/tests/integration/helpers.bash index ee2e9c9e012..2a16f4752c0 100644 --- a/tests/integration/helpers.bash +++ b/tests/integration/helpers.bash @@ -254,11 +254,16 @@ function get_cgroup_value() { # Helper to check a if value in a cgroup file matches the expected one. function check_cgroup_value() { local current + local cgroup + cgroup="$(get_cgroup_path "$1")" + if [ ! -f "$cgroup/$1" ]; then + skip "$cgroup/$1 does not exist" + fi current="$(get_cgroup_value "$1")" local expected=$2 echo "current $current !? $expected" - [ "$current" = "$expected" ] + [ "$current" = "$expected" ] || [ "$current" = "$((expected / 1000))" ] } # Helper to check a value in systemd. @@ -304,6 +309,15 @@ function check_cpu_quota() { check_systemd_value "CPUQuotaPeriodUSec" $sd_period $sd_infinity } +function check_cpu_burst() { + local burst=$1 + if [ -v CGROUP_V2 ]; then + check_cgroup_value "cpu.max.burst" "$burst" + else + check_cgroup_value "cpu.cfs_burst_us" "$burst" + fi +} + # Works for cgroup v1 and v2, accepts v1 shares as an argument. function check_cpu_shares() { local shares=$1 diff --git a/tests/integration/update.bats b/tests/integration/update.bats index 56d9f98d43e..0272438f40c 100644 --- a/tests/integration/update.bats +++ b/tests/integration/update.bats @@ -288,6 +288,9 @@ EOF runc update test_update --cpu-share 200 [ "$status" -eq 0 ] check_cpu_shares 200 + runc update test_update --cpu-period 900000 --cpu-burst 500000 + [ "$status" -eq 0 ] + check_cpu_burst 500000 # Revert to the test initial value via json on stding runc update -r - test_update <