diff --git a/libcontainer/cgroups/fs/memory.go b/libcontainer/cgroups/fs/memory.go
index 40d3d339d07..6c0fd02281c 100644
--- a/libcontainer/cgroups/fs/memory.go
+++ b/libcontainer/cgroups/fs/memory.go
@@ -5,15 +5,19 @@ package fs
 import (
 	"bufio"
 	"fmt"
-	"math"
+	"io/ioutil"
 	"os"
 	"path/filepath"
 	"strconv"
 	"strings"
+	"syscall"
 
 	"github.com/opencontainers/runc/libcontainer/cgroups"
 	"github.com/opencontainers/runc/libcontainer/configs"
-	"github.com/opencontainers/runc/libcontainer/system"
+)
+
+const (
+	cgroupKernelMemoryLimit = "memory.kmem.limit_in_bytes"
 )
 
 type MemoryGroup struct {
@@ -34,9 +38,7 @@ func (s *MemoryGroup) Apply(d *cgroupData) (err error) {
 				return err
 			}
 		}
-		// We have to set kernel memory here, as we can't change it once
-		// processes have been attached to the cgroup.
-		if err := s.SetKernelMemory(path, d.config); err != nil {
+		if err := EnableKernelMemoryAccounting(path); err != nil {
 			return err
 		}
 	}
@@ -55,38 +57,43 @@ func (s *MemoryGroup) Apply(d *cgroupData) (err error) {
 	return nil
 }
 
-func (s *MemoryGroup) SetKernelMemory(path string, cgroup *configs.Cgroup) error {
-	// This has to be done separately because it has special
-	// constraints (it can only be initialized before setting up a
-	// hierarchy or adding a task to the cgroups. However, if
-	// sucessfully initialized, it can be updated anytime afterwards)
-	if cgroup.Resources.KernelMemory != 0 {
-		kmemInitialized := false
-		// Is kmem.limit_in_bytes already set?
-		kmemValue, err := getCgroupParamUint(path, "memory.kmem.limit_in_bytes")
-		if err != nil {
-			return err
-		}
-		switch system.GetLongBit() {
-		case 32:
-			kmemInitialized = uint32(kmemValue) != uint32(math.MaxUint32)
-		case 64:
-			kmemInitialized = kmemValue != uint64(math.MaxUint64)
-		}
-		if !kmemInitialized {
-			// If there's already tasks in the cgroup, we can't change the limit either
-			tasks, err := getCgroupParamString(path, "tasks")
-			if err != nil {
-				return err
-			}
-			if tasks != "" {
-				return fmt.Errorf("cannot set kmem.limit_in_bytes after task have joined this cgroup")
-			}
-		}
+func EnableKernelMemoryAccounting(path string) error {
+	// Check if kernel memory is enabled
+	// We have to limit the kernel memory here as it won't be accounted at all
+	// until a limit is set on the cgroup and limit cannot be set once the
+	// cgroup has children, or if there are already tasks in the cgroup.
+	kernelMemoryLimit := int64(1)
+	if err := setKernelMemory(path, kernelMemoryLimit); err != nil {
+		return err
+	}
+	kernelMemoryLimit = int64(-1)
+	if err := setKernelMemory(path, kernelMemoryLimit); err != nil {
+		return err
+	}
+	return nil
+}
 
-		if err := writeFile(path, "memory.kmem.limit_in_bytes", strconv.FormatInt(cgroup.Resources.KernelMemory, 10)); err != nil {
-			return err
+func setKernelMemory(path string, kernelMemoryLimit int64) error {
+	if path == "" {
+		return fmt.Errorf("no such directory for %s", cgroupKernelMemoryLimit)
+	}
+	if !cgroups.PathExists(filepath.Join(path, cgroupKernelMemoryLimit)) {
+		// kernel memory is not enabled on the system so we should do nothing
+		return nil
+	}
+	if err := ioutil.WriteFile(filepath.Join(path, cgroupKernelMemoryLimit), []byte(strconv.FormatInt(kernelMemoryLimit, 10)), 0700); err != nil {
+		// Check if the error number returned by the syscall is "EBUSY"
+		// The EBUSY signal is returned on attempts to write to the
+		// memory.kmem.limit_in_bytes file if the cgroup has children or
+		// once tasks have been attached to the cgroup
+		if pathErr, ok := err.(*os.PathError); ok {
+			if errNo, ok := pathErr.Err.(syscall.Errno); ok {
+				if errNo == syscall.EBUSY {
+					return fmt.Errorf("failed to set %s, because either tasks have already joined this cgroup or it has children", cgroupKernelMemoryLimit)
+				}
+			}
 		}
+		return fmt.Errorf("failed to write %v to %v: %v", kernelMemoryLimit, cgroupKernelMemoryLimit, err)
 	}
 	return nil
 }
@@ -139,8 +146,10 @@ func (s *MemoryGroup) Set(path string, cgroup *configs.Cgroup) error {
 		return err
 	}
 
-	if err := s.SetKernelMemory(path, cgroup); err != nil {
-		return err
+	if cgroup.Resources.KernelMemory != 0 {
+		if err := setKernelMemory(path, cgroup.Resources.KernelMemory); err != nil {
+			return err
+		}
 	}
 
 	if cgroup.Resources.MemoryReservation != 0 {
@@ -148,6 +157,7 @@ func (s *MemoryGroup) Set(path string, cgroup *configs.Cgroup) error {
 			return err
 		}
 	}
+
 	if cgroup.Resources.KernelMemoryTCP != 0 {
 		if err := writeFile(path, "memory.kmem.tcp.limit_in_bytes", strconv.FormatInt(cgroup.Resources.KernelMemoryTCP, 10)); err != nil {
 			return err
diff --git a/libcontainer/cgroups/fs/memory_test.go b/libcontainer/cgroups/fs/memory_test.go
index af3a438f8e0..5e86e1b4ed1 100644
--- a/libcontainer/cgroups/fs/memory_test.go
+++ b/libcontainer/cgroups/fs/memory_test.go
@@ -230,7 +230,7 @@ func TestMemorySetKernelMemory(t *testing.T) {
 
 	helper.CgroupData.config.Resources.KernelMemory = kernelMemoryAfter
 	memory := &MemoryGroup{}
-	if err := memory.SetKernelMemory(helper.CgroupPath, helper.CgroupData.config); err != nil {
+	if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
 		t.Fatal(err)
 	}
 
diff --git a/libcontainer/cgroups/systemd/apply_systemd.go b/libcontainer/cgroups/systemd/apply_systemd.go
index e60975b071d..337b045933f 100644
--- a/libcontainer/cgroups/systemd/apply_systemd.go
+++ b/libcontainer/cgroups/systemd/apply_systemd.go
@@ -490,5 +490,8 @@ func setKernelMemory(c *configs.Cgroup) error {
 		return err
 	}
 
-	return os.MkdirAll(path, 0755)
+	if err := os.MkdirAll(path, 0755); err != nil {
+		return err
+	}
+	return fs.EnableKernelMemoryAccounting(path)
 }
diff --git a/libcontainer/system/sysconfig.go b/libcontainer/system/sysconfig.go
index 4fba6c2b704..c5d7979755a 100644
--- a/libcontainer/system/sysconfig.go
+++ b/libcontainer/system/sysconfig.go
@@ -25,7 +25,3 @@ import "C"
 func GetClockTicks() int {
 	return int(C.sysconf(C._SC_CLK_TCK))
 }
-
-func GetLongBit() int {
-	return int(C.GetLongBit())
-}