diff --git a/libcontainer/cgroups/fs/freezer.go b/libcontainer/cgroups/fs/freezer.go index 11cb1646f..25aff4f86 100644 --- a/libcontainer/cgroups/fs/freezer.go +++ b/libcontainer/cgroups/fs/freezer.go @@ -28,33 +28,54 @@ func (s *FreezerGroup) Apply(path string, d *cgroupData) error { func (s *FreezerGroup) Set(path string, cgroup *configs.Cgroup) error { switch cgroup.Resources.Freezer { - case configs.Frozen, configs.Thawed: - for { - // In case this loop does not exit because it doesn't get the expected - // state, let's write again this state, hoping it's going to be properly - // set this time. Otherwise, this loop could run infinitely, waiting for - // a state change that would never happen. - if err := fscommon.WriteFile(path, "freezer.state", string(cgroup.Resources.Freezer)); err != nil { + case configs.Frozen: + // As per older kernel docs (freezer-subsystem.txt before + // kernel commit ef9fe980c6fcc1821), if FREEZING is seen, + // userspace should either retry or thaw. While current + // kernel cgroup v1 docs no longer mention a need to retry, + // the kernel (tested on v5.4, Ubuntu 20.04) can't reliably + // freeze a cgroup while new processes keep appearing in it + // (either via fork/clone or by writing new PIDs to + // cgroup.procs). + // + // The number of retries below is chosen to have a decent + // chance to succeed even in the worst case scenario (runc + // pause/unpause with parallel runc exec). + // + // Adding any amount of sleep in between retries did not + // increase the chances of successful freeze. + for i := 0; i < 1000; i++ { + if err := fscommon.WriteFile(path, "freezer.state", string(configs.Frozen)); err != nil { return err } - state, err := s.GetState(path) + state, err := fscommon.ReadFile(path, "freezer.state") if err != nil { return err } - if state == cgroup.Resources.Freezer { - break + state = strings.TrimSpace(state) + switch state { + case "FREEZING": + continue + case string(configs.Frozen): + return nil + default: + // should never happen + return fmt.Errorf("unexpected state %s while freezing", strings.TrimSpace(state)) } - - time.Sleep(1 * time.Millisecond) } + // Despite our best efforts, it got stuck in FREEZING. + // Leaving it in this state is bad and dangerous, so + // let's (try to) thaw it back and error out. + _ = fscommon.WriteFile(path, "freezer.state", string(configs.Thawed)) + return errors.New("unable to freeze") + case configs.Thawed: + return fscommon.WriteFile(path, "freezer.state", string(configs.Thawed)) case configs.Undefined: return nil default: return fmt.Errorf("Invalid argument '%s' to freezer.state", string(cgroup.Resources.Freezer)) } - - return nil } func (s *FreezerGroup) GetStats(path string, stats *cgroups.Stats) error {