diff --git a/libcontainer/cgroups/systemd/common.go b/libcontainer/cgroups/systemd/common.go index b6bfb080a33..50746ae0c56 100644 --- a/libcontainer/cgroups/systemd/common.go +++ b/libcontainer/cgroups/systemd/common.go @@ -353,32 +353,52 @@ func isUnitExists(err error) bool { return isDbusError(err, "org.freedesktop.systemd1.UnitExists") } -func startUnit(cm *dbusConnManager, unitName string, properties []systemdDbus.Property) error { +func startUnit(cm *dbusConnManager, unitName string, properties []systemdDbus.Property, ignoreExist bool) error { statusChan := make(chan string, 1) + retry := true + +retry: err := cm.retryOnDisconnect(func(c *systemdDbus.Conn) error { _, err := c.StartTransientUnitContext(context.TODO(), unitName, "replace", properties, statusChan) return err }) - if err == nil { - timeout := time.NewTimer(30 * time.Second) - defer timeout.Stop() - - select { - case s := <-statusChan: - close(statusChan) - // Please refer to https://pkg.go.dev/github.com/coreos/go-systemd/v22/dbus#Conn.StartUnit - if s != "done" { - resetFailedUnit(cm, unitName) - return fmt.Errorf("error creating systemd unit `%s`: got `%s`", unitName, s) - } - case <-timeout.C: + if err != nil { + if !isUnitExists(err) { + return err + } + if ignoreExist { + // TODO: remove this hack. + // This is kubelet making sure a slice exists (see + // https://github.com/opencontainers/runc/pull/1124). + return nil + } + if retry { + // In case a unit with the same name exists, this may + // be a leftover failed unit. Reset it, so systemd can + // remove it, and retry once. resetFailedUnit(cm, unitName) - return errors.New("Timeout waiting for systemd to create " + unitName) + retry = false + goto retry } - } else if !isUnitExists(err) { return err } + timeout := time.NewTimer(30 * time.Second) + defer timeout.Stop() + + select { + case s := <-statusChan: + close(statusChan) + // Please refer to https://pkg.go.dev/github.com/coreos/go-systemd/v22/dbus#Conn.StartUnit + if s != "done" { + resetFailedUnit(cm, unitName) + return fmt.Errorf("error creating systemd unit `%s`: got `%s`", unitName, s) + } + case <-timeout.C: + resetFailedUnit(cm, unitName) + return errors.New("Timeout waiting for systemd to create " + unitName) + } + return nil } diff --git a/libcontainer/cgroups/systemd/v1.go b/libcontainer/cgroups/systemd/v1.go index a74a05a5cd0..4ec32f7a9ca 100644 --- a/libcontainer/cgroups/systemd/v1.go +++ b/libcontainer/cgroups/systemd/v1.go @@ -206,7 +206,7 @@ func (m *legacyManager) Apply(pid int) error { properties = append(properties, c.SystemdProps...) - if err := startUnit(m.dbus, unitName, properties); err != nil { + if err := startUnit(m.dbus, unitName, properties, pid == -1); err != nil { return err } diff --git a/libcontainer/cgroups/systemd/v2.go b/libcontainer/cgroups/systemd/v2.go index de0cb974d46..94d24ee4502 100644 --- a/libcontainer/cgroups/systemd/v2.go +++ b/libcontainer/cgroups/systemd/v2.go @@ -284,7 +284,7 @@ func (m *unifiedManager) Apply(pid int) error { properties = append(properties, c.SystemdProps...) - if err := startUnit(m.dbus, unitName, properties); err != nil { + if err := startUnit(m.dbus, unitName, properties, pid == -1); err != nil { return fmt.Errorf("unable to start unit %q (properties %+v): %w", unitName, properties, err) } diff --git a/libcontainer/factory_linux.go b/libcontainer/factory_linux.go index e6c71ac34e3..1df87e2e5e2 100644 --- a/libcontainer/factory_linux.go +++ b/libcontainer/factory_linux.go @@ -179,6 +179,12 @@ func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, err return nil, fmt.Errorf("unable to get cgroup PIDs: %w", err) } if len(pids) != 0 { + if config.Cgroups.Systemd { + // systemd cgroup driver can't add a pid to an + // existing systemd unit and will return an + // error anyway, so let's error out early. + return nil, fmt.Errorf("container's cgroup is not empty: %d process(es) found", len(pids)) + } // TODO: return an error. logrus.Warnf("container's cgroup is not empty: %d process(es) found", len(pids)) logrus.Warn("DEPRECATED: running container in a non-empty cgroup won't be supported in runc 1.2; https://github.com/opencontainers/runc/issues/3132") diff --git a/tests/integration/cgroups.bats b/tests/integration/cgroups.bats index ff7cf6d0b70..7fd2e30affa 100644 --- a/tests/integration/cgroups.bats +++ b/tests/integration/cgroups.bats @@ -348,7 +348,7 @@ function setup() { [ "$output" = "ok" ] } -@test "runc run/create should warn about a non-empty cgroup" { +@test "runc run/create should error/warn about a non-empty cgroup" { if [[ "$ROOTLESS" -ne 0 ]]; then requires rootless_cgroup fi @@ -358,14 +358,21 @@ function setup() { runc run -d --console-socket "$CONSOLE_SOCKET" ct1 [ "$status" -eq 0 ] + # When systemd driver is used, runc can't add PID to an existing unit, + # so runc returns an error. For backward compatibility, we still allow + # such configuration in 1.1, but only when systemd driver is NOT used. + # See https://github.com/opencontainers/runc/issues/3780. + local exp=0 + [[ -n "${RUNC_USE_SYSTEMD}" ]] && exp=1 + # Run a second container sharing the cgroup with the first one. runc --debug run -d --console-socket "$CONSOLE_SOCKET" ct2 - [ "$status" -eq 0 ] + [ "$status" -eq "$exp" ] [[ "$output" == *"container's cgroup is not empty"* ]] # Same but using runc create. runc create --console-socket "$CONSOLE_SOCKET" ct3 - [ "$status" -eq 0 ] + [ "$status" -eq "$exp" ] [[ "$output" == *"container's cgroup is not empty"* ]] }