Skip to content

Commit

Permalink
Implement SD-NOTIFY proxy in conmon
Browse files Browse the repository at this point in the history
This leverages conmon's ability to proxy the SD-NOTIFY socket.
This prevents locking caused by OCI runtime blocking, waiting for
SD-NOTIFY messages, and instead passes the messages directly up
to the host.

Backport of commit c22f3e8.

Signed-off-by: Valentin Rothberg <[email protected]>
  • Loading branch information
rhatdan authored and vrothberg committed Aug 20, 2021
1 parent 651fbac commit 5fc7c88
Show file tree
Hide file tree
Showing 6 changed files with 79 additions and 21 deletions.
4 changes: 4 additions & 0 deletions libpod/container.go
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,10 @@ type Container struct {
// This is true if a container is restored from a checkpoint.
restoreFromCheckpoint bool

// Used to query the NOTIFY_SOCKET once along with setting up
// mounts etc.
notifySocket string

slirp4netnsSubnet *net.IPNet
}

Expand Down
40 changes: 40 additions & 0 deletions libpod/container_internal_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -352,6 +352,10 @@ func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) {
return nil, err
}

if err := c.mountNotifySocket(g); err != nil {
return nil, err
}

// Get host UID and GID based on the container process UID and GID.
hostUID, hostGID, err := butil.GetHostIDs(util.IDtoolsToRuntimeSpec(c.config.IDMappings.UIDMap), util.IDtoolsToRuntimeSpec(c.config.IDMappings.GIDMap), uint32(execUser.Uid), uint32(execUser.Gid))
if err != nil {
Expand Down Expand Up @@ -777,6 +781,41 @@ func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) {
return g.Config, nil
}

// mountNotifySocket mounts the NOTIFY_SOCKET into the container if it's set
// and if the sdnotify mode is set to container. It also sets c.notifySocket
// to avoid redundantly looking up the env variable.
func (c *Container) mountNotifySocket(g generate.Generator) error {
notify, ok := os.LookupEnv("NOTIFY_SOCKET")
if !ok {
return nil
}
c.notifySocket = notify

if c.config.SdNotifyMode != define.SdNotifyModeContainer {
return nil
}

notifyDir := filepath.Join(c.bundlePath(), "notify")
logrus.Debugf("checking notify %q dir", notifyDir)
if err := os.MkdirAll(notifyDir, 0755); err != nil {
if !os.IsExist(err) {
return errors.Wrapf(err, "unable to create notify %q dir", notifyDir)
}
}
if err := label.Relabel(notifyDir, c.MountLabel(), true); err != nil {
return errors.Wrapf(err, "relabel failed %q", notifyDir)
}
logrus.Debugf("add bindmount notify %q dir", notifyDir)
if _, ok := c.state.BindMounts["/run/notify"]; !ok {
c.state.BindMounts["/run/notify"] = notifyDir
}

// Set the container's notify socket to the proxy socket created by conmon
g.AddProcessEnv("NOTIFY_SOCKET", "/run/notify/notify.sock")

return nil
}

// systemd expects to have /run, /run/lock and /tmp on tmpfs
// It also expects to be able to write to /sys/fs/cgroup/systemd and /var/log/journal
func (c *Container) setupSystemd(mounts []spec.Mount, g generate.Generator) error {
Expand Down Expand Up @@ -1730,6 +1769,7 @@ rootless=%d
c.state.BindMounts[dest] = src
}
}

return nil
}

Expand Down
1 change: 1 addition & 0 deletions libpod/diff.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ var initInodes = map[string]bool{
"/etc/resolv.conf": true,
"/proc": true,
"/run": true,
"/run/notify": true,
"/run/.containerenv": true,
"/run/secrets": true,
"/sys": true,
Expand Down
2 changes: 1 addition & 1 deletion libpod/oci_conmon_exec_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -462,7 +462,7 @@ func (r *ConmonOCIRuntime) startExec(c *Container, sessionID string, options *Ex
Setpgid: true,
}

err = startCommandGivenSelinux(execCmd)
err = startCommandGivenSelinux(execCmd, c)

// We don't need children pipes on the parent side
errorhandling.CloseQuiet(childSyncPipe)
Expand Down
49 changes: 30 additions & 19 deletions libpod/oci_conmon_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -364,11 +364,6 @@ func (r *ConmonOCIRuntime) StartContainer(ctr *Container) error {
return err
}
env := []string{fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)}
if ctr.config.SdNotifyMode == define.SdNotifyModeContainer {
if notify, ok := os.LookupEnv("NOTIFY_SOCKET"); ok {
env = append(env, fmt.Sprintf("NOTIFY_SOCKET=%s", notify))
}
}
if path, ok := os.LookupEnv("PATH"); ok {
env = append(env, fmt.Sprintf("PATH=%s", path))
}
Expand Down Expand Up @@ -1014,19 +1009,17 @@ func (r *ConmonOCIRuntime) createOCIContainer(ctr *Container, restoreOptions *Co
}
}

if ctr.config.SdNotifyMode == define.SdNotifyModeIgnore {
if err := os.Unsetenv("NOTIFY_SOCKET"); err != nil {
logrus.Warnf("Error unsetting NOTIFY_SOCKET %v", err)
}
}

pidfile := ctr.config.PidFile
if pidfile == "" {
pidfile = filepath.Join(ctr.state.RunDir, "pidfile")
}

args := r.sharedConmonArgs(ctr, ctr.ID(), ctr.bundlePath(), pidfile, ctr.LogPath(), r.exitsDir, ociLog, ctr.LogDriver(), logTag)

if ctr.config.SdNotifyMode == define.SdNotifyModeContainer && ctr.notifySocket != "" {
args = append(args, fmt.Sprintf("--sdnotify-socket=%s", ctr.notifySocket))
}

if ctr.config.Spec.Process.Terminal {
args = append(args, "-t")
} else if ctr.config.Stdin {
Expand Down Expand Up @@ -1171,7 +1164,8 @@ func (r *ConmonOCIRuntime) createOCIContainer(ctr *Container, restoreOptions *Co
}
}

err = startCommandGivenSelinux(cmd)
err = startCommandGivenSelinux(cmd, ctr)

// regardless of whether we errored or not, we no longer need the children pipes
childSyncPipe.Close()
childStartPipe.Close()
Expand Down Expand Up @@ -1203,7 +1197,13 @@ func (r *ConmonOCIRuntime) createOCIContainer(ctr *Container, restoreOptions *Co
// conmon not having a pid file is a valid state, so don't set it if we don't have it
logrus.Infof("Got Conmon PID as %d", conmonPID)
ctr.state.ConmonPID = conmonPID
if ctr.config.SdNotifyMode != define.SdNotifyModeIgnore {

// Send the MAINPID via sdnotify if needed.
switch ctr.config.SdNotifyMode {
case define.SdNotifyModeContainer, define.SdNotifyModeIgnore:
// Nothing to do or conmon takes care of it already.

default:
if sent, err := daemon.SdNotify(false, fmt.Sprintf("MAINPID=%d", conmonPID)); err != nil {
logrus.Errorf("Error notifying systemd of Conmon PID: %v", err)
} else if sent {
Expand Down Expand Up @@ -1239,11 +1239,6 @@ func (r *ConmonOCIRuntime) configureConmonEnv(ctr *Container, runtimeDir string)
}

extraFiles := make([]*os.File, 0)
if ctr.config.SdNotifyMode == define.SdNotifyModeContainer {
if notify, ok := os.LookupEnv("NOTIFY_SOCKET"); ok {
env = append(env, fmt.Sprintf("NOTIFY_SOCKET=%s", notify))
}
}
if !r.sdNotify {
if listenfds, ok := os.LookupEnv("LISTEN_FDS"); ok {
env = append(env, fmt.Sprintf("LISTEN_FDS=%s", listenfds), "LISTEN_PID=1")
Expand Down Expand Up @@ -1335,7 +1330,23 @@ func (r *ConmonOCIRuntime) sharedConmonArgs(ctr *Container, cuuid, bundlePath, p

// startCommandGivenSelinux starts a container ensuring to set the labels of
// the process to make sure SELinux doesn't block conmon communication, if SELinux is enabled
func startCommandGivenSelinux(cmd *exec.Cmd) error {
func startCommandGivenSelinux(cmd *exec.Cmd, ctr *Container) error {
// Make sure to unset the NOTIFY_SOCKET and reset if afterwards if needed.
switch ctr.config.SdNotifyMode {
case define.SdNotifyModeContainer, define.SdNotifyModeIgnore:
if ctr.notifySocket != "" {
if err := os.Unsetenv("NOTIFY_SOCKET"); err != nil {
logrus.Warnf("Error unsetting NOTIFY_SOCKET %v", err)
}

defer func() {
if err := os.Setenv("NOTIFY_SOCKET", ctr.notifySocket); err != nil {
logrus.Errorf("Error resetting NOTIFY_SOCKET=%s", ctr.notifySocket)
}
}()
}
}

if !selinux.GetEnabled() {
return cmd.Start()
}
Expand Down
4 changes: 3 additions & 1 deletion test/system/260-sdnotify.bats
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,8 @@ function _assert_mainpid_is_conmon() {
_stop_socat
}

# These tests can fail in dev. environment because of SELinux.
# quick fix: chcon -t container_runtime_exec_t ./bin/podman
@test "sdnotify : container" {
# Sigh... we need to pull a humongous image because it has systemd-notify.
# (IMPORTANT: fedora:32 and above silently removed systemd-notify; this
Expand All @@ -150,7 +152,7 @@ function _assert_mainpid_is_conmon() {
wait_for_ready $cid

run_podman logs $cid
is "${lines[0]}" "/.*/container\.sock/notify" "NOTIFY_SOCKET is passed to container"
is "${lines[0]}" "/run/notify/notify.sock" "NOTIFY_SOCKET is passed to container"

# With container, READY=1 isn't necessarily the last message received;
# just look for it anywhere in received messages
Expand Down

0 comments on commit 5fc7c88

Please sign in to comment.