From 8c3af71862cbe078a69feba2906cc6349c88c38b Mon Sep 17 00:00:00 2001 From: Valentin Rothberg Date: Fri, 2 Dec 2022 15:49:09 +0100 Subject: [PATCH 1/2] notify k8s system test: move sending message into exec The flake in #16076 is likely related to the notify message not being delivered/read correctly. Move sending the message into an exec session such that flakes will reveal an error message. Signed-off-by: Valentin Rothberg --- test/system/260-sdnotify.bats | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/test/system/260-sdnotify.bats b/test/system/260-sdnotify.bats index 970a2e3c7c..1fe1c66c72 100644 --- a/test/system/260-sdnotify.bats +++ b/test/system/260-sdnotify.bats @@ -268,6 +268,7 @@ metadata: labels: app: test name: test_pod + restartPolicy: "Never" annotations: io.containers.sdnotify: "container" io.containers.sdnotify/b: "conmon" @@ -276,7 +277,7 @@ spec: - command: - /bin/sh - -c - - 'printenv NOTIFY_SOCKET; echo READY; while ! test -f /stop;do sleep 0.1;done;systemd-notify --ready' + - 'printenv NOTIFY_SOCKET; while ! 
test -f /stop;do sleep 0.1;done' image: $_FEDORA name: a - command: @@ -332,6 +333,8 @@ ignore" run_podman logs $container_a is "${lines[0]}" "/run/notify/notify.sock" "NOTIFY_SOCKET is passed to container" + run_podman exec --env NOTIFY_SOCKET="/run/notify/notify.sock" $container_a /usr/bin/systemd-notify --ready + # Instruct the container to send the READY run_podman exec $container_a /bin/touch /stop @@ -339,6 +342,8 @@ ignore" main_pid="$output" run_podman container wait $container_a + run_podman container inspect $container_a --format "{{.State.ExitCode}}" + is "$output" "0" "container exited cleanly after sending READY message" wait_for_file $_SOCAT_LOG # The 'echo's help us debug failed runs run cat $_SOCAT_LOG From 4fa307f149234ed27a4e07da8065e1495a1962c2 Mon Sep 17 00:00:00 2001 From: Valentin Rothberg Date: Fri, 2 Dec 2022 12:51:30 +0100 Subject: [PATCH 2/2] kube sdnotify: run proxies for the lifespan of the service As outlined in #16076, a subsequent BARRIER *may* follow the READY message sent by a container. To correctly imitate the behavior of systemd's NOTIFY_SOCKET, the notify proxies spun up by `kube play` must hence process messages for the entirety of the workload. We know that the workload is done and that all containers and pods have exited when the service container exits. Hence, all proxies are closed at that time. The above changes imply that Podman runs for the entirety of the workload and will henceforth act as the MAINPID when running inside of systemd. Prior to this change, the service container acted as the MAINPID which is now not possible anymore; Podman would be killed immediately on exit of the service container and could not clean up. The kube template now correctly transitions to inactive instead of failed in systemd. 
Fixes: #16076 Fixes: #16515 Signed-off-by: Valentin Rothberg --- pkg/domain/infra/abi/play.go | 146 ++++++++++++-------- pkg/systemd/notifyproxy/notifyproxy.go | 113 ++++++++++----- pkg/systemd/notifyproxy/notifyproxy_test.go | 9 +- test/system/250-systemd.bats | 10 +- test/system/260-sdnotify.bats | 27 ++-- test/system/700-play.bats | 20 ++- 6 files changed, 211 insertions(+), 114 deletions(-) diff --git a/pkg/domain/infra/abi/play.go b/pkg/domain/infra/abi/play.go index e73bf6614d..0868a47532 100644 --- a/pkg/domain/infra/abi/play.go +++ b/pkg/domain/infra/abi/play.go @@ -155,7 +155,25 @@ func (ic *ContainerEngine) PlayKube(ctx context.Context, body io.Reader, options var configMaps []v1.ConfigMap ranContainers := false + // FIXME: both, the service container and the proxies, should ideally + // be _state_ of an object. The Kube code below is quite Spaghetti-code + // which we should refactor at some point to make it easier to extend + // (via shared state instead of passing data around) and make it more + // maintainable long term. var serviceContainer *libpod.Container + var notifyProxies []*notifyproxy.NotifyProxy + defer func() { + // Close the notify proxy on return. At that point we know + // that a) all containers have sent their READY message and + // that b) the service container has exited (and hence all + // containers). 
+ for _, proxy := range notifyProxies { + if err := proxy.Close(); err != nil { + logrus.Errorf("Closing notify proxy %q: %v", proxy.SocketPath(), err) + } + } + }() + // create pod on each document if it is a pod or deployment // any other kube kind will be skipped for _, document := range documentList { @@ -205,10 +223,11 @@ func (ic *ContainerEngine) PlayKube(ctx context.Context, body io.Reader, options podYAML.Annotations[name] = val } - r, err := ic.playKubePod(ctx, podTemplateSpec.ObjectMeta.Name, &podTemplateSpec, options, &ipIndex, podYAML.Annotations, configMaps, serviceContainer) + r, proxies, err := ic.playKubePod(ctx, podTemplateSpec.ObjectMeta.Name, &podTemplateSpec, options, &ipIndex, podYAML.Annotations, configMaps, serviceContainer) if err != nil { return nil, err } + notifyProxies = append(notifyProxies, proxies...) report.Pods = append(report.Pods, r.Pods...) validKinds++ @@ -220,10 +239,11 @@ func (ic *ContainerEngine) PlayKube(ctx context.Context, body io.Reader, options return nil, fmt.Errorf("unable to read YAML as Kube Deployment: %w", err) } - r, err := ic.playKubeDeployment(ctx, &deploymentYAML, options, &ipIndex, configMaps, serviceContainer) + r, proxies, err := ic.playKubeDeployment(ctx, &deploymentYAML, options, &ipIndex, configMaps, serviceContainer) if err != nil { return nil, err } + notifyProxies = append(notifyProxies, proxies...) report.Pods = append(report.Pods, r.Pods...) validKinds++ @@ -289,23 +309,20 @@ func (ic *ContainerEngine) PlayKube(ctx context.Context, body io.Reader, options } if options.ServiceContainer && ranContainers { - // We can consider the service to be up and running now. - // Send the sd-notify messages pointing systemd to the - // service container. 
- data, err := serviceContainer.Inspect(false) - if err != nil { - return nil, err - } - message := fmt.Sprintf("MAINPID=%d\n%s", data.State.ConmonPid, daemon.SdNotifyReady) + message := fmt.Sprintf("MAINPID=%d\n%s", os.Getpid(), daemon.SdNotifyReady) if err := notifyproxy.SendMessage("", message); err != nil { return nil, err } + + if _, err := serviceContainer.Wait(ctx); err != nil { + return nil, fmt.Errorf("waiting for service container: %w", err) + } } return report, nil } -func (ic *ContainerEngine) playKubeDeployment(ctx context.Context, deploymentYAML *v1apps.Deployment, options entities.PlayKubeOptions, ipIndex *int, configMaps []v1.ConfigMap, serviceContainer *libpod.Container) (*entities.PlayKubeReport, error) { +func (ic *ContainerEngine) playKubeDeployment(ctx context.Context, deploymentYAML *v1apps.Deployment, options entities.PlayKubeOptions, ipIndex *int, configMaps []v1.ConfigMap, serviceContainer *libpod.Container) (*entities.PlayKubeReport, []*notifyproxy.NotifyProxy, error) { var ( deploymentName string podSpec v1.PodTemplateSpec @@ -316,7 +333,7 @@ func (ic *ContainerEngine) playKubeDeployment(ctx context.Context, deploymentYAM deploymentName = deploymentYAML.ObjectMeta.Name if deploymentName == "" { - return nil, errors.New("deployment does not have a name") + return nil, nil, errors.New("deployment does not have a name") } numReplicas = 1 if deploymentYAML.Spec.Replicas != nil { @@ -325,18 +342,20 @@ func (ic *ContainerEngine) playKubeDeployment(ctx context.Context, deploymentYAM podSpec = deploymentYAML.Spec.Template // create "replicas" number of pods + var notifyProxies []*notifyproxy.NotifyProxy for i = 0; i < numReplicas; i++ { podName := fmt.Sprintf("%s-pod-%d", deploymentName, i) - podReport, err := ic.playKubePod(ctx, podName, &podSpec, options, ipIndex, deploymentYAML.Annotations, configMaps, serviceContainer) + podReport, proxies, err := ic.playKubePod(ctx, podName, &podSpec, options, ipIndex, deploymentYAML.Annotations, configMaps, 
serviceContainer) if err != nil { - return nil, fmt.Errorf("encountered while bringing up pod %s: %w", podName, err) + return nil, notifyProxies, fmt.Errorf("encountered while bringing up pod %s: %w", podName, err) } report.Pods = append(report.Pods, podReport.Pods...) + notifyProxies = append(notifyProxies, proxies...) } - return &report, nil + return &report, notifyProxies, nil } -func (ic *ContainerEngine) playKubePod(ctx context.Context, podName string, podYAML *v1.PodTemplateSpec, options entities.PlayKubeOptions, ipIndex *int, annotations map[string]string, configMaps []v1.ConfigMap, serviceContainer *libpod.Container) (*entities.PlayKubeReport, error) { +func (ic *ContainerEngine) playKubePod(ctx context.Context, podName string, podYAML *v1.PodTemplateSpec, options entities.PlayKubeOptions, ipIndex *int, annotations map[string]string, configMaps []v1.ConfigMap, serviceContainer *libpod.Container) (*entities.PlayKubeReport, []*notifyproxy.NotifyProxy, error) { var ( writer io.Writer playKubePod entities.PlayKubePod @@ -345,18 +364,18 @@ func (ic *ContainerEngine) playKubePod(ctx context.Context, podName string, podY mainSdNotifyMode, err := getSdNotifyMode(annotations, "") if err != nil { - return nil, err + return nil, nil, err } // Create the secret manager before hand secretsManager, err := ic.Libpod.SecretsManager() if err != nil { - return nil, err + return nil, nil, err } // Assert the pod has a name if podName == "" { - return nil, fmt.Errorf("pod does not have a name") + return nil, nil, fmt.Errorf("pod does not have a name") } podOpt := entities.PodCreateOptions{ @@ -366,7 +385,7 @@ func (ic *ContainerEngine) playKubePod(ctx context.Context, podName string, podY } podOpt, err = kube.ToPodOpt(ctx, podName, podOpt, podYAML) if err != nil { - return nil, err + return nil, nil, err } // add kube default network if no network is explicitly added @@ -384,7 +403,7 @@ func (ic *ContainerEngine) playKubePod(ctx context.Context, podName string, podY ns, 
networks, netOpts, err := specgen.ParseNetworkFlag(options.Networks, pastaNetworkNameExists) if err != nil { - return nil, err + return nil, nil, err } podOpt.Net.Network = ns @@ -404,16 +423,16 @@ func (ic *ContainerEngine) playKubePod(ctx context.Context, podName string, podY // Validate the userns modes supported. podOpt.Userns, err = specgen.ParseUserNamespace(options.Userns) if err != nil { - return nil, err + return nil, nil, err } // FIXME This is very hard to support properly with a good ux if len(options.StaticIPs) > *ipIndex { if !podOpt.Net.Network.IsBridge() { - return nil, fmt.Errorf("static ip addresses can only be set when the network mode is bridge: %w", define.ErrInvalidArg) + return nil, nil, fmt.Errorf("static ip addresses can only be set when the network mode is bridge: %w", define.ErrInvalidArg) } if len(podOpt.Net.Networks) != 1 { - return nil, fmt.Errorf("cannot set static ip addresses for more than network, use netname:ip= syntax to specify ips for more than network: %w", define.ErrInvalidArg) + return nil, nil, fmt.Errorf("cannot set static ip addresses for more than network, use netname:ip= syntax to specify ips for more than network: %w", define.ErrInvalidArg) } for name, netOpts := range podOpt.Net.Networks { netOpts.StaticIPs = append(netOpts.StaticIPs, options.StaticIPs[*ipIndex]) @@ -425,10 +444,10 @@ func (ic *ContainerEngine) playKubePod(ctx context.Context, podName string, podY } if len(options.StaticMACs) > *ipIndex { if !podOpt.Net.Network.IsBridge() { - return nil, fmt.Errorf("static mac address can only be set when the network mode is bridge: %w", define.ErrInvalidArg) + return nil, nil, fmt.Errorf("static mac address can only be set when the network mode is bridge: %w", define.ErrInvalidArg) } if len(podOpt.Net.Networks) != 1 { - return nil, fmt.Errorf("cannot set static mac address for more than network, use netname:mac= syntax to specify mac for more than network: %w", define.ErrInvalidArg) + return nil, nil, 
fmt.Errorf("cannot set static mac address for more than network, use netname:mac= syntax to specify mac for more than network: %w", define.ErrInvalidArg) } for name, netOpts := range podOpt.Net.Networks { netOpts.StaticMAC = nettypes.HardwareAddr(options.StaticMACs[*ipIndex]) @@ -442,12 +461,12 @@ func (ic *ContainerEngine) playKubePod(ctx context.Context, podName string, podY p := specgen.NewPodSpecGenerator() if err != nil { - return nil, err + return nil, nil, err } p, err = entities.ToPodSpecGen(*p, &podOpt) if err != nil { - return nil, err + return nil, nil, err } podSpec := entities.PodSpec{PodSpecGen: *p} @@ -458,17 +477,17 @@ func (ic *ContainerEngine) playKubePod(ctx context.Context, podName string, podY for _, p := range options.ConfigMaps { f, err := os.Open(p) if err != nil { - return nil, err + return nil, nil, err } defer f.Close() cm, err := readConfigMapFromFile(f) if err != nil { - return nil, fmt.Errorf("%q: %w", p, err) + return nil, nil, fmt.Errorf("%q: %w", p, err) } if _, present := configMapIndex[cm.Name]; present { - return nil, fmt.Errorf("ambiguous configuration: the same config map %s is present in YAML and in --configmaps %s file", cm.Name, p) + return nil, nil, fmt.Errorf("ambiguous configuration: the same config map %s is present in YAML and in --configmaps %s file", cm.Name, p) } configMaps = append(configMaps, cm) @@ -476,7 +495,7 @@ func (ic *ContainerEngine) playKubePod(ctx context.Context, podName string, podY volumes, err := kube.InitializeVolumes(podYAML.Spec.Volumes, configMaps, secretsManager) if err != nil { - return nil, err + return nil, nil, err } // Go through the volumes and create a podman volume for all volumes that have been @@ -490,27 +509,27 @@ func (ic *ContainerEngine) playKubePod(ctx context.Context, podName string, podY // error out instead reuse the current volume. 
vol, err = ic.Libpod.GetVolume(v.Source) if err != nil { - return nil, fmt.Errorf("cannot re-use local volume for volume from configmap %q: %w", v.Source, err) + return nil, nil, fmt.Errorf("cannot re-use local volume for volume from configmap %q: %w", v.Source, err) } } else { - return nil, fmt.Errorf("cannot create a local volume for volume from configmap %q: %w", v.Source, err) + return nil, nil, fmt.Errorf("cannot create a local volume for volume from configmap %q: %w", v.Source, err) } } mountPoint, err := vol.MountPoint() if err != nil || mountPoint == "" { - return nil, fmt.Errorf("unable to get mountpoint of volume %q: %w", vol.Name(), err) + return nil, nil, fmt.Errorf("unable to get mountpoint of volume %q: %w", vol.Name(), err) } // Create files and add data to the volume mountpoint based on the Items in the volume for k, v := range v.Items { dataPath := filepath.Join(mountPoint, k) f, err := os.Create(dataPath) if err != nil { - return nil, fmt.Errorf("cannot create file %q at volume mountpoint %q: %w", k, mountPoint, err) + return nil, nil, fmt.Errorf("cannot create file %q at volume mountpoint %q: %w", k, mountPoint, err) } defer f.Close() _, err = f.Write(v) if err != nil { - return nil, err + return nil, nil, err } } } @@ -518,7 +537,7 @@ func (ic *ContainerEngine) playKubePod(ctx context.Context, podName string, podY seccompPaths, err := kube.InitializeSeccompPaths(podYAML.ObjectMeta.Annotations, options.SeccompProfileRoot) if err != nil { - return nil, err + return nil, nil, err } var ctrRestartPolicy string @@ -546,7 +565,7 @@ func (ic *ContainerEngine) playKubePod(ctx context.Context, podName string, podY err = specgenutil.FillOutSpecGen(podSpec.PodSpecGen.InfraContainerSpec, &infraOptions, []string{}) if err != nil { - return nil, err + return nil, nil, err } } @@ -557,12 +576,12 @@ func (ic *ContainerEngine) playKubePod(ctx context.Context, podName string, podY // Create the Pod pod, err := generate.MakePod(&podSpec, ic.Libpod) if err != nil { 
- return nil, err + return nil, nil, err } podInfraID, err := pod.InfraContainerID() if err != nil { - return nil, err + return nil, nil, err } if !options.Quiet { @@ -578,7 +597,7 @@ func (ic *ContainerEngine) playKubePod(ctx context.Context, podName string, podY } else { cwd, err = os.Getwd() if err != nil { - return nil, err + return nil, nil, err } } @@ -586,16 +605,16 @@ func (ic *ContainerEngine) playKubePod(ctx context.Context, podName string, podY for _, initCtr := range podYAML.Spec.InitContainers { // Error out if same name is used for more than one container if _, ok := ctrNames[initCtr.Name]; ok { - return nil, fmt.Errorf("the pod %q is invalid; duplicate container name %q detected", podName, initCtr.Name) + return nil, nil, fmt.Errorf("the pod %q is invalid; duplicate container name %q detected", podName, initCtr.Name) } ctrNames[initCtr.Name] = "" // Init containers cannot have either of lifecycle, livenessProbe, readinessProbe, or startupProbe set if initCtr.Lifecycle != nil || initCtr.LivenessProbe != nil || initCtr.ReadinessProbe != nil || initCtr.StartupProbe != nil { - return nil, fmt.Errorf("cannot create an init container that has either of lifecycle, livenessProbe, readinessProbe, or startupProbe set") + return nil, nil, fmt.Errorf("cannot create an init container that has either of lifecycle, livenessProbe, readinessProbe, or startupProbe set") } pulledImage, labels, err := ic.getImageAndLabelInfo(ctx, cwd, annotations, writer, initCtr, options) if err != nil { - return nil, err + return nil, nil, err } for k, v := range podSpec.PodSpecGen.Labels { // add podYAML labels @@ -628,17 +647,17 @@ func (ic *ContainerEngine) playKubePod(ctx context.Context, podName string, podY } specGen, err := kube.ToSpecGen(ctx, &specgenOpts) if err != nil { - return nil, err + return nil, nil, err } specGen.SdNotifyMode = define.SdNotifyModeIgnore rtSpec, spec, opts, err := generate.MakeContainer(ctx, ic.Libpod, specGen, false, nil) if err != nil { - return nil, 
err + return nil, nil, err } opts = append(opts, libpod.WithSdNotifyMode(define.SdNotifyModeIgnore)) ctr, err := generate.ExecuteCreate(ctx, ic.Libpod, rtSpec, spec, false, opts...) if err != nil { - return nil, err + return nil, nil, err } initContainers = append(initContainers, ctr) @@ -649,12 +668,12 @@ func (ic *ContainerEngine) playKubePod(ctx context.Context, podName string, podY for _, container := range podYAML.Spec.Containers { // Error out if the same name is used for more than one container if _, ok := ctrNames[container.Name]; ok { - return nil, fmt.Errorf("the pod %q is invalid; duplicate container name %q detected", podName, container.Name) + return nil, nil, fmt.Errorf("the pod %q is invalid; duplicate container name %q detected", podName, container.Name) } ctrNames[container.Name] = "" pulledImage, labels, err := ic.getImageAndLabelInfo(ctx, cwd, annotations, writer, container, options) if err != nil { - return nil, err + return nil, nil, err } for k, v := range podSpec.PodSpecGen.Labels { // add podYAML labels @@ -683,18 +702,18 @@ func (ic *ContainerEngine) playKubePod(ctx context.Context, podName string, podY specGen, err := kube.ToSpecGen(ctx, &specgenOpts) if err != nil { - return nil, err + return nil, nil, err } specGen.RawImageName = container.Image rtSpec, spec, opts, err := generate.MakeContainer(ctx, ic.Libpod, specGen, false, nil) if err != nil { - return nil, err + return nil, nil, err } sdNotifyMode := mainSdNotifyMode ctrNotifyMode, err := getSdNotifyMode(annotations, container.Name) if err != nil { - return nil, err + return nil, nil, err } if ctrNotifyMode != "" { sdNotifyMode = ctrNotifyMode @@ -710,7 +729,7 @@ func (ic *ContainerEngine) playKubePod(ctx context.Context, podName string, podY if sdNotifyMode != "" && sdNotifyMode != define.SdNotifyModeIgnore { proxy, err = notifyproxy.New("") if err != nil { - return nil, err + return nil, nil, err } sdNotifyProxies = append(sdNotifyProxies, proxy) opts = append(opts, 
libpod.WithSdNotifySocket(proxy.SocketPath())) @@ -718,7 +737,7 @@ func (ic *ContainerEngine) playKubePod(ctx context.Context, podName string, podY ctr, err := generate.ExecuteCreate(ctx, ic.Libpod, rtSpec, spec, false, opts...) if err != nil { - return nil, err + return nil, nil, err } if proxy != nil { proxy.AddContainer(ctr) @@ -730,7 +749,7 @@ func (ic *ContainerEngine) playKubePod(ctx context.Context, podName string, podY // Start the containers podStartErrors, err := pod.Start(ctx) if err != nil && !errors.Is(err, define.ErrPodPartialFail) { - return nil, err + return nil, nil, err } for id, err := range podStartErrors { playKubePod.ContainerErrors = append(playKubePod.ContainerErrors, fmt.Errorf("starting container %s: %w", id, err).Error()) @@ -743,8 +762,13 @@ func (ic *ContainerEngine) playKubePod(ctx context.Context, podName string, podY errors := make([]error, len(sdNotifyProxies)) for i := range sdNotifyProxies { wg.Add(1) + defer func() { + if err := sdNotifyProxies[i].Close(); err != nil { + logrus.Errorf("Closing sdnotify proxy %q: %v", sdNotifyProxies[i].SocketPath(), err) + } + }() go func(i int) { - err := sdNotifyProxies[i].WaitAndClose() + err := sdNotifyProxies[i].Wait() if err != nil { err = fmt.Errorf("waiting for sd-notify proxy: %w", err) } @@ -755,7 +779,11 @@ func (ic *ContainerEngine) playKubePod(ctx context.Context, podName string, podY wg.Wait() for _, err := range errors { if err != nil { - return nil, err + // Close all proxies on error. + for _, proxy := range sdNotifyProxies { + _ = proxy.Close() + } + return nil, nil, err } } } @@ -770,7 +798,7 @@ func (ic *ContainerEngine) playKubePod(ctx context.Context, podName string, podY report.Pods = append(report.Pods, playKubePod) - return &report, nil + return &report, sdNotifyProxies, nil } // getImageAndLabelInfo returns the image information and how the image should be pulled plus as well as labels to be used for the container in the pod. 
diff --git a/pkg/systemd/notifyproxy/notifyproxy.go b/pkg/systemd/notifyproxy/notifyproxy.go index b5010cbc85..508bc96640 100644 --- a/pkg/systemd/notifyproxy/notifyproxy.go +++ b/pkg/systemd/notifyproxy/notifyproxy.go @@ -14,6 +14,16 @@ import ( "github.com/containers/podman/v4/libpod/define" "github.com/coreos/go-systemd/v22/daemon" "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" +) + +const ( + // All constants below are defined by systemd. + _notifyRcvbufSize = 8 * 1024 * 1024 + _notifyBufferMax = 4096 + _notifyFdMax = 768 + _notifyBarrierMsg = "BARRIER=1" + _notifyRdyMsg = daemon.SdNotifyReady ) // SendMessage sends the specified message to the specified socket. @@ -76,6 +86,10 @@ func New(tmpDir string) (*NotifyProxy, error) { return nil, err } + if err := conn.SetReadBuffer(_notifyRcvbufSize); err != nil { + return nil, fmt.Errorf("setting read buffer: %w", err) + } + errorChan := make(chan error, 1) readyChan := make(chan bool, 1) @@ -100,34 +114,69 @@ func (p *NotifyProxy) waitForReady() { go func() { // Read until the `READY` message is received or the connection // is closed. - const bufferSize = 1024 + + // See https://github.com/containers/podman/issues/16515 for a description of the protocol. + fdSize := unix.CmsgSpace(4) + buffer := make([]byte, _notifyBufferMax) + oob := make([]byte, _notifyFdMax*fdSize) sBuilder := strings.Builder{} for { - for { - buffer := make([]byte, bufferSize) - num, err := p.connection.Read(buffer) - if err != nil { - if !errors.Is(err, io.EOF) { - p.errorChan <- err - return - } - } - sBuilder.Write(buffer[:num]) - if num != bufferSize || buffer[num-1] == '\n' { - // Break as we read an entire line that - // we can inspect for the `READY` - // message. 
- break + n, oobn, flags, _, err := p.connection.ReadMsgUnix(buffer, oob) + if err != nil { + if !errors.Is(err, io.EOF) { + p.errorChan <- err + return } + logrus.Errorf("Error reading unix message on socket %q: %v", p.socketPath, err) + } + + if n > _notifyBufferMax || oobn > _notifyFdMax*fdSize { + logrus.Errorf("Ignoring unix message on socket %q: incorrect number of bytes read (n=%d, oobn=%d)", p.socketPath, n, oobn) + continue + } + + if flags&unix.MSG_CTRUNC != 0 { + logrus.Errorf("Ignoring unix message on socket %q: message truncated", p.socketPath) + continue } + sBuilder.Reset() + sBuilder.Write(buffer[:n]) + var isBarrier, isReady bool + for _, line := range strings.Split(sBuilder.String(), "\n") { - if line == daemon.SdNotifyReady { - p.readyChan <- true - return + switch line { + case _notifyRdyMsg: + isReady = true + case _notifyBarrierMsg: + isBarrier = true } } - sBuilder.Reset() + + if isBarrier { + scms, err := unix.ParseSocketControlMessage(oob) + if err != nil { + logrus.Errorf("parsing control message on socket %q: %v", p.socketPath, err) + } + for _, scm := range scms { + fds, err := unix.ParseUnixRights(&scm) + if err != nil { + logrus.Errorf("parsing unix rights of control message on socket %q: %v", p.socketPath, err) + continue + } + for _, fd := range fds { + if err := unix.Close(fd); err != nil { + logrus.Errorf("closing fd passed on socket %q: %v", fd, err) + continue + } + } + } + continue + } + + if isReady { + p.readyChan <- true + } } }() } @@ -137,8 +186,8 @@ func (p *NotifyProxy) SocketPath() string { return p.socketPath } -// close closes the listener and removes the socket. -func (p *NotifyProxy) close() error { +// Close closes the listener and removes the socket. +func (p *NotifyProxy) Close() error { defer os.Remove(p.socketPath) return p.connection.Close() } @@ -158,20 +207,12 @@ type Container interface { ID() string } -// WaitAndClose waits until receiving the `READY` notify message and close the -// listener. 
Note that the this function must only be executed inside a systemd -// service which will kill the process after a given timeout. -// If the (optional) container stopped running before the `READY` is received, -// the waiting gets canceled and ErrNoReadyMessage is returned. -func (p *NotifyProxy) WaitAndClose() error { - defer func() { - // Closing the socket/connection makes sure that the other - // goroutine reading/waiting for the READY message returns. - if err := p.close(); err != nil { - logrus.Errorf("Closing notify proxy: %v", err) - } - }() - +// Wait waits until receiving the `READY` notify message. Note that +// this function must only be executed inside a systemd service which will kill +// the process after a given timeout. If the (optional) container stopped +// running before the `READY` is received, the waiting gets canceled and +// ErrNoReadyMessage is returned. +func (p *NotifyProxy) Wait() error { // If the proxy has a container we need to watch it as it may exit // without sending a READY message. 
The goroutine below returns when // the container exits OR when the function returns (see deferred the diff --git a/pkg/systemd/notifyproxy/notifyproxy_test.go b/pkg/systemd/notifyproxy/notifyproxy_test.go index 066046cb8b..aaada1ad30 100644 --- a/pkg/systemd/notifyproxy/notifyproxy_test.go +++ b/pkg/systemd/notifyproxy/notifyproxy_test.go @@ -18,7 +18,7 @@ func TestNotifyProxy(t *testing.T) { proxy, err := New("") require.NoError(t, err) require.FileExists(t, proxy.SocketPath()) - require.NoError(t, proxy.close()) + require.NoError(t, proxy.Close()) require.NoFileExists(t, proxy.SocketPath()) } @@ -28,9 +28,12 @@ func TestWaitAndClose(t *testing.T) { require.FileExists(t, proxy.SocketPath()) ch := make(chan error) - + defer func() { + err := proxy.Close() + require.NoError(t, err, "proxy should close successfully") + }() go func() { - ch <- proxy.WaitAndClose() + ch <- proxy.Wait() }() sendMessage(t, proxy, "foo\n") diff --git a/test/system/250-systemd.bats b/test/system/250-systemd.bats index d0dc454d39..c560b20667 100644 --- a/test/system/250-systemd.bats +++ b/test/system/250-systemd.bats @@ -397,6 +397,10 @@ EOF systemctl start $service_name systemctl is-active $service_name + # Make sure that Podman is the service's MainPID + run systemctl show --property=MainPID --value $service_name + is "$(/dev/null & + + # Wait for the container to be running + container_a=test_pod-test + for i in $(seq 1 20); do + run_podman "?" container wait $container_a --condition="running" + if [[ $status == 0 ]]; then + break + fi + sleep 0.5 + # Just for debugging + run_podman ps -a + done + if [[ $status != 0 ]]; then + die "container $container_a did not start" + fi # The name of the service container is predictable: the first 12 characters # of the hash of the YAML file followed by the "-service" suffix