Skip to content

Commit

Permalink
Merge pull request #15820 from vrothberg/fix-15800
Browse files Browse the repository at this point in the history
kube: notifyproxy: fix lost READY message
  • Loading branch information
openshift-merge-robot authored Sep 26, 2022
2 parents 17f3756 + 4a053a8 commit 1d63d9f
Show file tree
Hide file tree
Showing 2 changed files with 82 additions and 38 deletions.
22 changes: 19 additions & 3 deletions pkg/domain/infra/abi/play.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"path/filepath"
"strconv"
"strings"
"sync"

buildahDefine "github.com/containers/buildah/define"
"github.com/containers/common/libimage"
Expand Down Expand Up @@ -698,9 +699,24 @@ func (ic *ContainerEngine) playKubePod(ctx context.Context, podName string, podY
fmt.Println(playKubePod.ContainerErrors)
}

// Wait for each proxy to receive a READY message.
for _, proxy := range sdNotifyProxies {
if err := proxy.WaitAndClose(); err != nil {
// Wait for each proxy to receive a READY message. Use a wait
// group to prevent the potential for ABBA kinds of deadlocks.
var wg sync.WaitGroup
errors := make([]error, len(sdNotifyProxies))
for i := range sdNotifyProxies {
wg.Add(1)
go func(i int) {
err := sdNotifyProxies[i].WaitAndClose()
if err != nil {
err = fmt.Errorf("waiting for sd-notify proxy: %w", err)
}
errors[i] = err
wg.Done()
}(i)
}
wg.Wait()
for _, err := range errors {
if err != nil {
return nil, err
}
}
Expand Down
98 changes: 63 additions & 35 deletions pkg/systemd/notifyproxy/notifyproxy.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package notifyproxy

import (
"context"
"errors"
"fmt"
"io"
Expand Down Expand Up @@ -109,48 +110,75 @@ func (p *NotifyProxy) WaitAndClose() error {
}
}()

const bufferSize = 1024
sBuilder := strings.Builder{}
for {
// Set a read deadline of one second such that we achieve a
// non-blocking read and can check if the container has already
// stopped running; in that case no READY message will be sent
// and we're done.
if err := p.connection.SetReadDeadline(time.Now().Add(time.Second)); err != nil {
return err
}

// Since reading from the connection is blocking, we need to spin up two
// goroutines. One waiting for the `READY` message, the other waiting
// for the container to stop running.
errorChan := make(chan error, 1)
readyChan := make(chan bool, 1)

go func() {
// Read until the `READY` message is received or the connection
// is closed.
const bufferSize = 1024
sBuilder := strings.Builder{}
for {
buffer := make([]byte, bufferSize)
num, err := p.connection.Read(buffer)
if err != nil {
if !errors.Is(err, os.ErrDeadlineExceeded) && !errors.Is(err, io.EOF) {
return err
for {
buffer := make([]byte, bufferSize)
num, err := p.connection.Read(buffer)
if err != nil {
if !errors.Is(err, io.EOF) {
errorChan <- err
return
}
}
sBuilder.Write(buffer[:num])
if num != bufferSize || buffer[num-1] == '\n' {
// Break as we read an entire line that
// we can inspect for the `READY`
// message.
break
}
}
sBuilder.Write(buffer[:num])
if num != bufferSize || buffer[num-1] == '\n' {
break
}
}

for _, line := range strings.Split(sBuilder.String(), "\n") {
if line == daemon.SdNotifyReady {
return nil
for _, line := range strings.Split(sBuilder.String(), "\n") {
if line == daemon.SdNotifyReady {
readyChan <- true
return
}
}
sBuilder.Reset()
}
sBuilder.Reset()
}()

if p.container == nil {
continue
}
if p.container != nil {
// Create a cancellable context to make sure the goroutine
// below terminates.
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
go func() {
select {
case <-ctx.Done():
return
default:
state, err := p.container.State()
if err != nil {
errorChan <- err
return
}
if state != define.ContainerStateRunning {
errorChan <- fmt.Errorf("%w: %s", ErrNoReadyMessage, p.container.ID())
return
}
time.Sleep(time.Second)
}
}()
}

state, err := p.container.State()
if err != nil {
return err
}
if state != define.ContainerStateRunning {
return fmt.Errorf("%w: %s", ErrNoReadyMessage, p.container.ID())
}
// Wait for the ready/error channel.
select {
case <-readyChan:
return nil
case err := <-errorChan:
return err
}
}

0 comments on commit 1d63d9f

Please sign in to comment.