Skip to content

Commit

Permalink
remote build: fix streaming and error handling
Browse files Browse the repository at this point in the history
Address a number of issues in the streaming logic in remote build, most
importantly an error in using buffered channels on the server side.

The pattern below does not guarantee that the channel is entirely read
before the context fires.

for {
	select {
		case <- bufferedChannel:
		...
		case <- ctx.Done():
		...
	}
}

Fixes: #10154
Signed-off-by: Valentin Rothberg <[email protected]>
  • Loading branch information
vrothberg committed Jul 28, 2021
1 parent a5de831 commit 4df6e31
Show file tree
Hide file tree
Showing 5 changed files with 38 additions and 36 deletions.
19 changes: 10 additions & 9 deletions pkg/api/handlers/compat/images_build.go
Original file line number Diff line number Diff line change
Expand Up @@ -393,16 +393,16 @@ func BuildImage(w http.ResponseWriter, r *http.Request) {
defer auth.RemoveAuthfile(authfile)

// Channels all mux'ed in select{} below to follow API build protocol
stdout := channel.NewWriter(make(chan []byte, 1))
stdout := channel.NewWriter(make(chan []byte))
defer stdout.Close()

auxout := channel.NewWriter(make(chan []byte, 1))
auxout := channel.NewWriter(make(chan []byte))
defer auxout.Close()

stderr := channel.NewWriter(make(chan []byte, 1))
stderr := channel.NewWriter(make(chan []byte))
defer stderr.Close()

reporter := channel.NewWriter(make(chan []byte, 1))
reporter := channel.NewWriter(make(chan []byte))
defer reporter.Close()

runtime := r.Context().Value("runtime").(*libpod.Runtime)
Expand Down Expand Up @@ -529,7 +529,7 @@ func BuildImage(w http.ResponseWriter, r *http.Request) {

enc := json.NewEncoder(body)
enc.SetEscapeHTML(true)
loop:

for {
m := struct {
Stream string `json:"stream,omitempty"`
Expand All @@ -543,13 +543,13 @@ loop:
stderr.Write([]byte(err.Error()))
}
flush()
case e := <-auxout.Chan():
case e := <-reporter.Chan():
m.Stream = string(e)
if err := enc.Encode(m); err != nil {
stderr.Write([]byte(err.Error()))
}
flush()
case e := <-reporter.Chan():
case e := <-auxout.Chan():
m.Stream = string(e)
if err := enc.Encode(m); err != nil {
stderr.Write([]byte(err.Error()))
Expand All @@ -561,8 +561,8 @@ loop:
logrus.Warnf("Failed to json encode error %v", err)
}
flush()
return
case <-runCtx.Done():
flush()
if success {
if !utils.IsLibpodRequest(r) {
m.Stream = fmt.Sprintf("Successfully built %12.12s\n", imageID)
Expand All @@ -579,7 +579,8 @@ loop:
}
}
}
break loop
flush()
return
case <-r.Context().Done():
cancel()
logrus.Infof("Client disconnect reported for build %q / %q.", registry, query.Dockerfile)
Expand Down
3 changes: 1 addition & 2 deletions pkg/bindings/connection.go
Original file line number Diff line number Diff line change
Expand Up @@ -327,7 +327,7 @@ func (c *Connection) DoRequest(httpBody io.Reader, httpMethod, endpoint string,
uri := fmt.Sprintf("http://d/v%d.%d.%d/libpod"+endpoint, params...)
logrus.Debugf("DoRequest Method: %s URI: %v", httpMethod, uri)

req, err := http.NewRequest(httpMethod, uri, httpBody)
req, err := http.NewRequestWithContext(context.WithValue(context.Background(), clientKey, c), httpMethod, uri, httpBody)
if err != nil {
return nil, err
}
Expand All @@ -337,7 +337,6 @@ func (c *Connection) DoRequest(httpBody io.Reader, httpMethod, endpoint string,
for key, val := range header {
req.Header.Set(key, val)
}
req = req.WithContext(context.WithValue(context.Background(), clientKey, c))
// Give the Do three chances in the case of a comm/service hiccup
for i := 0; i < 3; i++ {
response, err = c.Client.Do(req) // nolint
Expand Down
38 changes: 23 additions & 15 deletions pkg/bindings/images/build.go
Original file line number Diff line number Diff line change
Expand Up @@ -391,42 +391,50 @@ func Build(ctx context.Context, containerFiles []string, options entities.BuildO
dec := json.NewDecoder(body)

var id string
var mErr error
for {
var s struct {
Stream string `json:"stream,omitempty"`
Error string `json:"error,omitempty"`
}
if err := dec.Decode(&s); err != nil {
if errors.Is(err, io.EOF) {
if mErr == nil && id == "" {
mErr = errors.New("stream dropped, unexpected failure")
}
break
}
s.Error = err.Error() + "\n"
}

select {
// FIXME(vrothberg): it seems we always hit the EOF case below,
// even when the server quit but it seems desirable to
// distinguish a proper build from a transient EOF.
case <-response.Request.Context().Done():
return &entities.BuildReport{ID: id}, mErr
return &entities.BuildReport{ID: id}, nil
default:
// non-blocking select
}

if err := dec.Decode(&s); err != nil {
if errors.Is(err, io.ErrUnexpectedEOF) {
return nil, errors.Wrap(err, "server probably quit")
}
// EOF means the stream is over in which case we need
// to have read the id.
if errors.Is(err, io.EOF) && id != "" {
break
}
return &entities.BuildReport{ID: id}, errors.Wrap(err, "decoding stream")
}

switch {
case s.Stream != "":
stdout.Write([]byte(s.Stream))
if iidRegex.Match([]byte(s.Stream)) {
raw := []byte(s.Stream)
stdout.Write(raw)
if iidRegex.Match(raw) {
id = strings.TrimSuffix(s.Stream, "\n")
}
case s.Error != "":
mErr = errors.New(s.Error)
// If there's an error, return directly. The stream
// will be closed on return.
return &entities.BuildReport{ID: id}, errors.New(s.Error)
default:
return &entities.BuildReport{ID: id}, errors.New("failed to parse build results stream, unexpected input")
}
}
return &entities.BuildReport{ID: id}, mErr
return &entities.BuildReport{ID: id}, nil
}

func nTar(excludes []string, sources ...string) (io.ReadCloser, error) {
Expand Down
1 change: 1 addition & 0 deletions pkg/domain/infra/runtime_abi.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ func NewImageEngine(facts *entities.PodmanConfig) (entities.ImageEngine, error)
r, err := NewLibpodImageRuntime(facts.FlagSet, facts)
return r, err
case entities.TunnelMode:
// TODO: look at me!
ctx, err := bindings.NewConnectionWithIdentity(context.Background(), facts.URI, facts.Identity)
return &tunnel.ImageEngine{ClientCtx: ctx}, err
}
Expand Down
13 changes: 3 additions & 10 deletions test/system/070-build.bats
Original file line number Diff line number Diff line change
Expand Up @@ -749,16 +749,9 @@ RUN echo $random_string
EOF

run_podman 125 build -t build_test --pull-never $tmpdir
# FIXME: this is just ridiculous. Even after #10030 and #10034, Ubuntu
# remote *STILL* flakes this test! It fails with the correct exit status,
# but the error output is 'Error: stream dropped, unexpected failure'
# Let's just stop checking on podman-remote. As long as it exits 125,
# we're happy.
if ! is_remote; then
is "$output" \
".*Error: error creating build container: quay.io/libpod/nosuchimage:nosuchtag: image not known" \
"--pull-never fails with expected error message"
fi
is "$output" \
".*Error: error creating build container: quay.io/libpod/nosuchimage:nosuchtag: image not known" \
"--pull-never fails with expected error message"
}

@test "podman build --logfile test" {
Expand Down

0 comments on commit 4df6e31

Please sign in to comment.