From abfe12290add519aa993221a294b19d4035b3de8 Mon Sep 17 00:00:00 2001 From: Valentin Rothberg Date: Mon, 26 Jul 2021 11:55:33 +0200 Subject: [PATCH] remote build: fix streaming and error handling Address a number of issues in the streaming logic in remote build, most importantly an error in using buffered channels on the server side. The pattern below does not guarantee that the channel is entirely read before the context fires. for { select { case <- bufferedChannel: ... case <- ctx.Done(): ... } } Fixes: #10154 Signed-off-by: Valentin Rothberg --- pkg/api/handlers/compat/images_build.go | 19 +++++++------ pkg/bindings/connection.go | 3 +- pkg/bindings/images/build.go | 38 +++++++++++++++---------- pkg/domain/infra/runtime_abi.go | 1 + test/system/070-build.bats | 13 ++------- 5 files changed, 38 insertions(+), 36 deletions(-) diff --git a/pkg/api/handlers/compat/images_build.go b/pkg/api/handlers/compat/images_build.go index 64805b7fa4..2c98a5361e 100644 --- a/pkg/api/handlers/compat/images_build.go +++ b/pkg/api/handlers/compat/images_build.go @@ -393,16 +393,16 @@ func BuildImage(w http.ResponseWriter, r *http.Request) { defer auth.RemoveAuthfile(authfile) // Channels all mux'ed in select{} below to follow API build protocol - stdout := channel.NewWriter(make(chan []byte, 1)) + stdout := channel.NewWriter(make(chan []byte)) defer stdout.Close() - auxout := channel.NewWriter(make(chan []byte, 1)) + auxout := channel.NewWriter(make(chan []byte)) defer auxout.Close() - stderr := channel.NewWriter(make(chan []byte, 1)) + stderr := channel.NewWriter(make(chan []byte)) defer stderr.Close() - reporter := channel.NewWriter(make(chan []byte, 1)) + reporter := channel.NewWriter(make(chan []byte)) defer reporter.Close() runtime := r.Context().Value("runtime").(*libpod.Runtime) @@ -529,7 +529,7 @@ func BuildImage(w http.ResponseWriter, r *http.Request) { enc := json.NewEncoder(body) enc.SetEscapeHTML(true) -loop: + for { m := struct { Stream string `json:"stream,omitempty"` @@ -543,13 +543,13 @@ loop: stderr.Write([]byte(err.Error())) } flush() - case e := <-auxout.Chan(): + case e := <-reporter.Chan(): m.Stream = string(e) if err := enc.Encode(m); err != nil { stderr.Write([]byte(err.Error())) } flush() - case e := <-reporter.Chan(): + case e := <-auxout.Chan(): m.Stream = string(e) if err := enc.Encode(m); err != nil { stderr.Write([]byte(err.Error())) @@ -561,8 +561,8 @@ loop: logrus.Warnf("Failed to json encode error %v", err) } flush() + return case <-runCtx.Done(): - flush() if success { if !utils.IsLibpodRequest(r) { m.Stream = fmt.Sprintf("Successfully built %12.12s\n", imageID) @@ -579,7 +579,8 @@ loop: } } } - break loop + flush() + return case <-r.Context().Done(): cancel() logrus.Infof("Client disconnect reported for build %q / %q.", registry, query.Dockerfile) diff --git a/pkg/bindings/connection.go b/pkg/bindings/connection.go index fd93c5ac7e..62b1655acf 100644 --- a/pkg/bindings/connection.go +++ b/pkg/bindings/connection.go @@ -327,7 +327,7 @@ func (c *Connection) DoRequest(httpBody io.Reader, httpMethod, endpoint string, uri := fmt.Sprintf("http://d/v%d.%d.%d/libpod"+endpoint, params...) logrus.Debugf("DoRequest Method: %s URI: %v", httpMethod, uri) - req, err := http.NewRequest(httpMethod, uri, httpBody) + req, err := http.NewRequestWithContext(context.WithValue(context.Background(), clientKey, c), httpMethod, uri, httpBody) if err != nil { return nil, err } @@ -337,7 +337,6 @@ func (c *Connection) DoRequest(httpBody io.Reader, httpMethod, endpoint string, for key, val := range header { req.Header.Set(key, val) } - req = req.WithContext(context.WithValue(context.Background(), clientKey, c)) // Give the Do three chances in the case of a comm/service hiccup for i := 0; i < 3; i++ { response, err = c.Client.Do(req) // nolint diff --git a/pkg/bindings/images/build.go b/pkg/bindings/images/build.go index 142204f277..a35f461a77 100644 --- a/pkg/bindings/images/build.go +++ b/pkg/bindings/images/build.go @@ -391,42 +391,50 @@ func Build(ctx context.Context, containerFiles []string, options entities.BuildO dec := json.NewDecoder(body) var id string - var mErr error for { var s struct { Stream string `json:"stream,omitempty"` Error string `json:"error,omitempty"` } - if err := dec.Decode(&s); err != nil { - if errors.Is(err, io.EOF) { - if mErr == nil && id == "" { - mErr = errors.New("stream dropped, unexpected failure") - } - break - } - s.Error = err.Error() + "\n" - } select { + // FIXME(vrothberg): it seems we always hit the EOF case below, + // even when the server quit but it seems desirable to + // distinguish a proper build from a transient EOF. case <-response.Request.Context().Done(): - return &entities.BuildReport{ID: id}, mErr + return &entities.BuildReport{ID: id}, nil default: // non-blocking select } + if err := dec.Decode(&s); err != nil { + if errors.Is(err, io.ErrUnexpectedEOF) { + return nil, errors.Wrap(err, "server probably quit") + } + // EOF means the stream is over in which case we need + // to have read the id. + if errors.Is(err, io.EOF) && id != "" { + break + } + return &entities.BuildReport{ID: id}, errors.Wrap(err, "decoding stream") + } + switch { case s.Stream != "": - stdout.Write([]byte(s.Stream)) - if iidRegex.Match([]byte(s.Stream)) { + raw := []byte(s.Stream) + stdout.Write(raw) + if iidRegex.Match(raw) { id = strings.TrimSuffix(s.Stream, "\n") } case s.Error != "": - mErr = errors.New(s.Error) + // If there's an error, return directly. The stream + // will be closed on return. + return &entities.BuildReport{ID: id}, errors.New(s.Error) default: return &entities.BuildReport{ID: id}, errors.New("failed to parse build results stream, unexpected input") } } - return &entities.BuildReport{ID: id}, mErr + return &entities.BuildReport{ID: id}, nil } func nTar(excludes []string, sources ...string) (io.ReadCloser, error) { diff --git a/pkg/domain/infra/runtime_abi.go b/pkg/domain/infra/runtime_abi.go index ca201b5aed..177e9cff40 100644 --- a/pkg/domain/infra/runtime_abi.go +++ b/pkg/domain/infra/runtime_abi.go @@ -33,6 +33,7 @@ func NewImageEngine(facts *entities.PodmanConfig) (entities.ImageEngine, error) r, err := NewLibpodImageRuntime(facts.FlagSet, facts) return r, err case entities.TunnelMode: + // TODO: look at me! ctx, err := bindings.NewConnectionWithIdentity(context.Background(), facts.URI, facts.Identity) return &tunnel.ImageEngine{ClientCtx: ctx}, err } diff --git a/test/system/070-build.bats b/test/system/070-build.bats index 7b76c585f9..26113e45c7 100644 --- a/test/system/070-build.bats +++ b/test/system/070-build.bats @@ -749,16 +749,9 @@ RUN echo $random_string EOF run_podman 125 build -t build_test --pull-never $tmpdir - # FIXME: this is just ridiculous. Even after #10030 and #10034, Ubuntu - # remote *STILL* flakes this test! It fails with the correct exit status, - # but the error output is 'Error: stream dropped, unexpected failure' - # Let's just stop checking on podman-remote. As long as it exits 125, - # we're happy. - if ! is_remote; then - is "$output" \ - ".*Error: error creating build container: quay.io/libpod/nosuchimage:nosuchtag: image not known" \ - "--pull-never fails with expected error message" - fi + is "$output" \ + ".*Error: error creating build container: quay.io/libpod/nosuchimage:nosuchtag: image not known" \ + "--pull-never fails with expected error message" } @test "podman build --logfile test" {