Skip to content

Commit

Permalink
criu swrk: return child error to caller
Browse files Browse the repository at this point in the history
In the podman CI we are seeing a weird flake during criu version
detection[1]. The write to the socket just fails with broken pipe.
The logical thing to assume here is that the child exited. However, the
current code never reports the child's error from wait back to the caller.
The cleanup error is now added to the returned error so the caller sees both.

The output is not captured as this causes hangs when the fds are passed
into child processes.

As errors.Join from the standard library is used, bump the minimum Go
version to 1.20.

[1] containers/podman#18856

Signed-off-by: Paul Holzinger <[email protected]>
  • Loading branch information
Luap99 committed Jul 19, 2024
1 parent 664a3fd commit 1c9f152
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 10 deletions.
24 changes: 17 additions & 7 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,13 +61,19 @@ func (c *Criu) Prepare() error {
}

// Cleanup cleans up
func (c *Criu) Cleanup() {
func (c *Criu) Cleanup() error {
var errs []error
if c.swrkCmd != nil {
c.swrkSk.Close()
if err := c.swrkSk.Close(); err != nil {
errs = append(errs, err)
}
c.swrkSk = nil
_ = c.swrkCmd.Wait()
if err := c.swrkCmd.Wait(); err != nil {
errs = append(errs, fmt.Errorf("criu swrk failed: %w", err))
}
c.swrkCmd = nil
}
return errors.Join(errs...)
}

func (c *Criu) sendAndRecv(reqB []byte) ([]byte, int, error) {
Expand Down Expand Up @@ -99,9 +105,7 @@ func (c *Criu) doSwrk(reqType rpc.CriuReqType, opts *rpc.CriuOpts, nfy Notify) e
return nil
}

func (c *Criu) doSwrkWithResp(reqType rpc.CriuReqType, opts *rpc.CriuOpts, nfy Notify, features *rpc.CriuFeatures) (*rpc.CriuResp, error) {
var resp *rpc.CriuResp

func (c *Criu) doSwrkWithResp(reqType rpc.CriuReqType, opts *rpc.CriuOpts, nfy Notify, features *rpc.CriuFeatures) (resp *rpc.CriuResp, retErr error) {
req := rpc.CriuReq{
Type: &reqType,
Opts: opts,
Expand All @@ -121,7 +125,13 @@ func (c *Criu) doSwrkWithResp(reqType rpc.CriuReqType, opts *rpc.CriuOpts, nfy N
return nil, err
}

defer c.Cleanup()
defer func() {
// append any cleanup errors to the returned error
err := c.Cleanup()
if err != nil {
retErr = errors.Join(retErr, err)
}
}()
}

for {
Expand Down
11 changes: 9 additions & 2 deletions phaul/client.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package phaul

import (
"errors"
"fmt"

"github.com/checkpoint-restore/go-criu/v7"
Expand Down Expand Up @@ -55,7 +56,7 @@ func isLastIter(iter int, stats *stats.DumpStatsEntry, prevStats *stats.DumpStat
}

// Migrate function
func (pc *Client) Migrate() error {
func (pc *Client) Migrate() (retErr error) {
criu := criu.MakeCriu()
psi := rpc.CriuPageServerInfo{
Fd: proto.Int32(int32(pc.cfg.Memfd)),
Expand All @@ -72,7 +73,13 @@ func (pc *Client) Migrate() error {
return err
}

defer criu.Cleanup()
defer func() {
// append any cleanup errors to the returned error
err := criu.Cleanup()
if err != nil {
retErr = errors.Join(retErr, err)
}
}()

imgs, err := preparePhaulImages(pc.cfg.Wdir)
if err != nil {
Expand Down
5 changes: 4 additions & 1 deletion test/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,10 @@ func main() {
log.Fatalln("dump failed: ", err)
}

c.Cleanup()
err = c.Cleanup()
if err != nil {
log.Fatalln("cleanup failed: ", err)
}
case "restore":
log.Println("Restoring")
img, err := os.Open(os.Args[2])
Expand Down

0 comments on commit 1c9f152

Please sign in to comment.