Skip to content

Commit

Permalink
libimage: pull: increase timeout running under systemd
Browse files Browse the repository at this point in the history
Send `EXTEND_TIMEOUT_USEC` over the systemd notify socket (`NOTIFY_SOCKET`)
when pulling an image from a registry while running under systemd.  This
prevents a frequent issue with Quadlets, where the default systemd start
timeout of 90 seconds is exceeded because pulling the image takes too long.

Fixes: containers/podman/issues/18353
Signed-off-by: Valentin Rothberg <[email protected]>
  • Loading branch information
vrothberg committed Aug 17, 2023
1 parent b70b0c4 commit 20a1a0f
Showing 1 changed file with 58 additions and 0 deletions.
58 changes: 58 additions & 0 deletions libimage/pull.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ import (
"errors"
"fmt"
"io"
"net"
"os"
"runtime"
"strings"
"time"
Expand Down Expand Up @@ -592,6 +594,62 @@ func (r *Runtime) copySingleImageFromRegistry(ctx context.Context, imageName str
return nil
}

// Avoid running out of time when running inside a systemd unit by
// regularly increasing the timeout.
if socketPath, ok := os.LookupEnv("NOTIFY_SOCKET"); ok {
socketAddr := &net.UnixAddr{
Name: socketPath,
Net: "unixgram",
}
conn, err := net.DialUnix(socketAddr.Net, nil, socketAddr)
if err != nil {
return nil, err
}
defer conn.Close()

numExtensions := 10
extension := 30 * time.Second
timerFrequency := 25 * time.Second // Fire the timer at a higher frequency to avoid a race
timer := time.NewTimer(timerFrequency)
socketCtx, cancel := context.WithCancel(ctx)
defer cancel()
defer timer.Stop()

logrus.Warnf("Pulling image %s inside systemd: setting pull timeout to %s", imageName, time.Duration(numExtensions)*extension)

// From `man systemd.service(5)`:
//
// "If a service of Type=notify/Type=notify-reload sends "EXTEND_TIMEOUT_USEC=...", this may cause
// the start time to be extended beyond TimeoutStartSec=. The first receipt of this message must
// occur before TimeoutStartSec= is exceeded, and once the start time has extended beyond
// TimeoutStartSec=, the service manager will allow the service to continue to start, provided the
// service repeats "EXTEND_TIMEOUT_USEC=..." within the interval specified until the service startup
// status is finished by "READY=1"."
extendValue := []byte(fmt.Sprintf("EXTEND_TIMEOUT_USEC=%d", extension.Microseconds()))
extendTimeout := func() {
if _, err := conn.Write(extendValue); err != nil {
logrus.Errorf("Increasing EXTEND_TIMEOUT_USEC failed: %v", err)
}
numExtensions--
}

extendTimeout()
go func() {
for {
select {
case <-socketCtx.Done():
return
case <-timer.C:
if numExtensions == 0 {
return
}
extendTimeout()
timer.Reset(extension)
}
}
}()
}

c, err := r.newCopier(&options.CopyOptions)
if err != nil {
return nil, err
Expand Down

0 comments on commit 20a1a0f

Please sign in to comment.