Skip to content

Commit

Permalink
podman service reaper
Browse files Browse the repository at this point in the history
Add a new service reaper package. Podman currently does not reap all
child processes. The slirp4netns and rootlesskit processes are not
reaped. The is not a problem for local podman since the podman process
dies before the other processes and then init will reap them for us.

However with podman system service it is possible that the podman
process is still alive after slirp died. In this case podman has to reap
it or the slirp process will be a zombie until the service is stopped.

The service reaper will listen in an extra goroutine on SIGCHLD. Once it
receives this signal it will try to reap all pids that were added with
`AddPID()`. While I would like to just reap all children this is not
possible because many parts of the code use `os/exec` with `cmd.Wait()`.
If we reap before `cmd.Wait()` things can break, so reaping everything
is not an option.

[NO TESTS NEEDED]

Fixes containers#9777

Signed-off-by: Paul Holzinger <[email protected]>
  • Loading branch information
Luap99 committed Jul 2, 2021
1 parent 955c1d2 commit ed51e3f
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 0 deletions.
3 changes: 3 additions & 0 deletions cmd/podman/system/service_abi.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
api "github.com/containers/podman/v3/pkg/api/server"
"github.com/containers/podman/v3/pkg/domain/entities"
"github.com/containers/podman/v3/pkg/domain/infra"
"github.com/containers/podman/v3/pkg/servicereaper"
"github.com/containers/podman/v3/pkg/util"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
Expand Down Expand Up @@ -71,6 +72,8 @@ func restService(opts entities.ServiceOptions, flags *pflag.FlagSet, cfg *entiti
return err
}

servicereaper.Start()

infra.StartWatcher(rt)
server, err := api.NewServerWithSettings(rt, listener, api.Options{Timeout: opts.Timeout, CorsHeaders: opts.CorsHeaders})
if err != nil {
Expand Down
3 changes: 3 additions & 0 deletions libpod/networking_slirp4netns.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (

"github.com/containers/podman/v3/pkg/errorhandling"
"github.com/containers/podman/v3/pkg/rootlessport"
"github.com/containers/podman/v3/pkg/servicereaper"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
)
Expand Down Expand Up @@ -299,6 +300,7 @@ func (r *Runtime) setupSlirp4netns(ctr *Container) error {
return errors.Wrapf(err, "failed to start slirp4netns process")
}
defer func() {
servicereaper.AddPID(cmd.Process.Pid)
if err := cmd.Process.Release(); err != nil {
logrus.Errorf("unable to release command process: %q", err)
}
Expand Down Expand Up @@ -514,6 +516,7 @@ outer:
return errors.Wrapf(err, "failed to start rootlessport process")
}
defer func() {
servicereaper.AddPID(cmd.Process.Pid)
if err := cmd.Process.Release(); err != nil {
logrus.Errorf("unable to release rootlessport process: %q", err)
}
Expand Down
64 changes: 64 additions & 0 deletions pkg/servicereaper/service.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
//+build linux

package servicereaper

import (
"os"
"os/signal"
"sync"
"syscall"

"github.com/sirupsen/logrus"
)

type service struct {
pidMap map[int]bool
mutex *sync.Mutex
}

var s = service{
pidMap: map[int]bool{},
mutex: &sync.Mutex{},
}

func AddPID(pid int) {
s.mutex.Lock()
s.pidMap[pid] = true
s.mutex.Unlock()
}

func Start() {
// create signal channel and only wait for SIGCHLD
sigc := make(chan os.Signal, 1)
signal.Notify(sigc, syscall.SIGCHLD)
// wait and reap in an extra goroutine
go reaper(sigc)
}

func reaper(sigc chan os.Signal) {
for {
// block until we receive SIGCHLD
<-sigc
s.mutex.Lock()
for pid := range s.pidMap {
var status syscall.WaitStatus
waitpid, err := syscall.Wait4(pid, &status, syscall.WNOHANG, nil)
if err != nil {
// do not log error for ECHILD
if err != syscall.ECHILD {
logrus.Warnf("wait for pid %d failed: %v ", pid, err)
}
delete(s.pidMap, pid)
continue
}
// if pid == 0 nothing happened
if waitpid == 0 {
continue
}
if status.Exited() {
delete(s.pidMap, pid)
}
}
s.mutex.Unlock()
}
}

0 comments on commit ed51e3f

Please sign in to comment.