Skip to content

Commit

Permalink
pkg/rootless: correctly handle proxy signals on reexec
Browse files Browse the repository at this point in the history
There are quite a lot of places in podman were we have some signal
handlers, most notably libpod/shutdown/handler.go.

However when we rexec we do not want any of that and just send all
signals we get down to the child obviously. So before we install our
signal handler we must first reset all others with signal.Reset().

Also while at it fix a problem were the joinUserAndMountNS() code path
would not forward signals at all. This code path is used when you have
running containers but the pause process was killed.

Fixes #16091
Given that signal handlers run in different goroutines parallel it would
explain why it flakes sometimes in CI. However to my understanding this
flake can only happen when the pause process is dead before we run the
podman command. So the question still is what kills the pause process?

Signed-off-by: Paul Holzinger <[email protected]>
  • Loading branch information
Luap99 committed May 25, 2023
1 parent 47ac6c4 commit 6bc52c9
Show file tree
Hide file tree
Showing 4 changed files with 126 additions and 58 deletions.
24 changes: 13 additions & 11 deletions pkg/rootless/rootless_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -184,12 +184,7 @@ func joinUserAndMountNS(pid uint, pausePid string) (bool, int, error) {
return false, -1, fmt.Errorf("cannot re-exec process to join the existing user namespace")
}

ret := C.reexec_in_user_namespace_wait(pidC, 0)
if ret < 0 {
return false, -1, errors.New("waiting for the re-exec process")
}

return true, int(ret), nil
return waitAndProxySignalsToChild(pidC)
}

// GetConfiguredMappings returns the additional IDs configured for the current user.
Expand Down Expand Up @@ -395,7 +390,6 @@ func becomeRootInUserNS(pausePid, fileToRead string, fileOutput *os.File) (_ boo
if ret < 0 {
return false, -1, errors.New("waiting for the re-exec process")
}

return true, 0, nil
}

Expand All @@ -418,6 +412,10 @@ func becomeRootInUserNS(pausePid, fileToRead string, fileOutput *os.File) (_ boo
return false, -1, errors.New("setting up the process")
}

return waitAndProxySignalsToChild(pidC)
}

func waitAndProxySignalsToChild(pid C.int) (bool, int, error) {
signals := []os.Signal{}
for sig := 0; sig < numSig; sig++ {
if sig == int(unix.SIGTSTP) {
Expand All @@ -426,24 +424,28 @@ func becomeRootInUserNS(pausePid, fileToRead string, fileOutput *os.File) (_ boo
signals = append(signals, unix.Signal(sig))
}

// Disable all existing signal handlers, from now forward everything to the child and let
// it deal with it. All we do is to wait and propagate the exit code from the child to our parent.
gosignal.Reset()
c := make(chan os.Signal, len(signals))
gosignal.Notify(c, signals...)
defer gosignal.Reset()
go func() {
for s := range c {
if s == unix.SIGCHLD || s == unix.SIGPIPE {
continue
}

if err := unix.Kill(int(pidC), s.(unix.Signal)); err != nil {
if err := unix.Kill(int(pid), s.(unix.Signal)); err != nil {
if err != unix.ESRCH {
logrus.Errorf("Failed to propagate signal to child process %d: %v", int(pidC), err)
logrus.Errorf("Failed to propagate signal to child process %d: %v", int(pid), err)
}
}
}
}()

ret := C.reexec_in_user_namespace_wait(pidC, 0)
ret := C.reexec_in_user_namespace_wait(pid, 0)
// child exited reset our signal proxy handler
gosignal.Reset()
if ret < 0 {
return false, -1, errors.New("waiting for the re-exec process")
}
Expand Down
49 changes: 2 additions & 47 deletions test/system/032-sig-proxy.bats
Original file line number Diff line number Diff line change
@@ -1,54 +1,9 @@
#!/usr/bin/env bats

load helpers
load helpers.sig-proxy

# Command to run in each of the tests.
SLEEPLOOP='trap "echo BYE;exit 0" INT;echo READY;while :;do sleep 0.1;done'

# Main test code: wait for container to exist and be ready, send it a
# signal, wait for container to acknowledge and exit.
function _test_sigproxy() {
local cname=$1
local kidpid=$2

# Wait for container to appear
local timeout=10
while :;do
sleep 0.5
run_podman '?' container exists $cname
if [[ $status -eq 0 ]]; then
break
fi
timeout=$((timeout - 1))
if [[ $timeout -eq 0 ]]; then
run_podman ps -a
die "Timed out waiting for container $cname to start"
fi
done

# Now that container exists, wait for it to declare itself READY
wait_for_ready $cname

# Signal, and wait for container to exit
kill -INT $kidpid
timeout=20
while :;do
sleep 0.5
run_podman logs $cname
if [[ "$output" =~ BYE ]]; then
break
fi
timeout=$((timeout - 1))
if [[ $timeout -eq 0 ]]; then
run_podman ps -a
die "Timed out waiting for BYE from container"
fi
done

run_podman rm -f -t0 $cname
}

# Each of the tests below does some setup, then invokes the above helper.
# Each of the tests below does some setup, then invokes the helper from helpers.sig-proxy.bash.

@test "podman sigproxy test: run" {
# We're forced to use $PODMAN because run_podman cannot be backgrounded
Expand Down
61 changes: 61 additions & 0 deletions test/system/550-pause-process.bats
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#

load helpers
load helpers.sig-proxy

function _check_pause_process() {
pause_pid=
Expand Down Expand Up @@ -77,3 +78,63 @@ function _check_pause_process() {
assert "$output" == "$tmpdir_userns" \
"podman with tmpdir2 should use the same userns created using a tmpdir"
}

# https://github.com/containers/podman/issues/16091
@test "rootless reexec with sig-proxy" {
skip_if_not_rootless "pause process is only used as rootless"
skip_if_remote "system migrate not supported via remote"

# Use podman system migrate to stop the currently running pause process
run_podman system migrate

# We're forced to use $PODMAN because run_podman cannot be backgrounded
$PODMAN run -i --name c_run $IMAGE sh -c "$SLEEPLOOP" &
local kidpid=$!

_test_sigproxy c_run $kidpid

# our container exits 0 so podman should too
wait $kidpid || die "podman run exited $? instead of zero"
}


@test "rootless reexec with sig-proxy when rejoining userns from container" {
skip_if_not_rootless "pause process is only used as rootless"
skip_if_remote "unshare not supported via remote"

# System tests can execute in contexts without XDG; in those, we have to
# skip the pause-pid-file checks.
if [[ -z "$XDG_RUNTIME_DIR" ]]; then
skip "\$XDG_RUNTIME_DIR not defined"
fi
local pause_pid_file="$XDG_RUNTIME_DIR/libpod/tmp/pause.pid"

# First let's run a container in the background to keep the userns active
local cname1=c1_$(random_string)
run_podman run -d --name $cname1 $IMAGE top

run_podman unshare readlink /proc/self/ns/user
userns="$output"

# check for pause pid and then kill it
_check_pause_process
kill -9 $pause_pid

# Now again directly start podman run and make sure it can forward signals
# We're forced to use $PODMAN because run_podman cannot be backgrounded
local cname2=c2_$(random_string)
$PODMAN run -i --name $cname2 $IMAGE sh -c "$SLEEPLOOP" &
local kidpid=$!

_test_sigproxy $cname2 $kidpid

# our container exits 0 so podman should too
wait $kidpid || die "podman run exited $? instead of zero"

# Check that podman joined the same userns as it tries to use the one
# from the running podman process in the background.
run_podman unshare readlink /proc/self/ns/user
assert "$output" == "$userns" "userns before/after kill is the same"

run_podman rm -f -t0 $cname1
}
50 changes: 50 additions & 0 deletions test/system/helpers.sig-proxy.bash
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# -*- bash -*-
#
# BATS helpers for sig-proxy functionality
#

# Command to run in each of the tests.
SLEEPLOOP='trap "echo BYE;exit 0" INT;echo READY;while :;do sleep 0.1;done'

# Main test code: wait for container to exist and be ready, send it a
# signal, wait for container to acknowledge and exit.
function _test_sigproxy() {
local cname=$1
local kidpid=$2

# Wait for container to appear
local timeout=10
while :;do
sleep 0.5
run_podman '?' container exists $cname
if [[ $status -eq 0 ]]; then
break
fi
timeout=$((timeout - 1))
if [[ $timeout -eq 0 ]]; then
run_podman ps -a
die "Timed out waiting for container $cname to start"
fi
done

# Now that container exists, wait for it to declare itself READY
wait_for_ready $cname

# Signal, and wait for container to exit
kill -INT $kidpid
timeout=20
while :;do
sleep 0.5
run_podman logs $cname
if [[ "$output" =~ BYE ]]; then
break
fi
timeout=$((timeout - 1))
if [[ $timeout -eq 0 ]]; then
run_podman ps -a
die "Timed out waiting for BYE from container"
fi
done

run_podman rm -f -t0 $cname
}

0 comments on commit 6bc52c9

Please sign in to comment.