From faeed14f6140ef6661bac603ff230744cb2d4e20 Mon Sep 17 00:00:00 2001 From: Ed Santiago Date: Mon, 24 Apr 2023 09:41:50 -0600 Subject: [PATCH] system tests: safer container-stop signaling Having a container spin-wait on a /stop file, then exit, is unsafe: 'podman exec $ctr touch /stop' can get sucked into container cleanup before the exec terminates, resulting in the podman-exec failing and hence the test failing. Most existing instances of this pattern are unnecessary. Replace those with just 'podman rm -f'. When necessary, use a variety of safer alternatives. Re-Closes: #10825 (already closed; this addresses remaining cases) Signed-off-by: Ed Santiago --- test/system/030-run.bats | 5 ++--- test/system/075-exec.bats | 19 ++++++++----------- test/system/130-kill.bats | 11 ++++++----- test/system/260-sdnotify.bats | 24 +++++++++++++++--------- test/system/320-system-df.bats | 6 ++---- test/system/410-selinux.bats | 3 +-- test/system/520-checkpoint.bats | 4 +++- 7 files changed, 37 insertions(+), 35 deletions(-) diff --git a/test/system/030-run.bats b/test/system/030-run.bats index a6bfa69a3b..8baaa4ecd2 100644 --- a/test/system/030-run.bats +++ b/test/system/030-run.bats @@ -347,7 +347,7 @@ echo $rand | 0 | $rand # #6991 : /etc/passwd is modifiable @test "podman run : --userns=keep-id: passwd file is modifiable" { skip_if_not_rootless "--userns=keep-id only works in rootless mode" - run_podman run -d --userns=keep-id --cap-add=dac_override $IMAGE sh -c 'while ! test -e /tmp/stop; do sleep 0.1; done' + run_podman run -d --userns=keep-id --cap-add=dac_override $IMAGE top cid="$output" # Assign a UID that is (a) not in our image /etc/passwd and (b) not @@ -368,8 +368,7 @@ echo $rand | 0 | $rand is "$output" "newuser3:x:$uid:999:$gecos:/home/newuser3:/bin/sh" \ "newuser3 added to /etc/passwd in container" - run_podman exec $cid touch /tmp/stop - run_podman wait $cid + run_podman rm -f -t0 $cid } # For #7754: json-file was equating to 'none' diff --git a/test/system/075-exec.bats b/test/system/075-exec.bats index 0a6048b7e3..c10c98057b 100644 --- a/test/system/075-exec.bats +++ b/test/system/075-exec.bats @@ -27,8 +27,10 @@ load helpers run_podman 127 exec $cid /no/such/command is "$output" ".*such file or dir" "podman exec /no/such/command" - # Done - run_podman exec $cid rm -f /$rand_filename + # Done. Tell the container to stop. + # The '-d' is because container exit is racy: the exec process itself + # could get caught and killed by cleanup, causing this step to exit 137 + run_podman exec -d $cid rm -f /$rand_filename run_podman wait $cid is "$output" "0" "output from podman wait (container exit code)" @@ -59,7 +61,7 @@ load helpers # Issue #4785 - piping to exec statement - fixed in #4818 # Issue #5046 - piping to exec truncates results (actually a conmon issue) @test "podman exec - cat from stdin" { - run_podman run -d $IMAGE sh -c 'while [ ! -e /stop ]; do sleep 0.1;done' + run_podman run -d $IMAGE top cid="$output" echo_string=$(random_string 20) @@ -80,26 +82,21 @@ load helpers is "${output% *}" "$expect " "SHA of file in container" # Clean up - run_podman exec $cid touch /stop - run_podman wait $cid - run_podman rm $cid + run_podman rm -f -t0 $cid } # #6829 : add username to /etc/passwd inside container if --userns=keep-id @test "podman exec - with keep-id" { skip_if_not_rootless "--userns=keep-id only works in rootless mode" # Multiple --userns options confirm command-line override (last one wins) - run_podman run -d --userns=private --userns=keep-id $IMAGE sh -c \ - "echo READY;while [ ! -f /tmp/stop ]; do sleep 1; done" + run_podman run -d --userns=private --userns=keep-id $IMAGE sh -c 'echo READY;top' cid="$output" wait_for_ready $cid run_podman exec $cid id -un is "$output" "$(id -un)" "container is running as current user" - run_podman exec --user=$(id -un) $cid touch /tmp/stop - run_podman wait $cid - run_podman rm $cid + run_podman rm -f -t0 $cid } # #11496: podman-remote loses output diff --git a/test/system/130-kill.bats b/test/system/130-kill.bats index 7522c475dc..e4ec105d78 100644 --- a/test/system/130-kill.bats +++ b/test/system/130-kill.bats @@ -24,10 +24,9 @@ load helpers run_podman run -d ${opt_log_driver} $IMAGE sh -c \ "for i in ${signals[*]}; do trap \"echo got: \$i\" \$i; done; echo READY; - while ! test -e /stop; do sleep 0.05; done; + while ! test -e /stop; do sleep 0.1; done; echo DONE" - # Ignore output regarding pulling/processing container images - cid=$(echo "$output" | tail -1) + cid="$output" # Run 'logs -f' on that container, but run it in the background with # redirection to a named pipe from which we (foreground job) read @@ -71,8 +70,10 @@ load helpers kill_and_check -SIGUSR1 10 kill_and_check SIGUSR2 12 - # Done. Tell the container to stop, and wait for final DONE - run_podman exec $cid touch /stop + # Done. Tell the container to stop, and wait for final DONE. + # The '-d' is because container exit is racy: the exec process itself + # could get caught and killed by cleanup, causing this step to exit 137 + run_podman exec -d $cid touch /stop read -t 5 -u 5 done || die "Timed out waiting for DONE from container" is "$done" "DONE" "final log message from container" diff --git a/test/system/260-sdnotify.bats b/test/system/260-sdnotify.bats index 7c388ba2b2..fb2dc432a9 100644 --- a/test/system/260-sdnotify.bats +++ b/test/system/260-sdnotify.bats @@ -110,7 +110,7 @@ function _assert_mainpid_is_conmon() { run_podman run -d --name sdnotify_conmon_c \ --sdnotify=conmon \ $IMAGE \ - sh -c 'printenv NOTIFY_SOCKET;echo READY;while ! test -f /stop;do sleep 0.1;done' + sh -c 'printenv NOTIFY_SOCKET;echo READY;sleep 999' cid="$output" wait_for_ready $cid @@ -135,9 +135,7 @@ READY=1" "sdnotify sent MAINPID and READY" _assert_mainpid_is_conmon "$output" # Done. Stop container, clean up. - run_podman exec $cid touch /stop - run_podman wait $cid - run_podman rm $cid + run_podman rm -f -t0 $cid _stop_socat } @@ -153,7 +151,7 @@ READY=1" "sdnotify sent MAINPID and READY" _start_socat run_podman run -d --sdnotify=container $SYSTEMD_IMAGE \ - sh -c 'printenv NOTIFY_SOCKET; echo READY; while ! test -f /stop;do sleep 0.1;done;systemd-notify --ready' + sh -c 'trap "touch /stop" SIGUSR1;printenv NOTIFY_SOCKET; echo READY; while ! test -f /stop;do sleep 0.1;done;systemd-notify --ready' cid="$output" wait_for_ready $cid @@ -174,8 +172,8 @@ READY=1" "sdnotify sent MAINPID and READY" is "$output" "MAINPID=$mainPID" "Container is not ready yet, so we only know the main PID" - # Done. Stop container, clean up. - run_podman exec $cid touch /stop + # Done. Tell container to stop itself, and clean up + run_podman kill -s USR1 $cid run_podman wait $cid wait_for_file $_SOCAT_LOG @@ -205,10 +203,18 @@ spec: - command: - /bin/sh - -c - - 'while :; do if test -e /tearsinrain; then exit 0; fi; sleep 1; done' + - 'while :; do if test -e /rain/tears; then exit 0; fi; sleep 1; done' image: $IMAGE name: test resources: {} + volumeMounts: + - mountPath: /rain:z + name: test-mountdir + volumes: + - hostPath: + path: $PODMAN_TMPDIR + type: Directory + name: test-mountdir EOF # The name of the service container is predictable: the first 12 characters @@ -228,7 +234,7 @@ EOF main_pid="$output" # Tell pod to finish, then wait for all containers to stop - run_podman exec test_pod-test touch /tearsinrain + touch $PODMAN_TMPDIR/tears run_podman container wait $service_container test_pod-test # Make sure the containers have the correct policy. diff --git a/test/system/320-system-df.bats b/test/system/320-system-df.bats index 488cacfc59..52d8df0fd7 100644 --- a/test/system/320-system-df.bats +++ b/test/system/320-system-df.bats @@ -82,8 +82,7 @@ Size | ~12.*MB | 0B | 0B @test "podman system df - with active containers and volumes" { run_podman run -v /myvol1 --name c1 $IMAGE true - run_podman run -d -v /myvol2 --name c2 $IMAGE \ - sh -c 'while ! test -e /stop; do sleep 0.1;done' + run_podman run -d -v /myvol2 --name c2 $IMAGE top run_podman system df --format '{{ .Type }}:{{ .Total }}:{{ .Active }}' is "${lines[0]}" "Images:1:1" "system df : Images line" @@ -124,8 +123,7 @@ Size | ~12.*MB | 0B | 0B run_podman system df --format '{{.Reclaimable}}' is "${lines[0]}" "0B (0%)" "cannot reclaim image data as it's still used by the containers" - run_podman exec c2 touch /stop - run_podman wait c2 + run_podman stop c2 # Create a second image by committing a container. run_podman container commit -q c1 diff --git a/test/system/410-selinux.bats b/test/system/410-selinux.bats index 2347fcc447..92ce40b053 100644 --- a/test/system/410-selinux.bats +++ b/test/system/410-selinux.bats @@ -99,12 +99,11 @@ function check_label() { --security-opt seccomp=unconfined \ --security-opt label=type:spc_t \ --security-opt label=level:s0 \ - $IMAGE sh -c 'while test ! -e /stop; do sleep 0.1; done' + $IMAGE top run_podman inspect --format='{{ .HostConfig.SecurityOpt }}' myc is "$output" "[label=type:spc_t,label=level:s0 seccomp=unconfined]" \ "'podman inspect' preserves all --security-opts" - run_podman exec myc touch /stop run_podman rm -t 0 -f myc } diff --git a/test/system/520-checkpoint.bats b/test/system/520-checkpoint.bats index 62281412d9..9fa00aadb9 100644 --- a/test/system/520-checkpoint.bats +++ b/test/system/520-checkpoint.bats @@ -211,7 +211,9 @@ function teardown() { is "$output" "$cid" "podman container restore" # Signal the container to continue; this is where the 1-2-3s will come from - run_podman exec $cid rm /wait + # The '-d' is because container exit is racy: the exec process itself + # could get caught and killed by cleanup, causing this step to exit 137 + run_podman exec -d $cid rm /wait # Wait for the container to stop run_podman wait $cid