From 232c32bd35e8264b94e8885761c2f864f73dae51 Mon Sep 17 00:00:00 2001 From: Ed Santiago Date: Tue, 12 Dec 2023 17:27:45 -0700 Subject: [PATCH] CI: systests: safer isolation in registry & tests Our test registry (used for login & local registry tests) was being run using the standard podman tmpdir, hence the standard podman database, This was then getting clobbered in the 330-corrupt-images test, which runs "system reset". We just didn't know this was happening. Until we added a registry test after the system reset. Oops. Solution: new helper function podman_isolation_opts() sets --root, --runroot, *and --tmpdir*. Refactor all existing --root/--runroot usages. Document. Next problem: the "network reload" test in 500-networking.bats did not (could not) know about our registry port, so the "iptables -F" command reverted that to DROP, so the subsequent podman-auth in 700-play timed out. Solution: add a podman-isolated "network reload" to start_registry(). Final problem, because, really, those weren't enough: a BATS bug where running with --filter-tags would set IFS=',' in setup_suite which in turn has catastrophic consequences: https://github.com/bats-core/bats-core/issues/812 See #20966 for details of the failure and further conversation. Signed-off-by: Ed Santiago --- test/system/272-system-connection.bats | 3 +-- test/system/330-corrupt-images.bats | 9 ++++++--- test/system/520-checkpoint.bats | 7 +------ test/system/550-pause-process.bats | 5 +---- test/system/helpers.bash | 14 ++++++++++++++ test/system/helpers.registry.bash | 25 +++++++++++++------------ test/system/setup_suite.bash | 10 ++++++++++ 7 files changed, 46 insertions(+), 27 deletions(-) diff --git a/test/system/272-system-connection.bats b/test/system/272-system-connection.bats index 028bd4b0a1..2824394fd6 100644 --- a/test/system/272-system-connection.bats +++ b/test/system/272-system-connection.bats @@ -108,8 +108,7 @@ $c2[ ]\+tcp://localhost:54321[ ]\+true" \ # Start service. Now podman info should work fine. The %%-remote* # converts "podman-remote --opts" to just "podman", which is what # we need for the server. - ${PODMAN%%-remote*} --root ${PODMAN_TMPDIR}/root \ - --runroot ${PODMAN_TMPDIR}/runroot \ + ${PODMAN%%-remote*} $(podman_isolation_opts ${PODMAN_TMPDIR}) \ system service -t 99 tcp://localhost:$_SERVICE_PORT & _SERVICE_PID=$! # Wait for the port and the podman-service to be ready. diff --git a/test/system/330-corrupt-images.bats b/test/system/330-corrupt-images.bats index c7ad4a3c4a..824061670e 100644 --- a/test/system/330-corrupt-images.bats +++ b/test/system/330-corrupt-images.bats @@ -17,14 +17,17 @@ PODMAN_CORRUPT_TEST_IMAGE_CANONICAL_FQIN=quay.io/libpod/alpine@sha256:634a8f35b5 PODMAN_CORRUPT_TEST_IMAGE_TAGGED_FQIN=${PODMAN_CORRUPT_TEST_IMAGE_CANONICAL_FQIN%%@sha256:*}:test PODMAN_CORRUPT_TEST_IMAGE_ID=961769676411f082461f9ef46626dd7a2d1e2b2a38e6a44364bcbecf51e66dd4 -# All tests in this file (and ONLY in this file) run with a custom rootdir function setup() { skip_if_remote "none of these tests run under podman-remote" - _PODMAN_TEST_OPTS="--storage-driver=vfs --root ${PODMAN_CORRUPT_TEST_WORKDIR}/root" + + # DANGER! This completely changes the behavior of run_podman, + # forcing it to use a quarantined directory. Make certain that + # it gets unset in teardown. + _PODMAN_TEST_OPTS="--storage-driver=vfs $(podman_isolation_opts ${PODMAN_CORRUPT_TEST_WORKDIR})" } function teardown() { - # No other tests should ever run with this custom rootdir + # No other tests should ever run with these scratch options unset _PODMAN_TEST_OPTS is_remote && return diff --git a/test/system/520-checkpoint.bats b/test/system/520-checkpoint.bats index 33378435a3..a43cf89a94 100644 --- a/test/system/520-checkpoint.bats +++ b/test/system/520-checkpoint.bats @@ -117,13 +117,8 @@ function teardown() { @test "podman checkpoint --export, with volumes" { skip_if_remote "Test uses --root/--runroot, which are N/A over remote" - # Create a root in tempdir. We will run a container here. - local p_root=${PODMAN_TMPDIR}/testroot/root - local p_runroot=${PODMAN_TMPDIR}/testroot/runroot - mkdir -p $p_root $p_runroot - # To avoid network pull, copy $IMAGE straight to temp root - local p_opts="--root $p_root --runroot $p_runroot --events-backend file" + local p_opts="$(podman_isolation_opts ${PODMAN_TMPDIR}) --events-backend file" run_podman save -o $PODMAN_TMPDIR/image.tar $IMAGE run_podman $p_opts load -i $PODMAN_TMPDIR/image.tar diff --git a/test/system/550-pause-process.bats b/test/system/550-pause-process.bats index 282494b630..4e99d85f26 100644 --- a/test/system/550-pause-process.bats +++ b/test/system/550-pause-process.bats @@ -60,10 +60,7 @@ function _check_pause_process() { test $status -eq 0 && die "Pause process $pause_pid is still running even after podman system migrate" fi - run_podman --root $PODMAN_TMPDIR/root \ - --runroot $PODMAN_TMPDIR/runroot \ - --tmpdir $PODMAN_TMPDIR/tmp \ - $getns + run_podman $(podman_isolation_opts ${PODMAN_TMPDIR}) $getns tmpdir_userns="$output" # And now we should once again have a pause process diff --git a/test/system/helpers.bash b/test/system/helpers.bash index dc5575e737..6f641703d7 100644 --- a/test/system/helpers.bash +++ b/test/system/helpers.bash @@ -607,6 +607,20 @@ function podman_storage_driver() { echo "$output" } +# Given a (scratch) directory path, returns a set of command-line options +# for running an isolated podman that will not step on system podman. Set: +# - rootdir, so we don't clobber real images or storage; +# - tmpdir, so we use an isolated DB; and +# - runroot, out of an abundance of paranoia +function podman_isolation_opts() { + local path=${1?podman_isolation_opts: missing PATH arg} + + for opt in root runroot tmpdir;do + mkdir -p $path/$opt + echo " --$opt $path/$opt" + done +} + # rhbz#1895105: rootless journald is unavailable except to users in # certain magic groups; which our testuser account does not belong to # (intentional: that is the RHEL default, so that's the setup we test). diff --git a/test/system/helpers.registry.bash b/test/system/helpers.registry.bash index c355355657..11bb02e252 100644 --- a/test/system/helpers.registry.bash +++ b/test/system/helpers.registry.bash @@ -19,6 +19,14 @@ unset REGISTRY_AUTH_FILE function start_registry() { if [[ -d "$PODMAN_LOGIN_WORKDIR/auth" ]]; then # Already started + + # Fixes very obscure corner case in root system tests: + # 1) we run 150-login tests, starting a registry; then + # 2) run 500-network, which runs iptables -F; then + # 3) run 700-play, the "private" test, which needs the + # already-started registry, but its port is now DROPped, + # so the test times out trying to talk to registry + run_podman --storage-driver vfs $(podman_isolation_opts ${PODMAN_LOGIN_WORKDIR}) network reload --all return fi @@ -28,10 +36,8 @@ function start_registry() { # Registry image; copy of docker.io, but on our own registry local REGISTRY_IMAGE="$PODMAN_TEST_IMAGE_REGISTRY/$PODMAN_TEST_IMAGE_USER/registry:2.8" - # Pull registry image, but into a separate container storage - mkdir ${PODMAN_LOGIN_WORKDIR}/root - mkdir ${PODMAN_LOGIN_WORKDIR}/runroot - PODMAN_LOGIN_ARGS="--storage-driver vfs --root ${PODMAN_LOGIN_WORKDIR}/root --runroot ${PODMAN_LOGIN_WORKDIR}/runroot" + # Pull registry image, but into a separate container storage and DB and everything + PODMAN_LOGIN_ARGS="--storage-driver vfs $(podman_isolation_opts ${PODMAN_LOGIN_WORKDIR})" # _prefetch() will retry twice on network error, and will also use # a pre-cached image if present (helpful on dev workstation, not in CI). _PODMAN_TEST_OPTS="${PODMAN_LOGIN_ARGS}" _prefetch $REGISTRY_IMAGE @@ -86,14 +92,9 @@ function stop_registry() { skip "[leaving registry running by request]" fi - run_podman --storage-driver vfs \ - --root ${PODMAN_LOGIN_WORKDIR}/root \ - --runroot ${PODMAN_LOGIN_WORKDIR}/runroot \ - rm -f -t0 registry - run_podman --storage-driver vfs \ - --root ${PODMAN_LOGIN_WORKDIR}/root \ - --runroot ${PODMAN_LOGIN_WORKDIR}/runroot \ - rmi -a -f + opts="--storage-driver vfs $(podman_isolation_opts ${PODMAN_LOGIN_WORKDIR})" + run_podman $opts rm -f -t0 registry + run_podman $opts rmi -a -f # By default, clean up if [ -z "${PODMAN_TEST_KEEP_LOGIN_WORKDIR}" ]; then diff --git a/test/system/setup_suite.bash b/test/system/setup_suite.bash index ff1bed1daa..ab3791cb2b 100644 --- a/test/system/setup_suite.bash +++ b/test/system/setup_suite.bash @@ -11,6 +11,12 @@ load helpers.registry # Create common environment just in case we end up needing a registry. # These environment variables will be available to all tests. function setup_suite() { + # FIXME: 2023-12-13: https://github.com/bats-core/bats-core/issues/812 + # Running 'bats --filter-tags' sets IFS=',' which ... ugh. Not fun to debug. + # The line below is newline, space, tab. + IFS=" + " + # Can't use $BATS_SUITE_TMPDIR because podman barfs: # Error: the specified runroot is longer than 50 characters export PODMAN_LOGIN_WORKDIR=$(mktemp -d --tmpdir=${BATS_TMPDIR:-${TMPDIR:-/tmp}} podman-bats-registry.XXXXXX) @@ -21,6 +27,10 @@ function setup_suite() { # FIXME: racy! It could be many minutes between now and when we start it. # To mitigate, we use a range not used anywhere else in system tests. export PODMAN_LOGIN_REGISTRY_PORT=$(random_free_port 42000-42999) + + # The above does not handle errors. Do a final confirmation. + assert "$PODMAN_LOGIN_REGISTRY_PORT" != "" \ + "Unable to set PODMAN_LOGIN_REGISTRY_PORT" } # Run at the very end of all tests. Useful for cleanup of non-BATS tmpdirs.