From 1cf2b3eb28959a8e2c9ccd4b0d40186c6731104d Mon Sep 17 00:00:00 2001
From: Ed Santiago
Date: Wed, 14 Apr 2021 10:43:19 -0600
Subject: [PATCH] compose test: ongoing efforts to diagnose flakes

Yay, we got a failure with the new code (#10017). It shows
one ECONNRESET followed by a lot of ECONNREFUSED over an
8-second period (actually 15s because of the second curl
retry). My hunch: the container itself is dying. No amount
of retrying will get anything to work.

So, instead of the curl retry, if curl fails, run
'docker-compose logs', 'podman ps', and 'ss -tulpn' and
hope that one/more of those tells us something useful
when the test flakes again.

Also: DUH! Bitten by one of the most common bash pitfalls.
Checking exit status after 'local' will always be zero.
Split the declaration and the action into separate lines.

Also: if curl fails, return immediately. There's no point in
running the string output comparison.

Also: in _show_ok(), don't emit "actual/expect" messages if
both strings are empty.

Signed-off-by: Ed Santiago
---
 test/compose/test-compose | 31 ++++++++++++++++++-------------
 1 file changed, 18 insertions(+), 13 deletions(-)

diff --git a/test/compose/test-compose b/test/compose/test-compose
index abb957b438..c4c4841900 100755
--- a/test/compose/test-compose
+++ b/test/compose/test-compose
@@ -136,8 +136,11 @@ function _show_ok() {
     local expect=$3
     local actual=$4
     printf "${red}not ok $count $testname${reset}\n"
-    printf "${red}# expected: %s${reset}\n" "$expect"
-    printf "${red}# actual: ${bold}%s${reset}\n" "$actual"
+    # Not all errors include actual/expect
+    if [[ -n "$expect" || -n "$actual" ]]; then
+        printf "${red}# expected: %s${reset}\n" "$expect"
+        printf "${red}# actual: ${bold}%s${reset}\n" "$actual"
+    fi
 
     echo "not ok $count $testname" >>$LOG
     echo " expected: $expect" >>$LOG
@@ -164,20 +167,22 @@ function test_port() {
     local expect="$3" # what to expect from curl output
 
     # -s -S means "silent, but show errors"
-    local actual=$(curl --retry 3 --retry-all-errors -s -S http://127.0.0.1:$port/)
+    local actual
+    actual=$(curl --retry 3 --retry-all-errors -s -S http://127.0.0.1:$port/)
     local curl_rc=$?
 
-    # FIXME 2021-04-13: test is flaking, curl succeeds but returns empty result.
-    # Could it be that the container is not actually ready? Wait, and retry.
-    if [[ $curl_rc -eq 0 && -z "$actual" ]]; then
-        sleep 1
-        echo "# Retrying curl:"
-        actual=$(curl --retry 3 --retry-all-errors -s -S http://127.0.0.1:$port/)
-        curl_rc=$?
-    fi
-
     if [ $curl_rc -ne 0 ]; then
-        _show_ok 0 "$testname - curl failed with status $curl_rc"
+        _show_ok 0 "$testname - curl (port $port) failed with status $curl_rc"
+        # FIXME: is this useful? What else can we do to diagnose?
+        echo "# docker-compose logs:"
+        docker-compose logs
+        echo "# podman ps -a:"
+        $PODMAN_BIN --root $WORKDIR/root --runroot $WORKDIR/runroot ps -a
+        if type -p ss; then
+            echo "# ss -tulpn:"
+            ss -tulpn
+        fi
+        return
     fi
 
     case "$op" in