Skip to content

Commit

Permalink
tests/e2e: fix tests and failing github actions
Browse files Browse the repository at this point in the history
  • Loading branch information
iwilltry42 committed Aug 29, 2022
1 parent 80889fa commit 0c20821
Show file tree
Hide file tree
Showing 6 changed files with 41 additions and 19 deletions.
8 changes: 7 additions & 1 deletion .github/workflows/release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,13 @@ jobs:
run: make test
- name: Run E2E Tests
timeout-minutes: 20
run: make e2e -e E2E_PARALLEL=3
# Tests frequently failed with "too many open files" errors, so we raise the open-file and inotify limits before running them.
# We also enable trace-level logging (E2E_LOG_LEVEL=trace) to make failures easier to debug, and stop the pipeline on the first failing test (E2E_FAIL_FAST=true).
run: |
sudo prlimit --pid $$ --nofile=1048576:1048576
sudo sysctl fs.inotify.max_user_instances=1280
sudo sysctl fs.inotify.max_user_watches=655360
make e2e -e E2E_LOG_LEVEL=trace -e E2E_FAIL_FAST=true
# Builds
- name: Test Platform Builds
run: make build-cross
Expand Down
3 changes: 2 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ E2E_KEEP ?=
E2E_PARALLEL ?=
E2E_DIND_VERSION ?=
E2E_K3S_VERSION ?=
E2E_FAIL_FAST ?=

########## Go Build Options ##########
# Build targets
Expand Down Expand Up @@ -179,7 +180,7 @@ test:

e2e:
@echo "Running e2e tests"
LOG_LEVEL="$(E2E_LOG_LEVEL)" E2E_INCLUDE="$(E2E_INCLUDE)" E2E_EXCLUDE="$(E2E_EXCLUDE)" E2E_EXTRA="$(E2E_EXTRA)" E2E_RUNNER_START_TIMEOUT=$(E2E_RUNNER_START_TIMEOUT) E2E_HELPER_IMAGE_TAG="$(E2E_HELPER_IMAGE_TAG)" E2E_KEEP="$(E2E_KEEP)" E2E_PARALLEL="$(E2E_PARALLEL)" E2E_DIND_VERSION="$(E2E_DIND_VERSION)" E2E_K3S_VERSION="$(E2E_K3S_VERSION)" tests/dind.sh "${K3D_IMAGE_TAG}"
LOG_LEVEL="$(E2E_LOG_LEVEL)" E2E_INCLUDE="$(E2E_INCLUDE)" E2E_EXCLUDE="$(E2E_EXCLUDE)" E2E_EXTRA="$(E2E_EXTRA)" E2E_RUNNER_START_TIMEOUT=$(E2E_RUNNER_START_TIMEOUT) E2E_HELPER_IMAGE_TAG="$(E2E_HELPER_IMAGE_TAG)" E2E_KEEP="$(E2E_KEEP)" E2E_PARALLEL="$(E2E_PARALLEL)" E2E_DIND_VERSION="$(E2E_DIND_VERSION)" E2E_K3S_VERSION="$(E2E_K3S_VERSION)" E2E_FAIL_FAST="$(E2E_FAIL_FAST)" tests/dind.sh "${K3D_IMAGE_TAG}"

ci-tests: fmt check e2e

Expand Down
1 change: 1 addition & 0 deletions pkg/client/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -807,6 +807,7 @@ func NodeWaitForLogMessage(ctx context.Context, runtime runtimes.Runtime, node *
time.Sleep(500 * time.Millisecond)
continue
} else {
l.Log().Tracef("Non-fatal last log line in node %s: %s", node.Name, previousline)
// case 2: last log line we saw did not contain a fatal error, so we break the loop here and return a generic error
break
}
Expand Down
18 changes: 13 additions & 5 deletions tests/common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,10 @@ failed() {
elif [[ -n "$LOG_FILE" ]]; then
mv "$LOG_FILE" "$LOG_FILE.failed"
fi
abort "test failed"
abort "$CURRENT_STAGE: test failed"
if [[ "$E2E_FAIL_FAST" == "true" ]]; then
exit 1
fi
}

passed() {
Expand Down Expand Up @@ -73,11 +76,16 @@ check_url() {
check_clusters() {
[ -n "$EXE" ] || abort "EXE is not defined"
for c in "$@" ; do
$EXE kubeconfig merge "$c" --kubeconfig-switch-context || return 1
if kubectl cluster-info ; then
passed "cluster $c is reachable"
if $EXE kubeconfig merge "$c" --kubeconfig-switch-context; then
if kubectl cluster-info ; then
passed "cluster $c is reachable"
else
warn "could not obtain cluster info for $c. Kubeconfig:\n$(kubectl config view)"
docker ps -a
return 1
fi
else
warn "could not obtain cluster info for $c. Kubeconfig:\n$(kubectl config view)"
warn "could not merge kubeconfig for $c."
docker ps -a
return 1
fi
Expand Down
4 changes: 4 additions & 0 deletions tests/dind.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@ RUNNER_START_TIMEOUT=${E2E_RUNNER_START_TIMEOUT:-10}
# Override Docker-in-Docker version
E2E_DIND_VERSION=${E2E_DIND_VERSION:-}

# Fail on first error instead of waiting until all tests are done. Useful in CI.
E2E_FAIL_FAST=${E2E_FAIL_FAST:-}

####################################################################################

CURR_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)"
Expand Down Expand Up @@ -50,6 +53,7 @@ k3de2e=$(docker run -d \
-e EXE="$K3D_EXE" \
-e CI="true" \
-e LOG_LEVEL="$LOG_LEVEL" \
-e E2E_FAIL_FAST="$E2E_FAIL_FAST" \
-e E2E_INCLUDE="$E2E_INCLUDE" \
-e E2E_EXCLUDE="$E2E_EXCLUDE" \
-e E2E_PARALLEL="$E2E_PARALLEL" \
Expand Down
26 changes: 14 additions & 12 deletions tests/test_config_file_from_stdin.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,22 +28,18 @@ fi

export CURRENT_STAGE="Test | config-file-stdin | $K3S_IMAGE"

configfileoriginal="$CURR_DIR/assets/config_test_simple.yaml"
configfile="/tmp/config_test_simple-tmp_$(date -u +'%Y%m%dT%H%M%SZ').yaml"
clustername="configteststdin"

sed -E "s/^ name:.+/ name: $clustername/g" < "$configfileoriginal" > "$configfile" # replace cluster name in config file so we can use it in this script without running into override issues
cat "$configfile"
highlight "[START] ConfigTest $EXTRA_TITLE"

info "Creating cluster $clustername..."

cat <<EOF | $EXE cluster create "$clustername" --config=-
cat <<EOF | $EXE cluster create --config=-
apiVersion: k3d.io/v1alpha4
kind: Simple
metadata:
name: test
servers: 3
name: $clustername
servers: 1
agents: 2
#image: rancher/k3s:latest
volumes:
Expand Down Expand Up @@ -99,8 +95,8 @@ sleep 5
info "Checking that we have access to the cluster..."
check_clusters "$clustername" || failed "error checking cluster"

info "Checking that we have 5 nodes online..."
check_multi_node "$clustername" 5 || failed "failed to verify number of nodes"
info "Checking that we have 3 nodes online..."
check_multi_node "$clustername" 3 || failed "failed to verify number of nodes"

# 2. check some config settings

Expand Down Expand Up @@ -128,10 +124,16 @@ exec_in_node "k3d-$clustername-server-0" "cat /etc/rancher/k3s/registries.yaml"

# Cleanup

info "Deleting cluster $clustername (using config file)..."
$EXE cluster delete --config "$configfile" --trace || failed "could not delete the cluster $clustername"
info "Deleting cluster $clustername (using config file from stdin)..."
cat <<EOF | $EXE cluster delete --config=-
apiVersion: k3d.io/v1alpha4
kind: Simple
metadata:
name: $clustername
EOF

test $? -eq 0 || failed "could not delete cluster $clustername from stdin config"

rm "$configfile"

highlight "[DONE] ConfigTest $EXTRA_TITLE"

Expand Down

0 comments on commit 0c20821

Please sign in to comment.