Skip to content

Commit

Permalink
tests: Add test for certificates renewal beacon
Browse files Browse the repository at this point in the history
This test reconfigures the beacons and overrides
the pillar configuration to force the renewal
of all the certificates and kubeconfigs.

The goal is to ensure that beacons work well
and that nothing is broken in the cluster even
when everything is triggered at the very same
time.

Refs: #1887
  • Loading branch information
alexandre-allard committed Dec 11, 2020
1 parent 553e76b commit 019e992
Show file tree
Hide file tree
Showing 3 changed files with 230 additions and 0 deletions.
10 changes: 10 additions & 0 deletions eve/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2058,6 +2058,16 @@ stages:
PYTEST_ARGS: "--suppress-no-test-exit-code"
PYTEST_FILTERS: >
post and ci and slow and not bootstrap and not restore
# Copy the certificates beacon test script to the "bootstrap" host, then run
# it there through sudo, giving /var/tmp/metalk8s as the archive mountpoint.
# The build halts if this step fails (haltOnFailure).
- ShellCommand:
name: Certificates expiration beacons test
command: |
scp -F ssh_config %(prop:builddir)s/build/tests/test-certificates-beacon.sh bootstrap:
ssh -F ssh_config bootstrap "sudo ./test-certificates-beacon.sh /var/tmp/metalk8s"
workdir: build/eve/workers/openstack-terraform/terraform/
haltOnFailure: true
# Re-use the `multi_node_fast_tests` anchor to run the fast tests again, this
# time after the certificates renewal triggered by the previous step.
- ShellCommand:
<<: *multi_node_fast_tests
name: Run fast tests on Bastion after certificates renewal
- ShellCommand:
<<: *generate_report_over_ssh
env:
Expand Down
32 changes: 32 additions & 0 deletions scripts/common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -358,3 +358,35 @@ get_salt_env() {
# Print the value of the `id` grain as returned by "$SALT_CALL".
# `cut -c 8-` drops the first 7 characters of every output line; presumably
# this strips the "local: " prefix written by the `txt` outputter -- TODO
# confirm against the actual salt-call output.
get_salt_minion_id() {
"$SALT_CALL" --out txt grains.get id | cut -c 8-
}

# Print the `id` grain of every minion targeted by '*', one per line.
#
# The query is run with `salt` inside the container returned by
# get_salt_container, through `retry` (5 retries, 10 seconds apart).
# Only the second space-separated field of each output line is kept
# (presumably the `txt` outputter prints "<minion>: <id>" -- TODO confirm).
get_salt_minion_ids() {
    local master_container

    master_container=$(get_salt_container)

    # Run the pipeline in a subshell so that `pipefail` is only enabled for
    # it: a failure of the Salt query is reported instead of being hidden
    # by the exit status of `cut`, and the caller's options are untouched.
    (
        set -o pipefail
        retry 5 10 \
            crictl exec -i "$master_container" \
            salt '*' grains.get id --out txt \
            | cut -d ' ' -f 2
    )
}

#######################################
# Run a command until it succeeds, retrying a bounded number of times.
# Arguments:
#   $1 - number of retries allowed after the first failed attempt
#   $2 - delay, in seconds, between two attempts
#   $@ - command to run, with its arguments
# Outputs:
#   On success, the standard output of the successful attempt. The standard
#   output of failed attempts is discarded.
#   On failure, an error message on stderr.
# Returns:
#   0 on success, otherwise the exit code of the last failed attempt.
#######################################
retry() {
    local stdout
    local -i try=0 exit_code=0
    local -ri retries=$1 sleep_time=$2
    shift 2

    until stdout=$("$@"); do
        exit_code=$?
        (( ++try ))
        if (( try > retries )); then
            echo "Failed to run '$*' after $retries retries." >&2
            return "$exit_code"
        fi
        sleep "$sleep_time"
    done

    # printf rather than echo: echo would interpret an output such as "-n"
    # or "-e" as an option and print nothing.
    printf '%s\n' "$stdout"
}
188 changes: 188 additions & 0 deletions tests/test-certificates-beacon.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
#!/bin/bash
# This script forces the regeneration of all certificates (including
# kubeconfigs) on a MetalK8s platform. It is meant for test purposes only
# and should not be run on a production platform.
#
# Usage: test-certificates-beacon.sh ARCHIVE_MOUNTPOINT
#
# Environment:
#   WAIT_RENEWAL  number of seconds, defaults to 240

set -o pipefail

if [[ -z "${1:-}" ]]; then
    echo "Usage: $0 ARCHIVE_MOUNTPOINT" >&2
    exit 2
fi

ARCHIVE_MOUNTPOINT=$1
DAYS_VALID=370
# DAYS_REMAINING must be lower than DAYS_VALID to avoid
# renewing certificates on every Salt highstate run
DAYS_REMAINING=365
# BEACON_NOTIFY_DAYS must be greater than or equal to DAYS_REMAINING
# in order to trigger a certificate renewal. It should also be lower
# than DAYS_VALID to avoid firing an event on every beacon run.
BEACON_NOTIFY_DAYS=365
BEACON_INTERVAL=60
ARCHIVE_PRODUCT_INFO=$ARCHIVE_MOUNTPOINT/product.txt
SALT_DEFAULTS=$ARCHIVE_MOUNTPOINT/salt/metalk8s/defaults.yaml
OVERRIDE_ROOT_CONF=/etc/salt/master.d/90-metalk8s-root-override.conf
OVERRIDE_PILLAR_DEST=/etc/salt/pillar-override
WAIT_RENEWAL=${WAIT_RENEWAL:-240}

# Fail early with a clear message instead of carrying on with undefined
# helpers and variables when the mountpoint is wrong: a failing `.` does
# not stop the script by itself.
for required_file in \
    "$ARCHIVE_PRODUCT_INFO" \
    "$ARCHIVE_MOUNTPOINT/common.sh" \
    "$SALT_DEFAULTS"; do
    if [[ ! -r "$required_file" ]]; then
        echo "Cannot read '$required_file'," \
            "check the archive mountpoint '$ARCHIVE_MOUNTPOINT'." >&2
        exit 1
    fi
done
unset required_file

# shellcheck disable=SC1090
. "$ARCHIVE_PRODUCT_INFO"
# shellcheck disable=SC1090
. "$ARCHIVE_MOUNTPOINT/common.sh"

#######################################
# Override the pillar served by the Salt master with the certificates
# validity settings of this test, then stop the Salt master container and
# wait for the salt-master pod to be Ready again.
# Globals (read):
#   ARCHIVE_MOUNTPOINT, VERSION, OVERRIDE_ROOT_CONF, OVERRIDE_PILLAR_DEST,
#   DAYS_REMAINING, DAYS_VALID
# Returns:
#   1 if the override files could not be written, otherwise the exit
#   status of the final `kubectl wait`.
#######################################
override_pillar_conf() {
    # Text appended by sed right after the "metalk8s-{{ version }}:" line of
    # top.sls. The escaped spaces keep sed from stripping the indentation of
    # the first line, and the list items are indented deeper than the '*'
    # key so that they stay nested under it.
    local -r certs_pillar_match="\ \ '*':\n    - match: compound\n    - certificates\n"

    mkdir -p "${OVERRIDE_ROOT_CONF%/*}" "${OVERRIDE_PILLAR_DEST%/*}" \
        || return 1

    # Always start from a pristine copy: if a previous run left the
    # destination behind, `cp -r` would nest the copy in a "pillar"
    # sub-directory and top.sls would be patched a second time.
    rm -rf -- "${OVERRIDE_PILLAR_DEST:?}" || return 1
    cp -rp "$ARCHIVE_MOUNTPOINT/pillar" "$OVERRIDE_PILLAR_DEST" || return 1

    cat > "$OVERRIDE_ROOT_CONF" << EOF || return 1
pillar_roots:
  metalk8s-$VERSION:
    - "$OVERRIDE_PILLAR_DEST"
EOF

    cat > "$OVERRIDE_PILLAR_DEST/certificates.sls" << EOF || return 1
certificates:
  client:
    days_remaining: $DAYS_REMAINING
    days_valid: $DAYS_VALID
  kubeconfig:
    days_remaining: $DAYS_REMAINING
    days_valid: $DAYS_VALID
  server:
    days_remaining: $DAYS_REMAINING
    days_valid: $DAYS_VALID
EOF

    sed -i "/^metalk8s-{{ version }}:$/a $certs_pillar_match" \
        "$OVERRIDE_PILLAR_DEST/top.sls" \
        || return 1

    # The new configuration files are presumably only read when the Salt
    # master starts, hence the container stop -- TODO confirm.
    crictl stop "$(get_salt_container)"

    echo "Wait for Salt master to be ready..."
    kubectl wait pods --for=condition=Ready \
        --selector app.kubernetes.io/name=salt-master \
        --namespace kube-system \
        --kubeconfig /etc/kubernetes/admin.conf
}

#######################################
# Apply the `metalk8s.beacon.certificates` state on every Salt minion, with
# a pillar overriding the beacon `notify_days` and `interval` settings.
# Globals (read):
#   BEACON_NOTIFY_DAYS, BEACON_INTERVAL
# Exits:
#   Terminates the script with status 1 if no minion is found or if the
#   state cannot be applied on a minion after all retries.
#######################################
apply_new_beacon_conf() {
    local salt_container minion
    local -a minions=()
    local -ri retries=5 sleep_time=10
    # Joined with spaces below, so that the whole pillar is given to Salt
    # as a single one-line argument.
    local -ra pillar=(
        "{"
        " 'certificates': {"
        " 'beacon': {"
        " 'notify_days': $BEACON_NOTIFY_DAYS,"
        " 'interval': $BEACON_INTERVAL"
        " }"
        " }"
        "}"
    )

    readarray -t minions < <(get_salt_minion_ids)

    # A failure of get_salt_minion_ids is hidden by the process
    # substitution and only shows up as an empty list: without this check
    # the function would succeed without configuring anything.
    if (( ${#minions[@]} == 0 )); then
        echo "No Salt minion found, cannot apply beacon configuration." >&2
        exit 1
    fi

    salt_container=$(get_salt_container)

    # The state is applied minion by minion rather than with the '*'
    # target, which proved much more flaky; it also allows retrying a few
    # times for each minion.
    for minion in "${minions[@]}"; do
        echo "Applying new beacon configuration on $minion..."
        retry "$retries" "$sleep_time" \
            crictl exec -i "$salt_container" \
            salt "$minion" state.apply metalk8s.beacon.certificates \
            pillar="${pillar[*]}" \
            || exit 1
    done
}

#######################################
# Check, on every Salt minion, that each watched certificate has been
# regenerated after the start of the test, i.e. that the `ctime` of the
# file is greater than TIMESTAMP.
# Globals (read):
#   SALT_DEFAULTS, TIMESTAMP
# Outputs:
#   One "- OK" or "- FAILED" line per certificate on stdout, errors on
#   stderr.
# Returns:
#   0 if all the watched certificates were regenerated, 1 otherwise. It
#   returns (rather than exits) on errors, so that the caller can still
#   restore the Salt configuration.
#######################################
check_certificates_renewal() {
    local -i return_code=0
    local -ri retries=5 time_sleep=10
    local -a minions=() certificates=()
    local salt_container certificates_pillar watched_certs
    local minion certificate ctime

    salt_container=$(get_salt_container)

    readarray -t minions < <(get_salt_minion_ids)

    # An empty list would make the check pass without verifying anything
    if (( ${#minions[@]} == 0 )); then
        echo "No Salt minion found, cannot check certificates renewal." >&2
        return 1
    fi

    for minion in "${minions[@]}"; do
        echo "Checking certificates for $minion..."

        certificates_pillar=$(
            retry "$retries" "$time_sleep" \
                crictl exec -i "$salt_container" \
                salt "$minion" pillar.get certificates \
                --out json --out-indent -1
        ) || return 1

        # The defaults path and the pillar are given as arguments instead
        # of being interpolated in the Python source: a quote or a
        # backslash in the JSON can then no longer break the script.
        watched_certs=$(
            python - "$SALT_DEFAULTS" "$certificates_pillar" <<'EOF'
import sys
import yaml

defaults_path, raw_pillar = sys.argv[1:3]

with open(defaults_path, 'r') as fd:
    defaults = yaml.safe_load(fd)['certificates']

# The output is keyed by minion ID, only the first value is used.
# next(iter(...)) works with both Python 2 and Python 3.
pillar = next(iter(yaml.safe_load(raw_pillar).values()))

watched_certs = []
for cert_type in ('client', 'kubeconfig', 'server'):
    for cert, infos in pillar.get(cert_type, {}).get('files', {}).items():
        if infos.get('watched', defaults[cert_type]['files'][cert]['watched']):
            watched_certs.append(
                infos.get('path', defaults[cert_type]['files'][cert]['path'])
            )
print("\n".join(watched_certs))
EOF
        ) || return 1

        if [[ -z "$watched_certs" ]]; then
            echo "- FAILED: no watched certificate found for $minion."
            return_code=1
            continue
        fi

        readarray -t certificates <<< "$watched_certs"

        for certificate in "${certificates[@]}"; do
            # Sometimes it may fail if the minion is loaded
            ctime=$(
                # Report a failure of the Salt call rather than the exit
                # status of awk, whatever the options of the caller.
                set -o pipefail
                retry "$retries" "$time_sleep" \
                    crictl exec -i "$salt_container" \
                    salt "$minion" file.stats "$certificate" --out yaml \
                    | awk '$1 == "ctime:" { printf "%.0f", $2 }'
            ) || return 1

            if (( ctime > TIMESTAMP )); then
                echo "- OK: $certificate successfully regenerated at $ctime."
            else
                echo "- FAILED: $certificate not regenerated."
                return_code=1
            fi
        done
    done

    return $return_code
}

# Remove the pillar override written for the test (configuration file and
# pillar copy), then stop the Salt master container so that the removal is
# taken into account (presumably the container is restarted automatically
# -- TODO confirm).
reset_pillar_conf() {
    local override master_container

    for override in "$OVERRIDE_ROOT_CONF" "$OVERRIDE_PILLAR_DEST"; do
        rm -rf "$override"
    done

    master_container=$(get_salt_container)
    crictl stop "$master_container"

    # Look the container up once more before returning, in order to wait
    # for the master to come back before the script exits
    get_salt_container
}

# Start of the test: certificates are considered as renewed when they have
# been changed after this date.
TIMESTAMP=$(date +%s)

echo "Tests start at $TIMESTAMP."

# Restore the Salt configuration on every exit path: a helper such as
# apply_new_beacon_conf may end the script with `exit 1`, which would
# otherwise leave the pillar override in place on the platform.
# The exit status of the script is the one given to `exit`, unless the
# handler itself calls `exit`.
restore_salt_configuration() {
    echo "Resetting pillar configuration..."
    reset_pillar_conf
}
trap restore_salt_configuration EXIT

# Update Salt configuration to trigger certificates renewal
echo "Overriding pillar configuration..."
override_pillar_conf
echo "Applying new beacon configuration..."
apply_new_beacon_conf

# Let the beacon run at least once, then leave WAIT_RENEWAL seconds for the
# certificates to be renewed.
SLEEP_TIME=$(( BEACON_INTERVAL + WAIT_RENEWAL ))
echo "Waiting ${SLEEP_TIME}s for certificates to be regenerated..."
sleep "$SLEEP_TIME"

echo "Checking certificates renewal..."
check_certificates_renewal

EXIT_CODE=$?

exit "$EXIT_CODE"

0 comments on commit 019e992

Please sign in to comment.