Skip to content

Commit

Permalink
Add Retry Mechanism to E2E EC2 Terraform Deployment (#635)
Browse files Browse the repository at this point in the history
* Add Retry Mechanism to E2E EC2 Terraform Deployment

* Add Extra Comments

* Refactor code
  • Loading branch information
harrryr authored Dec 14, 2023
1 parent efb16c6 commit 5d7feed
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 26 deletions.
88 changes: 63 additions & 25 deletions .github/workflows/appsignals-e2e-ec2-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,18 +56,72 @@ jobs:
with:
terraform_wrapper: false

- name: Deploy sample app via terraform
- name: Deploy sample app via terraform and wait for endpoint to come online
working-directory: testing/terraform/ec2
run: |
terraform init
terraform validate
terraform apply -auto-approve \
-var="aws_region=${{ env.AWS_DEFAULT_REGION }}" \
-var="test_id=${{ env.TESTING_ID }}" \
-var="sample_app_jar=${{ env.SAMPLE_APP_FRONTEND_SERVICE_JAR }}" \
-var="sample_remote_app_jar=${{ env.SAMPLE_APP_REMOTE_SERVICE_JAR }}" \
-var="cw_agent_rpm=${{ env.APP_SIGNALS_CW_AGENT_RPM }}" \
-var="adot_jar=${{ env.APP_SIGNALS_ADOT_JAR }}"
# Attempt to deploy the sample app on an EC2 instance and wait for its endpoint to come online.
# There may be occasional failures due to transient issues, so try up to 2 times.
# deployment_failed of 0 indicates that both the terraform deployment and the endpoint are running, while 1 indicates
# that it failed at some point
# Deploy the sample app with terraform and poll its endpoint, retrying the whole
# deploy + poll cycle up to max_retry times. Before each retry the resources from
# the failed attempt are destroyed. The step exits 1 once every attempt has failed.
retry_counter=0
max_retry=2
while [ "$retry_counter" -lt "$max_retry" ]; do
  echo "Attempt $retry_counter"
  # deployment_failed is 0 while the current attempt is healthy and non-zero once
  # either the terraform apply or the endpoint check has failed.
  deployment_failed=0
  terraform apply -auto-approve \
    -var="aws_region=${{ env.AWS_DEFAULT_REGION }}" \
    -var="test_id=${{ env.TESTING_ID }}" \
    -var="sample_app_jar=${{ env.SAMPLE_APP_FRONTEND_SERVICE_JAR }}" \
    -var="sample_remote_app_jar=${{ env.SAMPLE_APP_REMOTE_SERVICE_JAR }}" \
    -var="cw_agent_rpm=${{ env.APP_SIGNALS_CW_AGENT_RPM }}" \
    -var="adot_jar=${{ env.APP_SIGNALS_ADOT_JAR }}" \
    || deployment_failed=$?

  # Treat ANY non-zero exit status as a failure. Comparing against exactly 1 would
  # let statuses such as 127 or 137 fall through to the success branch below.
  if [ "$deployment_failed" -ne 0 ]; then
    echo "Terraform deployment was unsuccessful. Will attempt to retry deployment."
  fi

  # If deployment_failed is still 0, the terraform deployment succeeded, so now try
  # to connect to the endpoint. Attempts are made for up to 10 minutes
  # (60 attempts, 10 seconds apart).
  if [ "$deployment_failed" -eq 0 ]; then
    echo "Attempting to connect to the endpoint"
    sample_app_endpoint=http://$(terraform output sample_app_main_service_public_dns):8080
    # Strip the double quotes from the terraform output once, up front, instead of
    # on every poll.
    sample_app_endpoint=$(printf '%s' "$sample_app_endpoint" | tr -d '"')
    attempt_counter=0
    max_attempts=60
    # Use curl's exit status directly; --fail makes HTTP error responses count as
    # a failed attempt.
    until curl --output /dev/null --silent --head --fail "$sample_app_endpoint"; do
      if [ "$attempt_counter" -eq "$max_attempts" ]; then
        echo "Failed to connect to endpoint. Will attempt to redeploy sample app."
        deployment_failed=1
        break
      fi
      printf '.'
      attempt_counter=$((attempt_counter + 1))
      sleep 10
    done
  fi

  # If deployment_failed is non-zero then either the terraform deployment or the
  # endpoint connection failed, so first destroy the resources created by terraform
  # and try again.
  if [ "$deployment_failed" -ne 0 ]; then
    echo "Destroying terraform"
    terraform destroy -auto-approve \
      -var="test_id=${{ env.TESTING_ID }}"
    retry_counter=$((retry_counter + 1))
  else
    # If deployment succeeded, then exit the loop
    break
  fi

  # Every attempt has failed: fail the workflow step.
  if [ "$retry_counter" -eq "$max_retry" ]; then
    echo "Max retry reached, failed to deploy terraform and connect to the endpoint. Exiting code"
    exit 1
  fi
done
- name: Get the ec2 instance ami id
run: |
Expand All @@ -80,22 +134,6 @@ jobs:
echo "REMOTE_SERVICE_IP=$(terraform output sample_app_remote_service_public_ip)" >> $GITHUB_ENV
working-directory: testing/terraform/ec2

- name: Wait for app endpoint to come online
id: endpoint-check
run: |
# Poll the main service endpoint with HEAD requests until one succeeds.
# Retries every 10 seconds; after max_attempts failed retries the step exits 1.
attempt_counter=0
max_attempts=30
# Use curl's exit status directly instead of executing its (empty) output via $(...).
until curl --output /dev/null --silent --head --fail http://${{ env.MAIN_SERVICE_ENDPOINT }}; do
  if [ "${attempt_counter}" -eq "${max_attempts}" ]; then
    echo "Max attempts reached"
    exit 1
  fi
  printf '.'
  attempt_counter=$((attempt_counter + 1))
  sleep 10
done
# This step increases the speed of the validation by creating the telemetry data in advance
- name: Call all test APIs
continue-on-error: true
Expand Down Expand Up @@ -182,4 +220,4 @@ jobs:
working-directory: testing/terraform/ec2
run: |
terraform destroy -auto-approve \
-var="test_id=${{ env.TESTING_ID }}"
-var="test_id=${{ env.TESTING_ID }}"
2 changes: 1 addition & 1 deletion .github/workflows/appsignals-e2e-eks-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -332,4 +332,4 @@ jobs:
--name service-account-${{ env.TESTING_ID }} \
--namespace ${{ env.SAMPLE_APP_NAMESPACE }} \
--cluster ${{ inputs.test-cluster-name }} \
--region ${{ env.AWS_DEFAULT_REGION }}
--region ${{ env.AWS_DEFAULT_REGION }}

0 comments on commit 5d7feed

Please sign in to comment.