[Python] Add ADOT SigV4 release test workflow + validations (#377)
1 parent e837423, commit ee4b9b6
Showing 21 changed files with 2,276 additions and 3 deletions.
@@ -0,0 +1,235 @@
## Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
## SPDX-License-Identifier: Apache-2.0

# This is a reusable workflow for running the Python Enablement Canary test for Application Signals.
# It is meant to be called from another workflow.
# Read more about reusable workflows: https://docs.github.com/en/actions/using-workflows/reusing-workflows#overview
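#
# For illustration only: a minimal sketch of how a caller workflow might invoke this reusable
# workflow. The job name, ref, and workflow file path below are hypothetical, not part of this commit.
#
#   jobs:
#     python-ec2-adot-sigv4:
#       uses: aws-observability/aws-application-signals-test-framework/.github/workflows/python-ec2-adot-sigv4-test.yml@main   # hypothetical path
#       secrets: inherit
#       with:
#         caller-workflow-name: 'main-build'
#         python-version: '3.10'
#         cpu-architecture: 'x86_64'
#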
name: Python EC2 ADOT SigV4 (Stand-Alone ADOT) Use Case
on:
  workflow_call:
    inputs:
      caller-workflow-name:
        required: true
        type: string
      python-version:
        description: "Currently supported versions: 3.8, 3.9, 3.10, 3.11, 3.12"
        required: false
        type: string
        default: '3.9'
      cpu-architecture:
        description: "Permitted values: x86_64 or arm64"
        required: false
        type: string
        default: "x86_64"
      staging-wheel-name:
        required: false
        default: 'aws-opentelemetry-distro'
        type: string

permissions:
  id-token: write
  contents: read

env:
  E2E_TEST_AWS_REGION: 'us-west-2'
  CALLER_WORKFLOW_NAME: ${{ inputs.caller-workflow-name }}
  PYTHON_VERSION: ${{ inputs.python-version }}
  CPU_ARCHITECTURE: ${{ inputs.cpu-architecture }}
  ADOT_WHEEL_NAME: ${{ inputs.staging-wheel-name }}
  E2E_TEST_ACCOUNT_ID: ${{ secrets.APPLICATION_SIGNALS_E2E_TEST_ACCOUNT_ID }}
  E2E_TEST_ROLE_NAME: ${{ secrets.APPLICATION_SIGNALS_E2E_TEST_ROLE_NAME }}
  METRIC_NAMESPACE: ApplicationSignals
  LOG_GROUP_NAME: aws/spans
  TEST_RESOURCES_FOLDER: ${GITHUB_WORKSPACE}

jobs:
  python-ec2-adot-sigv4:
    runs-on: ubuntu-latest
    timeout-minutes: 30
    steps:
      - uses: actions/checkout@v4
        with:
          repository: 'aws-observability/aws-application-signals-test-framework'
          ref: ${{ inputs.caller-workflow-name == 'main-build' && 'main' || github.ref }}
          fetch-depth: 0

      # We initialize the Gradlew Daemon early on during the workflow because sometimes initialization
      # fails due to transient issues. If it fails here, then we will try again later before the validators.
      - name: Initiate Gradlew Daemon
        id: initiate-gradlew
        uses: ./.github/workflows/actions/execute_and_retry
        continue-on-error: true
        with:
          command: "./gradlew :validator:build"
          cleanup: "./gradlew clean"
          max_retry: 3
          sleep_time: 60

      - name: Generate testing id
        run: echo TESTING_ID="${{ github.run_id }}-${{ github.run_number }}-${RANDOM}" >> $GITHUB_ENV

      - name: Refresh AWS Credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: arn:aws:iam::${{ env.E2E_TEST_ACCOUNT_ID }}:role/${{ env.E2E_TEST_ROLE_NAME }}
          aws-region: ${{ env.E2E_TEST_AWS_REGION }}

      - name: Set Get ADOT Wheel command environment variable
        run: |
          if [ "${{ github.event.repository.name }}" = "aws-otel-python-instrumentation" ]; then
            # Reusing the adot-main-build-staging-jar bucket to store the python wheel file
            echo GET_ADOT_WHEEL_COMMAND="aws s3 cp s3://adot-main-build-staging-jar/${{ env.ADOT_WHEEL_NAME }} ./${{ env.ADOT_WHEEL_NAME }} && sudo python${{ env.PYTHON_VERSION }} -m pip install ${{ env.ADOT_WHEEL_NAME }}" >> $GITHUB_ENV
          else
            latest_release_version=$(curl -sL https://github.com/aws-observability/aws-otel-python-instrumentation/releases/latest | grep -oP '/releases/tag/v\K[0-9]+\.[0-9]+\.[0-9]+' | head -n 1)
            echo "The latest version is $latest_release_version"
            echo GET_ADOT_WHEEL_COMMAND="wget -O ${{ env.ADOT_WHEEL_NAME }} https://github.com/aws-observability/aws-otel-python-instrumentation/releases/latest/download/aws_opentelemetro_distro-$latest_release_version-py3-none-any.whl \
              && sudo python${{ env.PYTHON_VERSION }} -m pip install ${{ env.ADOT_WHEEL_NAME }}" >> $GITHUB_ENV
          fi
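      # For illustration only (the version number and defaults below are hypothetical): on the
      # release path above, GET_ADOT_WHEEL_COMMAND expands to something like
      #   wget -O aws-opentelemetry-distro \
      #     https://github.com/aws-observability/aws-otel-python-instrumentation/releases/latest/download/aws_opentelemetry_distro-1.2.3-py3-none-any.whl \
      #     && sudo python3.9 -m pip install aws-opentelemetry-distro
      # The resolved command is handed to terraform below via the get_adot_wheel_command variable.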
      - name: Set up terraform
        uses: ./.github/workflows/actions/execute_and_retry
        with:
          command: "wget -O- https://apt.releases.hashicorp.com/gpg | sudo gpg --dearmor -o /usr/share/keyrings/hashicorp-archive-keyring.gpg"
          post-command: 'echo "deb [signed-by=/usr/share/keyrings/hashicorp-archive-keyring.gpg] https://apt.releases.hashicorp.com $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/hashicorp.list
            && sudo apt update && sudo apt install terraform'
          sleep_time: 60

      - name: Initiate Terraform
        uses: ./.github/workflows/actions/execute_and_retry
        with:
          command: "cd ${{ env.TEST_RESOURCES_FOLDER }}/terraform/python/ec2/adot-sigv4 && terraform init && terraform validate"
          cleanup: "rm -rf .terraform && rm -rf .terraform.lock.hcl"
          max_retry: 6
          sleep_time: 60

      - name: Deploy sample app via terraform and wait for endpoint to come online
        working-directory: terraform/python/ec2/adot-sigv4
        run: |
          # Attempt to deploy the sample app on an EC2 instance and wait for its endpoint to come online.
          # There may be occasional failures due to transient issues, so try up to 2 times.
          # deployment_failed of 0 indicates that both the terraform deployment and the endpoint are running, while 1 indicates
          # that it failed at some point.
          retry_counter=0
          max_retry=2
          while [ $retry_counter -lt $max_retry ]; do
            echo "Attempt $retry_counter"
            deployment_failed=0
            terraform apply -auto-approve \
              -var="aws_region=${{ env.E2E_TEST_AWS_REGION }}" \
              -var="test_id=${{ env.TESTING_ID }}" \
              -var="sample_app_zip=s3://aws-appsignals-sample-app-prod-us-east-1/python-sample-app.zip" \
              -var="get_adot_wheel_command=${{ env.GET_ADOT_WHEEL_COMMAND }}" \
              -var="language_version=${{ env.PYTHON_VERSION }}" \
              -var="cpu_architecture=${{ env.CPU_ARCHITECTURE }}" \
              || deployment_failed=$?
            # If deployment_failed is 1, then either the terraform deployment or the endpoint connection failed,
            # so first destroy the resources created from terraform and try again.
            if [ $deployment_failed -eq 1 ]; then
              echo "Terraform deployment was unsuccessful. Will attempt to retry deployment."
              echo "Destroying terraform"
              terraform destroy -auto-approve \
                -var="test_id=${{ env.TESTING_ID }}"
              retry_counter=$(($retry_counter+1))
            else
              # If deployment succeeded, then exit the loop
              break
            fi
            if [ $retry_counter -eq $max_retry ]; then
              echo "Max retry reached, failed to deploy terraform and connect to the endpoint. Exiting."
              exit 1
            fi
          done
      - name: Get the ec2 instance ami id
        run: |
          echo "EC2_INSTANCE_AMI=$(terraform output ec2_instance_ami)" >> $GITHUB_ENV
        working-directory: terraform/python/ec2/adot-sigv4

      - name: Get the sample app endpoint
        run: |
          echo "REMOTE_SERVICE_IP=$(terraform output sample_app_remote_service_private_ip)" >> $GITHUB_ENV
          echo "MAIN_SERVICE_INSTANCE_ID=$(terraform output main_service_instance_id)" >> $GITHUB_ENV
        working-directory: terraform/python/ec2/adot-sigv4

      - name: Initiate Gradlew Daemon
        if: steps.initiate-gradlew.outcome == 'failure'
        uses: ./.github/workflows/actions/execute_and_retry
        continue-on-error: true
        with:
          command: "./gradlew :validator:build"
          cleanup: "./gradlew clean"
          max_retry: 3
          sleep_time: 60

      # Validation for pulse telemetry data
      - name: Validate generated EMF logs
        id: log-validation
        run: ./gradlew validator:run --args='-c python/ec2/adot-sigv4/log-validation.yml
          --testing-id ${{ env.TESTING_ID }}
          --endpoint http://localhost:8000
          --remote-service-deployment-name ${{ env.REMOTE_SERVICE_IP }}:8001
          --region ${{ env.E2E_TEST_AWS_REGION }}
          --metric-namespace ${{ env.METRIC_NAMESPACE }}
          --log-group ${{ env.LOG_GROUP_NAME }}
          --service-name python-sample-application-${{ env.TESTING_ID }}
          --remote-service-name python-sample-remote-application-${{ env.TESTING_ID }}
          --query-string ip=${{ env.REMOTE_SERVICE_IP }}&testingId=${{ env.TESTING_ID }}
          --instance-ami ${{ env.EC2_INSTANCE_AMI }}
          --instance-id ${{ env.MAIN_SERVICE_INSTANCE_ID }}
          --rollup'

      - name: Validate generated metrics
        id: metric-validation
        if: (success() || steps.log-validation.outcome == 'failure') && !cancelled()
        run: ./gradlew validator:run --args='-c python/ec2/adot-sigv4/metric-validation.yml
          --testing-id ${{ env.TESTING_ID }}
          --endpoint http://localhost:8000
          --remote-service-deployment-name ${{ env.REMOTE_SERVICE_IP }}:8001
          --region ${{ env.E2E_TEST_AWS_REGION }}
          --metric-namespace ${{ env.METRIC_NAMESPACE }}
          --log-group ${{ env.LOG_GROUP_NAME }}
          --service-name python-sample-application-${{ env.TESTING_ID }}
          --remote-service-name python-sample-remote-application-${{ env.TESTING_ID }}
          --query-string ip=${{ env.REMOTE_SERVICE_IP }}
          --instance-ami ${{ env.EC2_INSTANCE_AMI }}
          --instance-id ${{ env.MAIN_SERVICE_INSTANCE_ID }}
          --rollup'

      - name: Validate generated traces
        id: trace-validation
        if: (success() || steps.log-validation.outcome == 'failure' || steps.metric-validation.outcome == 'failure') && !cancelled()
        run: ./gradlew validator:run --args='-c python/ec2/adot-sigv4/trace-validation.yml
          --testing-id ${{ env.TESTING_ID }}
          --endpoint http://localhost:8000
          --remote-service-deployment-name ${{ env.REMOTE_SERVICE_IP }}:8001
          --region ${{ env.E2E_TEST_AWS_REGION }}
          --account-id ${{ env.E2E_TEST_ACCOUNT_ID }}
          --metric-namespace ${{ env.METRIC_NAMESPACE }}
          --log-group ${{ env.LOG_GROUP_NAME }}
          --service-name python-sample-application-${{ env.TESTING_ID }}
          --remote-service-name python-sample-remote-application-${{ env.TESTING_ID }}
          --query-string ip=${{ env.REMOTE_SERVICE_IP }}&testingId=${{ env.TESTING_ID }}
          --instance-ami ${{ env.EC2_INSTANCE_AMI }}
          --instance-id ${{ env.MAIN_SERVICE_INSTANCE_ID }}
          --rollup'

      - name: Refresh AWS Credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: arn:aws:iam::${{ env.E2E_TEST_ACCOUNT_ID }}:role/${{ env.E2E_TEST_ROLE_NAME }}
          aws-region: ${{ env.E2E_TEST_AWS_REGION }}

      # Clean up Procedures
      - name: Terraform destroy
        if: always()
        continue-on-error: true
        working-directory: terraform/python/ec2/adot-sigv4
        run: |
          terraform destroy -auto-approve \
            -var="test_id=${{ env.TESTING_ID }}"