Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: re-organize deployment pipeline for better failure handling #4486

Merged
merged 2 commits into from
Sep 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/deploy-on-main.yml
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
name: Deploy Main

on:
push:
tags:
Expand Down
40 changes: 6 additions & 34 deletions .github/workflows/deploy-stack.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,29 +46,15 @@ jobs:
environment: ${{ inputs.environment }}
service: ${{ matrix.service }}
version: ${{ inputs.version }}
needs:
- invoke-and-check-cdn
secrets: inherit

set-maintenance-mode:
uses: ./.github/workflows/set-maintenance-mode.yml
with:
environment: ${{ inputs.environment }}
needs:
- invoke-and-check-cdn
secrets: inherit

down: # the backend services
needs:
- set-maintenance-mode
strategy:
fail-fast: false
matrix:
service: [ "pinga", "rebaser", "sdf" ]
uses: ./.github/workflows/down-service.yml
- set-service-versions
uses: ./.github/workflows/set-maintenance-mode.yml
with:
environment: ${{ inputs.environment }}
service: ${{ matrix.service }}
secrets: inherit

upgrade-web:
Expand All @@ -82,33 +68,20 @@ jobs:
upgrade: # the backend services
needs:
- set-service-versions
- down
- set-maintenance-mode
strategy:
fail-fast: false
matrix:
service: [ "pinga", "rebaser", "sdf", "veritech" ]
service: [ "pinga", "rebaser", "veritech" ]
uses: ./.github/workflows/upgrade-service.yml
with:
environment: ${{ inputs.environment }}
service: ${{ matrix.service }}
secrets: inherit

up: # the backend services
upgrade-and-migrate-sdf:
needs:
- upgrade
strategy:
fail-fast: false
matrix:
service: [ "pinga", "rebaser", "veritech" ]
uses: ./.github/workflows/up-service.yml
with:
environment: ${{ inputs.environment }}
service: ${{ matrix.service }}
secrets: inherit

migrate-and-up-sdf:
needs:
- up
uses: ./.github/workflows/migrate-sdf.yml
with:
environment: ${{ inputs.environment }}
Expand All @@ -122,8 +95,7 @@ jobs:
cancel-in-progress: true
needs:
- upgrade-web
- migrate-and-up-sdf
if: always()
- upgrade-and-migrate-sdf
uses: ./.github/workflows/e2e-validation.yml
with:
environment: ${{ inputs.environment }}
Expand Down
9 changes: 8 additions & 1 deletion .github/workflows/migrate-sdf.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,14 @@ jobs:
- name: Checkout code
uses: actions/checkout@v4

- name: Migrate and up service
- name: Upgrade service
run: |
component/toolbox/awsi.sh upgrade -p pull-from-env -r us-east-1 -a y -s sdf -e ${{ inputs.environment }}

- name: Migrate service
run: |
component/toolbox/awsi.sh migrate -p pull-from-env -r us-east-1 -a y -s sdf

- name: Up service
run: |
component/toolbox/awsi.sh service-state -p pull-from-env -r us-east-1 -a y -s sdf -S up
4 changes: 2 additions & 2 deletions .github/workflows/set-maintenance-mode.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,6 @@ jobs:
- name: Checkout code
uses: actions/checkout@v4

- name: Down service
- name: Toggle maintenance
run: |
component/toolbox/awsi.sh toggle-maintenance -p pull-from-env -r us-east-1 -s sdf -m n -a y
component/toolbox/awsi.sh toggle-maintenance -p pull-from-env -r us-east-1 -s sdf -m y -a y
9 changes: 9 additions & 0 deletions .github/workflows/upgrade-service.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ jobs:
environment: ${{ inputs.environment }}
name: ${{ inputs.service }} Upgrade
runs-on: ubuntu-latest
continue-on-error: true
steps:
- name: Configure AWS credentials for ${{ inputs.environment }}
uses: aws-actions/configure-aws-credentials@v4
Expand All @@ -28,10 +29,18 @@ jobs:
- name: Checkout code
uses: actions/checkout@v4

- name: Down service
run: |
component/toolbox/awsi.sh service-state -p pull-from-env -r us-east-1 -a y -s ${{ inputs.service }} -S down

- name: Upgrade service
run: |
component/toolbox/awsi.sh upgrade -p pull-from-env -r us-east-1 -a y -s ${{ inputs.service }} -e ${{ inputs.environment }}

- name: Up service
run: |
component/toolbox/awsi.sh service-state -p pull-from-env -r us-east-1 -a y -s ${{ inputs.service }} -S up

on-failure:
uses: ./.github/workflows/instance-refresh.yml
needs: upgrade
Expand Down
5 changes: 3 additions & 2 deletions .github/workflows/upgrade-web.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,12 @@ jobs:

- uses: actions/checkout@v4

- uses: pnpm/action-setup@v4

- uses: actions/setup-node@v4
with:
node-version: '18.18.2'

- uses: pnpm/action-setup@v4
cache: 'pnpm'

- name: Install dependencies
working-directory: app/web
Expand Down
25 changes: 12 additions & 13 deletions component/toolbox/scripts/migrate
Original file line number Diff line number Diff line change
Expand Up @@ -107,21 +107,20 @@ stop_results_file=stop_results.json
upgrade_results_file=upgrade_results.json
mkdir -p "$results_directory/"

# get the first SDF and go do the thing
# attempt on each SDF until one succeeds or they all fail
while read -r line; do
instance_id=$(echo "$line" | awk '{print $2}')
start_and_track_ssm_session "$instance_id" "$sdf_migrate_script" "$results_directory" "InstanceId=$instance_id"
break
done <<< "$instances"

await_file_results "$results_directory" 1
concat_and_output_json "$results_directory" "$check_results_file"
await_file_results "$results_directory" 1
concat_and_output_json "$results_directory" "$check_results_file"

if jq -e 'all(.[]; .status == "success")' "$results_directory/$check_results_file" > /dev/null; then
echo "SDF database has been migrated"
echo "----------------------------------------"
exit 0
else
echo "Error: One or more of the checks failed to push a node into maintenance mode, try again later or look at the logs"
exit 2
fi
if jq -e 'all(.[]; .status == "success")' "$results_directory/$check_results_file" > /dev/null; then
echo "SDF database has been migrated"
echo "----------------------------------------"
exit 0
fi
done <<< "$instances"

echo "Error: One or more of the checks failed to push a node into maintenance mode, try again later or look at the logs"
exit 2
8 changes: 4 additions & 4 deletions component/toolbox/scripts/ssm-scripts/si-service-maintenance
Original file line number Diff line number Diff line change
Expand Up @@ -22,17 +22,17 @@ mainSteps:
- |
flip_or_stick() {
requested_state=$1
response_code=$(curl -s -o /dev/null -w "%{http_code}" localhost:5156/api/)
[[ "$response_code" == "200" ]] && [[ "$requested_state" == "y" ]] && killall -s USR2 {{ Service }}
response_code=$(curl -s -o /dev/null -w "%{http_code}" localhost:5156/)
[[ "$response_code" == "404" ]] && [[ "$requested_state" == "y" ]] && killall -s USR2 {{ Service }}
[[ "$response_code" == "503" ]] && [[ "$requested_state" == "n" ]] && killall -s USR2 {{ Service }}
}

check_state() {
requested_state=$1
response_code=$(curl -s -o /dev/null -w "%{http_code}" localhost:5156/api/)
response_code=$(curl -s -o /dev/null -w "%{http_code}" localhost:5156/)
if [[ "$response_code" == "503" ]] && [[ "$requested_state" == "y" ]]; then
echo "{\"instance_id\": \"{{ InstanceId }}\", \"status\": \"success\", \"service\": \"{{ Service }}\", \"mode\": \"maintenance\" }"
elif [[ "$response_code" == "200" ]] && [[ "$requested_state" == "n" ]]; then
elif [[ "$response_code" == "404" ]] && [[ "$requested_state" == "n" ]]; then
echo "{\"instance_id\": \"{{ InstanceId }}\", \"status\": \"success\", \"service\": \"{{ Service }}\", \"mode\": \"running\" }"
else
echo "{\"instance_id\": \"{{ InstanceId }}\", \"status\": \"failure\", \"service\": \"{{ Service }}\", \"mode\": \"Status code from API not valid for requested action. Response Code: $response_code, Maintenance Mode Requested: {{ Action }}\" }"
Expand Down
3 changes: 1 addition & 2 deletions component/toolbox/scripts/toggle-maintenance
Original file line number Diff line number Diff line change
Expand Up @@ -125,8 +125,7 @@ i=1
while read -r line; do
instance_id=$(echo "$line" | awk '{print $2}')
service_name=$(echo "$line" | awk '{print $1}' | awk -F- '{print $2}')
start_and_track_ssm_session "$instance_id" "$service_maintenance_script" "$service_name" "$maintenance" "$results_directory" &
start_and_track_ssm_session "$instance_id" "$service_maintenance_script" "$results_directory" "InstanceId=$instance_id,Action=$maintenance" &
start_and_track_ssm_session "$instance_id" "$service_maintenance_script" "$results_directory" "InstanceId=$instance_id,Service=$service_name,Action=$maintenance" &
((i++))
done <<< "$instances"

Expand Down
Loading