diff --git a/projects/redrive-dead-letter-queues/container/Dockerfile b/projects/redrive-dead-letter-queues/container/Dockerfile
index f832dfc4da..d8468d8086 100644
--- a/projects/redrive-dead-letter-queues/container/Dockerfile
+++ b/projects/redrive-dead-letter-queues/container/Dockerfile
@@ -12,4 +12,4 @@ USER 1000
 
 COPY --chown=app /scripts /scripts
 
-ENTRYPOINT ["/scripts/startup.sh"]
+ENTRYPOINT ["bash"]
diff --git a/projects/redrive-dead-letter-queues/container/scripts/notify.sh b/projects/redrive-dead-letter-queues/container/scripts/notify.sh
new file mode 100755
index 0000000000..9db1ad601e
--- /dev/null
+++ b/projects/redrive-dead-letter-queues/container/scripts/notify.sh
@@ -0,0 +1,142 @@
+#!/usr/bin/env bash
+##
+## Report non-empty dead-letter queues to Slack.
+##
+## Finds every SQS queue whose name starts with QUEUE_NAME_PREFIX and ends in
+## "-dlq", reads its approximate message count, and posts a per-environment
+## summary of the non-empty queues to Slack. Exits 0 without posting when all
+## of the queues are empty.
+##
+## Required environment variables:
+##   QUEUE_NAME_PREFIX  queues are named <prefix>-<environment>-<service>-dlq
+##   SLACK_TOKEN        bearer token sent to the Slack API
+##   SLACK_CHANNEL      channel the report is posted to
+##
+set -euo pipefail
+eval "$(sentry-cli bash-hook --no-environ)"
+
+queue_urls=$(aws sqs list-queues --queue-name-prefix "$QUEUE_NAME_PREFIX" | jq -r '.QueueUrls[] | select(endswith("-dlq"))')
+
+queue_stats_json='{}' # Example: {"dev": {"queue1": 1, "queue2": 2, "queue3": 3}, "preprod": {"queue1": 1}}
+for queue_url in $queue_urls; do
+  count=$(aws sqs get-queue-attributes --queue-url "$queue_url" --attribute-names ApproximateNumberOfMessages --query 'Attributes.ApproximateNumberOfMessages' --output text)
+  # Anchored expansions rather than sed: the prefix is matched literally at the
+  # start and "-dlq" only at the end, so neither can hit the middle of a name.
+  queue_name=${queue_url##*/}
+  queue_name=${queue_name#"$QUEUE_NAME_PREFIX-"}
+  queue_name=${queue_name%-dlq}
+  environment_name=${queue_name%%-*}
+  service_name=${queue_name#*-}
+
+  if [ "$count" -ne 0 ]; then
+    echo "$service_name has $count messages"
+    queue_stats_json=$(jq --arg key "$environment_name" --arg key2 "$service_name" --arg value "$count" '.[$key][$key2] = ($value | tonumber)' <<< "$queue_stats_json")
+  fi
+done
+
+if [ "$queue_stats_json" = '{}' ]; then
+  echo "Dead-letter queues are empty!"
+  exit 0
+fi
+
+echo "$queue_stats_json"
+echo "Posting dead-letter queue stats to Slack..."
+# --fail makes curl exit non-zero on an HTTP error status, which aborts the script via `set -e`.
+slack_response=$(curl --fail --silent --show-error -H "Authorization: Bearer $SLACK_TOKEN" --json "$(echo "$queue_stats_json" | jq -rc --arg SLACK_CHANNEL "$SLACK_CHANNEL" '. | {
+  "channel": $SLACK_CHANNEL,
+  "unfurl_links": false,
+  "unfurl_media": false,
+  "blocks": ([
+    {
+      "type": "header",
+      "text": {
+        "type": "plain_text",
+        "text": "🚦 Dead-letter queue report"
+      }
+    },
+    {
+      "type": "divider"
+    },
+    {
+      "type": "context",
+      "elements": [
+        {
+          "type": "mrkdwn",
+          "text": "There are un-processed messages on the dead-letter queues. Review the messages in the AWS Console and check for issues."
+        }
+      ]
+    }
+  ]
+  +
+  (. | to_entries | map(
+    {
+      "type": "section",
+      "text": {
+        "type": "mrkdwn",
+        "text": ("*" + .key + "*")
+      }
+    },
+    {
+      "type": "divider"
+    },
+    (.key as $environment_name | .value | to_entries | map(
+      [
+        {
+          "type": "section",
+          "text": {
+            "text": (">" + .key),
+            "type": "mrkdwn"
+          },
+          "accessory": {
+            "type": "button",
+            "text": {
+              "type": "plain_text",
+              "text": (.value | tostring)
+            },
+            "url": ("https://eu-west-2.console.aws.amazon.com/sqs/v3/home?region=eu-west-2#/queues/https%3A%2F%2Fsqs.eu-west-2.amazonaws.com%2F754256621582%2Fprobation-integration-" + $environment_name + "-" + .key + "-dlq")
+          }
+        }
+      ]
+    ) | flatten)[]
+  ))
+  +
+  [
+    {
+      "type": "divider"
+    },
+    {
+      "type": "actions",
+      "elements": [
+        {
+          "type": "button",
+          "text": {
+            "type": "plain_text",
+            "text": ":aws: AWS Console"
+          },
+          "url": "https://justice-cloud-platform.eu.auth0.com/samlp/mQev56oEa7mrRCKAZRxSnDSoYt6Y7r5m?connection=github"
+        },
+        {
+          "type": "button",
+          "text": {
+            "type": "plain_text",
+            "text": ":prometheus: Prometheus"
+          },
+          "url": "https://prometheus.live.cloud-platform.service.justice.gov.uk/graph?g0.expr=(sum%20by%20(queue_name)%20(aws_sqs_approximate_number_of_messages_visible_maximum%7Bqueue_name%3D~%22.*probation-integration-.*-dlq%22%7D%20offset%205m)%20%3E%200)&g0.tab=0&g0.display_mode=lines&g0.show_exemplars=0&g0.range_input=1d"
+        },
+        {
+          "type": "button",
+          "text": {
+            "type": "plain_text",
+            "text": ":sentry: Sentry"
+          },
+          "url": "https://ministryofjustice.sentry.io/issues/?statsPeriod=14d"
+        }
+      ]
+    }
+  ])
+}')" https://slack.com/api/chat.postMessage)
+echo "$slack_response"
+
+# Slack reports most API errors as HTTP 200 with "ok": false in the body.
+# jq -e exits non-zero unless .ok is truthy, which fails the script under `set -e`.
+jq -e '.ok' <<< "$slack_response" > /dev/null
diff --git a/projects/redrive-dead-letter-queues/container/scripts/redrive.sh b/projects/redrive-dead-letter-queues/container/scripts/redrive.sh
index 06acff9c2b..dfccbd460a 100755
--- a/projects/redrive-dead-letter-queues/container/scripts/redrive.sh
+++ b/projects/redrive-dead-letter-queues/container/scripts/redrive.sh
@@ -1,8 +1,8 @@
-#!/bin/bash
+#!/usr/bin/env bash
 set -euo pipefail
 eval "$(sentry-cli bash-hook --no-environ)"
 
-queue_urls=$(aws sqs list-queues --queue-name-prefix "probation-integration-$ENVIRONMENT" | jq -r '.QueueUrls[] | select(endswith("-dlq"))')
+queue_urls=$(aws sqs list-queues --queue-name-prefix "$QUEUE_NAME_PREFIX" | jq -r '.QueueUrls[] | select(endswith("-dlq"))')
 
 for queue_url in $queue_urls
 do
diff --git a/projects/redrive-dead-letter-queues/container/scripts/startup.sh b/projects/redrive-dead-letter-queues/container/scripts/startup.sh
deleted file mode 100755
index c318ef5949..0000000000
--- a/projects/redrive-dead-letter-queues/container/scripts/startup.sh
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/bin/bash
-set -euo pipefail
-eval "$(sentry-cli bash-hook --no-environ)"
-sentry-cli monitors run -e "$SENTRY_ENVIRONMENT" redrive-dead-letter-queues -- /scripts/redrive.sh
\ No newline at end of file
diff --git a/projects/redrive-dead-letter-queues/deploy/templates/cronjob.yml b/projects/redrive-dead-letter-queues/deploy/templates/redrive.yml
similarity index 90%
rename from projects/redrive-dead-letter-queues/deploy/templates/cronjob.yml
rename to projects/redrive-dead-letter-queues/deploy/templates/redrive.yml
index 55cfacfbd7..bea5c94810 100644
--- a/projects/redrive-dead-letter-queues/deploy/templates/cronjob.yml
+++ b/projects/redrive-dead-letter-queues/deploy/templates/redrive.yml
@@ -13,6 +13,8 @@ spec:
           containers:
             - name: dlq-redrive
               image: "ghcr.io/ministryofjustice/hmpps-probation-integration-services/redrive-dead-letter-queues:{{ .Values.version }}"
+              command:
+                - "/scripts/redrive.sh"
               securityContext:
                 capabilities:
                   drop:
@@ -33,6 +35,8 @@ spec:
                   value: eu-west-2
                 - name: ENVIRONMENT
                   value: {{ .Values.dlq_redrive.environment }}
+                - name: QUEUE_NAME_PREFIX
+                  value: probation-integration-{{ .Values.dlq_redrive.environment }}
                 - name: SENTRY_ENVIRONMENT
                   value: {{ .Values.dlq_redrive.environment }}
                 - name: SENTRY_DSN
diff --git a/projects/redrive-dead-letter-queues/deploy/values.yaml b/projects/redrive-dead-letter-queues/deploy/values.yaml
index 5cd42c1522..7a180b8de3 100644
--- a/projects/redrive-dead-letter-queues/deploy/values.yaml
+++ b/projects/redrive-dead-letter-queues/deploy/values.yaml
@@ -1,2 +1,2 @@
 dlq_redrive:
-  schedule: 30 5 * * 1 # Every Monday at 05:30 UTC
+  schedule: 30 5 * * 1-5 # Every weekday at 05:30 UTC
diff --git a/script/start-service-pod.sh b/script/start-service-pod.sh
index 7b1bc03d6f..2d63ef2e01 100755
--- a/script/start-service-pod.sh
+++ b/script/start-service-pod.sh
@@ -7,14 +7,17 @@ set -euo pipefail
 ##   NAMESPACE=hmpps-probation-integration POD_NAME="$USER" ./script/start-service-pod.sh
 ##
 
-[ -z "$POD_NAME" ] && echo "Missing POD_NAME" && exit 1
-[ -z "$NAMESPACE" ] && echo "Missing NAMESPACE" && exit 1
+# ${VAR:-} keeps `set -u` enabled while still allowing these variables to be unset.
+if [ -z "${POD_NAME:-}" ]; then echo "Missing POD_NAME"; exit 1; fi
+if [ -z "${NAMESPACE:-}" ]; then echo "Missing NAMESPACE"; exit 1; fi
+# Optional: when SERVICE_ACCOUNT_NAME is set, override the pod spec to run under that service account.
+if [ -n "${SERVICE_ACCOUNT_NAME:-}" ]; then overrides="{\"spec\":{\"serviceAccountName\": \"$SERVICE_ACCOUNT_NAME\"}}"; else overrides="{}"; fi
 
 echo "Starting service pod '$POD_NAME'"
 function delete_pod() { kubectl --namespace="$NAMESPACE" delete pod "$POD_NAME"; }
 trap delete_pod SIGTERM SIGINT
 
-kubectl run "$POD_NAME" --namespace="$NAMESPACE" --image=ghcr.io/ministryofjustice/hmpps-devops-tools:latest -- sleep infinity
+kubectl run "$POD_NAME" --namespace="$NAMESPACE" --overrides="$overrides" --image=ghcr.io/ministryofjustice/hmpps-devops-tools:latest -- sleep infinity
 kubectl wait --namespace="$NAMESPACE" --for=condition=ready pod "$POD_NAME"
 
 echo "Service pod is ready"
\ No newline at end of file