diff --git a/.flake8 b/.flake8
new file mode 100644
index 00000000..2bcd70e3
--- /dev/null
+++ b/.flake8
@@ -0,0 +1,2 @@
+[flake8]
+max-line-length = 88
diff --git a/img/logo.png b/.github/img/logo.png
similarity index 100%
rename from img/logo.png
rename to .github/img/logo.png
diff --git a/img/logo.svg b/.github/img/logo.svg
similarity index 100%
rename from img/logo.svg
rename to .github/img/logo.svg
diff --git a/img/stack-diagram.png b/.github/img/stack-diagram.png
similarity index 100%
rename from img/stack-diagram.png
rename to .github/img/stack-diagram.png
diff --git a/.github/workflows/push.yaml b/.github/workflows/push.yaml
index ba2a4432..b7e03090 100644
--- a/.github/workflows/push.yaml
+++ b/.github/workflows/push.yaml
@@ -13,12 +13,13 @@ jobs:
submodules: true
- name: Setup Python
uses: actions/setup-python@v1
- - name: Install CFN Lint
- run: pip install cfn-lint
+ - name: Install Dev Dependencies
+ run: pip install -r dev-requirements.txt
- name: Lint Templates
run: make lint
test:
+ if: contains(github.ref, 'refs/heads/master')
runs-on: ubuntu-latest
needs: lint
steps:
@@ -28,8 +29,8 @@ jobs:
submodules: true
- name: Setup Python
uses: actions/setup-python@v1
- - name: Install TaskCat
- run: pip install git+git://github.com/villasv/taskcat.git@b1011e8f080bad5d0a7cec65559e3c160787d17f#egg=taskcat
+ - name: Install Dev Dependencies
+ run: pip install -r dev-requirements.txt
- name: Configure AWS Credentials
uses: aws-actions/configure-aws-credentials@v1
with:
@@ -40,6 +41,7 @@ jobs:
run: make test
sync:
+ if: contains(github.ref, 'refs/heads/master')
runs-on: ubuntu-latest
needs: test
steps:
@@ -49,8 +51,8 @@ jobs:
submodules: true
- name: Setup Python
uses: actions/setup-python@v1
- - name: Install AWS CLI
- run: pip install awscli
+ - name: Install Dev Dependencies
+ run: pip install -r dev-requirements.txt
- name: Configure AWS Credentials
uses: aws-actions/configure-aws-credentials@v1
with:
diff --git a/.github/workflows/review.yaml b/.github/workflows/review.yaml
new file mode 100644
index 00000000..fd03fd2b
--- /dev/null
+++ b/.github/workflows/review.yaml
@@ -0,0 +1,69 @@
+name: Stack Release Pipeline
+
+on:
+ pull_request_review:
+ types:
+ - submitted
+
+
+jobs:
+
+ lint:
+ if: contains(github.event.review.body, '/lint')
+ runs-on: ubuntu-latest
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v1
+ with:
+ submodules: true
+ - name: Setup Python
+ uses: actions/setup-python@v1
+ - name: Install CFN Lint
+ run: pip install cfn-lint
+ - name: Lint Templates
+ run: make lint
+
+ test:
+ if: contains(github.event.review.body, '/test')
+ runs-on: ubuntu-latest
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v1
+ with:
+ submodules: true
+ - name: Setup Python
+ uses: actions/setup-python@v1
+ - name: Install TaskCat
+ run: pip install git+git://github.com/villasv/taskcat.git@b1011e8f080bad5d0a7cec65559e3c160787d17f#egg=taskcat
+ - name: Configure AWS Credentials
+ uses: aws-actions/configure-aws-credentials@v1
+ with:
+ aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+ aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+ aws-region: us-east-1
+ - name: Test Stacks
+ run: make test
+
+ sync:
+ if: contains(github.event.review.body, '/sync')
+ runs-on: ubuntu-latest
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v1
+ with:
+ submodules: true
+ - name: Setup Python
+ uses: actions/setup-python@v1
+ - name: Install AWS CLI
+ run: pip install awscli
+ - name: Configure AWS Credentials
+ uses: aws-actions/configure-aws-credentials@v1
+ with:
+ aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+ aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+ aws-region: us-east-1
+ - name: Infer Branch Name
+ shell: bash
+ run: echo "::set-env name=BRANCH::${GITHUB_REF#refs/heads/}"
+ - name: Sync Files
+ run: make sync
diff --git a/.pylintrc b/.pylintrc
index 16971686..238aacdc 100644
--- a/.pylintrc
+++ b/.pylintrc
@@ -1,5 +1,6 @@
[MESSAGES CONTROL]
disable=
+ fixme,
missing-module-docstring,
missing-function-docstring
diff --git a/Makefile b/Makefile
index 7e508f8b..f7b68fca 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,6 @@
ifndef BRANCH
BRANCH := $(shell git rev-parse --abbrev-ref HEAD)
endif
-
ifeq ($(BRANCH),master)
BUCKET := s3://turbine-quickstart/quickstart-turbine-airflow
else
@@ -10,6 +9,9 @@ endif
lint:
+ black . --check
+ flake8 .
+ pylint **/*.py
cfn-lint templates/*.template
nuke:
@@ -19,7 +21,9 @@ pack:
7z a ./functions/package.zip ./functions/*.py
sync: pack
+ aws s3 rm $(BUCKET) --recursive
aws s3 sync --exclude '.*' --acl public-read . $(BUCKET)
test: pack
+ pytest -vv
taskcat test run --input-file ./ci/taskcat.yaml
diff --git a/README.md b/README.md
index 369ec06d..35272633 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
-
+
-# Turbine [![CFN Deploy](https://img.shields.io/badge/CFN-deploy-green.svg?style=flat-square&logo=amazon-aws)](#get-it-working) [![GitHub Release](https://img.shields.io/github/release/villasv/aws-airflow-stack.svg?style=flat-square&logo=github)](https://github.com/villasv/aws-airflow-stack/releases/latest) [![Build Status](https://img.shields.io/travis/villasv/aws-airflow-stack/master.svg?style=flat-square&logo=gitlab&logoColor=white&label=taskcat)](https://scrutinizer-ci.com/g/villasv/aws-airflow-stack/build-status/master)
+# Turbine [![GitHub Release](https://img.shields.io/github/release/villasv/aws-airflow-stack.svg?style=flat-square&logo=github)](https://github.com/villasv/aws-airflow-stack/releases/latest) [![Build Status](https://img.shields.io/github/workflow/status/villasv/aws-airflow-stack/Stack%20Release%20Pipeline?style=flat-square&logo=github&logoColor=white&label=build)](https://github.com/villasv/aws-airflow-stack/actions?query=workflow%3A%22Stack+Release+Pipeline%22+branch%3Amaster) [![CFN Deploy](https://img.shields.io/badge/CFN-deploy-green.svg?style=flat-square&logo=amazon-aws)](#get-it-working)
Turbine is the set of bare metals behind a simple yet complete and efficient
Airflow setup.
@@ -13,7 +13,7 @@ configure in a few commands.
## Overview
-![stack diagram](/img/stack-diagram.png)
+![stack diagram](/.github/img/stack-diagram.png)
The stack is composed mainly of three services: the Airflow web server, the
Airflow scheduler, and the Airflow worker. Supporting resources include an RDS
@@ -113,7 +113,7 @@ available in the shell. Before running Airflow commands, you need to load the
Airflow configuration:
```bash
-$ export $(xargs "
+TERMINATING=$(aws autoscaling describe-scaling-activities \
+ --auto-scaling-group-name "$ClusterStack-scaling-group" \
+ --max-items 100 \
+ --region "$AWS_REGION" | \
+ jq --arg TERMINATE_MESSAGE "$TERMINATE_MESSAGE" \
+ '.Activities[]
+ | select(.Description
+ | test($TERMINATE_MESSAGE)) != []')
+if [ "$TERMINATING" = "true" ]; then
+ systemctl stop airflow
+fi
\ No newline at end of file
diff --git a/scripts/commons.setup.sh b/scripts/commons.setup.sh
new file mode 100644
index 00000000..eb045d13
--- /dev/null
+++ b/scripts/commons.setup.sh
@@ -0,0 +1,99 @@
+#!/bin/bash -e
+yum install -y jq
+jsonvar() { jq -n --argjson doc "$1" -r "\$doc.$2"; }
+
+IMDSv1="http://169.254.169.254/latest"
+AWS_PARTITION=$(curl "$IMDSv1/meta-data/services/partition")
+export AWS_PARTITION
+
+IAM_ROLE=$(curl "$IMDSv1/meta-data/iam/security-credentials")
+IAM_DOCUMENT=$(curl "$IMDSv1/meta-data/iam/security-credentials/$IAM_ROLE")
+AWS_ACCESS_KEY_ID=$(jsonvar "$IAM_DOCUMENT" AccessKeyId)
+AWS_SECRET_ACCESS_KEY=$(jsonvar "$IAM_DOCUMENT" SecretAccessKey)
+AWS_SECURITY_TOKEN=$(jsonvar "$IAM_DOCUMENT" Token)
+export IAM_ROLE AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY AWS_SECURITY_TOKEN
+
+EC2_DOCUMENT=$(curl "$IMDSv1/dynamic/instance-identity/document")
+AWS_REGION=$(jsonvar "$EC2_DOCUMENT" region)
+AWS_DEFAULT_REGION=$(jsonvar "$EC2_DOCUMENT" region)
+AWS_ACCOUNT_ID=$(jsonvar "$EC2_DOCUMENT" accountId)
+EC2_INSTANCE_ID=$(jsonvar "$EC2_DOCUMENT" instanceId)
+export AWS_DEFAULT_REGION AWS_REGION AWS_ACCOUNT_ID EC2_INSTANCE_ID
+
+yum install -y python3 python3-pip python3-wheel python3-devel
+pip3 install awscurl
+EC2_HOST_IDENTIFIER="arn:$AWS_PARTITION:ec2:$AWS_REGION:$AWS_ACCOUNT_ID"
+EC2_HOST_IDENTIFIER="$EC2_HOST_IDENTIFIER:instance/$EC2_INSTANCE_ID"
+CD_COMMAND=$(/usr/local/bin/awscurl -X POST \
+ --service codedeploy-commands \
+ "https://codedeploy-commands.$AWS_REGION.amazonaws.com" \
+ -H "X-AMZ-TARGET: CodeDeployCommandService_v20141006.PollHostCommand" \
+ -H "Content-Type: application/x-amz-json-1.1" \
+ -d "{\"HostIdentifier\": \"$EC2_HOST_IDENTIFIER\"}")
+if [ "$CD_COMMAND" = "" ] || [ "$CD_COMMAND" = "b'{}'" ]
+then CD_PENDING_DEPLOY="false"
+else CD_PENDING_DEPLOY="true"
+fi
+export CD_PENDING_DEPLOY
+
+DB_SECRETS=$(aws secretsmanager \
+ get-secret-value --secret-id "$DB_SECRETS_ARN")
+DB_ENGINE=$(jsonvar "$DB_SECRETS" "SecretString | fromjson.engine")
+DB_USER=$(jsonvar "$DB_SECRETS" "SecretString | fromjson.username")
+DB_PASS=$(jsonvar "$DB_SECRETS" "SecretString | fromjson.password")
+DB_HOST=$(jsonvar "$DB_SECRETS" "SecretString | fromjson.host")
+DB_DBNAME=$(jsonvar "$DB_SECRETS" "SecretString | fromjson.dbname")
+DB_PORT=$(jsonvar "$DB_SECRETS" "SecretString | fromjson.port")
+DATABASE_URI="$DB_ENGINE://$DB_USER:$DB_PASS@$DB_HOST:$DB_PORT/$DB_DBNAME"
+export DATABASE_URI
+
+yum install -y python3
+pip3 install cryptography
+FERNET_KEY=$(python3 -c "if True:#
+ from base64 import urlsafe_b64encode
+ from cryptography.fernet import Fernet
+ from cryptography.hazmat.backends import default_backend
+ from cryptography.hazmat.primitives import hashes
+ from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC
+ kdf = PBKDF2HMAC(
+ algorithm=hashes.SHA256(),length=32,iterations=100000,
+ backend=default_backend(),salt=b'${FERNET_SALT//\'/\\\'}',
+ )
+ key = kdf.derive(b'${DB_PASS_ESC//\'/\\\'}')
+ key_encoded = urlsafe_b64encode(key)
+ print(key_encoded.decode('utf8'))")
+export FERNET_KEY
+
+FILES=$(dirname "$0")
+find "$FILES" -type f -iname "*.sh" -exec chmod +x {} \;
+envreplace() { CONTENT=$(envsubst <"$1"); echo "$CONTENT" >"$1"; }
+
+mkdir -p /etc/cfn/hooks.d
+cp "$FILES"/systemd/cfn-hup.service /lib/systemd/system/
+cp "$FILES"/systemd/cfn-hup.conf /etc/cfn/cfn-hup.conf
+cp "$FILES"/systemd/cfn-auto-reloader.conf /etc/cfn/hooks.d/cfn-auto-reloader.conf
+envreplace /etc/cfn/cfn-hup.conf
+envreplace /etc/cfn/hooks.d/cfn-auto-reloader.conf
+
+mkdir /run/airflow && chown -R ec2-user: /run/airflow
+cp "$FILES"/systemd/airflow-*.{path,timer,service} /lib/systemd/system/
+cp "$FILES"/systemd/airflow.env /etc/sysconfig/airflow.env
+cp "$FILES"/systemd/airflow.conf /usr/lib/tmpfiles.d/airflow.conf
+envreplace /etc/sysconfig/airflow.env
+
+mapfile -t AIRFLOW_ENVS < /etc/sysconfig/airflow.env
+export "${AIRFLOW_ENVS[@]}"
+
+yum install -y gcc libcurl-devel openssl-devel
+export PYCURL_SSL_LIBRARY=openssl
+pip3 install "apache-airflow[celery,postgres,s3,crypto]==1.10.9" "celery[sqs]"
+mkdir "$AIRFLOW_HOME" && chown -R ec2-user: "$AIRFLOW_HOME"
+
+systemctl enable --now cfn-hup.service
+
+cd_agent() {
+ yum install -y ruby
+ wget "https://aws-codedeploy-$AWS_REGION.s3.amazonaws.com/latest/install"
+ chmod +x ./install
+ ./install auto
+}
diff --git a/scripts/scheduler.setup.sh b/scripts/scheduler.setup.sh
new file mode 100755
index 00000000..c81a11e4
--- /dev/null
+++ b/scripts/scheduler.setup.sh
@@ -0,0 +1,12 @@
+#!/bin/bash -e
+
+. "$(dirname $0)/commons.setup.sh"
+
+if [ "$TURBINE__CORE__LOAD_DEFAULTS" == "True" ]; then
+ su -c '/usr/local/bin/airflow initdb' ec2-user
+else
+ su -c '/usr/local/bin/airflow upgradedb' ec2-user
+fi
+
+systemctl enable --now airflow-scheduler
+cd_agent
diff --git a/scripts/systemd/airflow-confapply-agent.path b/scripts/systemd/airflow-confapply-agent.path
new file mode 100644
index 00000000..8efa3b6a
--- /dev/null
+++ b/scripts/systemd/airflow-confapply-agent.path
@@ -0,0 +1,9 @@
+[Unit]
+After=airflow-scheduler.service airflow-webserver.service airflow-workerset.service
+PartOf=airflow-scheduler.service airflow-webserver.service airflow-workerset.service
+
+[Path]
+PathModified=/etc/sysconfig/airflow.env
+
+[Install]
+WantedBy=airflow-scheduler.service airflow-webserver.service airflow-workerset.service
diff --git a/scripts/systemd/airflow-confapply-agent.service b/scripts/systemd/airflow-confapply-agent.service
new file mode 100644
index 00000000..ed4a97ba
--- /dev/null
+++ b/scripts/systemd/airflow-confapply-agent.service
@@ -0,0 +1,4 @@
+[Service]
+Type=oneshot
+ExecStartPre=/usr/bin/systemctl daemon-reload
+ExecStart=/opt/turbine/restart-services.sh
diff --git a/scripts/systemd/airflow-heartbeat.service b/scripts/systemd/airflow-heartbeat.service
new file mode 100644
index 00000000..b3a71ecb
--- /dev/null
+++ b/scripts/systemd/airflow-heartbeat.service
@@ -0,0 +1,3 @@
+[Service]
+Type=oneshot
+ExecStart=/opt/turbine/airflow-heartbeat.sh
diff --git a/scripts/systemd/airflow-heartbeat.timer b/scripts/systemd/airflow-heartbeat.timer
new file mode 100644
index 00000000..4eb06a62
--- /dev/null
+++ b/scripts/systemd/airflow-heartbeat.timer
@@ -0,0 +1,5 @@
+[Timer]
+OnCalendar=*:0/1
+
+[Install]
+WantedBy=airflow.service
diff --git a/scripts/systemd/airflow-scheduler.service b/scripts/systemd/airflow-scheduler.service
new file mode 100644
index 00000000..748004fb
--- /dev/null
+++ b/scripts/systemd/airflow-scheduler.service
@@ -0,0 +1,14 @@
+[Unit]
+Description=Airflow scheduler daemon
+Wants=airflow-confapply-agent.path
+
+[Service]
+EnvironmentFile=/etc/sysconfig/airflow.env
+ExecStart=/usr/local/bin/airflow scheduler
+User=ec2-user
+Group=ec2-user
+Restart=always
+RestartSec=5s
+
+[Install]
+WantedBy=multi-user.target
diff --git a/scripts/systemd/airflow-terminate.service b/scripts/systemd/airflow-terminate.service
new file mode 100644
index 00000000..0232520c
--- /dev/null
+++ b/scripts/systemd/airflow-terminate.service
@@ -0,0 +1,3 @@
+[Service]
+Type=oneshot
+ExecStart=/opt/turbine/airflow-terminate.sh
diff --git a/scripts/systemd/airflow-terminate.timer b/scripts/systemd/airflow-terminate.timer
new file mode 100644
index 00000000..4eb06a62
--- /dev/null
+++ b/scripts/systemd/airflow-terminate.timer
@@ -0,0 +1,5 @@
+[Timer]
+OnCalendar=*:0/1
+
+[Install]
+WantedBy=airflow.service
diff --git a/scripts/systemd/airflow-webserver.service b/scripts/systemd/airflow-webserver.service
new file mode 100644
index 00000000..e758bd23
--- /dev/null
+++ b/scripts/systemd/airflow-webserver.service
@@ -0,0 +1,15 @@
+[Unit]
+Description=Airflow webserver daemon
+Wants=airflow-confapply-agent.path
+
+[Service]
+EnvironmentFile=/etc/sysconfig/airflow.env
+ExecStart=/usr/local/bin/airflow webserver --pid /run/airflow/webserver.pid
+User=ec2-user
+Group=ec2-user
+Restart=on-failure
+RestartSec=5s
+PrivateTmp=true
+
+[Install]
+WantedBy=multi-user.target
diff --git a/scripts/systemd/airflow-workerset.service b/scripts/systemd/airflow-workerset.service
new file mode 100644
index 00000000..aca7b39f
--- /dev/null
+++ b/scripts/systemd/airflow-workerset.service
@@ -0,0 +1,16 @@
+[Unit]
+Description=Airflow celery worker daemon
+Wants=airflow-confapply-agent.path
+
+[Service]
+EnvironmentFile=/etc/sysconfig/airflow.env
+ExecStart=/usr/local/bin/airflow worker
+User=ec2-user
+Group=ec2-user
+Restart=on-failure
+RestartSec=10s
+KillMode=mixed
+TimeoutStopSec=24h
+
+[Install]
+WantedBy=multi-user.target
diff --git a/scripts/systemd/airflow.conf b/scripts/systemd/airflow.conf
new file mode 100644
index 00000000..e158fe28
--- /dev/null
+++ b/scripts/systemd/airflow.conf
@@ -0,0 +1 @@
+D /run/airflow 0755 ec2-user ec2-user
diff --git a/scripts/systemd/airflow.env b/scripts/systemd/airflow.env
new file mode 100644
index 00000000..f1cca07c
--- /dev/null
+++ b/scripts/systemd/airflow.env
@@ -0,0 +1,13 @@
+AWS_DEFAULT_REGION=${AWS_REGION}
+AIRFLOW_HOME=/airflow
+AIRFLOW__CORE__EXECUTOR=CeleryExecutor
+AIRFLOW__CORE__FERNET_KEY=${FERNET_KEY}
+AIRFLOW__CORE__LOAD_EXAMPLES=${LOAD_EXAMPLES}
+TURBINE__CORE__LOAD_DEFAULTS=${LOAD_DEFAULTS}
+AIRFLOW__CORE__SQL_ALCHEMY_CONN=${DATABASE_URI}
+AIRFLOW__CORE__REMOTE_BASE_LOG_FOLDER=s3://${LOGS_BUCKET}
+AIRFLOW__CORE__REMOTE_LOGGING=True
+AIRFLOW__CELERY__BROKER_URL=sqs://
+AIRFLOW__CELERY__DEFAULT_QUEUE=${QUEUE_NAME}
+AIRFLOW__CELERY__RESULT_BACKEND=db+${DATABASE_URI}
+AIRFLOW__CELERY_BROKER_TRANSPORT_OPTIONS__REGION=${AWS_REGION}
diff --git a/scripts/systemd/cfn-auto-reloader.conf b/scripts/systemd/cfn-auto-reloader.conf
new file mode 100644
index 00000000..fa1d04cf
--- /dev/null
+++ b/scripts/systemd/cfn-auto-reloader.conf
@@ -0,0 +1,9 @@
+[cfn-auto-reloader-hook]
+triggers=post.update
+path=Resources.LaunchConfiguration.Metadata.AWS::CloudFormation::Init
+action=/opt/aws/bin/cfn-init -v \
+ --region ${AWS_REGION} \
+ --role ${IAM_ROLE} \
+ --stack ${AWS_STACK_NAME} \
+ --resource LaunchConfiguration
+runas=root
diff --git a/scripts/systemd/cfn-hup.conf b/scripts/systemd/cfn-hup.conf
new file mode 100644
index 00000000..bf3466f2
--- /dev/null
+++ b/scripts/systemd/cfn-hup.conf
@@ -0,0 +1,5 @@
+[main]
+stack=${AWS_STACK_NAME}
+region=${AWS_REGION}
+role=${IAM_ROLE}
+interval=1
diff --git a/scripts/systemd/cfn-hup.service b/scripts/systemd/cfn-hup.service
new file mode 100644
index 00000000..7f46eec5
--- /dev/null
+++ b/scripts/systemd/cfn-hup.service
@@ -0,0 +1,6 @@
+[Service]
+ExecStart=/opt/aws/bin/cfn-hup
+Restart=always
+
+[Install]
+WantedBy=multi-user.target
diff --git a/scripts/webserver.setup.sh b/scripts/webserver.setup.sh
new file mode 100755
index 00000000..9d80bfb7
--- /dev/null
+++ b/scripts/webserver.setup.sh
@@ -0,0 +1,17 @@
+#!/bin/bash -e
+
+. "$(dirname $0)/commons.setup.sh"
+
+PUBLIC=$(curl "$IMDSv1/meta-data/public-ipv4" -w "%{http_code}")
+if [ "$PUBLIC" = "200" ]
+then HOSTNAME=$(ec2-metadata -v | awk '{print $2}')
+else HOSTNAME=$(ec2-metadata -o | awk '{print $2}')
+fi
+BASE_URL="http://$HOSTNAME:${WEB_SERVER_PORT}"
+echo "AIRFLOW__WEBSERVER__BASE_URL=$BASE_URL" \
+ >> /etc/sysconfig/airflow.env
+echo "AIRFLOW__WEBSERVER__WEB_SERVER_PORT=${WEB_SERVER_PORT}" \
+ >> /etc/sysconfig/airflow.env
+
+systemctl enable --now airflow-webserver
+cd_agent
diff --git a/scripts/workerset.setup.sh b/scripts/workerset.setup.sh
new file mode 100755
index 00000000..018d911d
--- /dev/null
+++ b/scripts/workerset.setup.sh
@@ -0,0 +1,19 @@
+#!/bin/bash -e
+
+. "$(dirname $0)/commons.setup.sh"
+
+if [ -d "/mnt/efs" ]; then
+ mkdir /mnt/efs
+ FSPEC="${FILE_SYSTEM_ID}.efs.$AWS_REGION.amazonaws.com:/"
+ PARAMS="nfsvers=4.1,rsize=1048576,wsize=1048576"
+ PARAMS="$PARAMS,hard,timeo=600,retrans=2,noresvport"
+ echo "$FSPEC /mnt/efs nfs $PARAMS,_netdev 0 0" >> /etc/fstab
+ mount /mnt/efs && chown -R ec2-user: /mnt/efs
+fi
+
+if [ "$CD_PENDING_DEPLOY" = "false" ]; then
+ systemctl enable --now airflow-workerset
+else
+ systemctl enable airflow-workerset
+fi
+cd_agent
diff --git a/templates/turbine-cluster.template b/templates/turbine-cluster.template
index efaabb1e..7e8b2f30 100644
--- a/templates/turbine-cluster.template
+++ b/templates/turbine-cluster.template
@@ -1,140 +1,279 @@
AWSTemplateFormatVersion: 2010-09-09
Description: >-
- The Turbine-Airflow cluster stack, composed mainly of the Airflow web server,
- the Airflow scheduler, and the Airflow worker nested stacks. Supporting
- resources include an RDS to host the Airflow metadata database, an SQS to be
- used as broker backend, S3 buckets for logs and deployment bundles, an EFS to
- serve as shared directory, and a custom CloudWatch metric measured by a timed
- AWS Lambda.
+ This template creates the Airflow supporting resources including an RDS
+ instance to host the Airflow metadata database, an SQS queue to be used as
+ broker backend, S3 buckets for logs and deployment packages, and then creates
+ the Airflow scheduler, webserver and workers nested stacks. The nested Airflow
+ services stacks create the Airflow instances in highly available auto scaling
+ groups spanning two subnets, plus for the workers stack an EFS shared network
+ directory and a custom cloudwatch load metric function to guide the auto
+ scaling alarm triggers. **WARNING** This template creates AWS resources. You
+ will be billed for the AWS resources used if you create a stack from this
+ template. QS(0027)
+Metadata:
+ AWS::CloudFormation::Interface:
+ ParameterGroups:
+ - Label:
+ default: VPC network configuration
+ Parameters:
+ - VPCID
+ - PublicSubnet1ID
+ - PublicSubnet2ID
+ - PrivateSubnet1AID
+ - PrivateSubnet2AID
+ - AllowHTTPAccessCIDR
+ - Label:
+ default: Turbine Cluster configuration
+ Parameters:
+ - SchedulerInstanceType
+ - WebserverInstanceType
+ - WorkerInstanceType
+ - MinGroupSize
+ - MaxGroupSize
+ - ShrinkThreshold
+ - GrowthThreshold
+ - Label:
+ default: Apache Airflow configuration
+ Parameters:
+ - LoadExampleDags
+ - LoadDefaultCons
+ - WebServerPort
+ - Label:
+ default: AWS Quick Start configuration
+ Parameters:
+ - QSS3BucketName
+ - QSS3KeyPrefix
+ ParameterLabels:
+ VPCID:
+ default: VPC ID
+ PublicSubnet1ID:
+ default: Public subnet 1 ID
+ PublicSubnet2ID:
+ default: Public subnet 2 ID
+ PrivateSubnet1AID:
+ default: Private subnet 1 ID
+ PrivateSubnet2AID:
+ default: Private subnet 2 ID
+ AllowHTTPAccessCIDR:
+ default: Allowed HTTP access CIDR
+ SchedulerInstanceType:
+ default: Scheduler instance type
+ WebserverInstanceType:
+ default: Web server instance type
+ WorkerInstanceType:
+ default: Workers instance type
+ MinGroupSize:
+ default: Minimum group size
+ MaxGroupSize:
+ default: Maximum group size
+ ShrinkThreshold:
+ default: Shrink threshold
+ GrowthThreshold:
+ default: Growth threshold
+ LoadExampleDags:
+ default: Load example DAGs
+ LoadDefaultCons:
+ default: Load default connections
+ WebServerPort:
+ default: Web server port
+ QSS3BucketName:
+ default: Quick Start S3 bucket name
+ QSS3KeyPrefix:
+ default: Quick Start S3 key prefix
Parameters:
- # Networking
VPCID:
- Description: An existing VPC for the cluster
+ Description: An existing VPC for the cluster.
Type: AWS::EC2::VPC::Id
PublicSubnet1ID:
- Description: An existing public Subnet in some Availability Zone
+ Description: An existing public Subnet in some Availability Zone.
Type: AWS::EC2::Subnet::Id
PublicSubnet2ID:
- Description: An existing public Subnet in another Availability Zone
+ Description: An existing public Subnet in another Availability Zone.
Type: AWS::EC2::Subnet::Id
PrivateSubnet1AID:
- Description: An existing private Subnet in some Availability Zone
+ Description: An existing private Subnet in some Availability Zone.
Type: AWS::EC2::Subnet::Id
PrivateSubnet2AID:
- Description: An existing private Subnet in another Availability Zone
+ Description: An existing private Subnet in another Availability Zone.
Type: AWS::EC2::Subnet::Id
- AllowedWebBlock:
+ AllowHTTPAccessCIDR:
Description: >-
- The IPv4 CIDR block to allow HTTP access in the webserver. The default of
- 0.0.0.0/0 allows HTTP from everywhere, which is convenient but less
- secure.
+ The IPv4 CIDR block to allow HTTP access in the webserver. Using the same
+      CIDR as the VPC is a secure default. Using 0.0.0.0/0 allows access from
+ everywhere, which is convenient but less secure.
+ ConstraintDescription: CIDR block parameter must be in the form x.x.x.x/16-28.
+ AllowedPattern: ^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])(\/([0-9]|[1-2][0-9]|3[0-2]))$
+ Default: 10.0.0.0/16
Type: String
- AllowedPattern: >-
- ^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])(\/([0-9]|[1-2][0-9]|3[0-2]))$
- Default: 0.0.0.0/0
- WebserverPort:
- Description: >-
- The port Airflow webserver will be listening.
- Type: Number
- Default: 8080
- MinValue: 1024
- MaxValue: 65535
- ConstraintDescription: >-
- Ports below 1024 can be opened only with root privileges and the airflow
- process does not run as such.
- # Cluster Settings
SchedulerInstanceType:
Description: EC2 instance type to use for the scheduler.
- Type: String
Default: t3.micro
+ Type: String
WebserverInstanceType:
Description: EC2 instance type to use for the webserver.
- Type: String
Default: t3.micro
+ Type: String
WorkerInstanceType:
Description: EC2 instance type to use for the workers.
- Type: String
Default: t3.medium
+ Type: String
MinGroupSize:
Description: The minimum number of active worker instances.
+ Default: 0
Type: Number
- Default: 1
MaxGroupSize:
Description: The maximum number of active worker instances.
- Type: Number
Default: 10
+ Type: Number
ShrinkThreshold:
Description: >-
The threshold for the average queue size from which going equal or below
will trigger the AutoScaling group to Scale In, deallocating one worker
instance.
- Type: Number
Default: 0.5
+ Type: Number
GrowthThreshold:
Description: >-
The threshold for the average queue size from which going equal or above
will trigger the AutoScaling group to Scale Out, allocating one worker
instance.
- Type: Number
Default: 0.9
- DbMasterUsername:
- Description: The username to be used in the airflow database.
- Type: String
- Default: airflow
- DbMasterPassword:
- Description: The password to be used in the airflow database.
- Type: String
- NoEcho: true
+ Type: Number
- # Airflow Config
LoadExampleDags:
Description: >-
Load the example DAGs distributed with Airflow. Useful if deploying a
stack for demonstrating a few topologies, operators and scheduling
strategies.
- Type: String
AllowedValues:
- 'False'
- 'True'
Default: 'False'
- LoadDefaultConn:
+ Type: String
+ LoadDefaultCons:
Description: >-
Load the default connections initialized by Airflow. Most consider these
unnecessary, which is why the default is to not load them.
- Type: String
AllowedValues:
- 'False'
- 'True'
Default: 'False'
+ Type: String
+ WebServerPort:
+ Description: >-
+      The port the Airflow webserver will listen on.
+ ConstraintDescription: >-
+ Ports below 1024 can be opened only with root privileges and the airflow
+ process does not run as such.
+ MinValue: 1024
+ MaxValue: 65535
+ Default: 8080
+ Type: Number
- # Quick Start Overrides
QSS3BucketName:
Description: >-
S3 bucket name for the Quick Start assets. You can specify your own bucket
providing assets and submodules, if you want to override the Quick Start
behavior for your specific implementation.
- Type: String
- Default: turbine-quickstart
- AllowedPattern: '^[0-9a-zA-Z]+([0-9a-zA-Z-]*[0-9a-zA-Z])*$'
ConstraintDescription: >-
Quick Start bucket name can include numbers, lowercase letters, uppercase
letters, and hyphens (-). It cannot start or end with a hyphen (-).
+ AllowedPattern: '^[0-9a-zA-Z]+([0-9a-zA-Z-]*[0-9a-zA-Z])*$'
+ Default: turbine-quickstart
+ Type: String
QSS3KeyPrefix:
Description: >-
S3 key prefix for the Quick Start assets. You can scpeficy your own
"directory" providing the stack templates, if you want to override the
Quick Start behavior for your specific implementation.
- Type: String
- Default: quickstart-turbine-airflow/
- AllowedPattern: '^[0-9a-zA-Z-/]*$'
ConstraintDescription: >-
Quick Start key prefix can include numbers, lowercase letters, uppercase
letters, hyphens (-), and forward slash (/).
+ AllowedPattern: '^[0-9a-zA-Z-/]*$'
+ Default: quickstart-turbine-airflow/
+ Type: String
Resources:
+ DBSubnetGroup:
+ Type: AWS::RDS::DBSubnetGroup
+ Properties:
+ DBSubnetGroupDescription: >
+ Associates the Database Instances with the selected VPC Subnets.
+ SubnetIds:
+ - !Ref PrivateSubnet1AID
+ - !Ref PrivateSubnet2AID
+
+ DBInstance:
+ Type: AWS::RDS::DBInstance
+ Properties:
+ AllocatedStorage: '20'
+ DBInstanceClass: db.t2.micro
+ DBName: airflow
+ Engine: postgres
+ MasterUsername: !Join
+ - ''
+ - - '{{resolve:secretsmanager:'
+ - !Ref Secret
+ - ':SecretString:username}}'
+ MasterUserPassword: !Join
+ - ''
+ - - '{{resolve:secretsmanager:'
+ - !Ref Secret
+ - ':SecretString:password}}'
+ DBSubnetGroupName: !Ref DBSubnetGroup
+ VPCSecurityGroups:
+ - !Ref SecurityGroup
+
+ SecurityGroup:
+ Type: AWS::EC2::SecurityGroup
+ Properties:
+ GroupDescription: Security Rules with permissions for database connections for Airflow.
+ SecurityGroupIngress:
+ - SourceSecurityGroupId: !Ref InstancesSecurityGroup
+ IpProtocol: TCP
+ FromPort: 5432
+ ToPort: 5432
+ VpcId: !Ref VPCID
+
+ Secret:
+ Type: AWS::SecretsManager::Secret
+ Properties:
+ GenerateSecretString:
+ SecretStringTemplate: '{"username": "airflow"}'
+ GenerateStringKey: "password"
+ PasswordLength: 16
+ ExcludePunctuation: True
+
+ SecretTargetAttachment:
+ Type: AWS::SecretsManager::SecretTargetAttachment
+ Properties:
+ SecretId: !Ref Secret
+ TargetId: !Ref DBInstance
+ TargetType: AWS::RDS::DBInstance
+
+ TaskQueue:
+ Type: AWS::SQS::Queue
+
+ LogsBucket:
+ Type: AWS::S3::Bucket
+
+ DeploymentsBucket:
+ Type: AWS::S3::Bucket
+
+ InstancesSecurityGroup:
+ Type: AWS::EC2::SecurityGroup
+ Properties:
+ GroupDescription: >-
+ The security group shared by all Airflow instances used as inbound rule
+ for the other more specific resource security groups.
+ VpcId: !Ref VPCID
+ Tags:
+ - Key: Name
+ Value: airflow-instances-sg
+
SchedulerStack:
Type: AWS::CloudFormation::Stack
Properties:
@@ -144,20 +283,19 @@ Resources:
- !Ref QSS3KeyPrefix
- templates/turbine-scheduler.template
Parameters:
- VPCID: !Ref VPCID
PrivateSubnet1AID: !Ref PrivateSubnet1AID
PrivateSubnet2AID: !Ref PrivateSubnet2AID
- InstancesSecurityGroup: !Ref InstancesSecurityGroup
- IamInstanceProfile: !Ref AirflowProfile
- IamRole: !Ref AirflowRole
- ImageId: !FindInMap
- - AWSAMIRegionMap
- - !Ref AWS::Region
- - AMZNLINUX2
+ SecurityGroupID: !Ref InstancesSecurityGroup
+ DatabaseSecret: !Ref Secret
+ QueueName: !GetAtt TaskQueue.QueueName
+ LogsBucket: !Ref LogsBucket
InstanceType: !Ref SchedulerInstanceType
- SharedCloudInitStack: !Ref AWS::StackName
+ LoadExampleDags: !Ref LoadExampleDags
+ LoadDefaultCons: !Ref LoadDefaultCons
+ QSS3BucketName: !Ref QSS3BucketName
+ QSS3KeyPrefix: !Ref QSS3KeyPrefix
DependsOn:
- - SharedCloudInitMetadata
+ - SecretTargetAttachment
WebserverStack:
Type: AWS::CloudFormation::Stack
@@ -171,19 +309,19 @@ Resources:
VPCID: !Ref VPCID
PublicSubnet1ID: !Ref PublicSubnet1ID
PublicSubnet2ID: !Ref PublicSubnet2ID
- InstancesSecurityGroup: !Ref InstancesSecurityGroup
- IngressCIDR: !Ref AllowedWebBlock
- IngressPort: !Ref WebserverPort
- IamInstanceProfile: !Ref AirflowProfile
- IamRole: !Ref AirflowRole
- ImageId: !FindInMap
- - AWSAMIRegionMap
- - !Ref AWS::Region
- - AMZNLINUX2
+ SecurityGroupID: !Ref InstancesSecurityGroup
+ AllowHTTPAccessCIDR: !Ref AllowHTTPAccessCIDR
+ DatabaseSecret: !Ref Secret
+ QueueName: !GetAtt TaskQueue.QueueName
+ LogsBucket: !Ref LogsBucket
InstanceType: !Ref WebserverInstanceType
- SharedCloudInitStack: !Ref AWS::StackName
+ LoadExampleDags: !Ref LoadExampleDags
+ LoadDefaultCons: !Ref LoadDefaultCons
+ WebServerPort: !Ref WebServerPort
+ QSS3BucketName: !Ref QSS3BucketName
+ QSS3KeyPrefix: !Ref QSS3KeyPrefix
DependsOn:
- - SharedCloudInitMetadata
+ - SecretTargetAttachment
WorkerSetStack:
Type: AWS::CloudFormation::Stack
@@ -197,50 +335,38 @@ Resources:
VPCID: !Ref VPCID
PrivateSubnet1AID: !Ref PrivateSubnet1AID
PrivateSubnet2AID: !Ref PrivateSubnet2AID
- InstancesSecurityGroup: !Ref InstancesSecurityGroup
- IamInstanceProfile: !Ref AirflowProfile
- IamRole: !Ref AirflowRole
- ImageId: !FindInMap
- - AWSAMIRegionMap
- - !Ref AWS::Region
- - AMZNLINUX2
+ SecurityGroupID: !Ref InstancesSecurityGroup
+ DatabaseSecret: !Ref Secret
+ QueueName: !GetAtt TaskQueue.QueueName
+ LogsBucket: !Ref LogsBucket
InstanceType: !Ref WorkerInstanceType
- MinSize: !Ref MinGroupSize
- MaxSize: !Ref MaxGroupSize
- GrowthThreshold: !Ref GrowthThreshold
+ MinGroupSize: !Ref MinGroupSize
+ MaxGroupSize: !Ref MaxGroupSize
ShrinkThreshold: !Ref ShrinkThreshold
- QueueName: !GetAtt TaskQueue.QueueName
- SharedCloudInitStack: !Ref AWS::StackName
+ GrowthThreshold: !Ref GrowthThreshold
+ LoadExampleDags: !Ref LoadExampleDags
+ LoadDefaultCons: !Ref LoadDefaultCons
QSS3BucketName: !Ref QSS3BucketName
QSS3KeyPrefix: !Ref QSS3KeyPrefix
-
DependsOn:
- - SharedCloudInitMetadata
-
- LogsBucket:
- Type: AWS::S3::Bucket
-
- DeploymentsBucket:
- Type: AWS::S3::Bucket
+ - SecretTargetAttachment
CodeDeployApplication:
Type: AWS::CodeDeploy::Application
Properties:
- ApplicationName: !Sub ${AWS::StackName}-deployment-application
+ ApplicationName: !Ref AWS::StackName
ComputePlatform: Server
CodeDeployDeploymentGroup:
Type: AWS::CodeDeploy::DeploymentGroup
Properties:
ApplicationName: !Ref CodeDeployApplication
- DeploymentGroupName: !Sub ${AWS::StackName}-deployment-group
+ DeploymentGroupName: !Ref AWS::StackName
AutoScalingGroups:
- !GetAtt SchedulerStack.Outputs.AutoScalingGroup
- !GetAtt WebserverStack.Outputs.AutoScalingGroup
- !GetAtt WorkerSetStack.Outputs.AutoScalingGroup
- ServiceRoleArn: !GetAtt
- - CodeDeployServiceRole
- - Arn
+ ServiceRoleArn: !GetAtt CodeDeployServiceRole.Arn
CodeDeployServiceRole:
Type: AWS::IAM::Role
@@ -255,519 +381,7 @@ Resources:
Action:
- sts:AssumeRole
ManagedPolicyArns:
- - 'arn:aws:iam::aws:policy/service-role/AWSCodeDeployRole'
-
- EfsFileSystem:
- Type: AWS::EFS::FileSystem
- Properties:
- FileSystemTags:
- - Key: Name
- Value: !Sub ${AWS::StackName}-filesystem
-
- EfsMountTarget1A:
- Type: AWS::EFS::MountTarget
- Properties:
- FileSystemId: !Ref EfsFileSystem
- SubnetId: !Ref PrivateSubnet1AID
- SecurityGroups:
- - !Ref Access
-
- EfsMountTarget2A:
- Type: AWS::EFS::MountTarget
- Properties:
- FileSystemId: !Ref EfsFileSystem
- SubnetId: !Ref PrivateSubnet2AID
- SecurityGroups:
- - !Ref Access
-
- DBs:
- Type: AWS::RDS::DBSubnetGroup
- Properties:
- DBSubnetGroupDescription: Associates the Database Instances with the selected VPC Subnets.
- SubnetIds:
- - !Ref PrivateSubnet1AID
- - !Ref PrivateSubnet2AID
-
- Database:
- Type: AWS::RDS::DBInstance
- Properties:
- AllocatedStorage: '20'
- DBInstanceClass: db.t2.micro
- DBName: airflow
- Engine: postgres
- MasterUsername: !Ref DbMasterUsername
- MasterUserPassword: !Ref DbMasterPassword
- Tags:
- - Key: Name
- Value: !Sub ${AWS::StackName}-database
- DBSubnetGroupName: !Ref DBs
- VPCSecurityGroups:
- - !Ref Connection
-
- TaskQueue:
- Type: AWS::SQS::Queue
- Properties: {}
-
- InstancesSecurityGroup:
- Type: AWS::EC2::SecurityGroup
- Properties:
- GroupDescription: >-
- The security group shared by all Airflow instances used as inbound rule
- for the other more specific resource security groups.
- VpcId: !Ref VPCID
- Tags:
- - Key: Name
- Value: airflow-instances-sg
-
- Access:
- Type: AWS::EC2::SecurityGroup
- Properties:
- GroupDescription: >-
- Security Rules with permissions for the shared filesystem across Airflow
- instances.
- SecurityGroupIngress:
- - SourceSecurityGroupId: !Ref InstancesSecurityGroup
- IpProtocol: TCP
- FromPort: 2049
- ToPort: 2049
- VpcId: !Ref VPCID
- Tags:
- - Key: Name
- Value: !Sub '${AWS::StackName}-access'
-
- Connection:
- Type: AWS::EC2::SecurityGroup
- Properties:
- GroupDescription: Security Rules with permissions for database connections for Airflow.
- SecurityGroupIngress:
- - SourceSecurityGroupId: !Ref InstancesSecurityGroup
- IpProtocol: TCP
- FromPort: 5432
- ToPort: 5432
- VpcId: !Ref VPCID
- Tags:
- - Key: Name
- Value: !Sub ${AWS::StackName}-connection
-
- AirflowRole:
- Type: AWS::IAM::Role
- Properties:
- AssumeRolePolicyDocument:
- Version: 2012-10-17
- Statement:
- - Effect: Allow
- Principal:
- Service:
- - ec2.amazonaws.com
- Action:
- - sts:AssumeRole
- ManagedPolicyArns:
- - arn:aws:iam::aws:policy/service-role/AmazonEC2RoleforSSM
- Policies:
- - PolicyName: !Sub ${AWS::StackName}-cfn-describe
- PolicyDocument:
- Version: 2012-10-17
- Statement:
- - Effect: Allow
- Action:
- - cloudformation:DescribeStackResource
- Resource: !Sub arn:aws:cloudformation:${AWS::Region}:${AWS::AccountId}:stack/${AWS::StackName}/*
- - PolicyName: !Sub ${AWS::StackName}-ssm-rw-policy
- PolicyDocument:
- Version: 2012-10-17
- Statement:
- - Effect: Allow
- Action:
- - ssm:GetParameter
- - ssm:PutParameter
- Resource:
- - !Sub arn:aws:ssm:*:${AWS::AccountId}:*/*
- - PolicyName: !Sub ${AWS::StackName}-queue-rw-policy
- PolicyDocument:
- Version: 2012-10-17
- Statement:
- - Effect: Allow
- Action:
- - sqs:ListQueues
- Resource:
- - !Sub arn:aws:sqs:*:${AWS::AccountId}:*
- - Effect: Allow
- Action:
- - sqs:ChangeMessageVisibility
- - sqs:DeleteMessage
- - sqs:GetQueueAttributes
- - sqs:GetQueueUrl
- - sqs:ReceiveMessage
- - sqs:SendMessage
- Resource: !Sub
- - arn:aws:sqs:*:${AWS::AccountId}:${queue}
- - queue: !GetAtt
- - TaskQueue
- - QueueName
- - PolicyName: !Sub ${AWS::StackName}-deployments-r-policy
- PolicyDocument:
- Version: 2012-10-17
- Statement:
- - Effect: Allow
- Action:
- - s3:Get*
- - s3:List*
- Resource: !Sub arn:aws:s3:::${DeploymentsBucket}/*
- - Effect: Allow
- Action:
- - codedeploy:List*
- Resource: !Sub arn:aws:codedeploy:*:${AWS::AccountId}:deploymentgroup:*
- - PolicyName: !Sub ${AWS::StackName}-logs-rw-policy
- PolicyDocument:
- Version: 2012-10-17
- Statement:
- - Effect: Allow
- Action:
- - s3:Get*
- - s3:Put*
- Resource: !Sub arn:aws:s3:::${LogsBucket}/*
- - PolicyName: !Sub ${AWS::StackName}-lifecycle-heartbeat
- PolicyDocument:
- Version: 2012-10-17
- Statement:
- - Effect: Allow
- Action:
- - autoscaling:RecordLifecycleActionHeartbeat
- - autoscaling:CompleteLifecycleAction
- Resource: !Sub arn:aws:autoscaling:*:${AWS::AccountId}:autoScalingGroup:*:*
- - Effect: Allow
- Action:
- - autoscaling:DescribeScalingActivities
- Resource: '*'
-
- AirflowProfile:
- Type: AWS::IAM::InstanceProfile
- Properties:
- Roles:
- - !Ref AirflowRole
-
- SharedCloudInitMetadata:
- Type: AWS::CloudFormation::WaitConditionHandle
- Properties: {}
- Metadata:
- AWS::CloudFormation::Init:
- configSets:
- default:
- - filesys
- - runtime
- - secrets
- - sysconf
- - migrate
- - service
- - lchooks
- - metahup
- - cdagent
- filesys:
- commands:
- mkdir:
- test: test ! -d /airflow
- command: |
- mkdir /airflow
- chown -R ec2-user /airflow
- mount:
- test: test ! -d /mnt/efs
- command: !Sub |
- mkdir /mnt/efs
- fspec="${EfsFileSystem}.efs.${AWS::Region}.amazonaws.com:/"
- param="nfsvers=4.1,rsize=1048576,wsize=1048576"
- param="$param,hard,timeo=600,retrans=2,noresvport"
- echo "$fspec /mnt/efs nfs $param,_netdev 0 0" >> /etc/fstab
- mount /mnt/efs && chown -R ec2-user /mnt/efs
- runtime:
- packages:
- yum:
- git: []
- gcc: []
- gcc-c++: []
- jq: []
- lapack-devel: []
- libcurl-devel: []
- libxml2-devel: []
- libxslt-devel: []
- openssl-devel: []
- postgresql-devel: []
- python3: []
- python3-devel: []
- python3-pip: []
- python3-wheel: []
- commands:
- install:
- command: |
- PYCURL_SSL_LIBRARY=openssl pip3 install \
- --no-cache-dir --compile --ignore-installed \
- pycurl
- SLUGIFY_USES_TEXT_UNIDECODE=yes pip3 install \
- celery[sqs] \
- apache-airflow[celery,postgres,s3,crypto]==1.10.9
- secrets:
- commands:
- generate:
- command: !Sub |
- export $(cat /etc/environment | xargs)
-
- if [ "$TURBINE_MACHINE" != "SCHEDULER" ]; then
- echo "Secret generation reserved for the scheduler"
- exit 0
- fi
- FERNET_KEY=$(aws ssm get-parameter \
- --name ${AWS::StackName}-fernet-key \
- --region '${AWS::Region}' \
- --query 'Parameter.Value')
- if [ "$FERNET_KEY" = "" ]; then
- FERNET_KEY=$(python3 -c "if True:#
- from cryptography.fernet import Fernet
- key = Fernet.generate_key().decode()
- print(key)")
- aws ssm put-parameter \
- --name ${AWS::StackName}-fernet-key \
- --region '${AWS::Region}' \
- --value $FERNET_KEY \
- --type SecureString
- fi
- retrieve:
- command: !Sub |
- while [ "$FERNET_KEY" = "" ]; do
- echo "Waiting for Fernet key to be available..."
- sleep 1
- FERNET_KEY=$(aws ssm get-parameter \
- --name ${AWS::StackName}-fernet-key \
- --region '${AWS::Region}' \
- --with-decryption \
- --query 'Parameter.Value' \
- --output text)
- done
- echo "FERNET_KEY=$FERNET_KEY" >> /etc/environment
- sysconf:
- files:
- /etc/sysconfig/airflow:
- content: !Sub
- - |
- TURBINE_MACHINE=${!TURBINE_MACHINE}
- AWS_DEFAULT_REGION=${AWS::Region}
- AIRFLOW_HOME=/airflow
- AIRFLOW__CORE__EXECUTOR=CeleryExecutor
- AIRFLOW__CORE__FERNET_KEY=${!FERNET_KEY}
- AIRFLOW__CORE__LOAD_EXAMPLES=${LoadExampleDags}
- TURBINE__CORE__LOAD_DEFAULTS=${LoadDefaultConn}
- AIRFLOW__CORE__SQL_ALCHEMY_CONN=${DbUri}
- AIRFLOW__CORE__REMOTE_BASE_LOG_FOLDER=s3://${LogsBucket}
- AIRFLOW__CORE__REMOTE_LOGGING=True
- AIRFLOW__WEBSERVER__BASE_URL=http://${!HOSTNAME}:${WebserverPort}
- AIRFLOW__WEBSERVER__WEB_SERVER_PORT=${WebserverPort}
- AIRFLOW__CELERY__BROKER_URL=sqs://
- AIRFLOW__CELERY__DEFAULT_QUEUE=${QueueName}
- AIRFLOW__CELERY__RESULT_BACKEND=db+${DbUri}
- AIRFLOW__CELERY_BROKER_TRANSPORT_OPTIONS__REGION=${AWS::Region}
- - QueueName: !GetAtt TaskQueue.QueueName
- DbUri: !Join
- - ''
- - - postgresql://
- - !Ref DbMasterUsername
- - ':'
- - !Ref DbMasterPassword
- - '@'
- - !GetAtt Database.Endpoint.Address
- - /airflow
- commands:
- envsubst:
- command: |
- export $(cat /etc/environment | xargs)
-
- PUBLIC=$(curl -s -o /dev/null -w "%{http_code}" \
- http://169.254.169.254/latest/meta-data/public-ipv4)
- PUB_IPV4=$(ec2-metadata -v | awk '{print $2}')
- LOC_IPV4=$(ec2-metadata -o | awk '{print $2}')
- if [ $PUBLIC = "200" ]
- then HOSTNAME=$PUB_IPV4
- else HOSTNAME=$LOC_IPV4
- fi
-
- echo "$(envsubst /etc/sysconfig/airflow
- migrate:
- commands:
- migration:
- command: |
- export $(cat /etc/environment | xargs)
- export $(cat /etc/sysconfig/airflow | xargs)
- if [ "$TURBINE_MACHINE" != "SCHEDULER" ]; then
- echo "Database setup reserved for the scheduler"
- exit 0
- fi
- if [ "$TURBINE__CORE__LOAD_DEFAULTS" == "True" ]; then
- su -c '/usr/local/bin/airflow initdb' ec2-user
- else
- su -c '/usr/local/bin/airflow upgradedb' ec2-user
- fi
- service:
- files:
- /usr/bin/turbine:
- mode: 755
- content: |
- #!/bin/sh
- if [ "$TURBINE_MACHINE" == "SCHEDULER" ]
- then exec /usr/local/bin/airflow scheduler
- elif [ "$TURBINE_MACHINE" == "WEBSERVER" ]
- then exec /usr/local/bin/airflow webserver
- elif [ "$TURBINE_MACHINE" == "WORKER" ]
- then exec /usr/local/bin/airflow worker
- else echo "TURBINE_MACHINE value unknown" && exit 1
- fi
- /usr/lib/tmpfiles.d/airflow.conf:
- content: |
- D /run/airflow 0755 ec2-user ec2-user
- /usr/lib/systemd/system/airflow.service:
- content: |
- [Service]
- EnvironmentFile=/etc/sysconfig/airflow
- User=ec2-user
- Group=ec2-user
- ExecStart=/usr/bin/turbine
- Restart=always
- RestartSec=5s
- KillMode=mixed
- TimeoutStopSec=24h
- [Install]
- WantedBy=multi-user.target
- /usr/lib/systemd/system/watcher.path:
- content: |
- [Unit]
- After=airflow.service
- PartOf=airflow.service
- [Path]
- PathModified=/etc/sysconfig/airflow
- [Install]
- WantedBy=airflow.service
- /usr/lib/systemd/system/watcher.service:
- content: |
- [Service]
- Type=oneshot
- ExecStartPre=/usr/bin/systemctl daemon-reload
- ExecStart=/usr/bin/systemctl restart airflow
- commands:
- setup:
- command: !Sub |
- HAS_DEPLOYMENT=$(aws deploy list-deployments \
- --application-name ${AWS::StackName}-deployment-application \
- --deployment-group ${AWS::StackName}-deployment-group \
- --region ${AWS::Region} | \
- jq '.deployments | has(0)')
-
- systemctl enable airflow.service watcher.path
-
- if [ "$HAS_DEPLOYMENT" = "false" ]; then
- systemctl start airflow
- else
- echo "Deployment pending, deferring service start"
- fi
- lchooks:
- files:
- /usr/bin/lchkill:
- mode: 755
- content: !Sub |
- #!/bin/sh
- INSTANCE_ID=$(ec2-metadata -i | awk '{print $2}')
- TERMINATE_MESSAGE="Terminating EC2 instance <$INSTANCE_ID>"
- TERMINATING=$(aws autoscaling describe-scaling-activities \
- --auto-scaling-group-name '${AWS::StackName}-scaling-group' \
- --max-items 100 \
- --region '${AWS::Region}' | \
- jq --arg TERMINATE_MESSAGE "$TERMINATE_MESSAGE" \
- '.Activities[]
- | select(.Description
- | test($TERMINATE_MESSAGE)) != []')
-
- if [ "$TERMINATING" = "true" ]; then
- systemctl stop airflow
- fi
- /usr/lib/systemd/system/lchkill.timer:
- content: |
- [Timer]
- OnCalendar=*:0/1
- [Install]
- WantedBy=airflow.service
- /usr/lib/systemd/system/lchkill.service:
- content: |
- [Service]
- Type=oneshot
- ExecStart=/usr/bin/lchkill
- /usr/bin/lchbeat:
- mode: 755
- content: !Sub |
- #!/bin/sh
- SERVICE_STATUS=$(systemctl is-active airflow)
-
- if [ "$SERVICE_STATUS" = "deactivating" ]; then
- aws autoscaling record-lifecycle-action-heartbeat \
- --instance-id $(ec2-metadata -i | awk '{print $2}') \
- --lifecycle-hook-name '${AWS::StackName}-scaling-lfhook' \
- --auto-scaling-group-name '${AWS::StackName}-scaling-group' \
- --region '${AWS::Region}'
- fi
- /usr/lib/systemd/system/lchbeat.timer:
- content: |
- [Timer]
- OnCalendar=*:0/1
- [Install]
- WantedBy=airflow.service
- /usr/lib/systemd/system/lchbeat.service:
- content: |
- [Service]
- Type=oneshot
- ExecStart=/usr/bin/lchbeat
- commands:
- setup:
- command: |
- if [ "$TURBINE_MACHINE" = "WORKER" ]; then
- systemctl enable lchkill.timer lchbeat.timer
- fi
- metahup:
- files:
- /etc/cfn/cfn-hup.conf:
- content: !Sub |
- [main]
- stack=${AWS::StackId}
- region=${AWS::Region}
- role=${AirflowRole}
- interval=1
- /etc/cfn/hooks.d/cfn-auto-reloader.conf:
- content: !Sub |
- [cfn-auto-reloader-hook]
- triggers=post.update
- path=Resources.Meta.Metadata.AWS::CloudFormation::Init
- action=/opt/aws/bin/cfn-init -v \
- --region ${AWS::Region} \
- --role ${AirflowRole} \
- --stack ${AWS::StackName} \
- --resource Meta
- runas=root
- /lib/systemd/system/cfn-hup.service:
- content: |
- [Service]
- ExecStart=/opt/aws/bin/cfn-hup
- Restart=always
- [Install]
- WantedBy=multi-user.target
- commands:
- setup:
- command: |
- systemctl enable cfn-hup.service
- systemctl start cfn-hup.service
- cdagent:
- packages:
- yum:
- ruby: []
- wget: []
- commands:
- install:
- command: !Sub |
- wget https://aws-codedeploy-${AWS::Region}.s3.amazonaws.com/latest/install
- chmod +x ./install
- ./install auto
+ - arn:aws:iam::aws:policy/service-role/AWSCodeDeployRole
Outputs:
DeploymentsBucket:
@@ -776,72 +390,3 @@ Outputs:
Value: !Ref CodeDeployApplication
CodeDeployDeploymentGroup:
Value: !Ref CodeDeployDeploymentGroup
-
-Mappings:
- AWSAMIRegionMap:
- ap-northeast-1:
- AMZNLINUX2: ami-00d101850e971728d
- ap-northeast-2:
- AMZNLINUX2: ami-08ab3f7e72215fe91
- ap-south-1:
- AMZNLINUX2: ami-00e782930f1c3dbc7
- ap-southeast-1:
- AMZNLINUX2: ami-0b5a47f8865280111
- ap-southeast-2:
- AMZNLINUX2: ami-0fb7513bcdc525c3b
- ca-central-1:
- AMZNLINUX2: ami-08a9b721ecc5b0a53
- eu-central-1:
- AMZNLINUX2: ami-0ebe657bc328d4e82
- eu-west-1:
- AMZNLINUX2: ami-030dbca661d402413
- eu-west-2:
- AMZNLINUX2: ami-0009a33f033d8b7b6
- eu-west-3:
- AMZNLINUX2: ami-0ebb3a801d5fb8b9b
- sa-east-1:
- AMZNLINUX2: ami-058141e091292ecf0
- us-east-1:
- AMZNLINUX2: ami-0c6b1d09930fac512
- us-east-2:
- AMZNLINUX2: ami-0ebbf2179e615c338
- us-west-1:
- AMZNLINUX2: ami-015954d5e5548d13b
- us-west-2:
- AMZNLINUX2: ami-0cb72367e98845d43
-
-Metadata:
- AWS::CloudFormation::Interface:
- ParameterGroups:
- - Label:
- default: Networking
- Parameters:
- - VPCID
- - PublicSubnet1ID
- - PublicSubnet2ID
- - PrivateSubnet1AID
- - PrivateSubnet2AID
- - AllowedWebBlock
- - WebserverPort
- - Label:
- default: Cluster Settings
- Parameters:
- - SchedulerInstanceType
- - WebserverInstanceType
- - WorkerInstanceType
- - MinGroupSize
- - MaxGroupSize
- - ShrinkThreshold
- - GrowthThreshold
- - DbMasterUsername
- - DbMasterPassword
- - Label:
- default: Airflow Config
- Parameters:
- - LoadExampleDags
- - LoadDefaultConn
- - Label:
- default: Quick Start Overrides
- Parameters:
- - QSS3BucketName
- - QSS3KeyPrefix
diff --git a/templates/turbine-master.template b/templates/turbine-master.template
index a496df1e..76033e49 100644
--- a/templates/turbine-master.template
+++ b/templates/turbine-master.template
@@ -1,148 +1,227 @@
AWSTemplateFormatVersion: 2010-09-09
Description: >-
- The Turbine-Airflow master stack, including a quickstart VPC stack and the
- Turbine-Airflow cluster stack.
+ This template creates a Quick Start VPC stack and a Turbine Airflow cluster
+ stack. The Quick Start VPC stack creates a Multi-AZ VPC infrastructure with
+ two private subnets and managed NAT gateways in the two public subnets. The
+ Turbine Airflow cluster stack creates the Airflow supporting resources
+ including an RDS instance to host the Airflow metadata database, an SQS queue
+ to be used as broker backend, S3 buckets for logs and deployment packages, and
+ then creates the Airflow scheduler, webserver and workers nested stacks. The
+ nested Airflow services stacks create the Airflow instances in highly
+ available auto scaling groups spanning two subnets, plus for the workers stack
+ an EFS shared network directory and a custom cloudwatch load metric function
+ to guide the auto scaling alarm triggers. **WARNING** This template creates
+ AWS resources. You will be billed for the AWS resources used if you create a
+ stack from this template. QS(0027)
+Metadata:
+ AWS::CloudFormation::Interface:
+ ParameterGroups:
+ - Label:
+ default: VPC network configuration
+ Parameters:
+ - VPCCIDR
+ - PublicSubnet1CIDR
+ - PublicSubnet2CIDR
+ - PrivateSubnet1ACIDR
+ - PrivateSubnet2ACIDR
+ - AllowHTTPAccessCIDR
+ - Label:
+ default: Turbine Cluster configuration
+ Parameters:
+ - SchedulerInstanceType
+ - WebserverInstanceType
+ - WorkerInstanceType
+ - MinGroupSize
+ - MaxGroupSize
+ - ShrinkThreshold
+ - GrowthThreshold
+ - Label:
+ default: Apache Airflow configuration
+ Parameters:
+ - LoadExampleDags
+ - LoadDefaultCons
+ - WebServerPort
+ - Label:
+ default: AWS Quick Start configuration
+ Parameters:
+ - QSS3BucketName
+ - QSS3KeyPrefix
+ ParameterLabels:
+ VPCCIDR:
+ default: VPC CIDR
+ PublicSubnet1CIDR:
+ default: Public subnet 1 CIDR
+ PublicSubnet2CIDR:
+ default: Public subnet 2 CIDR
+ PrivateSubnet1ACIDR:
+ default: Private subnet 1 CIDR
+ PrivateSubnet2ACIDR:
+ default: Private subnet 2 CIDR
+ AllowHTTPAccessCIDR:
+ default: Allowed HTTP access CIDR
+ SchedulerInstanceType:
+ default: Scheduler instance type
+ WebserverInstanceType:
+ default: Web server instance type
+ WorkerInstanceType:
+ default: Workers instance type
+ MinGroupSize:
+ default: Minimum group size
+ MaxGroupSize:
+ default: Maximum group size
+ ShrinkThreshold:
+ default: Shrink threshold
+ GrowthThreshold:
+ default: Growth threshold
+ LoadExampleDags:
+ default: Load example DAGs
+ LoadDefaultCons:
+ default: Load default connections
+ WebServerPort:
+ default: Web server port
+ QSS3BucketName:
+ default: Quick Start S3 bucket name
+ QSS3KeyPrefix:
+ default: Quick Start S3 key prefix
Parameters:
- # Networking
VPCCIDR:
- Description: The CIDR block for the VPC
- Type: String
- AllowedPattern: >-
- ^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])(\/([0-9]|[1-2][0-9]|3[0-2]))$
+ Description: >-
+ CIDR block for the VPC.
+ ConstraintDescription: CIDR block parameter must be in the form x.x.x.x/16-28.
+ AllowedPattern: ^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])(\/([0-9]|[1-2][0-9]|3[0-2]))$
Default: 10.0.0.0/16
- PublicSubnet1CIDR:
- Description: The CIDR block for the public Subnet in the first Availability Zone
Type: String
- AllowedPattern: >-
- ^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])(\/([0-9]|[1-2][0-9]|3[0-2]))$
+ PublicSubnet1CIDR:
+ Description: >-
+ CIDR block for the public subnet 1 located in Availability Zone 1.
+ ConstraintDescription: CIDR block parameter must be in the form x.x.x.x/16-28.
+ AllowedPattern: ^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])(\/([0-9]|[1-2][0-9]|3[0-2]))$
Default: 10.0.0.0/24
- PublicSubnet2CIDR:
- Description: The CIDR block for the public Subnet in the second Availability Zone
Type: String
- AllowedPattern: >-
- ^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])(\/([0-9]|[1-2][0-9]|3[0-2]))$
+ PublicSubnet2CIDR:
+ Description: >-
+ CIDR block for the public subnet 2 located in Availability Zone 2.
+ ConstraintDescription: CIDR block parameter must be in the form x.x.x.x/16-28.
+ AllowedPattern: ^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])(\/([0-9]|[1-2][0-9]|3[0-2]))$
Default: 10.0.1.0/24
- PrivateSubnet1ACIDR:
- Description: The CIDR block for the private Subnet in the first Availability Zone
Type: String
- AllowedPattern: >-
- ^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])(\/([0-9]|[1-2][0-9]|3[0-2]))$
+ PrivateSubnet1ACIDR:
+ Description: >-
+ CIDR block for private subnet 1 located in Availability Zone 1.
+ ConstraintDescription: CIDR block parameter must be in the form x.x.x.x/16-28.
+ AllowedPattern: ^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])(\/([0-9]|[1-2][0-9]|3[0-2]))$
Default: 10.0.10.0/24
- PrivateSubnet2ACIDR:
- Description: The CIDR block for the private Subnet in the second Availability Zone
Type: String
- AllowedPattern: >-
- ^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])(\/([0-9]|[1-2][0-9]|3[0-2]))$
- Default: 10.0.11.0/24
- AllowedWebBlock:
+ PrivateSubnet2ACIDR:
Description: >-
- The IPv4 CIDR block to allow HTTP access in the webserver. The default of
- 0.0.0.0/0 allows HTTP from everywhere, which is convenient but less
- secure.
+ CIDR block for private subnet 2 located in Availability Zone 2.
+ ConstraintDescription: CIDR block parameter must be in the form x.x.x.x/16-28.
+ AllowedPattern: ^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])(\/([0-9]|[1-2][0-9]|3[0-2]))$
+ Default: 10.0.11.0/24
Type: String
- AllowedPattern: >-
- ^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])(\/([0-9]|[1-2][0-9]|3[0-2]))$
- Default: 0.0.0.0/0
- WebserverPort:
+ AllowHTTPAccessCIDR:
Description: >-
- The port Airflow webserver will be listening.
- Type: Number
- Default: 8080
- MinValue: 1024
- MaxValue: 65535
- ConstraintDescription: >-
- Ports below 1024 can be opened only with root privileges and the airflow
- process does not run as such.
+ The IPv4 CIDR block to allow HTTP access in the webserver. Using the same
+ CIDR for the VPC is a secure default. Using 0.0.0.0/0 allows access from
+ everywhere, which is convenient but less secure.
+ ConstraintDescription: CIDR block parameter must be in the form x.x.x.x/16-28.
+ AllowedPattern: ^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])(\/([0-9]|[1-2][0-9]|3[0-2]))$
+ Default: 10.0.0.0/16
+ Type: String
- # Cluster Settings
SchedulerInstanceType:
- Description: EC2 instance type to use for the scheduler.
- Type: String
+ Description: >-
+ EC2 instance type to use for the scheduler.
Default: t3.micro
- WebserverInstanceType:
- Description: EC2 instance type to use for the webserver.
Type: String
+ WebserverInstanceType:
+ Description: >-
+ EC2 instance type to use for the webserver.
Default: t3.micro
- WorkerInstanceType:
- Description: EC2 instance type to use for the workers.
Type: String
+ WorkerInstanceType:
+ Description: >-
+ EC2 instance type to use for the workers.
Default: t3.medium
+ Type: String
MinGroupSize:
- Description: The minimum number of active worker instances.
+ Description: >-
+ The minimum number of active worker instances.
+ Default: 0
Type: Number
- Default: 1
MaxGroupSize:
- Description: The maximum number of active worker instances.
- Type: Number
+ Description: >-
+ The maximum number of active worker instances.
Default: 10
+ Type: Number
ShrinkThreshold:
Description: >-
The threshold for the average queue size from which going equal or below
will trigger the AutoScaling group to Scale In, deallocating one worker
instance.
- Type: Number
Default: 0.5
+ Type: Number
GrowthThreshold:
Description: >-
The threshold for the average queue size from which going equal or above
will trigger the AutoScaling group to Scale Out, allocating one worker
instance.
- Type: Number
Default: 0.9
- DbMasterUsername:
- Description: The username to be used in the airflow database.
- Type: String
- Default: airflow
- DbMasterPassword:
- Description: The password to be used in the airflow database.
- Type: String
- NoEcho: true
+ Type: Number
- # Airflow Config
LoadExampleDags:
Description: >-
Load the example DAGs distributed with Airflow. Useful if deploying a
stack for demonstrating a few topologies, operators and scheduling
strategies.
- Type: String
AllowedValues:
- 'False'
- 'True'
Default: 'False'
- LoadDefaultConn:
+ Type: String
+ LoadDefaultCons:
Description: >-
Load the default connections initialized by Airflow. Most consider these
unnecessary, which is why the default is to not load them.
- Type: String
AllowedValues:
- 'False'
- 'True'
Default: 'False'
+ Type: String
+ WebServerPort:
+ Description: >-
+ The port the Airflow webserver will be listening on.
+ ConstraintDescription: >-
+ Ports below 1024 can be opened only with root privileges and the airflow
+ process does not run as such.
+ MinValue: 1024
+ MaxValue: 65535
+ Default: 8080
+ Type: Number
- # Quick Start Overrides
QSS3BucketName:
Description: >-
S3 bucket name for the Quick Start assets. You can specify your own bucket
providing assets and submodules, if you want to override the Quick Start
behavior for your specific implementation.
- Type: String
- Default: turbine-quickstart
- AllowedPattern: '^[0-9a-zA-Z]+([0-9a-zA-Z-]*[0-9a-zA-Z])*$'
ConstraintDescription: >-
Quick Start bucket name can include numbers, lowercase letters, uppercase
letters, and hyphens (-). It cannot start or end with a hyphen (-).
+ AllowedPattern: '^[0-9a-zA-Z]+([0-9a-zA-Z-]*[0-9a-zA-Z])*$'
+ Default: turbine-quickstart
+ Type: String
QSS3KeyPrefix:
Description: >-
S3 key prefix for the Quick Start assets. You can scpeficy your own
"directory" providing the stack templates, if you want to override the
Quick Start behavior for your specific implementation.
- Type: String
- Default: quickstart-turbine-airflow/
- AllowedPattern: '^[0-9a-zA-Z-/]*$'
ConstraintDescription: >-
Quick Start key prefix can include numbers, lowercase letters, uppercase
letters, hyphens (-), and forward slash (/).
+ AllowedPattern: '^[0-9a-zA-Z-/]*$'
+ Default: quickstart-turbine-airflow/
+ Type: String
Resources:
@@ -185,8 +264,8 @@ Resources:
PublicSubnet2ID: !GetAtt VPCStack.Outputs.PublicSubnet2ID
PrivateSubnet1AID: !GetAtt VPCStack.Outputs.PrivateSubnet1AID
PrivateSubnet2AID: !GetAtt VPCStack.Outputs.PrivateSubnet2AID
- AllowedWebBlock: !Ref AllowedWebBlock
- WebserverPort: !Ref WebserverPort
+ AllowHTTPAccessCIDR: !Ref AllowHTTPAccessCIDR
+ WebServerPort: !Ref WebServerPort
SchedulerInstanceType: !Ref SchedulerInstanceType
WebserverInstanceType: !Ref WebserverInstanceType
WorkerInstanceType: !Ref WorkerInstanceType
@@ -194,10 +273,8 @@ Resources:
MaxGroupSize: !Ref MaxGroupSize
GrowthThreshold: !Ref GrowthThreshold
ShrinkThreshold: !Ref ShrinkThreshold
- DbMasterUsername: !Ref DbMasterUsername
- DbMasterPassword: !Ref DbMasterPassword
LoadExampleDags: !Ref LoadExampleDags
- LoadDefaultConn: !Ref LoadDefaultConn
+ LoadDefaultCons: !Ref LoadDefaultCons
QSS3BucketName: !Ref QSS3BucketName
QSS3KeyPrefix: !Ref QSS3KeyPrefix
@@ -208,39 +285,3 @@ Outputs:
Value: !GetAtt TurbineCluster.Outputs.CodeDeployApplication
CodeDeployDeploymentGroup:
Value: !GetAtt TurbineCluster.Outputs.CodeDeployDeploymentGroup
-
-Metadata:
- AWS::CloudFormation::Interface:
- ParameterGroups:
- - Label:
- default: Networking
- Parameters:
- - VPCCIDR
- - PublicSubnet1CIDR
- - PublicSubnet2CIDR
- - PrivateSubnet1ACIDR
- - PrivateSubnet2ACIDR
- - AllowedWebBlock
- - WebserverPort
- - Label:
- default: Cluster Settings
- Parameters:
- - SchedulerInstanceType
- - WebserverInstanceType
- - WorkerInstanceType
- - MinGroupSize
- - MaxGroupSize
- - ShrinkThreshold
- - GrowthThreshold
- - DbMasterUsername
- - DbMasterPassword
- - Label:
- default: Airflow Config
- Parameters:
- - LoadExampleDags
- - LoadDefaultConn
- - Label:
- default: Quick Start Overrides
- Parameters:
- - QSS3BucketName
- - QSS3KeyPrefix
diff --git a/templates/turbine-scheduler.template b/templates/turbine-scheduler.template
index ee9406cb..9f3afbb1 100644
--- a/templates/turbine-scheduler.template
+++ b/templates/turbine-scheduler.template
@@ -1,55 +1,139 @@
AWSTemplateFormatVersion: 2010-09-09
+Description: >-
+ This template creates the Airflow scheduler instance in a highly available
+ auto scaling group spanning two private subnets. **WARNING** This template
+ creates AWS resources. You will be billed for the AWS resources used if you
+ create a stack from this template. QS(0027)
+Metadata:
+ AWS::CloudFormation::Interface:
+ ParameterGroups:
+ - Label:
+ default: VPC network configuration
+ Parameters:
+ - PrivateSubnet1AID
+ - PrivateSubnet2AID
+ - SecurityGroupID
+ - Label:
+ default: Turbine cluster configuration
+ Parameters:
+ - DatabaseSecret
+ - QueueName
+ - LogsBucket
+ - Label:
+ default: Turbine scheduler configuration
+ Parameters:
+ - InstanceType
+ - Label:
+ default: Apache Airflow configuration
+ Parameters:
+ - LoadExampleDags
+ - LoadDefaultCons
+ - Label:
+ default: AWS Quick Start configuration
+ Parameters:
+ - QSS3BucketName
+ - QSS3KeyPrefix
+ ParameterLabels:
+ PrivateSubnet1AID:
+ default: Private subnet 1 ID
+ PrivateSubnet2AID:
+ default: Private subnet 2 ID
+ SecurityGroupID:
+ default: Security group ID
+ DatabaseSecret:
+ default: Database secret
+ QueueName:
+ default: Queue name
+ LogsBucket:
+ default: Logs bucket
+ InstanceType:
+ default: Scheduler instance type
+ LoadExampleDags:
+ default: Load example DAGs
+ LoadDefaultCons:
+ default: Load default connections
+ QSS3BucketName:
+ default: Quick Start S3 bucket name
+ QSS3KeyPrefix:
+ default: Quick Start S3 key prefix
Parameters:
- VPCID:
- Type: AWS::EC2::VPC::Id
+
PrivateSubnet1AID:
+ Description: An existing private Subnet in some Availability Zone.
Type: AWS::EC2::Subnet::Id
PrivateSubnet2AID:
+ Description: An existing private Subnet in another Availability Zone.
Type: AWS::EC2::Subnet::Id
- InstancesSecurityGroup:
+ SecurityGroupID:
+ Description: >-
+ Security Group ID of an externally managed security group that gives
+ instances access to relevant external resources like the metadata database
+ endpoints in the two provided subnets.
Type: AWS::EC2::SecurityGroup::Id
- IamInstanceProfile:
+
+ DatabaseSecret:
+ Description: >-
+ The AWS SecretsManager Secret resource name (ARN) of the secure secret
+ storing the metadata database connection credentials.
Type: String
- IamRole:
+ QueueName:
+ Description: >-
+ Name of the queue to be used as message broker between the scheduler and
+ worker instances.
Type: String
- ImageId:
- Type: AWS::EC2::Image::Id
- InstanceType:
+ LogsBucket:
+ Description: >-
+ Name of the bucket where task logs are remotely stored.
Type: String
- SharedCloudInitStack:
+
+ InstanceType:
+ Description: EC2 instance type to use for the scheduler.
Type: String
-Resources:
+ LoadExampleDags:
+ Description: >-
+ Load the example DAGs distributed with Airflow. Useful if deploying a
+ stack for demonstrating a few topologies, operators and scheduling
+ strategies.
+ AllowedValues:
+ - 'False'
+ - 'True'
+ Default: 'False'
+ Type: String
+ LoadDefaultCons:
+ Description: >-
+ Load the default connections initialized by Airflow. Most consider these
+ unnecessary, which is why the default is to not load them.
+ AllowedValues:
+ - 'False'
+ - 'True'
+ Default: 'False'
+ Type: String
- LaunchConfiguration:
- Type: AWS::AutoScaling::LaunchConfiguration
- Properties:
- IamInstanceProfile: !Ref IamInstanceProfile
- ImageId: !Ref ImageId
- InstanceType: !Ref InstanceType
- SecurityGroups:
- - !Ref InstancesSecurityGroup
- - !Ref SchedulerSecurityGroup
- UserData:
- Fn::Base64: !Sub |
- #!/bin/bash -xe
- echo 'TURBINE_MACHINE=SCHEDULER' > /etc/environment
- /opt/aws/bin/cfn-init -v \
- --region ${AWS::Region} \
- --role ${IamRole} \
- --stack ${SharedCloudInitStack} \
- --resource SharedCloudInitMetadata
+ QSS3BucketName:
+ Description: >-
+ S3 bucket name for the Quick Start assets. You can specify your own bucket
+ providing assets and submodules, if you want to override the Quick Start
+ behavior for your specific implementation.
+ ConstraintDescription: >-
+ Quick Start bucket name can include numbers, lowercase letters, uppercase
+ letters, and hyphens (-). It cannot start or end with a hyphen (-).
+ AllowedPattern: '^[0-9a-zA-Z]+([0-9a-zA-Z-]*[0-9a-zA-Z])*$'
+ Default: turbine-quickstart
+ Type: String
+ QSS3KeyPrefix:
+ Description: >-
+ S3 key prefix for the Quick Start assets. You can specify your own
+ "directory" providing the stack templates, if you want to override the
+ Quick Start behavior for your specific implementation.
+ ConstraintDescription: >-
+ Quick Start key prefix can include numbers, lowercase letters, uppercase
+ letters, hyphens (-), and forward slash (/).
+ AllowedPattern: '^[0-9a-zA-Z-/]*$'
+ Default: quickstart-turbine-airflow/
+ Type: String
- SchedulerSecurityGroup:
- Type: AWS::EC2::SecurityGroup
- Properties:
- GroupDescription: >-
- The security group used by the Airflow scheduler instance. Not initially
- useful but included to facilitate narrowing custom rules.
- VpcId: !Ref VPCID
- Tags:
- - Key: Name
- Value: turbine-scheduler-sg
+Resources:
AutoScalingGroup:
Type: AWS::AutoScaling::AutoScalingGroup
@@ -67,6 +151,146 @@ Resources:
Value: turbine-scheduler
PropagateAtLaunch: true
+ LaunchConfiguration:
+ Type: AWS::AutoScaling::LaunchConfiguration
+ Properties:
+ IamInstanceProfile: !Ref IamInstanceProfile
+ ImageId: !FindInMap
+ - AWSAMIRegionMap
+ - !Ref AWS::Region
+ - AMZNLINUX2
+ InstanceType: !Ref InstanceType
+ SecurityGroups:
+ - !Ref SecurityGroupID
+ UserData:
+ Fn::Base64: !Sub |
+ #!/bin/bash -xe
+ /opt/aws/bin/cfn-init -v \
+ --stack ${AWS::StackName} \
+ --resource LaunchConfiguration
+ /opt/aws/bin/cfn-signal -e $? \
+ --stack ${AWS::StackName} \
+ --resource LaunchConfiguration
+ Metadata:
+ AWS::CloudFormation::Init:
+ config:
+ commands:
+ setup:
+ command: !Sub |
+ export AWS_STACK_NAME="${AWS::StackName}"
+ export LOGS_BUCKET="${LogsBucket}"
+ export QUEUE_NAME="${QueueName}"
+ export DB_SECRETS_ARN="${DatabaseSecret}"
+ export LOAD_EXAMPLES="${LoadExampleDags}"
+ export LOAD_DEFAULTS="${LoadDefaultCons}"
+ aws s3 sync s3://${QSS3BucketName}/${QSS3KeyPrefix}scripts /opt/turbine
+ chmod +x /opt/turbine/scheduler.setup.sh
+ /opt/turbine/scheduler.setup.sh
+
+ IamInstanceProfile:
+ Type: AWS::IAM::InstanceProfile
+ Properties:
+ Roles:
+ - !Ref IamRole
+
+ IamRole:
+ Type: AWS::IAM::Role
+ Properties:
+ AssumeRolePolicyDocument:
+ Version: 2012-10-17
+ Statement:
+ - Effect: Allow
+ Principal:
+ Service:
+ - ec2.amazonaws.com
+ Action:
+ - sts:AssumeRole
+ ManagedPolicyArns:
+ - arn:aws:iam::aws:policy/service-role/AmazonEC2RoleforSSM
+ Policies:
+ - PolicyName: !Sub TurbineAirflowSchedulerDescribeStackPolicy-${AWS::StackName}
+ PolicyDocument:
+ Version: 2012-10-17
+ Statement:
+ - Effect: Allow
+ Action:
+ - cloudformation:DescribeStackResource
+ Resource: !Join
+ - ':'
+ - - arn:aws:cloudformation
+ - !Ref AWS::Region
+ - !Ref AWS::AccountId
+ - !Sub stack/${AWS::StackName}/*
+ - PolicyName: !Sub TurbineAirflowSchedulerGetSecretPolicy-${AWS::StackName}
+ PolicyDocument:
+ Version: 2012-10-17
+ Statement:
+ - Effect: Allow
+ Action:
+ - secretsmanager:GetSecretValue
+ Resource: !Ref DatabaseSecret
+ - PolicyName: !Sub TurbineAirflowSchedulerQueueRWPolicy-${AWS::StackName}
+ PolicyDocument:
+ Version: 2012-10-17
+ Statement:
+ - Effect: Allow
+ Action:
+ - sqs:ListQueues
+ Resource:
+ - !Sub arn:aws:sqs:${AWS::Region}:${AWS::AccountId}:*
+ - Effect: Allow
+ Action:
+ - sqs:ChangeMessageVisibility
+ - sqs:DeleteMessage
+ - sqs:GetQueueAttributes
+ - sqs:GetQueueUrl
+ - sqs:ReceiveMessage
+ - sqs:SendMessage
+ Resource: !Sub arn:aws:sqs:${AWS::Region}:${AWS::AccountId}:${QueueName}
+ - PolicyName: !Sub TurbineAirflowSchedulerLogsRWPolicy-${AWS::StackName}
+ PolicyDocument:
+ Version: 2012-10-17
+ Statement:
+ - Effect: Allow
+ Action:
+ - s3:GetObject
+ - s3:PutObject
+ - s3:DeleteObject
+ Resource: !Sub arn:aws:s3:::${LogsBucket}/*
+
Outputs:
AutoScalingGroup:
Value: !Ref AutoScalingGroup
+
+Mappings:
+ AWSAMIRegionMap:
+ ap-northeast-1:
+ AMZNLINUX2: ami-052652af12b58691f
+ ap-northeast-2:
+ AMZNLINUX2: ami-0db78afd3d150fc18
+ ap-south-1:
+ AMZNLINUX2: ami-03b5297d565ef30a6
+ ap-southeast-1:
+ AMZNLINUX2: ami-0cbc6aae997c6538a
+ ap-southeast-2:
+ AMZNLINUX2: ami-08fdde86b93accf1c
+ ca-central-1:
+ AMZNLINUX2: ami-0bf54ac1b628cf143
+ eu-central-1:
+ AMZNLINUX2: ami-0ec1ba09723e5bfac
+ eu-west-1:
+ AMZNLINUX2: ami-04d5cc9b88f9d1d39
+ eu-west-2:
+ AMZNLINUX2: ami-0cb790308f7591fa6
+ eu-west-3:
+ AMZNLINUX2: ami-07eda9385feb1e969
+ sa-east-1:
+ AMZNLINUX2: ami-0b032e878a66c3b68
+ us-east-1:
+ AMZNLINUX2: ami-0fc61db8544a617ed
+ us-east-2:
+ AMZNLINUX2: ami-0e01ce4ee18447327
+ us-west-1:
+ AMZNLINUX2: ami-09a7fe78668f1e2c0
+ us-west-2:
+ AMZNLINUX2: ami-0ce21b51cb31a48b8
diff --git a/templates/turbine-webserver.template b/templates/turbine-webserver.template
index 75687f8b..a8a0bce8 100644
--- a/templates/turbine-webserver.template
+++ b/templates/turbine-webserver.template
@@ -1,52 +1,210 @@
AWSTemplateFormatVersion: 2010-09-09
+Description: >-
+ This template creates the Airflow web server instance in a highly available
+ auto scaling group spanning two public subnets. **WARNING** This template
+ creates AWS resources. You will be billed for the AWS resources used if you
+ create a stack from this template. QS(0027)
+Metadata:
+ AWS::CloudFormation::Interface:
+ ParameterGroups:
+ - Label:
+ default: VPC network configuration
+ Parameters:
+ - VPCID
+ - PublicSubnet1ID
+ - PublicSubnet2ID
+ - SecurityGroupID
+ - AllowHTTPAccessCIDR
+ - Label:
+ default: Turbine Cluster configuration
+ Parameters:
+ - DatabaseSecret
+ - QueueName
+ - LogsBucket
+ - Label:
+ default: Turbine webserver configuration
+ Parameters:
+ - InstanceType
+ - Label:
+ default: Apache Airflow configuration
+ Parameters:
+ - LoadExampleDags
+ - LoadDefaultCons
+ - WebServerPort
+ - Label:
+ default: AWS Quick Start configuration
+ Parameters:
+ - QSS3BucketName
+ - QSS3KeyPrefix
+ ParameterLabels:
+ VPCID:
+ default: VPC ID
+ PublicSubnet1ID:
+ default: Public subnet 1 ID
+ PublicSubnet2ID:
+ default: Public subnet 2 ID
+ SecurityGroupID:
+ default: Security group ID
+ AllowHTTPAccessCIDR:
+ default: Allowed HTTP access CIDR
+ DatabaseSecret:
+ default: Database secret
+ QueueName:
+ default: Queue name
+ LogsBucket:
+ default: Logs bucket
+ InstanceType:
+ default: Web server instance type
+ LoadExampleDags:
+ default: Load example DAGs
+ LoadDefaultCons:
+ default: Load default connections
+ WebServerPort:
+ default: Web server port
+ QSS3BucketName:
+ default: Quick Start S3 bucket name
+ QSS3KeyPrefix:
+ default: Quick Start S3 key prefix
Parameters:
+
VPCID:
+ Description: An existing VPC for the cluster.
Type: AWS::EC2::VPC::Id
PublicSubnet1ID:
+ Description: An existing public Subnet in some Availability Zone.
Type: AWS::EC2::Subnet::Id
PublicSubnet2ID:
+ Description: An existing public Subnet in another Availability Zone.
Type: AWS::EC2::Subnet::Id
- InstancesSecurityGroup:
+ SecurityGroupID:
+ Description: >-
+ Security Group ID of an externally managed security group that gives
+ instances access to relevant external resources like the metadata database
+ endpoints in the two provided subnets.
Type: AWS::EC2::SecurityGroup::Id
- IngressCIDR:
+ AllowHTTPAccessCIDR:
+ Description: >-
+ The IPv4 CIDR block to allow HTTP access in the webserver. Using the same
+ CIDR for the VPC is a secure default. Using 0.0.0.0/0 allows access from
+ everywhere, which is convenient but less secure.
+ ConstraintDescription: CIDR block parameter must be in the form x.x.x.x/16-28.
+ AllowedPattern: ^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])(\/([0-9]|[1-2][0-9]|3[0-2]))$
+ Default: 10.0.0.0/16
Type: String
- AllowedPattern: >-
- ^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])(\/([0-9]|[1-2][0-9]|3[0-2]))$
- IngressPort:
- Type: Number
- IamInstanceProfile:
+
+ DatabaseSecret:
+ Description: >-
+ The AWS SecretsManager Secret resource name (ARN) of the secure secret
+ storing the metadata database connection credentials.
Type: String
- IamRole:
+ QueueName:
+ Description: >-
+ Name of the queue to be used as message broker between the scheduler and
+ worker instances.
Type: String
- ImageId:
- Type: AWS::EC2::Image::Id
+ LogsBucket:
+ Description: >-
+ Name of the bucket where task logs are remotely stored.
+ Type: String
+
InstanceType:
+ Description: >-
+ EC2 instance type to use for the web server.
Type: String
- SharedCloudInitStack:
+
+ LoadExampleDags:
+ Description: >-
+ Load the example DAGs distributed with Airflow. Useful if deploying a
+ stack for demonstrating a few topologies, operators and scheduling
+ strategies.
+ AllowedValues:
+ - 'False'
+ - 'True'
+ Default: 'False'
Type: String
+ LoadDefaultCons:
+ Description: >-
+ Load the default connections initialized by Airflow. Most consider these
+ unnecessary, which is why the default is to not load them.
+ AllowedValues:
+ - 'False'
+ - 'True'
+ Default: 'False'
+ Type: String
+ WebServerPort:
+ Description: >-
+ The port the Airflow webserver will be listening on.
+ ConstraintDescription: >-
+ Ports below 1024 can be opened only with root privileges and the airflow
+ process does not run as such.
+ MinValue: 1024
+ MaxValue: 65535
+ Default: 8080
+ Type: Number
+ QSS3BucketName:
+ Description: >-
+ S3 bucket name for the Quick Start assets. You can specify your own bucket
+ providing assets and submodules, if you want to override the Quick Start
+ behavior for your specific implementation.
+ ConstraintDescription: >-
+ Quick Start bucket name can include numbers, lowercase letters, uppercase
+ letters, and hyphens (-). It cannot start or end with a hyphen (-).
+ AllowedPattern: '^[0-9a-zA-Z]+([0-9a-zA-Z-]*[0-9a-zA-Z])*$'
+ Default: turbine-quickstart
+ Type: String
+ QSS3KeyPrefix:
+ Description: >-
+ S3 key prefix for the Quick Start assets. You can specify your own
+ "directory" providing the stack templates, if you want to override the
+ Quick Start behavior for your specific implementation.
+ ConstraintDescription: >-
+ Quick Start key prefix can include numbers, lowercase letters, uppercase
+ letters, hyphens (-), and forward slash (/).
+ AllowedPattern: '^[0-9a-zA-Z-/]*$'
+ Default: quickstart-turbine-airflow/
+ Type: String
Resources:
LaunchConfiguration:
Type: AWS::AutoScaling::LaunchConfiguration
Properties:
IamInstanceProfile: !Ref IamInstanceProfile
- ImageId: !Ref ImageId
+ ImageId: !FindInMap
+ - AWSAMIRegionMap
+ - !Ref AWS::Region
+ - AMZNLINUX2
InstanceType: !Ref InstanceType
SecurityGroups:
- - !Ref InstancesSecurityGroup
- - !Ref WebserverSecurityGroup
+ - !Ref SecurityGroup
+ - !Ref SecurityGroupID
UserData:
Fn::Base64: !Sub |
#!/bin/bash -xe
- echo 'TURBINE_MACHINE=WEBSERVER' > /etc/environment
/opt/aws/bin/cfn-init -v \
- --region ${AWS::Region} \
- --role ${IamRole} \
- --stack ${SharedCloudInitStack} \
- --resource SharedCloudInitMetadata
+ --stack ${AWS::StackName} \
+ --resource LaunchConfiguration
+ /opt/aws/bin/cfn-signal -e $? \
+ --stack ${AWS::StackName} \
+ --resource LaunchConfiguration
+ Metadata:
+ AWS::CloudFormation::Init:
+ config:
+ commands:
+ setup:
+ command: !Sub |
+ export AWS_STACK_NAME="${AWS::StackName}"
+ export LOGS_BUCKET="${LogsBucket}"
+ export QUEUE_NAME="${QueueName}"
+ export DB_SECRETS_ARN="${DatabaseSecret}"
+ export LOAD_EXAMPLES="${LoadExampleDags}"
+ export LOAD_DEFAULTS="${LoadDefaultCons}"
+ export WEB_SERVER_PORT="${WebServerPort}"
+ aws s3 sync s3://${QSS3BucketName}/${QSS3KeyPrefix}scripts /opt/turbine
+ chmod +x /opt/turbine/webserver.setup.sh
+ /opt/turbine/webserver.setup.sh
- WebserverSecurityGroup:
+ SecurityGroup:
Type: AWS::EC2::SecurityGroup
Properties:
GroupDescription: >-
@@ -54,10 +212,10 @@ Resources:
the specified port for web access in from a given ip range.
VpcId: !Ref VPCID
SecurityGroupIngress:
- - CidrIp: !Ref IngressCIDR
+ - CidrIp: !Ref AllowHTTPAccessCIDR
IpProtocol: TCP
- FromPort: !Ref IngressPort
- ToPort: !Ref IngressPort
+ FromPort: !Ref WebServerPort
+ ToPort: !Ref WebServerPort
Tags:
- Key: Name
Value: turbine-webserver-sg
@@ -78,6 +236,96 @@ Resources:
Value: turbine-webserver
PropagateAtLaunch: true
+ IamInstanceProfile:
+ Type: AWS::IAM::InstanceProfile
+ Properties:
+ Roles:
+ - !Ref IamRole
+
+ IamRole:
+ Type: AWS::IAM::Role
+ Properties:
+ AssumeRolePolicyDocument:
+ Version: 2012-10-17
+ Statement:
+ - Effect: Allow
+ Principal:
+ Service:
+ - ec2.amazonaws.com
+ Action:
+ - sts:AssumeRole
+ ManagedPolicyArns:
+ - arn:aws:iam::aws:policy/service-role/AmazonEC2RoleforSSM
+ Policies:
+ - PolicyName: !Sub TurbineAirflowWebserverDescribeStackPolicy-${AWS::StackName}
+ PolicyDocument:
+ Version: 2012-10-17
+ Statement:
+ - Effect: Allow
+ Action:
+ - cloudformation:DescribeStackResource
+ Resource: !Join
+ - ':'
+ - - arn:aws:cloudformation
+ - !Ref AWS::Region
+ - !Ref AWS::AccountId
+ - !Sub stack/${AWS::StackName}/*
+ - PolicyName: !Sub TurbineAirflowWebserverGetSecretPolicy-${AWS::StackName}
+ PolicyDocument:
+ Version: 2012-10-17
+ Statement:
+ - Effect: Allow
+ Action:
+ - secretsmanager:GetSecretValue
+ Resource: !Ref DatabaseSecret
+ - PolicyName: !Sub TurbineAirflowWebserverLogsRWPolicy-${AWS::StackName}
+ PolicyDocument:
+ Version: 2012-10-17
+ Statement:
+ - Effect: Allow
+ Action:
+ - s3:GetObject
+ - s3:PutObject
+ - s3:DeleteObject
+ Resource: !Sub arn:aws:s3:::${LogsBucket}/*
+
Outputs:
AutoScalingGroup:
Value: !Ref AutoScalingGroup
+ IamRole:
+ Value: !Ref IamRole
+ SecurityGroup:
+ Value: !Ref SecurityGroup
+
+Mappings:
+ AWSAMIRegionMap:
+ ap-northeast-1:
+ AMZNLINUX2: ami-052652af12b58691f
+ ap-northeast-2:
+ AMZNLINUX2: ami-0db78afd3d150fc18
+ ap-south-1:
+ AMZNLINUX2: ami-03b5297d565ef30a6
+ ap-southeast-1:
+ AMZNLINUX2: ami-0cbc6aae997c6538a
+ ap-southeast-2:
+ AMZNLINUX2: ami-08fdde86b93accf1c
+ ca-central-1:
+ AMZNLINUX2: ami-0bf54ac1b628cf143
+ eu-central-1:
+ AMZNLINUX2: ami-0ec1ba09723e5bfac
+ eu-west-1:
+ AMZNLINUX2: ami-04d5cc9b88f9d1d39
+ eu-west-2:
+ AMZNLINUX2: ami-0cb790308f7591fa6
+ eu-west-3:
+ AMZNLINUX2: ami-07eda9385feb1e969
+ sa-east-1:
+ AMZNLINUX2: ami-0b032e878a66c3b68
+ us-east-1:
+ AMZNLINUX2: ami-0fc61db8544a617ed
+ us-east-2:
+ AMZNLINUX2: ami-0e01ce4ee18447327
+ us-west-1:
+ AMZNLINUX2: ami-09a7fe78668f1e2c0
+ us-west-2:
+ AMZNLINUX2: ami-0ce21b51cb31a48b8
diff --git a/templates/turbine-workerset.template b/templates/turbine-workerset.template
index 8958a811..d2fb6d54 100644
--- a/templates/turbine-workerset.template
+++ b/templates/turbine-workerset.template
@@ -1,57 +1,180 @@
AWSTemplateFormatVersion: 2010-09-09
+Description: >-
+ This template creates the Airflow worker instances in a highly available auto
+ scaling group spanning two private subnets, plus an EFS to work as shared
+ network directory and a custom cloudwatch load metric function to guide the
+ auto scaling alarm triggers. **WARNING** This template creates AWS resources.
+ You will be billed for the AWS resources used if you create a stack from this
+ template. QS(0027)
+Metadata:
+ AWS::CloudFormation::Interface:
+ ParameterGroups:
+ - Label:
+ default: VPC network configuration
+ Parameters:
+ - VPCID
+ - PrivateSubnet1AID
+ - PrivateSubnet2AID
+ - SecurityGroupID
+ - Label:
+ default: Turbine cluster configuration
+ Parameters:
+ - DatabaseSecret
+ - QueueName
+ - LogsBucket
+ - Label:
+ default: Turbine workerset configuration
+ Parameters:
+ - InstanceType
+ - MinGroupSize
+ - MaxGroupSize
+ - ShrinkThreshold
+ - GrowthThreshold
+ - Label:
+ default: Apache Airflow configuration
+ Parameters:
+ - LoadExampleDags
+ - LoadDefaultCons
+ - Label:
+ default: AWS Quick Start configuration
+ Parameters:
+ - QSS3BucketName
+ - QSS3KeyPrefix
+ ParameterLabels:
+ VPCID:
+ default: VPC ID
+ PrivateSubnet1AID:
+ default: Private subnet 1 ID
+ PrivateSubnet2AID:
+ default: Private subnet 2 ID
+ SecurityGroupID:
+ default: Security group ID
+ DatabaseSecret:
+ default: Database secret
+ QueueName:
+ default: Queue name
+ LogsBucket:
+ default: Logs bucket
+ InstanceType:
+ default: Workers instance type
+ MinGroupSize:
+ default: Minimum group size
+ MaxGroupSize:
+ default: Maximum group size
+ ShrinkThreshold:
+ default: Shrink threshold
+ GrowthThreshold:
+ default: Growth threshold
+ LoadExampleDags:
+ default: Load example DAGs
+ LoadDefaultCons:
+ default: Load default connections
+ QSS3BucketName:
+ default: Quick Start S3 bucket name
+ QSS3KeyPrefix:
+ default: Quick Start S3 key prefix
Parameters:
+
VPCID:
+ Description: An existing VPC for the cluster.
Type: AWS::EC2::VPC::Id
PrivateSubnet1AID:
+ Description: An existing private Subnet in some Availability Zone.
Type: AWS::EC2::Subnet::Id
PrivateSubnet2AID:
+ Description: An existing private Subnet in another Availability Zone.
Type: AWS::EC2::Subnet::Id
- InstancesSecurityGroup:
+ SecurityGroupID:
+ Description: >-
+ Security Group ID of an externally managed security group that gives
+ instances access to relevant external resources like the metadata database
+ endpoints in the two provided subnets.
Type: AWS::EC2::SecurityGroup::Id
- IamInstanceProfile:
+
+ DatabaseSecret:
+ Description: >-
+ The AWS SecretsManager Secret resource name (ARN) of the secure secret
+ storing the metadata database connection credentials.
Type: String
- IamRole:
+ QueueName:
+ Description: >-
+ Name of the queue to be used as message broker between the scheduler and
+ worker instances.
Type: String
- ImageId:
- Type: AWS::EC2::Image::Id
+ LogsBucket:
+ Description: >-
+ Name of the bucket where task logs are remotely stored.
+ Type: String
+
InstanceType:
+ Description: >-
+ EC2 instance type to use for the workers.
Type: String
- MinSize:
+ MinGroupSize:
+ Description: The minimum number of active worker instances.
+ Default: 0
Type: Number
- MaxSize:
+ MaxGroupSize:
+ Description: The maximum number of active worker instances.
+ Default: 10
Type: Number
ShrinkThreshold:
+ Description: >-
+ The threshold for the average queue size from which going equal or below
+ will trigger the AutoScaling group to Scale In, deallocating one worker
+ instance.
+ Default: 0.5
Type: Number
GrowthThreshold:
+ Description: >-
+ The threshold for the average queue size from which going equal or above
+ will trigger the AutoScaling group to Scale Out, allocating one worker
+ instance.
+ Default: 0.9
Type: Number
- QueueName:
+
+ LoadExampleDags:
+ Description: >-
+ Load the example DAGs distributed with Airflow. Useful if deploying a
+ stack for demonstrating a few topologies, operators and scheduling
+ strategies.
+ AllowedValues:
+ - 'False'
+ - 'True'
+ Default: 'False'
Type: String
- SharedCloudInitStack:
+ LoadDefaultCons:
+ Description: >-
+ Load the default connections initialized by Airflow. Most consider these
+ unnecessary, which is why the default is to not load them.
+ AllowedValues:
+ - 'False'
+ - 'True'
+ Default: 'False'
Type: String
- # Quick Start Overrides
QSS3BucketName:
Description: >-
S3 bucket name for the Quick Start assets. You can specify your own bucket
providing assets and submodules, if you want to override the Quick Start
behavior for your specific implementation.
- Type: String
- Default: turbine-quickstart
- AllowedPattern: '^[0-9a-zA-Z]+([0-9a-zA-Z-]*[0-9a-zA-Z])*$'
ConstraintDescription: >-
Quick Start bucket name can include numbers, lowercase letters, uppercase
letters, and hyphens (-). It cannot start or end with a hyphen (-).
+ AllowedPattern: '^[0-9a-zA-Z]+([0-9a-zA-Z-]*[0-9a-zA-Z])*$'
+ Default: turbine-quickstart
+ Type: String
QSS3KeyPrefix:
Description: >-
S3 key prefix for the Quick Start assets. You can scpeficy your own
"directory" providing the stack templates, if you want to override the
Quick Start behavior for your specific implementation.
- Type: String
- Default: quickstart-turbine-airflow/
- AllowedPattern: '^[0-9a-zA-Z-/]*$'
ConstraintDescription: >-
Quick Start key prefix can include numbers, lowercase letters, uppercase
letters, hyphens (-), and forward slash (/).
+ AllowedPattern: '^[0-9a-zA-Z-/]*$'
+ Default: quickstart-turbine-airflow/
+ Type: String
Resources:
@@ -59,22 +182,41 @@ Resources:
Type: AWS::AutoScaling::LaunchConfiguration
Properties:
IamInstanceProfile: !Ref IamInstanceProfile
- ImageId: !Ref ImageId
+ ImageId: !FindInMap
+ - AWSAMIRegionMap
+ - !Ref AWS::Region
+ - AMZNLINUX2
InstanceType: !Ref InstanceType
SecurityGroups:
- - !Ref InstancesSecurityGroup
- - !Ref WorkersetSecurityGroup
+ - !Ref SecurityGroup
+ - !Ref SecurityGroupID
UserData:
Fn::Base64: !Sub |
#!/bin/bash -xe
- echo 'TURBINE_MACHINE=WORKER' > /etc/environment
/opt/aws/bin/cfn-init -v \
- --region ${AWS::Region} \
- --role ${IamRole} \
- --stack ${SharedCloudInitStack} \
- --resource SharedCloudInitMetadata
+ --stack ${AWS::StackName} \
+ --resource LaunchConfiguration
+ /opt/aws/bin/cfn-signal -e $? \
+ --stack ${AWS::StackName} \
+ --resource LaunchConfiguration
+ Metadata:
+ AWS::CloudFormation::Init:
+ config:
+ commands:
+ setup:
+ command: !Sub |
+ export AWS_STACK_NAME="${AWS::StackName}"
+ export QUEUE_NAME="${QueueName}"
+ export LOGS_BUCKET="${LogsBucket}"
+ export FILE_SYSTEM_ID="${FileSystem}"
+ export DB_SECRETS_ARN="${DatabaseSecret}"
+ export LOAD_EXAMPLES="${LoadExampleDags}"
+ export LOAD_DEFAULTS="${LoadDefaultCons}"
+ aws s3 sync s3://${QSS3BucketName}/${QSS3KeyPrefix}scripts /opt/turbine
+ chmod +x /opt/turbine/workerset.setup.sh
+ /opt/turbine/workerset.setup.sh
- WorkersetSecurityGroup:
+ SecurityGroup:
Type: AWS::EC2::SecurityGroup
Properties:
GroupDescription: >-
@@ -82,7 +224,7 @@ Resources:
a special port where the Airflow webserver can fetch logs directly.
VpcId: !Ref VPCID
SecurityGroupIngress:
- - SourceSecurityGroupId: !Ref InstancesSecurityGroup
+ - SourceSecurityGroupId: !Ref SecurityGroupID
IpProtocol: TCP
FromPort: 8793
ToPort: 8793
@@ -94,8 +236,8 @@ Resources:
Type: AWS::AutoScaling::AutoScalingGroup
Properties:
LaunchConfigurationName: !Ref LaunchConfiguration
- MaxSize: !Ref MaxSize
- MinSize: !Ref MinSize
+ MaxSize: !Ref MaxGroupSize
+ MinSize: !Ref MinGroupSize
MetricsCollection:
- Granularity: 1Minute
VPCZoneIdentifier:
@@ -106,6 +248,116 @@ Resources:
Value: turbine-worker
PropagateAtLaunch: true
+ IamInstanceProfile:
+ Type: AWS::IAM::InstanceProfile
+ Properties:
+ Roles:
+ - !Ref IamRole
+
+ IamRole:
+ Type: AWS::IAM::Role
+ Properties:
+ AssumeRolePolicyDocument:
+ Version: 2012-10-17
+ Statement:
+ - Effect: Allow
+ Principal:
+ Service:
+ - ec2.amazonaws.com
+ Action:
+ - sts:AssumeRole
+ ManagedPolicyArns:
+ - arn:aws:iam::aws:policy/service-role/AmazonEC2RoleforSSM
+ Policies:
+ - PolicyName: !Sub TurbineAirflowWorkersetDescribeStackPolicy-${AWS::StackName}
+ PolicyDocument:
+ Version: 2012-10-17
+ Statement:
+ - Effect: Allow
+ Action:
+ - cloudformation:DescribeStackResource
+ Resource: !Join
+ - ':'
+ - - arn:aws:cloudformation
+ - !Ref AWS::Region
+ - !Ref AWS::AccountId
+ - !Sub stack/${AWS::StackName}/*
+ - PolicyName: !Sub TurbineAirflowWorkersetGetSecretPolicy-${AWS::StackName}
+ PolicyDocument:
+ Version: 2012-10-17
+ Statement:
+ - Effect: Allow
+ Action:
+ - secretsmanager:GetSecretValue
+ Resource: !Ref DatabaseSecret
+ - PolicyName: !Sub TurbineAirflowWorkersetQueueRWPolicy-${AWS::StackName}
+ PolicyDocument:
+ Version: 2012-10-17
+ Statement:
+ - Effect: Allow
+ Action:
+ - sqs:ListQueues
+ Resource:
+ - !Sub arn:aws:sqs:${AWS::Region}:${AWS::AccountId}:*
+ - Effect: Allow
+ Action:
+ - sqs:ChangeMessageVisibility
+ - sqs:DeleteMessage
+ - sqs:GetQueueAttributes
+ - sqs:GetQueueUrl
+ - sqs:ReceiveMessage
+ - sqs:SendMessage
+ Resource: !Sub arn:aws:sqs:${AWS::Region}:${AWS::AccountId}:${QueueName}
+ - PolicyName: !Sub TurbineAirflowWorkersetLogsRWPolicy-${AWS::StackName}
+ PolicyDocument:
+ Version: 2012-10-17
+ Statement:
+ - Effect: Allow
+ Action:
+ - s3:GetObject
+ - s3:PutObject
+ - s3:DeleteObject
+ Resource: !Sub arn:aws:s3:::${LogsBucket}/*
+
+ FileSystem:
+ Type: AWS::EFS::FileSystem
+ Properties:
+ FileSystemTags:
+ - Key: Name
+ Value: !Sub ${AWS::StackName}-filesystem
+
+ MountTarget1A:
+ Type: AWS::EFS::MountTarget
+ Properties:
+ FileSystemId: !Ref FileSystem
+ SubnetId: !Ref PrivateSubnet1AID
+ SecurityGroups:
+ - !Ref EfsMountSecurityGroup
+
+ MountTarget2A:
+ Type: AWS::EFS::MountTarget
+ Properties:
+ FileSystemId: !Ref FileSystem
+ SubnetId: !Ref PrivateSubnet2AID
+ SecurityGroups:
+ - !Ref EfsMountSecurityGroup
+
+ EfsMountSecurityGroup:
+ Type: AWS::EC2::SecurityGroup
+ Properties:
+ GroupDescription: >-
+ Security Rules with permissions for the shared filesystem across Airflow
+ instances.
+ SecurityGroupIngress:
+ - SourceSecurityGroupId: !Ref SecurityGroup
+ IpProtocol: TCP
+ FromPort: 2049
+ ToPort: 2049
+ VpcId: !Ref VPCID
+ Tags:
+ - Key: Name
+ Value: EfsMountSecurityGroup
+
CloudWatchMetricLambda:
Type: AWS::Lambda::Function
Properties:
@@ -223,5 +475,41 @@ Resources:
Outputs:
AutoScalingGroup:
- Description: The workers autoscaling group
Value: !Ref AutoScalingGroup
+ IamRole:
+ Value: !Ref IamRole
+ SecurityGroup:
+ Value: !Ref SecurityGroup
+
+Mappings:
+ AWSAMIRegionMap:
+ ap-northeast-1:
+ AMZNLINUX2: ami-052652af12b58691f
+ ap-northeast-2:
+ AMZNLINUX2: ami-0db78afd3d150fc18
+ ap-south-1:
+ AMZNLINUX2: ami-03b5297d565ef30a6
+ ap-southeast-1:
+ AMZNLINUX2: ami-0cbc6aae997c6538a
+ ap-southeast-2:
+ AMZNLINUX2: ami-08fdde86b93accf1c
+ ca-central-1:
+ AMZNLINUX2: ami-0bf54ac1b628cf143
+ eu-central-1:
+ AMZNLINUX2: ami-0ec1ba09723e5bfac
+ eu-west-1:
+ AMZNLINUX2: ami-04d5cc9b88f9d1d39
+ eu-west-2:
+ AMZNLINUX2: ami-0cb790308f7591fa6
+ eu-west-3:
+ AMZNLINUX2: ami-07eda9385feb1e969
+ sa-east-1:
+ AMZNLINUX2: ami-0b032e878a66c3b68
+ us-east-1:
+ AMZNLINUX2: ami-0fc61db8544a617ed
+ us-east-2:
+ AMZNLINUX2: ami-0e01ce4ee18447327
+ us-west-1:
+ AMZNLINUX2: ami-09a7fe78668f1e2c0
+ us-west-2:
+ AMZNLINUX2: ami-0ce21b51cb31a48b8
diff --git a/test/templates.py b/test/templates.py
new file mode 100644
index 00000000..c51c7a58
--- /dev/null
+++ b/test/templates.py
@@ -0,0 +1,14 @@
+from cfn_tools import load_yaml
+
+with open("./templates/turbine-master.template") as f:
+ MASTER = load_yaml(f.read())
+with open("./templates/turbine-cluster.template") as f:
+ CLUSTER = load_yaml(f.read())
+with open("./templates/turbine-scheduler.template") as f:
+ SCHEDULER = load_yaml(f.read())
+with open("./templates/turbine-webserver.template") as f:
+ WEBSERVER = load_yaml(f.read())
+with open("./templates/turbine-workerset.template") as f:
+ WORKERSET = load_yaml(f.read())
+
+ALL = [MASTER, CLUSTER, SCHEDULER, WEBSERVER, WORKERSET]
diff --git a/test/test_quickstart_guidelines.py b/test/test_quickstart_guidelines.py
new file mode 100644
index 00000000..fe2a3815
--- /dev/null
+++ b/test/test_quickstart_guidelines.py
@@ -0,0 +1,34 @@
+import re
+from templates import ALL
+
+
+def test_if_vpc_configuration_comes_first():
+ for template in ALL:
+ interface = template["Metadata"]["AWS::CloudFormation::Interface"]
+ groups = [group["Label"]["default"] for group in interface["ParameterGroups"]]
+ assert "VPC" in groups[0]
+
+
+def test_if_quickstart_configuration_comes_first():
+ for template in ALL:
+ interface = template["Metadata"]["AWS::CloudFormation::Interface"]
+ groups = [group["Label"]["default"] for group in interface["ParameterGroups"]]
+ assert "Quick Start" in groups[-1]
+
+
+def test_if_parameters_are_pascal_case():
+ for template in ALL:
+ params = list(template["Parameters"].keys())
+ for param in params:
+ assert param[0] == param[0].upper()
+
+
+def test_if_labels_include_punctuation():
+ for template in ALL:
+ interface = template["Metadata"]["AWS::CloudFormation::Interface"]
+ labels = list(interface.keys())
+ for label in labels:
+ assert re.match(r"[a-zA-Z0-9]", label)
+
+
+# TODO: continue implementing https://aws-quickstart.github.io/naming-parms.html
diff --git a/test/test_template_descriptions.py b/test/test_template_descriptions.py
new file mode 100644
index 00000000..ebf74d7f
--- /dev/null
+++ b/test/test_template_descriptions.py
@@ -0,0 +1,14 @@
+import re
+from templates import MASTER, CLUSTER
+
+
+def strip_warning(description):
+ return re.sub(r"\*\*WARNING\*\*.*QS\(0027\)", "", description)
+
+
+def test_nesting_consistency():
+ master_desc = strip_warning(MASTER["Description"])
+ cluster_desc = strip_warning(CLUSTER["Description"]).replace(
+ "This template", "The Turbine Airflow cluster stack"
+ )
+ assert cluster_desc in master_desc
diff --git a/test/test_template_interface.py b/test/test_template_interface.py
new file mode 100644
index 00000000..a543022b
--- /dev/null
+++ b/test/test_template_interface.py
@@ -0,0 +1,41 @@
+from templates import ALL
+
+
+def test_if_all_parameters_are_grouped():
+ for template in ALL:
+ interface = template["Metadata"]["AWS::CloudFormation::Interface"]
+ grouped = [
+ param
+ for group in interface["ParameterGroups"]
+ for param in group["Parameters"]
+ ]
+ for param in template["Parameters"]:
+ assert param in grouped
+
+
+def test_if_parameters_in_groups_are_ordered():
+ for template in ALL:
+ interface = template["Metadata"]["AWS::CloudFormation::Interface"]
+ grouped = [
+ param
+ for group in interface["ParameterGroups"]
+ for param in group["Parameters"]
+ ]
+ params = list(template["Parameters"].keys())
+ assert grouped == params
+
+
+def test_if_all_parameters_are_labeled():
+ for template in ALL:
+ interface = template["Metadata"]["AWS::CloudFormation::Interface"]
+ labeled = list(interface["ParameterLabels"].keys())
+ for param in template["Parameters"]:
+ assert param in labeled
+
+
+def test_if_parameters_labels_are_ordered():
+ for template in ALL:
+ interface = template["Metadata"]["AWS::CloudFormation::Interface"]
+ labeled = list(interface["ParameterLabels"].keys())
+ params = list(template["Parameters"].keys())
+ assert labeled == params
diff --git a/test/test_template_params.py b/test/test_template_params.py
new file mode 100644
index 00000000..85d0a4d9
--- /dev/null
+++ b/test/test_template_params.py
@@ -0,0 +1,73 @@
+import re
+from cfn_tools import dump_yaml
+from templates import ALL, MASTER, CLUSTER, SCHEDULER, WEBSERVER, WORKERSET
+
+
+def test_if_important_properties_are_specified():
+ for template in ALL:
+ for specs in template["Parameters"].values():
+ assert "Description" in specs
+ assert "Type" in specs
+ if "AllowedPattern" in specs:
+ assert "ConstraintDescription" in specs
+ if "MinValue" in specs or "MaxValue" in specs:
+ assert "ConstraintDescription" in specs
+
+
+def test_if_properties_are_in_order():
+ def is_ordered(left, right, array):
+ left_index = array.index(left) if left in array else None
+ right_index = array.index(right) if right in array else None
+ if left_index is None or right_index is None:
+ return True
+ return left_index < right_index
+
+ for template in ALL:
+ for spec in template["Parameters"].values():
+ props = list(spec.keys())
+
+ assert is_ordered("Description", "ConstraintDescription", props)
+ assert is_ordered("ConstraintDescription", "AllowedPattern", props)
+ assert is_ordered("AllowedPattern", "Default", props)
+ assert is_ordered("Default", "Type", props)
+
+ assert is_ordered("Description", "AllowedValues", props)
+ assert is_ordered("AllowedValues", "Default", props)
+
+ assert is_ordered("ConstraintDescription", "MinValue", props)
+ assert is_ordered("MinValue", "MaxValue", props)
+ assert is_ordered("MaxValue", "Default", props)
+
+
+def test_if_default_value_satisfies_pattern():
+ for template in ALL:
+ for specs in template["Parameters"].values():
+ if "AllowedPattern" in specs and "Default" in specs:
+ assert re.match(specs["AllowedPattern"], specs["Default"])
+
+
+def test_if_description_ends_in_dot():
+ for template in ALL:
+ for specs in template["Parameters"].values():
+ assert specs["Description"].endswith(".")
+
+
+def test_if_constraint_description_ends_in_dot():
+ for template in ALL:
+ for specs in template["Parameters"].values():
+ if "ConstraintDescription" in specs:
+ assert specs["ConstraintDescription"].endswith(".")
+
+
+def test_consistency():
+ pairs = [
+ (MASTER, CLUSTER),
+ (CLUSTER, SCHEDULER),
+ (CLUSTER, WEBSERVER),
+ (CLUSTER, WORKERSET),
+ ]
+ for (t_outer, t_inner) in pairs:
+ for param1, specs1 in t_outer["Parameters"].items():
+ for param2, specs2 in t_inner["Parameters"].items():
+ if param1 == param2:
+ assert (param1, dump_yaml(specs1)) == (param2, dump_yaml(specs2))