diff --git a/.flake8 b/.flake8 new file mode 100644 index 00000000..2bcd70e3 --- /dev/null +++ b/.flake8 @@ -0,0 +1,2 @@ +[flake8] +max-line-length = 88 diff --git a/img/logo.png b/.github/img/logo.png similarity index 100% rename from img/logo.png rename to .github/img/logo.png diff --git a/img/logo.svg b/.github/img/logo.svg similarity index 100% rename from img/logo.svg rename to .github/img/logo.svg diff --git a/img/stack-diagram.png b/.github/img/stack-diagram.png similarity index 100% rename from img/stack-diagram.png rename to .github/img/stack-diagram.png diff --git a/.github/workflows/push.yaml b/.github/workflows/push.yaml index ba2a4432..b7e03090 100644 --- a/.github/workflows/push.yaml +++ b/.github/workflows/push.yaml @@ -13,12 +13,13 @@ jobs: submodules: true - name: Setup Python uses: actions/setup-python@v1 - - name: Install CFN Lint - run: pip install cfn-lint + - name: Install Dev Dependencies + run: pip install -r dev-requirements.txt - name: Lint Templates run: make lint test: + if: contains(github.ref, 'refs/heads/master') runs-on: ubuntu-latest needs: lint steps: @@ -28,8 +29,8 @@ jobs: submodules: true - name: Setup Python uses: actions/setup-python@v1 - - name: Install TaskCat - run: pip install git+git://github.com/villasv/taskcat.git@b1011e8f080bad5d0a7cec65559e3c160787d17f#egg=taskcat + - name: Install Dev Dependencies + run: pip install -r dev-requirements.txt - name: Configure AWS Credentials uses: aws-actions/configure-aws-credentials@v1 with: @@ -40,6 +41,7 @@ jobs: run: make test sync: + if: contains(github.ref, 'refs/heads/master') runs-on: ubuntu-latest needs: test steps: @@ -49,8 +51,8 @@ jobs: submodules: true - name: Setup Python uses: actions/setup-python@v1 - - name: Install AWS CLI - run: pip install awscli + - name: Install Dev Dependencies + run: pip install -r dev-requirements.txt - name: Configure AWS Credentials uses: aws-actions/configure-aws-credentials@v1 with: diff --git a/.github/workflows/review.yaml 
b/.github/workflows/review.yaml new file mode 100644 index 00000000..fd03fd2b --- /dev/null +++ b/.github/workflows/review.yaml @@ -0,0 +1,69 @@ +name: Stack Release Pipeline + +on: + pull_request_review: + types: + - submitted + +# Jobs install dev-requirements.txt (as push.yaml does) so make lint/test/sync find their tools. +jobs: + + lint: + if: contains(github.event.review.body, '/lint') + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v1 + with: + submodules: true + - name: Setup Python + uses: actions/setup-python@v1 + - name: Install Dev Dependencies + run: pip install -r dev-requirements.txt + - name: Lint Templates + run: make lint + + test: + if: contains(github.event.review.body, '/test') + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v1 + with: + submodules: true + - name: Setup Python + uses: actions/setup-python@v1 + - name: Install Dev Dependencies + run: pip install -r dev-requirements.txt + - name: Configure AWS Credentials + uses: aws-actions/configure-aws-credentials@v1 + with: + aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + aws-region: us-east-1 + - name: Test Stacks + run: make test + + sync: + if: contains(github.event.review.body, '/sync') + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v1 + with: + submodules: true + - name: Setup Python + uses: actions/setup-python@v1 + - name: Install Dev Dependencies + run: pip install -r dev-requirements.txt + - name: Configure AWS Credentials + uses: aws-actions/configure-aws-credentials@v1 + with: + aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + aws-region: us-east-1 + - name: Infer Branch Name + shell: bash + run: echo "::set-env name=BRANCH::${GITHUB_REF#refs/heads/}" + - name: Sync Files + run: make sync diff --git a/.pylintrc b/.pylintrc index 16971686..238aacdc 100644 --- a/.pylintrc +++ b/.pylintrc @@ -1,5 +1,6 @@ [MESSAGES CONTROL] disable= + fixme, 
missing-module-docstring, missing-function-docstring diff --git a/Makefile b/Makefile index 7e508f8b..f7b68fca 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,6 @@ ifndef BRANCH BRANCH := $(shell git rev-parse --abbrev-ref HEAD) endif - ifeq ($(BRANCH),master) BUCKET := s3://turbine-quickstart/quickstart-turbine-airflow else @@ -10,6 +9,9 @@ endif lint: + black . --check + flake8 . + pylint **/*.py cfn-lint templates/*.template nuke: @@ -19,7 +21,9 @@ pack: 7z a ./functions/package.zip ./functions/*.py sync: pack + aws s3 rm $(BUCKET) --recursive aws s3 sync --exclude '.*' --acl public-read . $(BUCKET) test: pack + pytest -vv taskcat test run --input-file ./ci/taskcat.yaml diff --git a/README.md b/README.md index 369ec06d..35272633 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ - + -# Turbine [![CFN Deploy](https://img.shields.io/badge/CFN-deploy-green.svg?style=flat-square&logo=amazon-aws)](#get-it-working) [![GitHub Release](https://img.shields.io/github/release/villasv/aws-airflow-stack.svg?style=flat-square&logo=github)](https://github.com/villasv/aws-airflow-stack/releases/latest) [![Build Status](https://img.shields.io/travis/villasv/aws-airflow-stack/master.svg?style=flat-square&logo=gitlab&logoColor=white&label=taskcat)](https://scrutinizer-ci.com/g/villasv/aws-airflow-stack/build-status/master) +# Turbine [![GitHub Release](https://img.shields.io/github/release/villasv/aws-airflow-stack.svg?style=flat-square&logo=github)](https://github.com/villasv/aws-airflow-stack/releases/latest) [![Build Status](https://img.shields.io/github/workflow/status/villasv/aws-airflow-stack/Stack%20Release%20Pipeline?style=flat-square&logo=github&logoColor=white&label=build)](https://github.com/villasv/aws-airflow-stack/actions?query=workflow%3A%22Stack+Release+Pipeline%22+branch%3Amaster) [![CFN Deploy](https://img.shields.io/badge/CFN-deploy-green.svg?style=flat-square&logo=amazon-aws)](#get-it-working) Turbine is the set of bare metals behind a simple yet complete 
and efficient Airflow setup. @@ -13,7 +13,7 @@ configure in a few commands. ## Overview -![stack diagram](/img/stack-diagram.png) +![stack diagram](/.github/img/stack-diagram.png) The stack is composed mainly of three services: the Airflow web server, the Airflow scheduler, and the Airflow worker. Supporting resources include an RDS @@ -113,7 +113,7 @@ available in the shell. Before running Airflow commands, you need to load the Airflow configuration: ```bash -$ export $(xargs "$1"; } + +mkdir -p /etc/cfn/hooks.d +cp "$FILES"/systemd/cfn-hup.service /lib/systemd/system/ +cp "$FILES"/systemd/cfn-hup.conf /etc/cfn/cfn-hup.conf +cp "$FILES"/systemd/cfn-auto-reloader.conf /etc/cfn/hooks.d/cfn-auto-reloader.conf +envreplace /etc/cfn/cfn-hup.conf +envreplace /etc/cfn/hooks.d/cfn-auto-reloader.conf + +mkdir /run/airflow && chown -R ec2-user: /run/airflow +cp "$FILES"/systemd/airflow-*.{path,timer,service} /lib/systemd/system/ +cp "$FILES"/systemd/airflow.env /etc/sysconfig/airflow.env +cp "$FILES"/systemd/airflow.conf /usr/lib/tmpfiles.d/airflow.conf +envreplace /etc/sysconfig/airflow.env + +mapfile -t AIRFLOW_ENVS < /etc/sysconfig/airflow.env +export "${AIRFLOW_ENVS[@]}" + +yum install -y gcc libcurl-devel openssl-devel +export PYCURL_SSL_LIBRARY=openssl +pip3 install "apache-airflow[celery,postgres,s3,crypto]==1.10.9" "celery[sqs]" +mkdir "$AIRFLOW_HOME" && chown -R ec2-user: "$AIRFLOW_HOME" + +systemctl enable --now cfn-hup.service + +cd_agent() { + yum install -y ruby + wget "https://aws-codedeploy-$AWS_REGION.s3.amazonaws.com/latest/install" + chmod +x ./install + ./install auto +} diff --git a/scripts/scheduler.setup.sh b/scripts/scheduler.setup.sh new file mode 100755 index 00000000..c81a11e4 --- /dev/null +++ b/scripts/scheduler.setup.sh @@ -0,0 +1,12 @@ +#!/bin/bash -e + +. 
"$(dirname $0)/commons.setup.sh" + +if [ "$TURBINE__CORE__LOAD_DEFAULTS" == "True" ]; then + su -c '/usr/local/bin/airflow initdb' ec2-user +else + su -c '/usr/local/bin/airflow upgradedb' ec2-user +fi + +systemctl enable --now airflow-scheduler +cd_agent diff --git a/scripts/systemd/airflow-confapply-agent.path b/scripts/systemd/airflow-confapply-agent.path new file mode 100644 index 00000000..8efa3b6a --- /dev/null +++ b/scripts/systemd/airflow-confapply-agent.path @@ -0,0 +1,9 @@ +[Unit] +After=airflow-scheduler.service airflow-webserver.service airflow-workerset.service +PartOf=airflow-scheduler.service airflow-webserver.service airflow-workerset.service + +[Path] +PathModified=/etc/sysconfig/airflow.env + +[Install] +WantedBy=airflow-scheduler.service airflow-webserver.service airflow-workerset.service diff --git a/scripts/systemd/airflow-confapply-agent.service b/scripts/systemd/airflow-confapply-agent.service new file mode 100644 index 00000000..ed4a97ba --- /dev/null +++ b/scripts/systemd/airflow-confapply-agent.service @@ -0,0 +1,4 @@ +[Service] +Type=oneshot +ExecStartPre=/usr/bin/systemctl daemon-reload +ExecStart=/opt/turbine/restart-services.sh diff --git a/scripts/systemd/airflow-heartbeat.service b/scripts/systemd/airflow-heartbeat.service new file mode 100644 index 00000000..b3a71ecb --- /dev/null +++ b/scripts/systemd/airflow-heartbeat.service @@ -0,0 +1,3 @@ +[Service] +Type=oneshot +ExecStart=/opt/turbine/airflow-heartbeat.sh diff --git a/scripts/systemd/airflow-heartbeat.timer b/scripts/systemd/airflow-heartbeat.timer new file mode 100644 index 00000000..4eb06a62 --- /dev/null +++ b/scripts/systemd/airflow-heartbeat.timer @@ -0,0 +1,5 @@ +[Timer] +OnCalendar=*:0/1 + +[Install] +WantedBy=airflow.service diff --git a/scripts/systemd/airflow-scheduler.service b/scripts/systemd/airflow-scheduler.service new file mode 100644 index 00000000..748004fb --- /dev/null +++ b/scripts/systemd/airflow-scheduler.service @@ -0,0 +1,14 @@ +[Unit] 
+Description=Airflow scheduler daemon +Wants=airflow-confapply-agent.path + +[Service] +EnvironmentFile=/etc/sysconfig/airflow.env +ExecStart=/usr/local/bin/airflow scheduler +User=ec2-user +Group=ec2-user +Restart=always +RestartSec=5s + +[Install] +WantedBy=multi-user.target diff --git a/scripts/systemd/airflow-terminate.service b/scripts/systemd/airflow-terminate.service new file mode 100644 index 00000000..0232520c --- /dev/null +++ b/scripts/systemd/airflow-terminate.service @@ -0,0 +1,3 @@ +[Service] +Type=oneshot +ExecStart=/opt/turbine/airflow-terminate.sh diff --git a/scripts/systemd/airflow-terminate.timer b/scripts/systemd/airflow-terminate.timer new file mode 100644 index 00000000..4eb06a62 --- /dev/null +++ b/scripts/systemd/airflow-terminate.timer @@ -0,0 +1,5 @@ +[Timer] +OnCalendar=*:0/1 + +[Install] +WantedBy=airflow.service diff --git a/scripts/systemd/airflow-webserver.service b/scripts/systemd/airflow-webserver.service new file mode 100644 index 00000000..e758bd23 --- /dev/null +++ b/scripts/systemd/airflow-webserver.service @@ -0,0 +1,15 @@ +[Unit] +Description=Airflow webserver daemon +Wants=airflow-confapply-agent.path + +[Service] +EnvironmentFile=/etc/sysconfig/airflow.env +ExecStart=/usr/local/bin/airflow webserver --pid /run/airflow/webserver.pid +User=ec2-user +Group=ec2-user +Restart=on-failure +RestartSec=5s +PrivateTmp=true + +[Install] +WantedBy=multi-user.target diff --git a/scripts/systemd/airflow-workerset.service b/scripts/systemd/airflow-workerset.service new file mode 100644 index 00000000..aca7b39f --- /dev/null +++ b/scripts/systemd/airflow-workerset.service @@ -0,0 +1,16 @@ +[Unit] +Description=Airflow celery worker daemon +Wants=airflow-confapply-agent.path + +[Service] +EnvironmentFile=/etc/sysconfig/airflow.env +ExecStart=/usr/local/bin/airflow worker +User=ec2-user +Group=ec2-user +Restart=on-failure +RestartSec=10s +KillMode=mixed +TimeoutStopSec=24h + +[Install] +WantedBy=multi-user.target diff --git 
a/scripts/systemd/airflow.conf b/scripts/systemd/airflow.conf new file mode 100644 index 00000000..e158fe28 --- /dev/null +++ b/scripts/systemd/airflow.conf @@ -0,0 +1 @@ +D /run/airflow 0755 ec2-user ec2-user diff --git a/scripts/systemd/airflow.env b/scripts/systemd/airflow.env new file mode 100644 index 00000000..f1cca07c --- /dev/null +++ b/scripts/systemd/airflow.env @@ -0,0 +1,13 @@ +AWS_DEFAULT_REGION=${AWS_REGION} +AIRFLOW_HOME=/airflow +AIRFLOW__CORE__EXECUTOR=CeleryExecutor +AIRFLOW__CORE__FERNET_KEY=${FERNET_KEY} +AIRFLOW__CORE__LOAD_EXAMPLES=${LOAD_EXAMPLES} +TURBINE__CORE__LOAD_DEFAULTS=${LOAD_DEFAULTS} +AIRFLOW__CORE__SQL_ALCHEMY_CONN=${DATABASE_URI} +AIRFLOW__CORE__REMOTE_BASE_LOG_FOLDER=s3://${LOGS_BUCKET} +AIRFLOW__CORE__REMOTE_LOGGING=True +AIRFLOW__CELERY__BROKER_URL=sqs:// +AIRFLOW__CELERY__DEFAULT_QUEUE=${QUEUE_NAME} +AIRFLOW__CELERY__RESULT_BACKEND=db+${DATABASE_URI} +AIRFLOW__CELERY_BROKER_TRANSPORT_OPTIONS__REGION=${AWS_REGION} diff --git a/scripts/systemd/cfn-auto-reloader.conf b/scripts/systemd/cfn-auto-reloader.conf new file mode 100644 index 00000000..fa1d04cf --- /dev/null +++ b/scripts/systemd/cfn-auto-reloader.conf @@ -0,0 +1,9 @@ +[cfn-auto-reloader-hook] +triggers=post.update +path=Resources.LaunchConfiguration.Metadata.AWS::CloudFormation::Init +action=/opt/aws/bin/cfn-init -v \ + --region ${AWS_REGION} \ + --role ${IAM_ROLE} \ + --stack ${AWS_STACK_NAME} \ + --resource LaunchConfiguration +runas=root diff --git a/scripts/systemd/cfn-hup.conf b/scripts/systemd/cfn-hup.conf new file mode 100644 index 00000000..bf3466f2 --- /dev/null +++ b/scripts/systemd/cfn-hup.conf @@ -0,0 +1,5 @@ +[main] +stack=${AWS_STACK_NAME} +region=${AWS_REGION} +role=${IAM_ROLE} +interval=1 diff --git a/scripts/systemd/cfn-hup.service b/scripts/systemd/cfn-hup.service new file mode 100644 index 00000000..7f46eec5 --- /dev/null +++ b/scripts/systemd/cfn-hup.service @@ -0,0 +1,6 @@ +[Service] +ExecStart=/opt/aws/bin/cfn-hup +Restart=always + +[Install] 
+WantedBy=multi-user.target diff --git a/scripts/webserver.setup.sh b/scripts/webserver.setup.sh new file mode 100755 index 00000000..9d80bfb7 --- /dev/null +++ b/scripts/webserver.setup.sh @@ -0,0 +1,17 @@ +#!/bin/bash -e + +. "$(dirname $0)/commons.setup.sh" +# Probe IMDS for a public IPv4; keep only the HTTP status code (body discarded). +PUBLIC=$(curl -s -o /dev/null -w "%{http_code}" "$IMDSv1/meta-data/public-ipv4") +if [ "$PUBLIC" = "200" ] +then HOSTNAME=$(ec2-metadata -v | awk '{print $2}') +else HOSTNAME=$(ec2-metadata -o | awk '{print $2}') +fi +BASE_URL="http://$HOSTNAME:${WEB_SERVER_PORT}" +echo "AIRFLOW__WEBSERVER__BASE_URL=$BASE_URL" \ + >> /etc/sysconfig/airflow.env +echo "AIRFLOW__WEBSERVER__WEB_SERVER_PORT=${WEB_SERVER_PORT}" \ + >> /etc/sysconfig/airflow.env + +systemctl enable --now airflow-webserver +cd_agent diff --git a/scripts/workerset.setup.sh b/scripts/workerset.setup.sh new file mode 100755 index 00000000..018d911d --- /dev/null +++ b/scripts/workerset.setup.sh @@ -0,0 +1,19 @@ +#!/bin/bash -e + +. "$(dirname $0)/commons.setup.sh" +# Create and mount the EFS directory only when the mount point does not exist yet. +if [ ! -d "/mnt/efs" ]; then + mkdir /mnt/efs + FSPEC="${FILE_SYSTEM_ID}.efs.$AWS_REGION.amazonaws.com:/" + PARAMS="nfsvers=4.1,rsize=1048576,wsize=1048576" + PARAMS="$PARAMS,hard,timeo=600,retrans=2,noresvport" + echo "$FSPEC /mnt/efs nfs $PARAMS,_netdev 0 0" >> /etc/fstab + mount /mnt/efs && chown -R ec2-user: /mnt/efs +fi + +if [ "$CD_PENDING_DEPLOY" = "false" ]; then + systemctl enable --now airflow-workerset +else + systemctl enable airflow-workerset +fi +cd_agent diff --git a/templates/turbine-cluster.template b/templates/turbine-cluster.template index efaabb1e..7e8b2f30 100644 --- a/templates/turbine-cluster.template +++ b/templates/turbine-cluster.template @@ -1,140 +1,279 @@ AWSTemplateFormatVersion: 2010-09-09 Description: >- - The Turbine-Airflow cluster stack, composed mainly of the Airflow web server, - the Airflow scheduler, and the Airflow worker nested stacks. 
Supporting - resources include an RDS to host the Airflow metadata database, an SQS to be - used as broker backend, S3 buckets for logs and deployment bundles, an EFS to - serve as shared directory, and a custom CloudWatch metric measured by a timed - AWS Lambda. + This template creates the Airflow supporting resources including an RDS + instance to host the Airflow metadata database, an SQS queue to be used as + broker backend, S3 buckets for logs and deployment packages, and then creates + the Airflow scheduler, webserver and workers nested stacks. The nested Airflow + services stacks create the Airflow instances in highly available auto scaling + groups spanning two subnets, plus for the workers stack an EFS shared network + directory and a custom cloudwatch load metric function to guide the auto + scaling alarm triggers. **WARNING** This template creates AWS resources. You + will be billed for the AWS resources used if you create a stack from this + template. QS(0027) +Metadata: + AWS::CloudFormation::Interface: + ParameterGroups: + - Label: + default: VPC network configuration + Parameters: + - VPCID + - PublicSubnet1ID + - PublicSubnet2ID + - PrivateSubnet1AID + - PrivateSubnet2AID + - AllowHTTPAccessCIDR + - Label: + default: Turbine Cluster configuration + Parameters: + - SchedulerInstanceType + - WebserverInstanceType + - WorkerInstanceType + - MinGroupSize + - MaxGroupSize + - ShrinkThreshold + - GrowthThreshold + - Label: + default: Apache Airflow configuration + Parameters: + - LoadExampleDags + - LoadDefaultCons + - WebServerPort + - Label: + default: AWS Quick Start configuration + Parameters: + - QSS3BucketName + - QSS3KeyPrefix + ParameterLabels: + VPCID: + default: VPC ID + PublicSubnet1ID: + default: Public subnet 1 ID + PublicSubnet2ID: + default: Public subnet 2 ID + PrivateSubnet1AID: + default: Private subnet 1 ID + PrivateSubnet2AID: + default: Private subnet 2 ID + AllowHTTPAccessCIDR: + default: Allowed HTTP access CIDR + 
SchedulerInstanceType: + default: Scheduler instance type + WebserverInstanceType: + default: Web server instance type + WorkerInstanceType: + default: Workers instance type + MinGroupSize: + default: Minimum group size + MaxGroupSize: + default: Maximum group size + ShrinkThreshold: + default: Shrink threshold + GrowthThreshold: + default: Growth threshold + LoadExampleDags: + default: Load example DAGs + LoadDefaultCons: + default: Load default connections + WebServerPort: + default: Web server port + QSS3BucketName: + default: Quick Start S3 bucket name + QSS3KeyPrefix: + default: Quick Start S3 key prefix Parameters: - # Networking VPCID: - Description: An existing VPC for the cluster + Description: An existing VPC for the cluster. Type: AWS::EC2::VPC::Id PublicSubnet1ID: - Description: An existing public Subnet in some Availability Zone + Description: An existing public Subnet in some Availability Zone. Type: AWS::EC2::Subnet::Id PublicSubnet2ID: - Description: An existing public Subnet in another Availability Zone + Description: An existing public Subnet in another Availability Zone. Type: AWS::EC2::Subnet::Id PrivateSubnet1AID: - Description: An existing private Subnet in some Availability Zone + Description: An existing private Subnet in some Availability Zone. Type: AWS::EC2::Subnet::Id PrivateSubnet2AID: - Description: An existing private Subnet in another Availability Zone + Description: An existing private Subnet in another Availability Zone. Type: AWS::EC2::Subnet::Id - AllowedWebBlock: + AllowHTTPAccessCIDR: Description: >- - The IPv4 CIDR block to allow HTTP access in the webserver. The default of - 0.0.0.0/0 allows HTTP from everywhere, which is convenient but less - secure. + The IPv4 CIDR block to allow HTTP access in the webserver. Using the same + CIDR for the VPC is a secure default. Using 0.0.0.0/0 allows access from + everywhere, which is convenient but less secure. 
+ ConstraintDescription: CIDR block parameter must be in the form x.x.x.x/16-28. + AllowedPattern: ^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])(\/([0-9]|[1-2][0-9]|3[0-2]))$ + Default: 10.0.0.0/16 Type: String - AllowedPattern: >- - ^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])(\/([0-9]|[1-2][0-9]|3[0-2]))$ - Default: 0.0.0.0/0 - WebserverPort: - Description: >- - The port Airflow webserver will be listening. - Type: Number - Default: 8080 - MinValue: 1024 - MaxValue: 65535 - ConstraintDescription: >- - Ports below 1024 can be opened only with root privileges and the airflow - process does not run as such. - # Cluster Settings SchedulerInstanceType: Description: EC2 instance type to use for the scheduler. - Type: String Default: t3.micro + Type: String WebserverInstanceType: Description: EC2 instance type to use for the webserver. - Type: String Default: t3.micro + Type: String WorkerInstanceType: Description: EC2 instance type to use for the workers. - Type: String Default: t3.medium + Type: String MinGroupSize: Description: The minimum number of active worker instances. + Default: 0 Type: Number - Default: 1 MaxGroupSize: Description: The maximum number of active worker instances. - Type: Number Default: 10 + Type: Number ShrinkThreshold: Description: >- The threshold for the average queue size from which going equal or below will trigger the AutoScaling group to Scale In, deallocating one worker instance. - Type: Number Default: 0.5 + Type: Number GrowthThreshold: Description: >- The threshold for the average queue size from which going equal or above will trigger the AutoScaling group to Scale Out, allocating one worker instance. - Type: Number Default: 0.9 - DbMasterUsername: - Description: The username to be used in the airflow database. 
- Type: String - Default: airflow - DbMasterPassword: - Description: The password to be used in the airflow database. - Type: String - NoEcho: true + Type: Number - # Airflow Config LoadExampleDags: Description: >- Load the example DAGs distributed with Airflow. Useful if deploying a stack for demonstrating a few topologies, operators and scheduling strategies. - Type: String AllowedValues: - 'False' - 'True' Default: 'False' - LoadDefaultConn: + Type: String + LoadDefaultCons: Description: >- Load the default connections initialized by Airflow. Most consider these unnecessary, which is why the default is to not load them. - Type: String AllowedValues: - 'False' - 'True' Default: 'False' + Type: String + WebServerPort: + Description: >- + The port Airflow webserver will be listening. + ConstraintDescription: >- + Ports below 1024 can be opened only with root privileges and the airflow + process does not run as such. + MinValue: 1024 + MaxValue: 65535 + Default: 8080 + Type: Number - # Quick Start Overrides QSS3BucketName: Description: >- S3 bucket name for the Quick Start assets. You can specify your own bucket providing assets and submodules, if you want to override the Quick Start behavior for your specific implementation. - Type: String - Default: turbine-quickstart - AllowedPattern: '^[0-9a-zA-Z]+([0-9a-zA-Z-]*[0-9a-zA-Z])*$' ConstraintDescription: >- Quick Start bucket name can include numbers, lowercase letters, uppercase letters, and hyphens (-). It cannot start or end with a hyphen (-). + AllowedPattern: '^[0-9a-zA-Z]+([0-9a-zA-Z-]*[0-9a-zA-Z])*$' + Default: turbine-quickstart + Type: String QSS3KeyPrefix: Description: >- S3 key prefix for the Quick Start assets. You can scpeficy your own "directory" providing the stack templates, if you want to override the Quick Start behavior for your specific implementation. 
- Type: String - Default: quickstart-turbine-airflow/ - AllowedPattern: '^[0-9a-zA-Z-/]*$' ConstraintDescription: >- Quick Start key prefix can include numbers, lowercase letters, uppercase letters, hyphens (-), and forward slash (/). + AllowedPattern: '^[0-9a-zA-Z-/]*$' + Default: quickstart-turbine-airflow/ + Type: String Resources: + DBSubnetGroup: + Type: AWS::RDS::DBSubnetGroup + Properties: + DBSubnetGroupDescription: > + Associates the Database Instances with the selected VPC Subnets. + SubnetIds: + - !Ref PrivateSubnet1AID + - !Ref PrivateSubnet2AID + + DBInstance: + Type: AWS::RDS::DBInstance + Properties: + AllocatedStorage: '20' + DBInstanceClass: db.t2.micro + DBName: airflow + Engine: postgres + MasterUsername: !Join + - '' + - - '{{resolve:secretsmanager:' + - !Ref Secret + - ':SecretString:username}}' + MasterUserPassword: !Join + - '' + - - '{{resolve:secretsmanager:' + - !Ref Secret + - ':SecretString:password}}' + DBSubnetGroupName: !Ref DBSubnetGroup + VPCSecurityGroups: + - !Ref SecurityGroup + + SecurityGroup: + Type: AWS::EC2::SecurityGroup + Properties: + GroupDescription: Security Rules with permissions for database connections for Airflow. 
+ SecurityGroupIngress: + - SourceSecurityGroupId: !Ref InstancesSecurityGroup + IpProtocol: TCP + FromPort: 5432 + ToPort: 5432 + VpcId: !Ref VPCID + + Secret: + Type: AWS::SecretsManager::Secret + Properties: + GenerateSecretString: + SecretStringTemplate: '{"username": "airflow"}' + GenerateStringKey: "password" + PasswordLength: 16 + ExcludePunctuation: True + + SecretTargetAttachment: + Type: AWS::SecretsManager::SecretTargetAttachment + Properties: + SecretId: !Ref Secret + TargetId: !Ref DBInstance + TargetType: AWS::RDS::DBInstance + + TaskQueue: + Type: AWS::SQS::Queue + + LogsBucket: + Type: AWS::S3::Bucket + + DeploymentsBucket: + Type: AWS::S3::Bucket + + InstancesSecurityGroup: + Type: AWS::EC2::SecurityGroup + Properties: + GroupDescription: >- + The security group shared by all Airflow instances used as inbound rule + for the other more specific resource security groups. + VpcId: !Ref VPCID + Tags: + - Key: Name + Value: airflow-instances-sg + SchedulerStack: Type: AWS::CloudFormation::Stack Properties: @@ -144,20 +283,19 @@ Resources: - !Ref QSS3KeyPrefix - templates/turbine-scheduler.template Parameters: - VPCID: !Ref VPCID PrivateSubnet1AID: !Ref PrivateSubnet1AID PrivateSubnet2AID: !Ref PrivateSubnet2AID - InstancesSecurityGroup: !Ref InstancesSecurityGroup - IamInstanceProfile: !Ref AirflowProfile - IamRole: !Ref AirflowRole - ImageId: !FindInMap - - AWSAMIRegionMap - - !Ref AWS::Region - - AMZNLINUX2 + SecurityGroupID: !Ref InstancesSecurityGroup + DatabaseSecret: !Ref Secret + QueueName: !GetAtt TaskQueue.QueueName + LogsBucket: !Ref LogsBucket InstanceType: !Ref SchedulerInstanceType - SharedCloudInitStack: !Ref AWS::StackName + LoadExampleDags: !Ref LoadExampleDags + LoadDefaultCons: !Ref LoadDefaultCons + QSS3BucketName: !Ref QSS3BucketName + QSS3KeyPrefix: !Ref QSS3KeyPrefix DependsOn: - - SharedCloudInitMetadata + - SecretTargetAttachment WebserverStack: Type: AWS::CloudFormation::Stack @@ -171,19 +309,19 @@ Resources: VPCID: !Ref VPCID 
PublicSubnet1ID: !Ref PublicSubnet1ID PublicSubnet2ID: !Ref PublicSubnet2ID - InstancesSecurityGroup: !Ref InstancesSecurityGroup - IngressCIDR: !Ref AllowedWebBlock - IngressPort: !Ref WebserverPort - IamInstanceProfile: !Ref AirflowProfile - IamRole: !Ref AirflowRole - ImageId: !FindInMap - - AWSAMIRegionMap - - !Ref AWS::Region - - AMZNLINUX2 + SecurityGroupID: !Ref InstancesSecurityGroup + AllowHTTPAccessCIDR: !Ref AllowHTTPAccessCIDR + DatabaseSecret: !Ref Secret + QueueName: !GetAtt TaskQueue.QueueName + LogsBucket: !Ref LogsBucket InstanceType: !Ref WebserverInstanceType - SharedCloudInitStack: !Ref AWS::StackName + LoadExampleDags: !Ref LoadExampleDags + LoadDefaultCons: !Ref LoadDefaultCons + WebServerPort: !Ref WebServerPort + QSS3BucketName: !Ref QSS3BucketName + QSS3KeyPrefix: !Ref QSS3KeyPrefix DependsOn: - - SharedCloudInitMetadata + - SecretTargetAttachment WorkerSetStack: Type: AWS::CloudFormation::Stack @@ -197,50 +335,38 @@ Resources: VPCID: !Ref VPCID PrivateSubnet1AID: !Ref PrivateSubnet1AID PrivateSubnet2AID: !Ref PrivateSubnet2AID - InstancesSecurityGroup: !Ref InstancesSecurityGroup - IamInstanceProfile: !Ref AirflowProfile - IamRole: !Ref AirflowRole - ImageId: !FindInMap - - AWSAMIRegionMap - - !Ref AWS::Region - - AMZNLINUX2 + SecurityGroupID: !Ref InstancesSecurityGroup + DatabaseSecret: !Ref Secret + QueueName: !GetAtt TaskQueue.QueueName + LogsBucket: !Ref LogsBucket InstanceType: !Ref WorkerInstanceType - MinSize: !Ref MinGroupSize - MaxSize: !Ref MaxGroupSize - GrowthThreshold: !Ref GrowthThreshold + MinGroupSize: !Ref MinGroupSize + MaxGroupSize: !Ref MaxGroupSize ShrinkThreshold: !Ref ShrinkThreshold - QueueName: !GetAtt TaskQueue.QueueName - SharedCloudInitStack: !Ref AWS::StackName + GrowthThreshold: !Ref GrowthThreshold + LoadExampleDags: !Ref LoadExampleDags + LoadDefaultCons: !Ref LoadDefaultCons QSS3BucketName: !Ref QSS3BucketName QSS3KeyPrefix: !Ref QSS3KeyPrefix - DependsOn: - - SharedCloudInitMetadata - - LogsBucket: - 
Type: AWS::S3::Bucket - - DeploymentsBucket: - Type: AWS::S3::Bucket + - SecretTargetAttachment CodeDeployApplication: Type: AWS::CodeDeploy::Application Properties: - ApplicationName: !Sub ${AWS::StackName}-deployment-application + ApplicationName: !Ref AWS::StackName ComputePlatform: Server CodeDeployDeploymentGroup: Type: AWS::CodeDeploy::DeploymentGroup Properties: ApplicationName: !Ref CodeDeployApplication - DeploymentGroupName: !Sub ${AWS::StackName}-deployment-group + DeploymentGroupName: !Ref AWS::StackName AutoScalingGroups: - !GetAtt SchedulerStack.Outputs.AutoScalingGroup - !GetAtt WebserverStack.Outputs.AutoScalingGroup - !GetAtt WorkerSetStack.Outputs.AutoScalingGroup - ServiceRoleArn: !GetAtt - - CodeDeployServiceRole - - Arn + ServiceRoleArn: !GetAtt CodeDeployServiceRole.Arn CodeDeployServiceRole: Type: AWS::IAM::Role @@ -255,519 +381,7 @@ Resources: Action: - sts:AssumeRole ManagedPolicyArns: - - 'arn:aws:iam::aws:policy/service-role/AWSCodeDeployRole' - - EfsFileSystem: - Type: AWS::EFS::FileSystem - Properties: - FileSystemTags: - - Key: Name - Value: !Sub ${AWS::StackName}-filesystem - - EfsMountTarget1A: - Type: AWS::EFS::MountTarget - Properties: - FileSystemId: !Ref EfsFileSystem - SubnetId: !Ref PrivateSubnet1AID - SecurityGroups: - - !Ref Access - - EfsMountTarget2A: - Type: AWS::EFS::MountTarget - Properties: - FileSystemId: !Ref EfsFileSystem - SubnetId: !Ref PrivateSubnet2AID - SecurityGroups: - - !Ref Access - - DBs: - Type: AWS::RDS::DBSubnetGroup - Properties: - DBSubnetGroupDescription: Associates the Database Instances with the selected VPC Subnets. 
- SubnetIds: - - !Ref PrivateSubnet1AID - - !Ref PrivateSubnet2AID - - Database: - Type: AWS::RDS::DBInstance - Properties: - AllocatedStorage: '20' - DBInstanceClass: db.t2.micro - DBName: airflow - Engine: postgres - MasterUsername: !Ref DbMasterUsername - MasterUserPassword: !Ref DbMasterPassword - Tags: - - Key: Name - Value: !Sub ${AWS::StackName}-database - DBSubnetGroupName: !Ref DBs - VPCSecurityGroups: - - !Ref Connection - - TaskQueue: - Type: AWS::SQS::Queue - Properties: {} - - InstancesSecurityGroup: - Type: AWS::EC2::SecurityGroup - Properties: - GroupDescription: >- - The security group shared by all Airflow instances used as inbound rule - for the other more specific resource security groups. - VpcId: !Ref VPCID - Tags: - - Key: Name - Value: airflow-instances-sg - - Access: - Type: AWS::EC2::SecurityGroup - Properties: - GroupDescription: >- - Security Rules with permissions for the shared filesystem across Airflow - instances. - SecurityGroupIngress: - - SourceSecurityGroupId: !Ref InstancesSecurityGroup - IpProtocol: TCP - FromPort: 2049 - ToPort: 2049 - VpcId: !Ref VPCID - Tags: - - Key: Name - Value: !Sub '${AWS::StackName}-access' - - Connection: - Type: AWS::EC2::SecurityGroup - Properties: - GroupDescription: Security Rules with permissions for database connections for Airflow. 
- SecurityGroupIngress: - - SourceSecurityGroupId: !Ref InstancesSecurityGroup - IpProtocol: TCP - FromPort: 5432 - ToPort: 5432 - VpcId: !Ref VPCID - Tags: - - Key: Name - Value: !Sub ${AWS::StackName}-connection - - AirflowRole: - Type: AWS::IAM::Role - Properties: - AssumeRolePolicyDocument: - Version: 2012-10-17 - Statement: - - Effect: Allow - Principal: - Service: - - ec2.amazonaws.com - Action: - - sts:AssumeRole - ManagedPolicyArns: - - arn:aws:iam::aws:policy/service-role/AmazonEC2RoleforSSM - Policies: - - PolicyName: !Sub ${AWS::StackName}-cfn-describe - PolicyDocument: - Version: 2012-10-17 - Statement: - - Effect: Allow - Action: - - cloudformation:DescribeStackResource - Resource: !Sub arn:aws:cloudformation:${AWS::Region}:${AWS::AccountId}:stack/${AWS::StackName}/* - - PolicyName: !Sub ${AWS::StackName}-ssm-rw-policy - PolicyDocument: - Version: 2012-10-17 - Statement: - - Effect: Allow - Action: - - ssm:GetParameter - - ssm:PutParameter - Resource: - - !Sub arn:aws:ssm:*:${AWS::AccountId}:*/* - - PolicyName: !Sub ${AWS::StackName}-queue-rw-policy - PolicyDocument: - Version: 2012-10-17 - Statement: - - Effect: Allow - Action: - - sqs:ListQueues - Resource: - - !Sub arn:aws:sqs:*:${AWS::AccountId}:* - - Effect: Allow - Action: - - sqs:ChangeMessageVisibility - - sqs:DeleteMessage - - sqs:GetQueueAttributes - - sqs:GetQueueUrl - - sqs:ReceiveMessage - - sqs:SendMessage - Resource: !Sub - - arn:aws:sqs:*:${AWS::AccountId}:${queue} - - queue: !GetAtt - - TaskQueue - - QueueName - - PolicyName: !Sub ${AWS::StackName}-deployments-r-policy - PolicyDocument: - Version: 2012-10-17 - Statement: - - Effect: Allow - Action: - - s3:Get* - - s3:List* - Resource: !Sub arn:aws:s3:::${DeploymentsBucket}/* - - Effect: Allow - Action: - - codedeploy:List* - Resource: !Sub arn:aws:codedeploy:*:${AWS::AccountId}:deploymentgroup:* - - PolicyName: !Sub ${AWS::StackName}-logs-rw-policy - PolicyDocument: - Version: 2012-10-17 - Statement: - - Effect: Allow - Action: - - 
s3:Get* - - s3:Put* - Resource: !Sub arn:aws:s3:::${LogsBucket}/* - - PolicyName: !Sub ${AWS::StackName}-lifecycle-heartbeat - PolicyDocument: - Version: 2012-10-17 - Statement: - - Effect: Allow - Action: - - autoscaling:RecordLifecycleActionHeartbeat - - autoscaling:CompleteLifecycleAction - Resource: !Sub arn:aws:autoscaling:*:${AWS::AccountId}:autoScalingGroup:*:* - - Effect: Allow - Action: - - autoscaling:DescribeScalingActivities - Resource: '*' - - AirflowProfile: - Type: AWS::IAM::InstanceProfile - Properties: - Roles: - - !Ref AirflowRole - - SharedCloudInitMetadata: - Type: AWS::CloudFormation::WaitConditionHandle - Properties: {} - Metadata: - AWS::CloudFormation::Init: - configSets: - default: - - filesys - - runtime - - secrets - - sysconf - - migrate - - service - - lchooks - - metahup - - cdagent - filesys: - commands: - mkdir: - test: test ! -d /airflow - command: | - mkdir /airflow - chown -R ec2-user /airflow - mount: - test: test ! -d /mnt/efs - command: !Sub | - mkdir /mnt/efs - fspec="${EfsFileSystem}.efs.${AWS::Region}.amazonaws.com:/" - param="nfsvers=4.1,rsize=1048576,wsize=1048576" - param="$param,hard,timeo=600,retrans=2,noresvport" - echo "$fspec /mnt/efs nfs $param,_netdev 0 0" >> /etc/fstab - mount /mnt/efs && chown -R ec2-user /mnt/efs - runtime: - packages: - yum: - git: [] - gcc: [] - gcc-c++: [] - jq: [] - lapack-devel: [] - libcurl-devel: [] - libxml2-devel: [] - libxslt-devel: [] - openssl-devel: [] - postgresql-devel: [] - python3: [] - python3-devel: [] - python3-pip: [] - python3-wheel: [] - commands: - install: - command: | - PYCURL_SSL_LIBRARY=openssl pip3 install \ - --no-cache-dir --compile --ignore-installed \ - pycurl - SLUGIFY_USES_TEXT_UNIDECODE=yes pip3 install \ - celery[sqs] \ - apache-airflow[celery,postgres,s3,crypto]==1.10.9 - secrets: - commands: - generate: - command: !Sub | - export $(cat /etc/environment | xargs) - - if [ "$TURBINE_MACHINE" != "SCHEDULER" ]; then - echo "Secret generation reserved for the 
scheduler" - exit 0 - fi - FERNET_KEY=$(aws ssm get-parameter \ - --name ${AWS::StackName}-fernet-key \ - --region '${AWS::Region}' \ - --query 'Parameter.Value') - if [ "$FERNET_KEY" = "" ]; then - FERNET_KEY=$(python3 -c "if True:# - from cryptography.fernet import Fernet - key = Fernet.generate_key().decode() - print(key)") - aws ssm put-parameter \ - --name ${AWS::StackName}-fernet-key \ - --region '${AWS::Region}' \ - --value $FERNET_KEY \ - --type SecureString - fi - retrieve: - command: !Sub | - while [ "$FERNET_KEY" = "" ]; do - echo "Waiting for Fernet key to be available..." - sleep 1 - FERNET_KEY=$(aws ssm get-parameter \ - --name ${AWS::StackName}-fernet-key \ - --region '${AWS::Region}' \ - --with-decryption \ - --query 'Parameter.Value' \ - --output text) - done - echo "FERNET_KEY=$FERNET_KEY" >> /etc/environment - sysconf: - files: - /etc/sysconfig/airflow: - content: !Sub - - | - TURBINE_MACHINE=${!TURBINE_MACHINE} - AWS_DEFAULT_REGION=${AWS::Region} - AIRFLOW_HOME=/airflow - AIRFLOW__CORE__EXECUTOR=CeleryExecutor - AIRFLOW__CORE__FERNET_KEY=${!FERNET_KEY} - AIRFLOW__CORE__LOAD_EXAMPLES=${LoadExampleDags} - TURBINE__CORE__LOAD_DEFAULTS=${LoadDefaultConn} - AIRFLOW__CORE__SQL_ALCHEMY_CONN=${DbUri} - AIRFLOW__CORE__REMOTE_BASE_LOG_FOLDER=s3://${LogsBucket} - AIRFLOW__CORE__REMOTE_LOGGING=True - AIRFLOW__WEBSERVER__BASE_URL=http://${!HOSTNAME}:${WebserverPort} - AIRFLOW__WEBSERVER__WEB_SERVER_PORT=${WebserverPort} - AIRFLOW__CELERY__BROKER_URL=sqs:// - AIRFLOW__CELERY__DEFAULT_QUEUE=${QueueName} - AIRFLOW__CELERY__RESULT_BACKEND=db+${DbUri} - AIRFLOW__CELERY_BROKER_TRANSPORT_OPTIONS__REGION=${AWS::Region} - - QueueName: !GetAtt TaskQueue.QueueName - DbUri: !Join - - '' - - - postgresql:// - - !Ref DbMasterUsername - - ':' - - !Ref DbMasterPassword - - '@' - - !GetAtt Database.Endpoint.Address - - /airflow - commands: - envsubst: - command: | - export $(cat /etc/environment | xargs) - - PUBLIC=$(curl -s -o /dev/null -w "%{http_code}" \ - 
http://169.254.169.254/latest/meta-data/public-ipv4) - PUB_IPV4=$(ec2-metadata -v | awk '{print $2}') - LOC_IPV4=$(ec2-metadata -o | awk '{print $2}') - if [ $PUBLIC = "200" ] - then HOSTNAME=$PUB_IPV4 - else HOSTNAME=$LOC_IPV4 - fi - - echo "$(envsubst /etc/sysconfig/airflow - migrate: - commands: - migration: - command: | - export $(cat /etc/environment | xargs) - export $(cat /etc/sysconfig/airflow | xargs) - if [ "$TURBINE_MACHINE" != "SCHEDULER" ]; then - echo "Database setup reserved for the scheduler" - exit 0 - fi - if [ "$TURBINE__CORE__LOAD_DEFAULTS" == "True" ]; then - su -c '/usr/local/bin/airflow initdb' ec2-user - else - su -c '/usr/local/bin/airflow upgradedb' ec2-user - fi - service: - files: - /usr/bin/turbine: - mode: 755 - content: | - #!/bin/sh - if [ "$TURBINE_MACHINE" == "SCHEDULER" ] - then exec /usr/local/bin/airflow scheduler - elif [ "$TURBINE_MACHINE" == "WEBSERVER" ] - then exec /usr/local/bin/airflow webserver - elif [ "$TURBINE_MACHINE" == "WORKER" ] - then exec /usr/local/bin/airflow worker - else echo "TURBINE_MACHINE value unknown" && exit 1 - fi - /usr/lib/tmpfiles.d/airflow.conf: - content: | - D /run/airflow 0755 ec2-user ec2-user - /usr/lib/systemd/system/airflow.service: - content: | - [Service] - EnvironmentFile=/etc/sysconfig/airflow - User=ec2-user - Group=ec2-user - ExecStart=/usr/bin/turbine - Restart=always - RestartSec=5s - KillMode=mixed - TimeoutStopSec=24h - [Install] - WantedBy=multi-user.target - /usr/lib/systemd/system/watcher.path: - content: | - [Unit] - After=airflow.service - PartOf=airflow.service - [Path] - PathModified=/etc/sysconfig/airflow - [Install] - WantedBy=airflow.service - /usr/lib/systemd/system/watcher.service: - content: | - [Service] - Type=oneshot - ExecStartPre=/usr/bin/systemctl daemon-reload - ExecStart=/usr/bin/systemctl restart airflow - commands: - setup: - command: !Sub | - HAS_DEPLOYMENT=$(aws deploy list-deployments \ - --application-name ${AWS::StackName}-deployment-application \ - 
--deployment-group ${AWS::StackName}-deployment-group \ - --region ${AWS::Region} | \ - jq '.deployments | has(0)') - - systemctl enable airflow.service watcher.path - - if [ "$HAS_DEPLOYMENT" = "false" ]; then - systemctl start airflow - else - echo "Deployment pending, deferring service start" - fi - lchooks: - files: - /usr/bin/lchkill: - mode: 755 - content: !Sub | - #!/bin/sh - INSTANCE_ID=$(ec2-metadata -i | awk '{print $2}') - TERMINATE_MESSAGE="Terminating EC2 instance <$INSTANCE_ID>" - TERMINATING=$(aws autoscaling describe-scaling-activities \ - --auto-scaling-group-name '${AWS::StackName}-scaling-group' \ - --max-items 100 \ - --region '${AWS::Region}' | \ - jq --arg TERMINATE_MESSAGE "$TERMINATE_MESSAGE" \ - '.Activities[] - | select(.Description - | test($TERMINATE_MESSAGE)) != []') - - if [ "$TERMINATING" = "true" ]; then - systemctl stop airflow - fi - /usr/lib/systemd/system/lchkill.timer: - content: | - [Timer] - OnCalendar=*:0/1 - [Install] - WantedBy=airflow.service - /usr/lib/systemd/system/lchkill.service: - content: | - [Service] - Type=oneshot - ExecStart=/usr/bin/lchkill - /usr/bin/lchbeat: - mode: 755 - content: !Sub | - #!/bin/sh - SERVICE_STATUS=$(systemctl is-active airflow) - - if [ "$SERVICE_STATUS" = "deactivating" ]; then - aws autoscaling record-lifecycle-action-heartbeat \ - --instance-id $(ec2-metadata -i | awk '{print $2}') \ - --lifecycle-hook-name '${AWS::StackName}-scaling-lfhook' \ - --auto-scaling-group-name '${AWS::StackName}-scaling-group' \ - --region '${AWS::Region}' - fi - /usr/lib/systemd/system/lchbeat.timer: - content: | - [Timer] - OnCalendar=*:0/1 - [Install] - WantedBy=airflow.service - /usr/lib/systemd/system/lchbeat.service: - content: | - [Service] - Type=oneshot - ExecStart=/usr/bin/lchbeat - commands: - setup: - command: | - if [ "$TURBINE_MACHINE" = "WORKER" ]; then - systemctl enable lchkill.timer lchbeat.timer - fi - metahup: - files: - /etc/cfn/cfn-hup.conf: - content: !Sub | - [main] - 
stack=${AWS::StackId} - region=${AWS::Region} - role=${AirflowRole} - interval=1 - /etc/cfn/hooks.d/cfn-auto-reloader.conf: - content: !Sub | - [cfn-auto-reloader-hook] - triggers=post.update - path=Resources.Meta.Metadata.AWS::CloudFormation::Init - action=/opt/aws/bin/cfn-init -v \ - --region ${AWS::Region} \ - --role ${AirflowRole} \ - --stack ${AWS::StackName} \ - --resource Meta - runas=root - /lib/systemd/system/cfn-hup.service: - content: | - [Service] - ExecStart=/opt/aws/bin/cfn-hup - Restart=always - [Install] - WantedBy=multi-user.target - commands: - setup: - command: | - systemctl enable cfn-hup.service - systemctl start cfn-hup.service - cdagent: - packages: - yum: - ruby: [] - wget: [] - commands: - install: - command: !Sub | - wget https://aws-codedeploy-${AWS::Region}.s3.amazonaws.com/latest/install - chmod +x ./install - ./install auto + - arn:aws:iam::aws:policy/service-role/AWSCodeDeployRole Outputs: DeploymentsBucket: @@ -776,72 +390,3 @@ Outputs: Value: !Ref CodeDeployApplication CodeDeployDeploymentGroup: Value: !Ref CodeDeployDeploymentGroup - -Mappings: - AWSAMIRegionMap: - ap-northeast-1: - AMZNLINUX2: ami-00d101850e971728d - ap-northeast-2: - AMZNLINUX2: ami-08ab3f7e72215fe91 - ap-south-1: - AMZNLINUX2: ami-00e782930f1c3dbc7 - ap-southeast-1: - AMZNLINUX2: ami-0b5a47f8865280111 - ap-southeast-2: - AMZNLINUX2: ami-0fb7513bcdc525c3b - ca-central-1: - AMZNLINUX2: ami-08a9b721ecc5b0a53 - eu-central-1: - AMZNLINUX2: ami-0ebe657bc328d4e82 - eu-west-1: - AMZNLINUX2: ami-030dbca661d402413 - eu-west-2: - AMZNLINUX2: ami-0009a33f033d8b7b6 - eu-west-3: - AMZNLINUX2: ami-0ebb3a801d5fb8b9b - sa-east-1: - AMZNLINUX2: ami-058141e091292ecf0 - us-east-1: - AMZNLINUX2: ami-0c6b1d09930fac512 - us-east-2: - AMZNLINUX2: ami-0ebbf2179e615c338 - us-west-1: - AMZNLINUX2: ami-015954d5e5548d13b - us-west-2: - AMZNLINUX2: ami-0cb72367e98845d43 - -Metadata: - AWS::CloudFormation::Interface: - ParameterGroups: - - Label: - default: Networking - Parameters: - - VPCID 
- - PublicSubnet1ID - - PublicSubnet2ID - - PrivateSubnet1AID - - PrivateSubnet2AID - - AllowedWebBlock - - WebserverPort - - Label: - default: Cluster Settings - Parameters: - - SchedulerInstanceType - - WebserverInstanceType - - WorkerInstanceType - - MinGroupSize - - MaxGroupSize - - ShrinkThreshold - - GrowthThreshold - - DbMasterUsername - - DbMasterPassword - - Label: - default: Airflow Config - Parameters: - - LoadExampleDags - - LoadDefaultConn - - Label: - default: Quick Start Overrides - Parameters: - - QSS3BucketName - - QSS3KeyPrefix diff --git a/templates/turbine-master.template b/templates/turbine-master.template index a496df1e..76033e49 100644 --- a/templates/turbine-master.template +++ b/templates/turbine-master.template @@ -1,148 +1,227 @@ AWSTemplateFormatVersion: 2010-09-09 Description: >- - The Turbine-Airflow master stack, including a quickstart VPC stack and the - Turbine-Airflow cluster stack. + This template creates a Quick Start VPC stack and a Turbine Airflow cluster + stack. The Quick Start VPC stack creates a Multi-AZ VPC infrastructure with + two private subnets and managed NAT gateways in the two public subnets. The + Turbine Airflow cluster stack creates the Airflow supporting resources + including an RDS instance to host the Airflow metadata database, an SQS queue + to be used as broker backend, S3 buckets for logs and deployment packages, and + then creates the Airflow scheduler, webserver and workers nested stacks. The + nested Airflow services stacks create the Airflow instances in highly + available auto scaling groups spanning two subnets, plus for the workers stack + an EFS shared network directory and a custom cloudwatch load metric function + to guide the auto scaling alarm triggers. **WARNING** This template creates + AWS resources. You will be billed for the AWS resources used if you create a + stack from this template. 
QS(0027) +Metadata: + AWS::CloudFormation::Interface: + ParameterGroups: + - Label: + default: VPC network configuration + Parameters: + - VPCCIDR + - PublicSubnet1CIDR + - PublicSubnet2CIDR + - PrivateSubnet1ACIDR + - PrivateSubnet2ACIDR + - AllowHTTPAccessCIDR + - Label: + default: Turbine Cluster configuration + Parameters: + - SchedulerInstanceType + - WebserverInstanceType + - WorkerInstanceType + - MinGroupSize + - MaxGroupSize + - ShrinkThreshold + - GrowthThreshold + - Label: + default: Apache Airflow configuration + Parameters: + - LoadExampleDags + - LoadDefaultCons + - WebServerPort + - Label: + default: AWS Quick Start configuration + Parameters: + - QSS3BucketName + - QSS3KeyPrefix + ParameterLabels: + VPCCIDR: + default: VPC CIDR + PublicSubnet1CIDR: + default: Public subnet 1 CIDR + PublicSubnet2CIDR: + default: Public subnet 2 CIDR + PrivateSubnet1ACIDR: + default: Private subnet 1 CIDR + PrivateSubnet2ACIDR: + default: Private subnet 2 CIDR + AllowHTTPAccessCIDR: + default: Allowed HTTP access CIDR + SchedulerInstanceType: + default: Scheduler instance type + WebserverInstanceType: + default: Web server instance type + WorkerInstanceType: + default: Workers instance type + MinGroupSize: + default: Minimum group size + MaxGroupSize: + default: Maximum group size + ShrinkThreshold: + default: Shrink threshold + GrowthThreshold: + default: Growth threshold + LoadExampleDags: + default: Load example DAGs + LoadDefaultCons: + default: Load default connections + WebServerPort: + default: Web server port + QSS3BucketName: + default: Quick Start S3 bucket name + QSS3KeyPrefix: + default: Quick Start S3 key prefix Parameters: - # Networking VPCCIDR: - Description: The CIDR block for the VPC - Type: String - AllowedPattern: >- - ^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])(\/([0-9]|[1-2][0-9]|3[0-2]))$ + Description: >- + CIDR block for the VPC. 
+ ConstraintDescription: CIDR block parameter must be in the form x.x.x.x/16-28. + AllowedPattern: ^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])(\/([0-9]|[1-2][0-9]|3[0-2]))$ Default: 10.0.0.0/16 - PublicSubnet1CIDR: - Description: The CIDR block for the public Subnet in the first Availability Zone Type: String - AllowedPattern: >- - ^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])(\/([0-9]|[1-2][0-9]|3[0-2]))$ + PublicSubnet1CIDR: + Description: >- + CIDR block for the public subnet 1 located in Availability Zone 1. + ConstraintDescription: CIDR block parameter must be in the form x.x.x.x/16-28. + AllowedPattern: ^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])(\/([0-9]|[1-2][0-9]|3[0-2]))$ Default: 10.0.0.0/24 - PublicSubnet2CIDR: - Description: The CIDR block for the public Subnet in the second Availability Zone Type: String - AllowedPattern: >- - ^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])(\/([0-9]|[1-2][0-9]|3[0-2]))$ + PublicSubnet2CIDR: + Description: >- + CIDR block for the public subnet 2 located in Availability Zone 2. + ConstraintDescription: CIDR block parameter must be in the form x.x.x.x/16-28. + AllowedPattern: ^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])(\/([0-9]|[1-2][0-9]|3[0-2]))$ Default: 10.0.1.0/24 - PrivateSubnet1ACIDR: - Description: The CIDR block for the private Subnet in the first Availability Zone Type: String - AllowedPattern: >- - ^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])(\/([0-9]|[1-2][0-9]|3[0-2]))$ + PrivateSubnet1ACIDR: + Description: >- + CIDR block for private subnet 1 located in Availability Zone 1. + ConstraintDescription: CIDR block parameter must be in the form x.x.x.x/16-28.
+ AllowedPattern: ^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])(\/([0-9]|[1-2][0-9]|3[0-2]))$ Default: 10.0.10.0/24 - PrivateSubnet2ACIDR: - Description: The CIDR block for the private Subnet in the second Availability Zone Type: String - AllowedPattern: >- - ^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])(\/([0-9]|[1-2][0-9]|3[0-2]))$ - Default: 10.0.11.0/24 - AllowedWebBlock: + PrivateSubnet2ACIDR: Description: >- - The IPv4 CIDR block to allow HTTP access in the webserver. The default of - 0.0.0.0/0 allows HTTP from everywhere, which is convenient but less - secure. + CIDR block for private subnet 2 located in Availability Zone 2. + ConstraintDescription: CIDR block parameter must be in the form x.x.x.x/16-28. + AllowedPattern: ^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])(\/([0-9]|[1-2][0-9]|3[0-2]))$ + Default: 10.0.11.0/24 Type: String - AllowedPattern: >- - ^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])(\/([0-9]|[1-2][0-9]|3[0-2]))$ - Default: 0.0.0.0/0 - WebserverPort: + AllowHTTPAccessCIDR: Description: >- - The port Airflow webserver will be listening. - Type: Number - Default: 8080 - MinValue: 1024 - MaxValue: 65535 - ConstraintDescription: >- - Ports below 1024 can be opened only with root privileges and the airflow - process does not run as such. + The IPv4 CIDR block to allow HTTP access in the webserver. Using the same + CIDR for the VPC is a secure default. Using 0.0.0.0/0 allows access from + everywhere, which is convenient but less secure. + ConstraintDescription: CIDR block parameter must be in the form x.x.x.x/16-28. 
+ AllowedPattern: ^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])(\/([0-9]|[1-2][0-9]|3[0-2]))$ + Default: 10.0.0.0/16 + Type: String - # Cluster Settings SchedulerInstanceType: - Description: EC2 instance type to use for the scheduler. - Type: String + Description: >- + EC2 instance type to use for the scheduler. Default: t3.micro - WebserverInstanceType: - Description: EC2 instance type to use for the webserver. Type: String + WebserverInstanceType: + Description: >- + EC2 instance type to use for the webserver. Default: t3.micro - WorkerInstanceType: - Description: EC2 instance type to use for the workers. Type: String + WorkerInstanceType: + Description: >- + EC2 instance type to use for the workers. Default: t3.medium + Type: String MinGroupSize: - Description: The minimum number of active worker instances. + Description: >- + The minimum number of active worker instances. + Default: 0 Type: Number - Default: 1 MaxGroupSize: - Description: The maximum number of active worker instances. - Type: Number + Description: >- + The maximum number of active worker instances. Default: 10 + Type: Number ShrinkThreshold: Description: >- The threshold for the average queue size from which going equal or below will trigger the AutoScaling group to Scale In, deallocating one worker instance. - Type: Number Default: 0.5 + Type: Number GrowthThreshold: Description: >- The threshold for the average queue size from which going equal or above will trigger the AutoScaling group to Scale Out, allocating one worker instance. - Type: Number Default: 0.9 - DbMasterUsername: - Description: The username to be used in the airflow database. - Type: String - Default: airflow - DbMasterPassword: - Description: The password to be used in the airflow database. - Type: String - NoEcho: true + Type: Number - # Airflow Config LoadExampleDags: Description: >- Load the example DAGs distributed with Airflow. 
Useful if deploying a stack for demonstrating a few topologies, operators and scheduling strategies. - Type: String AllowedValues: - 'False' - 'True' Default: 'False' - LoadDefaultConn: + Type: String + LoadDefaultCons: Description: >- Load the default connections initialized by Airflow. Most consider these unnecessary, which is why the default is to not load them. - Type: String AllowedValues: - 'False' - 'True' Default: 'False' + Type: String + WebServerPort: + Description: >- + The port Airflow webserver will be listening. + ConstraintDescription: >- + Ports below 1024 can be opened only with root privileges and the airflow + process does not run as such. + MinValue: 1024 + MaxValue: 65535 + Default: 8080 + Type: Number - # Quick Start Overrides QSS3BucketName: Description: >- S3 bucket name for the Quick Start assets. You can specify your own bucket providing assets and submodules, if you want to override the Quick Start behavior for your specific implementation. - Type: String - Default: turbine-quickstart - AllowedPattern: '^[0-9a-zA-Z]+([0-9a-zA-Z-]*[0-9a-zA-Z])*$' ConstraintDescription: >- Quick Start bucket name can include numbers, lowercase letters, uppercase letters, and hyphens (-). It cannot start or end with a hyphen (-). + AllowedPattern: '^[0-9a-zA-Z]+([0-9a-zA-Z-]*[0-9a-zA-Z])*$' + Default: turbine-quickstart + Type: String QSS3KeyPrefix: Description: >- S3 key prefix for the Quick Start assets. You can scpeficy your own "directory" providing the stack templates, if you want to override the Quick Start behavior for your specific implementation. - Type: String - Default: quickstart-turbine-airflow/ - AllowedPattern: '^[0-9a-zA-Z-/]*$' ConstraintDescription: >- Quick Start key prefix can include numbers, lowercase letters, uppercase letters, hyphens (-), and forward slash (/). 
+ AllowedPattern: '^[0-9a-zA-Z-/]*$' + Default: quickstart-turbine-airflow/ + Type: String Resources: @@ -185,8 +264,8 @@ Resources: PublicSubnet2ID: !GetAtt VPCStack.Outputs.PublicSubnet2ID PrivateSubnet1AID: !GetAtt VPCStack.Outputs.PrivateSubnet1AID PrivateSubnet2AID: !GetAtt VPCStack.Outputs.PrivateSubnet2AID - AllowedWebBlock: !Ref AllowedWebBlock - WebserverPort: !Ref WebserverPort + AllowHTTPAccessCIDR: !Ref AllowHTTPAccessCIDR + WebServerPort: !Ref WebServerPort SchedulerInstanceType: !Ref SchedulerInstanceType WebserverInstanceType: !Ref WebserverInstanceType WorkerInstanceType: !Ref WorkerInstanceType @@ -194,10 +273,8 @@ Resources: MaxGroupSize: !Ref MaxGroupSize GrowthThreshold: !Ref GrowthThreshold ShrinkThreshold: !Ref ShrinkThreshold - DbMasterUsername: !Ref DbMasterUsername - DbMasterPassword: !Ref DbMasterPassword LoadExampleDags: !Ref LoadExampleDags - LoadDefaultConn: !Ref LoadDefaultConn + LoadDefaultCons: !Ref LoadDefaultCons QSS3BucketName: !Ref QSS3BucketName QSS3KeyPrefix: !Ref QSS3KeyPrefix @@ -208,39 +285,3 @@ Outputs: Value: !GetAtt TurbineCluster.Outputs.CodeDeployApplication CodeDeployDeploymentGroup: Value: !GetAtt TurbineCluster.Outputs.CodeDeployDeploymentGroup - -Metadata: - AWS::CloudFormation::Interface: - ParameterGroups: - - Label: - default: Networking - Parameters: - - VPCCIDR - - PublicSubnet1CIDR - - PublicSubnet2CIDR - - PrivateSubnet1ACIDR - - PrivateSubnet2ACIDR - - AllowedWebBlock - - WebserverPort - - Label: - default: Cluster Settings - Parameters: - - SchedulerInstanceType - - WebserverInstanceType - - WorkerInstanceType - - MinGroupSize - - MaxGroupSize - - ShrinkThreshold - - GrowthThreshold - - DbMasterUsername - - DbMasterPassword - - Label: - default: Airflow Config - Parameters: - - LoadExampleDags - - LoadDefaultConn - - Label: - default: Quick Start Overrides - Parameters: - - QSS3BucketName - - QSS3KeyPrefix diff --git a/templates/turbine-scheduler.template b/templates/turbine-scheduler.template index 
ee9406cb..9f3afbb1 100644 --- a/templates/turbine-scheduler.template +++ b/templates/turbine-scheduler.template @@ -1,55 +1,139 @@ AWSTemplateFormatVersion: 2010-09-09 +Description: >- + This template creates the Airflow scheduler instance in a highly available + auto scaling group spanning two private subnets. **WARNING** This template + creates AWS resources. You will be billed for the AWS resources used if you + create a stack from this template. QS(0027) +Metadata: + AWS::CloudFormation::Interface: + ParameterGroups: + - Label: + default: VPC network configuration + Parameters: + - PrivateSubnet1AID + - PrivateSubnet2AID + - SecurityGroupID + - Label: + default: Turbine cluster configuration + Parameters: + - DatabaseSecret + - QueueName + - LogsBucket + - Label: + default: Turbine scheduler configuration + Parameters: + - InstanceType + - Label: + default: Apache Airflow configuration + Parameters: + - LoadExampleDags + - LoadDefaultCons + - Label: + default: AWS Quick Start configuration + Parameters: + - QSS3BucketName + - QSS3KeyPrefix + ParameterLabels: + PrivateSubnet1AID: + default: Private subnet 1 ID + PrivateSubnet2AID: + default: Private subnet 2 ID + SecurityGroupID: + default: Security group ID + DatabaseSecret: + default: Database secret + QueueName: + default: Queue name + LogsBucket: + default: Logs bucket + InstanceType: + default: Scheduler instance type + LoadExampleDags: + default: Load example DAGs + LoadDefaultCons: + default: Load default connections + QSS3BucketName: + default: Quick Start S3 bucket name + QSS3KeyPrefix: + default: Quick Start S3 key prefix Parameters: - VPCID: - Type: AWS::EC2::VPC::Id + PrivateSubnet1AID: + Description: An existing private Subnet in some Availability Zone. Type: AWS::EC2::Subnet::Id PrivateSubnet2AID: + Description: An existing private Subnet in another Availability Zone. 
Type: AWS::EC2::Subnet::Id - InstancesSecurityGroup: + SecurityGroupID: + Description: >- + Security Group ID of an externally managed security group that gives + instances access to relevant external resources like the metadata database + endpoints in the two provided subnets. Type: AWS::EC2::SecurityGroup::Id - IamInstanceProfile: + + DatabaseSecret: + Description: >- + The AWS SecretsManager Secret resource name (ARN) of the secure secret + storing the metadata database connection credentials. Type: String - IamRole: + QueueName: + Description: >- + Name of the queue to be used as message broker between the scheduler and + worker instances. Type: String - ImageId: - Type: AWS::EC2::Image::Id - InstanceType: + LogsBucket: + Description: >- + Name of the bucket where task logs are remotely stored. Type: String - SharedCloudInitStack: + + InstanceType: + Description: EC2 instance type to use for the scheduler. Type: String -Resources: + LoadExampleDags: + Description: >- + Load the example DAGs distributed with Airflow. Useful if deploying a + stack for demonstrating a few topologies, operators and scheduling + strategies. + AllowedValues: + - 'False' + - 'True' + Default: 'False' + Type: String + LoadDefaultCons: + Description: >- + Load the default connections initialized by Airflow. Most consider these + unnecessary, which is why the default is to not load them. 
+ AllowedValues: + - 'False' + - 'True' + Default: 'False' + Type: String - LaunchConfiguration: - Type: AWS::AutoScaling::LaunchConfiguration - Properties: - IamInstanceProfile: !Ref IamInstanceProfile - ImageId: !Ref ImageId - InstanceType: !Ref InstanceType - SecurityGroups: - - !Ref InstancesSecurityGroup - - !Ref SchedulerSecurityGroup - UserData: - Fn::Base64: !Sub | - #!/bin/bash -xe - echo 'TURBINE_MACHINE=SCHEDULER' > /etc/environment - /opt/aws/bin/cfn-init -v \ - --region ${AWS::Region} \ - --role ${IamRole} \ - --stack ${SharedCloudInitStack} \ - --resource SharedCloudInitMetadata + QSS3BucketName: + Description: >- + S3 bucket name for the Quick Start assets. You can specify your own bucket + providing assets and submodules, if you want to override the Quick Start + behavior for your specific implementation. + ConstraintDescription: >- + Quick Start bucket name can include numbers, lowercase letters, uppercase + letters, and hyphens (-). It cannot start or end with a hyphen (-). + AllowedPattern: '^[0-9a-zA-Z]+([0-9a-zA-Z-]*[0-9a-zA-Z])*$' + Default: turbine-quickstart + Type: String + QSS3KeyPrefix: + Description: >- + S3 key prefix for the Quick Start assets. You can specify your own + "directory" providing the stack templates, if you want to override the + Quick Start behavior for your specific implementation. + ConstraintDescription: >- + Quick Start key prefix can include numbers, lowercase letters, uppercase + letters, hyphens (-), and forward slash (/). + AllowedPattern: '^[0-9a-zA-Z-/]*$' + Default: quickstart-turbine-airflow/ + Type: String - SchedulerSecurityGroup: - Type: AWS::EC2::SecurityGroup - Properties: - GroupDescription: >- - The security group used by the Airflow scheduler instance. Not initially - useful but included to facilitate narrowing custom rules.
- VpcId: !Ref VPCID - Tags: - - Key: Name - Value: turbine-scheduler-sg +Resources: AutoScalingGroup: Type: AWS::AutoScaling::AutoScalingGroup @@ -67,6 +151,146 @@ Resources: Value: turbine-scheduler PropagateAtLaunch: true + LaunchConfiguration: + Type: AWS::AutoScaling::LaunchConfiguration + Properties: + IamInstanceProfile: !Ref IamInstanceProfile + ImageId: !FindInMap + - AWSAMIRegionMap + - !Ref AWS::Region + - AMZNLINUX2 + InstanceType: !Ref InstanceType + SecurityGroups: + - !Ref SecurityGroupID + UserData: + Fn::Base64: !Sub | + #!/bin/bash -xe + /opt/aws/bin/cfn-init -v \ + --stack ${AWS::StackName} \ + --resource LaunchConfiguration + /opt/aws/bin/cfn-signal -e $? \ + --stack ${AWS::StackName} \ + --resource LaunchConfiguration + Metadata: + AWS::CloudFormation::Init: + config: + commands: + setup: + command: !Sub | + export AWS_STACK_NAME="${AWS::StackName}" + export LOGS_BUCKET="${LogsBucket}" + export QUEUE_NAME="${QueueName}" + export DB_SECRETS_ARN="${DatabaseSecret}" + export LOAD_EXAMPLES="${LoadExampleDags}" + export LOAD_DEFAULTS="${LoadDefaultCons}" + aws s3 sync s3://${QSS3BucketName}/${QSS3KeyPrefix}scripts /opt/turbine + chmod +x /opt/turbine/scheduler.setup.sh + /opt/turbine/scheduler.setup.sh + + IamInstanceProfile: + Type: AWS::IAM::InstanceProfile + Properties: + Roles: + - !Ref IamRole + + IamRole: + Type: AWS::IAM::Role + Properties: + AssumeRolePolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Principal: + Service: + - ec2.amazonaws.com + Action: + - sts:AssumeRole + ManagedPolicyArns: + - arn:aws:iam::aws:policy/service-role/AmazonEC2RoleforSSM + Policies: + - PolicyName: !Sub TurbineAirflowSchedulerDescribeStackPolicy-${AWS::StackName} + PolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Action: + - cloudformation:DescribeStackResource + Resource: !Join + - ':' + - - arn:aws:cloudformation + - !Ref AWS::Region + - !Ref AWS::AccountId + - !Sub stack/${AWS::StackName}/* + - PolicyName: !Sub
TurbineAirflowSchedulerGetSecretPolicy-${AWS::StackName} + PolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Action: + - secretsmanager:GetSecretValue + Resource: !Ref DatabaseSecret + - PolicyName: !Sub TurbineAirflowSchedulerQueueRWPolicy-${AWS::StackName} + PolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Action: + - sqs:ListQueues + Resource: + - !Sub arn:aws:sqs:${AWS::Region}:${AWS::AccountId}:* + - Effect: Allow + Action: + - sqs:ChangeMessageVisibility + - sqs:DeleteMessage + - sqs:GetQueueAttributes + - sqs:GetQueueUrl + - sqs:ReceiveMessage + - sqs:SendMessage + Resource: !Sub arn:aws:sqs:${AWS::Region}:${AWS::AccountId}:${QueueName} + - PolicyName: !Sub TurbineAirflowSchedulerLogsRWPolicy-${AWS::StackName} + PolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Action: + - s3:GetObject + - s3:PutObject + - s3:DeleteObject + Resource: !Sub arn:aws:s3:::${LogsBucket}/* + Outputs: AutoScalingGroup: Value: !Ref AutoScalingGroup + +Mappings: + AWSAMIRegionMap: + ap-northeast-1: + AMZNLINUX2: ami-052652af12b58691f + ap-northeast-2: + AMZNLINUX2: ami-0db78afd3d150fc18 + ap-south-1: + AMZNLINUX2: ami-03b5297d565ef30a6 + ap-southeast-1: + AMZNLINUX2: ami-0cbc6aae997c6538a + ap-southeast-2: + AMZNLINUX2: ami-08fdde86b93accf1c + ca-central-1: + AMZNLINUX2: ami-0bf54ac1b628cf143 + eu-central-1: + AMZNLINUX2: ami-0ec1ba09723e5bfac + eu-west-1: + AMZNLINUX2: ami-04d5cc9b88f9d1d39 + eu-west-2: + AMZNLINUX2: ami-0cb790308f7591fa6 + eu-west-3: + AMZNLINUX2: ami-07eda9385feb1e969 + sa-east-1: + AMZNLINUX2: ami-0b032e878a66c3b68 + us-east-1: + AMZNLINUX2: ami-0fc61db8544a617ed + us-east-2: + AMZNLINUX2: ami-0e01ce4ee18447327 + us-west-1: + AMZNLINUX2: ami-09a7fe78668f1e2c0 + us-west-2: + AMZNLINUX2: ami-0ce21b51cb31a48b8 diff --git a/templates/turbine-webserver.template b/templates/turbine-webserver.template index 75687f8b..a8a0bce8 100644 --- a/templates/turbine-webserver.template +++ 
b/templates/turbine-webserver.template @@ -1,52 +1,210 @@ AWSTemplateFormatVersion: 2010-09-09 +Description: >- + This template creates the Airflow web server instance in a highly available + auto scaling group spanning two public subnets. **WARNING** This template + creates AWS resources. You will be billed for the AWS resources used if you + create a stack from this template. QS(0027) +Metadata: + AWS::CloudFormation::Interface: + ParameterGroups: + - Label: + default: VPC network configuration + Parameters: + - VPCID + - PublicSubnet1ID + - PublicSubnet2ID + - SecurityGroupID + - AllowHTTPAccessCIDR + - Label: + default: Turbine cluster configuration + Parameters: + - DatabaseSecret + - QueueName + - LogsBucket + - Label: + default: Turbine webserver configuration + Parameters: + - InstanceType + - Label: + default: Apache Airflow configuration + Parameters: + - LoadExampleDags + - LoadDefaultCons + - WebServerPort + - Label: + default: AWS Quick Start configuration + Parameters: + - QSS3BucketName + - QSS3KeyPrefix + ParameterLabels: + VPCID: + default: VPC ID + PublicSubnet1ID: + default: Public subnet 1 ID + PublicSubnet2ID: + default: Public subnet 2 ID + SecurityGroupID: + default: Security group ID + AllowHTTPAccessCIDR: + default: Allowed HTTP access CIDR + DatabaseSecret: + default: Database secret + QueueName: + default: Queue name + LogsBucket: + default: Logs bucket + InstanceType: + default: Webserver instance type + LoadExampleDags: + default: Load example DAGs + LoadDefaultCons: + default: Load default connections + WebServerPort: + default: Web server port + QSS3BucketName: + default: Quick Start S3 bucket name + QSS3KeyPrefix: + default: Quick Start S3 key prefix Parameters: + VPCID: + Description: An existing VPC for the cluster. Type: AWS::EC2::VPC::Id PublicSubnet1ID: + Description: An existing public Subnet in some Availability Zone.
Type: AWS::EC2::Subnet::Id PublicSubnet2ID: + Description: An existing public Subnet in another Availability Zone. Type: AWS::EC2::Subnet::Id - InstancesSecurityGroup: + SecurityGroupID: + Description: >- + Security Group ID of an externally managed security group that gives + instances access to relevant external resources like the metadata database + endpoints in the two provided subnets. Type: AWS::EC2::SecurityGroup::Id - IngressCIDR: + AllowHTTPAccessCIDR: + Description: >- + The IPv4 CIDR block to allow HTTP access in the webserver. Using the same + CIDR for the VPC is a secure default. Using 0.0.0.0/0 allows access from + everywhere, which is convenient but less secure. + ConstraintDescription: CIDR block parameter must be in the form x.x.x.x/16-28. + AllowedPattern: ^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])(\/([0-9]|[1-2][0-9]|3[0-2]))$ + Default: 10.0.0.0/16 Type: String - AllowedPattern: >- - ^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])(\/([0-9]|[1-2][0-9]|3[0-2]))$ - IngressPort: - Type: Number - IamInstanceProfile: + + DatabaseSecret: + Description: >- + The AWS SecretsManager Secret resource name (ARN) of the secure secret + storing the metadata database connection credentials. Type: String - IamRole: + QueueName: + Description: >- + Name of the queue to be used as message broker between the scheduler and + worker instances. Type: String - ImageId: - Type: AWS::EC2::Image::Id + LogsBucket: + Description: >- + Name of the bucket where task logs are remotely stored. + Type: String + InstanceType: + Description: >- + EC2 instance type to use for the webserver. Type: String - SharedCloudInitStack: + + LoadExampleDags: + Description: >- + Load the example DAGs distributed with Airflow. Useful if deploying a + stack for demonstrating a few topologies, operators and scheduling + strategies.
+ AllowedValues: + - 'False' + - 'True' + Default: 'False' Type: String + LoadDefaultCons: + Description: >- + Load the default connections initialized by Airflow. Most consider these + unnecessary, which is why the default is to not load them. + AllowedValues: + - 'False' + - 'True' + Default: 'False' + Type: String + WebServerPort: + Description: >- + The port Airflow webserver will be listening. + ConstraintDescription: >- + Ports below 1024 can be opened only with root privileges and the airflow + process does not run as such. + MinValue: 1024 + MaxValue: 65535 + Default: 8080 + Type: Number + QSS3BucketName: + Description: >- + S3 bucket name for the Quick Start assets. You can specify your own bucket + providing assets and submodules, if you want to override the Quick Start + behavior for your specific implementation. + ConstraintDescription: >- + Quick Start bucket name can include numbers, lowercase letters, uppercase + letters, and hyphens (-). It cannot start or end with a hyphen (-). + AllowedPattern: '^[0-9a-zA-Z]+([0-9a-zA-Z-]*[0-9a-zA-Z])*$' + Default: turbine-quickstart + Type: String + QSS3KeyPrefix: + Description: >- + S3 key prefix for the Quick Start assets. You can scpeficy your own + "directory" providing the stack templates, if you want to override the + Quick Start behavior for your specific implementation. + ConstraintDescription: >- + Quick Start key prefix can include numbers, lowercase letters, uppercase + letters, hyphens (-), and forward slash (/). 
+ AllowedPattern: '^[0-9a-zA-Z-/]*$' + Default: quickstart-turbine-airflow/ + Type: String Resources: LaunchConfiguration: Type: AWS::AutoScaling::LaunchConfiguration Properties: IamInstanceProfile: !Ref IamInstanceProfile - ImageId: !Ref ImageId + ImageId: !FindInMap + - AWSAMIRegionMap + - !Ref AWS::Region + - AMZNLINUX2 InstanceType: !Ref InstanceType SecurityGroups: - - !Ref InstancesSecurityGroup - - !Ref WebserverSecurityGroup + - !Ref SecurityGroup + - !Ref SecurityGroupID UserData: Fn::Base64: !Sub | #!/bin/bash -xe - echo 'TURBINE_MACHINE=WEBSERVER' > /etc/environment /opt/aws/bin/cfn-init -v \ - --region ${AWS::Region} \ - --role ${IamRole} \ - --stack ${SharedCloudInitStack} \ - --resource SharedCloudInitMetadata + --stack ${AWS::StackName} \ + --resource LaunchConfiguration + /opt/aws/bin/cfn-signal -e $? \ + --stack ${AWS::StackName} \ + --resource LaunchConfiguration + Metadata: + AWS::CloudFormation::Init: + config: + commands: + setup: + command: !Sub | + export AWS_STACK_NAME="${AWS::StackName}" + export LOGS_BUCKET="${LogsBucket}" + export QUEUE_NAME="${QueueName}" + export DB_SECRETS_ARN="${DatabaseSecret}" + export LOAD_EXAMPLES="${LoadExampleDags}" + export LOAD_DEFAULTS="${LoadDefaultCons}" + export WEB_SERVER_PORT="${WebServerPort}" + aws s3 sync s3://${QSS3BucketName}/${QSS3KeyPrefix}scripts /opt/turbine + chmod +x /opt/turbine/webserver.setup.sh + /opt/turbine/webserver.setup.sh - WebserverSecurityGroup: + SecurityGroup: Type: AWS::EC2::SecurityGroup Properties: GroupDescription: >- @@ -54,10 +212,10 @@ Resources: the specified port for web access in from a given ip range.
VpcId: !Ref VPCID SecurityGroupIngress: - - CidrIp: !Ref IngressCIDR + - CidrIp: !Ref AllowHTTPAccessCIDR IpProtocol: TCP - FromPort: !Ref IngressPort - ToPort: !Ref IngressPort + FromPort: !Ref WebServerPort + ToPort: !Ref WebServerPort Tags: - Key: Name Value: turbine-webserver-sg @@ -78,6 +236,96 @@ Resources: Value: turbine-webserver PropagateAtLaunch: true + IamInstanceProfile: + Type: AWS::IAM::InstanceProfile + Properties: + Roles: + - !Ref IamRole + + IamRole: + Type: AWS::IAM::Role + Properties: + AssumeRolePolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Principal: + Service: + - ec2.amazonaws.com + Action: + - sts:AssumeRole + ManagedPolicyArns: + - arn:aws:iam::aws:policy/service-role/AmazonEC2RoleforSSM + Policies: + - PolicyName: !Sub TurbineAirflowWebserverDescribeStackPolicy-${AWS::StackName} + PolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Action: + - cloudformation:DescribeStackResource + Resource: !Join + - ':' + - - arn:aws:cloudformation + - !Ref AWS::Region + - !Ref AWS::AccountId + - !Sub stack/${AWS::StackName}/* + - PolicyName: !Sub TurbineAirflowWebserverGetSecretPolicy-${AWS::StackName} + PolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Action: + - secretsmanager:GetSecretValue + Resource: !Ref DatabaseSecret + - PolicyName: !Sub TurbineAirflowWebserverLogsRWPolicy-${AWS::StackName} + PolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Action: + - s3:GetObject + - s3:PutObject + - s3:DeleteObject + Resource: !Sub arn:aws:s3:::${LogsBucket}/* + Outputs: AutoScalingGroup: Value: !Ref AutoScalingGroup + IamRole: + Value: !Ref IamRole + SecurityGroup: + Value: !Ref SecurityGroup + +Mappings: + AWSAMIRegionMap: + ap-northeast-1: + AMZNLINUX2: ami-052652af12b58691f + ap-northeast-2: + AMZNLINUX2: ami-0db78afd3d150fc18 + ap-south-1: + AMZNLINUX2: ami-03b5297d565ef30a6 + ap-southeast-1: + AMZNLINUX2: ami-0cbc6aae997c6538a + ap-southeast-2: + AMZNLINUX2: 
ami-08fdde86b93accf1c + ca-central-1: + AMZNLINUX2: ami-0bf54ac1b628cf143 + eu-central-1: + AMZNLINUX2: ami-0ec1ba09723e5bfac + eu-west-1: + AMZNLINUX2: ami-04d5cc9b88f9d1d39 + eu-west-2: + AMZNLINUX2: ami-0cb790308f7591fa6 + eu-west-3: + AMZNLINUX2: ami-07eda9385feb1e969 + sa-east-1: + AMZNLINUX2: ami-0b032e878a66c3b68 + us-east-1: + AMZNLINUX2: ami-0fc61db8544a617ed + us-east-2: + AMZNLINUX2: ami-0e01ce4ee18447327 + us-west-1: + AMZNLINUX2: ami-09a7fe78668f1e2c0 + us-west-2: + AMZNLINUX2: ami-0ce21b51cb31a48b8 diff --git a/templates/turbine-workerset.template b/templates/turbine-workerset.template index 8958a811..d2fb6d54 100644 --- a/templates/turbine-workerset.template +++ b/templates/turbine-workerset.template @@ -1,57 +1,180 @@ AWSTemplateFormatVersion: 2010-09-09 +Description: >- + This template creates the Airflow worker instances in a highly available auto + scaling group spanning two private subnets, plus an EFS to work as shared + network directory and a custom cloudwatch load metric function to guide the + auto scaling alarm triggers. **WARNING** This template creates AWS resources. + You will be billed for the AWS resources used if you create a stack from this + template. 
QS(0027) +Metadata: + AWS::CloudFormation::Interface: + ParameterGroups: + - Label: + default: VPC network configuration + Parameters: + - VPCID + - PrivateSubnet1AID + - PrivateSubnet2AID + - SecurityGroupID + - Label: + default: Turbine cluster configuration + Parameters: + - DatabaseSecret + - QueueName + - LogsBucket + - Label: + default: Turbine workerset configuration + Parameters: + - InstanceType + - MinGroupSize + - MaxGroupSize + - ShrinkThreshold + - GrowthThreshold + - Label: + default: Apache Airflow configuration + Parameters: + - LoadExampleDags + - LoadDefaultCons + - Label: + default: AWS Quick Start configuration + Parameters: + - QSS3BucketName + - QSS3KeyPrefix + ParameterLabels: + VPCID: + default: VPC ID + PrivateSubnet1AID: + default: Private subnet 1 ID + PrivateSubnet2AID: + default: Private subnet 2 ID + SecurityGroupID: + default: Security group ID + DatabaseSecret: + default: Database secret + QueueName: + default: Queue name + LogsBucket: + default: Logs bucket + InstanceType: + default: Workers instance type + MinGroupSize: + default: Minimum group size + MaxGroupSize: + default: Maximum group size + ShrinkThreshold: + default: Shrink threshold + GrowthThreshold: + default: Growth threshold + LoadExampleDags: + default: Load example DAGs + LoadDefaultCons: + default: Load default connections + QSS3BucketName: + default: Quick Start S3 bucket name + QSS3KeyPrefix: + default: Quick Start S3 key prefix Parameters: + VPCID: + Description: An existing VPC for the cluster. Type: AWS::EC2::VPC::Id PrivateSubnet1AID: + Description: An existing private Subnet in some Availability Zone. Type: AWS::EC2::Subnet::Id PrivateSubnet2AID: + Description: An existing private Subnet in another Availability Zone.
Type: AWS::EC2::Subnet::Id - InstancesSecurityGroup: + SecurityGroupID: + Description: >- + Security Group ID of an externally managed security group that gives + instances access to relevant external resources like the metadata database + endpoints in the two provided subnets. Type: AWS::EC2::SecurityGroup::Id - IamInstanceProfile: + + DatabaseSecret: + Description: >- + The AWS SecretsManager Secret resource name (ARN) of the secure secret + storing the metadata database connection credentials. Type: String - IamRole: + QueueName: + Description: >- + Name of the queue to be used as message broker between the scheduler and + worker instances. Type: String - ImageId: - Type: AWS::EC2::Image::Id + LogsBucket: + Description: >- + Name of the bucket where task logs are remotely stored. + Type: String + InstanceType: + Description: >- + EC2 instance type to use for the workers. Type: String - MinSize: + MinGroupSize: + Description: The minimum number of active worker instances. + Default: 0 Type: Number - MaxSize: + MaxGroupSize: + Description: The maximum number of active worker instances. + Default: 10 Type: Number ShrinkThreshold: + Description: >- + The threshold for the average queue size from which going equal or below + will trigger the AutoScaling group to Scale In, deallocating one worker + instance. + Default: 0.5 Type: Number GrowthThreshold: + Description: >- + The threshold for the average queue size from which going equal or above + will trigger the AutoScaling group to Scale Out, allocating one worker + instance. + Default: 0.9 Type: Number - QueueName: + + LoadExampleDags: + Description: >- + Load the example DAGs distributed with Airflow. Useful if deploying a + stack for demonstrating a few topologies, operators and scheduling + strategies. + AllowedValues: + - 'False' + - 'True' + Default: 'False' Type: String - SharedCloudInitStack: + LoadDefaultCons: + Description: >- + Load the default connections initialized by Airflow.
Most consider these + unnecessary, which is why the default is to not load them. + AllowedValues: + - 'False' + - 'True' + Default: 'False' Type: String - # Quick Start Overrides QSS3BucketName: Description: >- S3 bucket name for the Quick Start assets. You can specify your own bucket providing assets and submodules, if you want to override the Quick Start behavior for your specific implementation. - Type: String - Default: turbine-quickstart - AllowedPattern: '^[0-9a-zA-Z]+([0-9a-zA-Z-]*[0-9a-zA-Z])*$' ConstraintDescription: >- Quick Start bucket name can include numbers, lowercase letters, uppercase letters, and hyphens (-). It cannot start or end with a hyphen (-). + AllowedPattern: '^[0-9a-zA-Z]+([0-9a-zA-Z-]*[0-9a-zA-Z])*$' + Default: turbine-quickstart + Type: String QSS3KeyPrefix: Description: >- S3 key prefix for the Quick Start assets. You can scpeficy your own "directory" providing the stack templates, if you want to override the Quick Start behavior for your specific implementation. - Type: String - Default: quickstart-turbine-airflow/ - AllowedPattern: '^[0-9a-zA-Z-/]*$' ConstraintDescription: >- Quick Start key prefix can include numbers, lowercase letters, uppercase letters, hyphens (-), and forward slash (/). 
+ AllowedPattern: '^[0-9a-zA-Z-/]*$' + Default: quickstart-turbine-airflow/ + Type: String Resources: @@ -59,22 +182,41 @@ Resources: Type: AWS::AutoScaling::LaunchConfiguration Properties: IamInstanceProfile: !Ref IamInstanceProfile - ImageId: !Ref ImageId + ImageId: !FindInMap + - AWSAMIRegionMap + - !Ref AWS::Region + - AMZNLINUX2 InstanceType: !Ref InstanceType SecurityGroups: - - !Ref InstancesSecurityGroup - - !Ref WorkersetSecurityGroup + - !Ref SecurityGroup + - !Ref SecurityGroupID UserData: Fn::Base64: !Sub | #!/bin/bash -xe - echo 'TURBINE_MACHINE=WORKER' > /etc/environment /opt/aws/bin/cfn-init -v \ - --region ${AWS::Region} \ - --role ${IamRole} \ - --stack ${SharedCloudInitStack} \ - --resource SharedCloudInitMetadata + --stack ${AWS::StackName} \ + --resource LaunchConfiguration + /opt/aws/bin/cfn-signal -e $? \ + --stack ${AWS::StackName} \ + --resource LaunchConfiguration + Metadata: + AWS::CloudFormation::Init: + config: + commands: + setup: + command: !Sub | + export AWS_STACK_NAME="${AWS::StackName}" + export QUEUE_NAME="${QueueName}" + export LOGS_BUCKET="${LogsBucket}" + export FILE_SYSTEM_ID="${FileSystem}" + export DB_SECRETS_ARN="${DatabaseSecret}" + export LOAD_EXAMPLES="${LoadExampleDags}" + export LOAD_DEFAULTS="${LoadDefaultCons}" + aws s3 sync s3://${QSS3BucketName}/${QSS3KeyPrefix}scripts /opt/turbine + chmod +x /opt/turbine/workerset.setup.sh + /opt/turbine/workerset.setup.sh - WorkersetSecurityGroup: + SecurityGroup: Type: AWS::EC2::SecurityGroup Properties: GroupDescription: >- @@ -82,7 +224,7 @@ Resources: a special port where the Airflow webserver can fetch logs directly.
VpcId: !Ref VPCID SecurityGroupIngress: - - SourceSecurityGroupId: !Ref InstancesSecurityGroup + - SourceSecurityGroupId: !Ref SecurityGroupID IpProtocol: TCP FromPort: 8793 ToPort: 8793 @@ -94,8 +236,8 @@ Resources: Type: AWS::AutoScaling::AutoScalingGroup Properties: LaunchConfigurationName: !Ref LaunchConfiguration - MaxSize: !Ref MaxSize - MinSize: !Ref MinSize + MaxSize: !Ref MaxGroupSize + MinSize: !Ref MinGroupSize MetricsCollection: - Granularity: 1Minute VPCZoneIdentifier: @@ -106,6 +248,116 @@ Resources: Value: turbine-worker PropagateAtLaunch: true + IamInstanceProfile: + Type: AWS::IAM::InstanceProfile + Properties: + Roles: + - !Ref IamRole + + IamRole: + Type: AWS::IAM::Role + Properties: + AssumeRolePolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Principal: + Service: + - ec2.amazonaws.com + Action: + - sts:AssumeRole + ManagedPolicyArns: + - arn:aws:iam::aws:policy/service-role/AmazonEC2RoleforSSM + Policies: + - PolicyName: !Sub TurbineAirflowWorkersetDescribeStackPolicy-${AWS::StackName} + PolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Action: + - cloudformation:DescribeStackResource + Resource: !Join + - ':' + - - arn:aws:cloudformation + - !Ref AWS::Region + - !Ref AWS::AccountId + - !Sub stack/${AWS::StackName}/* + - PolicyName: !Sub TurbineAirflowWorkersetGetSecretPolicy-${AWS::StackName} + PolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Action: + - secretsmanager:GetSecretValue + Resource: !Ref DatabaseSecret + - PolicyName: !Sub TurbineAirflowWorkersetQueueRWPolicy-${AWS::StackName} + PolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Action: + - sqs:ListQueues + Resource: + - !Sub arn:aws:sqs:${AWS::Region}:${AWS::AccountId}:* + - Effect: Allow + Action: + - sqs:ChangeMessageVisibility + - sqs:DeleteMessage + - sqs:GetQueueAttributes + - sqs:GetQueueUrl + - sqs:ReceiveMessage + - sqs:SendMessage + Resource: !Sub 
arn:aws:sqs:${AWS::Region}:${AWS::AccountId}:${QueueName} + - PolicyName: !Sub TurbineAirflowWorkersetLogsRWPolicy-${AWS::StackName} + PolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Action: + - s3:GetObject + - s3:PutObject + - s3:DeleteObject + Resource: !Sub arn:aws:s3:::${LogsBucket}/* + + FileSystem: + Type: AWS::EFS::FileSystem + Properties: + FileSystemTags: + - Key: Name + Value: !Sub ${AWS::StackName}-filesystem + + MountTarget1A: + Type: AWS::EFS::MountTarget + Properties: + FileSystemId: !Ref FileSystem + SubnetId: !Ref PrivateSubnet1AID + SecurityGroups: + - !Ref EfsMountSecurityGroup + + MountTarget2A: + Type: AWS::EFS::MountTarget + Properties: + FileSystemId: !Ref FileSystem + SubnetId: !Ref PrivateSubnet2AID + SecurityGroups: + - !Ref EfsMountSecurityGroup + + EfsMountSecurityGroup: + Type: AWS::EC2::SecurityGroup + Properties: + GroupDescription: >- + Security Rules with permissions for the shared filesystem across Airflow + instances. + SecurityGroupIngress: + - SourceSecurityGroupId: !Ref SecurityGroup + IpProtocol: TCP + FromPort: 2049 + ToPort: 2049 + VpcId: !Ref VPCID + Tags: + - Key: Name + Value: EfsMountSecurityGroup + CloudWatchMetricLambda: Type: AWS::Lambda::Function Properties: @@ -223,5 +475,41 @@ Resources: Outputs: AutoScalingGroup: - Description: The workers autoscaling group Value: !Ref AutoScalingGroup + IamRole: + Value: !Ref IamRole + SecurityGroup: + Value: !Ref SecurityGroup + +Mappings: + AWSAMIRegionMap: + ap-northeast-1: + AMZNLINUX2: ami-052652af12b58691f + ap-northeast-2: + AMZNLINUX2: ami-0db78afd3d150fc18 + ap-south-1: + AMZNLINUX2: ami-03b5297d565ef30a6 + ap-southeast-1: + AMZNLINUX2: ami-0cbc6aae997c6538a + ap-southeast-2: + AMZNLINUX2: ami-08fdde86b93accf1c + ca-central-1: + AMZNLINUX2: ami-0bf54ac1b628cf143 + eu-central-1: + AMZNLINUX2: ami-0ec1ba09723e5bfac + eu-west-1: + AMZNLINUX2: ami-04d5cc9b88f9d1d39 + eu-west-2: + AMZNLINUX2: ami-0cb790308f7591fa6 + eu-west-3: + AMZNLINUX2: 
ami-07eda9385feb1e969 + sa-east-1: + AMZNLINUX2: ami-0b032e878a66c3b68 + us-east-1: + AMZNLINUX2: ami-0fc61db8544a617ed + us-east-2: + AMZNLINUX2: ami-0e01ce4ee18447327 + us-west-1: + AMZNLINUX2: ami-09a7fe78668f1e2c0 + us-west-2: + AMZNLINUX2: ami-0ce21b51cb31a48b8 diff --git a/test/templates.py b/test/templates.py new file mode 100644 index 00000000..c51c7a58 --- /dev/null +++ b/test/templates.py @@ -0,0 +1,14 @@ +from cfn_tools import load_yaml + +with open("./templates/turbine-master.template") as f: + MASTER = load_yaml(f.read()) +with open("./templates/turbine-cluster.template") as f: + CLUSTER = load_yaml(f.read()) +with open("./templates/turbine-scheduler.template") as f: + SCHEDULER = load_yaml(f.read()) +with open("./templates/turbine-webserver.template") as f: + WEBSERVER = load_yaml(f.read()) +with open("./templates/turbine-workerset.template") as f: + WORKERSET = load_yaml(f.read()) + +ALL = [MASTER, CLUSTER, SCHEDULER, WEBSERVER, WORKERSET] diff --git a/test/test_quickstart_guidelines.py b/test/test_quickstart_guidelines.py new file mode 100644 index 00000000..fe2a3815 --- /dev/null +++ b/test/test_quickstart_guidelines.py @@ -0,0 +1,34 @@ +import re +from templates import ALL + + +def test_if_vpc_configuration_comes_first(): + for template in ALL: + interface = template["Metadata"]["AWS::CloudFormation::Interface"] + groups = [group["Label"]["default"] for group in interface["ParameterGroups"]] + assert "VPC" in groups[0] + + +def test_if_quickstart_configuration_comes_first(): + for template in ALL: + interface = template["Metadata"]["AWS::CloudFormation::Interface"] + groups = [group["Label"]["default"] for group in interface["ParameterGroups"]] + assert "Quick Start" in groups[-1] + + +def test_if_parameters_are_pascal_case(): + for template in ALL: + params = list(template["Parameters"].keys()) + for param in params: + assert param[0] == param[0].upper() + + +def test_if_labels_include_punctuation(): + for template in ALL: + interface = 
template["Metadata"]["AWS::CloudFormation::Interface"] + labels = [label["default"] for label in interface["ParameterLabels"].values()] + for label in labels: + assert re.fullmatch(r"[a-zA-Z0-9 ]+", label) + + +# TODO: continue implementing https://aws-quickstart.github.io/naming-parms.html diff --git a/test/test_template_descriptions.py b/test/test_template_descriptions.py new file mode 100644 index 00000000..ebf74d7f --- /dev/null +++ b/test/test_template_descriptions.py @@ -0,0 +1,14 @@ +import re +from templates import MASTER, CLUSTER + + +def strip_warning(description): + return re.sub(r"\*\*WARNING\*\*.*QS\(0027\)", "", description) + + +def test_nesting_consistency(): + master_desc = strip_warning(MASTER["Description"]) + cluster_desc = strip_warning(CLUSTER["Description"]).replace( + "This template", "The Turbine Airflow cluster stack" + ) + assert cluster_desc in master_desc diff --git a/test/test_template_interface.py b/test/test_template_interface.py new file mode 100644 index 00000000..a543022b --- /dev/null +++ b/test/test_template_interface.py @@ -0,0 +1,41 @@ +from templates import ALL + + +def test_if_all_parameters_are_grouped(): + for template in ALL: + interface = template["Metadata"]["AWS::CloudFormation::Interface"] + grouped = [ + param + for group in interface["ParameterGroups"] + for param in group["Parameters"] + ] + for param in template["Parameters"]: + assert param in grouped + + +def test_if_parameters_in_groups_are_ordered(): + for template in ALL: + interface = template["Metadata"]["AWS::CloudFormation::Interface"] + grouped = [ + param + for group in interface["ParameterGroups"] + for param in group["Parameters"] + ] + params = list(template["Parameters"].keys()) + assert grouped == params + + +def test_if_all_parameters_are_labeled(): + for template in ALL: + interface = template["Metadata"]["AWS::CloudFormation::Interface"] + labeled = list(interface["ParameterLabels"].keys()) + for param in template["Parameters"]: + assert param in labeled + + +def
test_if_parameters_labels_are_ordered(): + for template in ALL: + interface = template["Metadata"]["AWS::CloudFormation::Interface"] + labeled = list(interface["ParameterLabels"].keys()) + params = list(template["Parameters"].keys()) + assert labeled == params diff --git a/test/test_template_params.py b/test/test_template_params.py new file mode 100644 index 00000000..85d0a4d9 --- /dev/null +++ b/test/test_template_params.py @@ -0,0 +1,73 @@ +import re +from cfn_tools import dump_yaml +from templates import ALL, MASTER, CLUSTER, SCHEDULER, WEBSERVER, WORKERSET + + +def test_if_important_properties_are_specified(): + for template in ALL: + for specs in template["Parameters"].values(): + assert "Description" in specs + assert "Type" in specs + if "AllowedPattern" in specs: + assert "ConstraintDescription" in specs + if "MinValue" in specs or "MaxValue" in specs: + assert "ConstraintDescription" in specs + + +def test_if_properties_are_in_order(): + def is_ordered(left, right, array): + left_index = array.index(left) if left in array else None + right_index = array.index(right) if right in array else None + if left_index is None or right_index is None: + return True + return left_index < right_index + + for template in ALL: + for spec in template["Parameters"].values(): + props = list(spec.keys()) + + assert is_ordered("Description", "ConstraintDescription", props) + assert is_ordered("ConstraintDescription", "AllowedPattern", props) + assert is_ordered("AllowedPattern", "Default", props) + assert is_ordered("Default", "Type", props) + + assert is_ordered("Description", "AllowedValues", props) + assert is_ordered("AllowedValues", "Default", props) + + assert is_ordered("ConstraintDescription", "MinValue", props) + assert is_ordered("MinValue", "MaxValue", props) + assert is_ordered("MaxValue", "Default", props) + + +def test_if_default_value_satisfies_pattern(): + for template in ALL: + for specs in template["Parameters"].values(): + if "AllowedPattern" in specs and 
"Default" in specs: + assert re.match(specs["AllowedPattern"], specs["Default"]) + + +def test_if_description_ends_in_dot(): + for template in ALL: + for specs in template["Parameters"].values(): + assert specs["Description"].endswith(".") + + +def test_if_constraint_description_ends_in_dot(): + for template in ALL: + for specs in template["Parameters"].values(): + if "ConstraintDescription" in specs: + assert specs["ConstraintDescription"].endswith(".") + + +def test_consistency(): + pairs = [ + (MASTER, CLUSTER), + (CLUSTER, SCHEDULER), + (CLUSTER, WEBSERVER), + (CLUSTER, WORKERSET), + ] + for (t_outer, t_inner) in pairs: + for param1, specs1 in t_outer["Parameters"].items(): + for param2, specs2 in t_inner["Parameters"].items(): + if param1 == param2: + assert (param1, dump_yaml(specs1)) == (param2, dump_yaml(specs2))