
Merge pull request #154 from itzhapaz/develop/cdk-constructs
Develop/cdk constructs
wleepang authored Apr 9, 2021
2 parents 1826068 + 182e361 commit aafd09c
Showing 33 changed files with 1,942 additions and 1,256 deletions.
134 changes: 75 additions & 59 deletions src/aws-genomics-cdk/README.md
@@ -1,14 +1,18 @@
# Genomics Workflows on AWS - CDK code

Contained herein is a CDK application for creating AWS resources for working
with large-scale biomedical data - e.g. genomics.

In order to deploy this CDK application, you'll need an environment with AWS
CLI access and AWS CDK installed. A quick way to get an environment for running
this application is to launch [AWS Cloud9](https://aws.amazon.com/cloud9/).

AWS Cloud9 is a cloud-based integrated development environment (IDE) that lets
you write, run, and debug your code with just a browser. It includes a code
editor, debugger, and terminal. Cloud9 comes prepackaged with essential
tools for popular programming languages, including JavaScript, Python, PHP, and
more, so you don’t need to install files or configure your development machine
to start new projects.


## Download
@@ -20,41 +24,70 @@ git clone https://github.com/aws-samples/aws-genomics-workflows.git

## Configure

This CDK application requires an S3 bucket and a VPC. The application can
create them as part of the deployment, or you can configure it to use your own
S3 bucket and/or an existing VPC.

After cloning the repo, open, update, and save the application configuration
file - `app.config.json`.

**accountID** - Your
[AWS account id](https://docs.aws.amazon.com/IAM/latest/UserGuide/console_account-alias.html).
**region** - The
[AWS region](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html)
you want to use for the deployment (e.g., us-east-1, us-west-2, etc.).
**projectName** - A name for the project that will be used as a prefix for the
CDK stacks and constructs.
**tags** - A list of key/value pairs to apply as tags to the AWS resources
created by this app.
**S3.existingBucket** - If you want to use an existing bucket, set this value
to true, otherwise set it to false to create a new bucket.
**S3.bucketName** - The bucket name to use or create.
**VPC.createVPC** - If you want to create a new VPC, set this to true,
otherwise set to false.
**VPC.VPCName** - The VPC name to use or create.
**VPC.maxAZs** - The number of Availability Zones to use when creating a new
VPC.
**VPC.cidr** - The
[CIDR block](https://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) for
the new VPC.
**VPC.cidrMask** - The
[CIDR block subnet mask](https://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing#Subnet_masks)
for the new VPC.
**batch.defaultVolumeSize** - The default EBS volume size in GiB to be attached
to the EC2 instance under AWS Batch.
**batch.spotMaxVCPUs** - The limit on vCPUs when using
[spot instances](https://aws.amazon.com/ec2/spot/).
**batch.onDemendMaxVCPUs** - The limit on vCPUs when using on-demand instances.
**batch.instanceTypes** - The
[EC2 instance types](https://aws.amazon.com/ec2/instance-types/) to use in
AWS Batch.
**workflows** - A list of workflows to launch. Demo workflows are available
under the `lib/workflows` directory. To add a workflow, update the workflows
section of `lib/aws-genomics-cdk-stack.ts`; an illustrative sketch of that
section follows the example configuration below.

```
{
"accountID": "111111111111",
"region": "us-west-2",
"projectName": "genomics",
"tags": [{
"name": "Environment",
"value": "production"
},
{
"name": "Project",
"value": "genomics-pipeline"
}
],
"S3": {
"existingBucket": true,
"bucketName": ""
"bucketName": "YOUR-BUCKET-NAME"
},
"VPC": {
"createVPC": true,
"existingVPCName": "",
"VPCName": "genomics-vpc",
"maxAZs": 2,
"cidr": "10.0.0.0/16",
"cidrMask": 24
@@ -79,44 +112,27 @@ for the new VPC.
"c5.24xlarge"
]
},
"stepFunctions": {
"launchDemoPipeline": true,
"jobDefinitions": {
"fastqc": {
"repository": "genomics/fastqc",
"memoryLimit": 8000,
"vcpus": 4,
"spot": true,
"retryAttempts":1,
"timeout": 600
},
"minimap2": {
"repository": "genomics/minimap2",
"memoryLimit": 16000,
"vcpus": 8,
"spot": true,
"retryAttempts":1,
"timeout": 3600
}
}
}
"workflows": [{
"name": "variantCalling",
"spot": true
}]
}
```
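
The snippet below is a minimal, purely illustrative sketch of what that
workflows section might look like; the `WorkflowsSection` construct, its
wiring, and the use of `cdk.NestedStack` are assumptions made for this example,
not the repository's actual API.

```
// Illustrative sketch only -- the construct name and the use of NestedStack
// are hypothetical placeholders, not the repository's actual API. It shows
// how the "workflows" entries in app.config.json could drive the creation of
// one workflow stack per configured entry.
import * as cdk from "@aws-cdk/core";
import * as config from "../app.config.json";

export class WorkflowsSection extends cdk.Construct {
  constructor(scope: cdk.Construct, id: string) {
    super(scope, id);

    for (const workflow of config.workflows) {
      // e.g. "genomics-variantCalling"; a real workflow construct would also
      // receive the Batch job queue and the per-workflow "spot" flag here.
      new cdk.NestedStack(this, `${config.projectName}-${workflow.name}`);
    }
  }
}
```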

## Deploy

To deploy the CDK application, use the command line and make sure you are in
the root folder of the CDK application (`src/aws-genomics-cdk`).
First install the necessary Node.js modules:
```
npm install
```

Then deploy the application.
```
# The "--require-approval never" parameter will skip the question to approve specific resouce creation,
# such as IAM roles. You can remove this parameter if you want to be prompted to approve creating these
# resources.
# The "--require-approval never" parameter will skip the question to approve
# specific resouce creation, such as IAM roles. You can remove this parameter
# if you want to be prompted to approve creating these resources.
cdk deploy --all --require-approval never
```
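
If the target account and region have not been bootstrapped for the CDK
before, you may need to run `cdk bootstrap` once before deploying. You can
also run `cdk diff` to preview the changes a deployment would make, and
`cdk destroy --all` to remove the stacks when you no longer need them.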

@@ -129,7 +145,7 @@ cdk deploy --all --require-approval never
| `lib/vpc/vpc-stack.ts` | An optional stack that will launch a VPC |
| `lib/batch/batch-stack.ts` | An AWS Batch stack with 2 compute environments (spot and on-demand) and 2 queues (default and high priority) |
| `lib/batch/batch-iam-stack.ts` | An IAM stack with roles and policies required for running AWS Batch |
| `lib/workflows` | A folder containing pipeline stacks |


## Constructs
@@ -139,6 +155,6 @@ cdk deploy --all --require-approval never
| `lib/batch/batch-compute-environmnet-construct.ts` | A construct for creating an [AWS Batch compute environment](https://docs.aws.amazon.com/batch/latest/userguide/compute_environments.html) |
| `lib/batch/job-queue-construct.ts` | A construct for creating an [AWS Batch job queue](https://docs.aws.amazon.com/batch/latest/userguide/job_queues.html) |
| `lib/batch/launch-template-construct.ts` | A construct for creating an [EC2 launch template](https://docs.aws.amazon.com/autoscaling/ec2/userguide/LaunchTemplates.html) |
| `lib/workflows/genomics-task-construct.ts` | A construct for creating a step function task that submits a batch job |
| `lib/workflows/job-definition-construct.ts` | A construct for creating an [AWS Batch job definition](https://docs.aws.amazon.com/batch/latest/userguide/job_definitions.html) to be used as a task in step functions |

41 changes: 17 additions & 24 deletions src/aws-genomics-cdk/app.config.json
@@ -1,13 +1,23 @@
{
"accountID": "111111111111",
"region": "us-west-2",
"projectName": "genomics",
"tags": [{
"name": "Environment",
"value": "production"
},
{
"name": "Project",
"value": "genomics-pipeline"
}
],
"S3": {
"existingBucket": true,
"bucketName": ""
"bucketName": "YOUR-BUCKET-NAME"
},
"VPC": {
"createVPC": true,
"existingVPCName": "",
"VPCName": "genomics-vpc",
"maxAZs": 2,
"cidr": "10.0.0.0/16",
"cidrMask": 24
@@ -32,25 +42,8 @@
"c5.24xlarge"
]
},
"stepFunctions": {
"launchDemoPipeline": true,
"jobDefinitions": {
"fastqc": {
"repository": "genomics/fastqc",
"memoryLimit": 8000,
"vcpus": 4,
"spot": true,
"retryAttempts":1,
"timeout": 600
},
"minimap2": {
"repository": "genomics/minimap2",
"memoryLimit": 16000,
"vcpus": 8,
"spot": true,
"retryAttempts":1,
"timeout": 3600
}
}
}
}
"workflows": [{
"name": "variantCalling",
"spot": true
}]
}
69 changes: 67 additions & 2 deletions src/aws-genomics-cdk/assets/launch_template_user_data.txt
@@ -4,15 +4,72 @@ Content-Type: multipart/mixed; boundary="==BOUNDARY=="
--==BOUNDARY==
Content-Type: text/cloud-config; charset="us-ascii"

#cloud-config
repo_update: true
repo_upgrade: security

packages:
- jq
- btrfs-progs
- sed
- git
- amazon-ssm-agent
- unzip
- amazon-cloudwatch-agent

# The CloudWatch agent configuration written below ships the ECS agent,
# cloud-init, and amazon-cloudwatch-agent logs to the
# /aws/ecs/container-instance/${Namespace} log group.
write_files:
- permissions: '0644'
path: /opt/aws/amazon-cloudwatch-agent/etc/config.json
content: |
{
"agent": {
"logfile": "/opt/aws/amazon-cloudwatch-agent/logs/amazon-cloudwatch-agent.log"
},
"logs": {
"logs_collected": {
"files": {
"collect_list": [
{
"file_path": "/opt/aws/amazon-cloudwatch-agent/logs/amazon-cloudwatch-agent.log",
"log_group_name": "/aws/ecs/container-instance/${Namespace}",
"log_stream_name": "/aws/ecs/container-instance/${Namespace}/{instance_id}/amazon-cloudwatch-agent.log"
},
{
"file_path": "/var/log/cloud-init.log",
"log_group_name": "/aws/ecs/container-instance/${Namespace}",
"log_stream_name": "/aws/ecs/container-instance/${Namespace}/{instance_id}/cloud-init.log"
},
{
"file_path": "/var/log/cloud-init-output.log",
"log_group_name": "/aws/ecs/container-instance/${Namespace}",
"log_stream_name": "/aws/ecs/container-instance/${Namespace}/{instance_id}/cloud-init-output.log"
},
{
"file_path": "/var/log/ecs/ecs-init.log",
"log_group_name": "/aws/ecs/container-instance/${Namespace}",
"log_stream_name": "/aws/ecs/container-instance/${Namespace}/{instance_id}/ecs-init.log"
},
{
"file_path": "/var/log/ecs/ecs-agent.log",
"log_group_name": "/aws/ecs/container-instance/${Namespace}",
"log_stream_name": "/aws/ecs/container-instance/${Namespace}/{instance_id}/ecs-agent.log"
},
{
"file_path": "/var/log/ecs/ecs-volume-plugin.log",
"log_group_name": "/aws/ecs/container-instance/${Namespace}",
"log_stream_name": "/aws/ecs/container-instance/${Namespace}/{instance_id}/ecs-volume-plugin.log"
}
]
}
}
}
}

runcmd:

# start the amazon-cloudwatch-agent
- /opt/aws/amazon-cloudwatch-agent/bin/amazon-cloudwatch-agent-ctl -a fetch-config -m ec2 -s -c file:/opt/aws/amazon-cloudwatch-agent/etc/config.json

# install aws-cli v2 and copy the static binary in an easy to find location for bind-mounts into containers
- curl -s "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "/tmp/awscliv2.zip"
- unzip -q /tmp/awscliv2.zip -d /tmp
@@ -24,11 +81,19 @@ runcmd:
- mkdir -p /opt/aws-cli/bin
- cp -a $(dirname $(find /usr/local/aws-cli -name 'aws' -type f))/. /opt/aws-cli/bin/

# set environment variables for provisioning
- export GWFCORE_NAMESPACE=${Namespace}
- export INSTALLED_ARTIFACTS_S3_ROOT_URL=$(aws ssm get-parameter --name /gwfcore/${Namespace}/installed-artifacts/s3-root-url --query 'Parameter.Value' --output text)

# enable ecs spot instance draining
- echo ECS_ENABLE_SPOT_INSTANCE_DRAINING=true >> /etc/ecs/ecs.config

- systemctl enable amazon-ssm-agent
- systemctl start amazon-ssm-agent
# pull docker images only if missing
- echo ECS_IMAGE_PULL_BEHAVIOR=prefer-cached >> /etc/ecs/ecs.config

- cd /opt
- aws s3 sync $INSTALLED_ARTIFACTS_S3_ROOT_URL/ecs-additions ./ecs-additions
- chmod a+x /opt/ecs-additions/provision.sh
- /opt/ecs-additions/provision.sh

--==BOUNDARY==--
24 changes: 17 additions & 7 deletions src/aws-genomics-cdk/bin/aws-genomics-cdk.ts
@@ -1,13 +1,23 @@
#!/usr/bin/env node
import "source-map-support/register";
import * as cdk from "@aws-cdk/core";
import { AwsGenomicsCdkStack } from "../lib/aws-genomics-cdk-stack";
import * as config from "../app.config.json";

const env = {
account: process.env.CDK_DEFAULT_ACCOUNT ?? config.accountID,
region: process.env.CDK_DEFAULT_REGION ?? config.region,
};

const app = new cdk.App();
const genomicsStack = new AwsGenomicsCdkStack(
app,
`${config.projectName}CdkStack`,
{
env: env,
}
);

// Apply the tags defined in app.config.json to every resource in the stack.
for (let i = 0; i < config.tags.length; i++) {
cdk.Tags.of(genomicsStack).add(config.tags[i].name, config.tags[i].value);
}