From 8a1300a56c587bf149e36456156085713d7bc35e Mon Sep 17 00:00:00 2001 From: "W. Lee Pang" Date: Thu, 11 Jul 2019 09:09:45 -0500 Subject: [PATCH 01/11] WIP add test/prod deployment --- .travis.yml | 18 +++++++---- _scripts/deploy.sh | 78 +++++++++++++++++++++++++++++++++------------- 2 files changed, 69 insertions(+), 27 deletions(-) diff --git a/.travis.yml b/.travis.yml index e59bdae8c..10dc5d663 100644 --- a/.travis.yml +++ b/.travis.yml @@ -27,9 +27,15 @@ before_deploy: - bash _scripts/configure-deploy.sh deploy: - provider: script - script: bash _scripts/deploy.sh - skip_cleanup: true - on: - repo: aws-samples/aws-genomics-workflows - branch: master \ No newline at end of file + - provider: script + script: bash _scripts/deploy.sh production + skip_cleanup: true + on: + repo: aws-samples/aws-genomics-workflows + branch: master + - provider: script + script: bash _scripts/deploy.sh test + skip_cleanup: true + on: + repo: aws-samples/aws-genomics-workflows + branch: test \ No newline at end of file diff --git a/_scripts/deploy.sh b/_scripts/deploy.sh index ee7691ba2..7b2cf5369 100644 --- a/_scripts/deploy.sh +++ b/_scripts/deploy.sh @@ -5,30 +5,66 @@ set -e bash _scripts/make-artifacts.sh mkdocs build +ASSET_BUCKET=s3://aws-genomics-workflows +ASSET_STAGE=${1:-production} -echo "publishing artifacts:" -aws s3 sync \ - --profile asset-publisher \ - --acl public-read \ - --delete \ - ./artifacts \ - s3://aws-genomics-workflows/artifacts +function artifacts() { + IFS="" + S3_URI_PARTS=($ASSET_BUCKET $ASSET_STAGE_PATH "artifacts") + S3_URI_PARTS=(${S3_URI_PARTS[@]}) + S3_URI=$(printf '/%s' "${S3_URI_PARTS[@]%/}") + echo "publishing artifacts: $S3_URI" + aws s3 sync \ + --profile asset-publisher \ + --acl public-read \ + --delete \ + ./artifacts \ + $S3_URI +} -echo "publishing templates:" -aws s3 sync \ - --profile asset-publisher \ - --acl public-read \ - --delete \ - --metadata commit=$(git rev-parse HEAD) \ - ./src/templates \ - s3://aws-genomics-workflows/templates +function templates() { + IFS="" + S3_URI_PARTS=($ASSET_BUCKET $ASSET_STAGE_PATH "artifacts") + S3_URI_PARTS=(${S3_URI_PARTS[@]}) + S3_URI=$(printf '/%s' "${S3_URI_PARTS[@]%/}") + + echo "publishing templates: $S3_URI" + aws s3 sync \ + --profile asset-publisher \ + --acl public-read \ + --delete \ + --metadata commit=$(git rev-parse HEAD) \ + ./src/templates \ + $S3_URI +} +function site() { + echo "publishing site" + aws s3 sync \ + --acl public-read \ + --delete \ + ./site \ + s3://docs.opendata.aws/genomics-workflows +} -echo "publishing site" -aws s3 sync \ - --acl public-read \ - --delete \ - ./site \ - s3://docs.opendata.aws/genomics-workflows +function all() { + artifacts + templates + site +} +case $STAGE in + production) + ASSET_STAGE_PATH="" + all + ;; + test) + ASSET_STAGE_PATH="test" + artifacts + templates + ;; + *) + echo "unsupported staging level" + exit 1 +esac From 4a845f7e5f7f607246bf6d3adb3eb48a27ba0529 Mon Sep 17 00:00:00 2001 From: "W. 
Lee Pang" Date: Wed, 17 Jul 2019 22:59:05 -0400 Subject: [PATCH 02/11] WIP: simplify nextflow container entrypoint --- docs/orchestration/nextflow/nextflow-overview.md | 7 ++----- src/containers/nextflow/nextflow.aws.sh | 8 +++++--- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/docs/orchestration/nextflow/nextflow-overview.md b/docs/orchestration/nextflow/nextflow-overview.md index 853e3b8db..a24609504 100644 --- a/docs/orchestration/nextflow/nextflow-overview.md +++ b/docs/orchestration/nextflow/nextflow-overview.md @@ -425,14 +425,11 @@ This is what starting a workflow via the AWS CLI would look like: ```bash -git clone https://github.com/nf-core/rnaseq.git -aws s3 sync rnaseq s3://path/to/workflow/folder - aws batch submit-job \ - --job-name run-workflow-nf \ + --job-name nf-core-rnaseq \ --job-queue \ --job-definition nextflow \ - --container-overrides command=s3://path/to/workflow/folder,\ + --container-overrides command=nf-core/rnaseq,\ "--reads","'s3://1000genomes/phase3/data/HG00243/sequence_read/SRR*_{1,2}.filt.fastq.gz'",\ "--genome","GRCh37",\ "--skip_qc" diff --git a/src/containers/nextflow/nextflow.aws.sh b/src/containers/nextflow/nextflow.aws.sh index cd8ad96a3..bf183a741 100644 --- a/src/containers/nextflow/nextflow.aws.sh +++ b/src/containers/nextflow/nextflow.aws.sh @@ -31,12 +31,14 @@ mkdir -p /opt/work/$GUID cd /opt/work/$GUID # stage workflow definition -NF_FILE="" -if [ ! -z "$NEXTFLOW_PROJECT" ]; then +NF_FILE=$NEXTFLOW_PROJECT +if [[ "$NEXTFLOW_PROJECT" =~ "^s3://.*" ]]; then aws s3 sync --only-show-errors --exclude 'runs/*' --exclude '.*' $NEXTFLOW_PROJECT . NF_FILE=$(find . -maxdepth 1 -name "*.nf") fi echo "== Running Workflow ==" echo "nextflow run $NF_FILE $NEXTFLOW_PARAMS" -nextflow run $NF_FILE $NEXTFLOW_PARAMS \ No newline at end of file +nextflow run $NF_FILE $NEXTFLOW_PARAMS + +# TODO: stage sessions from/to s3 to enable resume \ No newline at end of file From 97cf3e329384e19ea569013fd0fc6157a592c25c Mon Sep 17 00:00:00 2001 From: "W. Lee Pang" Date: Tue, 30 Jul 2019 15:17:05 -0700 Subject: [PATCH 03/11] functionalize s3_uri creation --- _scripts/deploy.sh | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/_scripts/deploy.sh b/_scripts/deploy.sh index 7b2cf5369..1f6dedfbe 100644 --- a/_scripts/deploy.sh +++ b/_scripts/deploy.sh @@ -8,11 +8,22 @@ mkdocs build ASSET_BUCKET=s3://aws-genomics-workflows ASSET_STAGE=${1:-production} -function artifacts() { + +function s3_uri() { + BUCKET=$1 + shift + IFS="" - S3_URI_PARTS=($ASSET_BUCKET $ASSET_STAGE_PATH "artifacts") - S3_URI_PARTS=(${S3_URI_PARTS[@]}) - S3_URI=$(printf '/%s' "${S3_URI_PARTS[@]%/}") + PREFIX_PARTS=("$@") + PREFIX_PARTS=(${PREFIX_PARTS[@]}) + PREFIX=$(printf '/%s' "${PREFIX_PARTS[@]%/}") + + echo "${BUCKET%/}/${PREFIX:1}" +} + + +function artifacts() { + S3_URI=$(s3_uri $ASSET_BUCKET $ASSET_STAGE_PATH "artifacts") echo "publishing artifacts: $S3_URI" aws s3 sync \ @@ -24,11 +35,8 @@ function artifacts() { } function templates() { - IFS="" - S3_URI_PARTS=($ASSET_BUCKET $ASSET_STAGE_PATH "artifacts") - S3_URI_PARTS=(${S3_URI_PARTS[@]}) - S3_URI=$(printf '/%s' "${S3_URI_PARTS[@]%/}") - + S3_URI=$(s3_uri $ASSET_BUCKET $ASSET_STAGE_PATH "templates") + echo "publishing templates: $S3_URI" aws s3 sync \ --profile asset-publisher \ From eabba8fe1facfc41c6b4da412e3cc13a0b5c5280 Mon Sep 17 00:00:00 2001 From: "W. 
Lee Pang" Date: Wed, 31 Jul 2019 10:26:46 -0500 Subject: [PATCH 04/11] fix typo resolve #57 --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 630f472df..d88105812 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ The documentation is built using mkdocs. Install dependencies: ```bash -$ conda env create --file enviroment.yaml +$ conda env create --file environment.yaml ``` This will create a `conda` environment called `mkdocs` From 94c2eff95bd0c5f9d55c6be57f8046a4cb942892 Mon Sep 17 00:00:00 2001 From: "W. Lee Pang" Date: Mon, 9 Sep 2019 16:31:27 -0700 Subject: [PATCH 05/11] Update README --- src/templates/README.md | 90 +++++++++++++++++------------------------ 1 file changed, 36 insertions(+), 54 deletions(-) diff --git a/src/templates/README.md b/src/templates/README.md index 5d5e14f7f..e4091f66a 100644 --- a/src/templates/README.md +++ b/src/templates/README.md @@ -1,54 +1,36 @@ -# Genomics on AWS CloudFormation templates - -This directory contains example CloudFormation templates for setting up the resources for working with genomics and other large-scale biomedical research data. - - -root = to do -* inputs: - - stack name root - - az - - tags - - key pair name - - s3 bucket name -* outputs: - - job queue names - - s3 bucket name - - -vpc = https://raw.githubusercontent.com/aws-quickstart/quickstart-aws-vpc/master/templates/aws-vpc.template -* inputs: - * stack name - * Availability Zones - * tag for public & private subnets - * key pair name -* outputs: - - az - - sg - - -s3 = to do -* input: - - stack name - - s3 bucket name - -iam = to do -* inputs: - - stack name - - s3 bucket name -* outputs - - iam instance profile - - iam ecs service role - - iam ecs task roles - - iam batch service role - -batch = -* inputs: - - stack name - - azs - - key pair name - - iam instance profile - - iam ecs role - - iam ecs task roles - - iam batch service role - - iam batch spot fleet role -- outputs: - - job Queue names +# Genomics Workflows on AWS CloudFormation templates + +Contained herein are CloudFormation templates for creating AWS resources for working with large-scale biomedical data - e.g. genomics. + +## Core Stack + +Templates at the root level represent the "core" stack. The root template is: + +| File | Description | +| :--- | :---------- | +| `aws-genomics-root-novpc.template.yaml` | Root stack that invokes nested stacks (see below) | + +Nested stacks are as follows and listed in order of creation: + +| File | Description | +| :--- | :---------- | +| `aws-genomics-s3.template.yaml` | Creates an S3 bucket for storing workflow input and output data | +| `aws-genomics-launch-template.template.yaml` | Creates an EC2 Launch Template used in AWS Batch Compute Environments | +| `aws-genomics-iam.template.yaml` | Creates IAM roles for AWS Batch resources | +| `aws-genomics-batch.template.yaml` | Creates AWS Batch Job Queues and Compute Environments for job execution | + +## All-in-One ("AIO") Stacks + +All-in-One stacks are provided for solutions that utilize: + +* AWS Step-Functions +* Cromwell +* Nextflow + +and build atop the Core Stackk above. They also include additional stacks specific to the solution: + +| File | Description | +| :--- | :---------- | +| `step-functions/sfn-example.template.yaml` | Creates an example AWS Step Functions state-machine and containers for an example genomics workflow using BWA, samtools, and bcftools. 
| +| `cromwell/cromwell-server.template.yaml` | Creates an EC2 instance with Cromwell pre-installed and launched in "server" mode | +| `nextflow/nextflow-resources.template.yaml` | Creates a nextflow container and AWS Batch Job Definition for running nextflow | From e4c3aa1d3e3b5ca88e212010283da64c31ab8753 Mon Sep 17 00:00:00 2001 From: "W. Lee Pang" Date: Mon, 9 Sep 2019 16:34:58 -0700 Subject: [PATCH 06/11] refactor nextflow assets * update container entrypoint script * handle s3 uris as projects * sync session cache for resume * use logdir and workdir as defined in environment variables * update job definition to match container entrypoint script * create logdir and workdir environment variables * update s3 paths to create / use logdir and workdir --- src/containers/nextflow/nextflow.aws.sh | 35 ++++-- .../nextflow/nextflow-resources.template.yaml | 115 ++++++++++-------- 2 files changed, 91 insertions(+), 59 deletions(-) diff --git a/src/containers/nextflow/nextflow.aws.sh b/src/containers/nextflow/nextflow.aws.sh index bf183a741..ab0919155 100644 --- a/src/containers/nextflow/nextflow.aws.sh +++ b/src/containers/nextflow/nextflow.aws.sh @@ -1,7 +1,13 @@ #!/bin/bash -# $1 S3 URI to Nextflow project files. If not using S3 set to "". +# $1 Nextflow project. Can be an S3 URI, or git repo name. # $2.. Additional parameters passed on to the nextflow cli +# using nextflow needs the following locations/directories provided as +# environment variables to the container +# * NF_LOGSDIR: where caching and logging data are stored +# * NF_WORKDIR: where intermmediate results are stored + + echo "$@" NEXTFLOW_PROJECT=$1 shift @@ -30,15 +36,28 @@ fi mkdir -p /opt/work/$GUID cd /opt/work/$GUID +# stage in session cache +# .nextflow directory holds all session information for the current and past runs. +# it should be `sync`'d with an s3 uri, so that runs from previous sessions can be +# resumed +aws s3 sync --only-show-errors $NF_LOGSDIR/.nextflow .nextflow + # stage workflow definition -NF_FILE=$NEXTFLOW_PROJECT if [[ "$NEXTFLOW_PROJECT" =~ "^s3://.*" ]]; then - aws s3 sync --only-show-errors --exclude 'runs/*' --exclude '.*' $NEXTFLOW_PROJECT . - NF_FILE=$(find . -maxdepth 1 -name "*.nf") + aws s3 sync --only-show-errors --exclude 'runs/*' --exclude '.*' $NEXTFLOW_PROJECT ./project + NEXTFLOW_PROJECT=./project fi echo "== Running Workflow ==" -echo "nextflow run $NF_FILE $NEXTFLOW_PARAMS" -nextflow run $NF_FILE $NEXTFLOW_PARAMS - -# TODO: stage sessions from/to s3 to enable resume \ No newline at end of file +echo "nextflow run $NEXTFLOW_PROJECT $NEXTFLOW_PARAMS" +nextflow run $NEXTFLOW_PROJECT $NEXTFLOW_PARAMS + +# stage out session cache +aws s3 sync --only-show-errors .nextflow $NF_LOGSDIR/.nextflow + +# .nextflow.log file has more detailed logging from the workflow run and is +# nominally unique per run. +# +# when run locally, .nextflow.logs are automatically rotated +# when syncing to S3 uniquely identify logs by the batch GUID +aws s3 cp --only-show-errors .nextflow.log $NF_LOGSDIR/.nextflow.log.${GUID/\//.} \ No newline at end of file diff --git a/src/templates/nextflow/nextflow-resources.template.yaml b/src/templates/nextflow/nextflow-resources.template.yaml index d6987c49d..81de08bf5 100644 --- a/src/templates/nextflow/nextflow-resources.template.yaml +++ b/src/templates/nextflow/nextflow-resources.template.yaml @@ -44,32 +44,43 @@ Parameters: S3NextflowBucketName: Type: String Description: >- - S3 Bucket used to store *.nf scripts. 
+ S3 Bucket used to store Nextflow metadata (session cache, logs, and intermediate results) - S3ScriptPrefix: + ExistingBucket: Type: String Description: >- - (Optional) Parent folder in the S3 bucket that contains *.nf workflow scripts + Does the S3 Bucket for Nextflow metadata already exist? If not, it will be created. + AllowedValues: + - Yes + - No + Default: No - S3WorkDirPrefix: + S3NextflowPrefix: + Type: String + Description: >- + (Optional) Parent folder in the Nextflow metadata bucket for metadata folders. + Used only if the Nextflow metadata bucket is the same as the Data bucket. + Default: _nextflow + + S3LogsDirPrefix: Type: String Description: >- - (Optional) Parent folder in the S3 bucket that contains workflow execution logs + (Optional) Folder in the Nextflow metadata bucket (under the {Nextflow Prefix} if needed) + for session cache and logs. + Default: logs - ExistingBucket: + S3WorkDirPrefix: Type: String Description: >- - Does the S3 Bucket for *.nf scripts already exist? If not, it will be created. - AllowedValues: - - Yes - - No - Default: No + (Optional) Folder in the Nextflow metadata bucket (under the {Nextflow Prefix} if needed) + that contains workflow intermediate results + Default: runs NextflowContainerImage: Type: String Description: >- (Optional) Container image for nextflow with custom entrypoint for config and workflow - script staging. (Example, "/nextflow:latest"). + script staging. (Example, "/nextflow:latest"). Provide this if you have a specific version of nextflow you want to use, otherwise a container will be built using the latest version. @@ -93,16 +104,6 @@ Conditions: Fn::Equals: - !Ref ExistingBucket - No - - NoS3ScriptPrefix: - Fn::Equals: - - !Ref S3ScriptPrefix - - "" - - NoS3WorkDirPrefix: - Fn::Equals: - - !Ref S3WorkDirPrefix - - "" Resources: @@ -317,15 +318,6 @@ Resources: Type: AWS::Batch::JobDefinition Properties: Type: container - Parameters: - NextflowScript: - Fn::Join: - - "/" - - - Fn::If: - - NoS3ScriptPrefix - - !Sub "s3://${S3NextflowBucketName}" - - !Join ["/", [!Sub "s3://${S3NextflowBucketName}", !Ref S3ScriptPrefix]] - - workflow.nf ContainerProperties: MountPoints: - ContainerPath: /opt/work @@ -347,41 +339,61 @@ Resources: Environment: - Name: "NF_JOB_QUEUE" Value: !Ref BatchDefaultJobQueue + - Name: "NF_LOGSDIR" + Value: + Fn::Join: + - "/" + - - Fn::If: + - DataBucketIsNextflowBucket + - !Join ["/", [!Sub "s3://${S3NextflowBucketName}", !Ref S3NextflowPrefix]] + - !Sub "s3://${S3NextflowBucketName}" + - !Ref S3LogsDirPrefix - Name: "NF_WORKDIR" Value: Fn::Join: - - "/" - - - Fn::If: - - NoS3WorkDirPrefix - - !Sub "s3://${S3NextflowBucketName}" - - !Join ["/", [!Sub "s3://${S3NextflowBucketName}", !Ref S3WorkDirPrefix]] - - runs - + - "/" + - - Fn::If: + - DataBucketIsNextflowBucket + - !Join ["/", [!Sub "s3://${S3NextflowBucketName}", !Ref S3NextflowPrefix]] + - !Sub "s3://${S3NextflowBucketName}" + - !Ref S3WorkDirPrefix + JobDefinitionName: nextflow Outputs: - BucketName: + NextflowBucket: Description: >- - S3 Bucket used to store *.nf scripts + S3 Bucket used to store Nextflow metadata (session cache, logs, and intermediate results) Value: Fn::If: - NextflowBucketDoesNotExist - !Ref S3NextflowBucket - !Ref S3NextflowBucketName - - ScriptPrefix: + + LogsDir: Description: >- - Path in the S3 bucket where *.nf script files are located. If blank, - then they are located at the root level of the bucket. - Value: !Ref S3ScriptPrefix + S3 URI where nextflow session cache and logs are stored. 
+ Value: + Fn::Join: + - "/" + - - Fn::If: + - DataBucketIsNextflowBucket + - !Join ["/", [!Sub "s3://${S3NextflowBucketName}", !Ref S3NextflowPrefix]] + - !Sub "s3://${S3NextflowBucketName}" + - !Ref S3LogsDirPrefix - WorkDirPrefix: + WorkDir: Description: >- - Path in the S3 bucket where "runs" folder with workflow logs and final - outputs is located. If blank, then they are located at the root level of - the bucket. - Value: !Ref S3WorkDirPrefix + S3 URI where workflow intermediate results are stored. + Value: + Fn::Join: + - "/" + - - Fn::If: + - DataBucketIsNextflowBucket + - !Join ["/", [!Sub "s3://${S3NextflowBucketName}", !Ref S3NextflowPrefix]] + - !Sub "s3://${S3NextflowBucketName}" + - !Ref S3WorkDirPrefix NextflowContainerImage: Description: >- @@ -400,4 +412,5 @@ Outputs: NextflowJobRole: Description: >- IAM Role that allows the nextflow head node job access to S3 and Batch - Value: !GetAtt IAMNextflowJobRole.Arn \ No newline at end of file + Value: !GetAtt IAMNextflowJobRole.Arn +... \ No newline at end of file From 6d763e1be63aac7eddfe35c03b3b4340033e3a2d Mon Sep 17 00:00:00 2001 From: "W. Lee Pang" Date: Mon, 9 Sep 2019 16:43:11 -0700 Subject: [PATCH 07/11] update generated config use the new (19.07) config syntax for specifying path to awscli --- src/containers/nextflow/nextflow.aws.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/containers/nextflow/nextflow.aws.sh b/src/containers/nextflow/nextflow.aws.sh index ab0919155..200231e7b 100644 --- a/src/containers/nextflow/nextflow.aws.sh +++ b/src/containers/nextflow/nextflow.aws.sh @@ -21,7 +21,7 @@ cat << EOF > $NF_CONFIG workDir = "$NF_WORKDIR" process.executor = "awsbatch" process.queue = "$NF_JOB_QUEUE" -executor.awscli = "/home/ec2-user/miniconda/bin/aws" +aws.batch.cliPath = "/home/ec2-user/miniconda/bin/aws" EOF # AWS Batch places multiple jobs on an instance From 5a0db4700e9e75e0d12032fcf7ef0520c299b6b0 Mon Sep 17 00:00:00 2001 From: "W. Lee Pang" Date: Fri, 13 Sep 2019 14:12:54 -0700 Subject: [PATCH 08/11] add more logging output to entrypoint --- src/containers/nextflow/nextflow.aws.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/containers/nextflow/nextflow.aws.sh b/src/containers/nextflow/nextflow.aws.sh index 200231e7b..3b560af13 100644 --- a/src/containers/nextflow/nextflow.aws.sh +++ b/src/containers/nextflow/nextflow.aws.sh @@ -8,7 +8,12 @@ # * NF_WORKDIR: where intermmediate results are stored +echo "=== ENVIRONMENT ===" +echo `env` + +echo "=== RUN COMMAND ===" echo "$@" + NEXTFLOW_PROJECT=$1 shift NEXTFLOW_PARAMS="$@" From 5ac47b377bd94ef3c2121f95597c7e3aa69a315b Mon Sep 17 00:00:00 2001 From: "W. Lee Pang" Date: Fri, 13 Sep 2019 15:13:51 -0700 Subject: [PATCH 09/11] update nextflow guide --- .../nextflow/nextflow-overview.md | 203 +++++++----------- 1 file changed, 76 insertions(+), 127 deletions(-) diff --git a/docs/orchestration/nextflow/nextflow-overview.md b/docs/orchestration/nextflow/nextflow-overview.md index a24609504..b6e4330dc 100644 --- a/docs/orchestration/nextflow/nextflow-overview.md +++ b/docs/orchestration/nextflow/nextflow-overview.md @@ -65,25 +65,28 @@ ENTRYPOINT ["/opt/bin/nextflow.aws.sh"] !!! note If you are trying to keep your container image as small as possible, keep in mind that Nextflow relies on basic linux tools such as `awk`, `bash`, `ps`, `date`, `sed`, `grep`, `egrep`, and `tail` which may need to be installed on extra minimalist base images like `alpine`. -The script used for the entrypoint is shown below. 
The first parameter is the folder in S3 where you have staged your Nextflow scripts and supporting files (like additional config files). Any additional parameters are passed along to the Nextflow executable. This is important to remember when submiting the head node job. Notice that it automatically configures some Nextflow values based on environment variables set by AWS Batch. +The script used for the entrypoint is shown below. The first parameter should be a Nextflow "project". Nextflow supports pulling projects directly from Git repositories. This script also allows for projects to be specified as an S3 URI - a bucket and folder therein where you have staged your Nextflow scripts and supporting files (like additional config files). Any additional parameters are passed along to the Nextflow executable. Also, the script automatically configures some Nextflow values based on environment variables set by AWS Batch. ```bash -#!/bin/bash -echo $@ -NEXTFLOW_SCRIPT=$1 +echo "=== ENVIRONMENT ===" +echo `env` + +echo "=== RUN COMMAND ===" +echo "$@" + +NEXTFLOW_PROJECT=$1 shift -NEXTFLOW_PARAMS=$@ +NEXTFLOW_PARAMS="$@" # Create the default config using environment variables # passed into the container -mkdir -p /opt/config NF_CONFIG=~/.nextflow/config cat << EOF > $NF_CONFIG workDir = "$NF_WORKDIR" process.executor = "awsbatch" process.queue = "$NF_JOB_QUEUE" -executor.awscli = "/home/ec2-user/miniconda/bin/aws" +aws.batch.cliPath = "/home/ec2-user/miniconda/bin/aws" EOF # AWS Batch places multiple jobs on an instance @@ -91,20 +94,41 @@ EOF # to create a unique path GUID="$AWS_BATCH_JOB_ID/$AWS_BATCH_JOB_ATTEMPT" +if [ "$GUID" = "/" ]; then + GUID=`date | md5sum | cut -d " " -f 1` +fi + mkdir -p /opt/work/$GUID cd /opt/work/$GUID -# stage workflow definition -aws s3 sync --only-show-errors --exclude '.*' $NEXTFLOW_SCRIPT . +# stage in session cache +# .nextflow directory holds all session information for the current and past runs. +# it should be `sync`'d with an s3 uri, so that runs from previous sessions can be +# resumed +aws s3 sync --only-show-errors $NF_LOGSDIR/.nextflow .nextflow -NF_FILE=$(find . -name "*.nf" -maxdepth 1) +# stage workflow definition +if [[ "$NEXTFLOW_PROJECT" =~ "^s3://.*" ]]; then + aws s3 sync --only-show-errors --exclude 'runs/*' --exclude '.*' $NEXTFLOW_PROJECT ./project + NEXTFLOW_PROJECT=./project +fi echo "== Running Workflow ==" -echo "nextflow run $NF_FILE $NEXTFLOW_PARAMS" -nextflow run $NF_FILE $NEXTFLOW_PARAMS +echo "nextflow run $NEXTFLOW_PROJECT $NEXTFLOW_PARAMS" +nextflow run $NEXTFLOW_PROJECT $NEXTFLOW_PARAMS + +# stage out session cache +aws s3 sync --only-show-errors .nextflow $NF_LOGSDIR/.nextflow + +# .nextflow.log file has more detailed logging from the workflow run and is +# nominally unique per run. +# +# when run locally, .nextflow.logs are automatically rotated +# when syncing to S3 uniquely identify logs by the batch GUID +aws s3 cp --only-show-errors .nextflow.log $NF_LOGSDIR/.nextflow.log.${GUID/\//.} ``` -The `AWS_BATCH_JOB_ID` and `AWS_BATCH_JOB_ATTEMPT` are [environment variables that are automatically provided](https://docs.aws.amazon.com/batch/latest/userguide/job_env_vars.html) to all AWS Batch jobs. The `NF_WORKDIR` and `NF_JOB_QUEUE` variables are ones set by the Batch Job Definition ([see below](#batch-job-definition)). +The `AWS_BATCH_JOB_ID` and `AWS_BATCH_JOB_ATTEMPT` are [environment variables that are automatically provided](https://docs.aws.amazon.com/batch/latest/userguide/job_env_vars.html) to all AWS Batch jobs. 
The `NF_WORKDIR`, `NF_LOGSDIR`, and `NF_JOB_QUEUE` variables are ones set by the Batch Job Definition ([see below](#batch-job-definition)). ### Job instance AWS CLI @@ -146,51 +170,41 @@ An AWS Batch Job Definition for the containerized Nextflow described above is sh { "jobDefinitionName": "nextflow", "jobDefinitionArn": "arn:aws:batch:::job-definition/nextflow:1", - "revision": 1, - "status": "ACTIVE", "type": "container", - "parameters": { - "NextflowScript": "s3:///nextflow/workflow.nf" - }, + "parameters": {}, "containerProperties": { - "image": "/nextflow:latest", + "image": ".dkr.ecr..amazonaws.com/nextflow:latest", "vcpus": 2, "memory": 1024, - "command": [ - "Ref::NextflowScript" - ], - "volumes": [ - { - "host": { - "sourcePath": "/scratch" - }, - "name": "scratch" - } - ], + "command": [], + "jobRoleArn": "", + "volumes": [], "environment": [ + { + "name": "NF_LOGSDIR", + "value": "s3:///_nextflow/logs" + }, { "name": "NF_JOB_QUEUE", "value": "" }, { "name": "NF_WORKDIR", - "value": "s3:///runs" + "value": "s3:///_nextflow/runs" } ], - "mountPoints": [ - { - "containerPath": "/opt/work", - "sourceVolume": "scratch" - } - ], - "ulimits": [] + "mountPoints": [], + "ulimits": [], + "resourceRequirements": [] } } ``` +The `` is described below. + ### Nextflow IAM Role -Nextflow needs to be able to create and submit Batch Job Defintions and Batch Jobs, and read workflow script files in an S3 bucket. These permissions are provided via a Job Role associated with the Job Definition. Policies for this role would look like the following: +Nextflow needs to be able to create and submit Batch Job Defintions and Batch Jobs, and read workflow logs and session information from an S3 bucket. These permissions are provided via a Job Role associated with the Job Definition. Policies for this role would look like the following: #### Nextflow-Batch-Access @@ -213,7 +227,7 @@ This policy gives **full** access to AWS Batch. #### Nextflow-S3Bucket-Access -This policy gives **full** access to the buckets used to store data and workflow scripts. +This policy gives **full** access to the buckets used to store workflow data and Nextflow session metadata. ```json { @@ -224,8 +238,8 @@ This policy gives **full** access to the buckets used to store data and workflow "s3:*" ], "Resource": [ - "arn:aws:s3:::", - "arn:aws:s3:::/*", + "arn:aws:s3:::", + "arn:aws:s3:::/*", "arn:aws:s3:::", "arn:aws:s3:::/*" ], @@ -237,7 +251,9 @@ This policy gives **full** access to the buckets used to store data and workflow ## A Nextflow S3 Bucket -The containerized version of `nextflow` above reads a `*.nf` script from an S3 bucket and writes workflow logs and outputs back to it. This bucket can either be the same one that your workflow inputs and outputs are stored (e.g. in a separate folder therein) or it can be another bucket entirely. +Because running as a container will be an ephemeral process, the containerized version of `nextflow` stores workflow session information in S3 using paths described by `NF_WORKDIR` and `NF_LOGSDIR` environment variables. These allow you to use Nextflow's `-resume` flag to restart a workflow that was previously interrupted at the step it left off at. + +This bucket can be independent of the S3 bucket used to store workflow input and output data if necessary. 
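As a sketch of what the synced session metadata enables (job submission is covered in more detail below; the queue name here is a placeholder, and `-resume` is simply passed through to `nextflow run` by the entrypoint script shown above), an interrupted workflow could be resubmitted with the `-resume` flag so cached task results are reused:

```bash
# Resubmit the same workflow with -resume; the entrypoint stages .nextflow
# back in from NF_LOGSDIR, so previously completed tasks are not re-run.
aws batch submit-job \
    --job-name nf-hello-resume \
    --job-queue <queue-name> \
    --job-definition nextflow \
    --container-overrides command=hello,"-resume"
```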
## Running a workflow @@ -333,86 +349,11 @@ For each process in your workflow, Nextflow will create a corresponding Batch Jo You can customize these job definitions to incorporate additional environment variables or volumes/mount points as needed. -!!! important - In order to take advantage of automatically [expandable scratch space](../../../core-env/create-custom-compute-resources/) in the host instance, you will need to modify Nextflow created job definitions to map a container volume from `/scratch` on the host to `/tmp` in the container. - -For example, a customized job definition for the process above that maps `/scratch` on the host to `/scratch` in the container and still work with Nextflow would be: - -```json -{ - "jobDefinitionName": "nf-ubuntu-latest", - "jobDefinitionArn": "arn:aws:batch:::job-definition/nf-ubuntu-latest:2", - "revision": 2, - "status": "ACTIVE", - "type": "container", - "parameters": { - "nf-token": "43869867b5fbae16fa7cfeb5ea2c3522" - }, - "containerProperties": { - "image": "ubuntu:latest", - "vcpus": 1, - "memory": 1024, - "command": [ - "true" - ], - "volumes": [ - { - "host": { - "sourcePath": "/home/ec2-user/miniconda" - }, - "name": "aws-cli" - }, - { - "host": { - "sourcePath": "/scratch" - }, - "name": "scratch" - } - ], - "environment": [], - "mountPoints": [ - { - "containerPath": "/home/ec2-user/miniconda", - "readOnly": true, - "sourceVolume": "aws-cli" - }, - { - "containerPath": "/scratch", - "sourceVolume": "scratch" - } - ], - "ulimits": [] - } -} -``` - -Nextflow will use the most recent revision of a Job Definition. - -You can also predefine Job Definitions that leverage extra volume mappings and refer to them in the process definition. Assuming you had an existing Job Definition named `say-hello`, a process definition that utilized it would look like: - -```groovy -texts = Channel.from("AWS", "Nextflow") - -process hello { - // directives - // substitute the container image reference with a job-definition reference - container "job-definition://say-hello" - - // compute resources for the Batch Job - cpus 1 - memory '512 MB' - - input: - val text from texts - - output: - file 'hello.txt' +!!! note + As of Nextflow 19.07 you can use the `aws.batch.volumes` config option to define additional volumes and mount points. - """ - echo "Hello $text" > hello.txt - """ -} -``` +!!! important + Instances provisioned using the Nextflow specific EC2 Launch Template configure `/var/lib/docker` in the host instance to use automatically [expandable scratch space](../../../core-env/create-custom-compute-resources/), allowing containerized jobs to stage as much data as needed without running into disk space limits. ### Running the workflow @@ -421,10 +362,22 @@ To run a workflow you submit a `nextflow` Batch job to the appropriate Batch Job * the AWS Batch Console * or the command line with the AWS CLI -This is what starting a workflow via the AWS CLI would look like: +This is what starting a workflow via the AWS CLI would look like using Nextflow's built-in "hello-world" workflow: ```bash +aws batch submit-job \ + --job-name nf-hello \ + --job-queue \ + --job-definition nextflow \ + --container-overrides command=hello +``` +After submitting a workflow, you can monitor the progress of tasks via the AWS Batch console. +For the "Hello World" workflow above you will see five jobs run in Batch - one for the head node, and one for each `Channel` text as it goes through the `hello` process. 
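If you prefer the command line to the AWS Batch console, the head node and task jobs can also be checked with the AWS CLI (the queue name and job ID below are placeholders):

```bash
# List jobs currently running in the queue used by the workflow
aws batch list-jobs --job-queue <queue-name> --job-status RUNNING

# Describe a specific job to see its status and CloudWatch log stream name
aws batch describe-jobs --jobs <job-id>
```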
+ +For a more complex example, you can try the following, which will run the [RNASeq workflow](https://nf-co.re/rnaseq) developed by the [NF-Core project](https://nf-co.re/) against data in the [1000 Genomes AWS Public Dataset](https://registry.opendata.aws/1000-genomes/): + +```bash aws batch submit-job \ --job-name nf-core-rnaseq \ --job-queue \ @@ -435,8 +388,4 @@ aws batch submit-job \ "--skip_qc" ``` -After submitting a workflow, you can monitor the progress of tasks via the AWS Batch console. - -For the "Hello World" workflow above you will see three jobs run in Batch - one for the head node, and one for each `Channel` text as it goes through the `hello` process. - For the nf-core example "rnaseq" workflow you will see 11 jobs run in Batch over the course of a couple hours - the head node will last the whole duration of the pipeline while the others will stop once their step is complete. You can look at the CloudWatch logs for the head node job to monitor workflow progress. Note the additional single quotes wrapping the 1000genomes path. From f8e084091d4149cfb6b716e355b59fc20c58e955 Mon Sep 17 00:00:00 2001 From: "W. Lee Pang" Date: Fri, 13 Sep 2019 15:15:41 -0700 Subject: [PATCH 10/11] update branches for two stage deployment --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 10dc5d663..0b5080264 100644 --- a/.travis.yml +++ b/.travis.yml @@ -32,10 +32,10 @@ deploy: skip_cleanup: true on: repo: aws-samples/aws-genomics-workflows - branch: master + branch: release - provider: script script: bash _scripts/deploy.sh test skip_cleanup: true on: repo: aws-samples/aws-genomics-workflows - branch: test \ No newline at end of file + branch: master \ No newline at end of file From f9e1985ab250f68b217e18dde8aa1764ca366a9c Mon Sep 17 00:00:00 2001 From: "W. Lee Pang" Date: Fri, 13 Sep 2019 15:24:49 -0700 Subject: [PATCH 11/11] use simplified parameters requirements --- .../aws-genomics-batch.template.yaml | 15 +--- .../aws-genomics-root-novpc.template.yaml | 7 -- .../nextflow/nextflow-aio.template.yaml | 73 +++++++++---------- .../nextflow/nextflow-resources.template.yaml | 3 +- 4 files changed, 39 insertions(+), 59 deletions(-) diff --git a/src/templates/aws-genomics-batch.template.yaml b/src/templates/aws-genomics-batch.template.yaml index b8f9a7a75..3817a79c4 100644 --- a/src/templates/aws-genomics-batch.template.yaml +++ b/src/templates/aws-genomics-batch.template.yaml @@ -11,7 +11,6 @@ Metadata: default: "AWS Batch Environment Config" Parameters: - LaunchTemplateId - - Ec2KeyPairName - VpcId - SubnetIds - SpotBidPercentage @@ -19,15 +18,12 @@ Metadata: - DefaultCEMaxvCpus - HighPriorityCEMinvCpus - HighPriorityCEMaxvCpus - - DefaultRetryNumber - BatchServiceRoleArn - Ec2InstanceProfileArn - SpotFleetRoleArn ParameterLabels: LaunchTemplateId: default: Launch Template ID - Ec2KeyPairName: - default: EC2 Key Pair Name VpcId: default: VPC ID SubnetIds: @@ -66,9 +62,6 @@ Parameters: LaunchTemplateId: Type: String Description: Launch Template you want your AWS Batch Compute Environments to use - Ec2KeyPairName: - Type: AWS::EC2::KeyPair::KeyName - Description: Name of the EC2 Key Pair for connecting to EC2 instances launched in your compute environment HighPriorityCEMinvCpus: Type: Number Description: Minimum number of CPUs in the high-priority compute environment. Default 0. 
@@ -109,7 +102,6 @@ Resources: Ref: VpcId SGSSHIngress: Type: AWS::EC2::SecurityGroupIngress - DependsOn: GenomicsBatchSecurityGroup Properties: GroupId: !Ref GenomicsBatchSecurityGroup IpProtocol: tcp @@ -118,7 +110,6 @@ Resources: CidrIp: 0.0.0.0/0 SGAllTcpEgress: Type: AWS::EC2::SecurityGroupEgress - DependsOn: GenomicsBatchSecurityGroup Properties: GroupId: !Ref GenomicsBatchSecurityGroup IpProtocol: tcp @@ -127,7 +118,6 @@ Resources: CidrIp: 0.0.0.0/0 SGAllTcpSelfIngress: Type: AWS::EC2::SecurityGroupIngress - DependsOn: GenomicsBatchSecurityGroup Properties: GroupId: !Ref GenomicsBatchSecurityGroup IpProtocol: tcp @@ -137,7 +127,6 @@ Resources: GenomicsDefaultComputeEnv: Type: AWS::Batch::ComputeEnvironment - DependsOn: GenomicsBatchSecurityGroup Properties: ComputeEnvironmentName: !Sub - spot-${StackGuid} @@ -147,7 +136,7 @@ Resources: State: ENABLED ComputeResources: BidPercentage: !Ref SpotBidPercentage - Ec2KeyPair: !Ref Ec2KeyPairName + # Ec2KeyPair: !Ref Ec2KeyPairName LaunchTemplate: LaunchTemplateId: !Ref LaunchTemplateId InstanceRole: !Ref Ec2InstanceProfileArn @@ -167,7 +156,6 @@ Resources: GenomicsHighPriorityComputeEnv: Type: AWS::Batch::ComputeEnvironment - DependsOn: GenomicsBatchSecurityGroup Properties: ComputeEnvironmentName: !Sub - ondemand-${StackGuid} @@ -176,7 +164,6 @@ Resources: Type: MANAGED State: ENABLED ComputeResources: - Ec2KeyPair: !Ref Ec2KeyPairName LaunchTemplate: LaunchTemplateId: !Ref LaunchTemplateId InstanceRole: !Ref Ec2InstanceProfileArn diff --git a/src/templates/aws-genomics-root-novpc.template.yaml b/src/templates/aws-genomics-root-novpc.template.yaml index 3412aeb72..473f99f90 100644 --- a/src/templates/aws-genomics-root-novpc.template.yaml +++ b/src/templates/aws-genomics-root-novpc.template.yaml @@ -15,7 +15,6 @@ Metadata: - WorkflowOrchestrator - VpcId - SubnetIds - - KeyPairName - Label: default: Optional Parameters: @@ -37,8 +36,6 @@ Metadata: default: VPC ID SubnetIds: default: VPC Subnet IDs - KeyPairName: - default: EC2 Key Pair Name SpotBidPercentage: default: Spot Bid % DefaultCEMinvCpus: @@ -85,9 +82,6 @@ Parameters: - cromwell - nextflow Default: step-functions - KeyPairName: - Description: Key Pair name - Type: AWS::EC2::KeyPair::KeyName SpotBidPercentage: Description: The percent of on-demand pricing for max bid for Spot intances Type: Number @@ -160,7 +154,6 @@ Resources: TimeoutInMinutes: 10 Parameters: LaunchTemplateId: !Sub ${LaunchTplStack.Outputs.LaunchTemplateId} - Ec2KeyPairName: !Ref KeyPairName VpcId: !Ref VpcId SubnetIds: !Join [",", !Ref SubnetIds ] SpotBidPercentage: !Ref SpotBidPercentage diff --git a/src/templates/nextflow/nextflow-aio.template.yaml b/src/templates/nextflow/nextflow-aio.template.yaml index e1b144e39..ec70e03c4 100644 --- a/src/templates/nextflow/nextflow-aio.template.yaml +++ b/src/templates/nextflow/nextflow-aio.template.yaml @@ -25,8 +25,6 @@ Metadata: Parameters: - S3DataBucketName - ExistingDataBucket - - KeyPairName - - AvailabilityZones - Label: default: "AWS Batch" Parameters: @@ -41,8 +39,6 @@ Metadata: - NextflowContainerImage - S3NextflowBucketName - ExistingNextflowBucket - - S3NextflowScriptPrefix - - S3NextflowWorkDirPrefix ParameterLabels: S3DataBucketName: @@ -53,8 +49,6 @@ Metadata: default: S3 Nextflow Bucket Name ExistingNextflowBucket: default: Existing Nextflow Bucket? 
- KeyPairName: - default: EC2 Key Pair Name SpotBidPercentage: default: Spot Bid % DefaultCEMinvCpus: @@ -70,9 +64,6 @@ Metadata: Parameters: - AvailabilityZones: - Description: "Choose the two Availability Zones to deploy instances for AWS Batch." - Type: List S3DataBucketName: Description: >- A S3 bucket name for storing analysis results @@ -81,8 +72,10 @@ Parameters: If left blank a unique bucket name will be generated. Type: String + Default: "" AllowedPattern: "((?=^.{3,63}$)(?!^(\\d+\\.)+\\d+$)(^(([a-z0-9]|[a-z0-9][a-z0-9\\-]*[a-z0-9])\\.)*([a-z0-9]|[a-z0-9][a-z0-9\\-]*[a-z0-9])$)|(^.{0}$))" ConstraintDescription: "Must respect S3 bucket naming conventions" + ExistingDataBucket: Description: Does this bucket already exist? Type: String @@ -90,9 +83,6 @@ Parameters: - Yes - No Default: No - KeyPairName: - Description: Key Pair name - Type: AWS::EC2::KeyPair::KeyName SpotBidPercentage: Description: The percent of on-demand pricing for max bid for Spot intances Type: Number @@ -116,22 +106,13 @@ Parameters: S3NextflowBucketName: Type: String + Default: "" Description: >- - (Optional) S3 Bucket used to store *.nf scripts. + (Optional) S3 Bucket used to store Nextflow metadata (session cache, logs, and intermediate results). Defaults to the S3 Bucket used for data. AllowedPattern: "(^$|(?=^.{3,63}$)(?!^(\\d+\\.)+\\d+$)(^(([a-z0-9]|[a-z0-9][a-z0-9\\-]*[a-z0-9])\\.)*([a-z0-9]|[a-z0-9][a-z0-9\\-]*[a-z0-9])$))" ConstraintDescription: "Must respect S3 bucket naming conventions" - - S3NextflowWorkDirPrefix: - Type: String - Description: >- - (Optional) Parent folder in the S3 bucket that contains workflow execution logs - - S3NextflowScriptPrefix: - Type: String - Description: >- - (Optional) Parent folder in the S3 bucket that contains *.nf workflow scripts - + ExistingNextflowBucket: Type: String Description: >- @@ -143,6 +124,7 @@ Parameters: NextflowContainerImage: Type: String + Default: "" Description: >- (Optional) Container image for nextflow with custom entrypoint for config and workflow script staging. (Example, "/nextflow:latest"). 
@@ -151,7 +133,7 @@ Parameters: ArtifactRootUrl: Type: String - Default: https://aws-genomics-workflows.s3.amazonaws.com/artifacts + Default: https://s3.amazonaws.com/aws-genomics-workflows/artifacts Description: >- Root URL for where artifacts / additions scripts are stored @@ -159,7 +141,7 @@ Parameters: Type: String Description: >- Root URL for where nested templates are stored - Default: https://s3.amazonaws.com/aws-genomics-workflows/templates + Default: https://s3.amazonaws.com/pwyming-demo-templates/nextflow-workshop ConstraintDescription: >- Must be a valid S3 URL AllowedPattern: "https://s3(-[a-z0-9]+)*\\.amazonaws\\.com/[a-z0-9-./]{3,}" @@ -179,9 +161,12 @@ Resources: TemplateURL: https://aws-quickstart.s3.amazonaws.com/quickstart-aws-vpc/templates/aws-vpc.template TimeoutInMinutes: 15 Parameters: - AvailabilityZones: !Join ["," , !Ref AvailabilityZones] + AvailabilityZones: + Fn::Join: + - "," + - - !Sub "${AWS::Region}a" + - !Sub "${AWS::Region}b" NumberOfAZs: "2" - KeyPairName: !Ref KeyPairName Tags: !FindInMap ["TagMap", "default", "tags"] GenomicsWorkflowStack: @@ -193,7 +178,6 @@ Resources: SubnetIds: !Sub "${VpcStack.Outputs.PrivateSubnet1AID}, ${VpcStack.Outputs.PrivateSubnet2AID}" S3BucketName: !Ref S3DataBucketName ExistingBucket: !Ref ExistingDataBucket - KeyPairName: !Ref 'KeyPairName' WorkflowOrchestrator: nextflow SpotBidPercentage: !Ref 'SpotBidPercentage' DefaultCEMinvCpus: !Ref 'DefaultCEMinvCpus' @@ -222,8 +206,6 @@ Resources: - UseOneBucket - True # by the time this stack is created, the data bucket should exist - !Ref ExistingNextflowBucket - S3WorkDirPrefix: !Ref S3NextflowWorkDirPrefix - S3ScriptPrefix: !Ref S3NextflowScriptPrefix NextflowContainerImage: !Ref NextflowContainerImage BatchDefaultJobQueue: !GetAtt GenomicsWorkflowStack.Outputs.GenomicsEnvDefaultJobQueueArn Tags: !FindInMap ["TagMap", "default", "tags"] @@ -235,18 +217,29 @@ Outputs: NextflowJobDefinition: Value: !GetAtt NextflowStack.Outputs.NextflowJobDefinition + Export: + Name: !Sub "${AWS::StackName}-NextflowJobDefinition" Description: >- Batch Job Definition that creates a nextflow head node for running workflows S3NextFlowBucket: - Value: !GetAtt NextflowStack.Outputs.BucketName + Value: !GetAtt NextflowStack.Outputs.NextflowBucket + Export: + Name: !Sub "${AWS::StackName}-NextflowBucket" + Description: >- + S3 Bucket used to store Nextflow metadata (session cache, logs, and intermediate results) + S3NextflowLogsDir: + Value: !GetAtt NextflowStack.Outputs.LogsDir + Export: + Name: !Sub "${AWS::StackName}-NextflowLogsDir" Description: >- - S3 Bucket used to store *.nf scripts - S3NextflowScriptPrefix: - Value: !GetAtt NextflowStack.Outputs.ScriptPrefix + S3 URI where nextflow session cache and logs are stored. + S3NextflowWorkDir: + Value: !GetAtt NextflowStack.Outputs.WorkDir + Export: + Name: !Sub "${AWS::StackName}-NextflowWorkDir" Description: >- - Path in the S3 bucket where *.nf script files are located. If blank, - then they are located at the root level of the "nextflow" bucket. + S3 URI where workflow intermediate results are stored. 
VpcId: Description: >- @@ -254,14 +247,20 @@ Outputs: Value: !GetAtt 'VpcStack.Outputs.VPCID' S3DataBucket: Value: !GetAtt 'GenomicsWorkflowStack.Outputs.GenomicsEnvS3Bucket' + Export: + Name: !Sub "${AWS::StackName}-DataBucket" Description: >- S3 bucket for storing genomics workflow input and output data BatchDefaultQueue: Value: !GetAtt 'GenomicsWorkflowStack.Outputs.GenomicsEnvDefaultJobQueueArn' + Export: + Name: !Sub "${AWS::StackName}-DefaultJobQueue" Description: >- The default AWS Batch job queue for workflow jobs, based on EC2 SPOT instances BatchHighPriorityQueue: Value: !GetAtt 'GenomicsWorkflowStack.Outputs.GenomicsEnvHighPriorityJobQueueArn' + Export: + Name: !Sub "${AWS::StackName}-HighPriorityJobQueue" Description: >- AWS Batch job queue for high priority workflow jobs, based on EC2 On-Demand instances \ No newline at end of file diff --git a/src/templates/nextflow/nextflow-resources.template.yaml b/src/templates/nextflow/nextflow-resources.template.yaml index 81de08bf5..24d8ed16c 100644 --- a/src/templates/nextflow/nextflow-resources.template.yaml +++ b/src/templates/nextflow/nextflow-resources.template.yaml @@ -30,7 +30,8 @@ Metadata: default: "Optional" Parameters: - NextflowContainerImage - - S3ScriptPrefix + - S3NextflowPrefix + - S3LogsDirPrefix - S3WorkDirPrefix
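For reference, the Travis deploy providers configured in these patches call the deploy script with an explicit stage argument; the equivalent local invocations (and their intended destinations, per the `s3_uri` helper and `case` statement in `_scripts/deploy.sh`) are:

```bash
# "test" stage: publish artifacts and templates under the test/ prefix
#   -> s3://aws-genomics-workflows/test/artifacts and .../test/templates
bash _scripts/deploy.sh test

# "production" stage: publish artifacts, templates, and the documentation site
#   -> s3://aws-genomics-workflows/artifacts, .../templates,
#      and s3://docs.opendata.aws/genomics-workflows
bash _scripts/deploy.sh production
```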