Skip to content

Commit

Permalink
Merge pull request #184 from broadinstitute/dp-clades
Browse files Browse the repository at this point in the history
add new sarscov2_lineages workflow and travis cleanups
  • Loading branch information
dpark01 authored Dec 22, 2020
2 parents 7e05397 + 6a68311 commit a3bf70c
Show file tree
Hide file tree
Showing 19 changed files with 153 additions and 471 deletions.
5 changes: 5 additions & 0 deletions .dockstore.yml
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,11 @@ workflows:
primaryDescriptorPath: /pipes/WDL/workflows/multiqc_only.wdl
testParameterFiles:
- empty.json
- name: sarscov2_lineages
subclass: WDL
primaryDescriptorPath: /pipes/WDL/workflows/sarscov2_lineages.wdl
testParameterFiles:
- empty.json
- name: scaffold_and_refine
subclass: WDL
primaryDescriptorPath: /pipes/WDL/workflows/scaffold_and_refine.wdl
Expand Down
70 changes: 3 additions & 67 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,22 +9,12 @@ services:

env:
global:
- CACHE_DIR="$HOME/misc_cache"
- MINICONDA_DIR="$HOME/miniconda"
- CONDA_DEFAULT_ENV="$HOME/misc_cache/default_env"
- PYTHONIOENCODING=UTF8
- BOTO_CONFIG=/dev/null # bogus value to override config on travis
- DOCKER_REGISTRY="quay.io"
- DOCKER_REPO_PROD="quay.io/broadinstitute/viral-pipelines"
- DOCKER_REPO_DEV="quay.io/broadinstitute/viral-pipelines-dev"
- MODULE_VERSIONS="requirements-modules.txt"

cache:
directories:
- $HOME/misc_cache
- $HOME/miniconda
timeout: 1000

stages:
- validate
- build
Expand All @@ -43,6 +33,7 @@ jobs:
install:
- pip3 -q install miniwdl
script:
- set -e
- travis/check-wdl-runtimes.sh
- miniwdl check pipes/WDL/workflows/*.wdl

Expand All @@ -54,6 +45,7 @@ jobs:
install:
- travis/install-wdl.sh
script:
- set -e
- travis/check-wdl-runtimes.sh
- travis/validate-wdl-womtool.sh

Expand All @@ -74,7 +66,7 @@ jobs:
install:
- travis/install-wdl.sh
script:
- travis/check-wdl-runtimes.sh
- set -e
- travis/relative-wdl-paths.sh
- travis/tests-cromwell.sh

Expand All @@ -89,7 +81,6 @@ jobs:
install:
- pip3 -q install miniwdl
script:
- travis/check-wdl-runtimes.sh
- travis/tests-miniwdl.sh

- language: java
Expand All @@ -106,63 +97,8 @@ jobs:
- travis/install-wdl.sh
script:
- set -e
- travis/check-wdl-runtimes.sh
- travis/relative-wdl-paths.sh
- travis/build-dx.sh
- travis/tests-dx.sh

- language: python
stage: build
python: "3.8"
env:
- TRAVIS_JOB=deploy_github_staging
install:
- pip3 -q install miniwdl
script:
- travis/github-viral-ngs-staging.sh
after_failure:
- sleep 10

- language: python
stage: build
python: "3.8"
env:
- TRAVIS_JOB=deploy_gcs
script: skip
before_deploy:
- set -e -o pipefail
- openssl aes-256-cbc -K $encrypted_6def3e87f286_key -iv $encrypted_6def3e87f286_iv -in travis/viral-ngs-wdl.json.enc -out travis/viral-ngs-wdl.json -d
- export DEST_DIR=`travis/list-docker-tags.sh | tail -1 | sed 's/:/\//'`
- pip3 -q install miniwdl
- travis/check-wdl-runtimes.sh
- travis/flatten-wdls.sh
deploy:
provider: gcs
edge: true
key_file: travis/viral-ngs-wdl.json
bucket: viral-ngs-wdl
local_dir: pipes/WDL/flattened
upload_dir: "$DEST_DIR"
acl: public-read
on:
all_branches: true

#- language: python
# stage: build
# python: "3.8"
# env:
# - TRAVIS_JOB=deploy_github
# before_deploy:
# - set -e
# - pip3 -q install miniwdl
# - travis/check-wdl-runtimes.sh
# - travis/flatten-wdls.sh
# deploy:
# provider: releases
# edge: true
# token: "$GITHUB_OAUTH_TOKEN"
# file: pipes/WDL/flattened/*.wdl
# on:
# tags: true


103 changes: 103 additions & 0 deletions pipes/WDL/tasks/tasks_sarscov2.wdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
version 1.0

# Runs Nextclade on a single genome FASTA and reports the assigned clade.
# Produces the Nextclade JSON, TSV and Auspice tree outputs, named after the
# input file's basename, plus the tool version string and the "clade" value.
task nextclade_one_sample {
meta {
description: "Nextclade classification of one sample. Leaving optional inputs unspecified will use SARS-CoV-2 defaults."
}
input {
# Genome to classify; the only required input.
File genome_fasta
# Optional overrides. Each is forwarded to nextclade.js only when supplied:
# the "--flag " + value placeholders in the command render as empty text
# when the optional value is undefined.
File? root_sequence
File? auspice_reference_tree_json
File? qc_config_json
File? gene_annotations_json
File? pcr_primers_csv
}
# Prefix for all output file names: the input file name without ".fasta".
# Inputs with any other extension keep their extension in the prefix.
String basename = basename(genome_fasta, ".fasta")
command {
set -e
nextclade.js --version > VERSION
nextclade.js \
--input-fasta "~{genome_fasta}" \
~{"--input-root-seq " + root_sequence} \
~{"--input-tree " + auspice_reference_tree_json} \
~{"--input-qc-config " + qc_config_json} \
~{"--input-gene-map " + gene_annotations_json} \
~{"--input-pcr-primers " + pcr_primers_csv} \
--output-json "~{basename}".nextclade.json \
--output-tsv "~{basename}".nextclade.tsv \
--output-tree "~{basename}".nextclade.auspice.json
cp "~{basename}".nextclade.tsv input.tsv
python3 <<CODE
# transpose table
with open('input.tsv', 'rt') as inf:
with open('transposed.tsv', 'wt') as outf:
for c in zip(*(l.rstrip().split('\t') for l in inf)):
outf.write('\t'.join(c)+'\n')
CODE
}
# NOTE(review): the same image tag is listed in requirements-modules.txt;
# presumably the two are meant to be kept in sync -- confirm.
runtime {
docker: "neherlab/nextclade:0.10.0"
memory: "3 GB"
cpu: 2
disks: "local-disk 50 HDD"
dx_instance_type: "mem1_ssd1_v2_x2"
}
output {
# Contents of the VERSION file written by `nextclade.js --version`.
String nextclade_version = read_string("VERSION")
File nextclade_json = "~{basename}.nextclade.json"
File auspice_json = "~{basename}.nextclade.auspice.json"
File nextclade_tsv = "~{basename}.nextclade.tsv"
# transposed.tsv holds one "column<TAB>value" line per TSV column, so
# read_map yields a column-name -> value map; "clade" is looked up from it.
# Assumes the TSV has exactly one header row and one data row -- TODO confirm.
String nextclade_clade = read_map("transposed.tsv")["clade"]
}
}
# Runs Pangolin on a single SARS-CoV-2 genome FASTA and reports the assigned
# lineage. Produces the Pangolin CSV report (named after the input file's
# basename), three version strings, and the "lineage" value from the report.
task pangolin_one_sample {
    meta {
        description: "Pangolin classification of one SARS-CoV-2 sample."
    }
    input {
        # Genome to classify; the only required input.
        File genome_fasta
        # Optional thresholds; forwarded to pangolin only when supplied
        # (the "--flag " + value placeholders render as empty text otherwise).
        Int? min_length
        Float? max_ambig
        # When true (the default), pass --include-putative to pangolin.
        # When false, the flag is omitted entirely.
        Boolean include_putative = true
    }
    # Prefix for the report file name: the input file name without ".fasta".
    String basename = basename(genome_fasta, ".fasta")
    command {
        set -e
        pangolin -v > VERSION_PANGOLIN
        pangolin -lv > VERSION_LINEAGES
        pangolin -pv > VERSION_PANGOLEARN
        pangolin "~{genome_fasta}" \
            --outfile "~{basename}.pangolin_report.csv" \
            -t "$(nproc)" \
            ~{"--min-length " + min_length} \
            ~{"--max-ambig " + max_ambig} \
            ~{true="--include-putative" false="" include_putative} \
            --verbose
        cp "~{basename}.pangolin_report.csv" input.csv
        python3 <<CODE
        # transpose table and convert csv to tsv
        with open('input.csv', 'rt') as inf:
            with open('transposed.tsv', 'wt') as outf:
                for c in zip(*(l.rstrip().split(',') for l in inf)):
                    outf.write('\t'.join(c)+'\n')
        CODE
    }
    # NOTE(review): the same image tag is listed in requirements-modules.txt;
    # presumably the two are meant to be kept in sync -- confirm.
    runtime {
        docker: "staphb/pangolin:2.1.1"
        memory: "3 GB"
        cpu: 2
        disks: "local-disk 50 HDD"
        dx_instance_type: "mem1_ssd1_v2_x2"
    }
    output {
        # Raw stdout of `pangolin -v`, `-lv` and `-pv` respectively
        # (presumably the pangolin, lineages and pangoLEARN versions).
        String pangolin_version = read_string("VERSION_PANGOLIN")
        String lineages_version = read_string("VERSION_LINEAGES")
        String pangolearn_version = read_string("VERSION_PANGOLEARN")
        File pangolin_csv = "~{basename}.pangolin_report.csv"
        # transposed.tsv holds one "column<TAB>value" line per CSV column, so
        # read_map yields a column-name -> value map; "lineage" is looked up.
        # Assumes one header row, one data row and no quoted commas in any
        # field -- TODO confirm against pangolin's report format.
        String pangolin_clade = read_map("transposed.tsv")["lineage"]
    }
}
30 changes: 30 additions & 0 deletions pipes/WDL/workflows/sarscov2_lineages.wdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
version 1.0

import "../tasks/tasks_sarscov2.wdl" as sarscov2

# Classifies one SARS-CoV-2 genome with both Nextclade and Pangolin.
# The two calls take the same input file and do not depend on each other.
workflow sarscov2_lineages {
meta {
description: "Call Nextclade and Pangolin lineages on a single SARS-CoV-2 genome"
}

input {
# The genome to classify; passed unchanged to both tasks.
File genome_fasta
}

# All optional Nextclade inputs are left unset here.
call sarscov2.nextclade_one_sample {
input:
genome_fasta = genome_fasta
}

# Optional Pangolin inputs are left unset / at their task defaults here.
call sarscov2.pangolin_one_sample {
input:
genome_fasta = genome_fasta
}

# Only the clade/lineage calls and the tabular reports are surfaced;
# other task outputs (version strings, JSON files) are not re-exported.
output {
String nextclade_clade = nextclade_one_sample.nextclade_clade
File nextclade_tsv = nextclade_one_sample.nextclade_tsv
String pangolin_clade = pangolin_one_sample.pangolin_clade
File pangolin_csv = pangolin_one_sample.pangolin_csv
}
}
2 changes: 2 additions & 0 deletions requirements-modules.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,5 @@ broadinstitute/beast-beagle-cuda=1.10.5pre
broadinstitute/ncbi-tools=2.10.7.1
nextstrain/base=build-20200629T201240Z
andersenlabapps/ivar=1.2.2
staphb/pangolin=2.1.1
neherlab/nextclade=0.10.0
20 changes: 0 additions & 20 deletions travis/before_install.sh

This file was deleted.

54 changes: 0 additions & 54 deletions travis/build-conda.sh

This file was deleted.

24 changes: 0 additions & 24 deletions travis/deploy-docker.sh

This file was deleted.

12 changes: 0 additions & 12 deletions travis/flatten-wdls.sh

This file was deleted.

Binary file removed travis/github-deploy-id_rsa.enc
Binary file not shown.
Loading

0 comments on commit a3bf70c

Please sign in to comment.