-
Notifications
You must be signed in to change notification settings - Fork 596
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Rudimentary support for other output types, support for scattering extract cohort (#6949)
* cohort extract work for WGS
* rudimentary support for configurable output types
* support for nocalls
* extract wdl, missing classes, and compiler warning
* tidying
- Loading branch information
Showing
12 changed files
with
377 additions
and
37 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
{ | ||
"NgsCohortExtract.reference": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta", | ||
"NgsCohortExtract.reference_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai", | ||
"NgsCohortExtract.reference_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict", | ||
|
||
"NgsCohortExtract.gatk_override": "gs://broad-dsp-spec-ops/kcibul/gatk-package-4.1.8.1-140-g8aa14d3-SNAPSHOT-local.jar", | ||
|
||
"NgsCohortExtract.fq_sample_table": "spec-ops-aou.kc_high_cov_ccdg.cohort_100_of_194", | ||
"NgsCohortExtract.fq_cohort_extract_table": "spec-ops-aou.kc_high_cov_ccdg.exported_cohort_100_test", | ||
"NgsCohortExtract.query_project": "spec-ops-aou", | ||
|
||
"NgsCohortExtract.output_file_base_name": "ccdg_high_cov_export_100" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
version 1.0 | ||
|
||
# Scatters ExtractTask over the chromosome ids 1..max_chrom_id, producing one
# VCF shard per scatter iteration.
workflow NgsCohortExtract {
    input {
        # Reference FASTA together with its index and sequence dictionary.
        File reference
        File reference_index
        File reference_dict

        # Table names and project id, passed through unchanged to ExtractTask.
        String fq_sample_table
        String fq_cohort_extract_table
        String query_project

        # Prefix of every shard's output file name.
        String output_file_base_name

        # Optional GATK jar handed to each shard.
        File? gatk_override

        # Number of scatter shards; shard k handles chrom_id k + 1.
        Int max_chrom_id = 24

        # Kept as a String because of a Cromwell bug with large integers:
        # https://github.com/broadinstitute/cromwell/issues/2685
        String chrom_offset = "1000000000000"
    }

    scatter (shard_index in range(max_chrom_id)) {
        call ExtractTask {
            input:
                reference               = reference,
                reference_index         = reference_index,
                reference_dict          = reference_dict,
                fq_sample_table         = fq_sample_table,
                fq_cohort_extract_table = fq_cohort_extract_table,
                read_project_id         = query_project,
                gatk_override           = gatk_override,
                chrom_offset            = chrom_offset,
                # range() is zero-based while chrom ids start at 1.
                chrom_id                = shard_index + 1,
                # The file name carries the zero-based shard index, not chrom_id.
                output_file             = "~{output_file_base_name}_~{shard_index}.vcf.gz"
        }
    }
}
|
||
################################################################################ | ||
# Runs GATK ExtractCohort for a single chrom_id and emits a VCF plus its index.
task ExtractTask {
    # indicates that this task should NOT be call cached
    meta {
        volatile: true
    }

    input {
        # ------------------------------------------------
        # Input args:
        # Reference FASTA (passed to -R) with its index and dictionary.
        File reference
        File reference_index
        File reference_dict

        # Value passed to --sample-table.
        String fq_sample_table

        # bug in cromwell, can't support large integers...
        # https://github.com/broadinstitute/cromwell/issues/2685
        # chrom_offset therefore arrives as a String and is multiplied with bc.
        String chrom_offset
        Int chrom_id

        # Values passed to --cohort-extract-table, --project-id and -O.
        String fq_cohort_extract_table
        String read_project_id
        String output_file

        # Runtime Options:
        # Optional GATK jar; when absent, /root/gatk.jar is used.
        File? gatk_override

        # Value passed to --local-sort-max-records-in-ram.
        Int? local_sort_max_records_in_ram = 10000000
    }

    # ------------------------------------------------
    # Run our command:
    command <<<
        # Fail on any error, on use of an unset variable, and on a failure
        # anywhere in a pipeline (e.g. the bc computations below).
        set -euo pipefail
        export GATK_LOCAL_JAR=~{default="/root/gatk.jar" gatk_override}

        df -h

        # Location window for this shard:
        #   [chrom_id * chrom_offset, (chrom_id + 1) * chrom_offset]
        # computed with bc because chrom_offset is carried as a String.
        min_location=$(echo "~{chrom_id} * ~{chrom_offset}" | bc)
        max_location=$(echo "( ~{chrom_id} + 1 ) * ~{chrom_offset}" | bc)

        # All caller-supplied strings are quoted so that unexpected characters
        # cannot split or glob into extra arguments.
        gatk --java-options "-Xmx4g" \
            ExtractCohort \
                --mode GENOMES --ref-version 38 --query-mode LOCAL_SORT \
                -R "~{reference}" \
                -O "~{output_file}" \
                --local-sort-max-records-in-ram ~{local_sort_max_records_in_ram} \
                --sample-table "~{fq_sample_table}" \
                --cohort-extract-table "~{fq_cohort_extract_table}" \
                --min-location "${min_location}" --max-location "${max_location}" \
                --project-id "~{read_project_id}"
    >>>

    # ------------------------------------------------
    # Runtime settings:
    runtime {
        docker: "us.gcr.io/broad-dsde-methods/broad-gatk-snapshots:varstore_d8a72b825eab2d979c8877448c0ca948fd9b34c7_change_to_hwe"
        memory: "7 GB"
        disks: "local-disk 10 HDD"
        bootDiskSizeGb: 15
        preemptible: 3
        cpu: 2
    }

    # ------------------------------------------------
    # Outputs:
    output {
        # The VCF written to -O, and the .tbi index expected alongside it.
        File output_vcf = "~{output_file}"
        File output_vcf_index = "~{output_file}.tbi"
    }
}
|
||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -197,4 +197,10 @@ public enum ModeEnum { | |
GENOMES, | ||
ARRAYS | ||
} | ||
|
||
/**
 * Output type names: {@code TSV}, {@code ORC} and {@code PARQUET}.
 * NOTE(review): the code that consumes this enum is not visible here; confirm
 * how each value is handled before relying on it.
 */
public enum OutputType { TSV, ORC, PARQUET }
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.