Skip to content

Commit

Permalink
basename fix for imputation beagle ref panel generation (#1332)
Browse files Browse the repository at this point in the history
* automatically append the chromosome number (".chr<N>") to the end of the output basename

* pass the optional output basename to both tasks (BuildBref3 and CreateRefPanelIntervalLists)

* add the LiftoverVcfs workflow to .dockstore.yml

* allow specifying the memory (mem_gb) used by the LiftOverArrays task; the Java max heap is set to mem_gb - 1
  • Loading branch information
mmorgantaylor authored Jul 15, 2024
1 parent 8cb21b1 commit c57210c
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 7 deletions.
4 changes: 4 additions & 0 deletions .dockstore.yml
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,10 @@ workflows:
subclass: WDL
primaryDescriptorPath: /pipelines/broad/arrays/imputation_beagle/CreateImputationRefPanelBeagle.wdl

- name: LiftoverVcfs
subclass: WDL
primaryDescriptorPath: /pipelines/broad/arrays/imputation_beagle/LiftoverVcfs.wdl

- name: RNAWithUMIsPipeline
subclass: WDL
primaryDescriptorPath: /pipelines/broad/rna_seq/RNAWithUMIsPipeline.wdl
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,24 +7,31 @@ workflow CreateImputationRefPanelBeagle {
Array[File] ref_vcf_index
Int disk_size

String? output_basename

Boolean make_brefs = true
Boolean make_interval_lists = true
}

scatter (idx in range(length(ref_vcf))) {
Int? chr = idx + 1
String? custom_basename_with_chr = output_basename + ".chr" + chr

if (make_brefs) {
call BuildBref3 {
input:
vcf = ref_vcf[idx],
disk_size = disk_size
disk_size = disk_size,
output_basename = custom_basename_with_chr
}
}

if (make_interval_lists) {
call CreateRefPanelIntervalLists {
input:
ref_panel_vcf = ref_vcf[idx],
ref_panel_vcf_index = ref_vcf_index[idx]
ref_panel_vcf_index = ref_vcf_index[idx],
output_basename = custom_basename_with_chr,
}
}
}
Expand All @@ -38,10 +45,12 @@ workflow CreateImputationRefPanelBeagle {
task BuildBref3 {
input {
File vcf
String? output_basename
Int disk_size
}

String name = basename(vcf, ".vcf.gz")
String name_from_file = basename(vcf, ".vcf.gz")
String name = select_first([output_basename, name_from_file])

command <<<
java -jar /usr/gitc/bref3.22Jul22.46e.jar ~{vcf} > ~{name}.bref3
Expand All @@ -64,6 +73,8 @@ task CreateRefPanelIntervalLists {
File ref_panel_vcf
File ref_panel_vcf_index

String? output_basename

Int disk_size_gb = ceil(2*size(ref_panel_vcf, "GiB")) + 50 # not sure how big the disk size needs to be since we aren't downloading the entire VCF here
Int cpu = 1
Int memory_mb = 8000
Expand All @@ -73,7 +84,8 @@ task CreateRefPanelIntervalLists {
Int command_mem = memory_mb - 1000
Int max_heap = memory_mb - 500

String basename = basename(ref_panel_vcf, '.vcf.gz')
String name_from_file = basename(ref_panel_vcf, ".vcf.gz")
String basename = select_first([output_basename, name_from_file])

command {
gatk --java-options "-Xms~{command_mem}m -Xmx~{max_heap}m" \
Expand Down
10 changes: 7 additions & 3 deletions pipelines/broad/arrays/imputation_beagle/LiftoverVcfs.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ workflow LiftoverVcfs {

String docker = "us.gcr.io/broad-gatk/gatk:4.2.6.1"
Int min_disk_size = 100
Int mem_gb = 16

File hg38_reference_fasta
File hg38_reference_fasta_index
Expand All @@ -36,7 +37,8 @@ workflow LiftoverVcfs {
docker = docker,
max_retries = max_retries,
preemptible_tries = preemptible_tries,
min_disk_size = min_disk_size
min_disk_size = min_disk_size,
mem_gb = mem_gb
}

output {
Expand All @@ -57,15 +59,17 @@ task LiftOverArrays {
Int max_retries
Int preemptible_tries
Int min_disk_size
Int mem_gb
}

Int disk_size_from_file = (ceil(size(input_vcf, "GiB") + size(liftover_chain, "GiB") + size(reference_fasta, "GiB")) * 2) + 20
Int disk_size = if ( disk_size_from_file > min_disk_size ) then disk_size_from_file else min_disk_size
Int max_mem_gb = mem_gb - 1

command <<<
set -euo pipefail

gatk --java-options "-Xms4g -Xmx15g" \
gatk --java-options "-Xms4g -Xmx~{max_mem_gb}g" \
LiftoverVcf \
--INPUT ~{input_vcf} \
--OUTPUT ~{output_basename}.liftedover.vcf \
Expand All @@ -83,7 +87,7 @@ task LiftOverArrays {

runtime {
docker: docker
memory: "16 GiB"
memory: "~{mem_gb} GiB"
cpu: "1"
disks: "local-disk ~{disk_size} HDD"
maxRetries: max_retries
Expand Down

0 comments on commit c57210c

Please sign in to comment.