Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

basename fix for imputation beagle ref panel generation #1332

Merged
merged 4 commits into from
Jul 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .dockstore.yml
Original file line number | Diff line number | Diff line change
Expand Up @@ -83,6 +83,10 @@ workflows:
subclass: WDL
primaryDescriptorPath: /pipelines/broad/arrays/imputation_beagle/CreateImputationRefPanelBeagle.wdl

- name: LiftoverVcfs
subclass: WDL
primaryDescriptorPath: /pipelines/broad/arrays/imputation_beagle/LiftoverVcfs.wdl

- name: RNAWithUMIsPipeline
subclass: WDL
primaryDescriptorPath: /pipelines/broad/rna_seq/RNAWithUMIsPipeline.wdl
Expand Down
Original file line number | Diff line number | Diff line change
Expand Up @@ -7,24 +7,31 @@ workflow CreateImputationRefPanelBeagle {
Array[File] ref_vcf_index
Int disk_size

String? output_basename

Boolean make_brefs = true
Boolean make_interval_lists = true
}

scatter (idx in range(length(ref_vcf))) {
Int? chr = idx + 1
String? custom_basename_with_chr = output_basename + ".chr" + chr

if (make_brefs) {
call BuildBref3 {
input:
vcf = ref_vcf[idx],
disk_size = disk_size
disk_size = disk_size,
output_basename = custom_basename_with_chr
}
}

if (make_interval_lists) {
call CreateRefPanelIntervalLists {
input:
ref_panel_vcf = ref_vcf[idx],
ref_panel_vcf_index = ref_vcf_index[idx]
ref_panel_vcf_index = ref_vcf_index[idx],
output_basename = custom_basename_with_chr,
}
}
}
Expand All @@ -38,10 +45,12 @@ workflow CreateImputationRefPanelBeagle {
task BuildBref3 {
input {
File vcf
String? output_basename
Int disk_size
}

String name = basename(vcf, ".vcf.gz")
String name_from_file = basename(vcf, ".vcf.gz")
String name = select_first([output_basename, name_from_file])

command <<<
java -jar /usr/gitc/bref3.22Jul22.46e.jar ~{vcf} > ~{name}.bref3
Expand All @@ -64,6 +73,8 @@ task CreateRefPanelIntervalLists {
File ref_panel_vcf
File ref_panel_vcf_index

String? output_basename

Int disk_size_gb = ceil(2*size(ref_panel_vcf, "GiB")) + 50 # not sure how big the disk size needs to be since we aren't downloading the entire VCF here
Int cpu = 1
Int memory_mb = 8000
Expand All @@ -73,7 +84,8 @@ task CreateRefPanelIntervalLists {
Int command_mem = memory_mb - 1000
Int max_heap = memory_mb - 500

String basename = basename(ref_panel_vcf, '.vcf.gz')
String name_from_file = basename(ref_panel_vcf, ".vcf.gz")
String basename = select_first([output_basename, name_from_file])

command {
gatk --java-options "-Xms~{command_mem}m -Xmx~{max_heap}m" \
Expand Down
10 changes: 7 additions & 3 deletions pipelines/broad/arrays/imputation_beagle/LiftoverVcfs.wdl
Original file line number | Diff line number | Diff line change
Expand Up @@ -13,6 +13,7 @@ workflow LiftoverVcfs {

String docker = "us.gcr.io/broad-gatk/gatk:4.2.6.1"
Int min_disk_size = 100
Int mem_gb = 16

File hg38_reference_fasta
File hg38_reference_fasta_index
Expand All @@ -36,7 +37,8 @@ workflow LiftoverVcfs {
docker = docker,
max_retries = max_retries,
preemptible_tries = preemptible_tries,
min_disk_size = min_disk_size
min_disk_size = min_disk_size,
mem_gb = mem_gb
}

output {
Expand All @@ -57,15 +59,17 @@ task LiftOverArrays {
Int max_retries
Int preemptible_tries
Int min_disk_size
Int mem_gb
}

Int disk_size_from_file = (ceil(size(input_vcf, "GiB") + size(liftover_chain, "GiB") + size(reference_fasta, "GiB")) * 2) + 20
Int disk_size = if ( disk_size_from_file > min_disk_size ) then disk_size_from_file else min_disk_size
Int max_mem_gb = mem_gb - 1

command <<<
set -euo pipefail

gatk --java-options "-Xms4g -Xmx15g" \
gatk --java-options "-Xms4g -Xmx~{max_mem_gb}g" \
LiftoverVcf \
--INPUT ~{input_vcf} \
--OUTPUT ~{output_basename}.liftedover.vcf \
Expand All @@ -83,7 +87,7 @@ task LiftOverArrays {

runtime {
docker: docker
memory: "16 GiB"
memory: "~{mem_gb} GiB"
cpu: "1"
disks: "local-disk ~{disk_size} HDD"
maxRetries: max_retries
Expand Down