Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

dev -> staging #1200

Merged
merged 2 commits into from
Feb 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# 1.1.6
2024-02-06 (Date of Last Commit)

* Updated VETS filtering pipeline to GATK version 4.5.0.0. Does not affect outputs.

# 1.1.5
2023-09-08 (Date of Last Commit)

Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
version 1.0

import "../../../../../../tasks/broad/JointGenotypingTasks.wdl" as Tasks
import "https://raw.githubusercontent.com/broadinstitute/gatk/4.3.0.0/scripts/vcf_site_level_filtering_wdl/JointVcfFiltering.wdl" as Filtering
import "https://raw.githubusercontent.com/broadinstitute/gatk/4.5.0.0/scripts/vcf_site_level_filtering_wdl/JointVcfFiltering.wdl" as Filtering
import "../../../../../../tasks/broad/UltimaGenomicsGermlineFilteringThreshold.wdl" as FilteringThreshold


Expand All @@ -11,7 +11,7 @@ import "../../../../../../tasks/broad/UltimaGenomicsGermlineFilteringThreshold.w
# For choosing a filtering threshold (where on the ROC curve to filter), a sample with truth data is required.
workflow UltimaGenomicsJointGenotyping {

String pipeline_version = "1.1.5"
String pipeline_version = "1.1.6"

input {
File unpadded_intervals_file
Expand Down Expand Up @@ -51,10 +51,11 @@ workflow UltimaGenomicsJointGenotyping {
String flow_order

# Inputs for training and applying the filtering model
String snp_annotations
String indel_annotations
Boolean use_allele_specific_annotations
Array[String] snp_annotations
Array[String] indel_annotations
String model_backend
String snp_resource_args = "--resource:hapmap,training=true,calibration=true gs://gcp-public-data--broad-references/hg38/v0/hapmap_3.3.hg38.vcf.gz --resource:omni,training=true,calibration=true gs://gcp-public-data--broad-references/hg38/v0/1000G_omni2.5.hg38.vcf.gz --resource:1000G,training=true,calibration=false gs://gcp-public-data--broad-references/hg38/v0/1000G_phase1.snps.high_confidence.hg38.vcf.gz"
String indel_resource_args = "--resource:mills,training=true,calibration=true gs://gcp-public-data--broad-references/hg38/v0/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz"

Int? top_level_scatter_count
Boolean? gather_vcfs
Expand Down Expand Up @@ -154,24 +155,42 @@ workflow UltimaGenomicsJointGenotyping {
disk_size_gb = medium_disk
}

call Filtering.JointVcfFiltering as TrainAndApplyFilteringModel {
call Filtering.JointVcfFiltering as TrainAndApplyFilteringModelSNPs {
input:
vcf = CalculateAverageAnnotations.output_vcf,
vcf_index = CalculateAverageAnnotations.output_vcf_index,
input_vcfs = CalculateAverageAnnotations.output_vcf,
input_vcf_idxs = CalculateAverageAnnotations.output_vcf_index,
sites_only_vcf = SitesOnlyGatherVcf.output_vcf,
sites_only_vcf_index = SitesOnlyGatherVcf.output_vcf_index,
snp_annotations = snp_annotations,
indel_annotations = indel_annotations,
sites_only_vcf_idx = SitesOnlyGatherVcf.output_vcf_index,
annotations = snp_annotations,
resource_args = snp_resource_args,
model_backend = model_backend,
use_allele_specific_annotations = use_allele_specific_annotations,
basename = callset_name,
gatk_docker = "us.gcr.io/broad-gatk/gatk:4.3.0.0"
output_prefix = callset_name,
extract_extra_args = "--mode SNP",
train_extra_args = "--mode SNP",
score_extra_args = "--mode SNP",
gatk_docker = "us.gcr.io/broad-gatk/gatk:4.5.0.0"
}

call Filtering.JointVcfFiltering as TrainAndApplyFilteringModelINDELs {
input:
input_vcfs = TrainAndApplyFilteringModelSNPs.scored_vcfs,
input_vcf_idxs = TrainAndApplyFilteringModelSNPs.scored_vcf_idxs,
sites_only_vcf = SitesOnlyGatherVcf.output_vcf,
sites_only_vcf_idx = SitesOnlyGatherVcf.output_vcf_index,
annotations = indel_annotations,
resource_args = indel_resource_args,
model_backend = model_backend,
output_prefix = callset_name,
extract_extra_args = "--mode INDEL",
train_extra_args = "--mode INDEL",
score_extra_args = "--mode INDEL",
gatk_docker = "us.gcr.io/broad-gatk/gatk:4.5.0.0"
}

call FilteringThreshold.ExtractOptimizeSingleSample as FindFilteringThresholdAndFilter {
input:
input_vcf = TrainAndApplyFilteringModel.variant_scored_vcf,
input_vcf_index = TrainAndApplyFilteringModel.variant_scored_vcf_index,
input_vcf = TrainAndApplyFilteringModelINDELs.scored_vcfs,
input_vcf_index = TrainAndApplyFilteringModelINDELs.scored_vcf_idxs,
base_file_name = callset_name,
call_sample_name = call_sample_name,
truth_vcf = truth_vcf,
Expand All @@ -188,7 +207,7 @@ workflow UltimaGenomicsJointGenotyping {
medium_disk = medium_disk
}

scatter (idx in range(length(TrainAndApplyFilteringModel.variant_scored_vcf))) {
scatter (idx in range(length(TrainAndApplyFilteringModelINDELs.scored_vcfs))) {
# For large callsets we need to collect metrics from the shards and gather them later.
if (!is_small_callset) {
call Tasks.CollectVariantCallingMetrics as CollectMetricsSharded {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,17 @@
"UltimaGenomicsJointGenotyping.scatter_cross_check_fingerprints":false,
"UltimaGenomicsJointGenotyping.unbounded_scatter_count_scale_factor":2.5,
"UltimaGenomicsJointGenotyping.unpadded_intervals_file":"gs://gcp-public-data--broad-references/hg38/v0/hg38.even.handcurated.20k.intervals",
"UltimaGenomicsJointGenotyping.snp_annotations": "-A AS_ReadPosRankSum -A AS_FS -A AS_SOR -A AS_QD -A AVERAGE_TREE_SCORE -A AVERAGE_ASSEMBLED_HAPS -A AVERAGE_FILTERED_HAPS",
"UltimaGenomicsJointGenotyping.indel_annotations": "-A AS_MQRankSum -A AS_ReadPosRankSum -A AS_FS -A AS_SOR -A AS_QD -A AVERAGE_TREE_SCORE",
"UltimaGenomicsJointGenotyping.snp_annotations": ["AS_ReadPosRankSum", "AS_FS", "AS_SOR", "AS_QD", "AVERAGE_TREE_SCORE", "AVERAGE_ASSEMBLED_HAPS", "AVERAGE_FILTERED_HAPS"],
"UltimaGenomicsJointGenotyping.indel_annotations": ["AS_MQRankSum", "AS_ReadPosRankSum", "AS_FS", "AS_SOR", "AS_QD", "AVERAGE_TREE_SCORE"],
"UltimaGenomicsJointGenotyping.flow_order": "TGCA",
"UltimaGenomicsJointGenotyping.ref_fasta_sdf": "gs://broad-gotc-test-storage/UltimaGenomicsJointGenotyping/wgs/plumbing/reference_sdf.tar",
"UltimaGenomicsJointGenotyping.runs_file": "gs://broad-gotc-test-storage/UltimaGenomicsJointGenotyping/wgs/plumbing/runs.conservative.bed",
"UltimaGenomicsJointGenotyping.annotation_intervals": ["gs://broad-gotc-test-storage/UltimaGenomicsJointGenotyping/wgs/plumbing/LCR-hs38.bed", "gs://broad-gotc-test-storage/UltimaGenomicsJointGenotyping/wgs/plumbing/mappability.0.bed", "gs://broad-gotc-test-storage/UltimaGenomicsJointGenotyping/wgs/plumbing/exome.twist.bed"],
"UltimaGenomicsJointGenotyping.use_allele_specific_annotations": true,
"UltimaGenomicsJointGenotyping.truth_vcf":"gs://broad-gotc-test-storage/UltimaGenomicsJointGenotyping/wgs/plumbing/HG001_GRCh38_GIAB_highconf_CG-IllFB-IllGATKHC-Ion-10X-SOLID_CHROM1-X_v.3.3.2_highconf_PGandRTGphasetransfer.broad-header.vcf.gz",
"UltimaGenomicsJointGenotyping.truth_vcf_index":"gs://broad-gotc-test-storage/UltimaGenomicsJointGenotyping/wgs/plumbing/HG001_GRCh38_GIAB_highconf_CG-IllFB-IllGATKHC-Ion-10X-SOLID_CHROM1-X_v.3.3.2_highconf_PGandRTGphasetransfer.broad-header.vcf.gz",
"UltimaGenomicsJointGenotyping.truth_highconf_intervals": "gs://broad-gotc-test-storage/UltimaGenomicsJointGenotyping/wgs/plumbing/HG001_GRCh38_GIAB_highconf_CG-IllFB-IllGATKHC-Ion-10X-SOLID_CHROM1-X_v.3.3.2_highconf_nosomaticdel_noCENorHET7.bed",
"UltimaGenomicsJointGenotyping.call_sample_name": "NA12878",
"UltimaGenomicsJointGenotyping.truth_sample_name": "HG001",
"UltimaGenomicsJointGenotyping.model_backend": "PYTHON_IFOREST"
"UltimaGenomicsJointGenotyping.model_backend": "PYTHON_IFOREST",
"UltimaGenomicsJointGenotyping.TrainAndApplyFilteringModelSNPs.train_runtime_attributes": {"additional_mem_gb":2}
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,18 @@
"UltimaGenomicsJointGenotyping.scatter_cross_check_fingerprints":false,
"UltimaGenomicsJointGenotyping.unbounded_scatter_count_scale_factor":2.5,
"UltimaGenomicsJointGenotyping.unpadded_intervals_file":"gs://gcp-public-data--broad-references/hg38/v0/hg38.even.handcurated.20k.intervals",
"UltimaGenomicsJointGenotyping.snp_annotations": "-A AS_ReadPosRankSum -A AS_FS -A AS_SOR -A AS_QD -A AVERAGE_TREE_SCORE -A AVERAGE_ASSEMBLED_HAPS -A AVERAGE_FILTERED_HAPS",
"UltimaGenomicsJointGenotyping.indel_annotations": "-A AS_MQRankSum -A AS_ReadPosRankSum -A AS_FS -A AS_SOR -A AS_QD -A AVERAGE_TREE_SCORE",
"UltimaGenomicsJointGenotyping.snp_annotations": ["AS_ReadPosRankSum", "AS_FS", "AS_SOR", "AS_QD", "AVERAGE_TREE_SCORE", "AVERAGE_ASSEMBLED_HAPS", "AVERAGE_FILTERED_HAPS"],
"UltimaGenomicsJointGenotyping.indel_annotations": ["AS_MQRankSum", "AS_ReadPosRankSum", "AS_FS", "AS_SOR", "AS_QD", "AVERAGE_TREE_SCORE"],
"UltimaGenomicsJointGenotyping.flow_order": "TGCA",
"UltimaGenomicsJointGenotyping.ref_fasta_sdf": "gs://broad-gotc-test-storage/UltimaGenomicsJointGenotyping/wgs/scientific/reference_sdf.tar",
"UltimaGenomicsJointGenotyping.runs_file": "gs://broad-gotc-test-storage/UltimaGenomicsJointGenotyping/wgs/scientific/runs.conservative.bed",
"UltimaGenomicsJointGenotyping.annotation_intervals": ["gs://broad-gotc-test-storage/UltimaGenomicsJointGenotyping/wgs/scientific/LCR-hs38.bed", "gs://broad-gotc-test-storage/UltimaGenomicsJointGenotyping/wgs/scientific/mappability.0.bed", "gs://broad-gotc-test-storage/UltimaGenomicsJointGenotyping/wgs/scientific/exome.twist.bed"],
"UltimaGenomicsJointGenotyping.use_allele_specific_annotations": true,
"UltimaGenomicsJointGenotyping.truth_vcf":"gs://broad-gotc-test-storage/UltimaGenomicsJointGenotyping/wgs/scientific/HG001_GRCh38_GIAB_highconf_CG-IllFB-IllGATKHC-Ion-10X-SOLID_CHROM1-X_v.3.3.2_highconf_PGandRTGphasetransfer.broad-header.vcf.gz",
"UltimaGenomicsJointGenotyping.truth_vcf_index":"gs://broad-gotc-test-storage/UltimaGenomicsJointGenotyping/wgs/scientific/HG001_GRCh38_GIAB_highconf_CG-IllFB-IllGATKHC-Ion-10X-SOLID_CHROM1-X_v.3.3.2_highconf_PGandRTGphasetransfer.broad-header.vcf.gz",
"UltimaGenomicsJointGenotyping.truth_highconf_intervals": "gs://broad-gotc-test-storage/UltimaGenomicsJointGenotyping/wgs/scientific/HG001_GRCh38_GIAB_highconf_CG-IllFB-IllGATKHC-Ion-10X-SOLID_CHROM1-X_v.3.3.2_highconf_nosomaticdel_noCENorHET7.bed",
"UltimaGenomicsJointGenotyping.call_sample_name": "NA12878",
"UltimaGenomicsJointGenotyping.truth_sample_name": "HG001",
"UltimaGenomicsJointGenotyping.model_backend": "PYTHON_IFOREST"
"UltimaGenomicsJointGenotyping.model_backend": "PYTHON_IFOREST",
"UltimaGenomicsJointGenotyping.TrainAndApplyFilteringModelSNPs.extract_runtime_attributes": {"command_mem_gb":13, "additional_mem_gb":2},
"UltimaGenomicsJointGenotyping.TrainAndApplyFilteringModelSNPs.train_runtime_attributes": {"command_mem_gb":13, "additional_mem_gb":2}
}
6 changes: 2 additions & 4 deletions verification/test-wdls/TestUltimaGenomicsJointGenotyping.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,8 @@ workflow TestUltimaGenomicsJointGenotyping {
File runs_file
Array[File] annotation_intervals
String flow_order
String snp_annotations
String indel_annotations
Boolean use_allele_specific_annotations
Array[String] snp_annotations
Array[String] indel_annotations
String model_backend
Int? top_level_scatter_count
Boolean? gather_vcfs
Expand Down Expand Up @@ -83,7 +82,6 @@ workflow TestUltimaGenomicsJointGenotyping {
flow_order = flow_order,
snp_annotations = snp_annotations,
indel_annotations = indel_annotations,
use_allele_specific_annotations = use_allele_specific_annotations,
model_backend = model_backend,
top_level_scatter_count = top_level_scatter_count,
gather_vcfs = gather_vcfs,
Expand Down
Loading