Skip to content

Commit

Permalink
Allow for incremental addition of data to alt_allele [VS-52] (#7993)
Browse files Browse the repository at this point in the history
  • Loading branch information
rsasch authored Aug 22, 2022
1 parent 315303b commit 187fe60
Show file tree
Hide file tree
Showing 20 changed files with 119 additions and 62 deletions.
7 changes: 3 additions & 4 deletions .dockstore.yml
Original file line number Diff line number Diff line change
Expand Up @@ -95,15 +95,14 @@ workflows:
branches:
- master
- ah_var_store
- name: GvsCreateAltAllele
- name: GvsPopulateAltAllele
subclass: WDL
primaryDescriptorPath: /scripts/variantstore/wdl/GvsCreateAltAllele.wdl
testParameterFiles:
- /scripts/variantstore/wdl/GvsCreateAltAllele.example.inputs.json
primaryDescriptorPath: /scripts/variantstore/wdl/GvsPopulateAltAllele.wdl
filters:
branches:
- master
- ah_var_store
- rsa_vs_52_incremental_alt_allele
- name: GvsCreateTables
subclass: WDL
primaryDescriptorPath: /scripts/variantstore/wdl/GvsCreateTables.wdl
Expand Down
4 changes: 2 additions & 2 deletions scripts/variantstore/AOU_DELIVERABLES.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
- [GvsAssignIds](https://dockstore.org/my-workflows/github.com/broadinstitute/gatk/GvsAssignIds) workflow
- [GvsImportGenomes](https://dockstore.org/my-workflows/github.com/broadinstitute/gatk/GvsImportGenomes) workflow
- [GvsWithdrawSamples](https://dockstore.org/my-workflows/github.com/broadinstitute/gatk/GvsWithdrawSamples) workflow
- [GvsCreateAltAllele](https://dockstore.org/my-workflows/github.com/broadinstitute/gatk/GvsCreateAltAllele) workflow
- [GvsPopulateAltAllele](https://dockstore.org/my-workflows/github.com/broadinstitute/gatk/GvsPopulateAltAllele) workflow
- [GvsCreateFilterSet](https://dockstore.org/my-workflows/github.com/broadinstitute/gatk/GvsCreateFilterSet) workflow
- [GvsPrepareRangesCallset](https://dockstore.org/my-workflows/github.com/broadinstitute/gatk/GvsPrepareRangesCallset) workflow (VCF output)
- [GvsExtractCallset](https://dockstore.org/my-workflows/github.com/broadinstitute/gatk/GvsExtractCallset) workflow (VCF output)
Expand Down Expand Up @@ -43,7 +43,7 @@
3. `GvsWithdrawSamples` workflow
- Run if there are any samples to withdraw from the last callset.
4. **TBD Workflow to soft delete samples**
5. `GvsCreateAltAllele` workflow
5. `GvsPopulateAltAllele` workflow
- **TODO:** needs to be made cumulative so that it can add data to the existing table instead of creating it from scratch on each run (see [VS-52](https://broadworkbench.atlassian.net/browse/VS-52))
- This step loads data into the `alt_allele` table from the `vet_*` tables in preparation for running the filtering step.
- This workflow does not use the Terra Data Entity Model to run, so be sure to select the `Run workflow with inputs defined by file paths` workflow submission option.
Expand Down
2 changes: 1 addition & 1 deletion scripts/variantstore/TERRA_QUICKSTART.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ This step loads data into the ALT_ALLELE table from the `vet_*` tables.

This workflow does not use the Terra data model to run, so be sure to select `Run workflow with inputs defined by file paths`.

This is done by running the `GvsCreateAltAllele` workflow with the following parameters:
This is done by running the `GvsPopulateAltAllele` workflow with the following parameters:

| Parameter | Description |
| ----------------- | ----------- |
Expand Down
8 changes: 4 additions & 4 deletions scripts/variantstore/wdl/GvsBenchmarkExtractTask.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,10 @@ workflow GvsBenchmarkExtractTask {

File? excluded_intervals
Boolean? emit_pls = false

Int? extract_cpu_override = 2
String? extract_memory_override = "12 GB"

Int? extract_preemptible_override
Int? extract_maxretries_override
Int? split_intervals_disk_size_override
Expand Down Expand Up @@ -206,7 +206,7 @@ task SumBytes {
}

runtime {
docker: "gcr.io/google.com/cloudsdktool/cloud-sdk:305.0.0"
docker: "gcr.io/google.com/cloudsdktool/cloud-sdk:398.0.0"
memory: "3 GB"
disks: "local-disk 10 HDD"
preemptible: 3
Expand Down Expand Up @@ -240,7 +240,7 @@ task CreateManifest {
}

runtime {
docker: "gcr.io/google.com/cloudsdktool/cloud-sdk:305.0.0"
docker: "gcr.io/google.com/cloudsdktool/cloud-sdk:398.0.0"
memory: "3 GB"
disks: "local-disk 10 HDD"
preemptible: 3
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ task Add_AS_MAX_VQSLOD_ToVcf {
File input_vcf
String output_basename

String docker = "us.gcr.io/broad-dsde-methods/variantstore:ah_var_store_2022_08_16"
String docker = "us.gcr.io/broad-dsde-methods/variantstore:ah_var_store_2022_08_22"
Int cpu = 1
Int memory_mb = 3500
Int disk_size_gb = ceil(2*size(input_vcf, "GiB")) + 50
Expand Down
6 changes: 3 additions & 3 deletions scripts/variantstore/wdl/GvsCallsetCost.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ task WorkflowComputeCosts {
>>>

runtime {
docker: "us.gcr.io/broad-dsde-methods/variantstore:ah_var_store_2022_08_16"
docker: "us.gcr.io/broad-dsde-methods/variantstore:ah_var_store_2022_08_22"
}

output {
Expand Down Expand Up @@ -97,7 +97,7 @@ task CoreStorageModelSizes {
get_billable_bytes_in_gib "alt_allele" alt_allele_gib.txt
>>>
runtime {
docker: "gcr.io/google.com/cloudsdktool/cloud-sdk:390.0.0"
docker: "gcr.io/google.com/cloudsdktool/cloud-sdk:398.0.0"
}
output {
Float vet_gib = read_float("vet_gib.txt")
Expand Down Expand Up @@ -125,7 +125,7 @@ task ReadCostObservabilityTable {
> cost_observability.json
>>>
runtime {
docker: "gcr.io/google.com/cloudsdktool/cloud-sdk:390.0.0"
docker: "gcr.io/google.com/cloudsdktool/cloud-sdk:398.0.0"
}
output {
File cost_observability = "cost_observability.json"
Expand Down
2 changes: 1 addition & 1 deletion scripts/variantstore/wdl/GvsCreateTables.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ task CreateTables {
}

runtime {
docker: "gcr.io/google.com/cloudsdktool/cloud-sdk:latest"
docker: "gcr.io/google.com/cloudsdktool/cloud-sdk:398.0.0"
memory: "3 GB"
disks: "local-disk 10 HDD"
cpu: 1
Expand Down
4 changes: 2 additions & 2 deletions scripts/variantstore/wdl/GvsCreateVAT.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ task MakeSubpopulationFilesAndReadSchemaFiles {
# ------------------------------------------------
# Runtime settings:
runtime {
docker: "us.gcr.io/broad-dsde-methods/variantstore:ah_var_store_2022_08_16"
docker: "us.gcr.io/broad-dsde-methods/variantstore:ah_var_store_2022_08_22"
memory: "1 GB"
preemptible: 3
cpu: "1"
Expand Down Expand Up @@ -405,7 +405,7 @@ task BigQuerySmokeTest {
# ------------------------------------------------
# Runtime settings:
runtime {
docker: "gcr.io/google.com/cloudsdktool/cloud-sdk:305.0.0"
docker: "gcr.io/google.com/cloudsdktool/cloud-sdk:398.0.0"
memory: "1 GB"
preemptible: 3
cpu: "1"
Expand Down
4 changes: 2 additions & 2 deletions scripts/variantstore/wdl/GvsCreateVATAnnotations.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ task ExtractAnAcAfFromVCF {
# ------------------------------------------------
# Runtime settings:
runtime {
docker: "us.gcr.io/broad-dsde-methods/variantstore:ah_var_store_2022_08_16"
docker: "us.gcr.io/broad-dsde-methods/variantstore:ah_var_store_2022_08_22"
maxRetries: 3
memory: "16 GB"
preemptible: 3
Expand Down Expand Up @@ -291,7 +291,7 @@ task PrepAnnotationJson {
# ------------------------------------------------
# Runtime settings:
runtime {
docker: "us.gcr.io/broad-dsde-methods/variantstore:ah_var_store_2022_08_16"
docker: "us.gcr.io/broad-dsde-methods/variantstore:ah_var_store_2022_08_22"
memory: "8 GB"
preemptible: 5
cpu: "1"
Expand Down
6 changes: 3 additions & 3 deletions scripts/variantstore/wdl/GvsExtractAvroFilesForHail.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ task CountSamples {
Int num_samples = read_int(stdout())
}
runtime {
docker: "us.gcr.io/broad-dsde-methods/variantstore:ah_var_store_2022_08_01"
docker: "us.gcr.io/broad-dsde-methods/variantstore:ah_var_store_2022_08_22"
}
}
Expand Down Expand Up @@ -152,7 +152,7 @@ task ExtractFromNonSuperpartitionedTables {
}
runtime {
docker: "gcr.io/google.com/cloudsdktool/cloud-sdk:latest"
docker: "gcr.io/google.com/cloudsdktool/cloud-sdk:398.0.0"
}
}
Expand Down Expand Up @@ -203,6 +203,6 @@ task ExtractFromSuperpartitionedTables {
}
runtime {
docker: "gcr.io/google.com/cloudsdktool/cloud-sdk:latest"
docker: "gcr.io/google.com/cloudsdktool/cloud-sdk:398.0.0"
}
}
8 changes: 4 additions & 4 deletions scripts/variantstore/wdl/GvsExtractCallset.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ task ValidateFilterSetName {
}

runtime {
docker: "gcr.io/google.com/cloudsdktool/cloud-sdk:305.0.0"
docker: "gcr.io/google.com/cloudsdktool/cloud-sdk:398.0.0"
memory: "3 GB"
disks: "local-disk 10 HDD"
preemptible: 3
Expand Down Expand Up @@ -393,7 +393,7 @@ task SumBytes {
print(total_mb);"
>>>
runtime {
docker: "gcr.io/google.com/cloudsdktool/cloud-sdk:305.0.0"
docker: "gcr.io/google.com/cloudsdktool/cloud-sdk:398.0.0"
memory: "3 GB"
disks: "local-disk 10 HDD"
preemptible: 3
Expand Down Expand Up @@ -432,7 +432,7 @@ task CreateManifest {
}

runtime {
docker: "gcr.io/google.com/cloudsdktool/cloud-sdk:305.0.0"
docker: "gcr.io/google.com/cloudsdktool/cloud-sdk:398.0.0"
memory: "3 GB"
disks: "local-disk 10 HDD"
preemptible: 3
Expand Down Expand Up @@ -474,7 +474,7 @@ task GenerateSampleListFile {
}

runtime {
docker: "gcr.io/google.com/cloudsdktool/cloud-sdk:305.0.0"
docker: "gcr.io/google.com/cloudsdktool/cloud-sdk:398.0.0"
memory: "3 GB"
disks: "local-disk 10 HDD"
preemptible: 3
Expand Down
4 changes: 2 additions & 2 deletions scripts/variantstore/wdl/GvsExtractCohortFromSampleNames.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import "GvsExtractCallset.wdl" as GvsExtractCallset
workflow GvsExtractCohortFromSampleNames {

input {
# cohort_sample_names_array will take precedence over cohort_sample_names if both are set
# cohort_sample_names_array will take precedence over cohort_sample_names if both are set
Array[String]? cohort_sample_names_array
File? cohort_sample_names

Expand Down Expand Up @@ -43,7 +43,7 @@ workflow GvsExtractCohortFromSampleNames {
call write_array_task {
input:
input_array = select_first([cohort_sample_names_array]),
docker = "gcr.io/google.com/cloudsdktool/cloud-sdk:305.0.0"
docker = "gcr.io/google.com/cloudsdktool/cloud-sdk:398.0.0"
}
}

Expand Down
8 changes: 4 additions & 4 deletions scripts/variantstore/wdl/GvsImportGenomes.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ task CreateFOFNs {
split -d -a 5 -l ~{batch_size} ~{sample_name_list} batched_sample_names.
>>>
runtime {
docker: "gcr.io/google.com/cloudsdktool/cloud-sdk:305.0.0"
docker: "gcr.io/google.com/cloudsdktool/cloud-sdk:398.0.0"
bootDiskSizeGb: 15
memory: "3 GB"
disks: "local-disk 10 HDD"
Expand Down Expand Up @@ -275,7 +275,7 @@ task SetIsLoadedColumn {
'UPDATE `~{dataset_name}.sample_info` SET is_loaded = true WHERE sample_id IN (SELECT CAST(partition_id AS INT64) from `~{dataset_name}.INFORMATION_SCHEMA.PARTITIONS` WHERE partition_id NOT LIKE "__%" AND total_logical_bytes > 0 AND table_name LIKE "vet_%") OR sample_id IN (SELECT sample_id FROM `~{dataset_name}.sample_load_status` GROUP BY 1 HAVING COUNT(1) = 2)'
>>>
runtime {
docker: "gcr.io/google.com/cloudsdktool/cloud-sdk:305.0.0"
docker: "gcr.io/google.com/cloudsdktool/cloud-sdk:398.0.0"
memory: "1 GB"
disks: "local-disk 10 HDD"
cpu: 1
Expand Down Expand Up @@ -354,7 +354,7 @@ task GetUningestedSampleIds {
bq --project_id=~{project_id} rm -f=true ${TEMP_TABLE}
>>>
runtime {
docker: "gcr.io/google.com/cloudsdktool/cloud-sdk:305.0.0"
docker: "gcr.io/google.com/cloudsdktool/cloud-sdk:398.0.0"
memory: "1 GB"
disks: "local-disk 10 HDD"
preemptible: 5
Expand Down Expand Up @@ -391,7 +391,7 @@ task CurateInputLists {
--output_files True
>>>
runtime {
docker: "us.gcr.io/broad-dsde-methods/variantstore:ah_var_store_2022_08_16"
docker: "us.gcr.io/broad-dsde-methods/variantstore:ah_var_store_2022_08_22"
memory: "3 GB"
disks: "local-disk 100 HDD"
bootDiskSizeGb: 15
Expand Down
2 changes: 1 addition & 1 deletion scripts/variantstore/wdl/GvsIngestTieout.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ task IngestTieout {
}

runtime {
docker: "gcr.io/google.com/cloudsdktool/cloud-sdk:latest"
docker: "gcr.io/google.com/cloudsdktool/cloud-sdk:398.0.0"
memory: "14 GB"
disks: "local-disk 2000 HDD"
preemptible: 3
Expand Down
Loading

0 comments on commit 187fe60

Please sign in to comment.