Skip to content

Commit

Permalink
Core storage model cost [VS-473] (#7913)
Browse files Browse the repository at this point in the history
  • Loading branch information
mcovarr authored Jun 28, 2022
1 parent 13b5660 commit 586f3f7
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 37 deletions.
1 change: 1 addition & 0 deletions .dockstore.yml
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,7 @@ workflows:
- master
- ah_var_store
- vs_472_workflow_compute_costs
- vs_473_core_storage_model_cost
- name: MitochondriaPipeline
subclass: WDL
primaryDescriptorPath: /scripts/mitochondria_m2_wdl/MitochondriaPipeline.wdl
Expand Down
89 changes: 52 additions & 37 deletions scripts/variantstore/wdl/GvsCallsetCost.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@ version 1.0

workflow GvsCallsetCost {
input {
# String project_id
# String dataset_name
String project_id
String dataset_name
String workspace_namespace
String workspace_name
# String callset_name
String call_set_identifier
Array[String] excluded_submission_ids = []
}

Expand All @@ -17,38 +17,41 @@ workflow GvsCallsetCost {
excluded_submission_ids = excluded_submission_ids
}

# call BigQueryWriteAPICost {
# input:
# project_id = project_id,
# dataset_name = dataset_name
# }
#
call CoreStorageModelSizes {
input:
project_id = project_id,
dataset_name = dataset_name
}

# call BigQueryScannedCost {
# input:
# project_id = project_id,
# dataset_name = dataset_name,
# callset_name = callset_name
# call_set_identifier = call_set_identifier
# }
#
# call BigQueryStorageAPIScannedCost {
# input:
# project_id = project_id,
# dataset_name = dataset_name,
# callset_name = callset_name
# call_set_identifier = call_set_identifier
# }
output {
File workflow_compute_costs = WorkflowComputeCosts.costs
File workflow_compute_costs_log = WorkflowComputeCosts.log
String vet_gib = CoreStorageModelSizes.vet_gib
String ref_ranges_gib = CoreStorageModelSizes.ref_ranges_gib
String alt_allele_gib = CoreStorageModelSizes.alt_allele_gib
}
}


task WorkflowComputeCosts {
meta {
description: "Calculate workflow compute costs by calling Firecloud APIs for submissions in the specified workspace"
volatile: true
}

input {
String workspace_namespace
String workspace_name
Expand All @@ -75,29 +78,41 @@ task WorkflowComputeCosts {
}
}

#task BigQueryWriteAPICost {
# meta {
# description: "Estimate GvsImportGenomes use of the BQ Write API via core storage costs from the sizes of vet_% and ref_ranges_% tables."
# volatile: true
# }
#
# input {
# String project_id
# String dataset_name
# }
# command <<<
# >>>
#
# runtime {
# docker: ""
# }
#
# output {
# Float vet_gib = read_float("")
# Float ref_ranges_gib = read_float("")
# Float import_genomes_cost = 3
# }
#}
# Reports the billable size, in GiB, of the core storage model tables of a dataset.
#
# Inputs:
#   project_id   - Google project that owns the dataset; also used as the project the queries run in.
#   dataset_name - BigQuery dataset whose `INFORMATION_SCHEMA.PARTITIONS` view is queried.
#
# Outputs:
#   vet_gib        - summed billable GiB of tables whose name matches `vet_%`.
#   ref_ranges_gib - summed billable GiB of tables whose name matches `ref_ranges_%`.
#   alt_allele_gib - billable GiB of the `alt_allele` table.
#
# Each value is rounded to two decimal places and is 0 when no table matches.
task CoreStorageModelSizes {
    meta {
        description: "Read sizes of vet_%, ref_ranges_%, and alt_allele tables from `INFORMATION_SCHEMA.PARTITIONS`."
        # Definitely don't cache this: the values will change while the inputs to this task will not!
        volatile: true
    }
    input {
        String project_id
        String dataset_name
    }
    command <<<
        # Without pipefail the exit status of `bq query | tail -1` is that of `tail`, so a failed
        # query would go unnoticed and its error text would be handed to read_float.
        set -o errexit -o nounset -o pipefail

        # Writes the total billable GiB of all tables matching a LIKE pattern to a file.
        #   $1 - SQL LIKE pattern applied to table_name
        #   $2 - file that receives the single numeric result
        get_billable_bytes_in_gib() {
            local table_pattern="$1"
            local output_file_name="$2"

            # SUM over zero rows is NULL, which bq prints as an empty CSV field; IFNULL turns that
            # into 0 so read_float always has a number to parse.
            # `tail -1` drops the CSV header line and keeps only the value.
            bq query --location=US --project_id='~{project_id}' --format=csv --use_legacy_sql=false \
                "SELECT IFNULL(round(sum(total_billable_bytes) / (1024*1024*1024),2), 0) \
                FROM \`~{project_id}.~{dataset_name}.INFORMATION_SCHEMA.PARTITIONS\` \
                WHERE table_name LIKE '${table_pattern}'" | tail -1 > "${output_file_name}"
        }

        # NOTE(review): `_` is a single-character wildcard in LIKE, so "vet_%" also matches names
        # such as "vetX1" — harmless only if no such tables exist in the dataset; confirm.
        get_billable_bytes_in_gib "vet_%" vet_gib.txt
        get_billable_bytes_in_gib "ref_ranges_%" ref_ranges_gib.txt
        get_billable_bytes_in_gib "alt_allele" alt_allele_gib.txt
    >>>
    runtime {
        docker: "gcr.io/google.com/cloudsdktool/cloud-sdk:390.0.0"
    }
    output {
        Float vet_gib = read_float("vet_gib.txt")
        Float ref_ranges_gib = read_float("ref_ranges_gib.txt")
        Float alt_allele_gib = read_float("alt_allele_gib.txt")
    }
}

#task BigQueryScannedCost {
# meta {
Expand All @@ -108,7 +123,7 @@ task WorkflowComputeCosts {
# input {
# String project_id
# String dataset_name
# String callset_name
# String call_set_identifier
# }
#
# command <<<
Expand All @@ -135,7 +150,7 @@ task WorkflowComputeCosts {
# input {
# String project_id
# String dataset_name
# String callset_name
# String call_set_identifier
# }
#
# command <<<
Expand Down

0 comments on commit 586f3f7

Please sign in to comment.