diff --git a/.dockstore.yml b/.dockstore.yml index 9e1c1337db4..a478bd749f0 100644 --- a/.dockstore.yml +++ b/.dockstore.yml @@ -120,7 +120,7 @@ workflows: branches: - master - ah_var_store - - rsa_handle_prepare_error + - ah_flag_in_prepare - name: GvsSitesOnlyVCF subclass: WDL primaryDescriptorPath: /scripts/variantstore/wdl/GvsSitesOnlyVCF.wdl diff --git a/scripts/variantstore/wdl/GvsPrepareCallset.wdl b/scripts/variantstore/wdl/GvsPrepareCallset.wdl index cfb08467a91..504030cb8a7 100644 --- a/scripts/variantstore/wdl/GvsPrepareCallset.wdl +++ b/scripts/variantstore/wdl/GvsPrepareCallset.wdl @@ -19,6 +19,7 @@ workflow GvsPrepareCallset { String fq_destination_dataset = "~{destination_project}.~{destination_dataset}" Int temp_table_ttl_in_hours = 72 + Boolean skip_pet_insert = false String? service_account_json_path String? docker } @@ -36,6 +37,7 @@ workflow GvsPrepareCallset { fq_temp_table_dataset = fq_temp_table_dataset, fq_destination_dataset = fq_destination_dataset, temp_table_ttl_in_hours = temp_table_ttl_in_hours, + skip_pet_insert = skip_pet_insert, service_account_json_path = service_account_json_path, docker = docker_final } @@ -63,6 +65,7 @@ task PrepareCallsetTask { String fq_temp_table_dataset String fq_destination_dataset Int temp_table_ttl_in_hours + Boolean skip_pet_insert String? 
service_account_json_path String docker @@ -72,7 +75,7 @@ task PrepareCallsetTask { String has_service_account_file = if (defined(service_account_json_path)) then 'true' else 'false' String use_sample_names_file = if (defined(sample_names_to_extract)) then 'true' else 'false' - String python_option = if (defined(sample_names_to_extract)) then '--sample_names_to_extract sample_names_file' else '--fq_cohort_sample_names ' + fq_sample_mapping_table + String sample_list_param = if (defined(sample_names_to_extract)) then '--sample_names_to_extract sample_names_file' else '--fq_cohort_sample_names ' + fq_sample_mapping_table parameter_meta { sample_names_to_extract: { @@ -83,7 +86,7 @@ task PrepareCallsetTask { command <<< set -e - echo ~{python_option} + echo ~{sample_list_param} if [ ~{has_service_account_file} = 'true' ]; then gsutil cp ~{service_account_json_path} local.service_account.json @@ -99,11 +102,12 @@ task PrepareCallsetTask { --fq_temp_table_dataset ~{fq_temp_table_dataset} \ --fq_destination_dataset ~{fq_destination_dataset} \ --destination_cohort_table_prefix ~{destination_cohort_table_prefix} \ - ~{python_option} \ + ~{sample_list_param} \ --query_project ~{query_project} \ ~{sep=" " query_label_args} \ --fq_sample_mapping_table ~{fq_sample_mapping_table} \ --ttl ~{temp_table_ttl_in_hours} \ + --skip_pet_insert ~{skip_pet_insert} \ $SERVICE_ACCOUNT_STANZA >>> diff --git a/scripts/variantstore/wdl/extract/create_cohort_extract_data_table.py b/scripts/variantstore/wdl/extract/create_cohort_extract_data_table.py index cd03a26db75..ba074913210 100644 --- a/scripts/variantstore/wdl/extract/create_cohort_extract_data_table.py +++ b/scripts/variantstore/wdl/extract/create_cohort_extract_data_table.py @@ -222,7 +222,7 @@ def create_final_extract_table(fq_destination_table_data): print(sql) results = utils.execute_with_retry(client, "create-final-export-table", sql) -def populate_final_extract_table_with_vet_new(fq_temp_table_dataset, fq_destination_table_data): 
+def populate_final_extract_table_with_vet_new(fq_temp_table_dataset, fq_destination_table_data, skip_pet_insert): sql = f""" INSERT INTO `{fq_destination_table_data}` SELECT @@ -241,7 +241,12 @@ def populate_final_extract_table_with_vet_new(fq_temp_table_dataset, fq_destinat `{fq_temp_table_dataset}.{VET_NEW_TABLE}` """ print(sql) - results = utils.execute_with_retry(client, "populate-final-export-vet", sql) + if (not skip_pet_insert): + results = utils.execute_with_retry(client, "populate-final-export-vet", sql) + print(f"\nFinal cohort extract data written to {fq_destination_table_data}\n") + else: + print(f"\nFinal vet data NOT written to {fq_destination_table_data}. Manually execute the command above!\n") + return def make_extract_table(fq_pet_vet_dataset, @@ -256,7 +261,8 @@ def make_extract_table(fq_pet_vet_dataset, min_variant_samples, fq_sample_mapping_table, sa_key_path, - temp_table_ttl_hours + temp_table_ttl_hours, + skip_pet_insert ): try: fq_destination_table_data = f"{fq_destination_dataset}.{destination_table_prefix}__DATA" @@ -328,12 +334,10 @@ def make_extract_table(fq_pet_vet_dataset, create_position_table(fq_temp_table_dataset, min_variant_samples) create_final_extract_table(fq_destination_table_data) populate_final_extract_table_with_pet(fq_pet_vet_dataset, fq_temp_table_dataset, fq_destination_table_data, sample_ids) - populate_final_extract_table_with_vet_new(fq_temp_table_dataset, fq_destination_table_data) + populate_final_extract_table_with_vet_new(fq_temp_table_dataset, fq_destination_table_data, skip_pet_insert) finally: dump_job_stats() - print(f"\nFinal cohort extract data written to {fq_destination_table_data}\n") - if __name__ == '__main__': parser = argparse.ArgumentParser(allow_abbrev=False, description='Extract a cohort from BigQuery Variant Store ') @@ -348,6 +352,9 @@ def make_extract_table(fq_pet_vet_dataset, parser.add_argument('--sa_key_path',type=str, help='Path to json key file for SA', required=False) 
parser.add_argument('--max_tables',type=int, help='Maximum number of PET/VET tables to consider', required=False, default=250) parser.add_argument('--ttl',type=int, help='Temp table TTL in hours', required=False, default=72) + parser.add_argument('--skip_pet_insert',type=lambda x: str(x).lower() == 'true', + help='This will not execute the final sql query to insert the pet_new data into the DATA table, but will print out the command instead. Useful when flex slots need to be allocated.', + required=False, default=False) sample_args = parser.add_mutually_exclusive_group(required=True) sample_args.add_argument('--sample_names_to_extract',type=str, help='File containing list of samples to extract, 1 per line') sample_args.add_argument('--fq_cohort_sample_names',type=str, help='FQN of cohort table to extract, contains "sample_name" column') @@ -368,4 +375,5 @@ def make_extract_table(fq_pet_vet_dataset, args.min_variant_samples, args.fq_sample_mapping_table, args.sa_key_path, - args.ttl) + args.ttl, + args.skip_pet_insert)