VS-222 don't hard code the dataset name! #7704

Merged: 15 commits, Mar 9, 2022
.dockstore.yml (2 changes: 1 addition & 1 deletion)
@@ -89,7 +89,7 @@ workflows:
branches:
- master
- ah_var_store
- - rc-split-intervals-odd
+ - rc-vs-222-dataset-id
- name: GvsCreateAltAllele
subclass: WDL
primaryDescriptorPath: /scripts/variantstore/wdl/GvsCreateAltAllele.wdl
scripts/variantstore/wdl/GvsCreateFilterSet.wdl (7 changes: 5 additions & 2 deletions)
@@ -24,7 +24,7 @@ workflow GvsCreateFilterSet {
File? excluded_intervals

String output_file_base_name
File? gatk_override = "gs://broad-dsp-spec-ops/scratch/bigquery-jointcalling/jars/kc_extract_perf_20220111/gatk-package-4.2.0.0-455-g40a40bc-SNAPSHOT-local.jar"
File? gatk_override = "gs://broad-dsp-spec-ops/scratch/bigquery-jointcalling/jars/rc_testing_dataset_id_20220303/gatk-package-4.2.0.0-484-gf9e5c0e-SNAPSHOT-local.jar"

File dbsnp_vcf
File dbsnp_vcf_index
@@ -122,6 +122,7 @@ workflow GvsCreateFilterSet {
fq_alt_allele_table = fq_alt_allele_table,
excess_alleles_threshold = excess_alleles_threshold,
read_project_id = query_project,
+ default_dataset_id = default_dataset,
output_file = "${output_file_base_name}_${i}.vcf.gz",
service_account_json_path = service_account_json_path,
query_project = query_project,
@@ -349,6 +350,7 @@ task ExtractFilterTask {

String fq_alt_allele_table
String read_project_id
+ String default_dataset_id
String output_file
Int? excess_alleles_threshold

@@ -391,7 +393,8 @@
~{sep=" " query_label_args} \
-L ~{intervals} \
~{"-XL " + excluded_intervals} \
- --project-id ~{read_project_id}
+ --project-id ~{read_project_id} \
+ --dataset-id ~{default_dataset_id}
>>>

runtime {
@@ -38,6 +38,15 @@ public abstract class ExtractTool extends GATKTool {
)
protected String projectID = null;


+ @Argument(
+ fullName = "dataset-id",
+ doc = "ID of the Google Cloud dataset to use when executing queries",
+ optional = true // I guess, but won't it break otherwise, or require that a dataset be created with the name temp_tables?
[Inline review comment from @RoriCremer (Contributor Author), Mar 4, 2022: I'm going to delete this comment, but why is projectId above optional?]
+ )
+ protected String datasetID = null;


@Argument(
fullName = "sample-table",
doc = "Fully qualified name of a bigquery table containing a single column `sample` that describes the full list of samples to extract",
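Regarding the inline question above about `optional = true`: a minimal sketch, not part of this PR, of how a subclass could keep the flag optional at the parser level but still fail fast when it is omitted. The override body and the message text are illustrative assumptions; `UserException` is GATK's standard user-facing error type, and `onStartup` is the GATKTool lifecycle hook shown in the next hunk.

    import org.broadinstitute.hellbender.exceptions.UserException;

    // Hypothetical guard, not in this PR: --dataset-id stays optional for the
    // argument parser, but a missing value is rejected before any query runs,
    // surfacing a clear user error instead of a later BigQuery failure.
    @Override
    protected void onStartup() {
        super.onStartup();
        if (datasetID == null) {
            throw new UserException("A value must be provided for --dataset-id");
        }
    }
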
@@ -113,6 +113,7 @@ protected void onStartup() {

engine = new ExtractFeaturesEngine(
projectID,
+ datasetID,
vcfWriter,
header,
annotationEngine,
@@ -51,6 +51,7 @@ public class ExtractFeaturesEngine {

private final ReferenceConfidenceVariantContextMerger variantContextMerger;
private final String projectID;
+ private final String datasetID;

private final TableReference altAlleleTable;
private final TableReference sampleListTable;
@@ -69,6 +70,7 @@ public class ExtractFeaturesEngine {
// /** Set of sample names seen in the variant data from BigQuery. */
// private final Set<String> sampleNames = new HashSet<>();
public ExtractFeaturesEngine(final String projectID,
+ final String datasetID,
final VariantContextWriter vcfWriter,
final VCFHeader vcfHeader,
final VariantAnnotatorEngine annotationEngine,
@@ -93,6 +95,7 @@ public ExtractFeaturesEngine(final String projectID,
this.localSortMaxRecordsInRam = localSortMaxRecordsInRam;

this.projectID = projectID;
+ this.datasetID = datasetID;
this.vcfWriter = vcfWriter;
this.refSource = refSource;
this.altAlleleTable = new TableReference(fqAltAlleleTable, SchemaUtils.ALT_ALLELE_FIELDS);
@@ -134,6 +137,7 @@ public void traverse() {
featureQueryString,
SchemaUtils.FEATURE_EXTRACT_FIELDS,
projectID,
+ datasetID,
userDefinedFunctions,
useBatchQueries,
cleanQueryLabels);
@@ -412,21 +412,22 @@ private static long getQueryCostBytesProcessedEstimate(String queryString, Strin
public static StorageAPIAvroReader executeQueryWithStorageAPI(final String queryString,
final List<String> fieldsToRetrieve,
final String projectID,
+ final String datasetID,
final String userDefinedFunctions,
Map<String, String> labels) {

- return executeQueryWithStorageAPI(queryString, fieldsToRetrieve, projectID, userDefinedFunctions, false, labels);
+ return executeQueryWithStorageAPI(queryString, fieldsToRetrieve, projectID, datasetID, userDefinedFunctions, false, labels);
}

public static StorageAPIAvroReader executeQueryWithStorageAPI(final String queryString,
final List<String> fieldsToRetrieve,
final String projectID,
+ final String datasetID,
final String userDefinedFunctions,
final boolean runQueryInBatchMode,
Map<String, String> labels) {
final String tempTableDataset = "temp_tables";
final String tempTableName = UUID.randomUUID().toString().replace('-', '_');
final String tempTableFullyQualified = String.format("%s.%s.%s", projectID, tempTableDataset, tempTableName);
final String tempTableName = String.format("%s_%s", "temp_table", UUID.randomUUID().toString().replace('-', '_'));
final String tempTableFullyQualified = String.format("%s.%s.%s", projectID, datasetID, tempTableName);

final String queryStringWithUDFs = userDefinedFunctions == null ? queryString : userDefinedFunctions + queryString;

@@ -446,7 +447,7 @@ public static StorageAPIAvroReader executeQueryWithStorageAPI(final String query

executeQuery(projectID, queryStringIntoTempTable, runQueryInBatchMode, labels);

- final Table tableInfo = getBigQueryEndPoint(projectID).getTable( TableId.of(projectID, tempTableDataset, tempTableName) );
+ final Table tableInfo = getBigQueryEndPoint(projectID).getTable( TableId.of(projectID, datasetID, tempTableName) );
logger.info(String.format("Query temp table created with %s rows and %s bytes in size", tableInfo.getNumRows(), tableInfo.getNumBytes()));

TableReference tr = new TableReference(tempTableFullyQualified, fieldsToRetrieve);
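To make the new signature concrete, a hedged sketch of what a caller looks like after this change. The project name, dataset name, query, field list, and labels below are hypothetical placeholders, not values from this PR; only the argument order comes from the diff above.

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;
    import org.broadinstitute.hellbender.utils.bigquery.BigQueryUtils;
    import org.broadinstitute.hellbender.utils.bigquery.StorageAPIAvroReader;

    // Hypothetical caller after this change; all names are placeholders.
    static StorageAPIAvroReader runExampleQuery() {
        final List<String> fieldsToRetrieve = new ArrayList<>(Arrays.asList("sample_id", "state"));
        final Map<String, String> labels = new HashMap<>();
        labels.put("gatkquery", "example");

        // The dataset that holds the ephemeral temp table is now passed in
        // explicitly instead of being hard-coded to "temp_tables"; the temp
        // table lands at my-project.my_gvs_dataset.temp_table_<uuid>.
        return BigQueryUtils.executeQueryWithStorageAPI(
                "SELECT sample_id, state FROM `my-project.my_gvs_dataset.sample_info`",
                fieldsToRetrieve,
                "my-project",      // query/read project (placeholder)
                "my_gvs_dataset",  // dataset for the temp table (placeholder)
                null,              // no user-defined functions
                labels);
    }
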
@@ -88,7 +88,7 @@ public void testQueryWithStorageAPI() {
fieldsToRetrieve.add("name");
Map<String, String> labels = new HashMap<String, String>();
labels.put("gatktestquery", "teststorageapi" + runUuid);
- final StorageAPIAvroReader result = BigQueryUtils.executeQueryWithStorageAPI(query, fieldsToRetrieve, BIGQUERY_TEST_PROJECT, noUDFs, labels);
+ final StorageAPIAvroReader result = BigQueryUtils.executeQueryWithStorageAPI(query, fieldsToRetrieve, BIGQUERY_TEST_PROJECT, BIGQUERY_TEST_DATASET, noUDFs, labels);

int rowCount = 0;
final Set<String> retrievedNames = new HashSet<>();
@@ -123,7 +123,7 @@ public void testQueryWithEmptyDatasetStorageAPI() {
fieldsToRetrieve.add("name");
Map<String, String> labels = new HashMap<String, String>();
labels.put("gatktestquery", "testapiwithemptydata" + runUuid);
- final StorageAPIAvroReader result = BigQueryUtils.executeQueryWithStorageAPI(query, fieldsToRetrieve, BIGQUERY_TEST_PROJECT, noUDFs, labels);
+ final StorageAPIAvroReader result = BigQueryUtils.executeQueryWithStorageAPI(query, fieldsToRetrieve, BIGQUERY_TEST_PROJECT, BIGQUERY_TEST_DATASET, noUDFs, labels);

int rowCount = 0;
final Set<String> retrievedNames = new HashSet<>();
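A possible follow-up, sketched here as an assumption rather than as part of this PR: a small TestNG check that the new temp-table naming stays BigQuery-safe. The test name and assertions are hypothetical; the name-building expression is copied from the BigQueryUtils change above.

    import java.util.UUID;
    import org.testng.Assert;
    import org.testng.annotations.Test;

    // Hypothetical test, not in this PR: the generated temp-table name should
    // keep the "temp_table" prefix and contain no hyphens, since BigQuery
    // table names do not allow '-'.
    @Test
    public void testTempTableNameFormat() {
        final String tempTableName = String.format("%s_%s", "temp_table",
                UUID.randomUUID().toString().replace('-', '_'));
        Assert.assertTrue(tempTableName.startsWith("temp_table_"));
        Assert.assertFalse(tempTableName.contains("-"));
    }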