diff --git a/src/main/java/org/broadinstitute/hellbender/tools/variantdb/SampleList.java b/src/main/java/org/broadinstitute/hellbender/tools/variantdb/SampleList.java index a18712f14b0..2f0163633e2 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/variantdb/SampleList.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/variantdb/SampleList.java @@ -81,8 +81,8 @@ private TableResult querySampleTable(String fqSampleTableName, String whereClaus "SELECT " + SchemaUtils.SAMPLE_ID_FIELD_NAME + ", " + SchemaUtils.SAMPLE_NAME_FIELD_NAME + " FROM `" + fqSampleTableName + "`" + whereClause; - // Execute the query: - final TableResult result = BigQueryUtils.executeQuery(BigQueryUtils.getBigQueryEndPoint(executionProjectId) , sampleListQueryString, false); + // Execute the query: + final TableResult result = BigQueryUtils.executeQuery(BigQueryUtils.getBigQueryEndPoint(executionProjectId) , sampleListQueryString, false, null); // Show our pretty results: if (printDebugInformation) { diff --git a/src/main/java/org/broadinstitute/hellbender/tools/variantdb/nextgen/ExtractCohortEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/variantdb/nextgen/ExtractCohortEngine.java index b6f3227456c..19ee725f9ab 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/variantdb/nextgen/ExtractCohortEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/variantdb/nextgen/ExtractCohortEngine.java @@ -174,7 +174,7 @@ public void traverse() { } // create the query string String q = "SELECT " + StringUtils.join(SchemaUtils.COHORT_FIELDS,",") + " FROM " + cohortTableRef.getFQTableName() + " ORDER BY " + SchemaUtils.LOCATION_FIELD_NAME; - TableResult tr = BigQueryUtils.executeQuery(BigQueryUtils.getBigQueryEndPoint(), cohortTableRef.tableProject, cohortTableRef.tableDataset, q); + TableResult tr = BigQueryUtils.executeQuery(BigQueryUtils.getBigQueryEndPoint(), cohortTableRef.tableProject, cohortTableRef.tableDataset, q, null); 
createVariantsFromSortedTableResults(tr, fullVqsLodMap, fullYngMap, noFilteringRequested); break; } @@ -329,7 +329,7 @@ private double getQUALapproxFromSampleRecord(GenericRecord sampleRecord) { if (s.contains("|")) { // take the sum of all non-* alleles - // basically if our alleles are '*,T' or 'G,*' we want to ignore the * part + // basically if our alleles are '*,T' or 'G,*' we want to ignore the * part String[] alleles = sampleRecord.get(SchemaUtils.ALT_ALLELE_FIELD_NAME).toString().split(","); String[] parts = s.split("\\|"); @@ -389,7 +389,7 @@ private void processSampleRecordsForLocation(final long location, final Iterable totalAsQualApprox += getQUALapproxFromSampleRecord(sampleRecord); - // hasSnpAllele should be set to true if any sample has at least one snp (gnarly definition here) + // hasSnpAllele should be set to true if any sample has at least one snp (gnarly definition here) boolean thisHasSnp = vc.getAlternateAlleles().stream().anyMatch(allele -> allele != Allele.SPAN_DEL && allele.length() == vc.getReference().length()); // logger.info("\t" + contig + ":" + currentPosition + ": calculated thisHasSnp of " + thisHasSnp + " from " + vc.getAlternateAlleles() + " and ref " + vc.getReference()); hasSnpAllele = hasSnpAllele || thisHasSnp; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/variantdb/nextgen/ExtractFeaturesEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/variantdb/nextgen/ExtractFeaturesEngine.java index b722a186d18..1b9f511443e 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/variantdb/nextgen/ExtractFeaturesEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/variantdb/nextgen/ExtractFeaturesEngine.java @@ -100,12 +100,9 @@ public ExtractFeaturesEngine(final String projectID, public void traverse() { - final String featureQueryString = - - ExtractFeaturesBQ.getVQSRFeatureExtractQueryString(altAlleleTable, sampleListTable, minLocation, maxLocation, trainingSitesOnly, 
SNP_QUAL_THRESHOLD, INDEL_QUAL_THRESHOLD); - + final String featureQueryString = ExtractFeaturesBQ.getVQSRFeatureExtractQueryString(altAlleleTable, sampleListTable, minLocation, maxLocation, trainingSitesOnly, SNP_QUAL_THRESHOLD, INDEL_QUAL_THRESHOLD); logger.info(featureQueryString); - final StorageAPIAvroReader storageAPIAvroReader = BigQueryUtils.executeQueryWithStorageAPI(featureQueryString, SchemaUtils.FEATURE_EXTRACT_FIELDS, projectID, useBatchQueries); + final StorageAPIAvroReader storageAPIAvroReader = BigQueryUtils.executeQueryWithStorageAPI(featureQueryString, SchemaUtils.FEATURE_EXTRACT_FIELDS, projectID, useBatchQueries, null); createVQSRInputFromTableResult(storageAPIAvroReader); } diff --git a/src/main/java/org/broadinstitute/hellbender/utils/bigquery/BigQueryUtils.java b/src/main/java/org/broadinstitute/hellbender/utils/bigquery/BigQueryUtils.java index 845211290b8..7e71b5b82c3 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/bigquery/BigQueryUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/bigquery/BigQueryUtils.java @@ -54,10 +54,11 @@ public static BigQuery getBigQueryEndPoint(String executionProjectId) { * Will block until results are returned. * For more information on querying BigQuery tables, see: https://cloud.google.com/bigquery/sql-reference/ * @param queryString The {@link BigQuery} query string to execute. Must use standard SQL syntax. Must contain the project ID, data set, and table name in the `FROM` clause for the table from which to retrieve data. + * @param labels The {@link BigQuery} labels to add to the job run, as a Map of String keys to String values. Can be null to indicate no labels. * @return A {@link TableResult} object containing the results of the query executed.
*/ - public static TableResult executeQuery(final String queryString) { - return executeQuery(getBigQueryEndPoint(), queryString, false); + public static TableResult executeQuery(final String queryString, final Map labels) { + return executeQuery(getBigQueryEndPoint(), queryString, false, labels); } /** @@ -66,10 +67,11 @@ public static TableResult executeQuery(final String queryString) { * For more information on querying BigQuery tables, see: https://cloud.google.com/bigquery/sql-reference/ * @param queryString The {@link BigQuery} query string to execute. Must use standard SQL syntax. Must contain the project ID, data set, and table name in the `FROM` clause for the table from which to retrieve data. * @param runQueryInBatchMode If true, run the query in batch mode, which is lower priority but has no limit on the number of concurrent queries + * @param labels The {@link BigQuery} labels to add to the job run, as a Map of String keys to String values. Can be null to indicate no labels. * @return A {@link TableResult} object containing the results of the query executed. */ - public static TableResult executeQuery(final String queryString, final boolean runQueryInBatchMode) { - return executeQuery(getBigQueryEndPoint(), queryString, runQueryInBatchMode); + public static TableResult executeQuery(final String queryString, final boolean runQueryInBatchMode, final Map labels) { + return executeQuery(getBigQueryEndPoint(), queryString, runQueryInBatchMode, labels); } /** @@ -79,15 +81,17 @@ public static TableResult executeQuery(final String queryString, final boolean r * @param bigQuery The {@link BigQuery} instance against which to execute the given {@code queryString}. * @param queryString The {@link BigQuery} query string to execute. Must use standard SQL syntax. Must contain the project ID, data set, and table name in the `FROM` clause for the table from which to retrieve data.
* @param runQueryInBatchMode If true, run the query in batch mode, which is lower priority but has no limit on the number of concurrent queries + * @param labels The {@link BigQuery} labels to add to the job run, as a Map of String keys to String values. Can be null to indicate no labels. * @return A {@link TableResult} object containing the results of the query executed. */ - public static TableResult executeQuery(final BigQuery bigQuery, final String queryString, final boolean runQueryInBatchMode) { + public static TableResult executeQuery(final BigQuery bigQuery, final String queryString, final boolean runQueryInBatchMode, final Map labels) { // Create a query configuration we can run based on our query string: final QueryJobConfiguration queryConfig = QueryJobConfiguration.newBuilder( queryString ) .setUseLegacySql(false) .setPriority(runQueryInBatchMode ? QueryJobConfiguration.Priority.BATCH : QueryJobConfiguration.Priority.INTERACTIVE) + .setLabels(labels) .build(); logger.info("Executing Query: \n\n" + queryString); @@ -104,18 +108,21 @@ public static TableResult executeQuery(final BigQuery bigQuery, final String que * @param projectID The BigQuery {@code project ID} containing the {@code dataSet} and table from which to query data. * @param dataSet The BigQuery {@code dataSet} containing the table from which to query data. * @param queryString The {@link BigQuery} query string to execute. Must use standard SQL syntax. Must contain the project ID, data set, and table ID in the `FROM` clause for the table from which to retrieve data. + * @param labels The {@link BigQuery} labels to add to the job run, as a Map of String keys to String values. Can be null to indicate no labels. * @return A {@link TableResult} object containing the results of the query executed.
*/ public static TableResult executeQuery(final BigQuery bigQuery, final String projectID, final String dataSet, - final String queryString) { + final String queryString, + final Map labels) { // Create a query configuration we can run based on our query string: final QueryJobConfiguration queryConfig = QueryJobConfiguration.newBuilder( queryString ) .setUseLegacySql(false) .setDefaultDataset(DatasetId.of(projectID, dataSet)) + .setLabels(labels) .build(); return submitQueryAndWaitForResults( bigQuery, queryConfig ); @@ -374,12 +381,12 @@ private static long getQueryCostBytesProcessedEstimate(String queryString) { return bytesProcessed; } - public static StorageAPIAvroReader executeQueryWithStorageAPI(final String queryString, final List fieldsToRetrieve, final String projectID) { + public static StorageAPIAvroReader executeQueryWithStorageAPI(final String queryString, final List fieldsToRetrieve, final String projectID, Map labels) { - return executeQueryWithStorageAPI(queryString, fieldsToRetrieve, projectID, false); + return executeQueryWithStorageAPI(queryString, fieldsToRetrieve, projectID, false, labels); } - public static StorageAPIAvroReader executeQueryWithStorageAPI(final String queryString, final List fieldsToRetrieve, final String projectID, final boolean runQueryInBatchMode) { + public static StorageAPIAvroReader executeQueryWithStorageAPI(final String queryString, final List fieldsToRetrieve, final String projectID, final boolean runQueryInBatchMode, Map labels) { final String tempTableDataset = "temp_tables"; final String tempTableName = UUID.randomUUID().toString().replace('-', '_'); final String tempTableFullyQualified = String.format("%s.%s.%s", projectID, tempTableDataset, tempTableName); @@ -393,7 +400,7 @@ public static StorageAPIAvroReader executeQueryWithStorageAPI(final String query ") AS\n" + queryString; - executeQuery(queryStringIntoTempTable, runQueryInBatchMode); + executeQuery(queryStringIntoTempTable, runQueryInBatchMode, labels); 
final Table tableInfo = getBigQueryEndPoint().getTable( TableId.of(projectID, tempTableDataset, tempTableName) ); logger.info(String.format("Query temp table created with %s rows and %s bytes in size", tableInfo.getNumRows(), tableInfo.getNumBytes())); diff --git a/src/test/java/org/broadinstitute/hellbender/utils/bigquery/BigQueryUtilsUnitTest.java b/src/test/java/org/broadinstitute/hellbender/utils/bigquery/BigQueryUtilsUnitTest.java index 7a13c6fe1ea..c5cca305f8c 100644 --- a/src/test/java/org/broadinstitute/hellbender/utils/bigquery/BigQueryUtilsUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/utils/bigquery/BigQueryUtilsUnitTest.java @@ -8,7 +8,6 @@ import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; - import java.util.*; /** @@ -22,12 +21,14 @@ public class BigQueryUtilsUnitTest extends GATKBaseTest { private static final String BIGQUERY_FULLY_QUALIFIED_TABLE = String.format("%s.%s.%s", BIGQUERY_TEST_PROJECT, BIGQUERY_TEST_DATASET, BIGQUERY_TEST_TABLE); + private static final UUID runUuid = UUID.randomUUID(); @Test(groups = {"cloud"}) public void testExecuteQueryAllRecords() { final String query = String.format("SELECT * FROM `%s`", BIGQUERY_FULLY_QUALIFIED_TABLE); - - final TableResult result = BigQueryUtils.executeQuery(query); + Map labels = new HashMap(); + labels.put("gatktestquery", "getallrecords" + runUuid); + final TableResult result = BigQueryUtils.executeQuery(query, labels); checkQueryResults(result, getAllExpectedNamesAndAges(), query); } @@ -38,8 +39,10 @@ public void testExecuteQueryWithWhereClause() { expectedNamesAndAges.put("Fred", "35"); final String query = String.format("SELECT * FROM `%s` WHERE name = 'Fred'", BIGQUERY_FULLY_QUALIFIED_TABLE); - - final TableResult result = BigQueryUtils.executeQuery(query); + Map labels = new HashMap(); + labels.put("gatktestquery", "testwhereclause" + runUuid); + System.out.print("testwhereclause" + runUuid); + final TableResult result 
= BigQueryUtils.executeQuery(query, labels); checkQueryResults(result, expectedNamesAndAges, query); } @@ -47,8 +50,9 @@ public void testExecuteQueryWithWhereClause() { @Test(groups = {"cloud"}) public void testExecuteQueryInBatchMode() { final String query = String.format("SELECT * FROM `%s`", BIGQUERY_FULLY_QUALIFIED_TABLE); - - final TableResult result = BigQueryUtils.executeQuery(query, true); + Map labels = new HashMap(); + labels.put("gatktestquery", "testbatchmode" + runUuid); + final TableResult result = BigQueryUtils.executeQuery(query, true, labels); checkQueryResults(result, getAllExpectedNamesAndAges(), query); } @@ -56,8 +60,9 @@ public void testExecuteQueryInBatchMode() { @Test(groups = {"cloud"}) public void testSpecifiedExecuteQuery() { final String query = String.format("SELECT * FROM `%s`", BIGQUERY_TEST_TABLE); - - final TableResult result = BigQueryUtils.executeQuery(BigQueryUtils.getBigQueryEndPoint(), BIGQUERY_TEST_PROJECT, BIGQUERY_TEST_DATASET, query); + Map labels = new HashMap(); + labels.put("gatktestquery", "testspecifiedexecutequery" + runUuid); + final TableResult result = BigQueryUtils.executeQuery(BigQueryUtils.getBigQueryEndPoint(), BIGQUERY_TEST_PROJECT, BIGQUERY_TEST_DATASET, query, labels); checkQueryResults(result, getAllExpectedNamesAndAges(), query); } @@ -70,8 +75,9 @@ public void testQueryWithStorageAPI() { final List fieldsToRetrieve = new LinkedList<>(); fieldsToRetrieve.add("name"); - - final StorageAPIAvroReader result = BigQueryUtils.executeQueryWithStorageAPI(query, fieldsToRetrieve, BIGQUERY_TEST_PROJECT); + Map labels = new HashMap(); + labels.put("gatktestquery", "teststorageapi" + runUuid); + final StorageAPIAvroReader result = BigQueryUtils.executeQueryWithStorageAPI(query, fieldsToRetrieve, BIGQUERY_TEST_PROJECT, labels); int rowCount = 0; final Set retrievedNames = new HashSet<>(); @@ -103,8 +109,9 @@ public void testQueryWithEmptyDatasetStorageAPI() { final List fieldsToRetrieve = new LinkedList<>(); 
fieldsToRetrieve.add("name"); - - final StorageAPIAvroReader result = BigQueryUtils.executeQueryWithStorageAPI(query, fieldsToRetrieve, BIGQUERY_TEST_PROJECT); + Map labels = new HashMap(); + labels.put("gatktestquery", "testapiwithemptydata" + runUuid); + final StorageAPIAvroReader result = BigQueryUtils.executeQueryWithStorageAPI(query, fieldsToRetrieve, BIGQUERY_TEST_PROJECT, labels); int rowCount = 0; final Set retrievedNames = new HashSet<>(); @@ -116,6 +123,15 @@ public void testQueryWithEmptyDatasetStorageAPI() { Assert.assertTrue(retrievedNames.isEmpty(), "No Result expected"); } + @Test(groups = {"cloud"}) + public void testQueryWithNullLabel() { + final String query = String.format("SELECT * FROM `%s`", BIGQUERY_FULLY_QUALIFIED_TABLE); + Map labels = null; + final TableResult result = BigQueryUtils.executeQuery(query, labels); + + checkQueryResults(result, getAllExpectedNamesAndAges(), query); + } + private Map getAllExpectedNamesAndAges() { final Map expectedNamesAndAges = new HashMap<>(); expectedNamesAndAges.put("Fred", "35");