diff --git a/docs/src/main/sphinx/connector/bigquery.rst b/docs/src/main/sphinx/connector/bigquery.rst index 409a730c3766..25d8cf5a044e 100644 --- a/docs/src/main/sphinx/connector/bigquery.rst +++ b/docs/src/main/sphinx/connector/bigquery.rst @@ -133,6 +133,8 @@ Property Description ``BIGNUMERIC`` and ``TIMESTAMP`` types are unsupported. ``false`` ``bigquery.views-cache-ttl`` Duration for which the materialization of a view will be ``15m`` cached and reused. Set to ``0ms`` to disable the cache. +``bigquery.metadata.cache-ttl`` Duration for which metadata retrieved from BigQuery ``0ms`` + is cached and reused. Set to ``0ms`` to disable the cache. ``bigquery.max-read-rows-retries`` The number of retries in case of retryable server issues ``3`` ``bigquery.credentials-key`` The base64 encoded credentials key None. See the `requirements <#requirements>`_ section. ``bigquery.credentials-file`` The path to the JSON credentials file None. See the `requirements <#requirements>`_ section. diff --git a/plugin/trino-bigquery/pom.xml b/plugin/trino-bigquery/pom.xml index 66da8c5fdc99..d926ccf7f659 100644 --- a/plugin/trino-bigquery/pom.xml +++ b/plugin/trino-bigquery/pom.xml @@ -381,6 +381,7 @@ **/TestBigQueryConnectorTest.java + **/TestBigQueryMetadataCaching.java **/TestBigQueryTypeMapping.java **/TestBigQueryMetadata.java **/TestBigQueryInstanceCleaner.java @@ -415,6 +416,7 @@ **/TestBigQueryConnectorTest.java + **/TestBigQueryMetadataCaching.java **/TestBigQueryTypeMapping.java **/TestBigQueryMetadata.java **/TestBigQueryInstanceCleaner.java diff --git a/plugin/trino-bigquery/src/main/java/io/trino/plugin/bigquery/BigQueryClient.java b/plugin/trino-bigquery/src/main/java/io/trino/plugin/bigquery/BigQueryClient.java index 3e57a3300ff3..5deb24871df2 100644 --- a/plugin/trino-bigquery/src/main/java/io/trino/plugin/bigquery/BigQueryClient.java +++ b/plugin/trino-bigquery/src/main/java/io/trino/plugin/bigquery/BigQueryClient.java @@ -33,9 +33,12 @@ import com.google.cloud.bigquery.TableInfo; import com.google.cloud.bigquery.TableResult; import com.google.cloud.http.BaseHttpServiceException; +import com.google.common.cache.CacheLoader; +import com.google.common.cache.LoadingCache; import com.google.common.collect.ImmutableSet; import io.airlift.log.Logger; import io.airlift.units.Duration; +import io.trino.collect.cache.EvictableCacheBuilder; import io.trino.spi.TrinoException; import io.trino.spi.connector.TableNotFoundException; @@ -45,6 +48,7 @@ import java.util.Map; import java.util.Optional; import java.util.Set; +import java.util.concurrent.ExecutionException; import java.util.function.Supplier; import static com.google.cloud.bigquery.JobStatistics.QueryStatistics.StatementType.SELECT; @@ -58,9 +62,11 @@ import static io.trino.plugin.bigquery.BigQueryErrorCode.BIGQUERY_AMBIGUOUS_OBJECT_NAME; import static io.trino.plugin.bigquery.BigQueryErrorCode.BIGQUERY_FAILED_TO_EXECUTE_QUERY; import static io.trino.plugin.bigquery.BigQueryErrorCode.BIGQUERY_INVALID_STATEMENT; +import static io.trino.plugin.bigquery.BigQueryErrorCode.BIGQUERY_LISTING_DATASET_ERROR; import static java.lang.String.format; import static java.util.Locale.ENGLISH; import static java.util.Objects.requireNonNull; +import static java.util.concurrent.TimeUnit.MILLISECONDS; import static java.util.stream.Collectors.joining; public class BigQueryClient @@ -70,12 +76,17 @@ public class BigQueryClient private final BigQuery bigQuery; private final ViewMaterializationCache materializationCache; private final boolean caseInsensitiveNameMatching; + private final LoadingCache> remoteDatasetCache; - public BigQueryClient(BigQuery bigQuery, boolean caseInsensitiveNameMatching, ViewMaterializationCache materializationCache) + public BigQueryClient(BigQuery bigQuery, boolean caseInsensitiveNameMatching, ViewMaterializationCache materializationCache, Duration metadataCacheTtl) { this.bigQuery = requireNonNull(bigQuery, "bigQuery is null"); this.materializationCache = requireNonNull(materializationCache, "materializationCache is null"); this.caseInsensitiveNameMatching = caseInsensitiveNameMatching; + this.remoteDatasetCache = EvictableCacheBuilder.newBuilder() + .expireAfterWrite(metadataCacheTtl.toMillis(), MILLISECONDS) + .shareNothingWhenDisabled() + .build(CacheLoader.from(this::listDatasetsFromBigQuery)); } public Optional toRemoteDataset(String projectId, String datasetName) @@ -175,7 +186,18 @@ public String getProjectId() public Iterable listDatasets(String projectId) { - return bigQuery.listDatasets(projectId).iterateAll(); + try { + return remoteDatasetCache.get(projectId); + } + catch (ExecutionException e) { + throw new TrinoException(BIGQUERY_LISTING_DATASET_ERROR, "Failed to retrieve datasets from BigQuery", e); + } + } + + private List listDatasetsFromBigQuery(String projectId) + { + return stream(bigQuery.listDatasets(projectId).iterateAll()) + .collect(toImmutableList()); } public Iterable listTables(DatasetId remoteDatasetId, TableDefinition.Type... types) diff --git a/plugin/trino-bigquery/src/main/java/io/trino/plugin/bigquery/BigQueryClientFactory.java b/plugin/trino-bigquery/src/main/java/io/trino/plugin/bigquery/BigQueryClientFactory.java index 2d856f8d1575..edbcb266f593 100644 --- a/plugin/trino-bigquery/src/main/java/io/trino/plugin/bigquery/BigQueryClientFactory.java +++ b/plugin/trino-bigquery/src/main/java/io/trino/plugin/bigquery/BigQueryClientFactory.java @@ -19,6 +19,7 @@ import com.google.cloud.bigquery.BigQuery; import com.google.cloud.bigquery.BigQueryOptions; import com.google.common.cache.CacheBuilder; +import io.airlift.units.Duration; import io.trino.collect.cache.NonEvictableCache; import io.trino.spi.connector.ConnectorSession; @@ -40,6 +41,7 @@ public class BigQueryClientFactory private final ViewMaterializationCache materializationCache; private final HeaderProvider headerProvider; private final NonEvictableCache clientCache; + private final Duration metadataCacheTtl; @Inject public BigQueryClientFactory( @@ -56,6 +58,7 @@ public BigQueryClientFactory( this.caseInsensitiveNameMatching = bigQueryConfig.isCaseInsensitiveNameMatching(); this.materializationCache = requireNonNull(materializationCache, "materializationCache is null"); this.headerProvider = requireNonNull(headerProvider, "headerProvider is null"); + this.metadataCacheTtl = bigQueryConfig.getMetadataCacheTtl(); CacheBuilder cacheBuilder = CacheBuilder.newBuilder() .expireAfterWrite(bigQueryConfig.getServiceCacheTtl().toMillis(), MILLISECONDS); @@ -72,7 +75,7 @@ public BigQueryClient create(ConnectorSession session) protected BigQueryClient createBigQueryClient(ConnectorSession session) { - return new BigQueryClient(createBigQuery(session), caseInsensitiveNameMatching, materializationCache); + return new BigQueryClient(createBigQuery(session), caseInsensitiveNameMatching, materializationCache, metadataCacheTtl); } protected BigQuery createBigQuery(ConnectorSession session) diff --git a/plugin/trino-bigquery/src/main/java/io/trino/plugin/bigquery/BigQueryConfig.java b/plugin/trino-bigquery/src/main/java/io/trino/plugin/bigquery/BigQueryConfig.java index 81606f7cd4a0..8bc914eb8937 100644 --- a/plugin/trino-bigquery/src/main/java/io/trino/plugin/bigquery/BigQueryConfig.java +++ b/plugin/trino-bigquery/src/main/java/io/trino/plugin/bigquery/BigQueryConfig.java @@ -29,6 +29,7 @@ import static com.google.common.base.Preconditions.checkState; import static java.util.concurrent.TimeUnit.HOURS; +import static java.util.concurrent.TimeUnit.MILLISECONDS; import static java.util.concurrent.TimeUnit.MINUTES; @DefunctConfig("bigquery.case-insensitive-name-matching.cache-ttl") @@ -51,6 +52,7 @@ public class BigQueryConfig private boolean caseInsensitiveNameMatching; private Duration viewsCacheTtl = new Duration(15, MINUTES); private Duration serviceCacheTtl = new Duration(3, MINUTES); + private Duration metadataCacheTtl = new Duration(0, MILLISECONDS); private boolean queryResultsCacheEnabled; private int rpcInitialChannelCount = 1; @@ -222,6 +224,21 @@ public BigQueryConfig setServiceCacheTtl(Duration serviceCacheTtl) return this; } + @NotNull + @MinDuration("0ms") + public Duration getMetadataCacheTtl() + { + return metadataCacheTtl; + } + + @Config("bigquery.metadata.cache-ttl") + @ConfigDescription("Duration for which BigQuery client metadata is cached after listing") + public BigQueryConfig setMetadataCacheTtl(Duration metadataCacheTtl) + { + this.metadataCacheTtl = metadataCacheTtl; + return this; + } + public boolean isQueryResultsCacheEnabled() { return queryResultsCacheEnabled; diff --git a/plugin/trino-bigquery/src/test/java/io/trino/plugin/bigquery/TestBigQueryConfig.java b/plugin/trino-bigquery/src/test/java/io/trino/plugin/bigquery/TestBigQueryConfig.java index 285517849e71..08d4525112fd 100644 --- a/plugin/trino-bigquery/src/test/java/io/trino/plugin/bigquery/TestBigQueryConfig.java +++ b/plugin/trino-bigquery/src/test/java/io/trino/plugin/bigquery/TestBigQueryConfig.java @@ -24,6 +24,7 @@ import static io.airlift.configuration.testing.ConfigAssertions.recordDefaults; import static java.util.concurrent.TimeUnit.DAYS; import static java.util.concurrent.TimeUnit.HOURS; +import static java.util.concurrent.TimeUnit.MILLISECONDS; import static java.util.concurrent.TimeUnit.MINUTES; import static org.assertj.core.api.Assertions.assertThatThrownBy; @@ -44,6 +45,7 @@ public void testDefaults() .setCaseInsensitiveNameMatching(false) .setViewsCacheTtl(new Duration(15, MINUTES)) .setServiceCacheTtl(new Duration(3, MINUTES)) + .setMetadataCacheTtl(new Duration(0, MILLISECONDS)) .setViewsEnabled(false) .setQueryResultsCacheEnabled(false) .setRpcInitialChannelCount(1) @@ -69,6 +71,7 @@ public void testExplicitPropertyMappingsWithCredentialsKey() .put("bigquery.case-insensitive-name-matching", "true") .put("bigquery.views-cache-ttl", "1m") .put("bigquery.service-cache-ttl", "10d") + .put("bigquery.metadata.cache-ttl", "5d") .put("bigquery.query-results-cache.enabled", "true") .put("bigquery.channel-pool.initial-size", "11") .put("bigquery.channel-pool.min-size", "12") @@ -90,6 +93,7 @@ public void testExplicitPropertyMappingsWithCredentialsKey() .setCaseInsensitiveNameMatching(true) .setViewsCacheTtl(new Duration(1, MINUTES)) .setServiceCacheTtl(new Duration(10, DAYS)) + .setMetadataCacheTtl(new Duration(5, DAYS)) .setQueryResultsCacheEnabled(true) .setRpcInitialChannelCount(11) .setRpcMinChannelCount(12) diff --git a/plugin/trino-bigquery/src/test/java/io/trino/plugin/bigquery/TestBigQueryMetadataCaching.java b/plugin/trino-bigquery/src/test/java/io/trino/plugin/bigquery/TestBigQueryMetadataCaching.java new file mode 100644 index 000000000000..4bf645a0397d --- /dev/null +++ b/plugin/trino-bigquery/src/test/java/io/trino/plugin/bigquery/TestBigQueryMetadataCaching.java @@ -0,0 +1,61 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.bigquery; + +import com.google.common.collect.ImmutableMap; +import io.trino.testing.AbstractTestQueryFramework; +import io.trino.testing.QueryRunner; +import org.testng.annotations.Test; + +import static io.trino.plugin.bigquery.BigQueryQueryRunner.BigQuerySqlExecutor; +import static io.trino.testing.sql.TestTable.randomTableSuffix; +import static org.testng.Assert.assertEquals; + +public class TestBigQueryMetadataCaching + extends AbstractTestQueryFramework +{ + protected BigQuerySqlExecutor bigQuerySqlExecutor; + + @Override + protected QueryRunner createQueryRunner() + throws Exception + { + this.bigQuerySqlExecutor = new BigQuerySqlExecutor(); + return BigQueryQueryRunner.createQueryRunner( + ImmutableMap.of(), + ImmutableMap.of("bigquery.metadata.cache-ttl", "5m")); + } + + @Test + public void testMetadataCaching() + { + String schema = "test_metadata_caching_" + randomTableSuffix(); + try { + getQueryRunner().execute("CREATE SCHEMA " + schema); + assertEquals(getQueryRunner().execute("SHOW SCHEMAS IN bigquery LIKE '" + schema + "'").getOnlyValue(), schema); + + String schemaTableName = schema + ".test_metadata_caching"; + getQueryRunner().execute("CREATE TABLE " + schemaTableName + " AS SELECT * FROM tpch.tiny.region"); + assertEquals(getQueryRunner().execute("SELECT * FROM " + schemaTableName).getRowCount(), 5); + + bigQuerySqlExecutor.execute("DROP SCHEMA " + schema + " CASCADE"); + assertEquals(getQueryRunner().execute("SHOW SCHEMAS IN bigquery LIKE '" + schema + "'").getOnlyValue(), schema); + + assertQueryFails("SELECT * FROM " + schemaTableName, ".*Schema '.+' does not exist.*"); + } + finally { + bigQuerySqlExecutor.execute("DROP SCHEMA IF EXISTS " + schema + " CASCADE"); + } + } +}