Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow disabling caching missing tables in CachingMetastore #17177

Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Allow disabling caching missing tables in CachingMetastore
Table absence is harder to less interesting to cache and sometimes
undesirable at all. This is similar to `metadata.cache-missing` config
we have for JDBC connectors.
  • Loading branch information
findepi committed Apr 24, 2023
commit 5ebca9015c61bd92707374cc4a621009fd23dc85
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@
import java.util.function.Supplier;

import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkState;
import static com.google.common.base.Throwables.throwIfInstanceOf;
import static com.google.common.base.Verify.verifyNotNull;
import static com.google.common.cache.CacheLoader.asyncReloading;
Expand Down Expand Up @@ -113,6 +114,7 @@ public enum StatsRecording
}

protected final HiveMetastore delegate;
private final boolean cacheMissing;
private final LoadingCache<String, Optional<Database>> databaseCache;
private final LoadingCache<String, List<String>> databaseNamesCache;
private final LoadingCache<HiveTableName, Optional<Table>> tableCache;
Expand Down Expand Up @@ -146,6 +148,7 @@ public static CachingHiveMetastoreBuilder builder(CachingHiveMetastoreBuilder ot
other.refreshMills,
other.maximumSize,
other.statsRecording,
other.cacheMissing,
other.partitionCacheEnabled);
}

Expand All @@ -157,6 +160,7 @@ public static CachingHiveMetastore memoizeMetastore(HiveMetastore delegate, long
.statsCacheEnabled(true)
.maximumSize(maximumSize)
.statsRecording(StatsRecording.DISABLED)
.cacheMissing(true)
.build();
}

Expand All @@ -172,6 +176,7 @@ public static class CachingHiveMetastoreBuilder
private OptionalLong refreshMills = OptionalLong.empty();
private Long maximumSize;
private StatsRecording statsRecording = StatsRecording.ENABLED;
private Boolean cacheMissing;
private boolean partitionCacheEnabled = true;

public CachingHiveMetastoreBuilder() {}
Expand All @@ -186,6 +191,7 @@ private CachingHiveMetastoreBuilder(
OptionalLong refreshMills,
Long maximumSize,
StatsRecording statsRecording,
Boolean cacheMissing,
boolean partitionCacheEnabled)
{
this.delegate = delegate;
Expand All @@ -197,6 +203,7 @@ private CachingHiveMetastoreBuilder(
this.refreshMills = refreshMills;
this.maximumSize = maximumSize;
this.statsRecording = statsRecording;
this.cacheMissing = cacheMissing;
this.partitionCacheEnabled = partitionCacheEnabled;
}

Expand Down Expand Up @@ -272,6 +279,13 @@ public CachingHiveMetastoreBuilder statsRecording(StatsRecording statsRecording)
return this;
}

@CanIgnoreReturnValue
public CachingHiveMetastoreBuilder cacheMissing(boolean cacheMissing)
{
this.cacheMissing = cacheMissing;
return this;
}

@CanIgnoreReturnValue
public CachingHiveMetastoreBuilder partitionCacheEnabled(boolean partitionCacheEnabled)
{
Expand All @@ -285,6 +299,7 @@ public CachingHiveMetastore build()
requireNonNull(statsCacheEnabled, "statsCacheEnabled is null");
requireNonNull(delegate, "delegate not set");
requireNonNull(maximumSize, "maximumSize not set");
requireNonNull(cacheMissing, "cacheMissing not set");
return new CachingHiveMetastore(
delegate,
metadataCacheEnabled,
Expand All @@ -295,6 +310,7 @@ public CachingHiveMetastore build()
executor,
maximumSize,
statsRecording,
cacheMissing,
partitionCacheEnabled);
}
}
Expand All @@ -309,10 +325,12 @@ protected CachingHiveMetastore(
Optional<Executor> executor,
long maximumSize,
StatsRecording statsRecording,
boolean cacheMissing,
boolean partitionCacheEnabled)
{
checkArgument(metadataCacheEnabled || statsCacheEnabled, "Cache not enabled");
this.delegate = requireNonNull(delegate, "delegate is null");
this.cacheMissing = cacheMissing;
requireNonNull(executor, "executor is null");

CacheFactory cacheFactory;
Expand Down Expand Up @@ -401,6 +419,28 @@ private AtomicReference<PartitionStatistics> refreshTableStatistics(HiveTableNam
private static <K, V> V get(LoadingCache<K, V> cache, K key)
{
try {
V value = cache.getUnchecked(key);
checkState(!(value instanceof Optional), "This must not be used for caches with Optional values, as it doesn't implement cacheMissing logic. Use getOptional()");
return value;
}
catch (UncheckedExecutionException e) {
throwIfInstanceOf(e.getCause(), TrinoException.class);
throw e;
}
}

private <K, V> Optional<V> getOptional(LoadingCache<K, Optional<V>> cache, K key)
{
try {
Optional<V> value = cache.getIfPresent(key);
@SuppressWarnings("OptionalAssignedToNull")
boolean valueIsPresent = value != null;
if (valueIsPresent) {
if (value.isPresent() || cacheMissing) {
return value;
}
cache.invalidate(key);
}
return cache.getUnchecked(key);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it necessary call this method if the value is not present?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, cache.getIfPresent didn't load anything yet

}
catch (UncheckedExecutionException e) {
Expand Down Expand Up @@ -524,7 +564,7 @@ private static <K, V> Map<K, V> getAll(
@Override
public Optional<Database> getDatabase(String databaseName)
{
return get(databaseCache, databaseName);
return getOptional(databaseCache, databaseName);
}

private Optional<Database> loadDatabase(String databaseName)
Expand Down Expand Up @@ -552,7 +592,7 @@ private Table getExistingTable(String databaseName, String tableName)
@Override
public Optional<Table> getTable(String databaseName, String tableName)
{
return get(tableCache, hiveTableName(databaseName, tableName));
return getOptional(tableCache, hiveTableName(databaseName, tableName));
}

@Override
Expand Down Expand Up @@ -943,7 +983,7 @@ public Optional<List<String>> getPartitionNamesByFilter(
List<String> columnNames,
TupleDomain<String> partitionKeysFilter)
{
return get(partitionFilterCache, partitionFilter(databaseName, tableName, columnNames, partitionKeysFilter));
return getOptional(partitionFilterCache, partitionFilter(databaseName, tableName, columnNames, partitionKeysFilter));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

shouldn't it be also used in bulk loaded cache?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

perhaps, but I didn't find the bulk loaded cache which caches absence, can you help me with that?

}

private Optional<List<String>> loadPartitionNamesByFilter(PartitionFilter partitionFilter)
Expand Down Expand Up @@ -1168,7 +1208,7 @@ public Set<HivePrivilegeInfo> listTablePrivileges(String databaseName, String ta
@Override
public Optional<String> getConfigValue(String name)
{
return get(configValuesCache, name);
return getOptional(configValuesCache, name);
}

private Optional<String> loadConfigValue(String name)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ public class CachingHiveMetastoreConfig
private Optional<Duration> metastoreRefreshInterval = Optional.empty();
private long metastoreCacheMaximumSize = 10000;
private int maxMetastoreRefreshThreads = 10;
private boolean cacheMissing = true;
private boolean partitionCacheEnabled = true;

@NotNull
Expand Down Expand Up @@ -101,6 +102,18 @@ public CachingHiveMetastoreConfig setMaxMetastoreRefreshThreads(int maxMetastore
return this;
}

public boolean isCacheMissing()
{
return cacheMissing;
}

@Config("hive.metastore-cache.cache-missing")
public CachingHiveMetastoreConfig setCacheMissing(boolean cacheMissing)
{
this.cacheMissing = cacheMissing;
return this;
}

public boolean isPartitionCacheEnabled()
{
return partitionCacheEnabled;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ public SharedHiveMetastoreCache(
.statsCacheTtl(statsCacheTtl)
.refreshInterval(config.getMetastoreRefreshInterval())
.maximumSize(config.getMetastoreCacheMaximumSize())
.cacheMissing(config.isCacheMissing())
.partitionCacheEnabled(config.isPartitionCacheEnabled());
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -807,6 +807,7 @@ protected final void setup(HostAndPort metastoreAddress, String databaseName)
.cacheTtl(new Duration(1, MINUTES))
.refreshInterval(new Duration(15, SECONDS))
.maximumSize(10000)
.cacheMissing(new CachingHiveMetastoreConfig().isCacheMissing())
.partitionCacheEnabled(new CachingHiveMetastoreConfig().isPartitionCacheEnabled())
.build();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ public class TestCachingHiveMetastore

private MockThriftMetastoreClient mockClient;
private ListeningExecutorService executor;
private CachingHiveMetastoreBuilder metastoreBuilder;
private CachingHiveMetastore metastore;
private CachingHiveMetastore statsOnlyCacheMetastore;
private ThriftMetastoreStats stats;
Expand All @@ -135,18 +136,19 @@ public void setUp()
ThriftMetastore thriftHiveMetastore = createThriftHiveMetastore();
executor = listeningDecorator(newCachedThreadPool(daemonThreadsNamed(getClass().getSimpleName() + "-%s")));

CachingHiveMetastoreBuilder builder = CachingHiveMetastore.builder()
metastoreBuilder = CachingHiveMetastore.builder()
.delegate(new BridgingHiveMetastore(thriftHiveMetastore))
.executor(executor)
.metadataCacheEnabled(true)
.statsCacheEnabled(true)
.cacheTtl(new Duration(5, TimeUnit.MINUTES))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

add dedicated test?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

added

.refreshInterval(new Duration(1, TimeUnit.MINUTES))
.maximumSize(1000)
.cacheMissing(new CachingHiveMetastoreConfig().isCacheMissing())
.partitionCacheEnabled(true);

metastore = builder.build();
statsOnlyCacheMetastore = CachingHiveMetastore.builder(builder)
metastore = metastoreBuilder.build();
statsOnlyCacheMetastore = CachingHiveMetastore.builder(metastoreBuilder)
.metadataCacheEnabled(false)
.statsCacheEnabled(true) // only cache stats
.build();
Expand Down Expand Up @@ -810,6 +812,7 @@ private CachingHiveMetastore createMetastore(MockThriftMetastoreClient mockClien
.cacheTtl(new Duration(5, TimeUnit.MINUTES))
.refreshInterval(new Duration(1, TimeUnit.MINUTES))
.maximumSize(1000)
.cacheMissing(new CachingHiveMetastoreConfig().isCacheMissing())
.partitionCacheEnabled(true)
.build();
}
Expand Down Expand Up @@ -859,6 +862,45 @@ public void testNoCacheExceptions()
assertEquals(mockClient.getAccessCount(), 2);
}

@Test
public void testNoCacheMissing()
{
CachingHiveMetastore metastore = CachingHiveMetastore.builder(metastoreBuilder)
.cacheMissing(false)
.build();

mockClient.setReturnTable(false);
assertEquals(mockClient.getAccessCount(), 0);

// First access
assertThat(metastore.getTable(TEST_DATABASE, TEST_TABLE)).isEmpty();
assertEquals(mockClient.getAccessCount(), 1);

// Second access, second load
assertThat(metastore.getTable(TEST_DATABASE, TEST_TABLE)).isEmpty();
assertEquals(mockClient.getAccessCount(), 2);

// Table get be accessed once it exists
mockClient.setReturnTable(true);
assertThat(metastore.getTable(TEST_DATABASE, TEST_TABLE)).isPresent();
assertEquals(mockClient.getAccessCount(), 3);

// Table existence is cached
mockClient.setReturnTable(true);
assertThat(metastore.getTable(TEST_DATABASE, TEST_TABLE)).isPresent();
assertEquals(mockClient.getAccessCount(), 3);

// Table is returned even if no longer exists
mockClient.setReturnTable(false);
assertThat(metastore.getTable(TEST_DATABASE, TEST_TABLE)).isPresent();
assertEquals(mockClient.getAccessCount(), 3);

// After cache invalidation, table absence is apparent
metastore.invalidateTable(TEST_DATABASE, TEST_TABLE);
assertThat(metastore.getTable(TEST_DATABASE, TEST_TABLE)).isEmpty();
assertEquals(mockClient.getAccessCount(), 4);
}

@Test
public void testCachingHiveMetastoreCreationWithTtlOnly()
{
Expand Down Expand Up @@ -973,6 +1015,7 @@ public Map<String, Optional<Partition>> getPartitionsByNames(Table table, List<S
.cacheTtl(new Duration(5, TimeUnit.MINUTES))
.refreshInterval(new Duration(1, TimeUnit.MINUTES))
.maximumSize(1000)
.cacheMissing(new CachingHiveMetastoreConfig().isCacheMissing())
.partitionCacheEnabled(true)
.build();

Expand Down Expand Up @@ -1113,6 +1156,7 @@ private PartitionCachingAssertions()
.cacheTtl(new Duration(5, TimeUnit.MINUTES))
.refreshInterval(new Duration(1, TimeUnit.MINUTES))
.maximumSize(1000)
.cacheMissing(true)
.partitionCacheEnabled(false)
.build();
}
Expand Down Expand Up @@ -1165,6 +1209,7 @@ private CachingHiveMetastore createMetastoreWithDirectExecutor(CachingHiveMetast
.cacheTtl(config.getMetastoreCacheTtl())
.refreshInterval(config.getMetastoreRefreshInterval())
.maximumSize(config.getMetastoreCacheMaximumSize())
.cacheMissing(config.isCacheMissing())
.partitionCacheEnabled(config.isPartitionCacheEnabled())
.build();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ public void testDefaults()
.setMetastoreRefreshInterval(null)
.setMetastoreCacheMaximumSize(10000)
.setMaxMetastoreRefreshThreads(10)
.setCacheMissing(true)
.setPartitionCacheEnabled(true));
}

Expand All @@ -48,6 +49,7 @@ public void testExplicitPropertyMappings()
.put("hive.metastore-cache-maximum-size", "5000")
.put("hive.metastore-refresh-max-threads", "2500")
.put("hive.metastore-cache.cache-partitions", "false")
.put("hive.metastore-cache.cache-missing", "false")
.buildOrThrow();

CachingHiveMetastoreConfig expected = new CachingHiveMetastoreConfig()
Expand All @@ -56,6 +58,7 @@ public void testExplicitPropertyMappings()
.setMetastoreRefreshInterval(new Duration(30, TimeUnit.MINUTES))
.setMetastoreCacheMaximumSize(5000)
.setMaxMetastoreRefreshThreads(2500)
.setCacheMissing(false)
.setPartitionCacheEnabled(false);

assertFullMapping(properties, expected);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ public class MockThriftMetastoreClient
private final Map<SchemaTableName, Map<String, Map<String, ColumnStatisticsObj>>> databaseTablePartitionColumnStatistics = new HashMap<>();

private boolean throwException;
private boolean returnTable = true;

public MockThriftMetastoreClient()
{
Expand Down Expand Up @@ -145,6 +146,11 @@ public void setThrowException(boolean throwException)
this.throwException = throwException;
}

public void setReturnTable(boolean returnTable)
{
this.returnTable = returnTable;
}

public int getAccessCount()
{
return accessCount.get();
Expand Down Expand Up @@ -207,7 +213,7 @@ public Table getTable(String dbName, String tableName)
if (throwException) {
throw new RuntimeException();
}
if (!dbName.equals(TEST_DATABASE) || !tableName.equals(TEST_TABLE)) {
if (!returnTable || !dbName.equals(TEST_DATABASE) || !tableName.equals(TEST_TABLE)) {
throw new NoSuchObjectException();
}
return new Table(
Expand Down