From 52a17f11a6552b589e51e94c52b9d585432863fc Mon Sep 17 00:00:00 2001 From: Dain Sundstrom Date: Mon, 5 Feb 2024 14:32:20 -0800 Subject: [PATCH] Replace HiveMetastore get*Statistics with get*ColumnStatistics It is simpler and more efficient to only support column statistics in HiveMetastore since the HiveBasicStatistics are actually extracted from table/partition properties, and are not loaded from the underlying metastore implementation. --- .../plugin/hive/HiveMetastoreClosure.java | 39 ++-- .../plugin/hive/PartitionStatistics.java | 10 -- .../plugin/hive/metastore/HiveMetastore.java | 27 ++- .../SemiTransactionalHiveMetastore.java | 139 ++++++++++---- ...storeUtil.java => SparkMetastoreUtil.java} | 123 +++++++++---- .../metastore/cache/CachingHiveMetastore.java | 147 +++++++-------- .../cache/SharedHiveMetastoreCache.java | 4 +- .../metastore/file/FileHiveMetastore.java | 61 ++++--- .../DefaultGlueColumnStatisticsProvider.java | 55 +++--- .../glue/GlueColumnStatisticsProvider.java | 10 +- .../metastore/glue/GlueHiveMetastore.java | 60 ++++--- .../thrift/BridgingHiveMetastore.java | 22 ++- .../metastore/thrift/ThriftHiveMetastore.java | 120 +++++-------- .../metastore/thrift/ThriftMetastore.java | 7 +- .../ThriftMetastoreParameterParserUtils.java | 85 --------- .../metastore/thrift/ThriftMetastoreUtil.java | 16 +- .../tracing/TracingHiveMetastore.java | 33 ++-- .../plugin/hive/TestHive3OnDataLake.java | 22 ++- .../hive/metastore/MetastoreMethod.java | 4 +- ...eUtil.java => TestSparkMetastoreUtil.java} | 51 +++--- .../cache/TestCachingHiveMetastore.java | 170 +++++++++--------- .../TestHiveMetastoreAccessOperations.java | 18 +- 22 files changed, 638 insertions(+), 585 deletions(-) rename plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/{thrift/ThriftSparkMetastoreUtil.java => SparkMetastoreUtil.java} (63%) delete mode 100644 plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftMetastoreParameterParserUtils.java rename plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/{thrift/TestThriftSparkMetastoreUtil.java => TestSparkMetastoreUtil.java} (80%) diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetastoreClosure.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetastoreClosure.java index 4dd9d306439d..48d4cefd6eaa 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetastoreClosure.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetastoreClosure.java @@ -13,13 +13,12 @@ */ package io.trino.plugin.hive; -import com.google.common.collect.ImmutableList; import io.trino.hive.thrift.metastore.DataOperationType; import io.trino.plugin.hive.acid.AcidOperation; import io.trino.plugin.hive.acid.AcidTransaction; import io.trino.plugin.hive.metastore.AcidTransactionOwner; -import io.trino.plugin.hive.metastore.Column; import io.trino.plugin.hive.metastore.Database; +import io.trino.plugin.hive.metastore.HiveColumnStatistics; import io.trino.plugin.hive.metastore.HiveMetastore; import io.trino.plugin.hive.metastore.HivePrincipal; import io.trino.plugin.hive.metastore.HivePrivilegeInfo; @@ -99,37 +98,23 @@ public Set getSupportedColumnStatistics(Type type) return delegate.getSupportedColumnStatistics(type); } - public PartitionStatistics getTableStatistics(String databaseName, String tableName, Optional> columns) + public Map getTableColumnStatistics(String databaseName, String tableName, Set columnNames, OptionalLong rowCount) { - Table table = getExistingTable(databaseName, 
tableName); - if (columns.isPresent()) { - Set requestedColumnNames = columns.get(); - List requestedColumns = table.getDataColumns().stream() - .filter(column -> requestedColumnNames.contains(column.getName())) - .collect(toImmutableList()); - table = Table.builder(table).setDataColumns(requestedColumns).build(); - } - return delegate.getTableStatistics(table); + return delegate.getTableColumnStatistics(databaseName, tableName, columnNames, rowCount); } - public Map getPartitionStatistics(String databaseName, String tableName, Set partitionNames) + public Map> getPartitionColumnStatistics( + String databaseName, + String tableName, + Map partitionNamesWithRowCount, + Set columnNames) { - return getPartitionStatistics(databaseName, tableName, partitionNames, Optional.empty()); + return delegate.getPartitionColumnStatistics(databaseName, tableName, partitionNamesWithRowCount, columnNames); } - public Map getPartitionStatistics(String databaseName, String tableName, Set partitionNames, Optional> columns) + public boolean useSparkTableStatistics() { - Table table = getExistingTable(databaseName, tableName); - List partitions = getExistingPartitionsByNames(table, ImmutableList.copyOf(partitionNames)); - if (columns.isPresent()) { - Set requestedColumnNames = columns.get(); - List requestedColumns = table.getDataColumns().stream() - .filter(column -> requestedColumnNames.contains(column.getName())) - .collect(toImmutableList()); - table = Table.builder(table).setDataColumns(requestedColumns).build(); - partitions = partitions.stream().map(partition -> Partition.builder(partition).setColumns(requestedColumns).build()).collect(toImmutableList()); - } - return delegate.getPartitionStatistics(table, partitions); + return delegate.useSparkTableStatistics(); } public void updateTableStatistics(String databaseName, String tableName, AcidTransaction transaction, StatisticsUpdateMode mode, PartitionStatistics statisticsUpdate) @@ -267,7 +252,7 @@ public Optional> getPartitionNamesByFilter( return delegate.getPartitionNamesByFilter(databaseName, tableName, columnNames, partitionKeysFilter); } - private List getExistingPartitionsByNames(Table table, List partitionNames) + public List getExistingPartitionsByNames(Table table, List partitionNames) { Map partitions = getPartitionsByNames(table, partitionNames).entrySet().stream() .map(entry -> immutableEntry(entry.getKey(), entry.getValue().orElseThrow(() -> diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/PartitionStatistics.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/PartitionStatistics.java index 151289496aaa..f77000f0ea10 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/PartitionStatistics.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/PartitionStatistics.java @@ -24,7 +24,6 @@ import java.util.Objects; import static com.google.common.base.MoreObjects.toStringHelper; -import static com.google.common.collect.ImmutableMap.toImmutableMap; import static java.util.Objects.requireNonNull; @Immutable @@ -100,15 +99,6 @@ public PartitionStatistics withBasicStatistics(HiveBasicStatistics basicStatisti return new PartitionStatistics(basicStatistics, columnStatistics); } - public PartitionStatistics withEmptyColumnStatisticsRemoved() - { - return new PartitionStatistics( - basicStatistics, - columnStatistics.entrySet().stream() - .filter(entry -> !entry.getValue().equals(HiveColumnStatistics.empty())) - .collect(toImmutableMap(Map.Entry::getKey, Map.Entry::getValue))); - } - public static class 
Builder { private HiveBasicStatistics basicStatistics = HiveBasicStatistics.createEmptyStatistics(); diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/HiveMetastore.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/HiveMetastore.java index c61e3a1bd2db..b5a8a72232d3 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/HiveMetastore.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/HiveMetastore.java @@ -48,9 +48,32 @@ public interface HiveMetastore Set getSupportedColumnStatistics(Type type); - PartitionStatistics getTableStatistics(Table table); + /** + * @param columnNames Must not be empty. + * @param rowCount row count is required to calculate the number of distinct values, because Hive treats a NULL as a distinct value, resulting in a count that is off by one + */ + Map getTableColumnStatistics(String databaseName, String tableName, Set columnNames, OptionalLong rowCount); + + /** + * @param columnNames Must not be empty. + * @param partitionNamesWithRowCount row count for each partition is required to calculate the number of distinct values, because + * Hive treats a NULL as a distinct value, resulting in a count that is off by one + */ + Map> getPartitionColumnStatistics( + String databaseName, + String tableName, + Map partitionNamesWithRowCount, + Set columnNames); - Map getPartitionStatistics(Table table, List partitions); + /** + * If true, callers should inspect table and partition parameters for Spark stats. + * This method really only exists for the ThriftHiveMetastore implementation. Spark mixes table and column statistics into the table parameters, and this breaks + * the abstractions of the metastore interface. + */ + default boolean useSparkTableStatistics() + { + return false; + } void updateTableStatistics(String databaseName, String tableName, AcidTransaction transaction, StatisticsUpdateMode mode, PartitionStatistics statisticsUpdate); diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/SemiTransactionalHiveMetastore.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/SemiTransactionalHiveMetastore.java index 4d52c3084fa8..3c8a0b8a2e95 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/SemiTransactionalHiveMetastore.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/SemiTransactionalHiveMetastore.java @@ -33,6 +33,7 @@ import io.trino.filesystem.TrinoFileSystem; import io.trino.filesystem.TrinoFileSystemFactory; import io.trino.hive.thrift.metastore.DataOperationType; +import io.trino.plugin.hive.HiveBasicStatistics; import io.trino.plugin.hive.HiveColumnStatisticType; import io.trino.plugin.hive.HiveMetastoreClosure; import io.trino.plugin.hive.HiveTableHandle; @@ -72,6 +73,7 @@ import java.util.LinkedHashSet; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.Optional; import java.util.OptionalLong; import java.util.Queue; @@ -87,6 +89,7 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.stream.Collectors; +import java.util.stream.Stream; import static com.google.common.base.MoreObjects.toStringHelper; import static com.google.common.base.Preconditions.checkArgument; @@ -102,6 +105,7 @@ import static io.trino.plugin.hive.HiveErrorCode.HIVE_PATH_ALREADY_EXISTS; import static io.trino.plugin.hive.HiveErrorCode.HIVE_TABLE_DROPPED_DURING_QUERY; import static io.trino.plugin.hive.HiveMetadata.TRINO_QUERY_ID_NAME; +import 
static io.trino.plugin.hive.HivePartitionManager.extractPartitionValues; import static io.trino.plugin.hive.LocationHandle.WriteMode.DIRECT_TO_TARGET_NEW_DIRECTORY; import static io.trino.plugin.hive.TableType.MANAGED_TABLE; import static io.trino.plugin.hive.ViewReaderUtil.isTrinoMaterializedView; @@ -110,10 +114,13 @@ import static io.trino.plugin.hive.metastore.HivePrivilegeInfo.HivePrivilege.OWNERSHIP; import static io.trino.plugin.hive.metastore.MetastoreUtil.buildInitialPrivilegeSet; import static io.trino.plugin.hive.metastore.PrincipalPrivileges.NO_PRIVILEGES; +import static io.trino.plugin.hive.metastore.SparkMetastoreUtil.getSparkTableStatistics; import static io.trino.plugin.hive.metastore.StatisticsUpdateMode.MERGE_INCREMENTAL; import static io.trino.plugin.hive.metastore.StatisticsUpdateMode.OVERWRITE_ALL; import static io.trino.plugin.hive.metastore.StatisticsUpdateMode.OVERWRITE_SOME_COLUMNS; import static io.trino.plugin.hive.metastore.StatisticsUpdateMode.UNDO_MERGE_INCREMENTAL; +import static io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.getBasicStatisticsWithSparkFallback; +import static io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.getHiveBasicStatistics; import static io.trino.plugin.hive.util.AcidTables.isTransactionalTable; import static io.trino.plugin.hive.util.HiveUtil.makePartName; import static io.trino.plugin.hive.util.HiveUtil.toPartitionValues; @@ -324,7 +331,26 @@ public synchronized PartitionStatistics getTableStatistics(String databaseName, checkReadable(); Action tableAction = tableActions.get(new SchemaTableName(databaseName, tableName)); if (tableAction == null) { - return delegate.getTableStatistics(databaseName, tableName, columns); + Table table = getExistingTable(databaseName, tableName); + Set columnNames = columns.orElseGet(() -> Stream.concat(table.getDataColumns().stream(), table.getPartitionColumns().stream()) + .map(Column::getName) + .collect(toImmutableSet())); + + if (delegate.useSparkTableStatistics()) { + Optional sparkTableStatistics = getSparkTableStatistics(table.getParameters(), columnNames.stream() + .map(table::getColumn) + .flatMap(Optional::stream) + .collect(toImmutableMap(Column::getName, Column::getType))); + if (sparkTableStatistics.isPresent()) { + return sparkTableStatistics.get(); + } + } + + HiveBasicStatistics basicStatistics = getHiveBasicStatistics(table.getParameters()); + if (columnNames.isEmpty()) { + return new PartitionStatistics(basicStatistics, ImmutableMap.of()); + } + return new PartitionStatistics(basicStatistics, delegate.getTableColumnStatistics(databaseName, tableName, columnNames, basicStatistics.getRowCount())); } switch (tableAction.getType()) { case ADD: @@ -372,12 +398,47 @@ public synchronized Map getPartitionStatistics(Stri } } - Map delegateResult = delegate.getPartitionStatistics(databaseName, tableName, partitionNamesToQuery.build(), Optional.of(columns)); - if (!delegateResult.isEmpty()) { - resultBuilder.putAll(delegateResult); + Set missingPartitions = partitionNamesToQuery.build(); + if (missingPartitions.isEmpty()) { + return resultBuilder.buildOrThrow(); } - else { - partitionNamesToQuery.build().forEach(partitionName -> resultBuilder.put(partitionName, PartitionStatistics.empty())); + + Map existingPartitions = getExistingPartitions(databaseName, tableName, partitionNames); + if (delegate.useSparkTableStatistics()) { + Map unprocessedPartitions = new HashMap<>(); + existingPartitions.forEach((partitionName, partition) -> { + Optional sparkPartitionStatistics = 
getSparkTableStatistics(partition.getParameters(), columns.stream() + .map(table.get()::getColumn) + .flatMap(Optional::stream) + .collect(toImmutableMap(Column::getName, Column::getType))); + sparkPartitionStatistics.ifPresentOrElse( + statistics -> resultBuilder.put(partitionName, statistics), + () -> unprocessedPartitions.put(partitionName, partition)); + }); + existingPartitions = unprocessedPartitions; + } + + if (!existingPartitions.isEmpty()) { + Map basicStats = existingPartitions.entrySet().stream() + .collect(toImmutableMap(Entry::getKey, entry -> { + if (delegate.useSparkTableStatistics()) { + return getBasicStatisticsWithSparkFallback(entry.getValue().getParameters()); + } + return getHiveBasicStatistics(entry.getValue().getParameters()); + })); + + if (columns.isEmpty()) { + basicStats.forEach((partitionName, basicStatistics) -> resultBuilder.put(partitionName, new PartitionStatistics(basicStatistics, ImmutableMap.of()))); + } + else { + Map> columnStats = delegate.getPartitionColumnStatistics( + databaseName, + tableName, + basicStats.entrySet().stream().collect(toImmutableMap(Entry::getKey, entry -> entry.getValue().getRowCount())), + columns); + + basicStats.entrySet().forEach(entry -> resultBuilder.put(entry.getKey(), new PartitionStatistics(entry.getValue(), columnStats.getOrDefault(entry.getKey(), ImmutableMap.of())))); + } } return clearRowCountWhenAllPartitionsHaveNoRows(resultBuilder.buildOrThrow()); } @@ -447,7 +508,7 @@ public synchronized HivePageSinkMetadata generatePageSinkMetadata(SchemaTableNam } else { ImmutableMap.Builder, Optional> modifiedPartitionMapBuilder = ImmutableMap.builder(); - for (Map.Entry, Action> entry : partitionActionMap.entrySet()) { + for (Entry, Action> entry : partitionActionMap.entrySet()) { modifiedPartitionMapBuilder.put(entry.getKey(), getPartitionFromPartitionAction(entry.getValue())); } modifiedPartitionMap = modifiedPartitionMapBuilder.buildOrThrow(); @@ -1030,55 +1091,53 @@ public synchronized void finishInsertIntoExistingPartitions( boolean cleanExtraOutputFilesOnCommit) { setShared(); - SchemaTableName schemaTableName = new SchemaTableName(databaseName, tableName); - Map, Action> partitionActionsOfTable = partitionActions.computeIfAbsent(schemaTableName, k -> new HashMap<>()); + Table table = getExistingTable(databaseName, tableName); + Map, Action> partitionActionsOfTable = partitionActions.computeIfAbsent(table.getSchemaTableName(), k -> new HashMap<>()); for (PartitionUpdateInfo partitionInfo : partitionUpdateInfos) { Action oldPartitionAction = partitionActionsOfTable.get(partitionInfo.partitionValues); if (oldPartitionAction != null) { switch (oldPartitionAction.getType()) { - case DROP, DROP_PRESERVE_DATA -> throw new PartitionNotFoundException(schemaTableName, partitionInfo.partitionValues); + case DROP, DROP_PRESERVE_DATA -> throw new PartitionNotFoundException(table.getSchemaTableName(), partitionInfo.partitionValues); case ADD, ALTER, INSERT_EXISTING, MERGE -> throw new UnsupportedOperationException("Inserting into a partition that were added, altered, or inserted into in the same transaction is not supported"); default -> throw new IllegalStateException("Unknown action type: " + oldPartitionAction.getType()); } } } + // new data will only include current table columns + // partition column stats do not include the partition keys + Set columnNames = table.getDataColumns().stream() + .map(Column::getName) + .collect(toImmutableSet()); for (List partitionInfoBatch : Iterables.partition(partitionUpdateInfos, 100)) { List 
partitionNames = partitionInfoBatch.stream() .map(PartitionUpdateInfo::partitionValues) .map(partitionValues -> getPartitionName(databaseName, tableName, partitionValues)) .collect(toImmutableList()); - Map> partitionsByNames = delegate.getPartitionsByNames( - schemaTableName.getSchemaName(), - schemaTableName.getTableName(), - partitionNames); - Map partitionStatistics = delegate.getPartitionStatistics( - schemaTableName.getSchemaName(), - schemaTableName.getTableName(), - ImmutableSet.copyOf(partitionNames)); + Map partitionsByNames = getExistingPartitions(databaseName, tableName, partitionNames); + Map basicStats = partitionsByNames.entrySet().stream() + .collect(toImmutableMap(Entry::getKey, entry -> getHiveBasicStatistics(entry.getValue().getParameters()))); + Map> columnStats = delegate.getPartitionColumnStatistics( + databaseName, + tableName, + basicStats.entrySet().stream().collect(toImmutableMap(Entry::getKey, entry -> entry.getValue().getRowCount())), + columnNames); for (int i = 0; i < partitionInfoBatch.size(); i++) { PartitionUpdateInfo partitionInfo = partitionInfoBatch.get(i); String partitionName = partitionNames.get(i); - Optional partition = partitionsByNames.get(partitionName); - if (partition.isEmpty()) { - throw new PartitionNotFoundException(schemaTableName, partitionInfo.partitionValues); - } - - PartitionStatistics currentStatistics = partitionStatistics.get(partitionName); - if (currentStatistics == null) { - throw new TrinoException(HIVE_METASTORE_ERROR, "currentStatistics is null"); - } + Partition partition = partitionsByNames.get(partitionName); + PartitionStatistics currentStatistics = new PartitionStatistics(basicStats.get(partitionName), columnStats.get(partitionName)); partitionActionsOfTable.put( partitionInfo.partitionValues, new Action<>( ActionType.INSERT_EXISTING, new PartitionAndMore( - partition.get(), + partition, partitionInfo.currentLocation, Optional.of(partitionInfo.fileNames), MERGE_INCREMENTAL.updatePartitionStatistics(currentStatistics, partitionInfo.statisticsUpdate), @@ -1210,6 +1269,13 @@ private Table getExistingTable(String databaseName, String tableName) .orElseThrow(() -> new TableNotFoundException(new SchemaTableName(databaseName, tableName))); } + private Map getExistingPartitions(String databaseName, String tableName, Collection partitionNames) + { + return delegate.getPartitionsByNames(databaseName, tableName, ImmutableList.copyOf(partitionNames)).entrySet().stream() + .collect(toImmutableMap(Entry::getKey, entry -> entry.getValue() + .orElseThrow(() -> new PartitionNotFoundException(new SchemaTableName(databaseName, tableName), extractPartitionValues(entry.getKey()))))); + } + @Override public synchronized void grantTablePrivileges(String databaseName, String tableName, HivePrincipal grantee, HivePrincipal grantor, Set privileges, boolean grantOption) { @@ -1532,7 +1598,7 @@ private void commitShared() Committer committer = new Committer(transaction); try { - for (Map.Entry> entry : tableActions.entrySet()) { + for (Entry> entry : tableActions.entrySet()) { SchemaTableName schemaTableName = entry.getKey(); Action action = entry.getValue(); switch (action.getType()) { @@ -1555,9 +1621,9 @@ private void commitShared() throw new IllegalStateException("Unknown action type: " + action.getType()); } } - for (Map.Entry, Action>> tableEntry : partitionActions.entrySet()) { + for (Entry, Action>> tableEntry : partitionActions.entrySet()) { SchemaTableName schemaTableName = tableEntry.getKey(); - for (Map.Entry, Action> partitionEntry : 
tableEntry.getValue().entrySet()) { + for (Entry, Action> partitionEntry : tableEntry.getValue().entrySet()) { List partitionValues = partitionEntry.getKey(); Action action = partitionEntry.getValue(); switch (action.getType()) { @@ -1971,14 +2037,19 @@ private void cleanExtraOutputFiles(ConnectorIdentity identity, String queryId, T private PartitionStatistics getExistingPartitionStatistics(Partition partition, String partitionName) { try { - PartitionStatistics statistics = delegate.getPartitionStatistics(partition.getDatabaseName(), partition.getTableName(), ImmutableSet.of(partitionName)) + HiveBasicStatistics basicStatistics = getHiveBasicStatistics(partition.getParameters()); + Map columnStatistics = delegate.getPartitionColumnStatistics( + partition.getDatabaseName(), + partition.getTableName(), + ImmutableMap.of(partitionName, basicStatistics.getRowCount()), + partition.getColumns().stream().map(Column::getName).collect(toImmutableSet())) .get(partitionName); - if (statistics == null) { + if (columnStatistics == null) { throw new TrinoException( TRANSACTION_CONFLICT, format("The partition that this transaction modified was deleted in another transaction. %s %s", partition.getTableName(), partition.getValues())); } - return statistics; + return new PartitionStatistics(basicStatistics, columnStatistics); } catch (TrinoException e) { if (e.getErrorCode().equals(HIVE_CORRUPTED_COLUMN_STATISTICS.toErrorCode())) { diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftSparkMetastoreUtil.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/SparkMetastoreUtil.java similarity index 63% rename from plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftSparkMetastoreUtil.java rename to plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/SparkMetastoreUtil.java index 5b75c4837073..70c289781d75 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftSparkMetastoreUtil.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/SparkMetastoreUtil.java @@ -11,20 +11,23 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.trino.plugin.hive.metastore.thrift; +package io.trino.plugin.hive.metastore; import com.google.common.annotations.VisibleForTesting; -import io.trino.hive.thrift.metastore.FieldSchema; -import io.trino.hive.thrift.metastore.Table; +import com.google.common.primitives.Doubles; +import com.google.common.primitives.Longs; import io.trino.plugin.hive.HiveBasicStatistics; import io.trino.plugin.hive.HiveType; import io.trino.plugin.hive.PartitionStatistics; -import io.trino.plugin.hive.metastore.HiveColumnStatistics; import io.trino.plugin.hive.type.PrimitiveTypeInfo; import io.trino.plugin.hive.type.TypeInfo; +import jakarta.annotation.Nullable; -import java.util.AbstractMap; +import java.math.BigDecimal; +import java.time.DateTimeException; +import java.time.LocalDate; import java.util.Map; +import java.util.Optional; import java.util.OptionalDouble; import java.util.OptionalLong; @@ -36,15 +39,11 @@ import static io.trino.plugin.hive.metastore.HiveColumnStatistics.createDoubleColumnStatistics; import static io.trino.plugin.hive.metastore.HiveColumnStatistics.createIntegerColumnStatistics; import static io.trino.plugin.hive.metastore.HiveColumnStatistics.createStringColumnStatistics; -import static io.trino.plugin.hive.metastore.thrift.ThriftMetastoreParameterParserUtils.toDate; -import static io.trino.plugin.hive.metastore.thrift.ThriftMetastoreParameterParserUtils.toDecimal; -import static io.trino.plugin.hive.metastore.thrift.ThriftMetastoreParameterParserUtils.toDouble; -import static io.trino.plugin.hive.metastore.thrift.ThriftMetastoreParameterParserUtils.toLong; import static io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.NUM_ROWS; import static io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.getTotalSizeInBytes; import static io.trino.plugin.hive.type.Category.PRIMITIVE; -final class ThriftSparkMetastoreUtil +public final class SparkMetastoreUtil { private static final String SPARK_SQL_STATS_PREFIX = "spark.sql.statistics."; private static final String COLUMN_STATS_PREFIX = SPARK_SQL_STATS_PREFIX + "colStats."; @@ -54,22 +53,24 @@ final class ThriftSparkMetastoreUtil private static final String COLUMN_MIN = "min"; private static final String COLUMN_MAX = "max"; - private ThriftSparkMetastoreUtil() {} + private SparkMetastoreUtil() {} - public static PartitionStatistics getTableStatistics(Table table) + public static Optional getSparkTableStatistics(Map parameters, Map columns) { - Map parameters = table.getParameters(); + if (toLong(parameters.get(NUM_ROWS)).isPresent()) { + return Optional.empty(); + } + HiveBasicStatistics sparkBasicStatistics = getSparkBasicStatistics(parameters); if (sparkBasicStatistics.getRowCount().isEmpty()) { - return PartitionStatistics.empty(); + return Optional.empty(); } - Map columnStatistics = table.getSd().getCols().stream() - .map(fieldSchema -> new AbstractMap.SimpleEntry<>( - fieldSchema.getName(), - fromMetastoreColumnStatistics(fieldSchema, parameters, sparkBasicStatistics.getRowCount().getAsLong()))) + long rowCount = sparkBasicStatistics.getRowCount().getAsLong(); + Map columnStatistics = columns.entrySet().stream() + .map(entry -> Map.entry(entry.getKey(), fromMetastoreColumnStatistics(entry.getKey(), entry.getValue(), parameters, rowCount))) .collect(toImmutableMap(Map.Entry::getKey, Map.Entry::getValue)); - return new PartitionStatistics(sparkBasicStatistics, columnStatistics); + return Optional.of(new PartitionStatistics(sparkBasicStatistics, columnStatistics)); } public static HiveBasicStatistics 
getSparkBasicStatistics(Map parameters) @@ -85,19 +86,18 @@ public static HiveBasicStatistics getSparkBasicStatistics(Map pa } @VisibleForTesting - static HiveColumnStatistics fromMetastoreColumnStatistics(FieldSchema fieldSchema, Map columnStatistics, long rowCount) + static HiveColumnStatistics fromMetastoreColumnStatistics(String columnName, HiveType type, Map parameters, long rowCount) { - HiveType type = HiveType.valueOf(fieldSchema.getType()); TypeInfo typeInfo = type.getTypeInfo(); if (typeInfo.getCategory() != PRIMITIVE) { // Spark does not support table statistics for non-primitive types return HiveColumnStatistics.empty(); } - String field = COLUMN_STATS_PREFIX + fieldSchema.getName() + "."; - OptionalLong maxLength = toLong(columnStatistics.get(field + "maxLen")); - OptionalDouble avgLength = toDouble(columnStatistics.get(field + "avgLen")); - OptionalLong nullsCount = toLong(columnStatistics.get(field + "nullCount")); - OptionalLong distinctValuesCount = toLong(columnStatistics.get(field + "distinctCount")); + String field = COLUMN_STATS_PREFIX + columnName + "."; + OptionalLong maxLength = toLong(parameters.get(field + "maxLen")); + OptionalDouble avgLength = toDouble(parameters.get(field + "avgLen")); + OptionalLong nullsCount = toLong(parameters.get(field + "nullCount")); + OptionalLong distinctValuesCount = toLong(parameters.get(field + "distinctCount")); return switch (((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory()) { case BOOLEAN -> createBooleanColumnStatistics( @@ -105,8 +105,8 @@ static HiveColumnStatistics fromMetastoreColumnStatistics(FieldSchema fieldSchem OptionalLong.empty(), nullsCount); case BYTE, SHORT, INT, LONG -> createIntegerColumnStatistics( - toLong(columnStatistics.get(field + COLUMN_MIN)), - toLong(columnStatistics.get(field + COLUMN_MAX)), + toLong(parameters.get(field + COLUMN_MIN)), + toLong(parameters.get(field + COLUMN_MAX)), nullsCount, fromMetastoreDistinctValuesCount(distinctValuesCount, nullsCount, rowCount)); case TIMESTAMP -> createIntegerColumnStatistics( @@ -115,8 +115,8 @@ static HiveColumnStatistics fromMetastoreColumnStatistics(FieldSchema fieldSchem nullsCount, fromMetastoreDistinctValuesCount(distinctValuesCount, nullsCount, rowCount)); case FLOAT, DOUBLE -> createDoubleColumnStatistics( - toDouble(columnStatistics.get(field + COLUMN_MIN)), - toDouble(columnStatistics.get(field + COLUMN_MAX)), + toDouble(parameters.get(field + COLUMN_MIN)), + toDouble(parameters.get(field + COLUMN_MAX)), nullsCount, fromMetastoreDistinctValuesCount(distinctValuesCount, nullsCount, rowCount)); case STRING, VARCHAR, CHAR -> createStringColumnStatistics( @@ -125,8 +125,8 @@ static HiveColumnStatistics fromMetastoreColumnStatistics(FieldSchema fieldSchem nullsCount, fromMetastoreDistinctValuesCount(distinctValuesCount, nullsCount, rowCount)); case DATE -> createDateColumnStatistics( - toDate(columnStatistics.get(field + COLUMN_MIN)), - toDate(columnStatistics.get(field + COLUMN_MAX)), + toDate(parameters.get(field + COLUMN_MIN)), + toDate(parameters.get(field + COLUMN_MAX)), nullsCount, fromMetastoreDistinctValuesCount(distinctValuesCount, nullsCount, rowCount)); case BINARY -> createBinaryColumnStatistics( @@ -134,8 +134,8 @@ static HiveColumnStatistics fromMetastoreColumnStatistics(FieldSchema fieldSchem getTotalSizeInBytes(avgLength, OptionalLong.of(rowCount), nullsCount), nullsCount); case DECIMAL -> createDecimalColumnStatistics( - toDecimal(columnStatistics.get(field + COLUMN_MIN)), - toDecimal(columnStatistics.get(field + COLUMN_MAX)), + 
toDecimal(parameters.get(field + COLUMN_MIN)), + toDecimal(parameters.get(field + COLUMN_MAX)), nullsCount, fromMetastoreDistinctValuesCount(distinctValuesCount, nullsCount, rowCount)); case TIMESTAMPLOCALTZ, INTERVAL_YEAR_MONTH, INTERVAL_DAY_TIME, VOID, UNKNOWN -> HiveColumnStatistics.empty(); @@ -164,4 +164,59 @@ private static long fromMetastoreDistinctValuesCount(long distinctValuesCount, l // the metastore may store an estimate, so the value stored may be higher than the total number of rows return Math.min(distinctValuesCount, nonNullsCount); } + + private static OptionalLong toLong(@Nullable String parameterValue) + { + if (parameterValue == null) { + return OptionalLong.empty(); + } + Long longValue = Longs.tryParse(parameterValue); + if (longValue == null || longValue < 0) { + return OptionalLong.empty(); + } + return OptionalLong.of(longValue); + } + + private static OptionalDouble toDouble(@Nullable String parameterValue) + { + if (parameterValue == null) { + return OptionalDouble.empty(); + } + Double doubleValue = Doubles.tryParse(parameterValue); + if (doubleValue == null || doubleValue < 0) { + return OptionalDouble.empty(); + } + return OptionalDouble.of(doubleValue); + } + + private static Optional toDecimal(@Nullable String parameterValue) + { + if (parameterValue == null) { + return Optional.empty(); + } + try { + BigDecimal decimal = new BigDecimal(parameterValue); + if (decimal.compareTo(BigDecimal.ZERO) < 0) { + return Optional.empty(); + } + return Optional.of(decimal); + } + catch (NumberFormatException exception) { + return Optional.empty(); + } + } + + private static Optional toDate(@Nullable String parameterValue) + { + if (parameterValue == null) { + return Optional.empty(); + } + try { + LocalDate date = LocalDate.parse(parameterValue); + return Optional.of(date); + } + catch (DateTimeException exception) { + return Optional.empty(); + } + } } diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/cache/CachingHiveMetastore.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/cache/CachingHiveMetastore.java index 65d82c839f6b..43903cd7ce83 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/cache/CachingHiveMetastore.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/cache/CachingHiveMetastore.java @@ -35,7 +35,6 @@ import io.trino.plugin.hive.acid.AcidOperation; import io.trino.plugin.hive.acid.AcidTransaction; import io.trino.plugin.hive.metastore.AcidTransactionOwner; -import io.trino.plugin.hive.metastore.Column; import io.trino.plugin.hive.metastore.Database; import io.trino.plugin.hive.metastore.HiveColumnStatistics; import io.trino.plugin.hive.metastore.HiveMetastore; @@ -85,20 +84,18 @@ import static com.google.common.cache.CacheLoader.asyncReloading; import static com.google.common.collect.ImmutableList.toImmutableList; import static com.google.common.collect.ImmutableMap.toImmutableMap; -import static com.google.common.collect.ImmutableSet.toImmutableSet; import static com.google.common.collect.Sets.difference; import static com.google.common.util.concurrent.Futures.immediateFuture; import static io.trino.cache.CacheUtils.invalidateAllIf; import static io.trino.cache.CacheUtils.uncheckedCacheGet; import static io.trino.plugin.hive.metastore.HivePartitionName.hivePartitionName; import static io.trino.plugin.hive.metastore.HiveTableName.hiveTableName; -import static io.trino.plugin.hive.metastore.MetastoreUtil.makePartitionName; import static 
io.trino.plugin.hive.metastore.PartitionFilter.partitionFilter; +import static io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.getHiveBasicStatistics; import static io.trino.plugin.hive.util.HiveUtil.makePartName; import static java.util.Collections.unmodifiableSet; import static java.util.Objects.requireNonNull; import static java.util.concurrent.TimeUnit.MILLISECONDS; -import static java.util.function.UnaryOperator.identity; /** * Hive Metastore Cache @@ -123,8 +120,8 @@ public enum StatsRecording private final LoadingCache> relationTypesCache; private final LoadingCache>> allRelationTypesCache; private final LoadingCache> tablesWithParameterCache; - private final Cache> tableStatisticsCache; - private final Cache> partitionStatisticsCache; + private final Cache>> tableColumnStatisticsCache; + private final Cache>> partitionStatisticsCache; private final LoadingCache> viewNamesCache; private final LoadingCache>> allViewNamesCache; private final Cache>> partitionCache; @@ -211,7 +208,7 @@ private CachingHiveMetastore( relationTypesCache = cacheFactory.buildCache(this::loadRelationTypes); allRelationTypesCache = cacheFactory.buildCache(ignore -> loadRelationTypes()); tablesWithParameterCache = cacheFactory.buildCache(this::loadTablesMatchingParameter); - tableStatisticsCache = statsCacheFactory.buildCache(this::refreshTableStatistics); + tableColumnStatisticsCache = statsCacheFactory.buildCache(this::refreshTableColumnStatistics); tableCache = cacheFactory.buildCache(this::loadTable); viewNamesCache = cacheFactory.buildCache(this::loadAllViews); allViewNamesCache = cacheFactory.buildCache(ignore -> loadAllViews()); @@ -240,7 +237,7 @@ public void flushCache() partitionCache.invalidateAll(); partitionFilterCache.invalidateAll(); tablePrivilegesCache.invalidateAll(); - tableStatisticsCache.invalidateAll(); + tableColumnStatisticsCache.invalidateAll(); partitionStatisticsCache.invalidateAll(); rolesCache.invalidateAll(); } @@ -256,20 +253,22 @@ public void flushPartitionCache(String schemaName, String tableName, List partitionNameToCheck.map(value -> value.equals(providedPartitionName)).orElse(false)); } - private AtomicReference refreshTableStatistics(HiveTableName tableName, AtomicReference currentValueHolder) + private AtomicReference> refreshTableColumnStatistics(HiveTableName tableName, AtomicReference> currentValueHolder) { - PartitionStatistics currentValue = currentValueHolder.get(); + Map currentValue = currentValueHolder.get(); if (currentValue == null) { // do not refresh empty value return currentValueHolder; } - PartitionStatistics reloaded = getTable(tableName.getDatabaseName(), tableName.getTableName()) - .map(table -> table.withSelectedDataColumnsOnly(currentValue.getColumnStatistics().keySet())) - .map(delegate::getTableStatistics) + OptionalLong rowCount = getTable(tableName.getDatabaseName(), tableName.getTableName()) + .map(table -> getHiveBasicStatistics(table.getParameters()).getRowCount()) .orElseThrow(); + // only refresh currently loaded columns + Map columnStatistics = delegate.getTableColumnStatistics(tableName.getDatabaseName(), tableName.getTableName(), currentValue.keySet(), rowCount); + // return new value holder to have only fresh data in case of concurrent loads - return new AtomicReference<>(reloaded); + return new AtomicReference<>(columnStatistics); } private static V get(LoadingCache cache, K key) @@ -456,57 +455,57 @@ private Optional loadTable(HiveTableName hiveTableName) return delegate.getTable(hiveTableName.getDatabaseName(), 
hiveTableName.getTableName()); } - /** - * The method will cache and return columns specified in the {@link Table#getDataColumns()} - * but may return more if other columns are already cached. - */ @Override - public PartitionStatistics getTableStatistics(Table table) - { - Set dataColumns = table.getDataColumns().stream().map(Column::getName).collect(toImmutableSet()); - - return getIncrementally( - tableStatisticsCache, - hiveTableName(table.getDatabaseName(), table.getTableName()), - currentStatistics -> currentStatistics.getColumnStatistics().keySet().containsAll(dataColumns), - () -> delegate.getTableStatistics(table), + public Map getTableColumnStatistics(String databaseName, String tableName, Set columnNames, OptionalLong rowCount) + { + checkArgument(!columnNames.isEmpty(), "columnNames is empty"); + Map columnStatistics = getIncrementally( + tableColumnStatisticsCache, + hiveTableName(databaseName, tableName), + currentStatistics -> currentStatistics.keySet().containsAll(columnNames), + () -> delegate.getTableColumnStatistics(databaseName, tableName, columnNames, rowCount), currentStatistics -> { - SetView missingColumns = difference(dataColumns, currentStatistics.getColumnStatistics().keySet()); - Table tableWithOnlyMissingColumns = table.withSelectedDataColumnsOnly(missingColumns); - return delegate.getTableStatistics(tableWithOnlyMissingColumns); + SetView missingColumns = difference(columnNames, currentStatistics.keySet()); + return delegate.getTableColumnStatistics(databaseName, tableName, missingColumns, rowCount); }, - (currentStats, newStats) -> mergePartitionColumnStatistics(currentStats, newStats, dataColumns)) - // HiveColumnStatistics.empty() are removed to make output consistent with non-cached metastore which simplifies testing - .withEmptyColumnStatisticsRemoved(); + (currentStats, newStats) -> mergeColumnStatistics(currentStats, newStats, columnNames)); + // HiveColumnStatistics.empty() are removed to make output consistent with non-cached metastore which simplifies testing + return removeEmptyColumnStatistics(columnNames, columnStatistics); } - /** - * The method will cache and return columns specified in the {@link Table#getDataColumns()} - * but may return more if other columns are already cached for a given partition. 
- */ @Override - public Map getPartitionStatistics(Table table, List partitions) + public Map> getPartitionColumnStatistics(String databaseName, String tableName, Map partitionNamesWithRowCount, Set columnNames) { - HiveTableName hiveTableName = hiveTableName(table.getDatabaseName(), table.getTableName()); - Map partitionsByName = partitions.stream() - .collect(toImmutableMap(partition -> hivePartitionName(hiveTableName, makePartitionName(table, partition)), identity())); - - Set dataColumns = table.getDataColumns().stream().map(Column::getName).collect(toImmutableSet()); - - Map statistics = getAll( + checkArgument(!columnNames.isEmpty(), "columnNames is empty"); + HiveTableName hiveTableName = hiveTableName(databaseName, tableName); + List partitionNames = partitionNamesWithRowCount.keySet().stream().map(partitionName -> hivePartitionName(hiveTableName, partitionName)).toList(); + Map> statistics = getAll( partitionStatisticsCache, - partitionsByName.keySet(), - missingPartitions -> loadPartitionsColumnStatistics(table, partitionsByName, missingPartitions), - currentStats -> currentStats.getColumnStatistics().keySet().containsAll(dataColumns), - (currentStats, newStats) -> mergePartitionColumnStatistics(currentStats, newStats, dataColumns)); + partitionNames, + missingPartitions -> loadPartitionsColumnStatistics(databaseName, tableName, columnNames, missingPartitions, partitionNamesWithRowCount), + currentStats -> currentStats.keySet().containsAll(columnNames), + (currentStats, newStats) -> mergeColumnStatistics(currentStats, newStats, columnNames)); + // HiveColumnStatistics.empty() are removed to make output consistent with non-cached metastore which simplifies testing return statistics.entrySet().stream() .collect(toImmutableMap( entry -> entry.getKey().getPartitionName().orElseThrow(), - // HiveColumnStatistics.empty() are removed to make output consistent with non-cached metastore which simplifies testing - entry -> entry.getValue().withEmptyColumnStatisticsRemoved())); + entry -> removeEmptyColumnStatistics(columnNames, entry.getValue()))); + } + + @Override + public boolean useSparkTableStatistics() + { + return delegate.useSparkTableStatistics(); } - private PartitionStatistics mergePartitionColumnStatistics(PartitionStatistics currentStats, PartitionStatistics newStats, Set dataColumns) + private static ImmutableMap removeEmptyColumnStatistics(Set columnNames, Map columnStatistics) + { + return columnStatistics.entrySet().stream() + .filter(entry -> columnNames.contains(entry.getKey()) && !entry.getValue().equals(HiveColumnStatistics.empty())) + .collect(toImmutableMap(Entry::getKey, Entry::getValue)); + } + + private Map mergeColumnStatistics(Map currentStats, Map newStats, Set dataColumns) { requireNonNull(newStats, "newStats is null"); ImmutableMap.Builder columnStatisticsBuilder = ImmutableMap.builder(); @@ -517,27 +516,31 @@ private PartitionStatistics mergePartitionColumnStatistics(PartitionStatistics c column -> new AbstractMap.SimpleEntry<>(column, HiveColumnStatistics.empty()))); } if (currentStats != null) { - columnStatisticsBuilder.putAll(currentStats.getColumnStatistics()); + columnStatisticsBuilder.putAll(currentStats); } - columnStatisticsBuilder.putAll(newStats.getColumnStatistics()); - return new PartitionStatistics( - newStats.getBasicStatistics(), - columnStatisticsBuilder.buildKeepingLast()); + columnStatisticsBuilder.putAll(newStats); + return columnStatisticsBuilder.buildKeepingLast(); } - private Map loadPartitionsColumnStatistics(Table table, Map 
partitionsByName, Collection partitionNamesToLoad) + private Map> loadPartitionsColumnStatistics( + String databaseName, + String tableName, + Set columnNames, + Collection partitionNamesToLoad, + Map rowCounts) { if (partitionNamesToLoad.isEmpty()) { return ImmutableMap.of(); } - ImmutableMap.Builder result = ImmutableMap.builder(); - List partitionsToLoad = partitionNamesToLoad.stream() - .map(partitionsByName::get) - .collect(toImmutableList()); - Map statisticsByPartitionName = delegate.getPartitionStatistics(table, partitionsToLoad); + Map partitionsToLoadWithRowCount = partitionNamesToLoad.stream() + .collect(toImmutableMap( + partitionName -> partitionName.getPartitionName().orElseThrow(), + partitionName -> rowCounts.get(partitionName.getPartitionName().orElseThrow()))); + Map> columnStatistics = delegate.getPartitionColumnStatistics(databaseName, tableName, partitionsToLoadWithRowCount, columnNames); + + ImmutableMap.Builder> result = ImmutableMap.builder(); for (HivePartitionName partitionName : partitionNamesToLoad) { - String stringNameForPartition = partitionName.getPartitionName().orElseThrow(); - result.put(partitionName, statisticsByPartitionName.get(stringNameForPartition)); + result.put(partitionName, columnStatistics.getOrDefault(partitionName.getPartitionName().orElseThrow(), ImmutableMap.of())); } return result.buildOrThrow(); } @@ -550,7 +553,7 @@ public void updateTableStatistics(String databaseName, String tableName, AcidTra } finally { HiveTableName hiveTableName = hiveTableName(databaseName, tableName); - tableStatisticsCache.invalidate(hiveTableName); + tableColumnStatisticsCache.invalidate(hiveTableName); // basic stats are stored as table properties tableCache.invalidate(hiveTableName); } @@ -832,7 +835,7 @@ public void invalidateTable(String databaseName, String tableName) viewNamesCache.invalidate(databaseName); allViewNamesCache.invalidateAll(); invalidateAllIf(tablePrivilegesCache, userTableKey -> userTableKey.matches(databaseName, tableName)); - tableStatisticsCache.invalidate(hiveTableName); + tableColumnStatisticsCache.invalidate(hiveTableName); invalidateTablesWithParameterCache(databaseName, tableName); invalidatePartitionCache(databaseName, tableName); } @@ -1348,9 +1351,9 @@ public CacheStatsMBean getTableWithParameterStats() @Managed @Nested - public CacheStatsMBean getTableStatisticsStats() + public CacheStatsMBean getTableColumnStatisticsStats() { - return new CacheStatsMBean(tableStatisticsCache); + return new CacheStatsMBean(tableColumnStatisticsCache); } @Managed @@ -1459,12 +1462,12 @@ LoadingCache> getTablesWithParameterCa return tablesWithParameterCache; } - Cache> getTableStatisticsCache() + Cache>> getTableColumnStatisticsCache() { - return tableStatisticsCache; + return tableColumnStatisticsCache; } - Cache> getPartitionStatisticsCache() + Cache>> getPartitionStatisticsCache() { return partitionStatisticsCache; } diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/cache/SharedHiveMetastoreCache.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/cache/SharedHiveMetastoreCache.java index 9eba6ae621e8..955fbbbdc33c 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/cache/SharedHiveMetastoreCache.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/cache/SharedHiveMetastoreCache.java @@ -278,9 +278,9 @@ public AggregateCacheStatsMBean getTableWithParameterStats() @Managed @Nested - public AggregateCacheStatsMBean getTableStatisticsStats() + public 
AggregateCacheStatsMBean getTableColumnStatisticsCache() { - return new AggregateCacheStatsMBean(CachingHiveMetastore::getTableStatisticsCache); + return new AggregateCacheStatsMBean(CachingHiveMetastore::getTableColumnStatisticsCache); } @Managed diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/file/FileHiveMetastore.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/file/FileHiveMetastore.java index b8ce5770b52d..5eb5b36fdfb3 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/file/FileHiveMetastore.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/file/FileHiveMetastore.java @@ -31,7 +31,6 @@ import io.trino.filesystem.TrinoFileSystem; import io.trino.filesystem.TrinoFileSystemFactory; import io.trino.filesystem.TrinoOutputFile; -import io.trino.plugin.hive.HiveBasicStatistics; import io.trino.plugin.hive.HiveColumnStatisticType; import io.trino.plugin.hive.HiveType; import io.trino.plugin.hive.NodeVersion; @@ -84,6 +83,7 @@ import java.util.Map.Entry; import java.util.Objects; import java.util.Optional; +import java.util.OptionalLong; import java.util.Set; import java.util.function.Function; import java.util.function.Predicate; @@ -444,36 +444,37 @@ public Set getSupportedColumnStatistics(Type type) } @Override - public synchronized PartitionStatistics getTableStatistics(Table table) - { - return getTableStatistics(table.getDatabaseName(), table.getTableName()); - } - - private synchronized PartitionStatistics getTableStatistics(String databaseName, String tableName) + public synchronized Map getTableColumnStatistics(String databaseName, String tableName, Set columnNames, OptionalLong rowCount) { + checkArgument(!columnNames.isEmpty(), "columnNames is empty"); Location tableMetadataDirectory = getTableMetadataDirectory(databaseName, tableName); TableMetadata tableMetadata = readSchemaFile(TABLE, tableMetadataDirectory, tableCodec) .orElseThrow(() -> new TableNotFoundException(new SchemaTableName(databaseName, tableName))); checkVersion(tableMetadata.getWriterVersion()); - HiveBasicStatistics basicStatistics = getHiveBasicStatistics(tableMetadata.getParameters()); - Map columnStatistics = tableMetadata.getColumnStatistics(); - return new PartitionStatistics(basicStatistics, columnStatistics); + return tableMetadata.getColumnStatistics().entrySet().stream() + .filter(entry -> columnNames.contains(entry.getKey())) + .collect(toImmutableMap(Entry::getKey, Entry::getValue)); } @Override - public synchronized Map getPartitionStatistics(Table table, List partitions) + public synchronized Map> getPartitionColumnStatistics(String databaseName, String tableName, Map partitionNamesWithRowCount, Set columnNames) { - return partitions.stream() - .collect(toImmutableMap(partition -> makePartitionName(table, partition), partition -> getPartitionStatisticsInternal(table, partition.getValues()))); + checkArgument(!columnNames.isEmpty(), "columnNames is empty"); + ImmutableMap.Builder> result = ImmutableMap.builder(); + for (String partitionName : partitionNamesWithRowCount.keySet()) { + result.put(partitionName, getPartitionStatisticsInternal(databaseName, tableName, partitionName, columnNames)); + } + return result.buildOrThrow(); } - private synchronized PartitionStatistics getPartitionStatisticsInternal(Table table, List partitionValues) + private synchronized Map getPartitionStatisticsInternal(String databaseName, String tableName, String partitionName, Set columnNames) { - Location partitionDirectory = 
getPartitionMetadataDirectory(table, ImmutableList.copyOf(partitionValues)); + Location partitionDirectory = getPartitionMetadataDirectory(databaseName, tableName, partitionName); PartitionMetadata partitionMetadata = readSchemaFile(PARTITION, partitionDirectory, partitionCodec) - .orElseThrow(() -> new PartitionNotFoundException(table.getSchemaTableName(), partitionValues)); - HiveBasicStatistics basicStatistics = getHiveBasicStatistics(partitionMetadata.getParameters()); - return new PartitionStatistics(basicStatistics, partitionMetadata.getColumnStatistics()); + .orElseThrow(() -> new PartitionNotFoundException(new SchemaTableName(databaseName, tableName), extractPartitionValues(partitionName))); + return partitionMetadata.getColumnStatistics().entrySet().stream() + .filter(entry -> columnNames.contains(entry.getKey())) + .collect(toImmutableMap(Entry::getKey, Entry::getValue)); } private Table getRequiredTable(String databaseName, String tableName) @@ -499,14 +500,14 @@ private void verifyTableNotExists(String newDatabaseName, String newTableName) @Override public synchronized void updateTableStatistics(String databaseName, String tableName, AcidTransaction transaction, StatisticsUpdateMode mode, PartitionStatistics statisticsUpdate) { - PartitionStatistics originalStatistics = getTableStatistics(databaseName, tableName); - PartitionStatistics updatedStatistics = mode.updatePartitionStatistics(originalStatistics, statisticsUpdate); - Location tableMetadataDirectory = getTableMetadataDirectory(databaseName, tableName); TableMetadata tableMetadata = readSchemaFile(TABLE, tableMetadataDirectory, tableCodec) .orElseThrow(() -> new TableNotFoundException(new SchemaTableName(databaseName, tableName))); checkVersion(tableMetadata.getWriterVersion()); + PartitionStatistics originalStatistics = new PartitionStatistics(getHiveBasicStatistics(tableMetadata.getParameters()), tableMetadata.getColumnStatistics()); + PartitionStatistics updatedStatistics = mode.updatePartitionStatistics(originalStatistics, statisticsUpdate); + TableMetadata updatedMetadata = tableMetadata .withParameters(currentVersion, updateStatisticsParameters(tableMetadata.getParameters(), updatedStatistics.getBasicStatistics())) .withColumnStatistics(currentVersion, updatedStatistics.getColumnStatistics()); @@ -518,13 +519,12 @@ public synchronized void updateTableStatistics(String databaseName, String table public synchronized void updatePartitionStatistics(Table table, StatisticsUpdateMode mode, Map partitionUpdates) { partitionUpdates.forEach((partitionName, partitionUpdate) -> { - PartitionStatistics originalStatistics = getPartitionStatisticsInternal(table, extractPartitionValues(partitionName)); - PartitionStatistics updatedStatistics = mode.updatePartitionStatistics(originalStatistics, partitionUpdate); - - List partitionValues = extractPartitionValues(partitionName); - Location partitionDirectory = getPartitionMetadataDirectory(table, partitionValues); + Location partitionDirectory = getPartitionMetadataDirectory(table, partitionName); PartitionMetadata partitionMetadata = readSchemaFile(PARTITION, partitionDirectory, partitionCodec) - .orElseThrow(() -> new PartitionNotFoundException(new SchemaTableName(table.getDatabaseName(), table.getTableName()), partitionValues)); + .orElseThrow(() -> new PartitionNotFoundException(table.getSchemaTableName(), extractPartitionValues(partitionName))); + PartitionStatistics originalStatistics = new PartitionStatistics(getHiveBasicStatistics(partitionMetadata.getParameters()), 
partitionMetadata.getColumnStatistics()); + + PartitionStatistics updatedStatistics = mode.updatePartitionStatistics(originalStatistics, partitionUpdate); PartitionMetadata updatedMetadata = partitionMetadata .withParameters(updateStatisticsParameters(partitionMetadata.getParameters(), updatedStatistics.getBasicStatistics())) @@ -1567,7 +1567,12 @@ private Location getPartitionMetadataDirectory(Table table, List values) private Location getPartitionMetadataDirectory(Table table, String partitionName) { - return getTableMetadataDirectory(table).appendPath(partitionName); + return getPartitionMetadataDirectory(table.getDatabaseName(), table.getTableName(), partitionName); + } + + private Location getPartitionMetadataDirectory(String databaseName, String tableName, String partitionName) + { + return getTableMetadataDirectory(databaseName, tableName).appendPath(partitionName); } private Location getPermissionsDirectory(Table table) diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/DefaultGlueColumnStatisticsProvider.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/DefaultGlueColumnStatisticsProvider.java index 5a37e5f84fcf..9ab9bd5596f8 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/DefaultGlueColumnStatisticsProvider.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/DefaultGlueColumnStatisticsProvider.java @@ -45,10 +45,10 @@ import io.trino.spi.type.Type; import java.util.ArrayList; -import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.OptionalLong; import java.util.Set; import java.util.concurrent.CompletableFuture; import java.util.concurrent.Executor; @@ -98,28 +98,26 @@ public Set getSupportedColumnStatistics(Type type) } @Override - public Map getTableColumnStatistics(Table table) + public Map getTableColumnStatistics(String databaseName, String tableName, Set columnNames, OptionalLong rowCount) { try { - List columnNames = getAllColumns(table); - List> columnChunks = Lists.partition(columnNames, GLUE_COLUMN_READ_STAT_PAGE_SIZE); + List> columnChunks = Lists.partition(ImmutableList.copyOf(columnNames), GLUE_COLUMN_READ_STAT_PAGE_SIZE); List> getStatsFutures = columnChunks.stream() .map(partialColumns -> supplyAsync(() -> { GetColumnStatisticsForTableRequest request = new GetColumnStatisticsForTableRequest() - .withDatabaseName(table.getDatabaseName()) - .withTableName(table.getTableName()) + .withDatabaseName(databaseName) + .withTableName(tableName) .withColumnNames(partialColumns); return stats.getGetColumnStatisticsForTable().call(() -> glueClient.getColumnStatisticsForTable(request)); }, readExecutor)).collect(toImmutableList()); - HiveBasicStatistics tableStatistics = getHiveBasicStatistics(table.getParameters()); ImmutableMap.Builder columnStatsMapBuilder = ImmutableMap.builder(); for (CompletableFuture future : getStatsFutures) { GetColumnStatisticsForTableResult tableColumnsStats = getFutureValue(future, TrinoException.class); for (ColumnStatistics columnStatistics : tableColumnsStats.getColumnStatisticsList()) { columnStatsMapBuilder.put( columnStatistics.getColumnName(), - fromGlueColumnStatistics(columnStatistics.getStatisticsData(), tableStatistics.getRowCount())); + fromGlueColumnStatistics(columnStatistics.getStatisticsData(), rowCount)); } } return columnStatsMapBuilder.buildOrThrow(); @@ -130,41 +128,40 @@ public Map getTableColumnStatistics(Table table) } @Override - public Map> 
getPartitionColumnStatistics(Collection partitions) + public Map> getPartitionColumnStatistics( + String databaseName, + String tableName, + Map partitionNamesWithRowCount, + Set columnNames) { - Map>> resultsForPartition = new HashMap<>(); - for (Partition partition : partitions) { + Map>> resultsForPartition = new HashMap<>(); + for (String partitionName : partitionNamesWithRowCount.keySet()) { ImmutableList.Builder> futures = ImmutableList.builder(); - List> columnChunks = Lists.partition(partition.getColumns(), GLUE_COLUMN_READ_STAT_PAGE_SIZE); - for (List partialPartitionColumns : columnChunks) { - List columnsNames = partialPartitionColumns.stream() - .map(Column::getName) - .collect(toImmutableList()); + for (List columnBatch : Lists.partition(ImmutableList.copyOf(columnNames), GLUE_COLUMN_READ_STAT_PAGE_SIZE)) { GetColumnStatisticsForPartitionRequest request = new GetColumnStatisticsForPartitionRequest() - .withDatabaseName(partition.getDatabaseName()) - .withTableName(partition.getTableName()) - .withColumnNames(columnsNames) - .withPartitionValues(partition.getValues()); + .withDatabaseName(databaseName) + .withTableName(tableName) + .withColumnNames(columnBatch) + .withPartitionValues(partitionName); futures.add(supplyAsync(() -> stats.getGetColumnStatisticsForPartition().call(() -> glueClient.getColumnStatisticsForPartition(request)), readExecutor)); } - resultsForPartition.put(partition, futures.build()); + resultsForPartition.put(partitionName, futures.build()); } try { - ImmutableMap.Builder> partitionStatistics = ImmutableMap.builder(); - resultsForPartition.forEach((partition, futures) -> { - HiveBasicStatistics tableStatistics = getHiveBasicStatistics(partition.getParameters()); + ImmutableMap.Builder> partitionStatistics = ImmutableMap.builder(); + resultsForPartition.forEach((partitionName, futures) -> { ImmutableMap.Builder columnStatsMapBuilder = ImmutableMap.builder(); - + OptionalLong rowCount = partitionNamesWithRowCount.get(partitionName); for (CompletableFuture getColumnStatisticsResultFuture : futures) { GetColumnStatisticsForPartitionResult getColumnStatisticsResult = getFutureValue(getColumnStatisticsResultFuture); getColumnStatisticsResult.getColumnStatisticsList().forEach(columnStatistics -> columnStatsMapBuilder.put( columnStatistics.getColumnName(), - fromGlueColumnStatistics(columnStatistics.getStatisticsData(), tableStatistics.getRowCount()))); + fromGlueColumnStatistics(columnStatistics.getStatisticsData(), rowCount))); } - partitionStatistics.put(partition, columnStatsMapBuilder.buildOrThrow()); + partitionStatistics.put(partitionName, columnStatsMapBuilder.buildOrThrow()); }); return partitionStatistics.buildOrThrow(); @@ -296,9 +293,9 @@ public void updatePartitionStatistics(Set partitionSt } } - private List getAllColumns(Table table) + private Set getAllColumns(Table table) { - ImmutableList.Builder allColumns = ImmutableList.builderWithExpectedSize(table.getDataColumns().size() + table.getPartitionColumns().size()); + ImmutableSet.Builder allColumns = ImmutableSet.builderWithExpectedSize(table.getDataColumns().size() + table.getPartitionColumns().size()); table.getDataColumns().stream().map(Column::getName).forEach(allColumns::add); table.getPartitionColumns().stream().map(Column::getName).forEach(allColumns::add); return allColumns.build(); diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueColumnStatisticsProvider.java 
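For orientation, a caller-side sketch of the reshaped Glue read path: row counts now travel alongside the partition names, so the provider no longer needs the Partition objects at all. The names provider, table, and partitions are illustrative, not part of the patch, and static imports of toImmutableMap, toImmutableSet, and getHiveBasicStatistics are assumed.

    // Row counts come from the basic statistics already stored in partition parameters.
    Map<String, OptionalLong> partitionNamesWithRowCount = partitions.entrySet().stream()
            .collect(toImmutableMap(
                    Map.Entry::getKey,
                    entry -> getHiveBasicStatistics(entry.getValue().getParameters()).getRowCount()));
    Set<String> columnNames = table.getDataColumns().stream()
            .map(Column::getName)
            .collect(toImmutableSet());
    // Result is keyed by partition name, then by column name; partitions without
    // stored statistics simply have no entry.
    Map<String, Map<String, HiveColumnStatistics>> statistics = provider.getPartitionColumnStatistics(
            table.getDatabaseName(),
            table.getTableName(),
            partitionNamesWithRowCount,
            columnNames);
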
b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueColumnStatisticsProvider.java index f13846d56398..ea62cc8e3c1b 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueColumnStatisticsProvider.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueColumnStatisticsProvider.java @@ -21,8 +21,8 @@ import io.trino.plugin.hive.metastore.Table; import io.trino.spi.type.Type; -import java.util.Collection; import java.util.Map; +import java.util.OptionalLong; import java.util.Set; import static java.util.Objects.requireNonNull; @@ -31,9 +31,13 @@ public interface GlueColumnStatisticsProvider { Set getSupportedColumnStatistics(Type type); - Map getTableColumnStatistics(Table table); + Map getTableColumnStatistics(String databaseName, String tableName, Set columnNames, OptionalLong rowCount); - Map> getPartitionColumnStatistics(Collection partitions); + Map> getPartitionColumnStatistics( + String databaseName, + String tableName, + Map partitionNamesWithRowCount, + Set columns); void updateTableColumnStatistics(Table table, Map columnStatistics); diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueHiveMetastore.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueHiveMetastore.java index 24e94f8a5ed5..a6c723e5a19a 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueHiveMetastore.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueHiveMetastore.java @@ -77,6 +77,7 @@ import io.trino.filesystem.Location; import io.trino.filesystem.TrinoFileSystem; import io.trino.filesystem.TrinoFileSystemFactory; +import io.trino.plugin.hive.HiveBasicStatistics; import io.trino.plugin.hive.HiveColumnStatisticType; import io.trino.plugin.hive.HiveType; import io.trino.plugin.hive.PartitionNotFoundException; @@ -135,6 +136,7 @@ import java.util.regex.Pattern; import java.util.stream.Stream; +import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Strings.isNullOrEmpty; import static com.google.common.base.Verify.verify; import static com.google.common.collect.Comparators.lexicographical; @@ -148,7 +150,6 @@ import static io.trino.plugin.hive.HiveMetadata.TRINO_QUERY_ID_NAME; import static io.trino.plugin.hive.TableType.MANAGED_TABLE; import static io.trino.plugin.hive.TableType.VIRTUAL_VIEW; -import static io.trino.plugin.hive.metastore.MetastoreUtil.makePartitionName; import static io.trino.plugin.hive.metastore.MetastoreUtil.toPartitionName; import static io.trino.plugin.hive.metastore.MetastoreUtil.verifyCanDropColumn; import static io.trino.plugin.hive.metastore.glue.AwsSdkUtil.getPaginatedResults; @@ -302,18 +303,21 @@ private Table getExistingTable(String databaseName, String tableName) } @Override - public PartitionStatistics getTableStatistics(Table table) + public Map getTableColumnStatistics(String databaseName, String tableName, Set columnNames, OptionalLong rowCount) { - return new PartitionStatistics(getHiveBasicStatistics(table.getParameters()), columnStatisticsProvider.getTableColumnStatistics(table)); + checkArgument(!columnNames.isEmpty(), "columnNames is empty"); + return columnStatisticsProvider.getTableColumnStatistics(databaseName, tableName, columnNames, rowCount); } @Override - public Map getPartitionStatistics(Table table, List partitions) + public Map> getPartitionColumnStatistics( + String databaseName, + String tableName, + Map 
partitionNamesWithRowCount, + Set columnNames) { - return columnStatisticsProvider.getPartitionColumnStatistics(partitions).entrySet().stream() - .collect(toImmutableMap( - entry -> makePartitionName(table, entry.getKey()), - entry -> new PartitionStatistics(getHiveBasicStatistics(entry.getKey().getParameters()), entry.getValue()))); + checkArgument(!columnNames.isEmpty(), "columnNames is empty"); + return columnStatisticsProvider.getPartitionColumnStatistics(databaseName, tableName, partitionNamesWithRowCount, columnNames); } @Override @@ -329,7 +333,15 @@ private void updateTableStatisticsInternal(String databaseName, String tableName if (transaction.isAcidTransactionRunning()) { table = Table.builder(table).setWriteId(OptionalLong.of(transaction.getWriteId())).build(); } - PartitionStatistics currentStatistics = getTableStatistics(table); + // load current statistics + HiveBasicStatistics currentBasicStatistics = getHiveBasicStatistics(table.getParameters()); + Map currentColumnStatistics = getTableColumnStatistics( + databaseName, + tableName, + Stream.concat(table.getDataColumns().stream(), table.getPartitionColumns().stream()).map(Column::getName).collect(toImmutableSet()), + currentBasicStatistics.getRowCount()); + PartitionStatistics currentStatistics = new PartitionStatistics(currentBasicStatistics, currentColumnStatistics); + PartitionStatistics updatedStatistics = mode.updatePartitionStatistics(currentStatistics, statisticsUpdate); try { @@ -359,19 +371,25 @@ public void updatePartitionStatistics(Table table, StatisticsUpdateMode mode, Ma private void updatePartitionStatisticsBatch(Table table, StatisticsUpdateMode mode, Map partitionUpdates) { + // Missing partitions are ignored + Map partitions = getPartitionsByNamesInternal(table, partitionUpdates.keySet()).entrySet().stream() + .filter(entry -> entry.getValue().isPresent()) + .collect(toImmutableMap(Entry::getKey, entry -> entry.getValue().orElseThrow())); + Map currentBasicStats = partitions.entrySet().stream() + .collect(toImmutableMap(Entry::getKey, entry -> getHiveBasicStatistics(entry.getValue().getParameters()))); + Map> currentColumnStats = columnStatisticsProvider.getPartitionColumnStatistics( + table.getDatabaseName(), + table.getTableName(), + currentBasicStats.entrySet().stream() + .collect(toImmutableMap(Entry::getKey, entry -> entry.getValue().getRowCount())), + table.getDataColumns().stream().map(Column::getName).collect(toImmutableSet())); + ImmutableList.Builder partitionUpdateRequests = ImmutableList.builder(); ImmutableSet.Builder columnStatisticsUpdates = ImmutableSet.builder(); + partitions.forEach((partitionName, partition) -> { + PartitionStatistics update = partitionUpdates.get(partitionName); - Map, String> partitionValuesToName = partitionUpdates.keySet().stream() - .collect(toImmutableMap(HiveUtil::toPartitionValues, identity())); - - List partitions = batchGetPartition(table, ImmutableList.copyOf(partitionUpdates.keySet())); - Map partitionsStatistics = getPartitionStatistics(table, partitions); - - partitions.forEach(partition -> { - PartitionStatistics update = partitionUpdates.get(partitionValuesToName.get(partition.getValues())); - - PartitionStatistics currentStatistics = partitionsStatistics.get(makePartitionName(table, partition)); + PartitionStatistics currentStatistics = new PartitionStatistics(currentBasicStats.get(partitionName), currentColumnStats.get(partitionName)); PartitionStatistics updatedStatistics = mode.updatePartitionStatistics(currentStatistics, update); Map 
updatedStatisticsParameters = updateStatisticsParameters(partition.getParameters(), updatedStatistics.getBasicStatistics()); @@ -993,7 +1011,7 @@ public Map> getPartitionsByNames(Table table, List getPartitionsByNamesInternal(table, partitionNames)); } - private Map> getPartitionsByNamesInternal(Table table, List partitionNames) + private Map> getPartitionsByNamesInternal(Table table, Collection partitionNames) { requireNonNull(partitionNames, "partitionNames is null"); if (partitionNames.isEmpty()) { @@ -1015,7 +1033,7 @@ private Map> getPartitionsByNamesInternal(Table tabl return resultBuilder.buildOrThrow(); } - private List batchGetPartition(Table table, List partitionNames) + private List batchGetPartition(Table table, Collection partitionNames) { List> batchGetPartitionFutures = new ArrayList<>(); try { diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/BridgingHiveMetastore.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/BridgingHiveMetastore.java index a2767cf04971..4dd55ba1a3c7 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/BridgingHiveMetastore.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/BridgingHiveMetastore.java @@ -26,6 +26,7 @@ import io.trino.plugin.hive.acid.AcidTransaction; import io.trino.plugin.hive.metastore.AcidTransactionOwner; import io.trino.plugin.hive.metastore.Database; +import io.trino.plugin.hive.metastore.HiveColumnStatistics; import io.trino.plugin.hive.metastore.HiveMetastore; import io.trino.plugin.hive.metastore.HivePrincipal; import io.trino.plugin.hive.metastore.HivePrivilegeInfo; @@ -54,6 +55,7 @@ import java.util.Set; import java.util.stream.Collectors; +import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.collect.ImmutableList.toImmutableList; import static io.trino.plugin.hive.HiveMetadata.TABLE_COMMENT; import static io.trino.plugin.hive.HiveMetadata.TRINO_QUERY_ID_NAME; @@ -116,19 +118,23 @@ public Set getSupportedColumnStatistics(Type type) } @Override - public PartitionStatistics getTableStatistics(Table table) + public Map getTableColumnStatistics(String databaseName, String tableName, Set columnNames, OptionalLong rowCount) { - return delegate.getTableStatistics(toMetastoreApiTable(table)); + checkArgument(!columnNames.isEmpty(), "columnNames is empty"); + return delegate.getTableColumnStatistics(databaseName, tableName, columnNames, rowCount); } @Override - public Map getPartitionStatistics(Table table, List partitions) + public Map> getPartitionColumnStatistics(String databaseName, String tableName, Map partitionNamesWithRowCount, Set columnNames) { - return delegate.getPartitionStatistics( - toMetastoreApiTable(table), - partitions.stream() - .map(ThriftMetastoreUtil::toMetastoreApiPartition) - .collect(toImmutableList())); + checkArgument(!columnNames.isEmpty(), "columnNames is empty"); + return delegate.getPartitionColumnStatistics(databaseName, tableName, partitionNamesWithRowCount, columnNames); + } + + @Override + public boolean useSparkTableStatistics() + { + return delegate.useSparkTableStatistics(); } @Override diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftHiveMetastore.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftHiveMetastore.java index 3dadb4880297..ca8fe3c17a64 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftHiveMetastore.java +++ 
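The BridgingHiveMetastore change above also shows the new fail-fast contract: both statistics methods now reject an empty column set up front instead of silently returning only basic statistics. A condensed sketch of the read-modify-write pattern the patch converges on, mirroring updateTableStatisticsInternal, with metastore and table as illustrative names:

    HiveBasicStatistics basicStatistics = getHiveBasicStatistics(table.getParameters());
    Map<String, HiveColumnStatistics> columnStatistics = metastore.getTableColumnStatistics(
            table.getDatabaseName(),
            table.getTableName(),
            // must be non-empty: empty column sets now fail with IllegalArgumentException
            Stream.concat(table.getDataColumns().stream(), table.getPartitionColumns().stream())
                    .map(Column::getName)
                    .collect(toImmutableSet()),
            basicStatistics.getRowCount());
    // Basic statistics from the table parameters and column statistics from the
    // metastore are zipped back into the PartitionStatistics callers expect.
    PartitionStatistics currentStatistics = new PartitionStatistics(basicStatistics, columnStatistics);
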
b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftHiveMetastore.java @@ -123,18 +123,17 @@ import static io.trino.plugin.hive.metastore.HivePrivilegeInfo.HivePrivilege.OWNERSHIP; import static io.trino.plugin.hive.metastore.MetastoreUtil.adjustRowCount; import static io.trino.plugin.hive.metastore.MetastoreUtil.partitionKeyFilterToStringList; +import static io.trino.plugin.hive.metastore.SparkMetastoreUtil.getSparkTableStatistics; import static io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.createMetastoreColumnStatistics; import static io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.fromMetastoreApiPrincipalType; import static io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.fromMetastoreApiTable; import static io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.fromRolePrincipalGrants; import static io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.fromTrinoPrincipalType; -import static io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.getBasicStatisticsWithSparkFallback; import static io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.getHiveBasicStatistics; import static io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.isAvroTableWithSchemaSet; import static io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.parsePrivilege; import static io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.toMetastoreApiPartition; import static io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.updateStatisticsParameters; -import static io.trino.plugin.hive.util.HiveUtil.makePartName; import static io.trino.spi.StandardErrorCode.ALREADY_EXISTS; import static io.trino.spi.StandardErrorCode.GENERIC_USER_ERROR; import static io.trino.spi.StandardErrorCode.NOT_SUPPORTED; @@ -334,26 +333,7 @@ public Set getSupportedColumnStatistics(Type type) } @Override - public PartitionStatistics getTableStatistics(Table table) - { - List dataColumns = table.getSd().getCols().stream() - .map(FieldSchema::getName) - .collect(toImmutableList()); - Map parameters = table.getParameters(); - HiveBasicStatistics basicStatistics = getHiveBasicStatistics(parameters); - - if (useSparkTableStatisticsFallback && basicStatistics.getRowCount().isEmpty()) { - PartitionStatistics sparkTableStatistics = ThriftSparkMetastoreUtil.getTableStatistics(table); - if (sparkTableStatistics.getBasicStatistics().getRowCount().isPresent()) { - return sparkTableStatistics; - } - } - - Map columnStatistics = getTableColumnStatistics(table.getDbName(), table.getTableName(), dataColumns, basicStatistics.getRowCount()); - return new PartitionStatistics(basicStatistics, columnStatistics); - } - - private Map getTableColumnStatistics(String databaseName, String tableName, List columns, OptionalLong rowCount) + public Map getTableColumnStatistics(String databaseName, String tableName, Set columnNames, OptionalLong rowCount) { try { return retry() @@ -361,7 +341,7 @@ private Map getTableColumnStatistics(String databa .stopOnIllegalExceptions() .run("getTableColumnStatistics", stats.getGetTableColumnStatistics().wrap(() -> { try (ThriftMetastoreClient client = createMetastoreClient()) { - return groupStatisticsByColumn(client.getTableColumnStatistics(databaseName, tableName, columns), rowCount); + return groupStatisticsByColumn(client.getTableColumnStatistics(databaseName, tableName, ImmutableList.copyOf(columnNames)), rowCount); } })); } @@ -377,41 +357,19 @@ private Map getTableColumnStatistics(String databa } @Override - public Map 
getPartitionStatistics(Table table, List partitions) + public Map> getPartitionColumnStatistics(String databaseName, String tableName, Map partitionNamesWithRowCount, Set columnNames) { - List dataColumns = table.getSd().getCols().stream() - .map(FieldSchema::getName) - .collect(toImmutableList()); - List partitionColumns = table.getPartitionKeys().stream() - .map(FieldSchema::getName) - .collect(toImmutableList()); - - Map partitionBasicStatistics = partitions.stream() + return getPartitionColumnStatistics(databaseName, tableName, partitionNamesWithRowCount.keySet(), ImmutableList.copyOf(columnNames)).entrySet().stream() + .filter(entry -> !entry.getValue().isEmpty()) .collect(toImmutableMap( - partition -> makePartName(partitionColumns, partition.getValues()), - partition -> { - if (useSparkTableStatisticsFallback) { - return getBasicStatisticsWithSparkFallback(partition.getParameters()); - } - return getHiveBasicStatistics(partition.getParameters()); - })); - Map partitionRowCounts = partitionBasicStatistics.entrySet().stream() - .collect(toImmutableMap(Map.Entry::getKey, entry -> entry.getValue().getRowCount())); - - Map> partitionColumnStatistics = getPartitionColumnStatistics( - table.getDbName(), - table.getTableName(), - partitionBasicStatistics.keySet(), - dataColumns, - partitionRowCounts); - ImmutableMap.Builder result = ImmutableMap.builder(); - for (String partitionName : partitionBasicStatistics.keySet()) { - HiveBasicStatistics basicStatistics = partitionBasicStatistics.get(partitionName); - Map columnStatistics = partitionColumnStatistics.getOrDefault(partitionName, ImmutableMap.of()); - result.put(partitionName, new PartitionStatistics(basicStatistics, columnStatistics)); - } + Map.Entry::getKey, + entry -> groupStatisticsByColumn(entry.getValue(), partitionNamesWithRowCount.getOrDefault(entry.getKey(), OptionalLong.empty())))); + } - return result.buildOrThrow(); + @Override + public boolean useSparkTableStatistics() + { + return useSparkTableStatisticsFallback; } @Override @@ -438,21 +396,7 @@ public Optional> getFields(String databaseName, String tableNa } } - private Map> getPartitionColumnStatistics( - String databaseName, - String tableName, - Set partitionNames, - List columnNames, - Map partitionRowCounts) - { - return getMetastorePartitionColumnStatistics(databaseName, tableName, partitionNames, columnNames).entrySet().stream() - .filter(entry -> !entry.getValue().isEmpty()) - .collect(toImmutableMap( - Map.Entry::getKey, - entry -> groupStatisticsByColumn(entry.getValue(), partitionRowCounts.getOrDefault(entry.getKey(), OptionalLong.empty())))); - } - - private Map> getMetastorePartitionColumnStatistics(String databaseName, String tableName, Set partitionNames, List columnNames) + private Map> getPartitionColumnStatistics(String databaseName, String tableName, Set partitionNames, List columnNames) { try { return retry() @@ -499,7 +443,7 @@ public void updateTableStatistics(String databaseName, String tableName, AcidTra Table originalTable = getTable(databaseName, tableName) .orElseThrow(() -> new TableNotFoundException(new SchemaTableName(databaseName, tableName))); - PartitionStatistics currentStatistics = getTableStatistics(originalTable); + PartitionStatistics currentStatistics = getCurrentTableStatistics(originalTable); PartitionStatistics updatedStatistics = mode.updatePartitionStatistics(currentStatistics, statisticsUpdate); Table modifiedTable = originalTable.deepCopy(); @@ -532,6 +476,23 @@ public void updateTableStatistics(String databaseName, String 
tableName, AcidTra removedColumnStatistics.forEach(column -> deleteTableColumnStatistics(databaseName, tableName, column)); } + private PartitionStatistics getCurrentTableStatistics(Table table) + { + Map columns = table.getSd().getCols().stream() + .collect(toImmutableMap(FieldSchema::getName, fieldSchema -> HiveType.valueOf(fieldSchema.getType()))); + + if (useSparkTableStatisticsFallback) { + Optional sparkTableStatistics = getSparkTableStatistics(table.getParameters(), columns); + if (sparkTableStatistics.isPresent()) { + return sparkTableStatistics.get(); + } + } + + HiveBasicStatistics basicStatistics = getHiveBasicStatistics(table.getParameters()); + Map columnStatistics = getTableColumnStatistics(table.getDbName(), table.getTableName(), columns.keySet(), basicStatistics.getRowCount()); + return new PartitionStatistics(basicStatistics, columnStatistics); + } + private void setTableColumnStatistics(String databaseName, String tableName, List statistics) { try { @@ -592,9 +553,16 @@ public void updatePartitionStatistics(Table table, String partitionName, Statist } Partition originalPartition = getOnlyElement(partitions); - PartitionStatistics currentStatistics = requireNonNull( - getPartitionStatistics(table, partitions).get(partitionName), "getPartitionStatistics() did not return statistics for partition"); - PartitionStatistics updatedStatistics = mode.updatePartitionStatistics(currentStatistics, statisticsUpdate); + HiveBasicStatistics currentBasicStats = getHiveBasicStatistics(originalPartition.getParameters()); + Map currentColumnStats = getPartitionColumnStatistics( + table.getDbName(), + table.getTableName(), + ImmutableMap.of(partitionName, currentBasicStats.getRowCount()), + table.getSd().getCols().stream() + .map(FieldSchema::getName) + .collect(toImmutableSet())) + .getOrDefault(partitionName, ImmutableMap.of()); + PartitionStatistics updatedStatistics = mode.updatePartitionStatistics(new PartitionStatistics(currentBasicStats, currentColumnStats), statisticsUpdate); Partition modifiedPartition = originalPartition.deepCopy(); HiveBasicStatistics basicStatistics = updatedStatistics.getBasicStatistics(); @@ -605,7 +573,7 @@ public void updatePartitionStatistics(Table table, String partitionName, Statist .collect(toImmutableMap(FieldSchema::getName, schema -> HiveType.valueOf(schema.getType()))); setPartitionColumnStatistics(table.getDbName(), table.getTableName(), partitionName, columns, updatedStatistics.getColumnStatistics(), basicStatistics.getRowCount()); - Set removedStatistics = difference(currentStatistics.getColumnStatistics().keySet(), updatedStatistics.getColumnStatistics().keySet()); + Set removedStatistics = difference(currentColumnStats.keySet(), updatedStatistics.getColumnStatistics().keySet()); removedStatistics.forEach(column -> deletePartitionColumnStatistics(table.getDbName(), table.getTableName(), partitionName, column)); } @@ -1379,7 +1347,7 @@ private void dropExtraColumnStatisticsAfterAlterPartition( // check if statistics for the columnsWithMissingStatistics are actually stored in the metastore // when trying to remove any missing statistics the metastore throws NoSuchObjectException String partitionName = partitionWithStatistics.getPartitionName(); - List statisticsToBeRemoved = getMetastorePartitionColumnStatistics( + List statisticsToBeRemoved = getPartitionColumnStatistics( databaseName, tableName, ImmutableSet.of(partitionName), diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftMetastore.java 
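With getTableStatistics gone, the Spark fallback that used to be buried inside ThriftHiveMetastore becomes an explicit capability flag, and the check moves to the caller, as getCurrentTableStatistics above does. A sketch, assuming columns is the column-name-to-HiveType map built from the table schema:

    if (metastore.useSparkTableStatistics()) {
        // Spark-written tables keep statistics in table parameters; when present
        // they take precedence and the thrift column-statistics call is skipped.
        Optional<PartitionStatistics> sparkStatistics = getSparkTableStatistics(table.getParameters(), columns);
        if (sparkStatistics.isPresent()) {
            return sparkStatistics.get();
        }
    }
    // Otherwise fall through to getHiveBasicStatistics + getTableColumnStatistics.
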
b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftMetastore.java index 99758b535780..65d81628d52e 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftMetastore.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftMetastore.java @@ -24,6 +24,7 @@ import io.trino.plugin.hive.acid.AcidOperation; import io.trino.plugin.hive.acid.AcidTransaction; import io.trino.plugin.hive.metastore.AcidTransactionOwner; +import io.trino.plugin.hive.metastore.HiveColumnStatistics; import io.trino.plugin.hive.metastore.HivePrincipal; import io.trino.plugin.hive.metastore.HivePrivilegeInfo; import io.trino.plugin.hive.metastore.HivePrivilegeInfo.HivePrivilege; @@ -93,9 +94,11 @@ public sealed interface ThriftMetastore Set getSupportedColumnStatistics(Type type); - PartitionStatistics getTableStatistics(Table table); + Map getTableColumnStatistics(String databaseName, String tableName, Set columnNames, OptionalLong rowCount); - Map getPartitionStatistics(Table table, List partitions); + Map> getPartitionColumnStatistics(String databaseName, String tableName, Map partitionNamesWithRowCount, Set columnNames); + + boolean useSparkTableStatistics(); void updateTableStatistics(String databaseName, String tableName, AcidTransaction transaction, StatisticsUpdateMode mode, PartitionStatistics statisticsUpdate); diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftMetastoreParameterParserUtils.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftMetastoreParameterParserUtils.java deleted file mode 100644 index 1276294bc6b8..000000000000 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftMetastoreParameterParserUtils.java +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package io.trino.plugin.hive.metastore.thrift; - -import com.google.common.primitives.Doubles; -import com.google.common.primitives.Longs; -import jakarta.annotation.Nullable; - -import java.math.BigDecimal; -import java.time.DateTimeException; -import java.time.LocalDate; -import java.util.Optional; -import java.util.OptionalDouble; -import java.util.OptionalLong; - -final class ThriftMetastoreParameterParserUtils -{ - private ThriftMetastoreParameterParserUtils() {} - - static OptionalLong toLong(@Nullable String parameterValue) - { - if (parameterValue == null) { - return OptionalLong.empty(); - } - Long longValue = Longs.tryParse(parameterValue); - if (longValue == null || longValue < 0) { - return OptionalLong.empty(); - } - return OptionalLong.of(longValue); - } - - static OptionalDouble toDouble(@Nullable String parameterValue) - { - if (parameterValue == null) { - return OptionalDouble.empty(); - } - Double doubleValue = Doubles.tryParse(parameterValue); - if (doubleValue == null || doubleValue < 0) { - return OptionalDouble.empty(); - } - return OptionalDouble.of(doubleValue); - } - - static Optional toDecimal(@Nullable String parameterValue) - { - if (parameterValue == null) { - return Optional.empty(); - } - try { - BigDecimal decimal = new BigDecimal(parameterValue); - if (decimal.compareTo(BigDecimal.ZERO) < 0) { - return Optional.empty(); - } - return Optional.of(decimal); - } - catch (NumberFormatException exception) { - return Optional.empty(); - } - } - - static Optional toDate(@Nullable String parameterValue) - { - if (parameterValue == null) { - return Optional.empty(); - } - try { - LocalDate date = LocalDate.parse(parameterValue); - return Optional.of(date); - } - catch (DateTimeException exception) { - return Optional.empty(); - } - } -} diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftMetastoreUtil.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftMetastoreUtil.java index 650ca4623595..4562cf5a2c1f 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftMetastoreUtil.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftMetastoreUtil.java @@ -20,6 +20,7 @@ import com.google.common.collect.Streams; import com.google.common.io.ByteArrayDataOutput; import com.google.common.io.ByteStreams; +import com.google.common.primitives.Longs; import com.google.common.primitives.Shorts; import io.airlift.compress.Compressor; import io.airlift.compress.zstd.ZstdCompressor; @@ -137,8 +138,7 @@ import static io.trino.plugin.hive.metastore.HivePrivilegeInfo.HivePrivilege.OWNERSHIP; import static io.trino.plugin.hive.metastore.HivePrivilegeInfo.HivePrivilege.SELECT; import static io.trino.plugin.hive.metastore.HivePrivilegeInfo.HivePrivilege.UPDATE; -import static io.trino.plugin.hive.metastore.thrift.ThriftMetastoreParameterParserUtils.toLong; -import static io.trino.plugin.hive.metastore.thrift.ThriftSparkMetastoreUtil.getSparkBasicStatistics; +import static io.trino.plugin.hive.metastore.SparkMetastoreUtil.getSparkBasicStatistics; import static io.trino.plugin.hive.type.Category.PRIMITIVE; import static io.trino.spi.security.PrincipalType.ROLE; import static io.trino.spi.security.PrincipalType.USER; @@ -1050,4 +1050,16 @@ public static LanguageFunction decodeFunction(String name, List uri throw new TrinoException(HIVE_INVALID_METADATA, "Failed to decode function: " + name, e); } } + + private static OptionalLong toLong(@Nullable String 
parameterValue) + { + if (parameterValue == null) { + return OptionalLong.empty(); + } + Long longValue = Longs.tryParse(parameterValue); + if (longValue == null || longValue < 0) { + return OptionalLong.empty(); + } + return OptionalLong.of(longValue); + } } diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/tracing/TracingHiveMetastore.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/tracing/TracingHiveMetastore.java index 572ecfa074b3..018303b6c7b0 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/tracing/TracingHiveMetastore.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/tracing/TracingHiveMetastore.java @@ -24,6 +24,7 @@ import io.trino.plugin.hive.acid.AcidTransaction; import io.trino.plugin.hive.metastore.AcidTransactionOwner; import io.trino.plugin.hive.metastore.Database; +import io.trino.plugin.hive.metastore.HiveColumnStatistics; import io.trino.plugin.hive.metastore.HiveMetastore; import io.trino.plugin.hive.metastore.HivePrincipal; import io.trino.plugin.hive.metastore.HivePrivilegeInfo; @@ -110,30 +111,36 @@ public Set getSupportedColumnStatistics(Type type) } @Override - public PartitionStatistics getTableStatistics(Table table) + public Map getTableColumnStatistics(String databaseName, String tableName, Set columnNames, OptionalLong rowCount) { - Span span = tracer.spanBuilder("HiveMetastore.getTableStatistics") - .setAttribute(SCHEMA, table.getDatabaseName()) - .setAttribute(TABLE, table.getTableName()) + Span span = tracer.spanBuilder("HiveMetastore.getTableColumnStatistics") + .setAttribute(SCHEMA, databaseName) + .setAttribute(TABLE, tableName) .startSpan(); - return withTracing(span, () -> delegate.getTableStatistics(table)); + return withTracing(span, () -> delegate.getTableColumnStatistics(databaseName, tableName, columnNames, rowCount)); } @Override - public Map getPartitionStatistics(Table table, List partitions) + public Map> getPartitionColumnStatistics(String databaseName, String tableName, Map partitionNamesWithRowCount, Set columnNames) { - Span span = tracer.spanBuilder("HiveMetastore.getPartitionStatistics") - .setAttribute(SCHEMA, table.getDatabaseName()) - .setAttribute(TABLE, table.getTableName()) - .setAttribute(PARTITION_REQUEST_COUNT, (long) partitions.size()) + Span span = tracer.spanBuilder("HiveMetastore.getPartitionColumnStatistics") + .setAttribute(SCHEMA, databaseName) + .setAttribute(TABLE, tableName) + .setAttribute(PARTITION_REQUEST_COUNT, (long) partitionNamesWithRowCount.size()) .startSpan(); return withTracing(span, () -> { - Map partitionStatistics = delegate.getPartitionStatistics(table, partitions); - span.setAttribute(PARTITION_RESPONSE_COUNT, partitionStatistics.size()); - return partitionStatistics; + var partitionColumnStatistics = delegate.getPartitionColumnStatistics(databaseName, tableName, partitionNamesWithRowCount, columnNames); + span.setAttribute(PARTITION_RESPONSE_COUNT, partitionColumnStatistics.size()); + return partitionColumnStatistics; }); } + @Override + public boolean useSparkTableStatistics() + { + return delegate.useSparkTableStatistics(); + } + @Override public void updateTableStatistics(String databaseName, String tableName, AcidTransaction transaction, StatisticsUpdateMode mode, PartitionStatistics statisticsUpdate) { diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHive3OnDataLake.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHive3OnDataLake.java index 
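The deleted ThriftMetastoreParameterParserUtils survives only as the private toLong helper inlined into ThriftMetastoreUtil above. Its contract is deliberately lenient: null, unparsable, and negative parameter values all map to empty rather than failing. For illustration:

    toLong(null);    // OptionalLong.empty()
    toLong("abc");   // OptionalLong.empty(), Longs.tryParse returns null
    toLong("-5");    // OptionalLong.empty(), negative counts are treated as absent
    toLong("1234");  // OptionalLong.of(1234)
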
fd2ccbf226c2..b93f3071ef76 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHive3OnDataLake.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHive3OnDataLake.java @@ -19,6 +19,8 @@ import io.trino.Session; import io.trino.plugin.hive.containers.HiveHadoop; import io.trino.plugin.hive.containers.HiveMinioDataLake; +import io.trino.plugin.hive.metastore.Column; +import io.trino.plugin.hive.metastore.HiveColumnStatistics; import io.trino.plugin.hive.metastore.HiveMetastore; import io.trino.plugin.hive.metastore.Partition; import io.trino.plugin.hive.metastore.PartitionWithStatistics; @@ -46,6 +48,7 @@ import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.OptionalLong; import java.util.TimeZone; import java.util.stream.Collectors; @@ -53,6 +56,7 @@ import static io.airlift.slice.Slices.utf8Slice; import static io.airlift.units.DataSize.Unit.MEGABYTE; import static io.trino.plugin.hive.TestingThriftHiveMetastoreBuilder.testingThriftHiveMetastoreBuilder; +import static io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.getHiveBasicStatistics; import static io.trino.spi.type.VarcharType.VARCHAR; import static io.trino.testing.MaterializedResult.resultBuilder; import static io.trino.testing.TestingNames.randomNameSuffix; @@ -62,6 +66,7 @@ import static java.time.temporal.ChronoUnit.MINUTES; import static java.util.regex.Pattern.quote; import static java.util.stream.Collectors.joining; +import static java.util.stream.Collectors.toSet; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS; @@ -1967,20 +1972,23 @@ private void renamePartitionResourcesOutsideTrino(String tableName, String parti }); // Delete old partition and update metadata to point to location of new copy - Table hiveTable = metastoreClient.getTable(HIVE_TEST_SCHEMA, tableName).get(); - Partition hivePartition = metastoreClient.getPartition(hiveTable, List.of(regionKey)).get(); - Map partitionStatistics = - metastoreClient.getPartitionStatistics(hiveTable, List.of(hivePartition)); + Table hiveTable = metastoreClient.getTable(HIVE_TEST_SCHEMA, tableName).orElseThrow(); + Partition partition = metastoreClient.getPartition(hiveTable, List.of(regionKey)).orElseThrow(); + Map> partitionStatistics = metastoreClient.getPartitionColumnStatistics( + HIVE_TEST_SCHEMA, + tableName, + Map.of(partitionName, OptionalLong.empty()), + partition.getColumns().stream().map(Column::getName).collect(toSet())); metastoreClient.dropPartition(HIVE_TEST_SCHEMA, tableName, List.of(regionKey), true); metastoreClient.addPartitions(HIVE_TEST_SCHEMA, tableName, List.of( new PartitionWithStatistics( - Partition.builder(hivePartition) + Partition.builder(partition) .withStorage(builder -> builder.setLocation( - hivePartition.getStorage().getLocation() + renamedPartitionSuffix)) + partition.getStorage().getLocation() + renamedPartitionSuffix)) .build(), partitionName, - partitionStatistics.get(partitionName)))); + new PartitionStatistics(getHiveBasicStatistics(partition.getParameters()), partitionStatistics.get(partitionName))))); } protected void assertInsertFailure(String testTable, String expectedMessageRegExp) diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/MetastoreMethod.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/MetastoreMethod.java index 30c6461a532b..5079db8be41d 100644 --- 
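The TestHive3OnDataLake hunk above is the canonical call-site translation: column statistics are fetched per partition name, with OptionalLong.empty() where the row count is irrelevant, then recombined with the basic statistics parsed from the partition parameters. Condensed, with illustrative identifiers:

    Map<String, Map<String, HiveColumnStatistics>> columnStats = metastoreClient.getPartitionColumnStatistics(
            schemaName,
            tableName,
            Map.of(partitionName, OptionalLong.empty()),  // row count not needed for this use
            partition.getColumns().stream().map(Column::getName).collect(toSet()));
    PartitionStatistics statistics = new PartitionStatistics(
            getHiveBasicStatistics(partition.getParameters()),
            columnStats.get(partitionName));
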
a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/MetastoreMethod.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/MetastoreMethod.java @@ -29,7 +29,7 @@ public enum MetastoreMethod GET_RELATION_TYPES, GET_ALL_RELATION_TYPES, GET_TABLES_WITH_PARAMETER, - GET_TABLE_STATISTICS, + GET_TABLE_COLUMN_STATISTICS, GET_ALL_VIEWS, GET_VIEWS, UPDATE_TABLE_STATISTICS, @@ -37,7 +37,7 @@ public enum MetastoreMethod GET_PARTITION_NAMES_BY_FILTER, GET_PARTITIONS_BY_NAMES, GET_PARTITION, - GET_PARTITION_STATISTICS, + GET_PARTITION_COLUMN_STATISTICS, UPDATE_PARTITION_STATISTICS, REPLACE_TABLE, DROP_TABLE, diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/thrift/TestThriftSparkMetastoreUtil.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/TestSparkMetastoreUtil.java similarity index 80% rename from plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/thrift/TestThriftSparkMetastoreUtil.java rename to plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/TestSparkMetastoreUtil.java index 6b356bfb27a5..8f986b492b60 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/thrift/TestThriftSparkMetastoreUtil.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/TestSparkMetastoreUtil.java @@ -11,15 +11,9 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package io.trino.plugin.hive.metastore.thrift; - -import io.trino.hive.thrift.metastore.FieldSchema; -import io.trino.plugin.hive.metastore.BooleanStatistics; -import io.trino.plugin.hive.metastore.DateStatistics; -import io.trino.plugin.hive.metastore.DecimalStatistics; -import io.trino.plugin.hive.metastore.DoubleStatistics; -import io.trino.plugin.hive.metastore.HiveColumnStatistics; -import io.trino.plugin.hive.metastore.IntegerStatistics; +package io.trino.plugin.hive.metastore; + +import io.trino.plugin.hive.HiveType; import org.junit.jupiter.api.Test; import java.math.BigDecimal; @@ -29,17 +23,11 @@ import java.util.OptionalDouble; import java.util.OptionalLong; -import static io.trino.plugin.hive.metastore.thrift.ThriftSparkMetastoreUtil.fromMetastoreColumnStatistics; -import static io.trino.plugin.hive.util.SerdeConstants.BIGINT_TYPE_NAME; -import static io.trino.plugin.hive.util.SerdeConstants.BINARY_TYPE_NAME; -import static io.trino.plugin.hive.util.SerdeConstants.BOOLEAN_TYPE_NAME; -import static io.trino.plugin.hive.util.SerdeConstants.DATE_TYPE_NAME; +import static io.trino.plugin.hive.metastore.SparkMetastoreUtil.fromMetastoreColumnStatistics; import static io.trino.plugin.hive.util.SerdeConstants.DECIMAL_TYPE_NAME; -import static io.trino.plugin.hive.util.SerdeConstants.DOUBLE_TYPE_NAME; -import static io.trino.plugin.hive.util.SerdeConstants.STRING_TYPE_NAME; import static org.assertj.core.api.Assertions.assertThat; -public class TestThriftSparkMetastoreUtil +public class TestSparkMetastoreUtil { @Test public void testSparkLongStatsToColumnStatistics() @@ -51,12 +39,12 @@ public void testSparkLongStatsToColumnStatistics() "spark.sql.statistics.colStats.c_long.nullCount", "0", "spark.sql.statistics.colStats.c_long.version", "2", "spark.sql.statistics.colStats.c_long.max", "4"); - HiveColumnStatistics actualNotExists = fromMetastoreColumnStatistics(new FieldSchema("c_long_not_exists", BIGINT_TYPE_NAME, null), columnStatistics, 4); + HiveColumnStatistics actualNotExists = fromMetastoreColumnStatistics("c_long_not_exists", HiveType.HIVE_LONG, 
columnStatistics, 4); assertThat(actualNotExists).isEqualTo(HiveColumnStatistics.builder() .setIntegerStatistics(new IntegerStatistics(OptionalLong.empty(), OptionalLong.empty())) .build()); - HiveColumnStatistics actual = fromMetastoreColumnStatistics(new FieldSchema("c_long", BIGINT_TYPE_NAME, null), columnStatistics, 4); + HiveColumnStatistics actual = fromMetastoreColumnStatistics("c_long", HiveType.HIVE_LONG, columnStatistics, 4); assertThat(actual).isEqualTo(HiveColumnStatistics.builder() .setIntegerStatistics(new IntegerStatistics(OptionalLong.of(1), OptionalLong.of(4))) .setNullsCount(0) @@ -74,12 +62,12 @@ public void testSparkDoubleStatsToColumnStatistics() "spark.sql.statistics.colStats.c_double.nullCount", "1", "spark.sql.statistics.colStats.c_double.version", "2", "spark.sql.statistics.colStats.c_double.max", "3.3"); - HiveColumnStatistics actualNotExists = fromMetastoreColumnStatistics(new FieldSchema("c_double_not_exists", DOUBLE_TYPE_NAME, null), columnStatistics, 10); + HiveColumnStatistics actualNotExists = fromMetastoreColumnStatistics("c_double_not_exists", HiveType.HIVE_DOUBLE, columnStatistics, 10); assertThat(actualNotExists).isEqualTo(HiveColumnStatistics.builder() .setDoubleStatistics(new DoubleStatistics(OptionalDouble.empty(), OptionalDouble.empty())) .build()); - HiveColumnStatistics actual = fromMetastoreColumnStatistics(new FieldSchema("c_double", DOUBLE_TYPE_NAME, null), columnStatistics, 10); + HiveColumnStatistics actual = fromMetastoreColumnStatistics("c_double", HiveType.HIVE_DOUBLE, columnStatistics, 10); assertThat(actual).isEqualTo(HiveColumnStatistics.builder() .setDoubleStatistics(new DoubleStatistics(OptionalDouble.of(0.3), OptionalDouble.of(3.3))) .setNullsCount(1) @@ -97,12 +85,13 @@ public void testSparkDecimalStatsToColumnStatistics() "spark.sql.statistics.colStats.c_decimal.nullCount", "1", "spark.sql.statistics.colStats.c_decimal.version", "2", "spark.sql.statistics.colStats.c_decimal.max", "3.3"); - HiveColumnStatistics actualNotExists = fromMetastoreColumnStatistics(new FieldSchema("c_decimal_not_exists", DECIMAL_TYPE_NAME, null), columnStatistics, 10); + HiveType decimalType = HiveType.valueOf(DECIMAL_TYPE_NAME); + HiveColumnStatistics actualNotExists = fromMetastoreColumnStatistics("c_decimal_not_exists", decimalType, columnStatistics, 10); assertThat(actualNotExists).isEqualTo(HiveColumnStatistics.builder() .setDecimalStatistics(new DecimalStatistics(Optional.empty(), Optional.empty())) .build()); - HiveColumnStatistics actual = fromMetastoreColumnStatistics(new FieldSchema("c_decimal", DECIMAL_TYPE_NAME, null), columnStatistics, 10); + HiveColumnStatistics actual = fromMetastoreColumnStatistics("c_decimal", decimalType, columnStatistics, 10); assertThat(actual).isEqualTo(HiveColumnStatistics.builder() .setDecimalStatistics(new DecimalStatistics(Optional.of(new BigDecimal("0.3")), Optional.of(new BigDecimal("3.3")))) .setNullsCount(1) @@ -120,12 +109,12 @@ public void testSparkBooleanStatsToColumnStatistics() "spark.sql.statistics.colStats.c_bool.nullCount", "1", "spark.sql.statistics.colStats.c_bool.version", "2", "spark.sql.statistics.colStats.c_bool.max", "true"); - HiveColumnStatistics actualNotExists = fromMetastoreColumnStatistics(new FieldSchema("c_bool_not_exists", BOOLEAN_TYPE_NAME, null), columnStatistics, 10); + HiveColumnStatistics actualNotExists = fromMetastoreColumnStatistics("c_bool_not_exists", HiveType.HIVE_BOOLEAN, columnStatistics, 10); assertThat(actualNotExists).isEqualTo(HiveColumnStatistics.builder() 
.setBooleanStatistics(new BooleanStatistics(OptionalLong.empty(), OptionalLong.empty())) .build()); - HiveColumnStatistics actual = fromMetastoreColumnStatistics(new FieldSchema("c_bool", BOOLEAN_TYPE_NAME, null), columnStatistics, 10); + HiveColumnStatistics actual = fromMetastoreColumnStatistics("c_bool", HiveType.HIVE_BOOLEAN, columnStatistics, 10); assertThat(actual).isEqualTo(HiveColumnStatistics.builder() .setBooleanStatistics(new BooleanStatistics(OptionalLong.empty(), OptionalLong.empty())) .setNullsCount(1) @@ -142,12 +131,12 @@ public void testSparkDateStatsToColumnStatistics() "spark.sql.statistics.colStats.c_date.nullCount", "3", "spark.sql.statistics.colStats.c_date.version", "2", "spark.sql.statistics.colStats.c_date.max", "2030-12-31"); - HiveColumnStatistics actualNotExists = fromMetastoreColumnStatistics(new FieldSchema("c_date_not_exists", DATE_TYPE_NAME, null), columnStatistics, 10); + HiveColumnStatistics actualNotExists = fromMetastoreColumnStatistics("c_date_not_exists", HiveType.HIVE_DATE, columnStatistics, 10); assertThat(actualNotExists).isEqualTo(HiveColumnStatistics.builder() .setDateStatistics(new DateStatistics(Optional.empty(), Optional.empty())) .build()); - HiveColumnStatistics actual = fromMetastoreColumnStatistics(new FieldSchema("c_date", DATE_TYPE_NAME, null), columnStatistics, 10); + HiveColumnStatistics actual = fromMetastoreColumnStatistics("c_date", HiveType.HIVE_DATE, columnStatistics, 10); assertThat(actual).isEqualTo(HiveColumnStatistics.builder() .setDateStatistics(new DateStatistics(Optional.of(LocalDate.of(2000, 1, 1)), Optional.of(LocalDate.of(2030, 12, 31)))) .setNullsCount(3) @@ -164,10 +153,10 @@ public void testSparkStringStatsToColumnStatistics() "spark.sql.statistics.colStats.char_col.maxLen", "10", "spark.sql.statistics.colStats.c_char.nullCount", "7", "spark.sql.statistics.colStats.c_char.version", "2"); - HiveColumnStatistics actualNotExists = fromMetastoreColumnStatistics(new FieldSchema("c_char_not_exists", STRING_TYPE_NAME, null), columnStatistics, 10); + HiveColumnStatistics actualNotExists = fromMetastoreColumnStatistics("c_char_not_exists", HiveType.HIVE_STRING, columnStatistics, 10); assertThat(actualNotExists).isEqualTo(HiveColumnStatistics.builder().build()); - HiveColumnStatistics actual = fromMetastoreColumnStatistics(new FieldSchema("c_char", STRING_TYPE_NAME, null), columnStatistics, 10); + HiveColumnStatistics actual = fromMetastoreColumnStatistics("c_char", HiveType.HIVE_STRING, columnStatistics, 10); assertThat(actual).isEqualTo(HiveColumnStatistics.builder() .setNullsCount(7) .setDistinctValuesCount(3) @@ -184,10 +173,10 @@ public void testSparkBinaryStatsToColumnStatistics() "spark.sql.statistics.colStats.c_bin.maxLen", "10", "spark.sql.statistics.colStats.c_bin.nullCount", "3", "spark.sql.statistics.colStats.c_bin.version", "2"); - HiveColumnStatistics actualNotExists = fromMetastoreColumnStatistics(new FieldSchema("c_bin_not_exists", BINARY_TYPE_NAME, null), columnStatistics, 10); + HiveColumnStatistics actualNotExists = fromMetastoreColumnStatistics("c_bin_not_exists", HiveType.HIVE_BINARY, columnStatistics, 10); assertThat(actualNotExists).isEqualTo(HiveColumnStatistics.builder().build()); - HiveColumnStatistics actual = fromMetastoreColumnStatistics(new FieldSchema("c_bin", BINARY_TYPE_NAME, null), columnStatistics, 10); + HiveColumnStatistics actual = fromMetastoreColumnStatistics("c_bin", HiveType.HIVE_BINARY, columnStatistics, 10); assertThat(actual).isEqualTo(HiveColumnStatistics.builder() .setNullsCount(3) 
.setTotalSizeInBytes(70) diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/cache/TestCachingHiveMetastore.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/cache/TestCachingHiveMetastore.java index f35ef7d53fe5..de73a93cf1ac 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/cache/TestCachingHiveMetastore.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/cache/TestCachingHiveMetastore.java @@ -16,6 +16,7 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableMultimap; +import com.google.common.collect.ImmutableSet; import com.google.common.collect.Iterables; import com.google.common.util.concurrent.ListeningExecutorService; import com.google.common.util.concurrent.ThreadFactoryBuilder; @@ -27,7 +28,6 @@ import io.trino.plugin.hive.HiveColumnHandle; import io.trino.plugin.hive.HiveMetastoreClosure; import io.trino.plugin.hive.PartitionStatistics; -import io.trino.plugin.hive.metastore.Column; import io.trino.plugin.hive.metastore.Database; import io.trino.plugin.hive.metastore.HiveColumnStatistics; import io.trino.plugin.hive.metastore.HiveMetastore; @@ -73,7 +73,6 @@ import static io.airlift.slice.Slices.utf8Slice; import static io.trino.plugin.hive.HiveColumnHandle.ColumnType.PARTITION_KEY; import static io.trino.plugin.hive.HiveColumnHandle.createBaseColumn; -import static io.trino.plugin.hive.HiveType.HIVE_LONG; import static io.trino.plugin.hive.HiveType.HIVE_STRING; import static io.trino.plugin.hive.TableType.VIRTUAL_VIEW; import static io.trino.plugin.hive.TestingThriftHiveMetastoreBuilder.testingThriftHiveMetastoreBuilder; @@ -105,6 +104,7 @@ import static java.util.concurrent.Executors.newCachedThreadPool; import static java.util.concurrent.TimeUnit.SECONDS; import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS; import static org.junit.jupiter.api.parallel.ExecutionMode.SAME_THREAD; @@ -112,9 +112,11 @@ @Execution(SAME_THREAD) public class TestCachingHiveMetastore { + private static final HiveBasicStatistics TEST_BASIC_STATS = new HiveBasicStatistics(OptionalLong.empty(), OptionalLong.of(2398040535435L), OptionalLong.empty(), OptionalLong.empty()); + private static final ImmutableMap TEST_COLUMN_STATS = ImmutableMap.of(TEST_COLUMN, createIntegerColumnStatistics(OptionalLong.empty(), OptionalLong.empty(), OptionalLong.empty(), OptionalLong.empty())); private static final PartitionStatistics TEST_STATS = PartitionStatistics.builder() - .setBasicStatistics(new HiveBasicStatistics(OptionalLong.empty(), OptionalLong.of(2398040535435L), OptionalLong.empty(), OptionalLong.empty())) - .setColumnStatistics(ImmutableMap.of(TEST_COLUMN, createIntegerColumnStatistics(OptionalLong.empty(), OptionalLong.empty(), OptionalLong.empty(), OptionalLong.empty()))) + .setBasicStatistics(TEST_BASIC_STATS) + .setColumnStatistics(TEST_COLUMN_STATS) .build(); private static final SchemaTableName TEST_SCHEMA_TABLE = new SchemaTableName(TEST_DATABASE, TEST_TABLE); private static final Duration CACHE_TTL = new Duration(5, TimeUnit.MINUTES); @@ -515,56 +517,49 @@ public void testGetTableStatistics() { assertThat(mockClient.getAccessCount()).isEqualTo(0); - Table table = metastore.getTable(TEST_DATABASE, TEST_TABLE).orElseThrow(); + assertThat(metastore.getTable(TEST_DATABASE, 
TEST_TABLE)).isPresent();
         assertThat(mockClient.getAccessCount()).isEqualTo(1);

-        assertThat(metastore.getTableStatistics(table)).isEqualTo(TEST_STATS);
+        assertThat(metastore.getTableColumnStatistics(TEST_DATABASE, TEST_TABLE, TEST_COLUMN_STATS.keySet(), TEST_BASIC_STATS.getRowCount())).isEqualTo(TEST_COLUMN_STATS);
         assertThat(mockClient.getAccessCount()).isEqualTo(2);

-        assertThat(metastore.getTableStatistics(table)).isEqualTo(TEST_STATS);
+        assertThat(metastore.getTableColumnStatistics(TEST_DATABASE, TEST_TABLE, TEST_COLUMN_STATS.keySet(), TEST_BASIC_STATS.getRowCount())).isEqualTo(TEST_COLUMN_STATS);
         assertThat(mockClient.getAccessCount()).isEqualTo(2);

-        assertThat(metastore.getTableStatisticsStats().getRequestCount()).isEqualTo(2);
-        assertThat(metastore.getTableStatisticsStats().getHitRate()).isEqualTo(0.5);
+        assertThat(metastore.getTableColumnStatisticsStats().getRequestCount()).isEqualTo(2);
+        assertThat(metastore.getTableColumnStatisticsStats().getHitRate()).isEqualTo(0.5);

         assertThat(metastore.getTableStats().getRequestCount()).isEqualTo(1);
         assertThat(metastore.getTableStats().getHitRate()).isEqualTo(0.0);

         // check empty column list does not trigger the call
-        Table emptyColumnListTable = Table.builder(table).setDataColumns(ImmutableList.of()).build();
-        assertThat(metastore.getTableStatistics(emptyColumnListTable).getBasicStatistics()).isEqualTo(TEST_STATS.getBasicStatistics());
-        assertThat(metastore.getTableStatisticsStats().getRequestCount()).isEqualTo(3);
-        assertThat(metastore.getTableStatisticsStats().getHitRate()).isEqualTo(2.0 / 3);
+        assertThatThrownBy(() -> metastore.getTableColumnStatistics(TEST_DATABASE, TEST_TABLE, ImmutableSet.of(), TEST_BASIC_STATS.getRowCount()))
+                .isInstanceOf(IllegalArgumentException.class);
+        assertThat(metastore.getTableColumnStatisticsStats().getRequestCount()).isEqualTo(2);
+        assertThat(metastore.getTableColumnStatisticsStats().getHitRate()).isEqualTo(0.5);

         mockClient.mockColumnStats(TEST_DATABASE, TEST_TABLE, ImmutableMap.of(
                 "col1", ColumnStatisticsData.longStats(new LongColumnStatsData().setNumNulls(1)),
                 "col2", ColumnStatisticsData.longStats(new LongColumnStatsData().setNumNulls(2)),
                 "col3", ColumnStatisticsData.longStats(new LongColumnStatsData().setNumNulls(3))));
-        Table tableCol1 = Table.builder(table).setDataColumns(ImmutableList.of(new Column("col1", HIVE_LONG, Optional.empty(), Map.of()))).build();
-        assertThat(metastore.getTableStatistics(tableCol1).getColumnStatistics()).containsEntry("col1", intColumnStats(1));
-        Table tableCol2 = Table.builder(table).setDataColumns(ImmutableList.of(new Column("col2", HIVE_LONG, Optional.empty(), Map.of()))).build();
-        assertThat(metastore.getTableStatistics(tableCol2).getColumnStatistics()).containsEntry("col2", intColumnStats(2));
-        Table tableCol23 = Table.builder(table)
-                .setDataColumns(ImmutableList.of(new Column("col2", HIVE_LONG, Optional.empty(), Map.of()), new Column("col3", HIVE_LONG, Optional.empty(), Map.of())))
-                .build();
-        assertThat(metastore.getTableStatistics(tableCol23).getColumnStatistics())
+        assertThat(metastore.getTableColumnStatistics(TEST_DATABASE, TEST_TABLE, ImmutableSet.of("col1"), TEST_BASIC_STATS.getRowCount())).containsEntry("col1", intColumnStats(1));
+        assertThat(metastore.getTableColumnStatistics(TEST_DATABASE, TEST_TABLE, ImmutableSet.of("col2"), TEST_BASIC_STATS.getRowCount())).containsEntry("col2", intColumnStats(2));
+        assertThat(metastore.getTableColumnStatistics(TEST_DATABASE, TEST_TABLE, ImmutableSet.of("col2", "col3"), TEST_BASIC_STATS.getRowCount()))
                 .containsEntry("col2", intColumnStats(2))
                 .containsEntry("col3", intColumnStats(3));

-        metastore.getTableStatistics(table); // ensure cached
+        metastore.getTableColumnStatistics(TEST_DATABASE, TEST_TABLE, ImmutableSet.of(TEST_COLUMN), TEST_BASIC_STATS.getRowCount()); // ensure cached

         assertThat(mockClient.getAccessCount()).isEqualTo(5);

         ColumnStatisticsData newStats = new ColumnStatisticsData();
         newStats.setLongStats(new LongColumnStatsData(327843, 4324));
         mockClient.mockColumnStats(TEST_DATABASE, TEST_TABLE, ImmutableMap.of(TEST_COLUMN, newStats));
         metastore.invalidateTable(TEST_DATABASE, TEST_TABLE);

-        assertThat(metastore.getTableStatistics(table)).isEqualTo(PartitionStatistics.builder()
-                .setBasicStatistics(TEST_STATS.getBasicStatistics())
-                .setColumnStatistics(ImmutableMap.of(TEST_COLUMN, createIntegerColumnStatistics(
+        assertThat(metastore.getTableColumnStatistics(TEST_DATABASE, TEST_TABLE, ImmutableSet.of(TEST_COLUMN), TEST_BASIC_STATS.getRowCount()))
+                .isEqualTo(ImmutableMap.of(TEST_COLUMN, createIntegerColumnStatistics(
                         OptionalLong.empty(),
                         OptionalLong.empty(),
                         OptionalLong.of(newStats.getLongStats().getNumNulls()),
-                        OptionalLong.of(newStats.getLongStats().getNumDVs() - 1))))
-                .build());
+                        OptionalLong.of(newStats.getLongStats().getNumDVs() - 1))));

         assertThat(mockClient.getAccessCount()).isEqualTo(6);
     }

@@ -573,20 +568,16 @@ public void testGetTableStatisticsWithEmptyColumnStats()
     {
         assertThat(mockClient.getAccessCount()).isEqualTo(0);

-        Table table = metastore.getTable(TEST_DATABASE, TEST_TABLE).orElseThrow();
+        assertThat(metastore.getTable(TEST_DATABASE, TEST_TABLE)).isPresent();
         assertThat(mockClient.getAccessCount()).isEqualTo(1);

         // Force TEST_TABLE to not have column statistics available
         mockClient.mockColumnStats(TEST_DATABASE, TEST_TABLE, ImmutableMap.of());
-        PartitionStatistics expectedStats = PartitionStatistics.builder()
-                .setBasicStatistics(new HiveBasicStatistics(OptionalLong.empty(), OptionalLong.of(2398040535435L), OptionalLong.empty(), OptionalLong.empty()))
-                .setColumnStatistics(ImmutableMap.of())
-                .build();
-        assertThat(metastore.getTableStatistics(table)).isEqualTo(expectedStats);
+        assertThat(metastore.getTableColumnStatistics(TEST_DATABASE, TEST_TABLE, ImmutableSet.of(TEST_COLUMN), TEST_BASIC_STATS.getRowCount())).isEmpty();
         assertThat(mockClient.getAccessCount()).isEqualTo(2);

         // Absence of column statistics should get cached and metastore client access count should stay the same
-        assertThat(metastore.getTableStatistics(table)).isEqualTo(expectedStats);
+        assertThat(metastore.getTableColumnStatistics(TEST_DATABASE, TEST_TABLE, ImmutableSet.of(TEST_COLUMN), TEST_BASIC_STATS.getRowCount())).isEmpty();
         assertThat(mockClient.getAccessCount()).isEqualTo(2);
     }

@@ -597,20 +588,16 @@ public void testTableStatisticsWithEmptyColumnStatsWithNoCacheMissing()
         assertThat(mockClient.getAccessCount()).isEqualTo(0);

-        Table table = metastore.getTable(TEST_DATABASE, TEST_TABLE).orElseThrow();
+        assertThat(metastore.getTable(TEST_DATABASE, TEST_TABLE)).isPresent();
         assertThat(mockClient.getAccessCount()).isEqualTo(1);

         // Force TEST_TABLE to not have column statistics available
         mockClient.mockColumnStats(TEST_DATABASE, TEST_TABLE, ImmutableMap.of());
-        PartitionStatistics expectedStats = PartitionStatistics.builder()
-                .setBasicStatistics(new HiveBasicStatistics(OptionalLong.empty(), OptionalLong.of(2398040535435L), OptionalLong.empty(), OptionalLong.empty()))
-                .setColumnStatistics(ImmutableMap.of())
-                .build();
-        assertThat(metastore.getTableStatistics(table)).isEqualTo(expectedStats);
+        assertThat(metastore.getTableColumnStatistics(TEST_DATABASE, TEST_TABLE, ImmutableSet.of(TEST_COLUMN), TEST_BASIC_STATS.getRowCount())).isEmpty();
         assertThat(mockClient.getAccessCount()).isEqualTo(2);

         // Absence of column statistics does not get cached and metastore client access count increases
-        assertThat(metastore.getTableStatistics(table)).isEqualTo(expectedStats);
+        assertThat(metastore.getTableColumnStatistics(TEST_DATABASE, TEST_TABLE, ImmutableSet.of(TEST_COLUMN), TEST_BASIC_STATS.getRowCount())).isEmpty();
         assertThat(mockClient.getAccessCount()).isEqualTo(3);
     }

@@ -619,17 +606,17 @@ public void testGetTableStatisticsWithoutMetadataCache()
     {
         assertThat(mockClient.getAccessCount()).isEqualTo(0);

-        Table table = statsOnlyCacheMetastore.getTable(TEST_DATABASE, TEST_TABLE).orElseThrow();
+        assertThat(statsOnlyCacheMetastore.getTable(TEST_DATABASE, TEST_TABLE)).isPresent();
         assertThat(mockClient.getAccessCount()).isEqualTo(1);

-        assertThat(statsOnlyCacheMetastore.getTableStatistics(table)).isEqualTo(TEST_STATS);
+        assertThat(statsOnlyCacheMetastore.getTableColumnStatistics(TEST_DATABASE, TEST_TABLE, ImmutableSet.of(TEST_COLUMN), TEST_BASIC_STATS.getRowCount())).isEqualTo(TEST_COLUMN_STATS);
         assertThat(mockClient.getAccessCount()).isEqualTo(2);

-        assertThat(statsOnlyCacheMetastore.getTableStatistics(table)).isEqualTo(TEST_STATS);
+        assertThat(statsOnlyCacheMetastore.getTableColumnStatistics(TEST_DATABASE, TEST_TABLE, ImmutableSet.of(TEST_COLUMN), TEST_BASIC_STATS.getRowCount())).isEqualTo(TEST_COLUMN_STATS);
         assertThat(mockClient.getAccessCount()).isEqualTo(2);

-        assertThat(statsOnlyCacheMetastore.getTableStatisticsStats().getRequestCount()).isEqualTo(2);
-        assertThat(statsOnlyCacheMetastore.getTableStatisticsStats().getHitRate()).isEqualTo(0.5);
+        assertThat(statsOnlyCacheMetastore.getTableColumnStatisticsStats().getRequestCount()).isEqualTo(2);
+        assertThat(statsOnlyCacheMetastore.getTableColumnStatisticsStats().getHitRate()).isEqualTo(0.5);

         assertThat(statsOnlyCacheMetastore.getTableStats().getRequestCount()).isEqualTo(0);
         assertThat(statsOnlyCacheMetastore.getTableStats().getHitRate()).isEqualTo(1.0);

@@ -661,7 +648,7 @@ public List getTableColumnStatistics(String databaseName, S
         };
         CachingHiveMetastore metastore = createCachingHiveMetastore(new BridgingHiveMetastore(createThriftHiveMetastore(mockClient)), CACHE_TTL, true, true, executor);

-        Table table = metastore.getTable(TEST_DATABASE, TEST_TABLE).orElseThrow();
+        assertThat(metastore.getTable(TEST_DATABASE, TEST_TABLE)).isPresent();

         ExecutorService executorService = Executors.newFixedThreadPool(1, new ThreadFactoryBuilder().setNameFormat("invalidation-%d").build());
         try {
@@ -679,10 +666,10 @@
             });

             // start get stats before the invalidation, it will wait until invalidation is done to finish
-            assertThat(metastore.getTableStatistics(table)).isEqualTo(TEST_STATS);
+            assertThat(metastore.getTableColumnStatistics(TEST_DATABASE, TEST_TABLE, TEST_COLUMN_STATS.keySet(), TEST_BASIC_STATS.getRowCount())).isEqualTo(TEST_COLUMN_STATS);
             assertThat(mockClient.getAccessCount()).isEqualTo(2);
             // get stats after invalidate
-            assertThat(metastore.getTableStatistics(table)).isEqualTo(TEST_STATS);
+            assertThat(metastore.getTableColumnStatistics(TEST_DATABASE, TEST_TABLE, TEST_COLUMN_STATS.keySet(), TEST_BASIC_STATS.getRowCount())).isEqualTo(TEST_COLUMN_STATS);
             // the value was not cached
             assertThat(mockClient.getAccessCount()).isEqualTo(3);
             // make sure invalidateFuture is done
@@ -709,11 +696,13 @@ public void testGetPartitionStatistics()
         String partition3Name = makePartitionName(table, partition3);
         assertThat(mockClient.getAccessCount()).isEqualTo(4);

-        assertThat(metastore.getPartitionStatistics(table, ImmutableList.of(partition))).isEqualTo(ImmutableMap.of(TEST_PARTITION1, TEST_STATS));
+        assertThat(metastore.getPartitionColumnStatistics(TEST_DATABASE, TEST_TABLE, ImmutableMap.of(TEST_PARTITION1, OptionalLong.empty()), ImmutableSet.of(TEST_COLUMN)))
+                .isEqualTo(ImmutableMap.of(TEST_PARTITION1, TEST_COLUMN_STATS));
         assertThat(mockClient.getAccessCount()).isEqualTo(5);
         assertThat(metastore.getPartitionStatisticsStats().getRequestCount()).isEqualTo(1);

-        assertThat(metastore.getPartitionStatistics(table, ImmutableList.of(partition))).isEqualTo(ImmutableMap.of(TEST_PARTITION1, TEST_STATS));
+        assertThat(metastore.getPartitionColumnStatistics(TEST_DATABASE, TEST_TABLE, ImmutableMap.of(TEST_PARTITION1, OptionalLong.empty()), ImmutableSet.of(TEST_COLUMN)))
+                .isEqualTo(ImmutableMap.of(TEST_PARTITION1, TEST_COLUMN_STATS));
         assertThat(mockClient.getAccessCount()).isEqualTo(5);
         assertThat(metastore.getPartitionStatisticsStats().getRequestCount()).isEqualTo(2);

@@ -726,32 +715,25 @@ public void testGetPartitionStatistics()
         assertThat(metastore.getPartitionStats().getHitRate()).isEqualTo(0.0);

         // check empty column list does not trigger the call
-        Table emptyColumnListTable = Table.builder(table).setDataColumns(ImmutableList.of()).build();
-        Map<String, PartitionStatistics> partitionStatistics = metastore.getPartitionStatistics(emptyColumnListTable, ImmutableList.of(partition));
-        assertThat(partitionStatistics).containsOnlyKeys(TEST_PARTITION1);
-        assertThat(partitionStatistics.get(TEST_PARTITION1).getBasicStatistics()).isEqualTo(TEST_STATS.getBasicStatistics());
-        assertThat(metastore.getPartitionStatisticsStats().getRequestCount()).isEqualTo(3);
-        assertThat(metastore.getPartitionStatisticsStats().getHitRate()).isEqualTo(2.0 / 3);
+        assertThatThrownBy(() -> metastore.getPartitionColumnStatistics(TEST_DATABASE, TEST_TABLE, ImmutableMap.of(TEST_PARTITION1, OptionalLong.empty()), ImmutableSet.of()))
+                .isInstanceOf(IllegalArgumentException.class);
+        assertThat(metastore.getPartitionStatisticsStats().getRequestCount()).isEqualTo(2);
+        assertThat(metastore.getPartitionStatisticsStats().getHitRate()).isEqualTo(0.5);

         mockClient.mockPartitionColumnStats(TEST_DATABASE, TEST_TABLE, TEST_PARTITION1, ImmutableMap.of(
                 "col1", ColumnStatisticsData.longStats(new LongColumnStatsData().setNumNulls(1)),
                 "col2", ColumnStatisticsData.longStats(new LongColumnStatsData().setNumNulls(2)),
                 "col3", ColumnStatisticsData.longStats(new LongColumnStatsData().setNumNulls(3))));

-        Table tableCol1 = Table.builder(table).setDataColumns(ImmutableList.of(new Column("col1", HIVE_LONG, Optional.empty(), Map.of()))).build();
-        Map<String, PartitionStatistics> tableCol1PartitionStatistics = metastore.getPartitionStatistics(tableCol1, ImmutableList.of(partition));
+        var tableCol1PartitionStatistics = metastore.getPartitionColumnStatistics(TEST_DATABASE, TEST_TABLE, ImmutableMap.of(partitionName, OptionalLong.empty()), ImmutableSet.of("col1"));
         assertThat(tableCol1PartitionStatistics).containsOnlyKeys(partitionName);
-        assertThat(tableCol1PartitionStatistics.get(partitionName).getColumnStatistics()).containsEntry("col1", intColumnStats(1));
-        Table tableCol2 = Table.builder(table).setDataColumns(ImmutableList.of(new Column("col2", HIVE_LONG, Optional.empty(), Map.of()))).build();
-        Map<String, PartitionStatistics> tableCol2PartitionStatistics = metastore.getPartitionStatistics(tableCol2, ImmutableList.of(partition));
+        assertThat(tableCol1PartitionStatistics.get(partitionName)).containsEntry("col1", intColumnStats(1));
+        var tableCol2PartitionStatistics = metastore.getPartitionColumnStatistics(TEST_DATABASE, TEST_TABLE, ImmutableMap.of(partitionName, OptionalLong.empty()), ImmutableSet.of("col2"));
         assertThat(tableCol2PartitionStatistics).containsOnlyKeys(partitionName);
-        assertThat(tableCol2PartitionStatistics.get(partitionName).getColumnStatistics()).containsEntry("col2", intColumnStats(2));
-        Table tableCol23 = Table.builder(table)
-                .setDataColumns(ImmutableList.of(new Column("col2", HIVE_LONG, Optional.empty(), Map.of()), new Column("col3", HIVE_LONG, Optional.empty(), Map.of())))
-                .build();
-        Map<String, PartitionStatistics> tableCol23PartitionStatistics = metastore.getPartitionStatistics(tableCol23, ImmutableList.of(partition));
+        assertThat(tableCol2PartitionStatistics.get(partitionName)).containsEntry("col2", intColumnStats(2));
+        var tableCol23PartitionStatistics = metastore.getPartitionColumnStatistics(TEST_DATABASE, TEST_TABLE, ImmutableMap.of(partitionName, OptionalLong.empty()), ImmutableSet.of("col2", "col3"));
         assertThat(tableCol23PartitionStatistics).containsOnlyKeys(partitionName);
-        assertThat(tableCol23PartitionStatistics.get(partitionName).getColumnStatistics())
+        assertThat(tableCol23PartitionStatistics.get(partitionName))
                 .containsEntry("col2", intColumnStats(2))
                 .containsEntry("col3", intColumnStats(3));

@@ -765,19 +747,23 @@ public void testGetPartitionStatistics()
                 "col2", ColumnStatisticsData.longStats(new LongColumnStatsData().setNumNulls(32)),
                 "col3", ColumnStatisticsData.longStats(new LongColumnStatsData().setNumNulls(33))));

-        Map<String, PartitionStatistics> tableCol2Partition2Statistics = metastore.getPartitionStatistics(tableCol2, ImmutableList.of(partition2));
+        var tableCol2Partition2Statistics = metastore.getPartitionColumnStatistics(TEST_DATABASE, TEST_TABLE, ImmutableMap.of(partition2Name, OptionalLong.empty()), ImmutableSet.of("col2"));
         assertThat(tableCol2Partition2Statistics).containsOnlyKeys(partition2Name);
-        assertThat(tableCol2Partition2Statistics.get(partition2Name).getColumnStatistics()).containsEntry("col2", intColumnStats(22));
+        assertThat(tableCol2Partition2Statistics.get(partition2Name)).containsEntry("col2", intColumnStats(22));

-        Map<String, PartitionStatistics> tableCol23Partition123Statistics = metastore.getPartitionStatistics(tableCol23, ImmutableList.of(partition, partition2, partition3));
+        var tableCol23Partition123Statistics = metastore.getPartitionColumnStatistics(
+                TEST_DATABASE,
+                TEST_TABLE,
+                ImmutableMap.of(partitionName, OptionalLong.empty(), partition2Name, OptionalLong.empty(), partition3Name, OptionalLong.empty()),
+                ImmutableSet.of("col2", "col3"));
         assertThat(tableCol23Partition123Statistics).containsOnlyKeys(partitionName, partition2Name, partition3Name);
-        assertThat(tableCol23Partition123Statistics.get(partitionName).getColumnStatistics())
+        assertThat(tableCol23Partition123Statistics.get(partitionName))
                 .containsEntry("col2", intColumnStats(2))
                 .containsEntry("col3", intColumnStats(3));
-        assertThat(tableCol23Partition123Statistics.get(partition2Name).getColumnStatistics())
+        assertThat(tableCol23Partition123Statistics.get(partition2Name))
                 .containsEntry("col2", intColumnStats(22))
                 .containsEntry("col3", intColumnStats(23));
-        assertThat(tableCol23Partition123Statistics.get(partition3Name).getColumnStatistics())
+        assertThat(tableCol23Partition123Statistics.get(partition3Name))
.containsEntry("col2", intColumnStats(32)) .containsEntry("col3", intColumnStats(33)); } @@ -790,19 +776,21 @@ public void testGetPartitionStatisticsWithEmptyColumnStats() Table table = metastore.getTable(TEST_DATABASE, TEST_TABLE).orElseThrow(); assertThat(mockClient.getAccessCount()).isEqualTo(1); - Partition partition = metastore.getPartition(table, TEST_PARTITION_VALUES2).orElseThrow(); + assertThat(metastore.getPartition(table, TEST_PARTITION_VALUES2)).isPresent(); assertThat(mockClient.getAccessCount()).isEqualTo(2); // TEST_PARTITION2 does not have column statistics available PartitionStatistics expectedStats = PartitionStatistics.builder() - .setBasicStatistics(new HiveBasicStatistics(OptionalLong.empty(), OptionalLong.of(2398040535435L), OptionalLong.empty(), OptionalLong.empty())) + .setBasicStatistics(TEST_BASIC_STATS) .setColumnStatistics(ImmutableMap.of()) .build(); - assertThat(metastore.getPartitionStatistics(table, ImmutableList.of(partition))).isEqualTo(ImmutableMap.of(TEST_PARTITION2, expectedStats)); + assertThat(metastore.getPartitionColumnStatistics(TEST_DATABASE, TEST_TABLE, ImmutableMap.of(TEST_PARTITION2, OptionalLong.empty()), ImmutableSet.of(TEST_COLUMN))) + .isEqualTo(ImmutableMap.of(TEST_PARTITION2, expectedStats.getColumnStatistics())); assertThat(mockClient.getAccessCount()).isEqualTo(3); // Absence of column statistics should get cached and metastore client access count should stay the same - assertThat(metastore.getPartitionStatistics(table, ImmutableList.of(partition))).isEqualTo(ImmutableMap.of(TEST_PARTITION2, expectedStats)); + assertThat(metastore.getPartitionColumnStatistics(TEST_DATABASE, TEST_TABLE, ImmutableMap.of(TEST_PARTITION2, OptionalLong.empty()), ImmutableSet.of(TEST_COLUMN))) + .isEqualTo(ImmutableMap.of(TEST_PARTITION2, expectedStats.getColumnStatistics())); assertThat(mockClient.getAccessCount()).isEqualTo(3); } @@ -816,19 +804,21 @@ public void testGetPartitionStatisticsWithEmptyColumnStatsWithNoCacheMissing() Table table = metastore.getTable(TEST_DATABASE, TEST_TABLE).orElseThrow(); assertThat(mockClient.getAccessCount()).isEqualTo(1); - Partition partition = metastore.getPartition(table, TEST_PARTITION_VALUES2).orElseThrow(); + assertThat(metastore.getPartition(table, TEST_PARTITION_VALUES2)).isPresent(); assertThat(mockClient.getAccessCount()).isEqualTo(2); // TEST_PARTITION2 does not have column statistics available PartitionStatistics expectedStats = PartitionStatistics.builder() - .setBasicStatistics(new HiveBasicStatistics(OptionalLong.empty(), OptionalLong.of(2398040535435L), OptionalLong.empty(), OptionalLong.empty())) + .setBasicStatistics(TEST_BASIC_STATS) .setColumnStatistics(ImmutableMap.of()) .build(); - assertThat(metastore.getPartitionStatistics(table, ImmutableList.of(partition))).isEqualTo(ImmutableMap.of(TEST_PARTITION2, expectedStats)); + assertThat(metastore.getPartitionColumnStatistics(TEST_DATABASE, TEST_TABLE, ImmutableMap.of(TEST_PARTITION2, OptionalLong.empty()), ImmutableSet.of(TEST_COLUMN))) + .isEqualTo(ImmutableMap.of(TEST_PARTITION2, expectedStats.getColumnStatistics())); assertThat(mockClient.getAccessCount()).isEqualTo(3); // Absence of column statistics does not get cached and metastore client access count increases - assertThat(metastore.getPartitionStatistics(table, ImmutableList.of(partition))).isEqualTo(ImmutableMap.of(TEST_PARTITION2, expectedStats)); + assertThat(metastore.getPartitionColumnStatistics(TEST_DATABASE, TEST_TABLE, ImmutableMap.of(TEST_PARTITION2, OptionalLong.empty()), 
ImmutableSet.of(TEST_COLUMN))) + .isEqualTo(ImmutableMap.of(TEST_PARTITION2, expectedStats.getColumnStatistics())); assertThat(mockClient.getAccessCount()).isEqualTo(4); } @@ -840,13 +830,15 @@ public void testGetPartitionStatisticsWithoutMetadataCache() Table table = statsOnlyCacheMetastore.getTable(TEST_DATABASE, TEST_TABLE).orElseThrow(); assertThat(mockClient.getAccessCount()).isEqualTo(1); - Partition partition = statsOnlyCacheMetastore.getPartition(table, TEST_PARTITION_VALUES1).orElseThrow(); + assertThat(statsOnlyCacheMetastore.getPartition(table, TEST_PARTITION_VALUES1)).isPresent(); assertThat(mockClient.getAccessCount()).isEqualTo(2); - assertThat(statsOnlyCacheMetastore.getPartitionStatistics(table, ImmutableList.of(partition))).isEqualTo(ImmutableMap.of(TEST_PARTITION1, TEST_STATS)); + assertThat(statsOnlyCacheMetastore.getPartitionColumnStatistics(TEST_DATABASE, TEST_TABLE, ImmutableMap.of(TEST_PARTITION1, OptionalLong.empty()), ImmutableSet.of(TEST_COLUMN))) + .isEqualTo(ImmutableMap.of(TEST_PARTITION1, TEST_COLUMN_STATS)); assertThat(mockClient.getAccessCount()).isEqualTo(3); - assertThat(statsOnlyCacheMetastore.getPartitionStatistics(table, ImmutableList.of(partition))).isEqualTo(ImmutableMap.of(TEST_PARTITION1, TEST_STATS)); + assertThat(statsOnlyCacheMetastore.getPartitionColumnStatistics(TEST_DATABASE, TEST_TABLE, ImmutableMap.of(TEST_PARTITION1, OptionalLong.empty()), ImmutableSet.of(TEST_COLUMN))) + .isEqualTo(ImmutableMap.of(TEST_PARTITION1, TEST_COLUMN_STATS)); assertThat(mockClient.getAccessCount()).isEqualTo(3); assertThat(statsOnlyCacheMetastore.getPartitionStatisticsStats().getRequestCount()).isEqualTo(2); @@ -887,7 +879,7 @@ public Map> getPartitionColumnStatistics(Strin Table table = metastore.getTable(TEST_DATABASE, TEST_TABLE).orElseThrow(); - Partition partition = metastore.getPartition(table, TEST_PARTITION_VALUES1).orElseThrow(); + assertThat(metastore.getPartition(table, TEST_PARTITION_VALUES1)).isPresent(); ExecutorService executorService = Executors.newFixedThreadPool(1, new ThreadFactoryBuilder().setNameFormat("invalidation-%d").build()); try { @@ -905,10 +897,12 @@ public Map> getPartitionColumnStatistics(Strin }); // start get stats before the invalidation, it will wait until invalidation is done to finish - assertThat(metastore.getPartitionStatistics(table, ImmutableList.of(partition))).isEqualTo(ImmutableMap.of(TEST_PARTITION1, TEST_STATS)); + assertThat(metastore.getPartitionColumnStatistics(TEST_DATABASE, TEST_TABLE, ImmutableMap.of(TEST_PARTITION1, OptionalLong.empty()), ImmutableSet.of(TEST_COLUMN))) + .isEqualTo(ImmutableMap.of(TEST_PARTITION1, TEST_COLUMN_STATS)); assertThat(mockClient.getAccessCount()).isEqualTo(3); // get stats after invalidate - assertThat(metastore.getPartitionStatistics(table, ImmutableList.of(partition))).isEqualTo(ImmutableMap.of(TEST_PARTITION1, TEST_STATS)); + assertThat(metastore.getPartitionColumnStatistics(TEST_DATABASE, TEST_TABLE, ImmutableMap.of(TEST_PARTITION1, OptionalLong.empty()), ImmutableSet.of(TEST_COLUMN))) + .isEqualTo(ImmutableMap.of(TEST_PARTITION1, TEST_COLUMN_STATS)); // the value was not cached assertThat(mockClient.getAccessCount()).isEqualTo(4); // make sure invalidateFuture is done diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/thrift/TestHiveMetastoreAccessOperations.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/thrift/TestHiveMetastoreAccessOperations.java index a850c387e0b6..da83783c021e 100644 --- 
+++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/thrift/TestHiveMetastoreAccessOperations.java
@@ -27,10 +27,10 @@
 import static io.trino.plugin.hive.metastore.MetastoreMethod.CREATE_TABLE;
 import static io.trino.plugin.hive.metastore.MetastoreMethod.GET_DATABASE;
 import static io.trino.plugin.hive.metastore.MetastoreMethod.GET_PARTITIONS_BY_NAMES;
+import static io.trino.plugin.hive.metastore.MetastoreMethod.GET_PARTITION_COLUMN_STATISTICS;
 import static io.trino.plugin.hive.metastore.MetastoreMethod.GET_PARTITION_NAMES_BY_FILTER;
-import static io.trino.plugin.hive.metastore.MetastoreMethod.GET_PARTITION_STATISTICS;
 import static io.trino.plugin.hive.metastore.MetastoreMethod.GET_TABLE;
-import static io.trino.plugin.hive.metastore.MetastoreMethod.GET_TABLE_STATISTICS;
+import static io.trino.plugin.hive.metastore.MetastoreMethod.GET_TABLE_COLUMN_STATISTICS;
 import static io.trino.plugin.hive.metastore.MetastoreMethod.UPDATE_PARTITION_STATISTICS;
 import static io.trino.plugin.hive.metastore.MetastoreMethod.UPDATE_TABLE_STATISTICS;
 import static org.junit.jupiter.api.parallel.ExecutionMode.SAME_THREAD;
@@ -163,7 +163,7 @@ public void testJoin()
         assertMetastoreInvocations("SELECT name, age FROM test_join_t1 JOIN test_join_t2 ON test_join_t2.id = test_join_t1.id",
                 ImmutableMultiset.<MetastoreMethod>builder()
                         .addCopies(GET_TABLE, 2)
-                        .addCopies(GET_TABLE_STATISTICS, 2)
+                        .addCopies(GET_TABLE_COLUMN_STATISTICS, 2)
                         .build());
     }

@@ -175,7 +175,7 @@ public void testSelfJoin()
         assertMetastoreInvocations("SELECT child.age, parent.age FROM test_self_join_table child JOIN test_self_join_table parent ON child.parent = parent.id",
                 ImmutableMultiset.<MetastoreMethod>builder()
                         .add(GET_TABLE)
-                        .add(GET_TABLE_STATISTICS)
+                        .addCopies(GET_TABLE_COLUMN_STATISTICS, 2)
                         .build());
     }

@@ -187,7 +187,7 @@ public void testExplainSelect()
         assertMetastoreInvocations("EXPLAIN SELECT * FROM test_explain",
                 ImmutableMultiset.<MetastoreMethod>builder()
                         .add(GET_TABLE)
-                        .add(GET_TABLE_STATISTICS)
+                        .add(GET_TABLE_COLUMN_STATISTICS)
                         .build());
     }

@@ -211,7 +211,7 @@ public void testShowStatsForTable()
         assertMetastoreInvocations("SHOW STATS FOR test_show_stats",
                 ImmutableMultiset.<MetastoreMethod>builder()
                         .add(GET_TABLE)
-                        .add(GET_TABLE_STATISTICS)
+                        .add(GET_TABLE_COLUMN_STATISTICS)
                         .build());
     }

@@ -223,7 +223,7 @@ public void testShowStatsForTableWithFilter()
         assertMetastoreInvocations("SHOW STATS FOR (SELECT * FROM test_show_stats_with_filter where age >= 2)",
                 ImmutableMultiset.<MetastoreMethod>builder()
                         .add(GET_TABLE)
-                        .add(GET_TABLE_STATISTICS)
+                        .add(GET_TABLE_COLUMN_STATISTICS)
                         .build());
     }

@@ -249,7 +249,7 @@ public void testAnalyzePartitionedTable()
                         .addCopies(GET_TABLE, 1)
                         .add(GET_PARTITION_NAMES_BY_FILTER)
                         .add(GET_PARTITIONS_BY_NAMES)
-                        .add(GET_PARTITION_STATISTICS)
+                        .add(GET_PARTITION_COLUMN_STATISTICS)
                         .add(UPDATE_PARTITION_STATISTICS)
                         .build());

@@ -260,7 +260,7 @@
                         .add(GET_TABLE)
                         .add(GET_PARTITION_NAMES_BY_FILTER)
                         .add(GET_PARTITIONS_BY_NAMES)
-                        .add(GET_PARTITION_STATISTICS)
+                        .add(GET_PARTITION_COLUMN_STATISTICS)
                         .add(UPDATE_PARTITION_STATISTICS)
                         .build());
     }
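--

Note on call sites outside this patch: since basic statistics are extracted from table/partition
parameters and the metastore now serves only column statistics, code that previously called
getTableStatistics(table) has to combine the two pieces itself. Below is a minimal sketch of that
pattern; the helper class is illustrative and not part of this patch, and it assumes the existing
MetastoreUtil.getHiveBasicStatistics helper plus the getTableColumnStatistics signature introduced
above.

    import io.trino.plugin.hive.HiveBasicStatistics;
    import io.trino.plugin.hive.PartitionStatistics;
    import io.trino.plugin.hive.metastore.HiveColumnStatistics;
    import io.trino.plugin.hive.metastore.HiveMetastore;
    import io.trino.plugin.hive.metastore.Table;

    import java.util.Map;
    import java.util.Set;

    import static com.google.common.base.Preconditions.checkArgument;
    import static io.trino.plugin.hive.metastore.MetastoreUtil.getHiveBasicStatistics;

    // Illustrative helper, not part of this patch
    final class TableStatisticsExample
    {
        private TableStatisticsExample() {}

        static PartitionStatistics readTableStatistics(HiveMetastore metastore, Table table, Set<String> columnNames)
        {
            // getTableColumnStatistics rejects an empty column set (see the
            // IllegalArgumentException assertions in TestCachingHiveMetastore),
            // so guard here and skip the metastore round trip for empty requests
            checkArgument(!columnNames.isEmpty(), "columnNames must not be empty");

            // basic statistics (row count, file count, size) come from the table
            // parameters; the underlying metastore is not consulted for them
            HiveBasicStatistics basicStatistics = getHiveBasicStatistics(table.getParameters());

            // only column statistics are fetched from the metastore; the row count
            // is passed along so implementations can sanity-check the values
            Map<String, HiveColumnStatistics> columnStatistics = metastore.getTableColumnStatistics(
                    table.getDatabaseName(),
                    table.getTableName(),
                    columnNames,
                    basicStatistics.getRowCount());

            return new PartitionStatistics(basicStatistics, columnStatistics);
        }
    }

As the updated tests assert, an empty column set now fails fast instead of silently returning only
basic statistics, so callers that need just row counts should read the table parameters and skip
the metastore call entirely.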