diff --git a/presto-hive/src/main/java/io/prestosql/plugin/hive/BackgroundHiveSplitLoader.java b/presto-hive/src/main/java/io/prestosql/plugin/hive/BackgroundHiveSplitLoader.java
index 8719d4d03969..4a1bcc113c26 100644
--- a/presto-hive/src/main/java/io/prestosql/plugin/hive/BackgroundHiveSplitLoader.java
+++ b/presto-hive/src/main/java/io/prestosql/plugin/hive/BackgroundHiveSplitLoader.java
@@ -80,6 +80,7 @@ import static io.prestosql.plugin.hive.HiveUtil.getInputFormat;
 import static io.prestosql.plugin.hive.S3SelectPushdown.shouldEnablePushdownForTable;
 import static io.prestosql.plugin.hive.metastore.MetastoreUtil.getHiveSchema;
+import static io.prestosql.plugin.hive.metastore.MetastoreUtil.getPartitionLocation;
 import static io.prestosql.plugin.hive.util.ConfigurationUtils.toJobConf;
 import static io.prestosql.plugin.hive.util.HiveFileIterator.NestedDirectoryPolicy.FAIL;
 import static io.prestosql.plugin.hive.util.HiveFileIterator.NestedDirectoryPolicy.IGNORED;
@@ -531,14 +532,6 @@ private static Properties getPartitionSchema(Table table, Optional<Partition> partition)
         return getHiveSchema(partition.get(), table);
     }
 
-    private static String getPartitionLocation(Table table, Optional<Partition> partition)
-    {
-        if (!partition.isPresent()) {
-            return table.getStorage().getLocation();
-        }
-        return partition.get().getStorage().getLocation();
-    }
-
     public static class BucketSplitInfo
     {
         private final List<HiveColumnHandle> bucketColumns;
diff --git a/presto-hive/src/main/java/io/prestosql/plugin/hive/HiveAnalyzeProperties.java b/presto-hive/src/main/java/io/prestosql/plugin/hive/HiveAnalyzeProperties.java
new file mode 100644
index 000000000000..66ef0cb2a41b
--- /dev/null
+++ b/presto-hive/src/main/java/io/prestosql/plugin/hive/HiveAnalyzeProperties.java
@@ -0,0 +1,89 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.prestosql.plugin.hive;
+
+import com.google.common.collect.ImmutableList;
+import io.prestosql.spi.PrestoException;
+import io.prestosql.spi.session.PropertyMetadata;
+import io.prestosql.spi.type.TypeManager;
+
+import javax.inject.Inject;
+
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+
+import static com.google.common.base.MoreObjects.firstNonNull;
+import static com.google.common.collect.ImmutableList.toImmutableList;
+import static com.google.common.collect.ImmutableSet.toImmutableSet;
+import static io.prestosql.plugin.hive.HivePartitionKey.HIVE_DEFAULT_DYNAMIC_PARTITION;
+import static io.prestosql.spi.StandardErrorCode.INVALID_ANALYZE_PROPERTY;
+import static io.prestosql.spi.type.TypeSignature.parseTypeSignature;
+
+public class HiveAnalyzeProperties
+{
+    public static final String PARTITIONS_PROPERTY = "partitions";
+
+    private final List<PropertyMetadata<?>> analyzeProperties;
+
+    @Inject
+    public HiveAnalyzeProperties(TypeManager typeManager)
+    {
+        analyzeProperties = ImmutableList.of(
+                new PropertyMetadata<>(
+                        PARTITIONS_PROPERTY,
+                        "Partitions to be analyzed",
+                        typeManager.getType(parseTypeSignature("array(array(varchar))")),
+                        List.class,
+                        null,
+                        false,
+                        HiveAnalyzeProperties::decodePartitionLists,
+                        value -> value));
+    }
+
+    public List<PropertyMetadata<?>> getAnalyzeProperties()
+    {
+        return analyzeProperties;
+    }
+
+    @SuppressWarnings("unchecked")
+    public static Optional<List<List<String>>> getPartitionList(Map<String, Object> properties)
+    {
+        List<List<String>> partitions = (List<List<String>>) properties.get(PARTITIONS_PROPERTY);
+        return partitions == null ? Optional.empty() : Optional.of(partitions);
+    }
+
+    private static List<List<String>> decodePartitionLists(Object object)
+    {
+        if (object == null) {
+            return null;
+        }
+
+        // replace null partition value with hive default partition
+        return ImmutableList.copyOf(((Collection<?>) object).stream()
+                .peek(HiveAnalyzeProperties::throwIfNull)
+                .map(partition -> ((Collection<?>) partition).stream()
+                        .map(name -> firstNonNull((String) name, HIVE_DEFAULT_DYNAMIC_PARTITION))
+                        .collect(toImmutableList()))
+                .collect(toImmutableSet()));
+    }
+
+    private static void throwIfNull(Object object)
+    {
+        if (object == null) {
+            throw new PrestoException(INVALID_ANALYZE_PROPERTY, "Invalid null value in analyze partitions property");
+        }
+    }
+}
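The new `partitions` analyze property is how ANALYZE callers restrict statistics collection to specific partitions. As a usage sketch (schema and table names here are illustrative; the syntax matches the tests added at the end of this patch):

    -- analyze two partitions of a table partitioned by (p_varchar, p_bigint)
    ANALYZE hive.tpch.test_table WITH (partitions = ARRAY[ARRAY['p1', '7'], ARRAY['p2', '7']]);

    -- NULL partition values are mapped to Hive's default partition (__HIVE_DEFAULT_PARTITION__)
    ANALYZE hive.tpch.test_table WITH (partitions = ARRAY[ARRAY[NULL, NULL]]);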
diff --git a/presto-hive/src/main/java/io/prestosql/plugin/hive/HiveClientModule.java b/presto-hive/src/main/java/io/prestosql/plugin/hive/HiveClientModule.java
index 1e9d8a231df8..3e4b4f93d470 100644
--- a/presto-hive/src/main/java/io/prestosql/plugin/hive/HiveClientModule.java
+++ b/presto-hive/src/main/java/io/prestosql/plugin/hive/HiveClientModule.java
@@ -70,6 +70,7 @@ public void configure(Binder binder)
 
         binder.bind(HiveSessionProperties.class).in(Scopes.SINGLETON);
         binder.bind(HiveTableProperties.class).in(Scopes.SINGLETON);
+        binder.bind(HiveAnalyzeProperties.class).in(Scopes.SINGLETON);
         binder.bind(NamenodeStats.class).in(Scopes.SINGLETON);
         newExporter(binder).export(NamenodeStats.class).withGeneratedName();
diff --git a/presto-hive/src/main/java/io/prestosql/plugin/hive/HiveConnector.java b/presto-hive/src/main/java/io/prestosql/plugin/hive/HiveConnector.java
index fbf3169a9ffc..2a5bab798077 100644
--- a/presto-hive/src/main/java/io/prestosql/plugin/hive/HiveConnector.java
+++ b/presto-hive/src/main/java/io/prestosql/plugin/hive/HiveConnector.java
@@ -57,6 +57,8 @@ public class HiveConnector
     private final List<PropertyMetadata<?>> sessionProperties;
     private final List<PropertyMetadata<?>> schemaProperties;
     private final List<PropertyMetadata<?>> tableProperties;
+    private final List<PropertyMetadata<?>> analyzeProperties;
+
     private final ConnectorAccessControl accessControl;
     private final ClassLoader classLoader;
 
@@ -75,6 +77,7 @@ public HiveConnector(
             List<PropertyMetadata<?>> sessionProperties,
             List<PropertyMetadata<?>> schemaProperties,
             List<PropertyMetadata<?>> tableProperties,
+            List<PropertyMetadata<?>> analyzeProperties,
             ConnectorAccessControl accessControl,
             ClassLoader classLoader)
     {
@@ -90,6 +93,7 @@ public HiveConnector(
         this.sessionProperties = ImmutableList.copyOf(requireNonNull(sessionProperties, "sessionProperties is null"));
         this.schemaProperties = ImmutableList.copyOf(requireNonNull(schemaProperties, "schemaProperties is null"));
         this.tableProperties = ImmutableList.copyOf(requireNonNull(tableProperties, "tableProperties is null"));
+        this.analyzeProperties = ImmutableList.copyOf(requireNonNull(analyzeProperties, "analyzeProperties is null"));
         this.accessControl = requireNonNull(accessControl, "accessControl is null");
         this.classLoader = requireNonNull(classLoader, "classLoader is null");
     }
@@ -150,6 +154,12 @@ public List<PropertyMetadata<?>> getSchemaProperties()
         return schemaProperties;
     }
 
+    @Override
+    public List<PropertyMetadata<?>> getAnalyzeProperties()
+    {
+        return analyzeProperties;
+    }
+
     @Override
     public List<PropertyMetadata<?>> getTableProperties()
     {
diff --git a/presto-hive/src/main/java/io/prestosql/plugin/hive/HiveConnectorFactory.java b/presto-hive/src/main/java/io/prestosql/plugin/hive/HiveConnectorFactory.java
index d24abfa3eadd..0ca03b6643d9 100644
--- a/presto-hive/src/main/java/io/prestosql/plugin/hive/HiveConnectorFactory.java
+++ b/presto-hive/src/main/java/io/prestosql/plugin/hive/HiveConnectorFactory.java
@@ -129,6 +129,7 @@ public Connector create(String catalogName, Map<String, String> config, ConnectorContext context)
             ConnectorNodePartitioningProvider connectorDistributionProvider = injector.getInstance(ConnectorNodePartitioningProvider.class);
             HiveSessionProperties hiveSessionProperties = injector.getInstance(HiveSessionProperties.class);
             HiveTableProperties hiveTableProperties = injector.getInstance(HiveTableProperties.class);
+            HiveAnalyzeProperties hiveAnalyzeProperties = injector.getInstance(HiveAnalyzeProperties.class);
             ConnectorAccessControl accessControl = new PartitionsAwareAccessControl(injector.getInstance(ConnectorAccessControl.class));
             Set<Procedure> procedures = injector.getInstance(Key.get(new TypeLiteral<Set<Procedure>>() {}));
 
@@ -145,6 +146,7 @@ public Connector create(String catalogName, Map<String, String> config, ConnectorContext context)
                     hiveSessionProperties.getSessionProperties(),
                     HiveSchemaProperties.SCHEMA_PROPERTIES,
                     hiveTableProperties.getTableProperties(),
+                    hiveAnalyzeProperties.getAnalyzeProperties(),
                     accessControl,
                     classLoader);
         }
diff --git a/presto-hive/src/main/java/io/prestosql/plugin/hive/HiveErrorCode.java b/presto-hive/src/main/java/io/prestosql/plugin/hive/HiveErrorCode.java
index e02f1669c130..190b460915c6 100644
--- a/presto-hive/src/main/java/io/prestosql/plugin/hive/HiveErrorCode.java
+++ b/presto-hive/src/main/java/io/prestosql/plugin/hive/HiveErrorCode.java
@@ -63,6 +63,7 @@ public enum HiveErrorCode
     // HIVE_TOO_MANY_BUCKET_SORT_FILES(36) is deprecated
     HIVE_CORRUPTED_COLUMN_STATISTICS(37, EXTERNAL),
     HIVE_EXCEEDED_SPLIT_BUFFERING_LIMIT(38, USER_ERROR),
+    HIVE_UNKNOWN_COLUMN_STATISTIC_TYPE(39, INTERNAL_ERROR),
     /**/;
 
     private final ErrorCode errorCode;
diff --git a/presto-hive/src/main/java/io/prestosql/plugin/hive/HiveMetadata.java b/presto-hive/src/main/java/io/prestosql/plugin/hive/HiveMetadata.java
index e9c051ebaa88..0adf0d3b96c9 100644
--- a/presto-hive/src/main/java/io/prestosql/plugin/hive/HiveMetadata.java
+++ b/presto-hive/src/main/java/io/prestosql/plugin/hive/HiveMetadata.java
@@ -16,7 +16,9 @@
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Joiner;
 import com.google.common.base.Splitter;
+import com.google.common.base.Suppliers;
 import com.google.common.base.Verify;
+import com.google.common.base.VerifyException;
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.ImmutableMap.Builder;
@@ -81,6 +83,7 @@
 import io.prestosql.spi.statistics.ColumnStatisticMetadata;
 import io.prestosql.spi.statistics.ColumnStatisticType;
 import io.prestosql.spi.statistics.ComputedStatistics;
+import io.prestosql.spi.statistics.TableStatisticType;
 import io.prestosql.spi.statistics.TableStatistics;
 import io.prestosql.spi.statistics.TableStatisticsMetadata;
 import io.prestosql.spi.type.Type;
@@ -108,6 +111,7 @@
 import java.util.Set;
 import java.util.function.Function;
 import java.util.function.Predicate;
+import java.util.function.Supplier;
 import java.util.stream.Collectors;
 import java.util.stream.IntStream;
@@ -119,6 +123,7 @@
 import static com.google.common.collect.ImmutableSet.toImmutableSet;
 import static com.google.common.collect.Iterables.concat;
 import static com.google.common.collect.Streams.stream;
+import static io.prestosql.plugin.hive.HiveAnalyzeProperties.getPartitionList;
 import static io.prestosql.plugin.hive.HiveBasicStatistics.createEmptyStatistics;
 import static io.prestosql.plugin.hive.HiveBasicStatistics.createZeroStatistics;
 import static io.prestosql.plugin.hive.HiveBucketing.getHiveBucketHandle;
@@ -188,15 +193,19 @@
 import static io.prestosql.plugin.hive.util.ConfigurationUtils.toJobConf;
 import static io.prestosql.plugin.hive.util.Statistics.ReduceOperator.ADD;
 import static io.prestosql.plugin.hive.util.Statistics.createComputedStatisticsToPartitionMap;
+import static io.prestosql.plugin.hive.util.Statistics.createEmptyPartitionStatistics;
 import static io.prestosql.plugin.hive.util.Statistics.fromComputedStatistics;
 import static io.prestosql.plugin.hive.util.Statistics.reduce;
 import static io.prestosql.spi.StandardErrorCode.ALREADY_EXISTS;
+import static io.prestosql.spi.StandardErrorCode.INVALID_ANALYZE_PROPERTY;
 import static io.prestosql.spi.StandardErrorCode.INVALID_SCHEMA_PROPERTY;
 import static io.prestosql.spi.StandardErrorCode.INVALID_TABLE_PROPERTY;
 import static io.prestosql.spi.StandardErrorCode.NOT_SUPPORTED;
 import static io.prestosql.spi.StandardErrorCode.SCHEMA_NOT_EMPTY;
 import static io.prestosql.spi.predicate.TupleDomain.withColumnDomains;
 import static io.prestosql.spi.security.PrincipalType.USER;
+import static io.prestosql.spi.statistics.TableStatisticType.ROW_COUNT;
+import static io.prestosql.spi.type.BigintType.BIGINT;
 import static java.lang.String.format;
 import static java.util.Collections.emptyList;
 import static java.util.Objects.requireNonNull;
@@ -302,6 +311,25 @@ public HiveTableHandle getTableHandle(ConnectorSession session, SchemaTableName tableName)
         return new HiveTableHandle(tableName.getSchemaName(), tableName.getTableName());
     }
 
+    @Override
+    public ConnectorTableHandle getTableHandleForStatisticsCollection(ConnectorSession session, SchemaTableName tableName, Map<String, Object> analyzeProperties)
+    {
+        HiveTableHandle handle = getTableHandle(session, tableName);
+        if (handle == null) {
+            return null;
+        }
+        Optional<List<List<String>>> partitionValuesList = getPartitionList(analyzeProperties);
+        ConnectorTableMetadata tableMetadata = getTableMetadata(handle.getSchemaTableName());
+        handle = handle.withAnalyzePartitionValues(partitionValuesList);
+
+        List<String> partitionedBy = getPartitionedBy(tableMetadata.getProperties());
+
+        if (partitionValuesList.isPresent() && partitionedBy.isEmpty()) {
+            throw new PrestoException(INVALID_ANALYZE_PROPERTY, "Only partitioned table can be analyzed with a partition list");
+        }
+        return handle;
+    }
+
     @Override
     public Optional<SystemTable> getSystemTable(ConnectorSession session, SchemaTableName tableName)
     {
@@ -913,6 +941,77 @@ public void dropTable(ConnectorSession session, ConnectorTableHandle tableHandle)
         metastore.dropTable(session, handle.getSchemaName(), handle.getTableName());
     }
 
+    @Override
+    public ConnectorTableHandle beginStatisticsCollection(ConnectorSession session, ConnectorTableHandle tableHandle)
+    {
+        verifyJvmTimeZone();
+        HiveTableHandle handle = (HiveTableHandle) tableHandle;
+        SchemaTableName tableName = handle.getSchemaTableName();
+
+        metastore.getTable(tableName.getSchemaName(), tableName.getTableName())
+                .orElseThrow(() -> new TableNotFoundException(tableName));
+        return handle;
+    }
+
+    @Override
+    public void finishStatisticsCollection(ConnectorSession session, ConnectorTableHandle tableHandle, Collection<ComputedStatistics> computedStatistics)
+    {
+        HiveTableHandle handle = (HiveTableHandle) tableHandle;
+        SchemaTableName tableName = handle.getSchemaTableName();
+        Table table = metastore.getTable(tableName.getSchemaName(), tableName.getTableName())
+                .orElseThrow(() -> new TableNotFoundException(handle.getSchemaTableName()));
+
+        List<Column> partitionColumns = table.getPartitionColumns();
+        List<String> partitionColumnNames = partitionColumns.stream()
+                .map(Column::getName)
+                .collect(toImmutableList());
+        List<HiveColumnHandle> hiveColumnHandles = hiveColumnHandles(table);
+        Map<String, Type> columnTypes = hiveColumnHandles.stream()
+                .filter(columnHandle -> !columnHandle.isHidden())
+                .collect(toImmutableMap(HiveColumnHandle::getName, column -> column.getHiveType().getType(typeManager)));
+
+        Map<List<String>, ComputedStatistics> computedStatisticsMap = createComputedStatisticsToPartitionMap(computedStatistics, partitionColumnNames, columnTypes);
+
+        if (partitionColumns.isEmpty()) {
+            // commit analyze to unpartitioned table
+            metastore.setTableStatistics(table, createPartitionStatistics(session, columnTypes, computedStatisticsMap.get(ImmutableList.of())));
+        }
+        else {
+            List<List<String>> partitionValuesList;
+            if (handle.getAnalyzePartitionValues().isPresent()) {
+                partitionValuesList = handle.getAnalyzePartitionValues().get();
+            }
+            else {
+                partitionValuesList = metastore.getPartitionNames(handle.getSchemaName(), handle.getTableName())
+                        .orElseThrow(() -> new TableNotFoundException(((HiveTableHandle) tableHandle).getSchemaTableName()))
+                        .stream()
+                        .map(HiveUtil::toPartitionValues)
+                        .collect(toImmutableList());
+            }
+
+            ImmutableMap.Builder<List<String>, PartitionStatistics> partitionStatistics = ImmutableMap.builder();
+            Map<String, Set<ColumnStatisticType>> columnStatisticTypes = hiveColumnHandles.stream()
+                    .filter(columnHandle -> !partitionColumnNames.contains(columnHandle.getName()))
+                    .filter(column -> !column.isHidden())
+                    .collect(toImmutableMap(HiveColumnHandle::getName, column -> ImmutableSet.copyOf(metastore.getSupportedColumnStatistics(typeManager.getType(column.getTypeSignature())))));
+            Supplier<PartitionStatistics> emptyPartitionStatistics = Suppliers.memoize(() -> createEmptyPartitionStatistics(columnTypes, columnStatisticTypes));
+
+            int usedComputedStatistics = 0;
+            for (List<String> partitionValues : partitionValuesList) {
+                ComputedStatistics collectedStatistics = computedStatisticsMap.get(partitionValues);
+                if (collectedStatistics == null) {
+                    partitionStatistics.put(partitionValues, emptyPartitionStatistics.get());
+                }
+                else {
+                    usedComputedStatistics++;
+                    partitionStatistics.put(partitionValues, createPartitionStatistics(session, columnTypes, collectedStatistics));
+                }
+            }
+            verify(usedComputedStatistics == computedStatistics.size(), "All computed statistics must be used");
+            metastore.setPartitionStatistics(table, partitionStatistics.build());
+        }
+    }
+
     @Override
     public HiveOutputTableHandle beginCreateTable(ConnectorSession session, ConnectorTableMetadata tableMetadata, Optional<ConnectorNewTableLayout> layout)
     {
@@ -1019,7 +1118,7 @@ public Optional<ConnectorOutputMetadata> finishCreateTable(ConnectorSession session, ConnectorOutputTableHandle tableHandle, Collection<Slice> fragments, Collection<ComputedStatistics> computedStatistics)
                     .map(PartitionUpdate::getStatistics)
                     .reduce((first, second) -> reduce(first, second, ADD))
                     .orElse(createZeroStatistics());
-            tableStatistics = createPartitionStatistics(session, basicStatistics, ImmutableList.of(), columnTypes, partitionComputedStatistics);
+            tableStatistics = createPartitionStatistics(session, basicStatistics, columnTypes, getColumnStatistics(partitionComputedStatistics, ImmutableList.of()));
         }
         else {
             tableStatistics = new PartitionStatistics(createEmptyStatistics(), ImmutableMap.of());
@@ -1033,7 +1132,11 @@ public Optional<ConnectorOutputMetadata> finishCreateTable(ConnectorSession session, ConnectorOutputTableHandle tableHandle, Collection<Slice> fragments, Collection<ComputedStatistics> computedStatistics)
         }
         for (PartitionUpdate update : partitionUpdates) {
             Partition partition = buildPartitionObject(session, table, update);
-            PartitionStatistics partitionStatistics = createPartitionStatistics(session, update.getStatistics(), partition.getValues(), columnTypes, partitionComputedStatistics);
+            PartitionStatistics partitionStatistics = createPartitionStatistics(
+                    session,
+                    update.getStatistics(),
+                    columnTypes,
+                    getColumnStatistics(partitionComputedStatistics, partition.getValues()));
             metastore.addPartition(
                     session,
                     handle.getSchemaName(),
@@ -1228,19 +1331,36 @@ public Optional<ConnectorOutputMetadata> finishInsert(ConnectorSession session, ConnectorInsertTableHandle insertHandle, Collection<Slice> fragments, Collection<ComputedStatistics> computedStatistics)
         Map<List<String>, ComputedStatistics> partitionComputedStatistics = createComputedStatisticsToPartitionMap(computedStatistics, partitionedBy, columnTypes);
 
         for (PartitionUpdate partitionUpdate : partitionUpdates) {
+            if (partitionUpdate.getFileNames().size() == 0) {
+                HiveWriteUtils.createDirectory(
+                        new HdfsContext(session, table.get().getDatabaseName(), table.get().getTableName()),
+                        hdfsEnvironment,
+                        partitionUpdate.getWritePath());
+            }
+
             if (partitionUpdate.getName().isEmpty()) {
                 // insert into unpartitioned table
+                PartitionStatistics partitionStatistics = createPartitionStatistics(
+                        session,
+                        partitionUpdate.getStatistics(),
+                        columnTypes,
+                        getColumnStatistics(partitionComputedStatistics, ImmutableList.of()));
                 metastore.finishInsertIntoExistingTable(
                         session,
                         handle.getSchemaName(),
                         handle.getTableName(),
                        partitionUpdate.getWritePath(),
                         partitionUpdate.getFileNames(),
-                        createPartitionStatistics(session, partitionUpdate.getStatistics(), ImmutableList.of(), columnTypes, partitionComputedStatistics));
+                        partitionStatistics);
             }
             else if (partitionUpdate.getUpdateMode() == APPEND) {
                 // insert into existing partition
                 List<String> partitionValues = toPartitionValues(partitionUpdate.getName());
+                PartitionStatistics partitionStatistics = createPartitionStatistics(
+                        session,
+                        partitionUpdate.getStatistics(),
+                        columnTypes,
+                        getColumnStatistics(partitionComputedStatistics, partitionValues));
                 metastore.finishInsertIntoExistingPartition(
                         session,
                         handle.getSchemaName(),
@@ -1248,7 +1368,7 @@ else if (partitionUpdate.getUpdateMode() == APPEND) {
                         partitionValues,
                         partitionUpdate.getWritePath(),
                         partitionUpdate.getFileNames(),
-                        createPartitionStatistics(session, partitionUpdate.getStatistics(), partitionValues, columnTypes, partitionComputedStatistics));
+                        partitionStatistics);
             }
             else if (partitionUpdate.getUpdateMode() == NEW || partitionUpdate.getUpdateMode() == OVERWRITE) {
                 // insert into new partition or overwrite existing partition
@@ -1259,13 +1379,12 @@ else if (partitionUpdate.getUpdateMode() == NEW || partitionUpdate.getUpdateMode() == OVERWRITE) {
                 if (partitionUpdate.getUpdateMode() == OVERWRITE) {
                     metastore.dropPartition(session, handle.getSchemaName(), handle.getTableName(), partition.getValues());
                 }
-                metastore.addPartition(
+                PartitionStatistics partitionStatistics = createPartitionStatistics(
                         session,
-                        handle.getSchemaName(),
-                        handle.getTableName(),
-                        partition,
-                        partitionUpdate.getWritePath(),
-                        createPartitionStatistics(session, partitionUpdate.getStatistics(), partition.getValues(), columnTypes, partitionComputedStatistics));
+                        partitionUpdate.getStatistics(),
+                        columnTypes,
+                        getColumnStatistics(partitionComputedStatistics, partition.getValues()));
+                metastore.addPartition(session, handle.getSchemaName(), handle.getTableName(), partition, partitionUpdate.getWritePath(), partitionStatistics);
             }
             else {
                 throw new IllegalArgumentException(format("Unsupported update mode: %s", partitionUpdate.getUpdateMode()));
@@ -1299,16 +1418,27 @@ private Partition buildPartitionObject(ConnectorSession session, Table table, PartitionUpdate partitionUpdate)
                 .build();
     }
 
+    private PartitionStatistics createPartitionStatistics(
+            ConnectorSession session,
+            Map<String, Type> columnTypes,
+            ComputedStatistics computedStatistics)
+    {
+        Map<ColumnStatisticMetadata, Block> computedColumnStatistics = computedStatistics.getColumnStatistics();
+
+        Block rowCountBlock = Optional.ofNullable(computedStatistics.getTableStatistics().get(ROW_COUNT))
+                .orElseThrow(() -> new VerifyException("rowCount not present"));
+        verify(!rowCountBlock.isNull(0), "rowCount must never be null");
+        long rowCount = BIGINT.getLong(rowCountBlock, 0);
+        HiveBasicStatistics rowCountOnlyBasicStatistics = new HiveBasicStatistics(OptionalLong.empty(), OptionalLong.of(rowCount), OptionalLong.empty(), OptionalLong.empty());
+        return createPartitionStatistics(session, rowCountOnlyBasicStatistics, columnTypes, computedColumnStatistics);
+    }
+
     private PartitionStatistics createPartitionStatistics(
             ConnectorSession session,
             HiveBasicStatistics basicStatistics,
-            List<String> partitionValues,
             Map<String, Type> columnTypes,
-            Map<List<String>, ComputedStatistics> partitionComputedStatistics)
+            Map<ColumnStatisticMetadata, Block> computedColumnStatistics)
     {
-        Map<ColumnStatisticMetadata, Block> computedColumnStatistics = Optional.ofNullable(partitionComputedStatistics.get(partitionValues))
-                .map(ComputedStatistics::getColumnStatistics)
-                .orElse(ImmutableMap.of());
         long rowCount = basicStatistics.getRowCount().orElseThrow(() -> new IllegalArgumentException("rowCount not present"));
         Map<String, HiveColumnStatistics> columnStatistics = fromComputedStatistics(
                 session,
@@ -1319,6 +1449,13 @@ private PartitionStatistics createPartitionStatistics(
         return new PartitionStatistics(basicStatistics, columnStatistics);
     }
 
+    private Map<ColumnStatisticMetadata, Block> getColumnStatistics(Map<List<String>, ComputedStatistics> partitionComputedStatistics, List<String> partitionValues)
+    {
+        return Optional.ofNullable(partitionComputedStatistics.get(partitionValues))
+                .map(ComputedStatistics::getColumnStatistics)
+                .orElse(ImmutableMap.of());
+    }
+
     @Override
     public void createView(ConnectorSession session, SchemaTableName viewName, String viewData, boolean replace)
     {
@@ -1483,7 +1620,14 @@ public boolean supportsMetadataDelete(ConnectorSession session, ConnectorTableHandle tableHandle, ConnectorTableLayoutHandle tableLayoutHandle)
     public List<ConnectorTableLayoutResult> getTableLayouts(ConnectorSession session, ConnectorTableHandle tableHandle, Constraint<ColumnHandle> constraint, Optional<Set<ColumnHandle>> desiredColumns)
     {
         HiveTableHandle handle = (HiveTableHandle) tableHandle;
-        HivePartitionResult hivePartitionResult = partitionManager.getPartitions(metastore, tableHandle, constraint);
+        HivePartitionResult hivePartitionResult;
+        if (handle.getAnalyzePartitionValues().isPresent()) {
+            verify(constraint.getSummary().isAll(), "There shouldn't be any constraint for ANALYZE operation");
+            hivePartitionResult = partitionManager.getPartitions(metastore, tableHandle, handle.getAnalyzePartitionValues().get());
+        }
+        else {
+            hivePartitionResult = partitionManager.getPartitions(metastore, tableHandle, constraint);
+        }
 
         return ImmutableList.of(new ConnectorTableLayoutResult(
                 getTableLayout(
@@ -1731,16 +1875,23 @@ public Optional<ConnectorNewTableLayout> getNewTableLayout(ConnectorSession session, ConnectorTableMetadata tableMetadata)
     }
 
     @Override
-    public TableStatisticsMetadata getStatisticsCollectionMetadata(ConnectorSession session, ConnectorTableMetadata tableMetadata)
+    public TableStatisticsMetadata getStatisticsCollectionMetadataForWrite(ConnectorSession session, ConnectorTableMetadata tableMetadata)
     {
         if (!isCollectColumnStatisticsOnWrite(session)) {
             return TableStatisticsMetadata.empty();
         }
         List<String> partitionedBy = firstNonNull(getPartitionedBy(tableMetadata.getProperties()), ImmutableList.of());
-        return getStatisticsCollectionMetadata(tableMetadata.getColumns(), partitionedBy);
+        return getStatisticsCollectionMetadata(tableMetadata.getColumns(), partitionedBy, false);
     }
 
-    private TableStatisticsMetadata getStatisticsCollectionMetadata(List<ColumnMetadata> columns, List<String> partitionedBy)
+    @Override
+    public TableStatisticsMetadata getStatisticsCollectionMetadata(ConnectorSession session, ConnectorTableMetadata tableMetadata)
+    {
+        List<String> partitionedBy = firstNonNull(getPartitionedBy(tableMetadata.getProperties()), ImmutableList.of());
+        return getStatisticsCollectionMetadata(tableMetadata.getColumns(), partitionedBy, true);
+    }
+
+    private TableStatisticsMetadata getStatisticsCollectionMetadata(List<ColumnMetadata> columns, List<String> partitionedBy, boolean includeRowCount)
     {
         Set<ColumnStatisticMetadata> columnStatistics = columns.stream()
                 .filter(column -> !partitionedBy.contains(column.getName()))
@@ -1748,7 +1899,9 @@ private TableStatisticsMetadata getStatisticsCollectionMetadata(List<ColumnMetadata> columns, List<String> partitionedBy)
                 .map(this::getColumnStatisticMetadata)
                 .flatMap(List::stream)
                 .collect(toImmutableSet());
-        return new TableStatisticsMetadata(columnStatistics, ImmutableSet.of(), partitionedBy);
+
+        Set<TableStatisticType> tableStatistics = includeRowCount ? ImmutableSet.of(ROW_COUNT) : ImmutableSet.of();
+        return new TableStatisticsMetadata(columnStatistics, tableStatistics, partitionedBy);
     }
 
     private List<ColumnStatisticMetadata> getColumnStatisticMetadata(ColumnMetadata columnMetadata)
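Taken together, the HiveMetadata changes wire ANALYZE end to end: getTableHandleForStatisticsCollection validates the partition list and stashes it on the table handle, getStatisticsCollectionMetadata declares ROW_COUNT plus the per-column statistics the engine must compute, getTableLayouts turns the stashed partition list into the scan layout, and finishStatisticsCollection writes the results back, storing empty statistics for analyzed partitions that produced no rows. A minimal round trip, assuming a table like the ones created in the tests at the end of this patch:

    ANALYZE hive.tpch.test_table;           -- returns the number of rows read
    SHOW STATS FOR hive.tpch.test_table;    -- per-column statistics are now populated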
diff --git a/presto-hive/src/main/java/io/prestosql/plugin/hive/HivePartitionManager.java b/presto-hive/src/main/java/io/prestosql/plugin/hive/HivePartitionManager.java
index 1fe5e6a3695a..d503e933f2d2 100644
--- a/presto-hive/src/main/java/io/prestosql/plugin/hive/HivePartitionManager.java
+++ b/presto-hive/src/main/java/io/prestosql/plugin/hive/HivePartitionManager.java
@@ -14,6 +14,7 @@
 package io.prestosql.plugin.hive;
 
 import com.google.common.base.Predicates;
+import com.google.common.base.VerifyException;
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.Maps;
@@ -61,13 +62,18 @@
 import static com.google.common.base.Preconditions.checkArgument;
 import static com.google.common.base.Predicates.not;
+import static com.google.common.collect.ImmutableList.toImmutableList;
 import static io.prestosql.plugin.hive.HiveBucketing.getHiveBucketFilter;
 import static io.prestosql.plugin.hive.HiveBucketing.getHiveBucketHandle;
 import static io.prestosql.plugin.hive.HiveUtil.getPartitionKeyColumnHandles;
 import static io.prestosql.plugin.hive.HiveUtil.parsePartitionValue;
 import static io.prestosql.plugin.hive.metastore.MetastoreUtil.getProtectMode;
+import static io.prestosql.plugin.hive.metastore.MetastoreUtil.makePartName;
 import static io.prestosql.plugin.hive.metastore.MetastoreUtil.verifyOnline;
 import static io.prestosql.spi.StandardErrorCode.NOT_SUPPORTED;
+import static io.prestosql.spi.connector.Constraint.alwaysTrue;
+import static io.prestosql.spi.predicate.TupleDomain.all;
+import static io.prestosql.spi.predicate.TupleDomain.none;
 import static io.prestosql.spi.type.Chars.padSpaces;
 import static java.lang.String.format;
 import static java.util.Objects.requireNonNull;
@@ -119,7 +125,7 @@ public HivePartitionResult getPartitions(SemiTransactionalHiveMetastore metastore, ConnectorTableHandle tableHandle, Constraint<ColumnHandle> constraint)
         List<HiveColumnHandle> partitionColumns = getPartitionKeyColumnHandles(table);
 
         if (effectivePredicate.isNone()) {
-            return new HivePartitionResult(partitionColumns, ImmutableList.of(), TupleDomain.none(), TupleDomain.none(), TupleDomain.none(), hiveBucketHandle, Optional.empty());
+            return new HivePartitionResult(partitionColumns, ImmutableList.of(), none(), none(), none(), hiveBucketHandle, Optional.empty());
         }
 
         Optional<HiveBucketFilter> bucketFilter = getHiveBucketFilter(table, effectivePredicate);
@@ -131,7 +137,7 @@ public HivePartitionResult getPartitions(SemiTransactionalHiveMetastore metastore, ConnectorTableHandle tableHandle, Constraint<ColumnHandle> constraint)
                 ImmutableList.of(new HivePartition(tableName)),
                 compactEffectivePredicate,
                 effectivePredicate,
-                TupleDomain.none(),
+                none(),
                 hiveBucketHandle,
                 bucketFilter);
     }
@@ -155,6 +161,27 @@ public HivePartitionResult getPartitions(SemiTransactionalHiveMetastore metastore, ConnectorTableHandle tableHandle, Constraint<ColumnHandle> constraint)
         return new HivePartitionResult(partitionColumns, partitionsIterable, compactEffectivePredicate, remainingTupleDomain, enforcedTupleDomain, hiveBucketHandle, bucketFilter);
     }
 
+    public HivePartitionResult getPartitions(SemiTransactionalHiveMetastore metastore, ConnectorTableHandle tableHandle, List<List<String>> partitionValuesList)
+    {
+        HiveTableHandle hiveTableHandle = (HiveTableHandle) tableHandle;
+        SchemaTableName tableName = hiveTableHandle.getSchemaTableName();
+
+        Table table = getTable(metastore, tableName);
+
+        List<HiveColumnHandle> partitionColumns = getPartitionKeyColumnHandles(table);
+        List<Type> partitionColumnTypes = partitionColumns.stream()
+                .map(column -> typeManager.getType(column.getTypeSignature()))
+                .collect(toImmutableList());
+
+        List<HivePartition> partitionList = partitionValuesList.stream()
+                .map(partitionValues -> makePartName(table.getPartitionColumns(), partitionValues))
+                .map(partitionName -> parseValuesAndFilterPartition(tableName, partitionName, partitionColumns, partitionColumnTypes, alwaysTrue()))
+                .map(partition -> partition.orElseThrow(() -> new VerifyException("partition must exist")))
+                .collect(toImmutableList());
+
+        return new HivePartitionResult(partitionColumns, partitionList, all(), all(), none(), getHiveBucketHandle(table), Optional.empty());
+    }
+
     private static TupleDomain<HiveColumnHandle> toCompactTupleDomain(TupleDomain<ColumnHandle> effectivePredicate, int threshold)
     {
         ImmutableMap.Builder<HiveColumnHandle, Domain> builder = ImmutableMap.builder();
diff --git a/presto-hive/src/main/java/io/prestosql/plugin/hive/HiveTableHandle.java b/presto-hive/src/main/java/io/prestosql/plugin/hive/HiveTableHandle.java
index 17856dbc025a..8004bbdcc3d3 100644
--- a/presto-hive/src/main/java/io/prestosql/plugin/hive/HiveTableHandle.java
+++ b/presto-hive/src/main/java/io/prestosql/plugin/hive/HiveTableHandle.java
@@ -18,8 +18,11 @@
 import io.prestosql.spi.connector.ConnectorTableHandle;
 import io.prestosql.spi.connector.SchemaTableName;
 
+import java.util.List;
 import java.util.Objects;
+import java.util.Optional;
 
+import static com.google.common.base.MoreObjects.toStringHelper;
 import static java.util.Objects.requireNonNull;
 
 public class HiveTableHandle
@@ -28,13 +31,27 @@ public class HiveTableHandle
     private final String schemaName;
     private final String tableName;
+    private final Optional<List<List<String>>> analyzePartitionValues;
 
     @JsonCreator
     public HiveTableHandle(
             @JsonProperty("schemaName") String schemaName,
-            @JsonProperty("tableName") String tableName)
+            @JsonProperty("tableName") String tableName,
+            @JsonProperty("analyzePartitionValues") Optional<List<List<String>>> analyzePartitionValues)
     {
         this.schemaName = requireNonNull(schemaName, "schemaName is null");
         this.tableName = requireNonNull(tableName, "tableName is null");
+        this.analyzePartitionValues = requireNonNull(analyzePartitionValues, "analyzePartitionValues is null");
+    }
+
+    public HiveTableHandle(String schemaName, String tableName)
+    {
+        this(schemaName, tableName, Optional.empty());
+    }
+
+    public HiveTableHandle withAnalyzePartitionValues(Optional<List<List<String>>> analyzePartitionValues)
+    {
+        return new HiveTableHandle(schemaName, tableName, analyzePartitionValues);
     }
 
     @JsonProperty
@@ -49,34 +66,45 @@ public String getTableName()
         return tableName;
     }
 
-    public SchemaTableName getSchemaTableName()
+    @JsonProperty
+    public Optional<List<List<String>>> getAnalyzePartitionValues()
     {
-        return new SchemaTableName(schemaName, tableName);
+        return analyzePartitionValues;
     }
 
-    @Override
-    public int hashCode()
+    public SchemaTableName getSchemaTableName()
     {
-        return Objects.hash(schemaName, tableName);
+        return new SchemaTableName(schemaName, tableName);
     }
 
     @Override
-    public boolean equals(Object obj)
+    public boolean equals(Object o)
     {
-        if (this == obj) {
+        if (this == o) {
             return true;
         }
-        if (obj == null || getClass() != obj.getClass()) {
+        if (o == null || getClass() != o.getClass()) {
             return false;
         }
-        HiveTableHandle other = (HiveTableHandle) obj;
-        return Objects.equals(this.schemaName, other.schemaName) &&
-                Objects.equals(this.tableName, other.tableName);
+        HiveTableHandle that = (HiveTableHandle) o;
+        return Objects.equals(schemaName, that.schemaName) &&
+                Objects.equals(tableName, that.tableName) &&
+                Objects.equals(analyzePartitionValues, that.analyzePartitionValues);
+    }
+
+    @Override
+    public int hashCode()
+    {
+        return Objects.hash(schemaName, tableName, analyzePartitionValues);
     }
 
     @Override
     public String toString()
     {
-        return schemaName + ":" + tableName;
+        return toStringHelper(this)
+                .add("schemaName", schemaName)
+                .add("tableName", tableName)
+                .add("analyzePartitionValues", analyzePartitionValues)
+                .toString();
     }
 }
diff --git a/presto-hive/src/main/java/io/prestosql/plugin/hive/metastore/MetastoreUtil.java b/presto-hive/src/main/java/io/prestosql/plugin/hive/metastore/MetastoreUtil.java
index e8702007b7f0..da539a14fbea 100644
--- a/presto-hive/src/main/java/io/prestosql/plugin/hive/metastore/MetastoreUtil.java
+++ b/presto-hive/src/main/java/io/prestosql/plugin/hive/metastore/MetastoreUtil.java
@@ -23,6 +23,7 @@
 import java.util.List;
 import java.util.Map;
+import java.util.Objects;
 import java.util.Optional;
 import java.util.Properties;
@@ -177,11 +178,21 @@ public static ProtectMode getProtectMode(Table table)
 
     public static String makePartName(List<Column> partitionColumns, List<String> values)
     {
-        checkArgument(partitionColumns.size() == values.size());
+        checkArgument(partitionColumns.size() == values.size(), "Partition value count does not match the partition column count");
+        checkArgument(values.stream().allMatch(Objects::nonNull), "partitionValue must not have null elements");
+
         List<String> partitionColumnNames = partitionColumns.stream().map(Column::getName).collect(toList());
         return FileUtils.makePartName(partitionColumnNames, values);
     }
 
+    public static String getPartitionLocation(Table table, Optional<Partition> partition)
+    {
+        if (!partition.isPresent()) {
+            return table.getStorage().getLocation();
+        }
+        return partition.get().getStorage().getLocation();
+    }
+
     private static String toThriftDdl(String structName, List<Column> columns)
     {
         // Mimics function in Hive:
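For context on the strengthened makePartName: partition columns (p_varchar, p_bigint) with values ('p1', '7') yield the Hive-style partition name p_varchar=p1/p_bigint=7, and the new checkArgument messages surface directly as ANALYZE errors when a supplied partition value list has the wrong arity or contains nulls (see the tests at the end of this patch).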
diff --git a/presto-hive/src/main/java/io/prestosql/plugin/hive/metastore/SemiTransactionalHiveMetastore.java b/presto-hive/src/main/java/io/prestosql/plugin/hive/metastore/SemiTransactionalHiveMetastore.java
index c71842d6e92a..826f51a4b46e 100644
--- a/presto-hive/src/main/java/io/prestosql/plugin/hive/metastore/SemiTransactionalHiveMetastore.java
+++ b/presto-hive/src/main/java/io/prestosql/plugin/hive/metastore/SemiTransactionalHiveMetastore.java
@@ -24,6 +24,7 @@
 import io.airlift.log.Logger;
 import io.prestosql.plugin.hive.HdfsEnvironment;
 import io.prestosql.plugin.hive.HdfsEnvironment.HdfsContext;
+import io.prestosql.plugin.hive.HiveBasicStatistics;
 import io.prestosql.plugin.hive.HiveType;
 import io.prestosql.plugin.hive.LocationHandle.WriteMode;
 import io.prestosql.plugin.hive.PartitionNotFoundException;
@@ -54,6 +55,7 @@
 import java.util.List;
 import java.util.Map;
 import java.util.Optional;
+import java.util.OptionalLong;
 import java.util.Set;
 import java.util.concurrent.CompletableFuture;
 import java.util.concurrent.Executor;
@@ -299,6 +301,45 @@ public synchronized void renameDatabase(String source, String target)
         setExclusive((delegate, hdfsEnvironment) -> delegate.renameDatabase(source, target));
     }
 
+    // TODO: Allow updating statistics for 2 tables in the same transaction
+    public synchronized void setTableStatistics(Table table, PartitionStatistics tableStatistics)
+    {
+        setExclusive((delegate, hdfsEnvironment) ->
+                delegate.updateTableStatistics(table.getDatabaseName(), table.getTableName(), statistics -> updatePartitionStatistics(statistics, tableStatistics)));
+    }
+
+    // TODO: Allow updating statistics for 2 tables in the same transaction
+    public synchronized void setPartitionStatistics(Table table, Map<List<String>, PartitionStatistics> partitionStatisticsMap)
+    {
+        setExclusive((delegate, hdfsEnvironment) ->
+                partitionStatisticsMap.forEach((partitionValues, newPartitionStats) ->
+                        delegate.updatePartitionStatistics(
+                                table.getDatabaseName(),
+                                table.getTableName(),
+                                getPartitionName(table, partitionValues),
+                                oldPartitionStats -> updatePartitionStatistics(oldPartitionStats, newPartitionStats))));
+    }
+
+    // For HiveBasicStatistics, we only overwrite the original values when the new ones are present.
+    // For HiveColumnStatistics, we always overwrite all statistics.
+    // TODO: Collect file count, on-disk size and in-memory size during ANALYZE
+    private PartitionStatistics updatePartitionStatistics(PartitionStatistics oldPartitionStats, PartitionStatistics newPartitionStats)
+    {
+        HiveBasicStatistics oldBasicStatistics = oldPartitionStats.getBasicStatistics();
+        HiveBasicStatistics newBasicStatistics = newPartitionStats.getBasicStatistics();
+        HiveBasicStatistics updatedBasicStatistics = new HiveBasicStatistics(
+                firstPresent(newBasicStatistics.getFileCount(), oldBasicStatistics.getFileCount()),
+                firstPresent(newBasicStatistics.getRowCount(), oldBasicStatistics.getRowCount()),
+                firstPresent(newBasicStatistics.getInMemoryDataSizeInBytes(), oldBasicStatistics.getInMemoryDataSizeInBytes()),
+                firstPresent(newBasicStatistics.getOnDiskDataSizeInBytes(), oldBasicStatistics.getOnDiskDataSizeInBytes()));
+        return new PartitionStatistics(updatedBasicStatistics, newPartitionStats.getColumnStatistics());
+    }
+
+    private static OptionalLong firstPresent(OptionalLong first, OptionalLong second)
+    {
+        return first.isPresent() ? first : second;
+    }
+
     /**
      * {@code currentLocation} needs to be supplied if a writePath exists for the table.
      */
@@ -731,6 +772,11 @@ private String getPartitionName(String databaseName, String tableName, List<String> partitionValues)
     {
         Table table = getTable(databaseName, tableName)
                 .orElseThrow(() -> new TableNotFoundException(new SchemaTableName(databaseName, tableName)));
+        return getPartitionName(table, partitionValues);
+    }
+
+    private String getPartitionName(Table table, List<String> partitionValues)
+    {
         List<String> columnNames = table.getPartitionColumns().stream()
                 .map(Column::getName)
                 .collect(toImmutableList());
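In other words, setPartitionStatistics merges rather than replaces: if the metastore already records a file count of 10 for a partition and ANALYZE computes only a row count of 42, the merged basic statistics keep fileCount = 10 and take rowCount = 42, while the column statistics are replaced wholesale with the newly computed ones.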
diff --git a/presto-hive/src/main/java/io/prestosql/plugin/hive/statistics/MetastoreHiveStatisticsProvider.java b/presto-hive/src/main/java/io/prestosql/plugin/hive/statistics/MetastoreHiveStatisticsProvider.java
index ae8b3002a9e1..9031cfa400d6 100644
--- a/presto-hive/src/main/java/io/prestosql/plugin/hive/statistics/MetastoreHiveStatisticsProvider.java
+++ b/presto-hive/src/main/java/io/prestosql/plugin/hive/statistics/MetastoreHiveStatisticsProvider.java
@@ -440,10 +440,14 @@ private static ColumnStatistics createPartitionColumnStatistics(
             double averageRowsPerPartition,
             double rowCount)
     {
+        List<HivePartition> nonEmptyPartitions = partitions.stream()
+                .filter(partition -> getPartitionRowCount(partition.getPartitionId(), statistics).orElse(averageRowsPerPartition) != 0)
+                .collect(toImmutableList());
+
         return ColumnStatistics.builder()
-                .setDistinctValuesCount(Estimate.of(calculateDistinctPartitionKeys(column, partitions, statistics, averageRowsPerPartition)))
+                .setDistinctValuesCount(Estimate.of(calculateDistinctPartitionKeys(column, nonEmptyPartitions)))
                 .setNullsFraction(Estimate.of(calculateNullsFractionForPartitioningKey(column, partitions, statistics, averageRowsPerPartition, rowCount)))
-                .setRange(calculateRangeForPartitioningKey(column, type, partitions))
+                .setRange(calculateRangeForPartitioningKey(column, type, nonEmptyPartitions))
                 .setDataSize(calculateDataSizeForPartitioningKey(column, type, partitions, statistics, averageRowsPerPartition))
                 .build();
     }
@@ -451,13 +455,9 @@ private static ColumnStatistics createPartitionColumnStatistics(
     @VisibleForTesting
     static long calculateDistinctPartitionKeys(
             HiveColumnHandle column,
-            List<HivePartition> partitions,
-            Map<String, PartitionStatistics> statistics,
-            double averageRowsPerPartition)
+            List<HivePartition> partitions)
     {
         return partitions.stream()
-                // consider only non empty partitions
-                .filter(partition -> getPartitionRowCount(partition.getPartitionId(), statistics).orElse(averageRowsPerPartition) > 0)
                 .map(partition -> partition.getKeys().get(column))
                 .filter(value -> !value.isNull())
                 .distinct()
diff --git a/presto-hive/src/main/java/io/prestosql/plugin/hive/util/Statistics.java b/presto-hive/src/main/java/io/prestosql/plugin/hive/util/Statistics.java
index dfc2124396bb..3e850a201422 100644
--- a/presto-hive/src/main/java/io/prestosql/plugin/hive/util/Statistics.java
+++ b/presto-hive/src/main/java/io/prestosql/plugin/hive/util/Statistics.java
@@ -23,6 +23,7 @@
 import io.prestosql.plugin.hive.metastore.HiveColumnStatistics;
 import io.prestosql.plugin.hive.metastore.IntegerStatistics;
 import io.prestosql.spi.Page;
+import io.prestosql.spi.PrestoException;
 import io.prestosql.spi.block.Block;
 import io.prestosql.spi.connector.ConnectorSession;
 import io.prestosql.spi.statistics.ColumnStatisticMetadata;
@@ -51,6 +52,8 @@
 import static com.google.common.collect.ImmutableList.toImmutableList;
 import static com.google.common.collect.ImmutableMap.toImmutableMap;
 import static com.google.common.collect.Sets.intersection;
+import static io.prestosql.plugin.hive.HiveBasicStatistics.createZeroStatistics;
+import static io.prestosql.plugin.hive.HiveErrorCode.HIVE_UNKNOWN_COLUMN_STATISTIC_TYPE;
 import static io.prestosql.plugin.hive.HiveWriteUtils.createPartitionValues;
 import static io.prestosql.plugin.hive.util.Statistics.ReduceOperator.ADD;
 import static io.prestosql.plugin.hive.util.Statistics.ReduceOperator.MAX;
@@ -70,6 +73,7 @@
 import static io.prestosql.spi.type.SmallintType.SMALLINT;
 import static io.prestosql.spi.type.TimestampType.TIMESTAMP;
 import static io.prestosql.spi.type.TinyintType.TINYINT;
+import static java.util.Objects.requireNonNull;
 import static java.util.concurrent.TimeUnit.MILLISECONDS;
 
 public final class Statistics
@@ -244,6 +248,67 @@ private static <T extends Comparable<? super T>> T min(T first, T second)
         return first.compareTo(second) <= 0 ? first : second;
     }
 
+    public static PartitionStatistics createEmptyPartitionStatistics(Map<String, Type> columnTypes, Map<String, Set<ColumnStatisticType>> columnStatisticsMetadataTypes)
+    {
+        Map<String, HiveColumnStatistics> columnStatistics = columnStatisticsMetadataTypes.entrySet().stream()
+                .collect(toImmutableMap(Entry::getKey, entry -> createColumnStatisticsForEmptyPartition(columnTypes.get(entry.getKey()), entry.getValue())));
+        return new PartitionStatistics(createZeroStatistics(), columnStatistics);
+    }
+
+    private static HiveColumnStatistics createColumnStatisticsForEmptyPartition(Type columnType, Set<ColumnStatisticType> columnStatisticTypes)
+    {
+        requireNonNull(columnType, "columnType is null");
+        HiveColumnStatistics.Builder result = HiveColumnStatistics.builder();
+        for (ColumnStatisticType columnStatisticType : columnStatisticTypes) {
+            switch (columnStatisticType) {
+                case MAX_VALUE_SIZE_IN_BYTES:
+                    result.setMaxValueSizeInBytes(0);
+                    break;
+                case TOTAL_SIZE_IN_BYTES:
+                    result.setTotalSizeInBytes(0);
+                    break;
+                case NUMBER_OF_DISTINCT_VALUES:
+                    result.setDistinctValuesCount(0);
+                    break;
+                case NUMBER_OF_NON_NULL_VALUES:
+                    result.setNullsCount(0);
+                    break;
+                case NUMBER_OF_TRUE_VALUES:
+                    result.setBooleanStatistics(new BooleanStatistics(OptionalLong.of(0L), OptionalLong.of(0L)));
+                    break;
+                case MIN_VALUE:
+                case MAX_VALUE:
+                    setMinMaxForEmptyPartition(columnType, result);
+                    break;
+                default:
+                    throw new PrestoException(HIVE_UNKNOWN_COLUMN_STATISTIC_TYPE, "Unknown column statistics type: " + columnStatisticType.name());
+            }
+        }
+        return result.build();
+    }
+
+    private static void setMinMaxForEmptyPartition(Type type, HiveColumnStatistics.Builder result)
+    {
+        if (type.equals(BIGINT) || type.equals(INTEGER) || type.equals(SMALLINT) || type.equals(TINYINT)) {
+            result.setIntegerStatistics(new IntegerStatistics(OptionalLong.empty(), OptionalLong.empty()));
+        }
+        else if (type.equals(DOUBLE) || type.equals(REAL)) {
+            result.setDoubleStatistics(new DoubleStatistics(OptionalDouble.empty(), OptionalDouble.empty()));
+        }
+        else if (type.equals(DATE)) {
+            result.setDateStatistics(new DateStatistics(Optional.empty(), Optional.empty()));
+        }
+        else if (type.equals(TIMESTAMP)) {
+            result.setIntegerStatistics(new IntegerStatistics(OptionalLong.empty(), OptionalLong.empty()));
+        }
+        else if (type instanceof DecimalType) {
+            result.setDecimalStatistics(new DecimalStatistics(Optional.empty(), Optional.empty()));
+        }
+        else {
+            throw new IllegalArgumentException("Unexpected type: " + type);
+        }
+    }
+
     public static Map<List<String>, ComputedStatistics> createComputedStatisticsToPartitionMap(
             Collection<ComputedStatistics> computedStatistics,
             List<String> partitionColumns,
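createEmptyPartitionStatistics above is what finishStatisticsCollection stores for analyzed partitions that produced no rows: zero basic statistics plus zeroed per-column counters (0 distinct values, 0 nulls, empty min/max ranges), which makes an analyzed-but-empty partition distinguishable from a never-analyzed one.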
a/presto-hive/src/test/java/io/prestosql/plugin/hive/TestHiveIntegrationSmokeTest.java +++ b/presto-hive/src/test/java/io/prestosql/plugin/hive/TestHiveIntegrationSmokeTest.java @@ -944,6 +944,30 @@ public void testCastNullToColumnTypes() assertUpdate("DROP TABLE " + tableName); } + @Test + public void testCreateEmptyPartition() + { + String tableName = "empty_partition_table"; + assertUpdate(format("" + + "CREATE TABLE %s " + + "WITH ( " + + " FORMAT = 'ORC', " + + " partitioned_by = ARRAY['p_varchar'] " + + ") " + + "AS " + + "SELECT c_bigint, p_varchar " + + "FROM ( " + + " VALUES " + + " (BIGINT '7', 'longlonglong')" + + ") AS x (c_bigint, p_varchar)", tableName), 1); + assertQuery(format("SELECT count(*) FROM \"%s$partitions\"", tableName), "SELECT 1"); + + // create an empty partition + assertUpdate(format("CALL system.create_empty_partition('%s', '%s', ARRAY['p_varchar'], ARRAY['%s'])", TPCH_SCHEMA, tableName, "empty")); + assertQuery(format("SELECT count(*) FROM \"%s$partitions\"", tableName), "SELECT 2"); + assertUpdate("DROP TABLE " + tableName); + } + @Test public void testCreateEmptyBucketedPartition() { @@ -2997,6 +3021,335 @@ public void testCollectColumnStatisticsOnInsert() assertUpdate(format("DROP TABLE %s", tableName)); } + @Test + public void testAnalyzeEmptyTable() + { + String tableName = "test_analyze_empty_table"; + assertUpdate(format("CREATE TABLE %s (c_bigint BIGINT, c_varchar VARCHAR(2))", tableName)); + assertUpdate("ANALYZE " + tableName, 0); + } + + @Test + public void testInvalidAnalyzePartitionedTable() + { + String tableName = "test_invalid_analyze_partitioned_table"; + + // Test table does not exist + assertQueryFails("ANALYZE " + tableName, format(".*Table 'hive.tpch.%s' does not exist.*", tableName)); + + createPartitionedTableForAnalyzeTest(tableName); + + // Test invalid property + assertQueryFails(format("ANALYZE %s WITH (error = 1)", tableName), ".*'hive' does not support analyze property 'error'.*"); + assertQueryFails(format("ANALYZE %s WITH (partitions = 1)", tableName), ".*Cannot convert '1' to \\Qarray(array(varchar))\\E.*"); + assertQueryFails(format("ANALYZE %s WITH (partitions = NULL)", tableName), ".*Invalid null value for analyze property.*"); + assertQueryFails(format("ANALYZE %s WITH (partitions = ARRAY[NULL])", tableName), ".*Invalid null value in analyze partitions property.*"); + + // Test non-existed partition + assertQueryFails(format("ANALYZE %s WITH (partitions = ARRAY[ARRAY['p4', '10']])", tableName), ".*Partition no longer exists.*"); + + // Test partition schema mismatch + assertQueryFails(format("ANALYZE %s WITH (partitions = ARRAY[ARRAY['p4']])", tableName), ".*Partition value count does not match the partition column count.*"); + assertQueryFails(format("ANALYZE %s WITH (partitions = ARRAY[ARRAY['p4', '10', 'error']])", tableName), ".*Partition value count does not match the partition column count.*"); + + // Drop the partitioned test table + assertUpdate(format("DROP TABLE %s", tableName)); + } + + @Test + public void testInvalidAnalyzeUnpartitionedTable() + { + String tableName = "test_invalid_analyze_unpartitioned_table"; + + // Test table does not exist + assertQueryFails("ANALYZE " + tableName, ".*Table.*does not exist.*"); + + createUnpartitionedTableForAnalyzeTest(tableName); + + // Test partition properties on unpartitioned table + assertQueryFails(format("ANALYZE %s WITH (partitions = ARRAY[])", tableName), ".*Only partitioned table can be analyzed with a partition list.*"); + assertQueryFails(format("ANALYZE %s WITH 
(partitions = ARRAY[ARRAY['p1']])", tableName), ".*Only partitioned table can be analyzed with a partition list.*"); + + // Drop the partitioned test table + assertUpdate(format("DROP TABLE %s", tableName)); + } + + @Test + public void testAnalyzePartitionedTable() + { + String tableName = "test_analyze_partitioned_table"; + createPartitionedTableForAnalyzeTest(tableName); + + // No column stats before running analyze + assertQuery("SHOW STATS FOR " + tableName, + "SELECT * FROM VALUES " + + "('c_boolean', null, null, null, null, null, null), " + + "('c_bigint', null, null, null, null, null, null), " + + "('c_double', null, null, null, null, null, null), " + + "('c_timestamp', null, null, null, null, null, null), " + + "('c_varchar', null, null, null, null, null, null), " + + "('c_varbinary', null, null, null, null, null, null), " + + "('p_varchar', 24.0, 3.0, 0.25, null, null, null), " + + "('p_bigint', null, 2.0, 0.25, null, '7', '8'), " + + "(null, null, null, null, 16.0, null, null)"); + + // No column stats after running an empty analyze + assertUpdate(format("ANALYZE %s WITH (partitions = ARRAY[])", tableName), 0); + assertQuery("SHOW STATS FOR " + tableName, + "SELECT * FROM VALUES " + + "('c_boolean', null, null, null, null, null, null), " + + "('c_bigint', null, null, null, null, null, null), " + + "('c_double', null, null, null, null, null, null), " + + "('c_timestamp', null, null, null, null, null, null), " + + "('c_varchar', null, null, null, null, null, null), " + + "('c_varbinary', null, null, null, null, null, null), " + + "('p_varchar', 24.0, 3.0, 0.25, null, null, null), " + + "('p_bigint', null, 2.0, 0.25, null, '7', '8'), " + + "(null, null, null, null, 16.0, null, null)"); + + // Run analyze on 3 partitions including a null partition and a duplicate partition + assertUpdate(format("ANALYZE %s WITH (partitions = ARRAY[ARRAY['p1', '7'], ARRAY['p2', '7'], ARRAY['p2', '7'], ARRAY[NULL, NULL]])", tableName), 12); + + assertQuery(format("SHOW STATS FOR (SELECT * FROM %s WHERE p_varchar = 'p1' AND p_bigint = 7)", tableName), + "SELECT * FROM VALUES " + + "('c_boolean', null, 2.0, 0.5, null, null, null), " + + "('c_bigint', null, 2.0, 0.5, null, '0', '1'), " + + "('c_double', null, 2.0, 0.5, null, '1.2', '2.2'), " + + "('c_timestamp', null, 2.0, 0.5, null, null, null), " + + "('c_varchar', 8.0, 2.0, 0.5, null, null, null), " + + "('c_varbinary', 4.0, null, 0.5, null, null, null), " + + "('p_varchar', 8.0, 1.0, 0.0, null, null, null), " + + "('p_bigint', null, 1.0, 0.0, null, '7', '7'), " + + "(null, null, null, null, 4.0, null, null)"); + assertQuery(format("SHOW STATS FOR (SELECT * FROM %s WHERE p_varchar = 'p2' AND p_bigint = 7)", tableName), + "SELECT * FROM VALUES " + + "('c_boolean', null, 2.0, 0.5, null, null, null), " + + "('c_bigint', null, 2.0, 0.5, null, '1', '2'), " + + "('c_double', null, 2.0, 0.5, null, '2.3', '3.3'), " + + "('c_timestamp', null, 2.0, 0.5, null, null, null), " + + "('c_varchar', 8.0, 2.0, 0.5, null, null, null), " + + "('c_varbinary', 4.0, null, 0.5, null, null, null), " + + "('p_varchar', 8.0, 1.0, 0.0, null, null, null), " + + "('p_bigint', null, 1.0, 0.0, null, '7', '7'), " + + "(null, null, null, null, 4.0, null, null)"); + assertQuery(format("SHOW STATS FOR (SELECT * FROM %s WHERE p_varchar IS NULL AND p_bigint IS NULL)", tableName), + "SELECT * FROM VALUES " + + "('c_boolean', null, 1.0, 0.0, null, null, null), " + + "('c_bigint', null, 4.0, 0.0, null, '4', '7'), " + + "('c_double', null, 4.0, 0.0, null, '4.7', '7.7'), " + + "('c_timestamp', 
null, 4.0, 0.0, null, null, null), " + + "('c_varchar', 16.0, 4.0, 0.0, null, null, null), " + + "('c_varbinary', 8.0, null, 0.0, null, null, null), " + + "('p_varchar', 0.0, 0.0, 1.0, null, null, null), " + + "('p_bigint', null, 0.0, 1.0, null, null, null), " + + "(null, null, null, null, 4.0, null, null)"); + + // Partition [p3, 8], [e1, 9], [e2, 9] have no column stats + assertQuery(format("SHOW STATS FOR (SELECT * FROM %s WHERE p_varchar = 'p3' AND p_bigint = 8)", tableName), + "SELECT * FROM VALUES " + + "('c_boolean', null, null, null, null, null, null), " + + "('c_bigint', null, null, null, null, null, null), " + + "('c_double', null, null, null, null, null, null), " + + "('c_timestamp', null, null, null, null, null, null), " + + "('c_varchar', null, null, null, null, null, null), " + + "('c_varbinary', null, null, null, null, null, null), " + + "('p_varchar', 8.0, 1.0, 0.0, null, null, null), " + + "('p_bigint', null, 1.0, 0.0, null, '8', '8'), " + + "(null, null, null, null, 4.0, null, null)"); + assertQuery(format("SHOW STATS FOR (SELECT * FROM %s WHERE p_varchar = 'e1' AND p_bigint = 9)", tableName), + "SELECT * FROM VALUES " + + "('c_boolean', null, null, null, null, null, null), " + + "('c_bigint', null, null, null, null, null, null), " + + "('c_double', null, null, null, null, null, null), " + + "('c_timestamp', null, null, null, null, null, null), " + + "('c_varchar', null, null, null, null, null, null), " + + "('c_varbinary', null, null, null, null, null, null), " + + "('p_varchar', 0.0, 0.0, 0.0, null, null, null), " + + "('p_bigint', null, 0.0, 0.0, null, null, null), " + + "(null, null, null, null, 0.0, null, null)"); + assertQuery(format("SHOW STATS FOR (SELECT * FROM %s WHERE p_varchar = 'e2' AND p_bigint = 9)", tableName), + "SELECT * FROM VALUES " + + "('c_boolean', null, null, null, null, null, null), " + + "('c_bigint', null, null, null, null, null, null), " + + "('c_double', null, null, null, null, null, null), " + + "('c_timestamp', null, null, null, null, null, null), " + + "('c_varchar', null, null, null, null, null, null), " + + "('c_varbinary', null, null, null, null, null, null), " + + "('p_varchar', 0.0, 0.0, 0.0, null, null, null), " + + "('p_bigint', null, 0.0, 0.0, null, null, null), " + + "(null, null, null, null, 0.0, null, null)"); + + // Run analyze on the whole table + assertUpdate("ANALYZE " + tableName, 16); + + // All partitions except empty partitions have column stats + assertQuery(format("SHOW STATS FOR (SELECT * FROM %s WHERE p_varchar = 'p1' AND p_bigint = 7)", tableName), + "SELECT * FROM VALUES " + + "('c_boolean', null, 2.0, 0.5, null, null, null), " + + "('c_bigint', null, 2.0, 0.5, null, '0', '1'), " + + "('c_double', null, 2.0, 0.5, null, '1.2', '2.2'), " + + "('c_timestamp', null, 2.0, 0.5, null, null, null), " + + "('c_varchar', 8.0, 2.0, 0.5, null, null, null), " + + "('c_varbinary', 4.0, null, 0.5, null, null, null), " + + "('p_varchar', 8.0, 1.0, 0.0, null, null, null), " + + "('p_bigint', null, 1.0, 0.0, null, '7', '7'), " + + "(null, null, null, null, 4.0, null, null)"); + assertQuery(format("SHOW STATS FOR (SELECT * FROM %s WHERE p_varchar = 'p2' AND p_bigint = 7)", tableName), + "SELECT * FROM VALUES " + + "('c_boolean', null, 2.0, 0.5, null, null, null), " + + "('c_bigint', null, 2.0, 0.5, null, '1', '2'), " + + "('c_double', null, 2.0, 0.5, null, '2.3', '3.3'), " + + "('c_timestamp', null, 2.0, 0.5, null, null, null), " + + "('c_varchar', 8.0, 2.0, 0.5, null, null, null), " + + "('c_varbinary', 4.0, null, 0.5, null, null, 
null), " + + "('p_varchar', 8.0, 1.0, 0.0, null, null, null), " + + "('p_bigint', null, 1.0, 0.0, null, '7', '7'), " + + "(null, null, null, null, 4.0, null, null)"); + assertQuery(format("SHOW STATS FOR (SELECT * FROM %s WHERE p_varchar IS NULL AND p_bigint IS NULL)", tableName), + "SELECT * FROM VALUES " + + "('c_boolean', null, 1.0, 0.0, null, null, null), " + + "('c_bigint', null, 4.0, 0.0, null, '4', '7'), " + + "('c_double', null, 4.0, 0.0, null, '4.7', '7.7'), " + + "('c_timestamp', null, 4.0, 0.0, null, null, null), " + + "('c_varchar', 16.0, 4.0, 0.0, null, null, null), " + + "('c_varbinary', 8.0, null, 0.0, null, null, null), " + + "('p_varchar', 0.0, 0.0, 1.0, null, null, null), " + + "('p_bigint', null, 0.0, 1.0, null, null, null), " + + "(null, null, null, null, 4.0, null, null)"); + assertQuery(format("SHOW STATS FOR (SELECT * FROM %s WHERE p_varchar = 'p3' AND p_bigint = 8)", tableName), + "SELECT * FROM VALUES " + + "('c_boolean', null, 2.0, 0.5, null, null, null), " + + "('c_bigint', null, 2.0, 0.5, null, '2', '3'), " + + "('c_double', null, 2.0, 0.5, null, '3.4', '4.4'), " + + "('c_timestamp', null, 2.0, 0.5, null, null, null), " + + "('c_varchar', 8.0, 2.0, 0.5, null, null, null), " + + "('c_varbinary', 4.0, null, 0.5, null, null, null), " + + "('p_varchar', 8.0, 1.0, 0.0, null, null, null), " + + "('p_bigint', null, 1.0, 0.0, null, '8', '8'), " + + "(null, null, null, null, 4.0, null, null)"); + assertQuery(format("SHOW STATS FOR (SELECT * FROM %s WHERE p_varchar = 'e1' AND p_bigint = 9)", tableName), + "SELECT * FROM VALUES " + + "('c_boolean', null, 0.0, 0.0, null, null, null), " + + "('c_bigint', null, 0.0, 0.0, null, null, null), " + + "('c_double', null, 0.0, 0.0, null, null, null), " + + "('c_timestamp', null, 0.0, 0.0, null, null, null), " + + "('c_varchar', 0.0, 0.0, 0.0, null, null, null), " + + "('c_varbinary', 0.0, null, 0.0, null, null, null), " + + "('p_varchar', 0.0, 0.0, 0.0, null, null, null), " + + "('p_bigint', null, 0.0, 0.0, null, null, null), " + + "(null, null, null, null, 0.0, null, null)"); + assertQuery(format("SHOW STATS FOR (SELECT * FROM %s WHERE p_varchar = 'e2' AND p_bigint = 9)", tableName), + "SELECT * FROM VALUES " + + "('c_boolean', null, 0.0, 0.0, null, null, null), " + + "('c_bigint', null, 0.0, 0.0, null, null, null), " + + "('c_double', null, 0.0, 0.0, null, null, null), " + + "('c_timestamp', null, 0.0, 0.0, null, null, null), " + + "('c_varchar', 0.0, 0.0, 0.0, null, null, null), " + + "('c_varbinary', 0.0, null, 0.0, null, null, null), " + + "('p_varchar', 0.0, 0.0, 0.0, null, null, null), " + + "('p_bigint', null, 0.0, 0.0, null, null, null), " + + "(null, null, null, null, 0.0, null, null)"); + + // Drop the partitioned test table + assertUpdate(format("DROP TABLE %s", tableName)); + } + + @Test + public void testAnalyzeUnpartitionedTable() + { + String tableName = "test_analyze_unpartitioned_table"; + createUnpartitionedTableForAnalyzeTest(tableName); + + // No column stats before running analyze + assertQuery("SHOW STATS FOR " + tableName, + "SELECT * FROM VALUES " + + "('c_boolean', null, null, null, null, null, null), " + + "('c_bigint', null, null, null, null, null, null), " + + "('c_double', null, null, null, null, null, null), " + + "('c_timestamp', null, null, null, null, null, null), " + + "('c_varchar', null, null, null, null, null, null), " + + "('c_varbinary', null, null, null, null, null, null), " + + "('p_varchar', null, null, null, null, null, null), " + + "('p_bigint', null, null, null, null, null, null), " + + 
"(null, null, null, null, 16.0, null, null)"); + + // Run analyze on the whole table + assertUpdate("ANALYZE " + tableName, 16); + + assertQuery("SHOW STATS FOR " + tableName, + "SELECT * FROM VALUES " + + "('c_boolean', null, 2.0, 0.375, null, null, null), " + + "('c_bigint', null, 8.0, 0.375, null, '0', '7'), " + + "('c_double', null, 10.0, 0.375, null, '1.2', '7.7'), " + + "('c_timestamp', null, 10.0, 0.375, null, null, null), " + + "('c_varchar', 40.0, 10.0, 0.375, null, null, null), " + + "('c_varbinary', 20.0, null, 0.375, null, null, null), " + + "('p_varchar', 24.0, 3.0, 0.25, null, null, null), " + + "('p_bigint', null, 2.0, 0.25, null, '7', '8'), " + + "(null, null, null, null, 16.0, null, null)"); + + // Drop the unpartitioned test table + assertUpdate(format("DROP TABLE %s", tableName)); + } + + protected void createPartitionedTableForAnalyzeTest(String tableName) + { + createTableForAnalyzeTest(tableName, true); + } + + protected void createUnpartitionedTableForAnalyzeTest(String tableName) + { + createTableForAnalyzeTest(tableName, false); + } + + private void createTableForAnalyzeTest(String tableName, boolean partitioned) + { + Session defaultSession = getSession(); + + // Disable column statistics collection when creating the table + Session disableColumnStatsSession = Session.builder(defaultSession) + .setCatalogSessionProperty(defaultSession.getCatalog().get(), "collect_column_statistics_on_write", "false") + .build(); + + assertUpdate( + disableColumnStatsSession, + "" + + "CREATE TABLE " + + tableName + + (partitioned ? " WITH (partitioned_by = ARRAY['p_varchar', 'p_bigint'])\n" : " ") + + "AS " + + "SELECT c_boolean, c_bigint, c_double, c_timestamp, c_varchar, c_varbinary, p_varchar, p_bigint " + + "FROM ( " + + " VALUES " + + // p_varchar = 'p1', p_bigint = BIGINT '7' + " (null, null, null, null, null, null, 'p1', BIGINT '7'), " + + " (null, null, null, null, null, null, 'p1', BIGINT '7'), " + + " (true, BIGINT '1', DOUBLE '2.2', TIMESTAMP '2012-08-08 01:00', 'abc1', X'bcd1', 'p1', BIGINT '7'), " + + " (false, BIGINT '0', DOUBLE '1.2', TIMESTAMP '2012-08-08 00:00', 'abc2', X'bcd2', 'p1', BIGINT '7'), " + + // p_varchar = 'p2', p_bigint = BIGINT '7' + " (null, null, null, null, null, null, 'p2', BIGINT '7'), " + + " (null, null, null, null, null, null, 'p2', BIGINT '7'), " + + " (true, BIGINT '2', DOUBLE '3.3', TIMESTAMP '2012-09-09 01:00', 'cba1', X'dcb1', 'p2', BIGINT '7'), " + + " (false, BIGINT '1', DOUBLE '2.3', TIMESTAMP '2012-09-09 00:00', 'cba2', X'dcb2', 'p2', BIGINT '7'), " + + // p_varchar = 'p3', p_bigint = BIGINT '8' + " (null, null, null, null, null, null, 'p3', BIGINT '8'), " + + " (null, null, null, null, null, null, 'p3', BIGINT '8'), " + + " (true, BIGINT '3', DOUBLE '4.4', TIMESTAMP '2012-10-10 01:00', 'bca1', X'cdb1', 'p3', BIGINT '8'), " + + " (false, BIGINT '2', DOUBLE '3.4', TIMESTAMP '2012-10-10 00:00', 'bca2', X'cdb2', 'p3', BIGINT '8'), " + + // p_varchar = NULL, p_bigint = NULL + " (false, BIGINT '7', DOUBLE '7.7', TIMESTAMP '1977-07-07 07:07', 'efa1', X'efa1', NULL, NULL), " + + " (false, BIGINT '6', DOUBLE '6.7', TIMESTAMP '1977-07-07 07:06', 'efa2', X'efa2', NULL, NULL), " + + " (false, BIGINT '5', DOUBLE '5.7', TIMESTAMP '1977-07-07 07:05', 'efa3', X'efa3', NULL, NULL), " + + " (false, BIGINT '4', DOUBLE '4.7', TIMESTAMP '1977-07-07 07:04', 'efa4', X'efa4', NULL, NULL) " + + ") AS x (c_boolean, c_bigint, c_double, c_timestamp, c_varchar, c_varbinary, p_varchar, p_bigint)", 16); + + if (partitioned) { + // Create empty partitions + 
assertUpdate(disableColumnStatsSession, format("CALL system.create_empty_partition('%s', '%s', ARRAY['p_varchar', 'p_bigint'], ARRAY['%s', '%s'])", TPCH_SCHEMA, tableName, "e1", "9")); + assertUpdate(disableColumnStatsSession, format("CALL system.create_empty_partition('%s', '%s', ARRAY['p_varchar', 'p_bigint'], ARRAY['%s', '%s'])", TPCH_SCHEMA, tableName, "e2", "9")); + } + } + @Test public void testInsertMultipleColumnsFromSameChannel() { diff --git a/presto-hive/src/test/java/io/prestosql/plugin/hive/statistics/TestMetastoreHiveStatisticsProvider.java b/presto-hive/src/test/java/io/prestosql/plugin/hive/statistics/TestMetastoreHiveStatisticsProvider.java index 851a5dae6127..2145c0e77709 100644 --- a/presto-hive/src/test/java/io/prestosql/plugin/hive/statistics/TestMetastoreHiveStatisticsProvider.java +++ b/presto-hive/src/test/java/io/prestosql/plugin/hive/statistics/TestMetastoreHiveStatisticsProvider.java @@ -253,76 +253,41 @@ public void testCalculateAverageRowsPerPartition() @Test public void testCalculateDistinctPartitionKeys() { - assertEquals(calculateDistinctPartitionKeys(PARTITION_COLUMN_1, ImmutableList.of(), ImmutableMap.of(), 1000), 0); + assertEquals(calculateDistinctPartitionKeys(PARTITION_COLUMN_1, ImmutableList.of()), 0); assertEquals( calculateDistinctPartitionKeys( PARTITION_COLUMN_1, - ImmutableList.of(partition("p1=string1/p2=1234")), - ImmutableMap.of("p1=string1/p2=1234", rowsCount(1000)), - 2000), - 1); - assertEquals( - calculateDistinctPartitionKeys( - PARTITION_COLUMN_1, - ImmutableList.of(partition("p1=string1/p2=1234"), partition("p1=string2/p2=1234")), - ImmutableMap.of("p1=string1/p2=1234", rowsCount(1000), "p1=string2/p2=1234", rowsCount(1)), - 2000), - 2); - assertEquals( - calculateDistinctPartitionKeys( - PARTITION_COLUMN_1, - ImmutableList.of(partition("p1=string1/p2=1234"), partition("p1=string2/p2=1234")), - ImmutableMap.of("p1=string1/p2=1234", rowsCount(1000), "p1=string2/p2=1234", rowsCount(0)), - 2000), + ImmutableList.of(partition("p1=string1/p2=1234"))), 1); assertEquals( calculateDistinctPartitionKeys( PARTITION_COLUMN_1, - ImmutableList.of(partition("p1=string1/p2=1234"), partition("p1=string2/p2=1234")), - ImmutableMap.of("p1=string1/p2=1234", rowsCount(1000)), - 2000), + ImmutableList.of(partition("p1=string1/p2=1234"), partition("p1=string2/p2=1234"))), 2); - assertEquals( - calculateDistinctPartitionKeys( - PARTITION_COLUMN_1, - ImmutableList.of(partition("p1=string1/p2=1234"), partition("p1=string2/p2=1234")), - ImmutableMap.of("p1=string1/p2=1234", rowsCount(1000)), - 0), - 1); assertEquals( calculateDistinctPartitionKeys( PARTITION_COLUMN_2, - ImmutableList.of(partition("p1=string1/p2=1234"), partition("p1=string2/p2=1234")), - ImmutableMap.of("p1=string1/p2=1234", rowsCount(1000), "p1=string2/p2=1234", rowsCount(1)), - 2000), + ImmutableList.of(partition("p1=string1/p2=1234"), partition("p1=string2/p2=1234"))), 1); assertEquals( calculateDistinctPartitionKeys( PARTITION_COLUMN_2, - ImmutableList.of(partition("p1=string1/p2=1234"), partition("p1=string1/p2=1235")), - ImmutableMap.of("p1=string1/p2=1234", rowsCount(1000), "p1=string1/p2=1235", rowsCount(1)), - 2000), + ImmutableList.of(partition("p1=string1/p2=1234"), partition("p1=string1/p2=1235"))), 2); assertEquals( calculateDistinctPartitionKeys( PARTITION_COLUMN_1, - ImmutableList.of(partition("p1=__HIVE_DEFAULT_PARTITION__/p2=1234"), partition("p1=string1/p2=1235")), - ImmutableMap.of("p1=__HIVE_DEFAULT_PARTITION__/p2=1234", rowsCount(1000), "p1=string1/p2=1235", rowsCount(1)), - 
2000), + ImmutableList.of(partition("p1=__HIVE_DEFAULT_PARTITION__/p2=1234"), partition("p1=string1/p2=1235"))), 1); assertEquals( calculateDistinctPartitionKeys( PARTITION_COLUMN_2, - ImmutableList.of(partition("p1=123/p2=__HIVE_DEFAULT_PARTITION__"), partition("p1=string1/p2=1235")), - ImmutableMap.of("p1=123/p2=__HIVE_DEFAULT_PARTITION__", rowsCount(1000), "p1=string1/p2=1235", rowsCount(1)), - 2000), + ImmutableList.of(partition("p1=123/p2=__HIVE_DEFAULT_PARTITION__"), partition("p1=string1/p2=1235"))), 1); assertEquals( calculateDistinctPartitionKeys( PARTITION_COLUMN_2, - ImmutableList.of(partition("p1=123/p2=__HIVE_DEFAULT_PARTITION__"), partition("p1=string1/p2=__HIVE_DEFAULT_PARTITION__")), - ImmutableMap.of("p1=123/p2=__HIVE_DEFAULT_PARTITION__", rowsCount(1000), "p1=string1/p2=__HIVE_DEFAULT_PARTITION__", rowsCount(1)), - 2000), + ImmutableList.of(partition("p1=123/p2=__HIVE_DEFAULT_PARTITION__"), partition("p1=string1/p2=__HIVE_DEFAULT_PARTITION__"))), 0); } @@ -489,12 +454,6 @@ public void testCalculateRangeForPartitioningKey() BIGINT, ImmutableList.of(partition("p1=string1/p2=2"), partition("p1=string1/p2=1"))), Optional.of(new DoubleRange(1, 2))); - assertEquals( - calculateRangeForPartitioningKey( - PARTITION_COLUMN_2, - BIGINT, - ImmutableList.of(partition("p1=string1/p2=2"), partition("p1=string1/p2=3"), partition("p1=string1/p2=1"))), - Optional.of(new DoubleRange(1, 3))); } @Test diff --git a/presto-main/src/main/java/io/prestosql/connector/ConnectorManager.java b/presto-main/src/main/java/io/prestosql/connector/ConnectorManager.java index 628dbf231bd7..5ae7d0f86383 100644 --- a/presto-main/src/main/java/io/prestosql/connector/ConnectorManager.java +++ b/presto-main/src/main/java/io/prestosql/connector/ConnectorManager.java @@ -267,6 +267,7 @@ private synchronized void addConnectorInternal(MaterializedConnector connector) metadataManager.getTablePropertyManager().addProperties(connectorId, connector.getTableProperties()); metadataManager.getColumnPropertyManager().addProperties(connectorId, connector.getColumnProperties()); metadataManager.getSchemaPropertyManager().addProperties(connectorId, connector.getSchemaProperties()); + metadataManager.getAnalyzePropertyManager().addProperties(connectorId, connector.getAnalyzeProperties()); metadataManager.getSessionPropertyManager().addConnectorSessionProperties(connectorId, connector.getSessionProperties()); } @@ -337,6 +338,7 @@ private static class MaterializedConnector private final List> tableProperties; private final List> schemaProperties; private final List> columnProperties; + private final List> analyzeProperties; public MaterializedConnector(ConnectorId connectorId, Connector connector) { @@ -425,6 +427,10 @@ public MaterializedConnector(ConnectorId connectorId, Connector connector) List> columnProperties = connector.getColumnProperties(); requireNonNull(columnProperties, "Connector %s returned a null column properties set"); this.columnProperties = ImmutableList.copyOf(columnProperties); + + List> analyzeProperties = connector.getAnalyzeProperties(); + requireNonNull(analyzeProperties, "Connector %s returned a null analyze properties set"); + this.analyzeProperties = ImmutableList.copyOf(analyzeProperties); } public ConnectorId getConnectorId() @@ -496,5 +502,10 @@ public List> getSchemaProperties() { return schemaProperties; } + + public List> getAnalyzeProperties() + { + return analyzeProperties; + } } } diff --git a/presto-main/src/main/java/io/prestosql/metadata/AnalyzeMetadata.java 
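The net effect of these TestMetastoreHiveStatisticsProvider changes is that calculateDistinctPartitionKeys now derives the distinct-value count purely from the partition list; the per-partition row counts and the row-count estimate are gone from the signature. A minimal sketch of the simplified logic, assuming Hive-style partition names and a hypothetical helper (PARTITION_COLUMN_1 corresponds to keyIndex 0, PARTITION_COLUMN_2 to keyIndex 1):

```java
import java.util.List;

final class PartitionKeyNdv
{
    private PartitionKeyNdv() {}

    // Count distinct values of the keyIndex-th partition column across names of the
    // form "p1=a/p2=b"; __HIVE_DEFAULT_PARTITION__ encodes NULL and is excluded,
    // which is why the all-default cases in the test above expect 0.
    static long distinctPartitionKeys(int keyIndex, List<String> partitionNames)
    {
        return partitionNames.stream()
                .map(name -> name.split("/")[keyIndex])
                .map(part -> part.split("=", 2)[1])
                .filter(value -> !value.equals("__HIVE_DEFAULT_PARTITION__"))
                .distinct()
                .count();
    }
}
```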
b/presto-main/src/main/java/io/prestosql/metadata/AnalyzeMetadata.java new file mode 100644 index 000000000000..7ea134663e57 --- /dev/null +++ b/presto-main/src/main/java/io/prestosql/metadata/AnalyzeMetadata.java @@ -0,0 +1,40 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.prestosql.metadata; + +import io.prestosql.spi.statistics.TableStatisticsMetadata; + +import static java.util.Objects.requireNonNull; + +public class AnalyzeMetadata +{ + private final TableStatisticsMetadata statisticsMetadata; + private final TableHandle tableHandle; + + public AnalyzeMetadata(TableStatisticsMetadata statisticsMetadata, TableHandle tableHandle) + { + this.statisticsMetadata = requireNonNull(statisticsMetadata, "statisticsMetadata is null"); + this.tableHandle = requireNonNull(tableHandle, "tableHandle is null"); + } + + public TableStatisticsMetadata getStatisticsMetadata() + { + return statisticsMetadata; + } + + public TableHandle getTableHandle() + { + return tableHandle; + } +} diff --git a/presto-main/src/main/java/io/prestosql/metadata/AnalyzePropertyManager.java b/presto-main/src/main/java/io/prestosql/metadata/AnalyzePropertyManager.java new file mode 100644 index 000000000000..84b4de98356d --- /dev/null +++ b/presto-main/src/main/java/io/prestosql/metadata/AnalyzePropertyManager.java @@ -0,0 +1,25 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.prestosql.metadata; + +import static io.prestosql.spi.StandardErrorCode.INVALID_ANALYZE_PROPERTY; + +public class AnalyzePropertyManager + extends AbstractPropertyManager +{ + public AnalyzePropertyManager() + { + super("analyze", INVALID_ANALYZE_PROPERTY); + } +} diff --git a/presto-main/src/main/java/io/prestosql/metadata/AnalyzeTableHandle.java b/presto-main/src/main/java/io/prestosql/metadata/AnalyzeTableHandle.java new file mode 100644 index 000000000000..d5bc3d7e9e11 --- /dev/null +++ b/presto-main/src/main/java/io/prestosql/metadata/AnalyzeTableHandle.java @@ -0,0 +1,87 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.prestosql.metadata; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import io.prestosql.connector.ConnectorId; +import io.prestosql.spi.connector.ConnectorTableHandle; +import io.prestosql.spi.connector.ConnectorTransactionHandle; + +import java.util.Objects; + +import static java.util.Objects.requireNonNull; + +public class AnalyzeTableHandle +{ + private final ConnectorId connectorId; + private final ConnectorTransactionHandle transactionHandle; + private final ConnectorTableHandle connectorHandle; + + @JsonCreator + public AnalyzeTableHandle( + @JsonProperty("connectorId") ConnectorId connectorId, + @JsonProperty("transactionHandle") ConnectorTransactionHandle transactionHandle, + @JsonProperty("connectorHandle") ConnectorTableHandle connectorHandle) + { + this.connectorId = requireNonNull(connectorId, "connectorId is null"); + this.transactionHandle = requireNonNull(transactionHandle, "transactionHandle is null"); + this.connectorHandle = requireNonNull(connectorHandle, "connectorHandle is null"); + } + + @JsonProperty + public ConnectorId getConnectorId() + { + return connectorId; + } + + @JsonProperty + public ConnectorTableHandle getConnectorHandle() + { + return connectorHandle; + } + + @JsonProperty + public ConnectorTransactionHandle getTransactionHandle() + { + return transactionHandle; + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + AnalyzeTableHandle that = (AnalyzeTableHandle) o; + return Objects.equals(connectorId, that.connectorId) && + Objects.equals(transactionHandle, that.transactionHandle) && + Objects.equals(connectorHandle, that.connectorHandle); + } + + @Override + public int hashCode() + { + return Objects.hash(connectorId, transactionHandle, connectorHandle); + } + + @Override + public String toString() + { + return connectorId + ":" + connectorHandle + ":" + transactionHandle; + } +} diff --git a/presto-main/src/main/java/io/prestosql/metadata/Metadata.java b/presto-main/src/main/java/io/prestosql/metadata/Metadata.java index b768ab284ab1..d8f505f83d1f 100644 --- a/presto-main/src/main/java/io/prestosql/metadata/Metadata.java +++ b/presto-main/src/main/java/io/prestosql/metadata/Metadata.java @@ -71,6 +71,8 @@ public interface Metadata Optional getSystemTable(Session session, QualifiedObjectName tableName); + Optional getTableHandleForStatisticsCollection(Session session, QualifiedObjectName tableName, Map analyzeProperties); + List getLayouts(Session session, TableHandle tableHandle, Constraint constraint, Optional> desiredColumns); TableLayout getLayout(Session session, TableLayoutHandle handle); @@ -193,8 +195,23 @@ public interface Metadata /** * Describes statistics that must be collected during a write. 
*/ + TableStatisticsMetadata getStatisticsCollectionMetadataForWrite(Session session, String catalogName, ConnectorTableMetadata tableMetadata); + + /** + * Describes statistics that must be collected during ANALYZE. + */ TableStatisticsMetadata getStatisticsCollectionMetadata(Session session, String catalogName, ConnectorTableMetadata tableMetadata); + /** + * Begin statistics collection + */ + AnalyzeTableHandle beginStatisticsCollection(Session session, TableHandle tableHandle); + + /** + * Finish statistics collection + */ + void finishStatisticsCollection(Session session, AnalyzeTableHandle tableHandle, Collection computedStatistics); + /** * Start a SELECT/UPDATE/INSERT/DELETE query */ @@ -361,4 +378,6 @@ TablePropertyManager getTablePropertyManager(); ColumnPropertyManager getColumnPropertyManager(); + + AnalyzePropertyManager getAnalyzePropertyManager(); } diff --git a/presto-main/src/main/java/io/prestosql/metadata/MetadataManager.java b/presto-main/src/main/java/io/prestosql/metadata/MetadataManager.java index 555368ed8c17..411ae09220f9 100644 --- a/presto-main/src/main/java/io/prestosql/metadata/MetadataManager.java +++ b/presto-main/src/main/java/io/prestosql/metadata/MetadataManager.java @@ -123,6 +123,7 @@ public class MetadataManager private final SchemaPropertyManager schemaPropertyManager; private final TablePropertyManager tablePropertyManager; private final ColumnPropertyManager columnPropertyManager; + private final AnalyzePropertyManager analyzePropertyManager; private final TransactionManager transactionManager; private final ConcurrentMap> catalogsByQueryId = new ConcurrentHashMap<>(); @@ -134,6 +135,7 @@ public MetadataManager(FeaturesConfig featuresConfig, SchemaPropertyManager schemaPropertyManager, TablePropertyManager tablePropertyManager, ColumnPropertyManager columnPropertyManager, + AnalyzePropertyManager analyzePropertyManager, TransactionManager transactionManager) { this(featuresConfig, @@ -144,6 +146,7 @@ public MetadataManager(FeaturesConfig featuresConfig, schemaPropertyManager, tablePropertyManager, columnPropertyManager, + analyzePropertyManager, transactionManager); } @@ -156,6 +159,7 @@ public MetadataManager(FeaturesConfig featuresConfig, SchemaPropertyManager schemaPropertyManager, TablePropertyManager tablePropertyManager, ColumnPropertyManager columnPropertyManager, + AnalyzePropertyManager analyzePropertyManager, TransactionManager transactionManager) { functions = new FunctionRegistry(typeManager, blockEncodingSerde, featuresConfig); @@ -167,6 +171,7 @@ public MetadataManager(FeaturesConfig featuresConfig, this.schemaPropertyManager = requireNonNull(schemaPropertyManager, "schemaPropertyManager is null"); this.tablePropertyManager = requireNonNull(tablePropertyManager, "tablePropertyManager is null"); this.columnPropertyManager = requireNonNull(columnPropertyManager, "columnPropertyManager is null"); + this.analyzePropertyManager = requireNonNull(analyzePropertyManager, "analyzePropertyManager is null"); this.transactionManager = requireNonNull(transactionManager, "transactionManager is null"); verifyComparableOrderableContract(); @@ -203,6 +208,7 @@ public static MetadataManager createTestMetadataManager(TransactionManager trans new SchemaPropertyManager(), new TablePropertyManager(), new ColumnPropertyManager(), + new AnalyzePropertyManager(), transactionManager); } @@ -328,6 +334,25 @@ public Optional getTableHandle(Session session, QualifiedObjectName return Optional.empty(); } + @Override + public
Optional getTableHandleForStatisticsCollection(Session session, QualifiedObjectName table, Map analyzeProperties) + { + requireNonNull(table, "table is null"); + + Optional catalog = getOptionalCatalogMetadata(session, table.getCatalogName()); + if (catalog.isPresent()) { + CatalogMetadata catalogMetadata = catalog.get(); + ConnectorId connectorId = catalogMetadata.getConnectorId(session, table); + ConnectorMetadata metadata = catalogMetadata.getMetadataFor(connectorId); + + ConnectorTableHandle tableHandle = metadata.getTableHandleForStatisticsCollection(session.toConnectorSession(connectorId), table.asSchemaTableName(), analyzeProperties); + if (tableHandle != null) { + return Optional.of(new TableHandle(connectorId, tableHandle)); + } + } + return Optional.empty(); + } + @Override public Optional getSystemTable(Session session, QualifiedObjectName tableName) { @@ -621,6 +646,15 @@ public Optional getInsertLayout(Session session, TableHandle tab .map(layout -> new NewTableLayout(connectorId, catalogMetadata.getTransactionHandleFor(connectorId), layout)); } + @Override + public TableStatisticsMetadata getStatisticsCollectionMetadataForWrite(Session session, String catalogName, ConnectorTableMetadata tableMetadata) + { + CatalogMetadata catalogMetadata = getCatalogMetadataForWrite(session, catalogName); + ConnectorMetadata metadata = catalogMetadata.getMetadata(); + ConnectorId connectorId = catalogMetadata.getConnectorId(); + return metadata.getStatisticsCollectionMetadataForWrite(session.toConnectorSession(connectorId), tableMetadata); + } + @Override public TableStatisticsMetadata getStatisticsCollectionMetadata(Session session, String catalogName, ConnectorTableMetadata tableMetadata) { @@ -630,6 +664,26 @@ public TableStatisticsMetadata getStatisticsCollectionMetadata(Session session, return metadata.getStatisticsCollectionMetadata(session.toConnectorSession(connectorId), tableMetadata); } + @Override + public AnalyzeTableHandle beginStatisticsCollection(Session session, TableHandle tableHandle) + { + ConnectorId connectorId = tableHandle.getConnectorId(); + CatalogMetadata catalogMetadata = getCatalogMetadataForWrite(session, connectorId); + ConnectorMetadata metadata = catalogMetadata.getMetadata(); + + ConnectorTransactionHandle transactionHandle = catalogMetadata.getTransactionHandleFor(connectorId); + ConnectorTableHandle connectorTableHandle = metadata.beginStatisticsCollection(session.toConnectorSession(connectorId), tableHandle.getConnectorHandle()); + return new AnalyzeTableHandle(connectorId, transactionHandle, connectorTableHandle); + } + + @Override + public void finishStatisticsCollection(Session session, AnalyzeTableHandle tableHandle, Collection computedStatistics) + { + ConnectorId connectorId = tableHandle.getConnectorId(); + CatalogMetadata catalogMetadata = getCatalogMetadataForWrite(session, connectorId); + catalogMetadata.getMetadata().finishStatisticsCollection(session.toConnectorSession(connectorId), tableHandle.getConnectorHandle(), computedStatistics); + } + @Override public Optional getNewTableLayout(Session session, String catalogName, ConnectorTableMetadata tableMetadata) { @@ -1064,6 +1118,11 @@ public ColumnPropertyManager getColumnPropertyManager() return columnPropertyManager; } + public AnalyzePropertyManager getAnalyzePropertyManager() + { + return analyzePropertyManager; + } + private ViewDefinition deserializeView(String data) { try { diff --git a/presto-main/src/main/java/io/prestosql/operator/StatisticsWriterOperator.java
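Taken together, the MetadataManager additions give ANALYZE a three-step lifecycle at the metadata layer. A rough sketch of the calling order, under the assumption that computedStatistics is the output of the statistics aggregation plan (illustrative, not the engine's actual call site):

```java
import io.prestosql.Session;
import io.prestosql.metadata.AnalyzeTableHandle;
import io.prestosql.metadata.Metadata;
import io.prestosql.metadata.QualifiedObjectName;
import io.prestosql.metadata.TableHandle;
import io.prestosql.spi.statistics.ComputedStatistics;

import java.util.Collection;
import java.util.Map;

final class AnalyzeLifecycle
{
    private AnalyzeLifecycle() {}

    static void runAnalyze(
            Metadata metadata,
            Session session,
            QualifiedObjectName tableName,
            Map<String, Object> analyzeProperties,
            Collection<ComputedStatistics> computedStatistics)
    {
        // resolve the target, letting the connector interpret the analyze properties
        TableHandle table = metadata.getTableHandleForStatisticsCollection(session, tableName, analyzeProperties)
                .orElseThrow(() -> new IllegalArgumentException("Table does not exist: " + tableName));
        // open connector-side collection; the returned handle carries the transaction
        AnalyzeTableHandle handle = metadata.beginStatisticsCollection(session, table);
        // persist the statistics computed by the aggregation over the table scan
        metadata.finishStatisticsCollection(session, handle, computedStatistics);
    }
}
```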
b/presto-main/src/main/java/io/prestosql/operator/StatisticsWriterOperator.java new file mode 100644 index 000000000000..4acd45a796ed --- /dev/null +++ b/presto-main/src/main/java/io/prestosql/operator/StatisticsWriterOperator.java @@ -0,0 +1,197 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.prestosql.operator; + +import com.google.common.collect.ImmutableList; +import io.prestosql.spi.Page; +import io.prestosql.spi.PageBuilder; +import io.prestosql.spi.block.Block; +import io.prestosql.spi.block.BlockBuilder; +import io.prestosql.spi.statistics.ComputedStatistics; +import io.prestosql.spi.type.Type; +import io.prestosql.sql.planner.plan.PlanNodeId; +import io.prestosql.sql.planner.plan.StatisticAggregationsDescriptor; + +import java.util.Collection; +import java.util.List; +import java.util.Objects; + +import static com.google.common.base.Preconditions.checkState; +import static io.prestosql.spi.statistics.TableStatisticType.ROW_COUNT; +import static io.prestosql.spi.type.BigintType.BIGINT; +import static java.util.Objects.requireNonNull; + +public class StatisticsWriterOperator + implements Operator +{ + public static final List TYPES = ImmutableList.of(BIGINT); + + public static class StatisticsWriterOperatorFactory + implements OperatorFactory + { + private final int operatorId; + private final PlanNodeId planNodeId; + private final StatisticsWriter statisticsWriter; + private final boolean rowCountEnabled; + private final StatisticAggregationsDescriptor descriptor; + private boolean closed; + + public StatisticsWriterOperatorFactory(int operatorId, PlanNodeId planNodeId, StatisticsWriter statisticsWriter, boolean rowCountEnabled, StatisticAggregationsDescriptor descriptor) + { + this.operatorId = operatorId; + this.planNodeId = requireNonNull(planNodeId, "planNodeId is null"); + this.statisticsWriter = requireNonNull(statisticsWriter, "statisticsWriter is null"); + this.rowCountEnabled = rowCountEnabled; + this.descriptor = requireNonNull(descriptor, "descriptor is null"); + } + + @Override + public Operator createOperator(DriverContext driverContext) + { + checkState(!closed, "Factory is already closed"); + OperatorContext context = driverContext.addOperatorContext(operatorId, planNodeId, StatisticsWriterOperator.class.getSimpleName()); + return new StatisticsWriterOperator(context, statisticsWriter, descriptor, rowCountEnabled); + } + + @Override + public void noMoreOperators() + { + closed = true; + } + + @Override + public OperatorFactory duplicate() + { + return new StatisticsWriterOperatorFactory(operatorId, planNodeId, statisticsWriter, rowCountEnabled, descriptor); + } + } + + private enum State + { + RUNNING, FINISHING, FINISHED + } + + private final OperatorContext operatorContext; + private final StatisticsWriter statisticsWriter; + private final StatisticAggregationsDescriptor descriptor; + private final boolean rowCountEnabled; + + private State state = State.RUNNING; + private final ImmutableList.Builder computedStatisticsBuilder = 
ImmutableList.builder(); + + public StatisticsWriterOperator(OperatorContext operatorContext, StatisticsWriter statisticsWriter, StatisticAggregationsDescriptor descriptor, boolean rowCountEnabled) + { + this.operatorContext = requireNonNull(operatorContext, "operatorContext is null"); + this.statisticsWriter = requireNonNull(statisticsWriter, "statisticsWriter is null"); + this.descriptor = requireNonNull(descriptor, "descriptor is null"); + this.rowCountEnabled = rowCountEnabled; + } + + @Override + public OperatorContext getOperatorContext() + { + return operatorContext; + } + + @Override + public boolean needsInput() + { + return state == State.RUNNING; + } + + @Override + public void addInput(Page page) + { + requireNonNull(page, "page is null"); + checkState(state == State.RUNNING, "Operator is %s", state); + + for (int position = 0; position < page.getPositionCount(); position++) { + computedStatisticsBuilder.add(getComputedStatistics(page, position)); + } + } + + @Override + public Page getOutput() + { + if (state != State.FINISHING) { + return null; + } + state = State.FINISHED; + + Collection computedStatistics = computedStatisticsBuilder.build(); + statisticsWriter.writeStatistics(computedStatistics); + + // output page will only be constructed once, + // so a new PageBuilder is constructed (instead of using PageBuilder.reset) + PageBuilder page = new PageBuilder(1, TYPES); + page.declarePosition(); + BlockBuilder rowsBuilder = page.getBlockBuilder(0); + if (rowCountEnabled) { + BIGINT.writeLong(rowsBuilder, getRowCount(computedStatistics)); + } + else { + rowsBuilder.appendNull(); + } + + return page.build(); + } + + @Override + public void finish() + { + if (state == State.RUNNING) { + state = State.FINISHING; + } + } + + @Override + public boolean isFinished() + { + return state == State.FINISHED; + } + + private ComputedStatistics getComputedStatistics(Page page, int position) + { + ImmutableList.Builder groupingColumns = ImmutableList.builder(); + ImmutableList.Builder groupingValues = ImmutableList.builder(); + descriptor.getGrouping().forEach((column, channel) -> { + groupingColumns.add(column); + groupingValues.add(page.getBlock(channel).getSingleValueBlock(position)); + }); + + ComputedStatistics.Builder statistics = ComputedStatistics.builder(groupingColumns.build(), groupingValues.build()); + + descriptor.getTableStatistics().forEach((type, channel) -> + statistics.addTableStatistic(type, page.getBlock(channel).getSingleValueBlock(position))); + + descriptor.getColumnStatistics().forEach((metadata, channel) -> statistics.addColumnStatistic(metadata, page.getBlock(channel).getSingleValueBlock(position))); + + return statistics.build(); + } + + private static long getRowCount(Collection computedStatistics) + { + return computedStatistics.stream() + .map(statistics -> statistics.getTableStatistics().get(ROW_COUNT)) + .filter(Objects::nonNull) + .mapToLong(block -> BIGINT.getLong(block, 0)) + .reduce((first, second) -> first + second) + .orElse(0L); + } + + public interface StatisticsWriter + { + void writeStatistics(Collection computedStatistics); + } +} diff --git a/presto-main/src/main/java/io/prestosql/server/ServerMainModule.java b/presto-main/src/main/java/io/prestosql/server/ServerMainModule.java index 064cf6ebde53..d582e72da902 100644 --- a/presto-main/src/main/java/io/prestosql/server/ServerMainModule.java +++ b/presto-main/src/main/java/io/prestosql/server/ServerMainModule.java @@ -66,6 +66,7 @@ import io.prestosql.memory.MemoryResource; import 
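The operator defined above is deliberately two-phase: addInput only buffers one ComputedStatistics per input row, and the metastore write plus the single row-count page happen together in getOutput once finish has moved it to FINISHING. A hand-driven sketch of that contract (in the engine the Driver issues these calls; the input page is fabricated):

```java
import io.prestosql.operator.Operator;
import io.prestosql.spi.Page;

final class StatisticsWriterContract
{
    private StatisticsWriterContract() {}

    // Drives a StatisticsWriterOperator over one input page whose rows are the
    // per-partition statistics produced by the upstream aggregation.
    static Page drain(Operator operator, Page statisticsPage)
    {
        operator.addInput(statisticsPage);    // buffered only; nothing written yet
        operator.finish();                    // RUNNING -> FINISHING
        Page rowCount = operator.getOutput(); // writes statistics, emits one row
        assert operator.isFinished();
        return rowCount;
    }
}
```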
io.prestosql.memory.NodeMemoryConfig; import io.prestosql.memory.ReservedSystemMemoryConfig; +import io.prestosql.metadata.AnalyzePropertyManager; import io.prestosql.metadata.CatalogManager; import io.prestosql.metadata.ColumnPropertyManager; import io.prestosql.metadata.DiscoveryNodeManager; @@ -220,6 +221,9 @@ protected void setup(Binder binder) // column properties binder.bind(ColumnPropertyManager.class).in(Scopes.SINGLETON); + // analyze properties + binder.bind(AnalyzePropertyManager.class).in(Scopes.SINGLETON); + // node manager discoveryBinder(binder).bindSelector("presto"); binder.bind(DiscoveryNodeManager.class).in(Scopes.SINGLETON); diff --git a/presto-main/src/main/java/io/prestosql/sql/analyzer/Analysis.java b/presto-main/src/main/java/io/prestosql/sql/analyzer/Analysis.java index f1c36de99c6f..aaaf1c9232b6 100644 --- a/presto-main/src/main/java/io/prestosql/sql/analyzer/Analysis.java +++ b/presto-main/src/main/java/io/prestosql/sql/analyzer/Analysis.java @@ -130,6 +130,7 @@ public class Analysis private Optional createTableComment = Optional.empty(); private Optional insert = Optional.empty(); + private Optional analyzeTarget = Optional.empty(); // for describe input and describe output private final boolean isDescribe; @@ -522,6 +523,16 @@ public Optional getCreateTableDestination() return createTableDestination; } + public Optional getAnalyzeTarget() + { + return analyzeTarget; + } + + public void setAnalyzeTarget(TableHandle analyzeTarget) + { + this.analyzeTarget = Optional.of(analyzeTarget); + } + public void setCreateTableProperties(Map createTableProperties) { this.createTableProperties = ImmutableMap.copyOf(createTableProperties); diff --git a/presto-main/src/main/java/io/prestosql/sql/analyzer/StatementAnalyzer.java b/presto-main/src/main/java/io/prestosql/sql/analyzer/StatementAnalyzer.java index c94d8f08efbf..615cc26d5f66 100644 --- a/presto-main/src/main/java/io/prestosql/sql/analyzer/StatementAnalyzer.java +++ b/presto-main/src/main/java/io/prestosql/sql/analyzer/StatementAnalyzer.java @@ -23,6 +23,7 @@ import com.google.common.collect.Multimap; import io.prestosql.Session; import io.prestosql.SystemSessionProperties; +import io.prestosql.connector.ConnectorId; import io.prestosql.execution.warnings.WarningCollector; import io.prestosql.metadata.FunctionKind; import io.prestosql.metadata.Metadata; @@ -39,6 +40,7 @@ import io.prestosql.spi.connector.ColumnHandle; import io.prestosql.spi.connector.ColumnMetadata; import io.prestosql.spi.function.OperatorType; +import io.prestosql.spi.security.AccessDeniedException; import io.prestosql.spi.security.Identity; import io.prestosql.spi.type.ArrayType; import io.prestosql.spi.type.MapType; @@ -53,6 +55,7 @@ import io.prestosql.sql.tree.AddColumn; import io.prestosql.sql.tree.AliasedRelation; import io.prestosql.sql.tree.AllColumns; +import io.prestosql.sql.tree.Analyze; import io.prestosql.sql.tree.Call; import io.prestosql.sql.tree.Commit; import io.prestosql.sql.tree.CreateSchema; @@ -153,6 +156,7 @@ import static io.prestosql.metadata.FunctionKind.WINDOW; import static io.prestosql.metadata.MetadataUtil.createQualifiedObjectName; import static io.prestosql.spi.StandardErrorCode.INVALID_FUNCTION_ARGUMENT; +import static io.prestosql.spi.StandardErrorCode.NOT_FOUND; import static io.prestosql.spi.type.BigintType.BIGINT; import static io.prestosql.spi.type.BooleanType.BOOLEAN; import static io.prestosql.spi.type.VarcharType.VARCHAR; @@ -209,6 +213,7 @@ import static io.prestosql.sql.tree.WindowFrame.Type.RANGE; import 
static io.prestosql.type.UnknownType.UNKNOWN; import static java.lang.Math.toIntExact; +import static java.lang.String.format; import static java.util.Collections.emptyList; import static java.util.Locale.ENGLISH; import static java.util.Objects.requireNonNull; @@ -402,6 +407,49 @@ protected Scope visitDelete(Delete node, Optional scope) return createAndAssignScope(node, scope, Field.newUnqualified("rows", BIGINT)); } + @Override + protected Scope visitAnalyze(Analyze node, Optional scope) + { + analysis.setUpdateType("ANALYZE"); + QualifiedObjectName tableName = createQualifiedObjectName(session, node, node.getTableName()); + + // verify the target table exists and it's not a view + if (metadata.getView(session, tableName).isPresent()) { + throw new SemanticException(NOT_SUPPORTED, node, "Analyzing views is not supported"); + } + + validateProperties(node.getProperties(), scope); + ConnectorId connectorId = metadata.getCatalogHandle(session, tableName.getCatalogName()) + .orElseThrow(() -> new PrestoException(NOT_FOUND, "Catalog not found: " + tableName.getCatalogName())); + + Map analyzeProperties = metadata.getAnalyzePropertyManager().getProperties( + connectorId, + connectorId.getCatalogName(), + mapFromProperties(node.getProperties()), + session, + metadata, + analysis.getParameters()); + TableHandle tableHandle = metadata.getTableHandleForStatisticsCollection(session, tableName, analyzeProperties) + .orElseThrow(() -> (new SemanticException(MISSING_TABLE, node, "Table '%s' does not exist", tableName))); + + // user must have read and insert permission in order to analyze stats of a table + analysis.addTableColumnReferences( + accessControl, + session.getIdentity(), + ImmutableMultimap.builder() + .putAll(tableName, metadata.getColumnHandles(session, tableHandle).keySet()) + .build()); + try { + accessControl.checkCanInsertIntoTable(session.getRequiredTransactionId(), session.getIdentity(), tableName); + } + catch (AccessDeniedException exception) { + throw new AccessDeniedException(format("Cannot ANALYZE (missing insert privilege) table %s", tableName)); + } + + analysis.setAnalyzeTarget(tableHandle); + return createAndAssignScope(node, scope, Field.newUnqualified("rows", BIGINT)); + } + @Override protected Scope visitCreateTableAsSelect(CreateTableAsSelect node, Optional scope) { diff --git a/presto-main/src/main/java/io/prestosql/sql/planner/DistributedExecutionPlanner.java b/presto-main/src/main/java/io/prestosql/sql/planner/DistributedExecutionPlanner.java index 92322714998f..eef8f8e602c5 100644 --- a/presto-main/src/main/java/io/prestosql/sql/planner/DistributedExecutionPlanner.java +++ b/presto-main/src/main/java/io/prestosql/sql/planner/DistributedExecutionPlanner.java @@ -46,6 +46,7 @@ import io.prestosql.sql.planner.plan.SemiJoinNode; import io.prestosql.sql.planner.plan.SortNode; import io.prestosql.sql.planner.plan.SpatialJoinNode; +import io.prestosql.sql.planner.plan.StatisticsWriterNode; import io.prestosql.sql.planner.plan.TableFinishNode; import io.prestosql.sql.planner.plan.TableScanNode; import io.prestosql.sql.planner.plan.TableWriterNode; @@ -336,6 +337,12 @@ public Map visitTableFinish(TableFinishNode node, Void return node.getSource().accept(this, context); } + @Override + public Map visitStatisticsWriterNode(StatisticsWriterNode node, Void context) + { + return node.getSource().accept(this, context); + } + @Override public Map visitDelete(DeleteNode node, Void context) { diff --git a/presto-main/src/main/java/io/prestosql/sql/planner/LocalExecutionPlanner.java 
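visitAnalyze above rejects views, decodes the analyze properties through the AnalyzePropertyManager, and requires INSERT privilege on the target. Expressed as hedged, test-style assertions (assertQueryFails, assertAccessDenied, and privilege are the standard test-framework helpers; table names and message patterns below are illustrative):

```java
// ANALYZE of a view fails during analysis
assertQueryFails("ANALYZE test_view", ".*Analyzing views is not supported.*");

// a missing table surfaces as MISSING_TABLE from getTableHandleForStatisticsCollection
assertQueryFails("ANALYZE no_such_table", ".*Table .* does not exist.*");

// without insert privilege, the access-control check rewraps the denial
assertAccessDenied(
        "ANALYZE some_table",
        "Cannot ANALYZE \\(missing insert privilege\\) table .*",
        privilege("some_table", INSERT_TABLE));
```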
b/presto-main/src/main/java/io/prestosql/sql/planner/LocalExecutionPlanner.java index 0bed69e9e372..56523947c13d 100644 --- a/presto-main/src/main/java/io/prestosql/sql/planner/LocalExecutionPlanner.java +++ b/presto-main/src/main/java/io/prestosql/sql/planner/LocalExecutionPlanner.java @@ -84,6 +84,7 @@ import io.prestosql.operator.SpatialIndexBuilderOperator.SpatialPredicate; import io.prestosql.operator.SpatialJoinOperator.SpatialJoinOperatorFactory; import io.prestosql.operator.StageExecutionStrategy; +import io.prestosql.operator.StatisticsWriterOperator.StatisticsWriterOperatorFactory; import io.prestosql.operator.StreamingAggregationOperator.StreamingAggregationOperatorFactory; import io.prestosql.operator.TableScanOperator.TableScanOperatorFactory; import io.prestosql.operator.TaskContext; @@ -167,6 +168,7 @@ import io.prestosql.sql.planner.plan.SortNode; import io.prestosql.sql.planner.plan.SpatialJoinNode; import io.prestosql.sql.planner.plan.StatisticAggregationsDescriptor; +import io.prestosql.sql.planner.plan.StatisticsWriterNode; import io.prestosql.sql.planner.plan.TableFinishNode; import io.prestosql.sql.planner.plan.TableScanNode; import io.prestosql.sql.planner.plan.TableWriterNode; @@ -2244,6 +2246,22 @@ public PhysicalOperation visitTableWriter(TableWriterNode node, LocalExecutionPl return new PhysicalOperation(operatorFactory, outputMapping.build(), context, source); } + @Override + public PhysicalOperation visitStatisticsWriterNode(StatisticsWriterNode node, LocalExecutionPlanContext context) + { + PhysicalOperation source = node.getSource().accept(this, context); + + StatisticAggregationsDescriptor descriptor = node.getDescriptor().map(symbol -> source.getLayout().get(symbol)); + + OperatorFactory operatorFactory = new StatisticsWriterOperatorFactory( + context.getNextOperatorId(), + node.getId(), + computedStatistics -> metadata.finishStatisticsCollection(session, ((StatisticsWriterNode.WriteStatisticsHandle) node.getTarget()).getHandle(), computedStatistics), + node.isRowCountEnabled(), + descriptor); + return new PhysicalOperation(operatorFactory, makeLayout(node), context, source); + } + @Override public PhysicalOperation visitTableFinish(TableFinishNode node, LocalExecutionPlanContext context) { diff --git a/presto-main/src/main/java/io/prestosql/sql/planner/LogicalPlanner.java b/presto-main/src/main/java/io/prestosql/sql/planner/LogicalPlanner.java index fb4f96290c98..557685f6d1e8 100644 --- a/presto-main/src/main/java/io/prestosql/sql/planner/LogicalPlanner.java +++ b/presto-main/src/main/java/io/prestosql/sql/planner/LogicalPlanner.java @@ -14,6 +14,7 @@ package io.prestosql.sql.planner; import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import io.prestosql.Session; import io.prestosql.connector.ConnectorId; @@ -28,6 +29,7 @@ import io.prestosql.metadata.Metadata; import io.prestosql.metadata.NewTableLayout; import io.prestosql.metadata.QualifiedObjectName; +import io.prestosql.metadata.TableHandle; import io.prestosql.metadata.TableMetadata; import io.prestosql.spi.PrestoException; import io.prestosql.spi.connector.ColumnHandle; @@ -43,6 +45,7 @@ import io.prestosql.sql.parser.SqlParser; import io.prestosql.sql.planner.StatisticsAggregationPlanner.TableStatisticAggregation; import io.prestosql.sql.planner.optimizations.PlanOptimizer; +import io.prestosql.sql.planner.plan.AggregationNode; import io.prestosql.sql.planner.plan.Assignments; import 
io.prestosql.sql.planner.plan.DeleteNode; import io.prestosql.sql.planner.plan.ExplainAnalyzeNode; @@ -51,10 +54,13 @@ import io.prestosql.sql.planner.plan.PlanNode; import io.prestosql.sql.planner.plan.ProjectNode; import io.prestosql.sql.planner.plan.StatisticAggregations; +import io.prestosql.sql.planner.plan.StatisticsWriterNode; import io.prestosql.sql.planner.plan.TableFinishNode; +import io.prestosql.sql.planner.plan.TableScanNode; import io.prestosql.sql.planner.plan.TableWriterNode; import io.prestosql.sql.planner.plan.ValuesNode; import io.prestosql.sql.planner.sanity.PlanSanityChecker; +import io.prestosql.sql.tree.Analyze; import io.prestosql.sql.tree.Cast; import io.prestosql.sql.tree.CreateTableAsSelect; import io.prestosql.sql.tree.Delete; @@ -84,8 +90,10 @@ import static com.google.common.collect.Streams.zip; import static io.prestosql.spi.StandardErrorCode.NOT_FOUND; import static io.prestosql.spi.StandardErrorCode.NOT_SUPPORTED; +import static io.prestosql.spi.statistics.TableStatisticType.ROW_COUNT; import static io.prestosql.spi.type.BigintType.BIGINT; import static io.prestosql.spi.type.VarbinaryType.VARBINARY; +import static io.prestosql.sql.planner.plan.AggregationNode.singleGroupingSet; import static io.prestosql.sql.planner.plan.TableWriterNode.CreateName; import static io.prestosql.sql.planner.plan.TableWriterNode.InsertReference; import static io.prestosql.sql.planner.plan.TableWriterNode.WriterTarget; @@ -195,6 +203,9 @@ private RelationPlan planStatementWithoutOutput(Analysis analysis, Statement sta } return createTableCreationPlan(analysis, ((CreateTableAsSelect) statement).getQuery()); } + else if (statement instanceof Analyze) { + return createAnalyzePlan(analysis, (Analyze) statement); + } else if (statement instanceof Insert) { checkState(analysis.getInsert().isPresent(), "Insert handle is missing"); return createInsertPlan(analysis, (Insert) statement); @@ -223,6 +234,50 @@ private RelationPlan createExplainAnalyzePlan(Analysis analysis, Explain stateme return new RelationPlan(root, scope, ImmutableList.of(outputSymbol)); } + private RelationPlan createAnalyzePlan(Analysis analysis, Analyze analyzeStatement) + { + TableHandle targetTable = analysis.getAnalyzeTarget().get(); + + // Plan table scan + Map columnHandles = metadata.getColumnHandles(session, targetTable); + ImmutableList.Builder tableScanOutputs = ImmutableList.builder(); + ImmutableMap.Builder symbolToColumnHandle = ImmutableMap.builder(); + ImmutableMap.Builder columnNameToSymbol = ImmutableMap.builder(); + TableMetadata tableMetadata = metadata.getTableMetadata(session, targetTable); + for (ColumnMetadata column : tableMetadata.getColumns()) { + Symbol symbol = symbolAllocator.newSymbol(column.getName(), column.getType()); + tableScanOutputs.add(symbol); + symbolToColumnHandle.put(symbol, columnHandles.get(column.getName())); + columnNameToSymbol.put(column.getName(), symbol); + } + + TableStatisticsMetadata tableStatisticsMetadata = metadata.getStatisticsCollectionMetadata( + session, + targetTable.getConnectorId().getCatalogName(), + tableMetadata.getMetadata()); + + TableStatisticAggregation tableStatisticAggregation = statisticsAggregationPlanner.createStatisticsAggregation(tableStatisticsMetadata, columnNameToSymbol.build()); + StatisticAggregations statisticAggregations = tableStatisticAggregation.getAggregations(); + List groupingSymbols = statisticAggregations.getGroupingSymbols(); + + PlanNode planNode = new StatisticsWriterNode( + idAllocator.getNextId(), + new AggregationNode( + 
idAllocator.getNextId(), + new TableScanNode(idAllocator.getNextId(), targetTable, tableScanOutputs.build(), symbolToColumnHandle.build()), + statisticAggregations.getAggregations(), + singleGroupingSet(groupingSymbols), + ImmutableList.of(), + AggregationNode.Step.SINGLE, + Optional.empty(), + Optional.empty()), + new StatisticsWriterNode.WriteStatisticsReference(targetTable), + symbolAllocator.newSymbol("rows", BIGINT), + tableStatisticsMetadata.getTableStatistics().contains(ROW_COUNT), + tableStatisticAggregation.getDescriptor()); + return new RelationPlan(planNode, analysis.getScope(analyzeStatement), planNode.getOutputSymbols()); + } + private RelationPlan createTableCreationPlan(Analysis analysis, Query query) { QualifiedObjectName destination = analysis.getCreateTableDestination().get(); @@ -242,7 +297,7 @@ private RelationPlan createTableCreationPlan(Analysis analysis, Query query) .map(ColumnMetadata::getName) .collect(toImmutableList()); - TableStatisticsMetadata statisticsMetadata = metadata.getStatisticsCollectionMetadata(session, destination.getCatalogName(), tableMetadata); + TableStatisticsMetadata statisticsMetadata = metadata.getStatisticsCollectionMetadataForWrite(session, destination.getCatalogName(), tableMetadata); return createTableWriterPlan( analysis, @@ -305,7 +360,7 @@ private RelationPlan createInsertPlan(Analysis analysis, Insert insertStatement) Optional newTableLayout = metadata.getInsertLayout(session, insert.getTarget()); String catalogName = insert.getTarget().getConnectorId().getCatalogName(); - TableStatisticsMetadata statisticsMetadata = metadata.getStatisticsCollectionMetadata(session, catalogName, tableMetadata.getMetadata()); + TableStatisticsMetadata statisticsMetadata = metadata.getStatisticsCollectionMetadataForWrite(session, catalogName, tableMetadata.getMetadata()); return createTableWriterPlan( analysis, diff --git a/presto-main/src/main/java/io/prestosql/sql/planner/PlanFragmenter.java b/presto-main/src/main/java/io/prestosql/sql/planner/PlanFragmenter.java index 49e57415a146..0461ab593461 100644 --- a/presto-main/src/main/java/io/prestosql/sql/planner/PlanFragmenter.java +++ b/presto-main/src/main/java/io/prestosql/sql/planner/PlanFragmenter.java @@ -40,6 +40,7 @@ import io.prestosql.sql.planner.plan.PlanVisitor; import io.prestosql.sql.planner.plan.RemoteSourceNode; import io.prestosql.sql.planner.plan.SimplePlanRewriter; +import io.prestosql.sql.planner.plan.StatisticsWriterNode; import io.prestosql.sql.planner.plan.TableFinishNode; import io.prestosql.sql.planner.plan.TableScanNode; import io.prestosql.sql.planner.plan.TableWriterNode; @@ -243,6 +244,13 @@ public PlanNode visitExplainAnalyze(ExplainAnalyzeNode node, RewriteContext context) + { + context.get().setCoordinatorOnlyDistribution(); + return context.defaultRewrite(node, context.get()); + } + @Override public PlanNode visitTableFinish(TableFinishNode node, RewriteContext context) { diff --git a/presto-main/src/main/java/io/prestosql/sql/planner/optimizations/AddExchanges.java b/presto-main/src/main/java/io/prestosql/sql/planner/optimizations/AddExchanges.java index 368d7b0b5a20..e6d698166cc6 100644 --- a/presto-main/src/main/java/io/prestosql/sql/planner/optimizations/AddExchanges.java +++ b/presto-main/src/main/java/io/prestosql/sql/planner/optimizations/AddExchanges.java @@ -64,6 +64,7 @@ import io.prestosql.sql.planner.plan.SemiJoinNode; import io.prestosql.sql.planner.plan.SortNode; import io.prestosql.sql.planner.plan.SpatialJoinNode; +import 
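With createAnalyzePlan above and the PlanFragmenter rule pinning the fragment to the coordinator, the ANALYZE plan has a fixed shape. A sketch of that shape and of how the exchange rules that follow place it (descriptive only, not generated planner output):

```java
// StatisticsWriterNode            outputs the single "rows" symbol
//   target: WriteStatisticsReference(tableHandle)   -- resolved later by BeginTableWrite
//   └── AggregationNode           Step.SINGLE, grouped by the statistics grouping symbols
//         └── TableScanNode       scan of the analyze target
//
// AddExchanges then inserts a remote gathering exchange under the writer (unless the
// child is already a gathering exchange), and AddLocalExchanges enforces a single local
// stream, so the statistics write executes exactly once, on the coordinator.
```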
io.prestosql.sql.planner.plan.StatisticsWriterNode; import io.prestosql.sql.planner.plan.TableFinishNode; import io.prestosql.sql.planner.plan.TableScanNode; import io.prestosql.sql.planner.plan.TableWriterNode; @@ -586,6 +587,25 @@ public PlanWithProperties visitExplainAnalyze(ExplainAnalyzeNode node, Preferred return rebaseAndDeriveProperties(node, child); } + @Override + public PlanWithProperties visitStatisticsWriterNode(StatisticsWriterNode node, PreferredProperties context) + { + PlanWithProperties child = planChild(node, PreferredProperties.any()); + + // if the child is already a gathering exchange, don't add another + if ((child.getNode() instanceof ExchangeNode) && ((ExchangeNode) child.getNode()).getType().equals(GATHER)) { + return rebaseAndDeriveProperties(node, child); + } + + if (!child.getProperties().isCoordinatorOnly()) { + child = withDerivedProperties( + gatheringExchange(idAllocator.getNextId(), REMOTE, child.getNode()), + child.getProperties()); + } + + return rebaseAndDeriveProperties(node, child); + } + @Override public PlanWithProperties visitTableFinish(TableFinishNode node, PreferredProperties preferredProperties) { diff --git a/presto-main/src/main/java/io/prestosql/sql/planner/optimizations/AddLocalExchanges.java b/presto-main/src/main/java/io/prestosql/sql/planner/optimizations/AddLocalExchanges.java index 26df1bccbef3..50ad56de998c 100644 --- a/presto-main/src/main/java/io/prestosql/sql/planner/optimizations/AddLocalExchanges.java +++ b/presto-main/src/main/java/io/prestosql/sql/planner/optimizations/AddLocalExchanges.java @@ -49,6 +49,7 @@ import io.prestosql.sql.planner.plan.SemiJoinNode; import io.prestosql.sql.planner.plan.SortNode; import io.prestosql.sql.planner.plan.SpatialJoinNode; +import io.prestosql.sql.planner.plan.StatisticsWriterNode; import io.prestosql.sql.planner.plan.TableFinishNode; import io.prestosql.sql.planner.plan.TableWriterNode; import io.prestosql.sql.planner.plan.TopNNode; @@ -191,6 +192,14 @@ public PlanWithProperties visitSort(SortNode node, StreamPreferredProperties par return planAndEnforceChildren(node, singleStream(), defaultParallelism(session)); } + @Override + public PlanWithProperties visitStatisticsWriterNode(StatisticsWriterNode node, StreamPreferredProperties context) + { + // analyze finish requires that all data be in one stream + // this node changes the input organization completely, so we do not pass through parent preferences + return planAndEnforceChildren(node, singleStream(), defaultParallelism(session)); + } + @Override public PlanWithProperties visitTableFinish(TableFinishNode node, StreamPreferredProperties parentPreferences) { diff --git a/presto-main/src/main/java/io/prestosql/sql/planner/optimizations/BeginTableWrite.java b/presto-main/src/main/java/io/prestosql/sql/planner/optimizations/BeginTableWrite.java index 65cbb1c1220c..7057b2344e27 100644 --- a/presto-main/src/main/java/io/prestosql/sql/planner/optimizations/BeginTableWrite.java +++ b/presto-main/src/main/java/io/prestosql/sql/planner/optimizations/BeginTableWrite.java @@ -35,6 +35,7 @@ import io.prestosql.sql.planner.plan.ProjectNode; import io.prestosql.sql.planner.plan.SemiJoinNode; import io.prestosql.sql.planner.plan.SimplePlanRewriter; +import io.prestosql.sql.planner.plan.StatisticsWriterNode; import io.prestosql.sql.planner.plan.TableFinishNode; import io.prestosql.sql.planner.plan.TableScanNode; import io.prestosql.sql.planner.plan.TableWriterNode; @@ -116,6 +117,24 @@ public PlanNode visitDelete(DeleteNode node, RewriteContext context) 
node.getOutputSymbols()); } + @Override + public PlanNode visitStatisticsWriterNode(StatisticsWriterNode node, RewriteContext context) + { + PlanNode child = node.getSource(); + child = child.accept(this, context); + + StatisticsWriterNode.WriteStatisticsHandle analyzeHandle = + new StatisticsWriterNode.WriteStatisticsHandle(metadata.beginStatisticsCollection(session, ((StatisticsWriterNode.WriteStatisticsReference) node.getTarget()).getHandle())); + + return new StatisticsWriterNode( + node.getId(), + child, + analyzeHandle, + node.getRowCountSymbol(), + node.isRowCountEnabled(), + node.getDescriptor()); + } + @Override public PlanNode visitTableFinish(TableFinishNode node, RewriteContext context) { diff --git a/presto-main/src/main/java/io/prestosql/sql/planner/optimizations/PropertyDerivations.java b/presto-main/src/main/java/io/prestosql/sql/planner/optimizations/PropertyDerivations.java index 0b13a5a6d330..dd0a7e73dd30 100644 --- a/presto-main/src/main/java/io/prestosql/sql/planner/optimizations/PropertyDerivations.java +++ b/presto-main/src/main/java/io/prestosql/sql/planner/optimizations/PropertyDerivations.java @@ -65,6 +65,7 @@ import io.prestosql.sql.planner.plan.SemiJoinNode; import io.prestosql.sql.planner.plan.SortNode; import io.prestosql.sql.planner.plan.SpatialJoinNode; +import io.prestosql.sql.planner.plan.StatisticsWriterNode; import io.prestosql.sql.planner.plan.TableFinishNode; import io.prestosql.sql.planner.plan.TableScanNode; import io.prestosql.sql.planner.plan.TableWriterNode; @@ -364,6 +365,14 @@ public ActualProperties visitDistinctLimit(DistinctLimitNode node, List context) + { + return ActualProperties.builder() + .global(coordinatorSingleStreamPartition()) + .build(); + } + @Override public ActualProperties visitTableFinish(TableFinishNode node, List inputProperties) { diff --git a/presto-main/src/main/java/io/prestosql/sql/planner/optimizations/PruneUnreferencedOutputs.java b/presto-main/src/main/java/io/prestosql/sql/planner/optimizations/PruneUnreferencedOutputs.java index 1d4c1e41df0c..3e0b5ef191a9 100644 --- a/presto-main/src/main/java/io/prestosql/sql/planner/optimizations/PruneUnreferencedOutputs.java +++ b/presto-main/src/main/java/io/prestosql/sql/planner/optimizations/PruneUnreferencedOutputs.java @@ -58,6 +58,7 @@ import io.prestosql.sql.planner.plan.SortNode; import io.prestosql.sql.planner.plan.SpatialJoinNode; import io.prestosql.sql.planner.plan.StatisticAggregations; +import io.prestosql.sql.planner.plan.StatisticsWriterNode; import io.prestosql.sql.planner.plan.TableFinishNode; import io.prestosql.sql.planner.plan.TableScanNode; import io.prestosql.sql.planner.plan.TableWriterNode; @@ -653,6 +654,19 @@ public PlanNode visitTableWriter(TableWriterNode node, RewriteContext> context) + { + PlanNode source = context.rewrite(node.getSource(), ImmutableSet.copyOf(node.getSource().getOutputSymbols())); + return new StatisticsWriterNode( + node.getId(), + source, + node.getTarget(), + node.getRowCountSymbol(), + node.isRowCountEnabled(), + node.getDescriptor()); + } + @Override public PlanNode visitTableFinish(TableFinishNode node, RewriteContext> context) { diff --git a/presto-main/src/main/java/io/prestosql/sql/planner/optimizations/StreamPropertyDerivations.java b/presto-main/src/main/java/io/prestosql/sql/planner/optimizations/StreamPropertyDerivations.java index 4bc7d6a33f7f..3ec90944eb5a 100644 --- a/presto-main/src/main/java/io/prestosql/sql/planner/optimizations/StreamPropertyDerivations.java +++ 
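BeginTableWrite's new visitor above is the only point where the target changes identity: planning carries a lightweight WriteStatisticsReference, and once the connector transaction is open it is swapped for a WriteStatisticsHandle. The swap in isolation, as a sketch (not a drop-in replacement for the rewriter):

```java
import io.prestosql.Session;
import io.prestosql.metadata.Metadata;
import io.prestosql.sql.planner.plan.StatisticsWriterNode;

final class AnalyzeTargetResolution
{
    private AnalyzeTargetResolution() {}

    // beginStatisticsCollection turns the planning-time TableHandle into an
    // AnalyzeTableHandle bound to the connector transaction.
    static StatisticsWriterNode.WriteStatisticsTarget resolve(
            Metadata metadata,
            Session session,
            StatisticsWriterNode.WriteStatisticsReference reference)
    {
        return new StatisticsWriterNode.WriteStatisticsHandle(
                metadata.beginStatisticsCollection(session, reference.getHandle()));
    }
}
```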
b/presto-main/src/main/java/io/prestosql/sql/planner/optimizations/StreamPropertyDerivations.java @@ -52,6 +52,7 @@ import io.prestosql.sql.planner.plan.SemiJoinNode; import io.prestosql.sql.planner.plan.SortNode; import io.prestosql.sql.planner.plan.SpatialJoinNode; +import io.prestosql.sql.planner.plan.StatisticsWriterNode; import io.prestosql.sql.planner.plan.TableFinishNode; import io.prestosql.sql.planner.plan.TableScanNode; import io.prestosql.sql.planner.plan.TableWriterNode; @@ -377,6 +378,14 @@ public StreamProperties visitAggregation(AggregationNode node, List node.getGroupingKeys().contains(symbol) ? Optional.of(symbol) : Optional.empty()); } + @Override + public StreamProperties visitStatisticsWriterNode(StatisticsWriterNode node, List inputProperties) + { + StreamProperties properties = Iterables.getOnlyElement(inputProperties); + // analyze finish only outputs row count + return properties.withUnspecifiedPartitioning(); + } + @Override public StreamProperties visitTableFinish(TableFinishNode node, List inputProperties) { diff --git a/presto-main/src/main/java/io/prestosql/sql/planner/optimizations/SymbolMapper.java b/presto-main/src/main/java/io/prestosql/sql/planner/optimizations/SymbolMapper.java index 55f7a331c255..63c5740abe77 100644 --- a/presto-main/src/main/java/io/prestosql/sql/planner/optimizations/SymbolMapper.java +++ b/presto-main/src/main/java/io/prestosql/sql/planner/optimizations/SymbolMapper.java @@ -26,6 +26,7 @@ import io.prestosql.sql.planner.plan.PlanNodeId; import io.prestosql.sql.planner.plan.StatisticAggregations; import io.prestosql.sql.planner.plan.StatisticAggregationsDescriptor; +import io.prestosql.sql.planner.plan.StatisticsWriterNode; import io.prestosql.sql.planner.plan.TableFinishNode; import io.prestosql.sql.planner.plan.TableWriterNode; import io.prestosql.sql.planner.plan.TopNNode; @@ -163,6 +164,17 @@ public TableWriterNode map(TableWriterNode node, PlanNode source, PlanNodeId new node.getStatisticsAggregationDescriptor().map(this::map)); } + public StatisticsWriterNode map(StatisticsWriterNode node, PlanNode source) + { + return new StatisticsWriterNode( + node.getId(), + source, + node.getTarget(), + node.getRowCountSymbol(), + node.isRowCountEnabled(), + node.getDescriptor().map(this::map)); + } + public TableFinishNode map(TableFinishNode node, PlanNode source) { return new TableFinishNode( diff --git a/presto-main/src/main/java/io/prestosql/sql/planner/optimizations/UnaliasSymbolReferences.java b/presto-main/src/main/java/io/prestosql/sql/planner/optimizations/UnaliasSymbolReferences.java index 7533157584f0..78fab3929edf 100644 --- a/presto-main/src/main/java/io/prestosql/sql/planner/optimizations/UnaliasSymbolReferences.java +++ b/presto-main/src/main/java/io/prestosql/sql/planner/optimizations/UnaliasSymbolReferences.java @@ -61,6 +61,7 @@ import io.prestosql.sql.planner.plan.SimplePlanRewriter; import io.prestosql.sql.planner.plan.SortNode; import io.prestosql.sql.planner.plan.SpatialJoinNode; +import io.prestosql.sql.planner.plan.StatisticsWriterNode; import io.prestosql.sql.planner.plan.TableFinishNode; import io.prestosql.sql.planner.plan.TableScanNode; import io.prestosql.sql.planner.plan.TableWriterNode; @@ -361,6 +362,14 @@ public PlanNode visitDelete(DeleteNode node, RewriteContext context) return new DeleteNode(node.getId(), context.rewrite(node.getSource()), node.getTarget(), canonicalize(node.getRowId()), node.getOutputSymbols()); } + @Override + public PlanNode visitStatisticsWriterNode(StatisticsWriterNode node, 
RewriteContext context) + { + PlanNode source = context.rewrite(node.getSource()); + SymbolMapper mapper = new SymbolMapper(mapping); + return mapper.map(node, source); + } + @Override public PlanNode visitTableFinish(TableFinishNode node, RewriteContext context) { diff --git a/presto-main/src/main/java/io/prestosql/sql/planner/plan/PlanNode.java b/presto-main/src/main/java/io/prestosql/sql/planner/plan/PlanNode.java index 0b97ca58a5cf..0b630a31451a 100644 --- a/presto-main/src/main/java/io/prestosql/sql/planner/plan/PlanNode.java +++ b/presto-main/src/main/java/io/prestosql/sql/planner/plan/PlanNode.java @@ -61,7 +61,9 @@ @JsonSubTypes.Type(value = ExplainAnalyzeNode.class, name = "explainAnalyze"), @JsonSubTypes.Type(value = ApplyNode.class, name = "apply"), @JsonSubTypes.Type(value = AssignUniqueId.class, name = "assignUniqueId"), - @JsonSubTypes.Type(value = LateralJoinNode.class, name = "lateralJoin")}) + @JsonSubTypes.Type(value = LateralJoinNode.class, name = "lateralJoin"), + @JsonSubTypes.Type(value = StatisticsWriterNode.class, name = "statisticsWriterNode"), +}) public abstract class PlanNode { private final PlanNodeId id; diff --git a/presto-main/src/main/java/io/prestosql/sql/planner/plan/PlanVisitor.java b/presto-main/src/main/java/io/prestosql/sql/planner/plan/PlanVisitor.java index 7317d3c257ee..91c56f6af997 100644 --- a/presto-main/src/main/java/io/prestosql/sql/planner/plan/PlanVisitor.java +++ b/presto-main/src/main/java/io/prestosql/sql/planner/plan/PlanVisitor.java @@ -134,6 +134,11 @@ public R visitTableFinish(TableFinishNode node, C context) return visitPlan(node, context); } + public R visitStatisticsWriterNode(StatisticsWriterNode node, C context) + { + return visitPlan(node, context); + } + public R visitUnion(UnionNode node, C context) { return visitPlan(node, context); diff --git a/presto-main/src/main/java/io/prestosql/sql/planner/plan/StatisticsWriterNode.java b/presto-main/src/main/java/io/prestosql/sql/planner/plan/StatisticsWriterNode.java new file mode 100644 index 000000000000..be18624363ec --- /dev/null +++ b/presto-main/src/main/java/io/prestosql/sql/planner/plan/StatisticsWriterNode.java @@ -0,0 +1,172 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.prestosql.sql.planner.plan; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.annotation.JsonSubTypes; +import com.fasterxml.jackson.annotation.JsonTypeInfo; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Iterables; +import io.prestosql.metadata.AnalyzeTableHandle; +import io.prestosql.metadata.TableHandle; +import io.prestosql.sql.planner.Symbol; + +import java.util.List; + +import static java.util.Objects.requireNonNull; + +public class StatisticsWriterNode + extends PlanNode +{ + private final PlanNode source; + private final Symbol rowCountSymbol; + private final WriteStatisticsTarget target; + private final boolean rowCountEnabled; + private final StatisticAggregationsDescriptor descriptor; + + @JsonCreator + public StatisticsWriterNode( + @JsonProperty("id") PlanNodeId id, + @JsonProperty("source") PlanNode source, + @JsonProperty("target") WriteStatisticsTarget target, + @JsonProperty("rowCountSymbol") Symbol rowCountSymbol, + @JsonProperty("rowCountEnabled") boolean rowCountEnabled, + @JsonProperty("descriptor") StatisticAggregationsDescriptor descriptor) + { + super(id); + this.source = requireNonNull(source, "source is null"); + this.target = requireNonNull(target, "target is null"); + this.rowCountSymbol = requireNonNull(rowCountSymbol, "rowCountSymbol is null"); + this.rowCountEnabled = rowCountEnabled; + this.descriptor = requireNonNull(descriptor, "descriptor is null"); + } + + @JsonProperty + public PlanNode getSource() + { + return source; + } + + @JsonProperty + public WriteStatisticsTarget getTarget() + { + return target; + } + + @JsonProperty + public StatisticAggregationsDescriptor getDescriptor() + { + return descriptor; + } + + @JsonProperty + public Symbol getRowCountSymbol() + { + return rowCountSymbol; + } + + @JsonProperty + public boolean isRowCountEnabled() + { + return rowCountEnabled; + } + + @Override + public List getSources() + { + return ImmutableList.of(source); + } + + @Override + public List getOutputSymbols() + { + return ImmutableList.of(rowCountSymbol); + } + + @Override + public PlanNode replaceChildren(List newChildren) + { + return new StatisticsWriterNode( + getId(), + Iterables.getOnlyElement(newChildren), + target, + rowCountSymbol, + rowCountEnabled, + descriptor); + } + + @Override + public R accept(PlanVisitor visitor, C context) + { + return visitor.visitStatisticsWriterNode(this, context); + } + + @JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "@type") + @JsonSubTypes({ + @JsonSubTypes.Type(value = WriteStatisticsHandle.class, name = "WriteStatisticsHandle")}) + @SuppressWarnings({"EmptyClass", "ClassMayBeInterface"}) + public abstract static class WriteStatisticsTarget + { + @Override + public abstract String toString(); + } + + public static class WriteStatisticsHandle + extends WriteStatisticsTarget + { + private final AnalyzeTableHandle handle; + + @JsonCreator + public WriteStatisticsHandle(@JsonProperty("handle") AnalyzeTableHandle handle) + { + this.handle = requireNonNull(handle, "handle is null"); + } + + @JsonProperty + public AnalyzeTableHandle getHandle() + { + return handle; + } + + @Override + public String toString() + { + return handle.toString(); + } + } + + // only used during planning -- will not be serialized + public static class WriteStatisticsReference + extends WriteStatisticsTarget + { + private final TableHandle handle; + + public WriteStatisticsReference(TableHandle 
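// Note: the visitStatisticsWriterNode rewrite earlier in this diff replaces this
// planning-time reference: it calls metadata.beginStatisticsCollection() on the wrapped
// TableHandle and wraps the resulting AnalyzeTableHandle in a serializable
// WriteStatisticsHandle.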
handle) + { + this.handle = requireNonNull(handle, "handle is null"); + } + + public TableHandle getHandle() + { + return handle; + } + + @Override + public String toString() + { + return handle.toString(); + } + } +} diff --git a/presto-main/src/main/java/io/prestosql/sql/planner/planPrinter/PlanPrinter.java b/presto-main/src/main/java/io/prestosql/sql/planner/planPrinter/PlanPrinter.java index bf0330c9f6e9..b79ad607ef29 100644 --- a/presto-main/src/main/java/io/prestosql/sql/planner/planPrinter/PlanPrinter.java +++ b/presto-main/src/main/java/io/prestosql/sql/planner/planPrinter/PlanPrinter.java @@ -90,6 +90,7 @@ import io.prestosql.sql.planner.plan.SpatialJoinNode; import io.prestosql.sql.planner.plan.StatisticAggregations; import io.prestosql.sql.planner.plan.StatisticAggregationsDescriptor; +import io.prestosql.sql.planner.plan.StatisticsWriterNode; import io.prestosql.sql.planner.plan.TableFinishNode; import io.prestosql.sql.planner.plan.TableScanNode; import io.prestosql.sql.planner.plan.TableWriterNode; @@ -1121,6 +1122,15 @@ public Void visitTableWriter(TableWriterNode node, Integer indent) return processChildren(node, indent + 1); } + @Override + public Void visitStatisticsWriterNode(StatisticsWriterNode node, Integer indent) + { + print(indent, "- StatisticsWriterNode[%s] => [%s]", node.getTarget(), formatOutputs(node.getOutputSymbols())); + printPlanNodesStatsAndCost(indent + 2, node); + printStats(indent + 2, node.getId()); + return processChildren(node, indent + 1); + } + @Override public Void visitTableFinish(TableFinishNode node, Integer indent) { diff --git a/presto-main/src/main/java/io/prestosql/sql/planner/sanity/ValidateDependenciesChecker.java b/presto-main/src/main/java/io/prestosql/sql/planner/sanity/ValidateDependenciesChecker.java index b68cf7d3feba..56a8175897b2 100644 --- a/presto-main/src/main/java/io/prestosql/sql/planner/sanity/ValidateDependenciesChecker.java +++ b/presto-main/src/main/java/io/prestosql/sql/planner/sanity/ValidateDependenciesChecker.java @@ -53,6 +53,8 @@ import io.prestosql.sql.planner.plan.SetOperationNode; import io.prestosql.sql.planner.plan.SortNode; import io.prestosql.sql.planner.plan.SpatialJoinNode; +import io.prestosql.sql.planner.plan.StatisticAggregationsDescriptor; +import io.prestosql.sql.planner.plan.StatisticsWriterNode; import io.prestosql.sql.planner.plan.TableFinishNode; import io.prestosql.sql.planner.plan.TableScanNode; import io.prestosql.sql.planner.plan.TableWriterNode; @@ -528,6 +530,22 @@ public Void visitMetadataDelete(MetadataDeleteNode node, Set<Symbol> boundSymbols) return null; } + @Override + public Void visitStatisticsWriterNode(StatisticsWriterNode node, Set<Symbol> boundSymbols) + { + node.getSource().accept(this, boundSymbols); // visit child + + StatisticAggregationsDescriptor<Symbol> descriptor = node.getDescriptor(); + Set<Symbol> dependencies = ImmutableSet.<Symbol>builder() + .addAll(descriptor.getGrouping().values()) + .addAll(descriptor.getColumnStatistics().values()) + .addAll(descriptor.getTableStatistics().values()) + .build(); + List<Symbol> outputSymbols = node.getSource().getOutputSymbols(); + checkDependencies(outputSymbols, dependencies, "Invalid node. 
Dependencies (%s) not in source plan output (%s)", dependencies, outputSymbols); + return null; + } + @Override public Void visitTableFinish(TableFinishNode node, Set boundSymbols) { diff --git a/presto-main/src/main/java/io/prestosql/testing/LocalQueryRunner.java b/presto-main/src/main/java/io/prestosql/testing/LocalQueryRunner.java index 82d9a2061656..ad142d9658c3 100644 --- a/presto-main/src/main/java/io/prestosql/testing/LocalQueryRunner.java +++ b/presto-main/src/main/java/io/prestosql/testing/LocalQueryRunner.java @@ -71,6 +71,7 @@ import io.prestosql.execution.warnings.WarningCollector; import io.prestosql.index.IndexManager; import io.prestosql.memory.MemoryManagerConfig; +import io.prestosql.metadata.AnalyzePropertyManager; import io.prestosql.metadata.CatalogManager; import io.prestosql.metadata.ColumnPropertyManager; import io.prestosql.metadata.HandleResolver; @@ -304,6 +305,7 @@ private LocalQueryRunner(Session defaultSession, FeaturesConfig featuresConfig, new SchemaPropertyManager(), new TablePropertyManager(), new ColumnPropertyManager(), + new AnalyzePropertyManager(), transactionManager); this.planFragmenter = new PlanFragmenter(this.metadata, this.nodePartitioningManager, new QueryManagerConfig()); this.joinCompiler = new JoinCompiler(metadata, featuresConfig); diff --git a/presto-main/src/main/java/io/prestosql/testing/TestingMetadata.java b/presto-main/src/main/java/io/prestosql/testing/TestingMetadata.java index bc9fa5e860ca..5f6a8e1bf6e7 100644 --- a/presto-main/src/main/java/io/prestosql/testing/TestingMetadata.java +++ b/presto-main/src/main/java/io/prestosql/testing/TestingMetadata.java @@ -86,6 +86,12 @@ public ConnectorTableHandle getTableHandle(ConnectorSession session, SchemaTable return new TestingTableHandle(tableName); } + @Override + public ConnectorTableHandle getTableHandleForStatisticsCollection(ConnectorSession session, SchemaTableName tableName, Map analyzeProperties) + { + return getTableHandle(session, tableName); + } + @Override public List getTableLayouts(ConnectorSession session, ConnectorTableHandle table, Constraint constraint, Optional> desiredColumns) { diff --git a/presto-main/src/main/java/io/prestosql/util/GraphvizPrinter.java b/presto-main/src/main/java/io/prestosql/util/GraphvizPrinter.java index 05d02ad5949f..ed4d152fdbf0 100644 --- a/presto-main/src/main/java/io/prestosql/util/GraphvizPrinter.java +++ b/presto-main/src/main/java/io/prestosql/util/GraphvizPrinter.java @@ -48,6 +48,7 @@ import io.prestosql.sql.planner.plan.SemiJoinNode; import io.prestosql.sql.planner.plan.SortNode; import io.prestosql.sql.planner.plan.SpatialJoinNode; +import io.prestosql.sql.planner.plan.StatisticsWriterNode; import io.prestosql.sql.planner.plan.TableFinishNode; import io.prestosql.sql.planner.plan.TableScanNode; import io.prestosql.sql.planner.plan.TableWriterNode; @@ -96,7 +97,8 @@ private enum NodeType TABLE_WRITER, TABLE_FINISH, INDEX_SOURCE, - UNNEST + UNNEST, + ANALYZE_FINISH, } private static final Map NODE_COLORS = immutableEnumMap(ImmutableMap.builder() @@ -120,6 +122,7 @@ private enum NodeType .put(NodeType.INDEX_SOURCE, "dodgerblue3") .put(NodeType.UNNEST, "crimson") .put(NodeType.SAMPLE, "goldenrod4") + .put(NodeType.ANALYZE_FINISH, "plum") .build()); static { @@ -224,6 +227,13 @@ public Void visitTableWriter(TableWriterNode node, Void context) return node.getSource().accept(this, context); } + @Override + public Void visitStatisticsWriterNode(StatisticsWriterNode node, Void context) + { + printNode(node, format("StatisticsWriterNode[%s]", 
Joiner.on(", ").join(node.getOutputSymbols())), NODE_COLORS.get(NodeType.ANALYZE_FINISH)); + return node.getSource().accept(this, context); + } + @Override public Void visitTableFinish(TableFinishNode node, Void context) { diff --git a/presto-main/src/main/java/io/prestosql/util/StatementUtils.java b/presto-main/src/main/java/io/prestosql/util/StatementUtils.java index 97860f7542c9..bdeba52c81a8 100644 --- a/presto-main/src/main/java/io/prestosql/util/StatementUtils.java +++ b/presto-main/src/main/java/io/prestosql/util/StatementUtils.java @@ -16,6 +16,7 @@ import com.google.common.collect.ImmutableMap; import io.prestosql.spi.resourcegroups.QueryType; import io.prestosql.sql.tree.AddColumn; +import io.prestosql.sql.tree.Analyze; import io.prestosql.sql.tree.Call; import io.prestosql.sql.tree.Commit; import io.prestosql.sql.tree.CreateRole; @@ -77,6 +78,7 @@ private StatementUtils() {} builder.put(Query.class, QueryType.SELECT); builder.put(Explain.class, QueryType.EXPLAIN); + builder.put(Analyze.class, QueryType.ANALYZE); builder.put(CreateTableAsSelect.class, QueryType.INSERT); builder.put(Insert.class, QueryType.INSERT); diff --git a/presto-main/src/test/java/io/prestosql/execution/TestResetSessionTask.java b/presto-main/src/test/java/io/prestosql/execution/TestResetSessionTask.java index 7d27e2a2accf..f44723e8115b 100644 --- a/presto-main/src/test/java/io/prestosql/execution/TestResetSessionTask.java +++ b/presto-main/src/test/java/io/prestosql/execution/TestResetSessionTask.java @@ -18,6 +18,7 @@ import io.prestosql.Session; import io.prestosql.block.BlockEncodingManager; import io.prestosql.execution.warnings.WarningCollector; +import io.prestosql.metadata.AnalyzePropertyManager; import io.prestosql.metadata.Catalog; import io.prestosql.metadata.CatalogManager; import io.prestosql.metadata.ColumnPropertyManager; @@ -72,6 +73,7 @@ public TestResetSessionTask() new SchemaPropertyManager(), new TablePropertyManager(), new ColumnPropertyManager(), + new AnalyzePropertyManager(), transactionManager); metadata.getSessionPropertyManager().addSystemSessionProperty(stringProperty( diff --git a/presto-main/src/test/java/io/prestosql/execution/TestSetPathTask.java b/presto-main/src/test/java/io/prestosql/execution/TestSetPathTask.java index 90cac1f56bd2..e5e852b8655c 100644 --- a/presto-main/src/test/java/io/prestosql/execution/TestSetPathTask.java +++ b/presto-main/src/test/java/io/prestosql/execution/TestSetPathTask.java @@ -16,6 +16,7 @@ import com.google.common.collect.ImmutableList; import io.prestosql.block.BlockEncodingManager; import io.prestosql.execution.warnings.WarningCollector; +import io.prestosql.metadata.AnalyzePropertyManager; import io.prestosql.metadata.CatalogManager; import io.prestosql.metadata.ColumnPropertyManager; import io.prestosql.metadata.MetadataManager; @@ -70,6 +71,7 @@ public TestSetPathTask() new SchemaPropertyManager(), new TablePropertyManager(), new ColumnPropertyManager(), + new AnalyzePropertyManager(), transactionManager); } diff --git a/presto-main/src/test/java/io/prestosql/execution/TestSetRoleTask.java b/presto-main/src/test/java/io/prestosql/execution/TestSetRoleTask.java index 53f469d31ba5..e93beac922f7 100644 --- a/presto-main/src/test/java/io/prestosql/execution/TestSetRoleTask.java +++ b/presto-main/src/test/java/io/prestosql/execution/TestSetRoleTask.java @@ -21,6 +21,7 @@ import com.google.common.collect.ImmutableMap; import io.prestosql.block.BlockEncodingManager; import io.prestosql.execution.warnings.WarningCollector; +import 
io.prestosql.metadata.AnalyzePropertyManager; import io.prestosql.metadata.CatalogManager; import io.prestosql.metadata.ColumnPropertyManager; import io.prestosql.metadata.MetadataManager; @@ -78,6 +79,7 @@ public void setUp() new SchemaPropertyManager(), new TablePropertyManager(), new ColumnPropertyManager(), + new AnalyzePropertyManager(), transactionManager); catalogManager.registerCatalog(createBogusTestingCatalog(CATALOG_NAME)); diff --git a/presto-main/src/test/java/io/prestosql/execution/TestSetSessionTask.java b/presto-main/src/test/java/io/prestosql/execution/TestSetSessionTask.java index 5aeba6197982..824267881fc5 100644 --- a/presto-main/src/test/java/io/prestosql/execution/TestSetSessionTask.java +++ b/presto-main/src/test/java/io/prestosql/execution/TestSetSessionTask.java @@ -17,6 +17,7 @@ import com.google.common.collect.ImmutableMap; import io.prestosql.block.BlockEncodingManager; import io.prestosql.execution.warnings.WarningCollector; +import io.prestosql.metadata.AnalyzePropertyManager; import io.prestosql.metadata.Catalog; import io.prestosql.metadata.CatalogManager; import io.prestosql.metadata.ColumnPropertyManager; @@ -84,6 +85,7 @@ public TestSetSessionTask() new SchemaPropertyManager(), new TablePropertyManager(), new ColumnPropertyManager(), + new AnalyzePropertyManager(), transactionManager); metadata.getSessionPropertyManager().addSystemSessionProperty(stringProperty( diff --git a/presto-main/src/test/java/io/prestosql/metadata/AbstractMockMetadata.java b/presto-main/src/test/java/io/prestosql/metadata/AbstractMockMetadata.java index ec6eecd15453..5ce0beed4a7f 100644 --- a/presto-main/src/test/java/io/prestosql/metadata/AbstractMockMetadata.java +++ b/presto-main/src/test/java/io/prestosql/metadata/AbstractMockMetadata.java @@ -101,6 +101,12 @@ public Optional getTableHandle(Session session, QualifiedObjectName throw new UnsupportedOperationException(); } + @Override + public Optional getTableHandleForStatisticsCollection(Session session, QualifiedObjectName tableName, Map analyzeProperties) + { + throw new UnsupportedOperationException(); + } + @Override public Optional getSystemTable(Session session, QualifiedObjectName tableName) { @@ -245,12 +251,30 @@ public Optional getInsertLayout(Session session, TableHandle tar throw new UnsupportedOperationException(); } + @Override + public TableStatisticsMetadata getStatisticsCollectionMetadataForWrite(Session session, String catalogName, ConnectorTableMetadata tableMetadata) + { + throw new UnsupportedOperationException(); + } + @Override public TableStatisticsMetadata getStatisticsCollectionMetadata(Session session, String catalogName, ConnectorTableMetadata tableMetadata) { throw new UnsupportedOperationException(); } + @Override + public AnalyzeTableHandle beginStatisticsCollection(Session session, TableHandle tableHandle) + { + throw new UnsupportedOperationException(); + } + + @Override + public void finishStatisticsCollection(Session session, AnalyzeTableHandle tableHandle, Collection computedStatistics) + { + throw new UnsupportedOperationException(); + } + @Override public void beginQuery(Session session, Set connectors) { @@ -467,6 +491,12 @@ public ColumnPropertyManager getColumnPropertyManager() throw new UnsupportedOperationException(); } + @Override + public AnalyzePropertyManager getAnalyzePropertyManager() + { + throw new UnsupportedOperationException(); + } + @Override public void dropColumn(Session session, TableHandle tableHandle, ColumnHandle column) { diff --git 
a/presto-main/src/test/java/io/prestosql/metadata/TestInformationSchemaMetadata.java b/presto-main/src/test/java/io/prestosql/metadata/TestInformationSchemaMetadata.java index 2e9a2b5cb58b..47b1bdf8237c 100644 --- a/presto-main/src/test/java/io/prestosql/metadata/TestInformationSchemaMetadata.java +++ b/presto-main/src/test/java/io/prestosql/metadata/TestInformationSchemaMetadata.java @@ -99,6 +99,7 @@ public TestInformationSchemaMetadata() new SchemaPropertyManager(), new TablePropertyManager(), new ColumnPropertyManager(), + new AnalyzePropertyManager(), transactionManager); } diff --git a/presto-main/src/test/java/io/prestosql/sql/analyzer/TestAnalyzer.java b/presto-main/src/test/java/io/prestosql/sql/analyzer/TestAnalyzer.java index 0c8f4734ce4e..eb192b5301b5 100644 --- a/presto-main/src/test/java/io/prestosql/sql/analyzer/TestAnalyzer.java +++ b/presto-main/src/test/java/io/prestosql/sql/analyzer/TestAnalyzer.java @@ -25,6 +25,7 @@ import io.prestosql.execution.TaskManagerConfig; import io.prestosql.execution.warnings.WarningCollector; import io.prestosql.memory.MemoryManagerConfig; +import io.prestosql.metadata.AnalyzePropertyManager; import io.prestosql.metadata.Catalog; import io.prestosql.metadata.CatalogManager; import io.prestosql.metadata.ColumnPropertyManager; @@ -47,6 +48,7 @@ import io.prestosql.spi.connector.ConnectorTableMetadata; import io.prestosql.spi.connector.ConnectorTransactionHandle; import io.prestosql.spi.connector.SchemaTableName; +import io.prestosql.spi.session.PropertyMetadata; import io.prestosql.spi.transaction.IsolationLevel; import io.prestosql.spi.type.ArrayType; import io.prestosql.spi.type.TypeManager; @@ -60,6 +62,7 @@ import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; +import java.util.List; import java.util.Optional; import java.util.function.Consumer; @@ -67,6 +70,8 @@ import static io.prestosql.connector.ConnectorId.createSystemTablesConnectorId; import static io.prestosql.metadata.ViewDefinition.ViewColumn; import static io.prestosql.operator.scalar.ApplyFunction.APPLY_FUNCTION; +import static io.prestosql.spi.session.PropertyMetadata.integerProperty; +import static io.prestosql.spi.session.PropertyMetadata.stringProperty; import static io.prestosql.spi.type.BigintType.BIGINT; import static io.prestosql.spi.type.DoubleType.DOUBLE; import static io.prestosql.spi.type.VarcharType.VARCHAR; @@ -1096,6 +1101,16 @@ public void testCreateTable() assertFails(DUPLICATE_PROPERTY, ".* Duplicate property: p1", "CREATE TABLE test (id bigint) WITH (p1 = 'p1', \"p1\" = 'p2')"); } + @Test + public void testAnalyze() + { + analyze("ANALYZE t1"); + analyze("ANALYZE t1 WITH (p1 = 'p1')"); + + assertFails(DUPLICATE_PROPERTY, ".* Duplicate property: p1", "ANALYZE t1 WITH (p1 = 'p1', p2 = 2, p1 = 'p3')"); + assertFails(DUPLICATE_PROPERTY, ".* Duplicate property: p1", "ANALYZE t1 WITH (p1 = 'p1', \"p1\" = 'p2')"); + } + @Test public void testCreateSchema() { @@ -1500,11 +1515,15 @@ public void setup() new SchemaPropertyManager(), new TablePropertyManager(), new ColumnPropertyManager(), + new AnalyzePropertyManager(), transactionManager); metadata.getFunctionRegistry().addFunctions(ImmutableList.of(APPLY_FUNCTION)); - catalogManager.registerCatalog(createTestingCatalog(TPCH_CATALOG, TPCH_CONNECTOR_ID)); + Catalog tpchTestCatalog = createTestingCatalog(TPCH_CATALOG, TPCH_CONNECTOR_ID); + catalogManager.registerCatalog(tpchTestCatalog); + metadata.getAnalyzePropertyManager().addProperties(TPCH_CONNECTOR_ID, 
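// Registering the connector's analyze properties with AnalyzePropertyManager is what
// lets the analyzer resolve ANALYZE ... WITH (...) against this catalog; without this
// registration, testAnalyze() above could not reference p1 and p2.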
tpchTestCatalog.getConnector(TPCH_CONNECTOR_ID).getAnalyzeProperties()); + catalogManager.registerCatalog(createTestingCatalog(SECOND_CATALOG, SECOND_CONNECTOR_ID)); catalogManager.registerCatalog(createTestingCatalog(THIRD_CATALOG, THIRD_CONNECTOR_ID)); @@ -1768,6 +1787,14 @@ public ConnectorSplitManager getSplitManager() { throw new UnsupportedOperationException(); } + + @Override + public List> getAnalyzeProperties() + { + return ImmutableList.of( + stringProperty("p1", "test string property", "", false), + integerProperty("p2", "test integer property", 0, false)); + } }; } } diff --git a/presto-main/src/test/java/io/prestosql/sql/planner/TestLogicalPlanner.java b/presto-main/src/test/java/io/prestosql/sql/planner/TestLogicalPlanner.java index 34e63e56ce7a..01941181acdd 100644 --- a/presto-main/src/test/java/io/prestosql/sql/planner/TestLogicalPlanner.java +++ b/presto-main/src/test/java/io/prestosql/sql/planner/TestLogicalPlanner.java @@ -33,6 +33,7 @@ import io.prestosql.sql.planner.plan.LateralJoinNode; import io.prestosql.sql.planner.plan.PlanNode; import io.prestosql.sql.planner.plan.SemiJoinNode; +import io.prestosql.sql.planner.plan.StatisticsWriterNode; import io.prestosql.sql.planner.plan.TableScanNode; import io.prestosql.sql.planner.plan.ValuesNode; import io.prestosql.sql.tree.LongLiteral; @@ -101,6 +102,21 @@ public class TestLogicalPlanner extends BasePlanTest { + @Test + public void testAnalyze() + { + assertDistributedPlan("ANALYZE orders", + anyTree( + node(StatisticsWriterNode.class, + anyTree( + exchange(REMOTE, GATHER, + node(AggregationNode.class, + anyTree( + exchange(REMOTE, GATHER, + node(AggregationNode.class, + tableScan("orders", ImmutableMap.of())))))))))); + } + @Test public void testAggregation() { diff --git a/presto-parser/src/main/antlr4/io/prestosql/sql/parser/SqlBase.g4 b/presto-parser/src/main/antlr4/io/prestosql/sql/parser/SqlBase.g4 index c308fe330b23..5e4f5164cc73 100644 --- a/presto-parser/src/main/antlr4/io/prestosql/sql/parser/SqlBase.g4 +++ b/presto-parser/src/main/antlr4/io/prestosql/sql/parser/SqlBase.g4 @@ -56,6 +56,7 @@ statement DROP COLUMN column=qualifiedName #dropColumn | ALTER TABLE tableName=qualifiedName ADD COLUMN column=columnDefinition #addColumn + | ANALYZE qualifiedName (WITH properties)? #analyze | CREATE (OR REPLACE)? VIEW qualifiedName (SECURITY (DEFINER | INVOKER))? AS query #createView | DROP VIEW (IF EXISTS)? 
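// The new #analyze alternative above admits statements such as (see TestSqlParser#testAnalyze later in this diff):
//   ANALYZE foo
//   ANALYZE foo WITH ("string" = 'bar', "long" = 42)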
qualifiedName #dropView diff --git a/presto-parser/src/main/java/io/prestosql/sql/SqlFormatter.java b/presto-parser/src/main/java/io/prestosql/sql/SqlFormatter.java index 0a26db29dad1..78da4d51b98c 100644 --- a/presto-parser/src/main/java/io/prestosql/sql/SqlFormatter.java +++ b/presto-parser/src/main/java/io/prestosql/sql/SqlFormatter.java @@ -18,6 +18,7 @@ import io.prestosql.sql.tree.AddColumn; import io.prestosql.sql.tree.AliasedRelation; import io.prestosql.sql.tree.AllColumns; +import io.prestosql.sql.tree.Analyze; import io.prestosql.sql.tree.AstVisitor; import io.prestosql.sql.tree.Call; import io.prestosql.sql.tree.CallArgument; @@ -956,6 +957,15 @@ protected Void visitDropColumn(DropColumn node, Integer context) return null; } + @Override + protected Void visitAnalyze(Analyze node, Integer context) + { + builder.append("ANALYZE ") + .append(formatName(node.getTableName())); + builder.append(formatPropertiesMultiLine(node.getProperties())); + return null; + } + @Override protected Void visitAddColumn(AddColumn node, Integer indent) { diff --git a/presto-parser/src/main/java/io/prestosql/sql/parser/AstBuilder.java b/presto-parser/src/main/java/io/prestosql/sql/parser/AstBuilder.java index 2cd52f6e106a..0b0b3e5d57f9 100644 --- a/presto-parser/src/main/java/io/prestosql/sql/parser/AstBuilder.java +++ b/presto-parser/src/main/java/io/prestosql/sql/parser/AstBuilder.java @@ -20,6 +20,7 @@ import io.prestosql.sql.tree.AddColumn; import io.prestosql.sql.tree.AliasedRelation; import io.prestosql.sql.tree.AllColumns; +import io.prestosql.sql.tree.Analyze; import io.prestosql.sql.tree.ArithmeticBinaryExpression; import io.prestosql.sql.tree.ArithmeticUnaryExpression; import io.prestosql.sql.tree.ArrayConstructor; @@ -365,6 +366,19 @@ public Node visitRenameColumn(SqlBaseParser.RenameColumnContext context) (Identifier) visit(context.to)); } + @Override + public Node visitAnalyze(SqlBaseParser.AnalyzeContext context) + { + List properties = ImmutableList.of(); + if (context.properties() != null) { + properties = visit(context.properties().property(), Property.class); + } + return new Analyze( + getLocation(context), + getQualifiedName(context.qualifiedName()), + properties); + } + @Override public Node visitAddColumn(SqlBaseParser.AddColumnContext context) { diff --git a/presto-parser/src/main/java/io/prestosql/sql/tree/Analyze.java b/presto-parser/src/main/java/io/prestosql/sql/tree/Analyze.java new file mode 100644 index 000000000000..cdd07ec945c5 --- /dev/null +++ b/presto-parser/src/main/java/io/prestosql/sql/tree/Analyze.java @@ -0,0 +1,99 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.prestosql.sql.tree; + +import com.google.common.collect.ImmutableList; + +import java.util.List; +import java.util.Objects; +import java.util.Optional; + +import static com.google.common.base.MoreObjects.toStringHelper; +import static java.util.Objects.requireNonNull; + +public class Analyze + extends Statement +{ + private final QualifiedName tableName; + private final List properties; + + public Analyze(QualifiedName tableName, List properties) + { + this(Optional.empty(), tableName, properties); + } + + public Analyze(NodeLocation location, QualifiedName tableName, List properties) + { + this(Optional.of(location), tableName, properties); + } + + private Analyze(Optional location, QualifiedName tableName, List properties) + { + super(location); + this.tableName = requireNonNull(tableName, "table is null"); + this.properties = ImmutableList.copyOf(requireNonNull(properties, "properties is null")); + } + + public QualifiedName getTableName() + { + return tableName; + } + + public List getProperties() + { + return properties; + } + + @Override + public R accept(AstVisitor visitor, C context) + { + return visitor.visitAnalyze(this, context); + } + + @Override + public List getChildren() + { + return properties; + } + + @Override + public int hashCode() + { + return Objects.hash(tableName, properties); + } + + @Override + public boolean equals(Object obj) + { + if (this == obj) { + return true; + } + + if ((obj == null) || (getClass() != obj.getClass())) { + return false; + } + Analyze o = (Analyze) obj; + return Objects.equals(tableName, o.tableName) && + Objects.equals(properties, o.properties); + } + + @Override + public String toString() + { + return toStringHelper(this) + .add("tableName", tableName) + .add("properties", properties) + .toString(); + } +} diff --git a/presto-parser/src/main/java/io/prestosql/sql/tree/AstVisitor.java b/presto-parser/src/main/java/io/prestosql/sql/tree/AstVisitor.java index 0f1f2c7d7803..2ad27c336c95 100644 --- a/presto-parser/src/main/java/io/prestosql/sql/tree/AstVisitor.java +++ b/presto-parser/src/main/java/io/prestosql/sql/tree/AstVisitor.java @@ -567,6 +567,11 @@ protected R visitAddColumn(AddColumn node, C context) return visitStatement(node, context); } + protected R visitAnalyze(Analyze node, C context) + { + return visitStatement(node, context); + } + protected R visitCreateView(CreateView node, C context) { return visitStatement(node, context); diff --git a/presto-parser/src/main/java/io/prestosql/sql/tree/DefaultTraversalVisitor.java b/presto-parser/src/main/java/io/prestosql/sql/tree/DefaultTraversalVisitor.java index f281104c2b39..0466095d10bb 100644 --- a/presto-parser/src/main/java/io/prestosql/sql/tree/DefaultTraversalVisitor.java +++ b/presto-parser/src/main/java/io/prestosql/sql/tree/DefaultTraversalVisitor.java @@ -544,6 +544,15 @@ protected R visitProperty(Property node, C context) return null; } + @Override + protected R visitAnalyze(Analyze node, C context) + { + for (Property property : node.getProperties()) { + process(property, context); + } + return null; + } + @Override protected R visitCreateView(CreateView node, C context) { diff --git a/presto-parser/src/test/java/io/prestosql/sql/parser/TestSqlParser.java b/presto-parser/src/test/java/io/prestosql/sql/parser/TestSqlParser.java index 4d01036a7061..97ad63f663c2 100644 --- a/presto-parser/src/test/java/io/prestosql/sql/parser/TestSqlParser.java +++ b/presto-parser/src/test/java/io/prestosql/sql/parser/TestSqlParser.java @@ -19,6 +19,7 @@ import 
io.prestosql.sql.tree.AddColumn; import io.prestosql.sql.tree.AliasedRelation; import io.prestosql.sql.tree.AllColumns; +import io.prestosql.sql.tree.Analyze; import io.prestosql.sql.tree.ArithmeticBinaryExpression; import io.prestosql.sql.tree.ArrayConstructor; import io.prestosql.sql.tree.AtTimeZone; @@ -1338,6 +1339,25 @@ public void testRenameColumn() assertStatement("ALTER TABLE foo.t RENAME COLUMN a TO b", new RenameColumn(QualifiedName.of("foo", "t"), identifier("a"), identifier("b"))); } + @Test + public void testAnalyze() + { + QualifiedName table = QualifiedName.of("foo"); + assertStatement("ANALYZE foo", new Analyze(table, ImmutableList.of())); + + assertStatement("ANALYZE foo WITH ( \"string\" = 'bar', \"long\" = 42, computed = concat('ban', 'ana'), a = ARRAY[ 'v1', 'v2' ] )", + new Analyze(table, ImmutableList.of( + new Property(new Identifier("string"), new StringLiteral("bar")), + new Property(new Identifier("long"), new LongLiteral("42")), + new Property( + new Identifier("computed"), + new FunctionCall(QualifiedName.of("concat"), ImmutableList.of(new StringLiteral("ban"), new StringLiteral("ana")))), + new Property(new Identifier("a"), new ArrayConstructor(ImmutableList.of(new StringLiteral("v1"), new StringLiteral("v2"))))))); + + assertStatement("EXPLAIN ANALYZE foo", new Explain(new Analyze(table, ImmutableList.of()), false, false, ImmutableList.of())); + assertStatement("EXPLAIN ANALYZE ANALYZE foo", new Explain(new Analyze(table, ImmutableList.of()), true, false, ImmutableList.of())); + } + @Test public void testAddColumn() { diff --git a/presto-parser/src/test/java/io/prestosql/sql/parser/TestSqlParserErrorHandling.java b/presto-parser/src/test/java/io/prestosql/sql/parser/TestSqlParserErrorHandling.java index e097530cbf33..238b2e4bfb5a 100644 --- a/presto-parser/src/test/java/io/prestosql/sql/parser/TestSqlParserErrorHandling.java +++ b/presto-parser/src/test/java/io/prestosql/sql/parser/TestSqlParserErrorHandling.java @@ -39,10 +39,10 @@ public Object[][] getStatements() { return new Object[][] { {"", - "line 1:1: mismatched input ''. Expecting: 'ALTER', 'CALL', 'COMMIT', 'CREATE', 'DEALLOCATE', 'DELETE', 'DESC', 'DESCRIBE', 'DROP', 'EXECUTE', 'EXPLAIN', 'GRANT', " + + "line 1:1: mismatched input ''. Expecting: 'ALTER', 'ANALYZE', 'CALL', 'COMMIT', 'CREATE', 'DEALLOCATE', 'DELETE', 'DESC', 'DESCRIBE', 'DROP', 'EXECUTE', 'EXPLAIN', 'GRANT', " + "'INSERT', 'PREPARE', 'RESET', 'REVOKE', 'ROLLBACK', 'SET', 'SHOW', 'START', 'USE', "}, {"@select", - "line 1:1: mismatched input '@'. Expecting: 'ALTER', 'CALL', 'COMMIT', 'CREATE', 'DEALLOCATE', 'DELETE', 'DESC', 'DESCRIBE', 'DROP', 'EXECUTE', 'EXPLAIN', 'GRANT', " + + "line 1:1: mismatched input '@'. Expecting: 'ALTER', 'ANALYZE', 'CALL', 'COMMIT', 'CREATE', 'DEALLOCATE', 'DELETE', 'DESC', 'DESCRIBE', 'DROP', 'EXECUTE', 'EXPLAIN', 'GRANT', " + "'INSERT', 'PREPARE', 'RESET', 'REVOKE', 'ROLLBACK', 'SET', 'SHOW', 'START', 'USE', "}, {"select * from foo where @what", "line 1:25: mismatched input '@'. 
Expecting: "}, diff --git a/presto-product-tests/src/main/java/io/prestosql/tests/hive/TestExternalHiveTable.java b/presto-product-tests/src/main/java/io/prestosql/tests/hive/TestExternalHiveTable.java index 174a6fdefdf7..cf23b62b2a6e 100644 --- a/presto-product-tests/src/main/java/io/prestosql/tests/hive/TestExternalHiveTable.java +++ b/presto-product-tests/src/main/java/io/prestosql/tests/hive/TestExternalHiveTable.java @@ -21,10 +21,12 @@ import org.testng.annotations.Test; import static io.prestodb.tempto.Requirements.compose; +import static io.prestodb.tempto.assertions.QueryAssert.Row.row; import static io.prestodb.tempto.assertions.QueryAssert.assertThat; import static io.prestodb.tempto.fulfillment.table.MutableTablesState.mutableTablesState; import static io.prestodb.tempto.fulfillment.table.TableRequirements.mutableTable; import static io.prestodb.tempto.fulfillment.table.hive.tpch.TpchTableDefinitions.NATION; +import static io.prestodb.tempto.query.QueryExecutor.query; import static io.prestosql.tests.hive.HiveTableDefinitions.NATION_PARTITIONED_BY_BIGINT_REGIONKEY; import static io.prestosql.tests.hive.HiveTableDefinitions.NATION_PARTITIONED_BY_REGIONKEY_NUMBER_OF_LINES_PER_SPLIT; import static io.prestosql.tests.utils.QueryExecutors.onHive; @@ -43,6 +45,43 @@ public Requirement getRequirements(Configuration configuration) mutableTable(NATION_PARTITIONED_BY_BIGINT_REGIONKEY)); } + @Test + public void testShowStatisticsForExternalTable() + { + TableInstance nation = mutableTablesState().get(NATION_PARTITIONED_BY_BIGINT_REGIONKEY.getName()); + onHive().executeQuery("DROP TABLE IF EXISTS " + EXTERNAL_TABLE_NAME); + onHive().executeQuery("CREATE EXTERNAL TABLE " + EXTERNAL_TABLE_NAME + " LIKE " + nation.getNameInDatabase() + " LOCATION '/tmp/" + EXTERNAL_TABLE_NAME + "_" + nation.getNameInDatabase() + "'"); + insertNationPartition(nation, 1); + + onHive().executeQuery("ANALYZE TABLE " + EXTERNAL_TABLE_NAME + " PARTITION (p_regionkey) COMPUTE STATISTICS"); + assertThat(query("SHOW STATS FOR " + EXTERNAL_TABLE_NAME)).containsOnly( + row("p_nationkey", null, null, null, null, null, null), + row("p_name", null, null, null, null, null, null), + row("p_comment", null, null, null, null, null, null), + row("p_regionkey", null, 1.0, 0.0, null, "1", "1"), + row(null, null, null, null, 5.0, null, null)); + + onHive().executeQuery("ANALYZE TABLE " + EXTERNAL_TABLE_NAME + " PARTITION (p_regionkey) COMPUTE STATISTICS FOR COLUMNS"); + assertThat(query("SHOW STATS FOR " + EXTERNAL_TABLE_NAME)).containsOnly( + row("p_nationkey", null, 5.0, 0.0, null, "1", "24"), + row("p_name", 38.0, 5.0, 0.0, null, null, null), + row("p_comment", 499.0, 5.0, 0.0, null, null, null), + row("p_regionkey", null, 1.0, 0.0, null, "1", "1"), + row(null, null, null, null, 5.0, null, null)); + } + + @Test + public void testAnalyzeExternalTable() + { + TableInstance nation = mutableTablesState().get(NATION_PARTITIONED_BY_BIGINT_REGIONKEY.getName()); + onHive().executeQuery("DROP TABLE IF EXISTS " + EXTERNAL_TABLE_NAME); + onHive().executeQuery("CREATE EXTERNAL TABLE " + EXTERNAL_TABLE_NAME + " LIKE " + nation.getNameInDatabase() + " LOCATION '/tmp/" + EXTERNAL_TABLE_NAME + "_" + nation.getNameInDatabase() + "'"); + insertNationPartition(nation, 1); + + // Running ANALYZE on an external table is allowed as long as the user has the privileges. + assertThat(query("ANALYZE hive.default." 
+ EXTERNAL_TABLE_NAME)).containsExactly(row(5)); + } + @Test public void testInsertIntoExternalTable() { diff --git a/presto-product-tests/src/main/java/io/prestosql/tests/hive/TestHiveBasicTableStatistics.java b/presto-product-tests/src/main/java/io/prestosql/tests/hive/TestHiveBasicTableStatistics.java index 43c0eac4648f..e812a56ac247 100644 --- a/presto-product-tests/src/main/java/io/prestosql/tests/hive/TestHiveBasicTableStatistics.java +++ b/presto-product-tests/src/main/java/io/prestosql/tests/hive/TestHiveBasicTableStatistics.java @@ -144,6 +144,80 @@ public void testCreatePartitioned() } } + @Test(groups = {HIVE_TABLE_STATISTICS}) + public void testAnalyzePartitioned() + { + String tableName = "test_basic_statistics_analyze_partitioned"; + + onPresto().executeQuery("DROP TABLE IF EXISTS " + tableName); + onPresto().executeQuery(format("" + + "CREATE TABLE %s " + + "WITH ( " + + " partitioned_by = ARRAY['n_regionkey'], " + + " bucketed_by = ARRAY['n_nationkey'], " + + " bucket_count = 10 " + + ") " + + "AS " + + "SELECT n_nationkey, n_name, n_comment, n_regionkey " + + "FROM nation " + + "WHERE n_regionkey = 1", tableName)); + + try { + BasicStatistics tableStatistics = getBasicStatisticsForTable(onHive(), tableName); + assertThatStatisticsAreNotPresent(tableStatistics); + + BasicStatistics partitionStatisticsBefore = getBasicStatisticsForPartition(onHive(), tableName, "n_regionkey=1"); + assertThatStatisticsArePresent(partitionStatisticsBefore); + + // run ANALYZE + onPresto().executeQuery(format("ANALYZE %s", tableName)); + BasicStatistics partitionStatisticsAfter = getBasicStatisticsForPartition(onHive(), tableName, "n_regionkey=1"); + assertThatStatisticsArePresent(partitionStatisticsAfter); + + // ANALYZE must not change the basic stats + assertThat(partitionStatisticsBefore.getNumRows().getAsLong()).isEqualTo(partitionStatisticsAfter.getNumRows().getAsLong()); + assertThat(partitionStatisticsBefore.getNumFiles().getAsLong()).isEqualTo(partitionStatisticsAfter.getNumFiles().getAsLong()); + assertThat(partitionStatisticsBefore.getRawDataSize().getAsLong()).isEqualTo(partitionStatisticsAfter.getRawDataSize().getAsLong()); + assertThat(partitionStatisticsBefore.getTotalSize().getAsLong()).isEqualTo(partitionStatisticsAfter.getTotalSize().getAsLong()); + } + finally { + onPresto().executeQuery(format("DROP TABLE %s", tableName)); + } + } + + @Test(groups = {HIVE_TABLE_STATISTICS}) + public void testAnalyzeUnpartitioned() + { + String tableName = "test_basic_statistics_analyze_unpartitioned"; + + onPresto().executeQuery("DROP TABLE IF EXISTS " + tableName); + onPresto().executeQuery(format("" + + "CREATE TABLE %s " + + "AS " + + "SELECT n_nationkey, n_name, n_comment, n_regionkey " + + "FROM nation " + + "WHERE n_regionkey = 1", tableName)); + + try { + BasicStatistics tableStatisticsBefore = getBasicStatisticsForTable(onHive(), tableName); + assertThatStatisticsArePresent(tableStatisticsBefore); + + // run ANALYZE + onPresto().executeQuery(format("ANALYZE %s", tableName)); + BasicStatistics tableStatisticsAfter = getBasicStatisticsForTable(onHive(), tableName); + assertThatStatisticsArePresent(tableStatisticsAfter); + + // ANALYZE must not change the basic stats + assertThat(tableStatisticsBefore.getNumRows()).isEqualTo(tableStatisticsAfter.getNumRows()); + assertThat(tableStatisticsBefore.getNumFiles()).isEqualTo(tableStatisticsAfter.getNumFiles()); + assertThat(tableStatisticsBefore.getRawDataSize()).isEqualTo(tableStatisticsAfter.getRawDataSize()); + 
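// numRows, numFiles, rawDataSize and totalSize are the metastore-level "basic"
// statistics; these four assertions pin down, field by field, that ANALYZE leaves
// every one of them untouched.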
assertThat(tableStatisticsBefore.getTotalSize()).isEqualTo(tableStatisticsAfter.getTotalSize()); + } + finally { + onPresto().executeQuery(format("DROP TABLE %s", tableName)); + } + } + @Test(groups = {HIVE_TABLE_STATISTICS}) public void testInsertPartitioned() { diff --git a/presto-product-tests/src/main/java/io/prestosql/tests/hive/TestHiveTableStatistics.java b/presto-product-tests/src/main/java/io/prestosql/tests/hive/TestHiveTableStatistics.java index 38f2838a6dd5..6c164bc9aa38 100644 --- a/presto-product-tests/src/main/java/io/prestosql/tests/hive/TestHiveTableStatistics.java +++ b/presto-product-tests/src/main/java/io/prestosql/tests/hive/TestHiveTableStatistics.java @@ -606,6 +606,375 @@ public void testStatisticsForAllDataTypesOnlyNulls() row(null, null, null, null, 1.0, null, null)); } + @Test + @Requires(UnpartitionedNationTable.class) + public void testStatisticsForSkewedTable() + { + String tableName = "test_hive_skewed_table_statistics"; + onHive().executeQuery("DROP TABLE IF EXISTS " + tableName); + onHive().executeQuery("CREATE TABLE " + tableName + " (c_string STRING, c_int INT) SKEWED BY (c_string) ON ('c1')"); + onHive().executeQuery("INSERT INTO TABLE " + tableName + " VALUES ('c1', 1), ('c1', 2)"); + + assertThat(query("SHOW STATS FOR " + tableName)).containsOnly( + row("c_string", null, null, null, null, null, null), + row("c_int", null, null, null, null, null, null), + row(null, null, null, null, 2.0, null, null)); + + onHive().executeQuery("ANALYZE TABLE " + tableName + " COMPUTE STATISTICS"); + + assertThat(query("SHOW STATS FOR " + tableName)).containsOnly( + row("c_string", null, null, null, null, null, null), + row("c_int", null, null, null, null, null, null), + row(null, null, null, null, 2.0, null, null)); + + onHive().executeQuery("ANALYZE TABLE " + tableName + " COMPUTE STATISTICS FOR COLUMNS"); + assertThat(query("SHOW STATS FOR " + tableName)).containsOnly( + row("c_string", 4.0, 1.0, 0.0, null, null, null), + row("c_int", null, 2.0, 0.0, null, "1", "2"), + row(null, null, null, null, 2.0, null, null)); + } + + @Test + @Requires(UnpartitionedNationTable.class) + public void testAnalyzesForSkewedTable() + { + String tableName = "test_analyze_skewed_table"; + onHive().executeQuery("DROP TABLE IF EXISTS " + tableName); + onHive().executeQuery("CREATE TABLE " + tableName + " (c_string STRING, c_int INT) SKEWED BY (c_string) ON ('c1')"); + onHive().executeQuery("INSERT INTO TABLE " + tableName + " VALUES ('c1', 1), ('c1', 2)"); + + assertThat(query("SHOW STATS FOR " + tableName)).containsOnly( + row("c_string", null, null, null, null, null, null), + row("c_int", null, null, null, null, null, null), + row(null, null, null, null, 2.0, null, null)); + + assertThat(query("ANALYZE " + tableName)).containsExactly(row(2)); + assertThat(query("SHOW STATS FOR " + tableName)).containsOnly( + row("c_string", 4.0, 1.0, 0.0, null, null, null), + row("c_int", null, 2.0, 0.0, null, "1", "2"), + row(null, null, null, null, 2.0, null, null)); + } + + @Test + @Requires(UnpartitionedNationTable.class) + public void testAnalyzeForUnpartitionedTable() + { + String tableNameInDatabase = mutableTablesState().get(NATION.getName()).getNameInDatabase(); + + String showStatsWholeTable = "SHOW STATS FOR " + tableNameInDatabase; + + // table not analyzed + assertThat(query(showStatsWholeTable)).containsOnly( + row("n_nationkey", null, null, anyOf(null, 0.0), null, null, null), + row("n_name", null, null, anyOf(null, 0.0), null, null, null), + row("n_regionkey", null, null, anyOf(null, 
0.0), null, null, null), + row("n_comment", null, null, anyOf(null, 0.0), null, null, null), + row(null, null, null, null, anyOf(null, 0.0), null, null)); // anyOf because of different behaviour on HDP (hive 1.2) and CDH (hive 1.1) + + assertThat(query("ANALYZE " + tableNameInDatabase)).containsExactly(row(25)); + + assertThat(query(showStatsWholeTable)).containsOnly( + row("n_nationkey", null, 25.0, 0.0, null, "0", "24"), + row("n_name", 177.0, 25.0, 0.0, null, null, null), + row("n_regionkey", null, 5.0, 0.0, null, "0", "4"), + row("n_comment", 1857.0, 25.0, 0.0, null, null, null), + row(null, null, null, null, 25.0, null, null)); + } + + @Test + @Requires(NationPartitionedByBigintTable.class) + public void testAnalyzeForTablePartitionedByBigint() + { + String tableNameInDatabase = mutableTablesState().get(NATION_PARTITIONED_BY_BIGINT_REGIONKEY.getName()).getNameInDatabase(); + + String showStatsWholeTable = "SHOW STATS FOR " + tableNameInDatabase; + String showStatsPartitionOne = "SHOW STATS FOR (SELECT * FROM " + tableNameInDatabase + " WHERE p_regionkey = 1)"; + String showStatsPartitionTwo = "SHOW STATS FOR (SELECT * FROM " + tableNameInDatabase + " WHERE p_regionkey = 2)"; + + // table not analyzed + + assertThat(query(showStatsWholeTable)).containsOnly( + row("p_nationkey", null, null, null, null, null, null), + row("p_name", null, null, null, null, null, null), + row("p_regionkey", null, null, null, null, null, null), + row("p_comment", null, null, null, null, null, null), + row(null, null, null, null, null, null, null)); + + assertThat(query(showStatsPartitionOne)).containsOnly( + row("p_nationkey", null, null, null, null, null, null), + row("p_name", null, null, null, null, null, null), + row("p_regionkey", null, null, null, null, null, null), + row("p_comment", null, null, null, null, null, null), + row(null, null, null, null, null, null, null)); + + // analyze for single partition + + assertThat(query("ANALYZE " + tableNameInDatabase + " WITH (partitions = ARRAY[ARRAY['1']])")).containsExactly(row(5)); + + assertThat(query(showStatsWholeTable)).containsOnly( + row("p_nationkey", null, 5.0, 0.0, null, "1", "24"), + row("p_name", 114.0, 5.0, 0.0, null, null, null), + row("p_regionkey", null, 3.0, 0.0, null, "1", "3"), + row("p_comment", 1497.0, 5.0, 0.0, null, null, null), + row(null, null, null, null, 15.0, null, null)); + + assertThat(query(showStatsPartitionOne)).containsOnly( + row("p_nationkey", null, 5.0, 0.0, null, "1", "24"), + row("p_name", 38.0, 5.0, 0.0, null, null, null), + row("p_regionkey", null, 1.0, 0.0, null, "1", "1"), + row("p_comment", 499.0, 5.0, 0.0, null, null, null), + row(null, null, null, null, 5.0, null, null)); + + assertThat(query(showStatsPartitionTwo)).containsOnly( + row("p_nationkey", null, null, null, null, null, null), + row("p_name", null, null, null, null, null, null), + row("p_regionkey", null, null, null, null, null, null), + row("p_comment", null, null, null, null, null, null), + row(null, null, null, null, null, null, null)); + + // analyze for all partitions + + assertThat(query("ANALYZE " + tableNameInDatabase)).containsExactly(row(15)); + + assertThat(query(showStatsWholeTable)).containsOnly( + row("p_nationkey", null, 5.0, 0.0, null, "1", "24"), + row("p_name", 109.0, 5.0, 0.0, null, null, null), + row("p_regionkey", null, 3.0, 0.0, null, "1", "3"), + row("p_comment", 1197.0, 5.0, 0.0, null, null, null), + row(null, null, null, null, 15.0, null, null)); + + assertThat(query(showStatsPartitionOne)).containsOnly( + row("p_nationkey", 
null, 5.0, 0.0, null, "1", "24"), + row("p_name", 38.0, 5.0, 0.0, null, null, null), + row("p_regionkey", null, 1.0, 0.0, null, "1", "1"), + row("p_comment", 499.0, 5.0, 0.0, null, null, null), + row(null, null, null, null, 5.0, null, null)); + + assertThat(query(showStatsPartitionTwo)).containsOnly( + row("p_nationkey", null, 5.0, 0.0, null, "8", "21"), + row("p_name", 31.0, 5.0, 0.0, null, null, null), + row("p_regionkey", null, 1.0, 0.0, null, "2", "2"), + row("p_comment", 351.0, 5.0, 0.0, null, null, null), + row(null, null, null, null, 5.0, null, null)); + } + + @Test + @Requires(NationPartitionedByVarcharTable.class) + public void testAnalyzeForTablePartitionedByVarchar() + { + String tableNameInDatabase = mutableTablesState().get(NATION_PARTITIONED_BY_VARCHAR_REGIONKEY.getName()).getNameInDatabase(); + + String showStatsWholeTable = "SHOW STATS FOR " + tableNameInDatabase; + String showStatsPartitionOne = "SHOW STATS FOR (SELECT * FROM " + tableNameInDatabase + " WHERE p_regionkey = 'AMERICA')"; + String showStatsPartitionTwo = "SHOW STATS FOR (SELECT * FROM " + tableNameInDatabase + " WHERE p_regionkey = 'ASIA')"; + + // table not analyzed + + assertThat(query(showStatsWholeTable)).containsOnly( + row("p_nationkey", null, null, null, null, null, null), + row("p_name", null, null, null, null, null, null), + row("p_regionkey", null, null, null, null, null, null), + row("p_comment", null, null, null, null, null, null), + row(null, null, null, null, null, null, null)); + + assertThat(query(showStatsPartitionOne)).containsOnly( + row("p_nationkey", null, null, null, null, null, null), + row("p_name", null, null, null, null, null, null), + row("p_regionkey", null, null, null, null, null, null), + row("p_comment", null, null, null, null, null, null), + row(null, null, null, null, null, null, null)); + + // analyze for single partition + + assertThat(query("ANALYZE " + tableNameInDatabase + " WITH (partitions = ARRAY[ARRAY['AMERICA']])")).containsExactly(row(5)); + + assertThat(query(showStatsWholeTable)).containsOnly( + row("p_nationkey", null, 5.0, 0.0, null, "1", "24"), + row("p_name", 114.0, 5.0, 0.0, null, null, null), + row("p_regionkey", 85.0, 3.0, 0.0, null, null, null), + row("p_comment", 1497.0, 5.0, 0.0, null, null, null), + row(null, null, null, null, 15.0, null, null)); + + assertThat(query(showStatsPartitionOne)).containsOnly( + row("p_nationkey", null, 5.0, 0.0, null, "1", "24"), + row("p_name", 38.0, 5.0, 0.0, null, null, null), + row("p_regionkey", 35.0, 1.0, 0.0, null, null, null), + row("p_comment", 499.0, 5.0, 0.0, null, null, null), + row(null, null, null, null, 5.0, null, null)); + + assertThat(query(showStatsPartitionTwo)).containsOnly( + row("p_nationkey", null, null, null, null, null, null), + row("p_name", null, null, null, null, null, null), + row("p_regionkey", null, null, null, null, null, null), + row("p_comment", null, null, null, null, null, null), + row(null, null, null, null, null, null, null)); + + // column analysis for all partitions + + assertThat(query("ANALYZE " + tableNameInDatabase)).containsExactly(row(15)); + + assertThat(query(showStatsWholeTable)).containsOnly( + row("p_nationkey", null, 5.0, 0.0, null, "1", "24"), + row("p_name", 109.0, 5.0, 0.0, null, null, null), + row("p_regionkey", 85.0, 3.0, 0.0, null, null, null), + row("p_comment", 1197.0, 5.0, 0.0, null, null, null), + row(null, null, null, null, 15.0, null, null)); + + assertThat(query(showStatsPartitionOne)).containsOnly( + row("p_nationkey", null, 5.0, 0.0, null, "1", "24"), + 
row("p_name", 38.0, 5.0, 0.0, null, null, null), + row("p_regionkey", 35.0, 1.0, 0.0, null, null, null), + row("p_comment", 499.0, 5.0, 0.0, null, null, null), + row(null, null, null, null, 5.0, null, null)); + + assertThat(query(showStatsPartitionTwo)).containsOnly( + row("p_nationkey", null, 5.0, 0.0, null, "8", "21"), + row("p_name", 31.0, 5.0, 0.0, null, null, null), + row("p_regionkey", 20.0, 1.0, 0.0, null, null, null), + row("p_comment", 351.0, 5.0, 0.0, null, null, null), + row(null, null, null, null, 5.0, null, null)); + } + + // This covers also stats calculation for unpartitioned table + @Test(groups = {SKIP_ON_CDH}) // skip on cdh due to no support for date column and stats + @Requires(AllTypesTable.class) + public void testAnalyzeForAllDataTypes() + { + String tableNameInDatabase = mutableTablesState().get(ALL_TYPES_TABLE_NAME).getNameInDatabase(); + + assertThat(query("SHOW STATS FOR " + tableNameInDatabase)).containsOnly( + row("c_tinyint", null, null, null, null, null, null), + row("c_smallint", null, null, null, null, null, null), + row("c_int", null, null, null, null, null, null), + row("c_bigint", null, null, null, null, null, null), + row("c_float", null, null, null, null, null, null), + row("c_double", null, null, null, null, null, null), + row("c_decimal", null, null, null, null, null, null), + row("c_decimal_w_params", null, null, null, null, null, null), + row("c_timestamp", null, null, null, null, null, null), + row("c_date", null, null, null, null, null, null), + row("c_string", null, null, null, null, null, null), + row("c_varchar", null, null, null, null, null, null), + row("c_char", null, null, null, null, null, null), + row("c_boolean", null, null, null, null, null, null), + row("c_binary", null, null, null, null, null, null), + row(null, null, null, null, 0.0, null, null)); + + assertThat(query("ANALYZE " + tableNameInDatabase)).containsExactly(row(2)); + + // SHOW STATS FORMAT: column_name, data_size, distinct_values_count, nulls_fraction, row_count + assertThat(query("SHOW STATS FOR " + tableNameInDatabase)).containsOnly( + row("c_tinyint", null, 2.0, 0.0, null, "121", "127"), + row("c_smallint", null, 2.0, 0.0, null, "32761", "32767"), + row("c_int", null, 2.0, 0.0, null, "2147483641", "2147483647"), + row("c_bigint", null, 2.0, 0.0, null, "9223372036854775807", "9223372036854775807"), + row("c_float", null, 2.0, 0.0, null, "123.341", "123.345"), + row("c_double", null, 2.0, 0.0, null, "234.561", "235.567"), + row("c_decimal", null, 2.0, 0.0, null, "345.0", "346.0"), + row("c_decimal_w_params", null, 2.0, 0.0, null, "345.671", "345.678"), + row("c_timestamp", null, 2.0, 0.0, null, null, null), + row("c_date", null, 2.0, 0.0, null, "2015-05-09", "2015-06-10"), + row("c_string", 22.0, 2.0, 0.0, null, null, null), + row("c_varchar", 20.0, 2.0, 0.0, null, null, null), + row("c_char", 12.0, 2.0, 0.0, null, null, null), + row("c_boolean", null, 2.0, 0.0, null, null, null), + row("c_binary", 23.0, null, 0.0, null, null, null), + row(null, null, null, null, 2.0, null, null)); + } + + @Test(groups = {SKIP_ON_CDH}) // skip on cdh due to no support for date column and stats + @Requires(AllTypesTable.class) + public void testAnalyzeForAllDataTypesNoData() + { + String tableNameInDatabase = mutableTablesState().get(EMPTY_ALL_TYPES_TABLE_NAME).getNameInDatabase(); + + assertThat(query("SHOW STATS FOR " + tableNameInDatabase)).containsOnly( + row("c_tinyint", null, null, null, null, null, null), + row("c_smallint", null, null, null, null, null, null), + row("c_int", 
null, null, null, null, null, null), + row("c_bigint", null, null, null, null, null, null), + row("c_float", null, null, null, null, null, null), + row("c_double", null, null, null, null, null, null), + row("c_decimal", null, null, null, null, null, null), + row("c_decimal_w_params", null, null, null, null, null, null), + row("c_timestamp", null, null, null, null, null, null), + row("c_date", null, null, null, null, null, null), + row("c_string", null, null, null, null, null, null), + row("c_varchar", null, null, null, null, null, null), + row("c_char", null, null, null, null, null, null), + row("c_boolean", null, null, null, null, null, null), + row("c_binary", null, null, null, null, null, null), + row(null, null, null, null, 0.0, null, null)); + + assertThat(query("ANALYZE " + tableNameInDatabase)).containsExactly(row(0)); + + assertThat(query("SHOW STATS FOR " + tableNameInDatabase)).containsOnly( + row("c_tinyint", null, 0.0, 0.0, null, null, null), + row("c_smallint", null, 0.0, 0.0, null, null, null), + row("c_int", null, 0.0, 0.0, null, null, null), + row("c_bigint", null, 0.0, 0.0, null, null, null), + row("c_float", null, 0.0, 0.0, null, null, null), + row("c_double", null, 0.0, 0.0, null, null, null), + row("c_decimal", null, 0.0, 0.0, null, null, null), + row("c_decimal_w_params", null, 0.0, 0.0, null, null, null), + row("c_timestamp", null, 0.0, 0.0, null, null, null), + row("c_date", null, 0.0, 0.0, null, null, null), + row("c_string", 0.0, 0.0, 0.0, null, null, null), + row("c_varchar", 0.0, 0.0, 0.0, null, null, null), + row("c_char", 0.0, 0.0, 0.0, null, null, null), + row("c_boolean", null, 0.0, 0.0, null, null, null), + row("c_binary", 0.0, null, 0.0, null, null, null), + row(null, null, null, null, 0.0, null, null)); + } + + @Test(groups = {SKIP_ON_CDH}) // skip on cdh due to no support for date column and stats + @Requires(AllTypesTable.class) + public void testAnalyzeForAllDataTypesOnlyNulls() + { + String tableNameInDatabase = mutableTablesState().get(EMPTY_ALL_TYPES_TABLE_NAME).getNameInDatabase(); + + // insert from hive to prevent Presto collecting statistics on insert + onHive().executeQuery("INSERT INTO TABLE " + tableNameInDatabase + " VALUES(null, null, null, null, null, null, null, null, null, null, null, null, null, null, null)"); + + assertThat(query("SHOW STATS FOR " + tableNameInDatabase)).containsOnly( + row("c_tinyint", null, null, null, null, null, null), + row("c_smallint", null, null, null, null, null, null), + row("c_int", null, null, null, null, null, null), + row("c_bigint", null, null, null, null, null, null), + row("c_float", null, null, null, null, null, null), + row("c_double", null, null, null, null, null, null), + row("c_decimal", null, null, null, null, null, null), + row("c_decimal_w_params", null, null, null, null, null, null), + row("c_timestamp", null, null, null, null, null, null), + row("c_date", null, null, null, null, null, null), + row("c_string", null, null, null, null, null, null), + row("c_varchar", null, null, null, null, null, null), + row("c_char", null, null, null, null, null, null), + row("c_boolean", null, null, null, null, null, null), + row("c_binary", null, null, null, null, null, null), + row(null, null, null, null, 1.0, null, null)); + + assertThat(query("ANALYZE " + tableNameInDatabase)).containsExactly(row(1)); + + assertThat(query("SHOW STATS FOR " + tableNameInDatabase)).containsOnly( + row("c_tinyint", null, 0.0, 1.0, null, null, null), + row("c_smallint", null, 0.0, 1.0, null, null, null), + row("c_int", null, 
+                row("c_bigint", null, 0.0, 1.0, null, null, null),
+                row("c_float", null, 0.0, 1.0, null, null, null),
+                row("c_double", null, 0.0, 1.0, null, null, null),
+                row("c_decimal", null, 0.0, 1.0, null, null, null),
+                row("c_decimal_w_params", null, 0.0, 1.0, null, null, null),
+                row("c_timestamp", null, 0.0, 1.0, null, null, null),
+                row("c_date", null, 0.0, 1.0, null, null, null),
+                row("c_string", 0.0, 0.0, 1.0, null, null, null),
+                row("c_varchar", 0.0, 0.0, 1.0, null, null, null),
+                row("c_char", 0.0, 0.0, 1.0, null, null, null),
+                row("c_boolean", null, 0.0, 1.0, null, null, null),
+                row("c_binary", 0.0, null, 1.0, null, null, null),
+                row(null, null, null, null, 1.0, null, null));
+    }
+
     @Test
     @Requires(AllTypesTable.class)
     public void testComputeTableStatisticsOnCreateTable()
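
A minimal usage sketch for the partitions analyze property, in the style of the tests above. It is not part of this patch: it assumes a mutable Hive table partitioned on two varchar keys, and the partition values and expected row count are invented for illustration.

    // Hypothetical only: analyze just the listed partitions rather than the whole table
    assertThat(query("ANALYZE " + tableNameInDatabase
            + " WITH (partitions = ARRAY[ARRAY['2016-08-09', 'US']])"))
            .containsExactly(row(100));

diff --git a/presto-spi/src/main/java/io/prestosql/spi/StandardErrorCode.java b/presto-spi/src/main/java/io/prestosql/spi/StandardErrorCode.java
index 5332cd6bbc2b..3b733b6bf892 100644
--- a/presto-spi/src/main/java/io/prestosql/spi/StandardErrorCode.java
+++ b/presto-spi/src/main/java/io/prestosql/spi/StandardErrorCode.java
@@ -62,6 +62,7 @@ public enum StandardErrorCode
     INVALID_COLUMN_PROPERTY(0x0000_0027, USER_ERROR),
     QUERY_HAS_TOO_MANY_STAGES(0x0000_0028, USER_ERROR),
     INVALID_SPATIAL_PARTITIONING(0x0000_0029, USER_ERROR),
+    INVALID_ANALYZE_PROPERTY(0x0000_002A, USER_ERROR),
 
     GENERIC_INTERNAL_ERROR(0x0001_0000, INTERNAL_ERROR),
     TOO_MANY_REQUESTS_FAILED(0x0001_0001, INTERNAL_ERROR),
diff --git a/presto-spi/src/main/java/io/prestosql/spi/connector/Connector.java b/presto-spi/src/main/java/io/prestosql/spi/connector/Connector.java
index 5ff343180ac6..b34680521894 100644
--- a/presto-spi/src/main/java/io/prestosql/spi/connector/Connector.java
+++ b/presto-spi/src/main/java/io/prestosql/spi/connector/Connector.java
@@ -107,6 +107,14 @@ default List<PropertyMetadata<?>> getSchemaProperties()
         return emptyList();
     }
 
+    /**
+     * @return the analyze properties for this connector
+     */
+    default List<PropertyMetadata<?>> getAnalyzeProperties()
+    {
+        return emptyList();
+    }
+
     /**
      * @return the table properties for this connector
      */
diff --git a/presto-spi/src/main/java/io/prestosql/spi/connector/ConnectorAnalyzeMetadata.java b/presto-spi/src/main/java/io/prestosql/spi/connector/ConnectorAnalyzeMetadata.java
new file mode 100644
index 000000000000..92c63ec2cf32
--- /dev/null
+++ b/presto-spi/src/main/java/io/prestosql/spi/connector/ConnectorAnalyzeMetadata.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.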
+ */
+package io.prestosql.spi.connector;
+
+import io.prestosql.spi.statistics.TableStatisticsMetadata;
+
+import static java.util.Objects.requireNonNull;
+
+public class ConnectorAnalyzeMetadata
+{
+    private final TableStatisticsMetadata statisticsMetadata;
+    private final ConnectorTableHandle tableHandle;
+
+    public ConnectorAnalyzeMetadata(TableStatisticsMetadata statisticsMetadata, ConnectorTableHandle tableHandle)
+    {
+        this.statisticsMetadata = requireNonNull(statisticsMetadata, "statisticsMetadata is null");
+        this.tableHandle = requireNonNull(tableHandle, "tableHandle is null");
+    }
+
+    public TableStatisticsMetadata getStatisticsMetadata()
+    {
+        return statisticsMetadata;
+    }
+
+    public ConnectorTableHandle getTableHandle()
+    {
+        return tableHandle;
+    }
+}
diff --git a/presto-spi/src/main/java/io/prestosql/spi/connector/ConnectorMetadata.java b/presto-spi/src/main/java/io/prestosql/spi/connector/ConnectorMetadata.java
index d196a5b514a3..49847968b98e 100644
--- a/presto-spi/src/main/java/io/prestosql/spi/connector/ConnectorMetadata.java
+++ b/presto-spi/src/main/java/io/prestosql/spi/connector/ConnectorMetadata.java
@@ -59,6 +59,15 @@ default boolean schemaExists(ConnectorSession session, String schemaName)
      */
     ConnectorTableHandle getTableHandle(ConnectorSession session, SchemaTableName tableName);
 
+    /**
+     * Returns a table handle for the specified table name, or null if the connector does not contain the table.
+     * The returned table handle may incorporate the given analyzeProperties.
+     */
+    default ConnectorTableHandle getTableHandleForStatisticsCollection(ConnectorSession session, SchemaTableName tableName, Map<String, Object> analyzeProperties)
+    {
+        throw new PrestoException(NOT_SUPPORTED, "This connector does not support analyze");
+    }
+
     /**
      * Returns the system table for the specified table name, if one exists.
      * The system tables handled via {@link #getSystemTable} differ from those returned by {@link Connector#getSystemTables()}.
@@ -288,11 +297,35 @@ default Optional<ConnectorNewTableLayout> getInsertLayout(ConnectorSession sessi
     /**
      * Describes statistics that must be collected during a write.
      */
-    default TableStatisticsMetadata getStatisticsCollectionMetadata(ConnectorSession session, ConnectorTableMetadata tableMetadata)
+    default TableStatisticsMetadata getStatisticsCollectionMetadataForWrite(ConnectorSession session, ConnectorTableMetadata tableMetadata)
     {
         return TableStatisticsMetadata.empty();
     }
 
+    /**
+     * Describes statistics that must be collected during statistics collection (ANALYZE).
+     */
+    default TableStatisticsMetadata getStatisticsCollectionMetadata(ConnectorSession session, ConnectorTableMetadata tableMetadata)
+    {
+        throw new PrestoException(GENERIC_INTERNAL_ERROR, "ConnectorMetadata getTableHandleForStatisticsCollection() is implemented without getStatisticsCollectionMetadata()");
+    }
+
+    /**
+     * Begin statistics collection.
+     */
+    default ConnectorTableHandle beginStatisticsCollection(ConnectorSession session, ConnectorTableHandle tableHandle)
+    {
+        throw new PrestoException(GENERIC_INTERNAL_ERROR, "ConnectorMetadata getStatisticsCollectionMetadata() is implemented without beginStatisticsCollection()");
+    }
+
+    /**
+     * Finish statistics collection.
+     */
+    default void finishStatisticsCollection(ConnectorSession session, ConnectorTableHandle tableHandle, Collection<ComputedStatistics> computedStatistics)
+    {
+        throw new PrestoException(GENERIC_INTERNAL_ERROR, "ConnectorMetadata beginStatisticsCollection() is implemented without finishStatisticsCollection()");
+    }
+
     /**
      * Begin the atomic creation of a table with data.
      */
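
Taken together, the defaults above describe the ANALYZE lifecycle a connector opts into. Below is a rough sketch of the call sequence the engine is expected to drive; only the four ConnectorMetadata calls come from this patch, while the driver class, parameter plumbing, and step comments are illustrative assumptions rather than engine code:

    import io.prestosql.spi.connector.ConnectorMetadata;
    import io.prestosql.spi.connector.ConnectorSession;
    import io.prestosql.spi.connector.ConnectorTableHandle;
    import io.prestosql.spi.connector.ConnectorTableMetadata;
    import io.prestosql.spi.connector.SchemaTableName;
    import io.prestosql.spi.statistics.ComputedStatistics;
    import io.prestosql.spi.statistics.TableStatisticsMetadata;

    import java.util.Collection;
    import java.util.Map;

    // Illustrative driver, not engine code: shows the order in which the new
    // SPI methods are meant to be invoked when a user runs ANALYZE.
    final class AnalyzeLifecycleSketch
    {
        static void analyze(
                ConnectorMetadata metadata,
                ConnectorSession session,
                SchemaTableName tableName,
                Map<String, Object> analyzeProperties,
                ConnectorTableMetadata tableMetadata,
                Collection<ComputedStatistics> computedStatistics)
        {
            // 1. Resolve a handle that carries the analyze properties
            //    (for Hive, the "partitions" property).
            ConnectorTableHandle handle = metadata.getTableHandleForStatisticsCollection(session, tableName, analyzeProperties);

            // 2. Ask the connector which statistics should be computed.
            TableStatisticsMetadata statisticsMetadata = metadata.getStatisticsCollectionMetadata(session, tableMetadata);

            // 3. Begin collection; the connector may return an enriched handle.
            ConnectorTableHandle analyzeHandle = metadata.beginStatisticsCollection(session, handle);

            // 4. The engine scans the table and computes the aggregations that
            //    statisticsMetadata describes (elided in this sketch).

            // 5. Hand the results back so the connector can persist them.
            metadata.finishStatisticsCollection(session, analyzeHandle, computedStatistics);
        }
    }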

diff --git a/presto-spi/src/main/java/io/prestosql/spi/connector/classloader/ClassLoaderSafeConnectorMetadata.java b/presto-spi/src/main/java/io/prestosql/spi/connector/classloader/ClassLoaderSafeConnectorMetadata.java
index 878246dccc69..4b7ee5bb2bc6 100644
--- a/presto-spi/src/main/java/io/prestosql/spi/connector/classloader/ClassLoaderSafeConnectorMetadata.java
+++ b/presto-spi/src/main/java/io/prestosql/spi/connector/classloader/ClassLoaderSafeConnectorMetadata.java
@@ -117,6 +117,14 @@ public Optional<ConnectorNewTableLayout> getInsertLayout(ConnectorSession sessio
         }
     }
 
+    @Override
+    public TableStatisticsMetadata getStatisticsCollectionMetadataForWrite(ConnectorSession session, ConnectorTableMetadata tableMetadata)
+    {
+        try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(classLoader)) {
+            return delegate.getStatisticsCollectionMetadataForWrite(session, tableMetadata);
+        }
+    }
+
     @Override
     public TableStatisticsMetadata getStatisticsCollectionMetadata(ConnectorSession session, ConnectorTableMetadata tableMetadata)
     {
@@ -125,6 +133,22 @@ public TableStatisticsMetadata getStatisticsCollectionMetadata(ConnectorSession
         }
     }
 
+    @Override
+    public ConnectorTableHandle beginStatisticsCollection(ConnectorSession session, ConnectorTableHandle tableHandle)
+    {
+        try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(classLoader)) {
+            return delegate.beginStatisticsCollection(session, tableHandle);
+        }
+    }
+
+    @Override
+    public void finishStatisticsCollection(ConnectorSession session, ConnectorTableHandle tableHandle, Collection<ComputedStatistics> computedStatistics)
+    {
+        try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(classLoader)) {
+            delegate.finishStatisticsCollection(session, tableHandle, computedStatistics);
+        }
+    }
+
     @Override
     public boolean schemaExists(ConnectorSession session, String schemaName)
     {
@@ -149,6 +173,14 @@ public ConnectorTableHandle getTableHandle(ConnectorSession session, SchemaTable
         }
     }
 
+    @Override
+    public ConnectorTableHandle getTableHandleForStatisticsCollection(ConnectorSession session, SchemaTableName tableName, Map<String, Object> analyzeProperties)
+    {
+        try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(classLoader)) {
+            return delegate.getTableHandleForStatisticsCollection(session, tableName, analyzeProperties);
+        }
+    }
+
     @Override
     public Optional<SystemTable> getSystemTable(ConnectorSession session, SchemaTableName tableName)
     {
diff --git a/presto-spi/src/main/java/io/prestosql/spi/resourcegroups/QueryType.java b/presto-spi/src/main/java/io/prestosql/spi/resourcegroups/QueryType.java
index 7824b8e27744..1aac0ff59410 100644
--- a/presto-spi/src/main/java/io/prestosql/spi/resourcegroups/QueryType.java
+++ b/presto-spi/src/main/java/io/prestosql/spi/resourcegroups/QueryType.java
@@ -19,6 +19,7 @@ public enum QueryType
     DELETE,
     DESCRIBE,
     EXPLAIN,
+    ANALYZE,
     INSERT,
     SELECT
 }
diff --git a/presto-tests/src/test/java/io/prestosql/tests/TestLocalQueryRunner.java b/presto-tests/src/test/java/io/prestosql/tests/TestLocalQueryRunner.java
index f52b1802027a..a970447472ae 100644
--- a/presto-tests/src/test/java/io/prestosql/tests/TestLocalQueryRunner.java
+++ b/presto-tests/src/test/java/io/prestosql/tests/TestLocalQueryRunner.java
@@ -15,6 +15,10 @@
 
 import org.testng.annotations.Test;
 
+import static io.prestosql.testing.TestingAccessControlManager.TestingPrivilegeType.INSERT_TABLE;
+import static io.prestosql.testing.TestingAccessControlManager.TestingPrivilegeType.SELECT_COLUMN;
+import static io.prestosql.testing.TestingAccessControlManager.privilege;
+
 public class TestLocalQueryRunner
         extends AbstractTestQueryFramework
 {
@@ -28,4 +32,13 @@ public void testSimpleQuery()
     {
         assertQuery("SELECT * FROM nation");
     }
+
+    @Test
+    public void testAnalyzeAccessControl()
+    {
+        assertAccessAllowed("ANALYZE nation");
+        assertAccessDenied("ANALYZE nation", "Cannot ANALYZE \\(missing insert privilege\\) table .*.nation.*", privilege("nation", INSERT_TABLE));
+        assertAccessDenied("ANALYZE nation", "Cannot select from columns \\[.*] in table or view .*.nation", privilege("nation", SELECT_COLUMN));
+        assertAccessDenied("ANALYZE nation", "Cannot select from columns \\[.*nationkey.*] in table or view .*.nation", privilege("nationkey", SELECT_COLUMN));
+    }
 }
diff --git a/presto-tests/src/test/java/io/prestosql/tests/TestTpchDistributedQueries.java b/presto-tests/src/test/java/io/prestosql/tests/TestTpchDistributedQueries.java
index 3bc0ae989c03..740ad4296246 100644
--- a/presto-tests/src/test/java/io/prestosql/tests/TestTpchDistributedQueries.java
+++ b/presto-tests/src/test/java/io/prestosql/tests/TestTpchDistributedQueries.java
@@ -36,6 +36,13 @@ public void testTooLongQuery()
         assertQueryFails(longQuery, "Query text length \\(1000037\\) exceeds the maximum length \\(1000000\\)");
     }
 
+    @Test
+    public void testAnalyze()
+    {
+        assertUpdate("ANALYZE orders", 15000);
+        assertQueryFails("ANALYZE orders WITH (foo = 'bar')", ".* does not support analyze property 'foo'.*");
+    }
+
     @Test
     public void testTooManyStages()
     {
diff --git a/presto-tpch/src/main/java/io/prestosql/plugin/tpch/TpchMetadata.java b/presto-tpch/src/main/java/io/prestosql/plugin/tpch/TpchMetadata.java
index aee32d9dfcd7..bd9285742aea 100644
--- a/presto-tpch/src/main/java/io/prestosql/plugin/tpch/TpchMetadata.java
+++ b/presto-tpch/src/main/java/io/prestosql/plugin/tpch/TpchMetadata.java
@@ -54,13 +54,16 @@
 import io.prestosql.spi.predicate.NullableValue;
 import io.prestosql.spi.predicate.TupleDomain;
 import io.prestosql.spi.statistics.ColumnStatistics;
+import io.prestosql.spi.statistics.ComputedStatistics;
 import io.prestosql.spi.statistics.DoubleRange;
 import io.prestosql.spi.statistics.Estimate;
 import io.prestosql.spi.statistics.TableStatistics;
+import io.prestosql.spi.statistics.TableStatisticsMetadata;
 import io.prestosql.spi.type.Type;
 import io.prestosql.spi.type.VarcharType;
 
 import java.time.LocalDate;
+import java.util.Collection;
 import java.util.List;
 import java.util.Map;
 import java.util.Optional;
@@ -72,6 +75,7 @@
 import static io.airlift.tpch.OrderColumn.ORDER_STATUS;
 import static io.prestosql.plugin.tpch.util.PredicateUtils.convertToPredicate;
 import static io.prestosql.plugin.tpch.util.PredicateUtils.filterOutColumnFromPredicate;
+import static io.prestosql.spi.statistics.TableStatisticType.ROW_COUNT;
 import static io.prestosql.spi.type.BigintType.BIGINT;
 import static io.prestosql.spi.type.DateType.DATE;
 import static io.prestosql.spi.type.DoubleType.DOUBLE;
@@ -172,6 +176,12 @@ public TpchTableHandle getTableHandle(ConnectorSession session, SchemaTableName
         return new TpchTableHandle(tableName.getTableName(), scaleFactor);
     }
 
+    @Override
+    public ConnectorTableHandle getTableHandleForStatisticsCollection(ConnectorSession session, SchemaTableName tableName, Map<String, Object> analyzeProperties)
+    {
+        return getTableHandle(session, tableName);
+    }
+
     @Override
     public List<ConnectorTableLayoutResult> getTableLayouts(
             ConnectorSession session,
@@ -412,6 +422,24 @@ private static double toDouble(Object value, Type columnType)
         throw new IllegalArgumentException("unsupported column type " + columnType);
     }
 
+    @Override
+    public TableStatisticsMetadata getStatisticsCollectionMetadata(ConnectorSession session, ConnectorTableMetadata tableMetadata)
+    {
+        return new TableStatisticsMetadata(ImmutableSet.of(), ImmutableSet.of(ROW_COUNT), ImmutableList.of());
+    }
+
+    @Override
+    public ConnectorTableHandle beginStatisticsCollection(ConnectorSession session, ConnectorTableHandle tableHandle)
+    {
+        return (TpchTableHandle) tableHandle;
+    }
+
+    @Override
+    public void finishStatisticsCollection(ConnectorSession session, ConnectorTableHandle tableHandle, Collection<ComputedStatistics> computedStatistics)
+    {
+        // do nothing
+    }
+
     @VisibleForTesting
     TpchColumnHandle toColumnHandle(TpchColumn<?> column)
     {