Skip to content

Commit

Permalink
Analyze Iceberg tables
Browse files Browse the repository at this point in the history
Support `ANALYZE` in the Iceberg connector. This collects the number of distinct
values (NDV) of selected columns and stores it in table properties.
This is an interim solution until the Iceberg library has first-class
statistics files support.
  • Loading branch information
findepi committed Sep 6, 2022
1 parent 85ec919 commit 0901525
Show file tree
Hide file tree
Showing 21 changed files with 1,084 additions and 12 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@
import static io.trino.spi.security.AccessDeniedException.denyDropView;
import static io.trino.spi.security.AccessDeniedException.denyExecuteFunction;
import static io.trino.spi.security.AccessDeniedException.denyExecuteQuery;
import static io.trino.spi.security.AccessDeniedException.denyExecuteTableProcedure;
import static io.trino.spi.security.AccessDeniedException.denyGrantExecuteFunctionPrivilege;
import static io.trino.spi.security.AccessDeniedException.denyImpersonateUser;
import static io.trino.spi.security.AccessDeniedException.denyInsertTable;
Expand Down Expand Up @@ -101,6 +102,7 @@
import static io.trino.testing.TestingAccessControlManager.TestingPrivilegeType.DROP_VIEW;
import static io.trino.testing.TestingAccessControlManager.TestingPrivilegeType.EXECUTE_FUNCTION;
import static io.trino.testing.TestingAccessControlManager.TestingPrivilegeType.EXECUTE_QUERY;
import static io.trino.testing.TestingAccessControlManager.TestingPrivilegeType.EXECUTE_TABLE_PROCEDURE;
import static io.trino.testing.TestingAccessControlManager.TestingPrivilegeType.GRANT_EXECUTE_FUNCTION;
import static io.trino.testing.TestingAccessControlManager.TestingPrivilegeType.IMPERSONATE_USER;
import static io.trino.testing.TestingAccessControlManager.TestingPrivilegeType.INSERT_TABLE;
Expand Down Expand Up @@ -670,6 +672,17 @@ public void checkCanExecuteFunction(SecurityContext context, String functionName
}
}

/**
 * Test hook for the "execute table procedure" access check.
 * <p>
 * The privilege is looked up under the name {@code <table>.<procedure>} for the
 * user of the supplied security context. When a deny rule matches, the denial is
 * reported through {@code denyExecuteTableProcedure}. The parent implementation is
 * consulted only when no deny rules are registered at all.
 *
 * @param context security context whose identity supplies the user name
 * @param table table the procedure is executed on
 * @param procedure name of the table procedure
 */
@Override
public void checkCanExecuteTableProcedure(SecurityContext context, QualifiedObjectName table, String procedure)
{
    String userName = context.getIdentity().getUser();
    // Deny rules for table procedures are keyed by "<table>.<procedure>".
    String entityName = table + "." + procedure;

    if (shouldDenyPrivilege(userName, entityName, EXECUTE_TABLE_PROCEDURE)) {
        denyExecuteTableProcedure(table.toString(), procedure);
    }

    boolean noDenyRulesConfigured = denyPrivileges.isEmpty();
    if (noDenyRulesConfigured) {
        super.checkCanExecuteTableProcedure(context, table, procedure);
    }
}

@Override
public List<ViewExpression> getRowFilters(SecurityContext context, QualifiedObjectName tableName)
{
Expand Down Expand Up @@ -709,7 +722,7 @@ public enum TestingPrivilegeType
{
SET_USER, IMPERSONATE_USER,
EXECUTE_QUERY, VIEW_QUERY, KILL_QUERY,
EXECUTE_FUNCTION,
EXECUTE_FUNCTION, EXECUTE_TABLE_PROCEDURE,
CREATE_SCHEMA, DROP_SCHEMA, RENAME_SCHEMA,
SHOW_CREATE_TABLE, CREATE_TABLE, DROP_TABLE, RENAME_TABLE, COMMENT_TABLE, COMMENT_VIEW, COMMENT_COLUMN, INSERT_TABLE, DELETE_TABLE, MERGE_TABLE, UPDATE_TABLE, TRUNCATE_TABLE, SET_TABLE_PROPERTIES, SHOW_COLUMNS,
ADD_COLUMN, DROP_COLUMN, RENAME_COLUMN, SELECT_COLUMN,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.trino.plugin.iceberg;

import com.google.common.collect.ImmutableList;
import io.trino.spi.TrinoException;
import io.trino.spi.session.PropertyMetadata;
import io.trino.spi.type.ArrayType;

import javax.inject.Inject;

import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;

import static com.google.common.collect.ImmutableSet.toImmutableSet;
import static io.trino.spi.StandardErrorCode.INVALID_ANALYZE_PROPERTY;
import static io.trino.spi.type.VarcharType.VARCHAR;
import static java.lang.String.format;

/**
 * Defines the properties the Iceberg connector exposes for {@code ANALYZE} and
 * provides typed access to their decoded values.
 * <p>
 * Currently a single property, {@value #COLUMNS_PROPERTY}, is defined: an array of
 * varchar values that is decoded into a set of column names.
 */
public class IcebergAnalyzeProperties
{
    public static final String COLUMNS_PROPERTY = "columns";

    private final List<PropertyMetadata<?>> analyzeProperties;

    @Inject
    public IcebergAnalyzeProperties()
    {
        analyzeProperties = ImmutableList.<PropertyMetadata<?>>builder()
                .add(new PropertyMetadata<>(
                        COLUMNS_PROPERTY,
                        "Columns to be analyzed",
                        new ArrayType(VARCHAR),
                        Set.class,
                        null,
                        false,
                        IcebergAnalyzeProperties::decodeColumnNames,
                        value -> value))
                .build();
    }

    /**
     * Returns the analyze property definitions built in the constructor.
     *
     * @return immutable list of analyze property metadata
     */
    public List<PropertyMetadata<?>> getAnalyzeProperties()
    {
        return analyzeProperties;
    }

    /**
     * Reads the decoded {@value #COLUMNS_PROPERTY} value from an analyze properties map.
     *
     * @param properties analyze properties keyed by property name
     * @return the column names, or an empty {@code Optional} when the property is
     *         absent from the map or mapped to {@code null}
     */
    public static Optional<Set<String>> getColumnNames(Map<String, Object> properties)
    {
        @SuppressWarnings("unchecked")
        Set<String> columns = (Set<String>) properties.get(COLUMNS_PROPERTY);
        return Optional.ofNullable(columns);
    }

    /**
     * Decodes the raw {@value #COLUMNS_PROPERTY} value into a set of column names.
     *
     * @param object raw property value; expected to be a {@link Collection}, or {@code null}
     * @return immutable set of column names, or {@code null} when {@code object} is {@code null}
     * @throws TrinoException with {@code INVALID_ANALYZE_PROPERTY} if any element is {@code null}
     */
    private static Set<String> decodeColumnNames(Object object)
    {
        if (object == null) {
            return null;
        }

        Collection<?> columns = (Collection<?>) object;
        // Validation and conversion happen in a single mapping step instead of a
        // side-effecting peek(), which is intended mainly as a debugging aid.
        return columns.stream()
                .map(IcebergAnalyzeProperties::toColumnName)
                .collect(toImmutableSet());
    }

    /**
     * Validates a single element of the {@value #COLUMNS_PROPERTY} value and casts it to a string.
     *
     * @param element element of the raw property value
     * @return the element as a {@code String}
     * @throws TrinoException with {@code INVALID_ANALYZE_PROPERTY} if {@code element} is {@code null}
     */
    private static String toColumnName(Object element)
    {
        if (element == null) {
            throw new TrinoException(INVALID_ANALYZE_PROPERTY, format("Invalid null value in analyze %s property", COLUMNS_PROPERTY));
        }
        return (String) element;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ public class IcebergConfig
{
public static final int FORMAT_VERSION_SUPPORT_MIN = 1;
public static final int FORMAT_VERSION_SUPPORT_MAX = 2;
public static final String EXTENDED_STATISTICS_CONFIG = "iceberg.experimental.extended-statistics.enabled";
public static final String EXTENDED_STATISTICS_DESCRIPTION = "Allow ANALYZE and use of extended statistics collected by it. Currently, the statistics are collected in Trino-specific format";
public static final String EXPIRE_SNAPSHOTS_MIN_RETENTION = "iceberg.expire_snapshots.min-retention";
public static final String REMOVE_ORPHAN_FILES_MIN_RETENTION = "iceberg.remove_orphan_files.min-retention";

Expand All @@ -50,6 +52,7 @@ public class IcebergConfig
private CatalogType catalogType = HIVE_METASTORE;
private Duration dynamicFilteringWaitTimeout = new Duration(0, SECONDS);
private boolean tableStatisticsEnabled = true;
private boolean extendedStatisticsEnabled;
private boolean projectionPushdownEnabled = true;
private Optional<String> hiveCatalogName = Optional.empty();
private int formatVersion = FORMAT_VERSION_SUPPORT_MAX;
Expand Down Expand Up @@ -180,6 +183,19 @@ public IcebergConfig setTableStatisticsEnabled(boolean tableStatisticsEnabled)
return this;
}

/**
 * Reports the current value of the extended statistics flag
 * (configured through {@link #EXTENDED_STATISTICS_CONFIG}).
 *
 * @return {@code true} when extended statistics are enabled
 */
public boolean isExtendedStatisticsEnabled()
{
    return this.extendedStatisticsEnabled;
}

/**
 * Sets the extended statistics flag, bound to the
 * {@link #EXTENDED_STATISTICS_CONFIG} configuration property.
 *
 * @param extendedStatisticsEnabled new value of the flag
 * @return this config instance, to allow chained setter calls
 */
@Config(EXTENDED_STATISTICS_CONFIG)
@ConfigDescription(EXTENDED_STATISTICS_DESCRIPTION)
public IcebergConfig setExtendedStatisticsEnabled(boolean extendedStatisticsEnabled)
{
this.extendedStatisticsEnabled = extendedStatisticsEnabled;
return this;
}

public boolean isProjectionPushdownEnabled()
{
return projectionPushdownEnabled;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ public class IcebergConnector
private final List<PropertyMetadata<?>> schemaProperties;
private final List<PropertyMetadata<?>> tableProperties;
private final List<PropertyMetadata<?>> materializedViewProperties;
private final List<PropertyMetadata<?>> analyzeProperties;
private final Optional<ConnectorAccessControl> accessControl;
private final Set<Procedure> procedures;
private final Set<TableProcedureMetadata> tableProcedures;
Expand All @@ -73,6 +74,7 @@ public IcebergConnector(
List<PropertyMetadata<?>> schemaProperties,
List<PropertyMetadata<?>> tableProperties,
List<PropertyMetadata<?>> materializedViewProperties,
List<PropertyMetadata<?>> analyzeProperties,
Optional<ConnectorAccessControl> accessControl,
Set<Procedure> procedures,
Set<TableProcedureMetadata> tableProcedures)
Expand All @@ -89,6 +91,7 @@ public IcebergConnector(
this.schemaProperties = ImmutableList.copyOf(requireNonNull(schemaProperties, "schemaProperties is null"));
this.tableProperties = ImmutableList.copyOf(requireNonNull(tableProperties, "tableProperties is null"));
this.materializedViewProperties = ImmutableList.copyOf(requireNonNull(materializedViewProperties, "materializedViewProperties is null"));
this.analyzeProperties = ImmutableList.copyOf(requireNonNull(analyzeProperties, "analyzeProperties is null"));
this.accessControl = requireNonNull(accessControl, "accessControl is null");
this.procedures = ImmutableSet.copyOf(requireNonNull(procedures, "procedures is null"));
this.tableProcedures = ImmutableSet.copyOf(requireNonNull(tableProcedures, "tableProcedures is null"));
Expand Down Expand Up @@ -167,6 +170,12 @@ public List<PropertyMetadata<?>> getMaterializedViewProperties()
return materializedViewProperties;
}

/**
 * Returns the analyze property definitions this connector was constructed with.
 *
 * @return the list of analyze property metadata held by this connector
 */
@Override
public List<PropertyMetadata<?>> getAnalyzeProperties()
{
    return this.analyzeProperties;
}

@Override
public ConnectorAccessControl getAccessControl()
{
Expand Down
Loading

0 comments on commit 0901525

Please sign in to comment.