Fix HiveMetadata.streamTableColumns not to include views #18343

Merged
1 change: 1 addition & 0 deletions core/trino-server/pom.xml
@@ -53,6 +53,7 @@
<plugin>
<groupId>io.takari.maven.plugins</groupId>
<artifactId>takari-lifecycle-plugin</artifactId>
<version>${dep.takari.version}</version>
<configuration>
<proc>none</proc>
</configuration>
@@ -286,7 +286,7 @@ default ColumnMetadata getColumnMetadata(ConnectorSession session, ConnectorTabl
}

/**
* Gets the metadata for all columns that match the specified table prefix.
* Gets the metadata for all columns that match the specified table prefix. Columns of views and materialized views are not included.
*
* @deprecated use {@link #streamTableColumns} which handles redirected tables
*/
@@ -298,7 +298,7 @@ default Map<SchemaTableName, List<ColumnMetadata>> listTableColumns(ConnectorSes

/**
* Gets the metadata for all columns that match the specified table prefix. Redirected table names are included, but
* the column metadata for them is not.
* the column metadata for them is not. Views and materialized views are not included.
*/
default Iterator<TableColumnsMetadata> streamTableColumns(ConnectorSession session, SchemaTablePrefix prefix)
{
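For connector implementors, the updated contract amounts to filtering views out before any column metadata is produced. A minimal sketch of a conforming override (not part of this PR; listTables and listViews are the standard ConnectorMetadata listing methods, getColumns is a hypothetical connector-specific helper, and listTables is assumed to also return view names, as it does in many connectors):

    @Override
    public Iterator<TableColumnsMetadata> streamTableColumns(ConnectorSession session, SchemaTablePrefix prefix)
    {
        // Exclude views up front so no TableColumnsMetadata is ever produced for them
        Set<SchemaTableName> views = ImmutableSet.copyOf(listViews(session, prefix.getSchema()));
        return listTables(session, prefix.getSchema()).stream()
                .filter(prefix::matches)
                .filter(table -> !views.contains(table))
                .map(table -> TableColumnsMetadata.forTable(table, getColumns(session, table)))
                .iterator();
    }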
@@ -15,13 +15,16 @@

import com.google.common.collect.ImmutableList;
import com.google.common.net.HostAndPort;
import io.trino.spi.connector.ConnectorMetadata;
import io.trino.spi.connector.SchemaTableName;
import io.trino.spi.connector.SchemaTablePrefix;
import org.apache.hadoop.net.NetUtils;
import org.testng.SkipException;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Parameters;
import org.testng.annotations.Test;

import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;

// staging directory is shared mutable state
@@ -66,6 +69,16 @@ public void testHideDeltaLakeTables()
throw new SkipException("not supported");
}

@Test
public void testHiveViewsHaveNoColumns()
{
try (Transaction transaction = newTransaction()) {
ConnectorMetadata metadata = transaction.getMetadata();
assertThat(listTableColumns(metadata, newSession(), new SchemaTablePrefix(view.getSchemaName(), view.getTableName())))
.isEmpty();
}
}

@Test
public void testHiveViewTranslationError()
{
@@ -21,7 +21,6 @@
import java.util.OptionalLong;

import static com.google.common.base.MoreObjects.toStringHelper;
import static com.google.common.base.Preconditions.checkArgument;
import static java.util.Objects.requireNonNull;

@Immutable
@@ -84,12 +83,6 @@ public OptionalLong getOnDiskDataSizeInBytes()
return onDiskDataSizeInBytes;
}

public HiveBasicStatistics withAdjustedRowCount(long adjustment)
{
checkArgument(rowCount.isPresent(), "rowCount isn't present");
return new HiveBasicStatistics(fileCount, OptionalLong.of(rowCount.getAsLong() + adjustment), inMemoryDataSizeInBytes, onDiskDataSizeInBytes);
}

@Override
public boolean equals(Object o)
{
@@ -15,6 +15,7 @@

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.inject.Injector;
import io.airlift.bootstrap.LifeCycleManager;
import io.trino.plugin.base.classloader.ClassLoaderSafeConnectorMetadata;
import io.trino.plugin.base.session.SessionPropertiesProvider;
@@ -46,6 +47,7 @@
public class HiveConnector
implements Connector
{
private final Injector injector;
private final LifeCycleManager lifeCycleManager;
private final ConnectorSplitManager splitManager;
private final ConnectorPageSourceProvider pageSourceProvider;
@@ -68,6 +70,7 @@ public class HiveConnector
private final boolean singleStatementWritesOnly;

public HiveConnector(
Injector injector,
LifeCycleManager lifeCycleManager,
HiveTransactionManager transactionManager,
ConnectorSplitManager splitManager,
@@ -87,6 +90,7 @@ public HiveConnector(
boolean singleStatementWritesOnly,
ClassLoader classLoader)
{
this.injector = requireNonNull(injector, "injector is null");
this.lifeCycleManager = requireNonNull(lifeCycleManager, "lifeCycleManager is null");
this.transactionManager = requireNonNull(transactionManager, "transactionManager is null");
this.splitManager = requireNonNull(splitManager, "splitManager is null");
@@ -233,4 +237,9 @@ public Set<TableProcedureMetadata> getTableProcedures()
{
return tableProcedures;
}

public Injector getInjector()
{
return injector;
}
}
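
Exposing the injector lets tests resolve connector-internal bindings without duplicating the connector's wiring; the BaseHiveConnectorTest change further down in this PR uses it exactly like this:

    // Resolve the connector's own metastore through Guice (catalog and session come from the test)
    HiveMetastore metastore = ((HiveConnector) getDistributedQueryRunner().getCoordinator().getConnector(catalog))
            .getInjector().getInstance(HiveMetastoreFactory.class)
            .createMetastore(Optional.of(getSession().getIdentity().toConnectorIdentity(catalog)));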
@@ -257,6 +257,7 @@
import static io.trino.plugin.hive.ViewReaderUtil.encodeViewData;
import static io.trino.plugin.hive.ViewReaderUtil.isHiveOrPrestoView;
import static io.trino.plugin.hive.ViewReaderUtil.isPrestoView;
import static io.trino.plugin.hive.ViewReaderUtil.isTrinoMaterializedView;
import static io.trino.plugin.hive.acid.AcidTransaction.NO_ACID_TRANSACTION;
import static io.trino.plugin.hive.acid.AcidTransaction.forCreateTable;
import static io.trino.plugin.hive.metastore.MetastoreUtil.buildInitialPrivilegeSet;
@@ -614,7 +615,24 @@ private ConnectorTableMetadata doGetTableMetadata(ConnectorSession session, Sche
throw new TrinoException(UNSUPPORTED_TABLE_TYPE, format("Not a Hive table '%s'", tableName));
}

if (!translateHiveViews && isHiveOrPrestoView(table)) {
boolean isTrinoView = isPrestoView(table);

Member: It would be nice to rename the method too.

Member Author: Agreed, but obviously out of scope for this PR, right?

boolean isHiveView = !isTrinoView && isHiveOrPrestoView(table);
boolean isTrinoMaterializedView = isTrinoMaterializedView(table);
if (isHiveView && translateHiveViews) {
// Produce metadata for a (translated) Hive view as if it was a table. This is incorrect from ConnectorMetadata.streamTableColumns
// perspective, but is done on purpose to keep information_schema.columns working.
// Because of fallback in ThriftHiveMetastoreClient.getAllViews, this method may return Trino/Presto views only,
// so HiveMetadata.getViews may fail to return Hive views.
}
else if (isHiveView) {
// When Hive view translation is not enabled, a Hive view is currently treated inconsistently
// - getView treats this as an unusable view (fails instead of returning Optional.empty)
// - getTableHandle treats this as a table (returns non-null)
// In any case, returning metadata is not useful.
throw new TableNotFoundException(tableName);
}
else if (isTrinoView || isTrinoMaterializedView) {
// streamTableColumns should not include views and materialized views
throw new TableNotFoundException(tableName);
}
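
Read together, the new branches classify a metastore relation before any metadata is returned; a self-contained restatement of the decision order (illustration only, mirroring the code above):

    // Translated Hive views are exposed as tables; untranslated Hive views and
    // all Trino views/materialized views are reported as missing.
    static String classify(boolean isTrinoView, boolean isHiveView, boolean isTrinoMaterializedView, boolean translateHiveViews)
    {
        if (isHiveView && translateHiveViews) {
            return "table metadata (keeps information_schema.columns working)";
        }
        if (isHiveView || isTrinoView || isTrinoMaterializedView) {
            return "TableNotFoundException";
        }
        return "table metadata";
    }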

@@ -837,6 +855,7 @@ private Stream<TableColumnsMetadata> streamTableColumns(ConnectorSession session
return Stream.empty();
}
catch (TableNotFoundException e) {
// it is not a table (e.g. it's a view) (TODO remove exception-driven logic for this case) OR

Member Author: re TODO -- FWIW this is how Iceberg does this:

    if (viewCache.containsKey(table) || materializedViewCache.containsKey(table)) {
        throw new TableNotFoundException(table);

    if (isPrestoView(parameters) && isHiveOrPrestoView(getTableType(table))) {
        // this is a Presto Hive view, hence not a table
        throw new TableNotFoundException(getSchemaTableName());

// table disappeared during listing operation
return Stream.empty();
}
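
The TODO above is about the shape of this logic rather than its behavior; in miniature, the exception-driven pattern collapses "relation is not a table" and "relation vanished mid-listing" into the same empty result (a generic, self-contained sketch, not Trino code):

    static Stream<String> columnsOrEmpty(Map<String, List<String>> tables, String relation)
    {
        try {
            return describeTable(tables, relation);
        }
        catch (NoSuchElementException e) {
            // not a table (e.g. a view), or it disappeared during listing
            return Stream.empty();
        }
    }

    static Stream<String> describeTable(Map<String, List<String>> tables, String relation)
    {
        List<String> columns = tables.get(relation);   // null when the relation is not a table
        if (columns == null) {
            throw new NoSuchElementException("not a table: " + relation);
        }
        return columns.stream();
    }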
@@ -165,6 +165,7 @@ public static Connector createConnector(
.map(accessControl -> new ClassLoaderSafeConnectorAccessControl(accessControl, classLoader));

return new HiveConnector(
injector,
lifeCycleManager,
transactionManager,
new ClassLoaderSafeConnectorSplitManager(splitManager, classLoader),
@@ -60,11 +60,6 @@ public Map<String, HiveColumnStatistics> getColumnStatistics()
return columnStatistics;
}

public PartitionStatistics withAdjustedRowCount(long adjustment)
{
return new PartitionStatistics(basicStatistics.withAdjustedRowCount(adjustment), columnStatistics);
}

@Override
public boolean equals(Object o)
{
@@ -27,7 +27,6 @@
import java.util.function.Consumer;

import static com.google.common.base.MoreObjects.toStringHelper;
import static io.trino.plugin.hive.metastore.MetastoreUtil.adjustRowCount;
import static java.util.Objects.requireNonNull;

@Immutable
@@ -57,12 +56,6 @@ public Partition(
this.parameters = ImmutableMap.copyOf(requireNonNull(parameters, "parameters is null"));
}

@JsonIgnore
public Partition withAdjustedRowCount(String partitionName, long rowCountDelta)
{
return new Partition(databaseName, tableName, values, storage, columns, adjustRowCount(parameters, partitionName, rowCountDelta));
}

@JsonProperty
public String getDatabaseName()
{
@@ -2560,15 +2560,6 @@ private void assertEmptyFile(HiveStorageFormat format)
}
}

@Test
public void testHiveViewsHaveNoColumns()
{
try (Transaction transaction = newTransaction()) {
ConnectorMetadata metadata = transaction.getMetadata();
assertEquals(listTableColumns(metadata, newSession(), new SchemaTablePrefix(view.getSchemaName(), view.getTableName())), ImmutableMap.of());
}
}

@Test
public void testRenameTable()
{
@@ -3516,7 +3507,7 @@ public void testIllegalStorageFormatDuringTableScan()
}
}

private static Map<SchemaTableName, List<ColumnMetadata>> listTableColumns(ConnectorMetadata metadata, ConnectorSession session, SchemaTablePrefix prefix)
protected static Map<SchemaTableName, List<ColumnMetadata>> listTableColumns(ConnectorMetadata metadata, ConnectorSession session, SchemaTablePrefix prefix)
{
return stream(metadata.streamTableColumns(session, prefix))
.collect(toImmutableMap(
@@ -30,11 +30,12 @@
import io.trino.metadata.TableHandle;
import io.trino.metadata.TableMetadata;
import io.trino.plugin.hive.metastore.Column;
import io.trino.plugin.hive.metastore.HiveMetastore;
import io.trino.plugin.hive.metastore.HiveMetastoreFactory;
import io.trino.plugin.hive.metastore.PrincipalPrivileges;
import io.trino.plugin.hive.metastore.Storage;
import io.trino.plugin.hive.metastore.StorageFormat;
import io.trino.plugin.hive.metastore.Table;
import io.trino.plugin.hive.metastore.file.FileHiveMetastore;
import io.trino.spi.connector.CatalogSchemaTableName;
import io.trino.spi.connector.ColumnHandle;
import io.trino.spi.connector.ColumnMetadata;
@@ -143,7 +144,6 @@
import static io.trino.plugin.hive.HiveTableProperties.PARTITIONED_BY_PROPERTY;
import static io.trino.plugin.hive.HiveTableProperties.STORAGE_FORMAT_PROPERTY;
import static io.trino.plugin.hive.HiveType.toHiveType;
import static io.trino.plugin.hive.metastore.file.TestingFileHiveMetastore.createTestingFileHiveMetastore;
import static io.trino.plugin.hive.util.HiveUtil.columnExtraInfo;
import static io.trino.spi.security.Identity.ofUser;
import static io.trino.spi.security.SelectedRole.Type.ROLE;
@@ -8370,6 +8370,8 @@ public void testSelectFromPrestoViewReferencingHiveTableWithTimestamps()
@Test
public void testTimestampWithTimeZone()
{
String catalog = getSession().getCatalog().orElseThrow();

assertUpdate("CREATE TABLE test_timestamptz_base (t timestamp) WITH (format = 'PARQUET')");
assertUpdate("INSERT INTO test_timestamptz_base (t) VALUES" +
"(timestamp '2022-07-26 12:13')", 1);
Expand All @@ -8379,7 +8381,9 @@ public void testTimestampWithTimeZone()
String tableLocation = getTableLocation("test_timestamptz_base");

// TIMESTAMP WITH LOCAL TIME ZONE is not mapped to any Trino type, so we need to create the metastore entry manually
FileHiveMetastore metastore = createTestingFileHiveMetastore(new File(getDistributedQueryRunner().getCoordinator().getBaseDataDir().toFile(), "hive_data"));
HiveMetastore metastore = ((HiveConnector) getDistributedQueryRunner().getCoordinator().getConnector(catalog))
.getInjector().getInstance(HiveMetastoreFactory.class)
.createMetastore(Optional.of(getSession().getIdentity().toConnectorIdentity(catalog)));
metastore.createTable(
new Table(
"tpch",
3 changes: 2 additions & 1 deletion pom.xml
@@ -155,6 +155,7 @@
<dep.arrow.version>12.0.1</dep.arrow.version>
<dep.avro.version>1.11.1</dep.avro.version>
<dep.packaging.version>${dep.airlift.version}</dep.packaging.version>
<dep.takari.version>2.1.1</dep.takari.version>
<dep.aws-sdk.version>1.12.505</dep.aws-sdk.version>
<dep.aws-sdk-v2.version>2.20.93</dep.aws-sdk-v2.version>
<dep.jsonwebtoken.version>0.11.5</dep.jsonwebtoken.version>
@@ -2211,7 +2212,7 @@
<DynamicDependency>
<groupId>io.takari.maven.plugins</groupId>
<artifactId>takari-lifecycle-plugin</artifactId>
<version>2.1.1</version>
<version>${dep.takari.version}</version>
<repositoryType>PLUGIN</repositoryType>
</DynamicDependency>
<DynamicDependency>
@@ -91,7 +91,7 @@ private void testFailingHiveViewsWithInformationSchema()
.hasMessageContaining("Failed to translate Hive view 'test_list_failing_views.failing_view'");

// Queries on information_schema.columns also trigger ConnectorMetadata#getViews. Columns from failing_view are
// listed too since HiveMetadata#listTableColumns does not ignore views.
// listed too since HiveMetadata#listTableColumns does not ignore Hive views.
assertThat(onTrino().executeQuery("SELECT table_name, column_name FROM information_schema.columns WHERE table_schema = 'test_list_failing_views'"))
.containsOnly(
row("correct_view", "n_nationkey"),
@@ -141,7 +141,7 @@ private void testFailingHiveViewsWithSystemJdbc()
.hasMessageContaining("Failed to translate Hive view 'test_list_failing_views.failing_view'");

// Queries on system.jdbc.columns also trigger ConnectorMetadata#getViews. Columns from failing_view are
// listed too since HiveMetadata#listTableColumns does not ignore views.
// listed too since HiveMetadata#listTableColumns does not ignore Hive views.
assertThat(onTrino().executeQuery("SELECT table_name, column_name FROM system.jdbc.columns WHERE table_cat = 'hive' AND table_schem = 'test_list_failing_views'"))
.containsOnly(
row("correct_view", "n_nationkey"),