Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Filter information_schema within engine #16080

Merged
merged 1 commit into from
Mar 8, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Filter information_schema within engine
ebyhr committed Mar 8, 2023
commit ff8876b6c0fe4831cec61ea9d6ecebca81cc7ca2
Original file line number Diff line number Diff line change
@@ -1339,7 +1339,7 @@ public void testGetColumnsMetadataCalls()
.collect(toImmutableList()),
new MetadataCallsCount()
.withListSchemasCount(5)
.withListTablesCount(5)
.withListTablesCount(4)
.withGetTableHandleCount(8)
.withGetColumnsCount(2));

Original file line number Diff line number Diff line change
@@ -250,6 +250,7 @@ public List<String> listSchemaNames(Session session, String catalogName)
ConnectorMetadata metadata = catalogMetadata.getMetadataFor(session, catalogHandle);
metadata.listSchemaNames(connectorSession).stream()
.map(schema -> schema.toLowerCase(Locale.ENGLISH))
.filter(schema -> !isExternalInformationSchema(catalogHandle, schema))
.forEach(schemaNames::add);
}
}
@@ -508,8 +509,12 @@ public List<QualifiedObjectName> listTables(Session session, QualifiedTablePrefi
for (CatalogHandle catalogHandle : catalogMetadata.listCatalogHandles()) {
ConnectorMetadata metadata = catalogMetadata.getMetadataFor(session, catalogHandle);
ConnectorSession connectorSession = session.toConnectorSession(catalogHandle);
if (isExternalInformationSchema(catalogHandle, prefix.getSchemaName())) {
continue;
}
metadata.listTables(connectorSession, prefix.getSchemaName()).stream()
.map(convertFromSchemaTableName(prefix.getCatalogName()))
.filter(table -> !isExternalInformationSchema(catalogHandle, table.getSchemaName()))
.filter(prefix::matches)
.forEach(tables::add);
}
@@ -554,13 +559,21 @@ public List<TableColumnsMetadata> listTableColumns(Session session, QualifiedTab

SchemaTablePrefix tablePrefix = prefix.asSchemaTablePrefix();
for (CatalogHandle catalogHandle : catalogMetadata.listCatalogHandles()) {
if (isExternalInformationSchema(catalogHandle, prefix.getSchemaName())) {
continue;
}

ConnectorMetadata metadata = catalogMetadata.getMetadataFor(session, catalogHandle);

ConnectorSession connectorSession = session.toConnectorSession(catalogHandle);

// Collect column metadata from tables
metadata.streamTableColumns(connectorSession, tablePrefix)
.forEachRemaining(columnsMetadata -> tableColumns.put(columnsMetadata.getTable(), columnsMetadata.getColumns()));
.forEachRemaining(columnsMetadata -> {
if (!isExternalInformationSchema(catalogHandle, columnsMetadata.getTable().getSchemaName())) {
tableColumns.put(columnsMetadata.getTable(), columnsMetadata.getColumns());
}
});

// Collect column metadata from views. if table and view names overlap, the view wins
for (Entry<QualifiedObjectName, ViewInfo> entry : getViews(session, prefix).entrySet()) {
@@ -1076,10 +1089,14 @@ public List<QualifiedObjectName> listViews(Session session, QualifiedTablePrefix
CatalogMetadata catalogMetadata = catalog.get();

for (CatalogHandle catalogHandle : catalogMetadata.listCatalogHandles()) {
if (isExternalInformationSchema(catalogHandle, prefix.getSchemaName())) {
continue;
}
ConnectorMetadata metadata = catalogMetadata.getMetadataFor(session, catalogHandle);
ConnectorSession connectorSession = session.toConnectorSession(catalogHandle);
metadata.listViews(connectorSession, prefix.getSchemaName()).stream()
.map(convertFromSchemaTableName(prefix.getCatalogName()))
.filter(view -> !isExternalInformationSchema(catalogHandle, view.getSchemaName()))
.filter(prefix::matches)
.forEach(views::add);
}
@@ -1100,6 +1117,10 @@ public Map<QualifiedObjectName, ViewInfo> getViews(Session session, QualifiedTab

SchemaTablePrefix tablePrefix = prefix.asSchemaTablePrefix();
for (CatalogHandle catalogHandle : catalogMetadata.listCatalogHandles()) {
if (isExternalInformationSchema(catalogHandle, tablePrefix.getSchema())) {
continue;
}

ConnectorMetadata metadata = catalogMetadata.getMetadataFor(session, catalogHandle);
ConnectorSession connectorSession = session.toConnectorSession(catalogHandle);

@@ -1114,6 +1135,9 @@ public Map<QualifiedObjectName, ViewInfo> getViews(Session session, QualifiedTab
}

for (Entry<SchemaTableName, ConnectorViewDefinition> entry : viewMap.entrySet()) {
if (isExternalInformationSchema(catalogHandle, entry.getKey().getSchemaName())) {
continue;
}
QualifiedObjectName viewName = new QualifiedObjectName(
prefix.getCatalogName(),
entry.getKey().getSchemaName(),
@@ -1299,10 +1323,14 @@ public List<QualifiedObjectName> listMaterializedViews(Session session, Qualifie
CatalogMetadata catalogMetadata = catalog.get();

for (CatalogHandle catalogHandle : catalogMetadata.listCatalogHandles()) {
if (isExternalInformationSchema(catalogHandle, prefix.getSchemaName())) {
continue;
}
ConnectorMetadata metadata = catalogMetadata.getMetadataFor(session, catalogHandle);
ConnectorSession connectorSession = session.toConnectorSession(catalogHandle);
metadata.listMaterializedViews(connectorSession, prefix.getSchemaName()).stream()
.map(convertFromSchemaTableName(prefix.getCatalogName()))
.filter(materializedView -> !isExternalInformationSchema(catalogHandle, materializedView.getSchemaName()))
.filter(prefix::matches)
.forEach(materializedViews::add);
}
@@ -1323,6 +1351,9 @@ public Map<QualifiedObjectName, ViewInfo> getMaterializedViews(Session session,

SchemaTablePrefix tablePrefix = prefix.asSchemaTablePrefix();
for (CatalogHandle catalogHandle : catalogMetadata.listCatalogHandles()) {
if (isExternalInformationSchema(catalogHandle, tablePrefix.getSchema())) {
continue;
}
ConnectorMetadata metadata = catalogMetadata.getMetadataFor(session, catalogHandle);
ConnectorSession connectorSession = session.toConnectorSession(catalogHandle);

@@ -1337,6 +1368,9 @@ public Map<QualifiedObjectName, ViewInfo> getMaterializedViews(Session session,
}

for (Entry<SchemaTableName, ConnectorMaterializedViewDefinition> entry : materializedViewMap.entrySet()) {
if (isExternalInformationSchema(catalogHandle, entry.getKey().getSchemaName())) {
continue;
}
QualifiedObjectName viewName = new QualifiedObjectName(
prefix.getCatalogName(),
entry.getKey().getSchemaName(),
@@ -1431,6 +1465,16 @@ public void setMaterializedViewProperties(Session session, QualifiedObjectName v
metadata.setMaterializedViewProperties(session.toConnectorSession(catalogHandle), viewName.asSchemaTableName(), properties);
}

/**
 * Optional-aware variant of the {@code information_schema} check.
 * An absent schema name is never treated as an external information schema;
 * a present one is delegated to the {@code String} overload.
 */
private static boolean isExternalInformationSchema(CatalogHandle catalogHandle, Optional<String> schemaName)
{
    if (schemaName.isEmpty()) {
        // No schema was specified, so there is nothing to filter
        return false;
    }
    return isExternalInformationSchema(catalogHandle, schemaName.get());
}

/**
 * Returns {@code true} when {@code schemaName} is {@code information_schema}
 * (compared case-insensitively) and the catalog handle's type is not internal.
 * A {@code null} schema name yields {@code false}.
 */
private static boolean isExternalInformationSchema(CatalogHandle catalogHandle, String schemaName)
{
    if (catalogHandle.getType().isInternal()) {
        // Handles of an internal type are never reported as external
        return false;
    }
    return "information_schema".equalsIgnoreCase(schemaName);
}

@Override
public Optional<TableScanRedirectApplicationResult> applyTableScanRedirect(Session session, TableHandle tableHandle)
{
Original file line number Diff line number Diff line change
@@ -121,7 +121,6 @@ public class BigQueryMetadata

static final int DEFAULT_NUMERIC_TYPE_PRECISION = 38;
static final int DEFAULT_NUMERIC_TYPE_SCALE = 9;
static final String INFORMATION_SCHEMA = "information_schema";
private static final String VIEW_DEFINITION_SYSTEM_TABLE_SUFFIX = "$view_definition";

private final BigQueryClientFactory bigQueryClientFactory;
@@ -149,7 +148,6 @@ private List<String> listRemoteSchemaNames(ConnectorSession session)

Stream<String> remoteSchemaNames = Streams.stream(client.listDatasets(projectId))
.map(dataset -> dataset.getDatasetId().getDataset())
.filter(schemaName -> !schemaName.equalsIgnoreCase(INFORMATION_SCHEMA))
.distinct();

// filter out all the ambiguous schemas to prevent failures if anyone tries to access the listed schemas
@@ -182,19 +180,12 @@ public List<SchemaTableName> listTables(ConnectorSession session, Optional<Strin
BigQueryClient client = bigQueryClientFactory.create(session);

log.debug("listTables(session=%s, schemaName=%s)", session, schemaName);
if (schemaName.isPresent() && schemaName.get().equalsIgnoreCase(INFORMATION_SCHEMA)) {
return ImmutableList.of();
}

String projectId = client.getProjectId();

// filter ambiguous schemas
Optional<String> remoteSchema = schemaName.flatMap(schema -> client.toRemoteDataset(projectId, schema)
.filter(dataset -> !dataset.isAmbiguous())
.map(RemoteDatabaseObject::getOnlyRemoteName));
if (remoteSchema.isPresent() && remoteSchema.get().equalsIgnoreCase(INFORMATION_SCHEMA)) {
return ImmutableList.of();
}

Set<String> remoteSchemaNames = remoteSchema.map(ImmutableSet::of)
.orElseGet(() -> ImmutableSet.copyOf(listRemoteSchemaNames(session)));
Original file line number Diff line number Diff line change
@@ -248,10 +248,8 @@
import static io.trino.spi.type.VarcharType.VARCHAR;
import static java.lang.String.format;
import static java.time.Instant.EPOCH;
import static java.util.Collections.emptyIterator;
import static java.util.Collections.singletonList;
import static java.util.Collections.unmodifiableMap;
import static java.util.Locale.ENGLISH;
import static java.util.Objects.requireNonNull;
import static java.util.UUID.randomUUID;
import static java.util.function.Function.identity;
@@ -360,10 +358,7 @@ public DeltaLakeMetadata(
public List<String> listSchemaNames(ConnectorSession session)
{
return metastore.getAllDatabases().stream()
.filter(schema -> {
String schemaName = schema.toLowerCase(ENGLISH);
return !(schemaName.equals("information_schema") || schemaName.equals("sys"));
})
.filter(schema -> !schema.equalsIgnoreCase("sys"))
.collect(toImmutableList());
}

@@ -485,10 +480,6 @@ public ConnectorTableMetadata getTableMetadata(ConnectorSession session, Connect
@Override
public List<SchemaTableName> listTables(ConnectorSession session, Optional<String> schemaName)
{
if (schemaName.isPresent() && schemaName.get().equals("information_schema")) {
// TODO https://github.com/trinodb/trino/issues/1559 information_schema should be handled by the engine fully
return ImmutableList.of();
}
return schemaName.map(Collections::singletonList)
.orElseGet(() -> listSchemaNames(session))
.stream()
@@ -556,11 +547,6 @@ public Map<SchemaTableName, List<ColumnMetadata>> listTableColumns(ConnectorSess
@Override
public Iterator<TableColumnsMetadata> streamTableColumns(ConnectorSession session, SchemaTablePrefix prefix)
{
if (prefix.getSchema().isPresent() && prefix.getSchema().get().equals("information_schema")) {
// TODO https://github.com/trinodb/trino/issues/1559 information_schema should be handled by the engine fully
return emptyIterator();
}

List<SchemaTableName> tables = prefix.getTable()
.map(ignored -> singletonList(prefix.toSchemaTableName()))
.orElseGet(() -> listTables(session, prefix.getSchema()));
Original file line number Diff line number Diff line change
@@ -1099,8 +1099,7 @@ public static String sortingColumnToString(SortingColumn column)
public static boolean isHiveSystemSchema(String schemaName)
{
if ("information_schema".equals(schemaName)) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

isHiveSystemSchema("information_schema") should still return true, because "information_schema" is indeed a Hive system schema.

We should reword the comment, though.

// For things like listing columns in information_schema.columns table, we need to explicitly filter out Hive's own information_schema.
// TODO https://github.com/trinodb/trino/issues/1559 this should be filtered out in engine.
// `information_schema` is filtered within engine. This condition exists for internal handling in Hive connector.
return true;
}
if ("sys".equals(schemaName)) {
Original file line number Diff line number Diff line change
@@ -199,10 +199,6 @@ public List<String> listNamespaces(ConnectorSession session)
private List<String> listNamespaces(ConnectorSession session, Optional<String> namespace)
{
if (namespace.isPresent()) {
if (isHiveSystemSchema(namespace.get())) {
// TODO https://github.com/trinodb/trino/issues/1559 information_schema should be handled by the engine fully
return ImmutableList.of();
}
return ImmutableList.of(namespace.get());
}
return listNamespaces(session);
Original file line number Diff line number Diff line change
@@ -605,7 +605,6 @@ private List<String> listNamespaces(ConnectorSession session, Optional<String> n
{
if (namespace.isPresent()) {
if (isHiveSystemSchema(namespace.get())) {
// TODO https://github.com/trinodb/trino/issues/1559 information_schema should be handled by the engine fully
return ImmutableList.of();
}
return ImmutableList.of(namespace.get());
Original file line number Diff line number Diff line change
@@ -163,10 +163,6 @@ public List<SchemaTableName> listTables(ConnectorSession session, Optional<Strin
private List<String> listNamespaces(ConnectorSession session, Optional<String> namespace)
{
if (namespace.isPresent() && namespaceExists(session, namespace.get())) {
if ("information_schema".equals(namespace.get())) {
// TODO https://github.com/trinodb/trino/issues/1559 this should be filtered out in engine.
return ImmutableList.of();
}
return ImmutableList.of(namespace.get());
}
return listNamespaces(session);
Original file line number Diff line number Diff line change
@@ -176,14 +176,9 @@ protected MockConnectorPlugin buildMockConnectorPlugin()
{
MockConnectorFactory connectorFactory = MockConnectorFactory.builder()
.withListSchemaNames(session -> ImmutableList.copyOf(mockTableListings.keySet()))
.withListTables((session, schemaName) -> {
if (schemaName.equals("information_schema")) {
// TODO (https://github.com/trinodb/trino/issues/1559) connector should not be asked about information_schema
return List.of();
}
return verifyNotNull(mockTableListings.get(schemaName), "No listing function registered for [%s]", schemaName)
.apply(session);
})
.withListTables((session, schemaName) ->
verifyNotNull(mockTableListings.get(schemaName), "No listing function registered for [%s]", schemaName)
.apply(session))
.build();
return new MockConnectorPlugin(connectorFactory);
}
Original file line number Diff line number Diff line change
@@ -153,7 +153,7 @@ public void testMetadataCalls()
"VALUES 2",
new MetadataCallsCount()
.withListSchemasCount(1)
.withListTablesCount(3));
.withListTablesCount(2));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nice

assertMetadataCalls(
"SELECT count(*) from test_catalog.information_schema.tables WHERE table_name LIKE 'test_t_ble1' AND table_name IN ('test_table1', 'test_table2')",
"VALUES 2",
@@ -188,14 +188,13 @@ public void testMetadataCalls()
"VALUES 1",
new MetadataCallsCount()
.withListSchemasCount(1)
.withListTablesCount(1)
.withGetColumnsCount(0));
assertMetadataCalls(
"SELECT count(*) FROM (SELECT * from test_catalog.information_schema.columns LIMIT 1000)",
"VALUES 1000",
new MetadataCallsCount()
.withListSchemasCount(1)
.withListTablesCount(2)
.withListTablesCount(1)
.withGetColumnsCount(1000));

// Empty table schema and table name