Skip to content

Commit

Permalink
Remove apparent correctness issue with calculatePrefixesWithTableName
Browse files Browse the repository at this point in the history
There was a tight coupling between the method implementation and its
caller.
  • Loading branch information
findepi committed Apr 21, 2023
1 parent f70c702 commit 677b52d
Showing 1 changed file with 19 additions and 21 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -217,18 +217,10 @@ private Set<QualifiedTablePrefix> getPrefixes(ConnectorSession session, Informat
}

InformationSchemaTable informationSchemaTable = table.getTable();
Set<QualifiedTablePrefix> prefixes = calculatePrefixesWithSchemaName(session, constraint.getSummary(), constraint.predicate());
Set<QualifiedTablePrefix> tablePrefixes = calculatePrefixesWithTableName(informationSchemaTable, session, prefixes, constraint.getSummary(), constraint.predicate());

if (tablePrefixes.size() <= MAX_PREFIXES_COUNT) {
prefixes = tablePrefixes;
}
if (prefixes.size() > MAX_PREFIXES_COUNT) {
// in case of high number of prefixes it is better to populate all data and then filter
prefixes = defaultPrefixes(catalogName);
}

return prefixes;
Set<QualifiedTablePrefix> schemaPrefixes = calculatePrefixesWithSchemaName(session, constraint.getSummary(), constraint.predicate());
Set<QualifiedTablePrefix> tablePrefixes = calculatePrefixesWithTableName(informationSchemaTable, session, schemaPrefixes, constraint.getSummary(), constraint.predicate());
verify(tablePrefixes.size() <= MAX_PREFIXES_COUNT, "calculatePrefixesWithTableName returned too many prefixes: %s", tablePrefixes.size());
return tablePrefixes;
}

public static boolean isTablesEnumeratingTable(InformationSchemaTable table)
Expand Down Expand Up @@ -271,7 +263,7 @@ private Set<QualifiedTablePrefix> calculatePrefixesWithTableName(

Optional<Set<String>> tables = filterString(constraint, TABLE_NAME_COLUMN_HANDLE);
if (tables.isPresent()) {
return prefixes.stream()
Set<QualifiedTablePrefix> tablePrefixes = prefixes.stream()
.peek(prefix -> verify(prefix.asQualifiedObjectName().isEmpty()))
.flatMap(prefix -> prefix.getSchemaName()
.map(schemaName -> Stream.of(prefix))
Expand Down Expand Up @@ -304,31 +296,37 @@ private Set<QualifiedTablePrefix> calculatePrefixesWithTableName(
})
.filter(objectName -> predicate.isEmpty() || predicate.get().test(asFixedValues(objectName)))
.map(QualifiedObjectName::asQualifiedTablePrefix)
// In method #getPrefixes, if the prefix set returned by this method has size larger than MAX_PREFIXES_COUNT,
// we will overwrite it with #defaultPrefixes. Limiting the stream at MAX_PREFIXES_COUNT + 1 elements helps
// skip unnecessary computation because we know the resulting set will be discarded when more than MAX_PREFIXES_COUNT
// elements are present. Since there may be duplicate prefixes, a distinct operator is applied to the stream,
// otherwise the stream may be truncated incorrectly.
.distinct()
.limit(MAX_PREFIXES_COUNT + 1)
.collect(toImmutableSet());

if (tablePrefixes.size() > MAX_PREFIXES_COUNT) {
// in case of high number of prefixes it is better to populate all data and then filter
// TODO this may cause re-running the above filtering upon next applyFilter
return defaultPrefixes(catalogName);
}
return tablePrefixes;
}

if (predicate.isEmpty() || !isColumnsEnumeratingTable(informationSchemaTable)) {
return prefixes;
}

return prefixes.stream()
Set<QualifiedTablePrefix> tablePrefixes = prefixes.stream()
.flatMap(prefix -> Stream.concat(
metadata.listTables(session, prefix).stream(),
metadata.listViews(session, prefix).stream()))
.filter(objectName -> predicate.get().test(asFixedValues(objectName)))
.map(QualifiedObjectName::asQualifiedTablePrefix)
// Same as the prefixes computed above; we use the distinct operator and limit the stream to MAX_PREFIXES_COUNT + 1
// elements to skip unnecessary computation.
.distinct()
.limit(MAX_PREFIXES_COUNT + 1)
.collect(toImmutableSet());
if (tablePrefixes.size() > MAX_PREFIXES_COUNT) {
// in case of high number of prefixes it is better to populate all data and then filter
// TODO this may cause re-running the above filtering upon next applyFilter
return defaultPrefixes(catalogName);
}
return tablePrefixes;
}

private boolean isColumnsEnumeratingTable(InformationSchemaTable table)
Expand Down

0 comments on commit 677b52d

Please sign in to comment.