Skip to content

Commit

Permalink
Remove incorrect useless code from Iceberg TableStatisticsMaker
Browse files Browse the repository at this point in the history
The code used to extract partition values was useless, because
`Constraint#predicate` is never present there.

The code was also incorrect, because it would assign transformation
value to transformation's source column. For example, it would
instantiate a map like

    a_string_column: 2 (bucket number)
  • Loading branch information
findepi committed Jul 25, 2021
1 parent 28a1cfe commit cd9ca95
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 33 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
import io.trino.spi.connector.ColumnHandle;
import io.trino.spi.connector.Constraint;
import io.trino.spi.predicate.Domain;
import io.trino.spi.predicate.NullableValue;
import io.trino.spi.predicate.TupleDomain;
import io.trino.spi.statistics.ColumnStatistics;
import io.trino.spi.statistics.DoubleRange;
Expand All @@ -45,6 +44,7 @@
import java.util.function.Predicate;
import java.util.stream.Collectors;

import static com.google.common.base.Verify.verify;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static io.trino.plugin.iceberg.ExpressionConverter.toIcebergExpression;
import static io.trino.plugin.iceberg.IcebergUtil.getColumns;
Expand Down Expand Up @@ -198,14 +198,14 @@ private boolean dataFileMatches(
List<PartitionField> partitionFields,
Map<Integer, ColumnFieldDetails> fieldDetails)
{
// Currently this method is used only for IcebergMetadata.getTableStatistics and there Constraint never carries a predicate.
// TODO support pruning with constraint when this changes.
verify(constraint.predicate().isEmpty(), "Unexpected Constraint predicate");

TupleDomain<ColumnHandle> constraintSummary = constraint.getSummary();

Map<ColumnHandle, Domain> domains = constraintSummary.getDomains().get();

Predicate<Map<ColumnHandle, NullableValue>> predicate = constraint.predicate().orElse(value -> true);

ImmutableMap.Builder<ColumnHandle, NullableValue> nullableValueBuilder = ImmutableMap.builder();

for (int index = 0; index < partitionFields.size(); index++) {
PartitionField field = partitionFields.get(index);
int fieldId = field.sourceId();
Expand All @@ -216,21 +216,11 @@ private boolean dataFileMatches(
if (allowedDomain != null && !allowedDomain.includesNullableValue(value)) {
return false;
}
nullableValueBuilder.put(column, makeNullableValue(details.getTrinoType(), value));
}

if (constraint.getPredicateColumns().isPresent()) {
return predicate.test(nullableValueBuilder.build());
}

return true;
}

private NullableValue makeNullableValue(io.trino.spi.type.Type type, Object value)
{
return value == null ? NullableValue.asNull(type) : NullableValue.of(type, value);
}

public List<Type> partitionTypes(List<PartitionField> partitionFields, Map<Integer, Type.PrimitiveType> idToTypeMapping)
{
ImmutableList.Builder<Type> partitionTypeBuilder = ImmutableList.builder();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
*/
package io.trino.plugin.iceberg;

import com.google.common.base.VerifyException;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import io.trino.Session;
Expand All @@ -28,7 +29,6 @@
import io.trino.spi.predicate.NullableValue;
import io.trino.spi.predicate.TupleDomain;
import io.trino.spi.statistics.ColumnStatistics;
import io.trino.spi.statistics.DoubleRange;
import io.trino.spi.statistics.TableStatistics;
import io.trino.testing.BaseConnectorTest;
import io.trino.testing.MaterializedResult;
Expand Down Expand Up @@ -1517,24 +1517,28 @@ public void testStatisticsConstraints()
String insertStart = "INSERT INTO iceberg.tpch.test_simple_partitioned_table_statistics";
assertUpdate(insertStart + " VALUES (1, 101), (2, 102), (3, 103), (4, 104)", 4);
TableStatistics tableStatistics = getTableStatistics(tableName, new Constraint(TupleDomain.all()));

// TODO Change to use SHOW STATS FOR table_name when Iceberg applyFilter allows pushdown.
// Then I can get rid of the helper methods and direct use of TableStatistics

Predicate<Map<ColumnHandle, NullableValue>> predicate = new TestRelationalNumberPredicate("col1", 3, i -> i >= 0);
IcebergColumnHandle col1Handle = getColumnHandleFromStatistics(tableStatistics, "col1");
Constraint constraint = new Constraint(TupleDomain.all(), Optional.of(predicate), Optional.of(ImmutableSet.of(col1Handle)));
tableStatistics = getTableStatistics(tableName, constraint);
assertEquals(tableStatistics.getRowCount().getValue(), 2.0);
ColumnStatistics columnStatistics = getStatisticsForColumn(tableStatistics, "col1");
assertThat(columnStatistics.getRange()).hasValue(new DoubleRange(3, 4));

// This shows that Predicate<ColumnHandle, NullableValue> only filters rows for partitioned columns.
predicate = new TestRelationalNumberPredicate("col2", 102, i -> i >= 0);
tableStatistics = getTableStatistics(tableName, new Constraint(TupleDomain.all(), Optional.of(predicate), Optional.empty()));
assertEquals(tableStatistics.getRowCount().getValue(), 4.0);
columnStatistics = getStatisticsForColumn(tableStatistics, "col2");
assertThat(columnStatistics.getRange()).hasValue(new DoubleRange(101, 104));

// Constraint.predicate is currently not supported, because it's never provided by the engine.
// TODO add (restore) test coverage when this changes.

// predicate on a partition column
assertThatThrownBy(() ->
getTableStatistics(tableName, new Constraint(
TupleDomain.all(),
Optional.of(new TestRelationalNumberPredicate("col1", 3, i1 -> i1 >= 0)),
Optional.of(ImmutableSet.of(col1Handle)))))
.isInstanceOf(VerifyException.class)
.hasMessage("Unexpected Constraint predicate");

// predicate on an unspecified set of columns column
assertThatThrownBy(() ->
getTableStatistics(tableName, new Constraint(
TupleDomain.all(),
Optional.of(new TestRelationalNumberPredicate("col2", 102, i -> i >= 0)),
Optional.empty())))
.isInstanceOf(VerifyException.class)
.hasMessage("Unexpected Constraint predicate");

dropTable(tableName);
}
Expand Down

0 comments on commit cd9ca95

Please sign in to comment.