Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Initial refactoring of pushdown subscripts rule #12534

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;

import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.Optional;

Expand Down Expand Up @@ -63,7 +63,7 @@ public enum ColumnType
private final ColumnType columnType;
private final Optional<String> comment;
private final SubfieldPath subfieldPath;
private final ArrayList<SubfieldPath> referencedSubfields;
private final List<SubfieldPath> referencedSubfields;

@JsonCreator
public HiveColumnHandle(
Expand All @@ -74,7 +74,7 @@ public HiveColumnHandle(
@JsonProperty("columnType") ColumnType columnType,
@JsonProperty("comment") Optional<String> comment,
@JsonProperty("subfieldPath") SubfieldPath subfieldPath,
@JsonProperty("referencedSubfields") ArrayList<SubfieldPath> referencedSubfields)
@JsonProperty("referencedSubfields") List<SubfieldPath> referencedSubfields)
{
this.name = requireNonNull(name, "name is null");
checkArgument(hiveColumnIndex >= 0 || columnType == PARTITION_KEY || columnType == SYNTHESIZED, "hiveColumnIndex is negative");
Expand All @@ -88,12 +88,12 @@ public HiveColumnHandle(
}

public HiveColumnHandle(
String name,
HiveType hiveType,
TypeSignature typeSignature,
int hiveColumnIndex,
ColumnType columnType,
Optional<String> comment)
String name,
HiveType hiveType,
TypeSignature typeSignature,
int hiveColumnIndex,
ColumnType columnType,
Optional<String> comment)
{
this(name, hiveType, typeSignature, hiveColumnIndex, columnType, comment, null, null);
}
Expand Down Expand Up @@ -156,7 +156,7 @@ public SubfieldPath getSubfieldPath()
}

@JsonProperty
public ArrayList<SubfieldPath> getReferencedSubfields()
public List<SubfieldPath> getReferencedSubfields()
{
return referencedSubfields;
}
Expand Down Expand Up @@ -232,20 +232,14 @@ public boolean supportsSubfieldTupleDomain()
return true;
}

@Override
public boolean supportsSubfieldPruning()
{
return true;
}

@Override
public ColumnHandle createSubfieldColumnHandle(SubfieldPath path)
{
return new HiveColumnHandle(name, hiveType, typeName, hiveColumnIndex, columnType, comment, path, null);
}

@Override
public ColumnHandle createSubfieldPruningColumnHandle(ArrayList<SubfieldPath> referencedSubfields)
public ColumnHandle createSubfieldPruningColumnHandle(List<SubfieldPath> referencedSubfields)
{
return new HiveColumnHandle(name, hiveType, typeName, hiveColumnIndex, columnType, comment, null, referencedSubfields);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@
import com.facebook.presto.metadata.Metadata;
import com.facebook.presto.metadata.OperatorNotFoundException;
import com.facebook.presto.spi.SubfieldPath;
import com.facebook.presto.spi.SubfieldPath.LongSubscript;
import com.facebook.presto.spi.SubfieldPath.NestedField;
import com.facebook.presto.spi.SubfieldPath.StringSubscript;
import com.facebook.presto.spi.block.Block;
import com.facebook.presto.spi.function.FunctionHandle;
import com.facebook.presto.spi.predicate.DiscreteValues;
Expand Down Expand Up @@ -72,6 +75,8 @@
import static com.facebook.presto.sql.ExpressionUtils.combineConjuncts;
import static com.facebook.presto.sql.ExpressionUtils.combineDisjunctsWithDefault;
import static com.facebook.presto.sql.ExpressionUtils.or;
import static com.facebook.presto.sql.planner.SubfieldUtils.deferenceOrSubscriptExpressionToPath;
import static com.facebook.presto.sql.planner.SubfieldUtils.isDereferenceOrSubscriptExpression;
import static com.facebook.presto.sql.tree.BooleanLiteral.FALSE_LITERAL;
import static com.facebook.presto.sql.tree.BooleanLiteral.TRUE_LITERAL;
import static com.facebook.presto.sql.tree.ComparisonExpression.Operator.EQUAL;
Expand Down Expand Up @@ -113,30 +118,30 @@ public Expression toPredicate(TupleDomain<Symbol> tupleDomain)
.collect(collectingAndThen(toImmutableList(), ExpressionUtils::combineConjuncts));
}

private Expression toSymbolExpression(Symbol symbol)
private static Expression toSymbolExpression(Symbol symbol)
{
if (symbol instanceof SymbolWithSubfieldPath) {
SymbolWithSubfieldPath subfieldPath = (SymbolWithSubfieldPath) symbol;
SubfieldPath path = subfieldPath.getPath();
Expression base = new SymbolReference(path.getPath().get(0).getField());
SubfieldPath path = ((SymbolWithSubfieldPath) symbol).getSubfieldPath();
Expression base = new SymbolReference(symbol.getName());
for (int i = 1; i < path.getPath().size(); i++) {
SubfieldPath.PathElement element = path.getPath().get(i);
String field = element.getField();
if (element.getIsSubscript()) {
base = new SubscriptExpression(base, field != null ? new StringLiteral(field) : new LongLiteral(Long.valueOf(element.getSubscript()).toString()));
if (element instanceof NestedField) {
base = new DereferenceExpression(base, new Identifier(((NestedField) element).getName()));
}
else if (field != null) {
base = new DereferenceExpression(base, new Identifier(field));
else if (element instanceof LongSubscript) {
base = new SubscriptExpression(base, new LongLiteral(String.valueOf(((LongSubscript) element).getIndex())));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think there's some extra parenthesis here and below

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@tdcmeehan I don't see that. Would you show a rewrite?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You're right, I didn't read it correctly.

}
else if (element instanceof StringSubscript) {
base = new SubscriptExpression(base, new StringLiteral(((StringSubscript) element).getIndex()));
}
else {
throw new IllegalArgumentException("Bad subfield path in DomainTranslator");
throw new IllegalArgumentException("Unsupported path element: " + element.getClass().getSimpleName());
}
}
return base;
}
else {
return symbol.toSymbolReference();
}

return symbol.toSymbolReference();
}

private Expression toPredicate(Domain domain, Expression reference)
Expand Down Expand Up @@ -436,6 +441,7 @@ protected ExtractionResult visitComparisonExpression(ComparisonExpression node,
if (!optionalNormalized.isPresent()) {
return super.visitComparisonExpression(node, complement);
}

NormalizedSimpleComparison normalized = optionalNormalized.get();

Expression symbolExpression = normalized.getSymbolExpression();
Expand All @@ -445,7 +451,8 @@ protected ExtractionResult visitComparisonExpression(ComparisonExpression node,
Type type = value.getType(); // common type for symbol and value
return createComparisonExtractionResult(normalized.getComparisonOperator(), symbol, type, value.getValue(), complement);
}
else if (symbolExpression instanceof Cast) {

if (symbolExpression instanceof Cast) {
Cast castExpression = (Cast) symbolExpression;
if (!isImplicitCoercion(castExpression)) {
//
Expand Down Expand Up @@ -481,21 +488,23 @@ else if (symbolExpression instanceof Cast) {

return super.visitComparisonExpression(node, complement);
}
else if (includeSubfields && SubfieldUtils.isSubfieldPath(symbolExpression)) {
SubfieldPath path = SubfieldUtils.subfieldToSubfieldPath(symbolExpression);

if (includeSubfields && isDereferenceOrSubscriptExpression(symbolExpression)) {
SubfieldPath path = deferenceOrSubscriptExpressionToPath(symbolExpression);
if (path == null) {
return super.visitComparisonExpression(node, complement);
}
else {
Symbol symbol = new SymbolWithSubfieldPath(path);
NullableValue value = normalized.getValue();
Type type = value.getType(); // common type for symbol and value
return createComparisonExtractionResult(normalized.getComparisonOperator(), symbol, type, value.getValue(), complement);
}
}
else {
return super.visitComparisonExpression(node, complement);

NullableValue value = normalized.getValue();
return createComparisonExtractionResult(
normalized.getComparisonOperator(),
new SymbolWithSubfieldPath(path),
value.getType(),
value.getValue(),
complement);
}

return super.visitComparisonExpression(node, complement);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@
import com.facebook.presto.sql.planner.optimizations.PlanOptimizer;
import com.facebook.presto.sql.planner.optimizations.PredicatePushDown;
import com.facebook.presto.sql.planner.optimizations.PruneUnreferencedOutputs;
import com.facebook.presto.sql.planner.optimizations.PushdownSubscripts;
import com.facebook.presto.sql.planner.optimizations.ReplicateSemiJoinInDelete;
import com.facebook.presto.sql.planner.optimizations.SetFlatteningOptimizer;
import com.facebook.presto.sql.planner.optimizations.StatsRecordingPlanOptimizer;
Expand Down Expand Up @@ -470,6 +471,7 @@ public PlanOptimizers(
builder.add(inlineProjections);
builder.add(new UnaliasSymbolReferences()); // Run unalias after merging projections to simplify projections more efficiently
builder.add(new PruneUnreferencedOutputs());
builder.add(new PushdownSubscripts());

builder.add(new IterativeOptimizer(
ruleStats,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,78 +14,84 @@
package com.facebook.presto.sql.planner;

import com.facebook.presto.spi.SubfieldPath;
import com.facebook.presto.spi.type.TypeSignature;
import com.facebook.presto.sql.tree.DereferenceExpression;
import com.facebook.presto.sql.tree.Expression;
import com.facebook.presto.sql.tree.GenericLiteral;
import com.facebook.presto.sql.tree.LongLiteral;
import com.facebook.presto.sql.tree.Node;
import com.facebook.presto.sql.tree.StringLiteral;
import com.facebook.presto.sql.tree.SubscriptExpression;
import com.facebook.presto.sql.tree.SymbolReference;
import com.google.common.collect.ImmutableList;

import java.util.ArrayList;

import static java.util.Collections.reverse;
import static com.facebook.presto.spi.type.BigintType.BIGINT;

public class SubfieldUtils
{
private SubfieldUtils() {}

public static boolean isSubfieldPath(Node expression)
public static boolean isDereferenceOrSubscriptExpression(Node expression)
{
return expression instanceof DereferenceExpression || expression instanceof SubscriptExpression;
}

public static SubfieldPath subfieldToSubfieldPath(Node expr)
public static SubfieldPath deferenceOrSubscriptExpressionToPath(Node expression)
{
ArrayList<SubfieldPath.PathElement> steps = new ArrayList();
ImmutableList.Builder<SubfieldPath.PathElement> elements = ImmutableList.builder();
while (true) {
if (expr instanceof SymbolReference) {
SymbolReference symbolReference = (SymbolReference) expr;
steps.add(new SubfieldPath.PathElement(symbolReference.getName(), 0));
if (expression instanceof SymbolReference) {
elements.add(new SubfieldPath.NestedField(((SymbolReference) expression).getName()));
break;
}
else if (expr instanceof DereferenceExpression) {
DereferenceExpression dereference = (DereferenceExpression) expr;
steps.add(new SubfieldPath.PathElement(dereference.getField().getValue(), 0));
expr = dereference.getBase();

if (expression instanceof DereferenceExpression) {
DereferenceExpression dereference = (DereferenceExpression) expression;
elements.add(new SubfieldPath.NestedField(dereference.getField().getValue()));
expression = dereference.getBase();
}
else if (expr instanceof SubscriptExpression) {
SubscriptExpression subscript = (SubscriptExpression) expr;
else if (expression instanceof SubscriptExpression) {
SubscriptExpression subscript = (SubscriptExpression) expression;
Expression index = subscript.getIndex();
if (index instanceof LongLiteral) {
LongLiteral literal = (LongLiteral) index;
steps.add(new SubfieldPath.PathElement(null, literal.getValue(), true));
elements.add(new SubfieldPath.LongSubscript(((LongLiteral) index).getValue()));
}
else if (index instanceof StringLiteral) {
StringLiteral literal = (StringLiteral) index;
steps.add(new SubfieldPath.PathElement(literal.getValue(), 0, true));
elements.add(new SubfieldPath.StringSubscript(((StringLiteral) index).getValue()));
}
else if (index instanceof GenericLiteral) {
GenericLiteral literal = (GenericLiteral) index;
if (BIGINT.getTypeSignature().equals(TypeSignature.parseTypeSignature(literal.getType()))) {
elements.add(new SubfieldPath.LongSubscript(Long.valueOf(literal.getValue())));
}
else {
return null;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could this cause NPE on any of deferenceOrSubscriptExpressionToPath's callsites?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@tdcmeehan It might. Some callers check for null and some don't. This is an existing behavior, hence, I'd like to revisit it in some future PR.

}
}
else {
return null;
}
expr = subscript.getBase();
expression = subscript.getBase();
}
else {
return null;
}
}
reverse(steps);
return new SubfieldPath(steps);

return new SubfieldPath(elements.build().reverse());
}

public static Node getSubfieldBase(Node expr)
public static Expression getDerefenceOrSubscriptBase(Expression expression)
{
while (true) {
if (expr instanceof DereferenceExpression) {
DereferenceExpression dereference = (DereferenceExpression) expr;
expr = dereference.getBase();
if (expression instanceof DereferenceExpression) {
expression = ((DereferenceExpression) expression).getBase();
}
else if (expr instanceof SubscriptExpression) {
SubscriptExpression subscript = (SubscriptExpression) expr;
expr = subscript.getBase();
else if (expression instanceof SubscriptExpression) {
expression = ((SubscriptExpression) expression).getBase();
}
else {
return expr;
return expression;
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;

import java.util.Objects;

import static java.util.Objects.requireNonNull;

/**
Expand All @@ -38,7 +40,7 @@ public SymbolWithSubfieldPath(@JsonProperty("path") SubfieldPath path)
}

@JsonProperty("path")
public SubfieldPath getPath()
public SubfieldPath getSubfieldPath()
{
return path;
}
Expand All @@ -64,13 +66,14 @@ public boolean equals(Object o)
if (o == null || getClass() != o.getClass()) {
return false;
}

SymbolWithSubfieldPath other = (SymbolWithSubfieldPath) o;
return path.equals(other.getPath());
return Objects.equals(path, other.path);
}

@Override
public int hashCode()
{
return path.hashCode();
return Objects.hash(path);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -360,14 +360,11 @@ public static List<PlanNode> listTableLayouts(
private static ColumnHandle toColumnHandle(Map<Symbol, ColumnHandle> assignments, Symbol symbol)
{
if (symbol instanceof SymbolWithSubfieldPath) {
SymbolWithSubfieldPath subfieldSymbol = (SymbolWithSubfieldPath) symbol;
SubfieldPath path = subfieldSymbol.getPath();
ColumnHandle topColumn = assignments.get(new Symbol(path.getPath().get(0).getField()));
return topColumn.createSubfieldColumnHandle(path);
}
else {
return assignments.get(symbol);
SubfieldPath path = ((SymbolWithSubfieldPath) symbol).getSubfieldPath();
return assignments.get(path.getColumnName()).createSubfieldColumnHandle(path);
}

return assignments.get(symbol);
}

private static Symbol fromColumnHandle(Map<ColumnHandle, Symbol> assignments, ColumnHandle columnHandle)
Expand Down
Loading