Skip to content

Commit

Permalink
optimize queries where lhs and rhs of predicate are equal (#10444)
Browse files Browse the repository at this point in the history
  • Loading branch information
jadami10 authored Mar 28, 2023
1 parent d40cba4 commit c6c4a29
Show file tree
Hide file tree
Showing 8 changed files with 377 additions and 162 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,8 @@ private void validateQueries() {
for (String queryString : _queryWeightMap.keySet()) {
try {
PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(queryString);
// TODO: we should catch and log errors here so we don't fail queries on optimization.
// For now, because this modifies the query in place, we let the error propagate.
_queryOptimizer.optimize(pinotQuery, _schema);
QueryContext queryContext = QueryContextConverterUtils.getQueryContext(pinotQuery);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import org.apache.pinot.common.request.PinotQuery;
import org.apache.pinot.core.query.optimizer.filter.FilterOptimizer;
import org.apache.pinot.core.query.optimizer.filter.FlattenAndOrFilterOptimizer;
import org.apache.pinot.core.query.optimizer.filter.IdenticalPredicateFilterOptimizer;
import org.apache.pinot.core.query.optimizer.filter.MergeEqInFilterOptimizer;
import org.apache.pinot.core.query.optimizer.filter.MergeRangeFilterOptimizer;
import org.apache.pinot.core.query.optimizer.filter.NumericalFilterOptimizer;
Expand All @@ -43,8 +44,9 @@ public class QueryOptimizer {
// - TimePredicateFilterOptimizer and MergeRangeFilterOptimizer relies on NumericalFilterOptimizer to convert the
// values to the proper format so that they can be properly parsed
private static final List<FilterOptimizer> FILTER_OPTIMIZERS =
Arrays.asList(new FlattenAndOrFilterOptimizer(), new MergeEqInFilterOptimizer(), new NumericalFilterOptimizer(),
new TimePredicateFilterOptimizer(), new MergeRangeFilterOptimizer());
Arrays.asList(new FlattenAndOrFilterOptimizer(), new IdenticalPredicateFilterOptimizer(),
new MergeEqInFilterOptimizer(), new NumericalFilterOptimizer(), new TimePredicateFilterOptimizer(),
new MergeRangeFilterOptimizer());

private static final List<StatementOptimizer> STATEMENT_OPTIMIZERS =
Collections.singletonList(new StringPredicateFilterOptimizer());
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.pinot.core.query.optimizer.filter;

import java.util.List;
import javax.annotation.Nullable;
import org.apache.pinot.common.request.Expression;
import org.apache.pinot.common.request.Function;
import org.apache.pinot.common.utils.request.RequestUtils;
import org.apache.pinot.spi.data.Schema;
import org.apache.pinot.sql.FilterKind;


/**
* This base class acts as a helper for any optimizer that is effectively removing filter conditions.
* It provides TRUE/FALSE literal classes that can be used to replace filter expressions that are always true/false.
* It provides an optimization implementation for AND/OR/NOT expressions.
*/
public abstract class BaseAndOrBooleanFilterOptimizer implements FilterOptimizer {

protected static final Expression TRUE = RequestUtils.getLiteralExpression(true);
protected static final Expression FALSE = RequestUtils.getLiteralExpression(false);

/**
* This recursively optimizes each part of the filter expression. For any AND/OR/NOT,
* we optimize each child, then we optimize the remaining statement. If there is only
* a child statement, we optimize that.
*/
@Override
public Expression optimize(Expression filterExpression, @Nullable Schema schema) {
if (!canBeOptimized(filterExpression, schema)) {
return filterExpression;
}

Function function = filterExpression.getFunctionCall();
List<Expression> operands = function.getOperands();
FilterKind kind = FilterKind.valueOf(function.getOperator());
switch (kind) {
case AND:
case OR:
case NOT:
// Recursively traverse the expression tree to find an operator node that can be rewritten.
operands.replaceAll(operand -> optimize(operand, schema));

// We have rewritten the child operands, so rewrite the parent if needed.
return optimizeCurrent(filterExpression);
default:
return optimizeChild(filterExpression, schema);
}
}

abstract boolean canBeOptimized(Expression filterExpression, @Nullable Schema schema);

/**
* Optimize any cases that are not AND/OR/NOT. This should be done by converting any cases
* that are always true to TRUE or always false to FALSE.
*/
abstract Expression optimizeChild(Expression filterExpression, @Nullable Schema schema);

/**
* If any of the operands of AND function is "false", then the AND function itself is false and can be replaced with
* "false" literal. Otherwise, remove all the "true" operands of the AND function. Similarly, if any of the operands
* of OR function is "true", then the OR function itself is true and can be replaced with "true" literal. Otherwise,
* remove all the "false" operands of the OR function.
*/
protected Expression optimizeCurrent(Expression expression) {
Function function = expression.getFunctionCall();
String operator = function.getOperator();
List<Expression> operands = function.getOperands();
if (operator.equals(FilterKind.AND.name())) {
// If any of the literal operands are always false, then replace AND function with FALSE.
for (Expression operand : operands) {
if (operand.equals(FALSE)) {
return FALSE;
}
}

// Remove all Literal operands that are always true.
operands.removeIf(operand -> operand.equals(TRUE));
if (operands.isEmpty()) {
return TRUE;
}
} else if (operator.equals(FilterKind.OR.name())) {
// If any of the literal operands are always true, then replace OR function with TRUE
for (Expression operand : operands) {
if (operand.equals(TRUE)) {
return TRUE;
}
}

// Remove all Literal operands that are always false.
operands.removeIf(operand -> operand.equals(FALSE));
if (operands.isEmpty()) {
return FALSE;
}
} else if (operator.equals(FilterKind.NOT.name())) {
assert operands.size() == 1;
Expression operand = operands.get(0);
if (operand.equals(TRUE)) {
return FALSE;
}
if (operand.equals(FALSE)) {
return TRUE;
}
}
return expression;
}

/** Change the expression value to boolean literal with given value. */
protected static Expression getExpressionFromBoolean(boolean value) {
return value ? TRUE : FALSE;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ public Expression optimize(Expression filterExpression, @Nullable Schema schema)

private Expression optimize(Expression filterExpression) {
Function function = filterExpression.getFunctionCall();
if (function == null) {
return filterExpression;
}
String operator = function.getOperator();
if (!operator.equals(FilterKind.AND.name()) && !operator.equals(FilterKind.OR.name())) {
return filterExpression;
Expand All @@ -50,7 +53,7 @@ private Expression optimize(Expression filterExpression) {
for (Expression child : children) {
Expression optimizedChild = optimize(child);
Function childFunction = optimizedChild.getFunctionCall();
if (childFunction.getOperator().equals(operator)) {
if (childFunction != null && childFunction.getOperator().equals(operator)) {
newChildren.addAll(childFunction.getOperands());
} else {
newChildren.add(optimizedChild);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.pinot.core.query.optimizer.filter;

import java.util.List;
import javax.annotation.Nullable;
import org.apache.pinot.common.request.Expression;
import org.apache.pinot.common.request.Function;
import org.apache.pinot.spi.data.Schema;
import org.apache.pinot.sql.FilterKind;


/**
* This optimizer converts all predicates where the left hand side == right hand side to
* a simple TRUE/FALSE literal value. While filters like, WHERE 1=1 OR "col1"="col1" are not
* typical, they end up expensive in Pinot because they are rewritten as A-A==0.
*/
public class IdenticalPredicateFilterOptimizer extends BaseAndOrBooleanFilterOptimizer {

@Override
boolean canBeOptimized(Expression filterExpression, @Nullable Schema schema) {
// if there's no function call, there's no lhs or rhs
return filterExpression.getFunctionCall() != null;
}

@Override
Expression optimizeChild(Expression filterExpression, @Nullable Schema schema) {
Function function = filterExpression.getFunctionCall();
FilterKind kind = FilterKind.valueOf(function.getOperator());
switch (kind) {
case EQUALS:
if (hasIdenticalLhsAndRhs(function.getOperands())) {
return TRUE;
}
break;
case NOT_EQUALS:
if (hasIdenticalLhsAndRhs(function.getOperands())) {
return FALSE;
}
break;
default:
break;
}
return filterExpression;
}

/**
* Pinot queries of the WHERE 1 != 1 AND "col1" = "col2" variety are rewritten as
* 1-1 != 0 AND "col1"-"col2" = 0. Therefore, we check specifically for the case where
* the operand is set up in this fashion.
*
* We return false specifically after every check to ensure we're only continuing when
* the input looks as expected. Otherwise, it's easy to for one of the operand functions
* to return null and fail the query.
*
* TODO: The rewrite is already happening in PredicateComparisonRewriter.updateFunctionExpression(),
* so we might just compare the lhs and rhs there.
*/
private boolean hasIdenticalLhsAndRhs(List<Expression> operands) {
boolean hasTwoChildren = operands.size() == 2;
Expression firstChild = operands.get(0);
if (firstChild.getFunctionCall() == null || !hasTwoChildren) {
return false;
}
boolean firstChildIsMinusOperator = firstChild.getFunctionCall().getOperator().equals("minus");
if (!firstChildIsMinusOperator) {
return false;
}
boolean firstChildHasTwoOperands = firstChild.getFunctionCall().getOperandsSize() == 2;
if (!firstChildHasTwoOperands) {
return false;
}
Expression minusOperandFirstChild = firstChild.getFunctionCall().getOperands().get(0);
Expression minusOperandSecondChild = firstChild.getFunctionCall().getOperands().get(1);
if (minusOperandFirstChild == null || minusOperandSecondChild == null || !minusOperandFirstChild.equals(
minusOperandSecondChild)) {
return false;
}
Expression secondChild = operands.get(1);
return isLiteralZero(secondChild);
}

private boolean isLiteralZero(Expression expression) {
if (!expression.isSetLiteral()) {
return false;
}
Object literalValue = expression.getLiteral().getFieldValue();
return literalValue.equals(0) || literalValue.equals(0L) || literalValue.equals(0d);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ public Expression optimize(Expression filterExpression, @Nullable Schema schema)

private Expression optimize(Expression filterExpression) {
Function function = filterExpression.getFunctionCall();
if (function == null) {
return filterExpression;
}
String operator = function.getOperator();
if (operator.equals(FilterKind.OR.name())) {
List<Expression> children = function.getOperands();
Expand All @@ -66,49 +69,53 @@ private Expression optimize(Expression filterExpression) {
// Iterate over all the child filters to merge EQ and IN predicates
for (Expression child : children) {
Function childFunction = child.getFunctionCall();
String childOperator = childFunction.getOperator();
assert !childOperator.equals(FilterKind.OR.name());
if (childOperator.equals(FilterKind.AND.name()) || childOperator.equals(FilterKind.NOT.name())) {
childFunction.getOperands().replaceAll(this::optimize);
if (childFunction == null) {
newChildren.add(child);
} else if (childOperator.equals(FilterKind.EQUALS.name())) {
List<Expression> operands = childFunction.getOperands();
Expression lhs = operands.get(0);
Expression value = operands.get(1);
Set<Expression> values = valuesMap.get(lhs);
if (values == null) {
values = new HashSet<>();
values.add(value);
valuesMap.put(lhs, values);
} else {
values.add(value);
// Recreate filter when multiple predicates can be merged
recreateFilter = true;
}
} else if (childOperator.equals(FilterKind.IN.name())) {
List<Expression> operands = childFunction.getOperands();
Expression lhs = operands.get(0);
Set<Expression> inPredicateValuesSet = new HashSet<>();
int numOperands = operands.size();
for (int i = 1; i < numOperands; i++) {
inPredicateValuesSet.add(operands.get(i));
}
int numUniqueValues = inPredicateValuesSet.size();
if (numUniqueValues == 1 || numUniqueValues != numOperands - 1) {
// Recreate filter when the IN predicate contains only 1 value (can be rewritten to EQ predicate), or values
// can be de-duplicated
recreateFilter = true;
}
Set<Expression> values = valuesMap.get(lhs);
if (values == null) {
valuesMap.put(lhs, inPredicateValuesSet);
} else {
String childOperator = childFunction.getOperator();
assert !childOperator.equals(FilterKind.OR.name());
if (childOperator.equals(FilterKind.AND.name()) || childOperator.equals(FilterKind.NOT.name())) {
childFunction.getOperands().replaceAll(this::optimize);
newChildren.add(child);
} else if (childOperator.equals(FilterKind.EQUALS.name())) {
List<Expression> operands = childFunction.getOperands();
Expression lhs = operands.get(0);
Expression value = operands.get(1);
Set<Expression> values = valuesMap.get(lhs);
if (values == null) {
values = new HashSet<>();
values.add(value);
valuesMap.put(lhs, values);
} else {
values.add(value);
// Recreate filter when multiple predicates can be merged
recreateFilter = true;
}
} else if (childOperator.equals(FilterKind.IN.name())) {
List<Expression> operands = childFunction.getOperands();
Expression lhs = operands.get(0);
Set<Expression> inPredicateValuesSet = new HashSet<>();
int numOperands = operands.size();
for (int i = 1; i < numOperands; i++) {
inPredicateValuesSet.add(operands.get(i));
}
int numUniqueValues = inPredicateValuesSet.size();
if (numUniqueValues == 1 || numUniqueValues != numOperands - 1) {
// Recreate filter when the IN predicate contains only 1 value (can be rewritten to EQ predicate),
// or values can be de-duplicated
recreateFilter = true;
}
Set<Expression> values = valuesMap.get(lhs);
if (values == null) {
valuesMap.put(lhs, inPredicateValuesSet);
} else {
values.addAll(inPredicateValuesSet);
// Recreate filter when multiple predicates can be merged
recreateFilter = true;
}
} else {
values.addAll(inPredicateValuesSet);
// Recreate filter when multiple predicates can be merged
recreateFilter = true;
newChildren.add(child);
}
} else {
newChildren.add(child);
}
}

Expand Down
Loading

0 comments on commit c6c4a29

Please sign in to comment.