Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Gen4: add __sq_has_values for in/not in subqueries #8894

Merged
merged 12 commits into from
Sep 30, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions go/test/endtoend/vtgate/misc_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -753,6 +753,19 @@ func TestSimpleOrderBy(t *testing.T) {
assertMatches(t, conn, `SELECT id2 FROM t1 ORDER BY id2 ASC`, `[[INT64(5)] [INT64(6)] [INT64(7)] [INT64(8)] [INT64(9)] [INT64(10)]]`)
}

// TestSubqueriesHasValues runs IN and NOT IN predicates against a subquery
// that matches no rows (the inserted id1 values are 0..5 while the subquery
// filters on id1 > 10) and checks the rows returned by the outer query.
func TestSubqueriesHasValues(t *testing.T) {
	defer cluster.PanicHandler(t)

	conn, err := mysql.Connect(context.Background(), &vtParams)
	require.NoError(t, err)
	defer conn.Close()

	// Deferred calls run last-in first-out, so the table is emptied
	// before the connection is closed.
	defer exec(t, conn, `delete from t1`)
	exec(t, conn, "insert into t1(id1, id2) values (0,1),(1,2),(2,3),(3,4),(4,5),(5,6)")

	cases := []struct {
		query    string
		expected string
	}{
		{
			query:    `SELECT id2 FROM t1 WHERE id1 IN (SELECT id1 FROM t1 WHERE id1 > 10)`,
			expected: `[]`,
		},
		{
			query:    `SELECT id2 FROM t1 WHERE id1 NOT IN (SELECT id1 FROM t1 WHERE id1 > 10) ORDER BY id2`,
			expected: `[[INT64(1)] [INT64(2)] [INT64(3)] [INT64(4)] [INT64(5)] [INT64(6)]]`,
		},
	}
	for _, tc := range cases {
		assertMatches(t, conn, tc.query, tc.expected)
	}
}

func TestSelectEqualUniqueOuterJoinRightPredicate(t *testing.T) {
defer cluster.PanicHandler(t)
ctx := context.Background()
Expand Down
40 changes: 37 additions & 3 deletions go/vt/sqlparser/analyzer.go
Original file line number Diff line number Diff line change
Expand Up @@ -326,8 +326,23 @@ func SplitAndExpression(filters []Expr, node Expr) []Expr {
return append(filters, node)
}

// AndExpressions ands together two expression, minimising the expr when possible
func AndExpressions(exprs ...Expr) Expr {
// SplitOrExpression flattens a tree of OR nodes into its operands and
// appends them to filters in left-to-right order.
//
// A nil node leaves filters untouched. Any node that is not an *OrExpr is
// appended as-is; nothing below such a node is inspected or unwrapped.
// Callers that recombine the returned expressions must take operator
// precedence into account themselves.
func SplitOrExpression(filters []Expr, node Expr) []Expr {
	if node == nil {
		return filters
	}
	orExpr, isOr := node.(*OrExpr)
	if !isOr {
		return append(filters, node)
	}
	// Left operand first so the output keeps the original ordering.
	withLeft := SplitOrExpression(filters, orExpr.Left)
	return SplitOrExpression(withLeft, orExpr.Right)
}

// joinExpressions joins together a list of Expr using the type of baseType as the operator (either AndExpr or OrExpr).
func joinExpressions(baseType Expr, exprs ...Expr) Expr {
switch len(exprs) {
case 0:
return nil
Expand All @@ -336,6 +351,9 @@ func AndExpressions(exprs ...Expr) Expr {
default:
result := (Expr)(nil)
for i, expr := range exprs {
if expr == nil {
continue
}
if result == nil {
result = expr
} else {
Expand All @@ -347,12 +365,28 @@ func AndExpressions(exprs ...Expr) Expr {
}
}
if !found {
result = &AndExpr{Left: result, Right: expr}
switch baseType.(type) {
case *AndExpr:
result = &AndExpr{Left: result, Right: expr}
case *OrExpr:
result = &OrExpr{Left: result, Right: expr}
}
}
}
}
return result
}

}

// AndExpressions combines two or more expressions with AND.
// The work is delegated to joinExpressions, which receives an empty *AndExpr
// to select the operator; any minimising of the result happens there.
func AndExpressions(exprs ...Expr) Expr {
	operator := new(AndExpr)
	return joinExpressions(operator, exprs...)
}

// OrExpressions combines two or more expressions with OR.
// The work is delegated to joinExpressions, which receives an empty *OrExpr
// to select the operator; any minimising of the result happens there.
func OrExpressions(exprs ...Expr) Expr {
	operator := new(OrExpr)
	return joinExpressions(operator, exprs...)
}

// TableFromStatement returns the qualified table name for the query.
Expand Down
16 changes: 16 additions & 0 deletions go/vt/sqlparser/ast_rewriting.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,22 @@ func (r *ReservedVars) ReserveSubQuery() string {
}
}

// ReserveSubQueryWithHasValues reserves and returns a pair of argument names
// sharing the same numeric suffix: the first is built from
// subQueryBaseArgName, the second from HasValueSubQueryBaseName.
//
// The counter sqNext is advanced until a suffix is found for which neither
// name is already present in the reserved set; both names are then recorded
// as reserved before being returned.
func (r *ReservedVars) ReserveSubQueryWithHasValues() (string, string) {
	for {
		r.sqNext++
		argName := string(strconv.AppendInt(subQueryBaseArgName, r.sqNext, 10))
		hasValuesName := string(strconv.AppendInt(HasValueSubQueryBaseName, r.sqNext, 10))

		// Both names must be free for this suffix; otherwise try the next one.
		if _, taken := r.reserved[argName]; taken {
			continue
		}
		if _, taken := r.reserved[hasValuesName]; taken {
			continue
		}

		r.reserved[argName] = struct{}{}
		r.reserved[hasValuesName] = struct{}{}
		return argName, hasValuesName
	}
}

// ReserveHasValuesSubQuery returns the next argument name to replace subquery with has value.
func (r *ReservedVars) ReserveHasValuesSubQuery() string {
for {
Expand Down
12 changes: 7 additions & 5 deletions go/vt/vtgate/planbuilder/abstract/operator.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ type (
// * SubQuery - Represents a query that encapsulates one or more sub-queries (SubQueryInner).
// * Vindex - Represents a query that selects from vindex tables.
// * Concatenate - Represents concatenation of the outputs of all the input sources
// * Distinct - Represents elimination of duplicates from the output of the input source
Operator interface {
// TableID returns a TableSet of the tables contained within
TableID() semantics.TableSet
Expand Down Expand Up @@ -211,10 +210,13 @@ func createOperatorFromSelect(sel *sqlparser.Select, semTable *semantics.SemTabl
return nil, err
}
resultantOp.Inner = append(resultantOp.Inner, &SubQueryInner{
SelectStatement: subquerySelectStatement,
Inner: opInner,
Type: sq.OpCode,
ArgName: sq.ArgName,
SelectStatement: subquerySelectStatement,
Inner: opInner,
Type: sq.OpCode,
ArgName: sq.ArgName,
HasValues: sq.HasValues,
ExprsNeedReplace: sq.ExprsNeedReplace,
ReplaceBy: sq.ReplaceBy,
})
}
}
Expand Down
31 changes: 28 additions & 3 deletions go/vt/vtgate/planbuilder/abstract/subquery.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,35 @@ var _ Operator = (*SubQuery)(nil)

// SubQueryInner stores the subquery information for a select statement
type SubQueryInner struct {
Inner Operator
Type engine.PulloutOpcode
// Inner is the Operator inside the parenthesis of the subquery.
// i.e: select (select 1 union select 1), the Inner here would be
// of type Concatenate since we have a Union.
Inner Operator

// Type represents the type of the subquery (value, in, not in, exists)
Type engine.PulloutOpcode

// SelectStatement is the inner's select
SelectStatement *sqlparser.Select
ArgName string

// ArgName is the substitution argument string for the subquery.
// Subquery argument name looks like: `__sq1`, with `1` being a
// unique identifier. This is used when we wish to replace the
// subquery by an argument for PullOut subqueries.
ArgName string

// HasValues is a string of form `__sq_has_values1` with `1` being
// a unique identifier that matches the one used in ArgName.
// We use `__sq_has_values` for in and not in subqueries.
HasValues string

// ExprsNeedReplace is a slice of all the expressions that were
// introduced by the rewrite of the subquery and that potentially
// need to be replaced again if we can merge the subquery into a route.
// An expression that contains at least all of ExprsNeedReplace will
// be replaced by the expression in ReplaceBy.
ExprsNeedReplace []sqlparser.Expr
ReplaceBy sqlparser.Expr
}

// TableID implements the Operator interface
Expand Down
11 changes: 6 additions & 5 deletions go/vt/vtgate/planbuilder/gen4_planner.go
Original file line number Diff line number Diff line change
Expand Up @@ -178,10 +178,11 @@ func newBuildSelectPlan(selStmt sqlparser.SelectStatement, reservedVars *sqlpars

func newPlanningContext(reservedVars *sqlparser.ReservedVars, semTable *semantics.SemTable, vschema ContextVSchema) *planningContext {
ctx := &planningContext{
reservedVars: reservedVars,
semTable: semTable,
vschema: vschema,
sqToReplace: map[string]*sqlparser.Select{},
reservedVars: reservedVars,
semTable: semTable,
vschema: vschema,
argToReplaceBySelect: map[string]*sqlparser.Select{},
exprToReplaceBySqExpr: map[sqlparser.Expr]sqlparser.Expr{},
}
return ctx
}
Expand Down Expand Up @@ -216,7 +217,7 @@ func planHorizon(ctx *planningContext, plan logicalPlan, in sqlparser.SelectStat
sel: node,
}

replaceSubQuery(ctx.sqToReplace, node)
replaceSubQuery(ctx.exprToReplaceBySqExpr, node)
var err error
plan, err = hp.planHorizon(ctx, plan)
if err != nil {
Expand Down
63 changes: 40 additions & 23 deletions go/vt/vtgate/planbuilder/querytree_transformers.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ func transformSubqueryTree(ctx *planningContext, n *subqueryTree) (logicalPlan,
return nil, err
}

plan := newPulloutSubquery(n.opcode, n.argName, "", innerPlan)
plan := newPulloutSubquery(n.opcode, n.argName, n.hasValues, innerPlan)
outerPlan, err := transformToLogicalPlan(ctx, n.outer)
if err != nil {
return nil, err
Expand Down Expand Up @@ -324,7 +324,7 @@ func transformRoutePlan(ctx *planningContext, n *routeTree) (*route, error) {
Comments: ctx.semTable.Comments,
}

replaceSubQuery(ctx.sqToReplace, sel)
replaceSubQuery(ctx.exprToReplaceBySqExpr, sel)

// TODO clean up when gen4 is the only planner
var condition sqlparser.Expr
Expand Down Expand Up @@ -439,39 +439,56 @@ func relToTableExpr(t relation) (sqlparser.TableExpr, error) {
}

type subQReplacer struct {
sqToReplace map[string]*sqlparser.Select
err error
replaced bool
exprToReplaceBySqExpr map[sqlparser.Expr]sqlparser.Expr
replaced bool
}

func (sqr *subQReplacer) replacer(cursor *sqlparser.Cursor) bool {
argName := argumentName(cursor.Node())
if argName == "" {
var exprs []sqlparser.Expr
switch node := cursor.Node().(type) {
case *sqlparser.AndExpr:
exprs = sqlparser.SplitAndExpression(nil, node)
case *sqlparser.OrExpr:
exprs = sqlparser.SplitOrExpression(nil, node)
case sqlparser.Argument:
exprs = append(exprs, node)
case sqlparser.ListArg:
exprs = append(exprs, node)
case *sqlparser.ExistsExpr:
exprs = append(exprs, node)
default:
return true
}

var node sqlparser.SQLNode
subqSelect, exists := sqr.sqToReplace[argName]
if !exists {
sqr.err = vterrors.Errorf(vtrpcpb.Code_INTERNAL, "[BUG] unable to find subquery with argument: %s", argName)
return false
var replaceBy sqlparser.Expr
var remainder sqlparser.Expr
for _, expr := range exprs {
found := false
for sqExprToReplace, replaceByExpr := range sqr.exprToReplaceBySqExpr {
if sqlparser.EqualsExpr(expr, sqExprToReplace) {
allReplaceByExprs := sqlparser.SplitAndExpression(nil, replaceBy)
allReplaceByExprs = append(allReplaceByExprs, replaceByExpr)
replaceBy = sqlparser.AndExpressions(allReplaceByExprs...)
found = true
break
}
}
if !found {
remainder = sqlparser.AndExpressions(remainder, expr)
}
}
sq := &sqlparser.Subquery{Select: subqSelect}
node = sq

// if the subquery is in an EXISTS, e.g. "__sq_has_values1"
// then we encapsulate the subquery in an exists expression.
if strings.HasPrefix(argName, string(sqlparser.HasValueSubQueryBaseName)) {
node = &sqlparser.ExistsExpr{Subquery: sq}
if replaceBy == nil {
return true
}
cursor.Replace(node)
newNode := sqlparser.AndExpressions(remainder, replaceBy)
cursor.Replace(newNode)
sqr.replaced = true
return false
}

func replaceSubQuery(sqToReplace map[string]*sqlparser.Select, sel *sqlparser.Select) {
if len(sqToReplace) > 0 {
sqr := &subQReplacer{sqToReplace: sqToReplace}
func replaceSubQuery(exprToReplaceBySqExpr map[sqlparser.Expr]sqlparser.Expr, sel *sqlparser.Select) {
if len(exprToReplaceBySqExpr) > 0 {
sqr := &subQReplacer{exprToReplaceBySqExpr: exprToReplaceBySqExpr}
sqlparser.Rewrite(sel, sqr.replacer, nil)
for sqr.replaced {
// to handle subqueries inside subqueries, we need to do this again and again until no replacements are left
Expand Down
Loading