planner: generate IndexMergePath in physical optimization (#10512)

pingcap · Jul 5, 2019 · 407e50d · 407e50d
1 parent 7177291
commit 407e50d
Show file tree

Hide file tree

Showing 8 changed files with 291 additions and 11 deletions.
diff --git a/planner/core/indexmerge_test.go b/planner/core/indexmerge_test.go
@@ -0,0 +1,141 @@
+// Copyright 2019 PingCAP, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package core
+
+import (
+	. "github.com/pingcap/check"
+	"github.com/pingcap/parser"
+	"github.com/pingcap/parser/ast"
+	"github.com/pingcap/parser/model"
+	"github.com/pingcap/tidb/infoschema"
+	"github.com/pingcap/tidb/sessionctx"
+	"github.com/pingcap/tidb/util/testleak"
+)
+
+// Register testIndexMergeSuite with the check framework.
+var _ = Suite(&testIndexMergeSuite{})
+
+// testIndexMergeSuite bundles the fixtures used by the IndexMerge path tests.
+type testIndexMergeSuite struct {
+	// Embedded parser; the tests call its ParseOneStmt method directly on the suite.
+	*parser.Parser
+
+	// is is the mock info schema assigned in SetUpSuite.
+	is  infoschema.InfoSchema
+	// ctx is the mock session context assigned in SetUpSuite.
+	ctx sessionctx.Context
+}
+
+// SetUpSuite initializes the shared fixtures: a mock info schema built from the
+// mock table and mock view, a mock session context, and a fresh SQL parser.
+func (s *testIndexMergeSuite) SetUpSuite(c *C) {
+	mockTables := []*model.TableInfo{MockTable(), MockView()}
+	s.is = infoschema.MockInfoSchema(mockTables)
+	s.ctx = MockContext()
+	s.Parser = parser.New()
+}
+
+// getIndexMergePathDigest renders paths[startIndex:] as a compact, comparable string.
+// Each path becomes "{Idxs:[...],TbFilters:[...]}", listing the lower-case names of its
+// partial index paths and the String() form of its table filters; entries are
+// comma-separated and the whole result is wrapped in "[" and "]". When there is no
+// path at or after startIndex the result is "[]".
+func getIndexMergePathDigest(paths []*accessPath, startIndex int) string {
+	digest := "["
+	for i := startIndex; i < len(paths); i++ {
+		if i > startIndex {
+			digest += ","
+		}
+		digest += "{Idxs:["
+		for j, partial := range paths[i].partialIndexPaths {
+			if j != 0 {
+				digest += ","
+			}
+			digest += partial.index.Name.L
+		}
+		digest += "],TbFilters:["
+		for j, filter := range paths[i].tableFilters {
+			if j != 0 {
+				digest += ","
+			}
+			digest += filter.String()
+		}
+		digest += "]}"
+	}
+	return digest + "]"
+}
+
+// TestIndexMergePathGenerateion checks which IndexMerge paths DataSource stats derivation
+// appends to possibleAccessPaths for a set of queries. Each case is parsed, built and
+// logically optimized; the DataSource is then located by walking first children, index
+// merge is enabled on its session, and the paths added by recursiveDeriveStats are
+// compared (via getIndexMergePathDigest) with the expected digest.
+func (s *testIndexMergeSuite) TestIndexMergePathGenerateion(c *C) {
+	defer testleak.AfterTest(c)()
+	tests := []struct {
+		sql            string
+		idxMergeDigest string
+	}{
+		{
+			sql:            "select * from t",
+			idxMergeDigest: "[]",
+		},
+		{
+			sql:            "select * from t where c < 1",
+			idxMergeDigest: "[]",
+		},
+		{
+			sql:            "select * from t where c < 1 or f > 2",
+			idxMergeDigest: "[{Idxs:[c_d_e,f_g],TbFilters:[]}]",
+		},
+		{
+			sql: "select * from t where (c < 1 or f > 2) and (c > 5 or f < 7)",
+			idxMergeDigest: "[{Idxs:[c_d_e,f_g],TbFilters:[or(gt(test.t.c, 5), lt(test.t.f, 7))]}," +
+				"{Idxs:[c_d_e,f_g],TbFilters:[or(lt(test.t.c, 1), gt(test.t.f, 2))]}]",
+		},
+		{
+			sql: "select * from t where (c < 1 or f > 2) and (c > 5 or f < 7) and (c < 1 or g > 2)",
+			idxMergeDigest: "[{Idxs:[c_d_e,f_g],TbFilters:[or(gt(test.t.c, 5), lt(test.t.f, 7)),or(lt(test.t.c, 1), gt(test.t.g, 2))]}," +
+				"{Idxs:[c_d_e,f_g],TbFilters:[or(lt(test.t.c, 1), gt(test.t.f, 2)),or(lt(test.t.c, 1), gt(test.t.g, 2))]}," +
+				"{Idxs:[c_d_e,g],TbFilters:[or(lt(test.t.c, 1), gt(test.t.f, 2)),or(gt(test.t.c, 5), lt(test.t.f, 7))]}]",
+		},
+		{
+			sql: "select * from t where (c < 1 or f > 2) and (c > 5 or f < 7) and (e < 1 or f > 2)",
+			idxMergeDigest: "[{Idxs:[c_d_e,f_g],TbFilters:[or(gt(test.t.c, 5), lt(test.t.f, 7)),or(lt(test.t.e, 1), gt(test.t.f, 2))]}," +
+				"{Idxs:[c_d_e,f_g],TbFilters:[or(lt(test.t.c, 1), gt(test.t.f, 2)),or(lt(test.t.e, 1), gt(test.t.f, 2))]}]",
+		},
+	}
+	for i, tc := range tests {
+		// comment is attached to every assertion so a failure identifies the case.
+		comment := Commentf("case:%v sql:%s", i, tc.sql)
+		stmt, err := s.ParseOneStmt(tc.sql, "", "")
+		c.Assert(err, IsNil, comment)
+		Preprocess(s.ctx, stmt, s.is)
+		builder := &PlanBuilder{
+			ctx:       MockContext(),
+			is:        s.is,
+			colMapper: make(map[*ast.ColumnNameExpr]int),
+		}
+		p, err := builder.Build(stmt)
+		if err != nil {
+			// A build failure is compared against the expected digest string.
+			c.Assert(err.Error(), Equals, tc.idxMergeDigest, comment)
+			continue
+		}
+		p, err = logicalOptimize(builder.optFlag, p.(LogicalPlan))
+		c.Assert(err, IsNil, comment)
+		lp := p.(LogicalPlan)
+		// Walk down the first child of each operator until the DataSource is reached.
+		var ds *DataSource
+		for ds == nil {
+			if v, ok := lp.(*DataSource); ok {
+				ds = v
+				break
+			}
+			children := lp.Children()
+			// Fail with context instead of panicking on an out-of-range index
+			// when a leaf operator is not a DataSource.
+			c.Assert(len(children) > 0, Equals, true, comment)
+			lp = children[0]
+		}
+		ds.ctx.GetSessionVars().EnableIndexMerge = true
+		// Paths appended past this index during stats derivation are the IndexMerge paths.
+		idxMergeStartIndex := len(ds.possibleAccessPaths)
+		_, err = lp.recursiveDeriveStats()
+		c.Assert(err, IsNil, comment)
+		c.Assert(getIndexMergePathDigest(ds.possibleAccessPaths, idxMergeStartIndex), Equals, tc.idxMergeDigest, comment)
+	}
+}
diff --git a/planner/core/logical_plans.go b/planner/core/logical_plans.go
@@ -351,7 +351,8 @@ type DataSource struct {
 	handleCol *expression.Column
 }
 
-// accessPath tells how we access one index or just access table.
+// accessPath indicates the way we access a table: by using single index, or by using multiple indexes,
+// or just by using table scan.
 type accessPath struct {
 	index      *model.IndexInfo
 	idxCols    []*expression.Column
@@ -369,15 +370,18 @@ type accessPath struct {
 	isTablePath bool
 	// forced means this path is generated by `use/force index()`.
 	forced bool
+	// partialIndexPaths stores all index access paths.
+	// If there are extra filters, they are stored in tableFilters.
+	partialIndexPaths []*accessPath
 }
 
 // deriveTablePathStats will fulfill the information that the accessPath need.
 // And it will check whether the primary key is covered only by point query.
-func (ds *DataSource) deriveTablePathStats(path *accessPath) (bool, error) {
+func (ds *DataSource) deriveTablePathStats(path *accessPath, conds []expression.Expression) (bool, error) {
 	var err error
 	sc := ds.ctx.GetSessionVars().StmtCtx
 	path.countAfterAccess = float64(ds.statisticTable.Count)
-	path.tableFilters = ds.pushedDownConds
+	path.tableFilters = conds
 	var pkCol *expression.Column
 	columnLen := len(ds.schema.Columns)
 	isUnsigned := false
@@ -395,10 +399,10 @@ func (ds *DataSource) deriveTablePathStats(path *accessPath) (bool, error) {
 	}
 
 	path.ranges = ranger.FullIntRange(isUnsigned)
-	if len(ds.pushedDownConds) == 0 {
+	if len(conds) == 0 {
 		return false, nil
 	}
-	path.accessConds, path.tableFilters = ranger.DetachCondsForColumn(ds.ctx, ds.pushedDownConds, pkCol)
+	path.accessConds, path.tableFilters = ranger.DetachCondsForColumn(ds.ctx, conds, pkCol)
 	// If there's no access cond, we try to find that whether there's expression containing correlated column that
 	// can be used to access data.
 	corColInAccessConds := false
@@ -478,7 +482,8 @@ func (ds *DataSource) getHandleCol() *expression.Column {
 // deriveIndexPathStats will fulfill the information that the accessPath need.
 // And it will check whether this index is full matched by point query. We will use this check to
 // determine whether we remove other paths or not.
-func (ds *DataSource) deriveIndexPathStats(path *accessPath) (bool, error) {
+// conds is the conditions used to generate the DetachRangeResult for path.
+func (ds *DataSource) deriveIndexPathStats(path *accessPath, conds []expression.Expression) (bool, error) {
 	sc := ds.ctx.GetSessionVars().StmtCtx
 	path.ranges = ranger.FullRange()
 	path.countAfterAccess = float64(ds.statisticTable.Count)
@@ -492,7 +497,7 @@ func (ds *DataSource) deriveIndexPathStats(path *accessPath) (bool, error) {
 	}
 	eqOrInCount := 0
 	if len(path.idxCols) != 0 {
-		res, err := ranger.DetachCondAndBuildRangeForIndex(ds.ctx, ds.pushedDownConds, path.idxCols, path.idxColLens)
+		res, err := ranger.DetachCondAndBuildRangeForIndex(ds.ctx, conds, path.idxCols, path.idxColLens)
 		if err != nil {
 			return false, err
 		}
@@ -506,7 +511,7 @@ func (ds *DataSource) deriveIndexPathStats(path *accessPath) (bool, error) {
 			return false, err
 		}
 	} else {
-		path.tableFilters = ds.pushedDownConds
+		path.tableFilters = conds
 	}
 	if eqOrInCount == len(path.accessConds) {
 		accesses, remained := path.splitCorColAccessCondFromFilters(eqOrInCount)

diff --git a/planner/core/stats.go b/planner/core/stats.go
@@ -16,6 +16,7 @@ package core
 import (
 	"math"
 
+	"github.com/pingcap/parser/ast"
 	"github.com/pingcap/tidb/expression"
 	"github.com/pingcap/tidb/planner/property"
 	"github.com/pingcap/tidb/statistics"
@@ -123,7 +124,7 @@ func (ds *DataSource) DeriveStats(childStats []*property.StatsInfo) (*property.S
 	ds.deriveStatsByFilter(ds.pushedDownConds)
 	for _, path := range ds.possibleAccessPaths {
 		if path.isTablePath {
-			noIntervalRanges, err := ds.deriveTablePathStats(path)
+			noIntervalRanges, err := ds.deriveTablePathStats(path, ds.pushedDownConds)
 			if err != nil {
 				return nil, err
 			}
@@ -135,7 +136,7 @@ func (ds *DataSource) DeriveStats(childStats []*property.StatsInfo) (*property.S
 			}
 			continue
 		}
-		noIntervalRanges, err := ds.deriveIndexPathStats(path)
+		noIntervalRanges, err := ds.deriveIndexPathStats(path, ds.pushedDownConds)
 		if err != nil {
 			return nil, err
 		}
@@ -146,9 +147,131 @@ func (ds *DataSource) DeriveStats(childStats []*property.StatsInfo) (*property.S
 			break
 		}
 	}
+	// Consider the IndexMergePath. Now, we just generate `IndexMergePath` in DNF case.
+	if len(ds.pushedDownConds) > 0 && len(ds.possibleAccessPaths) > 1 && ds.ctx.GetSessionVars().EnableIndexMerge {
+		needConsiderIndexMerge := true
+		for i := 1; i < len(ds.possibleAccessPaths); i++ {
+			if len(ds.possibleAccessPaths[i].accessConds) != 0 {
+				needConsiderIndexMerge = false
+				break
+			}
+		}
+		if needConsiderIndexMerge {
+			ds.generateIndexMergeOrPaths()
+		}
+	}
 	return ds.stats, nil
 }
 
+// generateIndexMergeOrPaths appends one IndexMerge path to ds.possibleAccessPaths for
+// every pushed-down condition that is a logical OR whose DNF items can each be served
+// by an access path. Only the access paths that exist when the function is entered are
+// considered as candidates for the partial paths.
+func (ds *DataSource) generateIndexMergeOrPaths() {
+	// Snapshot the count so paths appended below are never used as candidates.
+	usedIndexCount := len(ds.possibleAccessPaths)
+	for condIdx, cond := range ds.pushedDownConds {
+		orFunc, isFunc := cond.(*expression.ScalarFunction)
+		if !isFunc || orFunc.FuncName.L != ast.LogicOr {
+			continue
+		}
+		partialPaths := make([]*accessPath, 0, usedIndexCount)
+		for _, dnfItem := range expression.FlattenDNFConditions(orFunc) {
+			candidates := ds.accessPathsForConds(expression.SplitCNFItems(dnfItem), usedIndexCount)
+			var chosen *accessPath
+			if len(candidates) > 0 {
+				chosen = ds.buildIndexMergePartialPath(candidates)
+			}
+			if chosen == nil {
+				// One DNF item has no usable path, so this OR condition is abandoned.
+				partialPaths = nil
+				break
+			}
+			partialPaths = append(partialPaths, chosen)
+		}
+		if len(partialPaths) <= 1 {
+			continue
+		}
+		if mergePath := ds.buildIndexMergeOrPath(partialPaths, condIdx); mergePath != nil {
+			ds.possibleAccessPaths = append(ds.possibleAccessPaths, mergePath)
+		}
+	}
+}
+
+// accessPathsForConds generates all possible access paths for conditions.
+// It builds a fresh accessPath for each of the first usedIndexCount entries of
+// ds.possibleAccessPaths and derives its stats against conditions. A candidate whose
+// stats cannot be derived is logged at debug level and skipped. A candidate is kept
+// only if it has access conditions and leaves no table filters. If a candidate has a
+// point or empty range (for an index path: an empty range, or a point range on a unique
+// index), it is returned as the only result.
+func (ds *DataSource) accessPathsForConds(conditions []expression.Expression, usedIndexCount int) []*accessPath {
+	var results = make([]*accessPath, 0, usedIndexCount)
+	for i := 0; i < usedIndexCount; i++ {
+		path := &accessPath{}
+		if ds.possibleAccessPaths[i].isTablePath {
+			path.isTablePath = true
+			noIntervalRanges, err := ds.deriveTablePathStats(path, conditions)
+			if err != nil {
+				logutil.BgLogger().Debug("can not derive statistics of a path", zap.Error(err))
+				continue
+			}
+			// If we have point or empty range, just remove other possible paths.
+			// results may still be empty at this point, so it must not be indexed
+			// directly; return a one-element slice instead.
+			if noIntervalRanges || len(path.ranges) == 0 {
+				return []*accessPath{path}
+			}
+		} else {
+			path.index = ds.possibleAccessPaths[i].index
+			noIntervalRanges, err := ds.deriveIndexPathStats(path, conditions)
+			if err != nil {
+				logutil.BgLogger().Debug("can not derive statistics of a path", zap.Error(err))
+				continue
+			}
+			// If we have empty range, or point range on unique index, just remove other possible paths.
+			// As above, results may be empty, so build the single-element result explicitly.
+			if (noIntervalRanges && path.index.Unique) || len(path.ranges) == 0 {
+				return []*accessPath{path}
+			}
+		}
+		// If accessConds is empty or tableFilter is not empty, we ignore the access path.
+		// Now these conditions are too strict.
+		// For example, a sql `select * from t where a > 1 or (b < 2 and c > 3)` and table `t` with indexes
+		// on a and b separately. we can generate a `IndexMergePath` with table filter `a > 1 or (b < 2 and c > 3)`.
+		// TODO: solve the above case
+		if len(path.tableFilters) > 0 || len(path.accessConds) == 0 {
+			continue
+		}
+		results = append(results, path)
+	}
+	return results
+}
+
+// buildIndexMergePartialPath picks one path out of indexAccessPaths to serve as a
+// partial path of an IndexMerge path. The current rule is simply "the path whose index
+// has the most columns"; on a tie the earliest such path wins. The input must not be
+// empty.
+// This rule is not always the best choice: for a filter `c > 1` with indexes c and
+// c_d_e, the single-column index is sufficient and cheaper to execute.
+// TODO: improve strategy of the partial path selection
+func (ds *DataSource) buildIndexMergePartialPath(indexAccessPaths []*accessPath) *accessPath {
+	best := indexAccessPaths[0]
+	for _, candidate := range indexAccessPaths[1:] {
+		// Strict comparison keeps the first path among equally wide indexes.
+		if len(candidate.idxCols) > len(best.idxCols) {
+			best = candidate
+		}
+	}
+	return best
+}
+
+// buildIndexMergeOrPath generates one possible IndexMergePath.
+func (ds *DataSource) buildIndexMergeOrPath(partialPaths []*accessPath, current int) *accessPath {
+	indexMergePath := &accessPath{partialIndexPaths: partialPaths}
+	indexMergePath.tableFilters = append(indexMergePath.tableFilters, ds.pushedDownConds[:current]...)
+	indexMergePath.tableFilters = append(indexMergePath.tableFilters, ds.pushedDownConds[current+1:]...)
+	return indexMergePath
+}
+
 // DeriveStats implement LogicalPlan DeriveStats interface.
 func (p *LogicalSelection) DeriveStats(childStats []*property.StatsInfo) (*property.StatsInfo, error) {
 	p.stats = childStats[0].Scale(selectionFactor)

diff --git a/session/session.go b/session/session.go
@@ -1697,6 +1697,7 @@ var builtinGlobalVariable = []string{
 	variable.TiDBEnableWindowFunction,
 	variable.TiDBEnableFastAnalyze,
 	variable.TiDBExpensiveQueryTimeThreshold,
+	variable.TiDBEnableIndexMerge,
 }
 
 var (