Skip to content

Commit

Permalink
planner: generate IndexMergePath in physical optimization (#10512)
Browse files Browse the repository at this point in the history
  • Loading branch information
hailanwhu authored and eurekaka committed Jul 5, 2019
1 parent 7177291 commit 407e50d
Show file tree
Hide file tree
Showing 8 changed files with 291 additions and 11 deletions.
141 changes: 141 additions & 0 deletions planner/core/indexmerge_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
// Copyright 2019 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package core

import (
. "github.com/pingcap/check"
"github.com/pingcap/parser"
"github.com/pingcap/parser/ast"
"github.com/pingcap/parser/model"
"github.com/pingcap/tidb/infoschema"
"github.com/pingcap/tidb/sessionctx"
"github.com/pingcap/tidb/util/testleak"
)

// Register testIndexMergeSuite with the check framework so its Test* methods are run.
var _ = Suite(&testIndexMergeSuite{})

// testIndexMergeSuite holds the fixtures shared by the IndexMerge path generation tests.
// All fields are populated in SetUpSuite.
type testIndexMergeSuite struct {
// Embedded SQL parser; the tests call its ParseOneStmt method through the suite.
*parser.Parser

// is is the mock info schema built from MockTable and MockView.
is infoschema.InfoSchema
// ctx is the mock session context passed to Preprocess.
ctx sessionctx.Context
}

// SetUpSuite prepares the suite-wide fixtures: a mock info schema holding the
// mock table and the mock view, a mock session context, and a fresh SQL parser.
func (s *testIndexMergeSuite) SetUpSuite(c *C) {
	mockTables := []*model.TableInfo{MockTable(), MockView()}
	s.is = infoschema.MockInfoSchema(mockTables)
	s.ctx = MockContext()
	s.Parser = parser.New()
}

// getIndexMergePathDigest renders paths[startIndex:] as a compact string so that
// tests can compare the generated IndexMerge paths against an expected literal.
//
// Each path is rendered as
//
//	{Idxs:[<partial index names>],TbFilters:[<table filters>]}
//
// where the index names are the lower-cased names of the partial paths' indexes
// and the filters are rendered with their String method. Entries are separated
// by commas and the whole list is wrapped in square brackets; when there is no
// path at or after startIndex the result is "[]".
func getIndexMergePathDigest(paths []*accessPath, startIndex int) string {
	digest := "["
	for pos := startIndex; pos < len(paths); pos++ {
		if pos > startIndex {
			digest += ","
		}
		current := paths[pos]

		idxNames := ""
		for k, partial := range current.partialIndexPaths {
			if k != 0 {
				idxNames += ","
			}
			idxNames += partial.index.Name.L
		}

		filters := ""
		for k, filter := range current.tableFilters {
			if k != 0 {
				filters += ","
			}
			filters += filter.String()
		}

		digest += "{Idxs:[" + idxNames + "],TbFilters:[" + filters + "]}"
	}
	return digest + "]"
}

// TestIndexMergePathGenerateion verifies which IndexMerge paths are appended to a
// DataSource's possibleAccessPaths when stats are derived with EnableIndexMerge on.
//
// For every case the SQL is parsed, built into a plan and logically optimized; the
// test then walks down the first child of each operator until it reaches the
// DataSource, records how many access paths exist, derives stats, and compares the
// digest of the newly appended paths with the expected string.
func (s *testIndexMergeSuite) TestIndexMergePathGenerateion(c *C) {
	defer testleak.AfterTest(c)()
	tests := []struct {
		sql            string
		idxMergeDigest string
	}{
		{
			sql:            "select * from t",
			idxMergeDigest: "[]",
		},
		{
			sql:            "select * from t where c < 1",
			idxMergeDigest: "[]",
		},
		{
			sql:            "select * from t where c < 1 or f > 2",
			idxMergeDigest: "[{Idxs:[c_d_e,f_g],TbFilters:[]}]",
		},
		{
			sql: "select * from t where (c < 1 or f > 2) and (c > 5 or f < 7)",
			idxMergeDigest: "[{Idxs:[c_d_e,f_g],TbFilters:[or(gt(test.t.c, 5), lt(test.t.f, 7))]}," +
				"{Idxs:[c_d_e,f_g],TbFilters:[or(lt(test.t.c, 1), gt(test.t.f, 2))]}]",
		},
		{
			sql: "select * from t where (c < 1 or f > 2) and (c > 5 or f < 7) and (c < 1 or g > 2)",
			idxMergeDigest: "[{Idxs:[c_d_e,f_g],TbFilters:[or(gt(test.t.c, 5), lt(test.t.f, 7)),or(lt(test.t.c, 1), gt(test.t.g, 2))]}," +
				"{Idxs:[c_d_e,f_g],TbFilters:[or(lt(test.t.c, 1), gt(test.t.f, 2)),or(lt(test.t.c, 1), gt(test.t.g, 2))]}," +
				"{Idxs:[c_d_e,g],TbFilters:[or(lt(test.t.c, 1), gt(test.t.f, 2)),or(gt(test.t.c, 5), lt(test.t.f, 7))]}]",
		},
		{
			sql: "select * from t where (c < 1 or f > 2) and (c > 5 or f < 7) and (e < 1 or f > 2)",
			idxMergeDigest: "[{Idxs:[c_d_e,f_g],TbFilters:[or(gt(test.t.c, 5), lt(test.t.f, 7)),or(lt(test.t.e, 1), gt(test.t.f, 2))]}," +
				"{Idxs:[c_d_e,f_g],TbFilters:[or(lt(test.t.c, 1), gt(test.t.f, 2)),or(lt(test.t.e, 1), gt(test.t.f, 2))]}]",
		},
	}
	for i, tc := range tests {
		// Attached to every assertion so a failure identifies the offending case.
		comment := Commentf("case:%v sql:%s", i, tc.sql)
		stmt, err := s.ParseOneStmt(tc.sql, "", "")
		c.Assert(err, IsNil, comment)
		// NOTE(review): the result of Preprocess is not inspected here; if it
		// returns an error, consider asserting on it - confirm against its signature.
		Preprocess(s.ctx, stmt, s.is)
		builder := &PlanBuilder{
			ctx:       MockContext(),
			is:        s.is,
			colMapper: make(map[*ast.ColumnNameExpr]int),
		}
		p, err := builder.Build(stmt)
		if err != nil {
			// A build failure is compared against the expected digest, so a case
			// can only pass this way if it lists the error text as its digest.
			c.Assert(err.Error(), Equals, tc.idxMergeDigest, comment)
			continue
		}
		p, err = logicalOptimize(builder.optFlag, p.(LogicalPlan))
		c.Assert(err, IsNil, comment)
		lp := p.(LogicalPlan)

		// Descend along the first child of each operator until the DataSource is found.
		var ds *DataSource
		for ds == nil {
			switch v := lp.(type) {
			case *DataSource:
				ds = v
			default:
				children := lp.Children()
				// Fail with the case description instead of an index-out-of-range
				// panic when a leaf operator other than DataSource is reached.
				c.Assert(len(children) > 0, Equals, true, comment)
				lp = children[0]
			}
		}

		ds.ctx.GetSessionVars().EnableIndexMerge = true
		// Paths appended by stats derivation start at this position.
		idxMergeStartIndex := len(ds.possibleAccessPaths)
		// lp is the DataSource itself at this point.
		_, err = lp.recursiveDeriveStats()
		c.Assert(err, IsNil, comment)
		c.Assert(getIndexMergePathDigest(ds.possibleAccessPaths, idxMergeStartIndex), Equals, tc.idxMergeDigest, comment)
	}
}
21 changes: 13 additions & 8 deletions planner/core/logical_plans.go
Original file line number Diff line number Diff line change
Expand Up @@ -351,7 +351,8 @@ type DataSource struct {
handleCol *expression.Column
}

// accessPath tells how we access one index or just access table.
// accessPath indicates the way we access a table: by using single index, or by using multiple indexes,
// or just by using table scan.
type accessPath struct {
index *model.IndexInfo
idxCols []*expression.Column
Expand All @@ -369,15 +370,18 @@ type accessPath struct {
isTablePath bool
// forced means this path is generated by `use/force index()`.
forced bool
// partialIndexPaths stores the partial access paths that make up an IndexMerge path.
// Any extra filters that must still be applied are stored in tableFilters.
partialIndexPaths []*accessPath
}

// deriveTablePathStats will fulfill the information that the accessPath need.
// And it will check whether the primary key is covered only by point query.
func (ds *DataSource) deriveTablePathStats(path *accessPath) (bool, error) {
func (ds *DataSource) deriveTablePathStats(path *accessPath, conds []expression.Expression) (bool, error) {
var err error
sc := ds.ctx.GetSessionVars().StmtCtx
path.countAfterAccess = float64(ds.statisticTable.Count)
path.tableFilters = ds.pushedDownConds
path.tableFilters = conds
var pkCol *expression.Column
columnLen := len(ds.schema.Columns)
isUnsigned := false
Expand All @@ -395,10 +399,10 @@ func (ds *DataSource) deriveTablePathStats(path *accessPath) (bool, error) {
}

path.ranges = ranger.FullIntRange(isUnsigned)
if len(ds.pushedDownConds) == 0 {
if len(conds) == 0 {
return false, nil
}
path.accessConds, path.tableFilters = ranger.DetachCondsForColumn(ds.ctx, ds.pushedDownConds, pkCol)
path.accessConds, path.tableFilters = ranger.DetachCondsForColumn(ds.ctx, conds, pkCol)
// If there's no access cond, we try to find that whether there's expression containing correlated column that
// can be used to access data.
corColInAccessConds := false
Expand Down Expand Up @@ -478,7 +482,8 @@ func (ds *DataSource) getHandleCol() *expression.Column {
// deriveIndexPathStats will fulfill the information that the accessPath need.
// And it will check whether this index is full matched by point query. We will use this check to
// determine whether we remove other paths or not.
func (ds *DataSource) deriveIndexPathStats(path *accessPath) (bool, error) {
// conds is the conditions used to generate the DetachRangeResult for path.
func (ds *DataSource) deriveIndexPathStats(path *accessPath, conds []expression.Expression) (bool, error) {
sc := ds.ctx.GetSessionVars().StmtCtx
path.ranges = ranger.FullRange()
path.countAfterAccess = float64(ds.statisticTable.Count)
Expand All @@ -492,7 +497,7 @@ func (ds *DataSource) deriveIndexPathStats(path *accessPath) (bool, error) {
}
eqOrInCount := 0
if len(path.idxCols) != 0 {
res, err := ranger.DetachCondAndBuildRangeForIndex(ds.ctx, ds.pushedDownConds, path.idxCols, path.idxColLens)
res, err := ranger.DetachCondAndBuildRangeForIndex(ds.ctx, conds, path.idxCols, path.idxColLens)
if err != nil {
return false, err
}
Expand All @@ -506,7 +511,7 @@ func (ds *DataSource) deriveIndexPathStats(path *accessPath) (bool, error) {
return false, err
}
} else {
path.tableFilters = ds.pushedDownConds
path.tableFilters = conds
}
if eqOrInCount == len(path.accessConds) {
accesses, remained := path.splitCorColAccessCondFromFilters(eqOrInCount)
Expand Down
127 changes: 125 additions & 2 deletions planner/core/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ package core
import (
"math"

"github.com/pingcap/parser/ast"
"github.com/pingcap/tidb/expression"
"github.com/pingcap/tidb/planner/property"
"github.com/pingcap/tidb/statistics"
Expand Down Expand Up @@ -123,7 +124,7 @@ func (ds *DataSource) DeriveStats(childStats []*property.StatsInfo) (*property.S
ds.deriveStatsByFilter(ds.pushedDownConds)
for _, path := range ds.possibleAccessPaths {
if path.isTablePath {
noIntervalRanges, err := ds.deriveTablePathStats(path)
noIntervalRanges, err := ds.deriveTablePathStats(path, ds.pushedDownConds)
if err != nil {
return nil, err
}
Expand All @@ -135,7 +136,7 @@ func (ds *DataSource) DeriveStats(childStats []*property.StatsInfo) (*property.S
}
continue
}
noIntervalRanges, err := ds.deriveIndexPathStats(path)
noIntervalRanges, err := ds.deriveIndexPathStats(path, ds.pushedDownConds)
if err != nil {
return nil, err
}
Expand All @@ -146,9 +147,131 @@ func (ds *DataSource) DeriveStats(childStats []*property.StatsInfo) (*property.S
break
}
}
// Consider the IndexMergePath. Now, we just generate `IndexMergePath` in DNF case.
if len(ds.pushedDownConds) > 0 && len(ds.possibleAccessPaths) > 1 && ds.ctx.GetSessionVars().EnableIndexMerge {
needConsiderIndexMerge := true
for i := 1; i < len(ds.possibleAccessPaths); i++ {
if len(ds.possibleAccessPaths[i].accessConds) != 0 {
needConsiderIndexMerge = false
break
}
}
if needConsiderIndexMerge {
ds.generateIndexMergeOrPaths()
}
}
return ds.stats, nil
}

// generateIndexMergeOrPaths appends IndexMerge paths to ds.possibleAccessPaths.
//
// Every pushed-down condition whose top-level function is a logical OR is
// flattened into its DNF items. For each item the usable access paths are
// collected from the paths that existed before this call, and one of them is
// chosen as the partial path. A merged path is generated only when every DNF
// item obtained a partial path and there are at least two of them.
func (ds *DataSource) generateIndexMergeOrPaths() {
	// Only the paths present on entry are candidates; paths appended below are not.
	candidateCount := len(ds.possibleAccessPaths)
	for condIdx, cond := range ds.pushedDownConds {
		orExpr, isScalarFunc := cond.(*expression.ScalarFunction)
		if !isScalarFunc || orExpr.FuncName.L != ast.LogicOr {
			continue
		}

		partials := make([]*accessPath, 0, candidateCount)
		// covered turns false as soon as one DNF item cannot be served by any path.
		covered := true
		for _, dnfItem := range expression.FlattenDNFConditions(orExpr) {
			candidates := ds.accessPathsForConds(expression.SplitCNFItems(dnfItem), candidateCount)
			if len(candidates) == 0 {
				covered = false
				break
			}
			chosen := ds.buildIndexMergePartialPath(candidates)
			if chosen == nil {
				covered = false
				break
			}
			partials = append(partials, chosen)
		}
		if !covered || len(partials) < 2 {
			continue
		}

		if merged := ds.buildIndexMergeOrPath(partials, condIdx); merged != nil {
			ds.possibleAccessPaths = append(ds.possibleAccessPaths, merged)
		}
	}
}

// accessPathsForConds generates all possible access paths for conditions.
//
// The first usedIndexCount entries of ds.possibleAccessPaths are used as
// templates: for each one a fresh accessPath is created and its stats are
// derived against conditions only. A candidate whose stats cannot be derived is
// logged at debug level and skipped.
//
// When a candidate yields a point or empty range (for index paths: an empty
// range, or a point range on a unique index), it becomes the only result and
// the remaining candidates are not examined. Otherwise a candidate is kept only
// if it has access conditions and leaves no table filters.
func (ds *DataSource) accessPathsForConds(conditions []expression.Expression, usedIndexCount int) []*accessPath {
	var results = make([]*accessPath, 0, usedIndexCount)
	for i := 0; i < usedIndexCount; i++ {
		path := &accessPath{}
		if ds.possibleAccessPaths[i].isTablePath {
			path.isTablePath = true
			noIntervalRanges, err := ds.deriveTablePathStats(path, conditions)
			if err != nil {
				logutil.BgLogger().Debug("can not derive statistics of a path", zap.Error(err))
				continue
			}
			// If we have point or empty range, just remove other possible paths.
			if noIntervalRanges || len(path.ranges) == 0 {
				// results may still be empty here, so assigning results[0] directly
				// would panic; truncate-and-append covers both cases.
				results = append(results[:0], path)
				break
			}
		} else {
			path.index = ds.possibleAccessPaths[i].index
			noIntervalRanges, err := ds.deriveIndexPathStats(path, conditions)
			if err != nil {
				logutil.BgLogger().Debug("can not derive statistics of a path", zap.Error(err))
				continue
			}
			// If we have empty range, or point range on unique index, just remove other possible paths.
			if (noIntervalRanges && path.index.Unique) || len(path.ranges) == 0 {
				// See above: results may be empty, so do not index into it.
				results = append(results[:0], path)
				break
			}
		}
		// If accessConds is empty or tableFilter is not empty, we ignore the access path.
		// Now these conditions are too strict.
		// For example, a sql `select * from t where a > 1 or (b < 2 and c > 3)` and table `t` with indexes
		// on a and b separately. we can generate a `IndexMergePath` with table filter `a > 1 or (b < 2 and c > 3)`.
		// TODO: solve the above case
		if len(path.tableFilters) > 0 || len(path.accessConds) == 0 {
			continue
		}
		results = append(results, path)
	}
	return results
}

// buildIndexMergePartialPath chooses the best index path from all possible paths.
// It returns nil when indexAccessPaths is empty.
//
// Now we just choose the index with most columns; on a tie the earliest path in
// indexAccessPaths wins.
// We should improve this strategy, because it is not always better to choose index
// with most columns, e.g, filter is c > 1 and the input indexes are c and c_d_e,
// the former one is enough, and it is less expensive in execution compared with the latter one.
// TODO: improve strategy of the partial path selection
func (ds *DataSource) buildIndexMergePartialPath(indexAccessPaths []*accessPath) *accessPath {
	// Guard against empty input instead of panicking on indexAccessPaths[0].
	if len(indexAccessPaths) == 0 {
		return nil
	}

	best := indexAccessPaths[0]
	for _, candidate := range indexAccessPaths[1:] {
		// Strictly greater, so the earliest path keeps priority on equal column counts.
		if len(candidate.idxCols) > len(best.idxCols) {
			best = candidate
		}
	}
	return best
}

// buildIndexMergeOrPath generates one possible IndexMergePath from partialPaths.
// current is the position in ds.pushedDownConds of the condition that the partial
// paths were generated for; every other pushed-down condition is kept as a table
// filter of the returned path.
func (ds *DataSource) buildIndexMergeOrPath(partialPaths []*accessPath, current int) *accessPath {
	// Start from a nil slice so the pushed-down conditions are copied, not aliased.
	var remaining []expression.Expression
	remaining = append(remaining, ds.pushedDownConds[:current]...)
	remaining = append(remaining, ds.pushedDownConds[current+1:]...)
	return &accessPath{
		partialIndexPaths: partialPaths,
		tableFilters:      remaining,
	}
}

// DeriveStats implement LogicalPlan DeriveStats interface.
func (p *LogicalSelection) DeriveStats(childStats []*property.StatsInfo) (*property.StatsInfo, error) {
p.stats = childStats[0].Scale(selectionFactor)
Expand Down
1 change: 1 addition & 0 deletions session/session.go
Original file line number Diff line number Diff line change
Expand Up @@ -1697,6 +1697,7 @@ var builtinGlobalVariable = []string{
variable.TiDBEnableWindowFunction,
variable.TiDBEnableFastAnalyze,
variable.TiDBExpensiveQueryTimeThreshold,
variable.TiDBEnableIndexMerge,
}

var (
Expand Down
Loading

0 comments on commit 407e50d

Please sign in to comment.