Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

statistics: introduce sampling by rate #27359

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
140 changes: 70 additions & 70 deletions executor/analyze.go

Large diffs are not rendered by default.

43 changes: 23 additions & 20 deletions executor/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -2059,7 +2059,7 @@ func (b *executorBuilder) refreshForUpdateTSForRC() error {
return UpdateForUpdateTS(b.ctx, newForUpdateTS)
}

func (b *executorBuilder) buildAnalyzeIndexPushdown(task plannercore.AnalyzeIndexTask, opts map[ast.AnalyzeOptionType]uint64, autoAnalyze string) *analyzeTask {
func (b *executorBuilder) buildAnalyzeIndexPushdown(task plannercore.AnalyzeIndexTask, opts map[ast.AnalyzeOptionType]interface{}, autoAnalyze string) *analyzeTask {
job := &statistics.AnalyzeJob{DBName: task.DBName, TableName: task.TableName, PartitionName: task.PartitionName, JobInfo: autoAnalyze + "analyze index " + task.IndexInfo.Name.O}
_, offset := timeutil.Zone(b.ctx.GetSessionVars().Location())
sc := b.ctx.GetSessionVars().StmtCtx
Expand Down Expand Up @@ -2090,11 +2090,11 @@ func (b *executorBuilder) buildAnalyzeIndexPushdown(task plannercore.AnalyzeInde
idxInfo: task.IndexInfo,
}
topNSize := new(int32)
*topNSize = int32(opts[ast.AnalyzeOptNumTopN])
*topNSize = int32(opts[ast.AnalyzeOptNumTopN].(uint64))
statsVersion := new(int32)
*statsVersion = int32(task.StatsVersion)
e.analyzePB.IdxReq = &tipb.AnalyzeIndexReq{
BucketSize: int64(opts[ast.AnalyzeOptNumBuckets]),
BucketSize: int64(opts[ast.AnalyzeOptNumBuckets].(uint64)),
NumColumns: int32(len(task.IndexInfo.Columns)),
TopNSize: topNSize,
Version: statsVersion,
Expand All @@ -2103,14 +2103,14 @@ func (b *executorBuilder) buildAnalyzeIndexPushdown(task plannercore.AnalyzeInde
if e.isCommonHandle && e.idxInfo.Primary {
e.analyzePB.Tp = tipb.AnalyzeType_TypeCommonHandle
}
depth := int32(opts[ast.AnalyzeOptCMSketchDepth])
width := int32(opts[ast.AnalyzeOptCMSketchWidth])
depth := int32(opts[ast.AnalyzeOptCMSketchDepth].(uint64))
width := int32(opts[ast.AnalyzeOptCMSketchWidth].(uint64))
e.analyzePB.IdxReq.CmsketchDepth = &depth
e.analyzePB.IdxReq.CmsketchWidth = &width
return &analyzeTask{taskType: idxTask, idxExec: e, job: job}
}

func (b *executorBuilder) buildAnalyzeIndexIncremental(task plannercore.AnalyzeIndexTask, opts map[ast.AnalyzeOptionType]uint64) *analyzeTask {
func (b *executorBuilder) buildAnalyzeIndexIncremental(task plannercore.AnalyzeIndexTask, opts map[ast.AnalyzeOptionType]interface{}) *analyzeTask {
h := domain.GetDomain(b.ctx).StatsHandle()
statsTbl := h.GetPartitionStats(&model.TableInfo{}, task.TableID.GetStatisticsID())
analyzeTask := b.buildAnalyzeIndexPushdown(task, opts, "")
Expand Down Expand Up @@ -2152,7 +2152,7 @@ func (b *executorBuilder) buildAnalyzeIndexIncremental(task plannercore.AnalyzeI
return analyzeTask
}

func (b *executorBuilder) buildAnalyzeSamplingPushdown(task plannercore.AnalyzeColumnsTask, opts map[ast.AnalyzeOptionType]uint64, autoAnalyze string, schemaForVirtualColEval *expression.Schema) *analyzeTask {
func (b *executorBuilder) buildAnalyzeSamplingPushdown(task plannercore.AnalyzeColumnsTask, opts map[ast.AnalyzeOptionType]interface{}, autoAnalyze string, schemaForVirtualColEval *expression.Schema) *analyzeTask {
job := &statistics.AnalyzeJob{DBName: task.DBName, TableName: task.TableName, PartitionName: task.PartitionName, JobInfo: autoAnalyze + "analyze table"}
availableIdx := make([]*model.IndexInfo, 0, len(task.Indexes))
colGroups := make([]*tipb.AnalyzeColumnGroup, 0, len(task.Indexes))
Expand Down Expand Up @@ -2216,12 +2216,15 @@ func (b *executorBuilder) buildAnalyzeSamplingPushdown(task plannercore.AnalyzeC
baseModifyCnt: modifyCount,
}
e.analyzePB.ColReq = &tipb.AnalyzeColumnsReq{
BucketSize: int64(opts[ast.AnalyzeOptNumBuckets]),
SampleSize: int64(opts[ast.AnalyzeOptNumSamples]),
BucketSize: int64(opts[ast.AnalyzeOptNumBuckets].(uint64)),
SampleSize: int64(opts[ast.AnalyzeOptNumSamples].(uint64)),
SampleRate: new(float64),
SketchSize: maxSketchSize,
ColumnsInfo: util.ColumnsToProto(task.ColsInfo, task.TblInfo.PKIsHandle),
ColumnGroups: colGroups,
}
*e.analyzePB.ColReq.SampleRate = opts[ast.AnalyzeOptSampleRate].(float64)

if task.TblInfo != nil {
e.analyzePB.ColReq.PrimaryColumnIds = tables.TryGetCommonPkColumnIds(task.TblInfo)
if task.TblInfo.IsCommonHandle {
Expand All @@ -2232,7 +2235,7 @@ func (b *executorBuilder) buildAnalyzeSamplingPushdown(task plannercore.AnalyzeC
return &analyzeTask{taskType: colTask, colExec: e, job: job}
}

func (b *executorBuilder) buildAnalyzeColumnsPushdown(task plannercore.AnalyzeColumnsTask, opts map[ast.AnalyzeOptionType]uint64, autoAnalyze string, schemaForVirtualColEval *expression.Schema) *analyzeTask {
func (b *executorBuilder) buildAnalyzeColumnsPushdown(task plannercore.AnalyzeColumnsTask, opts map[ast.AnalyzeOptionType]interface{}, autoAnalyze string, schemaForVirtualColEval *expression.Schema) *analyzeTask {
if task.StatsVersion == statistics.Version2 {
return b.buildAnalyzeSamplingPushdown(task, opts, autoAnalyze, schemaForVirtualColEval)
}
Expand Down Expand Up @@ -2279,10 +2282,10 @@ func (b *executorBuilder) buildAnalyzeColumnsPushdown(task plannercore.AnalyzeCo
handleCols: task.HandleCols,
AnalyzeInfo: task.AnalyzeInfo,
}
depth := int32(opts[ast.AnalyzeOptCMSketchDepth])
width := int32(opts[ast.AnalyzeOptCMSketchWidth])
depth := int32(opts[ast.AnalyzeOptCMSketchDepth].(uint64))
width := int32(opts[ast.AnalyzeOptCMSketchWidth].(uint64))
e.analyzePB.ColReq = &tipb.AnalyzeColumnsReq{
BucketSize: int64(opts[ast.AnalyzeOptNumBuckets]),
BucketSize: int64(opts[ast.AnalyzeOptNumBuckets].(uint64)),
SampleSize: maxRegionSampleSize,
SketchSize: maxSketchSize,
ColumnsInfo: util.ColumnsToProto(cols, task.HandleCols != nil && task.HandleCols.IsInt()),
Expand All @@ -2297,17 +2300,17 @@ func (b *executorBuilder) buildAnalyzeColumnsPushdown(task plannercore.AnalyzeCo
}
if task.CommonHandleInfo != nil {
topNSize := new(int32)
*topNSize = int32(opts[ast.AnalyzeOptNumTopN])
*topNSize = int32(opts[ast.AnalyzeOptNumTopN].(uint64))
statsVersion := new(int32)
*statsVersion = int32(task.StatsVersion)
e.analyzePB.IdxReq = &tipb.AnalyzeIndexReq{
BucketSize: int64(opts[ast.AnalyzeOptNumBuckets]),
BucketSize: int64(opts[ast.AnalyzeOptNumBuckets].(uint64)),
NumColumns: int32(len(task.CommonHandleInfo.Columns)),
TopNSize: topNSize,
Version: statsVersion,
}
depth := int32(opts[ast.AnalyzeOptCMSketchDepth])
width := int32(opts[ast.AnalyzeOptCMSketchWidth])
depth := int32(opts[ast.AnalyzeOptCMSketchDepth].(uint64))
width := int32(opts[ast.AnalyzeOptCMSketchWidth].(uint64))
e.analyzePB.IdxReq.CmsketchDepth = &depth
e.analyzePB.IdxReq.CmsketchWidth = &width
e.analyzePB.IdxReq.SketchSize = maxSketchSize
Expand All @@ -2319,7 +2322,7 @@ func (b *executorBuilder) buildAnalyzeColumnsPushdown(task plannercore.AnalyzeCo
return &analyzeTask{taskType: colTask, colExec: e, job: job}
}

func (b *executorBuilder) buildAnalyzePKIncremental(task plannercore.AnalyzeColumnsTask, opts map[ast.AnalyzeOptionType]uint64) *analyzeTask {
func (b *executorBuilder) buildAnalyzePKIncremental(task plannercore.AnalyzeColumnsTask, opts map[ast.AnalyzeOptionType]interface{}) *analyzeTask {
h := domain.GetDomain(b.ctx).StatsHandle()
statsTbl := h.GetPartitionStats(&model.TableInfo{}, task.TableID.GetStatisticsID())
analyzeTask := b.buildAnalyzeColumnsPushdown(task, opts, "", nil)
Expand Down Expand Up @@ -2358,7 +2361,7 @@ func (b *executorBuilder) buildAnalyzePKIncremental(task plannercore.AnalyzeColu
return analyzeTask
}

func (b *executorBuilder) buildAnalyzeFastColumn(e *AnalyzeExec, task plannercore.AnalyzeColumnsTask, opts map[ast.AnalyzeOptionType]uint64) {
func (b *executorBuilder) buildAnalyzeFastColumn(e *AnalyzeExec, task plannercore.AnalyzeColumnsTask, opts map[ast.AnalyzeOptionType]interface{}) {
findTask := false
for _, eTask := range e.tasks {
if eTask.fastExec != nil && eTask.fastExec.tableID.Equals(&task.TableID) {
Expand Down Expand Up @@ -2406,7 +2409,7 @@ func (b *executorBuilder) buildAnalyzeFastColumn(e *AnalyzeExec, task plannercor
}
}

func (b *executorBuilder) buildAnalyzeFastIndex(e *AnalyzeExec, task plannercore.AnalyzeIndexTask, opts map[ast.AnalyzeOptionType]uint64) {
func (b *executorBuilder) buildAnalyzeFastIndex(e *AnalyzeExec, task plannercore.AnalyzeIndexTask, opts map[ast.AnalyzeOptionType]interface{}) {
findTask := false
for _, eTask := range e.tasks {
if eTask.fastExec != nil && eTask.fastExec.tableID.Equals(&task.TableID) {
Expand Down
2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,8 @@ require (
sourcegraph.com/sourcegraph/appdash-data v0.0.0-20151005221446-73f23eafcf67
)

replace github.com/pingcap/tipb => github.com/winoros/tipb v0.0.0-20211013061641-faa265605b59

// cloud.google.com/go/storage will upgrade grpc to v1.40.0
// we need to keep the replacement until go.etcd.io supports the higher version of grpc.
replace google.golang.org/grpc => google.golang.org/grpc v1.29.1
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -611,8 +611,6 @@ github.com/pingcap/tidb-dashboard v0.0.0-20210312062513-eef5d6404638/go.mod h1:O
github.com/pingcap/tidb-dashboard v0.0.0-20210716172320-2226872e3296/go.mod h1:OCXbZTBTIMRcIt0jFsuCakZP+goYRv6IjawKbwLS2TQ=
github.com/pingcap/tidb-tools v5.0.3+incompatible h1:vYMrW9ux+3HRMeRZ1fUOjy2nyiodtuVyAyK270EKBEs=
github.com/pingcap/tidb-tools v5.0.3+incompatible/go.mod h1:XGdcy9+yqlDSEMTpOXnwf3hiTeqrV6MN/u1se9N8yIM=
github.com/pingcap/tipb v0.0.0-20210802080519-94b831c6db55 h1:oxOovwOzm7VD37XpDo9NUtfGddZMwLpjtaQOxAq6HKg=
github.com/pingcap/tipb v0.0.0-20210802080519-94b831c6db55/go.mod h1:A7mrd7WHBl1o63LE2bIBGEJMTNWXqhgmYiOvMLxozfs=
github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
Expand Down Expand Up @@ -771,6 +769,8 @@ github.com/vmihailenco/msgpack/v5 v5.0.0-beta.1/go.mod h1:xlngVLeyQ/Qi05oQxhQ+oT
github.com/vmihailenco/tagparser v0.1.1/go.mod h1:OeAg3pn3UbLjkWt+rN9oFYB6u/cQgqMEUPoW2WPyhdI=
github.com/wangjohn/quickselect v0.0.0-20161129230411-ed8402a42d5f h1:9DDCDwOyEy/gId+IEMrFHLuQ5R/WV0KNxWLler8X2OY=
github.com/wangjohn/quickselect v0.0.0-20161129230411-ed8402a42d5f/go.mod h1:8sdOQnirw1PrcnTJYkmW1iOHtUmblMmGdUOHyWYycLI=
github.com/winoros/tipb v0.0.0-20211013061641-faa265605b59 h1:HZsmZsdaSGxDsqDORF69SqDWSQEDLqhdXGUYvG6ncCo=
github.com/winoros/tipb v0.0.0-20211013061641-faa265605b59/go.mod h1:A7mrd7WHBl1o63LE2bIBGEJMTNWXqhgmYiOvMLxozfs=
github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU=
github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415/go.mod h1:GwrjFmJcFw6At/Gs6z4yjiIwzuJ1/+UwLxMQDVQXShQ=
github.com/xeipuuv/gojsonschema v1.2.0/go.mod h1:anYRn/JVcOK2ZgGU+IjEV4nwlhoK5sQluxsYJ78Id3Y=
Expand Down
4 changes: 3 additions & 1 deletion parser/ast/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ const (
AnalyzeOptCMSketchDepth
AnalyzeOptCMSketchWidth
AnalyzeOptNumSamples
AnalyzeOptSampleRate
)

// AnalyzeOptionString stores the string form of analyze options.
Expand All @@ -61,6 +62,7 @@ var AnalyzeOptionString = map[AnalyzeOptionType]string{
AnalyzeOptCMSketchWidth: "CMSKETCH WIDTH",
AnalyzeOptCMSketchDepth: "CMSKETCH DEPTH",
AnalyzeOptNumSamples: "SAMPLES",
AnalyzeOptSampleRate: "SAMPLERATE",
}

// HistogramOperationType is the type for histogram operation.
Expand Down Expand Up @@ -88,7 +90,7 @@ func (hot HistogramOperationType) String() string {
// AnalyzeOpt stores the analyze option type and value.
type AnalyzeOpt struct {
Type AnalyzeOptionType
Value uint64
Value ValueExpr
}

// Restore implements Node interface.
Expand Down
1 change: 1 addition & 0 deletions parser/misc.go
Original file line number Diff line number Diff line change
Expand Up @@ -604,6 +604,7 @@ var tokenMap = map[string]int{
"RESUME": resume,
"RUNNING": running,
"S3": s3,
"SAMPLERATE": sampleRate,
"SAMPLES": samples,
"SAN": san,
"SCHEDULE": schedule,
Expand Down
Loading