Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

statistics: handle constants in Selectivity() #32761

Merged
merged 7 commits into from
Mar 6, 2022
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 14 additions & 14 deletions cmd/explaintest/r/explain_easy.result
Original file line number Diff line number Diff line change
Expand Up @@ -433,10 +433,10 @@ IndexReader 10.00 root index:IndexRangeScan
└─IndexRangeScan 10.00 cop[tikv] table:t, index:idx(a, b) range:[1,1], keep order:false, stats:pseudo
explain format = 'brief' select * from t where a = 1 and a = 2;
id estRows task access object operator info
TableDual 8000.00 root rows:0
TableDual 0.00 root rows:0
explain format = 'brief' select * from t where b = 1 and b = 2;
id estRows task access object operator info
TableDual 8000.00 root rows:0
TableDual 0.00 root rows:0
explain format = 'brief' select * from t t1 join t t2 where t1.b = t2.b and t2.b is null;
id estRows task access object operator info
Projection 0.00 root test.t.a, test.t.b, test.t.a, test.t.b
Expand All @@ -458,7 +458,7 @@ drop table if exists t;
create table t(a bigint primary key);
explain format = 'brief' select * from t where a = 1 and a = 2;
id estRows task access object operator info
TableDual 8000.00 root rows:0
TableDual 0.00 root rows:0
explain format = 'brief' select null or a > 1 from t;
id estRows task access object operator info
Projection 10000.00 root or(<nil>, gt(test.t.a, 1))->Column#2
Expand Down Expand Up @@ -506,17 +506,17 @@ PRIMARY KEY (`id`)
explain format = 'brief' SELECT COUNT(1) FROM (SELECT COALESCE(b.region_name, '不详') region_name, SUM(a.registration_num) registration_num FROM (SELECT stat_date, show_date, region_id, 0 registration_num FROM test01 WHERE period = 1 AND stat_date >= 20191202 AND stat_date <= 20191202 UNION ALL SELECT stat_date, show_date, region_id, registration_num registration_num FROM test01 WHERE period = 1 AND stat_date >= 20191202 AND stat_date <= 20191202) a LEFT JOIN test02 b ON a.region_id = b.id WHERE registration_num > 0 AND a.stat_date >= '20191202' AND a.stat_date <= '20191202' GROUP BY a.stat_date , a.show_date , COALESCE(b.region_name, '不详') ) JLS;
id estRows task access object operator info
StreamAgg 1.00 root funcs:count(1)->Column#22
└─HashAgg 8000.00 root group by:Column#32, Column#33, Column#34, funcs:count(1)->Column#31
└─Projection 10000.01 root Column#14, Column#15, coalesce(test.test02.region_name, 不详)->Column#34
└─HashJoin 10000.01 root left outer join, equal:[eq(Column#16, test.test02.id)]
├─TableReader(Build) 10000.00 root data:TableFullScan
└─TableFullScan 10000.00 cop[tikv] table:b keep order:false, stats:pseudo
└─Union(Probe) 8000.01 root
├─TableDual 8000.00 root rows:0
└─Projection 0.01 root test.test01.stat_date, test.test01.show_date, test.test01.region_id
└─TableReader 0.01 root data:Selection
└─Selection 0.01 cop[tikv] eq(test.test01.period, 1), ge(test.test01.stat_date, 20191202), gt(cast(test.test01.registration_num, bigint(20) BINARY), 0), le(test.test01.stat_date, 20191202)
└─TableFullScan 10000.00 cop[tikv] table:test01 keep order:false, stats:pseudo
└─HashAgg 1.00 root group by:Column#32, Column#33, Column#34, funcs:count(1)->Column#31
└─Projection 0.01 root Column#14, Column#15, coalesce(test.test02.region_name, 不详)->Column#34
└─IndexJoin 0.01 root left outer join, inner:TableReader, outer key:Column#16, inner key:test.test02.id, equal cond:eq(Column#16, test.test02.id)
├─Union(Build) 0.01 root
├─TableDual 0.00 root rows:0
│ └─Projection 0.01 root test.test01.stat_date, test.test01.show_date, test.test01.region_id
│ └─TableReader 0.01 root data:Selection
│ └─Selection 0.01 cop[tikv] eq(test.test01.period, 1), ge(test.test01.stat_date, 20191202), gt(cast(test.test01.registration_num, bigint(20) BINARY), 0), le(test.test01.stat_date, 20191202)
└─TableFullScan 10000.00 cop[tikv] table:test01 keep order:false, stats:pseudo
└─TableReader(Probe) 1.00 root data:TableRangeScan
└─TableRangeScan 1.00 cop[tikv] table:b range: decided by [Column#16], keep order:false, stats:pseudo
Comment on lines +509 to +519
Copy link
Member Author

@time-and-fate time-and-fate Mar 4, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change essentially reverts the changes in #21061.
Detail: https://github.com/pingcap/tidb/pull/21061/files#diff-b2102aac708eea9196b2b098c7484d1e70e76b864e2eaa28978a231419fe6a55.
I think it's expected and acceptable.
And because the test cases here don't load real stats, this plan change doesn't carry much significance.

drop table if exists t;
create table t(a int, nb int not null, nc int not null);
explain format = 'brief' select ifnull(a, 0) from t;
Expand Down
2 changes: 1 addition & 1 deletion executor/prepared_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ func TestPreparedNullParam(t *testing.T) {
ps := []*util.ProcessInfo{tkProcess}
tk.Session().SetSessionManager(&mockSessionManager1{PS: ps})
tk.MustQuery(fmt.Sprintf("explain for connection %d", tkProcess.ID)).Check(testkit.Rows(
"TableDual_5 8000.00 root rows:0"))
"TableDual_5 0.00 root rows:0"))
}
}

Expand Down
26 changes: 13 additions & 13 deletions expression/testdata/expression_suite_out.json
Original file line number Diff line number Diff line change
Expand Up @@ -197,45 +197,45 @@
{
"SQL": "explain format = 'brief' select * from t1 left join t2 on false",
"Result": [
"HashJoin 80000000.00 root CARTESIAN left outer join",
"├─TableDual(Build) 8000.00 root rows:0",
"HashJoin 10000.00 root CARTESIAN left outer join",
"├─TableDual(Build) 0.00 root rows:0",
"└─TableReader(Probe) 10000.00 root data:TableFullScan",
" └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo"
]
},
{
"SQL": "explain format = 'brief' select * from t1 right join t2 on false",
"Result": [
"HashJoin 80000000.00 root CARTESIAN right outer join",
"├─TableDual(Build) 8000.00 root rows:0",
"HashJoin 10000.00 root CARTESIAN right outer join",
"├─TableDual(Build) 0.00 root rows:0",
"└─TableReader(Probe) 10000.00 root data:TableFullScan",
" └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo"
]
},
{
"SQL": "explain format = 'brief' select * from t1 left join t2 on t1.a = 1 and t1.a = 2",
"Result": [
"HashJoin 80000000.00 root CARTESIAN left outer join",
"├─TableDual(Build) 8000.00 root rows:0",
"HashJoin 10000.00 root CARTESIAN left outer join",
"├─TableDual(Build) 0.00 root rows:0",
"└─TableReader(Probe) 10000.00 root data:TableFullScan",
" └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo"
]
},
{
"SQL": "explain format = 'brief' select * from t1 left join t2 on t1.a =1 where t1.a = 2",
"Result": [
"HashJoin 80000.00 root CARTESIAN left outer join",
"├─TableReader(Build) 10.00 root data:Selection",
"│ └─Selection 10.00 cop[tikv] eq(test.t1.a, 2)",
"│ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo",
"└─TableDual(Probe) 8000.00 root rows:0"
"HashJoin 10.00 root CARTESIAN left outer join",
"├─TableDual(Build) 0.00 root rows:0",
"└─TableReader(Probe) 10.00 root data:Selection",
" └─Selection 10.00 cop[tikv] eq(test.t1.a, 2)",
" └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo"
]
},
{
"SQL": "explain format = 'brief' select * from t1 left join t2 on t2.a = 1 and t2.a = 2",
"Result": [
"HashJoin 80000000.00 root CARTESIAN left outer join",
"├─TableDual(Build) 8000.00 root rows:0",
"HashJoin 10000.00 root CARTESIAN left outer join",
"├─TableDual(Build) 0.00 root rows:0",
"└─TableReader(Probe) 10000.00 root data:TableFullScan",
" └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo"
]
Expand Down
2 changes: 1 addition & 1 deletion planner/core/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3301,7 +3301,7 @@ func TestIssue14481(t *testing.T) {
tk.MustExec("drop table if exists t")
tk.MustExec("create table t(a int default null, b int default null, c int default null)")
plan := tk.MustQuery("explain format = 'brief' select * from t where a = 1 and a = 2")
plan.Check(testkit.Rows("TableDual 8000.00 root rows:0"))
plan.Check(testkit.Rows("TableDual 0.00 root rows:0"))
tk.MustExec("drop table t")
}

Expand Down
8 changes: 4 additions & 4 deletions planner/core/testdata/integration_suite_out.json
Original file line number Diff line number Diff line change
Expand Up @@ -5574,7 +5574,7 @@
"Plan": [
"IndexMerge 8.00 root ",
"├─IndexRangeScan(Build) 1.00 cop[tikv] table:t, index:a(a) range:[1,1], keep order:false",
"├─Selection(Build) 0.80 cop[tikv] 1",
"├─Selection(Build) 1.00 cop[tikv] 1",
"│ └─IndexRangeScan 1.00 cop[tikv] table:t, index:b(b) range:[1,1], keep order:false",
"└─TableRowIDScan(Probe) 8.00 cop[tikv] table:t keep order:false"
],
Expand All @@ -5585,7 +5585,7 @@
"Plan": [
"IndexMerge 8.00 root ",
"├─IndexRangeScan(Build) 1.00 cop[tikv] table:t, index:a(a) range:[1,1], keep order:false",
"├─Selection(Build) 0.80 cop[tikv] 1",
"├─Selection(Build) 1.00 cop[tikv] 1",
"│ └─IndexRangeScan 1.00 cop[tikv] table:t, index:b(b) range:[1,1], keep order:false",
"└─TableRowIDScan(Probe) 8.00 cop[tikv] table:t keep order:false"
],
Expand All @@ -5595,9 +5595,9 @@
"SQL": "desc format='brief' select /*+ use_index_merge(t) */ * from t where (a=1 and length(a)=1) or (b=1 and length(b)=1)",
"Plan": [
"IndexMerge 8.00 root ",
"├─Selection(Build) 0.80 cop[tikv] 1",
"├─Selection(Build) 1.00 cop[tikv] 1",
"│ └─IndexRangeScan 1.00 cop[tikv] table:t, index:a(a) range:[1,1], keep order:false",
"├─Selection(Build) 0.80 cop[tikv] 1",
"├─Selection(Build) 1.00 cop[tikv] 1",
"│ └─IndexRangeScan 1.00 cop[tikv] table:t, index:b(b) range:[1,1], keep order:false",
"└─TableRowIDScan(Probe) 8.00 cop[tikv] table:t keep order:false"
],
Expand Down
8 changes: 4 additions & 4 deletions planner/core/testdata/partition_pruner_out.json
Original file line number Diff line number Diff line change
Expand Up @@ -1485,10 +1485,10 @@
"SQL": "select * from t1 where a in (1,6) and (a=3 and b =3)",
"Result": null,
"Plan": [
"TableDual 8000.00 root rows:0"
"TableDual 0.00 root rows:0"
],
"IndexPlan": [
"TableDual 8000.00 root rows:0"
"TableDual 0.00 root rows:0"
]
},
{
Expand Down Expand Up @@ -2488,10 +2488,10 @@
"SQL": "select * from t1 where a in (1,6) and (a=3 and b =3)",
"Result": null,
"Plan": [
"TableDual 8000.00 root rows:0"
"TableDual 0.00 root rows:0"
],
"IndexPlan": [
"TableDual 8000.00 root rows:0"
"TableDual 0.00 root rows:0"
]
},
{
Expand Down
46 changes: 28 additions & 18 deletions statistics/selectivity.go
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,34 @@ func (coll *HistColl) Selectivity(ctx sessionctx.Context, exprs []expression.Exp
}
}

// Try to cover Constants
if mask > 0 {
for i, expr := range remainedExprs {
if mask&(1<<uint64(i)) == 0 {
continue
}
if c, ok := expr.(*expression.Constant); ok {
if expression.MaybeOverOptimized4PlanCache(ctx, []expression.Expression{c}) {
continue
}
if c.Value.IsNull() {
// c is null
ret *= 0
mask &^= 1 << uint64(i)
} else if isTrue, err := c.Value.ToBool(sc); err == nil {
if isTrue == 0 {
// c is false
ret *= 0
}
// c is true, no need to change ret
mask &^= 1 << uint64(i)
}
// Not expected to come here:
// err != nil, no need to do anything.
}
}
}

// Now we try to cover those still not covered DNF conditions using independence assumption,
// i.e., sel(condA or condB) = sel(condA) + sel(condB) - sel(condA) * sel(condB)
if mask > 0 {
Expand Down Expand Up @@ -354,24 +382,6 @@ func (coll *HistColl) Selectivity(ctx sessionctx.Context, exprs []expression.Exp
if ok {
continue
}
// where {"0" / 0 / "false" / false / null} or A or B ... the '0' constant item should be ignored.
if c, ok := cond.(*expression.Constant); ok {
if !expression.MaybeOverOptimized4PlanCache(ctx, []expression.Expression{cond}) {
if c.Value.IsNull() {
// constant is null
continue
}
if isTrue, err := c.Value.ToBool(sc); err == nil {
if isTrue == 0 {
// constant == 0
continue
}
// constant == 1
selectivity = 1.0
break
}
}
}

var cnfItems []expression.Expression
if scalar, ok := cond.(*expression.ScalarFunction); ok && scalar.FuncName.L == ast.LogicAnd {
Expand Down
4 changes: 2 additions & 2 deletions util/ranger/testdata/ranger_suite_out.json
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@
{
"SQL": "select * from t where a = 1 and (b = 1 or b = 2) and b = 3 and c > 1;",
"Plan": [
"TableDual_5 8000.00 root rows:0"
"TableDual_5 0.00 root rows:0"
],
"Result": null
},
Expand Down Expand Up @@ -169,7 +169,7 @@
{
"SQL": "select * from t where a = 1 and b is null and b = 1 and c > 1;",
"Plan": [
"TableDual_5 8000.00 root rows:0"
"TableDual_5 0.00 root rows:0"
],
"Result": null
}
Expand Down