Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

planner: improve row count estimation for index range containing correlated columns (#9738) #9937

Merged
merged 3 commits into from
Mar 29, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions cmd/explaintest/r/subquery.result
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,19 @@ HashLeftJoin_8 8000.00 root semi join, inner:TableReader_12, other cond:eq(test.
│ └─TableScan_9 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo
└─TableReader_12 10000.00 root data:TableScan_11
└─TableScan_11 10000.00 cop table:t2, range:[-inf,+inf], keep order:false, stats:pseudo
drop table if exists t;
create table t(a int primary key, b int, c int, d int, index idx(b,c,d));
insert into t values(1,1,1,1),(2,2,2,2),(3,2,2,2),(4,2,2,2),(5,2,2,2);
analyze table t;
explain select t.c in (select count(*) from t s use index(idx), t t1 where s.b = 1 and s.c = 1 and s.d = t.a and s.a = t1.a) from t;
id count task operator info
Projection_11 5.00 root 9_aux_0
└─Apply_13 5.00 root left outer semi join, inner:StreamAgg_20, other cond:eq(test.t.c, count(*))
├─TableReader_15 5.00 root data:TableScan_14
│ └─TableScan_14 5.00 cop table:t, range:[-inf,+inf], keep order:false
└─StreamAgg_20 1.00 root funcs:count(1)
└─IndexJoin_23 0.50 root inner join, inner:TableReader_22, outer key:s.a, inner key:t1.a
├─IndexReader_27 1.00 root index:IndexScan_26
│ └─IndexScan_26 1.00 cop table:s, index:b, c, d, range: decided by [eq(s.b, 1) eq(s.c, 1) eq(s.d, test.t.a)], keep order:false
└─TableReader_22 1.00 root data:TableScan_21
└─TableScan_21 1.00 cop table:t1, range: decided by [s.a], keep order:false
6 changes: 6 additions & 0 deletions cmd/explaintest/t/subquery.test
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,9 @@ drop table if exists t2;
create table t1(a bigint, b bigint);
create table t2(a bigint, b bigint);
explain select * from t1 where t1.a in (select t1.b + t2.b from t2);

drop table if exists t;
create table t(a int primary key, b int, c int, d int, index idx(b,c,d));
insert into t values(1,1,1,1),(2,2,2,2),(3,2,2,2),(4,2,2,2),(5,2,2,2);
analyze table t;
explain select t.c in (select count(*) from t s use index(idx), t t1 where s.b = 1 and s.c = 1 and s.d = t.a and s.a = t1.a) from t;
4 changes: 2 additions & 2 deletions planner/core/cbo_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -678,9 +678,9 @@ func (s *testAnalyzeSuite) TestCorrelatedEstimation(c *C) {
" └─MaxOneRow_13 1.00 root ",
" └─Projection_14 0.80 root concat(cast(t1.a), \",\", cast(t1.b))",
" └─IndexLookUp_21 0.80 root ",
" ├─IndexScan_18 1.00 cop table:t1, index:c, range: decided by [eq(t1.c, test.t.c)], keep order:false",
" ├─IndexScan_18 1.25 cop table:t1, index:c, range: decided by [eq(t1.c, test.t.c)], keep order:false",
" └─Selection_20 0.80 cop eq(t1.a, test.t.a)",
" └─TableScan_19 1.00 cop table:t, keep order:false",
" └─TableScan_19 1.25 cop table:t, keep order:false",
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For reviewers: this row count change is caused by the fact that an index on column c exists, so we do not analyze column c itself; hence the estimated NDV of column c is ndv = float64(ds.statisticTable.Count) * distinctFactor, which is 8, not 10. This behavior differs from the master branch, because after #9315 we analyze column c even when an index on column c exists.

))
}

Expand Down
28 changes: 15 additions & 13 deletions planner/core/logical_plans.go
Original file line number Diff line number Diff line change
Expand Up @@ -465,24 +465,26 @@ func (ds *DataSource) deriveIndexPathStats(path *accessPath) (bool, error) {
} else {
path.tableFilters = ds.pushedDownConds
}
corColInAccessConds := false
if path.eqCondCount == len(path.accessConds) {
access, remained := path.splitCorColAccessCondFromFilters()
path.accessConds = append(path.accessConds, access...)
accesses, remained := path.splitCorColAccessCondFromFilters()
path.accessConds = append(path.accessConds, accesses...)
path.tableFilters = remained
if len(access) > 0 {
corColInAccessConds = true
}
}
path.indexFilters, path.tableFilters = splitIndexFilterConditions(path.tableFilters, path.index.Columns, ds.tableInfo)
if corColInAccessConds {
idxHist, ok := ds.stats.HistColl.Indices[path.index.ID]
if ok && !ds.stats.HistColl.Pseudo {
path.countAfterAccess = idxHist.AvgCountPerValue(ds.statisticTable.Count)
} else {
if len(accesses) > 0 && ds.statisticTable.Pseudo {
path.countAfterAccess = ds.statisticTable.PseudoAvgCountPerValue()
} else {
selectivity := path.countAfterAccess / float64(ds.statisticTable.Count)
for i := range accesses {
col := path.idxCols[path.eqCondCount+i]
ndv := ds.getColumnNDV(col.ID)
ndv *= selectivity
if ndv < 1 {
ndv = 1.0
}
path.countAfterAccess = path.countAfterAccess / ndv
}
}
}
path.indexFilters, path.tableFilters = splitIndexFilterConditions(path.tableFilters, path.index.Columns, ds.tableInfo)
// If the `countAfterAccess` is less than `stats.RowCount`, there must be some inconsistent stats info.
// We prefer the `stats.RowCount` because it could use more stats info to calculate the selectivity.
if path.countAfterAccess < ds.stats.RowCount {
Expand Down
21 changes: 14 additions & 7 deletions planner/core/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,19 @@ func (p *baseLogicalPlan) deriveStats() (*property.StatsInfo, error) {
return profile, nil
}

// getColumnNDV estimates the NDV (number of distinct values) of the column
// identified by colID. It uses the DataSource's original histogram as loaded
// from storage (not the derived statistics).
func (ds *DataSource) getColumnNDV(colID int64) (ndv float64) {
	if hist, ok := ds.statisticTable.Columns[colID]; ok && hist.Count > 0 {
		// Scale the histogram's NDV up to the full table row count, since the
		// histogram may cover only a sample of the rows.
		scale := float64(ds.statisticTable.Count) / float64(hist.Count)
		return float64(hist.NDV) * scale
	}
	// No usable histogram for this column: fall back to a fixed fraction of
	// the table row count as a pseudo estimate.
	return float64(ds.statisticTable.Count) * distinctFactor
}

func (ds *DataSource) getStatsByFilter(conds expression.CNFExprs) *property.StatsInfo {
profile := &property.StatsInfo{
RowCount: float64(ds.statisticTable.Count),
Expand All @@ -68,13 +81,7 @@ func (ds *DataSource) getStatsByFilter(conds expression.CNFExprs) *property.Stat
UsePseudoStats: ds.statisticTable.Pseudo,
}
for i, col := range ds.Columns {
hist, ok := ds.statisticTable.Columns[col.ID]
if ok && hist.Count > 0 {
factor := float64(ds.statisticTable.Count) / float64(hist.Count)
profile.Cardinality[i] = float64(hist.NDV) * factor
} else {
profile.Cardinality[i] = profile.RowCount * distinctFactor
}
profile.Cardinality[i] = ds.getColumnNDV(col.ID)
}
ds.stats = profile
selectivity, err := profile.HistColl.Selectivity(ds.ctx, conds)
Expand Down