diff --git a/cmd/explaintest/r/imdbload.result b/cmd/explaintest/r/imdbload.result index 788fdd3ac42bb..e6387e1729c99 100644 --- a/cmd/explaintest/r/imdbload.result +++ b/cmd/explaintest/r/imdbload.result @@ -276,23 +276,23 @@ load stats 's/imdbload_stats/movie_info.json'; load stats 's/imdbload_stats/cast_info.json'; explain select * from char_name where ((imdb_index = 'I') and (surname_pcode < 'E436')) or ((imdb_index = 'L') and (surname_pcode < 'E436')); id estRows task access object operator info -TableReader_7 804024.75 root data:Selection_6 -└─Selection_6 804024.75 cop[tikv] or(and(eq(imdbload.char_name.imdb_index, "I"), lt(imdbload.char_name.surname_pcode, "E436")), and(eq(imdbload.char_name.imdb_index, "L"), lt(imdbload.char_name.surname_pcode, "E436"))) - └─TableFullScan_5 4314864.00 cop[tikv] table:char_name keep order:false +IndexLookUp_10 1005030.94 root +├─IndexRangeScan_8(Build) 1005030.94 cop[tikv] table:char_name, index:itest2(imdb_index, surname_pcode, name_pcode_nf) range:["I" -inf,"I" "E436"), ["L" -inf,"L" "E436"), keep order:false +└─TableRowIDScan_9(Probe) 1005030.94 cop[tikv] table:char_name keep order:false explain select * from char_name use index (itest2) where ((imdb_index = 'I') and (surname_pcode < 'E436')) or ((imdb_index = 'L') and (surname_pcode < 'E436')); id estRows task access object operator info -IndexLookUp_7 2010061.87 root -├─IndexRangeScan_5(Build) 2010061.87 cop[tikv] table:char_name, index:itest2(imdb_index, surname_pcode, name_pcode_nf) range:["I" -inf,"I" "E436"), ["L" -inf,"L" "E436"), keep order:false -└─TableRowIDScan_6(Probe) 2010061.87 cop[tikv] table:char_name keep order:false +IndexLookUp_7 1005030.94 root +├─IndexRangeScan_5(Build) 1005030.94 cop[tikv] table:char_name, index:itest2(imdb_index, surname_pcode, name_pcode_nf) range:["I" -inf,"I" "E436"), ["L" -inf,"L" "E436"), keep order:false +└─TableRowIDScan_6(Probe) 1005030.94 cop[tikv] table:char_name keep order:false trace plan target = 'estimation' select * 
from char_name where ((imdb_index = 'I') and (surname_pcode < 'E436')) or ((imdb_index = 'L') and (surname_pcode < 'E436')); CE_trace -[{"table_name":"char_name","type":"Column Stats-Range","expr":"((id >= -9223372036854775808 and id <= 9223372036854775807))","row_count":4314864},{"table_name":"char_name","type":"Index Stats-Range","expr":"((imdb_index = 'I') and (surname_pcode < 'E436')) or ((imdb_index = 'L') and (surname_pcode < 'E436'))","row_count":2010061},{"table_name":"char_name","type":"Index Stats-Range","expr":"((surname_pcode < 'E436'))","row_count":1005030},{"table_name":"char_name","type":"Table Stats-Expression-CNF","expr":"`or`(`and`(`eq`(imdbload.char_name.imdb_index, 'I'), `lt`(imdbload.char_name.surname_pcode, 'E436')), `and`(`eq`(imdbload.char_name.imdb_index, 'L'), `lt`(imdbload.char_name.surname_pcode, 'E436')))","row_count":804024}] +[{"table_name":"char_name","type":"Column Stats-Point","expr":"((imdb_index = 'I'))","row_count":0},{"table_name":"char_name","type":"Column Stats-Point","expr":"((imdb_index = 'L'))","row_count":0},{"table_name":"char_name","type":"Column Stats-Range","expr":"((id >= -9223372036854775808 and id <= 9223372036854775807))","row_count":4314864},{"table_name":"char_name","type":"Index Stats-Range","expr":"((imdb_index = 'I') and (surname_pcode < 'E436')) or ((imdb_index = 'L') and (surname_pcode < 'E436'))","row_count":0},{"table_name":"char_name","type":"Index Stats-Range","expr":"((surname_pcode < 'E436'))","row_count":1005030},{"table_name":"char_name","type":"Table Stats-Expression-CNF","expr":"`or`(`and`(`eq`(imdbload.char_name.imdb_index, 'I'), `lt`(imdbload.char_name.surname_pcode, 'E436')), `and`(`eq`(imdbload.char_name.imdb_index, 'L'), `lt`(imdbload.char_name.surname_pcode, 'E436')))","row_count":804024}] explain select * from char_name where ((imdb_index = 'V') and (surname_pcode < 'L3416')); id estRows task access object operator info -TableReader_7 1927106.39 root data:Selection_6 -└─Selection_6 
1927106.39 cop[tikv] eq(imdbload.char_name.imdb_index, "V"), lt(imdbload.char_name.surname_pcode, "L3416") - └─TableFullScan_5 4314864.00 cop[tikv] table:char_name keep order:false +IndexLookUp_10 0.00 root +├─IndexRangeScan_8(Build) 0.00 cop[tikv] table:char_name, index:itest2(imdb_index, surname_pcode, name_pcode_nf) range:["V" -inf,"V" "L3416"), keep order:false +└─TableRowIDScan_9(Probe) 0.00 cop[tikv] table:char_name keep order:false explain select * from char_name where imdb_index > 'V'; id estRows task access object operator info IndexLookUp_10 0.00 root @@ -300,7 +300,7 @@ IndexLookUp_10 0.00 root └─TableRowIDScan_9(Probe) 0.00 cop[tikv] table:char_name keep order:false trace plan target = 'estimation' select * from char_name where imdb_index > 'V'; CE_trace -[{"table_name":"char_name","type":"Column Stats-Pseudo-Range","expr":"((imdb_index > 'V' and true))","row_count":1438288},{"table_name":"char_name","type":"Column Stats-Range","expr":"((id >= -9223372036854775808 and id <= 9223372036854775807))","row_count":4314864},{"table_name":"char_name","type":"Index Stats-Range","expr":"((imdb_index > 'V' and true))","row_count":0},{"table_name":"char_name","type":"Table Stats-Expression-CNF","expr":"`gt`(imdbload.char_name.imdb_index, 'V')","row_count":0}] +[{"table_name":"char_name","type":"Column Stats-Range","expr":"((id >= -9223372036854775808 and id <= 9223372036854775807))","row_count":4314864},{"table_name":"char_name","type":"Column Stats-Range","expr":"((imdb_index > 'V' and true))","row_count":0},{"table_name":"char_name","type":"Index Stats-Range","expr":"((imdb_index > 'V' and true))","row_count":0},{"table_name":"char_name","type":"Table Stats-Expression-CNF","expr":"`gt`(imdbload.char_name.imdb_index, 'V')","row_count":0}] explain select * from movie_companies where company_type_id > 2; id estRows task access object operator info @@ -318,7 +318,7 @@ IndexLookUp_10 0.00 root └─TableRowIDScan_9(Probe) 0.00 cop[tikv] table:char_name keep order:false 
trace plan target = 'estimation' select * from char_name where imdb_index > 'I' and imdb_index < 'II'; CE_trace -[{"table_name":"char_name","type":"Column Stats-Pseudo-Range","expr":"((imdb_index > 'I' and imdb_index < 'II'))","row_count":107871},{"table_name":"char_name","type":"Column Stats-Range","expr":"((id >= -9223372036854775808 and id <= 9223372036854775807))","row_count":4314864},{"table_name":"char_name","type":"Index Stats-Range","expr":"((imdb_index > 'I' and imdb_index < 'II'))","row_count":0},{"table_name":"char_name","type":"Table Stats-Expression-CNF","expr":"`and`(`gt`(imdbload.char_name.imdb_index, 'I'), `lt`(imdbload.char_name.imdb_index, 'II'))","row_count":0}] +[{"table_name":"char_name","type":"Column Stats-Range","expr":"((id >= -9223372036854775808 and id <= 9223372036854775807))","row_count":4314864},{"table_name":"char_name","type":"Column Stats-Range","expr":"((imdb_index > 'I' and imdb_index < 'II'))","row_count":0},{"table_name":"char_name","type":"Index Stats-Range","expr":"((imdb_index > 'I' and imdb_index < 'II'))","row_count":0},{"table_name":"char_name","type":"Table Stats-Expression-CNF","expr":"`and`(`gt`(imdbload.char_name.imdb_index, 'I'), `lt`(imdbload.char_name.imdb_index, 'II'))","row_count":0}] explain select * from char_name where imdb_index > 'I'; id estRows task access object operator info @@ -327,7 +327,7 @@ IndexLookUp_10 0.00 root └─TableRowIDScan_9(Probe) 0.00 cop[tikv] table:char_name keep order:false trace plan target = 'estimation' select * from char_name where imdb_index > 'I'; CE_trace -[{"table_name":"char_name","type":"Column Stats-Pseudo-Range","expr":"((imdb_index > 'I' and true))","row_count":1438288},{"table_name":"char_name","type":"Column Stats-Range","expr":"((id >= -9223372036854775808 and id <= 9223372036854775807))","row_count":4314864},{"table_name":"char_name","type":"Index Stats-Range","expr":"((imdb_index > 'I' and true))","row_count":0},{"table_name":"char_name","type":"Table 
Stats-Expression-CNF","expr":"`gt`(imdbload.char_name.imdb_index, 'I')","row_count":0}] +[{"table_name":"char_name","type":"Column Stats-Range","expr":"((id >= -9223372036854775808 and id <= 9223372036854775807))","row_count":4314864},{"table_name":"char_name","type":"Column Stats-Range","expr":"((imdb_index > 'I' and true))","row_count":0},{"table_name":"char_name","type":"Index Stats-Range","expr":"((imdb_index > 'I' and true))","row_count":0},{"table_name":"char_name","type":"Table Stats-Expression-CNF","expr":"`gt`(imdbload.char_name.imdb_index, 'I')","row_count":0}] explain select * from cast_info where nr_order < -2068070866; id estRows task access object operator info diff --git a/cmd/explaintest/t/imdbload.test b/cmd/explaintest/t/imdbload.test index 5cb6390a6501d..df73903e0bb93 100644 --- a/cmd/explaintest/t/imdbload.test +++ b/cmd/explaintest/t/imdbload.test @@ -280,6 +280,7 @@ load stats 's/imdbload_stats/cast_info.json'; -- The statistics and actual row count are from the latest imdb dataset that is distributed as old text files. -- Actual row count: 1 +-- Index lookup on itest2 index is the best plan, runs <50ms for the first time. Table scan + Selection runs >800ms. 
(using 8 core tikv * 5, copr cache disabled) explain select * from char_name where ((imdb_index = 'I') and (surname_pcode < 'E436')) or ((imdb_index = 'L') and (surname_pcode < 'E436')); explain select * from char_name use index (itest2) where ((imdb_index = 'I') and (surname_pcode < 'E436')) or ((imdb_index = 'L') and (surname_pcode < 'E436')); trace plan target = 'estimation' select * from char_name where ((imdb_index = 'I') and (surname_pcode < 'E436')) or ((imdb_index = 'L') and (surname_pcode < 'E436')); diff --git a/statistics/handle/dump.go b/statistics/handle/dump.go index 2d0979667f622..0f1c0d3ac3e5f 100644 --- a/statistics/handle/dump.go +++ b/statistics/handle/dump.go @@ -322,6 +322,7 @@ func TableStatsFromJSON(tableInfo *model.TableInfo, physicalID int64, jsonTbl *J Info: colInfo, IsHandle: tableInfo.PKIsHandle && mysql.HasPriKeyFlag(colInfo.Flag), StatsVer: statsVer, + Loaded: true, } col.Count = int64(col.TotalRowCount()) tbl.Columns[col.ID] = col diff --git a/statistics/handle/handle.go b/statistics/handle/handle.go index c762d814f3b95..d4e93f76f1cf9 100644 --- a/statistics/handle/handle.go +++ b/statistics/handle/handle.go @@ -613,7 +613,7 @@ func (h *Handle) LoadNeededHistograms() (err error) { continue } c, ok := tbl.Columns[col.ColumnID] - if !ok || c.Len() > 0 { + if !ok || c.IsLoaded() { statistics.HistogramNeededColumns.Delete(col) continue } @@ -645,6 +645,7 @@ func (h *Handle) LoadNeededHistograms() (err error) { FMSketch: fms, IsHandle: c.IsHandle, StatsVer: rows[0].GetInt64(0), + Loaded: true, } // Column.Count is calculated by Column.TotalRowCount(). Hence we don't set Column.Count when initializing colHist. colHist.Count = int64(colHist.TotalRowCount()) @@ -791,7 +792,7 @@ func (h *Handle) columnStatsFromStorage(reader *statsReader, row chunk.Row, tabl // 4. loadAll is false. 
notNeedLoad := h.Lease() > 0 && !isHandle && - (col == nil || col.Len() == 0 && col.LastUpdateVersion < histVer) && + (col == nil || !col.IsLoaded() && col.LastUpdateVersion < histVer) && !loadAll if notNeedLoad { count, err := h.columnCountFromStorage(reader, table.PhysicalID, histID, statsVer) @@ -833,6 +834,7 @@ func (h *Handle) columnStatsFromStorage(reader *statsReader, row chunk.Row, tabl IsHandle: tableInfo.PKIsHandle && mysql.HasPriKeyFlag(colInfo.Flag), Flag: flag, StatsVer: statsVer, + Loaded: true, } // Column.Count is calculated by Column.TotalRowCount(). Hence we don't set Column.Count when initializing col. col.Count = int64(col.TotalRowCount()) diff --git a/statistics/handle/handle_hist.go b/statistics/handle/handle_hist.go index 1d591c099457e..e846c4568a57f 100644 --- a/statistics/handle/handle_hist.go +++ b/statistics/handle/handle_hist.go @@ -280,6 +280,7 @@ func (h *Handle) readStatsForOne(col model.TableColumnID, c *statistics.Column, FMSketch: fms, IsHandle: c.IsHandle, StatsVer: rows[0].GetInt64(0), + Loaded: true, } // Column.Count is calculated by Column.TotalRowCount(). Hence, we don't set Column.Count when initializing colHist. colHist.Count = int64(colHist.TotalRowCount()) diff --git a/statistics/histogram.go b/statistics/histogram.go index 43477463bf103..103e3b99cabd1 100644 --- a/statistics/histogram.go +++ b/statistics/histogram.go @@ -1049,6 +1049,17 @@ type Column struct { Flag int64 LastAnalyzePos types.Datum StatsVer int64 // StatsVer is the version of the current stats, used to maintain compatibility + + // Loaded indicates whether the histogram, the topn and the cm sketch are fully loaded. + // Those three parts of a Column are loaded lazily. They will only be loaded after trying to use them. + // Note: Currently, please use Column.IsLoaded() to check whether it's loaded. + Loaded bool +} + +// IsLoaded is a wrapper around c.Loaded. +// It's just for safety while we are switching from `c.notNullCount() > 0` to `c.Loaded`.
+func (c *Column) IsLoaded() bool { + return c.Loaded || c.notNullCount() > 0 } func (c *Column) String() string { @@ -1108,20 +1119,23 @@ func (c *Column) IsInvalid(sctx sessionctx.Context, collPseudo bool) bool { if stmtctx != nil && stmtctx.StatsLoad.Fallback { return true } - if c.Histogram.NDV > 0 && c.notNullCount() == 0 && stmtctx != nil { + if !c.IsLoaded() && stmtctx != nil { if stmtctx.StatsLoad.Timeout > 0 { logutil.BgLogger().Warn("Hist for column should already be loaded as sync but not found.", zap.String(strconv.FormatInt(c.Info.ID, 10), c.Info.Name.O)) } - HistogramNeededColumns.insert(tableColumnID{TableID: c.PhysicalID, ColumnID: c.Info.ID}) + // In some tests, the c.Info is not set, so we add this check here. + if c.Info != nil { + HistogramNeededColumns.insert(tableColumnID{TableID: c.PhysicalID, ColumnID: c.Info.ID}) + } } } - return c.TotalRowCount() == 0 || (c.Histogram.NDV > 0 && c.notNullCount() == 0) + return c.TotalRowCount() == 0 || !c.IsLoaded() } // IsHistNeeded checks if this column needs histogram to be loaded func (c *Column) IsHistNeeded(collPseudo bool) bool { - return (!collPseudo || !c.NotAccurate()) && c.Histogram.NDV > 0 && c.notNullCount() == 0 + return (!collPseudo || !c.NotAccurate()) && !c.IsLoaded() } func (c *Column) equalRowCount(sctx sessionctx.Context, val types.Datum, encodedVal []byte, realtimeRowCount int64) (float64, error) { @@ -1674,6 +1688,7 @@ func (coll *HistColl) NewHistCollBySelectivity(sctx sessionctx.Context, statsNod zap.Error(err)) continue } + newCol.Loaded = oldCol.Loaded newColl.Columns[node.ID] = newCol } for id, idx := range coll.Indices { diff --git a/statistics/histogram_test.go b/statistics/histogram_test.go index ce95ddfa6fcdd..ad8ef061dad92 100644 --- a/statistics/histogram_test.go +++ b/statistics/histogram_test.go @@ -40,6 +40,7 @@ func TestNewHistogramBySelectivity(t *testing.T) { intCol := &Column{} intCol.Histogram = *NewHistogram(1, 30, 30, 0, types.NewFieldType(mysql.TypeLonglong), 
chunk.InitialCapacity, 0) intCol.IsHandle = true + intCol.Loaded = true for i := 0; i < 10; i++ { intCol.Bounds.AppendInt64(0, int64(i*3)) intCol.Bounds.AppendInt64(0, int64(i*3+2)) @@ -61,6 +62,7 @@ num: 1 lower_bound: 12 upper_bound: 14 repeats: 0 ndv: 0 num: 30 lower_bound: 27 upper_bound: 29 repeats: 0 ndv: 0` stringCol := &Column{} + stringCol.Loaded = true stringCol.Histogram = *NewHistogram(2, 15, 30, 0, types.NewFieldType(mysql.TypeString), chunk.InitialCapacity, 0) stringCol.Bounds.AppendString(0, "a") stringCol.Bounds.AppendString(0, "aaaabbbb") diff --git a/statistics/selectivity_test.go b/statistics/selectivity_test.go index 3ad5dfbd33d0b..59cebc3101524 100644 --- a/statistics/selectivity_test.go +++ b/statistics/selectivity_test.go @@ -866,7 +866,11 @@ func prepareSelectivity(testKit *testkit.TestKit, dom *domain.Domain) (*statisti return nil, err } for i := 1; i <= 5; i++ { - statsTbl.Columns[int64(i)] = &statistics.Column{Histogram: *mockStatsHistogram(int64(i), colValues, 10, types.NewFieldType(mysql.TypeLonglong)), Info: tbl.Columns[i-1]} + statsTbl.Columns[int64(i)] = &statistics.Column{ + Histogram: *mockStatsHistogram(int64(i), colValues, 10, types.NewFieldType(mysql.TypeLonglong)), + Info: tbl.Columns[i-1], + Loaded: true, + } } // Set the value of two indices' histograms. 
diff --git a/statistics/statistics_test.go b/statistics/statistics_test.go index 0c93cabcff7b6..639909016d93c 100644 --- a/statistics/statistics_test.go +++ b/statistics/statistics_test.go @@ -250,7 +250,12 @@ func SubTestColumnRange() func(*testing.T) { hg, err := BuildColumn(ctx, bucketCount, 2, collector, types.NewFieldType(mysql.TypeLonglong)) hg.PreCalculateScalar() require.NoError(t, err) - col := &Column{Histogram: *hg, CMSketch: buildCMSketch(s.rc.(*recordSet).data), Info: &model.ColumnInfo{}} + col := &Column{ + Histogram: *hg, + CMSketch: buildCMSketch(s.rc.(*recordSet).data), + Info: &model.ColumnInfo{}, + Loaded: true, + } tbl := &Table{ HistColl: HistColl{ Count: int64(col.TotalRowCount()), @@ -322,7 +327,7 @@ func SubTestIntColumnRanges() func(*testing.T) { hg.PreCalculateScalar() require.NoError(t, err) require.Equal(t, int64(100000), rowCount) - col := &Column{Histogram: *hg, Info: &model.ColumnInfo{}} + col := &Column{Histogram: *hg, Info: &model.ColumnInfo{}, Loaded: true} tbl := &Table{ HistColl: HistColl{ Count: int64(col.TotalRowCount()),