diff --git a/docs/generated/settings/settings-for-tenants.txt b/docs/generated/settings/settings-for-tenants.txt
index 5a4e70053755..54791d56cb71 100644
--- a/docs/generated/settings/settings-for-tenants.txt
+++ b/docs/generated/settings/settings-for-tenants.txt
@@ -264,6 +264,7 @@ sql.stats.automatic_collection.min_stale_rows integer 500 target minimum number
 sql.stats.cleanup.recurrence string @hourly cron-tab recurrence for SQL Stats cleanup job
 sql.stats.flush.enabled boolean true if set, SQL execution statistics are periodically flushed to disk
 sql.stats.flush.interval duration 10m0s the interval at which SQL execution statistics are flushed to disk, this value must be less than or equal to sql.stats.aggregation.interval
+sql.stats.forecasts.enabled boolean true when true, enables generation of statistics forecasts by default for all tables
 sql.stats.histogram_collection.enabled boolean true histogram collection mode
 sql.stats.multi_column_collection.enabled boolean true multi-column statistics collection mode
 sql.stats.non_default_columns.min_retention_period duration 24h0m0s minimum retention period for table statistics collected on non-default columns
diff --git a/docs/generated/settings/settings.html b/docs/generated/settings/settings.html
index be606f1d83d8..a7d4be9760a3 100644
--- a/docs/generated/settings/settings.html
+++ b/docs/generated/settings/settings.html
@@ -198,6 +198,7 @@
 <tr><td><code>sql.stats.cleanup.recurrence</code></td><td>string</td><td><code>@hourly</code></td><td>cron-tab recurrence for SQL Stats cleanup job</td></tr>
 <tr><td><code>sql.stats.flush.enabled</code></td><td>boolean</td><td><code>true</code></td><td>if set, SQL execution statistics are periodically flushed to disk</td></tr>
 <tr><td><code>sql.stats.flush.interval</code></td><td>duration</td><td><code>10m0s</code></td><td>the interval at which SQL execution statistics are flushed to disk, this value must be less than or equal to sql.stats.aggregation.interval</td></tr>
+<tr><td><code>sql.stats.forecasts.enabled</code></td><td>boolean</td><td><code>true</code></td><td>when true, enables generation of statistics forecasts by default for all tables</td></tr>
 <tr><td><code>sql.stats.histogram_collection.enabled</code></td><td>boolean</td><td><code>true</code></td><td>histogram collection mode</td></tr>
 <tr><td><code>sql.stats.multi_column_collection.enabled</code></td><td>boolean</td><td><code>true</code></td><td>multi-column statistics collection mode</td></tr>
 <tr><td><code>sql.stats.non_default_columns.min_retention_period</code></td><td>duration</td><td><code>24h0m0s</code></td><td>minimum retention period for table statistics collected on non-default columns</td></tr>
diff --git a/pkg/sql/opt/exec/execbuilder/testdata/forecast b/pkg/sql/opt/exec/execbuilder/testdata/forecast
index 56427ea9f783..17654977dd22 100644
--- a/pkg/sql/opt/exec/execbuilder/testdata/forecast
+++ b/pkg/sql/opt/exec/execbuilder/testdata/forecast
@@ -842,3 +842,100 @@ scan x
  ├── cost: 16.04
  ├── key: (1)
  └── distribution: test
+
+# Test that sql.stats.forecasts.enabled can be used to enable and disable
+# generation of forecasts in the stats cache.
+
+statement ok
+SET CLUSTER SETTING sql.stats.forecasts.enabled = false
+
+query T
+EXPLAIN SELECT * FROM g WHERE a > 8
+----
+distribution: local
+vectorized: true
+·
+• scan
+  estimated row count: 0 (<0.01% of the table; stats collected <hidden> ago)
+  table: g@g_pkey
+  spans: [/9 - ]
+
+query T
+EXPLAIN SELECT * FROM s WHERE b < 3
+----
+distribution: local
+vectorized: true
+·
+• scan
+  estimated row count: 3 (100% of the table; stats collected <hidden> ago)
+  table: s@s_pkey
+  spans: [ - /2]
+
+query T
+EXPLAIN SELECT * FROM c WHERE h > '1988-08-07'
+----
+distribution: local
+vectorized: true
+·
+• scan
+  estimated row count: 0 (<0.01% of the table; stats collected <hidden> ago)
+  table: c@c_pkey
+  spans: [/'1988-08-07 00:00:00.000001+00:00' - ]
+
+query T
+EXPLAIN SELECT * FROM x WHERE a > 16
+----
+distribution: local
+vectorized: true
+·
+• scan
+  estimated row count: 0 (<0.01% of the table; stats collected <hidden> ago)
+  table: x@x_pkey
+  spans: [/17 - ]
+
+statement ok
+RESET CLUSTER SETTING sql.stats.forecasts.enabled
+
+query T
+EXPLAIN SELECT * FROM g WHERE a > 8
+----
+distribution: local
+vectorized: true
+·
+• scan
+  estimated row count: 3 (22% of the table; stats collected <hidden> ago; using stats forecast)
+  table: g@g_pkey
+  spans: [/9 - ]
+
+query T
+EXPLAIN SELECT * FROM s WHERE b < 3
+----
+distribution: local
+vectorized: true
+·
+• scan
+  estimated row count: 1 (100% of the table; stats collected <hidden> ago; using stats forecast)
+  table: s@s_pkey
+  spans: [ - /2]
+
+query T
+EXPLAIN SELECT * FROM c WHERE h > '1988-08-07'
+----
+distribution: local
+vectorized: true
+·
+• scan
+  estimated row count: 23 (96% of the table; stats collected <hidden> ago; using stats forecast)
+  table: c@c_pkey
+  spans: [/'1988-08-07 00:00:00.000001+00:00' - ]
+
+query T
+EXPLAIN SELECT * FROM x WHERE a > 16
+----
+distribution: local
+vectorized: true
+·
+• scan
+  estimated row count: 2 (50% of the table; stats collected <hidden> ago; using stats forecast)
+  table: x@x_pkey
+  spans: [/17 - ]
diff --git a/pkg/sql/stats/automatic_stats.go b/pkg/sql/stats/automatic_stats.go
index 3ef24c81ac10..c4e63088c79e 100644
--- a/pkg/sql/stats/automatic_stats.go
+++ b/pkg/sql/stats/automatic_stats.go
@@ -678,7 +678,7 @@ func (r *Refresher) maybeRefreshStats(
 	rowsAffected int64,
 	asOf time.Duration,
 ) {
-	tableStats, err := r.cache.getTableStatsFromCache(ctx, tableID)
+	tableStats, err := r.cache.getTableStatsFromCache(ctx, tableID, nil /* forecast */)
 	if err != nil {
 		log.Errorf(ctx, "failed to get table statistics: %v", err)
 		return
diff --git a/pkg/sql/stats/delete_stats_test.go b/pkg/sql/stats/delete_stats_test.go
index 1b0f13dbf892..6dc2955e9de4 100644
--- a/pkg/sql/stats/delete_stats_test.go
+++ b/pkg/sql/stats/delete_stats_test.go
@@ -262,7 +262,7 @@ func TestDeleteOldStatsForColumns(t *testing.T) {
 	}
 
 	return testutils.SucceedsSoonError(func() error {
-		tableStats, err := cache.getTableStatsFromCache(ctx, tableID)
+		tableStats, err := cache.getTableStatsFromCache(ctx, tableID, nil /* forecast */)
 		if err != nil {
 			return err
 		}
@@ -270,7 +270,7 @@ func TestDeleteOldStatsForColumns(t *testing.T) {
 		for i := range testData {
 			stat := &testData[i]
 			if stat.TableID != tableID {
-				stats, err := cache.getTableStatsFromCache(ctx, stat.TableID)
+				stats, err := cache.getTableStatsFromCache(ctx, stat.TableID, nil /* forecast */)
 				if err != nil {
 					return err
 				}
@@ -556,7 +556,7 @@ func TestDeleteOldStatsForOtherColumns(t *testing.T) {
 	}
 
 	return testutils.SucceedsSoonError(func() error {
-		tableStats, err := cache.getTableStatsFromCache(ctx, tableID)
+		tableStats, err := cache.getTableStatsFromCache(ctx, tableID, nil /* forecast */)
 		if err != nil {
 			return err
 		}
@@ -564,7 +564,7 @@ func TestDeleteOldStatsForOtherColumns(t *testing.T) {
 		for i := range testData {
 			stat := &testData[i]
 			if stat.TableID != tableID {
-				stats, err := cache.getTableStatsFromCache(ctx, stat.TableID)
+				stats, err := cache.getTableStatsFromCache(ctx, stat.TableID, nil /* forecast */)
 				if err != nil {
 					return err
 				}
diff --git a/pkg/sql/stats/forecast.go b/pkg/sql/stats/forecast.go
index 08670e78ab23..2dce2d182bce 100644
--- a/pkg/sql/stats/forecast.go
+++ b/pkg/sql/stats/forecast.go
@@ -16,6 +16,7 @@ import (
 	"time"
 
 	"github.com/cockroachdb/cockroach/pkg/jobs/jobspb"
+	"github.com/cockroachdb/cockroach/pkg/settings"
 	"github.com/cockroachdb/cockroach/pkg/sql/opt/cat"
 	"github.com/cockroachdb/cockroach/pkg/sql/sem/eval"
 	"github.com/cockroachdb/cockroach/pkg/sql/types"
@@ -24,6 +25,15 @@ import (
 	"github.com/cockroachdb/redact"
 )
 
+// UseStatisticsForecasts controls whether statistics forecasts are generated in
+// the stats cache.
+var UseStatisticsForecasts = settings.RegisterBoolSetting(
+	settings.TenantWritable,
+	"sql.stats.forecasts.enabled",
+	"when true, enables generation of statistics forecasts by default for all tables",
+	true,
+).WithPublic()
+
 // minObservationsForForecast is the minimum number of observed statistics
 // required to produce a statistics forecast. Forecasts based on 1 or 2
 // observations will always have R² = 1 (perfect goodness of fit) regardless of
diff --git a/pkg/sql/stats/stats_cache.go b/pkg/sql/stats/stats_cache.go
index 9a273b90e937..6c815e1a1a03 100644
--- a/pkg/sql/stats/stats_cache.go
+++ b/pkg/sql/stats/stats_cache.go
@@ -102,6 +102,9 @@ type cacheEntry struct {
 	// timestamp was moved, it will trigger another refresh.
 	refreshing bool
 
+	// forecast is true if stats could contain forecasts.
+	forecast bool
+
 	stats []*TableStatistic
 
 	// err is populated if the internal query to retrieve stats hit an error.
@@ -217,7 +220,8 @@ func (sc *TableStatisticsCache) GetTableStats(
 	if !statsUsageAllowed(table, sc.Settings) {
 		return nil, nil
 	}
-	return sc.getTableStatsFromCache(ctx, table.GetID())
+	forecast := forecastAllowed(table, sc.Settings)
+	return sc.getTableStatsFromCache(ctx, table.GetID(), &forecast)
 }
 
 func statsDisallowedSystemTable(tableID descpb.ID) bool {
@@ -286,19 +290,31 @@ func tableTypeCanHaveStats(table catalog.TableDescriptor) bool {
 	return true
 }
 
+// forecastAllowed returns true if statistics forecasting is allowed for the
+// given table.
+func forecastAllowed(table catalog.TableDescriptor, clusterSettings *cluster.Settings) bool {
+	return UseStatisticsForecasts.Get(&clusterSettings.SV)
+}
+
 // getTableStatsFromCache is like GetTableStats but assumes that the table ID
 // is safe to fetch statistics for: non-system, non-virtual, non-view, etc.
 func (sc *TableStatisticsCache) getTableStatsFromCache(
-	ctx context.Context, tableID descpb.ID,
+	ctx context.Context, tableID descpb.ID, forecast *bool,
 ) ([]*TableStatistic, error) {
 	sc.mu.Lock()
 	defer sc.mu.Unlock()
 
 	if found, e := sc.lookupStatsLocked(ctx, tableID, false /* stealthy */); found {
-		return e.stats, e.err
+		if forecast != nil && e.forecast != *forecast {
+			// Forecasting was recently enabled or disabled on this table. Evict the
+			// cache entry and build it again.
+			sc.mu.cache.Del(tableID)
+		} else {
+			return e.stats, e.err
+		}
 	}
 
-	return sc.addCacheEntryLocked(ctx, tableID)
+	return sc.addCacheEntryLocked(ctx, tableID, forecast != nil && *forecast)
 }
 
 // lookupStatsLocked retrieves any existing stats for the given table.
@@ -351,7 +367,7 @@
 // - mutex is locked again and the entry is updated.
 //
 func (sc *TableStatisticsCache) addCacheEntryLocked(
-	ctx context.Context, tableID descpb.ID,
+	ctx context.Context, tableID descpb.ID, forecast bool,
 ) (stats []*TableStatistic, err error) {
 	// Add a cache entry that other queries can find and wait on until we have the
 	// stats.
@@ -367,12 +383,12 @@
 		defer sc.mu.Lock()
 
 		log.VEventf(ctx, 1, "reading statistics for table %d", tableID)
-		stats, err = sc.getTableStatsFromDB(ctx, tableID)
+		stats, err = sc.getTableStatsFromDB(ctx, tableID, forecast)
 		log.VEventf(ctx, 1, "finished reading statistics for table %d", tableID)
 	}()
 
 	e.mustWait = false
-	e.stats, e.err = stats, err
+	e.forecast, e.stats, e.err = forecast, stats, err
 
 	// Wake up any other callers that are waiting on these stats.
 	e.waitCond.Broadcast()
@@ -422,6 +438,7 @@ func (sc *TableStatisticsCache) refreshCacheEntry(
 	}
 	e.refreshing = true
 
+	forecast := e.forecast
 	var stats []*TableStatistic
 	var err error
 	for {
@@ -432,7 +449,7 @@
 
 			log.VEventf(ctx, 1, "refreshing statistics for table %d", tableID)
 			// TODO(radu): pass the timestamp and use AS OF SYSTEM TIME.
-			stats, err = sc.getTableStatsFromDB(ctx, tableID)
+			stats, err = sc.getTableStatsFromDB(ctx, tableID, forecast)
 			log.VEventf(ctx, 1, "done refreshing statistics for table %d", tableID)
 		}()
 		if e.lastRefreshTimestamp.Equal(ts) {
@@ -678,7 +695,7 @@ func (tabStat *TableStatistic) String() string {
 // It ignores any statistics that cannot be decoded (e.g. because a user-defined
 // type that doesn't exist) and returns the rest (with no error).
 func (sc *TableStatisticsCache) getTableStatsFromDB(
-	ctx context.Context, tableID descpb.ID,
+	ctx context.Context, tableID descpb.ID, forecast bool,
 ) ([]*TableStatistic, error) {
 	const getTableStatisticsStmt = `
 SELECT
@@ -720,8 +737,10 @@ ORDER BY "createdAt" DESC, "columnIDs" DESC, "statisticID" DESC
 		return nil, err
 	}
 
-	forecasts := ForecastTableStatistics(ctx, statsList)
-	statsList = append(forecasts, statsList...)
+	if forecast {
+		forecasts := ForecastTableStatistics(ctx, statsList)
+		statsList = append(forecasts, statsList...)
+	}
 
 	return statsList, nil
 }
diff --git a/pkg/sql/stats/stats_cache_test.go b/pkg/sql/stats/stats_cache_test.go
index 85787b2dd004..d0a272b800e1 100644
--- a/pkg/sql/stats/stats_cache_test.go
+++ b/pkg/sql/stats/stats_cache_test.go
@@ -117,7 +117,7 @@ func checkStatsForTable(
 
 	// Perform the lookup and refresh, and confirm the
 	// returned stats match the expected values.
-	statsList, err := sc.getTableStatsFromCache(ctx, tableID)
+	statsList, err := sc.getTableStatsFromCache(ctx, tableID, nil /* forecast */)
 	if err != nil {
 		t.Fatalf("error retrieving stats: %s", err)
 	}
@@ -426,7 +426,7 @@ func TestCacheWait(t *testing.T) {
 	for n := 0; n < 10; n++ {
 		wg.Add(1)
 		go func() {
-			stats, err := sc.getTableStatsFromCache(ctx, id)
+			stats, err := sc.getTableStatsFromCache(ctx, id, nil /* forecast */)
 			if err != nil {
 				t.Error(err)
 			} else if !checkStats(stats, expectedStats[id]) {
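
Note on the caching behavior in the stats_cache.go hunks above: getTableStatsFromCache now records, in each cache entry, whether that entry was built with forecasts, and evicts the entry when the caller's current value of sql.stats.forecasts.enabled no longer matches; callers that pass a nil *bool (the tests and the automatic-stats refresher) skip that comparison. Below is a minimal, self-contained Go sketch of that evict-and-rebuild-on-flag-change pattern. It is not CockroachDB code; the names flagCache, entry, and build are illustrative only.

package main

import (
	"fmt"
	"sync"
)

// entry pairs a cached value with the flag value it was built under.
type entry struct {
	builtWithForecast bool
	value             string
}

// flagCache is a toy cache keyed by table ID. An entry is reused only while
// the caller's flag matches the flag the entry was built with.
type flagCache struct {
	mu      sync.Mutex
	entries map[int]entry
}

func newFlagCache() *flagCache {
	return &flagCache{entries: make(map[int]entry)}
}

// get returns the cached value for id, rebuilding it when the forecast flag no
// longer matches the flag recorded in the entry (i.e. the setting was toggled).
func (c *flagCache) get(id int, forecast bool, build func(bool) string) string {
	c.mu.Lock()
	defer c.mu.Unlock()
	if e, ok := c.entries[id]; ok && e.builtWithForecast == forecast {
		return e.value // hit: entry was built under the same flag value
	}
	// Miss, or the flag flipped since the entry was built: rebuild it.
	e := entry{builtWithForecast: forecast, value: build(forecast)}
	c.entries[id] = e
	return e.value
}

func main() {
	c := newFlagCache()
	build := func(forecast bool) string {
		if forecast {
			return "stats + forecasts"
		}
		return "stats only"
	}
	fmt.Println(c.get(1, true, build))  // built: "stats + forecasts"
	fmt.Println(c.get(1, true, build))  // served from cache
	fmt.Println(c.get(1, false, build)) // flag flipped: rebuilt as "stats only"
}

This keeps toggling cheap: flipping the setting does not clear the whole cache, and each entry is rebuilt lazily the first time it is read under the new value, which is the behavior exercised by the SET/RESET CLUSTER SETTING steps in the testdata hunk above.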